Text Generation
Transformers
Safetensors
qwen3
agents
terminal
code
software-engineering
sft
cold-start
conversational
text-generation-inference
Instructions to use open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForMultimodalLM

tokenizer = AutoTokenizer.from_pretrained("open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL")
model = AutoModelForMultimodalLM.from_pretrained("open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker
docker model run hf.co/open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL
- SGLang
How to use open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
```
- Docker Model Runner
How to use open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL with Docker Model Runner:
docker model run hf.co/open-thoughts/OpenThinkerAgent-8B-ColdStartSFTForRL
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 4130, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00847457627118644, | |
| "grad_norm": 12.236435102750375, | |
| "learning_rate": 3.8740920096852305e-07, | |
| "loss": 0.8295, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2684570252895355, | |
| "step": 5, | |
| "valid_targets_mean": 4368.4, | |
| "valid_targets_min": 2939 | |
| }, | |
| { | |
| "epoch": 0.01694915254237288, | |
| "grad_norm": 10.058309821274516, | |
| "learning_rate": 8.716707021791768e-07, | |
| "loss": 0.8055, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.5616639852523804, | |
| "step": 10, | |
| "valid_targets_mean": 9545.8, | |
| "valid_targets_min": 4547 | |
| }, | |
| { | |
| "epoch": 0.025423728813559324, | |
| "grad_norm": 8.52945178256474, | |
| "learning_rate": 1.3559322033898307e-06, | |
| "loss": 0.7631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4021530747413635, | |
| "step": 15, | |
| "valid_targets_mean": 9586.9, | |
| "valid_targets_min": 5112 | |
| }, | |
| { | |
| "epoch": 0.03389830508474576, | |
| "grad_norm": 5.980922520646845, | |
| "learning_rate": 1.8401937046004845e-06, | |
| "loss": 0.7736, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.41938316822052, | |
| "step": 20, | |
| "valid_targets_mean": 8228.5, | |
| "valid_targets_min": 3550 | |
| }, | |
| { | |
| "epoch": 0.0423728813559322, | |
| "grad_norm": 3.7984612636995125, | |
| "learning_rate": 2.324455205811138e-06, | |
| "loss": 0.7283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4073244035243988, | |
| "step": 25, | |
| "valid_targets_mean": 10135.8, | |
| "valid_targets_min": 4168 | |
| }, | |
| { | |
| "epoch": 0.05084745762711865, | |
| "grad_norm": 2.94113455953092, | |
| "learning_rate": 2.808716707021792e-06, | |
| "loss": 0.7287, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3122481405735016, | |
| "step": 30, | |
| "valid_targets_mean": 8561.0, | |
| "valid_targets_min": 3843 | |
| }, | |
| { | |
| "epoch": 0.059322033898305086, | |
| "grad_norm": 2.719124371329246, | |
| "learning_rate": 3.2929782082324455e-06, | |
| "loss": 0.7333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.31176847219467163, | |
| "step": 35, | |
| "valid_targets_mean": 5976.4, | |
| "valid_targets_min": 3033 | |
| }, | |
| { | |
| "epoch": 0.06779661016949153, | |
| "grad_norm": 1.8367152849661372, | |
| "learning_rate": 3.7772397094430994e-06, | |
| "loss": 0.6223, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3117915391921997, | |
| "step": 40, | |
| "valid_targets_mean": 6918.5, | |
| "valid_targets_min": 3257 | |
| }, | |
| { | |
| "epoch": 0.07627118644067797, | |
| "grad_norm": 1.202874459401654, | |
| "learning_rate": 4.261501210653753e-06, | |
| "loss": 0.6556, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.36333340406417847, | |
| "step": 45, | |
| "valid_targets_mean": 9124.4, | |
| "valid_targets_min": 2913 | |
| }, | |
| { | |
| "epoch": 0.0847457627118644, | |
| "grad_norm": 1.0786502836699934, | |
| "learning_rate": 4.745762711864408e-06, | |
| "loss": 0.5957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32077574729919434, | |
| "step": 50, | |
| "valid_targets_mean": 9160.0, | |
| "valid_targets_min": 4550 | |
| }, | |
| { | |
| "epoch": 0.09322033898305085, | |
| "grad_norm": 0.9850973794351479, | |
| "learning_rate": 5.230024213075061e-06, | |
| "loss": 0.5574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28348320722579956, | |
| "step": 55, | |
| "valid_targets_mean": 7648.8, | |
| "valid_targets_min": 2230 | |
| }, | |
| { | |
| "epoch": 0.1016949152542373, | |
| "grad_norm": 0.9506271905194752, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.5513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.35213959217071533, | |
| "step": 60, | |
| "valid_targets_mean": 9959.1, | |
| "valid_targets_min": 6620 | |
| }, | |
| { | |
| "epoch": 0.11016949152542373, | |
| "grad_norm": 0.7568590634359315, | |
| "learning_rate": 6.198547215496369e-06, | |
| "loss": 0.5441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32353514432907104, | |
| "step": 65, | |
| "valid_targets_mean": 8611.0, | |
| "valid_targets_min": 3485 | |
| }, | |
| { | |
| "epoch": 0.11864406779661017, | |
| "grad_norm": 0.8003434826466195, | |
| "learning_rate": 6.682808716707022e-06, | |
| "loss": 0.5294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2963908612728119, | |
| "step": 70, | |
| "valid_targets_mean": 8577.6, | |
| "valid_targets_min": 4103 | |
| }, | |
| { | |
| "epoch": 0.1271186440677966, | |
| "grad_norm": 0.6819382378620414, | |
| "learning_rate": 7.1670702179176766e-06, | |
| "loss": 0.5266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29446882009506226, | |
| "step": 75, | |
| "valid_targets_mean": 9525.9, | |
| "valid_targets_min": 2096 | |
| }, | |
| { | |
| "epoch": 0.13559322033898305, | |
| "grad_norm": 0.5426614937193089, | |
| "learning_rate": 7.65133171912833e-06, | |
| "loss": 0.4912, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1997358202934265, | |
| "step": 80, | |
| "valid_targets_mean": 6563.4, | |
| "valid_targets_min": 2943 | |
| }, | |
| { | |
| "epoch": 0.1440677966101695, | |
| "grad_norm": 0.5948300852884347, | |
| "learning_rate": 8.135593220338983e-06, | |
| "loss": 0.5251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.4521056115627289, | |
| "step": 85, | |
| "valid_targets_mean": 12116.9, | |
| "valid_targets_min": 3622 | |
| }, | |
| { | |
| "epoch": 0.15254237288135594, | |
| "grad_norm": 0.49709515079932287, | |
| "learning_rate": 8.619854721549637e-06, | |
| "loss": 0.5206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.284382164478302, | |
| "step": 90, | |
| "valid_targets_mean": 8606.2, | |
| "valid_targets_min": 3501 | |
| }, | |
| { | |
| "epoch": 0.16101694915254236, | |
| "grad_norm": 0.4713133451299385, | |
| "learning_rate": 9.10411622276029e-06, | |
| "loss": 0.474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15513190627098083, | |
| "step": 95, | |
| "valid_targets_mean": 6271.8, | |
| "valid_targets_min": 3023 | |
| }, | |
| { | |
| "epoch": 0.1694915254237288, | |
| "grad_norm": 0.4457304349817586, | |
| "learning_rate": 9.588377723970946e-06, | |
| "loss": 0.4661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2053360641002655, | |
| "step": 100, | |
| "valid_targets_mean": 7486.0, | |
| "valid_targets_min": 3465 | |
| }, | |
| { | |
| "epoch": 0.17796610169491525, | |
| "grad_norm": 0.44154277874061937, | |
| "learning_rate": 1.0072639225181599e-05, | |
| "loss": 0.4511, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20671820640563965, | |
| "step": 105, | |
| "valid_targets_mean": 6911.2, | |
| "valid_targets_min": 2853 | |
| }, | |
| { | |
| "epoch": 0.1864406779661017, | |
| "grad_norm": 0.48622015880781266, | |
| "learning_rate": 1.0556900726392252e-05, | |
| "loss": 0.4333, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.292965292930603, | |
| "step": 110, | |
| "valid_targets_mean": 8936.8, | |
| "valid_targets_min": 1718 | |
| }, | |
| { | |
| "epoch": 0.19491525423728814, | |
| "grad_norm": 0.4945674916588067, | |
| "learning_rate": 1.1041162227602906e-05, | |
| "loss": 0.46, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15357142686843872, | |
| "step": 115, | |
| "valid_targets_mean": 6594.0, | |
| "valid_targets_min": 1819 | |
| }, | |
| { | |
| "epoch": 0.2033898305084746, | |
| "grad_norm": 0.5260662505419903, | |
| "learning_rate": 1.1525423728813561e-05, | |
| "loss": 0.4256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2100408673286438, | |
| "step": 120, | |
| "valid_targets_mean": 6844.6, | |
| "valid_targets_min": 2819 | |
| }, | |
| { | |
| "epoch": 0.211864406779661, | |
| "grad_norm": 0.4859160469605518, | |
| "learning_rate": 1.2009685230024215e-05, | |
| "loss": 0.4549, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29052165150642395, | |
| "step": 125, | |
| "valid_targets_mean": 9443.6, | |
| "valid_targets_min": 2351 | |
| }, | |
| { | |
| "epoch": 0.22033898305084745, | |
| "grad_norm": 0.4887408500988149, | |
| "learning_rate": 1.2493946731234868e-05, | |
| "loss": 0.4533, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32517391443252563, | |
| "step": 130, | |
| "valid_targets_mean": 9598.9, | |
| "valid_targets_min": 3221 | |
| }, | |
| { | |
| "epoch": 0.2288135593220339, | |
| "grad_norm": 0.44709723187595857, | |
| "learning_rate": 1.2978208232445521e-05, | |
| "loss": 0.4038, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22177180647850037, | |
| "step": 135, | |
| "valid_targets_mean": 7938.1, | |
| "valid_targets_min": 1772 | |
| }, | |
| { | |
| "epoch": 0.23728813559322035, | |
| "grad_norm": 0.47189922671995305, | |
| "learning_rate": 1.3462469733656177e-05, | |
| "loss": 0.4447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19433565437793732, | |
| "step": 140, | |
| "valid_targets_mean": 7033.0, | |
| "valid_targets_min": 3731 | |
| }, | |
| { | |
| "epoch": 0.2457627118644068, | |
| "grad_norm": 0.7896370250308062, | |
| "learning_rate": 1.3946731234866828e-05, | |
| "loss": 0.4819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25007662177085876, | |
| "step": 145, | |
| "valid_targets_mean": 7750.5, | |
| "valid_targets_min": 3290 | |
| }, | |
| { | |
| "epoch": 0.2542372881355932, | |
| "grad_norm": 1.167473809507481, | |
| "learning_rate": 1.4430992736077482e-05, | |
| "loss": 0.3776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19537419080734253, | |
| "step": 150, | |
| "valid_targets_mean": 7780.4, | |
| "valid_targets_min": 2228 | |
| }, | |
| { | |
| "epoch": 0.2627118644067797, | |
| "grad_norm": 0.4604199039648987, | |
| "learning_rate": 1.4915254237288137e-05, | |
| "loss": 0.4206, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20237131416797638, | |
| "step": 155, | |
| "valid_targets_mean": 6237.5, | |
| "valid_targets_min": 2797 | |
| }, | |
| { | |
| "epoch": 0.2711864406779661, | |
| "grad_norm": 1.2851421474093927, | |
| "learning_rate": 1.5399515738498792e-05, | |
| "loss": 0.3927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1901024878025055, | |
| "step": 160, | |
| "valid_targets_mean": 7075.9, | |
| "valid_targets_min": 3662 | |
| }, | |
| { | |
| "epoch": 0.2796610169491525, | |
| "grad_norm": 0.4580396609863836, | |
| "learning_rate": 1.5883777239709442e-05, | |
| "loss": 0.3924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.146134614944458, | |
| "step": 165, | |
| "valid_targets_mean": 7100.8, | |
| "valid_targets_min": 3506 | |
| }, | |
| { | |
| "epoch": 0.288135593220339, | |
| "grad_norm": 0.4849064596017498, | |
| "learning_rate": 1.63680387409201e-05, | |
| "loss": 0.4289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18006592988967896, | |
| "step": 170, | |
| "valid_targets_mean": 8865.8, | |
| "valid_targets_min": 3261 | |
| }, | |
| { | |
| "epoch": 0.2966101694915254, | |
| "grad_norm": 0.4697056355654206, | |
| "learning_rate": 1.6852300242130752e-05, | |
| "loss": 0.3988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23008233308792114, | |
| "step": 175, | |
| "valid_targets_mean": 8906.1, | |
| "valid_targets_min": 3874 | |
| }, | |
| { | |
| "epoch": 0.3050847457627119, | |
| "grad_norm": 0.4186025820722969, | |
| "learning_rate": 1.7336561743341406e-05, | |
| "loss": 0.4076, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21983414888381958, | |
| "step": 180, | |
| "valid_targets_mean": 9217.2, | |
| "valid_targets_min": 4157 | |
| }, | |
| { | |
| "epoch": 0.3135593220338983, | |
| "grad_norm": 0.4880177558476617, | |
| "learning_rate": 1.782082324455206e-05, | |
| "loss": 0.408, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20033296942710876, | |
| "step": 185, | |
| "valid_targets_mean": 8644.5, | |
| "valid_targets_min": 5639 | |
| }, | |
| { | |
| "epoch": 0.3220338983050847, | |
| "grad_norm": 0.506835537780777, | |
| "learning_rate": 1.8305084745762713e-05, | |
| "loss": 0.4345, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15411603450775146, | |
| "step": 190, | |
| "valid_targets_mean": 5940.5, | |
| "valid_targets_min": 2975 | |
| }, | |
| { | |
| "epoch": 0.3305084745762712, | |
| "grad_norm": 0.5175806823084516, | |
| "learning_rate": 1.8789346246973366e-05, | |
| "loss": 0.3823, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22352342307567596, | |
| "step": 195, | |
| "valid_targets_mean": 6518.9, | |
| "valid_targets_min": 1922 | |
| }, | |
| { | |
| "epoch": 0.3389830508474576, | |
| "grad_norm": 0.39209882371312016, | |
| "learning_rate": 1.927360774818402e-05, | |
| "loss": 0.4033, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23113977909088135, | |
| "step": 200, | |
| "valid_targets_mean": 11211.2, | |
| "valid_targets_min": 5692 | |
| }, | |
| { | |
| "epoch": 0.3474576271186441, | |
| "grad_norm": 0.5642312831981638, | |
| "learning_rate": 1.9757869249394673e-05, | |
| "loss": 0.4274, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29515933990478516, | |
| "step": 205, | |
| "valid_targets_mean": 12401.2, | |
| "valid_targets_min": 5602 | |
| }, | |
| { | |
| "epoch": 0.3559322033898305, | |
| "grad_norm": 0.4694784872364969, | |
| "learning_rate": 2.0242130750605327e-05, | |
| "loss": 0.3671, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22254446148872375, | |
| "step": 210, | |
| "valid_targets_mean": 9149.9, | |
| "valid_targets_min": 3145 | |
| }, | |
| { | |
| "epoch": 0.3644067796610169, | |
| "grad_norm": 0.4729181140659464, | |
| "learning_rate": 2.072639225181598e-05, | |
| "loss": 0.3811, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1420256644487381, | |
| "step": 215, | |
| "valid_targets_mean": 6157.2, | |
| "valid_targets_min": 2493 | |
| }, | |
| { | |
| "epoch": 0.3728813559322034, | |
| "grad_norm": 0.48751693402766566, | |
| "learning_rate": 2.1210653753026637e-05, | |
| "loss": 0.3637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.30695676803588867, | |
| "step": 220, | |
| "valid_targets_mean": 10364.8, | |
| "valid_targets_min": 4940 | |
| }, | |
| { | |
| "epoch": 0.3813559322033898, | |
| "grad_norm": 0.4492368775328314, | |
| "learning_rate": 2.169491525423729e-05, | |
| "loss": 0.392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1817527413368225, | |
| "step": 225, | |
| "valid_targets_mean": 8587.1, | |
| "valid_targets_min": 6431 | |
| }, | |
| { | |
| "epoch": 0.3898305084745763, | |
| "grad_norm": 0.5264407991637193, | |
| "learning_rate": 2.2179176755447944e-05, | |
| "loss": 0.3598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.144405335187912, | |
| "step": 230, | |
| "valid_targets_mean": 6391.9, | |
| "valid_targets_min": 2800 | |
| }, | |
| { | |
| "epoch": 0.3983050847457627, | |
| "grad_norm": 0.44859050212635043, | |
| "learning_rate": 2.2663438256658597e-05, | |
| "loss": 0.4306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12371104955673218, | |
| "step": 235, | |
| "valid_targets_mean": 6035.9, | |
| "valid_targets_min": 2422 | |
| }, | |
| { | |
| "epoch": 0.4067796610169492, | |
| "grad_norm": 0.4487952241314178, | |
| "learning_rate": 2.3147699757869254e-05, | |
| "loss": 0.3496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15325893461704254, | |
| "step": 240, | |
| "valid_targets_mean": 7694.0, | |
| "valid_targets_min": 2322 | |
| }, | |
| { | |
| "epoch": 0.4152542372881356, | |
| "grad_norm": 0.5180958481856792, | |
| "learning_rate": 2.3631961259079904e-05, | |
| "loss": 0.3907, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20144130289554596, | |
| "step": 245, | |
| "valid_targets_mean": 6840.8, | |
| "valid_targets_min": 2925 | |
| }, | |
| { | |
| "epoch": 0.423728813559322, | |
| "grad_norm": 0.4491176387970824, | |
| "learning_rate": 2.4116222760290558e-05, | |
| "loss": 0.3935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20885750651359558, | |
| "step": 250, | |
| "valid_targets_mean": 8512.1, | |
| "valid_targets_min": 2493 | |
| }, | |
| { | |
| "epoch": 0.4322033898305085, | |
| "grad_norm": 0.4628691361446054, | |
| "learning_rate": 2.460048426150121e-05, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18055710196495056, | |
| "step": 255, | |
| "valid_targets_mean": 7962.2, | |
| "valid_targets_min": 3404 | |
| }, | |
| { | |
| "epoch": 0.4406779661016949, | |
| "grad_norm": 0.4696155991888429, | |
| "learning_rate": 2.5084745762711865e-05, | |
| "loss": 0.3845, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2186737358570099, | |
| "step": 260, | |
| "valid_targets_mean": 8228.9, | |
| "valid_targets_min": 4428 | |
| }, | |
| { | |
| "epoch": 0.4491525423728814, | |
| "grad_norm": 0.4797313441924088, | |
| "learning_rate": 2.556900726392252e-05, | |
| "loss": 0.3806, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18754912912845612, | |
| "step": 265, | |
| "valid_targets_mean": 8026.2, | |
| "valid_targets_min": 2632 | |
| }, | |
| { | |
| "epoch": 0.4576271186440678, | |
| "grad_norm": 0.500792419812694, | |
| "learning_rate": 2.6053268765133175e-05, | |
| "loss": 0.3691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14188027381896973, | |
| "step": 270, | |
| "valid_targets_mean": 6896.5, | |
| "valid_targets_min": 2844 | |
| }, | |
| { | |
| "epoch": 0.4661016949152542, | |
| "grad_norm": 0.5194573027568329, | |
| "learning_rate": 2.653753026634383e-05, | |
| "loss": 0.4269, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.25647997856140137, | |
| "step": 275, | |
| "valid_targets_mean": 10153.5, | |
| "valid_targets_min": 2859 | |
| }, | |
| { | |
| "epoch": 0.4745762711864407, | |
| "grad_norm": 0.4835165912338022, | |
| "learning_rate": 2.702179176755448e-05, | |
| "loss": 0.4008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2677648067474365, | |
| "step": 280, | |
| "valid_targets_mean": 10436.8, | |
| "valid_targets_min": 2643 | |
| }, | |
| { | |
| "epoch": 0.4830508474576271, | |
| "grad_norm": 0.4697980137297522, | |
| "learning_rate": 2.7506053268765135e-05, | |
| "loss": 0.3844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23836316168308258, | |
| "step": 285, | |
| "valid_targets_mean": 9596.2, | |
| "valid_targets_min": 4758 | |
| }, | |
| { | |
| "epoch": 0.4915254237288136, | |
| "grad_norm": 0.7658555007951203, | |
| "learning_rate": 2.799031476997579e-05, | |
| "loss": 0.389, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1868828684091568, | |
| "step": 290, | |
| "valid_targets_mean": 7418.0, | |
| "valid_targets_min": 2145 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.47702422177750037, | |
| "learning_rate": 2.8474576271186442e-05, | |
| "loss": 0.3657, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20087997615337372, | |
| "step": 295, | |
| "valid_targets_mean": 7950.9, | |
| "valid_targets_min": 1969 | |
| }, | |
| { | |
| "epoch": 0.5084745762711864, | |
| "grad_norm": 0.46273864653365726, | |
| "learning_rate": 2.8958837772397096e-05, | |
| "loss": 0.3648, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1638944149017334, | |
| "step": 300, | |
| "valid_targets_mean": 7513.8, | |
| "valid_targets_min": 1750 | |
| }, | |
| { | |
| "epoch": 0.5169491525423728, | |
| "grad_norm": 0.47460686381697653, | |
| "learning_rate": 2.9443099273607753e-05, | |
| "loss": 0.3583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22977076470851898, | |
| "step": 305, | |
| "valid_targets_mean": 10273.2, | |
| "valid_targets_min": 6552 | |
| }, | |
| { | |
| "epoch": 0.5254237288135594, | |
| "grad_norm": 0.5179070401819209, | |
| "learning_rate": 2.9927360774818406e-05, | |
| "loss": 0.3636, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1554882526397705, | |
| "step": 310, | |
| "valid_targets_mean": 6126.1, | |
| "valid_targets_min": 2357 | |
| }, | |
| { | |
| "epoch": 0.5338983050847458, | |
| "grad_norm": 0.4667464306864076, | |
| "learning_rate": 3.041162227602906e-05, | |
| "loss": 0.3599, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14054939150810242, | |
| "step": 315, | |
| "valid_targets_mean": 7663.6, | |
| "valid_targets_min": 5239 | |
| }, | |
| { | |
| "epoch": 0.5423728813559322, | |
| "grad_norm": 0.4781556906105198, | |
| "learning_rate": 3.089588377723971e-05, | |
| "loss": 0.3804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19416019320487976, | |
| "step": 320, | |
| "valid_targets_mean": 7567.8, | |
| "valid_targets_min": 1645 | |
| }, | |
| { | |
| "epoch": 0.5508474576271186, | |
| "grad_norm": 0.511629903592572, | |
| "learning_rate": 3.138014527845036e-05, | |
| "loss": 0.357, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12012749165296555, | |
| "step": 325, | |
| "valid_targets_mean": 5964.8, | |
| "valid_targets_min": 4470 | |
| }, | |
| { | |
| "epoch": 0.559322033898305, | |
| "grad_norm": 0.4480119776210039, | |
| "learning_rate": 3.186440677966102e-05, | |
| "loss": 0.3552, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.162928506731987, | |
| "step": 330, | |
| "valid_targets_mean": 7623.0, | |
| "valid_targets_min": 2591 | |
| }, | |
| { | |
| "epoch": 0.5677966101694916, | |
| "grad_norm": 0.4937825533870499, | |
| "learning_rate": 3.234866828087168e-05, | |
| "loss": 0.3694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19250673055648804, | |
| "step": 335, | |
| "valid_targets_mean": 8913.8, | |
| "valid_targets_min": 2840 | |
| }, | |
| { | |
| "epoch": 0.576271186440678, | |
| "grad_norm": 0.4878075980066041, | |
| "learning_rate": 3.283292978208233e-05, | |
| "loss": 0.3802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16013866662979126, | |
| "step": 340, | |
| "valid_targets_mean": 6744.0, | |
| "valid_targets_min": 1682 | |
| }, | |
| { | |
| "epoch": 0.5847457627118644, | |
| "grad_norm": 0.45319045459430685, | |
| "learning_rate": 3.3317191283292984e-05, | |
| "loss": 0.3863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14388547837734222, | |
| "step": 345, | |
| "valid_targets_mean": 7284.5, | |
| "valid_targets_min": 1990 | |
| }, | |
| { | |
| "epoch": 0.5932203389830508, | |
| "grad_norm": 0.47236199339171486, | |
| "learning_rate": 3.3801452784503634e-05, | |
| "loss": 0.3804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.231609508395195, | |
| "step": 350, | |
| "valid_targets_mean": 9248.5, | |
| "valid_targets_min": 2566 | |
| }, | |
| { | |
| "epoch": 0.6016949152542372, | |
| "grad_norm": 0.5119661968795367, | |
| "learning_rate": 3.4285714285714284e-05, | |
| "loss": 0.3726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21987944841384888, | |
| "step": 355, | |
| "valid_targets_mean": 8192.4, | |
| "valid_targets_min": 2871 | |
| }, | |
| { | |
| "epoch": 0.6101694915254238, | |
| "grad_norm": 0.42304412282644255, | |
| "learning_rate": 3.476997578692494e-05, | |
| "loss": 0.3661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13173159956932068, | |
| "step": 360, | |
| "valid_targets_mean": 8175.8, | |
| "valid_targets_min": 5029 | |
| }, | |
| { | |
| "epoch": 0.6186440677966102, | |
| "grad_norm": 0.47803298114996845, | |
| "learning_rate": 3.52542372881356e-05, | |
| "loss": 0.3882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14908023178577423, | |
| "step": 365, | |
| "valid_targets_mean": 6414.9, | |
| "valid_targets_min": 3910 | |
| }, | |
| { | |
| "epoch": 0.6271186440677966, | |
| "grad_norm": 0.4881529791768924, | |
| "learning_rate": 3.573849878934625e-05, | |
| "loss": 0.3723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16491301357746124, | |
| "step": 370, | |
| "valid_targets_mean": 7414.9, | |
| "valid_targets_min": 2965 | |
| }, | |
| { | |
| "epoch": 0.635593220338983, | |
| "grad_norm": 0.5108906486160846, | |
| "learning_rate": 3.6222760290556904e-05, | |
| "loss": 0.3844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22124037146568298, | |
| "step": 375, | |
| "valid_targets_mean": 7832.9, | |
| "valid_targets_min": 2970 | |
| }, | |
| { | |
| "epoch": 0.6440677966101694, | |
| "grad_norm": 0.5139550621670309, | |
| "learning_rate": 3.670702179176756e-05, | |
| "loss": 0.3794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24905268847942352, | |
| "step": 380, | |
| "valid_targets_mean": 8240.8, | |
| "valid_targets_min": 5678 | |
| }, | |
| { | |
| "epoch": 0.652542372881356, | |
| "grad_norm": 0.4939392728062636, | |
| "learning_rate": 3.719128329297821e-05, | |
| "loss": 0.398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1989961713552475, | |
| "step": 385, | |
| "valid_targets_mean": 9710.9, | |
| "valid_targets_min": 4394 | |
| }, | |
| { | |
| "epoch": 0.6610169491525424, | |
| "grad_norm": 0.47183364711246323, | |
| "learning_rate": 3.767554479418886e-05, | |
| "loss": 0.3727, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17190328240394592, | |
| "step": 390, | |
| "valid_targets_mean": 7330.9, | |
| "valid_targets_min": 2921 | |
| }, | |
| { | |
| "epoch": 0.6694915254237288, | |
| "grad_norm": 0.4406788345603544, | |
| "learning_rate": 3.815980629539952e-05, | |
| "loss": 0.3487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15205857157707214, | |
| "step": 395, | |
| "valid_targets_mean": 6784.4, | |
| "valid_targets_min": 2346 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.4456952077678055, | |
| "learning_rate": 3.8644067796610175e-05, | |
| "loss": 0.3699, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1788387894630432, | |
| "step": 400, | |
| "valid_targets_mean": 7900.6, | |
| "valid_targets_min": 3235 | |
| }, | |
| { | |
| "epoch": 0.6864406779661016, | |
| "grad_norm": 0.5523324054394816, | |
| "learning_rate": 3.9128329297820825e-05, | |
| "loss": 0.3574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13928887248039246, | |
| "step": 405, | |
| "valid_targets_mean": 5171.9, | |
| "valid_targets_min": 3136 | |
| }, | |
| { | |
| "epoch": 0.6949152542372882, | |
| "grad_norm": 0.5002722226670426, | |
| "learning_rate": 3.961259079903148e-05, | |
| "loss": 0.385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1312628537416458, | |
| "step": 410, | |
| "valid_targets_mean": 6787.4, | |
| "valid_targets_min": 3693 | |
| }, | |
| { | |
| "epoch": 0.7033898305084746, | |
| "grad_norm": 0.42843480849686666, | |
| "learning_rate": 3.9999992856441635e-05, | |
| "loss": 0.3493, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1744052767753601, | |
| "step": 415, | |
| "valid_targets_mean": 7211.9, | |
| "valid_targets_min": 2393 | |
| }, | |
| { | |
| "epoch": 0.711864406779661, | |
| "grad_norm": 0.4679043644147995, | |
| "learning_rate": 3.999974283243472e-05, | |
| "loss": 0.3639, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1082448959350586, | |
| "step": 420, | |
| "valid_targets_mean": 6552.6, | |
| "valid_targets_min": 2437 | |
| }, | |
| { | |
| "epoch": 0.7203389830508474, | |
| "grad_norm": 0.47331069467877396, | |
| "learning_rate": 3.9999135635612606e-05, | |
| "loss": 0.4011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23097260296344757, | |
| "step": 425, | |
| "valid_targets_mean": 8409.1, | |
| "valid_targets_min": 3741 | |
| }, | |
| { | |
| "epoch": 0.7288135593220338, | |
| "grad_norm": 0.5135319796540361, | |
| "learning_rate": 3.999817127681917e-05, | |
| "loss": 0.3778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.33611786365509033, | |
| "step": 430, | |
| "valid_targets_mean": 11845.2, | |
| "valid_targets_min": 1936 | |
| }, | |
| { | |
| "epoch": 0.7372881355932204, | |
| "grad_norm": 0.5557676245987662, | |
| "learning_rate": 3.9996849773276764e-05, | |
| "loss": 0.3458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12023050338029861, | |
| "step": 435, | |
| "valid_targets_mean": 4693.2, | |
| "valid_targets_min": 3425 | |
| }, | |
| { | |
| "epoch": 0.7457627118644068, | |
| "grad_norm": 0.5410110705579056, | |
| "learning_rate": 3.9995171148585944e-05, | |
| "loss": 0.3175, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16153821349143982, | |
| "step": 440, | |
| "valid_targets_mean": 6421.5, | |
| "valid_targets_min": 3319 | |
| }, | |
| { | |
| "epoch": 0.7542372881355932, | |
| "grad_norm": 0.4951775086532844, | |
| "learning_rate": 3.999313543272505e-05, | |
| "loss": 0.3564, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1502537578344345, | |
| "step": 445, | |
| "valid_targets_mean": 7038.1, | |
| "valid_targets_min": 3514 | |
| }, | |
| { | |
| "epoch": 0.7627118644067796, | |
| "grad_norm": 0.47906530418320226, | |
| "learning_rate": 3.999074266204967e-05, | |
| "loss": 0.3263, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16620051860809326, | |
| "step": 450, | |
| "valid_targets_mean": 8577.1, | |
| "valid_targets_min": 3324 | |
| }, | |
| { | |
| "epoch": 0.7711864406779662, | |
| "grad_norm": 0.5177860251716547, | |
| "learning_rate": 3.998799287929199e-05, | |
| "loss": 0.3375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20412178337574005, | |
| "step": 455, | |
| "valid_targets_mean": 7626.9, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 0.7796610169491526, | |
| "grad_norm": 0.46635973969392164, | |
| "learning_rate": 3.998488613356002e-05, | |
| "loss": 0.3464, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1993708461523056, | |
| "step": 460, | |
| "valid_targets_mean": 9630.6, | |
| "valid_targets_min": 4060 | |
| }, | |
| { | |
| "epoch": 0.788135593220339, | |
| "grad_norm": 0.48835104343419244, | |
| "learning_rate": 3.998142248033673e-05, | |
| "loss": 0.3796, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20394505560398102, | |
| "step": 465, | |
| "valid_targets_mean": 7548.1, | |
| "valid_targets_min": 3516 | |
| }, | |
| { | |
| "epoch": 0.7966101694915254, | |
| "grad_norm": 0.4557086785487366, | |
| "learning_rate": 3.997760198147904e-05, | |
| "loss": 0.3486, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15721432864665985, | |
| "step": 470, | |
| "valid_targets_mean": 6833.1, | |
| "valid_targets_min": 2772 | |
| }, | |
| { | |
| "epoch": 0.8050847457627118, | |
| "grad_norm": 0.4515063413898264, | |
| "learning_rate": 3.9973424705216774e-05, | |
| "loss": 0.3784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1749514490365982, | |
| "step": 475, | |
| "valid_targets_mean": 7408.5, | |
| "valid_targets_min": 1943 | |
| }, | |
| { | |
| "epoch": 0.8135593220338984, | |
| "grad_norm": 0.4428676672373464, | |
| "learning_rate": 3.996889072615134e-05, | |
| "loss": 0.3258, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21249344944953918, | |
| "step": 480, | |
| "valid_targets_mean": 9819.9, | |
| "valid_targets_min": 3010 | |
| }, | |
| { | |
| "epoch": 0.8220338983050848, | |
| "grad_norm": 0.49524032365604226, | |
| "learning_rate": 3.99640001252545e-05, | |
| "loss": 0.375, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1323350965976715, | |
| "step": 485, | |
| "valid_targets_mean": 5834.0, | |
| "valid_targets_min": 3776 | |
| }, | |
| { | |
| "epoch": 0.8305084745762712, | |
| "grad_norm": 0.47661852289977547, | |
| "learning_rate": 3.9958752989866856e-05, | |
| "loss": 0.3669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2077844887971878, | |
| "step": 490, | |
| "valid_targets_mean": 7829.8, | |
| "valid_targets_min": 3541 | |
| }, | |
| { | |
| "epoch": 0.8389830508474576, | |
| "grad_norm": 0.5368833318390628, | |
| "learning_rate": 3.995314941369631e-05, | |
| "loss": 0.3385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1979312300682068, | |
| "step": 495, | |
| "valid_targets_mean": 7753.1, | |
| "valid_targets_min": 3245 | |
| }, | |
| { | |
| "epoch": 0.847457627118644, | |
| "grad_norm": 0.43413683875323356, | |
| "learning_rate": 3.994718949681642e-05, | |
| "loss": 0.3241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08601364493370056, | |
| "step": 500, | |
| "valid_targets_mean": 4876.6, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 0.8559322033898306, | |
| "grad_norm": 0.45697287254458874, | |
| "learning_rate": 3.994087334566455e-05, | |
| "loss": 0.3471, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1718927025794983, | |
| "step": 505, | |
| "valid_targets_mean": 7721.6, | |
| "valid_targets_min": 4226 | |
| }, | |
| { | |
| "epoch": 0.864406779661017, | |
| "grad_norm": 0.47862116333913, | |
| "learning_rate": 3.9934201073040035e-05, | |
| "loss": 0.3587, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1376647651195526, | |
| "step": 510, | |
| "valid_targets_mean": 7147.1, | |
| "valid_targets_min": 4853 | |
| }, | |
| { | |
| "epoch": 0.8728813559322034, | |
| "grad_norm": 0.4429781490950345, | |
| "learning_rate": 3.992717279810213e-05, | |
| "loss": 0.3546, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17605769634246826, | |
| "step": 515, | |
| "valid_targets_mean": 7849.5, | |
| "valid_targets_min": 4396 | |
| }, | |
| { | |
| "epoch": 0.8813559322033898, | |
| "grad_norm": 0.45127457745702443, | |
| "learning_rate": 3.991978864636788e-05, | |
| "loss": 0.3421, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1218881830573082, | |
| "step": 520, | |
| "valid_targets_mean": 6575.2, | |
| "valid_targets_min": 2730 | |
| }, | |
| { | |
| "epoch": 0.8898305084745762, | |
| "grad_norm": 0.4345536633734186, | |
| "learning_rate": 3.9912048749709896e-05, | |
| "loss": 0.3497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1432826966047287, | |
| "step": 525, | |
| "valid_targets_mean": 8336.4, | |
| "valid_targets_min": 3085 | |
| }, | |
| { | |
| "epoch": 0.8983050847457628, | |
| "grad_norm": 0.4843526513333187, | |
| "learning_rate": 3.990395324635399e-05, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18618427217006683, | |
| "step": 530, | |
| "valid_targets_mean": 8355.0, | |
| "valid_targets_min": 4107 | |
| }, | |
| { | |
| "epoch": 0.9067796610169492, | |
| "grad_norm": 0.44356795524997744, | |
| "learning_rate": 3.989550228087671e-05, | |
| "loss": 0.3497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1626293659210205, | |
| "step": 535, | |
| "valid_targets_mean": 7243.9, | |
| "valid_targets_min": 2909 | |
| }, | |
| { | |
| "epoch": 0.9152542372881356, | |
| "grad_norm": 0.41561791071560467, | |
| "learning_rate": 3.988669600420275e-05, | |
| "loss": 0.3201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1661594808101654, | |
| "step": 540, | |
| "valid_targets_mean": 9455.1, | |
| "valid_targets_min": 4298 | |
| }, | |
| { | |
| "epoch": 0.923728813559322, | |
| "grad_norm": 0.4358520648605049, | |
| "learning_rate": 3.9877534573602254e-05, | |
| "loss": 0.3617, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.27095770835876465, | |
| "step": 545, | |
| "valid_targets_mean": 8973.9, | |
| "valid_targets_min": 2827 | |
| }, | |
| { | |
| "epoch": 0.9322033898305084, | |
| "grad_norm": 0.4813894750533826, | |
| "learning_rate": 3.9868018152688044e-05, | |
| "loss": 0.3558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17274773120880127, | |
| "step": 550, | |
| "valid_targets_mean": 7341.4, | |
| "valid_targets_min": 1721 | |
| }, | |
| { | |
| "epoch": 0.940677966101695, | |
| "grad_norm": 0.46263973848113316, | |
| "learning_rate": 3.985814691141263e-05, | |
| "loss": 0.3598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2267192006111145, | |
| "step": 555, | |
| "valid_targets_mean": 9122.1, | |
| "valid_targets_min": 3336 | |
| }, | |
| { | |
| "epoch": 0.9491525423728814, | |
| "grad_norm": 0.46520816240799234, | |
| "learning_rate": 3.984792102606524e-05, | |
| "loss": 0.3821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11244647204875946, | |
| "step": 560, | |
| "valid_targets_mean": 6185.9, | |
| "valid_targets_min": 2997 | |
| }, | |
| { | |
| "epoch": 0.9576271186440678, | |
| "grad_norm": 0.5072756278635104, | |
| "learning_rate": 3.9837340679268634e-05, | |
| "loss": 0.315, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11192071437835693, | |
| "step": 565, | |
| "valid_targets_mean": 6087.0, | |
| "valid_targets_min": 3094 | |
| }, | |
| { | |
| "epoch": 0.9661016949152542, | |
| "grad_norm": 0.4165557159597932, | |
| "learning_rate": 3.982640605997585e-05, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14271879196166992, | |
| "step": 570, | |
| "valid_targets_mean": 7421.4, | |
| "valid_targets_min": 3182 | |
| }, | |
| { | |
| "epoch": 0.9745762711864406, | |
| "grad_norm": 0.4465966835957301, | |
| "learning_rate": 3.981511736346684e-05, | |
| "loss": 0.3512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1899096965789795, | |
| "step": 575, | |
| "valid_targets_mean": 8330.2, | |
| "valid_targets_min": 3990 | |
| }, | |
| { | |
| "epoch": 0.9830508474576272, | |
| "grad_norm": 0.47188153745533346, | |
| "learning_rate": 3.980347479134498e-05, | |
| "loss": 0.3326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16237373650074005, | |
| "step": 580, | |
| "valid_targets_mean": 7614.1, | |
| "valid_targets_min": 3425 | |
| }, | |
| { | |
| "epoch": 0.9915254237288136, | |
| "grad_norm": 0.45659300690082466, | |
| "learning_rate": 3.9791478551533435e-05, | |
| "loss": 0.3241, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1724914014339447, | |
| "step": 585, | |
| "valid_targets_mean": 7735.9, | |
| "valid_targets_min": 3159 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.4731948101985973, | |
| "learning_rate": 3.977912885827151e-05, | |
| "loss": 0.342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1396312415599823, | |
| "step": 590, | |
| "valid_targets_mean": 6537.0, | |
| "valid_targets_min": 2991 | |
| }, | |
| { | |
| "epoch": 1.0084745762711864, | |
| "grad_norm": 0.4627342217897108, | |
| "learning_rate": 3.976642593211079e-05, | |
| "loss": 0.3613, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24919381737709045, | |
| "step": 595, | |
| "valid_targets_mean": 10379.8, | |
| "valid_targets_min": 3903 | |
| }, | |
| { | |
| "epoch": 1.0169491525423728, | |
| "grad_norm": 0.4536741195006425, | |
| "learning_rate": 3.9753369999911166e-05, | |
| "loss": 0.3309, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15797701478004456, | |
| "step": 600, | |
| "valid_targets_mean": 7904.0, | |
| "valid_targets_min": 3709 | |
| }, | |
| { | |
| "epoch": 1.0254237288135593, | |
| "grad_norm": 0.4589851155496452, | |
| "learning_rate": 3.973996129483684e-05, | |
| "loss": 0.3171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10587269067764282, | |
| "step": 605, | |
| "valid_targets_mean": 7432.1, | |
| "valid_targets_min": 2532 | |
| }, | |
| { | |
| "epoch": 1.0338983050847457, | |
| "grad_norm": 0.47318377384255594, | |
| "learning_rate": 3.972620005635216e-05, | |
| "loss": 0.2935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1729205846786499, | |
| "step": 610, | |
| "valid_targets_mean": 7595.9, | |
| "valid_targets_min": 2825 | |
| }, | |
| { | |
| "epoch": 1.042372881355932, | |
| "grad_norm": 0.4469128531341844, | |
| "learning_rate": 3.971208653021727e-05, | |
| "loss": 0.3171, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20472905039787292, | |
| "step": 615, | |
| "valid_targets_mean": 8200.1, | |
| "valid_targets_min": 2509 | |
| }, | |
| { | |
| "epoch": 1.0508474576271187, | |
| "grad_norm": 0.4833955669012435, | |
| "learning_rate": 3.969762096848383e-05, | |
| "loss": 0.3673, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15367567539215088, | |
| "step": 620, | |
| "valid_targets_mean": 7303.2, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 1.0593220338983051, | |
| "grad_norm": 0.4180140372553057, | |
| "learning_rate": 3.968280362949042e-05, | |
| "loss": 0.329, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20632800459861755, | |
| "step": 625, | |
| "valid_targets_mean": 9386.4, | |
| "valid_targets_min": 4698 | |
| }, | |
| { | |
| "epoch": 1.0677966101694916, | |
| "grad_norm": 0.40174569986041325, | |
| "learning_rate": 3.9667634777857975e-05, | |
| "loss": 0.3588, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14220260083675385, | |
| "step": 630, | |
| "valid_targets_mean": 8459.8, | |
| "valid_targets_min": 3319 | |
| }, | |
| { | |
| "epoch": 1.076271186440678, | |
| "grad_norm": 0.44484170138321344, | |
| "learning_rate": 3.965211468448505e-05, | |
| "loss": 0.3388, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14559875428676605, | |
| "step": 635, | |
| "valid_targets_mean": 7179.8, | |
| "valid_targets_min": 3830 | |
| }, | |
| { | |
| "epoch": 1.0847457627118644, | |
| "grad_norm": 0.4395541639351123, | |
| "learning_rate": 3.963624362654299e-05, | |
| "loss": 0.3324, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13254770636558533, | |
| "step": 640, | |
| "valid_targets_mean": 7157.9, | |
| "valid_targets_min": 1936 | |
| }, | |
| { | |
| "epoch": 1.0932203389830508, | |
| "grad_norm": 0.4654264001987176, | |
| "learning_rate": 3.962002188747096e-05, | |
| "loss": 0.3234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28930819034576416, | |
| "step": 645, | |
| "valid_targets_mean": 10538.6, | |
| "valid_targets_min": 2967 | |
| }, | |
| { | |
| "epoch": 1.1016949152542372, | |
| "grad_norm": 0.40719297523344944, | |
| "learning_rate": 3.9603449756970877e-05, | |
| "loss": 0.3219, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09647984802722931, | |
| "step": 650, | |
| "valid_targets_mean": 6410.5, | |
| "valid_targets_min": 2114 | |
| }, | |
| { | |
| "epoch": 1.1101694915254237, | |
| "grad_norm": 0.6812110649265294, | |
| "learning_rate": 3.95865275310023e-05, | |
| "loss": 0.3275, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09337997436523438, | |
| "step": 655, | |
| "valid_targets_mean": 4987.0, | |
| "valid_targets_min": 2152 | |
| }, | |
| { | |
| "epoch": 1.11864406779661, | |
| "grad_norm": 0.4778768969090781, | |
| "learning_rate": 3.9569255511777054e-05, | |
| "loss": 0.3253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1386423110961914, | |
| "step": 660, | |
| "valid_targets_mean": 6117.6, | |
| "valid_targets_min": 2957 | |
| }, | |
| { | |
| "epoch": 1.1271186440677967, | |
| "grad_norm": 0.4193417013796084, | |
| "learning_rate": 3.955163400775389e-05, | |
| "loss": 0.361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12261968106031418, | |
| "step": 665, | |
| "valid_targets_mean": 6347.0, | |
| "valid_targets_min": 3535 | |
| }, | |
| { | |
| "epoch": 1.1355932203389831, | |
| "grad_norm": 0.5013613951602334, | |
| "learning_rate": 3.953366333363297e-05, | |
| "loss": 0.3222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12408879399299622, | |
| "step": 670, | |
| "valid_targets_mean": 6869.2, | |
| "valid_targets_min": 1409 | |
| }, | |
| { | |
| "epoch": 1.1440677966101696, | |
| "grad_norm": 0.4160706590681276, | |
| "learning_rate": 3.9515343810350236e-05, | |
| "loss": 0.3091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2585446834564209, | |
| "step": 675, | |
| "valid_targets_mean": 11118.4, | |
| "valid_targets_min": 2837 | |
| }, | |
| { | |
| "epoch": 1.152542372881356, | |
| "grad_norm": 0.48357167974115245, | |
| "learning_rate": 3.949667576507168e-05, | |
| "loss": 0.3338, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2446514070034027, | |
| "step": 680, | |
| "valid_targets_mean": 7852.5, | |
| "valid_targets_min": 2865 | |
| }, | |
| { | |
| "epoch": 1.1610169491525424, | |
| "grad_norm": 0.5207580063819728, | |
| "learning_rate": 3.9477659531187484e-05, | |
| "loss": 0.3305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2768232822418213, | |
| "step": 685, | |
| "valid_targets_mean": 9521.2, | |
| "valid_targets_min": 1901 | |
| }, | |
| { | |
| "epoch": 1.1694915254237288, | |
| "grad_norm": 0.4352619530283247, | |
| "learning_rate": 3.9458295448306134e-05, | |
| "loss": 0.3273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.169637992978096, | |
| "step": 690, | |
| "valid_targets_mean": 8180.2, | |
| "valid_targets_min": 3197 | |
| }, | |
| { | |
| "epoch": 1.1779661016949152, | |
| "grad_norm": 0.6375401638430952, | |
| "learning_rate": 3.943858386224825e-05, | |
| "loss": 0.3542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20666490495204926, | |
| "step": 695, | |
| "valid_targets_mean": 7918.8, | |
| "valid_targets_min": 2972 | |
| }, | |
| { | |
| "epoch": 1.1864406779661016, | |
| "grad_norm": 0.5237074435643403, | |
| "learning_rate": 3.941852512504052e-05, | |
| "loss": 0.3069, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16815276443958282, | |
| "step": 700, | |
| "valid_targets_mean": 8423.9, | |
| "valid_targets_min": 3467 | |
| }, | |
| { | |
| "epoch": 1.194915254237288, | |
| "grad_norm": 0.46230638915825684, | |
| "learning_rate": 3.939811959490931e-05, | |
| "loss": 0.3183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22283053398132324, | |
| "step": 705, | |
| "valid_targets_mean": 11047.0, | |
| "valid_targets_min": 4423 | |
| }, | |
| { | |
| "epoch": 1.2033898305084745, | |
| "grad_norm": 0.4545188486641544, | |
| "learning_rate": 3.937736763627435e-05, | |
| "loss": 0.3157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19005794823169708, | |
| "step": 710, | |
| "valid_targets_mean": 8591.1, | |
| "valid_targets_min": 3981 | |
| }, | |
| { | |
| "epoch": 1.211864406779661, | |
| "grad_norm": 0.4552644672198827, | |
| "learning_rate": 3.935626961974217e-05, | |
| "loss": 0.3603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14236797392368317, | |
| "step": 715, | |
| "valid_targets_mean": 6977.0, | |
| "valid_targets_min": 3031 | |
| }, | |
| { | |
| "epoch": 1.2203389830508475, | |
| "grad_norm": 0.46886754247998963, | |
| "learning_rate": 3.933482592209951e-05, | |
| "loss": 0.3314, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26897650957107544, | |
| "step": 720, | |
| "valid_targets_mean": 9310.5, | |
| "valid_targets_min": 3393 | |
| }, | |
| { | |
| "epoch": 1.228813559322034, | |
| "grad_norm": 0.48772266789453395, | |
| "learning_rate": 3.931303692630661e-05, | |
| "loss": 0.3101, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16506057977676392, | |
| "step": 725, | |
| "valid_targets_mean": 7079.0, | |
| "valid_targets_min": 2432 | |
| }, | |
| { | |
| "epoch": 1.2372881355932204, | |
| "grad_norm": 0.42871408585096393, | |
| "learning_rate": 3.92909030214903e-05, | |
| "loss": 0.3268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11513777822256088, | |
| "step": 730, | |
| "valid_targets_mean": 6985.9, | |
| "valid_targets_min": 3986 | |
| }, | |
| { | |
| "epoch": 1.2457627118644068, | |
| "grad_norm": 0.44587055811958604, | |
| "learning_rate": 3.9268424602937124e-05, | |
| "loss": 0.3199, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17389066517353058, | |
| "step": 735, | |
| "valid_targets_mean": 8847.4, | |
| "valid_targets_min": 2642 | |
| }, | |
| { | |
| "epoch": 1.2542372881355932, | |
| "grad_norm": 0.47861212973477374, | |
| "learning_rate": 3.9245602072086246e-05, | |
| "loss": 0.3732, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10631030052900314, | |
| "step": 740, | |
| "valid_targets_mean": 5761.5, | |
| "valid_targets_min": 4648 | |
| }, | |
| { | |
| "epoch": 1.2627118644067796, | |
| "grad_norm": 0.49947791123701396, | |
| "learning_rate": 3.9222435836522286e-05, | |
| "loss": 0.3373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1645757257938385, | |
| "step": 745, | |
| "valid_targets_mean": 9582.9, | |
| "valid_targets_min": 1652 | |
| }, | |
| { | |
| "epoch": 1.271186440677966, | |
| "grad_norm": 0.42913688486951385, | |
| "learning_rate": 3.919892630996804e-05, | |
| "loss": 0.3352, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14238014817237854, | |
| "step": 750, | |
| "valid_targets_mean": 6065.2, | |
| "valid_targets_min": 2457 | |
| }, | |
| { | |
| "epoch": 1.2796610169491525, | |
| "grad_norm": 0.4255980165569159, | |
| "learning_rate": 3.9175073912277107e-05, | |
| "loss": 0.305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11966271698474884, | |
| "step": 755, | |
| "valid_targets_mean": 6778.5, | |
| "valid_targets_min": 3511 | |
| }, | |
| { | |
| "epoch": 1.288135593220339, | |
| "grad_norm": 0.4383294865094197, | |
| "learning_rate": 3.915087906942635e-05, | |
| "loss": 0.3196, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18853090703487396, | |
| "step": 760, | |
| "valid_targets_mean": 7773.5, | |
| "valid_targets_min": 2965 | |
| }, | |
| { | |
| "epoch": 1.2966101694915255, | |
| "grad_norm": 0.42624494152006837, | |
| "learning_rate": 3.912634221350834e-05, | |
| "loss": 0.3482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20799216628074646, | |
| "step": 765, | |
| "valid_targets_mean": 10677.5, | |
| "valid_targets_min": 2482 | |
| }, | |
| { | |
| "epoch": 1.305084745762712, | |
| "grad_norm": 0.4740167806151876, | |
| "learning_rate": 3.910146378272361e-05, | |
| "loss": 0.316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1015789806842804, | |
| "step": 770, | |
| "valid_targets_mean": 5372.8, | |
| "valid_targets_min": 2801 | |
| }, | |
| { | |
| "epoch": 1.3135593220338984, | |
| "grad_norm": 0.5006756358318566, | |
| "learning_rate": 3.9076244221372824e-05, | |
| "loss": 0.321, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1526796519756317, | |
| "step": 775, | |
| "valid_targets_mean": 6574.1, | |
| "valid_targets_min": 2460 | |
| }, | |
| { | |
| "epoch": 1.3220338983050848, | |
| "grad_norm": 0.4267845459550884, | |
| "learning_rate": 3.905068397984886e-05, | |
| "loss": 0.3168, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16761890053749084, | |
| "step": 780, | |
| "valid_targets_mean": 8788.9, | |
| "valid_targets_min": 4154 | |
| }, | |
| { | |
| "epoch": 1.3305084745762712, | |
| "grad_norm": 0.41392713104247275, | |
| "learning_rate": 3.9024783514628765e-05, | |
| "loss": 0.3568, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17586758732795715, | |
| "step": 785, | |
| "valid_targets_mean": 10475.6, | |
| "valid_targets_min": 3622 | |
| }, | |
| { | |
| "epoch": 1.3389830508474576, | |
| "grad_norm": 0.5337105064121502, | |
| "learning_rate": 3.899854328826559e-05, | |
| "loss": 0.3425, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0900270864367485, | |
| "step": 790, | |
| "valid_targets_mean": 4031.1, | |
| "valid_targets_min": 1420 | |
| }, | |
| { | |
| "epoch": 1.347457627118644, | |
| "grad_norm": 0.4488578510290259, | |
| "learning_rate": 3.897196376938013e-05, | |
| "loss": 0.318, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17309337854385376, | |
| "step": 795, | |
| "valid_targets_mean": 6397.8, | |
| "valid_targets_min": 2425 | |
| }, | |
| { | |
| "epoch": 1.3559322033898304, | |
| "grad_norm": 0.39620489867483555, | |
| "learning_rate": 3.8945045432652575e-05, | |
| "loss": 0.3404, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08745632320642471, | |
| "step": 800, | |
| "valid_targets_mean": 6055.2, | |
| "valid_targets_min": 3550 | |
| }, | |
| { | |
| "epoch": 1.3644067796610169, | |
| "grad_norm": 0.42992870791787763, | |
| "learning_rate": 3.8917788758814e-05, | |
| "loss": 0.3174, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13015861809253693, | |
| "step": 805, | |
| "valid_targets_mean": 7460.4, | |
| "valid_targets_min": 3055 | |
| }, | |
| { | |
| "epoch": 1.3728813559322033, | |
| "grad_norm": 0.40354106059339273, | |
| "learning_rate": 3.889019423463783e-05, | |
| "loss": 0.3149, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09485109150409698, | |
| "step": 810, | |
| "valid_targets_mean": 6865.6, | |
| "valid_targets_min": 2911 | |
| }, | |
| { | |
| "epoch": 1.3813559322033897, | |
| "grad_norm": 0.4686368291789137, | |
| "learning_rate": 3.8862262352931075e-05, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1537308543920517, | |
| "step": 815, | |
| "valid_targets_mean": 6440.1, | |
| "valid_targets_min": 2110 | |
| }, | |
| { | |
| "epoch": 1.3898305084745763, | |
| "grad_norm": 0.4408642589858194, | |
| "learning_rate": 3.88339936125256e-05, | |
| "loss": 0.3157, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17375317215919495, | |
| "step": 820, | |
| "valid_targets_mean": 8280.6, | |
| "valid_targets_min": 2743 | |
| }, | |
| { | |
| "epoch": 1.3983050847457628, | |
| "grad_norm": 0.46062681778063724, | |
| "learning_rate": 3.8805388518269184e-05, | |
| "loss": 0.2962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17423167824745178, | |
| "step": 825, | |
| "valid_targets_mean": 8636.1, | |
| "valid_targets_min": 4169 | |
| }, | |
| { | |
| "epoch": 1.4067796610169492, | |
| "grad_norm": 0.4486860590784266, | |
| "learning_rate": 3.877644758101648e-05, | |
| "loss": 0.3156, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17933771014213562, | |
| "step": 830, | |
| "valid_targets_mean": 8570.9, | |
| "valid_targets_min": 1893 | |
| }, | |
| { | |
| "epoch": 1.4152542372881356, | |
| "grad_norm": 0.4719151934288519, | |
| "learning_rate": 3.8747171317619955e-05, | |
| "loss": 0.3143, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09397554397583008, | |
| "step": 835, | |
| "valid_targets_mean": 5133.0, | |
| "valid_targets_min": 2254 | |
| }, | |
| { | |
| "epoch": 1.423728813559322, | |
| "grad_norm": 0.4667473106135975, | |
| "learning_rate": 3.871756025092059e-05, | |
| "loss": 0.3479, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1407933086156845, | |
| "step": 840, | |
| "valid_targets_mean": 6628.8, | |
| "valid_targets_min": 2795 | |
| }, | |
| { | |
| "epoch": 1.4322033898305084, | |
| "grad_norm": 0.4412306378722383, | |
| "learning_rate": 3.868761490973859e-05, | |
| "loss": 0.307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1313735395669937, | |
| "step": 845, | |
| "valid_targets_mean": 6751.1, | |
| "valid_targets_min": 3064 | |
| }, | |
| { | |
| "epoch": 1.4406779661016949, | |
| "grad_norm": 0.43177538465939785, | |
| "learning_rate": 3.8657335828863924e-05, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1634959578514099, | |
| "step": 850, | |
| "valid_targets_mean": 7480.4, | |
| "valid_targets_min": 3151 | |
| }, | |
| { | |
| "epoch": 1.4491525423728815, | |
| "grad_norm": 0.43807884753609994, | |
| "learning_rate": 3.8626723549046774e-05, | |
| "loss": 0.3268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12339450418949127, | |
| "step": 855, | |
| "valid_targets_mean": 6659.2, | |
| "valid_targets_min": 2840 | |
| }, | |
| { | |
| "epoch": 1.457627118644068, | |
| "grad_norm": 0.4727987965750298, | |
| "learning_rate": 3.859577861698787e-05, | |
| "loss": 0.2922, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15785086154937744, | |
| "step": 860, | |
| "valid_targets_mean": 7509.2, | |
| "valid_targets_min": 3028 | |
| }, | |
| { | |
| "epoch": 1.4661016949152543, | |
| "grad_norm": 0.4384303554477187, | |
| "learning_rate": 3.856450158532875e-05, | |
| "loss": 0.323, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1519169807434082, | |
| "step": 865, | |
| "valid_targets_mean": 7232.5, | |
| "valid_targets_min": 3849 | |
| }, | |
| { | |
| "epoch": 1.4745762711864407, | |
| "grad_norm": 0.49727270409272106, | |
| "learning_rate": 3.853289301264187e-05, | |
| "loss": 0.3358, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1139516681432724, | |
| "step": 870, | |
| "valid_targets_mean": 6231.1, | |
| "valid_targets_min": 1612 | |
| }, | |
| { | |
| "epoch": 1.4830508474576272, | |
| "grad_norm": 0.4426633905595123, | |
| "learning_rate": 3.850095346342064e-05, | |
| "loss": 0.3273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16769997775554657, | |
| "step": 875, | |
| "valid_targets_mean": 7338.5, | |
| "valid_targets_min": 3859 | |
| }, | |
| { | |
| "epoch": 1.4915254237288136, | |
| "grad_norm": 0.4593726038548315, | |
| "learning_rate": 3.84686835080693e-05, | |
| "loss": 0.3099, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11144979298114777, | |
| "step": 880, | |
| "valid_targets_mean": 6119.1, | |
| "valid_targets_min": 3785 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.49604203241194217, | |
| "learning_rate": 3.843608372289283e-05, | |
| "loss": 0.3271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15156549215316772, | |
| "step": 885, | |
| "valid_targets_mean": 7399.4, | |
| "valid_targets_min": 3299 | |
| }, | |
| { | |
| "epoch": 1.5084745762711864, | |
| "grad_norm": 0.545080093883971, | |
| "learning_rate": 3.8403154690086564e-05, | |
| "loss": 0.3361, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12989340722560883, | |
| "step": 890, | |
| "valid_targets_mean": 7236.0, | |
| "valid_targets_min": 3225 | |
| }, | |
| { | |
| "epoch": 1.5169491525423728, | |
| "grad_norm": 0.45075511600659357, | |
| "learning_rate": 3.836989699772582e-05, | |
| "loss": 0.3229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1870659589767456, | |
| "step": 895, | |
| "valid_targets_mean": 7038.4, | |
| "valid_targets_min": 2187 | |
| }, | |
| { | |
| "epoch": 1.5254237288135593, | |
| "grad_norm": 0.47217639934643396, | |
| "learning_rate": 3.8336311239755424e-05, | |
| "loss": 0.3622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15166038274765015, | |
| "step": 900, | |
| "valid_targets_mean": 7118.4, | |
| "valid_targets_min": 2202 | |
| }, | |
| { | |
| "epoch": 1.5338983050847457, | |
| "grad_norm": 0.41709638199225046, | |
| "learning_rate": 3.830239801597907e-05, | |
| "loss": 0.3125, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14974796772003174, | |
| "step": 905, | |
| "valid_targets_mean": 7620.0, | |
| "valid_targets_min": 2350 | |
| }, | |
| { | |
| "epoch": 1.542372881355932, | |
| "grad_norm": 0.41637607079903677, | |
| "learning_rate": 3.826815793204863e-05, | |
| "loss": 0.3013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1806432008743286, | |
| "step": 910, | |
| "valid_targets_mean": 9408.8, | |
| "valid_targets_min": 4175 | |
| }, | |
| { | |
| "epoch": 1.5508474576271185, | |
| "grad_norm": 0.5361115971794117, | |
| "learning_rate": 3.823359159945332e-05, | |
| "loss": 0.3193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1987413465976715, | |
| "step": 915, | |
| "valid_targets_mean": 8330.9, | |
| "valid_targets_min": 3259 | |
| }, | |
| { | |
| "epoch": 1.559322033898305, | |
| "grad_norm": 0.44132736535406036, | |
| "learning_rate": 3.8198699635508805e-05, | |
| "loss": 0.2982, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18994596600532532, | |
| "step": 920, | |
| "valid_targets_mean": 9074.9, | |
| "valid_targets_min": 3436 | |
| }, | |
| { | |
| "epoch": 1.5677966101694916, | |
| "grad_norm": 0.3816717467254405, | |
| "learning_rate": 3.816348266334614e-05, | |
| "loss": 0.319, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12687310576438904, | |
| "step": 925, | |
| "valid_targets_mean": 8785.6, | |
| "valid_targets_min": 4974 | |
| }, | |
| { | |
| "epoch": 1.576271186440678, | |
| "grad_norm": 0.4412025073597514, | |
| "learning_rate": 3.812794131190066e-05, | |
| "loss": 0.3306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13384410738945007, | |
| "step": 930, | |
| "valid_targets_mean": 7849.2, | |
| "valid_targets_min": 3985 | |
| }, | |
| { | |
| "epoch": 1.5847457627118644, | |
| "grad_norm": 0.396693457802134, | |
| "learning_rate": 3.8092076215900765e-05, | |
| "loss": 0.303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20873287320137024, | |
| "step": 935, | |
| "valid_targets_mean": 11243.4, | |
| "valid_targets_min": 5339 | |
| }, | |
| { | |
| "epoch": 1.5932203389830508, | |
| "grad_norm": 0.4285610045500551, | |
| "learning_rate": 3.805588801585654e-05, | |
| "loss": 0.3188, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08912678807973862, | |
| "step": 940, | |
| "valid_targets_mean": 4676.0, | |
| "valid_targets_min": 2847 | |
| }, | |
| { | |
| "epoch": 1.6016949152542372, | |
| "grad_norm": 0.41295836604849834, | |
| "learning_rate": 3.801937735804838e-05, | |
| "loss": 0.3439, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17002055048942566, | |
| "step": 945, | |
| "valid_targets_mean": 8253.2, | |
| "valid_targets_min": 2292 | |
| }, | |
| { | |
| "epoch": 1.6101694915254239, | |
| "grad_norm": 0.4110467780511449, | |
| "learning_rate": 3.798254489451539e-05, | |
| "loss": 0.3218, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26079848408699036, | |
| "step": 950, | |
| "valid_targets_mean": 11661.2, | |
| "valid_targets_min": 2093 | |
| }, | |
| { | |
| "epoch": 1.6186440677966103, | |
| "grad_norm": 0.5284185078380007, | |
| "learning_rate": 3.794539128304374e-05, | |
| "loss": 0.3327, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21993231773376465, | |
| "step": 955, | |
| "valid_targets_mean": 10140.0, | |
| "valid_targets_min": 4917 | |
| }, | |
| { | |
| "epoch": 1.6271186440677967, | |
| "grad_norm": 0.43465978137133837, | |
| "learning_rate": 3.790791718715498e-05, | |
| "loss": 0.3362, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16745583713054657, | |
| "step": 960, | |
| "valid_targets_mean": 7656.5, | |
| "valid_targets_min": 2997 | |
| }, | |
| { | |
| "epoch": 1.6355932203389831, | |
| "grad_norm": 0.46353679599089426, | |
| "learning_rate": 3.7870123276094134e-05, | |
| "loss": 0.3063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14307042956352234, | |
| "step": 965, | |
| "valid_targets_mean": 6720.0, | |
| "valid_targets_min": 2036 | |
| }, | |
| { | |
| "epoch": 1.6440677966101696, | |
| "grad_norm": 0.42508115955425385, | |
| "learning_rate": 3.783201022481775e-05, | |
| "loss": 0.307, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16600486636161804, | |
| "step": 970, | |
| "valid_targets_mean": 7660.0, | |
| "valid_targets_min": 4016 | |
| }, | |
| { | |
| "epoch": 1.652542372881356, | |
| "grad_norm": 0.47460145753384014, | |
| "learning_rate": 3.7793578713981876e-05, | |
| "loss": 0.2713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17931526899337769, | |
| "step": 975, | |
| "valid_targets_mean": 7207.9, | |
| "valid_targets_min": 2066 | |
| }, | |
| { | |
| "epoch": 1.6610169491525424, | |
| "grad_norm": 0.4565761001462879, | |
| "learning_rate": 3.775482942992989e-05, | |
| "loss": 0.296, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10052239894866943, | |
| "step": 980, | |
| "valid_targets_mean": 5350.4, | |
| "valid_targets_min": 3691 | |
| }, | |
| { | |
| "epoch": 1.6694915254237288, | |
| "grad_norm": 0.404024020976862, | |
| "learning_rate": 3.771576306468022e-05, | |
| "loss": 0.331, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13798417150974274, | |
| "step": 985, | |
| "valid_targets_mean": 8586.6, | |
| "valid_targets_min": 2541 | |
| }, | |
| { | |
| "epoch": 1.6779661016949152, | |
| "grad_norm": 0.46889190610408676, | |
| "learning_rate": 3.7676380315914025e-05, | |
| "loss": 0.3316, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09154243022203445, | |
| "step": 990, | |
| "valid_targets_mean": 5740.6, | |
| "valid_targets_min": 1962 | |
| }, | |
| { | |
| "epoch": 1.6864406779661016, | |
| "grad_norm": 0.4064487350790923, | |
| "learning_rate": 3.7636681886962724e-05, | |
| "loss": 0.3665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15788796544075012, | |
| "step": 995, | |
| "valid_targets_mean": 7709.4, | |
| "valid_targets_min": 2560 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "grad_norm": 0.7897794567174825, | |
| "learning_rate": 3.759666848679539e-05, | |
| "loss": 0.304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15532286465168, | |
| "step": 1000, | |
| "valid_targets_mean": 9089.0, | |
| "valid_targets_min": 5046 | |
| }, | |
| { | |
| "epoch": 1.7033898305084745, | |
| "grad_norm": 0.6035847206124464, | |
| "learning_rate": 3.755634083000617e-05, | |
| "loss": 0.3292, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1320587694644928, | |
| "step": 1005, | |
| "valid_targets_mean": 6896.9, | |
| "valid_targets_min": 2965 | |
| }, | |
| { | |
| "epoch": 1.711864406779661, | |
| "grad_norm": 0.41483149367263833, | |
| "learning_rate": 3.751569963680146e-05, | |
| "loss": 0.2949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20763173699378967, | |
| "step": 1010, | |
| "valid_targets_mean": 9600.8, | |
| "valid_targets_min": 5071 | |
| }, | |
| { | |
| "epoch": 1.7203389830508473, | |
| "grad_norm": 0.45501656641530847, | |
| "learning_rate": 3.747474563298705e-05, | |
| "loss": 0.3288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13265684247016907, | |
| "step": 1015, | |
| "valid_targets_mean": 6442.4, | |
| "valid_targets_min": 1875 | |
| }, | |
| { | |
| "epoch": 1.7288135593220337, | |
| "grad_norm": 0.5257033113939008, | |
| "learning_rate": 3.743347954995519e-05, | |
| "loss": 0.3268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13818372786045074, | |
| "step": 1020, | |
| "valid_targets_mean": 6209.2, | |
| "valid_targets_min": 3843 | |
| }, | |
| { | |
| "epoch": 1.7372881355932204, | |
| "grad_norm": 0.4383597245709914, | |
| "learning_rate": 3.7391902124671516e-05, | |
| "loss": 0.3244, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19095399975776672, | |
| "step": 1025, | |
| "valid_targets_mean": 8865.8, | |
| "valid_targets_min": 3685 | |
| }, | |
| { | |
| "epoch": 1.7457627118644068, | |
| "grad_norm": 0.39724550405667364, | |
| "learning_rate": 3.7350014099661874e-05, | |
| "loss": 0.3066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20521703362464905, | |
| "step": 1030, | |
| "valid_targets_mean": 9690.9, | |
| "valid_targets_min": 3548 | |
| }, | |
| { | |
| "epoch": 1.7542372881355932, | |
| "grad_norm": 0.39927522299758583, | |
| "learning_rate": 3.730781622299907e-05, | |
| "loss": 0.3211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17346695065498352, | |
| "step": 1035, | |
| "valid_targets_mean": 8075.1, | |
| "valid_targets_min": 3805 | |
| }, | |
| { | |
| "epoch": 1.7627118644067796, | |
| "grad_norm": 0.4350255084725687, | |
| "learning_rate": 3.7265309248289516e-05, | |
| "loss": 0.3455, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16887761652469635, | |
| "step": 1040, | |
| "valid_targets_mean": 7783.6, | |
| "valid_targets_min": 3913 | |
| }, | |
| { | |
| "epoch": 1.7711864406779663, | |
| "grad_norm": 0.421663580685689, | |
| "learning_rate": 3.7222493934659765e-05, | |
| "loss": 0.3466, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1530277132987976, | |
| "step": 1045, | |
| "valid_targets_mean": 9355.5, | |
| "valid_targets_min": 4980 | |
| }, | |
| { | |
| "epoch": 1.7796610169491527, | |
| "grad_norm": 0.4369588066271297, | |
| "learning_rate": 3.717937104674296e-05, | |
| "loss": 0.2978, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13133104145526886, | |
| "step": 1050, | |
| "valid_targets_mean": 7752.2, | |
| "valid_targets_min": 4407 | |
| }, | |
| { | |
| "epoch": 1.788135593220339, | |
| "grad_norm": 0.4368294132363595, | |
| "learning_rate": 3.713594135466515e-05, | |
| "loss": 0.3225, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17027533054351807, | |
| "step": 1055, | |
| "valid_targets_mean": 8704.5, | |
| "valid_targets_min": 2855 | |
| }, | |
| { | |
| "epoch": 1.7966101694915255, | |
| "grad_norm": 0.43449669149693804, | |
| "learning_rate": 3.7092205634031595e-05, | |
| "loss": 0.3478, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08701007068157196, | |
| "step": 1060, | |
| "valid_targets_mean": 4998.2, | |
| "valid_targets_min": 2374 | |
| }, | |
| { | |
| "epoch": 1.805084745762712, | |
| "grad_norm": 0.39372753859603943, | |
| "learning_rate": 3.704816466591286e-05, | |
| "loss": 0.3163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12855243682861328, | |
| "step": 1065, | |
| "valid_targets_mean": 7622.5, | |
| "valid_targets_min": 4093 | |
| }, | |
| { | |
| "epoch": 1.8135593220338984, | |
| "grad_norm": 0.3964647607911203, | |
| "learning_rate": 3.700381923683087e-05, | |
| "loss": 0.3364, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1131635457277298, | |
| "step": 1070, | |
| "valid_targets_mean": 7209.1, | |
| "valid_targets_min": 2797 | |
| }, | |
| { | |
| "epoch": 1.8220338983050848, | |
| "grad_norm": 0.4159371295645834, | |
| "learning_rate": 3.695917013874491e-05, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13864587247371674, | |
| "step": 1075, | |
| "valid_targets_mean": 7083.8, | |
| "valid_targets_min": 3551 | |
| }, | |
| { | |
| "epoch": 1.8305084745762712, | |
| "grad_norm": 0.41132981890721415, | |
| "learning_rate": 3.691421816903744e-05, | |
| "loss": 0.3373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20368866622447968, | |
| "step": 1080, | |
| "valid_targets_mean": 8503.0, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 1.8389830508474576, | |
| "grad_norm": 0.43908713798714083, | |
| "learning_rate": 3.686896413049985e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12140090763568878, | |
| "step": 1085, | |
| "valid_targets_mean": 6773.4, | |
| "valid_targets_min": 3364 | |
| }, | |
| { | |
| "epoch": 1.847457627118644, | |
| "grad_norm": 0.4338791941921327, | |
| "learning_rate": 3.6823408831318154e-05, | |
| "loss": 0.3105, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16254982352256775, | |
| "step": 1090, | |
| "valid_targets_mean": 7966.9, | |
| "valid_targets_min": 2532 | |
| }, | |
| { | |
| "epoch": 1.8559322033898304, | |
| "grad_norm": 0.41721828273489936, | |
| "learning_rate": 3.677755308505854e-05, | |
| "loss": 0.3423, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1637614667415619, | |
| "step": 1095, | |
| "valid_targets_mean": 7360.9, | |
| "valid_targets_min": 3864 | |
| }, | |
| { | |
| "epoch": 1.8644067796610169, | |
| "grad_norm": 0.4152523100279547, | |
| "learning_rate": 3.673139771065282e-05, | |
| "loss": 0.3229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11550319194793701, | |
| "step": 1100, | |
| "valid_targets_mean": 6838.5, | |
| "valid_targets_min": 3869 | |
| }, | |
| { | |
| "epoch": 1.8728813559322033, | |
| "grad_norm": 0.37899639866690693, | |
| "learning_rate": 3.6684943532383866e-05, | |
| "loss": 0.3448, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18879902362823486, | |
| "step": 1105, | |
| "valid_targets_mean": 10441.6, | |
| "valid_targets_min": 3589 | |
| }, | |
| { | |
| "epoch": 1.8813559322033897, | |
| "grad_norm": 0.40369311396663893, | |
| "learning_rate": 3.663819137987081e-05, | |
| "loss": 0.3342, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16418354213237762, | |
| "step": 1110, | |
| "valid_targets_mean": 9130.6, | |
| "valid_targets_min": 6228 | |
| }, | |
| { | |
| "epoch": 1.8898305084745761, | |
| "grad_norm": 0.4216701104223837, | |
| "learning_rate": 3.65911420880543e-05, | |
| "loss": 0.3106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2091543972492218, | |
| "step": 1115, | |
| "valid_targets_mean": 8209.8, | |
| "valid_targets_min": 3449 | |
| }, | |
| { | |
| "epoch": 1.8983050847457628, | |
| "grad_norm": 0.39934323363745766, | |
| "learning_rate": 3.654379649718153e-05, | |
| "loss": 0.3059, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16052965819835663, | |
| "step": 1120, | |
| "valid_targets_mean": 8141.9, | |
| "valid_targets_min": 2341 | |
| }, | |
| { | |
| "epoch": 1.9067796610169492, | |
| "grad_norm": 0.425180656486338, | |
| "learning_rate": 3.649615545279127e-05, | |
| "loss": 0.3443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23255954682826996, | |
| "step": 1125, | |
| "valid_targets_mean": 8977.1, | |
| "valid_targets_min": 1589 | |
| }, | |
| { | |
| "epoch": 1.9152542372881356, | |
| "grad_norm": 0.4134843945605497, | |
| "learning_rate": 3.644821980569876e-05, | |
| "loss": 0.3293, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18696650862693787, | |
| "step": 1130, | |
| "valid_targets_mean": 9344.6, | |
| "valid_targets_min": 2897 | |
| }, | |
| { | |
| "epoch": 1.923728813559322, | |
| "grad_norm": 0.4247315776579201, | |
| "learning_rate": 3.639999041198051e-05, | |
| "loss": 0.3106, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15916836261749268, | |
| "step": 1135, | |
| "valid_targets_mean": 7757.0, | |
| "valid_targets_min": 2240 | |
| }, | |
| { | |
| "epoch": 1.9322033898305084, | |
| "grad_norm": 0.4285608095339475, | |
| "learning_rate": 3.635146813295902e-05, | |
| "loss": 0.3347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12798747420310974, | |
| "step": 1140, | |
| "valid_targets_mean": 7018.9, | |
| "valid_targets_min": 3392 | |
| }, | |
| { | |
| "epoch": 1.940677966101695, | |
| "grad_norm": 0.4320951231170634, | |
| "learning_rate": 3.6302653835187366e-05, | |
| "loss": 0.3154, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2193724513053894, | |
| "step": 1145, | |
| "valid_targets_mean": 8454.5, | |
| "valid_targets_min": 4385 | |
| }, | |
| { | |
| "epoch": 1.9491525423728815, | |
| "grad_norm": 0.3679163355593773, | |
| "learning_rate": 3.625354839043377e-05, | |
| "loss": 0.2863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2157258540391922, | |
| "step": 1150, | |
| "valid_targets_mean": 12249.4, | |
| "valid_targets_min": 4241 | |
| }, | |
| { | |
| "epoch": 1.957627118644068, | |
| "grad_norm": 0.4102455825218708, | |
| "learning_rate": 3.620415267566601e-05, | |
| "loss": 0.3306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12105512619018555, | |
| "step": 1155, | |
| "valid_targets_mean": 7516.5, | |
| "valid_targets_min": 3222 | |
| }, | |
| { | |
| "epoch": 1.9661016949152543, | |
| "grad_norm": 0.4175220017891001, | |
| "learning_rate": 3.615446757303575e-05, | |
| "loss": 0.3221, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17339207231998444, | |
| "step": 1160, | |
| "valid_targets_mean": 8759.4, | |
| "valid_targets_min": 2960 | |
| }, | |
| { | |
| "epoch": 1.9745762711864407, | |
| "grad_norm": 0.44844776216498816, | |
| "learning_rate": 3.610449396986281e-05, | |
| "loss": 0.3084, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09378059208393097, | |
| "step": 1165, | |
| "valid_targets_mean": 5834.0, | |
| "valid_targets_min": 2096 | |
| }, | |
| { | |
| "epoch": 1.9830508474576272, | |
| "grad_norm": 0.44812744257447257, | |
| "learning_rate": 3.6054232758619274e-05, | |
| "loss": 0.3161, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1947910189628601, | |
| "step": 1170, | |
| "valid_targets_mean": 7453.2, | |
| "valid_targets_min": 2451 | |
| }, | |
| { | |
| "epoch": 1.9915254237288136, | |
| "grad_norm": 0.4242232674193107, | |
| "learning_rate": 3.600368483691361e-05, | |
| "loss": 0.3279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13258054852485657, | |
| "step": 1175, | |
| "valid_targets_mean": 7682.2, | |
| "valid_targets_min": 2492 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.45289022894013076, | |
| "learning_rate": 3.59528511074746e-05, | |
| "loss": 0.299, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12481173872947693, | |
| "step": 1180, | |
| "valid_targets_mean": 5148.2, | |
| "valid_targets_min": 1999 | |
| }, | |
| { | |
| "epoch": 2.0084745762711864, | |
| "grad_norm": 0.47796518296033785, | |
| "learning_rate": 3.5901732478135235e-05, | |
| "loss": 0.3136, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15180028975009918, | |
| "step": 1185, | |
| "valid_targets_mean": 6321.6, | |
| "valid_targets_min": 2236 | |
| }, | |
| { | |
| "epoch": 2.016949152542373, | |
| "grad_norm": 0.438352359074808, | |
| "learning_rate": 3.5850329861816475e-05, | |
| "loss": 0.2817, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12889432907104492, | |
| "step": 1190, | |
| "valid_targets_mean": 6916.6, | |
| "valid_targets_min": 2771 | |
| }, | |
| { | |
| "epoch": 2.0254237288135593, | |
| "grad_norm": 0.4126031654763985, | |
| "learning_rate": 3.5798644176511e-05, | |
| "loss": 0.2969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09737379848957062, | |
| "step": 1195, | |
| "valid_targets_mean": 6256.8, | |
| "valid_targets_min": 3050 | |
| }, | |
| { | |
| "epoch": 2.0338983050847457, | |
| "grad_norm": 0.408111961548012, | |
| "learning_rate": 3.574667634526676e-05, | |
| "loss": 0.3122, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14440008997917175, | |
| "step": 1200, | |
| "valid_targets_mean": 7384.1, | |
| "valid_targets_min": 2779 | |
| }, | |
| { | |
| "epoch": 2.042372881355932, | |
| "grad_norm": 0.455628995610143, | |
| "learning_rate": 3.5694427296170514e-05, | |
| "loss": 0.3048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16236944496631622, | |
| "step": 1205, | |
| "valid_targets_mean": 8072.6, | |
| "valid_targets_min": 4463 | |
| }, | |
| { | |
| "epoch": 2.0508474576271185, | |
| "grad_norm": 0.43337894990516485, | |
| "learning_rate": 3.564189796233125e-05, | |
| "loss": 0.3144, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22974911332130432, | |
| "step": 1210, | |
| "valid_targets_mean": 9636.9, | |
| "valid_targets_min": 3097 | |
| }, | |
| { | |
| "epoch": 2.059322033898305, | |
| "grad_norm": 0.40732481179936597, | |
| "learning_rate": 3.5589089281863547e-05, | |
| "loss": 0.2969, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1029449924826622, | |
| "step": 1215, | |
| "valid_targets_mean": 7567.2, | |
| "valid_targets_min": 2751 | |
| }, | |
| { | |
| "epoch": 2.0677966101694913, | |
| "grad_norm": 0.4252300433877439, | |
| "learning_rate": 3.553600219787079e-05, | |
| "loss": 0.2906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19273892045021057, | |
| "step": 1220, | |
| "valid_targets_mean": 8599.9, | |
| "valid_targets_min": 3639 | |
| }, | |
| { | |
| "epoch": 2.0762711864406778, | |
| "grad_norm": 0.4546211585832868, | |
| "learning_rate": 3.5482637658428315e-05, | |
| "loss": 0.2949, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23350149393081665, | |
| "step": 1225, | |
| "valid_targets_mean": 9294.4, | |
| "valid_targets_min": 1933 | |
| }, | |
| { | |
| "epoch": 2.084745762711864, | |
| "grad_norm": 0.4145817713099008, | |
| "learning_rate": 3.542899661656653e-05, | |
| "loss": 0.3212, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1794952154159546, | |
| "step": 1230, | |
| "valid_targets_mean": 9705.6, | |
| "valid_targets_min": 4885 | |
| }, | |
| { | |
| "epoch": 2.093220338983051, | |
| "grad_norm": 0.5271058668855069, | |
| "learning_rate": 3.5375080030253826e-05, | |
| "loss": 0.3432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12348256260156631, | |
| "step": 1235, | |
| "valid_targets_mean": 6719.0, | |
| "valid_targets_min": 3708 | |
| }, | |
| { | |
| "epoch": 2.1016949152542375, | |
| "grad_norm": 0.4232353398881956, | |
| "learning_rate": 3.532088886237956e-05, | |
| "loss": 0.3022, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10980045795440674, | |
| "step": 1240, | |
| "valid_targets_mean": 6338.2, | |
| "valid_targets_min": 3309 | |
| }, | |
| { | |
| "epoch": 2.110169491525424, | |
| "grad_norm": 0.45183791890941466, | |
| "learning_rate": 3.5266424080736765e-05, | |
| "loss": 0.3079, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15499433875083923, | |
| "step": 1245, | |
| "valid_targets_mean": 7531.9, | |
| "valid_targets_min": 3690 | |
| }, | |
| { | |
| "epoch": 2.1186440677966103, | |
| "grad_norm": 0.43508072611158927, | |
| "learning_rate": 3.521168665800491e-05, | |
| "loss": 0.3222, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2509422302246094, | |
| "step": 1250, | |
| "valid_targets_mean": 13284.1, | |
| "valid_targets_min": 5966 | |
| }, | |
| { | |
| "epoch": 2.1271186440677967, | |
| "grad_norm": 0.42735089982934016, | |
| "learning_rate": 3.515667757173254e-05, | |
| "loss": 0.2953, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13675671815872192, | |
| "step": 1255, | |
| "valid_targets_mean": 7142.9, | |
| "valid_targets_min": 2413 | |
| }, | |
| { | |
| "epoch": 2.135593220338983, | |
| "grad_norm": 0.40760862132146264, | |
| "learning_rate": 3.5101397804319794e-05, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17705723643302917, | |
| "step": 1260, | |
| "valid_targets_mean": 8875.0, | |
| "valid_targets_min": 3101 | |
| }, | |
| { | |
| "epoch": 2.1440677966101696, | |
| "grad_norm": 0.4900166239838904, | |
| "learning_rate": 3.5045848343000876e-05, | |
| "loss": 0.2858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1725788712501526, | |
| "step": 1265, | |
| "valid_targets_mean": 7732.4, | |
| "valid_targets_min": 2984 | |
| }, | |
| { | |
| "epoch": 2.152542372881356, | |
| "grad_norm": 0.428816419240488, | |
| "learning_rate": 3.4990030179826414e-05, | |
| "loss": 0.3253, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20709286630153656, | |
| "step": 1270, | |
| "valid_targets_mean": 10128.1, | |
| "valid_targets_min": 4459 | |
| }, | |
| { | |
| "epoch": 2.1610169491525424, | |
| "grad_norm": 0.40014932477968135, | |
| "learning_rate": 3.493394431164576e-05, | |
| "loss": 0.2927, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15721966326236725, | |
| "step": 1275, | |
| "valid_targets_mean": 8368.9, | |
| "valid_targets_min": 4914 | |
| }, | |
| { | |
| "epoch": 2.169491525423729, | |
| "grad_norm": 0.460349336503394, | |
| "learning_rate": 3.4877591740089144e-05, | |
| "loss": 0.3034, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12180677056312561, | |
| "step": 1280, | |
| "valid_targets_mean": 6484.2, | |
| "valid_targets_min": 2984 | |
| }, | |
| { | |
| "epoch": 2.1779661016949152, | |
| "grad_norm": 0.4167414151848438, | |
| "learning_rate": 3.482097347154986e-05, | |
| "loss": 0.2945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15773698687553406, | |
| "step": 1285, | |
| "valid_targets_mean": 8676.9, | |
| "valid_targets_min": 2724 | |
| }, | |
| { | |
| "epoch": 2.1864406779661016, | |
| "grad_norm": 0.4119767997336977, | |
| "learning_rate": 3.476409051716621e-05, | |
| "loss": 0.3075, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19150680303573608, | |
| "step": 1290, | |
| "valid_targets_mean": 9972.0, | |
| "valid_targets_min": 4367 | |
| }, | |
| { | |
| "epoch": 2.194915254237288, | |
| "grad_norm": 0.46842012389861176, | |
| "learning_rate": 3.470694389280352e-05, | |
| "loss": 0.2894, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13265065848827362, | |
| "step": 1295, | |
| "valid_targets_mean": 6506.4, | |
| "valid_targets_min": 2591 | |
| }, | |
| { | |
| "epoch": 2.2033898305084745, | |
| "grad_norm": 0.5070785490329287, | |
| "learning_rate": 3.464953461903593e-05, | |
| "loss": 0.2815, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16730618476867676, | |
| "step": 1300, | |
| "valid_targets_mean": 7640.6, | |
| "valid_targets_min": 2556 | |
| }, | |
| { | |
| "epoch": 2.211864406779661, | |
| "grad_norm": 0.44925860012957636, | |
| "learning_rate": 3.459186372112824e-05, | |
| "loss": 0.3067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12842459976673126, | |
| "step": 1305, | |
| "valid_targets_mean": 6798.6, | |
| "valid_targets_min": 3979 | |
| }, | |
| { | |
| "epoch": 2.2203389830508473, | |
| "grad_norm": 0.41138242525433244, | |
| "learning_rate": 3.453393222901753e-05, | |
| "loss": 0.3239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09415019303560257, | |
| "step": 1310, | |
| "valid_targets_mean": 7214.1, | |
| "valid_targets_min": 2008 | |
| }, | |
| { | |
| "epoch": 2.2288135593220337, | |
| "grad_norm": 0.41131324862852436, | |
| "learning_rate": 3.44757411772948e-05, | |
| "loss": 0.3134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.173304945230484, | |
| "step": 1315, | |
| "valid_targets_mean": 8934.2, | |
| "valid_targets_min": 3731 | |
| }, | |
| { | |
| "epoch": 2.23728813559322, | |
| "grad_norm": 0.47453310588934633, | |
| "learning_rate": 3.441729160518652e-05, | |
| "loss": 0.326, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20205450057983398, | |
| "step": 1320, | |
| "valid_targets_mean": 8631.2, | |
| "valid_targets_min": 3796 | |
| }, | |
| { | |
| "epoch": 2.2457627118644066, | |
| "grad_norm": 0.43201686834502273, | |
| "learning_rate": 3.435858455653601e-05, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13337936997413635, | |
| "step": 1325, | |
| "valid_targets_mean": 7937.0, | |
| "valid_targets_min": 2699 | |
| }, | |
| { | |
| "epoch": 2.2542372881355934, | |
| "grad_norm": 0.458779232154746, | |
| "learning_rate": 3.429962107978485e-05, | |
| "loss": 0.3096, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1039549931883812, | |
| "step": 1330, | |
| "valid_targets_mean": 5822.1, | |
| "valid_targets_min": 2106 | |
| }, | |
| { | |
| "epoch": 2.26271186440678, | |
| "grad_norm": 0.40314998050323314, | |
| "learning_rate": 3.424040222795412e-05, | |
| "loss": 0.3012, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10799141228199005, | |
| "step": 1335, | |
| "valid_targets_mean": 6850.5, | |
| "valid_targets_min": 3619 | |
| }, | |
| { | |
| "epoch": 2.2711864406779663, | |
| "grad_norm": 0.4249827679361363, | |
| "learning_rate": 3.418092905862563e-05, | |
| "loss": 0.2919, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1282862275838852, | |
| "step": 1340, | |
| "valid_targets_mean": 7501.6, | |
| "valid_targets_min": 4154 | |
| }, | |
| { | |
| "epoch": 2.2796610169491527, | |
| "grad_norm": 0.45225084379688923, | |
| "learning_rate": 3.412120263392301e-05, | |
| "loss": 0.2935, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15831294655799866, | |
| "step": 1345, | |
| "valid_targets_mean": 8395.9, | |
| "valid_targets_min": 5077 | |
| }, | |
| { | |
| "epoch": 2.288135593220339, | |
| "grad_norm": 0.4323979115648768, | |
| "learning_rate": 3.406122402049272e-05, | |
| "loss": 0.3187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1440098136663437, | |
| "step": 1350, | |
| "valid_targets_mean": 8358.2, | |
| "valid_targets_min": 3033 | |
| }, | |
| { | |
| "epoch": 2.2966101694915255, | |
| "grad_norm": 0.44798843437203717, | |
| "learning_rate": 3.400099428948505e-05, | |
| "loss": 0.2964, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24259638786315918, | |
| "step": 1355, | |
| "valid_targets_mean": 9828.9, | |
| "valid_targets_min": 3934 | |
| }, | |
| { | |
| "epoch": 2.305084745762712, | |
| "grad_norm": 0.4118065309385498, | |
| "learning_rate": 3.394051451653496e-05, | |
| "loss": 0.2987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12573987245559692, | |
| "step": 1360, | |
| "valid_targets_mean": 7931.0, | |
| "valid_targets_min": 5094 | |
| }, | |
| { | |
| "epoch": 2.3135593220338984, | |
| "grad_norm": 0.43851923519918656, | |
| "learning_rate": 3.3879785781742875e-05, | |
| "loss": 0.3164, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18017522990703583, | |
| "step": 1365, | |
| "valid_targets_mean": 8832.6, | |
| "valid_targets_min": 3053 | |
| }, | |
| { | |
| "epoch": 2.3220338983050848, | |
| "grad_norm": 0.49539123931874524, | |
| "learning_rate": 3.381880916965541e-05, | |
| "loss": 0.2939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10733641684055328, | |
| "step": 1370, | |
| "valid_targets_mean": 5227.6, | |
| "valid_targets_min": 2791 | |
| }, | |
| { | |
| "epoch": 2.330508474576271, | |
| "grad_norm": 0.512139157052037, | |
| "learning_rate": 3.3757585769245964e-05, | |
| "loss": 0.312, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.151132732629776, | |
| "step": 1375, | |
| "valid_targets_mean": 7262.0, | |
| "valid_targets_min": 3945 | |
| }, | |
| { | |
| "epoch": 2.3389830508474576, | |
| "grad_norm": 0.4416566028201391, | |
| "learning_rate": 3.3696116673895314e-05, | |
| "loss": 0.3211, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2037435919046402, | |
| "step": 1380, | |
| "valid_targets_mean": 9578.2, | |
| "valid_targets_min": 3736 | |
| }, | |
| { | |
| "epoch": 2.347457627118644, | |
| "grad_norm": 0.45082149557407764, | |
| "learning_rate": 3.363440298137207e-05, | |
| "loss": 0.3071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12679685652256012, | |
| "step": 1385, | |
| "valid_targets_mean": 6039.9, | |
| "valid_targets_min": 2300 | |
| }, | |
| { | |
| "epoch": 2.3559322033898304, | |
| "grad_norm": 0.4421724624356205, | |
| "learning_rate": 3.357244579381306e-05, | |
| "loss": 0.2908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14652232825756073, | |
| "step": 1390, | |
| "valid_targets_mean": 7244.0, | |
| "valid_targets_min": 1903 | |
| }, | |
| { | |
| "epoch": 2.364406779661017, | |
| "grad_norm": 0.4495061538438427, | |
| "learning_rate": 3.351024621770369e-05, | |
| "loss": 0.2983, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0919191762804985, | |
| "step": 1395, | |
| "valid_targets_mean": 5043.9, | |
| "valid_targets_min": 2452 | |
| }, | |
| { | |
| "epoch": 2.3728813559322033, | |
| "grad_norm": 0.45212052889171317, | |
| "learning_rate": 3.34478053638581e-05, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14459501206874847, | |
| "step": 1400, | |
| "valid_targets_mean": 7165.5, | |
| "valid_targets_min": 1936 | |
| }, | |
| { | |
| "epoch": 2.3813559322033897, | |
| "grad_norm": 0.45753710310314344, | |
| "learning_rate": 3.3385124347399415e-05, | |
| "loss": 0.2757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11004475504159927, | |
| "step": 1405, | |
| "valid_targets_mean": 7322.9, | |
| "valid_targets_min": 2350 | |
| }, | |
| { | |
| "epoch": 2.389830508474576, | |
| "grad_norm": 0.4527197832006006, | |
| "learning_rate": 3.33222042877398e-05, | |
| "loss": 0.301, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17083138227462769, | |
| "step": 1410, | |
| "valid_targets_mean": 7809.4, | |
| "valid_targets_min": 2393 | |
| }, | |
| { | |
| "epoch": 2.3983050847457625, | |
| "grad_norm": 0.3772050754545617, | |
| "learning_rate": 3.325904630856042e-05, | |
| "loss": 0.2956, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18102893233299255, | |
| "step": 1415, | |
| "valid_targets_mean": 10666.2, | |
| "valid_targets_min": 3513 | |
| }, | |
| { | |
| "epoch": 2.406779661016949, | |
| "grad_norm": 0.4563452741739858, | |
| "learning_rate": 3.319565153779146e-05, | |
| "loss": 0.3467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26052290201187134, | |
| "step": 1420, | |
| "valid_targets_mean": 10017.8, | |
| "valid_targets_min": 5124 | |
| }, | |
| { | |
| "epoch": 2.415254237288136, | |
| "grad_norm": 0.46401872953783496, | |
| "learning_rate": 3.31320211075919e-05, | |
| "loss": 0.3201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1795731782913208, | |
| "step": 1425, | |
| "valid_targets_mean": 7820.4, | |
| "valid_targets_min": 2278 | |
| }, | |
| { | |
| "epoch": 2.423728813559322, | |
| "grad_norm": 0.4298768255585762, | |
| "learning_rate": 3.306815615432936e-05, | |
| "loss": 0.3383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1531204879283905, | |
| "step": 1430, | |
| "valid_targets_mean": 8088.8, | |
| "valid_targets_min": 2989 | |
| }, | |
| { | |
| "epoch": 2.4322033898305087, | |
| "grad_norm": 0.42943975120474337, | |
| "learning_rate": 3.300405781855975e-05, | |
| "loss": 0.2942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19165225327014923, | |
| "step": 1435, | |
| "valid_targets_mean": 9201.4, | |
| "valid_targets_min": 6019 | |
| }, | |
| { | |
| "epoch": 2.440677966101695, | |
| "grad_norm": 0.3977047765538059, | |
| "learning_rate": 3.2939727245006956e-05, | |
| "loss": 0.2999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15969790518283844, | |
| "step": 1440, | |
| "valid_targets_mean": 8977.9, | |
| "valid_targets_min": 2996 | |
| }, | |
| { | |
| "epoch": 2.4491525423728815, | |
| "grad_norm": 0.43392603484157843, | |
| "learning_rate": 3.2875165582542347e-05, | |
| "loss": 0.3049, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09724732488393784, | |
| "step": 1445, | |
| "valid_targets_mean": 6136.5, | |
| "valid_targets_min": 3622 | |
| }, | |
| { | |
| "epoch": 2.457627118644068, | |
| "grad_norm": 0.39026794378770674, | |
| "learning_rate": 3.281037398416427e-05, | |
| "loss": 0.2698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12890513241291046, | |
| "step": 1450, | |
| "valid_targets_mean": 6924.1, | |
| "valid_targets_min": 2394 | |
| }, | |
| { | |
| "epoch": 2.4661016949152543, | |
| "grad_norm": 0.4242919077715597, | |
| "learning_rate": 3.2745353606977506e-05, | |
| "loss": 0.3159, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1096186563372612, | |
| "step": 1455, | |
| "valid_targets_mean": 7928.4, | |
| "valid_targets_min": 3062 | |
| }, | |
| { | |
| "epoch": 2.4745762711864407, | |
| "grad_norm": 0.4289426904620235, | |
| "learning_rate": 3.268010561217253e-05, | |
| "loss": 0.308, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13930077850818634, | |
| "step": 1460, | |
| "valid_targets_mean": 8676.9, | |
| "valid_targets_min": 4315 | |
| }, | |
| { | |
| "epoch": 2.483050847457627, | |
| "grad_norm": 0.4051021933769338, | |
| "learning_rate": 3.261463116500483e-05, | |
| "loss": 0.2942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10786758363246918, | |
| "step": 1465, | |
| "valid_targets_mean": 6350.8, | |
| "valid_targets_min": 2847 | |
| }, | |
| { | |
| "epoch": 2.4915254237288136, | |
| "grad_norm": 0.4019638092945469, | |
| "learning_rate": 3.254893143477408e-05, | |
| "loss": 0.2916, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20479941368103027, | |
| "step": 1470, | |
| "valid_targets_mean": 10263.6, | |
| "valid_targets_min": 2865 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.4299334544244074, | |
| "learning_rate": 3.248300759480323e-05, | |
| "loss": 0.3183, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2492760419845581, | |
| "step": 1475, | |
| "valid_targets_mean": 10219.6, | |
| "valid_targets_min": 5175 | |
| }, | |
| { | |
| "epoch": 2.5084745762711864, | |
| "grad_norm": 0.4023371948985823, | |
| "learning_rate": 3.241686082241761e-05, | |
| "loss": 0.2857, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1464785933494568, | |
| "step": 1480, | |
| "valid_targets_mean": 8838.9, | |
| "valid_targets_min": 2493 | |
| }, | |
| { | |
| "epoch": 2.516949152542373, | |
| "grad_norm": 0.41294672992405235, | |
| "learning_rate": 3.235049229892384e-05, | |
| "loss": 0.2655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12958091497421265, | |
| "step": 1485, | |
| "valid_targets_mean": 7342.2, | |
| "valid_targets_min": 4829 | |
| }, | |
| { | |
| "epoch": 2.5254237288135593, | |
| "grad_norm": 0.3831925532498252, | |
| "learning_rate": 3.2283903209588795e-05, | |
| "loss": 0.3178, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1335603892803192, | |
| "step": 1490, | |
| "valid_targets_mean": 7712.6, | |
| "valid_targets_min": 2385 | |
| }, | |
| { | |
| "epoch": 2.5338983050847457, | |
| "grad_norm": 0.4157070224630618, | |
| "learning_rate": 3.221709474361838e-05, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12635986506938934, | |
| "step": 1495, | |
| "valid_targets_mean": 6957.0, | |
| "valid_targets_min": 2932 | |
| }, | |
| { | |
| "epoch": 2.542372881355932, | |
| "grad_norm": 0.4224344033837063, | |
| "learning_rate": 3.215006809413633e-05, | |
| "loss": 0.3194, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1392759382724762, | |
| "step": 1500, | |
| "valid_targets_mean": 6384.5, | |
| "valid_targets_min": 2434 | |
| }, | |
| { | |
| "epoch": 2.5508474576271185, | |
| "grad_norm": 0.43012730740401234, | |
| "learning_rate": 3.20828244581629e-05, | |
| "loss": 0.2878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1791025698184967, | |
| "step": 1505, | |
| "valid_targets_mean": 7117.2, | |
| "valid_targets_min": 1495 | |
| }, | |
| { | |
| "epoch": 2.559322033898305, | |
| "grad_norm": 0.4845914897087292, | |
| "learning_rate": 3.2015365036593466e-05, | |
| "loss": 0.3306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1442946195602417, | |
| "step": 1510, | |
| "valid_targets_mean": 9361.6, | |
| "valid_targets_min": 4450 | |
| }, | |
| { | |
| "epoch": 2.5677966101694913, | |
| "grad_norm": 0.4020720093483181, | |
| "learning_rate": 3.194769103417709e-05, | |
| "loss": 0.2901, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2034224271774292, | |
| "step": 1515, | |
| "valid_targets_mean": 8227.0, | |
| "valid_targets_min": 2913 | |
| }, | |
| { | |
| "epoch": 2.576271186440678, | |
| "grad_norm": 0.47515037487881684, | |
| "learning_rate": 3.1879803659495015e-05, | |
| "loss": 0.2974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09358513355255127, | |
| "step": 1520, | |
| "valid_targets_mean": 5848.9, | |
| "valid_targets_min": 2633 | |
| }, | |
| { | |
| "epoch": 2.584745762711864, | |
| "grad_norm": 0.4525136249871132, | |
| "learning_rate": 3.181170412493906e-05, | |
| "loss": 0.2957, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14096501469612122, | |
| "step": 1525, | |
| "valid_targets_mean": 5804.8, | |
| "valid_targets_min": 2873 | |
| }, | |
| { | |
| "epoch": 2.593220338983051, | |
| "grad_norm": 0.44499842842422144, | |
| "learning_rate": 3.1743393646689996e-05, | |
| "loss": 0.2689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13607238233089447, | |
| "step": 1530, | |
| "valid_targets_mean": 7015.5, | |
| "valid_targets_min": 3650 | |
| }, | |
| { | |
| "epoch": 2.601694915254237, | |
| "grad_norm": 0.39010435281227923, | |
| "learning_rate": 3.1674873444695804e-05, | |
| "loss": 0.2838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15358000993728638, | |
| "step": 1535, | |
| "valid_targets_mean": 8212.6, | |
| "valid_targets_min": 4561 | |
| }, | |
| { | |
| "epoch": 2.610169491525424, | |
| "grad_norm": 0.38705505466621165, | |
| "learning_rate": 3.1606144742649875e-05, | |
| "loss": 0.2974, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18068253993988037, | |
| "step": 1540, | |
| "valid_targets_mean": 9366.8, | |
| "valid_targets_min": 3442 | |
| }, | |
| { | |
| "epoch": 2.6186440677966103, | |
| "grad_norm": 0.4645250007783615, | |
| "learning_rate": 3.15372087679692e-05, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16335920989513397, | |
| "step": 1545, | |
| "valid_targets_mean": 7294.8, | |
| "valid_targets_min": 2880 | |
| }, | |
| { | |
| "epoch": 2.6271186440677967, | |
| "grad_norm": 0.5969381310791982, | |
| "learning_rate": 3.1468066751772415e-05, | |
| "loss": 0.2987, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20474021136760712, | |
| "step": 1550, | |
| "valid_targets_mean": 9866.2, | |
| "valid_targets_min": 3896 | |
| }, | |
| { | |
| "epoch": 2.635593220338983, | |
| "grad_norm": 0.39986488663805625, | |
| "learning_rate": 3.1398719928857834e-05, | |
| "loss": 0.2852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16469399631023407, | |
| "step": 1555, | |
| "valid_targets_mean": 9266.1, | |
| "valid_targets_min": 2559 | |
| }, | |
| { | |
| "epoch": 2.6440677966101696, | |
| "grad_norm": 0.42603497362335596, | |
| "learning_rate": 3.132916953768137e-05, | |
| "loss": 0.2842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15886007249355316, | |
| "step": 1560, | |
| "valid_targets_mean": 7792.9, | |
| "valid_targets_min": 2695 | |
| }, | |
| { | |
| "epoch": 2.652542372881356, | |
| "grad_norm": 0.4477773300510562, | |
| "learning_rate": 3.1259416820334446e-05, | |
| "loss": 0.2917, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1468237042427063, | |
| "step": 1565, | |
| "valid_targets_mean": 6308.1, | |
| "valid_targets_min": 2530 | |
| }, | |
| { | |
| "epoch": 2.6610169491525424, | |
| "grad_norm": 0.413038718975138, | |
| "learning_rate": 3.118946302252181e-05, | |
| "loss": 0.2752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11161170154809952, | |
| "step": 1570, | |
| "valid_targets_mean": 5875.2, | |
| "valid_targets_min": 3706 | |
| }, | |
| { | |
| "epoch": 2.669491525423729, | |
| "grad_norm": 0.40102607092450826, | |
| "learning_rate": 3.1119309393539256e-05, | |
| "loss": 0.2763, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1047399640083313, | |
| "step": 1575, | |
| "valid_targets_mean": 6100.2, | |
| "valid_targets_min": 2617 | |
| }, | |
| { | |
| "epoch": 2.6779661016949152, | |
| "grad_norm": 0.4487526831600451, | |
| "learning_rate": 3.104895718625136e-05, | |
| "loss": 0.2875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19679084420204163, | |
| "step": 1580, | |
| "valid_targets_mean": 9194.9, | |
| "valid_targets_min": 3588 | |
| }, | |
| { | |
| "epoch": 2.6864406779661016, | |
| "grad_norm": 0.4319081942251288, | |
| "learning_rate": 3.097840765706908e-05, | |
| "loss": 0.3087, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16104386746883392, | |
| "step": 1585, | |
| "valid_targets_mean": 7758.5, | |
| "valid_targets_min": 2699 | |
| }, | |
| { | |
| "epoch": 2.694915254237288, | |
| "grad_norm": 0.4362156095926864, | |
| "learning_rate": 3.0907662065927297e-05, | |
| "loss": 0.2998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17210246622562408, | |
| "step": 1590, | |
| "valid_targets_mean": 7771.1, | |
| "valid_targets_min": 4059 | |
| }, | |
| { | |
| "epoch": 2.7033898305084745, | |
| "grad_norm": 0.3805861970091732, | |
| "learning_rate": 3.083672167626237e-05, | |
| "loss": 0.3063, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1351141780614853, | |
| "step": 1595, | |
| "valid_targets_mean": 8147.1, | |
| "valid_targets_min": 3440 | |
| }, | |
| { | |
| "epoch": 2.711864406779661, | |
| "grad_norm": 0.41117431133265786, | |
| "learning_rate": 3.0765587754989516e-05, | |
| "loss": 0.2884, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2080633044242859, | |
| "step": 1600, | |
| "valid_targets_mean": 10313.1, | |
| "valid_targets_min": 4746 | |
| }, | |
| { | |
| "epoch": 2.7203389830508473, | |
| "grad_norm": 0.4193393539029648, | |
| "learning_rate": 3.069426157248022e-05, | |
| "loss": 0.2883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1242239847779274, | |
| "step": 1605, | |
| "valid_targets_mean": 7685.4, | |
| "valid_targets_min": 2538 | |
| }, | |
| { | |
| "epoch": 2.7288135593220337, | |
| "grad_norm": 0.40390165294921654, | |
| "learning_rate": 3.062274440253953e-05, | |
| "loss": 0.3238, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1672280728816986, | |
| "step": 1610, | |
| "valid_targets_mean": 8736.0, | |
| "valid_targets_min": 2510 | |
| }, | |
| { | |
| "epoch": 2.7372881355932206, | |
| "grad_norm": 0.4597361076436166, | |
| "learning_rate": 3.0551037522383325e-05, | |
| "loss": 0.3077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13972532749176025, | |
| "step": 1615, | |
| "valid_targets_mean": 6083.9, | |
| "valid_targets_min": 2965 | |
| }, | |
| { | |
| "epoch": 2.7457627118644066, | |
| "grad_norm": 0.3801055818275529, | |
| "learning_rate": 3.0479142212615457e-05, | |
| "loss": 0.3031, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14896857738494873, | |
| "step": 1620, | |
| "valid_targets_mean": 8904.1, | |
| "valid_targets_min": 2302 | |
| }, | |
| { | |
| "epoch": 2.7542372881355934, | |
| "grad_norm": 0.37492323834267705, | |
| "learning_rate": 3.0407059757204968e-05, | |
| "loss": 0.3108, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11933213472366333, | |
| "step": 1625, | |
| "valid_targets_mean": 7426.5, | |
| "valid_targets_min": 2795 | |
| }, | |
| { | |
| "epoch": 2.7627118644067794, | |
| "grad_norm": 0.39976463787165845, | |
| "learning_rate": 3.033479144346308e-05, | |
| "loss": 0.3374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16365092992782593, | |
| "step": 1630, | |
| "valid_targets_mean": 8906.2, | |
| "valid_targets_min": 4011 | |
| }, | |
| { | |
| "epoch": 2.7711864406779663, | |
| "grad_norm": 0.3997723207190587, | |
| "learning_rate": 3.0262338562020246e-05, | |
| "loss": 0.3026, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11927295476198196, | |
| "step": 1635, | |
| "valid_targets_mean": 6491.0, | |
| "valid_targets_min": 2532 | |
| }, | |
| { | |
| "epoch": 2.7796610169491527, | |
| "grad_norm": 0.36564215466073935, | |
| "learning_rate": 3.018970240680308e-05, | |
| "loss": 0.3071, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19709117710590363, | |
| "step": 1640, | |
| "valid_targets_mean": 11646.2, | |
| "valid_targets_min": 4338 | |
| }, | |
| { | |
| "epoch": 2.788135593220339, | |
| "grad_norm": 0.40505026371761105, | |
| "learning_rate": 3.011688427501126e-05, | |
| "loss": 0.3058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19479559361934662, | |
| "step": 1645, | |
| "valid_targets_mean": 10186.4, | |
| "valid_targets_min": 4347 | |
| }, | |
| { | |
| "epoch": 2.7966101694915255, | |
| "grad_norm": 0.39795771334611546, | |
| "learning_rate": 3.0043885467094382e-05, | |
| "loss": 0.3121, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09027592837810516, | |
| "step": 1650, | |
| "valid_targets_mean": 5524.0, | |
| "valid_targets_min": 2597 | |
| }, | |
| { | |
| "epoch": 2.805084745762712, | |
| "grad_norm": 0.4041662599563723, | |
| "learning_rate": 2.9970707286728676e-05, | |
| "loss": 0.2997, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1600339710712433, | |
| "step": 1655, | |
| "valid_targets_mean": 7957.8, | |
| "valid_targets_min": 3054 | |
| }, | |
| { | |
| "epoch": 2.8135593220338984, | |
| "grad_norm": 0.437401544209369, | |
| "learning_rate": 2.9897351040793786e-05, | |
| "loss": 0.2776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11536328494548798, | |
| "step": 1660, | |
| "valid_targets_mean": 5436.1, | |
| "valid_targets_min": 2823 | |
| }, | |
| { | |
| "epoch": 2.8220338983050848, | |
| "grad_norm": 0.37522109968246403, | |
| "learning_rate": 2.9823818039349407e-05, | |
| "loss": 0.2783, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15367770195007324, | |
| "step": 1665, | |
| "valid_targets_mean": 8713.4, | |
| "valid_targets_min": 2676 | |
| }, | |
| { | |
| "epoch": 2.830508474576271, | |
| "grad_norm": 0.3849792093810754, | |
| "learning_rate": 2.9750109595611884e-05, | |
| "loss": 0.2882, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14932164549827576, | |
| "step": 1670, | |
| "valid_targets_mean": 8313.9, | |
| "valid_targets_min": 2584 | |
| }, | |
| { | |
| "epoch": 2.8389830508474576, | |
| "grad_norm": 0.4863619459847622, | |
| "learning_rate": 2.967622702593074e-05, | |
| "loss": 0.2853, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1424635946750641, | |
| "step": 1675, | |
| "valid_targets_mean": 6796.4, | |
| "valid_targets_min": 2567 | |
| }, | |
| { | |
| "epoch": 2.847457627118644, | |
| "grad_norm": 0.396918930200827, | |
| "learning_rate": 2.9602171649765235e-05, | |
| "loss": 0.3067, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14382398128509521, | |
| "step": 1680, | |
| "valid_targets_mean": 9150.9, | |
| "valid_targets_min": 4866 | |
| }, | |
| { | |
| "epoch": 2.8559322033898304, | |
| "grad_norm": 0.4963487698731554, | |
| "learning_rate": 2.9527944789660732e-05, | |
| "loss": 0.2893, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13725848495960236, | |
| "step": 1685, | |
| "valid_targets_mean": 8448.1, | |
| "valid_targets_min": 3413 | |
| }, | |
| { | |
| "epoch": 2.864406779661017, | |
| "grad_norm": 0.4856540030020508, | |
| "learning_rate": 2.9453547771225088e-05, | |
| "loss": 0.2873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13087928295135498, | |
| "step": 1690, | |
| "valid_targets_mean": 6625.8, | |
| "valid_targets_min": 2060 | |
| }, | |
| { | |
| "epoch": 2.8728813559322033, | |
| "grad_norm": 0.4718602599329464, | |
| "learning_rate": 2.9378981923105026e-05, | |
| "loss": 0.2803, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12425769120454788, | |
| "step": 1695, | |
| "valid_targets_mean": 6502.9, | |
| "valid_targets_min": 3374 | |
| }, | |
| { | |
| "epoch": 2.8813559322033897, | |
| "grad_norm": 0.429811308643893, | |
| "learning_rate": 2.930424857696236e-05, | |
| "loss": 0.3189, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11679084599018097, | |
| "step": 1700, | |
| "valid_targets_mean": 6725.0, | |
| "valid_targets_min": 3680 | |
| }, | |
| { | |
| "epoch": 2.889830508474576, | |
| "grad_norm": 0.6106285244411059, | |
| "learning_rate": 2.922934906745024e-05, | |
| "loss": 0.3187, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17136754095554352, | |
| "step": 1705, | |
| "valid_targets_mean": 8815.4, | |
| "valid_targets_min": 3296 | |
| }, | |
| { | |
| "epoch": 2.898305084745763, | |
| "grad_norm": 0.40881667558157, | |
| "learning_rate": 2.9154284732189285e-05, | |
| "loss": 0.3147, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.103614941239357, | |
| "step": 1710, | |
| "valid_targets_mean": 7344.6, | |
| "valid_targets_min": 3896 | |
| }, | |
| { | |
| "epoch": 2.906779661016949, | |
| "grad_norm": 0.40167051142083, | |
| "learning_rate": 2.907905691174374e-05, | |
| "loss": 0.2885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15157365798950195, | |
| "step": 1715, | |
| "valid_targets_mean": 7305.6, | |
| "valid_targets_min": 2406 | |
| }, | |
| { | |
| "epoch": 2.915254237288136, | |
| "grad_norm": 0.44454100171373195, | |
| "learning_rate": 2.90036669495975e-05, | |
| "loss": 0.3193, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2535397410392761, | |
| "step": 1720, | |
| "valid_targets_mean": 10899.9, | |
| "valid_targets_min": 4113 | |
| }, | |
| { | |
| "epoch": 2.923728813559322, | |
| "grad_norm": 0.40742031882359336, | |
| "learning_rate": 2.8928116192130127e-05, | |
| "loss": 0.3201, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15399280190467834, | |
| "step": 1725, | |
| "valid_targets_mean": 8970.1, | |
| "valid_targets_min": 4604 | |
| }, | |
| { | |
| "epoch": 2.9322033898305087, | |
| "grad_norm": 0.3808270536515648, | |
| "learning_rate": 2.8852405988592802e-05, | |
| "loss": 0.2801, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.125367671251297, | |
| "step": 1730, | |
| "valid_targets_mean": 6888.9, | |
| "valid_targets_min": 3687 | |
| }, | |
| { | |
| "epoch": 2.940677966101695, | |
| "grad_norm": 0.38551744389083203, | |
| "learning_rate": 2.8776537691084247e-05, | |
| "loss": 0.2971, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2000572681427002, | |
| "step": 1735, | |
| "valid_targets_mean": 9901.8, | |
| "valid_targets_min": 5423 | |
| }, | |
| { | |
| "epoch": 2.9491525423728815, | |
| "grad_norm": 0.3958071016569807, | |
| "learning_rate": 2.8700512654526543e-05, | |
| "loss": 0.303, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1151888519525528, | |
| "step": 1740, | |
| "valid_targets_mean": 6846.0, | |
| "valid_targets_min": 3356 | |
| }, | |
| { | |
| "epoch": 2.957627118644068, | |
| "grad_norm": 0.4049169362567717, | |
| "learning_rate": 2.8624332236640975e-05, | |
| "loss": 0.2857, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15298178791999817, | |
| "step": 1745, | |
| "valid_targets_mean": 8275.1, | |
| "valid_targets_min": 3234 | |
| }, | |
| { | |
| "epoch": 2.9661016949152543, | |
| "grad_norm": 0.40926795857010767, | |
| "learning_rate": 2.854799779792375e-05, | |
| "loss": 0.2807, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.122572161257267, | |
| "step": 1750, | |
| "valid_targets_mean": 6125.0, | |
| "valid_targets_min": 1440 | |
| }, | |
| { | |
| "epoch": 2.9745762711864407, | |
| "grad_norm": 0.4120145141696389, | |
| "learning_rate": 2.8471510701621708e-05, | |
| "loss": 0.2838, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14707563817501068, | |
| "step": 1755, | |
| "valid_targets_mean": 8006.0, | |
| "valid_targets_min": 2888 | |
| }, | |
| { | |
| "epoch": 2.983050847457627, | |
| "grad_norm": 0.4336376958519921, | |
| "learning_rate": 2.8394872313707998e-05, | |
| "loss": 0.3134, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2632421851158142, | |
| "step": 1760, | |
| "valid_targets_mean": 9177.8, | |
| "valid_targets_min": 2312 | |
| }, | |
| { | |
| "epoch": 2.9915254237288136, | |
| "grad_norm": 0.44528199555148923, | |
| "learning_rate": 2.8318084002857654e-05, | |
| "loss": 0.3, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14627692103385925, | |
| "step": 1765, | |
| "valid_targets_mean": 6893.6, | |
| "valid_targets_min": 4435 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.4240883829673781, | |
| "learning_rate": 2.824114714042317e-05, | |
| "loss": 0.2799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1557423323392868, | |
| "step": 1770, | |
| "valid_targets_mean": 6871.1, | |
| "valid_targets_min": 2648 | |
| }, | |
| { | |
| "epoch": 3.0084745762711864, | |
| "grad_norm": 0.3897900675954003, | |
| "learning_rate": 2.8164063100409992e-05, | |
| "loss": 0.306, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17762136459350586, | |
| "step": 1775, | |
| "valid_targets_mean": 11193.6, | |
| "valid_targets_min": 5241 | |
| }, | |
| { | |
| "epoch": 3.016949152542373, | |
| "grad_norm": 0.47647182837107177, | |
| "learning_rate": 2.8086833259452006e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1492662876844406, | |
| "step": 1780, | |
| "valid_targets_mean": 7162.5, | |
| "valid_targets_min": 2970 | |
| }, | |
| { | |
| "epoch": 3.0254237288135593, | |
| "grad_norm": 0.48623120655281904, | |
| "learning_rate": 2.8009458996786934e-05, | |
| "loss": 0.2875, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12112700194120407, | |
| "step": 1785, | |
| "valid_targets_mean": 7915.6, | |
| "valid_targets_min": 4167 | |
| }, | |
| { | |
| "epoch": 3.0338983050847457, | |
| "grad_norm": 0.48482652840024115, | |
| "learning_rate": 2.7931941694231705e-05, | |
| "loss": 0.3197, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19166266918182373, | |
| "step": 1790, | |
| "valid_targets_mean": 9350.4, | |
| "valid_targets_min": 3952 | |
| }, | |
| { | |
| "epoch": 3.042372881355932, | |
| "grad_norm": 0.43430659240936026, | |
| "learning_rate": 2.7854282736157777e-05, | |
| "loss": 0.2766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10274215042591095, | |
| "step": 1795, | |
| "valid_targets_mean": 6751.6, | |
| "valid_targets_min": 3033 | |
| }, | |
| { | |
| "epoch": 3.0508474576271185, | |
| "grad_norm": 0.42824230463335977, | |
| "learning_rate": 2.777648350946642e-05, | |
| "loss": 0.3051, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14262977242469788, | |
| "step": 1800, | |
| "valid_targets_mean": 7915.8, | |
| "valid_targets_min": 4544 | |
| }, | |
| { | |
| "epoch": 3.059322033898305, | |
| "grad_norm": 0.39171848978515866, | |
| "learning_rate": 2.7698545403563934e-05, | |
| "loss": 0.2846, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17258086800575256, | |
| "step": 1805, | |
| "valid_targets_mean": 10852.6, | |
| "valid_targets_min": 4410 | |
| }, | |
| { | |
| "epoch": 3.0677966101694913, | |
| "grad_norm": 0.42586366102628226, | |
| "learning_rate": 2.7620469810336854e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10813435912132263, | |
| "step": 1810, | |
| "valid_targets_mean": 7451.9, | |
| "valid_targets_min": 3605 | |
| }, | |
| { | |
| "epoch": 3.0762711864406778, | |
| "grad_norm": 0.5024156841387737, | |
| "learning_rate": 2.754225812412708e-05, | |
| "loss": 0.286, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2416592836380005, | |
| "step": 1815, | |
| "valid_targets_mean": 10524.9, | |
| "valid_targets_min": 3310 | |
| }, | |
| { | |
| "epoch": 3.084745762711864, | |
| "grad_norm": 0.4048547245262977, | |
| "learning_rate": 2.7463911741706982e-05, | |
| "loss": 0.3173, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.207644984126091, | |
| "step": 1820, | |
| "valid_targets_mean": 12621.1, | |
| "valid_targets_min": 5029 | |
| }, | |
| { | |
| "epoch": 3.093220338983051, | |
| "grad_norm": 0.4307490078277596, | |
| "learning_rate": 2.738543206225445e-05, | |
| "loss": 0.3163, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1587986946105957, | |
| "step": 1825, | |
| "valid_targets_mean": 7724.6, | |
| "valid_targets_min": 3033 | |
| }, | |
| { | |
| "epoch": 3.1016949152542375, | |
| "grad_norm": 0.3979456347157598, | |
| "learning_rate": 2.7306820487327906e-05, | |
| "loss": 0.2543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09165980666875839, | |
| "step": 1830, | |
| "valid_targets_mean": 6352.9, | |
| "valid_targets_min": 3636 | |
| }, | |
| { | |
| "epoch": 3.110169491525424, | |
| "grad_norm": 0.41012860288675734, | |
| "learning_rate": 2.7228078420841277e-05, | |
| "loss": 0.2854, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15160031616687775, | |
| "step": 1835, | |
| "valid_targets_mean": 8778.5, | |
| "valid_targets_min": 4517 | |
| }, | |
| { | |
| "epoch": 3.1186440677966103, | |
| "grad_norm": 0.3676896512476812, | |
| "learning_rate": 2.714920726903892e-05, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12810014188289642, | |
| "step": 1840, | |
| "valid_targets_mean": 9615.2, | |
| "valid_targets_min": 2341 | |
| }, | |
| { | |
| "epoch": 3.1271186440677967, | |
| "grad_norm": 0.4376407960674012, | |
| "learning_rate": 2.7070208440470525e-05, | |
| "loss": 0.2995, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10664518177509308, | |
| "step": 1845, | |
| "valid_targets_mean": 6728.5, | |
| "valid_targets_min": 1635 | |
| }, | |
| { | |
| "epoch": 3.135593220338983, | |
| "grad_norm": 0.4408544143757849, | |
| "learning_rate": 2.699108334596592e-05, | |
| "loss": 0.2933, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13383883237838745, | |
| "step": 1850, | |
| "valid_targets_mean": 6220.6, | |
| "valid_targets_min": 1893 | |
| }, | |
| { | |
| "epoch": 3.1440677966101696, | |
| "grad_norm": 0.4171325307219931, | |
| "learning_rate": 2.6911833398609923e-05, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13895325362682343, | |
| "step": 1855, | |
| "valid_targets_mean": 7073.8, | |
| "valid_targets_min": 3127 | |
| }, | |
| { | |
| "epoch": 3.152542372881356, | |
| "grad_norm": 0.38458629105978887, | |
| "learning_rate": 2.683246001371706e-05, | |
| "loss": 0.2848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15742285549640656, | |
| "step": 1860, | |
| "valid_targets_mean": 8551.6, | |
| "valid_targets_min": 3754 | |
| }, | |
| { | |
| "epoch": 3.1610169491525424, | |
| "grad_norm": 0.43171473858147835, | |
| "learning_rate": 2.6752964608806338e-05, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16160979866981506, | |
| "step": 1865, | |
| "valid_targets_mean": 8368.9, | |
| "valid_targets_min": 2778 | |
| }, | |
| { | |
| "epoch": 3.169491525423729, | |
| "grad_norm": 0.4050340176255496, | |
| "learning_rate": 2.6673348603575884e-05, | |
| "loss": 0.2756, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11083655059337616, | |
| "step": 1870, | |
| "valid_targets_mean": 7184.9, | |
| "valid_targets_min": 2213 | |
| }, | |
| { | |
| "epoch": 3.1779661016949152, | |
| "grad_norm": 0.387647020584462, | |
| "learning_rate": 2.6593613419877615e-05, | |
| "loss": 0.2906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.143922820687294, | |
| "step": 1875, | |
| "valid_targets_mean": 7900.6, | |
| "valid_targets_min": 3621 | |
| }, | |
| { | |
| "epoch": 3.1864406779661016, | |
| "grad_norm": 0.42761047108532324, | |
| "learning_rate": 2.6513760481691842e-05, | |
| "loss": 0.3078, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20920830965042114, | |
| "step": 1880, | |
| "valid_targets_mean": 10595.6, | |
| "valid_targets_min": 4433 | |
| }, | |
| { | |
| "epoch": 3.194915254237288, | |
| "grad_norm": 0.41992267855603493, | |
| "learning_rate": 2.6433791215101848e-05, | |
| "loss": 0.2867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16483736038208008, | |
| "step": 1885, | |
| "valid_targets_mean": 8851.0, | |
| "valid_targets_min": 2382 | |
| }, | |
| { | |
| "epoch": 3.2033898305084745, | |
| "grad_norm": 0.4957805375483976, | |
| "learning_rate": 2.6353707048268397e-05, | |
| "loss": 0.3328, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19370196759700775, | |
| "step": 1890, | |
| "valid_targets_mean": 6473.1, | |
| "valid_targets_min": 2295 | |
| }, | |
| { | |
| "epoch": 3.211864406779661, | |
| "grad_norm": 0.44788686707718267, | |
| "learning_rate": 2.6273509411404234e-05, | |
| "loss": 0.2979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1875634789466858, | |
| "step": 1895, | |
| "valid_targets_mean": 8210.6, | |
| "valid_targets_min": 2503 | |
| }, | |
| { | |
| "epoch": 3.2203389830508473, | |
| "grad_norm": 0.4152706213229902, | |
| "learning_rate": 2.6193199736748572e-05, | |
| "loss": 0.2844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1070324182510376, | |
| "step": 1900, | |
| "valid_targets_mean": 6541.2, | |
| "valid_targets_min": 3099 | |
| }, | |
| { | |
| "epoch": 3.2288135593220337, | |
| "grad_norm": 0.4205334323132011, | |
| "learning_rate": 2.611277945854148e-05, | |
| "loss": 0.3005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12432655692100525, | |
| "step": 1905, | |
| "valid_targets_mean": 7172.9, | |
| "valid_targets_min": 2347 | |
| }, | |
| { | |
| "epoch": 3.23728813559322, | |
| "grad_norm": 0.42085706446217025, | |
| "learning_rate": 2.603225001299829e-05, | |
| "loss": 0.2999, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1846511960029602, | |
| "step": 1910, | |
| "valid_targets_mean": 9515.6, | |
| "valid_targets_min": 2967 | |
| }, | |
| { | |
| "epoch": 3.2457627118644066, | |
| "grad_norm": 0.45800092129429243, | |
| "learning_rate": 2.595161283828392e-05, | |
| "loss": 0.3104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10589482635259628, | |
| "step": 1915, | |
| "valid_targets_mean": 5909.1, | |
| "valid_targets_min": 2628 | |
| }, | |
| { | |
| "epoch": 3.2542372881355934, | |
| "grad_norm": 0.4274729974353385, | |
| "learning_rate": 2.5870869374487227e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1856462061405182, | |
| "step": 1920, | |
| "valid_targets_mean": 8283.9, | |
| "valid_targets_min": 4400 | |
| }, | |
| { | |
| "epoch": 3.26271186440678, | |
| "grad_norm": 0.45054085287704343, | |
| "learning_rate": 2.579002106359527e-05, | |
| "loss": 0.2954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16199690103530884, | |
| "step": 1925, | |
| "valid_targets_mean": 6181.9, | |
| "valid_targets_min": 2319 | |
| }, | |
| { | |
| "epoch": 3.2711864406779663, | |
| "grad_norm": 0.40980194755456195, | |
| "learning_rate": 2.5709069349467562e-05, | |
| "loss": 0.3383, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13299697637557983, | |
| "step": 1930, | |
| "valid_targets_mean": 8032.4, | |
| "valid_targets_min": 2865 | |
| }, | |
| { | |
| "epoch": 3.2796610169491527, | |
| "grad_norm": 0.3829926624037664, | |
| "learning_rate": 2.562801567781026e-05, | |
| "loss": 0.2986, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10366646200418472, | |
| "step": 1935, | |
| "valid_targets_mean": 8115.5, | |
| "valid_targets_min": 3637 | |
| }, | |
| { | |
| "epoch": 3.288135593220339, | |
| "grad_norm": 0.42359013428814046, | |
| "learning_rate": 2.554686149615038e-05, | |
| "loss": 0.2566, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12953156232833862, | |
| "step": 1940, | |
| "valid_targets_mean": 8511.0, | |
| "valid_targets_min": 5177 | |
| }, | |
| { | |
| "epoch": 3.2966101694915255, | |
| "grad_norm": 0.39940930819276965, | |
| "learning_rate": 2.5465608253809944e-05, | |
| "loss": 0.2642, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10232742875814438, | |
| "step": 1945, | |
| "valid_targets_mean": 7172.9, | |
| "valid_targets_min": 1999 | |
| }, | |
| { | |
| "epoch": 3.305084745762712, | |
| "grad_norm": 0.43955208594397505, | |
| "learning_rate": 2.5384257401880077e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1324247121810913, | |
| "step": 1950, | |
| "valid_targets_mean": 7481.0, | |
| "valid_targets_min": 3559 | |
| }, | |
| { | |
| "epoch": 3.3135593220338984, | |
| "grad_norm": 0.4759603073565482, | |
| "learning_rate": 2.530281039319509e-05, | |
| "loss": 0.2531, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1068073958158493, | |
| "step": 1955, | |
| "valid_targets_mean": 5274.9, | |
| "valid_targets_min": 2989 | |
| }, | |
| { | |
| "epoch": 3.3220338983050848, | |
| "grad_norm": 0.39533664131007507, | |
| "learning_rate": 2.5221268682306565e-05, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12157115340232849, | |
| "step": 1960, | |
| "valid_targets_mean": 8086.8, | |
| "valid_targets_min": 3023 | |
| }, | |
| { | |
| "epoch": 3.330508474576271, | |
| "grad_norm": 0.4025452212806949, | |
| "learning_rate": 2.5139633725457345e-05, | |
| "loss": 0.3042, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1479698270559311, | |
| "step": 1965, | |
| "valid_targets_mean": 8655.5, | |
| "valid_targets_min": 3906 | |
| }, | |
| { | |
| "epoch": 3.3389830508474576, | |
| "grad_norm": 0.42521063572004575, | |
| "learning_rate": 2.505790698055554e-05, | |
| "loss": 0.2786, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11045683920383453, | |
| "step": 1970, | |
| "valid_targets_mean": 7279.1, | |
| "valid_targets_min": 5657 | |
| }, | |
| { | |
| "epoch": 3.347457627118644, | |
| "grad_norm": 0.48755661717841947, | |
| "learning_rate": 2.49760899071485e-05, | |
| "loss": 0.2911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17295318841934204, | |
| "step": 1975, | |
| "valid_targets_mean": 7348.9, | |
| "valid_targets_min": 2427 | |
| }, | |
| { | |
| "epoch": 3.3559322033898304, | |
| "grad_norm": 0.4254688178029983, | |
| "learning_rate": 2.4894183966396735e-05, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09378187358379364, | |
| "step": 1980, | |
| "valid_targets_mean": 5475.5, | |
| "valid_targets_min": 2778 | |
| }, | |
| { | |
| "epoch": 3.364406779661017, | |
| "grad_norm": 0.367434521784498, | |
| "learning_rate": 2.4812190621047822e-05, | |
| "loss": 0.294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10478046536445618, | |
| "step": 1985, | |
| "valid_targets_mean": 8775.1, | |
| "valid_targets_min": 4384 | |
| }, | |
| { | |
| "epoch": 3.3728813559322033, | |
| "grad_norm": 0.4189507633838683, | |
| "learning_rate": 2.4730111335410298e-05, | |
| "loss": 0.304, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17582011222839355, | |
| "step": 1990, | |
| "valid_targets_mean": 7836.2, | |
| "valid_targets_min": 1645 | |
| }, | |
| { | |
| "epoch": 3.3813559322033897, | |
| "grad_norm": 0.400233301929351, | |
| "learning_rate": 2.4647947575327468e-05, | |
| "loss": 0.2775, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09607855975627899, | |
| "step": 1995, | |
| "valid_targets_mean": 5813.9, | |
| "valid_targets_min": 3494 | |
| }, | |
| { | |
| "epoch": 3.389830508474576, | |
| "grad_norm": 0.4336791016592384, | |
| "learning_rate": 2.4565700808151297e-05, | |
| "loss": 0.2513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10696402192115784, | |
| "step": 2000, | |
| "valid_targets_mean": 6734.2, | |
| "valid_targets_min": 3928 | |
| }, | |
| { | |
| "epoch": 3.3983050847457625, | |
| "grad_norm": 0.41428448011026164, | |
| "learning_rate": 2.4483372502716124e-05, | |
| "loss": 0.2799, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0847843810915947, | |
| "step": 2005, | |
| "valid_targets_mean": 5492.8, | |
| "valid_targets_min": 3844 | |
| }, | |
| { | |
| "epoch": 3.406779661016949, | |
| "grad_norm": 0.40964037100046546, | |
| "learning_rate": 2.44009641293125e-05, | |
| "loss": 0.2883, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14137449860572815, | |
| "step": 2010, | |
| "valid_targets_mean": 7062.4, | |
| "valid_targets_min": 2779 | |
| }, | |
| { | |
| "epoch": 3.415254237288136, | |
| "grad_norm": 0.38682559049197435, | |
| "learning_rate": 2.431847715966087e-05, | |
| "loss": 0.2797, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0912858098745346, | |
| "step": 2015, | |
| "valid_targets_mean": 6138.2, | |
| "valid_targets_min": 2715 | |
| }, | |
| { | |
| "epoch": 3.423728813559322, | |
| "grad_norm": 0.48692004450222665, | |
| "learning_rate": 2.423591306688534e-05, | |
| "loss": 0.2708, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11212123930454254, | |
| "step": 2020, | |
| "valid_targets_mean": 6439.9, | |
| "valid_targets_min": 2940 | |
| }, | |
| { | |
| "epoch": 3.4322033898305087, | |
| "grad_norm": 0.5255657570421769, | |
| "learning_rate": 2.4153273325487346e-05, | |
| "loss": 0.279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1764291524887085, | |
| "step": 2025, | |
| "valid_targets_mean": 8640.4, | |
| "valid_targets_min": 2608 | |
| }, | |
| { | |
| "epoch": 3.440677966101695, | |
| "grad_norm": 0.4266344445319651, | |
| "learning_rate": 2.407055941131932e-05, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18718381226062775, | |
| "step": 2030, | |
| "valid_targets_mean": 7492.9, | |
| "valid_targets_min": 2960 | |
| }, | |
| { | |
| "epoch": 3.4491525423728815, | |
| "grad_norm": 0.4211828179123291, | |
| "learning_rate": 2.3987772801558328e-05, | |
| "loss": 0.2858, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21861864626407623, | |
| "step": 2035, | |
| "valid_targets_mean": 8366.5, | |
| "valid_targets_min": 3066 | |
| }, | |
| { | |
| "epoch": 3.457627118644068, | |
| "grad_norm": 0.4160233489369109, | |
| "learning_rate": 2.3904914974679705e-05, | |
| "loss": 0.2831, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10223515331745148, | |
| "step": 2040, | |
| "valid_targets_mean": 6495.2, | |
| "valid_targets_min": 3637 | |
| }, | |
| { | |
| "epoch": 3.4661016949152543, | |
| "grad_norm": 0.40882160997249284, | |
| "learning_rate": 2.3821987410430646e-05, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12576085329055786, | |
| "step": 2045, | |
| "valid_targets_mean": 7358.2, | |
| "valid_targets_min": 2607 | |
| }, | |
| { | |
| "epoch": 3.4745762711864407, | |
| "grad_norm": 0.36672520626623, | |
| "learning_rate": 2.3738991589803783e-05, | |
| "loss": 0.2965, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20486734807491302, | |
| "step": 2050, | |
| "valid_targets_mean": 12180.8, | |
| "valid_targets_min": 7787 | |
| }, | |
| { | |
| "epoch": 3.483050847457627, | |
| "grad_norm": 0.3853682329233998, | |
| "learning_rate": 2.36559289950107e-05, | |
| "loss": 0.2794, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18601134419441223, | |
| "step": 2055, | |
| "valid_targets_mean": 9524.1, | |
| "valid_targets_min": 3073 | |
| }, | |
| { | |
| "epoch": 3.4915254237288136, | |
| "grad_norm": 0.3801472492117076, | |
| "learning_rate": 2.357280110945552e-05, | |
| "loss": 0.2966, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15141558647155762, | |
| "step": 2060, | |
| "valid_targets_mean": 9554.1, | |
| "valid_targets_min": 4884 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.38281552006385716, | |
| "learning_rate": 2.3489609417708383e-05, | |
| "loss": 0.3091, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07897169888019562, | |
| "step": 2065, | |
| "valid_targets_mean": 6056.4, | |
| "valid_targets_min": 2487 | |
| }, | |
| { | |
| "epoch": 3.5084745762711864, | |
| "grad_norm": 0.4084426268312189, | |
| "learning_rate": 2.340635540547892e-05, | |
| "loss": 0.2594, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09317970275878906, | |
| "step": 2070, | |
| "valid_targets_mean": 6611.2, | |
| "valid_targets_min": 2457 | |
| }, | |
| { | |
| "epoch": 3.516949152542373, | |
| "grad_norm": 0.43668463366477434, | |
| "learning_rate": 2.3323040559589755e-05, | |
| "loss": 0.2841, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1499471664428711, | |
| "step": 2075, | |
| "valid_targets_mean": 7579.8, | |
| "valid_targets_min": 2966 | |
| }, | |
| { | |
| "epoch": 3.5254237288135593, | |
| "grad_norm": 0.3983631104396672, | |
| "learning_rate": 2.3239666367949917e-05, | |
| "loss": 0.2827, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1473519206047058, | |
| "step": 2080, | |
| "valid_targets_mean": 7799.1, | |
| "valid_targets_min": 3621 | |
| }, | |
| { | |
| "epoch": 3.5338983050847457, | |
| "grad_norm": 0.4120452974947437, | |
| "learning_rate": 2.3156234319528295e-05, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0906115174293518, | |
| "step": 2085, | |
| "valid_targets_mean": 6207.0, | |
| "valid_targets_min": 2847 | |
| }, | |
| { | |
| "epoch": 3.542372881355932, | |
| "grad_norm": 0.44363425545735335, | |
| "learning_rate": 2.3072745904327015e-05, | |
| "loss": 0.2936, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09172677993774414, | |
| "step": 2090, | |
| "valid_targets_mean": 6414.6, | |
| "valid_targets_min": 5234 | |
| }, | |
| { | |
| "epoch": 3.5508474576271185, | |
| "grad_norm": 0.38869052034458823, | |
| "learning_rate": 2.298920261335488e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09527749568223953, | |
| "step": 2095, | |
| "valid_targets_mean": 6330.2, | |
| "valid_targets_min": 2577 | |
| }, | |
| { | |
| "epoch": 3.559322033898305, | |
| "grad_norm": 0.36643178759344713, | |
| "learning_rate": 2.2905605938600685e-05, | |
| "loss": 0.2891, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20668168365955353, | |
| "step": 2100, | |
| "valid_targets_mean": 12464.9, | |
| "valid_targets_min": 6539 | |
| }, | |
| { | |
| "epoch": 3.5677966101694913, | |
| "grad_norm": 0.3959504607284564, | |
| "learning_rate": 2.2821957373006617e-05, | |
| "loss": 0.262, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18114525079727173, | |
| "step": 2105, | |
| "valid_targets_mean": 10751.5, | |
| "valid_targets_min": 6678 | |
| }, | |
| { | |
| "epoch": 3.576271186440678, | |
| "grad_norm": 0.41062663468779714, | |
| "learning_rate": 2.273825841044156e-05, | |
| "loss": 0.2941, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15731394290924072, | |
| "step": 2110, | |
| "valid_targets_mean": 8485.2, | |
| "valid_targets_min": 2609 | |
| }, | |
| { | |
| "epoch": 3.584745762711864, | |
| "grad_norm": 0.374346290872505, | |
| "learning_rate": 2.2654510545674442e-05, | |
| "loss": 0.2772, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1397765576839447, | |
| "step": 2115, | |
| "valid_targets_mean": 8244.4, | |
| "valid_targets_min": 3700 | |
| }, | |
| { | |
| "epoch": 3.593220338983051, | |
| "grad_norm": 0.39875361896903927, | |
| "learning_rate": 2.257071527434753e-05, | |
| "loss": 0.2967, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14811114966869354, | |
| "step": 2120, | |
| "valid_targets_mean": 7846.0, | |
| "valid_targets_min": 3187 | |
| }, | |
| { | |
| "epoch": 3.601694915254237, | |
| "grad_norm": 0.39347828360733145, | |
| "learning_rate": 2.2486874092949708e-05, | |
| "loss": 0.2814, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12575045228004456, | |
| "step": 2125, | |
| "valid_targets_mean": 8018.4, | |
| "valid_targets_min": 3985 | |
| }, | |
| { | |
| "epoch": 3.610169491525424, | |
| "grad_norm": 0.4164173242177716, | |
| "learning_rate": 2.2402988498789767e-05, | |
| "loss": 0.2911, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10205131769180298, | |
| "step": 2130, | |
| "valid_targets_mean": 7143.9, | |
| "valid_targets_min": 2676 | |
| }, | |
| { | |
| "epoch": 3.6186440677966103, | |
| "grad_norm": 0.4125444447963192, | |
| "learning_rate": 2.2319059989969668e-05, | |
| "loss": 0.2925, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16392400860786438, | |
| "step": 2135, | |
| "valid_targets_mean": 7902.1, | |
| "valid_targets_min": 3675 | |
| }, | |
| { | |
| "epoch": 3.6271186440677967, | |
| "grad_norm": 0.40524649599732243, | |
| "learning_rate": 2.2235090065357773e-05, | |
| "loss": 0.2945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18411213159561157, | |
| "step": 2140, | |
| "valid_targets_mean": 8892.9, | |
| "valid_targets_min": 4234 | |
| }, | |
| { | |
| "epoch": 3.635593220338983, | |
| "grad_norm": 0.4088874114510119, | |
| "learning_rate": 2.215108022456208e-05, | |
| "loss": 0.3024, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14849057793617249, | |
| "step": 2145, | |
| "valid_targets_mean": 8279.2, | |
| "valid_targets_min": 4495 | |
| }, | |
| { | |
| "epoch": 3.6440677966101696, | |
| "grad_norm": 0.4831151083725686, | |
| "learning_rate": 2.2067031967903443e-05, | |
| "loss": 0.2542, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13207237422466278, | |
| "step": 2150, | |
| "valid_targets_mean": 9388.2, | |
| "valid_targets_min": 4290 | |
| }, | |
| { | |
| "epoch": 3.652542372881356, | |
| "grad_norm": 0.3733280663746573, | |
| "learning_rate": 2.1982946796388788e-05, | |
| "loss": 0.3061, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17002075910568237, | |
| "step": 2155, | |
| "valid_targets_mean": 11468.6, | |
| "valid_targets_min": 4610 | |
| }, | |
| { | |
| "epoch": 3.6610169491525424, | |
| "grad_norm": 0.4173341617806821, | |
| "learning_rate": 2.1898826211684297e-05, | |
| "loss": 0.2735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1557663381099701, | |
| "step": 2160, | |
| "valid_targets_mean": 8174.9, | |
| "valid_targets_min": 3753 | |
| }, | |
| { | |
| "epoch": 3.669491525423729, | |
| "grad_norm": 0.4186670500997302, | |
| "learning_rate": 2.1814671716088593e-05, | |
| "loss": 0.3066, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08113788068294525, | |
| "step": 2165, | |
| "valid_targets_mean": 6345.8, | |
| "valid_targets_min": 2595 | |
| }, | |
| { | |
| "epoch": 3.6779661016949152, | |
| "grad_norm": 0.4101685556228967, | |
| "learning_rate": 2.17304848125059e-05, | |
| "loss": 0.2923, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16531658172607422, | |
| "step": 2170, | |
| "valid_targets_mean": 8883.8, | |
| "valid_targets_min": 4246 | |
| }, | |
| { | |
| "epoch": 3.6864406779661016, | |
| "grad_norm": 0.41763151825852196, | |
| "learning_rate": 2.1646267004419218e-05, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1397768259048462, | |
| "step": 2175, | |
| "valid_targets_mean": 7207.2, | |
| "valid_targets_min": 2605 | |
| }, | |
| { | |
| "epoch": 3.694915254237288, | |
| "grad_norm": 0.440242674224947, | |
| "learning_rate": 2.1562019795863463e-05, | |
| "loss": 0.2954, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22693797945976257, | |
| "step": 2180, | |
| "valid_targets_mean": 9268.8, | |
| "valid_targets_min": 4187 | |
| }, | |
| { | |
| "epoch": 3.7033898305084745, | |
| "grad_norm": 0.44197333796797894, | |
| "learning_rate": 2.147774469139863e-05, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10422711074352264, | |
| "step": 2185, | |
| "valid_targets_mean": 6519.4, | |
| "valid_targets_min": 3589 | |
| }, | |
| { | |
| "epoch": 3.711864406779661, | |
| "grad_norm": 0.40539121412831586, | |
| "learning_rate": 2.1393443196082867e-05, | |
| "loss": 0.283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2202359437942505, | |
| "step": 2190, | |
| "valid_targets_mean": 11366.1, | |
| "valid_targets_min": 4141 | |
| }, | |
| { | |
| "epoch": 3.7203389830508473, | |
| "grad_norm": 0.430547381803079, | |
| "learning_rate": 2.1309116815445665e-05, | |
| "loss": 0.2757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09388360381126404, | |
| "step": 2195, | |
| "valid_targets_mean": 5664.4, | |
| "valid_targets_min": 3034 | |
| }, | |
| { | |
| "epoch": 3.7288135593220337, | |
| "grad_norm": 0.42608356334048153, | |
| "learning_rate": 2.1224767055460922e-05, | |
| "loss": 0.2968, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10148052871227264, | |
| "step": 2200, | |
| "valid_targets_mean": 6436.9, | |
| "valid_targets_min": 3169 | |
| }, | |
| { | |
| "epoch": 3.7372881355932206, | |
| "grad_norm": 0.4210448690634892, | |
| "learning_rate": 2.114039542252008e-05, | |
| "loss": 0.2988, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1500019133090973, | |
| "step": 2205, | |
| "valid_targets_mean": 7466.1, | |
| "valid_targets_min": 3197 | |
| }, | |
| { | |
| "epoch": 3.7457627118644066, | |
| "grad_norm": 0.8119317714900413, | |
| "learning_rate": 2.1056003423405178e-05, | |
| "loss": 0.3019, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22710689902305603, | |
| "step": 2210, | |
| "valid_targets_mean": 8778.1, | |
| "valid_targets_min": 2972 | |
| }, | |
| { | |
| "epoch": 3.7542372881355934, | |
| "grad_norm": 0.4821378308753357, | |
| "learning_rate": 2.0971592565262005e-05, | |
| "loss": 0.2924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19904181361198425, | |
| "step": 2215, | |
| "valid_targets_mean": 10680.4, | |
| "valid_targets_min": 6006 | |
| }, | |
| { | |
| "epoch": 3.7627118644067794, | |
| "grad_norm": 0.4372433360996204, | |
| "learning_rate": 2.088716435557313e-05, | |
| "loss": 0.2795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10062790662050247, | |
| "step": 2220, | |
| "valid_targets_mean": 7107.2, | |
| "valid_targets_min": 3544 | |
| }, | |
| { | |
| "epoch": 3.7711864406779663, | |
| "grad_norm": 0.37024338787318023, | |
| "learning_rate": 2.0802720302131016e-05, | |
| "loss": 0.3064, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1332758069038391, | |
| "step": 2225, | |
| "valid_targets_mean": 8794.9, | |
| "valid_targets_min": 1389 | |
| }, | |
| { | |
| "epoch": 3.7796610169491527, | |
| "grad_norm": 0.41076254617672314, | |
| "learning_rate": 2.0718261913011055e-05, | |
| "loss": 0.2873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15456616878509521, | |
| "step": 2230, | |
| "valid_targets_mean": 7351.8, | |
| "valid_targets_min": 3400 | |
| }, | |
| { | |
| "epoch": 3.788135593220339, | |
| "grad_norm": 0.42262471917169897, | |
| "learning_rate": 2.063379069654468e-05, | |
| "loss": 0.2419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09343097358942032, | |
| "step": 2235, | |
| "valid_targets_mean": 5882.2, | |
| "valid_targets_min": 1584 | |
| }, | |
| { | |
| "epoch": 3.7966101694915255, | |
| "grad_norm": 0.4069130983703195, | |
| "learning_rate": 2.0549308161292405e-05, | |
| "loss": 0.3008, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1469895988702774, | |
| "step": 2240, | |
| "valid_targets_mean": 7706.8, | |
| "valid_targets_min": 2311 | |
| }, | |
| { | |
| "epoch": 3.805084745762712, | |
| "grad_norm": 0.4321629041786689, | |
| "learning_rate": 2.0464815816016866e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11798998713493347, | |
| "step": 2245, | |
| "valid_targets_mean": 6210.6, | |
| "valid_targets_min": 3443 | |
| }, | |
| { | |
| "epoch": 3.8135593220338984, | |
| "grad_norm": 0.3846555018208555, | |
| "learning_rate": 2.0380315169655905e-05, | |
| "loss": 0.3082, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15529337525367737, | |
| "step": 2250, | |
| "valid_targets_mean": 8531.0, | |
| "valid_targets_min": 3768 | |
| }, | |
| { | |
| "epoch": 3.8220338983050848, | |
| "grad_norm": 0.43425368654131496, | |
| "learning_rate": 2.0295807731295628e-05, | |
| "loss": 0.2841, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16532611846923828, | |
| "step": 2255, | |
| "valid_targets_mean": 10186.9, | |
| "valid_targets_min": 5887 | |
| }, | |
| { | |
| "epoch": 3.830508474576271, | |
| "grad_norm": 0.4060353944725796, | |
| "learning_rate": 2.021129501014342e-05, | |
| "loss": 0.2618, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20302706956863403, | |
| "step": 2260, | |
| "valid_targets_mean": 9178.1, | |
| "valid_targets_min": 3145 | |
| }, | |
| { | |
| "epoch": 3.8389830508474576, | |
| "grad_norm": 0.4419753942797279, | |
| "learning_rate": 2.0126778515501e-05, | |
| "loss": 0.2847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1332482248544693, | |
| "step": 2265, | |
| "valid_targets_mean": 6810.1, | |
| "valid_targets_min": 2611 | |
| }, | |
| { | |
| "epoch": 3.847457627118644, | |
| "grad_norm": 0.3860661265841699, | |
| "learning_rate": 2.0042259756737508e-05, | |
| "loss": 0.3056, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12230737507343292, | |
| "step": 2270, | |
| "valid_targets_mean": 7196.2, | |
| "valid_targets_min": 2703 | |
| }, | |
| { | |
| "epoch": 3.8559322033898304, | |
| "grad_norm": 0.4232795474780064, | |
| "learning_rate": 1.99577402432625e-05, | |
| "loss": 0.2909, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14375734329223633, | |
| "step": 2275, | |
| "valid_targets_mean": 7267.1, | |
| "valid_targets_min": 3181 | |
| }, | |
| { | |
| "epoch": 3.864406779661017, | |
| "grad_norm": 0.4224770166929963, | |
| "learning_rate": 1.9873221484499006e-05, | |
| "loss": 0.2665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17108933627605438, | |
| "step": 2280, | |
| "valid_targets_mean": 8647.1, | |
| "valid_targets_min": 2676 | |
| }, | |
| { | |
| "epoch": 3.8728813559322033, | |
| "grad_norm": 0.38764218963486163, | |
| "learning_rate": 1.978870498985659e-05, | |
| "loss": 0.2931, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1058858335018158, | |
| "step": 2285, | |
| "valid_targets_mean": 6942.6, | |
| "valid_targets_min": 2277 | |
| }, | |
| { | |
| "epoch": 3.8813559322033897, | |
| "grad_norm": 0.4395517223806546, | |
| "learning_rate": 1.9704192268704372e-05, | |
| "loss": 0.2693, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10128942131996155, | |
| "step": 2290, | |
| "valid_targets_mean": 5997.0, | |
| "valid_targets_min": 1707 | |
| }, | |
| { | |
| "epoch": 3.889830508474576, | |
| "grad_norm": 0.37259316195160275, | |
| "learning_rate": 1.96196848303441e-05, | |
| "loss": 0.2855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18444445729255676, | |
| "step": 2295, | |
| "valid_targets_mean": 9367.5, | |
| "valid_targets_min": 4451 | |
| }, | |
| { | |
| "epoch": 3.898305084745763, | |
| "grad_norm": 0.44009690363461895, | |
| "learning_rate": 1.953518418398314e-05, | |
| "loss": 0.2737, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14901164174079895, | |
| "step": 2300, | |
| "valid_targets_mean": 6996.6, | |
| "valid_targets_min": 2612 | |
| }, | |
| { | |
| "epoch": 3.906779661016949, | |
| "grad_norm": 0.4064339811743784, | |
| "learning_rate": 1.94506918387076e-05, | |
| "loss": 0.2818, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15096023678779602, | |
| "step": 2305, | |
| "valid_targets_mean": 8826.8, | |
| "valid_targets_min": 3590 | |
| }, | |
| { | |
| "epoch": 3.915254237288136, | |
| "grad_norm": 0.4395725069154411, | |
| "learning_rate": 1.9366209303455322e-05, | |
| "loss": 0.2886, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14768223464488983, | |
| "step": 2310, | |
| "valid_targets_mean": 8623.0, | |
| "valid_targets_min": 5117 | |
| }, | |
| { | |
| "epoch": 3.923728813559322, | |
| "grad_norm": 0.3938918935844931, | |
| "learning_rate": 1.928173808698895e-05, | |
| "loss": 0.2872, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08866434544324875, | |
| "step": 2315, | |
| "valid_targets_mean": 6831.0, | |
| "valid_targets_min": 4091 | |
| }, | |
| { | |
| "epoch": 3.9322033898305087, | |
| "grad_norm": 0.38259904582950616, | |
| "learning_rate": 1.919727969786899e-05, | |
| "loss": 0.283, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14592121541500092, | |
| "step": 2320, | |
| "valid_targets_mean": 8793.0, | |
| "valid_targets_min": 2865 | |
| }, | |
| { | |
| "epoch": 3.940677966101695, | |
| "grad_norm": 0.3874042376620344, | |
| "learning_rate": 1.911283564442687e-05, | |
| "loss": 0.2718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15052124857902527, | |
| "step": 2325, | |
| "valid_targets_mean": 8260.5, | |
| "valid_targets_min": 2791 | |
| }, | |
| { | |
| "epoch": 3.9491525423728815, | |
| "grad_norm": 0.48083269380960864, | |
| "learning_rate": 1.9028407434737998e-05, | |
| "loss": 0.2774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15742509067058563, | |
| "step": 2330, | |
| "valid_targets_mean": 6578.2, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 3.957627118644068, | |
| "grad_norm": 0.3922858402598884, | |
| "learning_rate": 1.894399657659483e-05, | |
| "loss": 0.2829, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1299702376127243, | |
| "step": 2335, | |
| "valid_targets_mean": 8018.9, | |
| "valid_targets_min": 4466 | |
| }, | |
| { | |
| "epoch": 3.9661016949152543, | |
| "grad_norm": 0.4193645141156418, | |
| "learning_rate": 1.8859604577479927e-05, | |
| "loss": 0.2677, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1529259979724884, | |
| "step": 2340, | |
| "valid_targets_mean": 7416.6, | |
| "valid_targets_min": 2801 | |
| }, | |
| { | |
| "epoch": 3.9745762711864407, | |
| "grad_norm": 0.4485830985098884, | |
| "learning_rate": 1.877523294453908e-05, | |
| "loss": 0.2741, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11698532849550247, | |
| "step": 2345, | |
| "valid_targets_mean": 5913.9, | |
| "valid_targets_min": 2271 | |
| }, | |
| { | |
| "epoch": 3.983050847457627, | |
| "grad_norm": 0.39041637106352106, | |
| "learning_rate": 1.869088318455434e-05, | |
| "loss": 0.2821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12944526970386505, | |
| "step": 2350, | |
| "valid_targets_mean": 7937.2, | |
| "valid_targets_min": 4038 | |
| }, | |
| { | |
| "epoch": 3.9915254237288136, | |
| "grad_norm": 0.4206277849161821, | |
| "learning_rate": 1.8606556803917136e-05, | |
| "loss": 0.233, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12252271175384521, | |
| "step": 2355, | |
| "valid_targets_mean": 7135.5, | |
| "valid_targets_min": 2557 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.42395282370877047, | |
| "learning_rate": 1.8522255308601378e-05, | |
| "loss": 0.2707, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14860565960407257, | |
| "step": 2360, | |
| "valid_targets_mean": 6954.8, | |
| "valid_targets_min": 1915 | |
| }, | |
| { | |
| "epoch": 4.008474576271187, | |
| "grad_norm": 0.4394981579743167, | |
| "learning_rate": 1.8437980204136537e-05, | |
| "loss": 0.2836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1371266394853592, | |
| "step": 2365, | |
| "valid_targets_mean": 6881.1, | |
| "valid_targets_min": 3617 | |
| }, | |
| { | |
| "epoch": 4.016949152542373, | |
| "grad_norm": 0.39865371969450675, | |
| "learning_rate": 1.8353732995580786e-05, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1319272816181183, | |
| "step": 2370, | |
| "valid_targets_mean": 7666.8, | |
| "valid_targets_min": 3523 | |
| }, | |
| { | |
| "epoch": 4.02542372881356, | |
| "grad_norm": 0.4335576658492927, | |
| "learning_rate": 1.826951518749411e-05, | |
| "loss": 0.279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15865981578826904, | |
| "step": 2375, | |
| "valid_targets_mean": 9701.0, | |
| "valid_targets_min": 4588 | |
| }, | |
| { | |
| "epoch": 4.033898305084746, | |
| "grad_norm": 0.37429007480712123, | |
| "learning_rate": 1.8185328283911417e-05, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11880265921354294, | |
| "step": 2380, | |
| "valid_targets_mean": 8349.1, | |
| "valid_targets_min": 3445 | |
| }, | |
| { | |
| "epoch": 4.0423728813559325, | |
| "grad_norm": 0.4653775490052777, | |
| "learning_rate": 1.8101173788315707e-05, | |
| "loss": 0.2996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17142072319984436, | |
| "step": 2385, | |
| "valid_targets_mean": 8055.1, | |
| "valid_targets_min": 2989 | |
| }, | |
| { | |
| "epoch": 4.0508474576271185, | |
| "grad_norm": 0.38759263121062476, | |
| "learning_rate": 1.8017053203611215e-05, | |
| "loss": 0.2565, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13516907393932343, | |
| "step": 2390, | |
| "valid_targets_mean": 8046.6, | |
| "valid_targets_min": 3542 | |
| }, | |
| { | |
| "epoch": 4.059322033898305, | |
| "grad_norm": 0.40174040501426045, | |
| "learning_rate": 1.7932968032096564e-05, | |
| "loss": 0.2761, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07665864378213882, | |
| "step": 2395, | |
| "valid_targets_mean": 5400.4, | |
| "valid_targets_min": 2670 | |
| }, | |
| { | |
| "epoch": 4.067796610169491, | |
| "grad_norm": 0.40431475588899, | |
| "learning_rate": 1.7848919775437924e-05, | |
| "loss": 0.2665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11547114700078964, | |
| "step": 2400, | |
| "valid_targets_mean": 7075.5, | |
| "valid_targets_min": 2911 | |
| }, | |
| { | |
| "epoch": 4.076271186440678, | |
| "grad_norm": 0.41851164508496586, | |
| "learning_rate": 1.776490993464223e-05, | |
| "loss": 0.2873, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13930678367614746, | |
| "step": 2405, | |
| "valid_targets_mean": 8866.9, | |
| "valid_targets_min": 4799 | |
| }, | |
| { | |
| "epoch": 4.084745762711864, | |
| "grad_norm": 0.42772766629398995, | |
| "learning_rate": 1.768094001003033e-05, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12916797399520874, | |
| "step": 2410, | |
| "valid_targets_mean": 7858.2, | |
| "valid_targets_min": 4104 | |
| }, | |
| { | |
| "epoch": 4.093220338983051, | |
| "grad_norm": 0.42053509359988245, | |
| "learning_rate": 1.759701150121024e-05, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12978839874267578, | |
| "step": 2415, | |
| "valid_targets_mean": 7875.4, | |
| "valid_targets_min": 2319 | |
| }, | |
| { | |
| "epoch": 4.101694915254237, | |
| "grad_norm": 0.5009497034489835, | |
| "learning_rate": 1.7513125907050302e-05, | |
| "loss": 0.2897, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22473761439323425, | |
| "step": 2420, | |
| "valid_targets_mean": 6372.8, | |
| "valid_targets_min": 2271 | |
| }, | |
| { | |
| "epoch": 4.110169491525424, | |
| "grad_norm": 0.43870007930354155, | |
| "learning_rate": 1.742928472565248e-05, | |
| "loss": 0.27, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09245385229587555, | |
| "step": 2425, | |
| "valid_targets_mean": 5360.8, | |
| "valid_targets_min": 2382 | |
| }, | |
| { | |
| "epoch": 4.11864406779661, | |
| "grad_norm": 0.44775043625125593, | |
| "learning_rate": 1.7345489454325564e-05, | |
| "loss": 0.2703, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0709337666630745, | |
| "step": 2430, | |
| "valid_targets_mean": 5598.9, | |
| "valid_targets_min": 2645 | |
| }, | |
| { | |
| "epoch": 4.127118644067797, | |
| "grad_norm": 0.39217639662993736, | |
| "learning_rate": 1.7261741589558448e-05, | |
| "loss": 0.288, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0978158563375473, | |
| "step": 2435, | |
| "valid_targets_mean": 7005.9, | |
| "valid_targets_min": 3151 | |
| }, | |
| { | |
| "epoch": 4.135593220338983, | |
| "grad_norm": 0.4552504409470467, | |
| "learning_rate": 1.717804262699339e-05, | |
| "loss": 0.2712, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1586516797542572, | |
| "step": 2440, | |
| "valid_targets_mean": 8164.1, | |
| "valid_targets_min": 4684 | |
| }, | |
| { | |
| "epoch": 4.1440677966101696, | |
| "grad_norm": 0.6110641883530884, | |
| "learning_rate": 1.7094394061399318e-05, | |
| "loss": 0.2571, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11182674765586853, | |
| "step": 2445, | |
| "valid_targets_mean": 6517.5, | |
| "valid_targets_min": 3535 | |
| }, | |
| { | |
| "epoch": 4.1525423728813555, | |
| "grad_norm": 0.4189713731531144, | |
| "learning_rate": 1.701079738664512e-05, | |
| "loss": 0.2609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1106354147195816, | |
| "step": 2450, | |
| "valid_targets_mean": 7306.5, | |
| "valid_targets_min": 2984 | |
| }, | |
| { | |
| "epoch": 4.161016949152542, | |
| "grad_norm": 0.4353514390157063, | |
| "learning_rate": 1.6927254095672992e-05, | |
| "loss": 0.2641, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10617899894714355, | |
| "step": 2455, | |
| "valid_targets_mean": 5812.9, | |
| "valid_targets_min": 2453 | |
| }, | |
| { | |
| "epoch": 4.169491525423728, | |
| "grad_norm": 0.4219809852768852, | |
| "learning_rate": 1.6843765680471715e-05, | |
| "loss": 0.2742, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11135134100914001, | |
| "step": 2460, | |
| "valid_targets_mean": 7021.0, | |
| "valid_targets_min": 3043 | |
| }, | |
| { | |
| "epoch": 4.177966101694915, | |
| "grad_norm": 0.5654406681363515, | |
| "learning_rate": 1.6760333632050086e-05, | |
| "loss": 0.2835, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11446435004472733, | |
| "step": 2465, | |
| "valid_targets_mean": 6612.6, | |
| "valid_targets_min": 3022 | |
| }, | |
| { | |
| "epoch": 4.186440677966102, | |
| "grad_norm": 0.47653784967954316, | |
| "learning_rate": 1.667695944041025e-05, | |
| "loss": 0.2723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11347555369138718, | |
| "step": 2470, | |
| "valid_targets_mean": 7101.2, | |
| "valid_targets_min": 2948 | |
| }, | |
| { | |
| "epoch": 4.194915254237288, | |
| "grad_norm": 0.4662821933614443, | |
| "learning_rate": 1.6593644594521082e-05, | |
| "loss": 0.2996, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13076099753379822, | |
| "step": 2475, | |
| "valid_targets_mean": 7465.9, | |
| "valid_targets_min": 2341 | |
| }, | |
| { | |
| "epoch": 4.203389830508475, | |
| "grad_norm": 0.4080427666049444, | |
| "learning_rate": 1.6510390582291624e-05, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18516531586647034, | |
| "step": 2480, | |
| "valid_targets_mean": 9916.2, | |
| "valid_targets_min": 5024 | |
| }, | |
| { | |
| "epoch": 4.211864406779661, | |
| "grad_norm": 0.42008095154266495, | |
| "learning_rate": 1.6427198890544483e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14330461621284485, | |
| "step": 2485, | |
| "valid_targets_mean": 8390.0, | |
| "valid_targets_min": 2523 | |
| }, | |
| { | |
| "epoch": 4.220338983050848, | |
| "grad_norm": 0.41276959099838073, | |
| "learning_rate": 1.634407100498931e-05, | |
| "loss": 0.264, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11455489695072174, | |
| "step": 2490, | |
| "valid_targets_mean": 7962.2, | |
| "valid_targets_min": 4119 | |
| }, | |
| { | |
| "epoch": 4.228813559322034, | |
| "grad_norm": 0.43611233307799147, | |
| "learning_rate": 1.626100841019623e-05, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11222334951162338, | |
| "step": 2495, | |
| "valid_targets_mean": 6606.1, | |
| "valid_targets_min": 3702 | |
| }, | |
| { | |
| "epoch": 4.237288135593221, | |
| "grad_norm": 0.5784961476511713, | |
| "learning_rate": 1.6178012589569358e-05, | |
| "loss": 0.2881, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20426484942436218, | |
| "step": 2500, | |
| "valid_targets_mean": 9576.0, | |
| "valid_targets_min": 4395 | |
| }, | |
| { | |
| "epoch": 4.245762711864407, | |
| "grad_norm": 0.41472930754303877, | |
| "learning_rate": 1.60950850253203e-05, | |
| "loss": 0.2774, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18941691517829895, | |
| "step": 2505, | |
| "valid_targets_mean": 9656.6, | |
| "valid_targets_min": 4846 | |
| }, | |
| { | |
| "epoch": 4.254237288135593, | |
| "grad_norm": 0.39746772521027784, | |
| "learning_rate": 1.601222719844168e-05, | |
| "loss": 0.2598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10123373568058014, | |
| "step": 2510, | |
| "valid_targets_mean": 8234.5, | |
| "valid_targets_min": 4070 | |
| }, | |
| { | |
| "epoch": 4.262711864406779, | |
| "grad_norm": 0.40487688980861414, | |
| "learning_rate": 1.5929440588680688e-05, | |
| "loss": 0.239, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1368967741727829, | |
| "step": 2515, | |
| "valid_targets_mean": 9803.1, | |
| "valid_targets_min": 3305 | |
| }, | |
| { | |
| "epoch": 4.271186440677966, | |
| "grad_norm": 0.38573108303487735, | |
| "learning_rate": 1.5846726674512654e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09613785147666931, | |
| "step": 2520, | |
| "valid_targets_mean": 7835.9, | |
| "valid_targets_min": 3348 | |
| }, | |
| { | |
| "epoch": 4.279661016949152, | |
| "grad_norm": 0.43145138808268946, | |
| "learning_rate": 1.576408693311466e-05, | |
| "loss": 0.2675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18399053812026978, | |
| "step": 2525, | |
| "valid_targets_mean": 9229.1, | |
| "valid_targets_min": 3336 | |
| }, | |
| { | |
| "epoch": 4.288135593220339, | |
| "grad_norm": 0.4562660091012742, | |
| "learning_rate": 1.5681522840339143e-05, | |
| "loss": 0.2722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1121688187122345, | |
| "step": 2530, | |
| "valid_targets_mean": 6697.4, | |
| "valid_targets_min": 1611 | |
| }, | |
| { | |
| "epoch": 4.296610169491525, | |
| "grad_norm": 0.5087600350546465, | |
| "learning_rate": 1.5599035870687515e-05, | |
| "loss": 0.2855, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12653526663780212, | |
| "step": 2535, | |
| "valid_targets_mean": 6205.5, | |
| "valid_targets_min": 2504 | |
| }, | |
| { | |
| "epoch": 4.305084745762712, | |
| "grad_norm": 0.438881885643239, | |
| "learning_rate": 1.5516627497283882e-05, | |
| "loss": 0.2753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19421055912971497, | |
| "step": 2540, | |
| "valid_targets_mean": 9667.5, | |
| "valid_targets_min": 2638 | |
| }, | |
| { | |
| "epoch": 4.313559322033898, | |
| "grad_norm": 0.45645782507464455, | |
| "learning_rate": 1.5434299191848713e-05, | |
| "loss": 0.2795, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1491662859916687, | |
| "step": 2545, | |
| "valid_targets_mean": 7809.4, | |
| "valid_targets_min": 2630 | |
| }, | |
| { | |
| "epoch": 4.322033898305085, | |
| "grad_norm": 0.4353487772661872, | |
| "learning_rate": 1.5352052424672535e-05, | |
| "loss": 0.2644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13219210505485535, | |
| "step": 2550, | |
| "valid_targets_mean": 6678.6, | |
| "valid_targets_min": 1032 | |
| }, | |
| { | |
| "epoch": 4.330508474576272, | |
| "grad_norm": 0.4386593211818938, | |
| "learning_rate": 1.5269888664589712e-05, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09329979121685028, | |
| "step": 2555, | |
| "valid_targets_mean": 6891.2, | |
| "valid_targets_min": 3901 | |
| }, | |
| { | |
| "epoch": 4.338983050847458, | |
| "grad_norm": 0.45062923880238476, | |
| "learning_rate": 1.5187809378952182e-05, | |
| "loss": 0.2555, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10724156349897385, | |
| "step": 2560, | |
| "valid_targets_mean": 6354.0, | |
| "valid_targets_min": 1847 | |
| }, | |
| { | |
| "epoch": 4.3474576271186445, | |
| "grad_norm": 0.4720009798967789, | |
| "learning_rate": 1.5105816033603266e-05, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09511254727840424, | |
| "step": 2565, | |
| "valid_targets_mean": 6438.9, | |
| "valid_targets_min": 3573 | |
| }, | |
| { | |
| "epoch": 4.3559322033898304, | |
| "grad_norm": 0.47501420296893465, | |
| "learning_rate": 1.5023910092851509e-05, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14761820435523987, | |
| "step": 2570, | |
| "valid_targets_mean": 7794.8, | |
| "valid_targets_min": 4409 | |
| }, | |
| { | |
| "epoch": 4.364406779661017, | |
| "grad_norm": 0.46228965195292865, | |
| "learning_rate": 1.4942093019444468e-05, | |
| "loss": 0.2713, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21561656892299652, | |
| "step": 2575, | |
| "valid_targets_mean": 7940.1, | |
| "valid_targets_min": 2271 | |
| }, | |
| { | |
| "epoch": 4.372881355932203, | |
| "grad_norm": 0.4310289841712482, | |
| "learning_rate": 1.4860366274542663e-05, | |
| "loss": 0.2577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15172511339187622, | |
| "step": 2580, | |
| "valid_targets_mean": 6876.4, | |
| "valid_targets_min": 3609 | |
| }, | |
| { | |
| "epoch": 4.38135593220339, | |
| "grad_norm": 0.3817393899077175, | |
| "learning_rate": 1.4778731317693442e-05, | |
| "loss": 0.2474, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1488693356513977, | |
| "step": 2585, | |
| "valid_targets_mean": 10913.8, | |
| "valid_targets_min": 2336 | |
| }, | |
| { | |
| "epoch": 4.389830508474576, | |
| "grad_norm": 0.4396698768007295, | |
| "learning_rate": 1.4697189606804914e-05, | |
| "loss": 0.2492, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11185161769390106, | |
| "step": 2590, | |
| "valid_targets_mean": 7252.6, | |
| "valid_targets_min": 4278 | |
| }, | |
| { | |
| "epoch": 4.398305084745763, | |
| "grad_norm": 0.4425465024994567, | |
| "learning_rate": 1.4615742598119927e-05, | |
| "loss": 0.2621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10830656439065933, | |
| "step": 2595, | |
| "valid_targets_mean": 7119.5, | |
| "valid_targets_min": 2530 | |
| }, | |
| { | |
| "epoch": 4.406779661016949, | |
| "grad_norm": 0.4071900412191274, | |
| "learning_rate": 1.4534391746190056e-05, | |
| "loss": 0.2762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10529286414384842, | |
| "step": 2600, | |
| "valid_targets_mean": 7428.5, | |
| "valid_targets_min": 2578 | |
| }, | |
| { | |
| "epoch": 4.415254237288136, | |
| "grad_norm": 0.40288822455342677, | |
| "learning_rate": 1.4453138503849622e-05, | |
| "loss": 0.3073, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11631416529417038, | |
| "step": 2605, | |
| "valid_targets_mean": 7269.8, | |
| "valid_targets_min": 3017 | |
| }, | |
| { | |
| "epoch": 4.423728813559322, | |
| "grad_norm": 0.42750798117550126, | |
| "learning_rate": 1.4371984322189754e-05, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14900711178779602, | |
| "step": 2610, | |
| "valid_targets_mean": 8732.9, | |
| "valid_targets_min": 4282 | |
| }, | |
| { | |
| "epoch": 4.432203389830509, | |
| "grad_norm": 0.44095759717826927, | |
| "learning_rate": 1.4290930650532448e-05, | |
| "loss": 0.2676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1209695041179657, | |
| "step": 2615, | |
| "valid_targets_mean": 6879.2, | |
| "valid_targets_min": 3422 | |
| }, | |
| { | |
| "epoch": 4.440677966101695, | |
| "grad_norm": 0.424552739901633, | |
| "learning_rate": 1.4209978936404731e-05, | |
| "loss": 0.2878, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1218380331993103, | |
| "step": 2620, | |
| "valid_targets_mean": 6677.4, | |
| "valid_targets_min": 3274 | |
| }, | |
| { | |
| "epoch": 4.4491525423728815, | |
| "grad_norm": 0.39167231606947506, | |
| "learning_rate": 1.4129130625512776e-05, | |
| "loss": 0.2973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12196886539459229, | |
| "step": 2625, | |
| "valid_targets_mean": 9059.2, | |
| "valid_targets_min": 3095 | |
| }, | |
| { | |
| "epoch": 4.4576271186440675, | |
| "grad_norm": 0.3954677586985334, | |
| "learning_rate": 1.4048387161716086e-05, | |
| "loss": 0.2739, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13891199231147766, | |
| "step": 2630, | |
| "valid_targets_mean": 8787.0, | |
| "valid_targets_min": 4182 | |
| }, | |
| { | |
| "epoch": 4.466101694915254, | |
| "grad_norm": 0.3922770972346297, | |
| "learning_rate": 1.3967749987001717e-05, | |
| "loss": 0.2644, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15331467986106873, | |
| "step": 2635, | |
| "valid_targets_mean": 8593.4, | |
| "valid_targets_min": 2049 | |
| }, | |
| { | |
| "epoch": 4.47457627118644, | |
| "grad_norm": 0.416498484310529, | |
| "learning_rate": 1.3887220541458521e-05, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14011646807193756, | |
| "step": 2640, | |
| "valid_targets_mean": 6794.2, | |
| "valid_targets_min": 1697 | |
| }, | |
| { | |
| "epoch": 4.483050847457627, | |
| "grad_norm": 0.3977702816938163, | |
| "learning_rate": 1.380680026325143e-05, | |
| "loss": 0.236, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10949341207742691, | |
| "step": 2645, | |
| "valid_targets_mean": 7184.6, | |
| "valid_targets_min": 1466 | |
| }, | |
| { | |
| "epoch": 4.491525423728813, | |
| "grad_norm": 0.43575966118057613, | |
| "learning_rate": 1.3726490588595776e-05, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16025930643081665, | |
| "step": 2650, | |
| "valid_targets_mean": 8080.8, | |
| "valid_targets_min": 4200 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.4929537665321967, | |
| "learning_rate": 1.3646292951731615e-05, | |
| "loss": 0.2622, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12223613262176514, | |
| "step": 2655, | |
| "valid_targets_mean": 7641.6, | |
| "valid_targets_min": 2772 | |
| }, | |
| { | |
| "epoch": 4.508474576271187, | |
| "grad_norm": 0.4542816770985378, | |
| "learning_rate": 1.3566208784898157e-05, | |
| "loss": 0.2684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10347205400466919, | |
| "step": 2660, | |
| "valid_targets_mean": 7062.9, | |
| "valid_targets_min": 3917 | |
| }, | |
| { | |
| "epoch": 4.516949152542373, | |
| "grad_norm": 0.6154023708778347, | |
| "learning_rate": 1.348623951830816e-05, | |
| "loss": 0.2597, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12198853492736816, | |
| "step": 2665, | |
| "valid_targets_mean": 6628.5, | |
| "valid_targets_min": 2337 | |
| }, | |
| { | |
| "epoch": 4.52542372881356, | |
| "grad_norm": 0.4401656665983104, | |
| "learning_rate": 1.3406386580122389e-05, | |
| "loss": 0.2879, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06955887377262115, | |
| "step": 2670, | |
| "valid_targets_mean": 4901.9, | |
| "valid_targets_min": 2179 | |
| }, | |
| { | |
| "epoch": 4.533898305084746, | |
| "grad_norm": 0.407257406794763, | |
| "learning_rate": 1.332665139642412e-05, | |
| "loss": 0.2733, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21901419758796692, | |
| "step": 2675, | |
| "valid_targets_mean": 10808.1, | |
| "valid_targets_min": 3359 | |
| }, | |
| { | |
| "epoch": 4.5423728813559325, | |
| "grad_norm": 0.4171993331704572, | |
| "learning_rate": 1.3247035391193664e-05, | |
| "loss": 0.268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08096012473106384, | |
| "step": 2680, | |
| "valid_targets_mean": 6991.5, | |
| "valid_targets_min": 3342 | |
| }, | |
| { | |
| "epoch": 4.5508474576271185, | |
| "grad_norm": 0.4216667198517297, | |
| "learning_rate": 1.3167539986282938e-05, | |
| "loss": 0.2414, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12701958417892456, | |
| "step": 2685, | |
| "valid_targets_mean": 7187.9, | |
| "valid_targets_min": 3835 | |
| }, | |
| { | |
| "epoch": 4.559322033898305, | |
| "grad_norm": 0.4007002554100059, | |
| "learning_rate": 1.3088166601390087e-05, | |
| "loss": 0.2757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13677562773227692, | |
| "step": 2690, | |
| "valid_targets_mean": 8478.9, | |
| "valid_targets_min": 3949 | |
| }, | |
| { | |
| "epoch": 4.567796610169491, | |
| "grad_norm": 0.44906505243084627, | |
| "learning_rate": 1.3008916654034085e-05, | |
| "loss": 0.3112, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12453994154930115, | |
| "step": 2695, | |
| "valid_targets_mean": 6475.2, | |
| "valid_targets_min": 2863 | |
| }, | |
| { | |
| "epoch": 4.576271186440678, | |
| "grad_norm": 0.4695955316852692, | |
| "learning_rate": 1.2929791559529484e-05, | |
| "loss": 0.2944, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1315167248249054, | |
| "step": 2700, | |
| "valid_targets_mean": 8320.2, | |
| "valid_targets_min": 4323 | |
| }, | |
| { | |
| "epoch": 4.584745762711864, | |
| "grad_norm": 0.480923032502113, | |
| "learning_rate": 1.2850792730961082e-05, | |
| "loss": 0.2867, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10258597880601883, | |
| "step": 2705, | |
| "valid_targets_mean": 6287.9, | |
| "valid_targets_min": 2432 | |
| }, | |
| { | |
| "epoch": 4.593220338983051, | |
| "grad_norm": 0.4053593691587642, | |
| "learning_rate": 1.2771921579158728e-05, | |
| "loss": 0.2853, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20100140571594238, | |
| "step": 2710, | |
| "valid_targets_mean": 11134.0, | |
| "valid_targets_min": 5394 | |
| }, | |
| { | |
| "epoch": 4.601694915254237, | |
| "grad_norm": 0.5133696354264329, | |
| "learning_rate": 1.26931795126721e-05, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19328990578651428, | |
| "step": 2715, | |
| "valid_targets_mean": 8126.9, | |
| "valid_targets_min": 2713 | |
| }, | |
| { | |
| "epoch": 4.610169491525424, | |
| "grad_norm": 0.40346643930406395, | |
| "learning_rate": 1.2614567937745554e-05, | |
| "loss": 0.2749, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12850111722946167, | |
| "step": 2720, | |
| "valid_targets_mean": 8410.1, | |
| "valid_targets_min": 3309 | |
| }, | |
| { | |
| "epoch": 4.61864406779661, | |
| "grad_norm": 0.4112102027826456, | |
| "learning_rate": 1.2536088258293018e-05, | |
| "loss": 0.2723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11901889741420746, | |
| "step": 2725, | |
| "valid_targets_mean": 7852.2, | |
| "valid_targets_min": 3343 | |
| }, | |
| { | |
| "epoch": 4.627118644067797, | |
| "grad_norm": 0.43842191631154037, | |
| "learning_rate": 1.2457741875872928e-05, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12838314473628998, | |
| "step": 2730, | |
| "valid_targets_mean": 8761.0, | |
| "valid_targets_min": 3660 | |
| }, | |
| { | |
| "epoch": 4.635593220338983, | |
| "grad_norm": 0.4232260475024782, | |
| "learning_rate": 1.2379530189663156e-05, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16140376031398773, | |
| "step": 2735, | |
| "valid_targets_mean": 7674.8, | |
| "valid_targets_min": 1973 | |
| }, | |
| { | |
| "epoch": 4.6440677966101696, | |
| "grad_norm": 0.4001326172915943, | |
| "learning_rate": 1.2301454596436076e-05, | |
| "loss": 0.2578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1508510857820511, | |
| "step": 2740, | |
| "valid_targets_mean": 8669.1, | |
| "valid_targets_min": 3636 | |
| }, | |
| { | |
| "epoch": 4.652542372881356, | |
| "grad_norm": 0.46641967025775893, | |
| "learning_rate": 1.222351649053359e-05, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08667419850826263, | |
| "step": 2745, | |
| "valid_targets_mean": 6366.4, | |
| "valid_targets_min": 2728 | |
| }, | |
| { | |
| "epoch": 4.661016949152542, | |
| "grad_norm": 0.41058878739853244, | |
| "learning_rate": 1.2145717263842228e-05, | |
| "loss": 0.2752, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14657464623451233, | |
| "step": 2750, | |
| "valid_targets_mean": 10316.6, | |
| "valid_targets_min": 4889 | |
| }, | |
| { | |
| "epoch": 4.669491525423728, | |
| "grad_norm": 0.43047052682197245, | |
| "learning_rate": 1.2068058305768298e-05, | |
| "loss": 0.2773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17472457885742188, | |
| "step": 2755, | |
| "valid_targets_mean": 8935.2, | |
| "valid_targets_min": 1559 | |
| }, | |
| { | |
| "epoch": 4.677966101694915, | |
| "grad_norm": 0.48052761275513234, | |
| "learning_rate": 1.1990541003213072e-05, | |
| "loss": 0.2782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1159568727016449, | |
| "step": 2760, | |
| "valid_targets_mean": 5699.4, | |
| "valid_targets_min": 1794 | |
| }, | |
| { | |
| "epoch": 4.686440677966102, | |
| "grad_norm": 0.42086376935943387, | |
| "learning_rate": 1.1913166740547999e-05, | |
| "loss": 0.2616, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15755105018615723, | |
| "step": 2765, | |
| "valid_targets_mean": 8291.0, | |
| "valid_targets_min": 2110 | |
| }, | |
| { | |
| "epoch": 4.694915254237288, | |
| "grad_norm": 0.4261819528196372, | |
| "learning_rate": 1.1835936899590017e-05, | |
| "loss": 0.2767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12719427049160004, | |
| "step": 2770, | |
| "valid_targets_mean": 7391.2, | |
| "valid_targets_min": 3754 | |
| }, | |
| { | |
| "epoch": 4.703389830508475, | |
| "grad_norm": 0.41188015404081924, | |
| "learning_rate": 1.1758852859576842e-05, | |
| "loss": 0.2791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09580564498901367, | |
| "step": 2775, | |
| "valid_targets_mean": 6902.0, | |
| "valid_targets_min": 4403 | |
| }, | |
| { | |
| "epoch": 4.711864406779661, | |
| "grad_norm": 0.42367310030699584, | |
| "learning_rate": 1.1681915997142354e-05, | |
| "loss": 0.2698, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1462278664112091, | |
| "step": 2780, | |
| "valid_targets_mean": 8892.1, | |
| "valid_targets_min": 2956 | |
| }, | |
| { | |
| "epoch": 4.720338983050848, | |
| "grad_norm": 0.3983024142470498, | |
| "learning_rate": 1.1605127686292009e-05, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18142619729042053, | |
| "step": 2785, | |
| "valid_targets_mean": 11195.0, | |
| "valid_targets_min": 4341 | |
| }, | |
| { | |
| "epoch": 4.728813559322034, | |
| "grad_norm": 0.43308748942127245, | |
| "learning_rate": 1.15284892983783e-05, | |
| "loss": 0.2784, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11833596974611282, | |
| "step": 2790, | |
| "valid_targets_mean": 7267.1, | |
| "valid_targets_min": 3360 | |
| }, | |
| { | |
| "epoch": 4.737288135593221, | |
| "grad_norm": 0.42170297161536185, | |
| "learning_rate": 1.1452002202076256e-05, | |
| "loss": 0.289, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1312982141971588, | |
| "step": 2795, | |
| "valid_targets_mean": 8055.4, | |
| "valid_targets_min": 2530 | |
| }, | |
| { | |
| "epoch": 4.745762711864407, | |
| "grad_norm": 0.3919530604528147, | |
| "learning_rate": 1.1375667763359031e-05, | |
| "loss": 0.2649, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14995722472667694, | |
| "step": 2800, | |
| "valid_targets_mean": 8652.5, | |
| "valid_targets_min": 3923 | |
| }, | |
| { | |
| "epoch": 4.754237288135593, | |
| "grad_norm": 0.4210088857440684, | |
| "learning_rate": 1.1299487345473457e-05, | |
| "loss": 0.2776, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10921230912208557, | |
| "step": 2805, | |
| "valid_targets_mean": 7063.4, | |
| "valid_targets_min": 3390 | |
| }, | |
| { | |
| "epoch": 4.762711864406779, | |
| "grad_norm": 0.46524547077364725, | |
| "learning_rate": 1.1223462308915767e-05, | |
| "loss": 0.2773, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16076411306858063, | |
| "step": 2810, | |
| "valid_targets_mean": 7103.1, | |
| "valid_targets_min": 2713 | |
| }, | |
| { | |
| "epoch": 4.771186440677966, | |
| "grad_norm": 0.3987547098085568, | |
| "learning_rate": 1.1147594011407203e-05, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15100906789302826, | |
| "step": 2815, | |
| "valid_targets_mean": 9900.6, | |
| "valid_targets_min": 4761 | |
| }, | |
| { | |
| "epoch": 4.779661016949152, | |
| "grad_norm": 0.4867606351030108, | |
| "learning_rate": 1.1071883807869886e-05, | |
| "loss": 0.2769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1709168553352356, | |
| "step": 2820, | |
| "valid_targets_mean": 7995.1, | |
| "valid_targets_min": 3848 | |
| }, | |
| { | |
| "epoch": 4.788135593220339, | |
| "grad_norm": 0.4243770648885672, | |
| "learning_rate": 1.0996333050402503e-05, | |
| "loss": 0.29, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11102272570133209, | |
| "step": 2825, | |
| "valid_targets_mean": 6283.8, | |
| "valid_targets_min": 3432 | |
| }, | |
| { | |
| "epoch": 4.796610169491525, | |
| "grad_norm": 0.37807523819250266, | |
| "learning_rate": 1.0920943088256266e-05, | |
| "loss": 0.2929, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17590108513832092, | |
| "step": 2830, | |
| "valid_targets_mean": 11326.0, | |
| "valid_targets_min": 3696 | |
| }, | |
| { | |
| "epoch": 4.805084745762712, | |
| "grad_norm": 0.5285744602943411, | |
| "learning_rate": 1.0845715267810716e-05, | |
| "loss": 0.2683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16316203773021698, | |
| "step": 2835, | |
| "valid_targets_mean": 6556.4, | |
| "valid_targets_min": 1843 | |
| }, | |
| { | |
| "epoch": 4.813559322033898, | |
| "grad_norm": 0.409867873854923, | |
| "learning_rate": 1.0770650932549768e-05, | |
| "loss": 0.2768, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16258347034454346, | |
| "step": 2840, | |
| "valid_targets_mean": 9091.2, | |
| "valid_targets_min": 3104 | |
| }, | |
| { | |
| "epoch": 4.822033898305085, | |
| "grad_norm": 0.4612130048394203, | |
| "learning_rate": 1.0695751423037639e-05, | |
| "loss": 0.2593, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14501486718654633, | |
| "step": 2845, | |
| "valid_targets_mean": 9458.6, | |
| "valid_targets_min": 5624 | |
| }, | |
| { | |
| "epoch": 4.830508474576272, | |
| "grad_norm": 0.4165122048257007, | |
| "learning_rate": 1.0621018076894984e-05, | |
| "loss": 0.2691, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1871585100889206, | |
| "step": 2850, | |
| "valid_targets_mean": 9650.6, | |
| "valid_targets_min": 2319 | |
| }, | |
| { | |
| "epoch": 4.838983050847458, | |
| "grad_norm": 0.4447768666450171, | |
| "learning_rate": 1.0546452228774919e-05, | |
| "loss": 0.2932, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2016182541847229, | |
| "step": 2855, | |
| "valid_targets_mean": 9618.0, | |
| "valid_targets_min": 2098 | |
| }, | |
| { | |
| "epoch": 4.847457627118644, | |
| "grad_norm": 0.45706910235659043, | |
| "learning_rate": 1.047205521033928e-05, | |
| "loss": 0.2659, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07524390518665314, | |
| "step": 2860, | |
| "valid_targets_mean": 5177.8, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 4.8559322033898304, | |
| "grad_norm": 0.4082417794116573, | |
| "learning_rate": 1.0397828350234765e-05, | |
| "loss": 0.2715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07515836507081985, | |
| "step": 2865, | |
| "valid_targets_mean": 5618.4, | |
| "valid_targets_min": 3548 | |
| }, | |
| { | |
| "epoch": 4.864406779661017, | |
| "grad_norm": 0.43699485002864896, | |
| "learning_rate": 1.032377297406926e-05, | |
| "loss": 0.2669, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15624886751174927, | |
| "step": 2870, | |
| "valid_targets_mean": 8017.9, | |
| "valid_targets_min": 3896 | |
| }, | |
| { | |
| "epoch": 4.872881355932203, | |
| "grad_norm": 0.4475219215390075, | |
| "learning_rate": 1.0249890404388121e-05, | |
| "loss": 0.2769, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13607046008110046, | |
| "step": 2875, | |
| "valid_targets_mean": 7762.4, | |
| "valid_targets_min": 2731 | |
| }, | |
| { | |
| "epoch": 4.88135593220339, | |
| "grad_norm": 0.4010893872939635, | |
| "learning_rate": 1.0176181960650595e-05, | |
| "loss": 0.2606, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2050163745880127, | |
| "step": 2880, | |
| "valid_targets_mean": 10443.4, | |
| "valid_targets_min": 4967 | |
| }, | |
| { | |
| "epoch": 4.889830508474576, | |
| "grad_norm": 0.43399489366456434, | |
| "learning_rate": 1.0102648959206212e-05, | |
| "loss": 0.2465, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09470526874065399, | |
| "step": 2885, | |
| "valid_targets_mean": 6960.6, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 4.898305084745763, | |
| "grad_norm": 0.409246582546555, | |
| "learning_rate": 1.0029292713271334e-05, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16161762177944183, | |
| "step": 2890, | |
| "valid_targets_mean": 7954.8, | |
| "valid_targets_min": 3290 | |
| }, | |
| { | |
| "epoch": 4.906779661016949, | |
| "grad_norm": 0.43312261146531655, | |
| "learning_rate": 9.956114532905625e-06, | |
| "loss": 0.3058, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12446765601634979, | |
| "step": 2895, | |
| "valid_targets_mean": 7838.4, | |
| "valid_targets_min": 4758 | |
| }, | |
| { | |
| "epoch": 4.915254237288136, | |
| "grad_norm": 0.4301864357851658, | |
| "learning_rate": 9.883115724988743e-06, | |
| "loss": 0.2753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12632432579994202, | |
| "step": 2900, | |
| "valid_targets_mean": 7188.4, | |
| "valid_targets_min": 2425 | |
| }, | |
| { | |
| "epoch": 4.923728813559322, | |
| "grad_norm": 0.5636109539825138, | |
| "learning_rate": 9.810297593196922e-06, | |
| "loss": 0.2839, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1164482980966568, | |
| "step": 2905, | |
| "valid_targets_mean": 7041.4, | |
| "valid_targets_min": 1517 | |
| }, | |
| { | |
| "epoch": 4.932203389830509, | |
| "grad_norm": 0.41163855796829296, | |
| "learning_rate": 9.73766143797976e-06, | |
| "loss": 0.2975, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18512123823165894, | |
| "step": 2910, | |
| "valid_targets_mean": 9377.4, | |
| "valid_targets_min": 3063 | |
| }, | |
| { | |
| "epoch": 4.940677966101695, | |
| "grad_norm": 0.3999145511854064, | |
| "learning_rate": 9.665208556536918e-06, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15674389898777008, | |
| "step": 2915, | |
| "valid_targets_mean": 10361.1, | |
| "valid_targets_min": 2778 | |
| }, | |
| { | |
| "epoch": 4.9491525423728815, | |
| "grad_norm": 0.41681357372277467, | |
| "learning_rate": 9.592940242795035e-06, | |
| "loss": 0.2692, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1276121735572815, | |
| "step": 2920, | |
| "valid_targets_mean": 7945.4, | |
| "valid_targets_min": 4230 | |
| }, | |
| { | |
| "epoch": 4.9576271186440675, | |
| "grad_norm": 0.4103109872648721, | |
| "learning_rate": 9.520857787384548e-06, | |
| "loss": 0.2913, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2234308123588562, | |
| "step": 2925, | |
| "valid_targets_mean": 10142.1, | |
| "valid_targets_min": 4081 | |
| }, | |
| { | |
| "epoch": 4.966101694915254, | |
| "grad_norm": 0.4773090185998591, | |
| "learning_rate": 9.44896247761669e-06, | |
| "loss": 0.2859, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11663774400949478, | |
| "step": 2930, | |
| "valid_targets_mean": 7561.9, | |
| "valid_targets_min": 2388 | |
| }, | |
| { | |
| "epoch": 4.97457627118644, | |
| "grad_norm": 0.4519877157560931, | |
| "learning_rate": 9.377255597460469e-06, | |
| "loss": 0.2697, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14655426144599915, | |
| "step": 2935, | |
| "valid_targets_mean": 7012.6, | |
| "valid_targets_min": 2452 | |
| }, | |
| { | |
| "epoch": 4.983050847457627, | |
| "grad_norm": 0.46442379360691927, | |
| "learning_rate": 9.305738427519782e-06, | |
| "loss": 0.2943, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10421495884656906, | |
| "step": 2940, | |
| "valid_targets_mean": 5663.6, | |
| "valid_targets_min": 2072 | |
| }, | |
| { | |
| "epoch": 4.991525423728813, | |
| "grad_norm": 0.38014541504707516, | |
| "learning_rate": 9.234412245010482e-06, | |
| "loss": 0.2418, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16515886783599854, | |
| "step": 2945, | |
| "valid_targets_mean": 9815.4, | |
| "valid_targets_min": 2340 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.40418734474649126, | |
| "learning_rate": 9.163278323737635e-06, | |
| "loss": 0.2861, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17972296476364136, | |
| "step": 2950, | |
| "valid_targets_mean": 9544.8, | |
| "valid_targets_min": 3286 | |
| }, | |
| { | |
| "epoch": 5.008474576271187, | |
| "grad_norm": 0.38884902460392823, | |
| "learning_rate": 9.092337934072703e-06, | |
| "loss": 0.27, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12223666906356812, | |
| "step": 2955, | |
| "valid_targets_mean": 8354.1, | |
| "valid_targets_min": 3525 | |
| }, | |
| { | |
| "epoch": 5.016949152542373, | |
| "grad_norm": 0.4358731046466529, | |
| "learning_rate": 9.021592342930928e-06, | |
| "loss": 0.2311, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10257917642593384, | |
| "step": 2960, | |
| "valid_targets_mean": 8100.0, | |
| "valid_targets_min": 4173 | |
| }, | |
| { | |
| "epoch": 5.02542372881356, | |
| "grad_norm": 0.43373781383804855, | |
| "learning_rate": 8.951042813748645e-06, | |
| "loss": 0.2664, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18158259987831116, | |
| "step": 2965, | |
| "valid_targets_mean": 10387.9, | |
| "valid_targets_min": 5211 | |
| }, | |
| { | |
| "epoch": 5.033898305084746, | |
| "grad_norm": 0.5249679818676168, | |
| "learning_rate": 8.880690606460754e-06, | |
| "loss": 0.2915, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10569009184837341, | |
| "step": 2970, | |
| "valid_targets_mean": 6634.9, | |
| "valid_targets_min": 2482 | |
| }, | |
| { | |
| "epoch": 5.0423728813559325, | |
| "grad_norm": 0.42560397590925436, | |
| "learning_rate": 8.8105369774782e-06, | |
| "loss": 0.2722, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.157941997051239, | |
| "step": 2975, | |
| "valid_targets_mean": 8599.6, | |
| "valid_targets_min": 2981 | |
| }, | |
| { | |
| "epoch": 5.0508474576271185, | |
| "grad_norm": 0.4355648091689131, | |
| "learning_rate": 8.74058317966556e-06, | |
| "loss": 0.2488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08466358482837677, | |
| "step": 2980, | |
| "valid_targets_mean": 5620.9, | |
| "valid_targets_min": 3876 | |
| }, | |
| { | |
| "epoch": 5.059322033898305, | |
| "grad_norm": 0.4188697143373527, | |
| "learning_rate": 8.670830462318633e-06, | |
| "loss": 0.2516, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06649868935346603, | |
| "step": 2985, | |
| "valid_targets_mean": 5346.4, | |
| "valid_targets_min": 2357 | |
| }, | |
| { | |
| "epoch": 5.067796610169491, | |
| "grad_norm": 0.43278797414764, | |
| "learning_rate": 8.60128007114217e-06, | |
| "loss": 0.2706, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1381576806306839, | |
| "step": 2990, | |
| "valid_targets_mean": 8565.6, | |
| "valid_targets_min": 1759 | |
| }, | |
| { | |
| "epoch": 5.076271186440678, | |
| "grad_norm": 0.40143576428343275, | |
| "learning_rate": 8.531933248227582e-06, | |
| "loss": 0.2392, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10228410363197327, | |
| "step": 2995, | |
| "valid_targets_mean": 5543.6, | |
| "valid_targets_min": 2229 | |
| }, | |
| { | |
| "epoch": 5.084745762711864, | |
| "grad_norm": 0.4286280311865031, | |
| "learning_rate": 8.462791232030803e-06, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13835610449314117, | |
| "step": 3000, | |
| "valid_targets_mean": 8272.5, | |
| "valid_targets_min": 4373 | |
| }, | |
| { | |
| "epoch": 5.093220338983051, | |
| "grad_norm": 0.41785258691109217, | |
| "learning_rate": 8.393855257350132e-06, | |
| "loss": 0.2515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11477389186620712, | |
| "step": 3005, | |
| "valid_targets_mean": 8371.8, | |
| "valid_targets_min": 4218 | |
| }, | |
| { | |
| "epoch": 5.101694915254237, | |
| "grad_norm": 0.42839185232988447, | |
| "learning_rate": 8.325126555304208e-06, | |
| "loss": 0.2614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1777895838022232, | |
| "step": 3010, | |
| "valid_targets_mean": 9394.4, | |
| "valid_targets_min": 4110 | |
| }, | |
| { | |
| "epoch": 5.110169491525424, | |
| "grad_norm": 0.4337647626076782, | |
| "learning_rate": 8.256606353310003e-06, | |
| "loss": 0.2472, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14281994104385376, | |
| "step": 3015, | |
| "valid_targets_mean": 9484.0, | |
| "valid_targets_min": 3434 | |
| }, | |
| { | |
| "epoch": 5.11864406779661, | |
| "grad_norm": 0.4295355742774288, | |
| "learning_rate": 8.188295875060943e-06, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0776737704873085, | |
| "step": 3020, | |
| "valid_targets_mean": 4949.9, | |
| "valid_targets_min": 3294 | |
| }, | |
| { | |
| "epoch": 5.127118644067797, | |
| "grad_norm": 0.3676364308403277, | |
| "learning_rate": 8.12019634050499e-06, | |
| "loss": 0.2271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08112052828073502, | |
| "step": 3025, | |
| "valid_targets_mean": 7908.5, | |
| "valid_targets_min": 4680 | |
| }, | |
| { | |
| "epoch": 5.135593220338983, | |
| "grad_norm": 0.4454936935125421, | |
| "learning_rate": 8.052308965822916e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1395069807767868, | |
| "step": 3030, | |
| "valid_targets_mean": 7846.9, | |
| "valid_targets_min": 4498 | |
| }, | |
| { | |
| "epoch": 5.1440677966101696, | |
| "grad_norm": 0.4122037084305391, | |
| "learning_rate": 7.984634963406537e-06, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14162138104438782, | |
| "step": 3035, | |
| "valid_targets_mean": 8179.1, | |
| "valid_targets_min": 4548 | |
| }, | |
| { | |
| "epoch": 5.1525423728813555, | |
| "grad_norm": 0.463084879888614, | |
| "learning_rate": 7.917175541837101e-06, | |
| "loss": 0.2842, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09783188998699188, | |
| "step": 3040, | |
| "valid_targets_mean": 5998.4, | |
| "valid_targets_min": 2460 | |
| }, | |
| { | |
| "epoch": 5.161016949152542, | |
| "grad_norm": 0.4748887059748165, | |
| "learning_rate": 7.849931905863672e-06, | |
| "loss": 0.2885, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12707798182964325, | |
| "step": 3045, | |
| "valid_targets_mean": 6415.8, | |
| "valid_targets_min": 3785 | |
| }, | |
| { | |
| "epoch": 5.169491525423728, | |
| "grad_norm": 0.45526749296808317, | |
| "learning_rate": 7.78290525638163e-06, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18409213423728943, | |
| "step": 3050, | |
| "valid_targets_mean": 11028.6, | |
| "valid_targets_min": 3734 | |
| }, | |
| { | |
| "epoch": 5.177966101694915, | |
| "grad_norm": 0.4462181398544631, | |
| "learning_rate": 7.71609679041121e-06, | |
| "loss": 0.2503, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15138982236385345, | |
| "step": 3055, | |
| "valid_targets_mean": 7039.2, | |
| "valid_targets_min": 3502 | |
| }, | |
| { | |
| "epoch": 5.186440677966102, | |
| "grad_norm": 0.3963924524169505, | |
| "learning_rate": 7.649507701076164e-06, | |
| "loss": 0.2633, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17887073755264282, | |
| "step": 3060, | |
| "valid_targets_mean": 10073.6, | |
| "valid_targets_min": 2114 | |
| }, | |
| { | |
| "epoch": 5.194915254237288, | |
| "grad_norm": 0.5348841144000112, | |
| "learning_rate": 7.583139177582395e-06, | |
| "loss": 0.2634, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2061377465724945, | |
| "step": 3065, | |
| "valid_targets_mean": 7855.2, | |
| "valid_targets_min": 3111 | |
| }, | |
| { | |
| "epoch": 5.203389830508475, | |
| "grad_norm": 0.41577883860215764, | |
| "learning_rate": 7.516992405196772e-06, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13540074229240417, | |
| "step": 3070, | |
| "valid_targets_mean": 8547.1, | |
| "valid_targets_min": 4854 | |
| }, | |
| { | |
| "epoch": 5.211864406779661, | |
| "grad_norm": 0.4271021340563035, | |
| "learning_rate": 7.45106856522593e-06, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10686710476875305, | |
| "step": 3075, | |
| "valid_targets_mean": 5644.2, | |
| "valid_targets_min": 2827 | |
| }, | |
| { | |
| "epoch": 5.220338983050848, | |
| "grad_norm": 0.4303321911070651, | |
| "learning_rate": 7.385368834995168e-06, | |
| "loss": 0.2305, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07251991331577301, | |
| "step": 3080, | |
| "valid_targets_mean": 5377.9, | |
| "valid_targets_min": 2896 | |
| }, | |
| { | |
| "epoch": 5.228813559322034, | |
| "grad_norm": 0.4414950670458604, | |
| "learning_rate": 7.319894387827473e-06, | |
| "loss": 0.2513, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12613925337791443, | |
| "step": 3085, | |
| "valid_targets_mean": 7042.0, | |
| "valid_targets_min": 3869 | |
| }, | |
| { | |
| "epoch": 5.237288135593221, | |
| "grad_norm": 0.42597819657193176, | |
| "learning_rate": 7.254646393022502e-06, | |
| "loss": 0.2804, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1456468105316162, | |
| "step": 3090, | |
| "valid_targets_mean": 8970.8, | |
| "valid_targets_min": 2240 | |
| }, | |
| { | |
| "epoch": 5.245762711864407, | |
| "grad_norm": 0.3910096201156548, | |
| "learning_rate": 7.189626015835733e-06, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14637431502342224, | |
| "step": 3095, | |
| "valid_targets_mean": 9771.9, | |
| "valid_targets_min": 2504 | |
| }, | |
| { | |
| "epoch": 5.254237288135593, | |
| "grad_norm": 0.4720410475372794, | |
| "learning_rate": 7.124834417457664e-06, | |
| "loss": 0.2499, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10337448865175247, | |
| "step": 3100, | |
| "valid_targets_mean": 6589.2, | |
| "valid_targets_min": 2630 | |
| }, | |
| { | |
| "epoch": 5.262711864406779, | |
| "grad_norm": 0.4165414378767662, | |
| "learning_rate": 7.060272754993051e-06, | |
| "loss": 0.2528, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10730390250682831, | |
| "step": 3105, | |
| "valid_targets_mean": 7689.1, | |
| "valid_targets_min": 3877 | |
| }, | |
| { | |
| "epoch": 5.271186440677966, | |
| "grad_norm": 0.4773925338374391, | |
| "learning_rate": 6.9959421814402494e-06, | |
| "loss": 0.2686, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09607205539941788, | |
| "step": 3110, | |
| "valid_targets_mean": 6103.0, | |
| "valid_targets_min": 2958 | |
| }, | |
| { | |
| "epoch": 5.279661016949152, | |
| "grad_norm": 0.405934436370515, | |
| "learning_rate": 6.931843845670647e-06, | |
| "loss": 0.2782, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18584570288658142, | |
| "step": 3115, | |
| "valid_targets_mean": 11376.5, | |
| "valid_targets_min": 6700 | |
| }, | |
| { | |
| "epoch": 5.288135593220339, | |
| "grad_norm": 0.40944718673488073, | |
| "learning_rate": 6.867978892408101e-06, | |
| "loss": 0.2812, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06526018679141998, | |
| "step": 3120, | |
| "valid_targets_mean": 5554.6, | |
| "valid_targets_min": 1909 | |
| }, | |
| { | |
| "epoch": 5.296610169491525, | |
| "grad_norm": 0.3969787353916221, | |
| "learning_rate": 6.804348462208548e-06, | |
| "loss": 0.251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14641955494880676, | |
| "step": 3125, | |
| "valid_targets_mean": 10160.4, | |
| "valid_targets_min": 4904 | |
| }, | |
| { | |
| "epoch": 5.305084745762712, | |
| "grad_norm": 0.40381438825051413, | |
| "learning_rate": 6.7409536914395866e-06, | |
| "loss": 0.255, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12536650896072388, | |
| "step": 3130, | |
| "valid_targets_mean": 8634.0, | |
| "valid_targets_min": 6440 | |
| }, | |
| { | |
| "epoch": 5.313559322033898, | |
| "grad_norm": 0.4407810032396024, | |
| "learning_rate": 6.677795712260206e-06, | |
| "loss": 0.2689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10065967589616776, | |
| "step": 3135, | |
| "valid_targets_mean": 6554.6, | |
| "valid_targets_min": 3413 | |
| }, | |
| { | |
| "epoch": 5.322033898305085, | |
| "grad_norm": 0.43225307653715944, | |
| "learning_rate": 6.6148756526005855e-06, | |
| "loss": 0.2621, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17093005776405334, | |
| "step": 3140, | |
| "valid_targets_mean": 7674.1, | |
| "valid_targets_min": 2705 | |
| }, | |
| { | |
| "epoch": 5.330508474576272, | |
| "grad_norm": 0.42168841714997635, | |
| "learning_rate": 6.552194636141909e-06, | |
| "loss": 0.2444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1504831612110138, | |
| "step": 3145, | |
| "valid_targets_mean": 8807.0, | |
| "valid_targets_min": 3609 | |
| }, | |
| { | |
| "epoch": 5.338983050847458, | |
| "grad_norm": 0.4721877133160371, | |
| "learning_rate": 6.489753782296315e-06, | |
| "loss": 0.2603, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1312541961669922, | |
| "step": 3150, | |
| "valid_targets_mean": 6869.5, | |
| "valid_targets_min": 2014 | |
| }, | |
| { | |
| "epoch": 5.3474576271186445, | |
| "grad_norm": 0.5627993433253363, | |
| "learning_rate": 6.427554206186939e-06, | |
| "loss": 0.2598, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12869901955127716, | |
| "step": 3155, | |
| "valid_targets_mean": 8781.0, | |
| "valid_targets_min": 3073 | |
| }, | |
| { | |
| "epoch": 5.3559322033898304, | |
| "grad_norm": 0.5975905344692939, | |
| "learning_rate": 6.3655970186279314e-06, | |
| "loss": 0.279, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15334156155586243, | |
| "step": 3160, | |
| "valid_targets_mean": 7974.1, | |
| "valid_targets_min": 4571 | |
| }, | |
| { | |
| "epoch": 5.364406779661017, | |
| "grad_norm": 0.4399240043257231, | |
| "learning_rate": 6.30388332610469e-06, | |
| "loss": 0.2482, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09615137428045273, | |
| "step": 3165, | |
| "valid_targets_mean": 6191.0, | |
| "valid_targets_min": 2960 | |
| }, | |
| { | |
| "epoch": 5.372881355932203, | |
| "grad_norm": 0.43555573486472715, | |
| "learning_rate": 6.242414230754044e-06, | |
| "loss": 0.2651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1269509643316269, | |
| "step": 3170, | |
| "valid_targets_mean": 7501.0, | |
| "valid_targets_min": 2635 | |
| }, | |
| { | |
| "epoch": 5.38135593220339, | |
| "grad_norm": 0.4707060650067185, | |
| "learning_rate": 6.181190830344601e-06, | |
| "loss": 0.2865, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14338397979736328, | |
| "step": 3175, | |
| "valid_targets_mean": 6411.2, | |
| "valid_targets_min": 3296 | |
| }, | |
| { | |
| "epoch": 5.389830508474576, | |
| "grad_norm": 0.42574272563993854, | |
| "learning_rate": 6.120214218257128e-06, | |
| "loss": 0.2553, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20924800634384155, | |
| "step": 3180, | |
| "valid_targets_mean": 9852.9, | |
| "valid_targets_min": 4562 | |
| }, | |
| { | |
| "epoch": 5.398305084745763, | |
| "grad_norm": 0.39823088488510644, | |
| "learning_rate": 6.059485483465048e-06, | |
| "loss": 0.2489, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12313079833984375, | |
| "step": 3185, | |
| "valid_targets_mean": 7619.0, | |
| "valid_targets_min": 3737 | |
| }, | |
| { | |
| "epoch": 5.406779661016949, | |
| "grad_norm": 0.9536002287476373, | |
| "learning_rate": 5.999005710514956e-06, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09335184842348099, | |
| "step": 3190, | |
| "valid_targets_mean": 5375.0, | |
| "valid_targets_min": 1635 | |
| }, | |
| { | |
| "epoch": 5.415254237288136, | |
| "grad_norm": 0.431612528522527, | |
| "learning_rate": 5.938775979507287e-06, | |
| "loss": 0.2625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13442601263523102, | |
| "step": 3195, | |
| "valid_targets_mean": 7720.2, | |
| "valid_targets_min": 2911 | |
| }, | |
| { | |
| "epoch": 5.423728813559322, | |
| "grad_norm": 0.44860126013223406, | |
| "learning_rate": 5.878797366076994e-06, | |
| "loss": 0.2427, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12965793907642365, | |
| "step": 3200, | |
| "valid_targets_mean": 6444.4, | |
| "valid_targets_min": 2340 | |
| }, | |
| { | |
| "epoch": 5.432203389830509, | |
| "grad_norm": 0.42018341577045487, | |
| "learning_rate": 5.819070941374368e-06, | |
| "loss": 0.2481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14158044755458832, | |
| "step": 3205, | |
| "valid_targets_mean": 8188.9, | |
| "valid_targets_min": 3917 | |
| }, | |
| { | |
| "epoch": 5.440677966101695, | |
| "grad_norm": 0.48422403137134756, | |
| "learning_rate": 5.759597772045882e-06, | |
| "loss": 0.2805, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1876494586467743, | |
| "step": 3210, | |
| "valid_targets_mean": 9145.9, | |
| "valid_targets_min": 5285 | |
| }, | |
| { | |
| "epoch": 5.4491525423728815, | |
| "grad_norm": 0.46211420065830605, | |
| "learning_rate": 5.700378920215159e-06, | |
| "loss": 0.2735, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21201786398887634, | |
| "step": 3215, | |
| "valid_targets_mean": 7421.4, | |
| "valid_targets_min": 2476 | |
| }, | |
| { | |
| "epoch": 5.4576271186440675, | |
| "grad_norm": 0.4264912650604428, | |
| "learning_rate": 5.641415443463994e-06, | |
| "loss": 0.2615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1297842115163803, | |
| "step": 3220, | |
| "valid_targets_mean": 7719.5, | |
| "valid_targets_min": 1584 | |
| }, | |
| { | |
| "epoch": 5.466101694915254, | |
| "grad_norm": 0.5482676590741974, | |
| "learning_rate": 5.5827083948134876e-06, | |
| "loss": 0.2821, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11752267181873322, | |
| "step": 3225, | |
| "valid_targets_mean": 6575.1, | |
| "valid_targets_min": 1240 | |
| }, | |
| { | |
| "epoch": 5.47457627118644, | |
| "grad_norm": 0.5019414160677941, | |
| "learning_rate": 5.524258822705202e-06, | |
| "loss": 0.2297, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12094447016716003, | |
| "step": 3230, | |
| "valid_targets_mean": 7180.8, | |
| "valid_targets_min": 4446 | |
| }, | |
| { | |
| "epoch": 5.483050847457627, | |
| "grad_norm": 0.4037586289857663, | |
| "learning_rate": 5.466067770982482e-06, | |
| "loss": 0.2937, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10668241232633591, | |
| "step": 3235, | |
| "valid_targets_mean": 6875.6, | |
| "valid_targets_min": 2801 | |
| }, | |
| { | |
| "epoch": 5.491525423728813, | |
| "grad_norm": 0.4432879666748049, | |
| "learning_rate": 5.4081362788717625e-06, | |
| "loss": 0.2908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09486261010169983, | |
| "step": 3240, | |
| "valid_targets_mean": 5720.0, | |
| "valid_targets_min": 2827 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "grad_norm": 0.41525930005316086, | |
| "learning_rate": 5.350465380964076e-06, | |
| "loss": 0.2694, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12814128398895264, | |
| "step": 3245, | |
| "valid_targets_mean": 8286.4, | |
| "valid_targets_min": 3278 | |
| }, | |
| { | |
| "epoch": 5.508474576271187, | |
| "grad_norm": 0.3967241521552427, | |
| "learning_rate": 5.293056107196488e-06, | |
| "loss": 0.2819, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10649764537811279, | |
| "step": 3250, | |
| "valid_targets_mean": 9235.5, | |
| "valid_targets_min": 3054 | |
| }, | |
| { | |
| "epoch": 5.516949152542373, | |
| "grad_norm": 0.4104500751756487, | |
| "learning_rate": 5.235909482833797e-06, | |
| "loss": 0.2718, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07652749121189117, | |
| "step": 3255, | |
| "valid_targets_mean": 5837.8, | |
| "valid_targets_min": 3543 | |
| }, | |
| { | |
| "epoch": 5.52542372881356, | |
| "grad_norm": 0.4548802593937043, | |
| "learning_rate": 5.179026528450146e-06, | |
| "loss": 0.2637, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10874311625957489, | |
| "step": 3260, | |
| "valid_targets_mean": 6713.8, | |
| "valid_targets_min": 3128 | |
| }, | |
| { | |
| "epoch": 5.533898305084746, | |
| "grad_norm": 0.42626630165063617, | |
| "learning_rate": 5.1224082599108584e-06, | |
| "loss": 0.2444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11963523179292679, | |
| "step": 3265, | |
| "valid_targets_mean": 7900.5, | |
| "valid_targets_min": 4232 | |
| }, | |
| { | |
| "epoch": 5.5423728813559325, | |
| "grad_norm": 0.42226966542492606, | |
| "learning_rate": 5.066055688354246e-06, | |
| "loss": 0.2764, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1330696940422058, | |
| "step": 3270, | |
| "valid_targets_mean": 7405.2, | |
| "valid_targets_min": 3859 | |
| }, | |
| { | |
| "epoch": 5.5508474576271185, | |
| "grad_norm": 0.41559115903205107, | |
| "learning_rate": 5.0099698201735855e-06, | |
| "loss": 0.2615, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18021991848945618, | |
| "step": 3275, | |
| "valid_targets_mean": 9442.4, | |
| "valid_targets_min": 2425 | |
| }, | |
| { | |
| "epoch": 5.559322033898305, | |
| "grad_norm": 0.4473698575482938, | |
| "learning_rate": 4.954151656999122e-06, | |
| "loss": 0.2595, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18893000483512878, | |
| "step": 3280, | |
| "valid_targets_mean": 8873.5, | |
| "valid_targets_min": 2503 | |
| }, | |
| { | |
| "epoch": 5.567796610169491, | |
| "grad_norm": 0.4126655990570961, | |
| "learning_rate": 4.898602195680214e-06, | |
| "loss": 0.257, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17205744981765747, | |
| "step": 3285, | |
| "valid_targets_mean": 10166.2, | |
| "valid_targets_min": 2566 | |
| }, | |
| { | |
| "epoch": 5.576271186440678, | |
| "grad_norm": 0.4436803493070262, | |
| "learning_rate": 4.843322428267465e-06, | |
| "loss": 0.2574, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1269497573375702, | |
| "step": 3290, | |
| "valid_targets_mean": 7204.2, | |
| "valid_targets_min": 2145 | |
| }, | |
| { | |
| "epoch": 5.584745762711864, | |
| "grad_norm": 0.41757091681380304, | |
| "learning_rate": 4.788313341995096e-06, | |
| "loss": 0.3007, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12728919088840485, | |
| "step": 3295, | |
| "valid_targets_mean": 7854.6, | |
| "valid_targets_min": 3467 | |
| }, | |
| { | |
| "epoch": 5.593220338983051, | |
| "grad_norm": 0.4333169695340451, | |
| "learning_rate": 4.733575919263238e-06, | |
| "loss": 0.2715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.197248175740242, | |
| "step": 3300, | |
| "valid_targets_mean": 9669.5, | |
| "valid_targets_min": 2530 | |
| }, | |
| { | |
| "epoch": 5.601694915254237, | |
| "grad_norm": 0.4217198882582936, | |
| "learning_rate": 4.679111137620442e-06, | |
| "loss": 0.2766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0994122177362442, | |
| "step": 3305, | |
| "valid_targets_mean": 7220.2, | |
| "valid_targets_min": 2826 | |
| }, | |
| { | |
| "epoch": 5.610169491525424, | |
| "grad_norm": 0.45922876277681324, | |
| "learning_rate": 4.624919969746171e-06, | |
| "loss": 0.2755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.143977552652359, | |
| "step": 3310, | |
| "valid_targets_mean": 7108.6, | |
| "valid_targets_min": 3254 | |
| }, | |
| { | |
| "epoch": 5.61864406779661, | |
| "grad_norm": 0.4497468341605269, | |
| "learning_rate": 4.571003383433481e-06, | |
| "loss": 0.2898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15260854363441467, | |
| "step": 3315, | |
| "valid_targets_mean": 7907.2, | |
| "valid_targets_min": 2748 | |
| }, | |
| { | |
| "epoch": 5.627118644067797, | |
| "grad_norm": 0.45353082867834693, | |
| "learning_rate": 4.517362341571687e-06, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1295567750930786, | |
| "step": 3320, | |
| "valid_targets_mean": 7593.5, | |
| "valid_targets_min": 2355 | |
| }, | |
| { | |
| "epoch": 5.635593220338983, | |
| "grad_norm": 0.4119838194637205, | |
| "learning_rate": 4.463997802129221e-06, | |
| "loss": 0.3005, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1388033777475357, | |
| "step": 3325, | |
| "valid_targets_mean": 8385.2, | |
| "valid_targets_min": 4455 | |
| }, | |
| { | |
| "epoch": 5.6440677966101696, | |
| "grad_norm": 0.4140759472855946, | |
| "learning_rate": 4.410910718136454e-06, | |
| "loss": 0.2646, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15042278170585632, | |
| "step": 3330, | |
| "valid_targets_mean": 9002.2, | |
| "valid_targets_min": 2717 | |
| }, | |
| { | |
| "epoch": 5.652542372881356, | |
| "grad_norm": 0.4400439343964711, | |
| "learning_rate": 4.3581020376687566e-06, | |
| "loss": 0.2625, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10155346989631653, | |
| "step": 3335, | |
| "valid_targets_mean": 7740.9, | |
| "valid_targets_min": 3495 | |
| }, | |
| { | |
| "epoch": 5.661016949152542, | |
| "grad_norm": 0.45980924813766, | |
| "learning_rate": 4.305572703829495e-06, | |
| "loss": 0.2508, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11865295469760895, | |
| "step": 3340, | |
| "valid_targets_mean": 8350.5, | |
| "valid_targets_min": 4097 | |
| }, | |
| { | |
| "epoch": 5.669491525423728, | |
| "grad_norm": 0.47385129359550743, | |
| "learning_rate": 4.253323654733248e-06, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10272833704948425, | |
| "step": 3345, | |
| "valid_targets_mean": 6288.5, | |
| "valid_targets_min": 2754 | |
| }, | |
| { | |
| "epoch": 5.677966101694915, | |
| "grad_norm": 0.42022120557595954, | |
| "learning_rate": 4.201355823488999e-06, | |
| "loss": 0.2676, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14795033633708954, | |
| "step": 3350, | |
| "valid_targets_mean": 7618.1, | |
| "valid_targets_min": 2832 | |
| }, | |
| { | |
| "epoch": 5.686440677966102, | |
| "grad_norm": 0.43924218046367447, | |
| "learning_rate": 4.149670138183526e-06, | |
| "loss": 0.2512, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08661969006061554, | |
| "step": 3355, | |
| "valid_targets_mean": 6429.2, | |
| "valid_targets_min": 1434 | |
| }, | |
| { | |
| "epoch": 5.694915254237288, | |
| "grad_norm": 0.4633969763685704, | |
| "learning_rate": 4.098267521864772e-06, | |
| "loss": 0.2385, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1861276477575302, | |
| "step": 3360, | |
| "valid_targets_mean": 9499.4, | |
| "valid_targets_min": 2276 | |
| }, | |
| { | |
| "epoch": 5.703389830508475, | |
| "grad_norm": 0.45898066026283985, | |
| "learning_rate": 4.047148892525403e-06, | |
| "loss": 0.229, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1299194097518921, | |
| "step": 3365, | |
| "valid_targets_mean": 6270.8, | |
| "valid_targets_min": 2471 | |
| }, | |
| { | |
| "epoch": 5.711864406779661, | |
| "grad_norm": 0.43083386950054814, | |
| "learning_rate": 3.996315163086391e-06, | |
| "loss": 0.2581, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15286263823509216, | |
| "step": 3370, | |
| "valid_targets_mean": 8112.6, | |
| "valid_targets_min": 1794 | |
| }, | |
| { | |
| "epoch": 5.720338983050848, | |
| "grad_norm": 0.467520944139597, | |
| "learning_rate": 3.945767241380732e-06, | |
| "loss": 0.2898, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18376997113227844, | |
| "step": 3375, | |
| "valid_targets_mean": 7821.9, | |
| "valid_targets_min": 1652 | |
| }, | |
| { | |
| "epoch": 5.728813559322034, | |
| "grad_norm": 0.41645203978501605, | |
| "learning_rate": 3.895506030137195e-06, | |
| "loss": 0.2476, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12087373435497284, | |
| "step": 3380, | |
| "valid_targets_mean": 7218.1, | |
| "valid_targets_min": 3939 | |
| }, | |
| { | |
| "epoch": 5.737288135593221, | |
| "grad_norm": 0.41823635458160535, | |
| "learning_rate": 3.845532426964252e-06, | |
| "loss": 0.2501, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10257397592067719, | |
| "step": 3385, | |
| "valid_targets_mean": 6861.1, | |
| "valid_targets_min": 3023 | |
| }, | |
| { | |
| "epoch": 5.745762711864407, | |
| "grad_norm": 0.43232642787196573, | |
| "learning_rate": 3.79584732433399e-06, | |
| "loss": 0.2825, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12480692565441132, | |
| "step": 3390, | |
| "valid_targets_mean": 7853.9, | |
| "valid_targets_min": 2572 | |
| }, | |
| { | |
| "epoch": 5.754237288135593, | |
| "grad_norm": 0.49325336882654114, | |
| "learning_rate": 3.746451609566233e-06, | |
| "loss": 0.267, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24762919545173645, | |
| "step": 3395, | |
| "valid_targets_mean": 9721.4, | |
| "valid_targets_min": 5420 | |
| }, | |
| { | |
| "epoch": 5.762711864406779, | |
| "grad_norm": 0.4212695658266242, | |
| "learning_rate": 3.697346164812643e-06, | |
| "loss": 0.2424, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11336623877286911, | |
| "step": 3400, | |
| "valid_targets_mean": 7380.5, | |
| "valid_targets_min": 3715 | |
| }, | |
| { | |
| "epoch": 5.771186440677966, | |
| "grad_norm": 0.4588086641728118, | |
| "learning_rate": 3.6485318670409896e-06, | |
| "loss": 0.2611, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10371188074350357, | |
| "step": 3405, | |
| "valid_targets_mean": 6874.6, | |
| "valid_targets_min": 2581 | |
| }, | |
| { | |
| "epoch": 5.779661016949152, | |
| "grad_norm": 0.44398724002047824, | |
| "learning_rate": 3.6000095880194905e-06, | |
| "loss": 0.2979, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11031177639961243, | |
| "step": 3410, | |
| "valid_targets_mean": 7567.9, | |
| "valid_targets_min": 4255 | |
| }, | |
| { | |
| "epoch": 5.788135593220339, | |
| "grad_norm": 0.42924629006736004, | |
| "learning_rate": 3.5517801943012443e-06, | |
| "loss": 0.2643, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1974351704120636, | |
| "step": 3415, | |
| "valid_targets_mean": 9567.6, | |
| "valid_targets_min": 4475 | |
| }, | |
| { | |
| "epoch": 5.796610169491525, | |
| "grad_norm": 0.46016588572316125, | |
| "learning_rate": 3.5038445472087324e-06, | |
| "loss": 0.2755, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13061898946762085, | |
| "step": 3420, | |
| "valid_targets_mean": 7301.4, | |
| "valid_targets_min": 2394 | |
| }, | |
| { | |
| "epoch": 5.805084745762712, | |
| "grad_norm": 0.4058426308160901, | |
| "learning_rate": 3.456203502818476e-06, | |
| "loss": 0.2596, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11977847665548325, | |
| "step": 3425, | |
| "valid_targets_mean": 7847.4, | |
| "valid_targets_min": 3901 | |
| }, | |
| { | |
| "epoch": 5.813559322033898, | |
| "grad_norm": 0.48970336832788647, | |
| "learning_rate": 3.408857911945702e-06, | |
| "loss": 0.2906, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18240872025489807, | |
| "step": 3430, | |
| "valid_targets_mean": 9729.4, | |
| "valid_targets_min": 2322 | |
| }, | |
| { | |
| "epoch": 5.822033898305085, | |
| "grad_norm": 0.39563623240144213, | |
| "learning_rate": 3.36180862012919e-06, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20780718326568604, | |
| "step": 3435, | |
| "valid_targets_mean": 11158.8, | |
| "valid_targets_min": 3305 | |
| }, | |
| { | |
| "epoch": 5.830508474576272, | |
| "grad_norm": 0.49787796279197905, | |
| "learning_rate": 3.3150564676161402e-06, | |
| "loss": 0.2354, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10818520188331604, | |
| "step": 3440, | |
| "valid_targets_mean": 5966.9, | |
| "valid_targets_min": 3336 | |
| }, | |
| { | |
| "epoch": 5.838983050847458, | |
| "grad_norm": 0.43084579663109984, | |
| "learning_rate": 3.268602289347185e-06, | |
| "loss": 0.2569, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19690270721912384, | |
| "step": 3445, | |
| "valid_targets_mean": 11298.9, | |
| "valid_targets_min": 7754 | |
| }, | |
| { | |
| "epoch": 5.847457627118644, | |
| "grad_norm": 0.4218069624746767, | |
| "learning_rate": 3.222446914941468e-06, | |
| "loss": 0.2746, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17221492528915405, | |
| "step": 3450, | |
| "valid_targets_mean": 8413.1, | |
| "valid_targets_min": 3225 | |
| }, | |
| { | |
| "epoch": 5.8559322033898304, | |
| "grad_norm": 0.4671618570113744, | |
| "learning_rate": 3.176591168681851e-06, | |
| "loss": 0.2577, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13562491536140442, | |
| "step": 3455, | |
| "valid_targets_mean": 7066.1, | |
| "valid_targets_min": 2472 | |
| }, | |
| { | |
| "epoch": 5.864406779661017, | |
| "grad_norm": 0.4590320225196124, | |
| "learning_rate": 3.131035869500152e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09656020998954773, | |
| "step": 3460, | |
| "valid_targets_mean": 5605.9, | |
| "valid_targets_min": 3660 | |
| }, | |
| { | |
| "epoch": 5.872881355932203, | |
| "grad_norm": 0.4238092707837205, | |
| "learning_rate": 3.085781830962564e-06, | |
| "loss": 0.26, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11015133559703827, | |
| "step": 3465, | |
| "valid_targets_mean": 6491.9, | |
| "valid_targets_min": 2791 | |
| }, | |
| { | |
| "epoch": 5.88135593220339, | |
| "grad_norm": 0.4388528153492548, | |
| "learning_rate": 3.0408298612550878e-06, | |
| "loss": 0.2349, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17622537910938263, | |
| "step": 3470, | |
| "valid_targets_mean": 8512.5, | |
| "valid_targets_min": 2873 | |
| }, | |
| { | |
| "epoch": 5.889830508474576, | |
| "grad_norm": 0.4306978345104024, | |
| "learning_rate": 2.996180763169132e-06, | |
| "loss": 0.2504, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09644504636526108, | |
| "step": 3475, | |
| "valid_targets_mean": 5898.6, | |
| "valid_targets_min": 1697 | |
| }, | |
| { | |
| "epoch": 5.898305084745763, | |
| "grad_norm": 0.4429192567068347, | |
| "learning_rate": 2.95183533408715e-06, | |
| "loss": 0.2562, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09832319617271423, | |
| "step": 3480, | |
| "valid_targets_mean": 6330.5, | |
| "valid_targets_min": 4248 | |
| }, | |
| { | |
| "epoch": 5.906779661016949, | |
| "grad_norm": 0.5042107811710015, | |
| "learning_rate": 2.907794365968413e-06, | |
| "loss": 0.2604, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14449542760849, | |
| "step": 3485, | |
| "valid_targets_mean": 7709.2, | |
| "valid_targets_min": 4466 | |
| }, | |
| { | |
| "epoch": 5.915254237288136, | |
| "grad_norm": 0.38111377370566013, | |
| "learning_rate": 2.864058645334853e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1592138707637787, | |
| "step": 3490, | |
| "valid_targets_mean": 9714.1, | |
| "valid_targets_min": 4445 | |
| }, | |
| { | |
| "epoch": 5.923728813559322, | |
| "grad_norm": 0.4523598810623572, | |
| "learning_rate": 2.820628953257052e-06, | |
| "loss": 0.2723, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11317625641822815, | |
| "step": 3495, | |
| "valid_targets_mean": 6696.9, | |
| "valid_targets_min": 1438 | |
| }, | |
| { | |
| "epoch": 5.932203389830509, | |
| "grad_norm": 0.38284023698249975, | |
| "learning_rate": 2.7775060653402387e-06, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1514039784669876, | |
| "step": 3500, | |
| "valid_targets_mean": 9845.6, | |
| "valid_targets_min": 4052 | |
| }, | |
| { | |
| "epoch": 5.940677966101695, | |
| "grad_norm": 0.4164588979342514, | |
| "learning_rate": 2.7346907517104894e-06, | |
| "loss": 0.2939, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.209864541888237, | |
| "step": 3505, | |
| "valid_targets_mean": 11032.0, | |
| "valid_targets_min": 5443 | |
| }, | |
| { | |
| "epoch": 5.9491525423728815, | |
| "grad_norm": 0.3835673805693966, | |
| "learning_rate": 2.692183777000932e-06, | |
| "loss": 0.2667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12400268763303757, | |
| "step": 3510, | |
| "valid_targets_mean": 9057.1, | |
| "valid_targets_min": 2882 | |
| }, | |
| { | |
| "epoch": 5.9576271186440675, | |
| "grad_norm": 0.48693557523835473, | |
| "learning_rate": 2.64998590033813e-06, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09107373654842377, | |
| "step": 3515, | |
| "valid_targets_mean": 5536.4, | |
| "valid_targets_min": 1342 | |
| }, | |
| { | |
| "epoch": 5.966101694915254, | |
| "grad_norm": 0.46983127864689195, | |
| "learning_rate": 2.608097875328488e-06, | |
| "loss": 0.2467, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08910076320171356, | |
| "step": 3520, | |
| "valid_targets_mean": 4977.9, | |
| "valid_targets_min": 1896 | |
| }, | |
| { | |
| "epoch": 5.97457627118644, | |
| "grad_norm": 0.45050680482743777, | |
| "learning_rate": 2.5665204500448137e-06, | |
| "loss": 0.2483, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.06933078914880753, | |
| "step": 3525, | |
| "valid_targets_mean": 4338.5, | |
| "valid_targets_min": 1973 | |
| }, | |
| { | |
| "epoch": 5.983050847457627, | |
| "grad_norm": 0.449771116335777, | |
| "learning_rate": 2.525254367012955e-06, | |
| "loss": 0.2675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17051050066947937, | |
| "step": 3530, | |
| "valid_targets_mean": 8545.4, | |
| "valid_targets_min": 3602 | |
| }, | |
| { | |
| "epoch": 5.991525423728813, | |
| "grad_norm": 0.46300251369792844, | |
| "learning_rate": 2.4843003631985486e-06, | |
| "loss": 0.2955, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07504920661449432, | |
| "step": 3535, | |
| "valid_targets_mean": 5244.5, | |
| "valid_targets_min": 3387 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.4255488380121361, | |
| "learning_rate": 2.4436591699938305e-06, | |
| "loss": 0.2468, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10396578907966614, | |
| "step": 3540, | |
| "valid_targets_mean": 8307.2, | |
| "valid_targets_min": 3542 | |
| }, | |
| { | |
| "epoch": 6.008474576271187, | |
| "grad_norm": 0.4079638692535048, | |
| "learning_rate": 2.403331513204612e-06, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.132585346698761, | |
| "step": 3545, | |
| "valid_targets_mean": 8225.9, | |
| "valid_targets_min": 3132 | |
| }, | |
| { | |
| "epoch": 6.016949152542373, | |
| "grad_norm": 0.42095335043120474, | |
| "learning_rate": 2.363318113037283e-06, | |
| "loss": 0.2426, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19237245619297028, | |
| "step": 3550, | |
| "valid_targets_mean": 8534.2, | |
| "valid_targets_min": 2336 | |
| }, | |
| { | |
| "epoch": 6.02542372881356, | |
| "grad_norm": 0.4247831459112695, | |
| "learning_rate": 2.323619684085976e-06, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10242437571287155, | |
| "step": 3555, | |
| "valid_targets_mean": 8372.6, | |
| "valid_targets_min": 3019 | |
| }, | |
| { | |
| "epoch": 6.033898305084746, | |
| "grad_norm": 0.5101711223801056, | |
| "learning_rate": 2.2842369353197858e-06, | |
| "loss": 0.2399, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10061325132846832, | |
| "step": 3560, | |
| "valid_targets_mean": 6458.5, | |
| "valid_targets_min": 3200 | |
| }, | |
| { | |
| "epoch": 6.0423728813559325, | |
| "grad_norm": 0.46951651616591455, | |
| "learning_rate": 2.2451705700701185e-06, | |
| "loss": 0.2441, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09043577313423157, | |
| "step": 3565, | |
| "valid_targets_mean": 6405.8, | |
| "valid_targets_min": 1973 | |
| }, | |
| { | |
| "epoch": 6.0508474576271185, | |
| "grad_norm": 0.43838570005499733, | |
| "learning_rate": 2.2064212860181258e-06, | |
| "loss": 0.2347, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1115138828754425, | |
| "step": 3570, | |
| "valid_targets_mean": 6562.5, | |
| "valid_targets_min": 2966 | |
| }, | |
| { | |
| "epoch": 6.059322033898305, | |
| "grad_norm": 0.4105990788568166, | |
| "learning_rate": 2.1679897751822532e-06, | |
| "loss": 0.245, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14000532031059265, | |
| "step": 3575, | |
| "valid_targets_mean": 8664.9, | |
| "valid_targets_min": 3036 | |
| }, | |
| { | |
| "epoch": 6.067796610169491, | |
| "grad_norm": 0.4000234466993258, | |
| "learning_rate": 2.1298767239058684e-06, | |
| "loss": 0.2781, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17599308490753174, | |
| "step": 3580, | |
| "valid_targets_mean": 10298.4, | |
| "valid_targets_min": 3167 | |
| }, | |
| { | |
| "epoch": 6.076271186440678, | |
| "grad_norm": 0.4630894417075392, | |
| "learning_rate": 2.0920828128450197e-06, | |
| "loss": 0.2412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16314397752285004, | |
| "step": 3585, | |
| "valid_targets_mean": 8534.8, | |
| "valid_targets_min": 4293 | |
| }, | |
| { | |
| "epoch": 6.084745762711864, | |
| "grad_norm": 0.4227879675646018, | |
| "learning_rate": 2.054608716956259e-06, | |
| "loss": 0.2497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1060272678732872, | |
| "step": 3590, | |
| "valid_targets_mean": 7418.1, | |
| "valid_targets_min": 1916 | |
| }, | |
| { | |
| "epoch": 6.093220338983051, | |
| "grad_norm": 0.4463726939777628, | |
| "learning_rate": 2.0174551054846158e-06, | |
| "loss": 0.2406, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11649863421916962, | |
| "step": 3595, | |
| "valid_targets_mean": 6681.9, | |
| "valid_targets_min": 2538 | |
| }, | |
| { | |
| "epoch": 6.101694915254237, | |
| "grad_norm": 0.44505007275052016, | |
| "learning_rate": 1.9806226419516195e-06, | |
| "loss": 0.2847, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10530796647071838, | |
| "step": 3600, | |
| "valid_targets_mean": 7766.4, | |
| "valid_targets_min": 5614 | |
| }, | |
| { | |
| "epoch": 6.110169491525424, | |
| "grad_norm": 0.38470113587130134, | |
| "learning_rate": 1.94411198414346e-06, | |
| "loss": 0.2665, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14323177933692932, | |
| "step": 3605, | |
| "valid_targets_mean": 10445.9, | |
| "valid_targets_min": 2694 | |
| }, | |
| { | |
| "epoch": 6.11864406779661, | |
| "grad_norm": 0.5125177576701928, | |
| "learning_rate": 1.9079237840992416e-06, | |
| "loss": 0.2332, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09224593639373779, | |
| "step": 3610, | |
| "valid_targets_mean": 6256.4, | |
| "valid_targets_min": 2591 | |
| }, | |
| { | |
| "epoch": 6.127118644067797, | |
| "grad_norm": 0.40141614645721524, | |
| "learning_rate": 1.8720586880993452e-06, | |
| "loss": 0.2753, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13916637003421783, | |
| "step": 3615, | |
| "valid_targets_mean": 8134.5, | |
| "valid_targets_min": 1875 | |
| }, | |
| { | |
| "epoch": 6.135593220338983, | |
| "grad_norm": 0.4648377214616805, | |
| "learning_rate": 1.8365173366538647e-06, | |
| "loss": 0.2984, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1737988442182541, | |
| "step": 3620, | |
| "valid_targets_mean": 8593.6, | |
| "valid_targets_min": 2950 | |
| }, | |
| { | |
| "epoch": 6.1440677966101696, | |
| "grad_norm": 0.3970149435933971, | |
| "learning_rate": 1.8013003644911987e-06, | |
| "loss": 0.2869, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1591392606496811, | |
| "step": 3625, | |
| "valid_targets_mean": 7797.2, | |
| "valid_targets_min": 2927 | |
| }, | |
| { | |
| "epoch": 6.1525423728813555, | |
| "grad_norm": 0.4329881802584791, | |
| "learning_rate": 1.7664084005466796e-06, | |
| "loss": 0.2586, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1466069221496582, | |
| "step": 3630, | |
| "valid_targets_mean": 7398.9, | |
| "valid_targets_min": 2493 | |
| }, | |
| { | |
| "epoch": 6.161016949152542, | |
| "grad_norm": 0.40044188924352225, | |
| "learning_rate": 1.7318420679513725e-06, | |
| "loss": 0.2675, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09767980873584747, | |
| "step": 3635, | |
| "valid_targets_mean": 7647.6, | |
| "valid_targets_min": 5285 | |
| }, | |
| { | |
| "epoch": 6.169491525423728, | |
| "grad_norm": 0.4666515135940152, | |
| "learning_rate": 1.6976019840209334e-06, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13460057973861694, | |
| "step": 3640, | |
| "valid_targets_mean": 6764.6, | |
| "valid_targets_min": 3017 | |
| }, | |
| { | |
| "epoch": 6.177966101694915, | |
| "grad_norm": 0.4310188952800369, | |
| "learning_rate": 1.6636887602445839e-06, | |
| "loss": 0.2494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14499178528785706, | |
| "step": 3645, | |
| "valid_targets_mean": 8610.5, | |
| "valid_targets_min": 4089 | |
| }, | |
| { | |
| "epoch": 6.186440677966102, | |
| "grad_norm": 0.47475101684614096, | |
| "learning_rate": 1.6301030022741837e-06, | |
| "loss": 0.265, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12736810743808746, | |
| "step": 3650, | |
| "valid_targets_mean": 6779.5, | |
| "valid_targets_min": 2699 | |
| }, | |
| { | |
| "epoch": 6.194915254237288, | |
| "grad_norm": 0.5568318024157459, | |
| "learning_rate": 1.596845309913444e-06, | |
| "loss": 0.2766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13458159565925598, | |
| "step": 3655, | |
| "valid_targets_mean": 6805.9, | |
| "valid_targets_min": 1820 | |
| }, | |
| { | |
| "epoch": 6.203389830508475, | |
| "grad_norm": 0.4321942421836466, | |
| "learning_rate": 1.5639162771071736e-06, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17277520895004272, | |
| "step": 3660, | |
| "valid_targets_mean": 8875.1, | |
| "valid_targets_min": 3888 | |
| }, | |
| { | |
| "epoch": 6.211864406779661, | |
| "grad_norm": 0.4459991928762019, | |
| "learning_rate": 1.5313164919307033e-06, | |
| "loss": 0.2631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11502149701118469, | |
| "step": 3665, | |
| "valid_targets_mean": 7180.1, | |
| "valid_targets_min": 4359 | |
| }, | |
| { | |
| "epoch": 6.220338983050848, | |
| "grad_norm": 0.42250806309461797, | |
| "learning_rate": 1.499046536579374e-06, | |
| "loss": 0.282, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17123055458068848, | |
| "step": 3670, | |
| "valid_targets_mean": 8993.6, | |
| "valid_targets_min": 4613 | |
| }, | |
| { | |
| "epoch": 6.228813559322034, | |
| "grad_norm": 0.4108820724486728, | |
| "learning_rate": 1.4671069873581312e-06, | |
| "loss": 0.2852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15709635615348816, | |
| "step": 3675, | |
| "valid_targets_mean": 9469.9, | |
| "valid_targets_min": 5070 | |
| }, | |
| { | |
| "epoch": 6.237288135593221, | |
| "grad_norm": 0.4437412702980212, | |
| "learning_rate": 1.4354984146712503e-06, | |
| "loss": 0.2632, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11063840240240097, | |
| "step": 3680, | |
| "valid_targets_mean": 6998.9, | |
| "valid_targets_min": 2213 | |
| }, | |
| { | |
| "epoch": 6.245762711864407, | |
| "grad_norm": 0.4733988446311304, | |
| "learning_rate": 1.4042213830121344e-06, | |
| "loss": 0.2515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14029532670974731, | |
| "step": 3685, | |
| "valid_targets_mean": 6649.2, | |
| "valid_targets_min": 3248 | |
| }, | |
| { | |
| "epoch": 6.254237288135593, | |
| "grad_norm": 0.38655289252851993, | |
| "learning_rate": 1.3732764509532316e-06, | |
| "loss": 0.2575, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13541369140148163, | |
| "step": 3690, | |
| "valid_targets_mean": 9996.2, | |
| "valid_targets_min": 5577 | |
| }, | |
| { | |
| "epoch": 6.262711864406779, | |
| "grad_norm": 0.4419355294886104, | |
| "learning_rate": 1.3426641711360788e-06, | |
| "loss": 0.2543, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12257914245128632, | |
| "step": 3695, | |
| "valid_targets_mean": 7234.6, | |
| "valid_targets_min": 3465 | |
| }, | |
| { | |
| "epoch": 6.271186440677966, | |
| "grad_norm": 0.4944609384643766, | |
| "learning_rate": 1.3123850902614143e-06, | |
| "loss": 0.2433, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09550976008176804, | |
| "step": 3700, | |
| "valid_targets_mean": 5357.8, | |
| "valid_targets_min": 2337 | |
| }, | |
| { | |
| "epoch": 6.279661016949152, | |
| "grad_norm": 0.458073682292656, | |
| "learning_rate": 1.2824397490794115e-06, | |
| "loss": 0.2791, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12735655903816223, | |
| "step": 3705, | |
| "valid_targets_mean": 8300.4, | |
| "valid_targets_min": 2948 | |
| }, | |
| { | |
| "epoch": 6.288135593220339, | |
| "grad_norm": 0.44512304664485036, | |
| "learning_rate": 1.2528286823800495e-06, | |
| "loss": 0.2496, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12010197341442108, | |
| "step": 3710, | |
| "valid_targets_mean": 6729.8, | |
| "valid_targets_min": 2551 | |
| }, | |
| { | |
| "epoch": 6.296610169491525, | |
| "grad_norm": 0.4241959010800955, | |
| "learning_rate": 1.223552418983518e-06, | |
| "loss": 0.2651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1606883406639099, | |
| "step": 3715, | |
| "valid_targets_mean": 8563.4, | |
| "valid_targets_min": 2422 | |
| }, | |
| { | |
| "epoch": 6.305084745762712, | |
| "grad_norm": 0.4458826979380267, | |
| "learning_rate": 1.1946114817308207e-06, | |
| "loss": 0.2453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.126150980591774, | |
| "step": 3720, | |
| "valid_targets_mean": 6960.0, | |
| "valid_targets_min": 2437 | |
| }, | |
| { | |
| "epoch": 6.313559322033898, | |
| "grad_norm": 0.39768023598211966, | |
| "learning_rate": 1.166006387474401e-06, | |
| "loss": 0.2429, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1836184561252594, | |
| "step": 3725, | |
| "valid_targets_mean": 10689.0, | |
| "valid_targets_min": 3353 | |
| }, | |
| { | |
| "epoch": 6.322033898305085, | |
| "grad_norm": 0.4787935493604884, | |
| "learning_rate": 1.1377376470689328e-06, | |
| "loss": 0.271, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12405265867710114, | |
| "step": 3730, | |
| "valid_targets_mean": 6661.1, | |
| "valid_targets_min": 3367 | |
| }, | |
| { | |
| "epoch": 6.330508474576272, | |
| "grad_norm": 0.513993676048688, | |
| "learning_rate": 1.1098057653621775e-06, | |
| "loss": 0.2535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16209466755390167, | |
| "step": 3735, | |
| "valid_targets_mean": 7460.2, | |
| "valid_targets_min": 3795 | |
| }, | |
| { | |
| "epoch": 6.338983050847458, | |
| "grad_norm": 0.45485260063869304, | |
| "learning_rate": 1.0822112411860042e-06, | |
| "loss": 0.2653, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0905480831861496, | |
| "step": 3740, | |
| "valid_targets_mean": 5860.2, | |
| "valid_targets_min": 2406 | |
| }, | |
| { | |
| "epoch": 6.3474576271186445, | |
| "grad_norm": 0.47528112960496416, | |
| "learning_rate": 1.0549545673474304e-06, | |
| "loss": 0.2461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0847092866897583, | |
| "step": 3745, | |
| "valid_targets_mean": 6441.5, | |
| "valid_targets_min": 3400 | |
| }, | |
| { | |
| "epoch": 6.3559322033898304, | |
| "grad_norm": 0.417189000217903, | |
| "learning_rate": 1.0280362306198732e-06, | |
| "loss": 0.2609, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1653599739074707, | |
| "step": 3750, | |
| "valid_targets_mean": 9874.0, | |
| "valid_targets_min": 3847 | |
| }, | |
| { | |
| "epoch": 6.364406779661017, | |
| "grad_norm": 0.44496662963253514, | |
| "learning_rate": 1.0014567117344121e-06, | |
| "loss": 0.2689, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14929382503032684, | |
| "step": 3755, | |
| "valid_targets_mean": 6836.2, | |
| "valid_targets_min": 2605 | |
| }, | |
| { | |
| "epoch": 6.372881355932203, | |
| "grad_norm": 0.4711935019321799, | |
| "learning_rate": 9.752164853712355e-07, | |
| "loss": 0.2432, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11264555901288986, | |
| "step": 3760, | |
| "valid_targets_mean": 7332.9, | |
| "valid_targets_min": 3422 | |
| }, | |
| { | |
| "epoch": 6.38135593220339, | |
| "grad_norm": 0.503630625795691, | |
| "learning_rate": 9.493160201511409e-07, | |
| "loss": 0.2658, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11310052871704102, | |
| "step": 3765, | |
| "valid_targets_mean": 7008.2, | |
| "valid_targets_min": 1707 | |
| }, | |
| { | |
| "epoch": 6.389830508474576, | |
| "grad_norm": 0.5778340405809688, | |
| "learning_rate": 9.2375577862718e-07, | |
| "loss": 0.2638, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09299856424331665, | |
| "step": 3770, | |
| "valid_targets_mean": 6915.5, | |
| "valid_targets_min": 2350 | |
| }, | |
| { | |
| "epoch": 6.398305084745763, | |
| "grad_norm": 0.42295528500037055, | |
| "learning_rate": 8.985362172763933e-07, | |
| "loss": 0.2444, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09429045021533966, | |
| "step": 3775, | |
| "valid_targets_mean": 6875.8, | |
| "valid_targets_min": 2364 | |
| }, | |
| { | |
| "epoch": 6.406779661016949, | |
| "grad_norm": 0.39896345678565265, | |
| "learning_rate": 8.736577864916617e-07, | |
| "loss": 0.2497, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11996320635080338, | |
| "step": 3780, | |
| "valid_targets_mean": 7433.8, | |
| "valid_targets_min": 2417 | |
| }, | |
| { | |
| "epoch": 6.415254237288136, | |
| "grad_norm": 0.42165435770722925, | |
| "learning_rate": 8.491209305736525e-07, | |
| "loss": 0.2535, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13380880653858185, | |
| "step": 3785, | |
| "valid_targets_mean": 7627.2, | |
| "valid_targets_min": 2303 | |
| }, | |
| { | |
| "epoch": 6.423728813559322, | |
| "grad_norm": 0.4505139819346616, | |
| "learning_rate": 8.249260877228993e-07, | |
| "loss": 0.2862, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14228764176368713, | |
| "step": 3790, | |
| "valid_targets_mean": 8098.2, | |
| "valid_targets_min": 3002 | |
| }, | |
| { | |
| "epoch": 6.432203389830509, | |
| "grad_norm": 0.4321271164525556, | |
| "learning_rate": 8.010736900319615e-07, | |
| "loss": 0.2721, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09055182337760925, | |
| "step": 3795, | |
| "valid_targets_mean": 6403.5, | |
| "valid_targets_min": 2779 | |
| }, | |
| { | |
| "epoch": 6.440677966101695, | |
| "grad_norm": 0.4445613645730953, | |
| "learning_rate": 7.775641634777198e-07, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11796161532402039, | |
| "step": 3800, | |
| "valid_targets_mean": 7846.5, | |
| "valid_targets_min": 3128 | |
| }, | |
| { | |
| "epoch": 6.4491525423728815, | |
| "grad_norm": 0.4983181318128694, | |
| "learning_rate": 7.543979279137592e-07, | |
| "loss": 0.2583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11737058311700821, | |
| "step": 3805, | |
| "valid_targets_mean": 6364.1, | |
| "valid_targets_min": 2068 | |
| }, | |
| { | |
| "epoch": 6.4576271186440675, | |
| "grad_norm": 0.4637887645159089, | |
| "learning_rate": 7.315753970628825e-07, | |
| "loss": 0.2487, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09322785586118698, | |
| "step": 3810, | |
| "valid_targets_mean": 6619.9, | |
| "valid_targets_min": 2636 | |
| }, | |
| { | |
| "epoch": 6.466101694915254, | |
| "grad_norm": 0.40047532995209967, | |
| "learning_rate": 7.090969785097046e-07, | |
| "loss": 0.2647, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13654407858848572, | |
| "step": 3815, | |
| "valid_targets_mean": 9421.6, | |
| "valid_targets_min": 5966 | |
| }, | |
| { | |
| "epoch": 6.47457627118644, | |
| "grad_norm": 0.4390708368570101, | |
| "learning_rate": 6.869630736933963e-07, | |
| "loss": 0.242, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07607319951057434, | |
| "step": 3820, | |
| "valid_targets_mean": 6052.5, | |
| "valid_targets_min": 3828 | |
| }, | |
| { | |
| "epoch": 6.483050847457627, | |
| "grad_norm": 0.42364553715790104, | |
| "learning_rate": 6.651740779004878e-07, | |
| "loss": 0.2651, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1431792676448822, | |
| "step": 3825, | |
| "valid_targets_mean": 8558.6, | |
| "valid_targets_min": 2566 | |
| }, | |
| { | |
| "epoch": 6.491525423728813, | |
| "grad_norm": 0.47872145294990065, | |
| "learning_rate": 6.437303802578365e-07, | |
| "loss": 0.2624, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09658294916152954, | |
| "step": 3830, | |
| "valid_targets_mean": 5816.0, | |
| "valid_targets_min": 2772 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "grad_norm": 0.42695972686370715, | |
| "learning_rate": 6.226323637256592e-07, | |
| "loss": 0.2558, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09363295882940292, | |
| "step": 3835, | |
| "valid_targets_mean": 6977.9, | |
| "valid_targets_min": 2840 | |
| }, | |
| { | |
| "epoch": 6.508474576271187, | |
| "grad_norm": 0.448749456041159, | |
| "learning_rate": 6.018804050906957e-07, | |
| "loss": 0.259, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1705683469772339, | |
| "step": 3840, | |
| "valid_targets_mean": 9966.0, | |
| "valid_targets_min": 3703 | |
| }, | |
| { | |
| "epoch": 6.516949152542373, | |
| "grad_norm": 0.44110593342725196, | |
| "learning_rate": 5.814748749594845e-07, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15280672907829285, | |
| "step": 3845, | |
| "valid_targets_mean": 7834.5, | |
| "valid_targets_min": 2385 | |
| }, | |
| { | |
| "epoch": 6.52542372881356, | |
| "grad_norm": 0.41011102728994003, | |
| "learning_rate": 5.614161377517491e-07, | |
| "loss": 0.2395, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10929296910762787, | |
| "step": 3850, | |
| "valid_targets_mean": 7395.1, | |
| "valid_targets_min": 2328 | |
| }, | |
| { | |
| "epoch": 6.533898305084746, | |
| "grad_norm": 0.37684976767116324, | |
| "learning_rate": 5.417045516938712e-07, | |
| "loss": 0.2607, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15998807549476624, | |
| "step": 3855, | |
| "valid_targets_mean": 11794.1, | |
| "valid_targets_min": 3559 | |
| }, | |
| { | |
| "epoch": 6.5423728813559325, | |
| "grad_norm": 0.4502091870017217, | |
| "learning_rate": 5.223404688125189e-07, | |
| "loss": 0.2652, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08542191982269287, | |
| "step": 3860, | |
| "valid_targets_mean": 5364.5, | |
| "valid_targets_min": 1501 | |
| }, | |
| { | |
| "epoch": 6.5508474576271185, | |
| "grad_norm": 0.4518810494268951, | |
| "learning_rate": 5.033242349283307e-07, | |
| "loss": 0.2461, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13581258058547974, | |
| "step": 3865, | |
| "valid_targets_mean": 7274.4, | |
| "valid_targets_min": 2654 | |
| }, | |
| { | |
| "epoch": 6.559322033898305, | |
| "grad_norm": 0.467029591171528, | |
| "learning_rate": 4.846561896497682e-07, | |
| "loss": 0.2871, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10872132331132889, | |
| "step": 3870, | |
| "valid_targets_mean": 7022.8, | |
| "valid_targets_min": 4600 | |
| }, | |
| { | |
| "epoch": 6.567796610169491, | |
| "grad_norm": 0.4409609140769841, | |
| "learning_rate": 4.6633666636703325e-07, | |
| "loss": 0.256, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11535359919071198, | |
| "step": 3875, | |
| "valid_targets_mean": 7674.4, | |
| "valid_targets_min": 4116 | |
| }, | |
| { | |
| "epoch": 6.576271186440678, | |
| "grad_norm": 0.42972659095916, | |
| "learning_rate": 4.4836599224611586e-07, | |
| "loss": 0.2518, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09469720721244812, | |
| "step": 3880, | |
| "valid_targets_mean": 7193.9, | |
| "valid_targets_min": 712 | |
| }, | |
| { | |
| "epoch": 6.584745762711864, | |
| "grad_norm": 0.45334276617935493, | |
| "learning_rate": 4.3074448822295165e-07, | |
| "loss": 0.266, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23430021107196808, | |
| "step": 3885, | |
| "valid_targets_mean": 11505.8, | |
| "valid_targets_min": 2009 | |
| }, | |
| { | |
| "epoch": 6.593220338983051, | |
| "grad_norm": 0.43929363356639645, | |
| "learning_rate": 4.1347246899770435e-07, | |
| "loss": 0.252, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16690164804458618, | |
| "step": 3890, | |
| "valid_targets_mean": 7407.8, | |
| "valid_targets_min": 3494 | |
| }, | |
| { | |
| "epoch": 6.601694915254237, | |
| "grad_norm": 0.4147188602054449, | |
| "learning_rate": 3.965502430291235e-07, | |
| "loss": 0.2583, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16083547472953796, | |
| "step": 3895, | |
| "valid_targets_mean": 11111.1, | |
| "valid_targets_min": 5126 | |
| }, | |
| { | |
| "epoch": 6.610169491525424, | |
| "grad_norm": 0.455809497113382, | |
| "learning_rate": 3.7997811252905135e-07, | |
| "loss": 0.2462, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10246642678976059, | |
| "step": 3900, | |
| "valid_targets_mean": 6430.8, | |
| "valid_targets_min": 2509 | |
| }, | |
| { | |
| "epoch": 6.61864406779661, | |
| "grad_norm": 0.48481248655451537, | |
| "learning_rate": 3.6375637345701376e-07, | |
| "loss": 0.2921, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12926767766475677, | |
| "step": 3905, | |
| "valid_targets_mean": 8199.2, | |
| "valid_targets_min": 3433 | |
| }, | |
| { | |
| "epoch": 6.627118644067797, | |
| "grad_norm": 0.42600486230878915, | |
| "learning_rate": 3.4788531551495307e-07, | |
| "loss": 0.2337, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1657295525074005, | |
| "step": 3910, | |
| "valid_targets_mean": 9773.5, | |
| "valid_targets_min": 3644 | |
| }, | |
| { | |
| "epoch": 6.635593220338983, | |
| "grad_norm": 0.44131521564665016, | |
| "learning_rate": 3.323652221420326e-07, | |
| "loss": 0.2662, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11407070606946945, | |
| "step": 3915, | |
| "valid_targets_mean": 7225.0, | |
| "valid_targets_min": 3796 | |
| }, | |
| { | |
| "epoch": 6.6440677966101696, | |
| "grad_norm": 0.4377236209372105, | |
| "learning_rate": 3.1719637050958706e-07, | |
| "loss": 0.2443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16772839426994324, | |
| "step": 3920, | |
| "valid_targets_mean": 7740.8, | |
| "valid_targets_min": 2711 | |
| }, | |
| { | |
| "epoch": 6.652542372881356, | |
| "grad_norm": 0.48773453863500077, | |
| "learning_rate": 3.023790315161734e-07, | |
| "loss": 0.234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08592482656240463, | |
| "step": 3925, | |
| "valid_targets_mean": 5208.9, | |
| "valid_targets_min": 2740 | |
| }, | |
| { | |
| "epoch": 6.661016949152542, | |
| "grad_norm": 0.4720937825623047, | |
| "learning_rate": 2.8791346978273015e-07, | |
| "loss": 0.2457, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14076513051986694, | |
| "step": 3930, | |
| "valid_targets_mean": 7652.5, | |
| "valid_targets_min": 2939 | |
| }, | |
| { | |
| "epoch": 6.669491525423728, | |
| "grad_norm": 0.3859609875387887, | |
| "learning_rate": 2.7379994364784556e-07, | |
| "loss": 0.2748, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.149990051984787, | |
| "step": 3935, | |
| "valid_targets_mean": 10438.9, | |
| "valid_targets_min": 6415 | |
| }, | |
| { | |
| "epoch": 6.677966101694915, | |
| "grad_norm": 0.5550308272010621, | |
| "learning_rate": 2.60038705163157e-07, | |
| "loss": 0.2458, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08144932985305786, | |
| "step": 3940, | |
| "valid_targets_mean": 5604.2, | |
| "valid_targets_min": 3387 | |
| }, | |
| { | |
| "epoch": 6.686440677966102, | |
| "grad_norm": 0.3979338951489735, | |
| "learning_rate": 2.4663000008883664e-07, | |
| "loss": 0.2958, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16274738311767578, | |
| "step": 3945, | |
| "valid_targets_mean": 9629.6, | |
| "valid_targets_min": 3399 | |
| }, | |
| { | |
| "epoch": 6.694915254237288, | |
| "grad_norm": 0.46083448051365644, | |
| "learning_rate": 2.335740678892129e-07, | |
| "loss": 0.2563, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14789345860481262, | |
| "step": 3950, | |
| "valid_targets_mean": 7863.1, | |
| "valid_targets_min": 2423 | |
| }, | |
| { | |
| "epoch": 6.703389830508475, | |
| "grad_norm": 0.41684119058343516, | |
| "learning_rate": 2.20871141728487e-07, | |
| "loss": 0.2757, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2013491988182068, | |
| "step": 3955, | |
| "valid_targets_mean": 10909.6, | |
| "valid_targets_min": 3286 | |
| }, | |
| { | |
| "epoch": 6.711864406779661, | |
| "grad_norm": 0.40804130143632766, | |
| "learning_rate": 2.0852144846656764e-07, | |
| "loss": 0.254, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11485549807548523, | |
| "step": 3960, | |
| "valid_targets_mean": 7442.1, | |
| "valid_targets_min": 1795 | |
| }, | |
| { | |
| "epoch": 6.720338983050848, | |
| "grad_norm": 0.41669778757995984, | |
| "learning_rate": 1.9652520865502734e-07, | |
| "loss": 0.2667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11961879581212997, | |
| "step": 3965, | |
| "valid_targets_mean": 6853.5, | |
| "valid_targets_min": 2648 | |
| }, | |
| { | |
| "epoch": 6.728813559322034, | |
| "grad_norm": 0.4152131127329952, | |
| "learning_rate": 1.848826365331613e-07, | |
| "loss": 0.2728, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09015417098999023, | |
| "step": 3970, | |
| "valid_targets_mean": 6583.1, | |
| "valid_targets_min": 3468 | |
| }, | |
| { | |
| "epoch": 6.737288135593221, | |
| "grad_norm": 0.40715982611325285, | |
| "learning_rate": 1.7359394002415265e-07, | |
| "loss": 0.2537, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0808727890253067, | |
| "step": 3975, | |
| "valid_targets_mean": 7092.9, | |
| "valid_targets_min": 2970 | |
| }, | |
| { | |
| "epoch": 6.745762711864407, | |
| "grad_norm": 0.5044552290709712, | |
| "learning_rate": 1.626593207313709e-07, | |
| "loss": 0.2494, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08215104788541794, | |
| "step": 3980, | |
| "valid_targets_mean": 5966.9, | |
| "valid_targets_min": 2607 | |
| }, | |
| { | |
| "epoch": 6.754237288135593, | |
| "grad_norm": 0.43110120943383057, | |
| "learning_rate": 1.520789739347617e-07, | |
| "loss": 0.2726, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14837504923343658, | |
| "step": 3985, | |
| "valid_targets_mean": 8823.1, | |
| "valid_targets_min": 4021 | |
| }, | |
| { | |
| "epoch": 6.762711864406779, | |
| "grad_norm": 0.4055433819445956, | |
| "learning_rate": 1.4185308858737367e-07, | |
| "loss": 0.2864, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1624079942703247, | |
| "step": 3990, | |
| "valid_targets_mean": 7929.6, | |
| "valid_targets_min": 2855 | |
| }, | |
| { | |
| "epoch": 6.771186440677966, | |
| "grad_norm": 0.477581742178108, | |
| "learning_rate": 1.3198184731196162e-07, | |
| "loss": 0.2545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10930319130420685, | |
| "step": 3995, | |
| "valid_targets_mean": 7842.5, | |
| "valid_targets_min": 3884 | |
| }, | |
| { | |
| "epoch": 6.779661016949152, | |
| "grad_norm": 0.46632621619817743, | |
| "learning_rate": 1.2246542639774871e-07, | |
| "loss": 0.2893, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08825379610061646, | |
| "step": 4000, | |
| "valid_targets_mean": 5714.2, | |
| "valid_targets_min": 2503 | |
| }, | |
| { | |
| "epoch": 6.788135593220339, | |
| "grad_norm": 0.40822795025329855, | |
| "learning_rate": 1.1330399579725814e-07, | |
| "loss": 0.2502, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12100020796060562, | |
| "step": 4005, | |
| "valid_targets_mean": 7386.6, | |
| "valid_targets_min": 2520 | |
| }, | |
| { | |
| "epoch": 6.796610169491525, | |
| "grad_norm": 0.4869071996289484, | |
| "learning_rate": 1.0449771912329543e-07, | |
| "loss": 0.2679, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13639283180236816, | |
| "step": 4010, | |
| "valid_targets_mean": 6874.1, | |
| "valid_targets_min": 3308 | |
| }, | |
| { | |
| "epoch": 6.805084745762712, | |
| "grad_norm": 0.4514466969309618, | |
| "learning_rate": 9.604675364601079e-08, | |
| "loss": 0.2802, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16514557600021362, | |
| "step": 4015, | |
| "valid_targets_mean": 8557.4, | |
| "valid_targets_min": 2551 | |
| }, | |
| { | |
| "epoch": 6.813559322033898, | |
| "grad_norm": 0.45927796348806976, | |
| "learning_rate": 8.795125029010587e-08, | |
| "loss": 0.2344, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14747881889343262, | |
| "step": 4020, | |
| "valid_targets_mean": 7801.5, | |
| "valid_targets_min": 1934 | |
| }, | |
| { | |
| "epoch": 6.822033898305085, | |
| "grad_norm": 0.4736824137520606, | |
| "learning_rate": 8.021135363212252e-08, | |
| "loss": 0.2668, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1297898292541504, | |
| "step": 4025, | |
| "valid_targets_mean": 7571.8, | |
| "valid_targets_min": 3105 | |
| }, | |
| { | |
| "epoch": 6.830508474576272, | |
| "grad_norm": 0.44820578616508494, | |
| "learning_rate": 7.282720189787374e-08, | |
| "loss": 0.2582, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11103338748216629, | |
| "step": 4030, | |
| "valid_targets_mean": 7009.0, | |
| "valid_targets_min": 2167 | |
| }, | |
| { | |
| "epoch": 6.838983050847458, | |
| "grad_norm": 0.41724902537057335, | |
| "learning_rate": 6.579892695996571e-08, | |
| "loss": 0.2534, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11125913262367249, | |
| "step": 4035, | |
| "valid_targets_mean": 7157.2, | |
| "valid_targets_min": 4536 | |
| }, | |
| { | |
| "epoch": 6.847457627118644, | |
| "grad_norm": 0.43675935476999767, | |
| "learning_rate": 5.912665433545517e-08, | |
| "loss": 0.2585, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13949739933013916, | |
| "step": 4040, | |
| "valid_targets_mean": 7369.8, | |
| "valid_targets_min": 2319 | |
| }, | |
| { | |
| "epoch": 6.8559322033898304, | |
| "grad_norm": 0.3967356930940284, | |
| "learning_rate": 5.281050318358683e-08, | |
| "loss": 0.2403, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11958187818527222, | |
| "step": 4045, | |
| "valid_targets_mean": 9524.0, | |
| "valid_targets_min": 3222 | |
| }, | |
| { | |
| "epoch": 6.864406779661017, | |
| "grad_norm": 0.3764643187198624, | |
| "learning_rate": 4.685058630369055e-08, | |
| "loss": 0.2759, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14520995318889618, | |
| "step": 4050, | |
| "valid_targets_mean": 10037.6, | |
| "valid_targets_min": 3726 | |
| }, | |
| { | |
| "epoch": 6.872881355932203, | |
| "grad_norm": 0.4472913726578498, | |
| "learning_rate": 4.124701013314969e-08, | |
| "loss": 0.2816, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13814541697502136, | |
| "step": 4055, | |
| "valid_targets_mean": 8044.9, | |
| "valid_targets_min": 1559 | |
| }, | |
| { | |
| "epoch": 6.88135593220339, | |
| "grad_norm": 0.4304758387859473, | |
| "learning_rate": 3.599987474550259e-08, | |
| "loss": 0.2661, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17096751928329468, | |
| "step": 4060, | |
| "valid_targets_mean": 8318.6, | |
| "valid_targets_min": 2625 | |
| }, | |
| { | |
| "epoch": 6.889830508474576, | |
| "grad_norm": 0.4255133064645767, | |
| "learning_rate": 3.110927384865958e-08, | |
| "loss": 0.2771, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18236063420772552, | |
| "step": 4065, | |
| "valid_targets_mean": 10562.4, | |
| "valid_targets_min": 4446 | |
| }, | |
| { | |
| "epoch": 6.898305084745763, | |
| "grad_norm": 0.42579147928580874, | |
| "learning_rate": 2.6575294783230954e-08, | |
| "loss": 0.232, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0787440836429596, | |
| "step": 4070, | |
| "valid_targets_mean": 5780.5, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 6.906779661016949, | |
| "grad_norm": 0.41394744030559266, | |
| "learning_rate": 2.239801852095935e-08, | |
| "loss": 0.2446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.160111665725708, | |
| "step": 4075, | |
| "valid_targets_mean": 10663.0, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 6.915254237288136, | |
| "grad_norm": 0.4398804522507922, | |
| "learning_rate": 1.8577519663278697e-08, | |
| "loss": 0.2397, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11937999725341797, | |
| "step": 4080, | |
| "valid_targets_mean": 8688.0, | |
| "valid_targets_min": 4993 | |
| }, | |
| { | |
| "epoch": 6.923728813559322, | |
| "grad_norm": 0.4242647115439553, | |
| "learning_rate": 1.5113866439986357e-08, | |
| "loss": 0.2832, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16489309072494507, | |
| "step": 4085, | |
| "valid_targets_mean": 8224.9, | |
| "valid_targets_min": 2579 | |
| }, | |
| { | |
| "epoch": 6.932203389830509, | |
| "grad_norm": 0.4463993067738115, | |
| "learning_rate": 1.2007120708015241e-08, | |
| "loss": 0.2719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13091351091861725, | |
| "step": 4090, | |
| "valid_targets_mean": 6447.1, | |
| "valid_targets_min": 2442 | |
| }, | |
| { | |
| "epoch": 6.940677966101695, | |
| "grad_norm": 0.3869465593180131, | |
| "learning_rate": 9.257337950332456e-09, | |
| "loss": 0.2656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12474340200424194, | |
| "step": 4095, | |
| "valid_targets_mean": 8366.1, | |
| "valid_targets_min": 3401 | |
| }, | |
| { | |
| "epoch": 6.9491525423728815, | |
| "grad_norm": 0.4310417118559466, | |
| "learning_rate": 6.864567274955658e-09, | |
| "loss": 0.2695, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1725938618183136, | |
| "step": 4100, | |
| "valid_targets_mean": 8291.4, | |
| "valid_targets_min": 2433 | |
| }, | |
| { | |
| "epoch": 6.9576271186440675, | |
| "grad_norm": 0.495997210458029, | |
| "learning_rate": 4.828851414062641e-09, | |
| "loss": 0.2767, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15047688782215118, | |
| "step": 4105, | |
| "valid_targets_mean": 9112.9, | |
| "valid_targets_min": 4448 | |
| }, | |
| { | |
| "epoch": 6.966101694915254, | |
| "grad_norm": 0.40916895754360966, | |
| "learning_rate": 3.150226723243055e-09, | |
| "loss": 0.2685, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12793532013893127, | |
| "step": 4110, | |
| "valid_targets_mean": 8339.1, | |
| "valid_targets_min": 2695 | |
| }, | |
| { | |
| "epoch": 6.97457627118644, | |
| "grad_norm": 0.4136374005938968, | |
| "learning_rate": 1.8287231808322702e-09, | |
| "loss": 0.2591, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14656734466552734, | |
| "step": 4115, | |
| "valid_targets_mean": 10133.8, | |
| "valid_targets_min": 6907 | |
| }, | |
| { | |
| "epoch": 6.983050847457627, | |
| "grad_norm": 0.44077122285507164, | |
| "learning_rate": 8.643643873962326e-10, | |
| "loss": 0.2447, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10336880385875702, | |
| "step": 4120, | |
| "valid_targets_mean": 7540.5, | |
| "valid_targets_min": 4074 | |
| }, | |
| { | |
| "epoch": 6.991525423728813, | |
| "grad_norm": 1.279550969019676, | |
| "learning_rate": 2.5716756528959553e-10, | |
| "loss": 0.2545, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16964712738990784, | |
| "step": 4125, | |
| "valid_targets_mean": 9376.9, | |
| "valid_targets_min": 3622 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.42781761566450044, | |
| "learning_rate": 7.143558364841597e-12, | |
| "loss": 0.2477, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07686486095190048, | |
| "step": 4130, | |
| "valid_targets_mean": 6255.1, | |
| "valid_targets_min": 3251 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.07686486095190048, | |
| "step": 4130, | |
| "total_flos": 2.65446196155043e+18, | |
| "train_loss": 0.30306281665335555, | |
| "train_runtime": 64515.1665, | |
| "train_samples_per_second": 1.024, | |
| "train_steps_per_second": 0.064, | |
| "valid_targets_mean": 6255.1, | |
| "valid_targets_min": 3251 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 4130, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.65446196155043e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |