diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..b517a18939acfb53322520eb24aba2000178b5b6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-8500/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-8600/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-8674/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6f94bad92d63cd881fe83a15f669364f09185457 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +library_name: peft +license: other +base_model: Qwen/Qwen2.5-VL-7B-Instruct +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +pipeline_tag: text-generation +model-index: +- name: Qwen2.5-VL-7B-diversifier-sft-valid + results: [] +--- + + + +# Qwen2.5-VL-7B-diversifier-sft-valid + +This model is a fine-tuned version of [Qwen/Qwen2.5-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct) on the diversifier_sft_valid dataset. 
+ +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 2e-06 +- train_batch_size: 2 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- gradient_accumulation_steps: 2 +- total_train_batch_size: 4 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 0.05 +- num_epochs: 2 + +### Training results + + + +### Framework versions + +- PEFT 0.18.1 +- Transformers 5.2.0 +- Pytorch 2.5.1+cu124 +- Datasets 4.0.0 +- Tokenizers 0.22.2 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f36e32e61c434af152644134a13070b69334e6c --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "layers.0.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.4.mlp.up_proj", + 
"layers.26.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.18.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.8.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.6.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.24.mlp.down_proj", + "layers.17.mlp.gate_proj", + "layers.21.mlp.down_proj", + "layers.15.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.26.mlp.up_proj", + "q_proj", + "layers.20.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.8.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.0.mlp.gate_proj", + "layers.12.mlp.gate_proj", + "layers.2.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.25.mlp.up_proj", + "k_proj", + "layers.1.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.9.mlp.up_proj", + "o_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.gate_proj", + "layers.6.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.2.mlp.up_proj", + 
"layers.16.mlp.gate_proj", + "layers.21.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.27.mlp.up_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.23.mlp.gate_proj", + "layers.10.mlp.down_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ebb12ed330789be5f2e1499163e626ac73edf85 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df8ac9086865872cf3b09183d585810d90e12118d7385364d0defec1f0d81db +size 40428088 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..96117cbcbd37faadb325e68d5cd0eadb0d1ef7da --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 2994036868841472.0, + "train_loss": 0.5227575608908595, + "train_runtime": 21685.2, + "train_samples_per_second": 1.6, + "train_steps_per_second": 0.4 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set 
image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8500/README.md b/checkpoint-8500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-8500/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the 
risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-8500/adapter_config.json b/checkpoint-8500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f36e32e61c434af152644134a13070b69334e6c --- /dev/null +++ b/checkpoint-8500/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.0.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.4.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.18.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.8.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.6.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.24.mlp.down_proj", + "layers.17.mlp.gate_proj", + "layers.21.mlp.down_proj", + "layers.15.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.26.mlp.up_proj", + "q_proj", + "layers.20.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.8.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.0.mlp.gate_proj", + "layers.12.mlp.gate_proj", + "layers.2.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.25.mlp.up_proj", + "k_proj", + "layers.1.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.9.mlp.up_proj", + "o_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.gate_proj", + 
"layers.6.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.16.mlp.gate_proj", + "layers.21.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.27.mlp.up_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.23.mlp.gate_proj", + "layers.10.mlp.down_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-8500/adapter_model.safetensors b/checkpoint-8500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e09f4134be0602a0c5ccf705287940694d2fbcda --- /dev/null +++ b/checkpoint-8500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c55302e3420c52605c0bacfb23d9b80949beaba56587a0d8276953099374f3 +size 40428088 diff --git a/checkpoint-8500/chat_template.jinja b/checkpoint-8500/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-8500/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set 
video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8500/global_step8500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-8500/global_step8500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b67a0db7786026cdfefbcfd57673f4f68e60e643 --- /dev/null +++ b/checkpoint-8500/global_step8500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b7bd07c2dc696938e91d8acb76d2137642d141c3a1ec585f47d0b6e4cef5a8 +size 242224880 diff --git a/checkpoint-8500/global_step8500/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-8500/global_step8500/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f2e65d015c5b20981505ca45f3436f33368caea --- /dev/null +++ b/checkpoint-8500/global_step8500/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8974a75b5d07f3ed0ce487bb7f3e8b36379d743de40566f2b8e7a6c8e0d1c168 +size 460630 diff --git a/checkpoint-8500/latest b/checkpoint-8500/latest new file mode 100644 index 0000000000000000000000000000000000000000..a450348b0838b4468f619777572a0bf08a822fa9 --- /dev/null +++ b/checkpoint-8500/latest @@ -0,0 +1 @@ +global_step8500 \ No newline at end of file diff --git a/checkpoint-8500/processor_config.json b/checkpoint-8500/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-8500/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-8500/rng_state.pth b/checkpoint-8500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fcf11c9b78de2c2c55fdfc44daef09cd9181c14 --- /dev/null +++ b/checkpoint-8500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc398a73e46bca50defc25b4467441315246a33383a5d6c80985d238e57127f +size 14244 diff --git a/checkpoint-8500/scheduler.pt b/checkpoint-8500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4467aed16cd96102d51149a7fdfcf5d58665be38 --- /dev/null +++ b/checkpoint-8500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a752fbbe1deaf98e87b4e650f24cb3577d134390f740503c5fddab44633e02da +size 
1000 diff --git a/checkpoint-8500/tokenizer.json b/checkpoint-8500/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-8500/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-8500/tokenizer_config.json b/checkpoint-8500/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-8500/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-8500/trainer_state.json b/checkpoint-8500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..816bd60b89314ddc09bd922421457119f6db4382 --- /dev/null +++ b/checkpoint-8500/trainer_state.json @@ -0,0 +1,59534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.959880101452617, + "eval_steps": 500, + "global_step": 8500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00023057412958266084, + "grad_norm": 
0.5456158480642083, + "learning_rate": 0.0, + "loss": 1.2793785333633423, + "step": 1 + }, + { + "epoch": 0.0004611482591653217, + "grad_norm": 0.5348414425588685, + "learning_rate": 4.6082949308755755e-09, + "loss": 1.2810249328613281, + "step": 2 + }, + { + "epoch": 0.0006917223887479825, + "grad_norm": 0.5742665952103186, + "learning_rate": 9.216589861751151e-09, + "loss": 1.5180970430374146, + "step": 3 + }, + { + "epoch": 0.0009222965183306433, + "grad_norm": 0.47570843593061296, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.2771815061569214, + "step": 4 + }, + { + "epoch": 0.001152870647913304, + "grad_norm": 0.6179854753010914, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.6275714635849, + "step": 5 + }, + { + "epoch": 0.001383444777495965, + "grad_norm": 0.5728287935763549, + "learning_rate": 2.304147465437788e-08, + "loss": 1.4852838516235352, + "step": 6 + }, + { + "epoch": 0.0016140189070786258, + "grad_norm": 0.7402806033919309, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.3845010995864868, + "step": 7 + }, + { + "epoch": 0.0018445930366612867, + "grad_norm": 0.5357861516775319, + "learning_rate": 3.225806451612903e-08, + "loss": 1.2716574668884277, + "step": 8 + }, + { + "epoch": 0.0020751671662439476, + "grad_norm": 0.49378309074438254, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.4046194553375244, + "step": 9 + }, + { + "epoch": 0.002305741295826608, + "grad_norm": 0.5231726157264511, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.4988269805908203, + "step": 10 + }, + { + "epoch": 0.002536315425409269, + "grad_norm": 0.5469518790093721, + "learning_rate": 4.608294930875576e-08, + "loss": 1.3523340225219727, + "step": 11 + }, + { + "epoch": 0.00276688955499193, + "grad_norm": 0.5125117134786147, + "learning_rate": 5.069124423963134e-08, + "loss": 1.3664941787719727, + "step": 12 + }, + { + "epoch": 0.0029974636845745907, + "grad_norm": 0.5526794406387441, + "learning_rate": 5.529953917050691e-08, + "loss": 
1.4892609119415283, + "step": 13 + }, + { + "epoch": 0.0032280378141572516, + "grad_norm": 0.5197262159341672, + "learning_rate": 5.990783410138249e-08, + "loss": 1.305836796760559, + "step": 14 + }, + { + "epoch": 0.0034586119437399125, + "grad_norm": 0.5214120337499729, + "learning_rate": 6.451612903225806e-08, + "loss": 1.3458774089813232, + "step": 15 + }, + { + "epoch": 0.0036891860733225734, + "grad_norm": 0.5249821302153419, + "learning_rate": 6.912442396313364e-08, + "loss": 1.4305222034454346, + "step": 16 + }, + { + "epoch": 0.003919760202905234, + "grad_norm": 0.48597332722440695, + "learning_rate": 7.373271889400921e-08, + "loss": 1.4247705936431885, + "step": 17 + }, + { + "epoch": 0.004150334332487895, + "grad_norm": 0.5492563451667527, + "learning_rate": 7.834101382488478e-08, + "loss": 1.4151098728179932, + "step": 18 + }, + { + "epoch": 0.004380908462070556, + "grad_norm": 0.4931832122178826, + "learning_rate": 8.294930875576037e-08, + "loss": 1.4633708000183105, + "step": 19 + }, + { + "epoch": 0.004611482591653216, + "grad_norm": 0.4601872454406169, + "learning_rate": 8.755760368663594e-08, + "loss": 1.2271082401275635, + "step": 20 + }, + { + "epoch": 0.004842056721235877, + "grad_norm": 0.5482366075993729, + "learning_rate": 9.216589861751152e-08, + "loss": 1.493757724761963, + "step": 21 + }, + { + "epoch": 0.005072630850818538, + "grad_norm": 0.5190439230451068, + "learning_rate": 9.677419354838709e-08, + "loss": 1.446916103363037, + "step": 22 + }, + { + "epoch": 0.005303204980401199, + "grad_norm": 0.5010656217784003, + "learning_rate": 1.0138248847926267e-07, + "loss": 1.4575269222259521, + "step": 23 + }, + { + "epoch": 0.00553377910998386, + "grad_norm": 0.5983934917725938, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.5000505447387695, + "step": 24 + }, + { + "epoch": 0.005764353239566521, + "grad_norm": 0.5264341016273323, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.32895827293396, + "step": 25 + }, + { + "epoch": 
0.005994927369149181, + "grad_norm": 0.5507902323042685, + "learning_rate": 1.152073732718894e-07, + "loss": 1.479337215423584, + "step": 26 + }, + { + "epoch": 0.006225501498731842, + "grad_norm": 0.4597707182389027, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.3543293476104736, + "step": 27 + }, + { + "epoch": 0.006456075628314503, + "grad_norm": 0.4984681813259071, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.3075106143951416, + "step": 28 + }, + { + "epoch": 0.006686649757897164, + "grad_norm": 0.540668752320374, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.2077248096466064, + "step": 29 + }, + { + "epoch": 0.006917223887479825, + "grad_norm": 0.5053904313535789, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2841781377792358, + "step": 30 + }, + { + "epoch": 0.0071477980170624855, + "grad_norm": 0.5007265235886551, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.4022557735443115, + "step": 31 + }, + { + "epoch": 0.007378372146645147, + "grad_norm": 0.5376464155945276, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.4971141815185547, + "step": 32 + }, + { + "epoch": 0.007608946276227807, + "grad_norm": 0.49485432736210644, + "learning_rate": 1.4746543778801842e-07, + "loss": 1.3699426651000977, + "step": 33 + }, + { + "epoch": 0.007839520405810468, + "grad_norm": 0.602690054138726, + "learning_rate": 1.52073732718894e-07, + "loss": 1.466570258140564, + "step": 34 + }, + { + "epoch": 0.008070094535393129, + "grad_norm": 0.544784030735669, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.3031455278396606, + "step": 35 + }, + { + "epoch": 0.00830066866497579, + "grad_norm": 0.5516628365932859, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.3989369869232178, + "step": 36 + }, + { + "epoch": 0.00853124279455845, + "grad_norm": 0.5375908894429152, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.41139817237854, + "step": 37 + }, + { + "epoch": 0.008761816924141111, + "grad_norm": 
0.4923010186613349, + "learning_rate": 1.705069124423963e-07, + "loss": 1.305363655090332, + "step": 38 + }, + { + "epoch": 0.008992391053723773, + "grad_norm": 0.5782996548067549, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3931915760040283, + "step": 39 + }, + { + "epoch": 0.009222965183306432, + "grad_norm": 0.5425552369520273, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.4728009700775146, + "step": 40 + }, + { + "epoch": 0.009453539312889093, + "grad_norm": 0.5162050268750099, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.4165544509887695, + "step": 41 + }, + { + "epoch": 0.009684113442471755, + "grad_norm": 0.509079818266607, + "learning_rate": 1.889400921658986e-07, + "loss": 1.3693115711212158, + "step": 42 + }, + { + "epoch": 0.009914687572054416, + "grad_norm": 0.5804116282906935, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.468721866607666, + "step": 43 + }, + { + "epoch": 0.010145261701637076, + "grad_norm": 0.5466645633601509, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.4732704162597656, + "step": 44 + }, + { + "epoch": 0.010375835831219737, + "grad_norm": 0.4534942899185725, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.2579209804534912, + "step": 45 + }, + { + "epoch": 0.010606409960802398, + "grad_norm": 0.4766380716605293, + "learning_rate": 2.073732718894009e-07, + "loss": 1.3587429523468018, + "step": 46 + }, + { + "epoch": 0.010836984090385058, + "grad_norm": 0.5409254453286721, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.368800401687622, + "step": 47 + }, + { + "epoch": 0.01106755821996772, + "grad_norm": 0.5103994243466702, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.2960132360458374, + "step": 48 + }, + { + "epoch": 0.01129813234955038, + "grad_norm": 0.47493679434319974, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.3035235404968262, + "step": 49 + }, + { + "epoch": 0.011528706479133042, + "grad_norm": 0.5271868916321076, + "learning_rate": 
2.2580645161290322e-07, + "loss": 1.5074443817138672, + "step": 50 + }, + { + "epoch": 0.011759280608715702, + "grad_norm": 0.5381217045242119, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4689760208129883, + "step": 51 + }, + { + "epoch": 0.011989854738298363, + "grad_norm": 0.4629483381608022, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.3542251586914062, + "step": 52 + }, + { + "epoch": 0.012220428867881024, + "grad_norm": 0.4592532760230554, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.3521728515625, + "step": 53 + }, + { + "epoch": 0.012451002997463684, + "grad_norm": 0.5030837073491258, + "learning_rate": 2.442396313364055e-07, + "loss": 1.3577494621276855, + "step": 54 + }, + { + "epoch": 0.012681577127046345, + "grad_norm": 0.5438911836333451, + "learning_rate": 2.488479262672811e-07, + "loss": 1.459476351737976, + "step": 55 + }, + { + "epoch": 0.012912151256629006, + "grad_norm": 0.52516269169267, + "learning_rate": 2.534562211981567e-07, + "loss": 1.484410047531128, + "step": 56 + }, + { + "epoch": 0.013142725386211668, + "grad_norm": 0.5188914022486312, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3589065074920654, + "step": 57 + }, + { + "epoch": 0.013373299515794327, + "grad_norm": 0.5619229477118247, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.3558262586593628, + "step": 58 + }, + { + "epoch": 0.013603873645376989, + "grad_norm": 0.5534574014271282, + "learning_rate": 2.672811059907834e-07, + "loss": 1.5165367126464844, + "step": 59 + }, + { + "epoch": 0.01383444777495965, + "grad_norm": 0.47598313164662104, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.3051776885986328, + "step": 60 + }, + { + "epoch": 0.01406502190454231, + "grad_norm": 0.45011107968146047, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.2916524410247803, + "step": 61 + }, + { + "epoch": 0.014295596034124971, + "grad_norm": 0.513792634149487, + "learning_rate": 2.8110599078341015e-07, + "loss": 1.440261721611023, + 
"step": 62 + }, + { + "epoch": 0.014526170163707632, + "grad_norm": 0.5424492375693261, + "learning_rate": 2.857142857142857e-07, + "loss": 1.3422625064849854, + "step": 63 + }, + { + "epoch": 0.014756744293290294, + "grad_norm": 0.4598784526258713, + "learning_rate": 2.903225806451613e-07, + "loss": 1.374439001083374, + "step": 64 + }, + { + "epoch": 0.014987318422872953, + "grad_norm": 0.5339252174305668, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.4382294416427612, + "step": 65 + }, + { + "epoch": 0.015217892552455614, + "grad_norm": 0.5302645203365586, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.3971002101898193, + "step": 66 + }, + { + "epoch": 0.015448466682038276, + "grad_norm": 0.5711144083332746, + "learning_rate": 3.04147465437788e-07, + "loss": 1.376272439956665, + "step": 67 + }, + { + "epoch": 0.015679040811620935, + "grad_norm": 0.5016109357973636, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.3135097026824951, + "step": 68 + }, + { + "epoch": 0.015909614941203597, + "grad_norm": 0.5041882505031982, + "learning_rate": 3.133640552995391e-07, + "loss": 1.2688875198364258, + "step": 69 + }, + { + "epoch": 0.016140189070786258, + "grad_norm": 0.544108037399583, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.4380691051483154, + "step": 70 + }, + { + "epoch": 0.01637076320036892, + "grad_norm": 0.5634345795303867, + "learning_rate": 3.225806451612903e-07, + "loss": 1.319260835647583, + "step": 71 + }, + { + "epoch": 0.01660133732995158, + "grad_norm": 0.5352869486400713, + "learning_rate": 3.271889400921659e-07, + "loss": 1.4083738327026367, + "step": 72 + }, + { + "epoch": 0.01683191145953424, + "grad_norm": 0.5524091199068598, + "learning_rate": 3.317972350230415e-07, + "loss": 1.4904775619506836, + "step": 73 + }, + { + "epoch": 0.0170624855891169, + "grad_norm": 0.5488563092854116, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.4534616470336914, + "step": 74 + }, + { + "epoch": 0.01729305971869956, + 
"grad_norm": 0.621117268365485, + "learning_rate": 3.410138248847926e-07, + "loss": 1.6545689105987549, + "step": 75 + }, + { + "epoch": 0.017523633848282223, + "grad_norm": 0.4834761822798673, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.2267192602157593, + "step": 76 + }, + { + "epoch": 0.017754207977864884, + "grad_norm": 0.5801091305703396, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.4207227230072021, + "step": 77 + }, + { + "epoch": 0.017984782107447545, + "grad_norm": 0.5253671028782199, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.4952092170715332, + "step": 78 + }, + { + "epoch": 0.018215356237030206, + "grad_norm": 0.4832223487637491, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2932121753692627, + "step": 79 + }, + { + "epoch": 0.018445930366612864, + "grad_norm": 0.5623376259320272, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.3855851888656616, + "step": 80 + }, + { + "epoch": 0.018676504496195526, + "grad_norm": 0.45682252121341854, + "learning_rate": 3.686635944700461e-07, + "loss": 1.3645650148391724, + "step": 81 + }, + { + "epoch": 0.018907078625778187, + "grad_norm": 0.49579660369860507, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.322283387184143, + "step": 82 + }, + { + "epoch": 0.01913765275536085, + "grad_norm": 0.5177315365924456, + "learning_rate": 3.778801843317972e-07, + "loss": 1.3363629579544067, + "step": 83 + }, + { + "epoch": 0.01936822688494351, + "grad_norm": 0.616201260540867, + "learning_rate": 3.824884792626728e-07, + "loss": 1.553279161453247, + "step": 84 + }, + { + "epoch": 0.01959880101452617, + "grad_norm": 0.5198473540371843, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.4434814453125, + "step": 85 + }, + { + "epoch": 0.019829375144108832, + "grad_norm": 0.5923570018189629, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.5134285688400269, + "step": 86 + }, + { + "epoch": 0.02005994927369149, + "grad_norm": 0.5850924486743854, + "learning_rate": 
3.963133640552995e-07, + "loss": 1.4244651794433594, + "step": 87 + }, + { + "epoch": 0.02029052340327415, + "grad_norm": 0.560105193358992, + "learning_rate": 4.009216589861751e-07, + "loss": 1.4571855068206787, + "step": 88 + }, + { + "epoch": 0.020521097532856813, + "grad_norm": 0.48108556089196525, + "learning_rate": 4.055299539170507e-07, + "loss": 1.2940685749053955, + "step": 89 + }, + { + "epoch": 0.020751671662439474, + "grad_norm": 0.5203979535892653, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3537572622299194, + "step": 90 + }, + { + "epoch": 0.020982245792022135, + "grad_norm": 0.5791117780548783, + "learning_rate": 4.147465437788018e-07, + "loss": 1.524500846862793, + "step": 91 + }, + { + "epoch": 0.021212819921604797, + "grad_norm": 0.4890632694429427, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.4414368867874146, + "step": 92 + }, + { + "epoch": 0.021443394051187458, + "grad_norm": 0.49954451696473423, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.284010887145996, + "step": 93 + }, + { + "epoch": 0.021673968180770116, + "grad_norm": 0.6088073736973271, + "learning_rate": 4.285714285714285e-07, + "loss": 1.5901892185211182, + "step": 94 + }, + { + "epoch": 0.021904542310352777, + "grad_norm": 0.5856129890195899, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.4408211708068848, + "step": 95 + }, + { + "epoch": 0.02213511643993544, + "grad_norm": 0.49571353442310634, + "learning_rate": 4.377880184331797e-07, + "loss": 1.2293554544448853, + "step": 96 + }, + { + "epoch": 0.0223656905695181, + "grad_norm": 0.570508723127356, + "learning_rate": 4.423963133640553e-07, + "loss": 1.4144377708435059, + "step": 97 + }, + { + "epoch": 0.02259626469910076, + "grad_norm": 0.5952794755762669, + "learning_rate": 4.4700460829493084e-07, + "loss": 1.359034776687622, + "step": 98 + }, + { + "epoch": 0.022826838828683423, + "grad_norm": 0.5878914385748992, + "learning_rate": 4.5161290322580644e-07, + "loss": 1.3299517631530762, + 
"step": 99 + }, + { + "epoch": 0.023057412958266084, + "grad_norm": 0.5039341997298462, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.3072423934936523, + "step": 100 + }, + { + "epoch": 0.023287987087848742, + "grad_norm": 0.6205508042108064, + "learning_rate": 4.608294930875576e-07, + "loss": 1.5683096647262573, + "step": 101 + }, + { + "epoch": 0.023518561217431403, + "grad_norm": 0.6300075069307655, + "learning_rate": 4.654377880184331e-07, + "loss": 1.6294015645980835, + "step": 102 + }, + { + "epoch": 0.023749135347014064, + "grad_norm": 0.5245849244619794, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.424511194229126, + "step": 103 + }, + { + "epoch": 0.023979709476596726, + "grad_norm": 0.5471205081131801, + "learning_rate": 4.746543778801843e-07, + "loss": 1.4169164896011353, + "step": 104 + }, + { + "epoch": 0.024210283606179387, + "grad_norm": 0.5854813174619509, + "learning_rate": 4.792626728110599e-07, + "loss": 1.3933480978012085, + "step": 105 + }, + { + "epoch": 0.02444085773576205, + "grad_norm": 0.6166413586526565, + "learning_rate": 4.838709677419355e-07, + "loss": 1.488750696182251, + "step": 106 + }, + { + "epoch": 0.02467143186534471, + "grad_norm": 0.6052025315612124, + "learning_rate": 4.88479262672811e-07, + "loss": 1.4852150678634644, + "step": 107 + }, + { + "epoch": 0.024902005994927368, + "grad_norm": 0.5750922845804657, + "learning_rate": 4.930875576036866e-07, + "loss": 1.4256765842437744, + "step": 108 + }, + { + "epoch": 0.02513258012451003, + "grad_norm": 0.5231547313189364, + "learning_rate": 4.976958525345622e-07, + "loss": 1.3063642978668213, + "step": 109 + }, + { + "epoch": 0.02536315425409269, + "grad_norm": 0.5734263022927267, + "learning_rate": 5.023041474654378e-07, + "loss": 1.549802303314209, + "step": 110 + }, + { + "epoch": 0.02559372838367535, + "grad_norm": 0.5041709928346361, + "learning_rate": 5.069124423963134e-07, + "loss": 1.301950454711914, + "step": 111 + }, + { + "epoch": 
0.025824302513258013, + "grad_norm": 0.5567596794280206, + "learning_rate": 5.11520737327189e-07, + "loss": 1.3025325536727905, + "step": 112 + }, + { + "epoch": 0.026054876642840674, + "grad_norm": 0.5369405016436734, + "learning_rate": 5.161290322580645e-07, + "loss": 1.40749192237854, + "step": 113 + }, + { + "epoch": 0.026285450772423335, + "grad_norm": 0.5208396194792263, + "learning_rate": 5.2073732718894e-07, + "loss": 1.3216793537139893, + "step": 114 + }, + { + "epoch": 0.026516024902005993, + "grad_norm": 0.5052494958784187, + "learning_rate": 5.253456221198155e-07, + "loss": 1.3189308643341064, + "step": 115 + }, + { + "epoch": 0.026746599031588655, + "grad_norm": 0.5632602249643789, + "learning_rate": 5.299539170506912e-07, + "loss": 1.430384635925293, + "step": 116 + }, + { + "epoch": 0.026977173161171316, + "grad_norm": 0.5516062364182813, + "learning_rate": 5.345622119815668e-07, + "loss": 1.4081478118896484, + "step": 117 + }, + { + "epoch": 0.027207747290753977, + "grad_norm": 0.6385508559977366, + "learning_rate": 5.391705069124423e-07, + "loss": 1.434388518333435, + "step": 118 + }, + { + "epoch": 0.02743832142033664, + "grad_norm": 0.6138756203209041, + "learning_rate": 5.437788018433179e-07, + "loss": 1.4139282703399658, + "step": 119 + }, + { + "epoch": 0.0276688955499193, + "grad_norm": 0.5683069275087388, + "learning_rate": 5.483870967741935e-07, + "loss": 1.4511487483978271, + "step": 120 + }, + { + "epoch": 0.02789946967950196, + "grad_norm": 0.6423215590072974, + "learning_rate": 5.529953917050691e-07, + "loss": 1.5713481903076172, + "step": 121 + }, + { + "epoch": 0.02813004380908462, + "grad_norm": 0.5705917499340588, + "learning_rate": 5.576036866359447e-07, + "loss": 1.4315730333328247, + "step": 122 + }, + { + "epoch": 0.02836061793866728, + "grad_norm": 0.5316898536625556, + "learning_rate": 5.622119815668203e-07, + "loss": 1.3283708095550537, + "step": 123 + }, + { + "epoch": 0.028591192068249942, + "grad_norm": 0.6184222176453401, 
+ "learning_rate": 5.668202764976958e-07, + "loss": 1.4329016208648682, + "step": 124 + }, + { + "epoch": 0.028821766197832603, + "grad_norm": 0.5872933055537319, + "learning_rate": 5.714285714285714e-07, + "loss": 1.444648265838623, + "step": 125 + }, + { + "epoch": 0.029052340327415264, + "grad_norm": 0.5205647887621043, + "learning_rate": 5.760368663594469e-07, + "loss": 1.3584785461425781, + "step": 126 + }, + { + "epoch": 0.029282914456997926, + "grad_norm": 0.5687232002808722, + "learning_rate": 5.806451612903226e-07, + "loss": 1.2815918922424316, + "step": 127 + }, + { + "epoch": 0.029513488586580587, + "grad_norm": 0.5252774303203537, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3332037925720215, + "step": 128 + }, + { + "epoch": 0.029744062716163245, + "grad_norm": 0.5694649769044726, + "learning_rate": 5.898617511520737e-07, + "loss": 1.4522390365600586, + "step": 129 + }, + { + "epoch": 0.029974636845745906, + "grad_norm": 0.5607244925516301, + "learning_rate": 5.944700460829493e-07, + "loss": 1.4362024068832397, + "step": 130 + }, + { + "epoch": 0.030205210975328568, + "grad_norm": 0.5432906779366606, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3271276950836182, + "step": 131 + }, + { + "epoch": 0.03043578510491123, + "grad_norm": 0.6175056690394787, + "learning_rate": 6.036866359447004e-07, + "loss": 1.5936369895935059, + "step": 132 + }, + { + "epoch": 0.03066635923449389, + "grad_norm": 0.5887629397700789, + "learning_rate": 6.08294930875576e-07, + "loss": 1.4786381721496582, + "step": 133 + }, + { + "epoch": 0.03089693336407655, + "grad_norm": 0.5490770556101789, + "learning_rate": 6.129032258064516e-07, + "loss": 1.3499064445495605, + "step": 134 + }, + { + "epoch": 0.031127507493659213, + "grad_norm": 0.583021664079577, + "learning_rate": 6.175115207373271e-07, + "loss": 1.4434795379638672, + "step": 135 + }, + { + "epoch": 0.03135808162324187, + "grad_norm": 0.6037371306112707, + "learning_rate": 6.221198156682027e-07, + 
"loss": 1.4064602851867676, + "step": 136 + }, + { + "epoch": 0.03158865575282453, + "grad_norm": 0.5005511365111003, + "learning_rate": 6.267281105990782e-07, + "loss": 1.3325507640838623, + "step": 137 + }, + { + "epoch": 0.03181922988240719, + "grad_norm": 0.516984621863849, + "learning_rate": 6.313364055299539e-07, + "loss": 1.2584879398345947, + "step": 138 + }, + { + "epoch": 0.032049804011989855, + "grad_norm": 0.5401703370709408, + "learning_rate": 6.359447004608295e-07, + "loss": 1.3754582405090332, + "step": 139 + }, + { + "epoch": 0.032280378141572516, + "grad_norm": 0.5773695778497429, + "learning_rate": 6.40552995391705e-07, + "loss": 1.2700412273406982, + "step": 140 + }, + { + "epoch": 0.03251095227115518, + "grad_norm": 0.580045410672373, + "learning_rate": 6.451612903225806e-07, + "loss": 1.395858645439148, + "step": 141 + }, + { + "epoch": 0.03274152640073784, + "grad_norm": 0.6146943532430481, + "learning_rate": 6.497695852534562e-07, + "loss": 1.402890682220459, + "step": 142 + }, + { + "epoch": 0.0329721005303205, + "grad_norm": 0.5736524878471048, + "learning_rate": 6.543778801843318e-07, + "loss": 1.5405397415161133, + "step": 143 + }, + { + "epoch": 0.03320267465990316, + "grad_norm": 0.5418174501474893, + "learning_rate": 6.589861751152074e-07, + "loss": 1.2394921779632568, + "step": 144 + }, + { + "epoch": 0.03343324878948582, + "grad_norm": 0.6276742940359161, + "learning_rate": 6.63594470046083e-07, + "loss": 1.453255295753479, + "step": 145 + }, + { + "epoch": 0.03366382291906848, + "grad_norm": 0.6191808042065741, + "learning_rate": 6.682027649769585e-07, + "loss": 1.3661112785339355, + "step": 146 + }, + { + "epoch": 0.03389439704865114, + "grad_norm": 0.5260230971069313, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2952282428741455, + "step": 147 + }, + { + "epoch": 0.0341249711782338, + "grad_norm": 0.6693704726704671, + "learning_rate": 6.774193548387096e-07, + "loss": 1.396565318107605, + "step": 148 + }, + { + "epoch": 
0.03435554530781646, + "grad_norm": 0.5881355966882998, + "learning_rate": 6.820276497695853e-07, + "loss": 1.3207082748413086, + "step": 149 + }, + { + "epoch": 0.03458611943739912, + "grad_norm": 0.5727010424261832, + "learning_rate": 6.866359447004608e-07, + "loss": 1.4085125923156738, + "step": 150 + }, + { + "epoch": 0.034816693566981784, + "grad_norm": 0.6667208730018341, + "learning_rate": 6.912442396313363e-07, + "loss": 1.5698528289794922, + "step": 151 + }, + { + "epoch": 0.035047267696564445, + "grad_norm": 0.5847511619477141, + "learning_rate": 6.958525345622119e-07, + "loss": 1.4091004133224487, + "step": 152 + }, + { + "epoch": 0.035277841826147106, + "grad_norm": 0.5143540253572731, + "learning_rate": 7.004608294930875e-07, + "loss": 1.2392504215240479, + "step": 153 + }, + { + "epoch": 0.03550841595572977, + "grad_norm": 0.6061996419355483, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3355891704559326, + "step": 154 + }, + { + "epoch": 0.03573899008531243, + "grad_norm": 0.5654677060773288, + "learning_rate": 7.096774193548387e-07, + "loss": 1.330599308013916, + "step": 155 + }, + { + "epoch": 0.03596956421489509, + "grad_norm": 0.5625277163359125, + "learning_rate": 7.142857142857143e-07, + "loss": 1.344653844833374, + "step": 156 + }, + { + "epoch": 0.03620013834447775, + "grad_norm": 0.5693935421186345, + "learning_rate": 7.188940092165898e-07, + "loss": 1.341560959815979, + "step": 157 + }, + { + "epoch": 0.03643071247406041, + "grad_norm": 0.5761507210889462, + "learning_rate": 7.235023041474654e-07, + "loss": 1.2242077589035034, + "step": 158 + }, + { + "epoch": 0.036661286603643074, + "grad_norm": 0.61477283253827, + "learning_rate": 7.281105990783409e-07, + "loss": 1.2858202457427979, + "step": 159 + }, + { + "epoch": 0.03689186073322573, + "grad_norm": 0.6410836439864531, + "learning_rate": 7.327188940092166e-07, + "loss": 1.479524850845337, + "step": 160 + }, + { + "epoch": 0.03712243486280839, + "grad_norm": 0.5918139936623208, + 
"learning_rate": 7.373271889400922e-07, + "loss": 1.43915855884552, + "step": 161 + }, + { + "epoch": 0.03735300899239105, + "grad_norm": 0.6478814183526712, + "learning_rate": 7.419354838709677e-07, + "loss": 1.3939034938812256, + "step": 162 + }, + { + "epoch": 0.03758358312197371, + "grad_norm": 0.6065250961726126, + "learning_rate": 7.465437788018433e-07, + "loss": 1.2733443975448608, + "step": 163 + }, + { + "epoch": 0.037814157251556374, + "grad_norm": 0.5670760124517911, + "learning_rate": 7.511520737327189e-07, + "loss": 1.3436474800109863, + "step": 164 + }, + { + "epoch": 0.038044731381139035, + "grad_norm": 0.622037546591312, + "learning_rate": 7.557603686635944e-07, + "loss": 1.4250465631484985, + "step": 165 + }, + { + "epoch": 0.0382753055107217, + "grad_norm": 0.607298640184171, + "learning_rate": 7.603686635944701e-07, + "loss": 1.4244422912597656, + "step": 166 + }, + { + "epoch": 0.03850587964030436, + "grad_norm": 0.6986289389542176, + "learning_rate": 7.649769585253457e-07, + "loss": 1.5487544536590576, + "step": 167 + }, + { + "epoch": 0.03873645376988702, + "grad_norm": 0.5793907792629099, + "learning_rate": 7.695852534562211e-07, + "loss": 1.3282281160354614, + "step": 168 + }, + { + "epoch": 0.03896702789946968, + "grad_norm": 0.5428953608010194, + "learning_rate": 7.741935483870967e-07, + "loss": 1.2823774814605713, + "step": 169 + }, + { + "epoch": 0.03919760202905234, + "grad_norm": 0.5889853233557574, + "learning_rate": 7.788018433179722e-07, + "loss": 1.2402329444885254, + "step": 170 + }, + { + "epoch": 0.039428176158635, + "grad_norm": 0.6219537569729359, + "learning_rate": 7.834101382488479e-07, + "loss": 1.3755587339401245, + "step": 171 + }, + { + "epoch": 0.039658750288217665, + "grad_norm": 0.5509851701904478, + "learning_rate": 7.880184331797235e-07, + "loss": 1.3403921127319336, + "step": 172 + }, + { + "epoch": 0.039889324417800326, + "grad_norm": 0.5971512014225002, + "learning_rate": 7.92626728110599e-07, + "loss": 
1.3742129802703857, + "step": 173 + }, + { + "epoch": 0.04011989854738298, + "grad_norm": 0.7068161569826883, + "learning_rate": 7.972350230414746e-07, + "loss": 1.6444599628448486, + "step": 174 + }, + { + "epoch": 0.04035047267696564, + "grad_norm": 0.6019721571978455, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3891929388046265, + "step": 175 + }, + { + "epoch": 0.0405810468065483, + "grad_norm": 0.5520157347061957, + "learning_rate": 8.064516129032257e-07, + "loss": 1.2279409170150757, + "step": 176 + }, + { + "epoch": 0.040811620936130964, + "grad_norm": 0.6346481492269727, + "learning_rate": 8.110599078341014e-07, + "loss": 1.4576997756958008, + "step": 177 + }, + { + "epoch": 0.041042195065713626, + "grad_norm": 0.612489332435889, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3585199117660522, + "step": 178 + }, + { + "epoch": 0.04127276919529629, + "grad_norm": 0.5908354773562909, + "learning_rate": 8.202764976958525e-07, + "loss": 1.3056905269622803, + "step": 179 + }, + { + "epoch": 0.04150334332487895, + "grad_norm": 0.5749600887070265, + "learning_rate": 8.248847926267281e-07, + "loss": 1.3029698133468628, + "step": 180 + }, + { + "epoch": 0.04173391745446161, + "grad_norm": 0.6598409427706357, + "learning_rate": 8.294930875576036e-07, + "loss": 1.4368736743927002, + "step": 181 + }, + { + "epoch": 0.04196449158404427, + "grad_norm": 0.5781034108869284, + "learning_rate": 8.341013824884793e-07, + "loss": 1.3243422508239746, + "step": 182 + }, + { + "epoch": 0.04219506571362693, + "grad_norm": 0.5206395827762466, + "learning_rate": 8.387096774193549e-07, + "loss": 1.232081413269043, + "step": 183 + }, + { + "epoch": 0.042425639843209594, + "grad_norm": 0.656527379150416, + "learning_rate": 8.433179723502303e-07, + "loss": 1.4601390361785889, + "step": 184 + }, + { + "epoch": 0.042656213972792255, + "grad_norm": 0.7159376690159417, + "learning_rate": 8.479262672811059e-07, + "loss": 1.3778860569000244, + "step": 185 + }, + { + "epoch": 
0.042886788102374916, + "grad_norm": 0.590059263278645, + "learning_rate": 8.525345622119815e-07, + "loss": 1.3235092163085938, + "step": 186 + }, + { + "epoch": 0.04311736223195758, + "grad_norm": 0.6886704124574455, + "learning_rate": 8.57142857142857e-07, + "loss": 1.4480581283569336, + "step": 187 + }, + { + "epoch": 0.04334793636154023, + "grad_norm": 0.6346582437238362, + "learning_rate": 8.617511520737327e-07, + "loss": 1.4530816078186035, + "step": 188 + }, + { + "epoch": 0.04357851049112289, + "grad_norm": 0.6767670706852607, + "learning_rate": 8.663594470046083e-07, + "loss": 1.4447407722473145, + "step": 189 + }, + { + "epoch": 0.043809084620705555, + "grad_norm": 0.6049885392306779, + "learning_rate": 8.709677419354838e-07, + "loss": 1.3610244989395142, + "step": 190 + }, + { + "epoch": 0.044039658750288216, + "grad_norm": 0.6415008170468611, + "learning_rate": 8.755760368663594e-07, + "loss": 1.4084277153015137, + "step": 191 + }, + { + "epoch": 0.04427023287987088, + "grad_norm": 0.579530872526008, + "learning_rate": 8.801843317972349e-07, + "loss": 1.3652758598327637, + "step": 192 + }, + { + "epoch": 0.04450080700945354, + "grad_norm": 0.7106489880805067, + "learning_rate": 8.847926267281106e-07, + "loss": 1.4791496992111206, + "step": 193 + }, + { + "epoch": 0.0447313811390362, + "grad_norm": 0.6211187249917176, + "learning_rate": 8.894009216589862e-07, + "loss": 1.3958008289337158, + "step": 194 + }, + { + "epoch": 0.04496195526861886, + "grad_norm": 0.700016972508283, + "learning_rate": 8.940092165898617e-07, + "loss": 1.4134410619735718, + "step": 195 + }, + { + "epoch": 0.04519252939820152, + "grad_norm": 0.6911089974612981, + "learning_rate": 8.986175115207373e-07, + "loss": 1.4062776565551758, + "step": 196 + }, + { + "epoch": 0.045423103527784184, + "grad_norm": 0.6823334536756955, + "learning_rate": 9.032258064516129e-07, + "loss": 1.375224232673645, + "step": 197 + }, + { + "epoch": 0.045653677657366845, + "grad_norm": 0.6003343488972004, 
+ "learning_rate": 9.078341013824884e-07, + "loss": 1.2440606355667114, + "step": 198 + }, + { + "epoch": 0.045884251786949506, + "grad_norm": 0.6737684280449967, + "learning_rate": 9.124423963133641e-07, + "loss": 1.4068349599838257, + "step": 199 + }, + { + "epoch": 0.04611482591653217, + "grad_norm": 0.6181499859340271, + "learning_rate": 9.170506912442397e-07, + "loss": 1.3797581195831299, + "step": 200 + }, + { + "epoch": 0.04634540004611483, + "grad_norm": 0.6445170966825345, + "learning_rate": 9.216589861751152e-07, + "loss": 1.4441678524017334, + "step": 201 + }, + { + "epoch": 0.046575974175697483, + "grad_norm": 0.6677276378953197, + "learning_rate": 9.262672811059907e-07, + "loss": 1.4727370738983154, + "step": 202 + }, + { + "epoch": 0.046806548305280145, + "grad_norm": 0.7032332117559357, + "learning_rate": 9.308755760368662e-07, + "loss": 1.448495864868164, + "step": 203 + }, + { + "epoch": 0.047037122434862806, + "grad_norm": 0.674429398641426, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3727293014526367, + "step": 204 + }, + { + "epoch": 0.04726769656444547, + "grad_norm": 0.6701259318687961, + "learning_rate": 9.400921658986175e-07, + "loss": 1.4234352111816406, + "step": 205 + }, + { + "epoch": 0.04749827069402813, + "grad_norm": 0.5974678653003657, + "learning_rate": 9.44700460829493e-07, + "loss": 1.2407056093215942, + "step": 206 + }, + { + "epoch": 0.04772884482361079, + "grad_norm": 0.672276356974357, + "learning_rate": 9.493087557603686e-07, + "loss": 1.3502311706542969, + "step": 207 + }, + { + "epoch": 0.04795941895319345, + "grad_norm": 0.7465400676066979, + "learning_rate": 9.539170506912442e-07, + "loss": 1.4618254899978638, + "step": 208 + }, + { + "epoch": 0.04818999308277611, + "grad_norm": 0.681303163705478, + "learning_rate": 9.585253456221198e-07, + "loss": 1.3624317646026611, + "step": 209 + }, + { + "epoch": 0.048420567212358774, + "grad_norm": 0.7608712138693399, + "learning_rate": 9.631336405529954e-07, + "loss": 
1.512046456336975, + "step": 210 + }, + { + "epoch": 0.048651141341941435, + "grad_norm": 0.6018077766578277, + "learning_rate": 9.67741935483871e-07, + "loss": 1.2896164655685425, + "step": 211 + }, + { + "epoch": 0.0488817154715241, + "grad_norm": 0.7063578249182565, + "learning_rate": 9.723502304147466e-07, + "loss": 1.5507850646972656, + "step": 212 + }, + { + "epoch": 0.04911228960110676, + "grad_norm": 0.7081498572564182, + "learning_rate": 9.76958525345622e-07, + "loss": 1.425408124923706, + "step": 213 + }, + { + "epoch": 0.04934286373068942, + "grad_norm": 0.7025877080602252, + "learning_rate": 9.815668202764976e-07, + "loss": 1.347771406173706, + "step": 214 + }, + { + "epoch": 0.04957343786027208, + "grad_norm": 0.7201983919068122, + "learning_rate": 9.861751152073732e-07, + "loss": 1.4044904708862305, + "step": 215 + }, + { + "epoch": 0.049804011989854735, + "grad_norm": 0.7045020078596302, + "learning_rate": 9.907834101382488e-07, + "loss": 1.3507332801818848, + "step": 216 + }, + { + "epoch": 0.050034586119437396, + "grad_norm": 0.6820424993070572, + "learning_rate": 9.953917050691244e-07, + "loss": 1.3022946119308472, + "step": 217 + }, + { + "epoch": 0.05026516024902006, + "grad_norm": 0.6561516180690095, + "learning_rate": 1e-06, + "loss": 1.284754991531372, + "step": 218 + }, + { + "epoch": 0.05049573437860272, + "grad_norm": 0.6003085662526402, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.2985923290252686, + "step": 219 + }, + { + "epoch": 0.05072630850818538, + "grad_norm": 0.6214608767923379, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.3855717182159424, + "step": 220 + }, + { + "epoch": 0.05095688263776804, + "grad_norm": 0.675694738994849, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.357919692993164, + "step": 221 + }, + { + "epoch": 0.0511874567673507, + "grad_norm": 0.6736529895786637, + "learning_rate": 1.0184331797235021e-06, + "loss": 1.2818949222564697, + "step": 222 + }, + { + "epoch": 
0.051418030896933364, + "grad_norm": 0.6226203332882617, + "learning_rate": 1.023041474654378e-06, + "loss": 1.2488511800765991, + "step": 223 + }, + { + "epoch": 0.051648605026516026, + "grad_norm": 0.7420146271711324, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.3824148178100586, + "step": 224 + }, + { + "epoch": 0.05187917915609869, + "grad_norm": 0.6473939851836901, + "learning_rate": 1.032258064516129e-06, + "loss": 1.3114633560180664, + "step": 225 + }, + { + "epoch": 0.05210975328568135, + "grad_norm": 0.6372141360329365, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.272273063659668, + "step": 226 + }, + { + "epoch": 0.05234032741526401, + "grad_norm": 0.8216490037105428, + "learning_rate": 1.04147465437788e-06, + "loss": 1.5072649717330933, + "step": 227 + }, + { + "epoch": 0.05257090154484667, + "grad_norm": 0.7183581578734374, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.4087142944335938, + "step": 228 + }, + { + "epoch": 0.05280147567442933, + "grad_norm": 0.8332625481322393, + "learning_rate": 1.050691244239631e-06, + "loss": 1.4866605997085571, + "step": 229 + }, + { + "epoch": 0.05303204980401199, + "grad_norm": 0.6315632875144884, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.3377184867858887, + "step": 230 + }, + { + "epoch": 0.05326262393359465, + "grad_norm": 0.6695801561741619, + "learning_rate": 1.0599078341013825e-06, + "loss": 1.4009103775024414, + "step": 231 + }, + { + "epoch": 0.05349319806317731, + "grad_norm": 0.7832755910275336, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.4878556728363037, + "step": 232 + }, + { + "epoch": 0.05372377219275997, + "grad_norm": 0.7218421394327601, + "learning_rate": 1.0691244239631337e-06, + "loss": 1.4002021551132202, + "step": 233 + }, + { + "epoch": 0.05395434632234263, + "grad_norm": 0.6918832056192313, + "learning_rate": 1.073732718894009e-06, + "loss": 1.337146520614624, + "step": 234 + }, + { + "epoch": 0.05418492045192529, + "grad_norm": 
0.7101215642172168, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.4084792137145996, + "step": 235 + }, + { + "epoch": 0.054415494581507955, + "grad_norm": 0.8413614642264606, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.4131449460983276, + "step": 236 + }, + { + "epoch": 0.054646068711090616, + "grad_norm": 0.6587637953772119, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.1869292259216309, + "step": 237 + }, + { + "epoch": 0.05487664284067328, + "grad_norm": 0.7608337119634553, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.3970961570739746, + "step": 238 + }, + { + "epoch": 0.05510721697025594, + "grad_norm": 0.7677503323555195, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2682442665100098, + "step": 239 + }, + { + "epoch": 0.0553377910998386, + "grad_norm": 0.6546621813731868, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2983934879302979, + "step": 240 + }, + { + "epoch": 0.05556836522942126, + "grad_norm": 0.7451544478647047, + "learning_rate": 1.1059907834101382e-06, + "loss": 1.3980869054794312, + "step": 241 + }, + { + "epoch": 0.05579893935900392, + "grad_norm": 0.6116475273591584, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.3068631887435913, + "step": 242 + }, + { + "epoch": 0.056029513488586584, + "grad_norm": 0.7974654782353883, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.5353353023529053, + "step": 243 + }, + { + "epoch": 0.05626008761816924, + "grad_norm": 0.663054900024182, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.290163278579712, + "step": 244 + }, + { + "epoch": 0.0564906617477519, + "grad_norm": 0.6761997400626832, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.3671848773956299, + "step": 245 + }, + { + "epoch": 0.05672123587733456, + "grad_norm": 0.6294209937786865, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.3020408153533936, + "step": 246 + }, + { + "epoch": 0.05695181000691722, + "grad_norm": 0.7207247726421506, + "learning_rate": 
1.1336405529953916e-06, + "loss": 1.3159775733947754, + "step": 247 + }, + { + "epoch": 0.057182384136499884, + "grad_norm": 0.6708051542823367, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.3163995742797852, + "step": 248 + }, + { + "epoch": 0.057412958266082545, + "grad_norm": 0.8019994049858626, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.5215930938720703, + "step": 249 + }, + { + "epoch": 0.057643532395665206, + "grad_norm": 0.6559479072990889, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.2870161533355713, + "step": 250 + }, + { + "epoch": 0.05787410652524787, + "grad_norm": 0.7147869966218979, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.2624198198318481, + "step": 251 + }, + { + "epoch": 0.05810468065483053, + "grad_norm": 0.7319832858668294, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.2778981924057007, + "step": 252 + }, + { + "epoch": 0.05833525478441319, + "grad_norm": 0.6564800467165074, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.1934442520141602, + "step": 253 + }, + { + "epoch": 0.05856582891399585, + "grad_norm": 0.7291335446235057, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.3840088844299316, + "step": 254 + }, + { + "epoch": 0.05879640304357851, + "grad_norm": 0.7017610521536986, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.373002290725708, + "step": 255 + }, + { + "epoch": 0.059026977173161174, + "grad_norm": 0.6853330554611681, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.3614685535430908, + "step": 256 + }, + { + "epoch": 0.059257551302743836, + "grad_norm": 0.7170055632885292, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.3525335788726807, + "step": 257 + }, + { + "epoch": 0.05948812543232649, + "grad_norm": 0.7471586447698318, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.3806469440460205, + "step": 258 + }, + { + "epoch": 0.05971869956190915, + "grad_norm": 0.7262354481718393, + "learning_rate": 1.1889400921658985e-06, + "loss": 
1.372736930847168, + "step": 259 + }, + { + "epoch": 0.05994927369149181, + "grad_norm": 0.7470794959515278, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.309061050415039, + "step": 260 + }, + { + "epoch": 0.060179847821074474, + "grad_norm": 0.7217295951903909, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.3500525951385498, + "step": 261 + }, + { + "epoch": 0.060410421950657135, + "grad_norm": 0.7498906773328822, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.4197357892990112, + "step": 262 + }, + { + "epoch": 0.0606409960802398, + "grad_norm": 0.9553336191863615, + "learning_rate": 1.207373271889401e-06, + "loss": 1.6454131603240967, + "step": 263 + }, + { + "epoch": 0.06087157020982246, + "grad_norm": 0.7361372249879211, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.269604206085205, + "step": 264 + }, + { + "epoch": 0.06110214433940512, + "grad_norm": 0.6596823046141973, + "learning_rate": 1.216589861751152e-06, + "loss": 1.2358057498931885, + "step": 265 + }, + { + "epoch": 0.06133271846898778, + "grad_norm": 0.7203751630823346, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2713422775268555, + "step": 266 + }, + { + "epoch": 0.06156329259857044, + "grad_norm": 0.7033446179657081, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.225820779800415, + "step": 267 + }, + { + "epoch": 0.0617938667281531, + "grad_norm": 0.6900817599997362, + "learning_rate": 1.2304147465437787e-06, + "loss": 1.279617190361023, + "step": 268 + }, + { + "epoch": 0.062024440857735764, + "grad_norm": 0.6800159728233099, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.2081385850906372, + "step": 269 + }, + { + "epoch": 0.062255014987318426, + "grad_norm": 0.7378639399050563, + "learning_rate": 1.23963133640553e-06, + "loss": 1.3121249675750732, + "step": 270 + }, + { + "epoch": 0.06248558911690109, + "grad_norm": 0.7497904685097676, + "learning_rate": 1.2442396313364054e-06, + "loss": 1.28495454788208, + "step": 271 + }, + { + 
"epoch": 0.06271616324648374, + "grad_norm": 0.7749777957183016, + "learning_rate": 1.248847926267281e-06, + "loss": 1.3837053775787354, + "step": 272 + }, + { + "epoch": 0.0629467373760664, + "grad_norm": 0.7210838772374344, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.2119230031967163, + "step": 273 + }, + { + "epoch": 0.06317731150564906, + "grad_norm": 0.7143072591295863, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.323190450668335, + "step": 274 + }, + { + "epoch": 0.06340788563523173, + "grad_norm": 0.7546501032980093, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.4300715923309326, + "step": 275 + }, + { + "epoch": 0.06363845976481439, + "grad_norm": 0.7154461007442852, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1680996417999268, + "step": 276 + }, + { + "epoch": 0.06386903389439705, + "grad_norm": 0.8088364505140268, + "learning_rate": 1.271889400921659e-06, + "loss": 1.3980211019515991, + "step": 277 + }, + { + "epoch": 0.06409960802397971, + "grad_norm": 0.7801914373505492, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.40798020362854, + "step": 278 + }, + { + "epoch": 0.06433018215356237, + "grad_norm": 0.7237186405433459, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2535033226013184, + "step": 279 + }, + { + "epoch": 0.06456075628314503, + "grad_norm": 0.7779219570683336, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.3866907358169556, + "step": 280 + }, + { + "epoch": 0.0647913304127277, + "grad_norm": 0.7036374523288562, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.1985647678375244, + "step": 281 + }, + { + "epoch": 0.06502190454231035, + "grad_norm": 0.8186126171093759, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.3741936683654785, + "step": 282 + }, + { + "epoch": 0.06525247867189302, + "grad_norm": 0.7795060457073558, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.3684422969818115, + "step": 283 + }, + { + "epoch": 0.06548305280147568, + "grad_norm": 
0.7685811594695469, + "learning_rate": 1.304147465437788e-06, + "loss": 1.3792086839675903, + "step": 284 + }, + { + "epoch": 0.06571362693105834, + "grad_norm": 0.8541112738893439, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.3252873420715332, + "step": 285 + }, + { + "epoch": 0.065944201060641, + "grad_norm": 0.7272989570317888, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.1918525695800781, + "step": 286 + }, + { + "epoch": 0.06617477519022366, + "grad_norm": 0.8825171015262823, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3760654926300049, + "step": 287 + }, + { + "epoch": 0.06640534931980632, + "grad_norm": 0.8100539272477522, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.3452839851379395, + "step": 288 + }, + { + "epoch": 0.06663592344938898, + "grad_norm": 0.7635396360128843, + "learning_rate": 1.327188940092166e-06, + "loss": 1.321220874786377, + "step": 289 + }, + { + "epoch": 0.06686649757897165, + "grad_norm": 0.724002123288283, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.222012996673584, + "step": 290 + }, + { + "epoch": 0.0670970717085543, + "grad_norm": 0.7939713970528558, + "learning_rate": 1.336405529953917e-06, + "loss": 1.3209044933319092, + "step": 291 + }, + { + "epoch": 0.06732764583813695, + "grad_norm": 0.834643855588948, + "learning_rate": 1.3410138248847927e-06, + "loss": 1.3250432014465332, + "step": 292 + }, + { + "epoch": 0.06755821996771962, + "grad_norm": 0.6522445861220314, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.1738805770874023, + "step": 293 + }, + { + "epoch": 0.06778879409730228, + "grad_norm": 0.7430324759377445, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.238675832748413, + "step": 294 + }, + { + "epoch": 0.06801936822688494, + "grad_norm": 0.6872443402637277, + "learning_rate": 1.354838709677419e-06, + "loss": 1.2162814140319824, + "step": 295 + }, + { + "epoch": 0.0682499423564676, + "grad_norm": 0.7451321254668013, + "learning_rate": 
1.359447004608295e-06, + "loss": 1.2087210416793823, + "step": 296 + }, + { + "epoch": 0.06848051648605026, + "grad_norm": 0.7183129418570579, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.2657420635223389, + "step": 297 + }, + { + "epoch": 0.06871109061563292, + "grad_norm": 0.8828866176671843, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.496249794960022, + "step": 298 + }, + { + "epoch": 0.06894166474521558, + "grad_norm": 0.7852198432087445, + "learning_rate": 1.3732718894009217e-06, + "loss": 1.2698930501937866, + "step": 299 + }, + { + "epoch": 0.06917223887479824, + "grad_norm": 0.723866375282328, + "learning_rate": 1.377880184331797e-06, + "loss": 1.2088165283203125, + "step": 300 + }, + { + "epoch": 0.0694028130043809, + "grad_norm": 0.764377981893855, + "learning_rate": 1.3824884792626727e-06, + "loss": 1.392000436782837, + "step": 301 + }, + { + "epoch": 0.06963338713396357, + "grad_norm": 0.7252481501169622, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.366544485092163, + "step": 302 + }, + { + "epoch": 0.06986396126354623, + "grad_norm": 0.7900814443800929, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.3276031017303467, + "step": 303 + }, + { + "epoch": 0.07009453539312889, + "grad_norm": 0.7000339586583599, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1413768529891968, + "step": 304 + }, + { + "epoch": 0.07032510952271155, + "grad_norm": 0.7903483195817192, + "learning_rate": 1.400921658986175e-06, + "loss": 1.2958520650863647, + "step": 305 + }, + { + "epoch": 0.07055568365229421, + "grad_norm": 0.7651988170590107, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.3514549732208252, + "step": 306 + }, + { + "epoch": 0.07078625778187687, + "grad_norm": 0.767117117462576, + "learning_rate": 1.410138248847926e-06, + "loss": 1.332120418548584, + "step": 307 + }, + { + "epoch": 0.07101683191145954, + "grad_norm": 0.8380945550826328, + "learning_rate": 1.4147465437788018e-06, + "loss": 1.282820463180542, + 
"step": 308 + }, + { + "epoch": 0.0712474060410422, + "grad_norm": 0.7478573370757386, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.3927665948867798, + "step": 309 + }, + { + "epoch": 0.07147798017062486, + "grad_norm": 0.7471336867744233, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.2459386587142944, + "step": 310 + }, + { + "epoch": 0.07170855430020752, + "grad_norm": 0.715680538211599, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.1996700763702393, + "step": 311 + }, + { + "epoch": 0.07193912842979018, + "grad_norm": 0.7466366577926873, + "learning_rate": 1.433179723502304e-06, + "loss": 1.1007883548736572, + "step": 312 + }, + { + "epoch": 0.07216970255937284, + "grad_norm": 0.6505103448142013, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.211327314376831, + "step": 313 + }, + { + "epoch": 0.0724002766889555, + "grad_norm": 0.7475198907178121, + "learning_rate": 1.4423963133640554e-06, + "loss": 1.314349889755249, + "step": 314 + }, + { + "epoch": 0.07263085081853816, + "grad_norm": 0.7782372886671983, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.2270662784576416, + "step": 315 + }, + { + "epoch": 0.07286142494812083, + "grad_norm": 0.7521500862086049, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.1802537441253662, + "step": 316 + }, + { + "epoch": 0.07309199907770349, + "grad_norm": 0.7684137773026678, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.275806188583374, + "step": 317 + }, + { + "epoch": 0.07332257320728615, + "grad_norm": 0.789590997753613, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.2713148593902588, + "step": 318 + }, + { + "epoch": 0.07355314733686881, + "grad_norm": 0.8345280857312554, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.3091093301773071, + "step": 319 + }, + { + "epoch": 0.07378372146645146, + "grad_norm": 0.7108154017524825, + "learning_rate": 1.4700460829493086e-06, + "loss": 1.1274672746658325, + "step": 320 + }, + { + "epoch": 
0.07401429559603412, + "grad_norm": 0.7137227522476419, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.236955165863037, + "step": 321 + }, + { + "epoch": 0.07424486972561678, + "grad_norm": 0.7825967305477171, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.2561366558074951, + "step": 322 + }, + { + "epoch": 0.07447544385519944, + "grad_norm": 0.7250730413423113, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.1229519844055176, + "step": 323 + }, + { + "epoch": 0.0747060179847821, + "grad_norm": 0.7688658143017724, + "learning_rate": 1.4884792626728112e-06, + "loss": 1.200115442276001, + "step": 324 + }, + { + "epoch": 0.07493659211436476, + "grad_norm": 0.7499295220603182, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1930850744247437, + "step": 325 + }, + { + "epoch": 0.07516716624394743, + "grad_norm": 0.8209913282027874, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.3204331398010254, + "step": 326 + }, + { + "epoch": 0.07539774037353009, + "grad_norm": 0.7429612395335268, + "learning_rate": 1.5023041474654377e-06, + "loss": 1.109247088432312, + "step": 327 + }, + { + "epoch": 0.07562831450311275, + "grad_norm": 0.7097388789784923, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.1239254474639893, + "step": 328 + }, + { + "epoch": 0.07585888863269541, + "grad_norm": 0.7867677832004493, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.22686767578125, + "step": 329 + }, + { + "epoch": 0.07608946276227807, + "grad_norm": 0.8425243281826544, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2846856117248535, + "step": 330 + }, + { + "epoch": 0.07632003689186073, + "grad_norm": 0.7611030204070008, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.1720764636993408, + "step": 331 + }, + { + "epoch": 0.0765506110214434, + "grad_norm": 0.6783089545901869, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.05867338180542, + "step": 332 + }, + { + "epoch": 0.07678118515102605, + "grad_norm": 
0.781197296597327, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.2652220726013184, + "step": 333 + }, + { + "epoch": 0.07701175928060872, + "grad_norm": 0.7674267376615101, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.1367218494415283, + "step": 334 + }, + { + "epoch": 0.07724233341019138, + "grad_norm": 0.7149265599125916, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.169439673423767, + "step": 335 + }, + { + "epoch": 0.07747290753977404, + "grad_norm": 0.8284832797024527, + "learning_rate": 1.543778801843318e-06, + "loss": 1.265104055404663, + "step": 336 + }, + { + "epoch": 0.0777034816693567, + "grad_norm": 0.6605498491920537, + "learning_rate": 1.5483870967741935e-06, + "loss": 1.059098243713379, + "step": 337 + }, + { + "epoch": 0.07793405579893936, + "grad_norm": 0.8255024678570093, + "learning_rate": 1.552995391705069e-06, + "loss": 1.0998419523239136, + "step": 338 + }, + { + "epoch": 0.07816462992852202, + "grad_norm": 0.8285993940213782, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1361349821090698, + "step": 339 + }, + { + "epoch": 0.07839520405810468, + "grad_norm": 0.7677612111698353, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.1051890850067139, + "step": 340 + }, + { + "epoch": 0.07862577818768735, + "grad_norm": 0.8204078401725609, + "learning_rate": 1.5668202764976959e-06, + "loss": 1.1675043106079102, + "step": 341 + }, + { + "epoch": 0.07885635231727, + "grad_norm": 0.8428908363907526, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.180741786956787, + "step": 342 + }, + { + "epoch": 0.07908692644685267, + "grad_norm": 0.8559354133772745, + "learning_rate": 1.576036866359447e-06, + "loss": 1.241147518157959, + "step": 343 + }, + { + "epoch": 0.07931750057643533, + "grad_norm": 0.848204694935563, + "learning_rate": 1.5806451612903224e-06, + "loss": 1.2831401824951172, + "step": 344 + }, + { + "epoch": 0.07954807470601799, + "grad_norm": 0.7281233645086155, + "learning_rate": 
1.585253456221198e-06, + "loss": 1.2328094244003296, + "step": 345 + }, + { + "epoch": 0.07977864883560065, + "grad_norm": 0.7932743453051899, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.296494960784912, + "step": 346 + }, + { + "epoch": 0.08000922296518331, + "grad_norm": 0.7368517201206619, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.1802153587341309, + "step": 347 + }, + { + "epoch": 0.08023979709476596, + "grad_norm": 0.8829436639082808, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.2387690544128418, + "step": 348 + }, + { + "epoch": 0.08047037122434862, + "grad_norm": 0.8002618721063425, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.1307916641235352, + "step": 349 + }, + { + "epoch": 0.08070094535393128, + "grad_norm": 0.8185303488247757, + "learning_rate": 1.608294930875576e-06, + "loss": 1.117497444152832, + "step": 350 + }, + { + "epoch": 0.08093151948351394, + "grad_norm": 0.7524331692605707, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1360805034637451, + "step": 351 + }, + { + "epoch": 0.0811620936130966, + "grad_norm": 0.7626049955851422, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.1756231784820557, + "step": 352 + }, + { + "epoch": 0.08139266774267927, + "grad_norm": 0.7605864356179197, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.0260417461395264, + "step": 353 + }, + { + "epoch": 0.08162324187226193, + "grad_norm": 0.6949706544727091, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0863536596298218, + "step": 354 + }, + { + "epoch": 0.08185381600184459, + "grad_norm": 0.7427032746567218, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0529779195785522, + "step": 355 + }, + { + "epoch": 0.08208439013142725, + "grad_norm": 0.7626426518406405, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.0374994277954102, + "step": 356 + }, + { + "epoch": 0.08231496426100991, + "grad_norm": 0.7762352327056515, + "learning_rate": 1.640552995391705e-06, + "loss": 
1.153419017791748, + "step": 357 + }, + { + "epoch": 0.08254553839059257, + "grad_norm": 0.7455681546697154, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.0155376195907593, + "step": 358 + }, + { + "epoch": 0.08277611252017524, + "grad_norm": 0.779838920397346, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1288530826568604, + "step": 359 + }, + { + "epoch": 0.0830066866497579, + "grad_norm": 0.8920666311969824, + "learning_rate": 1.6543778801843317e-06, + "loss": 1.1493456363677979, + "step": 360 + }, + { + "epoch": 0.08323726077934056, + "grad_norm": 0.8383114858680324, + "learning_rate": 1.6589861751152071e-06, + "loss": 1.1064895391464233, + "step": 361 + }, + { + "epoch": 0.08346783490892322, + "grad_norm": 0.752156167882629, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0102828741073608, + "step": 362 + }, + { + "epoch": 0.08369840903850588, + "grad_norm": 0.8341451005387022, + "learning_rate": 1.6682027649769585e-06, + "loss": 1.0750138759613037, + "step": 363 + }, + { + "epoch": 0.08392898316808854, + "grad_norm": 0.8504953523340792, + "learning_rate": 1.672811059907834e-06, + "loss": 1.1611195802688599, + "step": 364 + }, + { + "epoch": 0.0841595572976712, + "grad_norm": 0.8228646683486963, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.2799829244613647, + "step": 365 + }, + { + "epoch": 0.08439013142725386, + "grad_norm": 0.9626273899315478, + "learning_rate": 1.682027649769585e-06, + "loss": 1.2427947521209717, + "step": 366 + }, + { + "epoch": 0.08462070555683653, + "grad_norm": 0.724553415716276, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0379959344863892, + "step": 367 + }, + { + "epoch": 0.08485127968641919, + "grad_norm": 0.7173602639018404, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8439304828643799, + "step": 368 + }, + { + "epoch": 0.08508185381600185, + "grad_norm": 0.8477542480910312, + "learning_rate": 1.6958525345622119e-06, + "loss": 1.1249288320541382, + "step": 369 + }, + { + 
"epoch": 0.08531242794558451, + "grad_norm": 0.8715705993798011, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.186207890510559, + "step": 370 + }, + { + "epoch": 0.08554300207516717, + "grad_norm": 0.9990300341847143, + "learning_rate": 1.705069124423963e-06, + "loss": 1.1181306838989258, + "step": 371 + }, + { + "epoch": 0.08577357620474983, + "grad_norm": 0.8792678686182055, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9828017950057983, + "step": 372 + }, + { + "epoch": 0.0860041503343325, + "grad_norm": 0.7710250186072433, + "learning_rate": 1.714285714285714e-06, + "loss": 1.1158804893493652, + "step": 373 + }, + { + "epoch": 0.08623472446391516, + "grad_norm": 0.9602707019706166, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.1771481037139893, + "step": 374 + }, + { + "epoch": 0.08646529859349782, + "grad_norm": 0.8137176951163696, + "learning_rate": 1.7235023041474655e-06, + "loss": 1.1378540992736816, + "step": 375 + }, + { + "epoch": 0.08669587272308046, + "grad_norm": 0.819557644912057, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.2011152505874634, + "step": 376 + }, + { + "epoch": 0.08692644685266313, + "grad_norm": 0.8779923853134601, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0932848453521729, + "step": 377 + }, + { + "epoch": 0.08715702098224579, + "grad_norm": 0.7579888078286682, + "learning_rate": 1.737327188940092e-06, + "loss": 1.0530626773834229, + "step": 378 + }, + { + "epoch": 0.08738759511182845, + "grad_norm": 0.8123881302713649, + "learning_rate": 1.7419354838709676e-06, + "loss": 1.09238600730896, + "step": 379 + }, + { + "epoch": 0.08761816924141111, + "grad_norm": 0.8179032370650432, + "learning_rate": 1.7465437788018434e-06, + "loss": 1.10097336769104, + "step": 380 + }, + { + "epoch": 0.08784874337099377, + "grad_norm": 0.9066182701404021, + "learning_rate": 1.7511520737327188e-06, + "loss": 1.1483392715454102, + "step": 381 + }, + { + "epoch": 0.08807931750057643, + "grad_norm": 
0.7929757896387074, + "learning_rate": 1.7557603686635944e-06, + "loss": 0.9776606559753418, + "step": 382 + }, + { + "epoch": 0.08830989163015909, + "grad_norm": 0.7070713392242878, + "learning_rate": 1.7603686635944698e-06, + "loss": 0.9363219738006592, + "step": 383 + }, + { + "epoch": 0.08854046575974175, + "grad_norm": 0.8829017901239412, + "learning_rate": 1.7649769585253456e-06, + "loss": 1.1259841918945312, + "step": 384 + }, + { + "epoch": 0.08877103988932442, + "grad_norm": 0.8379913612296851, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.0652339458465576, + "step": 385 + }, + { + "epoch": 0.08900161401890708, + "grad_norm": 0.9016264696692738, + "learning_rate": 1.7741935483870966e-06, + "loss": 1.1088197231292725, + "step": 386 + }, + { + "epoch": 0.08923218814848974, + "grad_norm": 0.8434226175443441, + "learning_rate": 1.7788018433179724e-06, + "loss": 1.0171717405319214, + "step": 387 + }, + { + "epoch": 0.0894627622780724, + "grad_norm": 0.893116506697827, + "learning_rate": 1.7834101382488478e-06, + "loss": 1.0391405820846558, + "step": 388 + }, + { + "epoch": 0.08969333640765506, + "grad_norm": 0.9558704899064524, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9970325231552124, + "step": 389 + }, + { + "epoch": 0.08992391053723772, + "grad_norm": 0.8304308575964876, + "learning_rate": 1.792626728110599e-06, + "loss": 1.1427147388458252, + "step": 390 + }, + { + "epoch": 0.09015448466682038, + "grad_norm": 0.8319398781501527, + "learning_rate": 1.7972350230414746e-06, + "loss": 0.8830767273902893, + "step": 391 + }, + { + "epoch": 0.09038505879640304, + "grad_norm": 0.8983385232838542, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0469788312911987, + "step": 392 + }, + { + "epoch": 0.0906156329259857, + "grad_norm": 1.0033385350969977, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.022156834602356, + "step": 393 + }, + { + "epoch": 0.09084620705556837, + "grad_norm": 0.8626168210196775, + "learning_rate": 
1.8110599078341013e-06, + "loss": 1.0723674297332764, + "step": 394 + }, + { + "epoch": 0.09107678118515103, + "grad_norm": 0.8060308252194399, + "learning_rate": 1.8156682027649767e-06, + "loss": 0.9089772701263428, + "step": 395 + }, + { + "epoch": 0.09130735531473369, + "grad_norm": 0.8875270675183294, + "learning_rate": 1.8202764976958525e-06, + "loss": 1.1029877662658691, + "step": 396 + }, + { + "epoch": 0.09153792944431635, + "grad_norm": 0.94113090982248, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.998812198638916, + "step": 397 + }, + { + "epoch": 0.09176850357389901, + "grad_norm": 1.0016962443263888, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.116652250289917, + "step": 398 + }, + { + "epoch": 0.09199907770348167, + "grad_norm": 0.8575568562545252, + "learning_rate": 1.8341013824884793e-06, + "loss": 1.0071923732757568, + "step": 399 + }, + { + "epoch": 0.09222965183306434, + "grad_norm": 0.9758059413772218, + "learning_rate": 1.8387096774193547e-06, + "loss": 1.0713586807250977, + "step": 400 + }, + { + "epoch": 0.092460225962647, + "grad_norm": 0.8883854169226675, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0897400379180908, + "step": 401 + }, + { + "epoch": 0.09269080009222966, + "grad_norm": 0.9342253113098401, + "learning_rate": 1.8479262672811061e-06, + "loss": 0.9571444392204285, + "step": 402 + }, + { + "epoch": 0.09292137422181232, + "grad_norm": 0.9173411430110425, + "learning_rate": 1.8525345622119815e-06, + "loss": 0.9822309017181396, + "step": 403 + }, + { + "epoch": 0.09315194835139497, + "grad_norm": 0.8821702665182305, + "learning_rate": 1.857142857142857e-06, + "loss": 1.0010900497436523, + "step": 404 + }, + { + "epoch": 0.09338252248097763, + "grad_norm": 0.8417761058687274, + "learning_rate": 1.8617511520737325e-06, + "loss": 0.8548961877822876, + "step": 405 + }, + { + "epoch": 0.09361309661056029, + "grad_norm": 0.9390158571311362, + "learning_rate": 1.8663594470046083e-06, + "loss": 
1.0856781005859375, + "step": 406 + }, + { + "epoch": 0.09384367074014295, + "grad_norm": 0.9100547740927183, + "learning_rate": 1.8709677419354837e-06, + "loss": 1.0913856029510498, + "step": 407 + }, + { + "epoch": 0.09407424486972561, + "grad_norm": 1.0379606890495185, + "learning_rate": 1.8755760368663593e-06, + "loss": 0.9409916400909424, + "step": 408 + }, + { + "epoch": 0.09430481899930827, + "grad_norm": 0.9523962354053698, + "learning_rate": 1.880184331797235e-06, + "loss": 0.9950551390647888, + "step": 409 + }, + { + "epoch": 0.09453539312889093, + "grad_norm": 0.861704297563458, + "learning_rate": 1.8847926267281104e-06, + "loss": 0.9915211200714111, + "step": 410 + }, + { + "epoch": 0.0947659672584736, + "grad_norm": 0.9290893256356082, + "learning_rate": 1.889400921658986e-06, + "loss": 1.0381574630737305, + "step": 411 + }, + { + "epoch": 0.09499654138805626, + "grad_norm": 0.9228539253940193, + "learning_rate": 1.8940092165898616e-06, + "loss": 0.8911284804344177, + "step": 412 + }, + { + "epoch": 0.09522711551763892, + "grad_norm": 0.9426577567548815, + "learning_rate": 1.8986175115207372e-06, + "loss": 0.8757172226905823, + "step": 413 + }, + { + "epoch": 0.09545768964722158, + "grad_norm": 0.7971911677154941, + "learning_rate": 1.9032258064516128e-06, + "loss": 0.8362075090408325, + "step": 414 + }, + { + "epoch": 0.09568826377680424, + "grad_norm": 0.9051810749284879, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.906524658203125, + "step": 415 + }, + { + "epoch": 0.0959188379063869, + "grad_norm": 0.9304511138009018, + "learning_rate": 1.912442396313364e-06, + "loss": 1.100447654724121, + "step": 416 + }, + { + "epoch": 0.09614941203596956, + "grad_norm": 0.8321943001479206, + "learning_rate": 1.9170506912442396e-06, + "loss": 0.9658455848693848, + "step": 417 + }, + { + "epoch": 0.09637998616555223, + "grad_norm": 0.9393736008547379, + "learning_rate": 1.921658986175115e-06, + "loss": 0.971304714679718, + "step": 418 + }, + { + "epoch": 
0.09661056029513489, + "grad_norm": 0.8792304256570437, + "learning_rate": 1.926267281105991e-06, + "loss": 0.916153073310852, + "step": 419 + }, + { + "epoch": 0.09684113442471755, + "grad_norm": 0.960700719296913, + "learning_rate": 1.930875576036866e-06, + "loss": 0.9166572093963623, + "step": 420 + }, + { + "epoch": 0.09707170855430021, + "grad_norm": 0.8385154496673872, + "learning_rate": 1.935483870967742e-06, + "loss": 0.8754867315292358, + "step": 421 + }, + { + "epoch": 0.09730228268388287, + "grad_norm": 0.8951117289542856, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9507668018341064, + "step": 422 + }, + { + "epoch": 0.09753285681346553, + "grad_norm": 1.0251554467069826, + "learning_rate": 1.944700460829493e-06, + "loss": 0.8977904319763184, + "step": 423 + }, + { + "epoch": 0.0977634309430482, + "grad_norm": 0.8433365129133346, + "learning_rate": 1.9493087557603686e-06, + "loss": 0.8359580039978027, + "step": 424 + }, + { + "epoch": 0.09799400507263085, + "grad_norm": 0.8653781711190967, + "learning_rate": 1.953917050691244e-06, + "loss": 0.8928875923156738, + "step": 425 + }, + { + "epoch": 0.09822457920221352, + "grad_norm": 1.016156538051323, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9031360149383545, + "step": 426 + }, + { + "epoch": 0.09845515333179618, + "grad_norm": 0.9535004151409068, + "learning_rate": 1.963133640552995e-06, + "loss": 0.9135938286781311, + "step": 427 + }, + { + "epoch": 0.09868572746137884, + "grad_norm": 0.9913179989235431, + "learning_rate": 1.967741935483871e-06, + "loss": 0.8978056907653809, + "step": 428 + }, + { + "epoch": 0.0989163015909615, + "grad_norm": 0.7393338474601954, + "learning_rate": 1.9723502304147463e-06, + "loss": 0.8236517906188965, + "step": 429 + }, + { + "epoch": 0.09914687572054416, + "grad_norm": 0.9578937542491764, + "learning_rate": 1.976958525345622e-06, + "loss": 0.8279497027397156, + "step": 430 + }, + { + "epoch": 0.09937744985012681, + "grad_norm": 0.8687224271614162, 
+ "learning_rate": 1.9815668202764975e-06, + "loss": 0.9273175001144409, + "step": 431 + }, + { + "epoch": 0.09960802397970947, + "grad_norm": 0.9008857811722423, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8990100622177124, + "step": 432 + }, + { + "epoch": 0.09983859810929213, + "grad_norm": 0.9051637314581525, + "learning_rate": 1.9907834101382487e-06, + "loss": 0.9221487045288086, + "step": 433 + }, + { + "epoch": 0.10006917223887479, + "grad_norm": 0.8468556051112544, + "learning_rate": 1.995391705069124e-06, + "loss": 0.7376757264137268, + "step": 434 + }, + { + "epoch": 0.10029974636845745, + "grad_norm": 0.8651656722450953, + "learning_rate": 2e-06, + "loss": 0.8496265411376953, + "step": 435 + }, + { + "epoch": 0.10053032049804012, + "grad_norm": 0.8177327534577133, + "learning_rate": 1.9999999273199326e-06, + "loss": 0.73260897397995, + "step": 436 + }, + { + "epoch": 0.10076089462762278, + "grad_norm": 1.2545811776233549, + "learning_rate": 1.999999709279741e-06, + "loss": 0.9583776593208313, + "step": 437 + }, + { + "epoch": 0.10099146875720544, + "grad_norm": 0.7771019547302918, + "learning_rate": 1.9999993458794573e-06, + "loss": 0.810507595539093, + "step": 438 + }, + { + "epoch": 0.1012220428867881, + "grad_norm": 0.8756547566965167, + "learning_rate": 1.9999988371191337e-06, + "loss": 0.7957329750061035, + "step": 439 + }, + { + "epoch": 0.10145261701637076, + "grad_norm": 0.8325539024899065, + "learning_rate": 1.9999981829988444e-06, + "loss": 0.8141027688980103, + "step": 440 + }, + { + "epoch": 0.10168319114595342, + "grad_norm": 0.9256731752358246, + "learning_rate": 1.9999973835186847e-06, + "loss": 0.8454669117927551, + "step": 441 + }, + { + "epoch": 0.10191376527553608, + "grad_norm": 0.9086105801784582, + "learning_rate": 1.9999964386787706e-06, + "loss": 0.7966687679290771, + "step": 442 + }, + { + "epoch": 0.10214433940511874, + "grad_norm": 0.8420803725442093, + "learning_rate": 1.9999953484792394e-06, + "loss": 
0.8623852133750916, + "step": 443 + }, + { + "epoch": 0.1023749135347014, + "grad_norm": 0.976279238987049, + "learning_rate": 1.9999941129202494e-06, + "loss": 0.9604165554046631, + "step": 444 + }, + { + "epoch": 0.10260548766428407, + "grad_norm": 0.8427059790049124, + "learning_rate": 1.999992732001981e-06, + "loss": 0.7461415529251099, + "step": 445 + }, + { + "epoch": 0.10283606179386673, + "grad_norm": 0.8066869506045082, + "learning_rate": 1.9999912057246342e-06, + "loss": 0.7243722677230835, + "step": 446 + }, + { + "epoch": 0.10306663592344939, + "grad_norm": 0.8507773615519725, + "learning_rate": 1.999989534088431e-06, + "loss": 0.8466402292251587, + "step": 447 + }, + { + "epoch": 0.10329721005303205, + "grad_norm": 0.9504023717644374, + "learning_rate": 1.9999877170936142e-06, + "loss": 0.8062578439712524, + "step": 448 + }, + { + "epoch": 0.10352778418261471, + "grad_norm": 0.8134117517887439, + "learning_rate": 1.9999857547404484e-06, + "loss": 0.8979625701904297, + "step": 449 + }, + { + "epoch": 0.10375835831219737, + "grad_norm": 0.7889840834274454, + "learning_rate": 1.999983647029219e-06, + "loss": 0.7970046401023865, + "step": 450 + }, + { + "epoch": 0.10398893244178004, + "grad_norm": 0.8933195109789729, + "learning_rate": 1.999981393960231e-06, + "loss": 0.9027936458587646, + "step": 451 + }, + { + "epoch": 0.1042195065713627, + "grad_norm": 0.9428128689196352, + "learning_rate": 1.9999789955338133e-06, + "loss": 0.8347916007041931, + "step": 452 + }, + { + "epoch": 0.10445008070094536, + "grad_norm": 0.7636783217821816, + "learning_rate": 1.9999764517503146e-06, + "loss": 0.7856979370117188, + "step": 453 + }, + { + "epoch": 0.10468065483052802, + "grad_norm": 0.8588750023960529, + "learning_rate": 1.9999737626101037e-06, + "loss": 0.8370383381843567, + "step": 454 + }, + { + "epoch": 0.10491122896011068, + "grad_norm": 0.7607065236764231, + "learning_rate": 1.9999709281135718e-06, + "loss": 0.8629742860794067, + "step": 455 + }, + { + 
"epoch": 0.10514180308969334, + "grad_norm": 0.7031266959727278, + "learning_rate": 1.9999679482611315e-06, + "loss": 0.8187414407730103, + "step": 456 + }, + { + "epoch": 0.105372377219276, + "grad_norm": 0.7996485745988237, + "learning_rate": 1.9999648230532156e-06, + "loss": 0.8169279098510742, + "step": 457 + }, + { + "epoch": 0.10560295134885866, + "grad_norm": 0.7291726430068795, + "learning_rate": 1.999961552490278e-06, + "loss": 0.7186012268066406, + "step": 458 + }, + { + "epoch": 0.10583352547844131, + "grad_norm": 0.8814433348597316, + "learning_rate": 1.9999581365727947e-06, + "loss": 0.8088201284408569, + "step": 459 + }, + { + "epoch": 0.10606409960802397, + "grad_norm": 0.8945815471698739, + "learning_rate": 1.999954575301262e-06, + "loss": 0.7067796587944031, + "step": 460 + }, + { + "epoch": 0.10629467373760663, + "grad_norm": 0.8727386643724712, + "learning_rate": 1.9999508686761974e-06, + "loss": 0.8839461803436279, + "step": 461 + }, + { + "epoch": 0.1065252478671893, + "grad_norm": 0.7752145606049893, + "learning_rate": 1.99994701669814e-06, + "loss": 0.750046968460083, + "step": 462 + }, + { + "epoch": 0.10675582199677196, + "grad_norm": 0.8246620057663118, + "learning_rate": 1.999943019367649e-06, + "loss": 0.7954964637756348, + "step": 463 + }, + { + "epoch": 0.10698639612635462, + "grad_norm": 0.8139454190246876, + "learning_rate": 1.9999388766853065e-06, + "loss": 0.7178900241851807, + "step": 464 + }, + { + "epoch": 0.10721697025593728, + "grad_norm": 0.7775108685144316, + "learning_rate": 1.999934588651714e-06, + "loss": 0.7583869695663452, + "step": 465 + }, + { + "epoch": 0.10744754438551994, + "grad_norm": 0.7294165374555056, + "learning_rate": 1.999930155267495e-06, + "loss": 0.8068876266479492, + "step": 466 + }, + { + "epoch": 0.1076781185151026, + "grad_norm": 0.7396884936816651, + "learning_rate": 1.9999255765332946e-06, + "loss": 0.7507776021957397, + "step": 467 + }, + { + "epoch": 0.10790869264468526, + "grad_norm": 
0.7418847797451098, + "learning_rate": 1.999920852449777e-06, + "loss": 0.7719494104385376, + "step": 468 + }, + { + "epoch": 0.10813926677426793, + "grad_norm": 0.7666886626519035, + "learning_rate": 1.99991598301763e-06, + "loss": 0.7420990467071533, + "step": 469 + }, + { + "epoch": 0.10836984090385059, + "grad_norm": 0.7701810012003275, + "learning_rate": 1.9999109682375606e-06, + "loss": 0.7152374386787415, + "step": 470 + }, + { + "epoch": 0.10860041503343325, + "grad_norm": 0.6850973266115482, + "learning_rate": 1.9999058081102985e-06, + "loss": 0.7971220016479492, + "step": 471 + }, + { + "epoch": 0.10883098916301591, + "grad_norm": 0.7306176016482578, + "learning_rate": 1.9999005026365936e-06, + "loss": 0.774874746799469, + "step": 472 + }, + { + "epoch": 0.10906156329259857, + "grad_norm": 0.8957955356096076, + "learning_rate": 1.999895051817216e-06, + "loss": 0.7567731142044067, + "step": 473 + }, + { + "epoch": 0.10929213742218123, + "grad_norm": 0.9679087986333686, + "learning_rate": 1.99988945565296e-06, + "loss": 0.7221060991287231, + "step": 474 + }, + { + "epoch": 0.1095227115517639, + "grad_norm": 0.7758710632294333, + "learning_rate": 1.9998837141446378e-06, + "loss": 0.8064852952957153, + "step": 475 + }, + { + "epoch": 0.10975328568134655, + "grad_norm": 0.7342367942239104, + "learning_rate": 1.9998778272930842e-06, + "loss": 0.7329462766647339, + "step": 476 + }, + { + "epoch": 0.10998385981092922, + "grad_norm": 0.6944047501493505, + "learning_rate": 1.999871795099155e-06, + "loss": 0.715752363204956, + "step": 477 + }, + { + "epoch": 0.11021443394051188, + "grad_norm": 1.250464562888065, + "learning_rate": 1.9998656175637265e-06, + "loss": 0.8702882528305054, + "step": 478 + }, + { + "epoch": 0.11044500807009454, + "grad_norm": 0.9132853105204283, + "learning_rate": 1.9998592946876976e-06, + "loss": 0.8559622764587402, + "step": 479 + }, + { + "epoch": 0.1106755821996772, + "grad_norm": 1.0302853941011325, + "learning_rate": 
1.999852826471987e-06, + "loss": 0.910442590713501, + "step": 480 + }, + { + "epoch": 0.11090615632925986, + "grad_norm": 0.7658983046756905, + "learning_rate": 1.9998462129175347e-06, + "loss": 0.8159372806549072, + "step": 481 + }, + { + "epoch": 0.11113673045884252, + "grad_norm": 0.6814545269174561, + "learning_rate": 1.9998394540253022e-06, + "loss": 0.8120635747909546, + "step": 482 + }, + { + "epoch": 0.11136730458842518, + "grad_norm": 0.9382461503301303, + "learning_rate": 1.999832549796272e-06, + "loss": 0.7867682576179504, + "step": 483 + }, + { + "epoch": 0.11159787871800785, + "grad_norm": 0.7285854274509946, + "learning_rate": 1.999825500231448e-06, + "loss": 0.695517897605896, + "step": 484 + }, + { + "epoch": 0.1118284528475905, + "grad_norm": 0.7426222297635688, + "learning_rate": 1.999818305331854e-06, + "loss": 0.8402971029281616, + "step": 485 + }, + { + "epoch": 0.11205902697717317, + "grad_norm": 0.9496598665654408, + "learning_rate": 1.9998109650985372e-06, + "loss": 0.7987074851989746, + "step": 486 + }, + { + "epoch": 0.11228960110675582, + "grad_norm": 0.7601824170608918, + "learning_rate": 1.9998034795325634e-06, + "loss": 0.6525362133979797, + "step": 487 + }, + { + "epoch": 0.11252017523633848, + "grad_norm": 0.6649425764525309, + "learning_rate": 1.999795848635021e-06, + "loss": 0.6218863725662231, + "step": 488 + }, + { + "epoch": 0.11275074936592114, + "grad_norm": 0.6793237780262881, + "learning_rate": 1.99978807240702e-06, + "loss": 0.7225729823112488, + "step": 489 + }, + { + "epoch": 0.1129813234955038, + "grad_norm": 0.7289774462660574, + "learning_rate": 1.9997801508496893e-06, + "loss": 0.7553551197052002, + "step": 490 + }, + { + "epoch": 0.11321189762508646, + "grad_norm": 0.7070554840091658, + "learning_rate": 1.999772083964182e-06, + "loss": 0.6695772409439087, + "step": 491 + }, + { + "epoch": 0.11344247175466912, + "grad_norm": 0.7937000317220514, + "learning_rate": 1.999763871751669e-06, + "loss": 0.7683162689208984, + 
"step": 492 + }, + { + "epoch": 0.11367304588425178, + "grad_norm": 0.7958897510308529, + "learning_rate": 1.9997555142133457e-06, + "loss": 0.7761441469192505, + "step": 493 + }, + { + "epoch": 0.11390362001383444, + "grad_norm": 0.8391915745578431, + "learning_rate": 1.999747011350426e-06, + "loss": 0.7204692959785461, + "step": 494 + }, + { + "epoch": 0.1141341941434171, + "grad_norm": 0.6535908344557003, + "learning_rate": 1.999738363164146e-06, + "loss": 0.6960519552230835, + "step": 495 + }, + { + "epoch": 0.11436476827299977, + "grad_norm": 0.669834933810116, + "learning_rate": 1.999729569655763e-06, + "loss": 0.7502788305282593, + "step": 496 + }, + { + "epoch": 0.11459534240258243, + "grad_norm": 0.7119093873273127, + "learning_rate": 1.999720630826555e-06, + "loss": 0.7649067640304565, + "step": 497 + }, + { + "epoch": 0.11482591653216509, + "grad_norm": 0.865452520980124, + "learning_rate": 1.9997115466778214e-06, + "loss": 0.6867918968200684, + "step": 498 + }, + { + "epoch": 0.11505649066174775, + "grad_norm": 0.7725462530919065, + "learning_rate": 1.9997023172108828e-06, + "loss": 0.7324330806732178, + "step": 499 + }, + { + "epoch": 0.11528706479133041, + "grad_norm": 0.7493898462804314, + "learning_rate": 1.999692942427081e-06, + "loss": 0.7452527284622192, + "step": 500 + }, + { + "epoch": 0.11551763892091307, + "grad_norm": 0.8849003751162662, + "learning_rate": 1.9996834223277775e-06, + "loss": 0.8311381340026855, + "step": 501 + }, + { + "epoch": 0.11574821305049574, + "grad_norm": 0.7698737492516583, + "learning_rate": 1.999673756914358e-06, + "loss": 0.6955340504646301, + "step": 502 + }, + { + "epoch": 0.1159787871800784, + "grad_norm": 0.9035827861690212, + "learning_rate": 1.999663946188226e-06, + "loss": 0.802892804145813, + "step": 503 + }, + { + "epoch": 0.11620936130966106, + "grad_norm": 0.9827928009523055, + "learning_rate": 1.9996539901508086e-06, + "loss": 0.8307123184204102, + "step": 504 + }, + { + "epoch": 0.11643993543924372, + 
"grad_norm": 0.7167523084062808, + "learning_rate": 1.9996438888035525e-06, + "loss": 0.7604272365570068, + "step": 505 + }, + { + "epoch": 0.11667050956882638, + "grad_norm": 0.7887244154559485, + "learning_rate": 1.9996336421479256e-06, + "loss": 0.798006534576416, + "step": 506 + }, + { + "epoch": 0.11690108369840904, + "grad_norm": 0.9102232519285063, + "learning_rate": 1.999623250185418e-06, + "loss": 0.7342728972434998, + "step": 507 + }, + { + "epoch": 0.1171316578279917, + "grad_norm": 0.689331248687117, + "learning_rate": 1.9996127129175402e-06, + "loss": 0.7659468650817871, + "step": 508 + }, + { + "epoch": 0.11736223195757436, + "grad_norm": 0.9057052272338976, + "learning_rate": 1.999602030345824e-06, + "loss": 0.6467913389205933, + "step": 509 + }, + { + "epoch": 0.11759280608715703, + "grad_norm": 0.9026632882900626, + "learning_rate": 1.9995912024718214e-06, + "loss": 0.8207371234893799, + "step": 510 + }, + { + "epoch": 0.11782338021673969, + "grad_norm": 0.6427345565408408, + "learning_rate": 1.999580229297108e-06, + "loss": 0.6865919232368469, + "step": 511 + }, + { + "epoch": 0.11805395434632235, + "grad_norm": 0.9123825063372557, + "learning_rate": 1.999569110823277e-06, + "loss": 0.7367759346961975, + "step": 512 + }, + { + "epoch": 0.11828452847590501, + "grad_norm": 0.7732312467631449, + "learning_rate": 1.9995578470519455e-06, + "loss": 0.678460955619812, + "step": 513 + }, + { + "epoch": 0.11851510260548767, + "grad_norm": 0.9273893139854266, + "learning_rate": 1.999546437984751e-06, + "loss": 0.7442954182624817, + "step": 514 + }, + { + "epoch": 0.11874567673507032, + "grad_norm": 0.7064385006159516, + "learning_rate": 1.9995348836233515e-06, + "loss": 0.6881241798400879, + "step": 515 + }, + { + "epoch": 0.11897625086465298, + "grad_norm": 0.7494917485319132, + "learning_rate": 1.9995231839694267e-06, + "loss": 0.6957181692123413, + "step": 516 + }, + { + "epoch": 0.11920682499423564, + "grad_norm": 1.0228956088069594, + "learning_rate": 
1.9995113390246773e-06, + "loss": 0.655665934085846, + "step": 517 + }, + { + "epoch": 0.1194373991238183, + "grad_norm": 0.8789756041062182, + "learning_rate": 1.9994993487908245e-06, + "loss": 0.8156173229217529, + "step": 518 + }, + { + "epoch": 0.11966797325340096, + "grad_norm": 0.8973364358315123, + "learning_rate": 1.9994872132696125e-06, + "loss": 0.7063135504722595, + "step": 519 + }, + { + "epoch": 0.11989854738298363, + "grad_norm": 0.91785396837973, + "learning_rate": 1.9994749324628046e-06, + "loss": 0.694409966468811, + "step": 520 + }, + { + "epoch": 0.12012912151256629, + "grad_norm": 0.7331348179727938, + "learning_rate": 1.9994625063721852e-06, + "loss": 0.8167020082473755, + "step": 521 + }, + { + "epoch": 0.12035969564214895, + "grad_norm": 0.9326590546614593, + "learning_rate": 1.9994499349995615e-06, + "loss": 0.7214051485061646, + "step": 522 + }, + { + "epoch": 0.12059026977173161, + "grad_norm": 0.8993621490561152, + "learning_rate": 1.999437218346761e-06, + "loss": 0.8798317909240723, + "step": 523 + }, + { + "epoch": 0.12082084390131427, + "grad_norm": 0.6552492075288662, + "learning_rate": 1.9994243564156316e-06, + "loss": 0.684230387210846, + "step": 524 + }, + { + "epoch": 0.12105141803089693, + "grad_norm": 0.9112132053465716, + "learning_rate": 1.999411349208043e-06, + "loss": 0.7519755363464355, + "step": 525 + }, + { + "epoch": 0.1212819921604796, + "grad_norm": 0.8052315425352758, + "learning_rate": 1.9993981967258857e-06, + "loss": 0.8420398235321045, + "step": 526 + }, + { + "epoch": 0.12151256629006225, + "grad_norm": 0.7105743668928439, + "learning_rate": 1.999384898971073e-06, + "loss": 0.8349270820617676, + "step": 527 + }, + { + "epoch": 0.12174314041964492, + "grad_norm": 1.0983006521395142, + "learning_rate": 1.999371455945536e-06, + "loss": 0.794980525970459, + "step": 528 + }, + { + "epoch": 0.12197371454922758, + "grad_norm": 1.1816598770476783, + "learning_rate": 1.9993578676512294e-06, + "loss": 0.666529655456543, + 
"step": 529 + }, + { + "epoch": 0.12220428867881024, + "grad_norm": 0.7564948773505585, + "learning_rate": 1.999344134090129e-06, + "loss": 0.7356991767883301, + "step": 530 + }, + { + "epoch": 0.1224348628083929, + "grad_norm": 0.8210277180950322, + "learning_rate": 1.9993302552642305e-06, + "loss": 0.6289858818054199, + "step": 531 + }, + { + "epoch": 0.12266543693797556, + "grad_norm": 0.7570779839057131, + "learning_rate": 1.9993162311755516e-06, + "loss": 0.706937313079834, + "step": 532 + }, + { + "epoch": 0.12289601106755822, + "grad_norm": 0.8676215771749471, + "learning_rate": 1.99930206182613e-06, + "loss": 0.7265158891677856, + "step": 533 + }, + { + "epoch": 0.12312658519714088, + "grad_norm": 0.7802472371537522, + "learning_rate": 1.999287747218027e-06, + "loss": 0.6575910449028015, + "step": 534 + }, + { + "epoch": 0.12335715932672355, + "grad_norm": 0.6298254280489823, + "learning_rate": 1.999273287353322e-06, + "loss": 0.6696841716766357, + "step": 535 + }, + { + "epoch": 0.1235877334563062, + "grad_norm": 1.071079002554872, + "learning_rate": 1.9992586822341177e-06, + "loss": 0.7749101519584656, + "step": 536 + }, + { + "epoch": 0.12381830758588887, + "grad_norm": 0.9432884782892066, + "learning_rate": 1.9992439318625367e-06, + "loss": 0.6880518198013306, + "step": 537 + }, + { + "epoch": 0.12404888171547153, + "grad_norm": 0.7827285978985046, + "learning_rate": 1.999229036240723e-06, + "loss": 0.6871178150177002, + "step": 538 + }, + { + "epoch": 0.12427945584505419, + "grad_norm": 0.7976778538474537, + "learning_rate": 1.999213995370842e-06, + "loss": 0.5867285132408142, + "step": 539 + }, + { + "epoch": 0.12451002997463685, + "grad_norm": 0.9357527236724963, + "learning_rate": 1.99919880925508e-06, + "loss": 0.8276966214179993, + "step": 540 + }, + { + "epoch": 0.12474060410421951, + "grad_norm": 1.0175450529032033, + "learning_rate": 1.9991834778956445e-06, + "loss": 0.7710754871368408, + "step": 541 + }, + { + "epoch": 0.12497117823380217, + 
"grad_norm": 0.9390745817535735, + "learning_rate": 1.9991680012947642e-06, + "loss": 0.7753217816352844, + "step": 542 + }, + { + "epoch": 0.12520175236338482, + "grad_norm": 0.8094522929040034, + "learning_rate": 1.9991523794546886e-06, + "loss": 0.7906090617179871, + "step": 543 + }, + { + "epoch": 0.12543232649296748, + "grad_norm": 0.9340000664605023, + "learning_rate": 1.9991366123776885e-06, + "loss": 0.7199760675430298, + "step": 544 + }, + { + "epoch": 0.12566290062255014, + "grad_norm": 0.7023452308433018, + "learning_rate": 1.9991207000660556e-06, + "loss": 0.671667218208313, + "step": 545 + }, + { + "epoch": 0.1258934747521328, + "grad_norm": 0.8347026711317173, + "learning_rate": 1.9991046425221036e-06, + "loss": 0.7289182543754578, + "step": 546 + }, + { + "epoch": 0.12612404888171547, + "grad_norm": 0.7827652568460417, + "learning_rate": 1.999088439748166e-06, + "loss": 0.6894270181655884, + "step": 547 + }, + { + "epoch": 0.12635462301129813, + "grad_norm": 0.7280796152072353, + "learning_rate": 1.9990720917465983e-06, + "loss": 0.5861620306968689, + "step": 548 + }, + { + "epoch": 0.1265851971408808, + "grad_norm": 0.9057106564897087, + "learning_rate": 1.999055598519777e-06, + "loss": 0.7082245349884033, + "step": 549 + }, + { + "epoch": 0.12681577127046345, + "grad_norm": 0.9647506404446157, + "learning_rate": 1.999038960070099e-06, + "loss": 0.6746149659156799, + "step": 550 + }, + { + "epoch": 0.1270463454000461, + "grad_norm": 0.8620899067636014, + "learning_rate": 1.999022176399983e-06, + "loss": 0.7791188955307007, + "step": 551 + }, + { + "epoch": 0.12727691952962877, + "grad_norm": 0.7157725370776972, + "learning_rate": 1.999005247511869e-06, + "loss": 0.6371017694473267, + "step": 552 + }, + { + "epoch": 0.12750749365921143, + "grad_norm": 1.0373263968991309, + "learning_rate": 1.9989881734082182e-06, + "loss": 0.7006558179855347, + "step": 553 + }, + { + "epoch": 0.1277380677887941, + "grad_norm": 1.0670128946400503, + "learning_rate": 
1.9989709540915115e-06, + "loss": 0.7011476755142212, + "step": 554 + }, + { + "epoch": 0.12796864191837676, + "grad_norm": 0.7293348024241428, + "learning_rate": 1.998953589564252e-06, + "loss": 0.6518280506134033, + "step": 555 + }, + { + "epoch": 0.12819921604795942, + "grad_norm": 1.013490270581775, + "learning_rate": 1.9989360798289646e-06, + "loss": 0.703351616859436, + "step": 556 + }, + { + "epoch": 0.12842979017754208, + "grad_norm": 0.9007382613729068, + "learning_rate": 1.998918424888194e-06, + "loss": 0.7498817443847656, + "step": 557 + }, + { + "epoch": 0.12866036430712474, + "grad_norm": 0.7936147649672419, + "learning_rate": 1.998900624744507e-06, + "loss": 0.647042989730835, + "step": 558 + }, + { + "epoch": 0.1288909384367074, + "grad_norm": 1.058658035724676, + "learning_rate": 1.99888267940049e-06, + "loss": 0.7519131898880005, + "step": 559 + }, + { + "epoch": 0.12912151256629006, + "grad_norm": 0.9392201849899589, + "learning_rate": 1.9988645888587524e-06, + "loss": 0.8416757583618164, + "step": 560 + }, + { + "epoch": 0.12935208669587273, + "grad_norm": 0.7856467653874107, + "learning_rate": 1.9988463531219238e-06, + "loss": 0.7044156193733215, + "step": 561 + }, + { + "epoch": 0.1295826608254554, + "grad_norm": 0.7712707168267965, + "learning_rate": 1.9988279721926547e-06, + "loss": 0.5429179668426514, + "step": 562 + }, + { + "epoch": 0.12981323495503805, + "grad_norm": 0.8186921939471294, + "learning_rate": 1.9988094460736173e-06, + "loss": 0.6146735548973083, + "step": 563 + }, + { + "epoch": 0.1300438090846207, + "grad_norm": 0.8439852070799176, + "learning_rate": 1.9987907747675038e-06, + "loss": 0.7544587850570679, + "step": 564 + }, + { + "epoch": 0.13027438321420337, + "grad_norm": 0.9760725928946941, + "learning_rate": 1.998771958277029e-06, + "loss": 0.7344266772270203, + "step": 565 + }, + { + "epoch": 0.13050495734378603, + "grad_norm": 0.8485941936610121, + "learning_rate": 1.9987529966049276e-06, + "loss": 0.6952091455459595, + 
"step": 566 + }, + { + "epoch": 0.1307355314733687, + "grad_norm": 0.7996168239987546, + "learning_rate": 1.9987338897539563e-06, + "loss": 0.6164644956588745, + "step": 567 + }, + { + "epoch": 0.13096610560295135, + "grad_norm": 1.04815525718601, + "learning_rate": 1.998714637726892e-06, + "loss": 0.7554208636283875, + "step": 568 + }, + { + "epoch": 0.13119667973253402, + "grad_norm": 0.97358719596577, + "learning_rate": 1.9986952405265336e-06, + "loss": 0.6640980243682861, + "step": 569 + }, + { + "epoch": 0.13142725386211668, + "grad_norm": 0.8089360786109361, + "learning_rate": 1.9986756981557005e-06, + "loss": 0.6947968006134033, + "step": 570 + }, + { + "epoch": 0.13165782799169934, + "grad_norm": 0.8239726316605849, + "learning_rate": 1.9986560106172332e-06, + "loss": 0.5987592935562134, + "step": 571 + }, + { + "epoch": 0.131888402121282, + "grad_norm": 0.709030479654625, + "learning_rate": 1.9986361779139944e-06, + "loss": 0.5830701589584351, + "step": 572 + }, + { + "epoch": 0.13211897625086466, + "grad_norm": 1.1719328645727012, + "learning_rate": 1.9986162000488655e-06, + "loss": 0.6589827537536621, + "step": 573 + }, + { + "epoch": 0.13234955038044732, + "grad_norm": 0.795778409153881, + "learning_rate": 1.9985960770247514e-06, + "loss": 0.7761766910552979, + "step": 574 + }, + { + "epoch": 0.13258012451002998, + "grad_norm": 0.8403074018612, + "learning_rate": 1.998575808844577e-06, + "loss": 0.6817613244056702, + "step": 575 + }, + { + "epoch": 0.13281069863961265, + "grad_norm": 0.8817998372104671, + "learning_rate": 1.998555395511289e-06, + "loss": 0.553085207939148, + "step": 576 + }, + { + "epoch": 0.1330412727691953, + "grad_norm": 0.6885856342268037, + "learning_rate": 1.998534837027854e-06, + "loss": 0.6500711441040039, + "step": 577 + }, + { + "epoch": 0.13327184689877797, + "grad_norm": 1.046231764034874, + "learning_rate": 1.9985141333972605e-06, + "loss": 0.7818950414657593, + "step": 578 + }, + { + "epoch": 0.13350242102836063, + 
"grad_norm": 0.7987907466299384, + "learning_rate": 1.9984932846225178e-06, + "loss": 0.7030247449874878, + "step": 579 + }, + { + "epoch": 0.1337329951579433, + "grad_norm": 0.7031460051202854, + "learning_rate": 1.9984722907066572e-06, + "loss": 0.6336206197738647, + "step": 580 + }, + { + "epoch": 0.13396356928752595, + "grad_norm": 0.8178681347907562, + "learning_rate": 1.9984511516527295e-06, + "loss": 0.7483044862747192, + "step": 581 + }, + { + "epoch": 0.1341941434171086, + "grad_norm": 0.8070808524670383, + "learning_rate": 1.9984298674638084e-06, + "loss": 0.7124725580215454, + "step": 582 + }, + { + "epoch": 0.13442471754669127, + "grad_norm": 0.8209937510618921, + "learning_rate": 1.998408438142987e-06, + "loss": 0.623436450958252, + "step": 583 + }, + { + "epoch": 0.1346552916762739, + "grad_norm": 0.8592886051949084, + "learning_rate": 1.9983868636933804e-06, + "loss": 0.646303653717041, + "step": 584 + }, + { + "epoch": 0.13488586580585657, + "grad_norm": 0.715391883952278, + "learning_rate": 1.998365144118125e-06, + "loss": 0.6349619626998901, + "step": 585 + }, + { + "epoch": 0.13511643993543923, + "grad_norm": 0.842094849315078, + "learning_rate": 1.9983432794203778e-06, + "loss": 0.5222466588020325, + "step": 586 + }, + { + "epoch": 0.1353470140650219, + "grad_norm": 0.7893129778630776, + "learning_rate": 1.998321269603317e-06, + "loss": 0.7210453152656555, + "step": 587 + }, + { + "epoch": 0.13557758819460455, + "grad_norm": 0.8260995902689467, + "learning_rate": 1.998299114670142e-06, + "loss": 0.6829872131347656, + "step": 588 + }, + { + "epoch": 0.13580816232418721, + "grad_norm": 0.714861095640182, + "learning_rate": 1.998276814624073e-06, + "loss": 0.6493744254112244, + "step": 589 + }, + { + "epoch": 0.13603873645376988, + "grad_norm": 0.8350239344719634, + "learning_rate": 1.998254369468352e-06, + "loss": 0.6885819435119629, + "step": 590 + }, + { + "epoch": 0.13626931058335254, + "grad_norm": 0.7070632175859811, + "learning_rate": 
1.9982317792062415e-06, + "loss": 0.6393503546714783, + "step": 591 + }, + { + "epoch": 0.1364998847129352, + "grad_norm": 1.010551624947432, + "learning_rate": 1.998209043841025e-06, + "loss": 0.7243417501449585, + "step": 592 + }, + { + "epoch": 0.13673045884251786, + "grad_norm": 0.693273868923859, + "learning_rate": 1.9981861633760073e-06, + "loss": 0.5955190658569336, + "step": 593 + }, + { + "epoch": 0.13696103297210052, + "grad_norm": 0.89841301134605, + "learning_rate": 1.9981631378145147e-06, + "loss": 0.6907675862312317, + "step": 594 + }, + { + "epoch": 0.13719160710168318, + "grad_norm": 1.022542216960162, + "learning_rate": 1.9981399671598938e-06, + "loss": 0.8540418148040771, + "step": 595 + }, + { + "epoch": 0.13742218123126584, + "grad_norm": 0.850573072747265, + "learning_rate": 1.9981166514155128e-06, + "loss": 0.6558555364608765, + "step": 596 + }, + { + "epoch": 0.1376527553608485, + "grad_norm": 0.9448807343375427, + "learning_rate": 1.9980931905847607e-06, + "loss": 0.6902164220809937, + "step": 597 + }, + { + "epoch": 0.13788332949043117, + "grad_norm": 1.240663469028779, + "learning_rate": 1.9980695846710485e-06, + "loss": 0.7090387344360352, + "step": 598 + }, + { + "epoch": 0.13811390362001383, + "grad_norm": 0.8847772852436644, + "learning_rate": 1.9980458336778067e-06, + "loss": 0.5913621187210083, + "step": 599 + }, + { + "epoch": 0.1383444777495965, + "grad_norm": 0.864647475805302, + "learning_rate": 1.998021937608488e-06, + "loss": 0.6742709279060364, + "step": 600 + }, + { + "epoch": 0.13857505187917915, + "grad_norm": 0.9253166862332501, + "learning_rate": 1.997997896466566e-06, + "loss": 0.7156273126602173, + "step": 601 + }, + { + "epoch": 0.1388056260087618, + "grad_norm": 0.7104566809406643, + "learning_rate": 1.9979737102555358e-06, + "loss": 0.6039655208587646, + "step": 602 + }, + { + "epoch": 0.13903620013834447, + "grad_norm": 0.7521323143425293, + "learning_rate": 1.9979493789789123e-06, + "loss": 0.6437175273895264, + 
"step": 603 + }, + { + "epoch": 0.13926677426792713, + "grad_norm": 0.7922747435817725, + "learning_rate": 1.9979249026402327e-06, + "loss": 0.6037663221359253, + "step": 604 + }, + { + "epoch": 0.1394973483975098, + "grad_norm": 0.8526913554693543, + "learning_rate": 1.9979002812430544e-06, + "loss": 0.6014829874038696, + "step": 605 + }, + { + "epoch": 0.13972792252709246, + "grad_norm": 0.9960319429386536, + "learning_rate": 1.9978755147909575e-06, + "loss": 0.5644428133964539, + "step": 606 + }, + { + "epoch": 0.13995849665667512, + "grad_norm": 0.7146930597248379, + "learning_rate": 1.997850603287541e-06, + "loss": 0.5483256578445435, + "step": 607 + }, + { + "epoch": 0.14018907078625778, + "grad_norm": 0.941628560636658, + "learning_rate": 1.9978255467364264e-06, + "loss": 0.6323236227035522, + "step": 608 + }, + { + "epoch": 0.14041964491584044, + "grad_norm": 0.8661204864695959, + "learning_rate": 1.9978003451412563e-06, + "loss": 0.677186131477356, + "step": 609 + }, + { + "epoch": 0.1406502190454231, + "grad_norm": 0.7467694215725664, + "learning_rate": 1.9977749985056934e-06, + "loss": 0.6768285036087036, + "step": 610 + }, + { + "epoch": 0.14088079317500576, + "grad_norm": 0.6978429335446755, + "learning_rate": 1.997749506833422e-06, + "loss": 0.5347047448158264, + "step": 611 + }, + { + "epoch": 0.14111136730458843, + "grad_norm": 0.8856138167235749, + "learning_rate": 1.9977238701281484e-06, + "loss": 0.7459336519241333, + "step": 612 + }, + { + "epoch": 0.1413419414341711, + "grad_norm": 0.7081494897690513, + "learning_rate": 1.9976980883935982e-06, + "loss": 0.6617337465286255, + "step": 613 + }, + { + "epoch": 0.14157251556375375, + "grad_norm": 0.766248846701343, + "learning_rate": 1.9976721616335197e-06, + "loss": 0.6214765310287476, + "step": 614 + }, + { + "epoch": 0.1418030896933364, + "grad_norm": 0.9664061776833217, + "learning_rate": 1.9976460898516814e-06, + "loss": 0.7468793392181396, + "step": 615 + }, + { + "epoch": 0.14203366382291907, 
+ "grad_norm": 0.9401860990707812, + "learning_rate": 1.9976198730518733e-06, + "loss": 0.676013708114624, + "step": 616 + }, + { + "epoch": 0.14226423795250173, + "grad_norm": 0.7984359669803877, + "learning_rate": 1.9975935112379057e-06, + "loss": 0.6350057125091553, + "step": 617 + }, + { + "epoch": 0.1424948120820844, + "grad_norm": 0.7941645196610473, + "learning_rate": 1.997567004413611e-06, + "loss": 0.6743426322937012, + "step": 618 + }, + { + "epoch": 0.14272538621166705, + "grad_norm": 0.9456320720036326, + "learning_rate": 1.9975403525828423e-06, + "loss": 0.5894836187362671, + "step": 619 + }, + { + "epoch": 0.14295596034124972, + "grad_norm": 1.1964423414511856, + "learning_rate": 1.9975135557494735e-06, + "loss": 0.7142415046691895, + "step": 620 + }, + { + "epoch": 0.14318653447083238, + "grad_norm": 0.7973360588907056, + "learning_rate": 1.9974866139174e-06, + "loss": 0.6402454972267151, + "step": 621 + }, + { + "epoch": 0.14341710860041504, + "grad_norm": 0.8197617379148621, + "learning_rate": 1.997459527090538e-06, + "loss": 0.6870661973953247, + "step": 622 + }, + { + "epoch": 0.1436476827299977, + "grad_norm": 0.9660987988063562, + "learning_rate": 1.9974322952728247e-06, + "loss": 0.5526704788208008, + "step": 623 + }, + { + "epoch": 0.14387825685958036, + "grad_norm": 0.8373386744091922, + "learning_rate": 1.9974049184682186e-06, + "loss": 0.6712762117385864, + "step": 624 + }, + { + "epoch": 0.14410883098916302, + "grad_norm": 0.8330659804365839, + "learning_rate": 1.997377396680699e-06, + "loss": 0.6064080595970154, + "step": 625 + }, + { + "epoch": 0.14433940511874568, + "grad_norm": 0.7758896299152315, + "learning_rate": 1.997349729914267e-06, + "loss": 0.5540767908096313, + "step": 626 + }, + { + "epoch": 0.14456997924832835, + "grad_norm": 0.7444906414234538, + "learning_rate": 1.997321918172944e-06, + "loss": 0.52143394947052, + "step": 627 + }, + { + "epoch": 0.144800553377911, + "grad_norm": 0.8091707705607726, + "learning_rate": 
1.9972939614607723e-06, + "loss": 0.7708792686462402, + "step": 628 + }, + { + "epoch": 0.14503112750749367, + "grad_norm": 1.0019252225174067, + "learning_rate": 1.997265859781816e-06, + "loss": 0.706872284412384, + "step": 629 + }, + { + "epoch": 0.14526170163707633, + "grad_norm": 0.7978488701627702, + "learning_rate": 1.99723761314016e-06, + "loss": 0.6643307209014893, + "step": 630 + }, + { + "epoch": 0.145492275766659, + "grad_norm": 1.0319728160628425, + "learning_rate": 1.9972092215399107e-06, + "loss": 0.6582880020141602, + "step": 631 + }, + { + "epoch": 0.14572284989624165, + "grad_norm": 0.7041979367649327, + "learning_rate": 1.997180684985194e-06, + "loss": 0.5704749822616577, + "step": 632 + }, + { + "epoch": 0.1459534240258243, + "grad_norm": 0.9160954038448087, + "learning_rate": 1.997152003480159e-06, + "loss": 0.6021866798400879, + "step": 633 + }, + { + "epoch": 0.14618399815540697, + "grad_norm": 1.0186739140184302, + "learning_rate": 1.9971231770289745e-06, + "loss": 0.6980762481689453, + "step": 634 + }, + { + "epoch": 0.14641457228498964, + "grad_norm": 0.9102171344238382, + "learning_rate": 1.9970942056358307e-06, + "loss": 0.6252140998840332, + "step": 635 + }, + { + "epoch": 0.1466451464145723, + "grad_norm": 0.8257085970836279, + "learning_rate": 1.9970650893049384e-06, + "loss": 0.5938589572906494, + "step": 636 + }, + { + "epoch": 0.14687572054415496, + "grad_norm": 0.7561297866548697, + "learning_rate": 1.997035828040531e-06, + "loss": 0.48420464992523193, + "step": 637 + }, + { + "epoch": 0.14710629467373762, + "grad_norm": 1.1749911282917564, + "learning_rate": 1.997006421846861e-06, + "loss": 0.6917499303817749, + "step": 638 + }, + { + "epoch": 0.14733686880332028, + "grad_norm": 0.9636395596462505, + "learning_rate": 1.9969768707282034e-06, + "loss": 0.7040522694587708, + "step": 639 + }, + { + "epoch": 0.14756744293290291, + "grad_norm": 0.7956128694692409, + "learning_rate": 1.9969471746888535e-06, + "loss": 0.6131860017776489, 
+ "step": 640 + }, + { + "epoch": 0.14779801706248558, + "grad_norm": 0.8000550155014501, + "learning_rate": 1.996917333733128e-06, + "loss": 0.7042062282562256, + "step": 641 + }, + { + "epoch": 0.14802859119206824, + "grad_norm": 0.9440344299424565, + "learning_rate": 1.9968873478653647e-06, + "loss": 0.6729326844215393, + "step": 642 + }, + { + "epoch": 0.1482591653216509, + "grad_norm": 0.8065631083250541, + "learning_rate": 1.996857217089922e-06, + "loss": 0.5801228880882263, + "step": 643 + }, + { + "epoch": 0.14848973945123356, + "grad_norm": 0.9584481605552773, + "learning_rate": 1.99682694141118e-06, + "loss": 0.6657989025115967, + "step": 644 + }, + { + "epoch": 0.14872031358081622, + "grad_norm": 0.8276892521273487, + "learning_rate": 1.9967965208335395e-06, + "loss": 0.5915562510490417, + "step": 645 + }, + { + "epoch": 0.14895088771039888, + "grad_norm": 0.8005079741579677, + "learning_rate": 1.9967659553614225e-06, + "loss": 0.6651759147644043, + "step": 646 + }, + { + "epoch": 0.14918146183998154, + "grad_norm": 0.785500734493462, + "learning_rate": 1.996735244999272e-06, + "loss": 0.625860333442688, + "step": 647 + }, + { + "epoch": 0.1494120359695642, + "grad_norm": 0.891334856659417, + "learning_rate": 1.996704389751552e-06, + "loss": 0.5731238126754761, + "step": 648 + }, + { + "epoch": 0.14964261009914687, + "grad_norm": 0.8662032133236818, + "learning_rate": 1.996673389622748e-06, + "loss": 0.6233615875244141, + "step": 649 + }, + { + "epoch": 0.14987318422872953, + "grad_norm": 0.7037223780792468, + "learning_rate": 1.9966422446173655e-06, + "loss": 0.5294947028160095, + "step": 650 + }, + { + "epoch": 0.1501037583583122, + "grad_norm": 0.8024689158972043, + "learning_rate": 1.996610954739932e-06, + "loss": 0.6234334707260132, + "step": 651 + }, + { + "epoch": 0.15033433248789485, + "grad_norm": 0.9863259301950934, + "learning_rate": 1.996579519994996e-06, + "loss": 0.5800126194953918, + "step": 652 + }, + { + "epoch": 0.1505649066174775, + 
"grad_norm": 0.9145794705086053, + "learning_rate": 1.9965479403871268e-06, + "loss": 0.7072441577911377, + "step": 653 + }, + { + "epoch": 0.15079548074706017, + "grad_norm": 0.8604804316966843, + "learning_rate": 1.996516215920915e-06, + "loss": 0.6350210309028625, + "step": 654 + }, + { + "epoch": 0.15102605487664283, + "grad_norm": 0.8272551438363688, + "learning_rate": 1.996484346600971e-06, + "loss": 0.6098944544792175, + "step": 655 + }, + { + "epoch": 0.1512566290062255, + "grad_norm": 0.7942772112843086, + "learning_rate": 1.996452332431929e-06, + "loss": 0.6593213081359863, + "step": 656 + }, + { + "epoch": 0.15148720313580816, + "grad_norm": 1.0870788996229426, + "learning_rate": 1.9964201734184413e-06, + "loss": 0.6997909545898438, + "step": 657 + }, + { + "epoch": 0.15171777726539082, + "grad_norm": 0.8320533396880808, + "learning_rate": 1.996387869565183e-06, + "loss": 0.5672277212142944, + "step": 658 + }, + { + "epoch": 0.15194835139497348, + "grad_norm": 0.8777194103988153, + "learning_rate": 1.99635542087685e-06, + "loss": 0.5835613012313843, + "step": 659 + }, + { + "epoch": 0.15217892552455614, + "grad_norm": 1.0025309187744094, + "learning_rate": 1.9963228273581587e-06, + "loss": 0.6001917123794556, + "step": 660 + }, + { + "epoch": 0.1524094996541388, + "grad_norm": 0.9582174045063777, + "learning_rate": 1.996290089013847e-06, + "loss": 0.6421242356300354, + "step": 661 + }, + { + "epoch": 0.15264007378372146, + "grad_norm": 0.8996449559898986, + "learning_rate": 1.996257205848674e-06, + "loss": 0.6888365745544434, + "step": 662 + }, + { + "epoch": 0.15287064791330413, + "grad_norm": 0.8017642329752841, + "learning_rate": 1.9962241778674193e-06, + "loss": 0.6694042682647705, + "step": 663 + }, + { + "epoch": 0.1531012220428868, + "grad_norm": 0.8362235694997654, + "learning_rate": 1.9961910050748836e-06, + "loss": 0.6754042506217957, + "step": 664 + }, + { + "epoch": 0.15333179617246945, + "grad_norm": 0.9429947161447709, + "learning_rate": 
1.9961576874758893e-06, + "loss": 0.576134979724884, + "step": 665 + }, + { + "epoch": 0.1535623703020521, + "grad_norm": 0.8634505888713511, + "learning_rate": 1.9961242250752796e-06, + "loss": 0.6548957824707031, + "step": 666 + }, + { + "epoch": 0.15379294443163477, + "grad_norm": 0.8494612034918267, + "learning_rate": 1.9960906178779183e-06, + "loss": 0.553372859954834, + "step": 667 + }, + { + "epoch": 0.15402351856121743, + "grad_norm": 0.8776559544848238, + "learning_rate": 1.9960568658886904e-06, + "loss": 0.6749063730239868, + "step": 668 + }, + { + "epoch": 0.1542540926908001, + "grad_norm": 0.8490449157821316, + "learning_rate": 1.9960229691125023e-06, + "loss": 0.6083666086196899, + "step": 669 + }, + { + "epoch": 0.15448466682038275, + "grad_norm": 0.9102216407598661, + "learning_rate": 1.995988927554281e-06, + "loss": 0.6468017101287842, + "step": 670 + }, + { + "epoch": 0.15471524094996542, + "grad_norm": 0.9054463862187181, + "learning_rate": 1.995954741218976e-06, + "loss": 0.7095121145248413, + "step": 671 + }, + { + "epoch": 0.15494581507954808, + "grad_norm": 0.8984210973740085, + "learning_rate": 1.995920410111555e-06, + "loss": 0.7167302966117859, + "step": 672 + }, + { + "epoch": 0.15517638920913074, + "grad_norm": 0.9754903087688545, + "learning_rate": 1.995885934237009e-06, + "loss": 0.6563462018966675, + "step": 673 + }, + { + "epoch": 0.1554069633387134, + "grad_norm": 0.7833661271069817, + "learning_rate": 1.9958513136003495e-06, + "loss": 0.638554573059082, + "step": 674 + }, + { + "epoch": 0.15563753746829606, + "grad_norm": 1.1119382875058637, + "learning_rate": 1.995816548206609e-06, + "loss": 0.7051291465759277, + "step": 675 + }, + { + "epoch": 0.15586811159787872, + "grad_norm": 0.879000690907415, + "learning_rate": 1.995781638060841e-06, + "loss": 0.6292394399642944, + "step": 676 + }, + { + "epoch": 0.15609868572746138, + "grad_norm": 0.7328696227145686, + "learning_rate": 1.99574658316812e-06, + "loss": 0.5266016721725464, + 
"step": 677 + }, + { + "epoch": 0.15632925985704405, + "grad_norm": 0.8021809147598078, + "learning_rate": 1.9957113835335415e-06, + "loss": 0.6059033870697021, + "step": 678 + }, + { + "epoch": 0.1565598339866267, + "grad_norm": 1.0012445200078677, + "learning_rate": 1.995676039162222e-06, + "loss": 0.5252447128295898, + "step": 679 + }, + { + "epoch": 0.15679040811620937, + "grad_norm": 0.9661534967224599, + "learning_rate": 1.9956405500593e-06, + "loss": 0.5963196754455566, + "step": 680 + }, + { + "epoch": 0.15702098224579203, + "grad_norm": 1.1191160767100459, + "learning_rate": 1.9956049162299322e-06, + "loss": 0.7262317538261414, + "step": 681 + }, + { + "epoch": 0.1572515563753747, + "grad_norm": 0.6929567178003186, + "learning_rate": 1.995569137679301e-06, + "loss": 0.6701623201370239, + "step": 682 + }, + { + "epoch": 0.15748213050495735, + "grad_norm": 1.1067508842107727, + "learning_rate": 1.9955332144126048e-06, + "loss": 0.6201569437980652, + "step": 683 + }, + { + "epoch": 0.15771270463454, + "grad_norm": 0.8729576302308473, + "learning_rate": 1.9954971464350673e-06, + "loss": 0.5338399410247803, + "step": 684 + }, + { + "epoch": 0.15794327876412267, + "grad_norm": 1.0541267316046437, + "learning_rate": 1.99546093375193e-06, + "loss": 0.6784210205078125, + "step": 685 + }, + { + "epoch": 0.15817385289370534, + "grad_norm": 0.7386088048688241, + "learning_rate": 1.9954245763684574e-06, + "loss": 0.6752813458442688, + "step": 686 + }, + { + "epoch": 0.158404427023288, + "grad_norm": 0.92655840240498, + "learning_rate": 1.9953880742899344e-06, + "loss": 0.6734355688095093, + "step": 687 + }, + { + "epoch": 0.15863500115287066, + "grad_norm": 1.0183777461857344, + "learning_rate": 1.995351427521667e-06, + "loss": 0.4857062101364136, + "step": 688 + }, + { + "epoch": 0.15886557528245332, + "grad_norm": 1.0292686670210065, + "learning_rate": 1.995314636068982e-06, + "loss": 0.6014343500137329, + "step": 689 + }, + { + "epoch": 0.15909614941203598, + 
"grad_norm": 0.6804392354384567, + "learning_rate": 1.995277699937227e-06, + "loss": 0.571649432182312, + "step": 690 + }, + { + "epoch": 0.15932672354161864, + "grad_norm": 0.8504096595688001, + "learning_rate": 1.9952406191317717e-06, + "loss": 0.5195556879043579, + "step": 691 + }, + { + "epoch": 0.1595572976712013, + "grad_norm": 1.0458950135227758, + "learning_rate": 1.995203393658006e-06, + "loss": 0.6520895957946777, + "step": 692 + }, + { + "epoch": 0.15978787180078396, + "grad_norm": 0.8415432435774023, + "learning_rate": 1.995166023521341e-06, + "loss": 0.7223460674285889, + "step": 693 + }, + { + "epoch": 0.16001844593036663, + "grad_norm": 0.9976828679541363, + "learning_rate": 1.9951285087272085e-06, + "loss": 0.5540120005607605, + "step": 694 + }, + { + "epoch": 0.1602490200599493, + "grad_norm": 0.9583028785849829, + "learning_rate": 1.995090849281062e-06, + "loss": 0.6539945602416992, + "step": 695 + }, + { + "epoch": 0.16047959418953192, + "grad_norm": 0.6996553037894581, + "learning_rate": 1.995053045188376e-06, + "loss": 0.595169186592102, + "step": 696 + }, + { + "epoch": 0.16071016831911458, + "grad_norm": 0.7841493951031693, + "learning_rate": 1.995015096454645e-06, + "loss": 0.564440131187439, + "step": 697 + }, + { + "epoch": 0.16094074244869724, + "grad_norm": 0.8288568147288248, + "learning_rate": 1.9949770030853857e-06, + "loss": 0.5934277772903442, + "step": 698 + }, + { + "epoch": 0.1611713165782799, + "grad_norm": 0.8284586150514878, + "learning_rate": 1.9949387650861353e-06, + "loss": 0.5645352602005005, + "step": 699 + }, + { + "epoch": 0.16140189070786257, + "grad_norm": 0.7431587516594325, + "learning_rate": 1.9949003824624517e-06, + "loss": 0.6437552571296692, + "step": 700 + }, + { + "epoch": 0.16163246483744523, + "grad_norm": 0.9720884796741701, + "learning_rate": 1.9948618552199147e-06, + "loss": 0.7052004337310791, + "step": 701 + }, + { + "epoch": 0.1618630389670279, + "grad_norm": 0.869867046800395, + "learning_rate": 
1.994823183364124e-06, + "loss": 0.6547686457633972, + "step": 702 + }, + { + "epoch": 0.16209361309661055, + "grad_norm": 0.8852938288883528, + "learning_rate": 1.994784366900702e-06, + "loss": 0.582744836807251, + "step": 703 + }, + { + "epoch": 0.1623241872261932, + "grad_norm": 0.9493941174588165, + "learning_rate": 1.99474540583529e-06, + "loss": 0.6668936014175415, + "step": 704 + }, + { + "epoch": 0.16255476135577587, + "grad_norm": 0.8294615633120708, + "learning_rate": 1.994706300173552e-06, + "loss": 0.6076918840408325, + "step": 705 + }, + { + "epoch": 0.16278533548535853, + "grad_norm": 0.8313694025786441, + "learning_rate": 1.994667049921172e-06, + "loss": 0.5053621530532837, + "step": 706 + }, + { + "epoch": 0.1630159096149412, + "grad_norm": 0.7898437620774408, + "learning_rate": 1.994627655083856e-06, + "loss": 0.5480915904045105, + "step": 707 + }, + { + "epoch": 0.16324648374452386, + "grad_norm": 0.8758549357955973, + "learning_rate": 1.99458811566733e-06, + "loss": 0.5851327776908875, + "step": 708 + }, + { + "epoch": 0.16347705787410652, + "grad_norm": 0.8484239464634123, + "learning_rate": 1.9945484316773415e-06, + "loss": 0.7058213949203491, + "step": 709 + }, + { + "epoch": 0.16370763200368918, + "grad_norm": 1.019538936894149, + "learning_rate": 1.9945086031196588e-06, + "loss": 0.6900246739387512, + "step": 710 + }, + { + "epoch": 0.16393820613327184, + "grad_norm": 0.9247299002550031, + "learning_rate": 1.994468630000072e-06, + "loss": 0.6088757514953613, + "step": 711 + }, + { + "epoch": 0.1641687802628545, + "grad_norm": 0.82117755294185, + "learning_rate": 1.9944285123243908e-06, + "loss": 0.6167945861816406, + "step": 712 + }, + { + "epoch": 0.16439935439243716, + "grad_norm": 0.8171354955480022, + "learning_rate": 1.994388250098447e-06, + "loss": 0.5842427015304565, + "step": 713 + }, + { + "epoch": 0.16462992852201982, + "grad_norm": 1.0833616769520091, + "learning_rate": 1.9943478433280937e-06, + "loss": 0.6709132194519043, + 
"step": 714 + }, + { + "epoch": 0.1648605026516025, + "grad_norm": 0.9486447603343945, + "learning_rate": 1.994307292019204e-06, + "loss": 0.5600479245185852, + "step": 715 + }, + { + "epoch": 0.16509107678118515, + "grad_norm": 0.9425877157645439, + "learning_rate": 1.994266596177672e-06, + "loss": 0.59420245885849, + "step": 716 + }, + { + "epoch": 0.1653216509107678, + "grad_norm": 0.8878954538957776, + "learning_rate": 1.994225755809414e-06, + "loss": 0.6098697185516357, + "step": 717 + }, + { + "epoch": 0.16555222504035047, + "grad_norm": 0.9792435497913993, + "learning_rate": 1.994184770920366e-06, + "loss": 0.5626084804534912, + "step": 718 + }, + { + "epoch": 0.16578279916993313, + "grad_norm": 0.827415177568412, + "learning_rate": 1.9941436415164854e-06, + "loss": 0.633317232131958, + "step": 719 + }, + { + "epoch": 0.1660133732995158, + "grad_norm": 0.7458775266643737, + "learning_rate": 1.994102367603752e-06, + "loss": 0.6629287004470825, + "step": 720 + }, + { + "epoch": 0.16624394742909845, + "grad_norm": 0.8804838237561229, + "learning_rate": 1.994060949188164e-06, + "loss": 0.6281176805496216, + "step": 721 + }, + { + "epoch": 0.16647452155868112, + "grad_norm": 0.7448717784104247, + "learning_rate": 1.994019386275743e-06, + "loss": 0.49195849895477295, + "step": 722 + }, + { + "epoch": 0.16670509568826378, + "grad_norm": 0.8001133040698483, + "learning_rate": 1.9939776788725295e-06, + "loss": 0.5165697932243347, + "step": 723 + }, + { + "epoch": 0.16693566981784644, + "grad_norm": 0.7747636914973149, + "learning_rate": 1.9939358269845867e-06, + "loss": 0.6294844150543213, + "step": 724 + }, + { + "epoch": 0.1671662439474291, + "grad_norm": 0.944854174617811, + "learning_rate": 1.9938938306179986e-06, + "loss": 0.6117822527885437, + "step": 725 + }, + { + "epoch": 0.16739681807701176, + "grad_norm": 0.8223415721013929, + "learning_rate": 1.9938516897788693e-06, + "loss": 0.5904515981674194, + "step": 726 + }, + { + "epoch": 0.16762739220659442, + 
"grad_norm": 0.9451811550082199, + "learning_rate": 1.9938094044733247e-06, + "loss": 0.5453853011131287, + "step": 727 + }, + { + "epoch": 0.16785796633617708, + "grad_norm": 1.0093698810967915, + "learning_rate": 1.9937669747075107e-06, + "loss": 0.6724731922149658, + "step": 728 + }, + { + "epoch": 0.16808854046575974, + "grad_norm": 0.8787203913390783, + "learning_rate": 1.993724400487596e-06, + "loss": 0.4844778776168823, + "step": 729 + }, + { + "epoch": 0.1683191145953424, + "grad_norm": 1.0150110817624924, + "learning_rate": 1.9936816818197682e-06, + "loss": 0.6666063070297241, + "step": 730 + }, + { + "epoch": 0.16854968872492507, + "grad_norm": 0.8363215992575103, + "learning_rate": 1.9936388187102374e-06, + "loss": 0.49354803562164307, + "step": 731 + }, + { + "epoch": 0.16878026285450773, + "grad_norm": 1.011739420494133, + "learning_rate": 1.993595811165234e-06, + "loss": 0.6587027311325073, + "step": 732 + }, + { + "epoch": 0.1690108369840904, + "grad_norm": 0.8706809761457309, + "learning_rate": 1.9935526591910095e-06, + "loss": 0.5618065595626831, + "step": 733 + }, + { + "epoch": 0.16924141111367305, + "grad_norm": 1.0230867510580486, + "learning_rate": 1.993509362793837e-06, + "loss": 0.6332052946090698, + "step": 734 + }, + { + "epoch": 0.1694719852432557, + "grad_norm": 0.8938300688074264, + "learning_rate": 1.9934659219800095e-06, + "loss": 0.5888797044754028, + "step": 735 + }, + { + "epoch": 0.16970255937283837, + "grad_norm": 0.9600504381358347, + "learning_rate": 1.9934223367558418e-06, + "loss": 0.6995177865028381, + "step": 736 + }, + { + "epoch": 0.16993313350242104, + "grad_norm": 0.8183852978697493, + "learning_rate": 1.9933786071276693e-06, + "loss": 0.6117641925811768, + "step": 737 + }, + { + "epoch": 0.1701637076320037, + "grad_norm": 0.8824726889784998, + "learning_rate": 1.9933347331018487e-06, + "loss": 0.7138235569000244, + "step": 738 + }, + { + "epoch": 0.17039428176158636, + "grad_norm": 0.9234925675447027, + 
"learning_rate": 1.993290714684758e-06, + "loss": 0.6139661073684692, + "step": 739 + }, + { + "epoch": 0.17062485589116902, + "grad_norm": 0.9457487351494172, + "learning_rate": 1.9932465518827945e-06, + "loss": 0.6998997926712036, + "step": 740 + }, + { + "epoch": 0.17085543002075168, + "grad_norm": 0.8625145077640682, + "learning_rate": 1.9932022447023787e-06, + "loss": 0.5736757516860962, + "step": 741 + }, + { + "epoch": 0.17108600415033434, + "grad_norm": 0.7768775382949296, + "learning_rate": 1.993157793149951e-06, + "loss": 0.6069833040237427, + "step": 742 + }, + { + "epoch": 0.171316578279917, + "grad_norm": 0.9368489446003049, + "learning_rate": 1.9931131972319726e-06, + "loss": 0.618720531463623, + "step": 743 + }, + { + "epoch": 0.17154715240949966, + "grad_norm": 1.1182101771495103, + "learning_rate": 1.9930684569549263e-06, + "loss": 0.6918530464172363, + "step": 744 + }, + { + "epoch": 0.17177772653908233, + "grad_norm": 0.9107072762217621, + "learning_rate": 1.993023572325315e-06, + "loss": 0.5303134322166443, + "step": 745 + }, + { + "epoch": 0.172008300668665, + "grad_norm": 1.163525853024132, + "learning_rate": 1.9929785433496637e-06, + "loss": 0.5017606019973755, + "step": 746 + }, + { + "epoch": 0.17223887479824765, + "grad_norm": 0.8248835281602814, + "learning_rate": 1.9929333700345176e-06, + "loss": 0.5683910846710205, + "step": 747 + }, + { + "epoch": 0.1724694489278303, + "grad_norm": 1.024957040527593, + "learning_rate": 1.992888052386443e-06, + "loss": 0.7594112157821655, + "step": 748 + }, + { + "epoch": 0.17270002305741297, + "grad_norm": 0.8415419064063624, + "learning_rate": 1.9928425904120272e-06, + "loss": 0.5817109942436218, + "step": 749 + }, + { + "epoch": 0.17293059718699563, + "grad_norm": 0.9772344685918459, + "learning_rate": 1.9927969841178785e-06, + "loss": 0.74810391664505, + "step": 750 + }, + { + "epoch": 0.17316117131657827, + "grad_norm": 0.7709842631317299, + "learning_rate": 1.992751233510627e-06, + "loss": 
0.5620408654212952, + "step": 751 + }, + { + "epoch": 0.17339174544616093, + "grad_norm": 0.9147017514524429, + "learning_rate": 1.9927053385969224e-06, + "loss": 0.5661174654960632, + "step": 752 + }, + { + "epoch": 0.1736223195757436, + "grad_norm": 0.8721149149743948, + "learning_rate": 1.992659299383436e-06, + "loss": 0.6170656681060791, + "step": 753 + }, + { + "epoch": 0.17385289370532625, + "grad_norm": 0.8946316220934861, + "learning_rate": 1.99261311587686e-06, + "loss": 0.6399837136268616, + "step": 754 + }, + { + "epoch": 0.1740834678349089, + "grad_norm": 0.7741035474142021, + "learning_rate": 1.992566788083908e-06, + "loss": 0.646568775177002, + "step": 755 + }, + { + "epoch": 0.17431404196449157, + "grad_norm": 0.8936741351690501, + "learning_rate": 1.992520316011314e-06, + "loss": 0.6836358904838562, + "step": 756 + }, + { + "epoch": 0.17454461609407423, + "grad_norm": 0.8304614027509832, + "learning_rate": 1.9924736996658327e-06, + "loss": 0.7077229619026184, + "step": 757 + }, + { + "epoch": 0.1747751902236569, + "grad_norm": 0.87551528703017, + "learning_rate": 1.9924269390542408e-06, + "loss": 0.5127657651901245, + "step": 758 + }, + { + "epoch": 0.17500576435323956, + "grad_norm": 0.9006786249451013, + "learning_rate": 1.992380034183336e-06, + "loss": 0.49244552850723267, + "step": 759 + }, + { + "epoch": 0.17523633848282222, + "grad_norm": 0.8017561502743571, + "learning_rate": 1.9923329850599353e-06, + "loss": 0.6145986318588257, + "step": 760 + }, + { + "epoch": 0.17546691261240488, + "grad_norm": 1.0163805424999015, + "learning_rate": 1.9922857916908784e-06, + "loss": 0.5233397483825684, + "step": 761 + }, + { + "epoch": 0.17569748674198754, + "grad_norm": 0.9596772303146165, + "learning_rate": 1.992238454083025e-06, + "loss": 0.6296844482421875, + "step": 762 + }, + { + "epoch": 0.1759280608715702, + "grad_norm": 0.7860963753584104, + "learning_rate": 1.9921909722432565e-06, + "loss": 0.5274437665939331, + "step": 763 + }, + { + "epoch": 
0.17615863500115286, + "grad_norm": 0.8930810667791799, + "learning_rate": 1.9921433461784744e-06, + "loss": 0.6365554332733154, + "step": 764 + }, + { + "epoch": 0.17638920913073552, + "grad_norm": 0.9611521576454714, + "learning_rate": 1.992095575895602e-06, + "loss": 0.6256603002548218, + "step": 765 + }, + { + "epoch": 0.17661978326031819, + "grad_norm": 0.9488006285824869, + "learning_rate": 1.9920476614015827e-06, + "loss": 0.6914918422698975, + "step": 766 + }, + { + "epoch": 0.17685035738990085, + "grad_norm": 0.9925839476608436, + "learning_rate": 1.9919996027033823e-06, + "loss": 0.618436336517334, + "step": 767 + }, + { + "epoch": 0.1770809315194835, + "grad_norm": 1.0637307823847924, + "learning_rate": 1.9919513998079857e-06, + "loss": 0.7496027946472168, + "step": 768 + }, + { + "epoch": 0.17731150564906617, + "grad_norm": 0.873569070894671, + "learning_rate": 1.9919030527224e-06, + "loss": 0.6188616752624512, + "step": 769 + }, + { + "epoch": 0.17754207977864883, + "grad_norm": 0.9573370107752551, + "learning_rate": 1.991854561453653e-06, + "loss": 0.6525505185127258, + "step": 770 + }, + { + "epoch": 0.1777726539082315, + "grad_norm": 0.8791752874309303, + "learning_rate": 1.9918059260087933e-06, + "loss": 0.6302521228790283, + "step": 771 + }, + { + "epoch": 0.17800322803781415, + "grad_norm": 0.7767159097983319, + "learning_rate": 1.9917571463948905e-06, + "loss": 0.48817628622055054, + "step": 772 + }, + { + "epoch": 0.17823380216739682, + "grad_norm": 0.9997756560425097, + "learning_rate": 1.9917082226190357e-06, + "loss": 0.7571396231651306, + "step": 773 + }, + { + "epoch": 0.17846437629697948, + "grad_norm": 0.9019653117383005, + "learning_rate": 1.99165915468834e-06, + "loss": 0.6416890025138855, + "step": 774 + }, + { + "epoch": 0.17869495042656214, + "grad_norm": 0.9030141776784474, + "learning_rate": 1.9916099426099357e-06, + "loss": 0.5668659210205078, + "step": 775 + }, + { + "epoch": 0.1789255245561448, + "grad_norm": 
0.8616948701360102, + "learning_rate": 1.991560586390977e-06, + "loss": 0.5491495132446289, + "step": 776 + }, + { + "epoch": 0.17915609868572746, + "grad_norm": 0.8461739489170892, + "learning_rate": 1.991511086038637e-06, + "loss": 0.5596655607223511, + "step": 777 + }, + { + "epoch": 0.17938667281531012, + "grad_norm": 0.948797979696852, + "learning_rate": 1.991461441560113e-06, + "loss": 0.606618344783783, + "step": 778 + }, + { + "epoch": 0.17961724694489278, + "grad_norm": 0.8682290862864503, + "learning_rate": 1.9914116529626195e-06, + "loss": 0.6534444093704224, + "step": 779 + }, + { + "epoch": 0.17984782107447544, + "grad_norm": 0.7942772802909244, + "learning_rate": 1.9913617202533956e-06, + "loss": 0.6566994190216064, + "step": 780 + }, + { + "epoch": 0.1800783952040581, + "grad_norm": 0.8753236598884384, + "learning_rate": 1.9913116434396976e-06, + "loss": 0.6745898723602295, + "step": 781 + }, + { + "epoch": 0.18030896933364077, + "grad_norm": 0.8904483654623074, + "learning_rate": 1.991261422528806e-06, + "loss": 0.6260639429092407, + "step": 782 + }, + { + "epoch": 0.18053954346322343, + "grad_norm": 1.095081708934966, + "learning_rate": 1.9912110575280203e-06, + "loss": 0.6937930583953857, + "step": 783 + }, + { + "epoch": 0.1807701175928061, + "grad_norm": 0.7535766751550929, + "learning_rate": 1.991160548444662e-06, + "loss": 0.5220614671707153, + "step": 784 + }, + { + "epoch": 0.18100069172238875, + "grad_norm": 1.0171096783148863, + "learning_rate": 1.9911098952860725e-06, + "loss": 0.630463719367981, + "step": 785 + }, + { + "epoch": 0.1812312658519714, + "grad_norm": 0.9064677619585607, + "learning_rate": 1.9910590980596154e-06, + "loss": 0.5476818084716797, + "step": 786 + }, + { + "epoch": 0.18146183998155407, + "grad_norm": 0.8827497683061851, + "learning_rate": 1.9910081567726745e-06, + "loss": 0.619910478591919, + "step": 787 + }, + { + "epoch": 0.18169241411113674, + "grad_norm": 0.9583246792904453, + "learning_rate": 
1.990957071432654e-06, + "loss": 0.759405255317688, + "step": 788 + }, + { + "epoch": 0.1819229882407194, + "grad_norm": 0.9249642030902185, + "learning_rate": 1.9909058420469808e-06, + "loss": 0.6093606948852539, + "step": 789 + }, + { + "epoch": 0.18215356237030206, + "grad_norm": 1.0777393301256872, + "learning_rate": 1.9908544686231e-06, + "loss": 0.5358198285102844, + "step": 790 + }, + { + "epoch": 0.18238413649988472, + "grad_norm": 0.8619190562873736, + "learning_rate": 1.9908029511684806e-06, + "loss": 0.577926754951477, + "step": 791 + }, + { + "epoch": 0.18261471062946738, + "grad_norm": 1.0298704295501269, + "learning_rate": 1.990751289690611e-06, + "loss": 0.6232448816299438, + "step": 792 + }, + { + "epoch": 0.18284528475905004, + "grad_norm": 0.9837349749201401, + "learning_rate": 1.9906994841970005e-06, + "loss": 0.5461868047714233, + "step": 793 + }, + { + "epoch": 0.1830758588886327, + "grad_norm": 0.9430576362377001, + "learning_rate": 1.9906475346951793e-06, + "loss": 0.6074671745300293, + "step": 794 + }, + { + "epoch": 0.18330643301821536, + "grad_norm": 0.9936839742941572, + "learning_rate": 1.990595441192699e-06, + "loss": 0.7101696729660034, + "step": 795 + }, + { + "epoch": 0.18353700714779803, + "grad_norm": 0.950260898814123, + "learning_rate": 1.9905432036971318e-06, + "loss": 0.6507722735404968, + "step": 796 + }, + { + "epoch": 0.1837675812773807, + "grad_norm": 0.8942288113166778, + "learning_rate": 1.9904908222160715e-06, + "loss": 0.6497524380683899, + "step": 797 + }, + { + "epoch": 0.18399815540696335, + "grad_norm": 0.9396678930556792, + "learning_rate": 1.9904382967571315e-06, + "loss": 0.6359415054321289, + "step": 798 + }, + { + "epoch": 0.184228729536546, + "grad_norm": 0.8070326036364724, + "learning_rate": 1.9903856273279475e-06, + "loss": 0.6062989234924316, + "step": 799 + }, + { + "epoch": 0.18445930366612867, + "grad_norm": 0.9626677000162343, + "learning_rate": 1.9903328139361753e-06, + "loss": 0.5872690677642822, + 
"step": 800 + }, + { + "epoch": 0.18468987779571133, + "grad_norm": 0.7985705265040473, + "learning_rate": 1.9902798565894917e-06, + "loss": 0.541993260383606, + "step": 801 + }, + { + "epoch": 0.184920451925294, + "grad_norm": 0.9775943406877085, + "learning_rate": 1.9902267552955948e-06, + "loss": 0.6509004235267639, + "step": 802 + }, + { + "epoch": 0.18515102605487666, + "grad_norm": 1.032367389635004, + "learning_rate": 1.9901735100622034e-06, + "loss": 0.6994458436965942, + "step": 803 + }, + { + "epoch": 0.18538160018445932, + "grad_norm": 0.723727027388961, + "learning_rate": 1.9901201208970574e-06, + "loss": 0.5426214933395386, + "step": 804 + }, + { + "epoch": 0.18561217431404198, + "grad_norm": 0.9494744349432898, + "learning_rate": 1.9900665878079172e-06, + "loss": 0.5889894366264343, + "step": 805 + }, + { + "epoch": 0.18584274844362464, + "grad_norm": 0.8565255265724333, + "learning_rate": 1.990012910802564e-06, + "loss": 0.6455902457237244, + "step": 806 + }, + { + "epoch": 0.18607332257320727, + "grad_norm": 0.8487813974117321, + "learning_rate": 1.989959089888801e-06, + "loss": 0.6336048245429993, + "step": 807 + }, + { + "epoch": 0.18630389670278993, + "grad_norm": 0.8414189962242138, + "learning_rate": 1.9899051250744517e-06, + "loss": 0.6091762781143188, + "step": 808 + }, + { + "epoch": 0.1865344708323726, + "grad_norm": 0.9439572961008054, + "learning_rate": 1.9898510163673594e-06, + "loss": 0.5551953315734863, + "step": 809 + }, + { + "epoch": 0.18676504496195526, + "grad_norm": 1.0494491780231465, + "learning_rate": 1.9897967637753907e-06, + "loss": 0.6441607475280762, + "step": 810 + }, + { + "epoch": 0.18699561909153792, + "grad_norm": 0.886313339848662, + "learning_rate": 1.989742367306431e-06, + "loss": 0.5766205787658691, + "step": 811 + }, + { + "epoch": 0.18722619322112058, + "grad_norm": 0.8129745295139125, + "learning_rate": 1.9896878269683872e-06, + "loss": 0.624677836894989, + "step": 812 + }, + { + "epoch": 0.18745676735070324, + 
"grad_norm": 1.0883386432883795, + "learning_rate": 1.9896331427691878e-06, + "loss": 0.5942056775093079, + "step": 813 + }, + { + "epoch": 0.1876873414802859, + "grad_norm": 0.9421668652395382, + "learning_rate": 1.989578314716781e-06, + "loss": 0.5194109082221985, + "step": 814 + }, + { + "epoch": 0.18791791560986856, + "grad_norm": 0.9041080200693152, + "learning_rate": 1.9895233428191375e-06, + "loss": 0.5851193070411682, + "step": 815 + }, + { + "epoch": 0.18814848973945122, + "grad_norm": 0.7963655717285544, + "learning_rate": 1.989468227084248e-06, + "loss": 0.5596088171005249, + "step": 816 + }, + { + "epoch": 0.18837906386903389, + "grad_norm": 0.9364254304069746, + "learning_rate": 1.989412967520123e-06, + "loss": 0.608109712600708, + "step": 817 + }, + { + "epoch": 0.18860963799861655, + "grad_norm": 0.8927696059217924, + "learning_rate": 1.9893575641347957e-06, + "loss": 0.6488924026489258, + "step": 818 + }, + { + "epoch": 0.1888402121281992, + "grad_norm": 0.9447086482881396, + "learning_rate": 1.9893020169363202e-06, + "loss": 0.6668595671653748, + "step": 819 + }, + { + "epoch": 0.18907078625778187, + "grad_norm": 0.9937318511996248, + "learning_rate": 1.9892463259327702e-06, + "loss": 0.6516261696815491, + "step": 820 + }, + { + "epoch": 0.18930136038736453, + "grad_norm": 1.0796549259081865, + "learning_rate": 1.9891904911322408e-06, + "loss": 0.5960654020309448, + "step": 821 + }, + { + "epoch": 0.1895319345169472, + "grad_norm": 0.7909478658460368, + "learning_rate": 1.989134512542848e-06, + "loss": 0.5836078524589539, + "step": 822 + }, + { + "epoch": 0.18976250864652985, + "grad_norm": 0.8238472267757905, + "learning_rate": 1.98907839017273e-06, + "loss": 0.6233468651771545, + "step": 823 + }, + { + "epoch": 0.18999308277611252, + "grad_norm": 0.9807541829716023, + "learning_rate": 1.989022124030043e-06, + "loss": 0.6228024363517761, + "step": 824 + }, + { + "epoch": 0.19022365690569518, + "grad_norm": 0.8131035743107407, + "learning_rate": 
1.9889657141229674e-06, + "loss": 0.5549489259719849, + "step": 825 + }, + { + "epoch": 0.19045423103527784, + "grad_norm": 1.04900407843417, + "learning_rate": 1.988909160459703e-06, + "loss": 0.572743833065033, + "step": 826 + }, + { + "epoch": 0.1906848051648605, + "grad_norm": 0.9532449351501632, + "learning_rate": 1.988852463048469e-06, + "loss": 0.5483371019363403, + "step": 827 + }, + { + "epoch": 0.19091537929444316, + "grad_norm": 0.8589634934665029, + "learning_rate": 1.988795621897508e-06, + "loss": 0.6489086151123047, + "step": 828 + }, + { + "epoch": 0.19114595342402582, + "grad_norm": 0.8093738620503291, + "learning_rate": 1.9887386370150823e-06, + "loss": 0.5885359644889832, + "step": 829 + }, + { + "epoch": 0.19137652755360848, + "grad_norm": 1.1233507395706857, + "learning_rate": 1.988681508409475e-06, + "loss": 0.5725297927856445, + "step": 830 + }, + { + "epoch": 0.19160710168319114, + "grad_norm": 0.9186016287497916, + "learning_rate": 1.9886242360889907e-06, + "loss": 0.5165927410125732, + "step": 831 + }, + { + "epoch": 0.1918376758127738, + "grad_norm": 0.9873812028582082, + "learning_rate": 1.988566820061954e-06, + "loss": 0.4909062385559082, + "step": 832 + }, + { + "epoch": 0.19206824994235647, + "grad_norm": 0.8524339429885558, + "learning_rate": 1.988509260336711e-06, + "loss": 0.6611230373382568, + "step": 833 + }, + { + "epoch": 0.19229882407193913, + "grad_norm": 0.8054213393470881, + "learning_rate": 1.9884515569216296e-06, + "loss": 0.5702481269836426, + "step": 834 + }, + { + "epoch": 0.1925293982015218, + "grad_norm": 1.0204414620630202, + "learning_rate": 1.988393709825096e-06, + "loss": 0.5923126935958862, + "step": 835 + }, + { + "epoch": 0.19275997233110445, + "grad_norm": 0.9055032000924194, + "learning_rate": 1.98833571905552e-06, + "loss": 0.6054497957229614, + "step": 836 + }, + { + "epoch": 0.1929905464606871, + "grad_norm": 0.9248140875126212, + "learning_rate": 1.9882775846213305e-06, + "loss": 0.6688513159751892, + 
"step": 837 + }, + { + "epoch": 0.19322112059026977, + "grad_norm": 1.0273808455254545, + "learning_rate": 1.988219306530978e-06, + "loss": 0.5898394584655762, + "step": 838 + }, + { + "epoch": 0.19345169471985244, + "grad_norm": 0.9751112903331337, + "learning_rate": 1.9881608847929345e-06, + "loss": 0.575627326965332, + "step": 839 + }, + { + "epoch": 0.1936822688494351, + "grad_norm": 0.8673669914525766, + "learning_rate": 1.9881023194156913e-06, + "loss": 0.5392276048660278, + "step": 840 + }, + { + "epoch": 0.19391284297901776, + "grad_norm": 0.8706508008641746, + "learning_rate": 1.9880436104077624e-06, + "loss": 0.5464376211166382, + "step": 841 + }, + { + "epoch": 0.19414341710860042, + "grad_norm": 1.1088629334080236, + "learning_rate": 1.9879847577776804e-06, + "loss": 0.5483032464981079, + "step": 842 + }, + { + "epoch": 0.19437399123818308, + "grad_norm": 1.088158010228094, + "learning_rate": 1.9879257615340016e-06, + "loss": 0.583878219127655, + "step": 843 + }, + { + "epoch": 0.19460456536776574, + "grad_norm": 0.903659297701254, + "learning_rate": 1.9878666216853005e-06, + "loss": 0.5646623373031616, + "step": 844 + }, + { + "epoch": 0.1948351394973484, + "grad_norm": 0.8893037043091606, + "learning_rate": 1.9878073382401745e-06, + "loss": 0.4785343408584595, + "step": 845 + }, + { + "epoch": 0.19506571362693106, + "grad_norm": 0.8306997774077053, + "learning_rate": 1.987747911207241e-06, + "loss": 0.6247695684432983, + "step": 846 + }, + { + "epoch": 0.19529628775651373, + "grad_norm": 0.8871051444384922, + "learning_rate": 1.9876883405951377e-06, + "loss": 0.5686244368553162, + "step": 847 + }, + { + "epoch": 0.1955268618860964, + "grad_norm": 1.0693338597203925, + "learning_rate": 1.9876286264125242e-06, + "loss": 0.5887250900268555, + "step": 848 + }, + { + "epoch": 0.19575743601567905, + "grad_norm": 1.009687803574172, + "learning_rate": 1.9875687686680808e-06, + "loss": 0.6225967407226562, + "step": 849 + }, + { + "epoch": 0.1959880101452617, + 
"grad_norm": 0.8424215047754778, + "learning_rate": 1.987508767370508e-06, + "loss": 0.4695369601249695, + "step": 850 + }, + { + "epoch": 0.19621858427484437, + "grad_norm": 1.0270923710251258, + "learning_rate": 1.9874486225285276e-06, + "loss": 0.5248171091079712, + "step": 851 + }, + { + "epoch": 0.19644915840442703, + "grad_norm": 1.0947189066196994, + "learning_rate": 1.9873883341508825e-06, + "loss": 0.573886513710022, + "step": 852 + }, + { + "epoch": 0.1966797325340097, + "grad_norm": 0.980074050730982, + "learning_rate": 1.9873279022463365e-06, + "loss": 0.5309966802597046, + "step": 853 + }, + { + "epoch": 0.19691030666359235, + "grad_norm": 1.2273525906968545, + "learning_rate": 1.987267326823673e-06, + "loss": 0.7115850448608398, + "step": 854 + }, + { + "epoch": 0.19714088079317502, + "grad_norm": 1.65154587276706, + "learning_rate": 1.9872066078916984e-06, + "loss": 0.6970044374465942, + "step": 855 + }, + { + "epoch": 0.19737145492275768, + "grad_norm": 1.0520569639047552, + "learning_rate": 1.987145745459238e-06, + "loss": 0.5956458449363708, + "step": 856 + }, + { + "epoch": 0.19760202905234034, + "grad_norm": 0.8621512966256671, + "learning_rate": 1.9870847395351395e-06, + "loss": 0.6200698614120483, + "step": 857 + }, + { + "epoch": 0.197832603181923, + "grad_norm": 0.8987981187104104, + "learning_rate": 1.98702359012827e-06, + "loss": 0.6552712321281433, + "step": 858 + }, + { + "epoch": 0.19806317731150566, + "grad_norm": 0.8832934653512269, + "learning_rate": 1.986962297247519e-06, + "loss": 0.5995951294898987, + "step": 859 + }, + { + "epoch": 0.19829375144108832, + "grad_norm": 1.0415029103173328, + "learning_rate": 1.9869008609017946e-06, + "loss": 0.5903854966163635, + "step": 860 + }, + { + "epoch": 0.19852432557067098, + "grad_norm": 0.7946410320386238, + "learning_rate": 1.986839281100029e-06, + "loss": 0.49756956100463867, + "step": 861 + }, + { + "epoch": 0.19875489970025362, + "grad_norm": 0.8989937288923138, + "learning_rate": 
1.986777557851172e-06, + "loss": 0.6726386547088623, + "step": 862 + }, + { + "epoch": 0.19898547382983628, + "grad_norm": 1.066877002121069, + "learning_rate": 1.9867156911641963e-06, + "loss": 0.5941756963729858, + "step": 863 + }, + { + "epoch": 0.19921604795941894, + "grad_norm": 1.1426428571577942, + "learning_rate": 1.986653681048095e-06, + "loss": 0.6148152351379395, + "step": 864 + }, + { + "epoch": 0.1994466220890016, + "grad_norm": 0.8574337846446602, + "learning_rate": 1.9865915275118815e-06, + "loss": 0.5484675765037537, + "step": 865 + }, + { + "epoch": 0.19967719621858426, + "grad_norm": 1.279305752369778, + "learning_rate": 1.986529230564591e-06, + "loss": 0.5835011601448059, + "step": 866 + }, + { + "epoch": 0.19990777034816692, + "grad_norm": 1.2828587747963143, + "learning_rate": 1.9864667902152785e-06, + "loss": 0.5505619049072266, + "step": 867 + }, + { + "epoch": 0.20013834447774959, + "grad_norm": 0.978792866059614, + "learning_rate": 1.986404206473021e-06, + "loss": 0.6170759797096252, + "step": 868 + }, + { + "epoch": 0.20036891860733225, + "grad_norm": 0.9063283607010307, + "learning_rate": 1.9863414793469144e-06, + "loss": 0.6302823424339294, + "step": 869 + }, + { + "epoch": 0.2005994927369149, + "grad_norm": 0.9919923586713045, + "learning_rate": 1.9862786088460778e-06, + "loss": 0.6265357732772827, + "step": 870 + }, + { + "epoch": 0.20083006686649757, + "grad_norm": 0.8288163853607481, + "learning_rate": 1.9862155949796497e-06, + "loss": 0.5346760749816895, + "step": 871 + }, + { + "epoch": 0.20106064099608023, + "grad_norm": 1.0613032711669241, + "learning_rate": 1.98615243775679e-06, + "loss": 0.5480276346206665, + "step": 872 + }, + { + "epoch": 0.2012912151256629, + "grad_norm": 1.0504212966242243, + "learning_rate": 1.986089137186679e-06, + "loss": 0.615007758140564, + "step": 873 + }, + { + "epoch": 0.20152178925524555, + "grad_norm": 1.0424303204478471, + "learning_rate": 1.986025693278518e-06, + "loss": 0.598671555519104, + 
"step": 874 + }, + { + "epoch": 0.20175236338482821, + "grad_norm": 1.1162570964298844, + "learning_rate": 1.98596210604153e-06, + "loss": 0.6029553413391113, + "step": 875 + }, + { + "epoch": 0.20198293751441088, + "grad_norm": 0.9723766835428509, + "learning_rate": 1.985898375484957e-06, + "loss": 0.6854428052902222, + "step": 876 + }, + { + "epoch": 0.20221351164399354, + "grad_norm": 0.7502030102171089, + "learning_rate": 1.9858345016180636e-06, + "loss": 0.5032496452331543, + "step": 877 + }, + { + "epoch": 0.2024440857735762, + "grad_norm": 0.910423493721141, + "learning_rate": 1.9857704844501343e-06, + "loss": 0.5521007776260376, + "step": 878 + }, + { + "epoch": 0.20267465990315886, + "grad_norm": 0.9861926154372014, + "learning_rate": 1.9857063239904742e-06, + "loss": 0.6473567485809326, + "step": 879 + }, + { + "epoch": 0.20290523403274152, + "grad_norm": 0.9973567674127126, + "learning_rate": 1.9856420202484103e-06, + "loss": 0.528810977935791, + "step": 880 + }, + { + "epoch": 0.20313580816232418, + "grad_norm": 1.0663389238750165, + "learning_rate": 1.9855775732332898e-06, + "loss": 0.681857705116272, + "step": 881 + }, + { + "epoch": 0.20336638229190684, + "grad_norm": 0.9199566615284357, + "learning_rate": 1.9855129829544805e-06, + "loss": 0.6510526537895203, + "step": 882 + }, + { + "epoch": 0.2035969564214895, + "grad_norm": 1.0847608945381821, + "learning_rate": 1.985448249421371e-06, + "loss": 0.5690885782241821, + "step": 883 + }, + { + "epoch": 0.20382753055107217, + "grad_norm": 0.9067033263808438, + "learning_rate": 1.985383372643371e-06, + "loss": 0.6451331973075867, + "step": 884 + }, + { + "epoch": 0.20405810468065483, + "grad_norm": 0.7596187493834748, + "learning_rate": 1.9853183526299117e-06, + "loss": 0.493961900472641, + "step": 885 + }, + { + "epoch": 0.2042886788102375, + "grad_norm": 1.031307930072274, + "learning_rate": 1.9852531893904434e-06, + "loss": 0.5390207767486572, + "step": 886 + }, + { + "epoch": 0.20451925293982015, + 
"grad_norm": 0.9671201783822709, + "learning_rate": 1.9851878829344395e-06, + "loss": 0.5976558923721313, + "step": 887 + }, + { + "epoch": 0.2047498270694028, + "grad_norm": 0.9832697265495778, + "learning_rate": 1.9851224332713917e-06, + "loss": 0.539776623249054, + "step": 888 + }, + { + "epoch": 0.20498040119898547, + "grad_norm": 1.1606849770347532, + "learning_rate": 1.9850568404108144e-06, + "loss": 0.6791383624076843, + "step": 889 + }, + { + "epoch": 0.20521097532856813, + "grad_norm": 1.1599404347752247, + "learning_rate": 1.984991104362242e-06, + "loss": 0.6195741891860962, + "step": 890 + }, + { + "epoch": 0.2054415494581508, + "grad_norm": 1.0295013801913249, + "learning_rate": 1.9849252251352303e-06, + "loss": 0.5792666673660278, + "step": 891 + }, + { + "epoch": 0.20567212358773346, + "grad_norm": 0.7871401361859056, + "learning_rate": 1.984859202739355e-06, + "loss": 0.5633316040039062, + "step": 892 + }, + { + "epoch": 0.20590269771731612, + "grad_norm": 0.9078754261167402, + "learning_rate": 1.9847930371842137e-06, + "loss": 0.6152814626693726, + "step": 893 + }, + { + "epoch": 0.20613327184689878, + "grad_norm": 1.0024181714804654, + "learning_rate": 1.9847267284794234e-06, + "loss": 0.5584526658058167, + "step": 894 + }, + { + "epoch": 0.20636384597648144, + "grad_norm": 0.9442571191896375, + "learning_rate": 1.9846602766346235e-06, + "loss": 0.5526787042617798, + "step": 895 + }, + { + "epoch": 0.2065944201060641, + "grad_norm": 1.114741515810547, + "learning_rate": 1.984593681659473e-06, + "loss": 0.6851564049720764, + "step": 896 + }, + { + "epoch": 0.20682499423564676, + "grad_norm": 0.9529867069899208, + "learning_rate": 1.9845269435636524e-06, + "loss": 0.6012386083602905, + "step": 897 + }, + { + "epoch": 0.20705556836522943, + "grad_norm": 0.9587418141612076, + "learning_rate": 1.9844600623568626e-06, + "loss": 0.5515716075897217, + "step": 898 + }, + { + "epoch": 0.2072861424948121, + "grad_norm": 1.0489716310270325, + "learning_rate": 
1.9843930380488255e-06, + "loss": 0.6534323692321777, + "step": 899 + }, + { + "epoch": 0.20751671662439475, + "grad_norm": 0.9795829214559992, + "learning_rate": 1.9843258706492836e-06, + "loss": 0.726966381072998, + "step": 900 + }, + { + "epoch": 0.2077472907539774, + "grad_norm": 1.0154014646465384, + "learning_rate": 1.984258560168001e-06, + "loss": 0.6692399978637695, + "step": 901 + }, + { + "epoch": 0.20797786488356007, + "grad_norm": 0.8361205321250001, + "learning_rate": 1.9841911066147614e-06, + "loss": 0.5815941095352173, + "step": 902 + }, + { + "epoch": 0.20820843901314273, + "grad_norm": 0.8093430372283338, + "learning_rate": 1.98412350999937e-06, + "loss": 0.4850257933139801, + "step": 903 + }, + { + "epoch": 0.2084390131427254, + "grad_norm": 0.9321751727050823, + "learning_rate": 1.9840557703316524e-06, + "loss": 0.7309345006942749, + "step": 904 + }, + { + "epoch": 0.20866958727230805, + "grad_norm": 0.9487721653557605, + "learning_rate": 1.9839878876214556e-06, + "loss": 0.6246342658996582, + "step": 905 + }, + { + "epoch": 0.20890016140189072, + "grad_norm": 0.923401773715514, + "learning_rate": 1.983919861878647e-06, + "loss": 0.503870964050293, + "step": 906 + }, + { + "epoch": 0.20913073553147338, + "grad_norm": 0.9277576649885639, + "learning_rate": 1.9838516931131147e-06, + "loss": 0.5316766500473022, + "step": 907 + }, + { + "epoch": 0.20936130966105604, + "grad_norm": 0.9488124820166146, + "learning_rate": 1.983783381334768e-06, + "loss": 0.5707069039344788, + "step": 908 + }, + { + "epoch": 0.2095918837906387, + "grad_norm": 1.1481758251998657, + "learning_rate": 1.983714926553536e-06, + "loss": 0.5482156276702881, + "step": 909 + }, + { + "epoch": 0.20982245792022136, + "grad_norm": 0.8868748652499737, + "learning_rate": 1.98364632877937e-06, + "loss": 0.45747748017311096, + "step": 910 + }, + { + "epoch": 0.21005303204980402, + "grad_norm": 1.070435205795932, + "learning_rate": 1.9835775880222414e-06, + "loss": 0.5599262118339539, + 
"step": 911 + }, + { + "epoch": 0.21028360617938668, + "grad_norm": 0.8833178195747919, + "learning_rate": 1.9835087042921416e-06, + "loss": 0.5115377902984619, + "step": 912 + }, + { + "epoch": 0.21051418030896935, + "grad_norm": 1.0026720443060566, + "learning_rate": 1.9834396775990846e-06, + "loss": 0.6577836275100708, + "step": 913 + }, + { + "epoch": 0.210744754438552, + "grad_norm": 1.0996458728397183, + "learning_rate": 1.9833705079531033e-06, + "loss": 0.4979211091995239, + "step": 914 + }, + { + "epoch": 0.21097532856813467, + "grad_norm": 0.9038590231228891, + "learning_rate": 1.983301195364252e-06, + "loss": 0.5052670240402222, + "step": 915 + }, + { + "epoch": 0.21120590269771733, + "grad_norm": 0.9375736925419242, + "learning_rate": 1.9832317398426076e-06, + "loss": 0.5480808019638062, + "step": 916 + }, + { + "epoch": 0.2114364768273, + "grad_norm": 1.1234174619828885, + "learning_rate": 1.983162141398264e-06, + "loss": 0.5328841209411621, + "step": 917 + }, + { + "epoch": 0.21166705095688262, + "grad_norm": 1.0661654042909894, + "learning_rate": 1.98309240004134e-06, + "loss": 0.5572643280029297, + "step": 918 + }, + { + "epoch": 0.21189762508646529, + "grad_norm": 0.7370595537346776, + "learning_rate": 1.983022515781972e-06, + "loss": 0.5180699825286865, + "step": 919 + }, + { + "epoch": 0.21212819921604795, + "grad_norm": 0.9467461169752135, + "learning_rate": 1.9829524886303182e-06, + "loss": 0.5031566619873047, + "step": 920 + }, + { + "epoch": 0.2123587733456306, + "grad_norm": 1.0924744776428812, + "learning_rate": 1.9828823185965587e-06, + "loss": 0.6579925417900085, + "step": 921 + }, + { + "epoch": 0.21258934747521327, + "grad_norm": 1.0635734753276387, + "learning_rate": 1.982812005690893e-06, + "loss": 0.6107230186462402, + "step": 922 + }, + { + "epoch": 0.21281992160479593, + "grad_norm": 0.8209241554677639, + "learning_rate": 1.982741549923542e-06, + "loss": 0.5244725942611694, + "step": 923 + }, + { + "epoch": 0.2130504957343786, + 
"grad_norm": 0.8970249012108504, + "learning_rate": 1.9826709513047466e-06, + "loss": 0.5857048630714417, + "step": 924 + }, + { + "epoch": 0.21328106986396125, + "grad_norm": 1.1702999413512643, + "learning_rate": 1.9826002098447694e-06, + "loss": 0.6417914628982544, + "step": 925 + }, + { + "epoch": 0.21351164399354391, + "grad_norm": 1.025740647317304, + "learning_rate": 1.982529325553893e-06, + "loss": 0.6062248945236206, + "step": 926 + }, + { + "epoch": 0.21374221812312658, + "grad_norm": 0.8397411976395659, + "learning_rate": 1.982458298442422e-06, + "loss": 0.4870455265045166, + "step": 927 + }, + { + "epoch": 0.21397279225270924, + "grad_norm": 0.8931294029793581, + "learning_rate": 1.9823871285206802e-06, + "loss": 0.6552037000656128, + "step": 928 + }, + { + "epoch": 0.2142033663822919, + "grad_norm": 0.9703019761386622, + "learning_rate": 1.9823158157990133e-06, + "loss": 0.531679093837738, + "step": 929 + }, + { + "epoch": 0.21443394051187456, + "grad_norm": 1.2664544243150397, + "learning_rate": 1.982244360287787e-06, + "loss": 0.516847550868988, + "step": 930 + }, + { + "epoch": 0.21466451464145722, + "grad_norm": 0.810392988957607, + "learning_rate": 1.982172761997388e-06, + "loss": 0.47147709131240845, + "step": 931 + }, + { + "epoch": 0.21489508877103988, + "grad_norm": 0.8771741979565738, + "learning_rate": 1.982101020938224e-06, + "loss": 0.627938985824585, + "step": 932 + }, + { + "epoch": 0.21512566290062254, + "grad_norm": 1.0257080856710215, + "learning_rate": 1.9820291371207233e-06, + "loss": 0.639348030090332, + "step": 933 + }, + { + "epoch": 0.2153562370302052, + "grad_norm": 0.9702705556217962, + "learning_rate": 1.9819571105553354e-06, + "loss": 0.6480363607406616, + "step": 934 + }, + { + "epoch": 0.21558681115978787, + "grad_norm": 0.9260617050921398, + "learning_rate": 1.9818849412525293e-06, + "loss": 0.5776711702346802, + "step": 935 + }, + { + "epoch": 0.21581738528937053, + "grad_norm": 0.9042487017557694, + "learning_rate": 
1.9818126292227957e-06, + "loss": 0.5891472101211548, + "step": 936 + }, + { + "epoch": 0.2160479594189532, + "grad_norm": 0.8905401941241984, + "learning_rate": 1.9817401744766465e-06, + "loss": 0.5977755784988403, + "step": 937 + }, + { + "epoch": 0.21627853354853585, + "grad_norm": 0.8626457448308078, + "learning_rate": 1.981667577024613e-06, + "loss": 0.5263733863830566, + "step": 938 + }, + { + "epoch": 0.2165091076781185, + "grad_norm": 1.0627291912482457, + "learning_rate": 1.9815948368772484e-06, + "loss": 0.5440605878829956, + "step": 939 + }, + { + "epoch": 0.21673968180770117, + "grad_norm": 0.9629159186929203, + "learning_rate": 1.9815219540451263e-06, + "loss": 0.5140440464019775, + "step": 940 + }, + { + "epoch": 0.21697025593728383, + "grad_norm": 1.0494365886675714, + "learning_rate": 1.9814489285388402e-06, + "loss": 0.6741353273391724, + "step": 941 + }, + { + "epoch": 0.2172008300668665, + "grad_norm": 1.1329427006993176, + "learning_rate": 1.981375760369006e-06, + "loss": 0.6243258714675903, + "step": 942 + }, + { + "epoch": 0.21743140419644916, + "grad_norm": 1.1054961559311265, + "learning_rate": 1.981302449546259e-06, + "loss": 0.6363699436187744, + "step": 943 + }, + { + "epoch": 0.21766197832603182, + "grad_norm": 0.9214231813217233, + "learning_rate": 1.981228996081256e-06, + "loss": 0.5849490165710449, + "step": 944 + }, + { + "epoch": 0.21789255245561448, + "grad_norm": 0.8824229032075002, + "learning_rate": 1.9811553999846736e-06, + "loss": 0.43679118156433105, + "step": 945 + }, + { + "epoch": 0.21812312658519714, + "grad_norm": 0.8524209104471582, + "learning_rate": 1.9810816612672104e-06, + "loss": 0.5575870275497437, + "step": 946 + }, + { + "epoch": 0.2183537007147798, + "grad_norm": 1.2313981009960802, + "learning_rate": 1.9810077799395846e-06, + "loss": 0.5288122296333313, + "step": 947 + }, + { + "epoch": 0.21858427484436246, + "grad_norm": 0.9413824588491826, + "learning_rate": 1.9809337560125357e-06, + "loss": 
0.5618559718132019, + "step": 948 + }, + { + "epoch": 0.21881484897394513, + "grad_norm": 0.900237395227137, + "learning_rate": 1.980859589496824e-06, + "loss": 0.6346654891967773, + "step": 949 + }, + { + "epoch": 0.2190454231035278, + "grad_norm": 0.7859619018047411, + "learning_rate": 1.98078528040323e-06, + "loss": 0.5456810593605042, + "step": 950 + }, + { + "epoch": 0.21927599723311045, + "grad_norm": 1.096845447650345, + "learning_rate": 1.980710828742556e-06, + "loss": 0.6463650465011597, + "step": 951 + }, + { + "epoch": 0.2195065713626931, + "grad_norm": 0.8708852946707265, + "learning_rate": 1.980636234525624e-06, + "loss": 0.5013638734817505, + "step": 952 + }, + { + "epoch": 0.21973714549227577, + "grad_norm": 1.0813749561311563, + "learning_rate": 1.9805614977632763e-06, + "loss": 0.6522110104560852, + "step": 953 + }, + { + "epoch": 0.21996771962185843, + "grad_norm": 1.1282712003155921, + "learning_rate": 1.9804866184663775e-06, + "loss": 0.5864803791046143, + "step": 954 + }, + { + "epoch": 0.2201982937514411, + "grad_norm": 1.0131587624930238, + "learning_rate": 1.9804115966458116e-06, + "loss": 0.5261500477790833, + "step": 955 + }, + { + "epoch": 0.22042886788102375, + "grad_norm": 0.9727651996633074, + "learning_rate": 1.980336432312484e-06, + "loss": 0.585462212562561, + "step": 956 + }, + { + "epoch": 0.22065944201060642, + "grad_norm": 0.913173290527313, + "learning_rate": 1.9802611254773207e-06, + "loss": 0.5889539122581482, + "step": 957 + }, + { + "epoch": 0.22089001614018908, + "grad_norm": 0.9844451118331555, + "learning_rate": 1.980185676151268e-06, + "loss": 0.665162205696106, + "step": 958 + }, + { + "epoch": 0.22112059026977174, + "grad_norm": 0.9378356304402508, + "learning_rate": 1.9801100843452935e-06, + "loss": 0.5344980359077454, + "step": 959 + }, + { + "epoch": 0.2213511643993544, + "grad_norm": 0.9210142542004092, + "learning_rate": 1.980034350070385e-06, + "loss": 0.6301499009132385, + "step": 960 + }, + { + "epoch": 
0.22158173852893706, + "grad_norm": 1.0404902143094334, + "learning_rate": 1.9799584733375512e-06, + "loss": 0.5114584565162659, + "step": 961 + }, + { + "epoch": 0.22181231265851972, + "grad_norm": 1.0168872016124533, + "learning_rate": 1.979882454157822e-06, + "loss": 0.5199861526489258, + "step": 962 + }, + { + "epoch": 0.22204288678810238, + "grad_norm": 1.1826380086118446, + "learning_rate": 1.9798062925422472e-06, + "loss": 0.5336212515830994, + "step": 963 + }, + { + "epoch": 0.22227346091768505, + "grad_norm": 1.0189277044162137, + "learning_rate": 1.9797299885018977e-06, + "loss": 0.535847544670105, + "step": 964 + }, + { + "epoch": 0.2225040350472677, + "grad_norm": 1.1943664941065335, + "learning_rate": 1.979653542047865e-06, + "loss": 0.6234130859375, + "step": 965 + }, + { + "epoch": 0.22273460917685037, + "grad_norm": 0.9414245062598806, + "learning_rate": 1.979576953191262e-06, + "loss": 0.5017205476760864, + "step": 966 + }, + { + "epoch": 0.22296518330643303, + "grad_norm": 0.8271602877368085, + "learning_rate": 1.9795002219432204e-06, + "loss": 0.4982973337173462, + "step": 967 + }, + { + "epoch": 0.2231957574360157, + "grad_norm": 1.0821521338057418, + "learning_rate": 1.979423348314895e-06, + "loss": 0.47946417331695557, + "step": 968 + }, + { + "epoch": 0.22342633156559835, + "grad_norm": 0.9333636639659694, + "learning_rate": 1.97934633231746e-06, + "loss": 0.5431856513023376, + "step": 969 + }, + { + "epoch": 0.223656905695181, + "grad_norm": 1.010615347342822, + "learning_rate": 1.9792691739621097e-06, + "loss": 0.5355685949325562, + "step": 970 + }, + { + "epoch": 0.22388747982476367, + "grad_norm": 0.9115391310212676, + "learning_rate": 1.979191873260061e-06, + "loss": 0.6103906631469727, + "step": 971 + }, + { + "epoch": 0.22411805395434634, + "grad_norm": 0.9295016548118124, + "learning_rate": 1.9791144302225493e-06, + "loss": 0.538421094417572, + "step": 972 + }, + { + "epoch": 0.224348628083929, + "grad_norm": 1.2200934433979187, + 
"learning_rate": 1.9790368448608322e-06, + "loss": 0.6068445444107056, + "step": 973 + }, + { + "epoch": 0.22457920221351163, + "grad_norm": 0.8606144159525476, + "learning_rate": 1.9789591171861874e-06, + "loss": 0.463737815618515, + "step": 974 + }, + { + "epoch": 0.2248097763430943, + "grad_norm": 1.0217946560153375, + "learning_rate": 1.9788812472099135e-06, + "loss": 0.6588588953018188, + "step": 975 + }, + { + "epoch": 0.22504035047267695, + "grad_norm": 1.0288343828209117, + "learning_rate": 1.9788032349433297e-06, + "loss": 0.678712010383606, + "step": 976 + }, + { + "epoch": 0.22527092460225961, + "grad_norm": 1.1695805252394589, + "learning_rate": 1.9787250803977757e-06, + "loss": 0.6397948265075684, + "step": 977 + }, + { + "epoch": 0.22550149873184228, + "grad_norm": 1.029054993282064, + "learning_rate": 1.978646783584612e-06, + "loss": 0.5422782897949219, + "step": 978 + }, + { + "epoch": 0.22573207286142494, + "grad_norm": 0.9969509169785887, + "learning_rate": 1.9785683445152204e-06, + "loss": 0.5314444303512573, + "step": 979 + }, + { + "epoch": 0.2259626469910076, + "grad_norm": 1.0816366548169771, + "learning_rate": 1.9784897632010026e-06, + "loss": 0.6260710954666138, + "step": 980 + }, + { + "epoch": 0.22619322112059026, + "grad_norm": 1.6140506138107567, + "learning_rate": 1.9784110396533804e-06, + "loss": 0.6765384078025818, + "step": 981 + }, + { + "epoch": 0.22642379525017292, + "grad_norm": 0.9741870993027198, + "learning_rate": 1.9783321738837983e-06, + "loss": 0.6716702580451965, + "step": 982 + }, + { + "epoch": 0.22665436937975558, + "grad_norm": 0.9800524570597025, + "learning_rate": 1.978253165903719e-06, + "loss": 0.5537375211715698, + "step": 983 + }, + { + "epoch": 0.22688494350933824, + "grad_norm": 1.2650751897909203, + "learning_rate": 1.9781740157246285e-06, + "loss": 0.525878369808197, + "step": 984 + }, + { + "epoch": 0.2271155176389209, + "grad_norm": 1.1285639712327624, + "learning_rate": 1.978094723358031e-06, + "loss": 
0.6349027156829834, + "step": 985 + }, + { + "epoch": 0.22734609176850357, + "grad_norm": 0.9922350297605812, + "learning_rate": 1.9780152888154525e-06, + "loss": 0.5777440071105957, + "step": 986 + }, + { + "epoch": 0.22757666589808623, + "grad_norm": 0.8792919247604332, + "learning_rate": 1.9779357121084402e-06, + "loss": 0.6181483268737793, + "step": 987 + }, + { + "epoch": 0.2278072400276689, + "grad_norm": 1.113677830579263, + "learning_rate": 1.9778559932485606e-06, + "loss": 0.6364198923110962, + "step": 988 + }, + { + "epoch": 0.22803781415725155, + "grad_norm": 1.0528039871957056, + "learning_rate": 1.9777761322474024e-06, + "loss": 0.623460054397583, + "step": 989 + }, + { + "epoch": 0.2282683882868342, + "grad_norm": 1.0042426162492055, + "learning_rate": 1.977696129116574e-06, + "loss": 0.504749059677124, + "step": 990 + }, + { + "epoch": 0.22849896241641687, + "grad_norm": 0.9462650071116105, + "learning_rate": 1.9776159838677048e-06, + "loss": 0.5228890180587769, + "step": 991 + }, + { + "epoch": 0.22872953654599953, + "grad_norm": 0.983638268661895, + "learning_rate": 1.977535696512444e-06, + "loss": 0.5765929222106934, + "step": 992 + }, + { + "epoch": 0.2289601106755822, + "grad_norm": 1.0000819039461677, + "learning_rate": 1.977455267062463e-06, + "loss": 0.5165348052978516, + "step": 993 + }, + { + "epoch": 0.22919068480516486, + "grad_norm": 1.0528189784184039, + "learning_rate": 1.9773746955294525e-06, + "loss": 0.6056735515594482, + "step": 994 + }, + { + "epoch": 0.22942125893474752, + "grad_norm": 1.0625954437167437, + "learning_rate": 1.9772939819251245e-06, + "loss": 0.5430403351783752, + "step": 995 + }, + { + "epoch": 0.22965183306433018, + "grad_norm": 1.2611536344776966, + "learning_rate": 1.977213126261212e-06, + "loss": 0.5710945129394531, + "step": 996 + }, + { + "epoch": 0.22988240719391284, + "grad_norm": 0.9590894945496666, + "learning_rate": 1.977132128549468e-06, + "loss": 0.5189366936683655, + "step": 997 + }, + { + "epoch": 
0.2301129813234955, + "grad_norm": 1.229825794085491, + "learning_rate": 1.977050988801666e-06, + "loss": 0.6578037738800049, + "step": 998 + }, + { + "epoch": 0.23034355545307816, + "grad_norm": 1.0761110723698188, + "learning_rate": 1.9769697070296006e-06, + "loss": 0.5787034034729004, + "step": 999 + }, + { + "epoch": 0.23057412958266083, + "grad_norm": 1.0414208441736372, + "learning_rate": 1.976888283245087e-06, + "loss": 0.5169408321380615, + "step": 1000 + }, + { + "epoch": 0.2308047037122435, + "grad_norm": 1.1228864795023747, + "learning_rate": 1.976806717459961e-06, + "loss": 0.6326704025268555, + "step": 1001 + }, + { + "epoch": 0.23103527784182615, + "grad_norm": 1.2998118201322668, + "learning_rate": 1.9767250096860785e-06, + "loss": 0.5188414454460144, + "step": 1002 + }, + { + "epoch": 0.2312658519714088, + "grad_norm": 0.9684429634366722, + "learning_rate": 1.9766431599353173e-06, + "loss": 0.5788798928260803, + "step": 1003 + }, + { + "epoch": 0.23149642610099147, + "grad_norm": 1.011079377555661, + "learning_rate": 1.976561168219575e-06, + "loss": 0.5513355731964111, + "step": 1004 + }, + { + "epoch": 0.23172700023057413, + "grad_norm": 0.9242770139183195, + "learning_rate": 1.97647903455077e-06, + "loss": 0.5810542106628418, + "step": 1005 + }, + { + "epoch": 0.2319575743601568, + "grad_norm": 0.9036081245550505, + "learning_rate": 1.9763967589408407e-06, + "loss": 0.6541746854782104, + "step": 1006 + }, + { + "epoch": 0.23218814848973945, + "grad_norm": 0.972339176589073, + "learning_rate": 1.976314341401747e-06, + "loss": 0.48837774991989136, + "step": 1007 + }, + { + "epoch": 0.23241872261932212, + "grad_norm": 1.0622732331560878, + "learning_rate": 1.976231781945469e-06, + "loss": 0.514664888381958, + "step": 1008 + }, + { + "epoch": 0.23264929674890478, + "grad_norm": 1.1476741578183667, + "learning_rate": 1.976149080584008e-06, + "loss": 0.48295027017593384, + "step": 1009 + }, + { + "epoch": 0.23287987087848744, + "grad_norm": 
0.9532553897028984, + "learning_rate": 1.9760662373293847e-06, + "loss": 0.5975791811943054, + "step": 1010 + }, + { + "epoch": 0.2331104450080701, + "grad_norm": 1.0101722687438028, + "learning_rate": 1.9759832521936424e-06, + "loss": 0.4810718297958374, + "step": 1011 + }, + { + "epoch": 0.23334101913765276, + "grad_norm": 0.8377461102160731, + "learning_rate": 1.9759001251888425e-06, + "loss": 0.5984642505645752, + "step": 1012 + }, + { + "epoch": 0.23357159326723542, + "grad_norm": 1.1428510363276687, + "learning_rate": 1.975816856327069e-06, + "loss": 0.600128710269928, + "step": 1013 + }, + { + "epoch": 0.23380216739681808, + "grad_norm": 0.976646115631477, + "learning_rate": 1.9757334456204263e-06, + "loss": 0.5036175847053528, + "step": 1014 + }, + { + "epoch": 0.23403274152640074, + "grad_norm": 0.781296299293608, + "learning_rate": 1.975649893081038e-06, + "loss": 0.49270063638687134, + "step": 1015 + }, + { + "epoch": 0.2342633156559834, + "grad_norm": 1.0782515218974933, + "learning_rate": 1.97556619872105e-06, + "loss": 0.5337218642234802, + "step": 1016 + }, + { + "epoch": 0.23449388978556607, + "grad_norm": 1.279305397178248, + "learning_rate": 1.9754823625526277e-06, + "loss": 0.5263136625289917, + "step": 1017 + }, + { + "epoch": 0.23472446391514873, + "grad_norm": 1.1321753640293293, + "learning_rate": 1.975398384587958e-06, + "loss": 0.6271284818649292, + "step": 1018 + }, + { + "epoch": 0.2349550380447314, + "grad_norm": 0.9524936816808555, + "learning_rate": 1.975314264839248e-06, + "loss": 0.7009197473526001, + "step": 1019 + }, + { + "epoch": 0.23518561217431405, + "grad_norm": 1.0291281498015452, + "learning_rate": 1.9752300033187248e-06, + "loss": 0.5781605839729309, + "step": 1020 + }, + { + "epoch": 0.2354161863038967, + "grad_norm": 1.0439195983844425, + "learning_rate": 1.9751456000386367e-06, + "loss": 0.549934446811676, + "step": 1021 + }, + { + "epoch": 0.23564676043347937, + "grad_norm": 1.1313488046553661, + "learning_rate": 
1.9750610550112535e-06, + "loss": 0.5856816172599792, + "step": 1022 + }, + { + "epoch": 0.23587733456306204, + "grad_norm": 1.1355877980298148, + "learning_rate": 1.9749763682488638e-06, + "loss": 0.6225322484970093, + "step": 1023 + }, + { + "epoch": 0.2361079086926447, + "grad_norm": 0.8829653489765357, + "learning_rate": 1.9748915397637775e-06, + "loss": 0.5533155202865601, + "step": 1024 + }, + { + "epoch": 0.23633848282222736, + "grad_norm": 0.9964032830251005, + "learning_rate": 1.974806569568326e-06, + "loss": 0.4960908889770508, + "step": 1025 + }, + { + "epoch": 0.23656905695181002, + "grad_norm": 1.0642112431572752, + "learning_rate": 1.97472145767486e-06, + "loss": 0.5960450768470764, + "step": 1026 + }, + { + "epoch": 0.23679963108139268, + "grad_norm": 1.0609331852795814, + "learning_rate": 1.9746362040957517e-06, + "loss": 0.5653714537620544, + "step": 1027 + }, + { + "epoch": 0.23703020521097534, + "grad_norm": 0.9636699324332547, + "learning_rate": 1.9745508088433936e-06, + "loss": 0.6400578022003174, + "step": 1028 + }, + { + "epoch": 0.23726077934055798, + "grad_norm": 1.0105210896498236, + "learning_rate": 1.9744652719301987e-06, + "loss": 0.5459057092666626, + "step": 1029 + }, + { + "epoch": 0.23749135347014064, + "grad_norm": 1.0859828591491134, + "learning_rate": 1.9743795933686005e-06, + "loss": 0.46735280752182007, + "step": 1030 + }, + { + "epoch": 0.2377219275997233, + "grad_norm": 0.9440768334185448, + "learning_rate": 1.9742937731710533e-06, + "loss": 0.526339590549469, + "step": 1031 + }, + { + "epoch": 0.23795250172930596, + "grad_norm": 1.013077702945683, + "learning_rate": 1.9742078113500323e-06, + "loss": 0.5976641178131104, + "step": 1032 + }, + { + "epoch": 0.23818307585888862, + "grad_norm": 0.9655038700233691, + "learning_rate": 1.9741217079180325e-06, + "loss": 0.5331728458404541, + "step": 1033 + }, + { + "epoch": 0.23841364998847128, + "grad_norm": 0.9368079955738086, + "learning_rate": 1.9740354628875696e-06, + "loss": 
0.5743261575698853, + "step": 1034 + }, + { + "epoch": 0.23864422411805394, + "grad_norm": 0.9982653104570526, + "learning_rate": 1.973949076271181e-06, + "loss": 0.54700767993927, + "step": 1035 + }, + { + "epoch": 0.2388747982476366, + "grad_norm": 0.8919318869448586, + "learning_rate": 1.9738625480814235e-06, + "loss": 0.5483411550521851, + "step": 1036 + }, + { + "epoch": 0.23910537237721927, + "grad_norm": 0.9314153856468148, + "learning_rate": 1.973775878330875e-06, + "loss": 0.5677193403244019, + "step": 1037 + }, + { + "epoch": 0.23933594650680193, + "grad_norm": 0.9867371078797748, + "learning_rate": 1.973689067032133e-06, + "loss": 0.5092767477035522, + "step": 1038 + }, + { + "epoch": 0.2395665206363846, + "grad_norm": 0.9526587430164372, + "learning_rate": 1.973602114197818e-06, + "loss": 0.5618614554405212, + "step": 1039 + }, + { + "epoch": 0.23979709476596725, + "grad_norm": 1.1304270434054837, + "learning_rate": 1.9735150198405677e-06, + "loss": 0.5601966977119446, + "step": 1040 + }, + { + "epoch": 0.2400276688955499, + "grad_norm": 1.2376653334727166, + "learning_rate": 1.973427783973043e-06, + "loss": 0.5945397019386292, + "step": 1041 + }, + { + "epoch": 0.24025824302513257, + "grad_norm": 1.084452486357135, + "learning_rate": 1.9733404066079253e-06, + "loss": 0.42448002099990845, + "step": 1042 + }, + { + "epoch": 0.24048881715471523, + "grad_norm": 1.0671556472806993, + "learning_rate": 1.9732528877579146e-06, + "loss": 0.5237313508987427, + "step": 1043 + }, + { + "epoch": 0.2407193912842979, + "grad_norm": 1.085642930506958, + "learning_rate": 1.973165227435733e-06, + "loss": 0.6006743907928467, + "step": 1044 + }, + { + "epoch": 0.24094996541388056, + "grad_norm": 0.9267133414742948, + "learning_rate": 1.973077425654123e-06, + "loss": 0.547584056854248, + "step": 1045 + }, + { + "epoch": 0.24118053954346322, + "grad_norm": 1.0824218376223906, + "learning_rate": 1.972989482425847e-06, + "loss": 0.5472346544265747, + "step": 1046 + }, + { + 
"epoch": 0.24141111367304588, + "grad_norm": 1.1106806941355478, + "learning_rate": 1.972901397763689e-06, + "loss": 0.5962260365486145, + "step": 1047 + }, + { + "epoch": 0.24164168780262854, + "grad_norm": 0.9770536598072448, + "learning_rate": 1.9728131716804525e-06, + "loss": 0.561386227607727, + "step": 1048 + }, + { + "epoch": 0.2418722619322112, + "grad_norm": 1.2169602038706573, + "learning_rate": 1.9727248041889624e-06, + "loss": 0.46618524193763733, + "step": 1049 + }, + { + "epoch": 0.24210283606179386, + "grad_norm": 0.9641011081185654, + "learning_rate": 1.9726362953020643e-06, + "loss": 0.4684019088745117, + "step": 1050 + }, + { + "epoch": 0.24233341019137652, + "grad_norm": 1.1116892767931694, + "learning_rate": 1.9725476450326227e-06, + "loss": 0.5670303106307983, + "step": 1051 + }, + { + "epoch": 0.2425639843209592, + "grad_norm": 1.0413794589983083, + "learning_rate": 1.9724588533935246e-06, + "loss": 0.5451534986495972, + "step": 1052 + }, + { + "epoch": 0.24279455845054185, + "grad_norm": 1.3028651104025368, + "learning_rate": 1.9723699203976766e-06, + "loss": 0.578605592250824, + "step": 1053 + }, + { + "epoch": 0.2430251325801245, + "grad_norm": 1.072521418141734, + "learning_rate": 1.972280846058006e-06, + "loss": 0.5844857692718506, + "step": 1054 + }, + { + "epoch": 0.24325570670970717, + "grad_norm": 0.8882845471690917, + "learning_rate": 1.9721916303874603e-06, + "loss": 0.5152320861816406, + "step": 1055 + }, + { + "epoch": 0.24348628083928983, + "grad_norm": 0.994596822062513, + "learning_rate": 1.9721022733990087e-06, + "loss": 0.5108952522277832, + "step": 1056 + }, + { + "epoch": 0.2437168549688725, + "grad_norm": 1.2179028657479944, + "learning_rate": 1.97201277510564e-06, + "loss": 0.6345964670181274, + "step": 1057 + }, + { + "epoch": 0.24394742909845515, + "grad_norm": 1.0322609868377797, + "learning_rate": 1.9719231355203627e-06, + "loss": 0.6699639558792114, + "step": 1058 + }, + { + "epoch": 0.24417800322803782, + 
"grad_norm": 1.0786593444912098, + "learning_rate": 1.971833354656208e-06, + "loss": 0.5426750779151917, + "step": 1059 + }, + { + "epoch": 0.24440857735762048, + "grad_norm": 0.9469348439661489, + "learning_rate": 1.9717434325262253e-06, + "loss": 0.45797908306121826, + "step": 1060 + }, + { + "epoch": 0.24463915148720314, + "grad_norm": 0.9212142090514559, + "learning_rate": 1.9716533691434872e-06, + "loss": 0.46754708886146545, + "step": 1061 + }, + { + "epoch": 0.2448697256167858, + "grad_norm": 1.0419375830533737, + "learning_rate": 1.9715631645210838e-06, + "loss": 0.6593209505081177, + "step": 1062 + }, + { + "epoch": 0.24510029974636846, + "grad_norm": 0.8714440933836988, + "learning_rate": 1.9714728186721287e-06, + "loss": 0.5634866952896118, + "step": 1063 + }, + { + "epoch": 0.24533087387595112, + "grad_norm": 1.3414429697713321, + "learning_rate": 1.971382331609753e-06, + "loss": 0.5066277980804443, + "step": 1064 + }, + { + "epoch": 0.24556144800553378, + "grad_norm": 0.9735373407478976, + "learning_rate": 1.9712917033471113e-06, + "loss": 0.5721756219863892, + "step": 1065 + }, + { + "epoch": 0.24579202213511644, + "grad_norm": 0.9116883309182201, + "learning_rate": 1.9712009338973765e-06, + "loss": 0.5188664197921753, + "step": 1066 + }, + { + "epoch": 0.2460225962646991, + "grad_norm": 1.1314636983505006, + "learning_rate": 1.9711100232737434e-06, + "loss": 0.4879762828350067, + "step": 1067 + }, + { + "epoch": 0.24625317039428177, + "grad_norm": 1.2412816829375237, + "learning_rate": 1.971018971489426e-06, + "loss": 0.5169111490249634, + "step": 1068 + }, + { + "epoch": 0.24648374452386443, + "grad_norm": 1.2239551353327036, + "learning_rate": 1.9709277785576605e-06, + "loss": 0.7341418862342834, + "step": 1069 + }, + { + "epoch": 0.2467143186534471, + "grad_norm": 0.9353793197150668, + "learning_rate": 1.970836444491702e-06, + "loss": 0.48676228523254395, + "step": 1070 + }, + { + "epoch": 0.24694489278302975, + "grad_norm": 1.1049152340951753, + 
"learning_rate": 1.9707449693048277e-06, + "loss": 0.5594040751457214, + "step": 1071 + }, + { + "epoch": 0.2471754669126124, + "grad_norm": 1.1275772388460679, + "learning_rate": 1.970653353010334e-06, + "loss": 0.575579047203064, + "step": 1072 + }, + { + "epoch": 0.24740604104219507, + "grad_norm": 0.9990792550863451, + "learning_rate": 1.9705615956215375e-06, + "loss": 0.5212938189506531, + "step": 1073 + }, + { + "epoch": 0.24763661517177774, + "grad_norm": 1.2242480620016798, + "learning_rate": 1.970469697151777e-06, + "loss": 0.49838072061538696, + "step": 1074 + }, + { + "epoch": 0.2478671893013604, + "grad_norm": 1.0069439526224342, + "learning_rate": 1.9703776576144106e-06, + "loss": 0.505547285079956, + "step": 1075 + }, + { + "epoch": 0.24809776343094306, + "grad_norm": 0.9320138812686547, + "learning_rate": 1.970285477022817e-06, + "loss": 0.5236082077026367, + "step": 1076 + }, + { + "epoch": 0.24832833756052572, + "grad_norm": 1.1096851604663263, + "learning_rate": 1.9701931553903963e-06, + "loss": 0.5417677760124207, + "step": 1077 + }, + { + "epoch": 0.24855891169010838, + "grad_norm": 1.4437484296393372, + "learning_rate": 1.9701006927305676e-06, + "loss": 0.624547004699707, + "step": 1078 + }, + { + "epoch": 0.24878948581969104, + "grad_norm": 1.1814609406249081, + "learning_rate": 1.9700080890567713e-06, + "loss": 0.7127759456634521, + "step": 1079 + }, + { + "epoch": 0.2490200599492737, + "grad_norm": 1.1432146079503174, + "learning_rate": 1.9699153443824686e-06, + "loss": 0.44590264558792114, + "step": 1080 + }, + { + "epoch": 0.24925063407885636, + "grad_norm": 0.9565451374538135, + "learning_rate": 1.9698224587211407e-06, + "loss": 0.6311746835708618, + "step": 1081 + }, + { + "epoch": 0.24948120820843903, + "grad_norm": 0.870591902169041, + "learning_rate": 1.9697294320862898e-06, + "loss": 0.4837970733642578, + "step": 1082 + }, + { + "epoch": 0.2497117823380217, + "grad_norm": 0.8760016768814028, + "learning_rate": 1.969636264491438e-06, 
+ "loss": 0.5749634504318237, + "step": 1083 + }, + { + "epoch": 0.24994235646760435, + "grad_norm": 0.9733867387062589, + "learning_rate": 1.9695429559501283e-06, + "loss": 0.5002774000167847, + "step": 1084 + }, + { + "epoch": 0.250172930597187, + "grad_norm": 0.9904270135981337, + "learning_rate": 1.9694495064759236e-06, + "loss": 0.5407592058181763, + "step": 1085 + }, + { + "epoch": 0.25040350472676964, + "grad_norm": 0.9112103184885231, + "learning_rate": 1.969355916082408e-06, + "loss": 0.5557315349578857, + "step": 1086 + }, + { + "epoch": 0.2506340788563523, + "grad_norm": 1.073902907739282, + "learning_rate": 1.9692621847831865e-06, + "loss": 0.4710160493850708, + "step": 1087 + }, + { + "epoch": 0.25086465298593497, + "grad_norm": 0.946965380647112, + "learning_rate": 1.969168312591883e-06, + "loss": 0.5935187339782715, + "step": 1088 + }, + { + "epoch": 0.2510952271155176, + "grad_norm": 0.9849357353961209, + "learning_rate": 1.969074299522143e-06, + "loss": 0.5358916521072388, + "step": 1089 + }, + { + "epoch": 0.2513258012451003, + "grad_norm": 0.9196749680008564, + "learning_rate": 1.968980145587632e-06, + "loss": 0.40736621618270874, + "step": 1090 + }, + { + "epoch": 0.25155637537468295, + "grad_norm": 0.8048789415521217, + "learning_rate": 1.968885850802037e-06, + "loss": 0.4986698627471924, + "step": 1091 + }, + { + "epoch": 0.2517869495042656, + "grad_norm": 0.9340127152994311, + "learning_rate": 1.968791415179064e-06, + "loss": 0.5547258853912354, + "step": 1092 + }, + { + "epoch": 0.2520175236338483, + "grad_norm": 1.0477998347740531, + "learning_rate": 1.96869683873244e-06, + "loss": 0.5187167525291443, + "step": 1093 + }, + { + "epoch": 0.25224809776343093, + "grad_norm": 0.9456931065936238, + "learning_rate": 1.9686021214759136e-06, + "loss": 0.560575008392334, + "step": 1094 + }, + { + "epoch": 0.2524786718930136, + "grad_norm": 1.0595767044992972, + "learning_rate": 1.968507263423252e-06, + "loss": 0.6441233158111572, + "step": 1095 + }, 
+ { + "epoch": 0.25270924602259626, + "grad_norm": 1.1650850474563572, + "learning_rate": 1.9684122645882446e-06, + "loss": 0.6693669557571411, + "step": 1096 + }, + { + "epoch": 0.2529398201521789, + "grad_norm": 0.9107773905688578, + "learning_rate": 1.9683171249846992e-06, + "loss": 0.4713742434978485, + "step": 1097 + }, + { + "epoch": 0.2531703942817616, + "grad_norm": 1.0855755163203802, + "learning_rate": 1.9682218446264466e-06, + "loss": 0.5393046140670776, + "step": 1098 + }, + { + "epoch": 0.25340096841134424, + "grad_norm": 0.8304628447343301, + "learning_rate": 1.968126423527336e-06, + "loss": 0.44416874647140503, + "step": 1099 + }, + { + "epoch": 0.2536315425409269, + "grad_norm": 0.8560775526129268, + "learning_rate": 1.9680308617012383e-06, + "loss": 0.486186683177948, + "step": 1100 + }, + { + "epoch": 0.25386211667050956, + "grad_norm": 0.8812542184427957, + "learning_rate": 1.9679351591620446e-06, + "loss": 0.5523893237113953, + "step": 1101 + }, + { + "epoch": 0.2540926908000922, + "grad_norm": 0.9964866126205207, + "learning_rate": 1.967839315923665e-06, + "loss": 0.49889492988586426, + "step": 1102 + }, + { + "epoch": 0.2543232649296749, + "grad_norm": 1.1438608764608638, + "learning_rate": 1.9677433320000325e-06, + "loss": 0.6084630489349365, + "step": 1103 + }, + { + "epoch": 0.25455383905925755, + "grad_norm": 0.9684259335546852, + "learning_rate": 1.967647207405099e-06, + "loss": 0.5458555221557617, + "step": 1104 + }, + { + "epoch": 0.2547844131888402, + "grad_norm": 1.3299718075912128, + "learning_rate": 1.9675509421528367e-06, + "loss": 0.5453877449035645, + "step": 1105 + }, + { + "epoch": 0.25501498731842287, + "grad_norm": 1.0404901274691463, + "learning_rate": 1.9674545362572393e-06, + "loss": 0.5226954221725464, + "step": 1106 + }, + { + "epoch": 0.25524556144800553, + "grad_norm": 1.0740163604419912, + "learning_rate": 1.96735798973232e-06, + "loss": 0.5736720561981201, + "step": 1107 + }, + { + "epoch": 0.2554761355775882, + 
"grad_norm": 0.9184855028566775, + "learning_rate": 1.9672613025921135e-06, + "loss": 0.5474177598953247, + "step": 1108 + }, + { + "epoch": 0.25570670970717085, + "grad_norm": 1.2485055919980548, + "learning_rate": 1.967164474850673e-06, + "loss": 0.5146498084068298, + "step": 1109 + }, + { + "epoch": 0.2559372838367535, + "grad_norm": 1.1137167951471605, + "learning_rate": 1.967067506522075e-06, + "loss": 0.6319057941436768, + "step": 1110 + }, + { + "epoch": 0.2561678579663362, + "grad_norm": 0.9087550652455604, + "learning_rate": 1.9669703976204136e-06, + "loss": 0.44495588541030884, + "step": 1111 + }, + { + "epoch": 0.25639843209591884, + "grad_norm": 0.9108509097161608, + "learning_rate": 1.9668731481598052e-06, + "loss": 0.5331558585166931, + "step": 1112 + }, + { + "epoch": 0.2566290062255015, + "grad_norm": 0.9795245602848469, + "learning_rate": 1.9667757581543856e-06, + "loss": 0.5409468412399292, + "step": 1113 + }, + { + "epoch": 0.25685958035508416, + "grad_norm": 1.054007279778104, + "learning_rate": 1.9666782276183112e-06, + "loss": 0.5743308663368225, + "step": 1114 + }, + { + "epoch": 0.2570901544846668, + "grad_norm": 1.004577427685411, + "learning_rate": 1.96658055656576e-06, + "loss": 0.5612793564796448, + "step": 1115 + }, + { + "epoch": 0.2573207286142495, + "grad_norm": 0.9750416454144903, + "learning_rate": 1.9664827450109285e-06, + "loss": 0.554356575012207, + "step": 1116 + }, + { + "epoch": 0.25755130274383214, + "grad_norm": 0.9682247695156199, + "learning_rate": 1.9663847929680352e-06, + "loss": 0.5999840497970581, + "step": 1117 + }, + { + "epoch": 0.2577818768734148, + "grad_norm": 1.0370889815397122, + "learning_rate": 1.9662867004513184e-06, + "loss": 0.5152497291564941, + "step": 1118 + }, + { + "epoch": 0.25801245100299747, + "grad_norm": 1.098663296506931, + "learning_rate": 1.966188467475036e-06, + "loss": 0.6333990097045898, + "step": 1119 + }, + { + "epoch": 0.25824302513258013, + "grad_norm": 0.9734180757824468, + 
"learning_rate": 1.9660900940534685e-06, + "loss": 0.5826340913772583, + "step": 1120 + }, + { + "epoch": 0.2584735992621628, + "grad_norm": 1.0258650855361047, + "learning_rate": 1.965991580200915e-06, + "loss": 0.5968586206436157, + "step": 1121 + }, + { + "epoch": 0.25870417339174545, + "grad_norm": 1.1400845768454182, + "learning_rate": 1.9658929259316945e-06, + "loss": 0.6164212226867676, + "step": 1122 + }, + { + "epoch": 0.2589347475213281, + "grad_norm": 0.9979393096335119, + "learning_rate": 1.9657941312601487e-06, + "loss": 0.6115970611572266, + "step": 1123 + }, + { + "epoch": 0.2591653216509108, + "grad_norm": 1.0595728674513747, + "learning_rate": 1.9656951962006376e-06, + "loss": 0.5490012168884277, + "step": 1124 + }, + { + "epoch": 0.25939589578049344, + "grad_norm": 0.9502072685023252, + "learning_rate": 1.9655961207675425e-06, + "loss": 0.6350439786911011, + "step": 1125 + }, + { + "epoch": 0.2596264699100761, + "grad_norm": 1.0657411847577343, + "learning_rate": 1.965496904975266e-06, + "loss": 0.5667803287506104, + "step": 1126 + }, + { + "epoch": 0.25985704403965876, + "grad_norm": 1.1821679518558437, + "learning_rate": 1.9653975488382287e-06, + "loss": 0.6443949937820435, + "step": 1127 + }, + { + "epoch": 0.2600876181692414, + "grad_norm": 0.9716559479774245, + "learning_rate": 1.965298052370874e-06, + "loss": 0.6085849404335022, + "step": 1128 + }, + { + "epoch": 0.2603181922988241, + "grad_norm": 1.0823001356947075, + "learning_rate": 1.9651984155876644e-06, + "loss": 0.6633332967758179, + "step": 1129 + }, + { + "epoch": 0.26054876642840674, + "grad_norm": 1.2848504053653516, + "learning_rate": 1.965098638503083e-06, + "loss": 0.5997219085693359, + "step": 1130 + }, + { + "epoch": 0.2607793405579894, + "grad_norm": 1.0454096533900064, + "learning_rate": 1.9649987211316333e-06, + "loss": 0.5425878167152405, + "step": 1131 + }, + { + "epoch": 0.26100991468757206, + "grad_norm": 1.1511928917305188, + "learning_rate": 1.9648986634878397e-06, + 
"loss": 0.5894105434417725, + "step": 1132 + }, + { + "epoch": 0.2612404888171547, + "grad_norm": 1.0098199878370706, + "learning_rate": 1.9647984655862464e-06, + "loss": 0.5967395901679993, + "step": 1133 + }, + { + "epoch": 0.2614710629467374, + "grad_norm": 1.026032503619318, + "learning_rate": 1.964698127441418e-06, + "loss": 0.5129253268241882, + "step": 1134 + }, + { + "epoch": 0.26170163707632005, + "grad_norm": 0.8680242413092717, + "learning_rate": 1.96459764906794e-06, + "loss": 0.4503140449523926, + "step": 1135 + }, + { + "epoch": 0.2619322112059027, + "grad_norm": 1.3487730716398616, + "learning_rate": 1.964497030480418e-06, + "loss": 0.5533326864242554, + "step": 1136 + }, + { + "epoch": 0.26216278533548537, + "grad_norm": 1.020191268815397, + "learning_rate": 1.9643962716934776e-06, + "loss": 0.695278525352478, + "step": 1137 + }, + { + "epoch": 0.26239335946506803, + "grad_norm": 1.0637915159693183, + "learning_rate": 1.9642953727217654e-06, + "loss": 0.5198212265968323, + "step": 1138 + }, + { + "epoch": 0.2626239335946507, + "grad_norm": 0.8691408428805534, + "learning_rate": 1.9641943335799476e-06, + "loss": 0.4348503351211548, + "step": 1139 + }, + { + "epoch": 0.26285450772423335, + "grad_norm": 1.075781292907759, + "learning_rate": 1.9640931542827116e-06, + "loss": 0.5241343975067139, + "step": 1140 + }, + { + "epoch": 0.263085081853816, + "grad_norm": 1.1170175690927264, + "learning_rate": 1.9639918348447654e-06, + "loss": 0.6621984839439392, + "step": 1141 + }, + { + "epoch": 0.2633156559833987, + "grad_norm": 0.9797970310895017, + "learning_rate": 1.9638903752808358e-06, + "loss": 0.6091395020484924, + "step": 1142 + }, + { + "epoch": 0.26354623011298134, + "grad_norm": 1.358580155566318, + "learning_rate": 1.963788775605671e-06, + "loss": 0.4857162833213806, + "step": 1143 + }, + { + "epoch": 0.263776804242564, + "grad_norm": 1.155872598215321, + "learning_rate": 1.9636870358340408e-06, + "loss": 0.5912413597106934, + "step": 1144 + }, + { 
+ "epoch": 0.26400737837214666, + "grad_norm": 0.9493926626803307, + "learning_rate": 1.9635851559807326e-06, + "loss": 0.6006268858909607, + "step": 1145 + }, + { + "epoch": 0.2642379525017293, + "grad_norm": 1.0095494395510323, + "learning_rate": 1.9634831360605567e-06, + "loss": 0.5580735802650452, + "step": 1146 + }, + { + "epoch": 0.264468526631312, + "grad_norm": 1.09443652681985, + "learning_rate": 1.9633809760883423e-06, + "loss": 0.5554602146148682, + "step": 1147 + }, + { + "epoch": 0.26469910076089465, + "grad_norm": 1.0073361110439816, + "learning_rate": 1.9632786760789393e-06, + "loss": 0.5648301839828491, + "step": 1148 + }, + { + "epoch": 0.2649296748904773, + "grad_norm": 0.9958775096480507, + "learning_rate": 1.9631762360472186e-06, + "loss": 0.5317412614822388, + "step": 1149 + }, + { + "epoch": 0.26516024902005997, + "grad_norm": 0.8377541227122274, + "learning_rate": 1.96307365600807e-06, + "loss": 0.5608310699462891, + "step": 1150 + }, + { + "epoch": 0.26539082314964263, + "grad_norm": 0.9709108194630034, + "learning_rate": 1.962970935976405e-06, + "loss": 0.49922698736190796, + "step": 1151 + }, + { + "epoch": 0.2656213972792253, + "grad_norm": 1.0372577064435262, + "learning_rate": 1.9628680759671556e-06, + "loss": 0.5840054750442505, + "step": 1152 + }, + { + "epoch": 0.26585197140880795, + "grad_norm": 1.1264168952681184, + "learning_rate": 1.9627650759952727e-06, + "loss": 0.6038475632667542, + "step": 1153 + }, + { + "epoch": 0.2660825455383906, + "grad_norm": 0.969212515968761, + "learning_rate": 1.9626619360757284e-06, + "loss": 0.5923193097114563, + "step": 1154 + }, + { + "epoch": 0.2663131196679733, + "grad_norm": 1.1606889211687668, + "learning_rate": 1.962558656223516e-06, + "loss": 0.5278598666191101, + "step": 1155 + }, + { + "epoch": 0.26654369379755594, + "grad_norm": 0.9873103600473375, + "learning_rate": 1.9624552364536472e-06, + "loss": 0.47691023349761963, + "step": 1156 + }, + { + "epoch": 0.2667742679271386, + 
"grad_norm": 0.9087676067471127, + "learning_rate": 1.962351676781156e-06, + "loss": 0.5801899433135986, + "step": 1157 + }, + { + "epoch": 0.26700484205672126, + "grad_norm": 1.253961482177072, + "learning_rate": 1.962247977221095e-06, + "loss": 0.5170506238937378, + "step": 1158 + }, + { + "epoch": 0.2672354161863039, + "grad_norm": 1.0951542684812736, + "learning_rate": 1.9621441377885387e-06, + "loss": 0.6114981174468994, + "step": 1159 + }, + { + "epoch": 0.2674659903158866, + "grad_norm": 1.0027892727643062, + "learning_rate": 1.9620401584985807e-06, + "loss": 0.6377004384994507, + "step": 1160 + }, + { + "epoch": 0.26769656444546924, + "grad_norm": 0.9961094597216124, + "learning_rate": 1.9619360393663356e-06, + "loss": 0.6177431344985962, + "step": 1161 + }, + { + "epoch": 0.2679271385750519, + "grad_norm": 1.1384478708718946, + "learning_rate": 1.9618317804069384e-06, + "loss": 0.579784095287323, + "step": 1162 + }, + { + "epoch": 0.26815771270463457, + "grad_norm": 0.8744752952973797, + "learning_rate": 1.9617273816355444e-06, + "loss": 0.6078776121139526, + "step": 1163 + }, + { + "epoch": 0.2683882868342172, + "grad_norm": 0.9801356210694869, + "learning_rate": 1.961622843067328e-06, + "loss": 0.5583093166351318, + "step": 1164 + }, + { + "epoch": 0.2686188609637999, + "grad_norm": 0.8741287294678143, + "learning_rate": 1.961518164717486e-06, + "loss": 0.46033143997192383, + "step": 1165 + }, + { + "epoch": 0.26884943509338255, + "grad_norm": 1.250568820610365, + "learning_rate": 1.961413346601234e-06, + "loss": 0.5637123584747314, + "step": 1166 + }, + { + "epoch": 0.2690800092229652, + "grad_norm": 1.0360456860810905, + "learning_rate": 1.9613083887338085e-06, + "loss": 0.5943595170974731, + "step": 1167 + }, + { + "epoch": 0.2693105833525478, + "grad_norm": 1.0495419121458136, + "learning_rate": 1.961203291130466e-06, + "loss": 0.5440319776535034, + "step": 1168 + }, + { + "epoch": 0.2695411574821305, + "grad_norm": 0.9704830315061433, + 
"learning_rate": 1.961098053806484e-06, + "loss": 0.5665608048439026, + "step": 1169 + }, + { + "epoch": 0.26977173161171314, + "grad_norm": 1.0522625707521382, + "learning_rate": 1.960992676777159e-06, + "loss": 0.5707683563232422, + "step": 1170 + }, + { + "epoch": 0.2700023057412958, + "grad_norm": 1.034604689259721, + "learning_rate": 1.9608871600578093e-06, + "loss": 0.5447777509689331, + "step": 1171 + }, + { + "epoch": 0.27023287987087846, + "grad_norm": 1.1920689559592121, + "learning_rate": 1.9607815036637726e-06, + "loss": 0.5598857402801514, + "step": 1172 + }, + { + "epoch": 0.2704634540004611, + "grad_norm": 1.208701571232948, + "learning_rate": 1.960675707610407e-06, + "loss": 0.558403491973877, + "step": 1173 + }, + { + "epoch": 0.2706940281300438, + "grad_norm": 1.3006493228897391, + "learning_rate": 1.960569771913091e-06, + "loss": 0.6696962118148804, + "step": 1174 + }, + { + "epoch": 0.27092460225962645, + "grad_norm": 1.0597715788538418, + "learning_rate": 1.960463696587224e-06, + "loss": 0.519884467124939, + "step": 1175 + }, + { + "epoch": 0.2711551763892091, + "grad_norm": 1.0090714718428708, + "learning_rate": 1.9603574816482243e-06, + "loss": 0.6440261602401733, + "step": 1176 + }, + { + "epoch": 0.27138575051879177, + "grad_norm": 1.1163188497552168, + "learning_rate": 1.9602511271115317e-06, + "loss": 0.48713982105255127, + "step": 1177 + }, + { + "epoch": 0.27161632464837443, + "grad_norm": 0.9570997011710476, + "learning_rate": 1.960144632992606e-06, + "loss": 0.5257129073143005, + "step": 1178 + }, + { + "epoch": 0.2718468987779571, + "grad_norm": 1.3308862733434774, + "learning_rate": 1.9600379993069272e-06, + "loss": 0.5220426917076111, + "step": 1179 + }, + { + "epoch": 0.27207747290753975, + "grad_norm": 1.0690404222828096, + "learning_rate": 1.9599312260699955e-06, + "loss": 0.569817304611206, + "step": 1180 + }, + { + "epoch": 0.2723080470371224, + "grad_norm": 1.0650857331550394, + "learning_rate": 1.9598243132973317e-06, + 
"loss": 0.4370031952857971, + "step": 1181 + }, + { + "epoch": 0.2725386211667051, + "grad_norm": 1.125403283606087, + "learning_rate": 1.959717261004476e-06, + "loss": 0.6060882210731506, + "step": 1182 + }, + { + "epoch": 0.27276919529628774, + "grad_norm": 0.9065361051198069, + "learning_rate": 1.9596100692069905e-06, + "loss": 0.5830891132354736, + "step": 1183 + }, + { + "epoch": 0.2729997694258704, + "grad_norm": 1.4570032441462188, + "learning_rate": 1.9595027379204556e-06, + "loss": 0.5689493417739868, + "step": 1184 + }, + { + "epoch": 0.27323034355545306, + "grad_norm": 1.3244280690129522, + "learning_rate": 1.9593952671604735e-06, + "loss": 0.5550887584686279, + "step": 1185 + }, + { + "epoch": 0.2734609176850357, + "grad_norm": 1.0207521269848765, + "learning_rate": 1.9592876569426665e-06, + "loss": 0.48127567768096924, + "step": 1186 + }, + { + "epoch": 0.2736914918146184, + "grad_norm": 1.071211669612227, + "learning_rate": 1.9591799072826764e-06, + "loss": 0.640753984451294, + "step": 1187 + }, + { + "epoch": 0.27392206594420104, + "grad_norm": 1.1730143666350425, + "learning_rate": 1.959072018196165e-06, + "loss": 0.5266000032424927, + "step": 1188 + }, + { + "epoch": 0.2741526400737837, + "grad_norm": 0.927867514508325, + "learning_rate": 1.958963989698817e-06, + "loss": 0.5586614608764648, + "step": 1189 + }, + { + "epoch": 0.27438321420336637, + "grad_norm": 1.1860842675481242, + "learning_rate": 1.9588558218063336e-06, + "loss": 0.5937967896461487, + "step": 1190 + }, + { + "epoch": 0.274613788332949, + "grad_norm": 1.3761930600193095, + "learning_rate": 1.958747514534439e-06, + "loss": 0.5887218713760376, + "step": 1191 + }, + { + "epoch": 0.2748443624625317, + "grad_norm": 1.0541442430853707, + "learning_rate": 1.9586390678988766e-06, + "loss": 0.5151614546775818, + "step": 1192 + }, + { + "epoch": 0.27507493659211435, + "grad_norm": 0.9782419657689414, + "learning_rate": 1.95853048191541e-06, + "loss": 0.5392748713493347, + "step": 1193 + }, 
+ { + "epoch": 0.275305510721697, + "grad_norm": 1.330179141409128, + "learning_rate": 1.9584217565998237e-06, + "loss": 0.5649560689926147, + "step": 1194 + }, + { + "epoch": 0.2755360848512797, + "grad_norm": 1.0628047614804303, + "learning_rate": 1.9583128919679213e-06, + "loss": 0.4888305962085724, + "step": 1195 + }, + { + "epoch": 0.27576665898086233, + "grad_norm": 0.8838567368205815, + "learning_rate": 1.9582038880355282e-06, + "loss": 0.5026978850364685, + "step": 1196 + }, + { + "epoch": 0.275997233110445, + "grad_norm": 1.094585503881071, + "learning_rate": 1.9580947448184887e-06, + "loss": 0.5358047485351562, + "step": 1197 + }, + { + "epoch": 0.27622780724002766, + "grad_norm": 1.0838231861798517, + "learning_rate": 1.957985462332668e-06, + "loss": 0.6145739555358887, + "step": 1198 + }, + { + "epoch": 0.2764583813696103, + "grad_norm": 1.1469394336927528, + "learning_rate": 1.957876040593952e-06, + "loss": 0.5155332684516907, + "step": 1199 + }, + { + "epoch": 0.276688955499193, + "grad_norm": 0.9936014396625975, + "learning_rate": 1.957766479618245e-06, + "loss": 0.48794522881507874, + "step": 1200 + }, + { + "epoch": 0.27691952962877564, + "grad_norm": 1.135029138979863, + "learning_rate": 1.957656779421474e-06, + "loss": 0.5851761102676392, + "step": 1201 + }, + { + "epoch": 0.2771501037583583, + "grad_norm": 1.0236207003793518, + "learning_rate": 1.957546940019584e-06, + "loss": 0.603874683380127, + "step": 1202 + }, + { + "epoch": 0.27738067788794096, + "grad_norm": 1.0658787224753152, + "learning_rate": 1.9574369614285426e-06, + "loss": 0.5022559762001038, + "step": 1203 + }, + { + "epoch": 0.2776112520175236, + "grad_norm": 1.4179237341040045, + "learning_rate": 1.9573268436643347e-06, + "loss": 0.6469730138778687, + "step": 1204 + }, + { + "epoch": 0.2778418261471063, + "grad_norm": 0.9207501665109726, + "learning_rate": 1.9572165867429685e-06, + "loss": 0.49918532371520996, + "step": 1205 + }, + { + "epoch": 0.27807240027668895, + 
"grad_norm": 0.9656836684424259, + "learning_rate": 1.95710619068047e-06, + "loss": 0.48623788356781006, + "step": 1206 + }, + { + "epoch": 0.2783029744062716, + "grad_norm": 0.9837814076450196, + "learning_rate": 1.956995655492887e-06, + "loss": 0.4868438243865967, + "step": 1207 + }, + { + "epoch": 0.27853354853585427, + "grad_norm": 1.3533879485069031, + "learning_rate": 1.9568849811962862e-06, + "loss": 0.5989904403686523, + "step": 1208 + }, + { + "epoch": 0.27876412266543693, + "grad_norm": 1.3345070230968985, + "learning_rate": 1.956774167806756e-06, + "loss": 0.5125104188919067, + "step": 1209 + }, + { + "epoch": 0.2789946967950196, + "grad_norm": 1.0305365483781255, + "learning_rate": 1.956663215340404e-06, + "loss": 0.5126978158950806, + "step": 1210 + }, + { + "epoch": 0.27922527092460225, + "grad_norm": 0.9524616726362105, + "learning_rate": 1.9565521238133576e-06, + "loss": 0.5009375810623169, + "step": 1211 + }, + { + "epoch": 0.2794558450541849, + "grad_norm": 1.0762476710184214, + "learning_rate": 1.956440893241766e-06, + "loss": 0.5601603984832764, + "step": 1212 + }, + { + "epoch": 0.2796864191837676, + "grad_norm": 1.2962045971613827, + "learning_rate": 1.956329523641797e-06, + "loss": 0.6310690641403198, + "step": 1213 + }, + { + "epoch": 0.27991699331335024, + "grad_norm": 1.0395130987242733, + "learning_rate": 1.95621801502964e-06, + "loss": 0.498830646276474, + "step": 1214 + }, + { + "epoch": 0.2801475674429329, + "grad_norm": 1.0547121574701517, + "learning_rate": 1.9561063674215036e-06, + "loss": 0.6612650156021118, + "step": 1215 + }, + { + "epoch": 0.28037814157251556, + "grad_norm": 1.0369778810130763, + "learning_rate": 1.9559945808336166e-06, + "loss": 0.5651615858078003, + "step": 1216 + }, + { + "epoch": 0.2806087157020982, + "grad_norm": 1.0028009497915646, + "learning_rate": 1.955882655282229e-06, + "loss": 0.5675203800201416, + "step": 1217 + }, + { + "epoch": 0.2808392898316809, + "grad_norm": 1.0910384567165883, + 
"learning_rate": 1.9557705907836095e-06, + "loss": 0.5691455006599426, + "step": 1218 + }, + { + "epoch": 0.28106986396126354, + "grad_norm": 1.2440322291047097, + "learning_rate": 1.955658387354048e-06, + "loss": 0.6018673181533813, + "step": 1219 + }, + { + "epoch": 0.2813004380908462, + "grad_norm": 0.8594681913500082, + "learning_rate": 1.955546045009855e-06, + "loss": 0.5188831090927124, + "step": 1220 + }, + { + "epoch": 0.28153101222042887, + "grad_norm": 0.9611802055135819, + "learning_rate": 1.9554335637673596e-06, + "loss": 0.5161044597625732, + "step": 1221 + }, + { + "epoch": 0.28176158635001153, + "grad_norm": 1.0764912433641416, + "learning_rate": 1.9553209436429132e-06, + "loss": 0.5651452541351318, + "step": 1222 + }, + { + "epoch": 0.2819921604795942, + "grad_norm": 1.0362033432012678, + "learning_rate": 1.9552081846528858e-06, + "loss": 0.5763273239135742, + "step": 1223 + }, + { + "epoch": 0.28222273460917685, + "grad_norm": 1.0512305083546745, + "learning_rate": 1.9550952868136677e-06, + "loss": 0.6379664540290833, + "step": 1224 + }, + { + "epoch": 0.2824533087387595, + "grad_norm": 0.966358468685478, + "learning_rate": 1.95498225014167e-06, + "loss": 0.4021342396736145, + "step": 1225 + }, + { + "epoch": 0.2826838828683422, + "grad_norm": 1.3065298085361052, + "learning_rate": 1.954869074653324e-06, + "loss": 0.49230247735977173, + "step": 1226 + }, + { + "epoch": 0.28291445699792483, + "grad_norm": 0.9198430971109288, + "learning_rate": 1.954755760365081e-06, + "loss": 0.5921554565429688, + "step": 1227 + }, + { + "epoch": 0.2831450311275075, + "grad_norm": 1.2338068239582654, + "learning_rate": 1.954642307293412e-06, + "loss": 0.6495868563652039, + "step": 1228 + }, + { + "epoch": 0.28337560525709016, + "grad_norm": 1.0310593371372254, + "learning_rate": 1.954528715454808e-06, + "loss": 0.5699795484542847, + "step": 1229 + }, + { + "epoch": 0.2836061793866728, + "grad_norm": 1.3462988930710962, + "learning_rate": 1.9544149848657816e-06, + 
"loss": 0.582231879234314, + "step": 1230 + }, + { + "epoch": 0.2838367535162555, + "grad_norm": 1.0033811085419764, + "learning_rate": 1.9543011155428647e-06, + "loss": 0.5952359437942505, + "step": 1231 + }, + { + "epoch": 0.28406732764583814, + "grad_norm": 1.150479906025031, + "learning_rate": 1.9541871075026092e-06, + "loss": 0.646816611289978, + "step": 1232 + }, + { + "epoch": 0.2842979017754208, + "grad_norm": 1.2509776515814615, + "learning_rate": 1.9540729607615866e-06, + "loss": 0.5781043767929077, + "step": 1233 + }, + { + "epoch": 0.28452847590500346, + "grad_norm": 1.1718295930905136, + "learning_rate": 1.95395867533639e-06, + "loss": 0.609764814376831, + "step": 1234 + }, + { + "epoch": 0.2847590500345861, + "grad_norm": 1.2826152398089232, + "learning_rate": 1.9538442512436325e-06, + "loss": 0.4673759341239929, + "step": 1235 + }, + { + "epoch": 0.2849896241641688, + "grad_norm": 1.1343052125955835, + "learning_rate": 1.953729688499946e-06, + "loss": 0.6310999393463135, + "step": 1236 + }, + { + "epoch": 0.28522019829375145, + "grad_norm": 1.075568996273352, + "learning_rate": 1.953614987121983e-06, + "loss": 0.5103853344917297, + "step": 1237 + }, + { + "epoch": 0.2854507724233341, + "grad_norm": 1.1329951189185654, + "learning_rate": 1.9535001471264178e-06, + "loss": 0.5735328197479248, + "step": 1238 + }, + { + "epoch": 0.28568134655291677, + "grad_norm": 1.010063337652323, + "learning_rate": 1.953385168529942e-06, + "loss": 0.5617454051971436, + "step": 1239 + }, + { + "epoch": 0.28591192068249943, + "grad_norm": 1.1392481671873862, + "learning_rate": 1.9532700513492705e-06, + "loss": 0.49873489141464233, + "step": 1240 + }, + { + "epoch": 0.2861424948120821, + "grad_norm": 0.9923008758606798, + "learning_rate": 1.9531547956011353e-06, + "loss": 0.49185073375701904, + "step": 1241 + }, + { + "epoch": 0.28637306894166475, + "grad_norm": 1.1119890456844754, + "learning_rate": 1.9530394013022907e-06, + "loss": 0.6016734838485718, + "step": 1242 + 
}, + { + "epoch": 0.2866036430712474, + "grad_norm": 0.984310677257317, + "learning_rate": 1.9529238684695105e-06, + "loss": 0.5922054052352905, + "step": 1243 + }, + { + "epoch": 0.2868342172008301, + "grad_norm": 1.2933601588161594, + "learning_rate": 1.952808197119588e-06, + "loss": 0.6498355269432068, + "step": 1244 + }, + { + "epoch": 0.28706479133041274, + "grad_norm": 1.106145681286101, + "learning_rate": 1.9526923872693382e-06, + "loss": 0.5564426183700562, + "step": 1245 + }, + { + "epoch": 0.2872953654599954, + "grad_norm": 1.0410162813090216, + "learning_rate": 1.9525764389355945e-06, + "loss": 0.6144154071807861, + "step": 1246 + }, + { + "epoch": 0.28752593958957806, + "grad_norm": 0.9304288925500919, + "learning_rate": 1.9524603521352116e-06, + "loss": 0.5958914756774902, + "step": 1247 + }, + { + "epoch": 0.2877565137191607, + "grad_norm": 1.167763375182377, + "learning_rate": 1.952344126885063e-06, + "loss": 0.5471549034118652, + "step": 1248 + }, + { + "epoch": 0.2879870878487434, + "grad_norm": 1.0658282088084226, + "learning_rate": 1.952227763202044e-06, + "loss": 0.5512329936027527, + "step": 1249 + }, + { + "epoch": 0.28821766197832605, + "grad_norm": 0.9336952567830841, + "learning_rate": 1.9521112611030695e-06, + "loss": 0.5545130968093872, + "step": 1250 + }, + { + "epoch": 0.2884482361079087, + "grad_norm": 0.9540157404500241, + "learning_rate": 1.9519946206050734e-06, + "loss": 0.5409479737281799, + "step": 1251 + }, + { + "epoch": 0.28867881023749137, + "grad_norm": 1.0425656776824677, + "learning_rate": 1.9518778417250114e-06, + "loss": 0.5248778462409973, + "step": 1252 + }, + { + "epoch": 0.28890938436707403, + "grad_norm": 1.1108036883068904, + "learning_rate": 1.951760924479858e-06, + "loss": 0.4985620975494385, + "step": 1253 + }, + { + "epoch": 0.2891399584966567, + "grad_norm": 1.1956376798663733, + "learning_rate": 1.951643868886608e-06, + "loss": 0.5470424890518188, + "step": 1254 + }, + { + "epoch": 0.28937053262623935, + 
"grad_norm": 0.830517770820401, + "learning_rate": 1.9515266749622776e-06, + "loss": 0.5082905292510986, + "step": 1255 + }, + { + "epoch": 0.289601106755822, + "grad_norm": 1.1321002460273393, + "learning_rate": 1.9514093427239013e-06, + "loss": 0.5734596252441406, + "step": 1256 + }, + { + "epoch": 0.2898316808854047, + "grad_norm": 1.133005147672039, + "learning_rate": 1.951291872188535e-06, + "loss": 0.4727100431919098, + "step": 1257 + }, + { + "epoch": 0.29006225501498734, + "grad_norm": 1.044180363768592, + "learning_rate": 1.951174263373254e-06, + "loss": 0.6727551221847534, + "step": 1258 + }, + { + "epoch": 0.29029282914457, + "grad_norm": 0.9491498247436025, + "learning_rate": 1.9510565162951534e-06, + "loss": 0.5225725173950195, + "step": 1259 + }, + { + "epoch": 0.29052340327415266, + "grad_norm": 0.9861385624887246, + "learning_rate": 1.95093863097135e-06, + "loss": 0.46537530422210693, + "step": 1260 + }, + { + "epoch": 0.2907539774037353, + "grad_norm": 1.0433291271591505, + "learning_rate": 1.950820607418979e-06, + "loss": 0.4729498624801636, + "step": 1261 + }, + { + "epoch": 0.290984551533318, + "grad_norm": 1.0319083654914931, + "learning_rate": 1.950702445655196e-06, + "loss": 0.519434928894043, + "step": 1262 + }, + { + "epoch": 0.29121512566290064, + "grad_norm": 1.0839075745171884, + "learning_rate": 1.9505841456971784e-06, + "loss": 0.5487297177314758, + "step": 1263 + }, + { + "epoch": 0.2914456997924833, + "grad_norm": 0.9970964597897494, + "learning_rate": 1.9504657075621207e-06, + "loss": 0.6228574514389038, + "step": 1264 + }, + { + "epoch": 0.29167627392206597, + "grad_norm": 1.076219157850212, + "learning_rate": 1.95034713126724e-06, + "loss": 0.486205518245697, + "step": 1265 + }, + { + "epoch": 0.2919068480516486, + "grad_norm": 1.220321517878089, + "learning_rate": 1.950228416829772e-06, + "loss": 0.6465567350387573, + "step": 1266 + }, + { + "epoch": 0.2921374221812313, + "grad_norm": 1.0227736343783316, + "learning_rate": 
1.9501095642669735e-06, + "loss": 0.5160506963729858, + "step": 1267 + }, + { + "epoch": 0.29236799631081395, + "grad_norm": 1.0494858452172506, + "learning_rate": 1.9499905735961206e-06, + "loss": 0.47334107756614685, + "step": 1268 + }, + { + "epoch": 0.2925985704403966, + "grad_norm": 1.1563719640673416, + "learning_rate": 1.9498714448345103e-06, + "loss": 0.46453380584716797, + "step": 1269 + }, + { + "epoch": 0.29282914456997927, + "grad_norm": 0.9754273704287023, + "learning_rate": 1.9497521779994582e-06, + "loss": 0.5617728233337402, + "step": 1270 + }, + { + "epoch": 0.29305971869956193, + "grad_norm": 1.3129160300173046, + "learning_rate": 1.9496327731083026e-06, + "loss": 0.6129153966903687, + "step": 1271 + }, + { + "epoch": 0.2932902928291446, + "grad_norm": 1.2949114738936178, + "learning_rate": 1.9495132301783983e-06, + "loss": 0.4903183579444885, + "step": 1272 + }, + { + "epoch": 0.29352086695872726, + "grad_norm": 1.1167146830002543, + "learning_rate": 1.9493935492271235e-06, + "loss": 0.5087980628013611, + "step": 1273 + }, + { + "epoch": 0.2937514410883099, + "grad_norm": 1.0447162269466075, + "learning_rate": 1.949273730271874e-06, + "loss": 0.5102910399436951, + "step": 1274 + }, + { + "epoch": 0.2939820152178926, + "grad_norm": 1.0971342006057034, + "learning_rate": 1.9491537733300674e-06, + "loss": 0.5581132769584656, + "step": 1275 + }, + { + "epoch": 0.29421258934747524, + "grad_norm": 1.0166201989797772, + "learning_rate": 1.949033678419141e-06, + "loss": 0.5668213367462158, + "step": 1276 + }, + { + "epoch": 0.2944431634770579, + "grad_norm": 1.1646263878722904, + "learning_rate": 1.9489134455565503e-06, + "loss": 0.5352080464363098, + "step": 1277 + }, + { + "epoch": 0.29467373760664056, + "grad_norm": 1.0375138174364513, + "learning_rate": 1.948793074759774e-06, + "loss": 0.47343915700912476, + "step": 1278 + }, + { + "epoch": 0.29490431173622317, + "grad_norm": 1.2395532163204355, + "learning_rate": 1.9486725660463084e-06, + "loss": 
0.5169435143470764, + "step": 1279 + }, + { + "epoch": 0.29513488586580583, + "grad_norm": 1.2035025560649288, + "learning_rate": 1.9485519194336707e-06, + "loss": 0.4801402688026428, + "step": 1280 + }, + { + "epoch": 0.2953654599953885, + "grad_norm": 1.2115883619737033, + "learning_rate": 1.9484311349393984e-06, + "loss": 0.6537381410598755, + "step": 1281 + }, + { + "epoch": 0.29559603412497115, + "grad_norm": 0.9306094110342265, + "learning_rate": 1.9483102125810483e-06, + "loss": 0.5160089135169983, + "step": 1282 + }, + { + "epoch": 0.2958266082545538, + "grad_norm": 1.0525832312633145, + "learning_rate": 1.9481891523761985e-06, + "loss": 0.5332320332527161, + "step": 1283 + }, + { + "epoch": 0.2960571823841365, + "grad_norm": 0.9112280719646961, + "learning_rate": 1.9480679543424453e-06, + "loss": 0.5076215267181396, + "step": 1284 + }, + { + "epoch": 0.29628775651371914, + "grad_norm": 1.1265706213450601, + "learning_rate": 1.947946618497407e-06, + "loss": 0.607105016708374, + "step": 1285 + }, + { + "epoch": 0.2965183306433018, + "grad_norm": 1.076771624610464, + "learning_rate": 1.9478251448587203e-06, + "loss": 0.6265846490859985, + "step": 1286 + }, + { + "epoch": 0.29674890477288446, + "grad_norm": 1.164803442921585, + "learning_rate": 1.9477035334440426e-06, + "loss": 0.5313390493392944, + "step": 1287 + }, + { + "epoch": 0.2969794789024671, + "grad_norm": 1.0583207692233336, + "learning_rate": 1.947581784271052e-06, + "loss": 0.5059833526611328, + "step": 1288 + }, + { + "epoch": 0.2972100530320498, + "grad_norm": 1.171630953302918, + "learning_rate": 1.9474598973574455e-06, + "loss": 0.5550922155380249, + "step": 1289 + }, + { + "epoch": 0.29744062716163244, + "grad_norm": 0.9941233964259298, + "learning_rate": 1.947337872720941e-06, + "loss": 0.5594801306724548, + "step": 1290 + }, + { + "epoch": 0.2976712012912151, + "grad_norm": 1.1672729516761162, + "learning_rate": 1.9472157103792753e-06, + "loss": 0.6404933333396912, + "step": 1291 + }, + { + 
"epoch": 0.29790177542079777, + "grad_norm": 1.216836258446271, + "learning_rate": 1.947093410350206e-06, + "loss": 0.5884830355644226, + "step": 1292 + }, + { + "epoch": 0.2981323495503804, + "grad_norm": 1.313520165154308, + "learning_rate": 1.9469709726515114e-06, + "loss": 0.5723487138748169, + "step": 1293 + }, + { + "epoch": 0.2983629236799631, + "grad_norm": 1.047985941483805, + "learning_rate": 1.946848397300989e-06, + "loss": 0.5298895239830017, + "step": 1294 + }, + { + "epoch": 0.29859349780954575, + "grad_norm": 1.009793366380185, + "learning_rate": 1.9467256843164557e-06, + "loss": 0.6118877530097961, + "step": 1295 + }, + { + "epoch": 0.2988240719391284, + "grad_norm": 1.2369344702112195, + "learning_rate": 1.9466028337157498e-06, + "loss": 0.6014599800109863, + "step": 1296 + }, + { + "epoch": 0.29905464606871107, + "grad_norm": 0.9889478752374168, + "learning_rate": 1.9464798455167278e-06, + "loss": 0.5861071944236755, + "step": 1297 + }, + { + "epoch": 0.29928522019829373, + "grad_norm": 1.238998066636259, + "learning_rate": 1.9463567197372684e-06, + "loss": 0.5863409042358398, + "step": 1298 + }, + { + "epoch": 0.2995157943278764, + "grad_norm": 1.217300214744882, + "learning_rate": 1.9462334563952687e-06, + "loss": 0.6576352119445801, + "step": 1299 + }, + { + "epoch": 0.29974636845745906, + "grad_norm": 1.074029788035818, + "learning_rate": 1.9461100555086463e-06, + "loss": 0.5458395481109619, + "step": 1300 + }, + { + "epoch": 0.2999769425870417, + "grad_norm": 1.2759220903954522, + "learning_rate": 1.945986517095339e-06, + "loss": 0.48430997133255005, + "step": 1301 + }, + { + "epoch": 0.3002075167166244, + "grad_norm": 1.2436119574902915, + "learning_rate": 1.945862841173304e-06, + "loss": 0.4212522506713867, + "step": 1302 + }, + { + "epoch": 0.30043809084620704, + "grad_norm": 1.1823128908009017, + "learning_rate": 1.9457390277605188e-06, + "loss": 0.5671685934066772, + "step": 1303 + }, + { + "epoch": 0.3006686649757897, + "grad_norm": 
1.0831721181422946, + "learning_rate": 1.945615076874981e-06, + "loss": 0.5350982546806335, + "step": 1304 + }, + { + "epoch": 0.30089923910537236, + "grad_norm": 0.9247033101108441, + "learning_rate": 1.9454909885347088e-06, + "loss": 0.45792657136917114, + "step": 1305 + }, + { + "epoch": 0.301129813234955, + "grad_norm": 1.0473073919925908, + "learning_rate": 1.9453667627577387e-06, + "loss": 0.5644106864929199, + "step": 1306 + }, + { + "epoch": 0.3013603873645377, + "grad_norm": 1.3332547603439018, + "learning_rate": 1.945242399562129e-06, + "loss": 0.554198145866394, + "step": 1307 + }, + { + "epoch": 0.30159096149412035, + "grad_norm": 0.9232575644574793, + "learning_rate": 1.9451178989659565e-06, + "loss": 0.5073474049568176, + "step": 1308 + }, + { + "epoch": 0.301821535623703, + "grad_norm": 1.0206284762622284, + "learning_rate": 1.944993260987319e-06, + "loss": 0.569359302520752, + "step": 1309 + }, + { + "epoch": 0.30205210975328567, + "grad_norm": 1.0382686851233573, + "learning_rate": 1.944868485644334e-06, + "loss": 0.5011791586875916, + "step": 1310 + }, + { + "epoch": 0.30228268388286833, + "grad_norm": 0.9869955270819804, + "learning_rate": 1.9447435729551384e-06, + "loss": 0.41121986508369446, + "step": 1311 + }, + { + "epoch": 0.302513258012451, + "grad_norm": 1.3489170954309295, + "learning_rate": 1.9446185229378896e-06, + "loss": 0.5615876913070679, + "step": 1312 + }, + { + "epoch": 0.30274383214203365, + "grad_norm": 1.2244043366760826, + "learning_rate": 1.9444933356107652e-06, + "loss": 0.5450695157051086, + "step": 1313 + }, + { + "epoch": 0.3029744062716163, + "grad_norm": 1.0371383598149113, + "learning_rate": 1.9443680109919626e-06, + "loss": 0.522222101688385, + "step": 1314 + }, + { + "epoch": 0.303204980401199, + "grad_norm": 0.9638880730108786, + "learning_rate": 1.9442425490996984e-06, + "loss": 0.5081876516342163, + "step": 1315 + }, + { + "epoch": 0.30343555453078164, + "grad_norm": 1.1506604859779093, + "learning_rate": 
1.9441169499522104e-06, + "loss": 0.4955870509147644, + "step": 1316 + }, + { + "epoch": 0.3036661286603643, + "grad_norm": 1.0185303369767542, + "learning_rate": 1.9439912135677553e-06, + "loss": 0.5098991990089417, + "step": 1317 + }, + { + "epoch": 0.30389670278994696, + "grad_norm": 0.9949182918503017, + "learning_rate": 1.94386533996461e-06, + "loss": 0.5686191320419312, + "step": 1318 + }, + { + "epoch": 0.3041272769195296, + "grad_norm": 1.180090494573931, + "learning_rate": 1.943739329161072e-06, + "loss": 0.606401264667511, + "step": 1319 + }, + { + "epoch": 0.3043578510491123, + "grad_norm": 1.0411002752171188, + "learning_rate": 1.9436131811754576e-06, + "loss": 0.49249163269996643, + "step": 1320 + }, + { + "epoch": 0.30458842517869494, + "grad_norm": 1.1079741007732102, + "learning_rate": 1.9434868960261047e-06, + "loss": 0.5373499989509583, + "step": 1321 + }, + { + "epoch": 0.3048189993082776, + "grad_norm": 1.4236897413447511, + "learning_rate": 1.943360473731369e-06, + "loss": 0.4568977355957031, + "step": 1322 + }, + { + "epoch": 0.30504957343786027, + "grad_norm": 1.034905077800575, + "learning_rate": 1.943233914309628e-06, + "loss": 0.562126636505127, + "step": 1323 + }, + { + "epoch": 0.3052801475674429, + "grad_norm": 1.343019932527111, + "learning_rate": 1.943107217779278e-06, + "loss": 0.5795382261276245, + "step": 1324 + }, + { + "epoch": 0.3055107216970256, + "grad_norm": 0.9852538064889438, + "learning_rate": 1.942980384158736e-06, + "loss": 0.5671530365943909, + "step": 1325 + }, + { + "epoch": 0.30574129582660825, + "grad_norm": 0.8981413519731547, + "learning_rate": 1.942853413466438e-06, + "loss": 0.5511401891708374, + "step": 1326 + }, + { + "epoch": 0.3059718699561909, + "grad_norm": 1.1491379693233763, + "learning_rate": 1.942726305720841e-06, + "loss": 0.5712149739265442, + "step": 1327 + }, + { + "epoch": 0.3062024440857736, + "grad_norm": 1.171535283311252, + "learning_rate": 1.9425990609404215e-06, + "loss": 0.5181496739387512, 
+ "step": 1328 + }, + { + "epoch": 0.30643301821535623, + "grad_norm": 1.1968505005842098, + "learning_rate": 1.9424716791436753e-06, + "loss": 0.5758726596832275, + "step": 1329 + }, + { + "epoch": 0.3066635923449389, + "grad_norm": 0.9714627365066287, + "learning_rate": 1.942344160349119e-06, + "loss": 0.5757049322128296, + "step": 1330 + }, + { + "epoch": 0.30689416647452156, + "grad_norm": 0.9271633895158528, + "learning_rate": 1.9422165045752886e-06, + "loss": 0.47352534532546997, + "step": 1331 + }, + { + "epoch": 0.3071247406041042, + "grad_norm": 1.1418817146577889, + "learning_rate": 1.94208871184074e-06, + "loss": 0.5940845012664795, + "step": 1332 + }, + { + "epoch": 0.3073553147336869, + "grad_norm": 1.0590875448509756, + "learning_rate": 1.9419607821640496e-06, + "loss": 0.5225652456283569, + "step": 1333 + }, + { + "epoch": 0.30758588886326954, + "grad_norm": 1.0803440664833228, + "learning_rate": 1.9418327155638126e-06, + "loss": 0.5253404378890991, + "step": 1334 + }, + { + "epoch": 0.3078164629928522, + "grad_norm": 0.9995333811538123, + "learning_rate": 1.941704512058646e-06, + "loss": 0.5637744665145874, + "step": 1335 + }, + { + "epoch": 0.30804703712243486, + "grad_norm": 0.9947267518967771, + "learning_rate": 1.941576171667184e-06, + "loss": 0.48273587226867676, + "step": 1336 + }, + { + "epoch": 0.3082776112520175, + "grad_norm": 0.9569882979404835, + "learning_rate": 1.9414476944080833e-06, + "loss": 0.5989019870758057, + "step": 1337 + }, + { + "epoch": 0.3085081853816002, + "grad_norm": 1.1125936950721667, + "learning_rate": 1.9413190803000183e-06, + "loss": 0.5231547951698303, + "step": 1338 + }, + { + "epoch": 0.30873875951118285, + "grad_norm": 1.0300527191348772, + "learning_rate": 1.9411903293616853e-06, + "loss": 0.5125160217285156, + "step": 1339 + }, + { + "epoch": 0.3089693336407655, + "grad_norm": 1.251133475270548, + "learning_rate": 1.9410614416117993e-06, + "loss": 0.50664883852005, + "step": 1340 + }, + { + "epoch": 
0.30919990777034817, + "grad_norm": 1.063411016331963, + "learning_rate": 1.9409324170690955e-06, + "loss": 0.5555824637413025, + "step": 1341 + }, + { + "epoch": 0.30943048189993083, + "grad_norm": 0.9621002533491156, + "learning_rate": 1.940803255752329e-06, + "loss": 0.5182096362113953, + "step": 1342 + }, + { + "epoch": 0.3096610560295135, + "grad_norm": 1.0359415249922332, + "learning_rate": 1.940673957680274e-06, + "loss": 0.5202751159667969, + "step": 1343 + }, + { + "epoch": 0.30989163015909615, + "grad_norm": 0.9908809268815285, + "learning_rate": 1.940544522871726e-06, + "loss": 0.49791598320007324, + "step": 1344 + }, + { + "epoch": 0.3101222042886788, + "grad_norm": 0.990495096784543, + "learning_rate": 1.9404149513454995e-06, + "loss": 0.48691657185554504, + "step": 1345 + }, + { + "epoch": 0.3103527784182615, + "grad_norm": 1.0649987362093034, + "learning_rate": 1.9402852431204293e-06, + "loss": 0.5726481676101685, + "step": 1346 + }, + { + "epoch": 0.31058335254784414, + "grad_norm": 0.9750258824279312, + "learning_rate": 1.940155398215369e-06, + "loss": 0.5443148016929626, + "step": 1347 + }, + { + "epoch": 0.3108139266774268, + "grad_norm": 1.1005441671416878, + "learning_rate": 1.9400254166491935e-06, + "loss": 0.5767767429351807, + "step": 1348 + }, + { + "epoch": 0.31104450080700946, + "grad_norm": 1.059167179602632, + "learning_rate": 1.9398952984407967e-06, + "loss": 0.5208882689476013, + "step": 1349 + }, + { + "epoch": 0.3112750749365921, + "grad_norm": 0.8304820941291429, + "learning_rate": 1.939765043609093e-06, + "loss": 0.5152548551559448, + "step": 1350 + }, + { + "epoch": 0.3115056490661748, + "grad_norm": 1.1875548530259965, + "learning_rate": 1.939634652173016e-06, + "loss": 0.42542198300361633, + "step": 1351 + }, + { + "epoch": 0.31173622319575744, + "grad_norm": 1.1424220130032787, + "learning_rate": 1.9395041241515197e-06, + "loss": 0.6471734046936035, + "step": 1352 + }, + { + "epoch": 0.3119667973253401, + "grad_norm": 
1.1191897598164906, + "learning_rate": 1.9393734595635767e-06, + "loss": 0.6257486343383789, + "step": 1353 + }, + { + "epoch": 0.31219737145492277, + "grad_norm": 1.1348942815080005, + "learning_rate": 1.9392426584281815e-06, + "loss": 0.562118649482727, + "step": 1354 + }, + { + "epoch": 0.31242794558450543, + "grad_norm": 1.223083488663697, + "learning_rate": 1.939111720764347e-06, + "loss": 0.5602811574935913, + "step": 1355 + }, + { + "epoch": 0.3126585197140881, + "grad_norm": 1.041642546930775, + "learning_rate": 1.9389806465911056e-06, + "loss": 0.54469895362854, + "step": 1356 + }, + { + "epoch": 0.31288909384367075, + "grad_norm": 1.159034123821878, + "learning_rate": 1.9388494359275115e-06, + "loss": 0.5262914896011353, + "step": 1357 + }, + { + "epoch": 0.3131196679732534, + "grad_norm": 1.184281074720895, + "learning_rate": 1.938718088792637e-06, + "loss": 0.6137207746505737, + "step": 1358 + }, + { + "epoch": 0.3133502421028361, + "grad_norm": 1.0740150522099046, + "learning_rate": 1.9385866052055744e-06, + "loss": 0.5792986750602722, + "step": 1359 + }, + { + "epoch": 0.31358081623241874, + "grad_norm": 0.9946259290534466, + "learning_rate": 1.938454985185437e-06, + "loss": 0.4953799843788147, + "step": 1360 + }, + { + "epoch": 0.3138113903620014, + "grad_norm": 1.2906978669163651, + "learning_rate": 1.938323228751356e-06, + "loss": 0.5722379684448242, + "step": 1361 + }, + { + "epoch": 0.31404196449158406, + "grad_norm": 0.9996513214249106, + "learning_rate": 1.938191335922484e-06, + "loss": 0.513651967048645, + "step": 1362 + }, + { + "epoch": 0.3142725386211667, + "grad_norm": 1.0509635344773647, + "learning_rate": 1.9380593067179935e-06, + "loss": 0.4911235272884369, + "step": 1363 + }, + { + "epoch": 0.3145031127507494, + "grad_norm": 1.0029036193486218, + "learning_rate": 1.9379271411570753e-06, + "loss": 0.5478678941726685, + "step": 1364 + }, + { + "epoch": 0.31473368688033204, + "grad_norm": 0.8901015021428158, + "learning_rate": 
1.9377948392589417e-06, + "loss": 0.46698129177093506, + "step": 1365 + }, + { + "epoch": 0.3149642610099147, + "grad_norm": 1.3327357773387452, + "learning_rate": 1.9376624010428243e-06, + "loss": 0.5081343650817871, + "step": 1366 + }, + { + "epoch": 0.31519483513949736, + "grad_norm": 1.1172038301784757, + "learning_rate": 1.9375298265279735e-06, + "loss": 0.583903431892395, + "step": 1367 + }, + { + "epoch": 0.31542540926908, + "grad_norm": 1.0403870552320973, + "learning_rate": 1.937397115733661e-06, + "loss": 0.5249435901641846, + "step": 1368 + }, + { + "epoch": 0.3156559833986627, + "grad_norm": 1.184866053048378, + "learning_rate": 1.9372642686791777e-06, + "loss": 0.5463817119598389, + "step": 1369 + }, + { + "epoch": 0.31588655752824535, + "grad_norm": 1.2179956171685966, + "learning_rate": 1.9371312853838338e-06, + "loss": 0.4634520709514618, + "step": 1370 + }, + { + "epoch": 0.316117131657828, + "grad_norm": 1.2606144259751904, + "learning_rate": 1.93699816586696e-06, + "loss": 0.6018840074539185, + "step": 1371 + }, + { + "epoch": 0.31634770578741067, + "grad_norm": 1.1911067691024062, + "learning_rate": 1.9368649101479072e-06, + "loss": 0.5507885813713074, + "step": 1372 + }, + { + "epoch": 0.31657827991699333, + "grad_norm": 0.9991148637431415, + "learning_rate": 1.9367315182460442e-06, + "loss": 0.5520491600036621, + "step": 1373 + }, + { + "epoch": 0.316808854046576, + "grad_norm": 1.2455223208218802, + "learning_rate": 1.936597990180762e-06, + "loss": 0.5410347580909729, + "step": 1374 + }, + { + "epoch": 0.31703942817615866, + "grad_norm": 1.6049117927004484, + "learning_rate": 1.9364643259714694e-06, + "loss": 0.5771749019622803, + "step": 1375 + }, + { + "epoch": 0.3172700023057413, + "grad_norm": 1.123905862633382, + "learning_rate": 1.9363305256375965e-06, + "loss": 0.5071828365325928, + "step": 1376 + }, + { + "epoch": 0.317500576435324, + "grad_norm": 1.1240180544134455, + "learning_rate": 1.936196589198592e-06, + "loss": 
0.558908224105835, + "step": 1377 + }, + { + "epoch": 0.31773115056490664, + "grad_norm": 1.1984781772064843, + "learning_rate": 1.9360625166739256e-06, + "loss": 0.5509803295135498, + "step": 1378 + }, + { + "epoch": 0.3179617246944893, + "grad_norm": 1.1703050385431384, + "learning_rate": 1.935928308083085e-06, + "loss": 0.5333945155143738, + "step": 1379 + }, + { + "epoch": 0.31819229882407196, + "grad_norm": 1.2141630137674275, + "learning_rate": 1.93579396344558e-06, + "loss": 0.5337819457054138, + "step": 1380 + }, + { + "epoch": 0.3184228729536546, + "grad_norm": 1.161230429960398, + "learning_rate": 1.9356594827809387e-06, + "loss": 0.5286899209022522, + "step": 1381 + }, + { + "epoch": 0.3186534470832373, + "grad_norm": 1.3042082103630104, + "learning_rate": 1.9355248661087083e-06, + "loss": 0.5915369987487793, + "step": 1382 + }, + { + "epoch": 0.31888402121281995, + "grad_norm": 1.2725859277548193, + "learning_rate": 1.9353901134484575e-06, + "loss": 0.5843492746353149, + "step": 1383 + }, + { + "epoch": 0.3191145953424026, + "grad_norm": 1.0723106790063142, + "learning_rate": 1.935255224819774e-06, + "loss": 0.5015528202056885, + "step": 1384 + }, + { + "epoch": 0.31934516947198527, + "grad_norm": 1.2053658641154292, + "learning_rate": 1.935120200242265e-06, + "loss": 0.5650957822799683, + "step": 1385 + }, + { + "epoch": 0.31957574360156793, + "grad_norm": 0.9993056241167617, + "learning_rate": 1.9349850397355576e-06, + "loss": 0.5452740788459778, + "step": 1386 + }, + { + "epoch": 0.3198063177311506, + "grad_norm": 1.138341645042275, + "learning_rate": 1.934849743319299e-06, + "loss": 0.5069071054458618, + "step": 1387 + }, + { + "epoch": 0.32003689186073325, + "grad_norm": 1.3097523217194937, + "learning_rate": 1.934714311013156e-06, + "loss": 0.5350260734558105, + "step": 1388 + }, + { + "epoch": 0.3202674659903159, + "grad_norm": 1.065882395696928, + "learning_rate": 1.9345787428368146e-06, + "loss": 0.6002014875411987, + "step": 1389 + }, + { + 
"epoch": 0.3204980401198986, + "grad_norm": 1.0951548438177328, + "learning_rate": 1.9344430388099813e-06, + "loss": 0.5111383199691772, + "step": 1390 + }, + { + "epoch": 0.3207286142494812, + "grad_norm": 1.3896947100609738, + "learning_rate": 1.934307198952382e-06, + "loss": 0.6029741168022156, + "step": 1391 + }, + { + "epoch": 0.32095918837906384, + "grad_norm": 1.0076386708324083, + "learning_rate": 1.9341712232837628e-06, + "loss": 0.48339328169822693, + "step": 1392 + }, + { + "epoch": 0.3211897625086465, + "grad_norm": 1.5017597017671664, + "learning_rate": 1.9340351118238882e-06, + "loss": 0.6080894470214844, + "step": 1393 + }, + { + "epoch": 0.32142033663822916, + "grad_norm": 1.1935202429445742, + "learning_rate": 1.9338988645925444e-06, + "loss": 0.46375036239624023, + "step": 1394 + }, + { + "epoch": 0.3216509107678118, + "grad_norm": 1.2397479694281224, + "learning_rate": 1.9337624816095357e-06, + "loss": 0.5974088907241821, + "step": 1395 + }, + { + "epoch": 0.3218814848973945, + "grad_norm": 1.4525926184759388, + "learning_rate": 1.9336259628946865e-06, + "loss": 0.5759298801422119, + "step": 1396 + }, + { + "epoch": 0.32211205902697715, + "grad_norm": 1.0361695525185906, + "learning_rate": 1.9334893084678417e-06, + "loss": 0.6050859689712524, + "step": 1397 + }, + { + "epoch": 0.3223426331565598, + "grad_norm": 1.1306650773102374, + "learning_rate": 1.9333525183488657e-06, + "loss": 0.5879993438720703, + "step": 1398 + }, + { + "epoch": 0.32257320728614247, + "grad_norm": 1.055350398289763, + "learning_rate": 1.933215592557642e-06, + "loss": 0.5496323108673096, + "step": 1399 + }, + { + "epoch": 0.32280378141572513, + "grad_norm": 1.2847712135798797, + "learning_rate": 1.9330785311140732e-06, + "loss": 0.48447534441947937, + "step": 1400 + }, + { + "epoch": 0.3230343555453078, + "grad_norm": 1.2583031445613762, + "learning_rate": 1.932941334038084e-06, + "loss": 0.5687322020530701, + "step": 1401 + }, + { + "epoch": 0.32326492967489046, + 
"grad_norm": 1.1545356458260727, + "learning_rate": 1.9328040013496166e-06, + "loss": 0.4070928990840912, + "step": 1402 + }, + { + "epoch": 0.3234955038044731, + "grad_norm": 0.9643847324304846, + "learning_rate": 1.9326665330686344e-06, + "loss": 0.5131539106369019, + "step": 1403 + }, + { + "epoch": 0.3237260779340558, + "grad_norm": 1.0846567553359194, + "learning_rate": 1.932528929215119e-06, + "loss": 0.47571802139282227, + "step": 1404 + }, + { + "epoch": 0.32395665206363844, + "grad_norm": 1.095169764239565, + "learning_rate": 1.9323911898090728e-06, + "loss": 0.5676391124725342, + "step": 1405 + }, + { + "epoch": 0.3241872261932211, + "grad_norm": 1.0653010445083047, + "learning_rate": 1.9322533148705177e-06, + "loss": 0.5464721322059631, + "step": 1406 + }, + { + "epoch": 0.32441780032280376, + "grad_norm": 1.044728614529827, + "learning_rate": 1.9321153044194953e-06, + "loss": 0.6130954027175903, + "step": 1407 + }, + { + "epoch": 0.3246483744523864, + "grad_norm": 1.6513732337511444, + "learning_rate": 1.9319771584760666e-06, + "loss": 0.6058028936386108, + "step": 1408 + }, + { + "epoch": 0.3248789485819691, + "grad_norm": 1.1251884535657009, + "learning_rate": 1.9318388770603123e-06, + "loss": 0.5326286554336548, + "step": 1409 + }, + { + "epoch": 0.32510952271155175, + "grad_norm": 1.2184625691329178, + "learning_rate": 1.9317004601923337e-06, + "loss": 0.6046053767204285, + "step": 1410 + }, + { + "epoch": 0.3253400968411344, + "grad_norm": 1.058617017669887, + "learning_rate": 1.931561907892251e-06, + "loss": 0.4597975015640259, + "step": 1411 + }, + { + "epoch": 0.32557067097071707, + "grad_norm": 1.1843983331118075, + "learning_rate": 1.9314232201802035e-06, + "loss": 0.6024897694587708, + "step": 1412 + }, + { + "epoch": 0.32580124510029973, + "grad_norm": 1.037552834044261, + "learning_rate": 1.9312843970763512e-06, + "loss": 0.45463523268699646, + "step": 1413 + }, + { + "epoch": 0.3260318192298824, + "grad_norm": 0.9412245310618959, + 
"learning_rate": 1.9311454386008736e-06, + "loss": 0.512498140335083, + "step": 1414 + }, + { + "epoch": 0.32626239335946505, + "grad_norm": 0.8929271577435476, + "learning_rate": 1.9310063447739695e-06, + "loss": 0.4851795434951782, + "step": 1415 + }, + { + "epoch": 0.3264929674890477, + "grad_norm": 1.1131717345806365, + "learning_rate": 1.930867115615858e-06, + "loss": 0.5464169979095459, + "step": 1416 + }, + { + "epoch": 0.3267235416186304, + "grad_norm": 0.9649299588738096, + "learning_rate": 1.930727751146777e-06, + "loss": 0.5614463090896606, + "step": 1417 + }, + { + "epoch": 0.32695411574821304, + "grad_norm": 1.1279163828506724, + "learning_rate": 1.930588251386985e-06, + "loss": 0.635399341583252, + "step": 1418 + }, + { + "epoch": 0.3271846898777957, + "grad_norm": 1.0116750083389472, + "learning_rate": 1.9304486163567588e-06, + "loss": 0.4862840175628662, + "step": 1419 + }, + { + "epoch": 0.32741526400737836, + "grad_norm": 1.3810849020281415, + "learning_rate": 1.930308846076397e-06, + "loss": 0.6548877954483032, + "step": 1420 + }, + { + "epoch": 0.327645838136961, + "grad_norm": 0.9726550652757486, + "learning_rate": 1.9301689405662154e-06, + "loss": 0.5781031250953674, + "step": 1421 + }, + { + "epoch": 0.3278764122665437, + "grad_norm": 1.0075078554250574, + "learning_rate": 1.930028899846552e-06, + "loss": 0.4945180118083954, + "step": 1422 + }, + { + "epoch": 0.32810698639612634, + "grad_norm": 1.1661473529435082, + "learning_rate": 1.9298887239377623e-06, + "loss": 0.548690915107727, + "step": 1423 + }, + { + "epoch": 0.328337560525709, + "grad_norm": 1.0120278252177992, + "learning_rate": 1.929748412860222e-06, + "loss": 0.44515126943588257, + "step": 1424 + }, + { + "epoch": 0.32856813465529167, + "grad_norm": 0.8968526552864172, + "learning_rate": 1.9296079666343273e-06, + "loss": 0.433849573135376, + "step": 1425 + }, + { + "epoch": 0.3287987087848743, + "grad_norm": 1.185097032812299, + "learning_rate": 1.9294673852804938e-06, + "loss": 
0.5600666403770447, + "step": 1426 + }, + { + "epoch": 0.329029282914457, + "grad_norm": 1.1490365285996864, + "learning_rate": 1.9293266688191555e-06, + "loss": 0.5302737355232239, + "step": 1427 + }, + { + "epoch": 0.32925985704403965, + "grad_norm": 1.1854633228597617, + "learning_rate": 1.929185817270768e-06, + "loss": 0.5590239763259888, + "step": 1428 + }, + { + "epoch": 0.3294904311736223, + "grad_norm": 0.9322915581005059, + "learning_rate": 1.929044830655804e-06, + "loss": 0.43225252628326416, + "step": 1429 + }, + { + "epoch": 0.329721005303205, + "grad_norm": 1.0987581728513967, + "learning_rate": 1.9289037089947595e-06, + "loss": 0.4932950735092163, + "step": 1430 + }, + { + "epoch": 0.32995157943278763, + "grad_norm": 1.1539316791656467, + "learning_rate": 1.9287624523081457e-06, + "loss": 0.48358941078186035, + "step": 1431 + }, + { + "epoch": 0.3301821535623703, + "grad_norm": 1.1348341469716536, + "learning_rate": 1.928621060616497e-06, + "loss": 0.48359012603759766, + "step": 1432 + }, + { + "epoch": 0.33041272769195296, + "grad_norm": 0.9278501695529541, + "learning_rate": 1.9284795339403663e-06, + "loss": 0.48462390899658203, + "step": 1433 + }, + { + "epoch": 0.3306433018215356, + "grad_norm": 1.439376655816269, + "learning_rate": 1.9283378723003253e-06, + "loss": 0.5167088508605957, + "step": 1434 + }, + { + "epoch": 0.3308738759511183, + "grad_norm": 1.0184323306356053, + "learning_rate": 1.928196075716966e-06, + "loss": 0.47352856397628784, + "step": 1435 + }, + { + "epoch": 0.33110445008070094, + "grad_norm": 0.9676467825700396, + "learning_rate": 1.9280541442109e-06, + "loss": 0.5013144016265869, + "step": 1436 + }, + { + "epoch": 0.3313350242102836, + "grad_norm": 1.1746874818237374, + "learning_rate": 1.927912077802759e-06, + "loss": 0.5061586499214172, + "step": 1437 + }, + { + "epoch": 0.33156559833986626, + "grad_norm": 1.3055289684633111, + "learning_rate": 1.9277698765131927e-06, + "loss": 0.5718814134597778, + "step": 1438 + }, + { 
+ "epoch": 0.3317961724694489, + "grad_norm": 1.147604660511156, + "learning_rate": 1.9276275403628727e-06, + "loss": 0.47547006607055664, + "step": 1439 + }, + { + "epoch": 0.3320267465990316, + "grad_norm": 1.1585259805283974, + "learning_rate": 1.9274850693724884e-06, + "loss": 0.5387942790985107, + "step": 1440 + }, + { + "epoch": 0.33225732072861425, + "grad_norm": 1.013907046172662, + "learning_rate": 1.9273424635627494e-06, + "loss": 0.524285078048706, + "step": 1441 + }, + { + "epoch": 0.3324878948581969, + "grad_norm": 1.1737357855070976, + "learning_rate": 1.927199722954385e-06, + "loss": 0.5073943138122559, + "step": 1442 + }, + { + "epoch": 0.33271846898777957, + "grad_norm": 1.2047946851654725, + "learning_rate": 1.927056847568144e-06, + "loss": 0.4609600007534027, + "step": 1443 + }, + { + "epoch": 0.33294904311736223, + "grad_norm": 1.0416538135601094, + "learning_rate": 1.926913837424795e-06, + "loss": 0.4861013889312744, + "step": 1444 + }, + { + "epoch": 0.3331796172469449, + "grad_norm": 1.0835107342484427, + "learning_rate": 1.9267706925451253e-06, + "loss": 0.5255436897277832, + "step": 1445 + }, + { + "epoch": 0.33341019137652755, + "grad_norm": 1.4634923921780199, + "learning_rate": 1.9266274129499434e-06, + "loss": 0.6673840880393982, + "step": 1446 + }, + { + "epoch": 0.3336407655061102, + "grad_norm": 0.9656915858584796, + "learning_rate": 1.9264839986600757e-06, + "loss": 0.38582634925842285, + "step": 1447 + }, + { + "epoch": 0.3338713396356929, + "grad_norm": 0.9567963925410773, + "learning_rate": 1.926340449696369e-06, + "loss": 0.4597562253475189, + "step": 1448 + }, + { + "epoch": 0.33410191376527554, + "grad_norm": 1.130778436617546, + "learning_rate": 1.92619676607969e-06, + "loss": 0.5901148319244385, + "step": 1449 + }, + { + "epoch": 0.3343324878948582, + "grad_norm": 1.2252206522255358, + "learning_rate": 1.9260529478309242e-06, + "loss": 0.49872028827667236, + "step": 1450 + }, + { + "epoch": 0.33456306202444086, + 
"grad_norm": 0.9242619738807548, + "learning_rate": 1.925908994970977e-06, + "loss": 0.4611232578754425, + "step": 1451 + }, + { + "epoch": 0.3347936361540235, + "grad_norm": 1.1122995891321772, + "learning_rate": 1.9257649075207738e-06, + "loss": 0.5671408176422119, + "step": 1452 + }, + { + "epoch": 0.3350242102836062, + "grad_norm": 1.2073453603933548, + "learning_rate": 1.925620685501259e-06, + "loss": 0.4892054498195648, + "step": 1453 + }, + { + "epoch": 0.33525478441318884, + "grad_norm": 1.1748595063207394, + "learning_rate": 1.9254763289333966e-06, + "loss": 0.5506503582000732, + "step": 1454 + }, + { + "epoch": 0.3354853585427715, + "grad_norm": 1.4352362120603241, + "learning_rate": 1.9253318378381702e-06, + "loss": 0.6233078241348267, + "step": 1455 + }, + { + "epoch": 0.33571593267235417, + "grad_norm": 1.2159230168553836, + "learning_rate": 1.9251872122365835e-06, + "loss": 0.5551373958587646, + "step": 1456 + }, + { + "epoch": 0.33594650680193683, + "grad_norm": 1.0308435059717576, + "learning_rate": 1.925042452149659e-06, + "loss": 0.5561612844467163, + "step": 1457 + }, + { + "epoch": 0.3361770809315195, + "grad_norm": 1.0286600789295617, + "learning_rate": 1.924897557598439e-06, + "loss": 0.613766074180603, + "step": 1458 + }, + { + "epoch": 0.33640765506110215, + "grad_norm": 1.092154153863493, + "learning_rate": 1.9247525286039852e-06, + "loss": 0.5767652988433838, + "step": 1459 + }, + { + "epoch": 0.3366382291906848, + "grad_norm": 1.1221153049255785, + "learning_rate": 1.9246073651873795e-06, + "loss": 0.49292564392089844, + "step": 1460 + }, + { + "epoch": 0.3368688033202675, + "grad_norm": 1.2909262812986786, + "learning_rate": 1.9244620673697224e-06, + "loss": 0.5901867151260376, + "step": 1461 + }, + { + "epoch": 0.33709937744985013, + "grad_norm": 1.1013040204716718, + "learning_rate": 1.924316635172135e-06, + "loss": 0.5543808937072754, + "step": 1462 + }, + { + "epoch": 0.3373299515794328, + "grad_norm": 1.3433064818976315, + 
"learning_rate": 1.9241710686157568e-06, + "loss": 0.528805136680603, + "step": 1463 + }, + { + "epoch": 0.33756052570901546, + "grad_norm": 1.2569454583762516, + "learning_rate": 1.924025367721748e-06, + "loss": 0.6396733522415161, + "step": 1464 + }, + { + "epoch": 0.3377910998385981, + "grad_norm": 0.9764691877916688, + "learning_rate": 1.9238795325112867e-06, + "loss": 0.5558862686157227, + "step": 1465 + }, + { + "epoch": 0.3380216739681808, + "grad_norm": 1.2329860923893396, + "learning_rate": 1.9237335630055724e-06, + "loss": 0.5863986015319824, + "step": 1466 + }, + { + "epoch": 0.33825224809776344, + "grad_norm": 1.0929132974739206, + "learning_rate": 1.923587459225823e-06, + "loss": 0.5636321306228638, + "step": 1467 + }, + { + "epoch": 0.3384828222273461, + "grad_norm": 1.1286586205882263, + "learning_rate": 1.923441221193276e-06, + "loss": 0.6065811514854431, + "step": 1468 + }, + { + "epoch": 0.33871339635692876, + "grad_norm": 1.4147716425908794, + "learning_rate": 1.9232948489291886e-06, + "loss": 0.580939769744873, + "step": 1469 + }, + { + "epoch": 0.3389439704865114, + "grad_norm": 1.1018333541876169, + "learning_rate": 1.9231483424548377e-06, + "loss": 0.5429994463920593, + "step": 1470 + }, + { + "epoch": 0.3391745446160941, + "grad_norm": 1.1834314239894592, + "learning_rate": 1.92300170179152e-06, + "loss": 0.5090892910957336, + "step": 1471 + }, + { + "epoch": 0.33940511874567675, + "grad_norm": 1.053685812356228, + "learning_rate": 1.9228549269605498e-06, + "loss": 0.5280312299728394, + "step": 1472 + }, + { + "epoch": 0.3396356928752594, + "grad_norm": 0.992641626439364, + "learning_rate": 1.9227080179832634e-06, + "loss": 0.5098810195922852, + "step": 1473 + }, + { + "epoch": 0.33986626700484207, + "grad_norm": 1.110706876976592, + "learning_rate": 1.922560974881015e-06, + "loss": 0.4554474353790283, + "step": 1474 + }, + { + "epoch": 0.34009684113442473, + "grad_norm": 1.042826154870894, + "learning_rate": 1.9224137976751793e-06, + 
"loss": 0.4492517113685608, + "step": 1475 + }, + { + "epoch": 0.3403274152640074, + "grad_norm": 1.3050966518961793, + "learning_rate": 1.9222664863871495e-06, + "loss": 0.47606343030929565, + "step": 1476 + }, + { + "epoch": 0.34055798939359005, + "grad_norm": 1.331553847580159, + "learning_rate": 1.9221190410383394e-06, + "loss": 0.5939435362815857, + "step": 1477 + }, + { + "epoch": 0.3407885635231727, + "grad_norm": 1.0156905582890146, + "learning_rate": 1.921971461650181e-06, + "loss": 0.5418350696563721, + "step": 1478 + }, + { + "epoch": 0.3410191376527554, + "grad_norm": 1.258400628812999, + "learning_rate": 1.9218237482441265e-06, + "loss": 0.5307733416557312, + "step": 1479 + }, + { + "epoch": 0.34124971178233804, + "grad_norm": 1.097634429758053, + "learning_rate": 1.9216759008416483e-06, + "loss": 0.5102016925811768, + "step": 1480 + }, + { + "epoch": 0.3414802859119207, + "grad_norm": 1.6070497683125828, + "learning_rate": 1.9215279194642366e-06, + "loss": 0.5043876767158508, + "step": 1481 + }, + { + "epoch": 0.34171086004150336, + "grad_norm": 1.0925329335071103, + "learning_rate": 1.9213798041334025e-06, + "loss": 0.5365253686904907, + "step": 1482 + }, + { + "epoch": 0.341941434171086, + "grad_norm": 1.1923005853358424, + "learning_rate": 1.921231554870676e-06, + "loss": 0.4938368797302246, + "step": 1483 + }, + { + "epoch": 0.3421720083006687, + "grad_norm": 1.0865439416616147, + "learning_rate": 1.921083171697607e-06, + "loss": 0.5274159908294678, + "step": 1484 + }, + { + "epoch": 0.34240258243025135, + "grad_norm": 1.1913792015364102, + "learning_rate": 1.9209346546357637e-06, + "loss": 0.4720276892185211, + "step": 1485 + }, + { + "epoch": 0.342633156559834, + "grad_norm": 0.9383641214181552, + "learning_rate": 1.920786003706735e-06, + "loss": 0.42276352643966675, + "step": 1486 + }, + { + "epoch": 0.34286373068941667, + "grad_norm": 1.0581324959121157, + "learning_rate": 1.920637218932129e-06, + "loss": 0.5319294333457947, + "step": 1487 + 
}, + { + "epoch": 0.34309430481899933, + "grad_norm": 1.1819330354237378, + "learning_rate": 1.920488300333572e-06, + "loss": 0.5197560787200928, + "step": 1488 + }, + { + "epoch": 0.343324878948582, + "grad_norm": 1.5013538667422215, + "learning_rate": 1.9203392479327127e-06, + "loss": 0.550025463104248, + "step": 1489 + }, + { + "epoch": 0.34355545307816465, + "grad_norm": 1.0981284345294107, + "learning_rate": 1.920190061751216e-06, + "loss": 0.50255286693573, + "step": 1490 + }, + { + "epoch": 0.3437860272077473, + "grad_norm": 1.1895622589876538, + "learning_rate": 1.9200407418107678e-06, + "loss": 0.5952906608581543, + "step": 1491 + }, + { + "epoch": 0.34401660133733, + "grad_norm": 0.9421522918126589, + "learning_rate": 1.9198912881330737e-06, + "loss": 0.48161056637763977, + "step": 1492 + }, + { + "epoch": 0.34424717546691264, + "grad_norm": 1.177243819966174, + "learning_rate": 1.919741700739858e-06, + "loss": 0.5490972995758057, + "step": 1493 + }, + { + "epoch": 0.3444777495964953, + "grad_norm": 1.4788962836499655, + "learning_rate": 1.9195919796528647e-06, + "loss": 0.45651519298553467, + "step": 1494 + }, + { + "epoch": 0.34470832372607796, + "grad_norm": 1.2203060266370191, + "learning_rate": 1.919442124893857e-06, + "loss": 0.5318460464477539, + "step": 1495 + }, + { + "epoch": 0.3449388978556606, + "grad_norm": 1.0748079339537138, + "learning_rate": 1.9192921364846187e-06, + "loss": 0.5052516460418701, + "step": 1496 + }, + { + "epoch": 0.3451694719852433, + "grad_norm": 1.3171544150804408, + "learning_rate": 1.9191420144469515e-06, + "loss": 0.6653434038162231, + "step": 1497 + }, + { + "epoch": 0.34540004611482594, + "grad_norm": 0.962422061512943, + "learning_rate": 1.9189917588026774e-06, + "loss": 0.47182875871658325, + "step": 1498 + }, + { + "epoch": 0.3456306202444086, + "grad_norm": 1.0305251609345925, + "learning_rate": 1.9188413695736376e-06, + "loss": 0.5257801413536072, + "step": 1499 + }, + { + "epoch": 0.34586119437399127, + 
"grad_norm": 1.1090254531285808, + "learning_rate": 1.918690846781692e-06, + "loss": 0.565075695514679, + "step": 1500 + }, + { + "epoch": 0.3460917685035739, + "grad_norm": 1.1909717210416553, + "learning_rate": 1.9185401904487214e-06, + "loss": 0.49737876653671265, + "step": 1501 + }, + { + "epoch": 0.34632234263315653, + "grad_norm": 1.021716441788736, + "learning_rate": 1.918389400596625e-06, + "loss": 0.5136237144470215, + "step": 1502 + }, + { + "epoch": 0.3465529167627392, + "grad_norm": 1.011829912931323, + "learning_rate": 1.9182384772473216e-06, + "loss": 0.5122819542884827, + "step": 1503 + }, + { + "epoch": 0.34678349089232186, + "grad_norm": 1.1232586653417744, + "learning_rate": 1.91808742042275e-06, + "loss": 0.4586041271686554, + "step": 1504 + }, + { + "epoch": 0.3470140650219045, + "grad_norm": 1.0599756649712084, + "learning_rate": 1.9179362301448666e-06, + "loss": 0.49752146005630493, + "step": 1505 + }, + { + "epoch": 0.3472446391514872, + "grad_norm": 1.0110535685015802, + "learning_rate": 1.917784906435649e-06, + "loss": 0.4423530101776123, + "step": 1506 + }, + { + "epoch": 0.34747521328106984, + "grad_norm": 1.2828635133632034, + "learning_rate": 1.9176334493170946e-06, + "loss": 0.4979468882083893, + "step": 1507 + }, + { + "epoch": 0.3477057874106525, + "grad_norm": 1.0086748218378025, + "learning_rate": 1.9174818588112178e-06, + "loss": 0.5229524374008179, + "step": 1508 + }, + { + "epoch": 0.34793636154023516, + "grad_norm": 1.006104946386604, + "learning_rate": 1.9173301349400546e-06, + "loss": 0.47884654998779297, + "step": 1509 + }, + { + "epoch": 0.3481669356698178, + "grad_norm": 1.161430061405767, + "learning_rate": 1.9171782777256594e-06, + "loss": 0.5204922556877136, + "step": 1510 + }, + { + "epoch": 0.3483975097994005, + "grad_norm": 1.1268415177845295, + "learning_rate": 1.917026287190106e-06, + "loss": 0.5077674984931946, + "step": 1511 + }, + { + "epoch": 0.34862808392898315, + "grad_norm": 0.9750269271228661, + 
"learning_rate": 1.9168741633554885e-06, + "loss": 0.4171299934387207, + "step": 1512 + }, + { + "epoch": 0.3488586580585658, + "grad_norm": 1.065613083459404, + "learning_rate": 1.9167219062439187e-06, + "loss": 0.5228694081306458, + "step": 1513 + }, + { + "epoch": 0.34908923218814847, + "grad_norm": 1.188410464922724, + "learning_rate": 1.916569515877529e-06, + "loss": 0.5496635437011719, + "step": 1514 + }, + { + "epoch": 0.34931980631773113, + "grad_norm": 0.969674279609777, + "learning_rate": 1.9164169922784716e-06, + "loss": 0.5197573900222778, + "step": 1515 + }, + { + "epoch": 0.3495503804473138, + "grad_norm": 1.3265152215611398, + "learning_rate": 1.9162643354689163e-06, + "loss": 0.5726813077926636, + "step": 1516 + }, + { + "epoch": 0.34978095457689645, + "grad_norm": 1.0368094455843846, + "learning_rate": 1.916111545471054e-06, + "loss": 0.53382408618927, + "step": 1517 + }, + { + "epoch": 0.3500115287064791, + "grad_norm": 1.0676291023728657, + "learning_rate": 1.915958622307094e-06, + "loss": 0.5535515546798706, + "step": 1518 + }, + { + "epoch": 0.3502421028360618, + "grad_norm": 1.183098293067818, + "learning_rate": 1.9158055659992648e-06, + "loss": 0.5295307040214539, + "step": 1519 + }, + { + "epoch": 0.35047267696564444, + "grad_norm": 1.3231709310936663, + "learning_rate": 1.9156523765698158e-06, + "loss": 0.5397933125495911, + "step": 1520 + }, + { + "epoch": 0.3507032510952271, + "grad_norm": 1.217082341703879, + "learning_rate": 1.915499054041014e-06, + "loss": 0.5614666938781738, + "step": 1521 + }, + { + "epoch": 0.35093382522480976, + "grad_norm": 1.155125291987811, + "learning_rate": 1.915345598435146e-06, + "loss": 0.5321720838546753, + "step": 1522 + }, + { + "epoch": 0.3511643993543924, + "grad_norm": 1.172353935810673, + "learning_rate": 1.9151920097745185e-06, + "loss": 0.51869797706604, + "step": 1523 + }, + { + "epoch": 0.3513949734839751, + "grad_norm": 1.0936179296558388, + "learning_rate": 1.9150382880814577e-06, + "loss": 
0.58238685131073, + "step": 1524 + }, + { + "epoch": 0.35162554761355774, + "grad_norm": 1.135142968184709, + "learning_rate": 1.914884433378308e-06, + "loss": 0.5617767572402954, + "step": 1525 + }, + { + "epoch": 0.3518561217431404, + "grad_norm": 0.9232400306777988, + "learning_rate": 1.9147304456874336e-06, + "loss": 0.5207428932189941, + "step": 1526 + }, + { + "epoch": 0.35208669587272307, + "grad_norm": 1.0829138732821308, + "learning_rate": 1.914576325031218e-06, + "loss": 0.5929840207099915, + "step": 1527 + }, + { + "epoch": 0.3523172700023057, + "grad_norm": 1.0372438860332964, + "learning_rate": 1.914422071432065e-06, + "loss": 0.510567307472229, + "step": 1528 + }, + { + "epoch": 0.3525478441318884, + "grad_norm": 1.2529291445912578, + "learning_rate": 1.914267684912397e-06, + "loss": 0.5524177551269531, + "step": 1529 + }, + { + "epoch": 0.35277841826147105, + "grad_norm": 1.0844290023080794, + "learning_rate": 1.9141131654946548e-06, + "loss": 0.5622289180755615, + "step": 1530 + }, + { + "epoch": 0.3530089923910537, + "grad_norm": 1.1655531028574153, + "learning_rate": 1.9139585132012995e-06, + "loss": 0.5085979700088501, + "step": 1531 + }, + { + "epoch": 0.35323956652063637, + "grad_norm": 1.0367412290626608, + "learning_rate": 1.9138037280548117e-06, + "loss": 0.47232770919799805, + "step": 1532 + }, + { + "epoch": 0.35347014065021903, + "grad_norm": 1.3584148636864177, + "learning_rate": 1.913648810077691e-06, + "loss": 0.535300612449646, + "step": 1533 + }, + { + "epoch": 0.3537007147798017, + "grad_norm": 1.1457507125445123, + "learning_rate": 1.9134937592924562e-06, + "loss": 0.4351940155029297, + "step": 1534 + }, + { + "epoch": 0.35393128890938436, + "grad_norm": 0.9891980196576595, + "learning_rate": 1.9133385757216456e-06, + "loss": 0.4691917896270752, + "step": 1535 + }, + { + "epoch": 0.354161863038967, + "grad_norm": 1.03905005054118, + "learning_rate": 1.9131832593878167e-06, + "loss": 0.4911034107208252, + "step": 1536 + }, + { + 
"epoch": 0.3543924371685497, + "grad_norm": 0.9599946260153974, + "learning_rate": 1.9130278103135458e-06, + "loss": 0.3954068422317505, + "step": 1537 + }, + { + "epoch": 0.35462301129813234, + "grad_norm": 1.2512488183212185, + "learning_rate": 1.9128722285214297e-06, + "loss": 0.5541605949401855, + "step": 1538 + }, + { + "epoch": 0.354853585427715, + "grad_norm": 1.2362059407886639, + "learning_rate": 1.9127165140340832e-06, + "loss": 0.5719314217567444, + "step": 1539 + }, + { + "epoch": 0.35508415955729766, + "grad_norm": 1.342530930822934, + "learning_rate": 1.9125606668741418e-06, + "loss": 0.60889732837677, + "step": 1540 + }, + { + "epoch": 0.3553147336868803, + "grad_norm": 1.2098741685807175, + "learning_rate": 1.9124046870642587e-06, + "loss": 0.5247465968132019, + "step": 1541 + }, + { + "epoch": 0.355545307816463, + "grad_norm": 1.3096766952611592, + "learning_rate": 1.912248574627107e-06, + "loss": 0.5681591033935547, + "step": 1542 + }, + { + "epoch": 0.35577588194604565, + "grad_norm": 1.0008372683888578, + "learning_rate": 1.91209232958538e-06, + "loss": 0.5995845794677734, + "step": 1543 + }, + { + "epoch": 0.3560064560756283, + "grad_norm": 1.0463229098086306, + "learning_rate": 1.9119359519617893e-06, + "loss": 0.514456033706665, + "step": 1544 + }, + { + "epoch": 0.35623703020521097, + "grad_norm": 1.0680000709528683, + "learning_rate": 1.9117794417790657e-06, + "loss": 0.45192602276802063, + "step": 1545 + }, + { + "epoch": 0.35646760433479363, + "grad_norm": 1.042670075197141, + "learning_rate": 1.911622799059959e-06, + "loss": 0.5529573559761047, + "step": 1546 + }, + { + "epoch": 0.3566981784643763, + "grad_norm": 1.2129822836493795, + "learning_rate": 1.9114660238272403e-06, + "loss": 0.4544152021408081, + "step": 1547 + }, + { + "epoch": 0.35692875259395895, + "grad_norm": 1.516629148023364, + "learning_rate": 1.9113091161036974e-06, + "loss": 0.5676225423812866, + "step": 1548 + }, + { + "epoch": 0.3571593267235416, + "grad_norm": 
1.1320627323756525, + "learning_rate": 1.9111520759121384e-06, + "loss": 0.5571830868721008, + "step": 1549 + }, + { + "epoch": 0.3573899008531243, + "grad_norm": 1.1377531274302592, + "learning_rate": 1.910994903275391e-06, + "loss": 0.5091487765312195, + "step": 1550 + }, + { + "epoch": 0.35762047498270694, + "grad_norm": 1.107456889270875, + "learning_rate": 1.9108375982163015e-06, + "loss": 0.5484684705734253, + "step": 1551 + }, + { + "epoch": 0.3578510491122896, + "grad_norm": 1.261905478374622, + "learning_rate": 1.9106801607577364e-06, + "loss": 0.49742424488067627, + "step": 1552 + }, + { + "epoch": 0.35808162324187226, + "grad_norm": 1.2341261046425518, + "learning_rate": 1.9105225909225804e-06, + "loss": 0.5871520638465881, + "step": 1553 + }, + { + "epoch": 0.3583121973714549, + "grad_norm": 1.2329576492287886, + "learning_rate": 1.910364888733738e-06, + "loss": 0.5096076726913452, + "step": 1554 + }, + { + "epoch": 0.3585427715010376, + "grad_norm": 1.3375416968847058, + "learning_rate": 1.910207054214133e-06, + "loss": 0.7168693542480469, + "step": 1555 + }, + { + "epoch": 0.35877334563062024, + "grad_norm": 1.126707169388949, + "learning_rate": 1.910049087386707e-06, + "loss": 0.5603561997413635, + "step": 1556 + }, + { + "epoch": 0.3590039197602029, + "grad_norm": 1.299433383477777, + "learning_rate": 1.909890988274424e-06, + "loss": 0.5857734680175781, + "step": 1557 + }, + { + "epoch": 0.35923449388978557, + "grad_norm": 1.040543925807462, + "learning_rate": 1.9097327569002642e-06, + "loss": 0.5612708926200867, + "step": 1558 + }, + { + "epoch": 0.35946506801936823, + "grad_norm": 1.146949414139332, + "learning_rate": 1.909574393287228e-06, + "loss": 0.5264564752578735, + "step": 1559 + }, + { + "epoch": 0.3596956421489509, + "grad_norm": 0.9390137754415148, + "learning_rate": 1.9094158974583357e-06, + "loss": 0.4163395166397095, + "step": 1560 + }, + { + "epoch": 0.35992621627853355, + "grad_norm": 1.0884801214343747, + "learning_rate": 
1.909257269436626e-06, + "loss": 0.483236163854599, + "step": 1561 + }, + { + "epoch": 0.3601567904081162, + "grad_norm": 1.0086049535834347, + "learning_rate": 1.9090985092451572e-06, + "loss": 0.48892003297805786, + "step": 1562 + }, + { + "epoch": 0.3603873645376989, + "grad_norm": 1.0090138133688373, + "learning_rate": 1.908939616907007e-06, + "loss": 0.45310860872268677, + "step": 1563 + }, + { + "epoch": 0.36061793866728153, + "grad_norm": 1.0130833457744266, + "learning_rate": 1.908780592445271e-06, + "loss": 0.5242425799369812, + "step": 1564 + }, + { + "epoch": 0.3608485127968642, + "grad_norm": 1.0425805251353624, + "learning_rate": 1.9086214358830663e-06, + "loss": 0.47026845812797546, + "step": 1565 + }, + { + "epoch": 0.36107908692644686, + "grad_norm": 1.2209406413770176, + "learning_rate": 1.9084621472435267e-06, + "loss": 0.5783924460411072, + "step": 1566 + }, + { + "epoch": 0.3613096610560295, + "grad_norm": 1.0139793238266448, + "learning_rate": 1.9083027265498073e-06, + "loss": 0.5534437894821167, + "step": 1567 + }, + { + "epoch": 0.3615402351856122, + "grad_norm": 1.27522834837266, + "learning_rate": 1.9081431738250815e-06, + "loss": 0.49131953716278076, + "step": 1568 + }, + { + "epoch": 0.36177080931519484, + "grad_norm": 1.0466765845853998, + "learning_rate": 1.9079834890925412e-06, + "loss": 0.4798020124435425, + "step": 1569 + }, + { + "epoch": 0.3620013834447775, + "grad_norm": 1.1201181573638213, + "learning_rate": 1.9078236723753987e-06, + "loss": 0.4928893446922302, + "step": 1570 + }, + { + "epoch": 0.36223195757436016, + "grad_norm": 0.884047440430311, + "learning_rate": 1.9076637236968847e-06, + "loss": 0.4483630657196045, + "step": 1571 + }, + { + "epoch": 0.3624625317039428, + "grad_norm": 1.0983581542959335, + "learning_rate": 1.90750364308025e-06, + "loss": 0.593490481376648, + "step": 1572 + }, + { + "epoch": 0.3626931058335255, + "grad_norm": 1.1430514811975505, + "learning_rate": 1.9073434305487631e-06, + "loss": 
0.5944634675979614, + "step": 1573 + }, + { + "epoch": 0.36292367996310815, + "grad_norm": 1.003698560447405, + "learning_rate": 1.9071830861257134e-06, + "loss": 0.5010452270507812, + "step": 1574 + }, + { + "epoch": 0.3631542540926908, + "grad_norm": 1.0687566975761509, + "learning_rate": 1.9070226098344078e-06, + "loss": 0.5128473043441772, + "step": 1575 + }, + { + "epoch": 0.36338482822227347, + "grad_norm": 1.0854169038402666, + "learning_rate": 1.9068620016981733e-06, + "loss": 0.6256363987922668, + "step": 1576 + }, + { + "epoch": 0.36361540235185613, + "grad_norm": 1.0796360454107574, + "learning_rate": 1.9067012617403565e-06, + "loss": 0.5502322912216187, + "step": 1577 + }, + { + "epoch": 0.3638459764814388, + "grad_norm": 1.2842731628323776, + "learning_rate": 1.906540389984322e-06, + "loss": 0.5756800174713135, + "step": 1578 + }, + { + "epoch": 0.36407655061102145, + "grad_norm": 1.135643566986845, + "learning_rate": 1.9063793864534543e-06, + "loss": 0.5131359696388245, + "step": 1579 + }, + { + "epoch": 0.3643071247406041, + "grad_norm": 0.9714084254330834, + "learning_rate": 1.9062182511711567e-06, + "loss": 0.5776810646057129, + "step": 1580 + }, + { + "epoch": 0.3645376988701868, + "grad_norm": 1.0973639487789169, + "learning_rate": 1.9060569841608523e-06, + "loss": 0.49460822343826294, + "step": 1581 + }, + { + "epoch": 0.36476827299976944, + "grad_norm": 0.942012419923591, + "learning_rate": 1.9058955854459823e-06, + "loss": 0.5031022429466248, + "step": 1582 + }, + { + "epoch": 0.3649988471293521, + "grad_norm": 1.2106661637014209, + "learning_rate": 1.9057340550500082e-06, + "loss": 0.4957816004753113, + "step": 1583 + }, + { + "epoch": 0.36522942125893476, + "grad_norm": 0.9363710565312526, + "learning_rate": 1.9055723929964102e-06, + "loss": 0.47861093282699585, + "step": 1584 + }, + { + "epoch": 0.3654599953885174, + "grad_norm": 1.027272725701274, + "learning_rate": 1.9054105993086868e-06, + "loss": 0.44517919421195984, + "step": 1585 + }, 
+ { + "epoch": 0.3656905695181001, + "grad_norm": 1.1724343492985738, + "learning_rate": 1.9052486740103568e-06, + "loss": 0.46661484241485596, + "step": 1586 + }, + { + "epoch": 0.36592114364768275, + "grad_norm": 0.9788001147307338, + "learning_rate": 1.9050866171249575e-06, + "loss": 0.517694890499115, + "step": 1587 + }, + { + "epoch": 0.3661517177772654, + "grad_norm": 1.1284193922698917, + "learning_rate": 1.904924428676046e-06, + "loss": 0.49465644359588623, + "step": 1588 + }, + { + "epoch": 0.36638229190684807, + "grad_norm": 1.0036913999315975, + "learning_rate": 1.9047621086871971e-06, + "loss": 0.41830652952194214, + "step": 1589 + }, + { + "epoch": 0.36661286603643073, + "grad_norm": 1.1944977036427056, + "learning_rate": 1.9045996571820067e-06, + "loss": 0.5540663003921509, + "step": 1590 + }, + { + "epoch": 0.3668434401660134, + "grad_norm": 1.072580109375711, + "learning_rate": 1.9044370741840882e-06, + "loss": 0.5619527101516724, + "step": 1591 + }, + { + "epoch": 0.36707401429559605, + "grad_norm": 1.1509533440805209, + "learning_rate": 1.9042743597170746e-06, + "loss": 0.5086055994033813, + "step": 1592 + }, + { + "epoch": 0.3673045884251787, + "grad_norm": 1.050425223739088, + "learning_rate": 1.9041115138046183e-06, + "loss": 0.5839927196502686, + "step": 1593 + }, + { + "epoch": 0.3675351625547614, + "grad_norm": 1.0464789939377692, + "learning_rate": 1.9039485364703904e-06, + "loss": 0.508616030216217, + "step": 1594 + }, + { + "epoch": 0.36776573668434404, + "grad_norm": 1.15877506638183, + "learning_rate": 1.903785427738082e-06, + "loss": 0.46514832973480225, + "step": 1595 + }, + { + "epoch": 0.3679963108139267, + "grad_norm": 1.525284603977575, + "learning_rate": 1.9036221876314016e-06, + "loss": 0.42142176628112793, + "step": 1596 + }, + { + "epoch": 0.36822688494350936, + "grad_norm": 1.3114380851226077, + "learning_rate": 1.9034588161740786e-06, + "loss": 0.42195791006088257, + "step": 1597 + }, + { + "epoch": 0.368457459073092, + 
"grad_norm": 1.0276642661247686, + "learning_rate": 1.9032953133898601e-06, + "loss": 0.46705931425094604, + "step": 1598 + }, + { + "epoch": 0.3686880332026747, + "grad_norm": 1.1002100436754347, + "learning_rate": 1.9031316793025134e-06, + "loss": 0.4741164743900299, + "step": 1599 + }, + { + "epoch": 0.36891860733225734, + "grad_norm": 1.269728601723268, + "learning_rate": 1.902967913935824e-06, + "loss": 0.49730339646339417, + "step": 1600 + }, + { + "epoch": 0.36914918146184, + "grad_norm": 0.9594474153361355, + "learning_rate": 1.902804017313597e-06, + "loss": 0.47678127884864807, + "step": 1601 + }, + { + "epoch": 0.36937975559142266, + "grad_norm": 1.1964394586929104, + "learning_rate": 1.9026399894596565e-06, + "loss": 0.4954279661178589, + "step": 1602 + }, + { + "epoch": 0.3696103297210053, + "grad_norm": 0.9685506818723637, + "learning_rate": 1.9024758303978456e-06, + "loss": 0.5115381479263306, + "step": 1603 + }, + { + "epoch": 0.369840903850588, + "grad_norm": 1.0632901548704432, + "learning_rate": 1.9023115401520264e-06, + "loss": 0.6147117614746094, + "step": 1604 + }, + { + "epoch": 0.37007147798017065, + "grad_norm": 1.4566806194426465, + "learning_rate": 1.9021471187460802e-06, + "loss": 0.5334371328353882, + "step": 1605 + }, + { + "epoch": 0.3703020521097533, + "grad_norm": 1.2820059739478686, + "learning_rate": 1.9019825662039073e-06, + "loss": 0.4702361226081848, + "step": 1606 + }, + { + "epoch": 0.37053262623933597, + "grad_norm": 1.1889012346736458, + "learning_rate": 1.901817882549427e-06, + "loss": 0.5049586892127991, + "step": 1607 + }, + { + "epoch": 0.37076320036891863, + "grad_norm": 1.2055092488358514, + "learning_rate": 1.901653067806578e-06, + "loss": 0.5063170194625854, + "step": 1608 + }, + { + "epoch": 0.3709937744985013, + "grad_norm": 1.1599393359430212, + "learning_rate": 1.9014881219993175e-06, + "loss": 0.540824294090271, + "step": 1609 + }, + { + "epoch": 0.37122434862808396, + "grad_norm": 1.372148291928607, + 
"learning_rate": 1.901323045151622e-06, + "loss": 0.4744170904159546, + "step": 1610 + }, + { + "epoch": 0.3714549227576666, + "grad_norm": 1.2144026597364277, + "learning_rate": 1.9011578372874876e-06, + "loss": 0.5090929269790649, + "step": 1611 + }, + { + "epoch": 0.3716854968872493, + "grad_norm": 1.0610635938586983, + "learning_rate": 1.9009924984309284e-06, + "loss": 0.3886772394180298, + "step": 1612 + }, + { + "epoch": 0.3719160710168319, + "grad_norm": 1.1192663585328575, + "learning_rate": 1.9008270286059782e-06, + "loss": 0.4976482391357422, + "step": 1613 + }, + { + "epoch": 0.37214664514641455, + "grad_norm": 1.0577168176218985, + "learning_rate": 1.9006614278366898e-06, + "loss": 0.4629209041595459, + "step": 1614 + }, + { + "epoch": 0.3723772192759972, + "grad_norm": 1.0381238100092287, + "learning_rate": 1.9004956961471352e-06, + "loss": 0.49334412813186646, + "step": 1615 + }, + { + "epoch": 0.37260779340557987, + "grad_norm": 1.2336018114177745, + "learning_rate": 1.9003298335614047e-06, + "loss": 0.614592432975769, + "step": 1616 + }, + { + "epoch": 0.37283836753516253, + "grad_norm": 0.9895019344615126, + "learning_rate": 1.9001638401036082e-06, + "loss": 0.5339843034744263, + "step": 1617 + }, + { + "epoch": 0.3730689416647452, + "grad_norm": 0.9743667038154072, + "learning_rate": 1.8999977157978749e-06, + "loss": 0.5516937375068665, + "step": 1618 + }, + { + "epoch": 0.37329951579432785, + "grad_norm": 1.2149293301312265, + "learning_rate": 1.8998314606683522e-06, + "loss": 0.5034124255180359, + "step": 1619 + }, + { + "epoch": 0.3735300899239105, + "grad_norm": 0.9412969527830801, + "learning_rate": 1.8996650747392073e-06, + "loss": 0.49766790866851807, + "step": 1620 + }, + { + "epoch": 0.3737606640534932, + "grad_norm": 1.1063112007683722, + "learning_rate": 1.899498558034626e-06, + "loss": 0.6662446856498718, + "step": 1621 + }, + { + "epoch": 0.37399123818307584, + "grad_norm": 1.3692241861945424, + "learning_rate": 
1.8993319105788129e-06, + "loss": 0.5416747331619263, + "step": 1622 + }, + { + "epoch": 0.3742218123126585, + "grad_norm": 1.2377768970666951, + "learning_rate": 1.8991651323959922e-06, + "loss": 0.5137313604354858, + "step": 1623 + }, + { + "epoch": 0.37445238644224116, + "grad_norm": 1.0509326993065755, + "learning_rate": 1.8989982235104072e-06, + "loss": 0.566002607345581, + "step": 1624 + }, + { + "epoch": 0.3746829605718238, + "grad_norm": 1.314391237074608, + "learning_rate": 1.8988311839463188e-06, + "loss": 0.5201380252838135, + "step": 1625 + }, + { + "epoch": 0.3749135347014065, + "grad_norm": 1.2844709164103703, + "learning_rate": 1.8986640137280087e-06, + "loss": 0.5103918313980103, + "step": 1626 + }, + { + "epoch": 0.37514410883098914, + "grad_norm": 1.081063959726764, + "learning_rate": 1.8984967128797763e-06, + "loss": 0.47900843620300293, + "step": 1627 + }, + { + "epoch": 0.3753746829605718, + "grad_norm": 1.0524739811683044, + "learning_rate": 1.898329281425941e-06, + "loss": 0.42991960048675537, + "step": 1628 + }, + { + "epoch": 0.37560525709015447, + "grad_norm": 1.2087969734027784, + "learning_rate": 1.89816171939084e-06, + "loss": 0.5707317590713501, + "step": 1629 + }, + { + "epoch": 0.3758358312197371, + "grad_norm": 1.0714171850017424, + "learning_rate": 1.8979940267988309e-06, + "loss": 0.565521240234375, + "step": 1630 + }, + { + "epoch": 0.3760664053493198, + "grad_norm": 1.2721353238917528, + "learning_rate": 1.8978262036742888e-06, + "loss": 0.6584400534629822, + "step": 1631 + }, + { + "epoch": 0.37629697947890245, + "grad_norm": 1.1181726564305359, + "learning_rate": 1.897658250041609e-06, + "loss": 0.4749317169189453, + "step": 1632 + }, + { + "epoch": 0.3765275536084851, + "grad_norm": 1.3732616000652873, + "learning_rate": 1.8974901659252048e-06, + "loss": 0.5495604872703552, + "step": 1633 + }, + { + "epoch": 0.37675812773806777, + "grad_norm": 1.6408199477459455, + "learning_rate": 1.8973219513495094e-06, + "loss": 
0.465708464384079, + "step": 1634 + }, + { + "epoch": 0.37698870186765043, + "grad_norm": 1.1887777428919946, + "learning_rate": 1.8971536063389742e-06, + "loss": 0.4599069058895111, + "step": 1635 + }, + { + "epoch": 0.3772192759972331, + "grad_norm": 1.1348638946303797, + "learning_rate": 1.89698513091807e-06, + "loss": 0.4716145694255829, + "step": 1636 + }, + { + "epoch": 0.37744985012681576, + "grad_norm": 0.990973234996169, + "learning_rate": 1.8968165251112863e-06, + "loss": 0.594079852104187, + "step": 1637 + }, + { + "epoch": 0.3776804242563984, + "grad_norm": 1.3300173886007076, + "learning_rate": 1.8966477889431317e-06, + "loss": 0.4588915705680847, + "step": 1638 + }, + { + "epoch": 0.3779109983859811, + "grad_norm": 1.5111913527277292, + "learning_rate": 1.8964789224381337e-06, + "loss": 0.5236901044845581, + "step": 1639 + }, + { + "epoch": 0.37814157251556374, + "grad_norm": 1.067104402214014, + "learning_rate": 1.8963099256208388e-06, + "loss": 0.4954737424850464, + "step": 1640 + }, + { + "epoch": 0.3783721466451464, + "grad_norm": 1.066408318154628, + "learning_rate": 1.8961407985158125e-06, + "loss": 0.4194701910018921, + "step": 1641 + }, + { + "epoch": 0.37860272077472906, + "grad_norm": 0.9999478144515371, + "learning_rate": 1.8959715411476388e-06, + "loss": 0.5368303060531616, + "step": 1642 + }, + { + "epoch": 0.3788332949043117, + "grad_norm": 1.2178837934755509, + "learning_rate": 1.8958021535409214e-06, + "loss": 0.5181677341461182, + "step": 1643 + }, + { + "epoch": 0.3790638690338944, + "grad_norm": 1.0342390187480546, + "learning_rate": 1.8956326357202821e-06, + "loss": 0.4755169749259949, + "step": 1644 + }, + { + "epoch": 0.37929444316347705, + "grad_norm": 1.1097461588236448, + "learning_rate": 1.8954629877103625e-06, + "loss": 0.5460895299911499, + "step": 1645 + }, + { + "epoch": 0.3795250172930597, + "grad_norm": 1.090972908814234, + "learning_rate": 1.8952932095358224e-06, + "loss": 0.47811684012413025, + "step": 1646 + }, + { + 
"epoch": 0.37975559142264237, + "grad_norm": 1.1794844360929688, + "learning_rate": 1.8951233012213405e-06, + "loss": 0.5791733860969543, + "step": 1647 + }, + { + "epoch": 0.37998616555222503, + "grad_norm": 1.1163036430533217, + "learning_rate": 1.8949532627916151e-06, + "loss": 0.4996911585330963, + "step": 1648 + }, + { + "epoch": 0.3802167396818077, + "grad_norm": 1.3190959058791496, + "learning_rate": 1.8947830942713628e-06, + "loss": 0.6108353137969971, + "step": 1649 + }, + { + "epoch": 0.38044731381139035, + "grad_norm": 1.2084081721604487, + "learning_rate": 1.8946127956853195e-06, + "loss": 0.5303040742874146, + "step": 1650 + }, + { + "epoch": 0.380677887940973, + "grad_norm": 1.0581391679258725, + "learning_rate": 1.8944423670582397e-06, + "loss": 0.4651896357536316, + "step": 1651 + }, + { + "epoch": 0.3809084620705557, + "grad_norm": 1.1464415021916683, + "learning_rate": 1.8942718084148969e-06, + "loss": 0.6321637630462646, + "step": 1652 + }, + { + "epoch": 0.38113903620013834, + "grad_norm": 1.1535120052175352, + "learning_rate": 1.8941011197800836e-06, + "loss": 0.5124787092208862, + "step": 1653 + }, + { + "epoch": 0.381369610329721, + "grad_norm": 1.2712538370269149, + "learning_rate": 1.893930301178611e-06, + "loss": 0.5779180526733398, + "step": 1654 + }, + { + "epoch": 0.38160018445930366, + "grad_norm": 1.2579128550158534, + "learning_rate": 1.8937593526353096e-06, + "loss": 0.5723867416381836, + "step": 1655 + }, + { + "epoch": 0.3818307585888863, + "grad_norm": 1.0216965854263103, + "learning_rate": 1.8935882741750281e-06, + "loss": 0.4312398433685303, + "step": 1656 + }, + { + "epoch": 0.382061332718469, + "grad_norm": 1.7195703110538068, + "learning_rate": 1.893417065822635e-06, + "loss": 0.6503756046295166, + "step": 1657 + }, + { + "epoch": 0.38229190684805164, + "grad_norm": 1.2691180997694498, + "learning_rate": 1.8932457276030166e-06, + "loss": 0.508478045463562, + "step": 1658 + }, + { + "epoch": 0.3825224809776343, + "grad_norm": 
0.9328619594784499, + "learning_rate": 1.8930742595410792e-06, + "loss": 0.46552446484565735, + "step": 1659 + }, + { + "epoch": 0.38275305510721697, + "grad_norm": 0.983497277362264, + "learning_rate": 1.8929026616617467e-06, + "loss": 0.4739278256893158, + "step": 1660 + }, + { + "epoch": 0.3829836292367996, + "grad_norm": 1.2642164913655083, + "learning_rate": 1.8927309339899634e-06, + "loss": 0.5584233403205872, + "step": 1661 + }, + { + "epoch": 0.3832142033663823, + "grad_norm": 1.0681648876128738, + "learning_rate": 1.8925590765506911e-06, + "loss": 0.6155074238777161, + "step": 1662 + }, + { + "epoch": 0.38344477749596495, + "grad_norm": 1.1479148469369402, + "learning_rate": 1.8923870893689112e-06, + "loss": 0.5253106951713562, + "step": 1663 + }, + { + "epoch": 0.3836753516255476, + "grad_norm": 1.2179992400932398, + "learning_rate": 1.8922149724696238e-06, + "loss": 0.4190565347671509, + "step": 1664 + }, + { + "epoch": 0.3839059257551303, + "grad_norm": 1.124098215736467, + "learning_rate": 1.892042725877848e-06, + "loss": 0.5263853073120117, + "step": 1665 + }, + { + "epoch": 0.38413649988471293, + "grad_norm": 1.0385777204325046, + "learning_rate": 1.8918703496186214e-06, + "loss": 0.4492432773113251, + "step": 1666 + }, + { + "epoch": 0.3843670740142956, + "grad_norm": 1.3356308613758272, + "learning_rate": 1.8916978437170004e-06, + "loss": 0.49745023250579834, + "step": 1667 + }, + { + "epoch": 0.38459764814387826, + "grad_norm": 1.2023114319635457, + "learning_rate": 1.891525208198061e-06, + "loss": 0.6003707647323608, + "step": 1668 + }, + { + "epoch": 0.3848282222734609, + "grad_norm": 1.6371184982518272, + "learning_rate": 1.8913524430868973e-06, + "loss": 0.5430049300193787, + "step": 1669 + }, + { + "epoch": 0.3850587964030436, + "grad_norm": 1.0715049923324578, + "learning_rate": 1.8911795484086222e-06, + "loss": 0.5561289191246033, + "step": 1670 + }, + { + "epoch": 0.38528937053262624, + "grad_norm": 1.1416350409171048, + "learning_rate": 
1.8910065241883678e-06, + "loss": 0.5488184690475464, + "step": 1671 + }, + { + "epoch": 0.3855199446622089, + "grad_norm": 1.0082475661815067, + "learning_rate": 1.890833370451285e-06, + "loss": 0.46347010135650635, + "step": 1672 + }, + { + "epoch": 0.38575051879179156, + "grad_norm": 1.0668592703569681, + "learning_rate": 1.8906600872225438e-06, + "loss": 0.553687334060669, + "step": 1673 + }, + { + "epoch": 0.3859810929213742, + "grad_norm": 1.1035800532005071, + "learning_rate": 1.8904866745273323e-06, + "loss": 0.46162208914756775, + "step": 1674 + }, + { + "epoch": 0.3862116670509569, + "grad_norm": 1.076914158561248, + "learning_rate": 1.8903131323908576e-06, + "loss": 0.4478996992111206, + "step": 1675 + }, + { + "epoch": 0.38644224118053955, + "grad_norm": 1.1488135535707533, + "learning_rate": 1.8901394608383463e-06, + "loss": 0.5857031345367432, + "step": 1676 + }, + { + "epoch": 0.3866728153101222, + "grad_norm": 1.5929334393746841, + "learning_rate": 1.8899656598950432e-06, + "loss": 0.592833399772644, + "step": 1677 + }, + { + "epoch": 0.38690338943970487, + "grad_norm": 1.0232228390237461, + "learning_rate": 1.8897917295862117e-06, + "loss": 0.6007786989212036, + "step": 1678 + }, + { + "epoch": 0.38713396356928753, + "grad_norm": 1.109869111259485, + "learning_rate": 1.8896176699371343e-06, + "loss": 0.5248164534568787, + "step": 1679 + }, + { + "epoch": 0.3873645376988702, + "grad_norm": 0.856016560201164, + "learning_rate": 1.8894434809731128e-06, + "loss": 0.43112409114837646, + "step": 1680 + }, + { + "epoch": 0.38759511182845285, + "grad_norm": 1.318795823918729, + "learning_rate": 1.8892691627194673e-06, + "loss": 0.56545090675354, + "step": 1681 + }, + { + "epoch": 0.3878256859580355, + "grad_norm": 1.1470159881146635, + "learning_rate": 1.8890947152015363e-06, + "loss": 0.6287904977798462, + "step": 1682 + }, + { + "epoch": 0.3880562600876182, + "grad_norm": 1.155806897456587, + "learning_rate": 1.8889201384446775e-06, + "loss": 
0.48461633920669556, + "step": 1683 + }, + { + "epoch": 0.38828683421720084, + "grad_norm": 1.2251476021613918, + "learning_rate": 1.888745432474268e-06, + "loss": 0.5089331865310669, + "step": 1684 + }, + { + "epoch": 0.3885174083467835, + "grad_norm": 0.9661641286318025, + "learning_rate": 1.8885705973157027e-06, + "loss": 0.4805281162261963, + "step": 1685 + }, + { + "epoch": 0.38874798247636616, + "grad_norm": 1.070887780603473, + "learning_rate": 1.8883956329943955e-06, + "loss": 0.5243096947669983, + "step": 1686 + }, + { + "epoch": 0.3889785566059488, + "grad_norm": 1.240979728566986, + "learning_rate": 1.8882205395357795e-06, + "loss": 0.5808781981468201, + "step": 1687 + }, + { + "epoch": 0.3892091307355315, + "grad_norm": 1.2574299318006046, + "learning_rate": 1.8880453169653063e-06, + "loss": 0.5397018194198608, + "step": 1688 + }, + { + "epoch": 0.38943970486511414, + "grad_norm": 1.182945649827907, + "learning_rate": 1.8878699653084462e-06, + "loss": 0.4475638270378113, + "step": 1689 + }, + { + "epoch": 0.3896702789946968, + "grad_norm": 1.3095447574792232, + "learning_rate": 1.8876944845906884e-06, + "loss": 0.6212958693504333, + "step": 1690 + }, + { + "epoch": 0.38990085312427947, + "grad_norm": 1.1726349359481907, + "learning_rate": 1.8875188748375407e-06, + "loss": 0.44465404748916626, + "step": 1691 + }, + { + "epoch": 0.39013142725386213, + "grad_norm": 1.2650698772045321, + "learning_rate": 1.8873431360745297e-06, + "loss": 0.5711641311645508, + "step": 1692 + }, + { + "epoch": 0.3903620013834448, + "grad_norm": 1.2039233000565408, + "learning_rate": 1.8871672683272012e-06, + "loss": 0.4527866244316101, + "step": 1693 + }, + { + "epoch": 0.39059257551302745, + "grad_norm": 1.515756125658867, + "learning_rate": 1.8869912716211188e-06, + "loss": 0.6242899894714355, + "step": 1694 + }, + { + "epoch": 0.3908231496426101, + "grad_norm": 1.6198907712835393, + "learning_rate": 1.8868151459818656e-06, + "loss": 0.6294416189193726, + "step": 1695 + }, 
+ { + "epoch": 0.3910537237721928, + "grad_norm": 1.2238875456694314, + "learning_rate": 1.8866388914350435e-06, + "loss": 0.49869638681411743, + "step": 1696 + }, + { + "epoch": 0.39128429790177544, + "grad_norm": 1.1755814842525432, + "learning_rate": 1.886462508006273e-06, + "loss": 0.5456752777099609, + "step": 1697 + }, + { + "epoch": 0.3915148720313581, + "grad_norm": 1.0114016306766007, + "learning_rate": 1.8862859957211926e-06, + "loss": 0.4197172224521637, + "step": 1698 + }, + { + "epoch": 0.39174544616094076, + "grad_norm": 1.0278658872450297, + "learning_rate": 1.8861093546054603e-06, + "loss": 0.5012276768684387, + "step": 1699 + }, + { + "epoch": 0.3919760202905234, + "grad_norm": 1.2065880303446173, + "learning_rate": 1.8859325846847531e-06, + "loss": 0.48108845949172974, + "step": 1700 + }, + { + "epoch": 0.3922065944201061, + "grad_norm": 1.1190986847477769, + "learning_rate": 1.885755685984766e-06, + "loss": 0.48592355847358704, + "step": 1701 + }, + { + "epoch": 0.39243716854968874, + "grad_norm": 1.136053467553038, + "learning_rate": 1.8855786585312132e-06, + "loss": 0.5744791030883789, + "step": 1702 + }, + { + "epoch": 0.3926677426792714, + "grad_norm": 1.1435558229801501, + "learning_rate": 1.8854015023498273e-06, + "loss": 0.5378769040107727, + "step": 1703 + }, + { + "epoch": 0.39289831680885406, + "grad_norm": 1.0710678493453967, + "learning_rate": 1.8852242174663594e-06, + "loss": 0.5630123615264893, + "step": 1704 + }, + { + "epoch": 0.3931288909384367, + "grad_norm": 1.0913466409725974, + "learning_rate": 1.8850468039065806e-06, + "loss": 0.5247849225997925, + "step": 1705 + }, + { + "epoch": 0.3933594650680194, + "grad_norm": 1.282307381217427, + "learning_rate": 1.884869261696279e-06, + "loss": 0.5679286122322083, + "step": 1706 + }, + { + "epoch": 0.39359003919760205, + "grad_norm": 1.0140902583392881, + "learning_rate": 1.8846915908612622e-06, + "loss": 0.4505179524421692, + "step": 1707 + }, + { + "epoch": 0.3938206133271847, + 
"grad_norm": 1.233342858229108, + "learning_rate": 1.8845137914273566e-06, + "loss": 0.6077077388763428, + "step": 1708 + }, + { + "epoch": 0.39405118745676737, + "grad_norm": 1.1523756442286543, + "learning_rate": 1.8843358634204069e-06, + "loss": 0.4703037738800049, + "step": 1709 + }, + { + "epoch": 0.39428176158635003, + "grad_norm": 1.3467147447696661, + "learning_rate": 1.8841578068662773e-06, + "loss": 0.6085091829299927, + "step": 1710 + }, + { + "epoch": 0.3945123357159327, + "grad_norm": 1.3769264461225226, + "learning_rate": 1.8839796217908498e-06, + "loss": 0.6075730919837952, + "step": 1711 + }, + { + "epoch": 0.39474290984551536, + "grad_norm": 1.4068518720273175, + "learning_rate": 1.8838013082200252e-06, + "loss": 0.581851601600647, + "step": 1712 + }, + { + "epoch": 0.394973483975098, + "grad_norm": 0.9365976129961602, + "learning_rate": 1.8836228661797234e-06, + "loss": 0.555284857749939, + "step": 1713 + }, + { + "epoch": 0.3952040581046807, + "grad_norm": 1.205134330479215, + "learning_rate": 1.8834442956958832e-06, + "loss": 0.5342675447463989, + "step": 1714 + }, + { + "epoch": 0.39543463223426334, + "grad_norm": 1.2329889286532099, + "learning_rate": 1.8832655967944605e-06, + "loss": 0.47501081228256226, + "step": 1715 + }, + { + "epoch": 0.395665206363846, + "grad_norm": 1.1350943426800137, + "learning_rate": 1.8830867695014323e-06, + "loss": 0.592293918132782, + "step": 1716 + }, + { + "epoch": 0.39589578049342866, + "grad_norm": 1.2591938264724012, + "learning_rate": 1.8829078138427921e-06, + "loss": 0.5903242826461792, + "step": 1717 + }, + { + "epoch": 0.3961263546230113, + "grad_norm": 1.203385992389072, + "learning_rate": 1.882728729844553e-06, + "loss": 0.5292568206787109, + "step": 1718 + }, + { + "epoch": 0.396356928752594, + "grad_norm": 1.070652075724697, + "learning_rate": 1.8825495175327468e-06, + "loss": 0.5748786926269531, + "step": 1719 + }, + { + "epoch": 0.39658750288217665, + "grad_norm": 1.230421737483, + "learning_rate": 
1.8823701769334242e-06, + "loss": 0.6191601753234863, + "step": 1720 + }, + { + "epoch": 0.3968180770117593, + "grad_norm": 1.180452919869617, + "learning_rate": 1.8821907080726535e-06, + "loss": 0.5569231510162354, + "step": 1721 + }, + { + "epoch": 0.39704865114134197, + "grad_norm": 1.291275382361216, + "learning_rate": 1.882011110976523e-06, + "loss": 0.5103349089622498, + "step": 1722 + }, + { + "epoch": 0.39727922527092463, + "grad_norm": 1.1952555855906501, + "learning_rate": 1.8818313856711382e-06, + "loss": 0.4981175363063812, + "step": 1723 + }, + { + "epoch": 0.39750979940050724, + "grad_norm": 1.5157833486690673, + "learning_rate": 1.8816515321826248e-06, + "loss": 0.5429514050483704, + "step": 1724 + }, + { + "epoch": 0.3977403735300899, + "grad_norm": 1.1377768164918185, + "learning_rate": 1.8814715505371254e-06, + "loss": 0.5318386554718018, + "step": 1725 + }, + { + "epoch": 0.39797094765967256, + "grad_norm": 1.0451576127270763, + "learning_rate": 1.881291440760803e-06, + "loss": 0.47451460361480713, + "step": 1726 + }, + { + "epoch": 0.3982015217892552, + "grad_norm": 1.2815255131055066, + "learning_rate": 1.8811112028798384e-06, + "loss": 0.5141372680664062, + "step": 1727 + }, + { + "epoch": 0.3984320959188379, + "grad_norm": 1.0864089006893662, + "learning_rate": 1.8809308369204302e-06, + "loss": 0.4950217008590698, + "step": 1728 + }, + { + "epoch": 0.39866267004842054, + "grad_norm": 0.9530925154379366, + "learning_rate": 1.880750342908797e-06, + "loss": 0.4961693286895752, + "step": 1729 + }, + { + "epoch": 0.3988932441780032, + "grad_norm": 1.1860643451162984, + "learning_rate": 1.8805697208711752e-06, + "loss": 0.43443650007247925, + "step": 1730 + }, + { + "epoch": 0.39912381830758586, + "grad_norm": 1.1332453377909741, + "learning_rate": 1.8803889708338203e-06, + "loss": 0.6116896867752075, + "step": 1731 + }, + { + "epoch": 0.3993543924371685, + "grad_norm": 0.9403622624868753, + "learning_rate": 1.8802080928230062e-06, + "loss": 
0.46244728565216064, + "step": 1732 + }, + { + "epoch": 0.3995849665667512, + "grad_norm": 1.3180964068285155, + "learning_rate": 1.880027086865025e-06, + "loss": 0.5728162527084351, + "step": 1733 + }, + { + "epoch": 0.39981554069633385, + "grad_norm": 1.1310284579414278, + "learning_rate": 1.8798459529861876e-06, + "loss": 0.4472135901451111, + "step": 1734 + }, + { + "epoch": 0.4000461148259165, + "grad_norm": 1.4100215542732757, + "learning_rate": 1.8796646912128246e-06, + "loss": 0.5862090587615967, + "step": 1735 + }, + { + "epoch": 0.40027668895549917, + "grad_norm": 1.428537555998266, + "learning_rate": 1.8794833015712831e-06, + "loss": 0.6406301259994507, + "step": 1736 + }, + { + "epoch": 0.40050726308508183, + "grad_norm": 1.3320783455965834, + "learning_rate": 1.8793017840879306e-06, + "loss": 0.5865743160247803, + "step": 1737 + }, + { + "epoch": 0.4007378372146645, + "grad_norm": 1.2736301947050057, + "learning_rate": 1.8791201387891524e-06, + "loss": 0.5521814823150635, + "step": 1738 + }, + { + "epoch": 0.40096841134424716, + "grad_norm": 0.9710129928143749, + "learning_rate": 1.8789383657013522e-06, + "loss": 0.40027791261672974, + "step": 1739 + }, + { + "epoch": 0.4011989854738298, + "grad_norm": 1.213730124395359, + "learning_rate": 1.8787564648509528e-06, + "loss": 0.5594751238822937, + "step": 1740 + }, + { + "epoch": 0.4014295596034125, + "grad_norm": 1.2077878384788876, + "learning_rate": 1.8785744362643955e-06, + "loss": 0.5029730796813965, + "step": 1741 + }, + { + "epoch": 0.40166013373299514, + "grad_norm": 1.086599940670418, + "learning_rate": 1.8783922799681397e-06, + "loss": 0.6089034676551819, + "step": 1742 + }, + { + "epoch": 0.4018907078625778, + "grad_norm": 1.178028157014987, + "learning_rate": 1.8782099959886639e-06, + "loss": 0.5238372683525085, + "step": 1743 + }, + { + "epoch": 0.40212128199216046, + "grad_norm": 1.0430681899893623, + "learning_rate": 1.8780275843524643e-06, + "loss": 0.47281232476234436, + "step": 1744 + }, 
+ { + "epoch": 0.4023518561217431, + "grad_norm": 1.0603667709126336, + "learning_rate": 1.8778450450860571e-06, + "loss": 0.44885876774787903, + "step": 1745 + }, + { + "epoch": 0.4025824302513258, + "grad_norm": 1.1187549409367323, + "learning_rate": 1.8776623782159762e-06, + "loss": 0.5915139317512512, + "step": 1746 + }, + { + "epoch": 0.40281300438090845, + "grad_norm": 1.6743224234561098, + "learning_rate": 1.8774795837687736e-06, + "loss": 0.49341484904289246, + "step": 1747 + }, + { + "epoch": 0.4030435785104911, + "grad_norm": 1.1133076324661322, + "learning_rate": 1.8772966617710205e-06, + "loss": 0.43253493309020996, + "step": 1748 + }, + { + "epoch": 0.40327415264007377, + "grad_norm": 1.2596810310862556, + "learning_rate": 1.8771136122493064e-06, + "loss": 0.48660045862197876, + "step": 1749 + }, + { + "epoch": 0.40350472676965643, + "grad_norm": 1.158836920018239, + "learning_rate": 1.8769304352302396e-06, + "loss": 0.4493838846683502, + "step": 1750 + }, + { + "epoch": 0.4037353008992391, + "grad_norm": 1.1033409495303377, + "learning_rate": 1.8767471307404464e-06, + "loss": 0.5656435489654541, + "step": 1751 + }, + { + "epoch": 0.40396587502882175, + "grad_norm": 1.1945430976561655, + "learning_rate": 1.876563698806572e-06, + "loss": 0.48047327995300293, + "step": 1752 + }, + { + "epoch": 0.4041964491584044, + "grad_norm": 1.117811372759575, + "learning_rate": 1.8763801394552806e-06, + "loss": 0.5314204692840576, + "step": 1753 + }, + { + "epoch": 0.4044270232879871, + "grad_norm": 1.212293607312766, + "learning_rate": 1.876196452713254e-06, + "loss": 0.5436627864837646, + "step": 1754 + }, + { + "epoch": 0.40465759741756974, + "grad_norm": 1.1748084841171984, + "learning_rate": 1.8760126386071933e-06, + "loss": 0.5383991599082947, + "step": 1755 + }, + { + "epoch": 0.4048881715471524, + "grad_norm": 1.1737559222863878, + "learning_rate": 1.8758286971638171e-06, + "loss": 0.48271507024765015, + "step": 1756 + }, + { + "epoch": 0.40511874567673506, + 
"grad_norm": 1.0323965631837329, + "learning_rate": 1.8756446284098638e-06, + "loss": 0.5920745134353638, + "step": 1757 + }, + { + "epoch": 0.4053493198063177, + "grad_norm": 1.1254236464300211, + "learning_rate": 1.875460432372089e-06, + "loss": 0.4467526078224182, + "step": 1758 + }, + { + "epoch": 0.4055798939359004, + "grad_norm": 0.9503211623796617, + "learning_rate": 1.875276109077268e-06, + "loss": 0.425409734249115, + "step": 1759 + }, + { + "epoch": 0.40581046806548304, + "grad_norm": 1.1318149217921376, + "learning_rate": 1.8750916585521938e-06, + "loss": 0.4911944568157196, + "step": 1760 + }, + { + "epoch": 0.4060410421950657, + "grad_norm": 1.5865124774001016, + "learning_rate": 1.8749070808236787e-06, + "loss": 0.49605780839920044, + "step": 1761 + }, + { + "epoch": 0.40627161632464837, + "grad_norm": 1.322640956813398, + "learning_rate": 1.874722375918552e-06, + "loss": 0.5582889914512634, + "step": 1762 + }, + { + "epoch": 0.406502190454231, + "grad_norm": 1.0487904765861873, + "learning_rate": 1.874537543863663e-06, + "loss": 0.4867294132709503, + "step": 1763 + }, + { + "epoch": 0.4067327645838137, + "grad_norm": 1.062364022734449, + "learning_rate": 1.8743525846858787e-06, + "loss": 0.5050587058067322, + "step": 1764 + }, + { + "epoch": 0.40696333871339635, + "grad_norm": 1.0581562602291477, + "learning_rate": 1.8741674984120852e-06, + "loss": 0.4380977749824524, + "step": 1765 + }, + { + "epoch": 0.407193912842979, + "grad_norm": 1.326690473297383, + "learning_rate": 1.8739822850691865e-06, + "loss": 0.5159280300140381, + "step": 1766 + }, + { + "epoch": 0.4074244869725617, + "grad_norm": 1.3542586293022822, + "learning_rate": 1.8737969446841046e-06, + "loss": 0.6999780535697937, + "step": 1767 + }, + { + "epoch": 0.40765506110214433, + "grad_norm": 1.110421221417803, + "learning_rate": 1.8736114772837816e-06, + "loss": 0.5844931602478027, + "step": 1768 + }, + { + "epoch": 0.407885635231727, + "grad_norm": 1.2621793403708754, + 
"learning_rate": 1.8734258828951764e-06, + "loss": 0.5078610181808472, + "step": 1769 + }, + { + "epoch": 0.40811620936130966, + "grad_norm": 1.1260800835324682, + "learning_rate": 1.8732401615452673e-06, + "loss": 0.564793586730957, + "step": 1770 + }, + { + "epoch": 0.4083467834908923, + "grad_norm": 1.2906459398399637, + "learning_rate": 1.8730543132610506e-06, + "loss": 0.6145100593566895, + "step": 1771 + }, + { + "epoch": 0.408577357620475, + "grad_norm": 1.181953537531204, + "learning_rate": 1.8728683380695414e-06, + "loss": 0.45434027910232544, + "step": 1772 + }, + { + "epoch": 0.40880793175005764, + "grad_norm": 1.0716516851559217, + "learning_rate": 1.872682235997773e-06, + "loss": 0.4917553961277008, + "step": 1773 + }, + { + "epoch": 0.4090385058796403, + "grad_norm": 1.0983534367258283, + "learning_rate": 1.872496007072797e-06, + "loss": 0.5677252411842346, + "step": 1774 + }, + { + "epoch": 0.40926908000922296, + "grad_norm": 1.042591224606922, + "learning_rate": 1.872309651321684e-06, + "loss": 0.5516688823699951, + "step": 1775 + }, + { + "epoch": 0.4094996541388056, + "grad_norm": 0.9746786592567609, + "learning_rate": 1.8721231687715227e-06, + "loss": 0.46755337715148926, + "step": 1776 + }, + { + "epoch": 0.4097302282683883, + "grad_norm": 1.3130136596789415, + "learning_rate": 1.8719365594494202e-06, + "loss": 0.6575521230697632, + "step": 1777 + }, + { + "epoch": 0.40996080239797095, + "grad_norm": 1.147271087293654, + "learning_rate": 1.8717498233825019e-06, + "loss": 0.6088716983795166, + "step": 1778 + }, + { + "epoch": 0.4101913765275536, + "grad_norm": 0.9692417840942277, + "learning_rate": 1.8715629605979118e-06, + "loss": 0.39476478099823, + "step": 1779 + }, + { + "epoch": 0.41042195065713627, + "grad_norm": 1.1915743629339146, + "learning_rate": 1.8713759711228123e-06, + "loss": 0.4893898665904999, + "step": 1780 + }, + { + "epoch": 0.41065252478671893, + "grad_norm": 1.298092223223541, + "learning_rate": 1.8711888549843842e-06, + 
"loss": 0.5077828764915466, + "step": 1781 + }, + { + "epoch": 0.4108830989163016, + "grad_norm": 1.0084481520460131, + "learning_rate": 1.8710016122098269e-06, + "loss": 0.5212582349777222, + "step": 1782 + }, + { + "epoch": 0.41111367304588425, + "grad_norm": 1.1325685052130308, + "learning_rate": 1.870814242826358e-06, + "loss": 0.5135321617126465, + "step": 1783 + }, + { + "epoch": 0.4113442471754669, + "grad_norm": 1.3281766258765773, + "learning_rate": 1.8706267468612133e-06, + "loss": 0.5398930311203003, + "step": 1784 + }, + { + "epoch": 0.4115748213050496, + "grad_norm": 1.3736547238310808, + "learning_rate": 1.8704391243416477e-06, + "loss": 0.49205562472343445, + "step": 1785 + }, + { + "epoch": 0.41180539543463224, + "grad_norm": 1.1386437791047925, + "learning_rate": 1.8702513752949335e-06, + "loss": 0.5145718455314636, + "step": 1786 + }, + { + "epoch": 0.4120359695642149, + "grad_norm": 0.9532031818658743, + "learning_rate": 1.8700634997483622e-06, + "loss": 0.4868374466896057, + "step": 1787 + }, + { + "epoch": 0.41226654369379756, + "grad_norm": 1.3881400467911258, + "learning_rate": 1.8698754977292435e-06, + "loss": 0.5409311652183533, + "step": 1788 + }, + { + "epoch": 0.4124971178233802, + "grad_norm": 1.307800898328953, + "learning_rate": 1.8696873692649052e-06, + "loss": 0.5476658344268799, + "step": 1789 + }, + { + "epoch": 0.4127276919529629, + "grad_norm": 1.251951597359409, + "learning_rate": 1.8694991143826937e-06, + "loss": 0.5545511245727539, + "step": 1790 + }, + { + "epoch": 0.41295826608254554, + "grad_norm": 1.1923559975321376, + "learning_rate": 1.869310733109974e-06, + "loss": 0.5479267835617065, + "step": 1791 + }, + { + "epoch": 0.4131888402121282, + "grad_norm": 1.1567279350887396, + "learning_rate": 1.8691222254741289e-06, + "loss": 0.5261585712432861, + "step": 1792 + }, + { + "epoch": 0.41341941434171087, + "grad_norm": 1.035636889065738, + "learning_rate": 1.8689335915025599e-06, + "loss": 0.5478091239929199, + "step": 1793 
+ }, + { + "epoch": 0.41364998847129353, + "grad_norm": 1.5699808716332777, + "learning_rate": 1.8687448312226872e-06, + "loss": 0.6739054322242737, + "step": 1794 + }, + { + "epoch": 0.4138805626008762, + "grad_norm": 1.2236857571837823, + "learning_rate": 1.8685559446619487e-06, + "loss": 0.613865315914154, + "step": 1795 + }, + { + "epoch": 0.41411113673045885, + "grad_norm": 1.0357788562325108, + "learning_rate": 1.8683669318478012e-06, + "loss": 0.3936721384525299, + "step": 1796 + }, + { + "epoch": 0.4143417108600415, + "grad_norm": 1.2330991076599302, + "learning_rate": 1.8681777928077197e-06, + "loss": 0.5508556365966797, + "step": 1797 + }, + { + "epoch": 0.4145722849896242, + "grad_norm": 1.1597942164225867, + "learning_rate": 1.867988527569197e-06, + "loss": 0.47734567523002625, + "step": 1798 + }, + { + "epoch": 0.41480285911920683, + "grad_norm": 1.0741273588884312, + "learning_rate": 1.8677991361597449e-06, + "loss": 0.46847039461135864, + "step": 1799 + }, + { + "epoch": 0.4150334332487895, + "grad_norm": 1.0364595457718502, + "learning_rate": 1.8676096186068937e-06, + "loss": 0.5202786326408386, + "step": 1800 + }, + { + "epoch": 0.41526400737837216, + "grad_norm": 1.2972392907268704, + "learning_rate": 1.8674199749381914e-06, + "loss": 0.5144700407981873, + "step": 1801 + }, + { + "epoch": 0.4154945815079548, + "grad_norm": 1.1959128972921023, + "learning_rate": 1.8672302051812048e-06, + "loss": 0.4394092559814453, + "step": 1802 + }, + { + "epoch": 0.4157251556375375, + "grad_norm": 1.159378410595036, + "learning_rate": 1.8670403093635185e-06, + "loss": 0.5017338991165161, + "step": 1803 + }, + { + "epoch": 0.41595572976712014, + "grad_norm": 1.173120824085894, + "learning_rate": 1.8668502875127366e-06, + "loss": 0.409381628036499, + "step": 1804 + }, + { + "epoch": 0.4161863038967028, + "grad_norm": 1.0538601271665184, + "learning_rate": 1.8666601396564795e-06, + "loss": 0.5193957090377808, + "step": 1805 + }, + { + "epoch": 0.41641687802628546, 
+ "grad_norm": 1.1338279816499315, + "learning_rate": 1.8664698658223882e-06, + "loss": 0.5933586359024048, + "step": 1806 + }, + { + "epoch": 0.4166474521558681, + "grad_norm": 1.1304820859227924, + "learning_rate": 1.8662794660381204e-06, + "loss": 0.5283366441726685, + "step": 1807 + }, + { + "epoch": 0.4168780262854508, + "grad_norm": 1.118558214164988, + "learning_rate": 1.8660889403313526e-06, + "loss": 0.5063748359680176, + "step": 1808 + }, + { + "epoch": 0.41710860041503345, + "grad_norm": 1.087893149342631, + "learning_rate": 1.86589828872978e-06, + "loss": 0.6386028528213501, + "step": 1809 + }, + { + "epoch": 0.4173391745446161, + "grad_norm": 1.0041938541729358, + "learning_rate": 1.8657075112611153e-06, + "loss": 0.4618440270423889, + "step": 1810 + }, + { + "epoch": 0.41756974867419877, + "grad_norm": 1.3214046412105014, + "learning_rate": 1.8655166079530903e-06, + "loss": 0.4523535966873169, + "step": 1811 + }, + { + "epoch": 0.41780032280378143, + "grad_norm": 1.0747078557029888, + "learning_rate": 1.8653255788334544e-06, + "loss": 0.501311719417572, + "step": 1812 + }, + { + "epoch": 0.4180308969333641, + "grad_norm": 1.112333239244982, + "learning_rate": 1.865134423929976e-06, + "loss": 0.5504614114761353, + "step": 1813 + }, + { + "epoch": 0.41826147106294675, + "grad_norm": 1.0979124892402103, + "learning_rate": 1.864943143270441e-06, + "loss": 0.44275063276290894, + "step": 1814 + }, + { + "epoch": 0.4184920451925294, + "grad_norm": 1.2558217334961832, + "learning_rate": 1.8647517368826545e-06, + "loss": 0.5628173351287842, + "step": 1815 + }, + { + "epoch": 0.4187226193221121, + "grad_norm": 1.032119999950418, + "learning_rate": 1.864560204794439e-06, + "loss": 0.489221453666687, + "step": 1816 + }, + { + "epoch": 0.41895319345169474, + "grad_norm": 1.2211401188891802, + "learning_rate": 1.8643685470336355e-06, + "loss": 0.5440137386322021, + "step": 1817 + }, + { + "epoch": 0.4191837675812774, + "grad_norm": 1.169073111073683, + 
"learning_rate": 1.8641767636281035e-06, + "loss": 0.4518952965736389, + "step": 1818 + }, + { + "epoch": 0.41941434171086006, + "grad_norm": 1.3403542594346476, + "learning_rate": 1.8639848546057209e-06, + "loss": 0.591090977191925, + "step": 1819 + }, + { + "epoch": 0.4196449158404427, + "grad_norm": 1.1775626126130905, + "learning_rate": 1.8637928199943836e-06, + "loss": 0.5622411966323853, + "step": 1820 + }, + { + "epoch": 0.4198754899700254, + "grad_norm": 1.1913164061698733, + "learning_rate": 1.8636006598220052e-06, + "loss": 0.5086779594421387, + "step": 1821 + }, + { + "epoch": 0.42010606409960805, + "grad_norm": 1.1334153574078034, + "learning_rate": 1.8634083741165188e-06, + "loss": 0.5055384635925293, + "step": 1822 + }, + { + "epoch": 0.4203366382291907, + "grad_norm": 1.129676706405598, + "learning_rate": 1.863215962905875e-06, + "loss": 0.5076277852058411, + "step": 1823 + }, + { + "epoch": 0.42056721235877337, + "grad_norm": 1.2637764937692704, + "learning_rate": 1.8630234262180424e-06, + "loss": 0.5378403067588806, + "step": 1824 + }, + { + "epoch": 0.42079778648835603, + "grad_norm": 1.0886873342980177, + "learning_rate": 1.8628307640810083e-06, + "loss": 0.6133165955543518, + "step": 1825 + }, + { + "epoch": 0.4210283606179387, + "grad_norm": 1.1726755470049002, + "learning_rate": 1.8626379765227782e-06, + "loss": 0.4978156089782715, + "step": 1826 + }, + { + "epoch": 0.42125893474752135, + "grad_norm": 1.0651427070474233, + "learning_rate": 1.8624450635713759e-06, + "loss": 0.43159037828445435, + "step": 1827 + }, + { + "epoch": 0.421489508877104, + "grad_norm": 1.0498543002649237, + "learning_rate": 1.8622520252548424e-06, + "loss": 0.48821642994880676, + "step": 1828 + }, + { + "epoch": 0.4217200830066867, + "grad_norm": 1.016883491579865, + "learning_rate": 1.8620588616012387e-06, + "loss": 0.4666696786880493, + "step": 1829 + }, + { + "epoch": 0.42195065713626934, + "grad_norm": 1.3621906870852534, + "learning_rate": 1.8618655726386425e-06, 
+ "loss": 0.5278067588806152, + "step": 1830 + }, + { + "epoch": 0.422181231265852, + "grad_norm": 1.0791230542588068, + "learning_rate": 1.8616721583951512e-06, + "loss": 0.4357749819755554, + "step": 1831 + }, + { + "epoch": 0.42241180539543466, + "grad_norm": 1.2299213864410639, + "learning_rate": 1.8614786188988782e-06, + "loss": 0.5388439893722534, + "step": 1832 + }, + { + "epoch": 0.4226423795250173, + "grad_norm": 1.4108572710321559, + "learning_rate": 1.8612849541779573e-06, + "loss": 0.5443956255912781, + "step": 1833 + }, + { + "epoch": 0.4228729536546, + "grad_norm": 1.2641105463427431, + "learning_rate": 1.86109116426054e-06, + "loss": 0.5614160895347595, + "step": 1834 + }, + { + "epoch": 0.4231035277841826, + "grad_norm": 1.2744746751945835, + "learning_rate": 1.8608972491747943e-06, + "loss": 0.45780229568481445, + "step": 1835 + }, + { + "epoch": 0.42333410191376525, + "grad_norm": 1.4638598184796152, + "learning_rate": 1.8607032089489088e-06, + "loss": 0.6354867219924927, + "step": 1836 + }, + { + "epoch": 0.4235646760433479, + "grad_norm": 1.2548140048045007, + "learning_rate": 1.860509043611089e-06, + "loss": 0.5172948241233826, + "step": 1837 + }, + { + "epoch": 0.42379525017293057, + "grad_norm": 1.1235697857312772, + "learning_rate": 1.8603147531895586e-06, + "loss": 0.4353157877922058, + "step": 1838 + }, + { + "epoch": 0.42402582430251323, + "grad_norm": 1.1680682893696177, + "learning_rate": 1.8601203377125599e-06, + "loss": 0.4971036911010742, + "step": 1839 + }, + { + "epoch": 0.4242563984320959, + "grad_norm": 1.0750331417799794, + "learning_rate": 1.859925797208353e-06, + "loss": 0.5037736296653748, + "step": 1840 + }, + { + "epoch": 0.42448697256167855, + "grad_norm": 1.052234823772871, + "learning_rate": 1.8597311317052165e-06, + "loss": 0.4480808675289154, + "step": 1841 + }, + { + "epoch": 0.4247175466912612, + "grad_norm": 1.2441100874175304, + "learning_rate": 1.8595363412314468e-06, + "loss": 0.5102680325508118, + "step": 1842 + 
}, + { + "epoch": 0.4249481208208439, + "grad_norm": 1.1806961844163353, + "learning_rate": 1.8593414258153585e-06, + "loss": 0.5979090929031372, + "step": 1843 + }, + { + "epoch": 0.42517869495042654, + "grad_norm": 1.0776260642041309, + "learning_rate": 1.8591463854852854e-06, + "loss": 0.4616047143936157, + "step": 1844 + }, + { + "epoch": 0.4254092690800092, + "grad_norm": 1.0059742827824252, + "learning_rate": 1.8589512202695773e-06, + "loss": 0.4893925189971924, + "step": 1845 + }, + { + "epoch": 0.42563984320959186, + "grad_norm": 1.0527785435538273, + "learning_rate": 1.8587559301966045e-06, + "loss": 0.49619823694229126, + "step": 1846 + }, + { + "epoch": 0.4258704173391745, + "grad_norm": 1.0558967393125807, + "learning_rate": 1.858560515294754e-06, + "loss": 0.5205181837081909, + "step": 1847 + }, + { + "epoch": 0.4261009914687572, + "grad_norm": 1.3589791827910958, + "learning_rate": 1.8583649755924315e-06, + "loss": 0.5910394191741943, + "step": 1848 + }, + { + "epoch": 0.42633156559833985, + "grad_norm": 1.0092224062378152, + "learning_rate": 1.8581693111180603e-06, + "loss": 0.4916709363460541, + "step": 1849 + }, + { + "epoch": 0.4265621397279225, + "grad_norm": 1.261654259944108, + "learning_rate": 1.8579735219000824e-06, + "loss": 0.5728994011878967, + "step": 1850 + }, + { + "epoch": 0.42679271385750517, + "grad_norm": 1.162885813109175, + "learning_rate": 1.857777607966958e-06, + "loss": 0.49620527029037476, + "step": 1851 + }, + { + "epoch": 0.42702328798708783, + "grad_norm": 1.2230754640158692, + "learning_rate": 1.8575815693471649e-06, + "loss": 0.5100233554840088, + "step": 1852 + }, + { + "epoch": 0.4272538621166705, + "grad_norm": 1.1713081386962017, + "learning_rate": 1.8573854060691994e-06, + "loss": 0.48981544375419617, + "step": 1853 + }, + { + "epoch": 0.42748443624625315, + "grad_norm": 1.0875128431195988, + "learning_rate": 1.8571891181615755e-06, + "loss": 0.44190293550491333, + "step": 1854 + }, + { + "epoch": 0.4277150103758358, 
+ "grad_norm": 1.2645757986317834, + "learning_rate": 1.8569927056528264e-06, + "loss": 0.42867448925971985, + "step": 1855 + }, + { + "epoch": 0.4279455845054185, + "grad_norm": 1.849182592399251, + "learning_rate": 1.8567961685715016e-06, + "loss": 0.4873782694339752, + "step": 1856 + }, + { + "epoch": 0.42817615863500114, + "grad_norm": 1.2007241803680166, + "learning_rate": 1.8565995069461706e-06, + "loss": 0.4985312819480896, + "step": 1857 + }, + { + "epoch": 0.4284067327645838, + "grad_norm": 1.2242163730204847, + "learning_rate": 1.85640272080542e-06, + "loss": 0.5525496006011963, + "step": 1858 + }, + { + "epoch": 0.42863730689416646, + "grad_norm": 1.293851624108558, + "learning_rate": 1.8562058101778547e-06, + "loss": 0.5645877122879028, + "step": 1859 + }, + { + "epoch": 0.4288678810237491, + "grad_norm": 1.0805291431045556, + "learning_rate": 1.856008775092097e-06, + "loss": 0.4304332137107849, + "step": 1860 + }, + { + "epoch": 0.4290984551533318, + "grad_norm": 1.14759009112306, + "learning_rate": 1.8558116155767888e-06, + "loss": 0.4970170259475708, + "step": 1861 + }, + { + "epoch": 0.42932902928291444, + "grad_norm": 1.344010966492771, + "learning_rate": 1.8556143316605888e-06, + "loss": 0.5718003511428833, + "step": 1862 + }, + { + "epoch": 0.4295596034124971, + "grad_norm": 1.3157067542574963, + "learning_rate": 1.8554169233721741e-06, + "loss": 0.4445415139198303, + "step": 1863 + }, + { + "epoch": 0.42979017754207977, + "grad_norm": 1.1001033203387223, + "learning_rate": 1.8552193907402404e-06, + "loss": 0.5297178626060486, + "step": 1864 + }, + { + "epoch": 0.4300207516716624, + "grad_norm": 0.9618626645905404, + "learning_rate": 1.8550217337935013e-06, + "loss": 0.4564483165740967, + "step": 1865 + }, + { + "epoch": 0.4302513258012451, + "grad_norm": 1.2509575429906847, + "learning_rate": 1.8548239525606872e-06, + "loss": 0.4789202809333801, + "step": 1866 + }, + { + "epoch": 0.43048189993082775, + "grad_norm": 1.0950598228304256, + 
"learning_rate": 1.8546260470705485e-06, + "loss": 0.5240263938903809, + "step": 1867 + }, + { + "epoch": 0.4307124740604104, + "grad_norm": 1.0326884664902543, + "learning_rate": 1.8544280173518523e-06, + "loss": 0.4190866947174072, + "step": 1868 + }, + { + "epoch": 0.43094304818999307, + "grad_norm": 1.098749197470929, + "learning_rate": 1.8542298634333844e-06, + "loss": 0.502301812171936, + "step": 1869 + }, + { + "epoch": 0.43117362231957573, + "grad_norm": 1.3711612309046508, + "learning_rate": 1.8540315853439488e-06, + "loss": 0.5752545595169067, + "step": 1870 + }, + { + "epoch": 0.4314041964491584, + "grad_norm": 0.9641480143185914, + "learning_rate": 1.8538331831123667e-06, + "loss": 0.44959962368011475, + "step": 1871 + }, + { + "epoch": 0.43163477057874106, + "grad_norm": 1.2299121621798328, + "learning_rate": 1.8536346567674782e-06, + "loss": 0.5320106148719788, + "step": 1872 + }, + { + "epoch": 0.4318653447083237, + "grad_norm": 1.393182956860924, + "learning_rate": 1.8534360063381407e-06, + "loss": 0.5981979966163635, + "step": 1873 + }, + { + "epoch": 0.4320959188379064, + "grad_norm": 1.350381662747622, + "learning_rate": 1.8532372318532306e-06, + "loss": 0.5567579865455627, + "step": 1874 + }, + { + "epoch": 0.43232649296748904, + "grad_norm": 1.4350681093951811, + "learning_rate": 1.8530383333416415e-06, + "loss": 0.5604764223098755, + "step": 1875 + }, + { + "epoch": 0.4325570670970717, + "grad_norm": 1.4048444099270982, + "learning_rate": 1.8528393108322852e-06, + "loss": 0.5410721302032471, + "step": 1876 + }, + { + "epoch": 0.43278764122665436, + "grad_norm": 1.1191045271107989, + "learning_rate": 1.852640164354092e-06, + "loss": 0.5417271852493286, + "step": 1877 + }, + { + "epoch": 0.433018215356237, + "grad_norm": 1.1925092385457925, + "learning_rate": 1.8524408939360096e-06, + "loss": 0.5831471681594849, + "step": 1878 + }, + { + "epoch": 0.4332487894858197, + "grad_norm": 1.0939224950949575, + "learning_rate": 1.8522414996070045e-06, + 
"loss": 0.45030760765075684, + "step": 1879 + }, + { + "epoch": 0.43347936361540235, + "grad_norm": 1.1520994484307991, + "learning_rate": 1.8520419813960596e-06, + "loss": 0.44657936692237854, + "step": 1880 + }, + { + "epoch": 0.433709937744985, + "grad_norm": 1.1691007631884454, + "learning_rate": 1.851842339332178e-06, + "loss": 0.5472795963287354, + "step": 1881 + }, + { + "epoch": 0.43394051187456767, + "grad_norm": 1.1388268257083902, + "learning_rate": 1.8516425734443786e-06, + "loss": 0.4883359968662262, + "step": 1882 + }, + { + "epoch": 0.43417108600415033, + "grad_norm": 1.0473976151781044, + "learning_rate": 1.8514426837617006e-06, + "loss": 0.5172675848007202, + "step": 1883 + }, + { + "epoch": 0.434401660133733, + "grad_norm": 1.2812470936666533, + "learning_rate": 1.851242670313199e-06, + "loss": 0.5253418684005737, + "step": 1884 + }, + { + "epoch": 0.43463223426331565, + "grad_norm": 1.2940121862284113, + "learning_rate": 1.8510425331279485e-06, + "loss": 0.4684918522834778, + "step": 1885 + }, + { + "epoch": 0.4348628083928983, + "grad_norm": 1.7313907662218715, + "learning_rate": 1.8508422722350404e-06, + "loss": 0.522485077381134, + "step": 1886 + }, + { + "epoch": 0.435093382522481, + "grad_norm": 1.0862530759153244, + "learning_rate": 1.8506418876635852e-06, + "loss": 0.5123787522315979, + "step": 1887 + }, + { + "epoch": 0.43532395665206364, + "grad_norm": 1.2812741997977775, + "learning_rate": 1.8504413794427106e-06, + "loss": 0.5195976495742798, + "step": 1888 + }, + { + "epoch": 0.4355545307816463, + "grad_norm": 1.081503403719265, + "learning_rate": 1.8502407476015626e-06, + "loss": 0.48394906520843506, + "step": 1889 + }, + { + "epoch": 0.43578510491122896, + "grad_norm": 1.2031421687566246, + "learning_rate": 1.850039992169305e-06, + "loss": 0.5083323121070862, + "step": 1890 + }, + { + "epoch": 0.4360156790408116, + "grad_norm": 1.2379097603599272, + "learning_rate": 1.8498391131751196e-06, + "loss": 0.5303651094436646, + "step": 1891 
+ }, + { + "epoch": 0.4362462531703943, + "grad_norm": 1.010820397187413, + "learning_rate": 1.8496381106482062e-06, + "loss": 0.49429047107696533, + "step": 1892 + }, + { + "epoch": 0.43647682729997694, + "grad_norm": 1.2506572926955764, + "learning_rate": 1.8494369846177826e-06, + "loss": 0.5263347625732422, + "step": 1893 + }, + { + "epoch": 0.4367074014295596, + "grad_norm": 1.3195849148516783, + "learning_rate": 1.8492357351130848e-06, + "loss": 0.5332654714584351, + "step": 1894 + }, + { + "epoch": 0.43693797555914227, + "grad_norm": 1.1692381501686961, + "learning_rate": 1.8490343621633657e-06, + "loss": 0.5598278045654297, + "step": 1895 + }, + { + "epoch": 0.43716854968872493, + "grad_norm": 1.0323293964159153, + "learning_rate": 1.8488328657978975e-06, + "loss": 0.4026976227760315, + "step": 1896 + }, + { + "epoch": 0.4373991238183076, + "grad_norm": 1.3568102099956687, + "learning_rate": 1.8486312460459698e-06, + "loss": 0.4277791380882263, + "step": 1897 + }, + { + "epoch": 0.43762969794789025, + "grad_norm": 1.2550644818276735, + "learning_rate": 1.8484295029368896e-06, + "loss": 0.49567973613739014, + "step": 1898 + }, + { + "epoch": 0.4378602720774729, + "grad_norm": 1.3750960531365106, + "learning_rate": 1.8482276364999828e-06, + "loss": 0.4659258723258972, + "step": 1899 + }, + { + "epoch": 0.4380908462070556, + "grad_norm": 1.4921650354400726, + "learning_rate": 1.8480256467645923e-06, + "loss": 0.4950314164161682, + "step": 1900 + }, + { + "epoch": 0.43832142033663823, + "grad_norm": 1.2407118809889077, + "learning_rate": 1.8478235337600796e-06, + "loss": 0.5584981441497803, + "step": 1901 + }, + { + "epoch": 0.4385519944662209, + "grad_norm": 1.4539173472262998, + "learning_rate": 1.847621297515824e-06, + "loss": 0.6322404146194458, + "step": 1902 + }, + { + "epoch": 0.43878256859580356, + "grad_norm": 1.6859923054790666, + "learning_rate": 1.8474189380612225e-06, + "loss": 0.49535471200942993, + "step": 1903 + }, + { + "epoch": 
0.4390131427253862, + "grad_norm": 1.0079272515569784, + "learning_rate": 1.8472164554256897e-06, + "loss": 0.40703707933425903, + "step": 1904 + }, + { + "epoch": 0.4392437168549689, + "grad_norm": 1.1125525506446694, + "learning_rate": 1.8470138496386588e-06, + "loss": 0.4540821313858032, + "step": 1905 + }, + { + "epoch": 0.43947429098455154, + "grad_norm": 1.1572392182622382, + "learning_rate": 1.846811120729581e-06, + "loss": 0.45964252948760986, + "step": 1906 + }, + { + "epoch": 0.4397048651141342, + "grad_norm": 1.018497744556974, + "learning_rate": 1.8466082687279244e-06, + "loss": 0.4604472517967224, + "step": 1907 + }, + { + "epoch": 0.43993543924371686, + "grad_norm": 1.114828518838774, + "learning_rate": 1.8464052936631758e-06, + "loss": 0.44585052132606506, + "step": 1908 + }, + { + "epoch": 0.4401660133732995, + "grad_norm": 1.2189161284011176, + "learning_rate": 1.8462021955648397e-06, + "loss": 0.43862414360046387, + "step": 1909 + }, + { + "epoch": 0.4403965875028822, + "grad_norm": 1.0484346475063675, + "learning_rate": 1.8459989744624386e-06, + "loss": 0.5148224234580994, + "step": 1910 + }, + { + "epoch": 0.44062716163246485, + "grad_norm": 1.3041727396087255, + "learning_rate": 1.8457956303855124e-06, + "loss": 0.6201390027999878, + "step": 1911 + }, + { + "epoch": 0.4408577357620475, + "grad_norm": 1.322348681007624, + "learning_rate": 1.8455921633636196e-06, + "loss": 0.5828813314437866, + "step": 1912 + }, + { + "epoch": 0.44108830989163017, + "grad_norm": 1.2413839772395276, + "learning_rate": 1.845388573426336e-06, + "loss": 0.5491579174995422, + "step": 1913 + }, + { + "epoch": 0.44131888402121283, + "grad_norm": 1.135006469141378, + "learning_rate": 1.8451848606032554e-06, + "loss": 0.4204079508781433, + "step": 1914 + }, + { + "epoch": 0.4415494581507955, + "grad_norm": 1.3248528862326203, + "learning_rate": 1.8449810249239898e-06, + "loss": 0.5734649300575256, + "step": 1915 + }, + { + "epoch": 0.44178003228037815, + "grad_norm": 
1.1101812599659409, + "learning_rate": 1.8447770664181684e-06, + "loss": 0.48931679129600525, + "step": 1916 + }, + { + "epoch": 0.4420106064099608, + "grad_norm": 1.292831898773596, + "learning_rate": 1.8445729851154392e-06, + "loss": 0.5206375122070312, + "step": 1917 + }, + { + "epoch": 0.4422411805395435, + "grad_norm": 1.3590503413541226, + "learning_rate": 1.8443687810454666e-06, + "loss": 0.4916420578956604, + "step": 1918 + }, + { + "epoch": 0.44247175466912614, + "grad_norm": 1.0963843972341092, + "learning_rate": 1.8441644542379348e-06, + "loss": 0.5021753311157227, + "step": 1919 + }, + { + "epoch": 0.4427023287987088, + "grad_norm": 1.2556127492378621, + "learning_rate": 1.8439600047225441e-06, + "loss": 0.4615249037742615, + "step": 1920 + }, + { + "epoch": 0.44293290292829146, + "grad_norm": 1.3251855444784397, + "learning_rate": 1.8437554325290133e-06, + "loss": 0.4849514365196228, + "step": 1921 + }, + { + "epoch": 0.4431634770578741, + "grad_norm": 1.3926092312086646, + "learning_rate": 1.843550737687079e-06, + "loss": 0.5872727632522583, + "step": 1922 + }, + { + "epoch": 0.4433940511874568, + "grad_norm": 1.1422193923698303, + "learning_rate": 1.843345920226496e-06, + "loss": 0.48469966650009155, + "step": 1923 + }, + { + "epoch": 0.44362462531703944, + "grad_norm": 1.1078885152995024, + "learning_rate": 1.8431409801770364e-06, + "loss": 0.45931774377822876, + "step": 1924 + }, + { + "epoch": 0.4438551994466221, + "grad_norm": 1.0630184817249293, + "learning_rate": 1.8429359175684907e-06, + "loss": 0.5138596296310425, + "step": 1925 + }, + { + "epoch": 0.44408577357620477, + "grad_norm": 1.1576378783801253, + "learning_rate": 1.8427307324306661e-06, + "loss": 0.5586874485015869, + "step": 1926 + }, + { + "epoch": 0.44431634770578743, + "grad_norm": 0.9982496919132913, + "learning_rate": 1.8425254247933887e-06, + "loss": 0.5373901724815369, + "step": 1927 + }, + { + "epoch": 0.4445469218353701, + "grad_norm": 1.3044317948619655, + "learning_rate": 
1.8423199946865022e-06, + "loss": 0.46104729175567627, + "step": 1928 + }, + { + "epoch": 0.44477749596495275, + "grad_norm": 1.2637964058278408, + "learning_rate": 1.8421144421398678e-06, + "loss": 0.4837646782398224, + "step": 1929 + }, + { + "epoch": 0.4450080700945354, + "grad_norm": 1.0579849017335872, + "learning_rate": 1.8419087671833647e-06, + "loss": 0.47685718536376953, + "step": 1930 + }, + { + "epoch": 0.4452386442241181, + "grad_norm": 1.3061309074235694, + "learning_rate": 1.8417029698468897e-06, + "loss": 0.5904572606086731, + "step": 1931 + }, + { + "epoch": 0.44546921835370074, + "grad_norm": 1.0698778232309683, + "learning_rate": 1.8414970501603577e-06, + "loss": 0.5434018969535828, + "step": 1932 + }, + { + "epoch": 0.4456997924832834, + "grad_norm": 1.0813116335575876, + "learning_rate": 1.8412910081537012e-06, + "loss": 0.5532705783843994, + "step": 1933 + }, + { + "epoch": 0.44593036661286606, + "grad_norm": 1.2746241772853588, + "learning_rate": 1.8410848438568704e-06, + "loss": 0.4900597929954529, + "step": 1934 + }, + { + "epoch": 0.4461609407424487, + "grad_norm": 1.1321871851277807, + "learning_rate": 1.8408785572998334e-06, + "loss": 0.40426892042160034, + "step": 1935 + }, + { + "epoch": 0.4463915148720314, + "grad_norm": 1.2056959007702837, + "learning_rate": 1.840672148512576e-06, + "loss": 0.48805081844329834, + "step": 1936 + }, + { + "epoch": 0.44662208900161404, + "grad_norm": 1.247599925173634, + "learning_rate": 1.8404656175251019e-06, + "loss": 0.4997096657752991, + "step": 1937 + }, + { + "epoch": 0.4468526631311967, + "grad_norm": 1.1300078883402307, + "learning_rate": 1.8402589643674325e-06, + "loss": 0.5113422274589539, + "step": 1938 + }, + { + "epoch": 0.44708323726077936, + "grad_norm": 1.2034211237767165, + "learning_rate": 1.8400521890696065e-06, + "loss": 0.44080060720443726, + "step": 1939 + }, + { + "epoch": 0.447313811390362, + "grad_norm": 1.1365386964776252, + "learning_rate": 1.8398452916616816e-06, + "loss": 
0.4477943778038025, + "step": 1940 + }, + { + "epoch": 0.4475443855199447, + "grad_norm": 1.2171142668463, + "learning_rate": 1.8396382721737318e-06, + "loss": 0.4597470760345459, + "step": 1941 + }, + { + "epoch": 0.44777495964952735, + "grad_norm": 1.1079547319265362, + "learning_rate": 1.8394311306358494e-06, + "loss": 0.4758293628692627, + "step": 1942 + }, + { + "epoch": 0.44800553377911, + "grad_norm": 1.1579717682654027, + "learning_rate": 1.8392238670781453e-06, + "loss": 0.4573550224304199, + "step": 1943 + }, + { + "epoch": 0.44823610790869267, + "grad_norm": 1.318176172591765, + "learning_rate": 1.8390164815307465e-06, + "loss": 0.504696786403656, + "step": 1944 + }, + { + "epoch": 0.44846668203827533, + "grad_norm": 1.176904108457006, + "learning_rate": 1.8388089740237991e-06, + "loss": 0.4936453700065613, + "step": 1945 + }, + { + "epoch": 0.448697256167858, + "grad_norm": 1.0847569291854338, + "learning_rate": 1.8386013445874661e-06, + "loss": 0.4851078987121582, + "step": 1946 + }, + { + "epoch": 0.4489278302974406, + "grad_norm": 1.184810595622898, + "learning_rate": 1.8383935932519288e-06, + "loss": 0.4881519377231598, + "step": 1947 + }, + { + "epoch": 0.44915840442702326, + "grad_norm": 1.2389121525709461, + "learning_rate": 1.8381857200473859e-06, + "loss": 0.5604408979415894, + "step": 1948 + }, + { + "epoch": 0.4493889785566059, + "grad_norm": 1.2909928460674411, + "learning_rate": 1.8379777250040535e-06, + "loss": 0.5022269487380981, + "step": 1949 + }, + { + "epoch": 0.4496195526861886, + "grad_norm": 1.5074815200191058, + "learning_rate": 1.8377696081521666e-06, + "loss": 0.6519315242767334, + "step": 1950 + }, + { + "epoch": 0.44985012681577125, + "grad_norm": 1.0636886048128833, + "learning_rate": 1.8375613695219766e-06, + "loss": 0.3820997476577759, + "step": 1951 + }, + { + "epoch": 0.4500807009453539, + "grad_norm": 1.2705283632306288, + "learning_rate": 1.8373530091437526e-06, + "loss": 0.5473283529281616, + "step": 1952 + }, + { + 
"epoch": 0.45031127507493657, + "grad_norm": 1.3245130391551474, + "learning_rate": 1.8371445270477828e-06, + "loss": 0.5835955142974854, + "step": 1953 + }, + { + "epoch": 0.45054184920451923, + "grad_norm": 0.9645583101230016, + "learning_rate": 1.8369359232643716e-06, + "loss": 0.5398194789886475, + "step": 1954 + }, + { + "epoch": 0.4507724233341019, + "grad_norm": 1.363319289299188, + "learning_rate": 1.8367271978238418e-06, + "loss": 0.36561834812164307, + "step": 1955 + }, + { + "epoch": 0.45100299746368455, + "grad_norm": 1.212738724980002, + "learning_rate": 1.8365183507565342e-06, + "loss": 0.319802463054657, + "step": 1956 + }, + { + "epoch": 0.4512335715932672, + "grad_norm": 1.2303957915062576, + "learning_rate": 1.8363093820928063e-06, + "loss": 0.46466606855392456, + "step": 1957 + }, + { + "epoch": 0.4514641457228499, + "grad_norm": 1.0793723825771542, + "learning_rate": 1.8361002918630338e-06, + "loss": 0.5839806199073792, + "step": 1958 + }, + { + "epoch": 0.45169471985243254, + "grad_norm": 1.1018651408043991, + "learning_rate": 1.8358910800976105e-06, + "loss": 0.4472346603870392, + "step": 1959 + }, + { + "epoch": 0.4519252939820152, + "grad_norm": 1.2384424942976882, + "learning_rate": 1.835681746826947e-06, + "loss": 0.5191199779510498, + "step": 1960 + }, + { + "epoch": 0.45215586811159786, + "grad_norm": 1.199344967008703, + "learning_rate": 1.8354722920814722e-06, + "loss": 0.5832456350326538, + "step": 1961 + }, + { + "epoch": 0.4523864422411805, + "grad_norm": 1.17539846221013, + "learning_rate": 1.8352627158916326e-06, + "loss": 0.604708194732666, + "step": 1962 + }, + { + "epoch": 0.4526170163707632, + "grad_norm": 1.0362921929144542, + "learning_rate": 1.8350530182878924e-06, + "loss": 0.5640981793403625, + "step": 1963 + }, + { + "epoch": 0.45284759050034584, + "grad_norm": 1.6578766467164143, + "learning_rate": 1.8348431993007326e-06, + "loss": 0.4816977381706238, + "step": 1964 + }, + { + "epoch": 0.4530781646299285, + "grad_norm": 
1.1374005988930347, + "learning_rate": 1.8346332589606526e-06, + "loss": 0.4226726293563843, + "step": 1965 + }, + { + "epoch": 0.45330873875951117, + "grad_norm": 1.1547528745449813, + "learning_rate": 1.8344231972981701e-06, + "loss": 0.49635130167007446, + "step": 1966 + }, + { + "epoch": 0.4535393128890938, + "grad_norm": 1.1372879426647424, + "learning_rate": 1.8342130143438193e-06, + "loss": 0.5275523662567139, + "step": 1967 + }, + { + "epoch": 0.4537698870186765, + "grad_norm": 1.202496816282669, + "learning_rate": 1.834002710128152e-06, + "loss": 0.48517313599586487, + "step": 1968 + }, + { + "epoch": 0.45400046114825915, + "grad_norm": 1.1968500607132941, + "learning_rate": 1.8337922846817388e-06, + "loss": 0.4352126717567444, + "step": 1969 + }, + { + "epoch": 0.4542310352778418, + "grad_norm": 1.116289808278095, + "learning_rate": 1.8335817380351668e-06, + "loss": 0.48131102323532104, + "step": 1970 + }, + { + "epoch": 0.45446160940742447, + "grad_norm": 1.1124663257243492, + "learning_rate": 1.8333710702190408e-06, + "loss": 0.48989611864089966, + "step": 1971 + }, + { + "epoch": 0.45469218353700713, + "grad_norm": 1.4370850989895667, + "learning_rate": 1.8331602812639839e-06, + "loss": 0.4841296076774597, + "step": 1972 + }, + { + "epoch": 0.4549227576665898, + "grad_norm": 1.1830445801916494, + "learning_rate": 1.8329493712006364e-06, + "loss": 0.5479841232299805, + "step": 1973 + }, + { + "epoch": 0.45515333179617246, + "grad_norm": 1.1923903658380426, + "learning_rate": 1.8327383400596559e-06, + "loss": 0.4732212424278259, + "step": 1974 + }, + { + "epoch": 0.4553839059257551, + "grad_norm": 1.0628413230145501, + "learning_rate": 1.8325271878717183e-06, + "loss": 0.46675610542297363, + "step": 1975 + }, + { + "epoch": 0.4556144800553378, + "grad_norm": 1.0416293786228703, + "learning_rate": 1.8323159146675163e-06, + "loss": 0.5464143753051758, + "step": 1976 + }, + { + "epoch": 0.45584505418492044, + "grad_norm": 1.0345078154587666, + 
"learning_rate": 1.832104520477761e-06, + "loss": 0.3888660669326782, + "step": 1977 + }, + { + "epoch": 0.4560756283145031, + "grad_norm": 1.4241654424068988, + "learning_rate": 1.8318930053331805e-06, + "loss": 0.5163271427154541, + "step": 1978 + }, + { + "epoch": 0.45630620244408576, + "grad_norm": 1.2347472844947731, + "learning_rate": 1.8316813692645208e-06, + "loss": 0.5471124649047852, + "step": 1979 + }, + { + "epoch": 0.4565367765736684, + "grad_norm": 1.1473833654009267, + "learning_rate": 1.8314696123025452e-06, + "loss": 0.5907406210899353, + "step": 1980 + }, + { + "epoch": 0.4567673507032511, + "grad_norm": 1.298768820373183, + "learning_rate": 1.8312577344780346e-06, + "loss": 0.5249447226524353, + "step": 1981 + }, + { + "epoch": 0.45699792483283375, + "grad_norm": 1.2135802460189444, + "learning_rate": 1.8310457358217879e-06, + "loss": 0.5063247084617615, + "step": 1982 + }, + { + "epoch": 0.4572284989624164, + "grad_norm": 1.361065103282706, + "learning_rate": 1.830833616364621e-06, + "loss": 0.4448107182979584, + "step": 1983 + }, + { + "epoch": 0.45745907309199907, + "grad_norm": 1.1036363497718666, + "learning_rate": 1.830621376137368e-06, + "loss": 0.5699697732925415, + "step": 1984 + }, + { + "epoch": 0.45768964722158173, + "grad_norm": 1.246349122018957, + "learning_rate": 1.8304090151708794e-06, + "loss": 0.5701720118522644, + "step": 1985 + }, + { + "epoch": 0.4579202213511644, + "grad_norm": 1.2319947144837158, + "learning_rate": 1.830196533496025e-06, + "loss": 0.4754391014575958, + "step": 1986 + }, + { + "epoch": 0.45815079548074705, + "grad_norm": 1.3528306833221286, + "learning_rate": 1.8299839311436903e-06, + "loss": 0.47649019956588745, + "step": 1987 + }, + { + "epoch": 0.4583813696103297, + "grad_norm": 1.3311097062461437, + "learning_rate": 1.8297712081447797e-06, + "loss": 0.5524393320083618, + "step": 1988 + }, + { + "epoch": 0.4586119437399124, + "grad_norm": 1.0762480086961639, + "learning_rate": 1.8295583645302144e-06, + 
"loss": 0.45731648802757263, + "step": 1989 + }, + { + "epoch": 0.45884251786949504, + "grad_norm": 1.130533269973984, + "learning_rate": 1.8293454003309336e-06, + "loss": 0.4999742805957794, + "step": 1990 + }, + { + "epoch": 0.4590730919990777, + "grad_norm": 1.1313506863251181, + "learning_rate": 1.829132315577894e-06, + "loss": 0.49084147810935974, + "step": 1991 + }, + { + "epoch": 0.45930366612866036, + "grad_norm": 1.2521400943324308, + "learning_rate": 1.828919110302069e-06, + "loss": 0.45332348346710205, + "step": 1992 + }, + { + "epoch": 0.459534240258243, + "grad_norm": 1.0776738520694769, + "learning_rate": 1.8287057845344504e-06, + "loss": 0.5029363632202148, + "step": 1993 + }, + { + "epoch": 0.4597648143878257, + "grad_norm": 1.1554006749910666, + "learning_rate": 1.8284923383060475e-06, + "loss": 0.5373274087905884, + "step": 1994 + }, + { + "epoch": 0.45999538851740834, + "grad_norm": 1.372219905846735, + "learning_rate": 1.8282787716478867e-06, + "loss": 0.5022158622741699, + "step": 1995 + }, + { + "epoch": 0.460225962646991, + "grad_norm": 1.5170390306548123, + "learning_rate": 1.828065084591012e-06, + "loss": 0.5093190670013428, + "step": 1996 + }, + { + "epoch": 0.46045653677657367, + "grad_norm": 1.1628780385550688, + "learning_rate": 1.827851277166485e-06, + "loss": 0.5406581163406372, + "step": 1997 + }, + { + "epoch": 0.4606871109061563, + "grad_norm": 1.0838824930169186, + "learning_rate": 1.8276373494053852e-06, + "loss": 0.4403364062309265, + "step": 1998 + }, + { + "epoch": 0.460917685035739, + "grad_norm": 1.0663930849179153, + "learning_rate": 1.8274233013388085e-06, + "loss": 0.48383134603500366, + "step": 1999 + }, + { + "epoch": 0.46114825916532165, + "grad_norm": 1.278024022767056, + "learning_rate": 1.8272091329978693e-06, + "loss": 0.5177836418151855, + "step": 2000 + }, + { + "epoch": 0.4613788332949043, + "grad_norm": 1.3026255484345248, + "learning_rate": 1.8269948444136991e-06, + "loss": 0.5699004530906677, + "step": 2001 + 
}, + { + "epoch": 0.461609407424487, + "grad_norm": 1.0712598167444656, + "learning_rate": 1.826780435617447e-06, + "loss": 0.5415153503417969, + "step": 2002 + }, + { + "epoch": 0.46183998155406963, + "grad_norm": 1.3243429308154806, + "learning_rate": 1.8265659066402792e-06, + "loss": 0.5521166920661926, + "step": 2003 + }, + { + "epoch": 0.4620705556836523, + "grad_norm": 1.0401918069659792, + "learning_rate": 1.8263512575133802e-06, + "loss": 0.4518507122993469, + "step": 2004 + }, + { + "epoch": 0.46230112981323496, + "grad_norm": 1.4036586027704223, + "learning_rate": 1.8261364882679508e-06, + "loss": 0.5997140407562256, + "step": 2005 + }, + { + "epoch": 0.4625317039428176, + "grad_norm": 1.2297832096563293, + "learning_rate": 1.8259215989352103e-06, + "loss": 0.5105265974998474, + "step": 2006 + }, + { + "epoch": 0.4627622780724003, + "grad_norm": 1.3620575066378895, + "learning_rate": 1.825706589546395e-06, + "loss": 0.5229371190071106, + "step": 2007 + }, + { + "epoch": 0.46299285220198294, + "grad_norm": 1.323713226525437, + "learning_rate": 1.825491460132759e-06, + "loss": 0.4833800792694092, + "step": 2008 + }, + { + "epoch": 0.4632234263315656, + "grad_norm": 1.443684310899243, + "learning_rate": 1.8252762107255727e-06, + "loss": 0.4323253035545349, + "step": 2009 + }, + { + "epoch": 0.46345400046114826, + "grad_norm": 1.0890999093716327, + "learning_rate": 1.8250608413561253e-06, + "loss": 0.4563494026660919, + "step": 2010 + }, + { + "epoch": 0.4636845745907309, + "grad_norm": 1.5474519259744821, + "learning_rate": 1.8248453520557228e-06, + "loss": 0.5656196475028992, + "step": 2011 + }, + { + "epoch": 0.4639151487203136, + "grad_norm": 1.4798653425077055, + "learning_rate": 1.8246297428556887e-06, + "loss": 0.5448226928710938, + "step": 2012 + }, + { + "epoch": 0.46414572284989625, + "grad_norm": 1.1620535147248132, + "learning_rate": 1.8244140137873645e-06, + "loss": 0.4692860543727875, + "step": 2013 + }, + { + "epoch": 0.4643762969794789, + 
"grad_norm": 1.1643805671555858, + "learning_rate": 1.8241981648821079e-06, + "loss": 0.5948643088340759, + "step": 2014 + }, + { + "epoch": 0.46460687110906157, + "grad_norm": 1.1853722372788744, + "learning_rate": 1.823982196171295e-06, + "loss": 0.54410719871521, + "step": 2015 + }, + { + "epoch": 0.46483744523864423, + "grad_norm": 1.1149495485691443, + "learning_rate": 1.8237661076863192e-06, + "loss": 0.430447518825531, + "step": 2016 + }, + { + "epoch": 0.4650680193682269, + "grad_norm": 1.2520273819748522, + "learning_rate": 1.8235498994585913e-06, + "loss": 0.5420910716056824, + "step": 2017 + }, + { + "epoch": 0.46529859349780955, + "grad_norm": 1.119152189162338, + "learning_rate": 1.823333571519539e-06, + "loss": 0.5140334963798523, + "step": 2018 + }, + { + "epoch": 0.4655291676273922, + "grad_norm": 1.1399919106847334, + "learning_rate": 1.8231171239006075e-06, + "loss": 0.5901660323143005, + "step": 2019 + }, + { + "epoch": 0.4657597417569749, + "grad_norm": 1.174060044130563, + "learning_rate": 1.8229005566332603e-06, + "loss": 0.5025908350944519, + "step": 2020 + }, + { + "epoch": 0.46599031588655754, + "grad_norm": 1.3363070549997977, + "learning_rate": 1.8226838697489772e-06, + "loss": 0.4884544909000397, + "step": 2021 + }, + { + "epoch": 0.4662208900161402, + "grad_norm": 1.1349219249551332, + "learning_rate": 1.822467063279256e-06, + "loss": 0.46449869871139526, + "step": 2022 + }, + { + "epoch": 0.46645146414572286, + "grad_norm": 1.2563720378844234, + "learning_rate": 1.8222501372556116e-06, + "loss": 0.49463552236557007, + "step": 2023 + }, + { + "epoch": 0.4666820382753055, + "grad_norm": 1.285405581097111, + "learning_rate": 1.8220330917095768e-06, + "loss": 0.5027149319648743, + "step": 2024 + }, + { + "epoch": 0.4669126124048882, + "grad_norm": 1.3048909901236199, + "learning_rate": 1.8218159266727007e-06, + "loss": 0.564018726348877, + "step": 2025 + }, + { + "epoch": 0.46714318653447084, + "grad_norm": 1.1965631228875364, + 
"learning_rate": 1.821598642176551e-06, + "loss": 0.4235766530036926, + "step": 2026 + }, + { + "epoch": 0.4673737606640535, + "grad_norm": 1.3354885477125742, + "learning_rate": 1.8213812382527118e-06, + "loss": 0.5696560144424438, + "step": 2027 + }, + { + "epoch": 0.46760433479363617, + "grad_norm": 1.2879943344932543, + "learning_rate": 1.8211637149327856e-06, + "loss": 0.6101738214492798, + "step": 2028 + }, + { + "epoch": 0.46783490892321883, + "grad_norm": 1.2787382273760666, + "learning_rate": 1.820946072248391e-06, + "loss": 0.46749603748321533, + "step": 2029 + }, + { + "epoch": 0.4680654830528015, + "grad_norm": 1.0137433334051962, + "learning_rate": 1.8207283102311646e-06, + "loss": 0.4713476300239563, + "step": 2030 + }, + { + "epoch": 0.46829605718238415, + "grad_norm": 1.1924917748606811, + "learning_rate": 1.8205104289127607e-06, + "loss": 0.5381859540939331, + "step": 2031 + }, + { + "epoch": 0.4685266313119668, + "grad_norm": 1.1753816722161505, + "learning_rate": 1.82029242832485e-06, + "loss": 0.4871833324432373, + "step": 2032 + }, + { + "epoch": 0.4687572054415495, + "grad_norm": 1.2889177236993268, + "learning_rate": 1.8200743084991217e-06, + "loss": 0.520627498626709, + "step": 2033 + }, + { + "epoch": 0.46898777957113214, + "grad_norm": 1.1168475824168262, + "learning_rate": 1.8198560694672813e-06, + "loss": 0.5382364392280579, + "step": 2034 + }, + { + "epoch": 0.4692183537007148, + "grad_norm": 1.0953401197844614, + "learning_rate": 1.8196377112610524e-06, + "loss": 0.384588360786438, + "step": 2035 + }, + { + "epoch": 0.46944892783029746, + "grad_norm": 1.3337847292368636, + "learning_rate": 1.8194192339121752e-06, + "loss": 0.5515186786651611, + "step": 2036 + }, + { + "epoch": 0.4696795019598801, + "grad_norm": 1.2634192136555153, + "learning_rate": 1.819200637452408e-06, + "loss": 0.5405331254005432, + "step": 2037 + }, + { + "epoch": 0.4699100760894628, + "grad_norm": 1.3408838607377604, + "learning_rate": 1.818981921913526e-06, + 
"loss": 0.5565645694732666, + "step": 2038 + }, + { + "epoch": 0.47014065021904544, + "grad_norm": 1.1845986031026676, + "learning_rate": 1.818763087327321e-06, + "loss": 0.4856358468532562, + "step": 2039 + }, + { + "epoch": 0.4703712243486281, + "grad_norm": 1.1018414398540533, + "learning_rate": 1.8185441337256035e-06, + "loss": 0.5495761632919312, + "step": 2040 + }, + { + "epoch": 0.47060179847821076, + "grad_norm": 1.1792744067343253, + "learning_rate": 1.8183250611402007e-06, + "loss": 0.509435772895813, + "step": 2041 + }, + { + "epoch": 0.4708323726077934, + "grad_norm": 1.0107628293119386, + "learning_rate": 1.8181058696029564e-06, + "loss": 0.4663920998573303, + "step": 2042 + }, + { + "epoch": 0.4710629467373761, + "grad_norm": 1.5093599722992523, + "learning_rate": 1.817886559145733e-06, + "loss": 0.5976128578186035, + "step": 2043 + }, + { + "epoch": 0.47129352086695875, + "grad_norm": 1.2084791393616294, + "learning_rate": 1.817667129800409e-06, + "loss": 0.49167966842651367, + "step": 2044 + }, + { + "epoch": 0.4715240949965414, + "grad_norm": 1.1457657477052965, + "learning_rate": 1.817447581598881e-06, + "loss": 0.5889153480529785, + "step": 2045 + }, + { + "epoch": 0.47175466912612407, + "grad_norm": 1.206584712735091, + "learning_rate": 1.8172279145730622e-06, + "loss": 0.4970330595970154, + "step": 2046 + }, + { + "epoch": 0.47198524325570673, + "grad_norm": 1.1497751548880843, + "learning_rate": 1.817008128754884e-06, + "loss": 0.4840531051158905, + "step": 2047 + }, + { + "epoch": 0.4722158173852894, + "grad_norm": 1.0450687693806986, + "learning_rate": 1.816788224176294e-06, + "loss": 0.48297861218452454, + "step": 2048 + }, + { + "epoch": 0.47244639151487205, + "grad_norm": 1.184218710920589, + "learning_rate": 1.8165682008692578e-06, + "loss": 0.540350079536438, + "step": 2049 + }, + { + "epoch": 0.4726769656444547, + "grad_norm": 1.0359041945652345, + "learning_rate": 1.8163480588657578e-06, + "loss": 0.46405351161956787, + "step": 2050 + 
}, + { + "epoch": 0.4729075397740374, + "grad_norm": 1.1107404730922064, + "learning_rate": 1.816127798197794e-06, + "loss": 0.5175468921661377, + "step": 2051 + }, + { + "epoch": 0.47313811390362004, + "grad_norm": 1.3876726162535544, + "learning_rate": 1.8159074188973836e-06, + "loss": 0.5923771858215332, + "step": 2052 + }, + { + "epoch": 0.4733686880332027, + "grad_norm": 1.135618311389398, + "learning_rate": 1.815686920996561e-06, + "loss": 0.4999024569988251, + "step": 2053 + }, + { + "epoch": 0.47359926216278536, + "grad_norm": 1.260203747569289, + "learning_rate": 1.8154663045273775e-06, + "loss": 0.5630939602851868, + "step": 2054 + }, + { + "epoch": 0.473829836292368, + "grad_norm": 1.0446947469213006, + "learning_rate": 1.8152455695219021e-06, + "loss": 0.5505836009979248, + "step": 2055 + }, + { + "epoch": 0.4740604104219507, + "grad_norm": 1.0593378648910954, + "learning_rate": 1.8150247160122213e-06, + "loss": 0.44550588726997375, + "step": 2056 + }, + { + "epoch": 0.47429098455153335, + "grad_norm": 1.3784716647825315, + "learning_rate": 1.8148037440304375e-06, + "loss": 0.5387516021728516, + "step": 2057 + }, + { + "epoch": 0.47452155868111595, + "grad_norm": 1.2100168024707112, + "learning_rate": 1.814582653608672e-06, + "loss": 0.5941788554191589, + "step": 2058 + }, + { + "epoch": 0.4747521328106986, + "grad_norm": 1.3537451578676338, + "learning_rate": 1.8143614447790622e-06, + "loss": 0.552179217338562, + "step": 2059 + }, + { + "epoch": 0.4749827069402813, + "grad_norm": 1.4352695047482156, + "learning_rate": 1.8141401175737632e-06, + "loss": 0.4475885033607483, + "step": 2060 + }, + { + "epoch": 0.47521328106986394, + "grad_norm": 1.560782042661122, + "learning_rate": 1.813918672024947e-06, + "loss": 0.5821356773376465, + "step": 2061 + }, + { + "epoch": 0.4754438551994466, + "grad_norm": 1.0378834941031638, + "learning_rate": 1.8136971081648027e-06, + "loss": 0.4673501253128052, + "step": 2062 + }, + { + "epoch": 0.47567442932902926, + 
"grad_norm": 1.278556049660224, + "learning_rate": 1.8134754260255373e-06, + "loss": 0.582427978515625, + "step": 2063 + }, + { + "epoch": 0.4759050034586119, + "grad_norm": 1.050202225169388, + "learning_rate": 1.8132536256393744e-06, + "loss": 0.4494328498840332, + "step": 2064 + }, + { + "epoch": 0.4761355775881946, + "grad_norm": 1.2125688329070163, + "learning_rate": 1.8130317070385552e-06, + "loss": 0.44775205850601196, + "step": 2065 + }, + { + "epoch": 0.47636615171777724, + "grad_norm": 1.6939798990457848, + "learning_rate": 1.8128096702553372e-06, + "loss": 0.5456822514533997, + "step": 2066 + }, + { + "epoch": 0.4765967258473599, + "grad_norm": 1.3273956589633653, + "learning_rate": 1.8125875153219963e-06, + "loss": 0.46396178007125854, + "step": 2067 + }, + { + "epoch": 0.47682729997694256, + "grad_norm": 1.1515186039412058, + "learning_rate": 1.8123652422708247e-06, + "loss": 0.4479365944862366, + "step": 2068 + }, + { + "epoch": 0.4770578741065252, + "grad_norm": 1.2802069282774096, + "learning_rate": 1.8121428511341322e-06, + "loss": 0.4633978605270386, + "step": 2069 + }, + { + "epoch": 0.4772884482361079, + "grad_norm": 1.0517363876370052, + "learning_rate": 1.811920341944245e-06, + "loss": 0.5190213918685913, + "step": 2070 + }, + { + "epoch": 0.47751902236569055, + "grad_norm": 1.1502023331468956, + "learning_rate": 1.811697714733508e-06, + "loss": 0.3900855779647827, + "step": 2071 + }, + { + "epoch": 0.4777495964952732, + "grad_norm": 1.1255517906685018, + "learning_rate": 1.8114749695342816e-06, + "loss": 0.5130020380020142, + "step": 2072 + }, + { + "epoch": 0.47798017062485587, + "grad_norm": 1.181934216759251, + "learning_rate": 1.8112521063789444e-06, + "loss": 0.5279096364974976, + "step": 2073 + }, + { + "epoch": 0.47821074475443853, + "grad_norm": 1.1536132669518966, + "learning_rate": 1.8110291252998918e-06, + "loss": 0.5048732161521912, + "step": 2074 + }, + { + "epoch": 0.4784413188840212, + "grad_norm": 1.3979756779725594, + 
"learning_rate": 1.8108060263295362e-06, + "loss": 0.5410048365592957, + "step": 2075 + }, + { + "epoch": 0.47867189301360386, + "grad_norm": 1.2583345285712537, + "learning_rate": 1.8105828095003073e-06, + "loss": 0.5144593715667725, + "step": 2076 + }, + { + "epoch": 0.4789024671431865, + "grad_norm": 1.427505910251362, + "learning_rate": 1.810359474844652e-06, + "loss": 0.543846845626831, + "step": 2077 + }, + { + "epoch": 0.4791330412727692, + "grad_norm": 1.3389957969723305, + "learning_rate": 1.8101360223950346e-06, + "loss": 0.5628032684326172, + "step": 2078 + }, + { + "epoch": 0.47936361540235184, + "grad_norm": 1.2233623869672197, + "learning_rate": 1.8099124521839358e-06, + "loss": 0.5248516201972961, + "step": 2079 + }, + { + "epoch": 0.4795941895319345, + "grad_norm": 1.1882395736191633, + "learning_rate": 1.8096887642438537e-06, + "loss": 0.44171589612960815, + "step": 2080 + }, + { + "epoch": 0.47982476366151716, + "grad_norm": 1.1226478747483744, + "learning_rate": 1.809464958607304e-06, + "loss": 0.5003859996795654, + "step": 2081 + }, + { + "epoch": 0.4800553377910998, + "grad_norm": 1.2241972764897475, + "learning_rate": 1.8092410353068183e-06, + "loss": 0.5271269679069519, + "step": 2082 + }, + { + "epoch": 0.4802859119206825, + "grad_norm": 1.390627459359596, + "learning_rate": 1.8090169943749474e-06, + "loss": 0.5191465616226196, + "step": 2083 + }, + { + "epoch": 0.48051648605026515, + "grad_norm": 1.229186901325219, + "learning_rate": 1.8087928358442567e-06, + "loss": 0.4569256007671356, + "step": 2084 + }, + { + "epoch": 0.4807470601798478, + "grad_norm": 1.2586566204343959, + "learning_rate": 1.8085685597473307e-06, + "loss": 0.521030068397522, + "step": 2085 + }, + { + "epoch": 0.48097763430943047, + "grad_norm": 1.8616539280014968, + "learning_rate": 1.80834416611677e-06, + "loss": 0.48959439992904663, + "step": 2086 + }, + { + "epoch": 0.48120820843901313, + "grad_norm": 1.37464754051939, + "learning_rate": 1.8081196549851925e-06, + 
"loss": 0.6536514163017273, + "step": 2087 + }, + { + "epoch": 0.4814387825685958, + "grad_norm": 1.2292193685806807, + "learning_rate": 1.8078950263852327e-06, + "loss": 0.5746080875396729, + "step": 2088 + }, + { + "epoch": 0.48166935669817845, + "grad_norm": 1.244000490897379, + "learning_rate": 1.8076702803495437e-06, + "loss": 0.5518802404403687, + "step": 2089 + }, + { + "epoch": 0.4818999308277611, + "grad_norm": 1.0641823457217219, + "learning_rate": 1.8074454169107934e-06, + "loss": 0.49385470151901245, + "step": 2090 + }, + { + "epoch": 0.4821305049573438, + "grad_norm": 1.0197781900207734, + "learning_rate": 1.8072204361016688e-06, + "loss": 0.4488806426525116, + "step": 2091 + }, + { + "epoch": 0.48236107908692644, + "grad_norm": 1.1424753749617582, + "learning_rate": 1.8069953379548727e-06, + "loss": 0.4167511761188507, + "step": 2092 + }, + { + "epoch": 0.4825916532165091, + "grad_norm": 1.0650805504939584, + "learning_rate": 1.8067701225031258e-06, + "loss": 0.4181321859359741, + "step": 2093 + }, + { + "epoch": 0.48282222734609176, + "grad_norm": 1.4930083094447149, + "learning_rate": 1.806544789779165e-06, + "loss": 0.5257805585861206, + "step": 2094 + }, + { + "epoch": 0.4830528014756744, + "grad_norm": 1.2055270290247748, + "learning_rate": 1.806319339815745e-06, + "loss": 0.4687056541442871, + "step": 2095 + }, + { + "epoch": 0.4832833756052571, + "grad_norm": 1.4682007990950796, + "learning_rate": 1.8060937726456373e-06, + "loss": 0.48070380091667175, + "step": 2096 + }, + { + "epoch": 0.48351394973483974, + "grad_norm": 1.1555932423285984, + "learning_rate": 1.80586808830163e-06, + "loss": 0.516263484954834, + "step": 2097 + }, + { + "epoch": 0.4837445238644224, + "grad_norm": 1.1676344701764343, + "learning_rate": 1.805642286816529e-06, + "loss": 0.44018858671188354, + "step": 2098 + }, + { + "epoch": 0.48397509799400507, + "grad_norm": 1.1426045047454896, + "learning_rate": 1.8054163682231565e-06, + "loss": 0.469373881816864, + "step": 2099 
+ }, + { + "epoch": 0.4842056721235877, + "grad_norm": 1.2080131082183756, + "learning_rate": 1.8051903325543525e-06, + "loss": 0.4759753346443176, + "step": 2100 + }, + { + "epoch": 0.4844362462531704, + "grad_norm": 1.210070128706108, + "learning_rate": 1.804964179842973e-06, + "loss": 0.5002714395523071, + "step": 2101 + }, + { + "epoch": 0.48466682038275305, + "grad_norm": 1.5442585246670464, + "learning_rate": 1.804737910121892e-06, + "loss": 0.4869537353515625, + "step": 2102 + }, + { + "epoch": 0.4848973945123357, + "grad_norm": 1.0025531891942765, + "learning_rate": 1.804511523424e-06, + "loss": 0.4840247929096222, + "step": 2103 + }, + { + "epoch": 0.4851279686419184, + "grad_norm": 1.2125955941110753, + "learning_rate": 1.8042850197822049e-06, + "loss": 0.48390740156173706, + "step": 2104 + }, + { + "epoch": 0.48535854277150103, + "grad_norm": 1.2581816256760507, + "learning_rate": 1.8040583992294305e-06, + "loss": 0.5875431895256042, + "step": 2105 + }, + { + "epoch": 0.4855891169010837, + "grad_norm": 1.1530238586197006, + "learning_rate": 1.803831661798619e-06, + "loss": 0.4599287211894989, + "step": 2106 + }, + { + "epoch": 0.48581969103066636, + "grad_norm": 1.120967919274212, + "learning_rate": 1.803604807522729e-06, + "loss": 0.5266382694244385, + "step": 2107 + }, + { + "epoch": 0.486050265160249, + "grad_norm": 1.6402953005136756, + "learning_rate": 1.8033778364347359e-06, + "loss": 0.5592058897018433, + "step": 2108 + }, + { + "epoch": 0.4862808392898317, + "grad_norm": 1.278433491122833, + "learning_rate": 1.8031507485676324e-06, + "loss": 0.4385683834552765, + "step": 2109 + }, + { + "epoch": 0.48651141341941434, + "grad_norm": 0.9409152493815139, + "learning_rate": 1.8029235439544277e-06, + "loss": 0.4205859303474426, + "step": 2110 + }, + { + "epoch": 0.486741987548997, + "grad_norm": 1.2334271425613326, + "learning_rate": 1.8026962226281484e-06, + "loss": 0.4179378151893616, + "step": 2111 + }, + { + "epoch": 0.48697256167857966, + 
"grad_norm": 1.3018247329424364, + "learning_rate": 1.8024687846218382e-06, + "loss": 0.5022565126419067, + "step": 2112 + }, + { + "epoch": 0.4872031358081623, + "grad_norm": 1.092822670373115, + "learning_rate": 1.8022412299685574e-06, + "loss": 0.4591484069824219, + "step": 2113 + }, + { + "epoch": 0.487433709937745, + "grad_norm": 1.135644170855214, + "learning_rate": 1.8020135587013836e-06, + "loss": 0.44381004571914673, + "step": 2114 + }, + { + "epoch": 0.48766428406732765, + "grad_norm": 1.4882998519827229, + "learning_rate": 1.8017857708534106e-06, + "loss": 0.5418124198913574, + "step": 2115 + }, + { + "epoch": 0.4878948581969103, + "grad_norm": 1.1899076485341344, + "learning_rate": 1.80155786645775e-06, + "loss": 0.45836228132247925, + "step": 2116 + }, + { + "epoch": 0.48812543232649297, + "grad_norm": 1.0900529156655503, + "learning_rate": 1.80132984554753e-06, + "loss": 0.6028016805648804, + "step": 2117 + }, + { + "epoch": 0.48835600645607563, + "grad_norm": 1.2082046720219188, + "learning_rate": 1.8011017081558956e-06, + "loss": 0.461037814617157, + "step": 2118 + }, + { + "epoch": 0.4885865805856583, + "grad_norm": 1.2201342507223627, + "learning_rate": 1.8008734543160092e-06, + "loss": 0.45145073533058167, + "step": 2119 + }, + { + "epoch": 0.48881715471524095, + "grad_norm": 1.0786402560770025, + "learning_rate": 1.8006450840610495e-06, + "loss": 0.5074604153633118, + "step": 2120 + }, + { + "epoch": 0.4890477288448236, + "grad_norm": 1.047533414614444, + "learning_rate": 1.8004165974242124e-06, + "loss": 0.48518210649490356, + "step": 2121 + }, + { + "epoch": 0.4892783029744063, + "grad_norm": 1.3858118136014763, + "learning_rate": 1.800187994438711e-06, + "loss": 0.5427801609039307, + "step": 2122 + }, + { + "epoch": 0.48950887710398894, + "grad_norm": 1.1550068575676335, + "learning_rate": 1.799959275137775e-06, + "loss": 0.5002918839454651, + "step": 2123 + }, + { + "epoch": 0.4897394512335716, + "grad_norm": 1.1639768741422865, + 
"learning_rate": 1.799730439554651e-06, + "loss": 0.4417838454246521, + "step": 2124 + }, + { + "epoch": 0.48997002536315426, + "grad_norm": 1.1441558832004912, + "learning_rate": 1.7995014877226024e-06, + "loss": 0.4260700047016144, + "step": 2125 + }, + { + "epoch": 0.4902005994927369, + "grad_norm": 1.2965264900873492, + "learning_rate": 1.79927241967491e-06, + "loss": 0.5480694770812988, + "step": 2126 + }, + { + "epoch": 0.4904311736223196, + "grad_norm": 1.1303746553940783, + "learning_rate": 1.7990432354448713e-06, + "loss": 0.3911926746368408, + "step": 2127 + }, + { + "epoch": 0.49066174775190224, + "grad_norm": 1.6919718962195622, + "learning_rate": 1.7988139350657997e-06, + "loss": 0.5269262194633484, + "step": 2128 + }, + { + "epoch": 0.4908923218814849, + "grad_norm": 1.1850805062858767, + "learning_rate": 1.7985845185710272e-06, + "loss": 0.47482216358184814, + "step": 2129 + }, + { + "epoch": 0.49112289601106757, + "grad_norm": 1.1047509042558772, + "learning_rate": 1.7983549859939018e-06, + "loss": 0.5663374662399292, + "step": 2130 + }, + { + "epoch": 0.49135347014065023, + "grad_norm": 1.3067402879954033, + "learning_rate": 1.7981253373677875e-06, + "loss": 0.5322546362876892, + "step": 2131 + }, + { + "epoch": 0.4915840442702329, + "grad_norm": 1.3127111295082199, + "learning_rate": 1.797895572726067e-06, + "loss": 0.4238794445991516, + "step": 2132 + }, + { + "epoch": 0.49181461839981555, + "grad_norm": 1.3803934905983801, + "learning_rate": 1.7976656921021384e-06, + "loss": 0.49363791942596436, + "step": 2133 + }, + { + "epoch": 0.4920451925293982, + "grad_norm": 1.2075981604593182, + "learning_rate": 1.7974356955294178e-06, + "loss": 0.5079565048217773, + "step": 2134 + }, + { + "epoch": 0.4922757666589809, + "grad_norm": 1.2533809097279895, + "learning_rate": 1.7972055830413369e-06, + "loss": 0.5259063243865967, + "step": 2135 + }, + { + "epoch": 0.49250634078856353, + "grad_norm": 1.1936271771370206, + "learning_rate": 
1.7969753546713448e-06, + "loss": 0.49021831154823303, + "step": 2136 + }, + { + "epoch": 0.4927369149181462, + "grad_norm": 1.1560183810694227, + "learning_rate": 1.7967450104529078e-06, + "loss": 0.49721387028694153, + "step": 2137 + }, + { + "epoch": 0.49296748904772886, + "grad_norm": 1.523657234221405, + "learning_rate": 1.796514550419509e-06, + "loss": 0.6129348278045654, + "step": 2138 + }, + { + "epoch": 0.4931980631773115, + "grad_norm": 1.245217894172975, + "learning_rate": 1.7962839746046479e-06, + "loss": 0.5034269094467163, + "step": 2139 + }, + { + "epoch": 0.4934286373068942, + "grad_norm": 1.2009412202372387, + "learning_rate": 1.7960532830418408e-06, + "loss": 0.490216463804245, + "step": 2140 + }, + { + "epoch": 0.49365921143647684, + "grad_norm": 1.3063386967377661, + "learning_rate": 1.7958224757646212e-06, + "loss": 0.5609744787216187, + "step": 2141 + }, + { + "epoch": 0.4938897855660595, + "grad_norm": 1.2989425251267097, + "learning_rate": 1.7955915528065395e-06, + "loss": 0.4438238739967346, + "step": 2142 + }, + { + "epoch": 0.49412035969564216, + "grad_norm": 1.1724755739495214, + "learning_rate": 1.7953605142011626e-06, + "loss": 0.4704767167568207, + "step": 2143 + }, + { + "epoch": 0.4943509338252248, + "grad_norm": 1.0972580275821462, + "learning_rate": 1.795129359982074e-06, + "loss": 0.44819536805152893, + "step": 2144 + }, + { + "epoch": 0.4945815079548075, + "grad_norm": 1.4390962273022694, + "learning_rate": 1.7948980901828746e-06, + "loss": 0.5311752557754517, + "step": 2145 + }, + { + "epoch": 0.49481208208439015, + "grad_norm": 1.524280309497039, + "learning_rate": 1.7946667048371818e-06, + "loss": 0.46144258975982666, + "step": 2146 + }, + { + "epoch": 0.4950426562139728, + "grad_norm": 1.719231407355215, + "learning_rate": 1.7944352039786297e-06, + "loss": 0.5973725914955139, + "step": 2147 + }, + { + "epoch": 0.49527323034355547, + "grad_norm": 1.4078850153564488, + "learning_rate": 1.7942035876408693e-06, + "loss": 
0.4930835962295532, + "step": 2148 + }, + { + "epoch": 0.49550380447313813, + "grad_norm": 1.3404357985733748, + "learning_rate": 1.7939718558575685e-06, + "loss": 0.39137697219848633, + "step": 2149 + }, + { + "epoch": 0.4957343786027208, + "grad_norm": 1.364926902591579, + "learning_rate": 1.7937400086624117e-06, + "loss": 0.47618329524993896, + "step": 2150 + }, + { + "epoch": 0.49596495273230345, + "grad_norm": 1.1307446090872737, + "learning_rate": 1.7935080460891005e-06, + "loss": 0.4751483201980591, + "step": 2151 + }, + { + "epoch": 0.4961955268618861, + "grad_norm": 1.05862482163457, + "learning_rate": 1.7932759681713528e-06, + "loss": 0.4654052257537842, + "step": 2152 + }, + { + "epoch": 0.4964261009914688, + "grad_norm": 1.5078817597304273, + "learning_rate": 1.7930437749429035e-06, + "loss": 0.551579475402832, + "step": 2153 + }, + { + "epoch": 0.49665667512105144, + "grad_norm": 1.1496698915645684, + "learning_rate": 1.792811466437504e-06, + "loss": 0.4967789053916931, + "step": 2154 + }, + { + "epoch": 0.4968872492506341, + "grad_norm": 1.2983844202508301, + "learning_rate": 1.7925790426889234e-06, + "loss": 0.5826432108879089, + "step": 2155 + }, + { + "epoch": 0.49711782338021676, + "grad_norm": 1.1680445889037752, + "learning_rate": 1.792346503730946e-06, + "loss": 0.4260643720626831, + "step": 2156 + }, + { + "epoch": 0.4973483975097994, + "grad_norm": 1.287300561489553, + "learning_rate": 1.7921138495973741e-06, + "loss": 0.48679620027542114, + "step": 2157 + }, + { + "epoch": 0.4975789716393821, + "grad_norm": 1.219223301068072, + "learning_rate": 1.7918810803220266e-06, + "loss": 0.5048027634620667, + "step": 2158 + }, + { + "epoch": 0.49780954576896475, + "grad_norm": 1.3507694371861767, + "learning_rate": 1.7916481959387384e-06, + "loss": 0.5073787569999695, + "step": 2159 + }, + { + "epoch": 0.4980401198985474, + "grad_norm": 1.1692017846177098, + "learning_rate": 1.791415196481362e-06, + "loss": 0.47361671924591064, + "step": 2160 + }, + { 
+ "epoch": 0.49827069402813007, + "grad_norm": 1.2422906508724816, + "learning_rate": 1.7911820819837659e-06, + "loss": 0.46382519602775574, + "step": 2161 + }, + { + "epoch": 0.49850126815771273, + "grad_norm": 1.2239936361904968, + "learning_rate": 1.7909488524798357e-06, + "loss": 0.5167688727378845, + "step": 2162 + }, + { + "epoch": 0.4987318422872954, + "grad_norm": 1.125831583037744, + "learning_rate": 1.7907155080034739e-06, + "loss": 0.4486730992794037, + "step": 2163 + }, + { + "epoch": 0.49896241641687805, + "grad_norm": 1.1343310195374692, + "learning_rate": 1.7904820485885991e-06, + "loss": 0.508470356464386, + "step": 2164 + }, + { + "epoch": 0.4991929905464607, + "grad_norm": 1.2928862741310794, + "learning_rate": 1.790248474269148e-06, + "loss": 0.4752856492996216, + "step": 2165 + }, + { + "epoch": 0.4994235646760434, + "grad_norm": 1.4158256008874892, + "learning_rate": 1.7900147850790713e-06, + "loss": 0.47191953659057617, + "step": 2166 + }, + { + "epoch": 0.49965413880562604, + "grad_norm": 1.2139421208311327, + "learning_rate": 1.7897809810523396e-06, + "loss": 0.48935621976852417, + "step": 2167 + }, + { + "epoch": 0.4998847129352087, + "grad_norm": 1.0547512942585364, + "learning_rate": 1.789547062222938e-06, + "loss": 0.5455219149589539, + "step": 2168 + }, + { + "epoch": 0.5001152870647914, + "grad_norm": 1.3471138253822197, + "learning_rate": 1.789313028624869e-06, + "loss": 0.5068193078041077, + "step": 2169 + }, + { + "epoch": 0.500345861194374, + "grad_norm": 1.354177516749214, + "learning_rate": 1.789078880292152e-06, + "loss": 0.5868322253227234, + "step": 2170 + }, + { + "epoch": 0.5005764353239567, + "grad_norm": 1.2474005261331733, + "learning_rate": 1.7888446172588222e-06, + "loss": 0.5132089853286743, + "step": 2171 + }, + { + "epoch": 0.5008070094535393, + "grad_norm": 1.6917901077948925, + "learning_rate": 1.788610239558933e-06, + "loss": 0.5673823356628418, + "step": 2172 + }, + { + "epoch": 0.501037583583122, + "grad_norm": 
1.1902561905753382, + "learning_rate": 1.7883757472265533e-06, + "loss": 0.47085779905319214, + "step": 2173 + }, + { + "epoch": 0.5012681577127046, + "grad_norm": 1.38526914772559, + "learning_rate": 1.7881411402957685e-06, + "loss": 0.5286725163459778, + "step": 2174 + }, + { + "epoch": 0.5014987318422873, + "grad_norm": 1.1910792946448119, + "learning_rate": 1.7879064188006817e-06, + "loss": 0.5044010877609253, + "step": 2175 + }, + { + "epoch": 0.5017293059718699, + "grad_norm": 1.8451305262061892, + "learning_rate": 1.7876715827754113e-06, + "loss": 0.5329761505126953, + "step": 2176 + }, + { + "epoch": 0.5019598801014526, + "grad_norm": 1.1057498562542696, + "learning_rate": 1.7874366322540937e-06, + "loss": 0.5025275349617004, + "step": 2177 + }, + { + "epoch": 0.5021904542310353, + "grad_norm": 1.1913338911250846, + "learning_rate": 1.7872015672708814e-06, + "loss": 0.48466378450393677, + "step": 2178 + }, + { + "epoch": 0.502421028360618, + "grad_norm": 1.298497377256874, + "learning_rate": 1.7869663878599427e-06, + "loss": 0.505358099937439, + "step": 2179 + }, + { + "epoch": 0.5026516024902006, + "grad_norm": 1.3974305011742736, + "learning_rate": 1.7867310940554643e-06, + "loss": 0.4934875965118408, + "step": 2180 + }, + { + "epoch": 0.5028821766197833, + "grad_norm": 0.9670109365307766, + "learning_rate": 1.7864956858916482e-06, + "loss": 0.4726678133010864, + "step": 2181 + }, + { + "epoch": 0.5031127507493659, + "grad_norm": 1.3043022336942207, + "learning_rate": 1.786260163402713e-06, + "loss": 0.4619986414909363, + "step": 2182 + }, + { + "epoch": 0.5033433248789486, + "grad_norm": 1.17201330946801, + "learning_rate": 1.7860245266228946e-06, + "loss": 0.4483926594257355, + "step": 2183 + }, + { + "epoch": 0.5035738990085312, + "grad_norm": 1.0474549975114675, + "learning_rate": 1.7857887755864451e-06, + "loss": 0.4756368100643158, + "step": 2184 + }, + { + "epoch": 0.5038044731381139, + "grad_norm": 1.248404397964203, + "learning_rate": 
1.7855529103276334e-06, + "loss": 0.5610564351081848, + "step": 2185 + }, + { + "epoch": 0.5040350472676965, + "grad_norm": 1.178944045969772, + "learning_rate": 1.7853169308807447e-06, + "loss": 0.49948322772979736, + "step": 2186 + }, + { + "epoch": 0.5042656213972793, + "grad_norm": 1.203613939490818, + "learning_rate": 1.7850808372800813e-06, + "loss": 0.5023819208145142, + "step": 2187 + }, + { + "epoch": 0.5044961955268619, + "grad_norm": 1.1738403952666703, + "learning_rate": 1.7848446295599617e-06, + "loss": 0.45893096923828125, + "step": 2188 + }, + { + "epoch": 0.5047267696564446, + "grad_norm": 1.2621327179460875, + "learning_rate": 1.7846083077547212e-06, + "loss": 0.39129459857940674, + "step": 2189 + }, + { + "epoch": 0.5049573437860272, + "grad_norm": 0.9495823494613052, + "learning_rate": 1.784371871898711e-06, + "loss": 0.42348673939704895, + "step": 2190 + }, + { + "epoch": 0.5051879179156099, + "grad_norm": 1.4438634303858584, + "learning_rate": 1.7841353220263e-06, + "loss": 0.5760704278945923, + "step": 2191 + }, + { + "epoch": 0.5054184920451925, + "grad_norm": 1.1475240268019702, + "learning_rate": 1.7838986581718731e-06, + "loss": 0.5281997323036194, + "step": 2192 + }, + { + "epoch": 0.5056490661747752, + "grad_norm": 1.3139768062702608, + "learning_rate": 1.7836618803698315e-06, + "loss": 0.543775200843811, + "step": 2193 + }, + { + "epoch": 0.5058796403043578, + "grad_norm": 1.2497491249667418, + "learning_rate": 1.7834249886545934e-06, + "loss": 0.4148549437522888, + "step": 2194 + }, + { + "epoch": 0.5061102144339406, + "grad_norm": 1.183178207015322, + "learning_rate": 1.7831879830605936e-06, + "loss": 0.5165001153945923, + "step": 2195 + }, + { + "epoch": 0.5063407885635232, + "grad_norm": 1.0854657175123028, + "learning_rate": 1.782950863622283e-06, + "loss": 0.4183283746242523, + "step": 2196 + }, + { + "epoch": 0.5065713626931059, + "grad_norm": 1.2476527930959387, + "learning_rate": 1.7827136303741292e-06, + "loss": 
0.46558016538619995, + "step": 2197 + }, + { + "epoch": 0.5068019368226885, + "grad_norm": 1.2829595269176914, + "learning_rate": 1.782476283350617e-06, + "loss": 0.5491806268692017, + "step": 2198 + }, + { + "epoch": 0.5070325109522712, + "grad_norm": 1.3547672961051511, + "learning_rate": 1.7822388225862466e-06, + "loss": 0.42999008297920227, + "step": 2199 + }, + { + "epoch": 0.5072630850818538, + "grad_norm": 1.2776437457035281, + "learning_rate": 1.7820012481155358e-06, + "loss": 0.42478299140930176, + "step": 2200 + }, + { + "epoch": 0.5074936592114365, + "grad_norm": 4.51069636831696, + "learning_rate": 1.781763559973018e-06, + "loss": 0.4175076186656952, + "step": 2201 + }, + { + "epoch": 0.5077242333410191, + "grad_norm": 1.1985836355289028, + "learning_rate": 1.7815257581932439e-06, + "loss": 0.42197084426879883, + "step": 2202 + }, + { + "epoch": 0.5079548074706018, + "grad_norm": 1.2175005553032592, + "learning_rate": 1.7812878428107803e-06, + "loss": 0.39872926473617554, + "step": 2203 + }, + { + "epoch": 0.5081853816001844, + "grad_norm": 1.2908474732070376, + "learning_rate": 1.7810498138602106e-06, + "loss": 0.4572516977787018, + "step": 2204 + }, + { + "epoch": 0.5084159557297672, + "grad_norm": 1.1254873587347531, + "learning_rate": 1.780811671376135e-06, + "loss": 0.5261520147323608, + "step": 2205 + }, + { + "epoch": 0.5086465298593498, + "grad_norm": 1.8336847349223555, + "learning_rate": 1.7805734153931696e-06, + "loss": 0.4714658260345459, + "step": 2206 + }, + { + "epoch": 0.5088771039889325, + "grad_norm": 1.0757806041139168, + "learning_rate": 1.7803350459459472e-06, + "loss": 0.46184858679771423, + "step": 2207 + }, + { + "epoch": 0.5091076781185151, + "grad_norm": 1.2531712345918984, + "learning_rate": 1.7800965630691173e-06, + "loss": 0.48189157247543335, + "step": 2208 + }, + { + "epoch": 0.5093382522480978, + "grad_norm": 1.5363179586848308, + "learning_rate": 1.7798579667973463e-06, + "loss": 0.47865352034568787, + "step": 2209 + }, 
+ { + "epoch": 0.5095688263776804, + "grad_norm": 1.1589101806191746, + "learning_rate": 1.7796192571653162e-06, + "loss": 0.46073317527770996, + "step": 2210 + }, + { + "epoch": 0.5097994005072631, + "grad_norm": 1.1781605500578527, + "learning_rate": 1.7793804342077253e-06, + "loss": 0.5099648237228394, + "step": 2211 + }, + { + "epoch": 0.5100299746368457, + "grad_norm": 1.2319682423717142, + "learning_rate": 1.7791414979592903e-06, + "loss": 0.5436147451400757, + "step": 2212 + }, + { + "epoch": 0.5102605487664285, + "grad_norm": 1.2305699349330186, + "learning_rate": 1.7789024484547417e-06, + "loss": 0.5455893278121948, + "step": 2213 + }, + { + "epoch": 0.5104911228960111, + "grad_norm": 1.2918560641722026, + "learning_rate": 1.7786632857288284e-06, + "loss": 0.4886546730995178, + "step": 2214 + }, + { + "epoch": 0.5107216970255938, + "grad_norm": 1.1611199451436964, + "learning_rate": 1.778424009816315e-06, + "loss": 0.4793723225593567, + "step": 2215 + }, + { + "epoch": 0.5109522711551764, + "grad_norm": 1.3312189289078886, + "learning_rate": 1.7781846207519826e-06, + "loss": 0.5814248323440552, + "step": 2216 + }, + { + "epoch": 0.5111828452847591, + "grad_norm": 1.1560984097631717, + "learning_rate": 1.777945118570629e-06, + "loss": 0.5057421326637268, + "step": 2217 + }, + { + "epoch": 0.5114134194143417, + "grad_norm": 1.3009634347843195, + "learning_rate": 1.7777055033070682e-06, + "loss": 0.3913435935974121, + "step": 2218 + }, + { + "epoch": 0.5116439935439244, + "grad_norm": 0.9761581598604525, + "learning_rate": 1.7774657749961305e-06, + "loss": 0.4450770616531372, + "step": 2219 + }, + { + "epoch": 0.511874567673507, + "grad_norm": 1.731999332658399, + "learning_rate": 1.7772259336726636e-06, + "loss": 0.5164940357208252, + "step": 2220 + }, + { + "epoch": 0.5121051418030897, + "grad_norm": 1.257043827333845, + "learning_rate": 1.7769859793715298e-06, + "loss": 0.44231802225112915, + "step": 2221 + }, + { + "epoch": 0.5123357159326724, + 
"grad_norm": 1.2521439253976214, + "learning_rate": 1.7767459121276093e-06, + "loss": 0.516791820526123, + "step": 2222 + }, + { + "epoch": 0.5125662900622551, + "grad_norm": 1.2456616904380073, + "learning_rate": 1.7765057319757989e-06, + "loss": 0.4180450737476349, + "step": 2223 + }, + { + "epoch": 0.5127968641918377, + "grad_norm": 1.1350275613249636, + "learning_rate": 1.77626543895101e-06, + "loss": 0.49246734380722046, + "step": 2224 + }, + { + "epoch": 0.5130274383214203, + "grad_norm": 1.1582721424765736, + "learning_rate": 1.7760250330881728e-06, + "loss": 0.5058225393295288, + "step": 2225 + }, + { + "epoch": 0.513258012451003, + "grad_norm": 1.4118813849041838, + "learning_rate": 1.7757845144222321e-06, + "loss": 0.4752033054828644, + "step": 2226 + }, + { + "epoch": 0.5134885865805856, + "grad_norm": 1.2950831387397626, + "learning_rate": 1.77554388298815e-06, + "loss": 0.45163947343826294, + "step": 2227 + }, + { + "epoch": 0.5137191607101683, + "grad_norm": 1.387042973653302, + "learning_rate": 1.7753031388209044e-06, + "loss": 0.46295779943466187, + "step": 2228 + }, + { + "epoch": 0.5139497348397509, + "grad_norm": 1.2958875463664286, + "learning_rate": 1.7750622819554903e-06, + "loss": 0.5682947635650635, + "step": 2229 + }, + { + "epoch": 0.5141803089693336, + "grad_norm": 1.353052791820573, + "learning_rate": 1.7748213124269187e-06, + "loss": 0.4890878200531006, + "step": 2230 + }, + { + "epoch": 0.5144108830989162, + "grad_norm": 1.4612536503294715, + "learning_rate": 1.7745802302702164e-06, + "loss": 0.5952332615852356, + "step": 2231 + }, + { + "epoch": 0.514641457228499, + "grad_norm": 1.1928368431775584, + "learning_rate": 1.7743390355204278e-06, + "loss": 0.43224406242370605, + "step": 2232 + }, + { + "epoch": 0.5148720313580816, + "grad_norm": 1.1851533508030387, + "learning_rate": 1.7740977282126122e-06, + "loss": 0.5010303258895874, + "step": 2233 + }, + { + "epoch": 0.5151026054876643, + "grad_norm": 1.105983766082305, + 
"learning_rate": 1.7738563083818469e-06, + "loss": 0.5166633725166321, + "step": 2234 + }, + { + "epoch": 0.5153331796172469, + "grad_norm": 1.0533784617555741, + "learning_rate": 1.7736147760632245e-06, + "loss": 0.4748263359069824, + "step": 2235 + }, + { + "epoch": 0.5155637537468296, + "grad_norm": 0.9010011595528595, + "learning_rate": 1.773373131291854e-06, + "loss": 0.46462053060531616, + "step": 2236 + }, + { + "epoch": 0.5157943278764122, + "grad_norm": 1.1288843437350349, + "learning_rate": 1.7731313741028608e-06, + "loss": 0.47799748182296753, + "step": 2237 + }, + { + "epoch": 0.5160249020059949, + "grad_norm": 1.2958124494051022, + "learning_rate": 1.772889504531387e-06, + "loss": 0.43448662757873535, + "step": 2238 + }, + { + "epoch": 0.5162554761355775, + "grad_norm": 1.2781442130344307, + "learning_rate": 1.7726475226125905e-06, + "loss": 0.4609360098838806, + "step": 2239 + }, + { + "epoch": 0.5164860502651603, + "grad_norm": 1.123946418980165, + "learning_rate": 1.7724054283816463e-06, + "loss": 0.505261242389679, + "step": 2240 + }, + { + "epoch": 0.5167166243947429, + "grad_norm": 1.1143888709548355, + "learning_rate": 1.772163221873745e-06, + "loss": 0.3812851905822754, + "step": 2241 + }, + { + "epoch": 0.5169471985243256, + "grad_norm": 1.1698544335678498, + "learning_rate": 1.7719209031240938e-06, + "loss": 0.42545294761657715, + "step": 2242 + }, + { + "epoch": 0.5171777726539082, + "grad_norm": 1.3964979839005025, + "learning_rate": 1.771678472167916e-06, + "loss": 0.45135340094566345, + "step": 2243 + }, + { + "epoch": 0.5174083467834909, + "grad_norm": 1.1118819857040387, + "learning_rate": 1.7714359290404514e-06, + "loss": 0.4499250650405884, + "step": 2244 + }, + { + "epoch": 0.5176389209130735, + "grad_norm": 1.2793420965554383, + "learning_rate": 1.7711932737769564e-06, + "loss": 0.4355557858943939, + "step": 2245 + }, + { + "epoch": 0.5178694950426562, + "grad_norm": 1.3068878220482505, + "learning_rate": 1.7709505064127036e-06, + 
"loss": 0.4140744209289551, + "step": 2246 + }, + { + "epoch": 0.5181000691722388, + "grad_norm": 1.2538619837975196, + "learning_rate": 1.7707076269829809e-06, + "loss": 0.5108504891395569, + "step": 2247 + }, + { + "epoch": 0.5183306433018215, + "grad_norm": 1.0866593797381727, + "learning_rate": 1.7704646355230936e-06, + "loss": 0.5064615607261658, + "step": 2248 + }, + { + "epoch": 0.5185612174314042, + "grad_norm": 1.4034267264652582, + "learning_rate": 1.7702215320683636e-06, + "loss": 0.5922794342041016, + "step": 2249 + }, + { + "epoch": 0.5187917915609869, + "grad_norm": 1.236045367714828, + "learning_rate": 1.7699783166541279e-06, + "loss": 0.3890082836151123, + "step": 2250 + }, + { + "epoch": 0.5190223656905695, + "grad_norm": 1.1663861833023768, + "learning_rate": 1.7697349893157402e-06, + "loss": 0.5585668087005615, + "step": 2251 + }, + { + "epoch": 0.5192529398201522, + "grad_norm": 1.2125542528327162, + "learning_rate": 1.7694915500885706e-06, + "loss": 0.3904608488082886, + "step": 2252 + }, + { + "epoch": 0.5194835139497348, + "grad_norm": 1.3213509465151734, + "learning_rate": 1.7692479990080056e-06, + "loss": 0.4764491617679596, + "step": 2253 + }, + { + "epoch": 0.5197140880793175, + "grad_norm": 1.3113796870909902, + "learning_rate": 1.769004336109448e-06, + "loss": 0.49443554878234863, + "step": 2254 + }, + { + "epoch": 0.5199446622089001, + "grad_norm": 1.2196571448758133, + "learning_rate": 1.7687605614283165e-06, + "loss": 0.4679003357887268, + "step": 2255 + }, + { + "epoch": 0.5201752363384828, + "grad_norm": 1.6767016497784393, + "learning_rate": 1.7685166750000465e-06, + "loss": 0.6968683004379272, + "step": 2256 + }, + { + "epoch": 0.5204058104680654, + "grad_norm": 1.406455012631932, + "learning_rate": 1.7682726768600888e-06, + "loss": 0.5688217878341675, + "step": 2257 + }, + { + "epoch": 0.5206363845976482, + "grad_norm": 1.176050025614157, + "learning_rate": 1.7680285670439115e-06, + "loss": 0.4688011705875397, + "step": 2258 + 
}, + { + "epoch": 0.5208669587272308, + "grad_norm": 1.1772680288415673, + "learning_rate": 1.7677843455869984e-06, + "loss": 0.6447713971138, + "step": 2259 + }, + { + "epoch": 0.5210975328568135, + "grad_norm": 1.3187686937196665, + "learning_rate": 1.767540012524849e-06, + "loss": 0.578650951385498, + "step": 2260 + }, + { + "epoch": 0.5213281069863961, + "grad_norm": 1.4425748519700892, + "learning_rate": 1.76729556789298e-06, + "loss": 0.5001357197761536, + "step": 2261 + }, + { + "epoch": 0.5215586811159788, + "grad_norm": 1.2145912604177214, + "learning_rate": 1.7670510117269242e-06, + "loss": 0.5336331129074097, + "step": 2262 + }, + { + "epoch": 0.5217892552455614, + "grad_norm": 1.2105621787494676, + "learning_rate": 1.76680634406223e-06, + "loss": 0.5628900527954102, + "step": 2263 + }, + { + "epoch": 0.5220198293751441, + "grad_norm": 1.2476030455409495, + "learning_rate": 1.766561564934462e-06, + "loss": 0.46497443318367004, + "step": 2264 + }, + { + "epoch": 0.5222504035047267, + "grad_norm": 1.4921989012106511, + "learning_rate": 1.7663166743792019e-06, + "loss": 0.617607831954956, + "step": 2265 + }, + { + "epoch": 0.5224809776343095, + "grad_norm": 1.1582259137476871, + "learning_rate": 1.7660716724320468e-06, + "loss": 0.5236914157867432, + "step": 2266 + }, + { + "epoch": 0.5227115517638921, + "grad_norm": 1.2919028654437321, + "learning_rate": 1.76582655912861e-06, + "loss": 0.5527941584587097, + "step": 2267 + }, + { + "epoch": 0.5229421258934748, + "grad_norm": 1.208274388494889, + "learning_rate": 1.7655813345045218e-06, + "loss": 0.5394654273986816, + "step": 2268 + }, + { + "epoch": 0.5231727000230574, + "grad_norm": 1.1822216818330542, + "learning_rate": 1.7653359985954275e-06, + "loss": 0.47050246596336365, + "step": 2269 + }, + { + "epoch": 0.5234032741526401, + "grad_norm": 1.2893306401147882, + "learning_rate": 1.7650905514369894e-06, + "loss": 0.49413689970970154, + "step": 2270 + }, + { + "epoch": 0.5236338482822227, + "grad_norm": 
1.3086960549802995, + "learning_rate": 1.7648449930648856e-06, + "loss": 0.5568829774856567, + "step": 2271 + }, + { + "epoch": 0.5238644224118054, + "grad_norm": 1.2475799557753502, + "learning_rate": 1.7645993235148107e-06, + "loss": 0.49238815903663635, + "step": 2272 + }, + { + "epoch": 0.524094996541388, + "grad_norm": 1.16612817534413, + "learning_rate": 1.7643535428224752e-06, + "loss": 0.5580959320068359, + "step": 2273 + }, + { + "epoch": 0.5243255706709707, + "grad_norm": 1.4921637909191205, + "learning_rate": 1.7641076510236052e-06, + "loss": 0.5853499174118042, + "step": 2274 + }, + { + "epoch": 0.5245561448005533, + "grad_norm": 1.3988944269011947, + "learning_rate": 1.7638616481539448e-06, + "loss": 0.5638653635978699, + "step": 2275 + }, + { + "epoch": 0.5247867189301361, + "grad_norm": 1.2859178438597552, + "learning_rate": 1.7636155342492521e-06, + "loss": 0.5197241306304932, + "step": 2276 + }, + { + "epoch": 0.5250172930597187, + "grad_norm": 1.1094174928372944, + "learning_rate": 1.7633693093453026e-06, + "loss": 0.4137725234031677, + "step": 2277 + }, + { + "epoch": 0.5252478671893014, + "grad_norm": 1.2940062745509122, + "learning_rate": 1.7631229734778872e-06, + "loss": 0.54244065284729, + "step": 2278 + }, + { + "epoch": 0.525478441318884, + "grad_norm": 1.1871875469955007, + "learning_rate": 1.7628765266828137e-06, + "loss": 0.5215432047843933, + "step": 2279 + }, + { + "epoch": 0.5257090154484667, + "grad_norm": 1.1984410258580116, + "learning_rate": 1.7626299689959057e-06, + "loss": 0.5559565424919128, + "step": 2280 + }, + { + "epoch": 0.5259395895780493, + "grad_norm": 1.1663711332671047, + "learning_rate": 1.7623833004530026e-06, + "loss": 0.5251328945159912, + "step": 2281 + }, + { + "epoch": 0.526170163707632, + "grad_norm": 1.241523894329925, + "learning_rate": 1.7621365210899598e-06, + "loss": 0.5351072549819946, + "step": 2282 + }, + { + "epoch": 0.5264007378372146, + "grad_norm": 1.1901641374825476, + "learning_rate": 
1.7618896309426504e-06, + "loss": 0.46850037574768066, + "step": 2283 + }, + { + "epoch": 0.5266313119667974, + "grad_norm": 1.1697893294442419, + "learning_rate": 1.761642630046961e-06, + "loss": 0.5001033544540405, + "step": 2284 + }, + { + "epoch": 0.52686188609638, + "grad_norm": 0.9279299862604019, + "learning_rate": 1.7613955184387968e-06, + "loss": 0.47946250438690186, + "step": 2285 + }, + { + "epoch": 0.5270924602259627, + "grad_norm": 1.0539631796672029, + "learning_rate": 1.761148296154077e-06, + "loss": 0.4743049144744873, + "step": 2286 + }, + { + "epoch": 0.5273230343555453, + "grad_norm": 1.154224335020326, + "learning_rate": 1.7609009632287389e-06, + "loss": 0.4518652558326721, + "step": 2287 + }, + { + "epoch": 0.527553608485128, + "grad_norm": 1.0859896497705106, + "learning_rate": 1.7606535196987338e-06, + "loss": 0.5021224617958069, + "step": 2288 + }, + { + "epoch": 0.5277841826147106, + "grad_norm": 1.4832483769951506, + "learning_rate": 1.760405965600031e-06, + "loss": 0.4848078489303589, + "step": 2289 + }, + { + "epoch": 0.5280147567442933, + "grad_norm": 1.22421773905119, + "learning_rate": 1.7601583009686142e-06, + "loss": 0.49077051877975464, + "step": 2290 + }, + { + "epoch": 0.5282453308738759, + "grad_norm": 1.2916718452438969, + "learning_rate": 1.7599105258404848e-06, + "loss": 0.4802943468093872, + "step": 2291 + }, + { + "epoch": 0.5284759050034586, + "grad_norm": 1.4055248895326071, + "learning_rate": 1.7596626402516589e-06, + "loss": 0.5397455096244812, + "step": 2292 + }, + { + "epoch": 0.5287064791330413, + "grad_norm": 1.0497017336135974, + "learning_rate": 1.759414644238169e-06, + "loss": 0.478559672832489, + "step": 2293 + }, + { + "epoch": 0.528937053262624, + "grad_norm": 1.112359888255478, + "learning_rate": 1.7591665378360644e-06, + "loss": 0.5080797672271729, + "step": 2294 + }, + { + "epoch": 0.5291676273922066, + "grad_norm": 1.0468621326779766, + "learning_rate": 1.7589183210814093e-06, + "loss": 0.4959479868412018, 
+ "step": 2295 + }, + { + "epoch": 0.5293982015217893, + "grad_norm": 1.1985868339045591, + "learning_rate": 1.7586699940102853e-06, + "loss": 0.512288510799408, + "step": 2296 + }, + { + "epoch": 0.5296287756513719, + "grad_norm": 1.1129893572343195, + "learning_rate": 1.7584215566587886e-06, + "loss": 0.525113046169281, + "step": 2297 + }, + { + "epoch": 0.5298593497809546, + "grad_norm": 1.2088844531850982, + "learning_rate": 1.7581730090630322e-06, + "loss": 0.3715069890022278, + "step": 2298 + }, + { + "epoch": 0.5300899239105372, + "grad_norm": 1.3852845244524983, + "learning_rate": 1.757924351259145e-06, + "loss": 0.5833072662353516, + "step": 2299 + }, + { + "epoch": 0.5303204980401199, + "grad_norm": 1.638098016270419, + "learning_rate": 1.7576755832832721e-06, + "loss": 0.5942450761795044, + "step": 2300 + }, + { + "epoch": 0.5305510721697025, + "grad_norm": 1.1523961468173722, + "learning_rate": 1.7574267051715745e-06, + "loss": 0.4754432737827301, + "step": 2301 + }, + { + "epoch": 0.5307816462992853, + "grad_norm": 1.3593694553922624, + "learning_rate": 1.7571777169602287e-06, + "loss": 0.5272700190544128, + "step": 2302 + }, + { + "epoch": 0.5310122204288679, + "grad_norm": 1.137089307163323, + "learning_rate": 1.7569286186854283e-06, + "loss": 0.48376554250717163, + "step": 2303 + }, + { + "epoch": 0.5312427945584506, + "grad_norm": 1.324023805933818, + "learning_rate": 1.7566794103833816e-06, + "loss": 0.4324077367782593, + "step": 2304 + }, + { + "epoch": 0.5314733686880332, + "grad_norm": 1.2843168925212602, + "learning_rate": 1.7564300920903142e-06, + "loss": 0.44939202070236206, + "step": 2305 + }, + { + "epoch": 0.5317039428176159, + "grad_norm": 1.2413807013846574, + "learning_rate": 1.7561806638424662e-06, + "loss": 0.5256277322769165, + "step": 2306 + }, + { + "epoch": 0.5319345169471985, + "grad_norm": 1.0855894350628046, + "learning_rate": 1.7559311256760955e-06, + "loss": 0.43901991844177246, + "step": 2307 + }, + { + "epoch": 
0.5321650910767812, + "grad_norm": 1.3134089338347328, + "learning_rate": 1.7556814776274746e-06, + "loss": 0.5256138443946838, + "step": 2308 + }, + { + "epoch": 0.5323956652063638, + "grad_norm": 1.3769537654510517, + "learning_rate": 1.7554317197328922e-06, + "loss": 0.4664478600025177, + "step": 2309 + }, + { + "epoch": 0.5326262393359465, + "grad_norm": 1.1227476903728313, + "learning_rate": 1.7551818520286532e-06, + "loss": 0.5042726397514343, + "step": 2310 + }, + { + "epoch": 0.5328568134655292, + "grad_norm": 1.3417267355052607, + "learning_rate": 1.754931874551079e-06, + "loss": 0.5682350397109985, + "step": 2311 + }, + { + "epoch": 0.5330873875951119, + "grad_norm": 1.2416043105842551, + "learning_rate": 1.754681787336505e-06, + "loss": 0.5082807540893555, + "step": 2312 + }, + { + "epoch": 0.5333179617246945, + "grad_norm": 1.4255568276367208, + "learning_rate": 1.754431590421285e-06, + "loss": 0.6020215749740601, + "step": 2313 + }, + { + "epoch": 0.5335485358542772, + "grad_norm": 1.4104154799235167, + "learning_rate": 1.7541812838417877e-06, + "loss": 0.5004276633262634, + "step": 2314 + }, + { + "epoch": 0.5337791099838598, + "grad_norm": 1.060415170291065, + "learning_rate": 1.753930867634397e-06, + "loss": 0.4889993667602539, + "step": 2315 + }, + { + "epoch": 0.5340096841134425, + "grad_norm": 1.0849217066026469, + "learning_rate": 1.7536803418355141e-06, + "loss": 0.4179444909095764, + "step": 2316 + }, + { + "epoch": 0.5342402582430251, + "grad_norm": 1.2618059778728548, + "learning_rate": 1.7534297064815554e-06, + "loss": 0.46807605028152466, + "step": 2317 + }, + { + "epoch": 0.5344708323726078, + "grad_norm": 1.2827117317411258, + "learning_rate": 1.7531789616089528e-06, + "loss": 0.39173221588134766, + "step": 2318 + }, + { + "epoch": 0.5347014065021904, + "grad_norm": 1.2820357654319097, + "learning_rate": 1.7529281072541548e-06, + "loss": 0.4290514886379242, + "step": 2319 + }, + { + "epoch": 0.5349319806317732, + "grad_norm": 
1.3778694052072273, + "learning_rate": 1.752677143453626e-06, + "loss": 0.6052347421646118, + "step": 2320 + }, + { + "epoch": 0.5351625547613558, + "grad_norm": 1.054542888313722, + "learning_rate": 1.752426070243846e-06, + "loss": 0.47622209787368774, + "step": 2321 + }, + { + "epoch": 0.5353931288909385, + "grad_norm": 1.128157779747108, + "learning_rate": 1.7521748876613112e-06, + "loss": 0.4216923415660858, + "step": 2322 + }, + { + "epoch": 0.5356237030205211, + "grad_norm": 2.0737049391078384, + "learning_rate": 1.751923595742533e-06, + "loss": 0.5527430772781372, + "step": 2323 + }, + { + "epoch": 0.5358542771501038, + "grad_norm": 1.1406433043117166, + "learning_rate": 1.75167219452404e-06, + "loss": 0.5562101602554321, + "step": 2324 + }, + { + "epoch": 0.5360848512796864, + "grad_norm": 1.2183539446117024, + "learning_rate": 1.7514206840423757e-06, + "loss": 0.546181321144104, + "step": 2325 + }, + { + "epoch": 0.5363154254092691, + "grad_norm": 1.5216852196360238, + "learning_rate": 1.7511690643340995e-06, + "loss": 0.5883532762527466, + "step": 2326 + }, + { + "epoch": 0.5365459995388517, + "grad_norm": 1.2667138111118152, + "learning_rate": 1.750917335435787e-06, + "loss": 0.5231350660324097, + "step": 2327 + }, + { + "epoch": 0.5367765736684345, + "grad_norm": 1.200525241411545, + "learning_rate": 1.7506654973840292e-06, + "loss": 0.4846429228782654, + "step": 2328 + }, + { + "epoch": 0.5370071477980171, + "grad_norm": 1.0815584734915895, + "learning_rate": 1.7504135502154335e-06, + "loss": 0.43692171573638916, + "step": 2329 + }, + { + "epoch": 0.5372377219275998, + "grad_norm": 1.0658062374834336, + "learning_rate": 1.7501614939666234e-06, + "loss": 0.5076167583465576, + "step": 2330 + }, + { + "epoch": 0.5374682960571824, + "grad_norm": 1.2658937157989252, + "learning_rate": 1.7499093286742373e-06, + "loss": 0.5302891135215759, + "step": 2331 + }, + { + "epoch": 0.5376988701867651, + "grad_norm": 1.3200406937261826, + "learning_rate": 
1.7496570543749303e-06, + "loss": 0.5827817916870117, + "step": 2332 + }, + { + "epoch": 0.5379294443163477, + "grad_norm": 1.3684047155196064, + "learning_rate": 1.7494046711053726e-06, + "loss": 0.6765470504760742, + "step": 2333 + }, + { + "epoch": 0.5381600184459304, + "grad_norm": 1.3001315312834418, + "learning_rate": 1.7491521789022513e-06, + "loss": 0.48666322231292725, + "step": 2334 + }, + { + "epoch": 0.538390592575513, + "grad_norm": 1.0490910849362622, + "learning_rate": 1.7488995778022685e-06, + "loss": 0.5163695812225342, + "step": 2335 + }, + { + "epoch": 0.5386211667050956, + "grad_norm": 1.1765286879203154, + "learning_rate": 1.748646867842142e-06, + "loss": 0.44487982988357544, + "step": 2336 + }, + { + "epoch": 0.5388517408346783, + "grad_norm": 1.2992285046307706, + "learning_rate": 1.7483940490586058e-06, + "loss": 0.5512663722038269, + "step": 2337 + }, + { + "epoch": 0.539082314964261, + "grad_norm": 1.1533551829707172, + "learning_rate": 1.7481411214884098e-06, + "loss": 0.461128294467926, + "step": 2338 + }, + { + "epoch": 0.5393128890938437, + "grad_norm": 1.2239639921661383, + "learning_rate": 1.7478880851683197e-06, + "loss": 0.47291088104248047, + "step": 2339 + }, + { + "epoch": 0.5395434632234263, + "grad_norm": 1.1568837363453548, + "learning_rate": 1.747634940135117e-06, + "loss": 0.5900166034698486, + "step": 2340 + }, + { + "epoch": 0.539774037353009, + "grad_norm": 1.0385421801821113, + "learning_rate": 1.7473816864255983e-06, + "loss": 0.3878340721130371, + "step": 2341 + }, + { + "epoch": 0.5400046114825916, + "grad_norm": 1.442772155197814, + "learning_rate": 1.7471283240765775e-06, + "loss": 0.5671564340591431, + "step": 2342 + }, + { + "epoch": 0.5402351856121743, + "grad_norm": 1.1602673867587185, + "learning_rate": 1.7468748531248824e-06, + "loss": 0.5153918266296387, + "step": 2343 + }, + { + "epoch": 0.5404657597417569, + "grad_norm": 1.2187996046056446, + "learning_rate": 1.7466212736073585e-06, + "loss": 
0.49520084261894226, + "step": 2344 + }, + { + "epoch": 0.5406963338713396, + "grad_norm": 1.0955374839449357, + "learning_rate": 1.7463675855608654e-06, + "loss": 0.4884970784187317, + "step": 2345 + }, + { + "epoch": 0.5409269080009222, + "grad_norm": 1.401002336922335, + "learning_rate": 1.7461137890222798e-06, + "loss": 0.5233277678489685, + "step": 2346 + }, + { + "epoch": 0.541157482130505, + "grad_norm": 1.272363275240415, + "learning_rate": 1.7458598840284928e-06, + "loss": 0.44011372327804565, + "step": 2347 + }, + { + "epoch": 0.5413880562600876, + "grad_norm": 1.1593134205382656, + "learning_rate": 1.745605870616413e-06, + "loss": 0.4833263158798218, + "step": 2348 + }, + { + "epoch": 0.5416186303896703, + "grad_norm": 1.186578949511732, + "learning_rate": 1.7453517488229634e-06, + "loss": 0.4852379262447357, + "step": 2349 + }, + { + "epoch": 0.5418492045192529, + "grad_norm": 1.527590855990685, + "learning_rate": 1.7450975186850831e-06, + "loss": 0.4710320830345154, + "step": 2350 + }, + { + "epoch": 0.5420797786488356, + "grad_norm": 1.4382691899722804, + "learning_rate": 1.744843180239727e-06, + "loss": 0.5144790410995483, + "step": 2351 + }, + { + "epoch": 0.5423103527784182, + "grad_norm": 1.3784898997392558, + "learning_rate": 1.7445887335238663e-06, + "loss": 0.5815445184707642, + "step": 2352 + }, + { + "epoch": 0.5425409269080009, + "grad_norm": 1.1629274836022288, + "learning_rate": 1.7443341785744864e-06, + "loss": 0.5101407170295715, + "step": 2353 + }, + { + "epoch": 0.5427715010375835, + "grad_norm": 1.1760272227987194, + "learning_rate": 1.7440795154285905e-06, + "loss": 0.4584839940071106, + "step": 2354 + }, + { + "epoch": 0.5430020751671663, + "grad_norm": 1.323122873632264, + "learning_rate": 1.743824744123196e-06, + "loss": 0.482247531414032, + "step": 2355 + }, + { + "epoch": 0.5432326492967489, + "grad_norm": 1.1361176263052393, + "learning_rate": 1.7435698646953364e-06, + "loss": 0.5503325462341309, + "step": 2356 + }, + { + 
"epoch": 0.5434632234263316, + "grad_norm": 1.2952580221197654, + "learning_rate": 1.7433148771820612e-06, + "loss": 0.4803489148616791, + "step": 2357 + }, + { + "epoch": 0.5436937975559142, + "grad_norm": 1.303291620807208, + "learning_rate": 1.7430597816204351e-06, + "loss": 0.5388872027397156, + "step": 2358 + }, + { + "epoch": 0.5439243716854969, + "grad_norm": 1.6209081192397237, + "learning_rate": 1.742804578047539e-06, + "loss": 0.512636125087738, + "step": 2359 + }, + { + "epoch": 0.5441549458150795, + "grad_norm": 1.5943501598581358, + "learning_rate": 1.7425492665004699e-06, + "loss": 0.49154865741729736, + "step": 2360 + }, + { + "epoch": 0.5443855199446622, + "grad_norm": 1.1498651594774036, + "learning_rate": 1.7422938470163389e-06, + "loss": 0.5185250639915466, + "step": 2361 + }, + { + "epoch": 0.5446160940742448, + "grad_norm": 1.5663688017502957, + "learning_rate": 1.7420383196322747e-06, + "loss": 0.5474511384963989, + "step": 2362 + }, + { + "epoch": 0.5448466682038275, + "grad_norm": 1.3465441719791955, + "learning_rate": 1.7417826843854202e-06, + "loss": 0.48212137818336487, + "step": 2363 + }, + { + "epoch": 0.5450772423334102, + "grad_norm": 1.1320785808666363, + "learning_rate": 1.7415269413129348e-06, + "loss": 0.47983086109161377, + "step": 2364 + }, + { + "epoch": 0.5453078164629929, + "grad_norm": 1.1314426678618292, + "learning_rate": 1.7412710904519932e-06, + "loss": 0.4935225546360016, + "step": 2365 + }, + { + "epoch": 0.5455383905925755, + "grad_norm": 1.2528535153373956, + "learning_rate": 1.7410151318397862e-06, + "loss": 0.5167664289474487, + "step": 2366 + }, + { + "epoch": 0.5457689647221582, + "grad_norm": 1.1782327982922274, + "learning_rate": 1.74075906551352e-06, + "loss": 0.5116056799888611, + "step": 2367 + }, + { + "epoch": 0.5459995388517408, + "grad_norm": 1.1184728717072068, + "learning_rate": 1.7405028915104158e-06, + "loss": 0.4709595739841461, + "step": 2368 + }, + { + "epoch": 0.5462301129813235, + "grad_norm": 
1.560534410686712, + "learning_rate": 1.7402466098677118e-06, + "loss": 0.3989061117172241, + "step": 2369 + }, + { + "epoch": 0.5464606871109061, + "grad_norm": 1.1397817693321244, + "learning_rate": 1.739990220622661e-06, + "loss": 0.45720764994621277, + "step": 2370 + }, + { + "epoch": 0.5466912612404888, + "grad_norm": 1.6154705847610804, + "learning_rate": 1.739733723812532e-06, + "loss": 0.5865384936332703, + "step": 2371 + }, + { + "epoch": 0.5469218353700714, + "grad_norm": 1.3129437136284077, + "learning_rate": 1.7394771194746092e-06, + "loss": 0.4451501965522766, + "step": 2372 + }, + { + "epoch": 0.5471524094996542, + "grad_norm": 1.2213938230584949, + "learning_rate": 1.7392204076461928e-06, + "loss": 0.4628486633300781, + "step": 2373 + }, + { + "epoch": 0.5473829836292368, + "grad_norm": 1.2854198948482758, + "learning_rate": 1.7389635883645984e-06, + "loss": 0.4797760248184204, + "step": 2374 + }, + { + "epoch": 0.5476135577588195, + "grad_norm": 1.2890601616689177, + "learning_rate": 1.7387066616671571e-06, + "loss": 0.4716770648956299, + "step": 2375 + }, + { + "epoch": 0.5478441318884021, + "grad_norm": 1.071991179643841, + "learning_rate": 1.738449627591216e-06, + "loss": 0.504901647567749, + "step": 2376 + }, + { + "epoch": 0.5480747060179848, + "grad_norm": 1.259141194312177, + "learning_rate": 1.7381924861741375e-06, + "loss": 0.5248615145683289, + "step": 2377 + }, + { + "epoch": 0.5483052801475674, + "grad_norm": 1.1551298194401718, + "learning_rate": 1.7379352374532998e-06, + "loss": 0.41704076528549194, + "step": 2378 + }, + { + "epoch": 0.5485358542771501, + "grad_norm": 1.1093382819710802, + "learning_rate": 1.7376778814660966e-06, + "loss": 0.42278197407722473, + "step": 2379 + }, + { + "epoch": 0.5487664284067327, + "grad_norm": 1.3240414194175114, + "learning_rate": 1.7374204182499372e-06, + "loss": 0.4104729890823364, + "step": 2380 + }, + { + "epoch": 0.5489970025363154, + "grad_norm": 1.237574436817826, + "learning_rate": 
1.7371628478422467e-06, + "loss": 0.5205684304237366, + "step": 2381 + }, + { + "epoch": 0.549227576665898, + "grad_norm": 1.2914374831424469, + "learning_rate": 1.7369051702804648e-06, + "loss": 0.4743306040763855, + "step": 2382 + }, + { + "epoch": 0.5494581507954808, + "grad_norm": 1.4263628155545096, + "learning_rate": 1.7366473856020486e-06, + "loss": 0.6324253678321838, + "step": 2383 + }, + { + "epoch": 0.5496887249250634, + "grad_norm": 1.2093119037905458, + "learning_rate": 1.736389493844469e-06, + "loss": 0.46466588973999023, + "step": 2384 + }, + { + "epoch": 0.5499192990546461, + "grad_norm": 1.257464863029373, + "learning_rate": 1.7361314950452136e-06, + "loss": 0.4117918014526367, + "step": 2385 + }, + { + "epoch": 0.5501498731842287, + "grad_norm": 1.0582357147304537, + "learning_rate": 1.7358733892417848e-06, + "loss": 0.40341615676879883, + "step": 2386 + }, + { + "epoch": 0.5503804473138114, + "grad_norm": 1.2083128590610215, + "learning_rate": 1.735615176471701e-06, + "loss": 0.642855167388916, + "step": 2387 + }, + { + "epoch": 0.550611021443394, + "grad_norm": 1.3821025749968947, + "learning_rate": 1.7353568567724959e-06, + "loss": 0.5490958094596863, + "step": 2388 + }, + { + "epoch": 0.5508415955729767, + "grad_norm": 1.0972882559163057, + "learning_rate": 1.7350984301817192e-06, + "loss": 0.5154834985733032, + "step": 2389 + }, + { + "epoch": 0.5510721697025593, + "grad_norm": 1.5156914347306212, + "learning_rate": 1.7348398967369358e-06, + "loss": 0.49488651752471924, + "step": 2390 + }, + { + "epoch": 0.5513027438321421, + "grad_norm": 1.097164324799634, + "learning_rate": 1.7345812564757257e-06, + "loss": 0.4211215674877167, + "step": 2391 + }, + { + "epoch": 0.5515333179617247, + "grad_norm": 1.1060429845011046, + "learning_rate": 1.7343225094356855e-06, + "loss": 0.41840964555740356, + "step": 2392 + }, + { + "epoch": 0.5517638920913074, + "grad_norm": 1.1213399734290006, + "learning_rate": 1.7340636556544264e-06, + "loss": 
0.540780782699585, + "step": 2393 + }, + { + "epoch": 0.55199446622089, + "grad_norm": 1.328334535307567, + "learning_rate": 1.7338046951695754e-06, + "loss": 0.4967775046825409, + "step": 2394 + }, + { + "epoch": 0.5522250403504727, + "grad_norm": 1.337457775660936, + "learning_rate": 1.733545628018775e-06, + "loss": 0.5155577659606934, + "step": 2395 + }, + { + "epoch": 0.5524556144800553, + "grad_norm": 1.3409169497631646, + "learning_rate": 1.7332864542396832e-06, + "loss": 0.5106005072593689, + "step": 2396 + }, + { + "epoch": 0.552686188609638, + "grad_norm": 1.106469342539302, + "learning_rate": 1.7330271738699737e-06, + "loss": 0.3459712862968445, + "step": 2397 + }, + { + "epoch": 0.5529167627392206, + "grad_norm": 1.238811250755909, + "learning_rate": 1.7327677869473356e-06, + "loss": 0.4877927303314209, + "step": 2398 + }, + { + "epoch": 0.5531473368688034, + "grad_norm": 1.298959309949219, + "learning_rate": 1.7325082935094732e-06, + "loss": 0.5183857679367065, + "step": 2399 + }, + { + "epoch": 0.553377910998386, + "grad_norm": 1.1165163437308863, + "learning_rate": 1.7322486935941068e-06, + "loss": 0.4326491057872772, + "step": 2400 + }, + { + "epoch": 0.5536084851279687, + "grad_norm": 1.2472729786065346, + "learning_rate": 1.7319889872389716e-06, + "loss": 0.4688712954521179, + "step": 2401 + }, + { + "epoch": 0.5538390592575513, + "grad_norm": 1.2787851295656323, + "learning_rate": 1.7317291744818184e-06, + "loss": 0.4997788071632385, + "step": 2402 + }, + { + "epoch": 0.554069633387134, + "grad_norm": 1.3085189564145994, + "learning_rate": 1.731469255360414e-06, + "loss": 0.5271172523498535, + "step": 2403 + }, + { + "epoch": 0.5543002075167166, + "grad_norm": 1.3689434717845856, + "learning_rate": 1.73120922991254e-06, + "loss": 0.5339269042015076, + "step": 2404 + }, + { + "epoch": 0.5545307816462993, + "grad_norm": 1.2181123008680574, + "learning_rate": 1.7309490981759938e-06, + "loss": 0.47052568197250366, + "step": 2405 + }, + { + "epoch": 
0.5547613557758819, + "grad_norm": 1.2508289898124627, + "learning_rate": 1.7306888601885885e-06, + "loss": 0.4112280309200287, + "step": 2406 + }, + { + "epoch": 0.5549919299054646, + "grad_norm": 1.1812487853939355, + "learning_rate": 1.730428515988152e-06, + "loss": 0.5473710298538208, + "step": 2407 + }, + { + "epoch": 0.5552225040350472, + "grad_norm": 1.6509587018432181, + "learning_rate": 1.7301680656125277e-06, + "loss": 0.5079115629196167, + "step": 2408 + }, + { + "epoch": 0.55545307816463, + "grad_norm": 1.193259996108104, + "learning_rate": 1.7299075090995755e-06, + "loss": 0.4805012345314026, + "step": 2409 + }, + { + "epoch": 0.5556836522942126, + "grad_norm": 1.1958830357632493, + "learning_rate": 1.729646846487169e-06, + "loss": 0.4657474756240845, + "step": 2410 + }, + { + "epoch": 0.5559142264237953, + "grad_norm": 1.2442110767414496, + "learning_rate": 1.729386077813199e-06, + "loss": 0.5887978076934814, + "step": 2411 + }, + { + "epoch": 0.5561448005533779, + "grad_norm": 1.0093517139206267, + "learning_rate": 1.7291252031155704e-06, + "loss": 0.43841421604156494, + "step": 2412 + }, + { + "epoch": 0.5563753746829606, + "grad_norm": 1.304380451031228, + "learning_rate": 1.728864222432204e-06, + "loss": 0.5026551485061646, + "step": 2413 + }, + { + "epoch": 0.5566059488125432, + "grad_norm": 1.2344100865196312, + "learning_rate": 1.728603135801036e-06, + "loss": 0.4525277614593506, + "step": 2414 + }, + { + "epoch": 0.5568365229421259, + "grad_norm": 1.3128956010351178, + "learning_rate": 1.7283419432600182e-06, + "loss": 0.4095644950866699, + "step": 2415 + }, + { + "epoch": 0.5570670970717085, + "grad_norm": 1.2351186073808627, + "learning_rate": 1.7280806448471173e-06, + "loss": 0.5098834037780762, + "step": 2416 + }, + { + "epoch": 0.5572976712012913, + "grad_norm": 0.9689174321932323, + "learning_rate": 1.7278192406003159e-06, + "loss": 0.42802777886390686, + "step": 2417 + }, + { + "epoch": 0.5575282453308739, + "grad_norm": 
1.283644069549869, + "learning_rate": 1.7275577305576113e-06, + "loss": 0.5036378502845764, + "step": 2418 + }, + { + "epoch": 0.5577588194604566, + "grad_norm": 1.2960652355454445, + "learning_rate": 1.7272961147570175e-06, + "loss": 0.5324885249137878, + "step": 2419 + }, + { + "epoch": 0.5579893935900392, + "grad_norm": 1.6334614504341187, + "learning_rate": 1.727034393236562e-06, + "loss": 0.5763842463493347, + "step": 2420 + }, + { + "epoch": 0.5582199677196219, + "grad_norm": 1.343133312027108, + "learning_rate": 1.7267725660342895e-06, + "loss": 0.49291908740997314, + "step": 2421 + }, + { + "epoch": 0.5584505418492045, + "grad_norm": 1.651006143174213, + "learning_rate": 1.7265106331882588e-06, + "loss": 0.5114868879318237, + "step": 2422 + }, + { + "epoch": 0.5586811159787872, + "grad_norm": 1.1152807378164393, + "learning_rate": 1.7262485947365449e-06, + "loss": 0.42442530393600464, + "step": 2423 + }, + { + "epoch": 0.5589116901083698, + "grad_norm": 1.1309517905090323, + "learning_rate": 1.725986450717237e-06, + "loss": 0.3680551052093506, + "step": 2424 + }, + { + "epoch": 0.5591422642379525, + "grad_norm": 1.2183025106634426, + "learning_rate": 1.725724201168441e-06, + "loss": 0.5849576592445374, + "step": 2425 + }, + { + "epoch": 0.5593728383675352, + "grad_norm": 1.3597945996239442, + "learning_rate": 1.7254618461282773e-06, + "loss": 0.48919233679771423, + "step": 2426 + }, + { + "epoch": 0.5596034124971179, + "grad_norm": 1.1753552641156777, + "learning_rate": 1.7251993856348821e-06, + "loss": 0.4857720732688904, + "step": 2427 + }, + { + "epoch": 0.5598339866267005, + "grad_norm": 1.3324934167522995, + "learning_rate": 1.7249368197264062e-06, + "loss": 0.5106808543205261, + "step": 2428 + }, + { + "epoch": 0.5600645607562832, + "grad_norm": 1.305986731975411, + "learning_rate": 1.724674148441017e-06, + "loss": 0.500100314617157, + "step": 2429 + }, + { + "epoch": 0.5602951348858658, + "grad_norm": 1.226560051936561, + "learning_rate": 
1.7244113718168957e-06, + "loss": 0.5389110445976257, + "step": 2430 + }, + { + "epoch": 0.5605257090154485, + "grad_norm": 1.2848731557614161, + "learning_rate": 1.72414848989224e-06, + "loss": 0.42860496044158936, + "step": 2431 + }, + { + "epoch": 0.5607562831450311, + "grad_norm": 1.2392935426075953, + "learning_rate": 1.723885502705262e-06, + "loss": 0.4867728352546692, + "step": 2432 + }, + { + "epoch": 0.5609868572746138, + "grad_norm": 1.215687300161219, + "learning_rate": 1.7236224102941899e-06, + "loss": 0.49194633960723877, + "step": 2433 + }, + { + "epoch": 0.5612174314041964, + "grad_norm": 1.278802988367442, + "learning_rate": 1.7233592126972667e-06, + "loss": 0.5194358229637146, + "step": 2434 + }, + { + "epoch": 0.5614480055337792, + "grad_norm": 1.518126298536734, + "learning_rate": 1.723095909952751e-06, + "loss": 0.4738645553588867, + "step": 2435 + }, + { + "epoch": 0.5616785796633618, + "grad_norm": 1.1842233457279843, + "learning_rate": 1.7228325020989165e-06, + "loss": 0.48232927918434143, + "step": 2436 + }, + { + "epoch": 0.5619091537929445, + "grad_norm": 1.0590325088103263, + "learning_rate": 1.7225689891740522e-06, + "loss": 0.5192145109176636, + "step": 2437 + }, + { + "epoch": 0.5621397279225271, + "grad_norm": 1.2756639382228332, + "learning_rate": 1.7223053712164621e-06, + "loss": 0.4934930205345154, + "step": 2438 + }, + { + "epoch": 0.5623703020521098, + "grad_norm": 1.294610704846241, + "learning_rate": 1.722041648264466e-06, + "loss": 0.5022200345993042, + "step": 2439 + }, + { + "epoch": 0.5626008761816924, + "grad_norm": 1.15319893327068, + "learning_rate": 1.7217778203563986e-06, + "loss": 0.45300528407096863, + "step": 2440 + }, + { + "epoch": 0.5628314503112751, + "grad_norm": 1.1335234735988557, + "learning_rate": 1.7215138875306103e-06, + "loss": 0.4965200126171112, + "step": 2441 + }, + { + "epoch": 0.5630620244408577, + "grad_norm": 1.3081789750993726, + "learning_rate": 1.721249849825466e-06, + "loss": 
0.4618280231952667, + "step": 2442 + }, + { + "epoch": 0.5632925985704405, + "grad_norm": 1.255070715358214, + "learning_rate": 1.7209857072793464e-06, + "loss": 0.42270147800445557, + "step": 2443 + }, + { + "epoch": 0.5635231727000231, + "grad_norm": 1.0830436199918496, + "learning_rate": 1.720721459930647e-06, + "loss": 0.5200725793838501, + "step": 2444 + }, + { + "epoch": 0.5637537468296058, + "grad_norm": 1.1368018551382484, + "learning_rate": 1.7204571078177792e-06, + "loss": 0.47475337982177734, + "step": 2445 + }, + { + "epoch": 0.5639843209591884, + "grad_norm": 1.5482537414338693, + "learning_rate": 1.7201926509791693e-06, + "loss": 0.5493113994598389, + "step": 2446 + }, + { + "epoch": 0.564214895088771, + "grad_norm": 1.2861044506324582, + "learning_rate": 1.719928089453259e-06, + "loss": 0.4743562340736389, + "step": 2447 + }, + { + "epoch": 0.5644454692183537, + "grad_norm": 1.2343956116266135, + "learning_rate": 1.7196634232785038e-06, + "loss": 0.5145455598831177, + "step": 2448 + }, + { + "epoch": 0.5646760433479363, + "grad_norm": 1.5340568803714763, + "learning_rate": 1.719398652493377e-06, + "loss": 0.45072540640830994, + "step": 2449 + }, + { + "epoch": 0.564906617477519, + "grad_norm": 1.2363775684809537, + "learning_rate": 1.7191337771363651e-06, + "loss": 0.5150895714759827, + "step": 2450 + }, + { + "epoch": 0.5651371916071016, + "grad_norm": 1.4238500687035243, + "learning_rate": 1.7188687972459705e-06, + "loss": 0.5025302171707153, + "step": 2451 + }, + { + "epoch": 0.5653677657366843, + "grad_norm": 1.2149895801108108, + "learning_rate": 1.7186037128607107e-06, + "loss": 0.618930459022522, + "step": 2452 + }, + { + "epoch": 0.565598339866267, + "grad_norm": 1.1681250836374313, + "learning_rate": 1.7183385240191183e-06, + "loss": 0.5841591358184814, + "step": 2453 + }, + { + "epoch": 0.5658289139958497, + "grad_norm": 1.2481599814364495, + "learning_rate": 1.7180732307597413e-06, + "loss": 0.4915233850479126, + "step": 2454 + }, + { + 
"epoch": 0.5660594881254323, + "grad_norm": 1.127625184290067, + "learning_rate": 1.7178078331211429e-06, + "loss": 0.46732476353645325, + "step": 2455 + }, + { + "epoch": 0.566290062255015, + "grad_norm": 1.1121526599443385, + "learning_rate": 1.7175423311419013e-06, + "loss": 0.4640737771987915, + "step": 2456 + }, + { + "epoch": 0.5665206363845976, + "grad_norm": 1.2800685498732043, + "learning_rate": 1.7172767248606095e-06, + "loss": 0.39535683393478394, + "step": 2457 + }, + { + "epoch": 0.5667512105141803, + "grad_norm": 1.196636942462094, + "learning_rate": 1.7170110143158766e-06, + "loss": 0.4782179594039917, + "step": 2458 + }, + { + "epoch": 0.5669817846437629, + "grad_norm": 1.5731644028680265, + "learning_rate": 1.7167451995463258e-06, + "loss": 0.6186003684997559, + "step": 2459 + }, + { + "epoch": 0.5672123587733456, + "grad_norm": 1.3163111292704002, + "learning_rate": 1.7164792805905965e-06, + "loss": 0.4915347099304199, + "step": 2460 + }, + { + "epoch": 0.5674429329029282, + "grad_norm": 1.2683630708246802, + "learning_rate": 1.7162132574873422e-06, + "loss": 0.4789005517959595, + "step": 2461 + }, + { + "epoch": 0.567673507032511, + "grad_norm": 1.6928847577315913, + "learning_rate": 1.7159471302752326e-06, + "loss": 0.6307233572006226, + "step": 2462 + }, + { + "epoch": 0.5679040811620936, + "grad_norm": 1.240574680316347, + "learning_rate": 1.7156808989929514e-06, + "loss": 0.5278424024581909, + "step": 2463 + }, + { + "epoch": 0.5681346552916763, + "grad_norm": 1.4388020329709479, + "learning_rate": 1.7154145636791988e-06, + "loss": 0.48552995920181274, + "step": 2464 + }, + { + "epoch": 0.5683652294212589, + "grad_norm": 1.3679954470869684, + "learning_rate": 1.7151481243726885e-06, + "loss": 0.5125370621681213, + "step": 2465 + }, + { + "epoch": 0.5685958035508416, + "grad_norm": 1.3448408660581435, + "learning_rate": 1.7148815811121506e-06, + "loss": 0.44231730699539185, + "step": 2466 + }, + { + "epoch": 0.5688263776804242, + "grad_norm": 
1.367567415522102, + "learning_rate": 1.7146149339363296e-06, + "loss": 0.5593529939651489, + "step": 2467 + }, + { + "epoch": 0.5690569518100069, + "grad_norm": 1.347377301704866, + "learning_rate": 1.714348182883986e-06, + "loss": 0.4830925464630127, + "step": 2468 + }, + { + "epoch": 0.5692875259395895, + "grad_norm": 1.4913136319748062, + "learning_rate": 1.714081327993894e-06, + "loss": 0.5538743734359741, + "step": 2469 + }, + { + "epoch": 0.5695181000691723, + "grad_norm": 1.4135532975212044, + "learning_rate": 1.7138143693048441e-06, + "loss": 0.5145905613899231, + "step": 2470 + }, + { + "epoch": 0.5697486741987549, + "grad_norm": 1.301183082915478, + "learning_rate": 1.713547306855641e-06, + "loss": 0.47706612944602966, + "step": 2471 + }, + { + "epoch": 0.5699792483283376, + "grad_norm": 1.2528774428968483, + "learning_rate": 1.7132801406851056e-06, + "loss": 0.45162689685821533, + "step": 2472 + }, + { + "epoch": 0.5702098224579202, + "grad_norm": 1.5721475156494655, + "learning_rate": 1.7130128708320727e-06, + "loss": 0.5141111612319946, + "step": 2473 + }, + { + "epoch": 0.5704403965875029, + "grad_norm": 1.0845779630695374, + "learning_rate": 1.7127454973353932e-06, + "loss": 0.4443173408508301, + "step": 2474 + }, + { + "epoch": 0.5706709707170855, + "grad_norm": 1.2704796440823871, + "learning_rate": 1.7124780202339317e-06, + "loss": 0.4162046015262604, + "step": 2475 + }, + { + "epoch": 0.5709015448466682, + "grad_norm": 1.100254820278883, + "learning_rate": 1.7122104395665695e-06, + "loss": 0.44526439905166626, + "step": 2476 + }, + { + "epoch": 0.5711321189762508, + "grad_norm": 1.3237501807128542, + "learning_rate": 1.7119427553722016e-06, + "loss": 0.5069452524185181, + "step": 2477 + }, + { + "epoch": 0.5713626931058335, + "grad_norm": 1.2833720010816703, + "learning_rate": 1.7116749676897393e-06, + "loss": 0.46709829568862915, + "step": 2478 + }, + { + "epoch": 0.5715932672354161, + "grad_norm": 1.2011083992406753, + "learning_rate": 
1.7114070765581078e-06, + "loss": 0.5443992614746094, + "step": 2479 + }, + { + "epoch": 0.5718238413649989, + "grad_norm": 1.5805836267397864, + "learning_rate": 1.7111390820162477e-06, + "loss": 0.4307284653186798, + "step": 2480 + }, + { + "epoch": 0.5720544154945815, + "grad_norm": 1.272693158326629, + "learning_rate": 1.7108709841031148e-06, + "loss": 0.4753509759902954, + "step": 2481 + }, + { + "epoch": 0.5722849896241642, + "grad_norm": 1.3966851487133662, + "learning_rate": 1.7106027828576798e-06, + "loss": 0.5689436197280884, + "step": 2482 + }, + { + "epoch": 0.5725155637537468, + "grad_norm": 1.3535603859222731, + "learning_rate": 1.710334478318929e-06, + "loss": 0.47182410955429077, + "step": 2483 + }, + { + "epoch": 0.5727461378833295, + "grad_norm": 1.4415402220476166, + "learning_rate": 1.7100660705258623e-06, + "loss": 0.4418888986110687, + "step": 2484 + }, + { + "epoch": 0.5729767120129121, + "grad_norm": 1.0842485548099412, + "learning_rate": 1.709797559517496e-06, + "loss": 0.4315544366836548, + "step": 2485 + }, + { + "epoch": 0.5732072861424948, + "grad_norm": 1.136143164844157, + "learning_rate": 1.709528945332861e-06, + "loss": 0.34541741013526917, + "step": 2486 + }, + { + "epoch": 0.5734378602720774, + "grad_norm": 1.444798755487831, + "learning_rate": 1.709260228011003e-06, + "loss": 0.5380317568778992, + "step": 2487 + }, + { + "epoch": 0.5736684344016602, + "grad_norm": 1.1490218932398577, + "learning_rate": 1.7089914075909824e-06, + "loss": 0.5017478466033936, + "step": 2488 + }, + { + "epoch": 0.5738990085312428, + "grad_norm": 1.317791376396268, + "learning_rate": 1.7087224841118756e-06, + "loss": 0.5608090162277222, + "step": 2489 + }, + { + "epoch": 0.5741295826608255, + "grad_norm": 1.3491498137629283, + "learning_rate": 1.708453457612773e-06, + "loss": 0.5360782146453857, + "step": 2490 + }, + { + "epoch": 0.5743601567904081, + "grad_norm": 1.3100243824681166, + "learning_rate": 1.7081843281327802e-06, + "loss": 
0.5638090372085571, + "step": 2491 + }, + { + "epoch": 0.5745907309199908, + "grad_norm": 1.2532603581217905, + "learning_rate": 1.707915095711018e-06, + "loss": 0.45777082443237305, + "step": 2492 + }, + { + "epoch": 0.5748213050495734, + "grad_norm": 1.2028357712850113, + "learning_rate": 1.7076457603866224e-06, + "loss": 0.5423707962036133, + "step": 2493 + }, + { + "epoch": 0.5750518791791561, + "grad_norm": 1.3752974790416335, + "learning_rate": 1.7073763221987436e-06, + "loss": 0.4286508560180664, + "step": 2494 + }, + { + "epoch": 0.5752824533087387, + "grad_norm": 1.1304014566480758, + "learning_rate": 1.7071067811865474e-06, + "loss": 0.4197548031806946, + "step": 2495 + }, + { + "epoch": 0.5755130274383214, + "grad_norm": 1.1820720623961845, + "learning_rate": 1.7068371373892142e-06, + "loss": 0.47944843769073486, + "step": 2496 + }, + { + "epoch": 0.575743601567904, + "grad_norm": 1.5454364363464301, + "learning_rate": 1.7065673908459396e-06, + "loss": 0.49708908796310425, + "step": 2497 + }, + { + "epoch": 0.5759741756974868, + "grad_norm": 1.2002677488287707, + "learning_rate": 1.706297541595934e-06, + "loss": 0.46402662992477417, + "step": 2498 + }, + { + "epoch": 0.5762047498270694, + "grad_norm": 1.2375577528106843, + "learning_rate": 1.7060275896784222e-06, + "loss": 0.4665846824645996, + "step": 2499 + }, + { + "epoch": 0.5764353239566521, + "grad_norm": 1.333335025499966, + "learning_rate": 1.7057575351326452e-06, + "loss": 0.511766791343689, + "step": 2500 + }, + { + "epoch": 0.5766658980862347, + "grad_norm": 1.3129729051878996, + "learning_rate": 1.7054873779978578e-06, + "loss": 0.5731323957443237, + "step": 2501 + }, + { + "epoch": 0.5768964722158174, + "grad_norm": 1.208575824869893, + "learning_rate": 1.70521711831333e-06, + "loss": 0.43246185779571533, + "step": 2502 + }, + { + "epoch": 0.5771270463454, + "grad_norm": 1.3743994267646191, + "learning_rate": 1.704946756118347e-06, + "loss": 0.5062395334243774, + "step": 2503 + }, + { + 
"epoch": 0.5773576204749827, + "grad_norm": 1.2169597850499592, + "learning_rate": 1.7046762914522087e-06, + "loss": 0.5010061264038086, + "step": 2504 + }, + { + "epoch": 0.5775881946045653, + "grad_norm": 1.1915100175955862, + "learning_rate": 1.7044057243542293e-06, + "loss": 0.5118759870529175, + "step": 2505 + }, + { + "epoch": 0.5778187687341481, + "grad_norm": 1.2406153903833703, + "learning_rate": 1.7041350548637392e-06, + "loss": 0.5796714425086975, + "step": 2506 + }, + { + "epoch": 0.5780493428637307, + "grad_norm": 1.198072830487735, + "learning_rate": 1.7038642830200828e-06, + "loss": 0.43587976694107056, + "step": 2507 + }, + { + "epoch": 0.5782799169933134, + "grad_norm": 1.0836383921827997, + "learning_rate": 1.7035934088626193e-06, + "loss": 0.4780135154724121, + "step": 2508 + }, + { + "epoch": 0.578510491122896, + "grad_norm": 1.2949967246283594, + "learning_rate": 1.7033224324307232e-06, + "loss": 0.48039600253105164, + "step": 2509 + }, + { + "epoch": 0.5787410652524787, + "grad_norm": 1.4288262034065056, + "learning_rate": 1.7030513537637835e-06, + "loss": 0.48075419664382935, + "step": 2510 + }, + { + "epoch": 0.5789716393820613, + "grad_norm": 1.294455603546607, + "learning_rate": 1.7027801729012044e-06, + "loss": 0.5006246566772461, + "step": 2511 + }, + { + "epoch": 0.579202213511644, + "grad_norm": 1.3239915881424993, + "learning_rate": 1.7025088898824046e-06, + "loss": 0.550139307975769, + "step": 2512 + }, + { + "epoch": 0.5794327876412266, + "grad_norm": 1.273345251271078, + "learning_rate": 1.7022375047468178e-06, + "loss": 0.5228495001792908, + "step": 2513 + }, + { + "epoch": 0.5796633617708093, + "grad_norm": 1.223108155250479, + "learning_rate": 1.701966017533893e-06, + "loss": 0.4783739149570465, + "step": 2514 + }, + { + "epoch": 0.579893935900392, + "grad_norm": 1.3364695116135945, + "learning_rate": 1.701694428283093e-06, + "loss": 0.47218769788742065, + "step": 2515 + }, + { + "epoch": 0.5801245100299747, + "grad_norm": 
1.271458214482931, + "learning_rate": 1.7014227370338967e-06, + "loss": 0.5340671539306641, + "step": 2516 + }, + { + "epoch": 0.5803550841595573, + "grad_norm": 1.1389068048001012, + "learning_rate": 1.7011509438257967e-06, + "loss": 0.4629259407520294, + "step": 2517 + }, + { + "epoch": 0.58058565828914, + "grad_norm": 1.6036419177897663, + "learning_rate": 1.7008790486983013e-06, + "loss": 0.6334242820739746, + "step": 2518 + }, + { + "epoch": 0.5808162324187226, + "grad_norm": 1.3328081079482175, + "learning_rate": 1.7006070516909327e-06, + "loss": 0.544147789478302, + "step": 2519 + }, + { + "epoch": 0.5810468065483053, + "grad_norm": 1.2269860514972317, + "learning_rate": 1.700334952843229e-06, + "loss": 0.47045618295669556, + "step": 2520 + }, + { + "epoch": 0.5812773806778879, + "grad_norm": 1.4613594501045561, + "learning_rate": 1.700062752194742e-06, + "loss": 0.4582393169403076, + "step": 2521 + }, + { + "epoch": 0.5815079548074706, + "grad_norm": 1.335231293513905, + "learning_rate": 1.699790449785039e-06, + "loss": 0.507327139377594, + "step": 2522 + }, + { + "epoch": 0.5817385289370532, + "grad_norm": 1.3812182502399277, + "learning_rate": 1.6995180456537022e-06, + "loss": 0.5345891714096069, + "step": 2523 + }, + { + "epoch": 0.581969103066636, + "grad_norm": 1.3766088909590293, + "learning_rate": 1.6992455398403277e-06, + "loss": 0.4847550094127655, + "step": 2524 + }, + { + "epoch": 0.5821996771962186, + "grad_norm": 1.2694420906725428, + "learning_rate": 1.6989729323845276e-06, + "loss": 0.4472479820251465, + "step": 2525 + }, + { + "epoch": 0.5824302513258013, + "grad_norm": 1.1676894033843348, + "learning_rate": 1.698700223325928e-06, + "loss": 0.4426107108592987, + "step": 2526 + }, + { + "epoch": 0.5826608254553839, + "grad_norm": 1.3669509353012406, + "learning_rate": 1.6984274127041696e-06, + "loss": 0.4814276099205017, + "step": 2527 + }, + { + "epoch": 0.5828913995849666, + "grad_norm": 1.3849093780882, + "learning_rate": 
1.6981545005589084e-06, + "loss": 0.5286451578140259, + "step": 2528 + }, + { + "epoch": 0.5831219737145492, + "grad_norm": 1.3586645163698117, + "learning_rate": 1.6978814869298152e-06, + "loss": 0.5291767120361328, + "step": 2529 + }, + { + "epoch": 0.5833525478441319, + "grad_norm": 1.4376369092272532, + "learning_rate": 1.6976083718565748e-06, + "loss": 0.5807399749755859, + "step": 2530 + }, + { + "epoch": 0.5835831219737145, + "grad_norm": 1.5620885730430554, + "learning_rate": 1.6973351553788878e-06, + "loss": 0.5489222407341003, + "step": 2531 + }, + { + "epoch": 0.5838136961032973, + "grad_norm": 1.5080367455114985, + "learning_rate": 1.6970618375364683e-06, + "loss": 0.5295521020889282, + "step": 2532 + }, + { + "epoch": 0.5840442702328799, + "grad_norm": 1.281498688581256, + "learning_rate": 1.6967884183690467e-06, + "loss": 0.4979495406150818, + "step": 2533 + }, + { + "epoch": 0.5842748443624626, + "grad_norm": 1.0681769287073983, + "learning_rate": 1.6965148979163661e-06, + "loss": 0.45667344331741333, + "step": 2534 + }, + { + "epoch": 0.5845054184920452, + "grad_norm": 1.1552847245372566, + "learning_rate": 1.6962412762181866e-06, + "loss": 0.42687737941741943, + "step": 2535 + }, + { + "epoch": 0.5847359926216279, + "grad_norm": 1.2720388462434997, + "learning_rate": 1.6959675533142815e-06, + "loss": 0.5616278648376465, + "step": 2536 + }, + { + "epoch": 0.5849665667512105, + "grad_norm": 1.245024966542371, + "learning_rate": 1.6956937292444386e-06, + "loss": 0.4961121678352356, + "step": 2537 + }, + { + "epoch": 0.5851971408807932, + "grad_norm": 1.1864554840937962, + "learning_rate": 1.6954198040484617e-06, + "loss": 0.5115770101547241, + "step": 2538 + }, + { + "epoch": 0.5854277150103758, + "grad_norm": 1.41778667190123, + "learning_rate": 1.6951457777661686e-06, + "loss": 0.540202260017395, + "step": 2539 + }, + { + "epoch": 0.5856582891399585, + "grad_norm": 1.3238570605319384, + "learning_rate": 1.6948716504373914e-06, + "loss": 
0.5312114357948303, + "step": 2540 + }, + { + "epoch": 0.5858888632695411, + "grad_norm": 1.1842147435507233, + "learning_rate": 1.694597422101978e-06, + "loss": 0.49323517084121704, + "step": 2541 + }, + { + "epoch": 0.5861194373991239, + "grad_norm": 1.3138451660312804, + "learning_rate": 1.6943230927997894e-06, + "loss": 0.42929738759994507, + "step": 2542 + }, + { + "epoch": 0.5863500115287065, + "grad_norm": 1.2474057622168624, + "learning_rate": 1.6940486625707021e-06, + "loss": 0.45236462354660034, + "step": 2543 + }, + { + "epoch": 0.5865805856582892, + "grad_norm": 1.1944700996273265, + "learning_rate": 1.6937741314546084e-06, + "loss": 0.5129071474075317, + "step": 2544 + }, + { + "epoch": 0.5868111597878718, + "grad_norm": 1.303867373152147, + "learning_rate": 1.693499499491413e-06, + "loss": 0.5562577247619629, + "step": 2545 + }, + { + "epoch": 0.5870417339174545, + "grad_norm": 1.472236761409707, + "learning_rate": 1.6932247667210372e-06, + "loss": 0.5593177080154419, + "step": 2546 + }, + { + "epoch": 0.5872723080470371, + "grad_norm": 1.666463518969871, + "learning_rate": 1.692949933183416e-06, + "loss": 0.5536680221557617, + "step": 2547 + }, + { + "epoch": 0.5875028821766198, + "grad_norm": 1.552275933236934, + "learning_rate": 1.6926749989184993e-06, + "loss": 0.5523338317871094, + "step": 2548 + }, + { + "epoch": 0.5877334563062024, + "grad_norm": 1.3066438958077835, + "learning_rate": 1.692399963966251e-06, + "loss": 0.41815924644470215, + "step": 2549 + }, + { + "epoch": 0.5879640304357852, + "grad_norm": 1.1800035534558937, + "learning_rate": 1.6921248283666508e-06, + "loss": 0.46959248185157776, + "step": 2550 + }, + { + "epoch": 0.5881946045653678, + "grad_norm": 1.2343992191174948, + "learning_rate": 1.6918495921596928e-06, + "loss": 0.4748489260673523, + "step": 2551 + }, + { + "epoch": 0.5884251786949505, + "grad_norm": 1.853505775613954, + "learning_rate": 1.6915742553853845e-06, + "loss": 0.4541524052619934, + "step": 2552 + }, + { + 
"epoch": 0.5886557528245331, + "grad_norm": 1.2688298570187295, + "learning_rate": 1.691298818083749e-06, + "loss": 0.47106000781059265, + "step": 2553 + }, + { + "epoch": 0.5888863269541158, + "grad_norm": 1.6112122400264717, + "learning_rate": 1.6910232802948246e-06, + "loss": 0.5364842414855957, + "step": 2554 + }, + { + "epoch": 0.5891169010836984, + "grad_norm": 1.402469759006704, + "learning_rate": 1.690747642058663e-06, + "loss": 0.48388350009918213, + "step": 2555 + }, + { + "epoch": 0.5893474752132811, + "grad_norm": 1.1992143425994695, + "learning_rate": 1.690471903415331e-06, + "loss": 0.5075609683990479, + "step": 2556 + }, + { + "epoch": 0.5895780493428637, + "grad_norm": 1.2039147901396619, + "learning_rate": 1.6901960644049102e-06, + "loss": 0.45098066329956055, + "step": 2557 + }, + { + "epoch": 0.5898086234724463, + "grad_norm": 1.1869247135212617, + "learning_rate": 1.6899201250674966e-06, + "loss": 0.5329077243804932, + "step": 2558 + }, + { + "epoch": 0.590039197602029, + "grad_norm": 1.2771607201573625, + "learning_rate": 1.6896440854432005e-06, + "loss": 0.4632904529571533, + "step": 2559 + }, + { + "epoch": 0.5902697717316117, + "grad_norm": 1.3016593794447966, + "learning_rate": 1.6893679455721474e-06, + "loss": 0.5302451848983765, + "step": 2560 + }, + { + "epoch": 0.5905003458611944, + "grad_norm": 1.1349040723062418, + "learning_rate": 1.6890917054944768e-06, + "loss": 0.45363447070121765, + "step": 2561 + }, + { + "epoch": 0.590730919990777, + "grad_norm": 1.3869965053274627, + "learning_rate": 1.688815365250343e-06, + "loss": 0.5103914737701416, + "step": 2562 + }, + { + "epoch": 0.5909614941203597, + "grad_norm": 1.2859854063949494, + "learning_rate": 1.6885389248799152e-06, + "loss": 0.45474469661712646, + "step": 2563 + }, + { + "epoch": 0.5911920682499423, + "grad_norm": 1.3905925832105772, + "learning_rate": 1.6882623844233766e-06, + "loss": 0.517952024936676, + "step": 2564 + }, + { + "epoch": 0.591422642379525, + "grad_norm": 
1.456181517852448, + "learning_rate": 1.6879857439209245e-06, + "loss": 0.4872232973575592, + "step": 2565 + }, + { + "epoch": 0.5916532165091076, + "grad_norm": 1.146992588808451, + "learning_rate": 1.6877090034127726e-06, + "loss": 0.4938408136367798, + "step": 2566 + }, + { + "epoch": 0.5918837906386903, + "grad_norm": 0.9819996395503116, + "learning_rate": 1.6874321629391469e-06, + "loss": 0.42687565088272095, + "step": 2567 + }, + { + "epoch": 0.592114364768273, + "grad_norm": 1.8882181325825955, + "learning_rate": 1.6871552225402896e-06, + "loss": 0.5272493362426758, + "step": 2568 + }, + { + "epoch": 0.5923449388978557, + "grad_norm": 1.265485903227574, + "learning_rate": 1.6868781822564565e-06, + "loss": 0.4643193185329437, + "step": 2569 + }, + { + "epoch": 0.5925755130274383, + "grad_norm": 1.5054555077342378, + "learning_rate": 1.6866010421279183e-06, + "loss": 0.4957782030105591, + "step": 2570 + }, + { + "epoch": 0.592806087157021, + "grad_norm": 1.2319191303045371, + "learning_rate": 1.6863238021949605e-06, + "loss": 0.442360520362854, + "step": 2571 + }, + { + "epoch": 0.5930366612866036, + "grad_norm": 1.365610357460579, + "learning_rate": 1.6860464624978824e-06, + "loss": 0.5108935832977295, + "step": 2572 + }, + { + "epoch": 0.5932672354161863, + "grad_norm": 1.1047616502548026, + "learning_rate": 1.6857690230769976e-06, + "loss": 0.46559715270996094, + "step": 2573 + }, + { + "epoch": 0.5934978095457689, + "grad_norm": 1.2296310276846145, + "learning_rate": 1.6854914839726356e-06, + "loss": 0.44752076268196106, + "step": 2574 + }, + { + "epoch": 0.5937283836753516, + "grad_norm": 1.6735698653712807, + "learning_rate": 1.6852138452251387e-06, + "loss": 0.4018149971961975, + "step": 2575 + }, + { + "epoch": 0.5939589578049342, + "grad_norm": 1.407358523561205, + "learning_rate": 1.6849361068748652e-06, + "loss": 0.47711417078971863, + "step": 2576 + }, + { + "epoch": 0.594189531934517, + "grad_norm": 1.3386417354625197, + "learning_rate": 
1.684658268962187e-06, + "loss": 0.4671875834465027, + "step": 2577 + }, + { + "epoch": 0.5944201060640996, + "grad_norm": 1.2780841808458634, + "learning_rate": 1.6843803315274906e-06, + "loss": 0.48041921854019165, + "step": 2578 + }, + { + "epoch": 0.5946506801936823, + "grad_norm": 1.105183308056311, + "learning_rate": 1.6841022946111772e-06, + "loss": 0.3444385528564453, + "step": 2579 + }, + { + "epoch": 0.5948812543232649, + "grad_norm": 1.3054472047651338, + "learning_rate": 1.6838241582536619e-06, + "loss": 0.46800029277801514, + "step": 2580 + }, + { + "epoch": 0.5951118284528476, + "grad_norm": 1.7022638621771704, + "learning_rate": 1.683545922495375e-06, + "loss": 0.4362339377403259, + "step": 2581 + }, + { + "epoch": 0.5953424025824302, + "grad_norm": 1.5138702229312708, + "learning_rate": 1.6832675873767606e-06, + "loss": 0.4818536043167114, + "step": 2582 + }, + { + "epoch": 0.5955729767120129, + "grad_norm": 1.1464685816902647, + "learning_rate": 1.6829891529382775e-06, + "loss": 0.47899681329727173, + "step": 2583 + }, + { + "epoch": 0.5958035508415955, + "grad_norm": 1.028545290493661, + "learning_rate": 1.6827106192203995e-06, + "loss": 0.4239576458930969, + "step": 2584 + }, + { + "epoch": 0.5960341249711782, + "grad_norm": 1.299757224081726, + "learning_rate": 1.6824319862636136e-06, + "loss": 0.545168399810791, + "step": 2585 + }, + { + "epoch": 0.5962646991007609, + "grad_norm": 1.1433294908143323, + "learning_rate": 1.6821532541084228e-06, + "loss": 0.4238642156124115, + "step": 2586 + }, + { + "epoch": 0.5964952732303436, + "grad_norm": 1.1214453575304018, + "learning_rate": 1.6818744227953422e-06, + "loss": 0.39589810371398926, + "step": 2587 + }, + { + "epoch": 0.5967258473599262, + "grad_norm": 1.1696584305728281, + "learning_rate": 1.6815954923649044e-06, + "loss": 0.4358367919921875, + "step": 2588 + }, + { + "epoch": 0.5969564214895089, + "grad_norm": 1.232714944175718, + "learning_rate": 1.6813164628576538e-06, + "loss": 
0.5012080073356628, + "step": 2589 + }, + { + "epoch": 0.5971869956190915, + "grad_norm": 1.0762630624781258, + "learning_rate": 1.6810373343141503e-06, + "loss": 0.4637286365032196, + "step": 2590 + }, + { + "epoch": 0.5974175697486742, + "grad_norm": 1.4947457348694884, + "learning_rate": 1.6807581067749684e-06, + "loss": 0.6130828261375427, + "step": 2591 + }, + { + "epoch": 0.5976481438782568, + "grad_norm": 1.538167494741888, + "learning_rate": 1.680478780280696e-06, + "loss": 0.5430021286010742, + "step": 2592 + }, + { + "epoch": 0.5978787180078395, + "grad_norm": 1.4318445545867842, + "learning_rate": 1.6801993548719368e-06, + "loss": 0.5195741653442383, + "step": 2593 + }, + { + "epoch": 0.5981092921374221, + "grad_norm": 1.4741188457279395, + "learning_rate": 1.6799198305893077e-06, + "loss": 0.5452337265014648, + "step": 2594 + }, + { + "epoch": 0.5983398662670049, + "grad_norm": 1.1858829095847359, + "learning_rate": 1.6796402074734402e-06, + "loss": 0.4802110493183136, + "step": 2595 + }, + { + "epoch": 0.5985704403965875, + "grad_norm": 1.114234548006963, + "learning_rate": 1.679360485564981e-06, + "loss": 0.48554790019989014, + "step": 2596 + }, + { + "epoch": 0.5988010145261702, + "grad_norm": 1.3519600489481014, + "learning_rate": 1.6790806649045896e-06, + "loss": 0.5151324272155762, + "step": 2597 + }, + { + "epoch": 0.5990315886557528, + "grad_norm": 1.4134149785589025, + "learning_rate": 1.6788007455329419e-06, + "loss": 0.5122699737548828, + "step": 2598 + }, + { + "epoch": 0.5992621627853355, + "grad_norm": 1.0762809832802989, + "learning_rate": 1.6785207274907258e-06, + "loss": 0.47776496410369873, + "step": 2599 + }, + { + "epoch": 0.5994927369149181, + "grad_norm": 1.3625217888513212, + "learning_rate": 1.6782406108186455e-06, + "loss": 0.5653492212295532, + "step": 2600 + }, + { + "epoch": 0.5997233110445008, + "grad_norm": 1.2197147141619178, + "learning_rate": 1.677960395557419e-06, + "loss": 0.44313424825668335, + "step": 2601 + }, + { + 
"epoch": 0.5999538851740834, + "grad_norm": 1.137470066753919, + "learning_rate": 1.677680081747778e-06, + "loss": 0.40465259552001953, + "step": 2602 + }, + { + "epoch": 0.6001844593036662, + "grad_norm": 1.4481779333184874, + "learning_rate": 1.6773996694304687e-06, + "loss": 0.5488068461418152, + "step": 2603 + }, + { + "epoch": 0.6004150334332488, + "grad_norm": 1.2545703783665254, + "learning_rate": 1.6771191586462523e-06, + "loss": 0.5122859477996826, + "step": 2604 + }, + { + "epoch": 0.6006456075628315, + "grad_norm": 1.2685821503383574, + "learning_rate": 1.6768385494359039e-06, + "loss": 0.47173869609832764, + "step": 2605 + }, + { + "epoch": 0.6008761816924141, + "grad_norm": 1.342808103655164, + "learning_rate": 1.6765578418402129e-06, + "loss": 0.527764081954956, + "step": 2606 + }, + { + "epoch": 0.6011067558219968, + "grad_norm": 1.7106657610470863, + "learning_rate": 1.6762770358999826e-06, + "loss": 0.5399610996246338, + "step": 2607 + }, + { + "epoch": 0.6013373299515794, + "grad_norm": 1.1677908773060481, + "learning_rate": 1.6759961316560314e-06, + "loss": 0.3441581428050995, + "step": 2608 + }, + { + "epoch": 0.6015679040811621, + "grad_norm": 1.2546350672529525, + "learning_rate": 1.6757151291491916e-06, + "loss": 0.5027580857276917, + "step": 2609 + }, + { + "epoch": 0.6017984782107447, + "grad_norm": 1.6099655975362483, + "learning_rate": 1.6754340284203095e-06, + "loss": 0.3898310363292694, + "step": 2610 + }, + { + "epoch": 0.6020290523403274, + "grad_norm": 1.5075448921993653, + "learning_rate": 1.675152829510246e-06, + "loss": 0.5577199459075928, + "step": 2611 + }, + { + "epoch": 0.60225962646991, + "grad_norm": 1.178797634573082, + "learning_rate": 1.6748715324598763e-06, + "loss": 0.47849035263061523, + "step": 2612 + }, + { + "epoch": 0.6024902005994928, + "grad_norm": 1.2674537093214957, + "learning_rate": 1.6745901373100896e-06, + "loss": 0.46845290064811707, + "step": 2613 + }, + { + "epoch": 0.6027207747290754, + "grad_norm": 
1.4078882858329094, + "learning_rate": 1.6743086441017899e-06, + "loss": 0.46008870005607605, + "step": 2614 + }, + { + "epoch": 0.6029513488586581, + "grad_norm": 1.3347721564783812, + "learning_rate": 1.6740270528758948e-06, + "loss": 0.44386154413223267, + "step": 2615 + }, + { + "epoch": 0.6031819229882407, + "grad_norm": 1.2103476019651458, + "learning_rate": 1.6737453636733364e-06, + "loss": 0.495368629693985, + "step": 2616 + }, + { + "epoch": 0.6034124971178234, + "grad_norm": 1.257056760083973, + "learning_rate": 1.6734635765350613e-06, + "loss": 0.519428551197052, + "step": 2617 + }, + { + "epoch": 0.603643071247406, + "grad_norm": 1.5181965589957365, + "learning_rate": 1.6731816915020302e-06, + "loss": 0.49346470832824707, + "step": 2618 + }, + { + "epoch": 0.6038736453769887, + "grad_norm": 1.3323089431428572, + "learning_rate": 1.6728997086152173e-06, + "loss": 0.554854691028595, + "step": 2619 + }, + { + "epoch": 0.6041042195065713, + "grad_norm": 1.503361315997137, + "learning_rate": 1.6726176279156125e-06, + "loss": 0.4930881857872009, + "step": 2620 + }, + { + "epoch": 0.604334793636154, + "grad_norm": 1.1576996092953873, + "learning_rate": 1.6723354494442186e-06, + "loss": 0.4082447588443756, + "step": 2621 + }, + { + "epoch": 0.6045653677657367, + "grad_norm": 1.2572245396068074, + "learning_rate": 1.6720531732420531e-06, + "loss": 0.5151821374893188, + "step": 2622 + }, + { + "epoch": 0.6047959418953194, + "grad_norm": 1.6316483356509275, + "learning_rate": 1.671770799350148e-06, + "loss": 0.44579264521598816, + "step": 2623 + }, + { + "epoch": 0.605026516024902, + "grad_norm": 1.5349454914737826, + "learning_rate": 1.6714883278095489e-06, + "loss": 0.4937717020511627, + "step": 2624 + }, + { + "epoch": 0.6052570901544847, + "grad_norm": 1.4939841287703146, + "learning_rate": 1.671205758661316e-06, + "loss": 0.46298685669898987, + "step": 2625 + }, + { + "epoch": 0.6054876642840673, + "grad_norm": 1.3089529059854432, + "learning_rate": 
1.6709230919465233e-06, + "loss": 0.5535221695899963, + "step": 2626 + }, + { + "epoch": 0.60571823841365, + "grad_norm": 1.2781536932155106, + "learning_rate": 1.6706403277062599e-06, + "loss": 0.5289112329483032, + "step": 2627 + }, + { + "epoch": 0.6059488125432326, + "grad_norm": 1.2619858231183905, + "learning_rate": 1.6703574659816285e-06, + "loss": 0.506280779838562, + "step": 2628 + }, + { + "epoch": 0.6061793866728153, + "grad_norm": 1.366142383501645, + "learning_rate": 1.6700745068137451e-06, + "loss": 0.504257082939148, + "step": 2629 + }, + { + "epoch": 0.606409960802398, + "grad_norm": 1.2835196483556859, + "learning_rate": 1.6697914502437411e-06, + "loss": 0.624682605266571, + "step": 2630 + }, + { + "epoch": 0.6066405349319807, + "grad_norm": 1.1715096985967743, + "learning_rate": 1.6695082963127617e-06, + "loss": 0.4539645314216614, + "step": 2631 + }, + { + "epoch": 0.6068711090615633, + "grad_norm": 1.2852717924915888, + "learning_rate": 1.6692250450619665e-06, + "loss": 0.5461890697479248, + "step": 2632 + }, + { + "epoch": 0.607101683191146, + "grad_norm": 1.2251930368732282, + "learning_rate": 1.6689416965325282e-06, + "loss": 0.615606427192688, + "step": 2633 + }, + { + "epoch": 0.6073322573207286, + "grad_norm": 1.3904526684847855, + "learning_rate": 1.668658250765635e-06, + "loss": 0.5355387926101685, + "step": 2634 + }, + { + "epoch": 0.6075628314503113, + "grad_norm": 1.1464900003631002, + "learning_rate": 1.6683747078024886e-06, + "loss": 0.5804985165596008, + "step": 2635 + }, + { + "epoch": 0.6077934055798939, + "grad_norm": 1.1983123193544134, + "learning_rate": 1.6680910676843042e-06, + "loss": 0.4514031410217285, + "step": 2636 + }, + { + "epoch": 0.6080239797094766, + "grad_norm": 1.3446092692413514, + "learning_rate": 1.6678073304523123e-06, + "loss": 0.5621001720428467, + "step": 2637 + }, + { + "epoch": 0.6082545538390592, + "grad_norm": 1.3749875179413227, + "learning_rate": 1.667523496147757e-06, + "loss": 0.49387669563293457, 
+ "step": 2638 + }, + { + "epoch": 0.608485127968642, + "grad_norm": 1.0479438264918854, + "learning_rate": 1.6672395648118966e-06, + "loss": 0.5857938528060913, + "step": 2639 + }, + { + "epoch": 0.6087157020982246, + "grad_norm": 1.149056345239141, + "learning_rate": 1.6669555364860029e-06, + "loss": 0.46403199434280396, + "step": 2640 + }, + { + "epoch": 0.6089462762278073, + "grad_norm": 1.2068025098167319, + "learning_rate": 1.6666714112113627e-06, + "loss": 0.4998488128185272, + "step": 2641 + }, + { + "epoch": 0.6091768503573899, + "grad_norm": 1.3686546841392573, + "learning_rate": 1.6663871890292765e-06, + "loss": 0.6291745901107788, + "step": 2642 + }, + { + "epoch": 0.6094074244869726, + "grad_norm": 1.7034971765108011, + "learning_rate": 1.6661028699810587e-06, + "loss": 0.6326058506965637, + "step": 2643 + }, + { + "epoch": 0.6096379986165552, + "grad_norm": 1.2748339439376004, + "learning_rate": 1.6658184541080378e-06, + "loss": 0.5737805366516113, + "step": 2644 + }, + { + "epoch": 0.6098685727461379, + "grad_norm": 1.435593858390691, + "learning_rate": 1.6655339414515568e-06, + "loss": 0.565047025680542, + "step": 2645 + }, + { + "epoch": 0.6100991468757205, + "grad_norm": 1.154269897254632, + "learning_rate": 1.6652493320529724e-06, + "loss": 0.5157296061515808, + "step": 2646 + }, + { + "epoch": 0.6103297210053032, + "grad_norm": 1.2671967095996914, + "learning_rate": 1.6649646259536554e-06, + "loss": 0.4475112855434418, + "step": 2647 + }, + { + "epoch": 0.6105602951348859, + "grad_norm": 1.4397592539357233, + "learning_rate": 1.6646798231949911e-06, + "loss": 0.5072107315063477, + "step": 2648 + }, + { + "epoch": 0.6107908692644686, + "grad_norm": 1.3901386223871963, + "learning_rate": 1.6643949238183778e-06, + "loss": 0.44673952460289, + "step": 2649 + }, + { + "epoch": 0.6110214433940512, + "grad_norm": 1.4046630639478026, + "learning_rate": 1.6641099278652293e-06, + "loss": 0.47460734844207764, + "step": 2650 + }, + { + "epoch": 
0.6112520175236339, + "grad_norm": 1.251836663583678, + "learning_rate": 1.6638248353769718e-06, + "loss": 0.4529770612716675, + "step": 2651 + }, + { + "epoch": 0.6114825916532165, + "grad_norm": 1.4298404685971746, + "learning_rate": 1.6635396463950473e-06, + "loss": 0.5200958251953125, + "step": 2652 + }, + { + "epoch": 0.6117131657827992, + "grad_norm": 1.4871792439140996, + "learning_rate": 1.66325436096091e-06, + "loss": 0.465969979763031, + "step": 2653 + }, + { + "epoch": 0.6119437399123818, + "grad_norm": 1.1085493213804483, + "learning_rate": 1.6629689791160298e-06, + "loss": 0.5173276662826538, + "step": 2654 + }, + { + "epoch": 0.6121743140419645, + "grad_norm": 1.246647464420017, + "learning_rate": 1.6626835009018892e-06, + "loss": 0.5539907217025757, + "step": 2655 + }, + { + "epoch": 0.6124048881715471, + "grad_norm": 1.1686862955670068, + "learning_rate": 1.6623979263599857e-06, + "loss": 0.5617278814315796, + "step": 2656 + }, + { + "epoch": 0.6126354623011299, + "grad_norm": 1.3640942620216159, + "learning_rate": 1.6621122555318304e-06, + "loss": 0.46238285303115845, + "step": 2657 + }, + { + "epoch": 0.6128660364307125, + "grad_norm": 1.4695540598112733, + "learning_rate": 1.6618264884589484e-06, + "loss": 0.49247878789901733, + "step": 2658 + }, + { + "epoch": 0.6130966105602952, + "grad_norm": 1.0811892876151687, + "learning_rate": 1.6615406251828793e-06, + "loss": 0.4844072163105011, + "step": 2659 + }, + { + "epoch": 0.6133271846898778, + "grad_norm": 1.2024921886284354, + "learning_rate": 1.6612546657451754e-06, + "loss": 0.47372323274612427, + "step": 2660 + }, + { + "epoch": 0.6135577588194605, + "grad_norm": 1.299485129998275, + "learning_rate": 1.660968610187404e-06, + "loss": 0.5287426114082336, + "step": 2661 + }, + { + "epoch": 0.6137883329490431, + "grad_norm": 1.4640884136716181, + "learning_rate": 1.6606824585511471e-06, + "loss": 0.5862994194030762, + "step": 2662 + }, + { + "epoch": 0.6140189070786258, + "grad_norm": 
1.0158009777389652, + "learning_rate": 1.6603962108779986e-06, + "loss": 0.4866197109222412, + "step": 2663 + }, + { + "epoch": 0.6142494812082084, + "grad_norm": 1.408246184243547, + "learning_rate": 1.660109867209568e-06, + "loss": 0.5561861991882324, + "step": 2664 + }, + { + "epoch": 0.6144800553377912, + "grad_norm": 1.214620364544681, + "learning_rate": 1.659823427587478e-06, + "loss": 0.4878644645214081, + "step": 2665 + }, + { + "epoch": 0.6147106294673738, + "grad_norm": 1.3262957238727335, + "learning_rate": 1.659536892053366e-06, + "loss": 0.5371976494789124, + "step": 2666 + }, + { + "epoch": 0.6149412035969565, + "grad_norm": 1.2817478175527077, + "learning_rate": 1.6592502606488824e-06, + "loss": 0.4816581606864929, + "step": 2667 + }, + { + "epoch": 0.6151717777265391, + "grad_norm": 1.1536826566839264, + "learning_rate": 1.6589635334156919e-06, + "loss": 0.5105183124542236, + "step": 2668 + }, + { + "epoch": 0.6154023518561217, + "grad_norm": 1.4584261311401567, + "learning_rate": 1.6586767103954737e-06, + "loss": 0.5524129271507263, + "step": 2669 + }, + { + "epoch": 0.6156329259857044, + "grad_norm": 1.3107384301518328, + "learning_rate": 1.6583897916299204e-06, + "loss": 0.42373913526535034, + "step": 2670 + }, + { + "epoch": 0.615863500115287, + "grad_norm": 1.3724263799580212, + "learning_rate": 1.658102777160738e-06, + "loss": 0.5620803833007812, + "step": 2671 + }, + { + "epoch": 0.6160940742448697, + "grad_norm": 1.3004346965884186, + "learning_rate": 1.6578156670296472e-06, + "loss": 0.38180166482925415, + "step": 2672 + }, + { + "epoch": 0.6163246483744523, + "grad_norm": 1.2109058692777805, + "learning_rate": 1.6575284612783825e-06, + "loss": 0.48596519231796265, + "step": 2673 + }, + { + "epoch": 0.616555222504035, + "grad_norm": 1.1846928230852602, + "learning_rate": 1.657241159948692e-06, + "loss": 0.5098127126693726, + "step": 2674 + }, + { + "epoch": 0.6167857966336177, + "grad_norm": 1.5943292852368571, + "learning_rate": 
1.6569537630823382e-06, + "loss": 0.5650018453598022, + "step": 2675 + }, + { + "epoch": 0.6170163707632004, + "grad_norm": 1.1501551859696775, + "learning_rate": 1.6566662707210967e-06, + "loss": 0.45061948895454407, + "step": 2676 + }, + { + "epoch": 0.617246944892783, + "grad_norm": 1.3028951742766879, + "learning_rate": 1.6563786829067576e-06, + "loss": 0.4292137622833252, + "step": 2677 + }, + { + "epoch": 0.6174775190223657, + "grad_norm": 1.269567036808456, + "learning_rate": 1.656090999681125e-06, + "loss": 0.4837046265602112, + "step": 2678 + }, + { + "epoch": 0.6177080931519483, + "grad_norm": 1.9486185906204885, + "learning_rate": 1.6558032210860162e-06, + "loss": 0.43580353260040283, + "step": 2679 + }, + { + "epoch": 0.617938667281531, + "grad_norm": 1.2529677917985589, + "learning_rate": 1.6555153471632628e-06, + "loss": 0.47321656346321106, + "step": 2680 + }, + { + "epoch": 0.6181692414111136, + "grad_norm": 1.1423229113084605, + "learning_rate": 1.65522737795471e-06, + "loss": 0.47431111335754395, + "step": 2681 + }, + { + "epoch": 0.6183998155406963, + "grad_norm": 0.9698177160310311, + "learning_rate": 1.6549393135022181e-06, + "loss": 0.38062599301338196, + "step": 2682 + }, + { + "epoch": 0.618630389670279, + "grad_norm": 1.2758905094442272, + "learning_rate": 1.6546511538476584e-06, + "loss": 0.5941839218139648, + "step": 2683 + }, + { + "epoch": 0.6188609637998617, + "grad_norm": 1.453087551621585, + "learning_rate": 1.6543628990329195e-06, + "loss": 0.5323158502578735, + "step": 2684 + }, + { + "epoch": 0.6190915379294443, + "grad_norm": 1.100143863509344, + "learning_rate": 1.654074549099901e-06, + "loss": 0.3814772367477417, + "step": 2685 + }, + { + "epoch": 0.619322112059027, + "grad_norm": 1.5499952709692644, + "learning_rate": 1.6537861040905181e-06, + "loss": 0.5520694255828857, + "step": 2686 + }, + { + "epoch": 0.6195526861886096, + "grad_norm": 1.297782443862308, + "learning_rate": 1.653497564046699e-06, + "loss": 
0.5514999628067017, + "step": 2687 + }, + { + "epoch": 0.6197832603181923, + "grad_norm": 1.2170603559624027, + "learning_rate": 1.653208929010386e-06, + "loss": 0.39057493209838867, + "step": 2688 + }, + { + "epoch": 0.6200138344477749, + "grad_norm": 1.0224470752428403, + "learning_rate": 1.6529201990235352e-06, + "loss": 0.4941304922103882, + "step": 2689 + }, + { + "epoch": 0.6202444085773576, + "grad_norm": 1.2590211215766611, + "learning_rate": 1.6526313741281164e-06, + "loss": 0.539762020111084, + "step": 2690 + }, + { + "epoch": 0.6204749827069402, + "grad_norm": 1.3801421787603734, + "learning_rate": 1.6523424543661127e-06, + "loss": 0.49524787068367004, + "step": 2691 + }, + { + "epoch": 0.620705556836523, + "grad_norm": 1.2158625492501351, + "learning_rate": 1.6520534397795225e-06, + "loss": 0.4261528253555298, + "step": 2692 + }, + { + "epoch": 0.6209361309661056, + "grad_norm": 1.3188986304771895, + "learning_rate": 1.6517643304103563e-06, + "loss": 0.578548789024353, + "step": 2693 + }, + { + "epoch": 0.6211667050956883, + "grad_norm": 1.24168526725964, + "learning_rate": 1.6514751263006393e-06, + "loss": 0.4766680598258972, + "step": 2694 + }, + { + "epoch": 0.6213972792252709, + "grad_norm": 1.135518406763033, + "learning_rate": 1.6511858274924098e-06, + "loss": 0.4146459996700287, + "step": 2695 + }, + { + "epoch": 0.6216278533548536, + "grad_norm": 1.4632792907408574, + "learning_rate": 1.650896434027721e-06, + "loss": 0.5148390531539917, + "step": 2696 + }, + { + "epoch": 0.6218584274844362, + "grad_norm": 1.1678475162221296, + "learning_rate": 1.6506069459486388e-06, + "loss": 0.4830890893936157, + "step": 2697 + }, + { + "epoch": 0.6220890016140189, + "grad_norm": 1.2027318756470287, + "learning_rate": 1.6503173632972434e-06, + "loss": 0.4550463557243347, + "step": 2698 + }, + { + "epoch": 0.6223195757436015, + "grad_norm": 1.3023820822101895, + "learning_rate": 1.6500276861156284e-06, + "loss": 0.5811448097229004, + "step": 2699 + }, + { + 
"epoch": 0.6225501498731842, + "grad_norm": 1.3807858518585416, + "learning_rate": 1.6497379144459014e-06, + "loss": 0.44733545184135437, + "step": 2700 + }, + { + "epoch": 0.6227807240027669, + "grad_norm": 1.103384717152327, + "learning_rate": 1.6494480483301835e-06, + "loss": 0.4379687011241913, + "step": 2701 + }, + { + "epoch": 0.6230112981323496, + "grad_norm": 1.326644045971959, + "learning_rate": 1.6491580878106102e-06, + "loss": 0.5163959860801697, + "step": 2702 + }, + { + "epoch": 0.6232418722619322, + "grad_norm": 1.2037310331107272, + "learning_rate": 1.6488680329293297e-06, + "loss": 0.5636980533599854, + "step": 2703 + }, + { + "epoch": 0.6234724463915149, + "grad_norm": 1.1847301227909297, + "learning_rate": 1.6485778837285044e-06, + "loss": 0.46942776441574097, + "step": 2704 + }, + { + "epoch": 0.6237030205210975, + "grad_norm": 1.3867166397057658, + "learning_rate": 1.6482876402503103e-06, + "loss": 0.5104436278343201, + "step": 2705 + }, + { + "epoch": 0.6239335946506802, + "grad_norm": 1.2701601489299654, + "learning_rate": 1.6479973025369379e-06, + "loss": 0.4689507484436035, + "step": 2706 + }, + { + "epoch": 0.6241641687802628, + "grad_norm": 1.2388644364900292, + "learning_rate": 1.64770687063059e-06, + "loss": 0.4009973406791687, + "step": 2707 + }, + { + "epoch": 0.6243947429098455, + "grad_norm": 1.4958191711517836, + "learning_rate": 1.6474163445734846e-06, + "loss": 0.4938286542892456, + "step": 2708 + }, + { + "epoch": 0.6246253170394281, + "grad_norm": 1.2939637643231117, + "learning_rate": 1.6471257244078519e-06, + "loss": 0.4756525754928589, + "step": 2709 + }, + { + "epoch": 0.6248558911690109, + "grad_norm": 1.0308841763344028, + "learning_rate": 1.6468350101759366e-06, + "loss": 0.4322332739830017, + "step": 2710 + }, + { + "epoch": 0.6250864652985935, + "grad_norm": 1.381148895283306, + "learning_rate": 1.6465442019199972e-06, + "loss": 0.4605666995048523, + "step": 2711 + }, + { + "epoch": 0.6253170394281762, + "grad_norm": 
1.3288993921232848, + "learning_rate": 1.6462532996823053e-06, + "loss": 0.4576036334037781, + "step": 2712 + }, + { + "epoch": 0.6255476135577588, + "grad_norm": 1.1587792990864858, + "learning_rate": 1.645962303505147e-06, + "loss": 0.4860233664512634, + "step": 2713 + }, + { + "epoch": 0.6257781876873415, + "grad_norm": 1.2195714743605923, + "learning_rate": 1.6456712134308213e-06, + "loss": 0.4717915654182434, + "step": 2714 + }, + { + "epoch": 0.6260087618169241, + "grad_norm": 1.1008237671202603, + "learning_rate": 1.645380029501641e-06, + "loss": 0.49637067317962646, + "step": 2715 + }, + { + "epoch": 0.6262393359465068, + "grad_norm": 1.2218828759453872, + "learning_rate": 1.6450887517599326e-06, + "loss": 0.45388346910476685, + "step": 2716 + }, + { + "epoch": 0.6264699100760894, + "grad_norm": 1.6333623536070287, + "learning_rate": 1.6447973802480362e-06, + "loss": 0.5549031496047974, + "step": 2717 + }, + { + "epoch": 0.6267004842056721, + "grad_norm": 1.333805192555573, + "learning_rate": 1.644505915008306e-06, + "loss": 0.39759719371795654, + "step": 2718 + }, + { + "epoch": 0.6269310583352548, + "grad_norm": 1.2648542744381963, + "learning_rate": 1.644214356083109e-06, + "loss": 0.5126739740371704, + "step": 2719 + }, + { + "epoch": 0.6271616324648375, + "grad_norm": 1.1846129595938097, + "learning_rate": 1.6439227035148265e-06, + "loss": 0.41424083709716797, + "step": 2720 + }, + { + "epoch": 0.6273922065944201, + "grad_norm": 1.2295786085250646, + "learning_rate": 1.643630957345853e-06, + "loss": 0.5829803943634033, + "step": 2721 + }, + { + "epoch": 0.6276227807240028, + "grad_norm": 1.2114307243350246, + "learning_rate": 1.6433391176185972e-06, + "loss": 0.4736567437648773, + "step": 2722 + }, + { + "epoch": 0.6278533548535854, + "grad_norm": 1.4670818430092263, + "learning_rate": 1.6430471843754804e-06, + "loss": 0.41305306553840637, + "step": 2723 + }, + { + "epoch": 0.6280839289831681, + "grad_norm": 1.5480231340195962, + "learning_rate": 
1.6427551576589383e-06, + "loss": 0.38422563672065735, + "step": 2724 + }, + { + "epoch": 0.6283145031127507, + "grad_norm": 1.3725795006115715, + "learning_rate": 1.6424630375114199e-06, + "loss": 0.48302626609802246, + "step": 2725 + }, + { + "epoch": 0.6285450772423334, + "grad_norm": 1.2880102228926575, + "learning_rate": 1.6421708239753875e-06, + "loss": 0.4657328128814697, + "step": 2726 + }, + { + "epoch": 0.628775651371916, + "grad_norm": 1.4057295929235551, + "learning_rate": 1.641878517093318e-06, + "loss": 0.46126431226730347, + "step": 2727 + }, + { + "epoch": 0.6290062255014988, + "grad_norm": 1.3246078376538457, + "learning_rate": 1.6415861169077007e-06, + "loss": 0.5196214914321899, + "step": 2728 + }, + { + "epoch": 0.6292367996310814, + "grad_norm": 1.4794856753558834, + "learning_rate": 1.641293623461039e-06, + "loss": 0.5007073879241943, + "step": 2729 + }, + { + "epoch": 0.6294673737606641, + "grad_norm": 1.1543847272279724, + "learning_rate": 1.64100103679585e-06, + "loss": 0.4699769616127014, + "step": 2730 + }, + { + "epoch": 0.6296979478902467, + "grad_norm": 1.3221766888407216, + "learning_rate": 1.6407083569546636e-06, + "loss": 0.5487842559814453, + "step": 2731 + }, + { + "epoch": 0.6299285220198294, + "grad_norm": 1.0556125358940756, + "learning_rate": 1.6404155839800244e-06, + "loss": 0.42733538150787354, + "step": 2732 + }, + { + "epoch": 0.630159096149412, + "grad_norm": 1.1933689155818472, + "learning_rate": 1.64012271791449e-06, + "loss": 0.5105363726615906, + "step": 2733 + }, + { + "epoch": 0.6303896702789947, + "grad_norm": 1.3185367260440977, + "learning_rate": 1.6398297588006305e-06, + "loss": 0.5836968421936035, + "step": 2734 + }, + { + "epoch": 0.6306202444085773, + "grad_norm": 1.3830049962050668, + "learning_rate": 1.639536706681031e-06, + "loss": 0.4350558817386627, + "step": 2735 + }, + { + "epoch": 0.63085081853816, + "grad_norm": 1.4225393539645832, + "learning_rate": 1.63924356159829e-06, + "loss": 
0.5388341546058655, + "step": 2736 + }, + { + "epoch": 0.6310813926677427, + "grad_norm": 1.1218759160612528, + "learning_rate": 1.6389503235950186e-06, + "loss": 0.4576529860496521, + "step": 2737 + }, + { + "epoch": 0.6313119667973254, + "grad_norm": 1.524583554785293, + "learning_rate": 1.6386569927138422e-06, + "loss": 0.4525975286960602, + "step": 2738 + }, + { + "epoch": 0.631542540926908, + "grad_norm": 1.56840988374272, + "learning_rate": 1.6383635689973993e-06, + "loss": 0.42143142223358154, + "step": 2739 + }, + { + "epoch": 0.6317731150564907, + "grad_norm": 1.0672209595897675, + "learning_rate": 1.6380700524883423e-06, + "loss": 0.4440336227416992, + "step": 2740 + }, + { + "epoch": 0.6320036891860733, + "grad_norm": 1.2412570194863743, + "learning_rate": 1.637776443229336e-06, + "loss": 0.5009843707084656, + "step": 2741 + }, + { + "epoch": 0.632234263315656, + "grad_norm": 1.6736573631214935, + "learning_rate": 1.6374827412630604e-06, + "loss": 0.538151741027832, + "step": 2742 + }, + { + "epoch": 0.6324648374452386, + "grad_norm": 1.1895254537976463, + "learning_rate": 1.6371889466322077e-06, + "loss": 0.550201416015625, + "step": 2743 + }, + { + "epoch": 0.6326954115748213, + "grad_norm": 1.3861259597044466, + "learning_rate": 1.6368950593794836e-06, + "loss": 0.5707399845123291, + "step": 2744 + }, + { + "epoch": 0.632925985704404, + "grad_norm": 1.393827128295071, + "learning_rate": 1.6366010795476082e-06, + "loss": 0.5196787714958191, + "step": 2745 + }, + { + "epoch": 0.6331565598339867, + "grad_norm": 1.171378891149435, + "learning_rate": 1.636307007179314e-06, + "loss": 0.5243285894393921, + "step": 2746 + }, + { + "epoch": 0.6333871339635693, + "grad_norm": 1.249132441469792, + "learning_rate": 1.6360128423173473e-06, + "loss": 0.4202825427055359, + "step": 2747 + }, + { + "epoch": 0.633617708093152, + "grad_norm": 1.2547380834154716, + "learning_rate": 1.6357185850044681e-06, + "loss": 0.49080896377563477, + "step": 2748 + }, + { + "epoch": 
0.6338482822227346, + "grad_norm": 1.2234752623414968, + "learning_rate": 1.6354242352834502e-06, + "loss": 0.5537371635437012, + "step": 2749 + }, + { + "epoch": 0.6340788563523173, + "grad_norm": 1.1077493127634728, + "learning_rate": 1.6351297931970796e-06, + "loss": 0.3744293451309204, + "step": 2750 + }, + { + "epoch": 0.6343094304818999, + "grad_norm": 1.237975564408939, + "learning_rate": 1.634835258788157e-06, + "loss": 0.5176748037338257, + "step": 2751 + }, + { + "epoch": 0.6345400046114826, + "grad_norm": 1.321137847220575, + "learning_rate": 1.6345406320994952e-06, + "loss": 0.5179395079612732, + "step": 2752 + }, + { + "epoch": 0.6347705787410652, + "grad_norm": 1.3158476651008661, + "learning_rate": 1.634245913173922e-06, + "loss": 0.4810818135738373, + "step": 2753 + }, + { + "epoch": 0.635001152870648, + "grad_norm": 1.2760288557710286, + "learning_rate": 1.6339511020542775e-06, + "loss": 0.5188307762145996, + "step": 2754 + }, + { + "epoch": 0.6352317270002306, + "grad_norm": 1.662662743900965, + "learning_rate": 1.6336561987834151e-06, + "loss": 0.41170865297317505, + "step": 2755 + }, + { + "epoch": 0.6354623011298133, + "grad_norm": 1.1982414473393, + "learning_rate": 1.6333612034042025e-06, + "loss": 0.48726415634155273, + "step": 2756 + }, + { + "epoch": 0.6356928752593959, + "grad_norm": 1.1790415390507374, + "learning_rate": 1.63306611595952e-06, + "loss": 0.4483524560928345, + "step": 2757 + }, + { + "epoch": 0.6359234493889786, + "grad_norm": 1.2150870765180466, + "learning_rate": 1.6327709364922618e-06, + "loss": 0.3979623019695282, + "step": 2758 + }, + { + "epoch": 0.6361540235185612, + "grad_norm": 1.2093786796022739, + "learning_rate": 1.6324756650453346e-06, + "loss": 0.461483895778656, + "step": 2759 + }, + { + "epoch": 0.6363845976481439, + "grad_norm": 1.2350751043575534, + "learning_rate": 1.6321803016616598e-06, + "loss": 0.40054333209991455, + "step": 2760 + }, + { + "epoch": 0.6366151717777265, + "grad_norm": 
1.1196609017801307, + "learning_rate": 1.6318848463841712e-06, + "loss": 0.534996747970581, + "step": 2761 + }, + { + "epoch": 0.6368457459073092, + "grad_norm": 1.260260551672407, + "learning_rate": 1.631589299255816e-06, + "loss": 0.49408137798309326, + "step": 2762 + }, + { + "epoch": 0.6370763200368919, + "grad_norm": 1.305230846296416, + "learning_rate": 1.6312936603195557e-06, + "loss": 0.49098217487335205, + "step": 2763 + }, + { + "epoch": 0.6373068941664746, + "grad_norm": 1.1344163970655265, + "learning_rate": 1.6309979296183636e-06, + "loss": 0.4990113377571106, + "step": 2764 + }, + { + "epoch": 0.6375374682960572, + "grad_norm": 1.2952446438426217, + "learning_rate": 1.6307021071952276e-06, + "loss": 0.49399930238723755, + "step": 2765 + }, + { + "epoch": 0.6377680424256399, + "grad_norm": 1.320323762194689, + "learning_rate": 1.6304061930931478e-06, + "loss": 0.5029928684234619, + "step": 2766 + }, + { + "epoch": 0.6379986165552225, + "grad_norm": 1.2455728900211775, + "learning_rate": 1.6301101873551396e-06, + "loss": 0.5732289552688599, + "step": 2767 + }, + { + "epoch": 0.6382291906848052, + "grad_norm": 1.2965522975146178, + "learning_rate": 1.6298140900242293e-06, + "loss": 0.47334790229797363, + "step": 2768 + }, + { + "epoch": 0.6384597648143878, + "grad_norm": 1.2464510374223752, + "learning_rate": 1.6295179011434578e-06, + "loss": 0.44271016120910645, + "step": 2769 + }, + { + "epoch": 0.6386903389439705, + "grad_norm": 1.8250225519339747, + "learning_rate": 1.6292216207558798e-06, + "loss": 0.5768353939056396, + "step": 2770 + }, + { + "epoch": 0.6389209130735531, + "grad_norm": 1.074704735340539, + "learning_rate": 1.6289252489045625e-06, + "loss": 0.48315417766571045, + "step": 2771 + }, + { + "epoch": 0.6391514872031359, + "grad_norm": 1.338382007112913, + "learning_rate": 1.6286287856325855e-06, + "loss": 0.5745590925216675, + "step": 2772 + }, + { + "epoch": 0.6393820613327185, + "grad_norm": 1.473033213400145, + "learning_rate": 
1.6283322309830444e-06, + "loss": 0.6084291934967041, + "step": 2773 + }, + { + "epoch": 0.6396126354623012, + "grad_norm": 1.083816855400547, + "learning_rate": 1.6280355849990451e-06, + "loss": 0.4995007812976837, + "step": 2774 + }, + { + "epoch": 0.6398432095918838, + "grad_norm": 1.1962451309299882, + "learning_rate": 1.6277388477237084e-06, + "loss": 0.45811381936073303, + "step": 2775 + }, + { + "epoch": 0.6400737837214665, + "grad_norm": 1.448203316971052, + "learning_rate": 1.6274420192001689e-06, + "loss": 0.5666211247444153, + "step": 2776 + }, + { + "epoch": 0.6403043578510491, + "grad_norm": 1.3871415999727634, + "learning_rate": 1.6271450994715723e-06, + "loss": 0.5059396028518677, + "step": 2777 + }, + { + "epoch": 0.6405349319806318, + "grad_norm": 1.4444216130733851, + "learning_rate": 1.6268480885810798e-06, + "loss": 0.5418530702590942, + "step": 2778 + }, + { + "epoch": 0.6407655061102144, + "grad_norm": 1.4034133564890543, + "learning_rate": 1.6265509865718647e-06, + "loss": 0.5047061443328857, + "step": 2779 + }, + { + "epoch": 0.6409960802397972, + "grad_norm": 1.6003350461542336, + "learning_rate": 1.6262537934871138e-06, + "loss": 0.5104432702064514, + "step": 2780 + }, + { + "epoch": 0.6412266543693798, + "grad_norm": 1.3065683677222188, + "learning_rate": 1.625956509370027e-06, + "loss": 0.44423484802246094, + "step": 2781 + }, + { + "epoch": 0.6414572284989624, + "grad_norm": 1.1820302321160245, + "learning_rate": 1.6256591342638179e-06, + "loss": 0.47618383169174194, + "step": 2782 + }, + { + "epoch": 0.6416878026285451, + "grad_norm": 1.3796601981562324, + "learning_rate": 1.625361668211713e-06, + "loss": 0.5423145890235901, + "step": 2783 + }, + { + "epoch": 0.6419183767581277, + "grad_norm": 1.380895745392916, + "learning_rate": 1.6250641112569515e-06, + "loss": 0.517102837562561, + "step": 2784 + }, + { + "epoch": 0.6421489508877104, + "grad_norm": 1.2388489917279923, + "learning_rate": 1.6247664634427864e-06, + "loss": 
0.39601820707321167, + "step": 2785 + }, + { + "epoch": 0.642379525017293, + "grad_norm": 1.296572577942614, + "learning_rate": 1.6244687248124843e-06, + "loss": 0.5480250120162964, + "step": 2786 + }, + { + "epoch": 0.6426100991468757, + "grad_norm": 1.1105051491643492, + "learning_rate": 1.624170895409324e-06, + "loss": 0.4743092656135559, + "step": 2787 + }, + { + "epoch": 0.6428406732764583, + "grad_norm": 1.463202362201621, + "learning_rate": 1.6238729752765985e-06, + "loss": 0.4595726728439331, + "step": 2788 + }, + { + "epoch": 0.643071247406041, + "grad_norm": 1.2909676791556273, + "learning_rate": 1.6235749644576132e-06, + "loss": 0.5058779716491699, + "step": 2789 + }, + { + "epoch": 0.6433018215356237, + "grad_norm": 1.3145538108383794, + "learning_rate": 1.623276862995687e-06, + "loss": 0.5075543522834778, + "step": 2790 + }, + { + "epoch": 0.6435323956652064, + "grad_norm": 1.3185436913231439, + "learning_rate": 1.622978670934152e-06, + "loss": 0.5623351335525513, + "step": 2791 + }, + { + "epoch": 0.643762969794789, + "grad_norm": 1.1682118545924238, + "learning_rate": 1.6226803883163536e-06, + "loss": 0.3645760118961334, + "step": 2792 + }, + { + "epoch": 0.6439935439243717, + "grad_norm": 1.4617740663680228, + "learning_rate": 1.6223820151856501e-06, + "loss": 0.5666004419326782, + "step": 2793 + }, + { + "epoch": 0.6442241180539543, + "grad_norm": 1.3342697895697784, + "learning_rate": 1.6220835515854133e-06, + "loss": 0.6571217775344849, + "step": 2794 + }, + { + "epoch": 0.644454692183537, + "grad_norm": 1.4229199895470708, + "learning_rate": 1.6217849975590271e-06, + "loss": 0.5684333443641663, + "step": 2795 + }, + { + "epoch": 0.6446852663131196, + "grad_norm": 1.5289890556459427, + "learning_rate": 1.62148635314989e-06, + "loss": 0.43374937772750854, + "step": 2796 + }, + { + "epoch": 0.6449158404427023, + "grad_norm": 1.1182458179152783, + "learning_rate": 1.6211876184014134e-06, + "loss": 0.5102420449256897, + "step": 2797 + }, + { + 
"epoch": 0.6451464145722849, + "grad_norm": 1.0775475511417847, + "learning_rate": 1.6208887933570203e-06, + "loss": 0.39345985651016235, + "step": 2798 + }, + { + "epoch": 0.6453769887018677, + "grad_norm": 1.4503631372644623, + "learning_rate": 1.620589878060149e-06, + "loss": 0.47554945945739746, + "step": 2799 + }, + { + "epoch": 0.6456075628314503, + "grad_norm": 1.601431882721041, + "learning_rate": 1.6202908725542495e-06, + "loss": 0.4385503828525543, + "step": 2800 + }, + { + "epoch": 0.645838136961033, + "grad_norm": 1.1168858860640334, + "learning_rate": 1.619991776882785e-06, + "loss": 0.5589696168899536, + "step": 2801 + }, + { + "epoch": 0.6460687110906156, + "grad_norm": 1.265570460008291, + "learning_rate": 1.619692591089232e-06, + "loss": 0.4827546179294586, + "step": 2802 + }, + { + "epoch": 0.6462992852201983, + "grad_norm": 1.3309974001593363, + "learning_rate": 1.6193933152170809e-06, + "loss": 0.491131067276001, + "step": 2803 + }, + { + "epoch": 0.6465298593497809, + "grad_norm": 1.2647545815457555, + "learning_rate": 1.6190939493098341e-06, + "loss": 0.47185173630714417, + "step": 2804 + }, + { + "epoch": 0.6467604334793636, + "grad_norm": 1.235826049412326, + "learning_rate": 1.6187944934110072e-06, + "loss": 0.4411182701587677, + "step": 2805 + }, + { + "epoch": 0.6469910076089462, + "grad_norm": 1.2245067812038697, + "learning_rate": 1.6184949475641295e-06, + "loss": 0.47243285179138184, + "step": 2806 + }, + { + "epoch": 0.647221581738529, + "grad_norm": 1.3311536114931484, + "learning_rate": 1.6181953118127428e-06, + "loss": 0.4449295401573181, + "step": 2807 + }, + { + "epoch": 0.6474521558681116, + "grad_norm": 1.2292361204281614, + "learning_rate": 1.6178955862004024e-06, + "loss": 0.5148872137069702, + "step": 2808 + }, + { + "epoch": 0.6476827299976943, + "grad_norm": 1.2738055603189895, + "learning_rate": 1.6175957707706762e-06, + "loss": 0.5017277598381042, + "step": 2809 + }, + { + "epoch": 0.6479133041272769, + "grad_norm": 
1.1324070696899262, + "learning_rate": 1.6172958655671458e-06, + "loss": 0.44220247864723206, + "step": 2810 + }, + { + "epoch": 0.6481438782568596, + "grad_norm": 1.215492495713019, + "learning_rate": 1.6169958706334053e-06, + "loss": 0.45421087741851807, + "step": 2811 + }, + { + "epoch": 0.6483744523864422, + "grad_norm": 1.5167053281985836, + "learning_rate": 1.6166957860130618e-06, + "loss": 0.4772147536277771, + "step": 2812 + }, + { + "epoch": 0.6486050265160249, + "grad_norm": 1.1252103890770975, + "learning_rate": 1.6163956117497357e-06, + "loss": 0.5319628715515137, + "step": 2813 + }, + { + "epoch": 0.6488356006456075, + "grad_norm": 1.2663721872672429, + "learning_rate": 1.6160953478870608e-06, + "loss": 0.5109438896179199, + "step": 2814 + }, + { + "epoch": 0.6490661747751902, + "grad_norm": 1.33543378668276, + "learning_rate": 1.6157949944686827e-06, + "loss": 0.4417513608932495, + "step": 2815 + }, + { + "epoch": 0.6492967489047728, + "grad_norm": 1.2535935822359765, + "learning_rate": 1.6154945515382616e-06, + "loss": 0.5013085007667542, + "step": 2816 + }, + { + "epoch": 0.6495273230343556, + "grad_norm": 1.1191581438601172, + "learning_rate": 1.6151940191394693e-06, + "loss": 0.5197368860244751, + "step": 2817 + }, + { + "epoch": 0.6497578971639382, + "grad_norm": 1.4218758858652996, + "learning_rate": 1.6148933973159914e-06, + "loss": 0.46540898084640503, + "step": 2818 + }, + { + "epoch": 0.6499884712935209, + "grad_norm": 1.2080431861739462, + "learning_rate": 1.6145926861115268e-06, + "loss": 0.4867633581161499, + "step": 2819 + }, + { + "epoch": 0.6502190454231035, + "grad_norm": 1.1380395234486869, + "learning_rate": 1.6142918855697864e-06, + "loss": 0.426607221364975, + "step": 2820 + }, + { + "epoch": 0.6504496195526862, + "grad_norm": 1.2737116095131904, + "learning_rate": 1.613990995734495e-06, + "loss": 0.5183024406433105, + "step": 2821 + }, + { + "epoch": 0.6506801936822688, + "grad_norm": 1.3839354752611597, + "learning_rate": 
1.6136900166493893e-06, + "loss": 0.48635101318359375, + "step": 2822 + }, + { + "epoch": 0.6509107678118515, + "grad_norm": 1.5911912747422927, + "learning_rate": 1.6133889483582204e-06, + "loss": 0.47468632459640503, + "step": 2823 + }, + { + "epoch": 0.6511413419414341, + "grad_norm": 1.1598857858501956, + "learning_rate": 1.6130877909047515e-06, + "loss": 0.4665389358997345, + "step": 2824 + }, + { + "epoch": 0.6513719160710169, + "grad_norm": 1.1793258331020087, + "learning_rate": 1.6127865443327585e-06, + "loss": 0.5069966316223145, + "step": 2825 + }, + { + "epoch": 0.6516024902005995, + "grad_norm": 1.4107626754859688, + "learning_rate": 1.612485208686031e-06, + "loss": 0.47820740938186646, + "step": 2826 + }, + { + "epoch": 0.6518330643301822, + "grad_norm": 1.2189859420338702, + "learning_rate": 1.612183784008371e-06, + "loss": 0.43017104268074036, + "step": 2827 + }, + { + "epoch": 0.6520636384597648, + "grad_norm": 1.158515500774614, + "learning_rate": 1.6118822703435937e-06, + "loss": 0.45495298504829407, + "step": 2828 + }, + { + "epoch": 0.6522942125893475, + "grad_norm": 1.7108375139007879, + "learning_rate": 1.6115806677355272e-06, + "loss": 0.4624331593513489, + "step": 2829 + }, + { + "epoch": 0.6525247867189301, + "grad_norm": 1.0788742222165304, + "learning_rate": 1.6112789762280125e-06, + "loss": 0.39458876848220825, + "step": 2830 + }, + { + "epoch": 0.6527553608485128, + "grad_norm": 1.4194134450814206, + "learning_rate": 1.6109771958649035e-06, + "loss": 0.45552846789360046, + "step": 2831 + }, + { + "epoch": 0.6529859349780954, + "grad_norm": 1.4199555723058743, + "learning_rate": 1.6106753266900671e-06, + "loss": 0.4579755663871765, + "step": 2832 + }, + { + "epoch": 0.6532165091076781, + "grad_norm": 1.2589449636358518, + "learning_rate": 1.6103733687473823e-06, + "loss": 0.5164625644683838, + "step": 2833 + }, + { + "epoch": 0.6534470832372608, + "grad_norm": 1.3635551079325425, + "learning_rate": 1.6100713220807432e-06, + "loss": 
0.43071237206459045, + "step": 2834 + }, + { + "epoch": 0.6536776573668435, + "grad_norm": 1.2757429725484968, + "learning_rate": 1.6097691867340543e-06, + "loss": 0.5174099802970886, + "step": 2835 + }, + { + "epoch": 0.6539082314964261, + "grad_norm": 1.31351831375575, + "learning_rate": 1.609466962751234e-06, + "loss": 0.5944932699203491, + "step": 2836 + }, + { + "epoch": 0.6541388056260088, + "grad_norm": 1.312815606757786, + "learning_rate": 1.6091646501762145e-06, + "loss": 0.45203912258148193, + "step": 2837 + }, + { + "epoch": 0.6543693797555914, + "grad_norm": 1.292859531347235, + "learning_rate": 1.6088622490529386e-06, + "loss": 0.4197826683521271, + "step": 2838 + }, + { + "epoch": 0.6545999538851741, + "grad_norm": 1.3008648230701247, + "learning_rate": 1.6085597594253649e-06, + "loss": 0.4806807339191437, + "step": 2839 + }, + { + "epoch": 0.6548305280147567, + "grad_norm": 1.233893928808971, + "learning_rate": 1.608257181337462e-06, + "loss": 0.4618797302246094, + "step": 2840 + }, + { + "epoch": 0.6550611021443394, + "grad_norm": 1.1215282144992917, + "learning_rate": 1.6079545148332137e-06, + "loss": 0.4901892840862274, + "step": 2841 + }, + { + "epoch": 0.655291676273922, + "grad_norm": 1.250624448026336, + "learning_rate": 1.607651759956615e-06, + "loss": 0.44869139790534973, + "step": 2842 + }, + { + "epoch": 0.6555222504035048, + "grad_norm": 1.1064395173732657, + "learning_rate": 1.6073489167516747e-06, + "loss": 0.41470903158187866, + "step": 2843 + }, + { + "epoch": 0.6557528245330874, + "grad_norm": 1.2796938856852533, + "learning_rate": 1.6070459852624143e-06, + "loss": 0.5498615503311157, + "step": 2844 + }, + { + "epoch": 0.6559833986626701, + "grad_norm": 1.4741717641783516, + "learning_rate": 1.6067429655328675e-06, + "loss": 0.5462392568588257, + "step": 2845 + }, + { + "epoch": 0.6562139727922527, + "grad_norm": 1.5147243124828937, + "learning_rate": 1.6064398576070815e-06, + "loss": 0.3775100111961365, + "step": 2846 + }, + { + 
"epoch": 0.6564445469218354, + "grad_norm": 1.3806942156086204, + "learning_rate": 1.6061366615291161e-06, + "loss": 0.4712100028991699, + "step": 2847 + }, + { + "epoch": 0.656675121051418, + "grad_norm": 1.1320542857842297, + "learning_rate": 1.6058333773430439e-06, + "loss": 0.5152161121368408, + "step": 2848 + }, + { + "epoch": 0.6569056951810007, + "grad_norm": 1.2222287817453417, + "learning_rate": 1.6055300050929502e-06, + "loss": 0.46678972244262695, + "step": 2849 + }, + { + "epoch": 0.6571362693105833, + "grad_norm": 1.1948519980696821, + "learning_rate": 1.6052265448229338e-06, + "loss": 0.4622490108013153, + "step": 2850 + }, + { + "epoch": 0.657366843440166, + "grad_norm": 1.2601521252962713, + "learning_rate": 1.6049229965771052e-06, + "loss": 0.49909311532974243, + "step": 2851 + }, + { + "epoch": 0.6575974175697487, + "grad_norm": 1.1801405687475501, + "learning_rate": 1.6046193603995884e-06, + "loss": 0.4428306221961975, + "step": 2852 + }, + { + "epoch": 0.6578279916993314, + "grad_norm": 1.5295557154716768, + "learning_rate": 1.6043156363345196e-06, + "loss": 0.5842458009719849, + "step": 2853 + }, + { + "epoch": 0.658058565828914, + "grad_norm": 1.4945011678677886, + "learning_rate": 1.604011824426049e-06, + "loss": 0.47183722257614136, + "step": 2854 + }, + { + "epoch": 0.6582891399584967, + "grad_norm": 1.2843309395390234, + "learning_rate": 1.6037079247183379e-06, + "loss": 0.44225364923477173, + "step": 2855 + }, + { + "epoch": 0.6585197140880793, + "grad_norm": 1.3795669225253144, + "learning_rate": 1.6034039372555617e-06, + "loss": 0.4820272922515869, + "step": 2856 + }, + { + "epoch": 0.658750288217662, + "grad_norm": 1.6263387244434722, + "learning_rate": 1.6030998620819075e-06, + "loss": 0.48118168115615845, + "step": 2857 + }, + { + "epoch": 0.6589808623472446, + "grad_norm": 1.4704169894155685, + "learning_rate": 1.6027956992415764e-06, + "loss": 0.4386011064052582, + "step": 2858 + }, + { + "epoch": 0.6592114364768273, + "grad_norm": 
1.4148356020107666, + "learning_rate": 1.6024914487787814e-06, + "loss": 0.48740649223327637, + "step": 2859 + }, + { + "epoch": 0.65944201060641, + "grad_norm": 1.436235867684013, + "learning_rate": 1.602187110737748e-06, + "loss": 0.46782761812210083, + "step": 2860 + }, + { + "epoch": 0.6596725847359927, + "grad_norm": 1.2796166668007127, + "learning_rate": 1.6018826851627155e-06, + "loss": 0.5086358189582825, + "step": 2861 + }, + { + "epoch": 0.6599031588655753, + "grad_norm": 1.1582673721463366, + "learning_rate": 1.6015781720979344e-06, + "loss": 0.5631915330886841, + "step": 2862 + }, + { + "epoch": 0.660133732995158, + "grad_norm": 1.462417648098582, + "learning_rate": 1.6012735715876693e-06, + "loss": 0.5134458541870117, + "step": 2863 + }, + { + "epoch": 0.6603643071247406, + "grad_norm": 1.1268653967137703, + "learning_rate": 1.6009688836761969e-06, + "loss": 0.4308784008026123, + "step": 2864 + }, + { + "epoch": 0.6605948812543233, + "grad_norm": 1.3112517816231024, + "learning_rate": 1.6006641084078068e-06, + "loss": 0.5149765610694885, + "step": 2865 + }, + { + "epoch": 0.6608254553839059, + "grad_norm": 1.6101510783439525, + "learning_rate": 1.6003592458268005e-06, + "loss": 0.521892786026001, + "step": 2866 + }, + { + "epoch": 0.6610560295134886, + "grad_norm": 1.247084334907296, + "learning_rate": 1.6000542959774937e-06, + "loss": 0.46611008048057556, + "step": 2867 + }, + { + "epoch": 0.6612866036430712, + "grad_norm": 1.2517698630875118, + "learning_rate": 1.5997492589042135e-06, + "loss": 0.43080392479896545, + "step": 2868 + }, + { + "epoch": 0.661517177772654, + "grad_norm": 1.2239680444750303, + "learning_rate": 1.5994441346513003e-06, + "loss": 0.48026901483535767, + "step": 2869 + }, + { + "epoch": 0.6617477519022366, + "grad_norm": 1.1948228818170457, + "learning_rate": 1.5991389232631068e-06, + "loss": 0.48706555366516113, + "step": 2870 + }, + { + "epoch": 0.6619783260318193, + "grad_norm": 1.205848115890533, + "learning_rate": 
1.598833624783999e-06, + "loss": 0.5093512535095215, + "step": 2871 + }, + { + "epoch": 0.6622089001614019, + "grad_norm": 1.37517746631934, + "learning_rate": 1.5985282392583542e-06, + "loss": 0.5197086930274963, + "step": 2872 + }, + { + "epoch": 0.6624394742909846, + "grad_norm": 1.3389415544634544, + "learning_rate": 1.5982227667305646e-06, + "loss": 0.497372031211853, + "step": 2873 + }, + { + "epoch": 0.6626700484205672, + "grad_norm": 1.6851191621911175, + "learning_rate": 1.597917207245033e-06, + "loss": 0.4746604561805725, + "step": 2874 + }, + { + "epoch": 0.6629006225501499, + "grad_norm": 1.2864362072574318, + "learning_rate": 1.5976115608461755e-06, + "loss": 0.5531996488571167, + "step": 2875 + }, + { + "epoch": 0.6631311966797325, + "grad_norm": 1.2032344825838508, + "learning_rate": 1.5973058275784208e-06, + "loss": 0.44950544834136963, + "step": 2876 + }, + { + "epoch": 0.6633617708093152, + "grad_norm": 1.231321509427461, + "learning_rate": 1.597000007486211e-06, + "loss": 0.45596158504486084, + "step": 2877 + }, + { + "epoch": 0.6635923449388978, + "grad_norm": 1.1813154846400662, + "learning_rate": 1.596694100613999e-06, + "loss": 0.5243046879768372, + "step": 2878 + }, + { + "epoch": 0.6638229190684806, + "grad_norm": 1.2111771126184059, + "learning_rate": 1.5963881070062528e-06, + "loss": 0.46450644731521606, + "step": 2879 + }, + { + "epoch": 0.6640534931980632, + "grad_norm": 1.286085494147619, + "learning_rate": 1.5960820267074509e-06, + "loss": 0.5565767288208008, + "step": 2880 + }, + { + "epoch": 0.6642840673276459, + "grad_norm": 1.574495375498682, + "learning_rate": 1.595775859762085e-06, + "loss": 0.4351605176925659, + "step": 2881 + }, + { + "epoch": 0.6645146414572285, + "grad_norm": 1.3382136213218339, + "learning_rate": 1.5954696062146603e-06, + "loss": 0.5113346576690674, + "step": 2882 + }, + { + "epoch": 0.6647452155868112, + "grad_norm": 1.203285083111209, + "learning_rate": 1.5951632661096932e-06, + "loss": 
0.5005035996437073, + "step": 2883 + }, + { + "epoch": 0.6649757897163938, + "grad_norm": 1.1502074786882042, + "learning_rate": 1.5948568394917138e-06, + "loss": 0.4539811611175537, + "step": 2884 + }, + { + "epoch": 0.6652063638459765, + "grad_norm": 1.234546797786613, + "learning_rate": 1.5945503264052637e-06, + "loss": 0.4519865810871124, + "step": 2885 + }, + { + "epoch": 0.6654369379755591, + "grad_norm": 1.1932724883335695, + "learning_rate": 1.5942437268948985e-06, + "loss": 0.5688626766204834, + "step": 2886 + }, + { + "epoch": 0.6656675121051419, + "grad_norm": 1.1582733834983177, + "learning_rate": 1.5939370410051846e-06, + "loss": 0.5038400888442993, + "step": 2887 + }, + { + "epoch": 0.6658980862347245, + "grad_norm": 1.4308591259843988, + "learning_rate": 1.5936302687807028e-06, + "loss": 0.6332568526268005, + "step": 2888 + }, + { + "epoch": 0.6661286603643072, + "grad_norm": 1.2020172387992982, + "learning_rate": 1.593323410266045e-06, + "loss": 0.4994644820690155, + "step": 2889 + }, + { + "epoch": 0.6663592344938898, + "grad_norm": 1.3423031921779223, + "learning_rate": 1.5930164655058165e-06, + "loss": 0.4952617883682251, + "step": 2890 + }, + { + "epoch": 0.6665898086234725, + "grad_norm": 1.1769489968231674, + "learning_rate": 1.5927094345446345e-06, + "loss": 0.4188910722732544, + "step": 2891 + }, + { + "epoch": 0.6668203827530551, + "grad_norm": 1.319346697910086, + "learning_rate": 1.5924023174271295e-06, + "loss": 0.47160637378692627, + "step": 2892 + }, + { + "epoch": 0.6670509568826377, + "grad_norm": 1.0773369781050426, + "learning_rate": 1.592095114197944e-06, + "loss": 0.44884049892425537, + "step": 2893 + }, + { + "epoch": 0.6672815310122204, + "grad_norm": 1.3166895153069564, + "learning_rate": 1.5917878249017327e-06, + "loss": 0.4105216860771179, + "step": 2894 + }, + { + "epoch": 0.667512105141803, + "grad_norm": 1.3288589826448391, + "learning_rate": 1.5914804495831634e-06, + "loss": 0.5000967383384705, + "step": 2895 + }, + { + 
"epoch": 0.6677426792713858, + "grad_norm": 1.4772652615504442, + "learning_rate": 1.5911729882869163e-06, + "loss": 0.45515477657318115, + "step": 2896 + }, + { + "epoch": 0.6679732534009684, + "grad_norm": 1.2034912342077588, + "learning_rate": 1.590865441057684e-06, + "loss": 0.4492835998535156, + "step": 2897 + }, + { + "epoch": 0.6682038275305511, + "grad_norm": 1.5637287950189662, + "learning_rate": 1.5905578079401716e-06, + "loss": 0.553781270980835, + "step": 2898 + }, + { + "epoch": 0.6684344016601337, + "grad_norm": 1.235173143749482, + "learning_rate": 1.5902500889790967e-06, + "loss": 0.5085616111755371, + "step": 2899 + }, + { + "epoch": 0.6686649757897164, + "grad_norm": 1.2766607551584273, + "learning_rate": 1.5899422842191891e-06, + "loss": 0.4651145935058594, + "step": 2900 + }, + { + "epoch": 0.668895549919299, + "grad_norm": 1.3114841240621398, + "learning_rate": 1.5896343937051921e-06, + "loss": 0.5503841638565063, + "step": 2901 + }, + { + "epoch": 0.6691261240488817, + "grad_norm": 1.1881721760666544, + "learning_rate": 1.5893264174818599e-06, + "loss": 0.48213839530944824, + "step": 2902 + }, + { + "epoch": 0.6693566981784643, + "grad_norm": 1.2726619976847688, + "learning_rate": 1.5890183555939604e-06, + "loss": 0.4602949023246765, + "step": 2903 + }, + { + "epoch": 0.669587272308047, + "grad_norm": 1.213092004639277, + "learning_rate": 1.5887102080862736e-06, + "loss": 0.43991196155548096, + "step": 2904 + }, + { + "epoch": 0.6698178464376296, + "grad_norm": 1.2472416336517922, + "learning_rate": 1.5884019750035914e-06, + "loss": 0.48186323046684265, + "step": 2905 + }, + { + "epoch": 0.6700484205672124, + "grad_norm": 1.3445409358829308, + "learning_rate": 1.5880936563907189e-06, + "loss": 0.44907671213150024, + "step": 2906 + }, + { + "epoch": 0.670278994696795, + "grad_norm": 1.874421138474627, + "learning_rate": 1.587785252292473e-06, + "loss": 0.4475386142730713, + "step": 2907 + }, + { + "epoch": 0.6705095688263777, + "grad_norm": 
1.2649536391923781, + "learning_rate": 1.587476762753684e-06, + "loss": 0.4504704475402832, + "step": 2908 + }, + { + "epoch": 0.6707401429559603, + "grad_norm": 2.0624210450483376, + "learning_rate": 1.5871681878191937e-06, + "loss": 0.5090106129646301, + "step": 2909 + }, + { + "epoch": 0.670970717085543, + "grad_norm": 1.3010076823717651, + "learning_rate": 1.5868595275338561e-06, + "loss": 0.46150895953178406, + "step": 2910 + }, + { + "epoch": 0.6712012912151256, + "grad_norm": 1.2556909013752833, + "learning_rate": 1.586550781942539e-06, + "loss": 0.5499979257583618, + "step": 2911 + }, + { + "epoch": 0.6714318653447083, + "grad_norm": 1.2089730243488483, + "learning_rate": 1.5862419510901211e-06, + "loss": 0.46628689765930176, + "step": 2912 + }, + { + "epoch": 0.6716624394742909, + "grad_norm": 1.2998808024776154, + "learning_rate": 1.5859330350214941e-06, + "loss": 0.4517399072647095, + "step": 2913 + }, + { + "epoch": 0.6718930136038737, + "grad_norm": 1.0879313971673985, + "learning_rate": 1.5856240337815621e-06, + "loss": 0.4696923792362213, + "step": 2914 + }, + { + "epoch": 0.6721235877334563, + "grad_norm": 1.5676723620382764, + "learning_rate": 1.585314947415242e-06, + "loss": 0.41357535123825073, + "step": 2915 + }, + { + "epoch": 0.672354161863039, + "grad_norm": 1.2988881169526059, + "learning_rate": 1.5850057759674621e-06, + "loss": 0.5223745107650757, + "step": 2916 + }, + { + "epoch": 0.6725847359926216, + "grad_norm": 1.5751566352241433, + "learning_rate": 1.584696519483164e-06, + "loss": 0.48562729358673096, + "step": 2917 + }, + { + "epoch": 0.6728153101222043, + "grad_norm": 1.147456021361514, + "learning_rate": 1.5843871780073009e-06, + "loss": 0.3675496280193329, + "step": 2918 + }, + { + "epoch": 0.6730458842517869, + "grad_norm": 1.4691177353786786, + "learning_rate": 1.5840777515848389e-06, + "loss": 0.5782667994499207, + "step": 2919 + }, + { + "epoch": 0.6732764583813696, + "grad_norm": 1.110911745804502, + "learning_rate": 
1.583768240260756e-06, + "loss": 0.419716477394104, + "step": 2920 + }, + { + "epoch": 0.6735070325109522, + "grad_norm": 1.2625181785612978, + "learning_rate": 1.5834586440800434e-06, + "loss": 0.4004133939743042, + "step": 2921 + }, + { + "epoch": 0.673737606640535, + "grad_norm": 1.3860644175168617, + "learning_rate": 1.5831489630877037e-06, + "loss": 0.4917314350605011, + "step": 2922 + }, + { + "epoch": 0.6739681807701176, + "grad_norm": 1.3350109690747092, + "learning_rate": 1.5828391973287522e-06, + "loss": 0.5488141179084778, + "step": 2923 + }, + { + "epoch": 0.6741987548997003, + "grad_norm": 1.2547850876004316, + "learning_rate": 1.5825293468482163e-06, + "loss": 0.5047071576118469, + "step": 2924 + }, + { + "epoch": 0.6744293290292829, + "grad_norm": 1.3178326140677985, + "learning_rate": 1.5822194116911364e-06, + "loss": 0.4830411672592163, + "step": 2925 + }, + { + "epoch": 0.6746599031588656, + "grad_norm": 1.2591886503495524, + "learning_rate": 1.5819093919025641e-06, + "loss": 0.47517114877700806, + "step": 2926 + }, + { + "epoch": 0.6748904772884482, + "grad_norm": 1.3603729738722081, + "learning_rate": 1.5815992875275642e-06, + "loss": 0.5617963075637817, + "step": 2927 + }, + { + "epoch": 0.6751210514180309, + "grad_norm": 1.1752484838801127, + "learning_rate": 1.5812890986112137e-06, + "loss": 0.4360186457633972, + "step": 2928 + }, + { + "epoch": 0.6753516255476135, + "grad_norm": 1.5551926866200483, + "learning_rate": 1.5809788251986014e-06, + "loss": 0.49538636207580566, + "step": 2929 + }, + { + "epoch": 0.6755821996771962, + "grad_norm": 1.1285780293266063, + "learning_rate": 1.5806684673348288e-06, + "loss": 0.538766622543335, + "step": 2930 + }, + { + "epoch": 0.6758127738067788, + "grad_norm": 1.5395880930573347, + "learning_rate": 1.5803580250650094e-06, + "loss": 0.4113287329673767, + "step": 2931 + }, + { + "epoch": 0.6760433479363616, + "grad_norm": 1.4441179706006158, + "learning_rate": 1.5800474984342698e-06, + "loss": 
0.5298923254013062, + "step": 2932 + }, + { + "epoch": 0.6762739220659442, + "grad_norm": 1.2285488161220737, + "learning_rate": 1.5797368874877472e-06, + "loss": 0.4891100227832794, + "step": 2933 + }, + { + "epoch": 0.6765044961955269, + "grad_norm": 1.3809520207822814, + "learning_rate": 1.579426192270593e-06, + "loss": 0.4412326216697693, + "step": 2934 + }, + { + "epoch": 0.6767350703251095, + "grad_norm": 1.3386538114869513, + "learning_rate": 1.5791154128279693e-06, + "loss": 0.5514793395996094, + "step": 2935 + }, + { + "epoch": 0.6769656444546922, + "grad_norm": 1.2065068425398038, + "learning_rate": 1.578804549205051e-06, + "loss": 0.44050243496894836, + "step": 2936 + }, + { + "epoch": 0.6771962185842748, + "grad_norm": 1.3084516018872256, + "learning_rate": 1.5784936014470256e-06, + "loss": 0.47503453493118286, + "step": 2937 + }, + { + "epoch": 0.6774267927138575, + "grad_norm": 1.445992727647949, + "learning_rate": 1.5781825695990922e-06, + "loss": 0.524544894695282, + "step": 2938 + }, + { + "epoch": 0.6776573668434401, + "grad_norm": 1.2672201923678605, + "learning_rate": 1.5778714537064628e-06, + "loss": 0.4203689694404602, + "step": 2939 + }, + { + "epoch": 0.6778879409730229, + "grad_norm": 1.255678429788082, + "learning_rate": 1.577560253814361e-06, + "loss": 0.4305247664451599, + "step": 2940 + }, + { + "epoch": 0.6781185151026055, + "grad_norm": 1.2383698343036857, + "learning_rate": 1.577248969968023e-06, + "loss": 0.6129249930381775, + "step": 2941 + }, + { + "epoch": 0.6783490892321882, + "grad_norm": 1.4217586280781416, + "learning_rate": 1.5769376022126969e-06, + "loss": 0.44431981444358826, + "step": 2942 + }, + { + "epoch": 0.6785796633617708, + "grad_norm": 1.2327303005745092, + "learning_rate": 1.576626150593643e-06, + "loss": 0.4394958019256592, + "step": 2943 + }, + { + "epoch": 0.6788102374913535, + "grad_norm": 1.2593798978560244, + "learning_rate": 1.5763146151561345e-06, + "loss": 0.44481268525123596, + "step": 2944 + }, + { + 
"epoch": 0.6790408116209361, + "grad_norm": 1.4440486279504336, + "learning_rate": 1.5760029959454556e-06, + "loss": 0.4251822829246521, + "step": 2945 + }, + { + "epoch": 0.6792713857505188, + "grad_norm": 1.338830252556874, + "learning_rate": 1.575691293006904e-06, + "loss": 0.41041696071624756, + "step": 2946 + }, + { + "epoch": 0.6795019598801014, + "grad_norm": 1.357017341106407, + "learning_rate": 1.5753795063857883e-06, + "loss": 0.5710239410400391, + "step": 2947 + }, + { + "epoch": 0.6797325340096841, + "grad_norm": 1.2834985119403657, + "learning_rate": 1.57506763612743e-06, + "loss": 0.48825210332870483, + "step": 2948 + }, + { + "epoch": 0.6799631081392667, + "grad_norm": 1.263284608882453, + "learning_rate": 1.5747556822771628e-06, + "loss": 0.37077784538269043, + "step": 2949 + }, + { + "epoch": 0.6801936822688495, + "grad_norm": 1.2458271352531185, + "learning_rate": 1.5744436448803322e-06, + "loss": 0.4618649482727051, + "step": 2950 + }, + { + "epoch": 0.6804242563984321, + "grad_norm": 1.0624348057433408, + "learning_rate": 1.574131523982296e-06, + "loss": 0.4415496289730072, + "step": 2951 + }, + { + "epoch": 0.6806548305280148, + "grad_norm": 1.4732593030941656, + "learning_rate": 1.5738193196284239e-06, + "loss": 0.440029501914978, + "step": 2952 + }, + { + "epoch": 0.6808854046575974, + "grad_norm": 1.3992294210480754, + "learning_rate": 1.5735070318640986e-06, + "loss": 0.5149378776550293, + "step": 2953 + }, + { + "epoch": 0.6811159787871801, + "grad_norm": 1.3173119180782331, + "learning_rate": 1.5731946607347136e-06, + "loss": 0.4838085174560547, + "step": 2954 + }, + { + "epoch": 0.6813465529167627, + "grad_norm": 1.3500402916158631, + "learning_rate": 1.5728822062856757e-06, + "loss": 0.48472005128860474, + "step": 2955 + }, + { + "epoch": 0.6815771270463454, + "grad_norm": 1.163167888868214, + "learning_rate": 1.572569668562403e-06, + "loss": 0.5154656767845154, + "step": 2956 + }, + { + "epoch": 0.681807701175928, + "grad_norm": 
1.1906599654401737, + "learning_rate": 1.5722570476103263e-06, + "loss": 0.4094988703727722, + "step": 2957 + }, + { + "epoch": 0.6820382753055108, + "grad_norm": 1.2324943837281264, + "learning_rate": 1.5719443434748877e-06, + "loss": 0.5125937461853027, + "step": 2958 + }, + { + "epoch": 0.6822688494350934, + "grad_norm": 1.2538269370063608, + "learning_rate": 1.5716315562015428e-06, + "loss": 0.4807034730911255, + "step": 2959 + }, + { + "epoch": 0.6824994235646761, + "grad_norm": 1.3513545314522855, + "learning_rate": 1.5713186858357577e-06, + "loss": 0.6126741170883179, + "step": 2960 + }, + { + "epoch": 0.6827299976942587, + "grad_norm": 2.1674593801056887, + "learning_rate": 1.5710057324230113e-06, + "loss": 0.5450708866119385, + "step": 2961 + }, + { + "epoch": 0.6829605718238414, + "grad_norm": 1.8355809144200355, + "learning_rate": 1.5706926960087948e-06, + "loss": 0.47740328311920166, + "step": 2962 + }, + { + "epoch": 0.683191145953424, + "grad_norm": 1.311529987995532, + "learning_rate": 1.5703795766386112e-06, + "loss": 0.4731057584285736, + "step": 2963 + }, + { + "epoch": 0.6834217200830067, + "grad_norm": 1.3162153678952433, + "learning_rate": 1.5700663743579754e-06, + "loss": 0.49735045433044434, + "step": 2964 + }, + { + "epoch": 0.6836522942125893, + "grad_norm": 1.2346637447285915, + "learning_rate": 1.569753089212415e-06, + "loss": 0.5257318019866943, + "step": 2965 + }, + { + "epoch": 0.683882868342172, + "grad_norm": 1.1458467925306592, + "learning_rate": 1.5694397212474685e-06, + "loss": 0.3947733938694, + "step": 2966 + }, + { + "epoch": 0.6841134424717547, + "grad_norm": 1.424176183527685, + "learning_rate": 1.5691262705086875e-06, + "loss": 0.5078107714653015, + "step": 2967 + }, + { + "epoch": 0.6843440166013374, + "grad_norm": 1.7316538509871626, + "learning_rate": 1.5688127370416351e-06, + "loss": 0.5921520590782166, + "step": 2968 + }, + { + "epoch": 0.68457459073092, + "grad_norm": 1.2277129646213039, + "learning_rate": 
1.5684991208918866e-06, + "loss": 0.45995181798934937, + "step": 2969 + }, + { + "epoch": 0.6848051648605027, + "grad_norm": 1.1894548452861071, + "learning_rate": 1.5681854221050293e-06, + "loss": 0.4874386787414551, + "step": 2970 + }, + { + "epoch": 0.6850357389900853, + "grad_norm": 1.3695475422493124, + "learning_rate": 1.5678716407266625e-06, + "loss": 0.4522739052772522, + "step": 2971 + }, + { + "epoch": 0.685266313119668, + "grad_norm": 1.3244142914830208, + "learning_rate": 1.5675577768023977e-06, + "loss": 0.4596391022205353, + "step": 2972 + }, + { + "epoch": 0.6854968872492506, + "grad_norm": 1.6847382830263626, + "learning_rate": 1.567243830377858e-06, + "loss": 0.5391427278518677, + "step": 2973 + }, + { + "epoch": 0.6857274613788333, + "grad_norm": 1.2164543996098884, + "learning_rate": 1.5669298014986786e-06, + "loss": 0.5583066940307617, + "step": 2974 + }, + { + "epoch": 0.6859580355084159, + "grad_norm": 1.3656527800334406, + "learning_rate": 1.566615690210507e-06, + "loss": 0.5410330295562744, + "step": 2975 + }, + { + "epoch": 0.6861886096379987, + "grad_norm": 1.2007908045124778, + "learning_rate": 1.566301496559002e-06, + "loss": 0.5145233273506165, + "step": 2976 + }, + { + "epoch": 0.6864191837675813, + "grad_norm": 1.4168885241389684, + "learning_rate": 1.5659872205898356e-06, + "loss": 0.5021970272064209, + "step": 2977 + }, + { + "epoch": 0.686649757897164, + "grad_norm": 1.0896663307775538, + "learning_rate": 1.5656728623486903e-06, + "loss": 0.48251593112945557, + "step": 2978 + }, + { + "epoch": 0.6868803320267466, + "grad_norm": 1.2502610536872558, + "learning_rate": 1.5653584218812617e-06, + "loss": 0.4228450655937195, + "step": 2979 + }, + { + "epoch": 0.6871109061563293, + "grad_norm": 1.4048596098114436, + "learning_rate": 1.5650438992332567e-06, + "loss": 0.3975197374820709, + "step": 2980 + }, + { + "epoch": 0.6873414802859119, + "grad_norm": 1.386478606714872, + "learning_rate": 1.5647292944503945e-06, + "loss": 
0.5441234707832336, + "step": 2981 + }, + { + "epoch": 0.6875720544154946, + "grad_norm": 1.3552115877356068, + "learning_rate": 1.5644146075784057e-06, + "loss": 0.5357148051261902, + "step": 2982 + }, + { + "epoch": 0.6878026285450772, + "grad_norm": 1.2605289404512496, + "learning_rate": 1.5640998386630337e-06, + "loss": 0.530154824256897, + "step": 2983 + }, + { + "epoch": 0.68803320267466, + "grad_norm": 1.3830405468746736, + "learning_rate": 1.563784987750033e-06, + "loss": 0.480657696723938, + "step": 2984 + }, + { + "epoch": 0.6882637768042426, + "grad_norm": 1.2595390052779563, + "learning_rate": 1.5634700548851712e-06, + "loss": 0.4822859764099121, + "step": 2985 + }, + { + "epoch": 0.6884943509338253, + "grad_norm": 1.4511024891592457, + "learning_rate": 1.5631550401142257e-06, + "loss": 0.48551490902900696, + "step": 2986 + }, + { + "epoch": 0.6887249250634079, + "grad_norm": 1.252088599015217, + "learning_rate": 1.562839943482988e-06, + "loss": 0.43080294132232666, + "step": 2987 + }, + { + "epoch": 0.6889554991929906, + "grad_norm": 1.1661214157780933, + "learning_rate": 1.56252476503726e-06, + "loss": 0.42780637741088867, + "step": 2988 + }, + { + "epoch": 0.6891860733225732, + "grad_norm": 1.3057809079761946, + "learning_rate": 1.5622095048228565e-06, + "loss": 0.539027214050293, + "step": 2989 + }, + { + "epoch": 0.6894166474521559, + "grad_norm": 1.2289425463506802, + "learning_rate": 1.5618941628856037e-06, + "loss": 0.4529460668563843, + "step": 2990 + }, + { + "epoch": 0.6896472215817385, + "grad_norm": 1.4016140654354556, + "learning_rate": 1.5615787392713395e-06, + "loss": 0.49724727869033813, + "step": 2991 + }, + { + "epoch": 0.6898777957113212, + "grad_norm": 1.25157972103927, + "learning_rate": 1.5612632340259144e-06, + "loss": 0.4711928963661194, + "step": 2992 + }, + { + "epoch": 0.6901083698409038, + "grad_norm": 1.3707143585352468, + "learning_rate": 1.56094764719519e-06, + "loss": 0.42258220911026, + "step": 2993 + }, + { + "epoch": 
0.6903389439704866, + "grad_norm": 1.371187363460567, + "learning_rate": 1.5606319788250398e-06, + "loss": 0.47754064202308655, + "step": 2994 + }, + { + "epoch": 0.6905695181000692, + "grad_norm": 1.307708883093593, + "learning_rate": 1.5603162289613501e-06, + "loss": 0.47200560569763184, + "step": 2995 + }, + { + "epoch": 0.6908000922296519, + "grad_norm": 1.359798809074, + "learning_rate": 1.5600003976500173e-06, + "loss": 0.5194537043571472, + "step": 2996 + }, + { + "epoch": 0.6910306663592345, + "grad_norm": 1.707437655194179, + "learning_rate": 1.5596844849369518e-06, + "loss": 0.4874703586101532, + "step": 2997 + }, + { + "epoch": 0.6912612404888172, + "grad_norm": 1.262990523197611, + "learning_rate": 1.5593684908680738e-06, + "loss": 0.5028672218322754, + "step": 2998 + }, + { + "epoch": 0.6914918146183998, + "grad_norm": 1.2420345591817543, + "learning_rate": 1.5590524154893169e-06, + "loss": 0.44250521063804626, + "step": 2999 + }, + { + "epoch": 0.6917223887479825, + "grad_norm": 1.6089998258276121, + "learning_rate": 1.5587362588466253e-06, + "loss": 0.536510705947876, + "step": 3000 + }, + { + "epoch": 0.6919529628775651, + "grad_norm": 1.3333649931769909, + "learning_rate": 1.5584200209859558e-06, + "loss": 0.4514959752559662, + "step": 3001 + }, + { + "epoch": 0.6921835370071479, + "grad_norm": 1.1923376457733827, + "learning_rate": 1.5581037019532773e-06, + "loss": 0.4402197301387787, + "step": 3002 + }, + { + "epoch": 0.6924141111367305, + "grad_norm": 1.1940429657833775, + "learning_rate": 1.5577873017945691e-06, + "loss": 0.508256196975708, + "step": 3003 + }, + { + "epoch": 0.6926446852663131, + "grad_norm": 1.2600794916577294, + "learning_rate": 1.5574708205558236e-06, + "loss": 0.5123175978660583, + "step": 3004 + }, + { + "epoch": 0.6928752593958958, + "grad_norm": 1.4303227599201425, + "learning_rate": 1.5571542582830447e-06, + "loss": 0.4874982237815857, + "step": 3005 + }, + { + "epoch": 0.6931058335254784, + "grad_norm": 
1.314228379499143, + "learning_rate": 1.556837615022248e-06, + "loss": 0.44554391503334045, + "step": 3006 + }, + { + "epoch": 0.6933364076550611, + "grad_norm": 1.5428941228634732, + "learning_rate": 1.5565208908194603e-06, + "loss": 0.5899895429611206, + "step": 3007 + }, + { + "epoch": 0.6935669817846437, + "grad_norm": 1.2685614762262514, + "learning_rate": 1.5562040857207208e-06, + "loss": 0.5137951374053955, + "step": 3008 + }, + { + "epoch": 0.6937975559142264, + "grad_norm": 1.2863812659603593, + "learning_rate": 1.5558871997720805e-06, + "loss": 0.5435892343521118, + "step": 3009 + }, + { + "epoch": 0.694028130043809, + "grad_norm": 1.4463505314835092, + "learning_rate": 1.5555702330196021e-06, + "loss": 0.45998525619506836, + "step": 3010 + }, + { + "epoch": 0.6942587041733917, + "grad_norm": 1.324515476398786, + "learning_rate": 1.5552531855093597e-06, + "loss": 0.4676332473754883, + "step": 3011 + }, + { + "epoch": 0.6944892783029744, + "grad_norm": 1.2595225568514163, + "learning_rate": 1.5549360572874397e-06, + "loss": 0.48250633478164673, + "step": 3012 + }, + { + "epoch": 0.6947198524325571, + "grad_norm": 1.4537609539003187, + "learning_rate": 1.5546188483999396e-06, + "loss": 0.4841402769088745, + "step": 3013 + }, + { + "epoch": 0.6949504265621397, + "grad_norm": 1.401637069375295, + "learning_rate": 1.5543015588929688e-06, + "loss": 0.4717336893081665, + "step": 3014 + }, + { + "epoch": 0.6951810006917224, + "grad_norm": 1.3276052543558161, + "learning_rate": 1.5539841888126488e-06, + "loss": 0.48844897747039795, + "step": 3015 + }, + { + "epoch": 0.695411574821305, + "grad_norm": 1.539947517538627, + "learning_rate": 1.5536667382051127e-06, + "loss": 0.5244781970977783, + "step": 3016 + }, + { + "epoch": 0.6956421489508877, + "grad_norm": 1.2794123200247822, + "learning_rate": 1.5533492071165046e-06, + "loss": 0.4612278938293457, + "step": 3017 + }, + { + "epoch": 0.6958727230804703, + "grad_norm": 1.1978546028008836, + "learning_rate": 
1.5530315955929817e-06, + "loss": 0.40461257100105286, + "step": 3018 + }, + { + "epoch": 0.696103297210053, + "grad_norm": 1.387518032200497, + "learning_rate": 1.5527139036807112e-06, + "loss": 0.5191174745559692, + "step": 3019 + }, + { + "epoch": 0.6963338713396356, + "grad_norm": 1.510370534054042, + "learning_rate": 1.5523961314258731e-06, + "loss": 0.45882558822631836, + "step": 3020 + }, + { + "epoch": 0.6965644454692184, + "grad_norm": 1.230362803290169, + "learning_rate": 1.552078278874659e-06, + "loss": 0.4766819477081299, + "step": 3021 + }, + { + "epoch": 0.696795019598801, + "grad_norm": 1.2822436220739486, + "learning_rate": 1.5517603460732724e-06, + "loss": 0.4572867751121521, + "step": 3022 + }, + { + "epoch": 0.6970255937283837, + "grad_norm": 1.5677891937472022, + "learning_rate": 1.5514423330679272e-06, + "loss": 0.4689183235168457, + "step": 3023 + }, + { + "epoch": 0.6972561678579663, + "grad_norm": 1.18549719550499, + "learning_rate": 1.5511242399048504e-06, + "loss": 0.45769914984703064, + "step": 3024 + }, + { + "epoch": 0.697486741987549, + "grad_norm": 1.3095011770493485, + "learning_rate": 1.5508060666302796e-06, + "loss": 0.47367236018180847, + "step": 3025 + }, + { + "epoch": 0.6977173161171316, + "grad_norm": 1.5441644429162589, + "learning_rate": 1.550487813290465e-06, + "loss": 0.40873080492019653, + "step": 3026 + }, + { + "epoch": 0.6979478902467143, + "grad_norm": 1.2349195465907241, + "learning_rate": 1.5501694799316671e-06, + "loss": 0.42366844415664673, + "step": 3027 + }, + { + "epoch": 0.6981784643762969, + "grad_norm": 1.2587292360565243, + "learning_rate": 1.5498510666001602e-06, + "loss": 0.3133828043937683, + "step": 3028 + }, + { + "epoch": 0.6984090385058797, + "grad_norm": 1.5168032500602213, + "learning_rate": 1.549532573342228e-06, + "loss": 0.5188712477684021, + "step": 3029 + }, + { + "epoch": 0.6986396126354623, + "grad_norm": 1.2707264640547211, + "learning_rate": 1.5492140002041668e-06, + "loss": 
0.4374960660934448, + "step": 3030 + }, + { + "epoch": 0.698870186765045, + "grad_norm": 1.6828882278794643, + "learning_rate": 1.5488953472322845e-06, + "loss": 0.5285592079162598, + "step": 3031 + }, + { + "epoch": 0.6991007608946276, + "grad_norm": 1.5111090584536853, + "learning_rate": 1.5485766144729006e-06, + "loss": 0.5331767797470093, + "step": 3032 + }, + { + "epoch": 0.6993313350242103, + "grad_norm": 1.3626863062762309, + "learning_rate": 1.5482578019723462e-06, + "loss": 0.4546147584915161, + "step": 3033 + }, + { + "epoch": 0.6995619091537929, + "grad_norm": 1.2127032724557087, + "learning_rate": 1.5479389097769639e-06, + "loss": 0.47674182057380676, + "step": 3034 + }, + { + "epoch": 0.6997924832833756, + "grad_norm": 1.2042624102453106, + "learning_rate": 1.5476199379331078e-06, + "loss": 0.496138334274292, + "step": 3035 + }, + { + "epoch": 0.7000230574129582, + "grad_norm": 1.367736432364491, + "learning_rate": 1.547300886487144e-06, + "loss": 0.4843756854534149, + "step": 3036 + }, + { + "epoch": 0.7002536315425409, + "grad_norm": 1.5043582093976149, + "learning_rate": 1.5469817554854494e-06, + "loss": 0.6028264760971069, + "step": 3037 + }, + { + "epoch": 0.7004842056721235, + "grad_norm": 1.4959257460685322, + "learning_rate": 1.5466625449744134e-06, + "loss": 0.49528858065605164, + "step": 3038 + }, + { + "epoch": 0.7007147798017063, + "grad_norm": 1.1403876193260207, + "learning_rate": 1.5463432550004358e-06, + "loss": 0.466439425945282, + "step": 3039 + }, + { + "epoch": 0.7009453539312889, + "grad_norm": 1.1012676712945453, + "learning_rate": 1.5460238856099292e-06, + "loss": 0.4196532368659973, + "step": 3040 + }, + { + "epoch": 0.7011759280608716, + "grad_norm": 1.40353983379054, + "learning_rate": 1.5457044368493173e-06, + "loss": 0.47679999470710754, + "step": 3041 + }, + { + "epoch": 0.7014065021904542, + "grad_norm": 1.2594197008827683, + "learning_rate": 1.5453849087650346e-06, + "loss": 0.4368046522140503, + "step": 3042 + }, + { + 
"epoch": 0.7016370763200369, + "grad_norm": 1.2211703865137815, + "learning_rate": 1.5450653014035285e-06, + "loss": 0.45165273547172546, + "step": 3043 + }, + { + "epoch": 0.7018676504496195, + "grad_norm": 1.1456058151260982, + "learning_rate": 1.5447456148112563e-06, + "loss": 0.44813454151153564, + "step": 3044 + }, + { + "epoch": 0.7020982245792022, + "grad_norm": 1.269275990698592, + "learning_rate": 1.5444258490346882e-06, + "loss": 0.44681504368782043, + "step": 3045 + }, + { + "epoch": 0.7023287987087848, + "grad_norm": 1.3036360811480283, + "learning_rate": 1.5441060041203057e-06, + "loss": 0.44788169860839844, + "step": 3046 + }, + { + "epoch": 0.7025593728383676, + "grad_norm": 1.3232925218771132, + "learning_rate": 1.5437860801146013e-06, + "loss": 0.3754178285598755, + "step": 3047 + }, + { + "epoch": 0.7027899469679502, + "grad_norm": 1.001044690167693, + "learning_rate": 1.5434660770640787e-06, + "loss": 0.3582305908203125, + "step": 3048 + }, + { + "epoch": 0.7030205210975329, + "grad_norm": 1.3449464333610996, + "learning_rate": 1.543145995015254e-06, + "loss": 0.42649000883102417, + "step": 3049 + }, + { + "epoch": 0.7032510952271155, + "grad_norm": 1.2880551855073363, + "learning_rate": 1.5428258340146543e-06, + "loss": 0.5164098143577576, + "step": 3050 + }, + { + "epoch": 0.7034816693566982, + "grad_norm": 1.2456398303270981, + "learning_rate": 1.5425055941088181e-06, + "loss": 0.4193584620952606, + "step": 3051 + }, + { + "epoch": 0.7037122434862808, + "grad_norm": 1.3825374305431077, + "learning_rate": 1.5421852753442957e-06, + "loss": 0.5230807662010193, + "step": 3052 + }, + { + "epoch": 0.7039428176158635, + "grad_norm": 1.466681367301644, + "learning_rate": 1.5418648777676488e-06, + "loss": 0.4573478102684021, + "step": 3053 + }, + { + "epoch": 0.7041733917454461, + "grad_norm": 1.1343088214156583, + "learning_rate": 1.5415444014254503e-06, + "loss": 0.47031426429748535, + "step": 3054 + }, + { + "epoch": 0.7044039658750288, + 
"grad_norm": 1.3599997528041683, + "learning_rate": 1.5412238463642844e-06, + "loss": 0.4499198794364929, + "step": 3055 + }, + { + "epoch": 0.7046345400046115, + "grad_norm": 1.4014132343100743, + "learning_rate": 1.5409032126307477e-06, + "loss": 0.4775800406932831, + "step": 3056 + }, + { + "epoch": 0.7048651141341942, + "grad_norm": 1.4264420683743835, + "learning_rate": 1.540582500271447e-06, + "loss": 0.535969614982605, + "step": 3057 + }, + { + "epoch": 0.7050956882637768, + "grad_norm": 1.3808494199198469, + "learning_rate": 1.5402617093330013e-06, + "loss": 0.5358741283416748, + "step": 3058 + }, + { + "epoch": 0.7053262623933595, + "grad_norm": 1.2492824573732915, + "learning_rate": 1.5399408398620406e-06, + "loss": 0.5392765998840332, + "step": 3059 + }, + { + "epoch": 0.7055568365229421, + "grad_norm": 1.275809486426879, + "learning_rate": 1.5396198919052066e-06, + "loss": 0.47976016998291016, + "step": 3060 + }, + { + "epoch": 0.7057874106525248, + "grad_norm": 1.2226120465526635, + "learning_rate": 1.5392988655091526e-06, + "loss": 0.39919328689575195, + "step": 3061 + }, + { + "epoch": 0.7060179847821074, + "grad_norm": 1.6011371731611943, + "learning_rate": 1.538977760720543e-06, + "loss": 0.4503553509712219, + "step": 3062 + }, + { + "epoch": 0.7062485589116901, + "grad_norm": 1.2363983734925073, + "learning_rate": 1.5386565775860531e-06, + "loss": 0.4570388197898865, + "step": 3063 + }, + { + "epoch": 0.7064791330412727, + "grad_norm": 1.2640125065615475, + "learning_rate": 1.5383353161523706e-06, + "loss": 0.54588782787323, + "step": 3064 + }, + { + "epoch": 0.7067097071708555, + "grad_norm": 1.3495245665399438, + "learning_rate": 1.5380139764661945e-06, + "loss": 0.40369170904159546, + "step": 3065 + }, + { + "epoch": 0.7069402813004381, + "grad_norm": 1.40505470554238, + "learning_rate": 1.5376925585742341e-06, + "loss": 0.5079206228256226, + "step": 3066 + }, + { + "epoch": 0.7071708554300208, + "grad_norm": 1.2407138703812135, + 
"learning_rate": 1.5373710625232107e-06, + "loss": 0.41418159008026123, + "step": 3067 + }, + { + "epoch": 0.7074014295596034, + "grad_norm": 1.2523103492462024, + "learning_rate": 1.5370494883598575e-06, + "loss": 0.4546199142932892, + "step": 3068 + }, + { + "epoch": 0.7076320036891861, + "grad_norm": 1.1794904786936184, + "learning_rate": 1.5367278361309183e-06, + "loss": 0.48041367530822754, + "step": 3069 + }, + { + "epoch": 0.7078625778187687, + "grad_norm": 1.3468711432386478, + "learning_rate": 1.5364061058831486e-06, + "loss": 0.47676384449005127, + "step": 3070 + }, + { + "epoch": 0.7080931519483514, + "grad_norm": 1.1888236379295274, + "learning_rate": 1.5360842976633148e-06, + "loss": 0.47341692447662354, + "step": 3071 + }, + { + "epoch": 0.708323726077934, + "grad_norm": 1.3227579498868685, + "learning_rate": 1.5357624115181956e-06, + "loss": 0.38436269760131836, + "step": 3072 + }, + { + "epoch": 0.7085543002075168, + "grad_norm": 1.4827200040386144, + "learning_rate": 1.5354404474945798e-06, + "loss": 0.5369806289672852, + "step": 3073 + }, + { + "epoch": 0.7087848743370994, + "grad_norm": 1.404704151375413, + "learning_rate": 1.535118405639269e-06, + "loss": 0.5314677953720093, + "step": 3074 + }, + { + "epoch": 0.7090154484666821, + "grad_norm": 1.1927563297298747, + "learning_rate": 1.5347962859990742e-06, + "loss": 0.49233007431030273, + "step": 3075 + }, + { + "epoch": 0.7092460225962647, + "grad_norm": 1.3477590726762334, + "learning_rate": 1.5344740886208194e-06, + "loss": 0.4834766983985901, + "step": 3076 + }, + { + "epoch": 0.7094765967258474, + "grad_norm": 1.432138793969477, + "learning_rate": 1.534151813551339e-06, + "loss": 0.505670428276062, + "step": 3077 + }, + { + "epoch": 0.70970717085543, + "grad_norm": 1.3290190812046396, + "learning_rate": 1.533829460837479e-06, + "loss": 0.5256010293960571, + "step": 3078 + }, + { + "epoch": 0.7099377449850127, + "grad_norm": 1.463108893430833, + "learning_rate": 1.5335070305260967e-06, + 
"loss": 0.4186098873615265, + "step": 3079 + }, + { + "epoch": 0.7101683191145953, + "grad_norm": 1.2048981968166306, + "learning_rate": 1.5331845226640607e-06, + "loss": 0.4034464359283447, + "step": 3080 + }, + { + "epoch": 0.710398893244178, + "grad_norm": 1.346673761335588, + "learning_rate": 1.5328619372982505e-06, + "loss": 0.4521537721157074, + "step": 3081 + }, + { + "epoch": 0.7106294673737606, + "grad_norm": 1.5250190734837208, + "learning_rate": 1.5325392744755574e-06, + "loss": 0.4919602572917938, + "step": 3082 + }, + { + "epoch": 0.7108600415033434, + "grad_norm": 1.1734195700346683, + "learning_rate": 1.5322165342428835e-06, + "loss": 0.4464415907859802, + "step": 3083 + }, + { + "epoch": 0.711090615632926, + "grad_norm": 1.2610549525832775, + "learning_rate": 1.5318937166471427e-06, + "loss": 0.47444385290145874, + "step": 3084 + }, + { + "epoch": 0.7113211897625087, + "grad_norm": 1.1782687896584645, + "learning_rate": 1.5315708217352595e-06, + "loss": 0.4014730453491211, + "step": 3085 + }, + { + "epoch": 0.7115517638920913, + "grad_norm": 1.1806273152667501, + "learning_rate": 1.5312478495541703e-06, + "loss": 0.4528852701187134, + "step": 3086 + }, + { + "epoch": 0.711782338021674, + "grad_norm": 1.4716504682159035, + "learning_rate": 1.5309248001508216e-06, + "loss": 0.4919637441635132, + "step": 3087 + }, + { + "epoch": 0.7120129121512566, + "grad_norm": 1.3824738486934829, + "learning_rate": 1.530601673572173e-06, + "loss": 0.5630985498428345, + "step": 3088 + }, + { + "epoch": 0.7122434862808393, + "grad_norm": 1.4462966182250279, + "learning_rate": 1.5302784698651935e-06, + "loss": 0.3920522630214691, + "step": 3089 + }, + { + "epoch": 0.7124740604104219, + "grad_norm": 1.3282823423467587, + "learning_rate": 1.5299551890768642e-06, + "loss": 0.5502145290374756, + "step": 3090 + }, + { + "epoch": 0.7127046345400047, + "grad_norm": 1.2547204060730106, + "learning_rate": 1.5296318312541767e-06, + "loss": 0.4839448928833008, + "step": 3091 + }, 
+ { + "epoch": 0.7129352086695873, + "grad_norm": 1.3486430423834108, + "learning_rate": 1.5293083964441355e-06, + "loss": 0.5029735565185547, + "step": 3092 + }, + { + "epoch": 0.71316578279917, + "grad_norm": 1.2299483009823662, + "learning_rate": 1.5289848846937544e-06, + "loss": 0.4724803566932678, + "step": 3093 + }, + { + "epoch": 0.7133963569287526, + "grad_norm": 1.1015042263762262, + "learning_rate": 1.528661296050059e-06, + "loss": 0.4609840512275696, + "step": 3094 + }, + { + "epoch": 0.7136269310583353, + "grad_norm": 1.4829248198628113, + "learning_rate": 1.5283376305600863e-06, + "loss": 0.49763959646224976, + "step": 3095 + }, + { + "epoch": 0.7138575051879179, + "grad_norm": 1.2090810088725865, + "learning_rate": 1.5280138882708847e-06, + "loss": 0.42384523153305054, + "step": 3096 + }, + { + "epoch": 0.7140880793175006, + "grad_norm": 1.3550047979469209, + "learning_rate": 1.5276900692295134e-06, + "loss": 0.5034611225128174, + "step": 3097 + }, + { + "epoch": 0.7143186534470832, + "grad_norm": 1.3321189275554508, + "learning_rate": 1.5273661734830423e-06, + "loss": 0.5617417097091675, + "step": 3098 + }, + { + "epoch": 0.714549227576666, + "grad_norm": 1.320340684589947, + "learning_rate": 1.527042201078553e-06, + "loss": 0.4562014937400818, + "step": 3099 + }, + { + "epoch": 0.7147798017062486, + "grad_norm": 1.6932438225785027, + "learning_rate": 1.5267181520631386e-06, + "loss": 0.5626288056373596, + "step": 3100 + }, + { + "epoch": 0.7150103758358313, + "grad_norm": 1.4526784651389733, + "learning_rate": 1.5263940264839028e-06, + "loss": 0.4882054924964905, + "step": 3101 + }, + { + "epoch": 0.7152409499654139, + "grad_norm": 1.523666745804484, + "learning_rate": 1.5260698243879603e-06, + "loss": 0.5371058583259583, + "step": 3102 + }, + { + "epoch": 0.7154715240949966, + "grad_norm": 1.1599798656247362, + "learning_rate": 1.5257455458224368e-06, + "loss": 0.4683259129524231, + "step": 3103 + }, + { + "epoch": 0.7157020982245792, + 
"grad_norm": 1.223986374608111, + "learning_rate": 1.5254211908344704e-06, + "loss": 0.4894726872444153, + "step": 3104 + }, + { + "epoch": 0.7159326723541619, + "grad_norm": 1.3226351110788483, + "learning_rate": 1.5250967594712089e-06, + "loss": 0.4517880082130432, + "step": 3105 + }, + { + "epoch": 0.7161632464837445, + "grad_norm": 1.162528176566508, + "learning_rate": 1.5247722517798118e-06, + "loss": 0.5062767267227173, + "step": 3106 + }, + { + "epoch": 0.7163938206133272, + "grad_norm": 1.6349408984878264, + "learning_rate": 1.5244476678074494e-06, + "loss": 0.5029302835464478, + "step": 3107 + }, + { + "epoch": 0.7166243947429098, + "grad_norm": 1.3765367207185526, + "learning_rate": 1.5241230076013035e-06, + "loss": 0.44112175703048706, + "step": 3108 + }, + { + "epoch": 0.7168549688724926, + "grad_norm": 1.3847966627377115, + "learning_rate": 1.5237982712085665e-06, + "loss": 0.43693509697914124, + "step": 3109 + }, + { + "epoch": 0.7170855430020752, + "grad_norm": 1.3509946026255297, + "learning_rate": 1.5234734586764422e-06, + "loss": 0.4544166922569275, + "step": 3110 + }, + { + "epoch": 0.7173161171316579, + "grad_norm": 1.1949924477500942, + "learning_rate": 1.5231485700521451e-06, + "loss": 0.5470178127288818, + "step": 3111 + }, + { + "epoch": 0.7175466912612405, + "grad_norm": 1.5007057362656466, + "learning_rate": 1.5228236053829017e-06, + "loss": 0.5215972065925598, + "step": 3112 + }, + { + "epoch": 0.7177772653908232, + "grad_norm": 1.1400006826022246, + "learning_rate": 1.5224985647159488e-06, + "loss": 0.3922381103038788, + "step": 3113 + }, + { + "epoch": 0.7180078395204058, + "grad_norm": 1.3432802481675237, + "learning_rate": 1.5221734480985341e-06, + "loss": 0.47455158829689026, + "step": 3114 + }, + { + "epoch": 0.7182384136499884, + "grad_norm": 1.517078162476979, + "learning_rate": 1.5218482555779164e-06, + "loss": 0.5776175260543823, + "step": 3115 + }, + { + "epoch": 0.7184689877795711, + "grad_norm": 1.4757174936390305, + 
"learning_rate": 1.521522987201366e-06, + "loss": 0.40414175391197205, + "step": 3116 + }, + { + "epoch": 0.7186995619091537, + "grad_norm": 1.5441693701407133, + "learning_rate": 1.5211976430161643e-06, + "loss": 0.44597384333610535, + "step": 3117 + }, + { + "epoch": 0.7189301360387365, + "grad_norm": 1.6495022083145716, + "learning_rate": 1.5208722230696024e-06, + "loss": 0.50276118516922, + "step": 3118 + }, + { + "epoch": 0.7191607101683191, + "grad_norm": 1.255966386168249, + "learning_rate": 1.5205467274089844e-06, + "loss": 0.43281811475753784, + "step": 3119 + }, + { + "epoch": 0.7193912842979018, + "grad_norm": 1.196003407991791, + "learning_rate": 1.5202211560816243e-06, + "loss": 0.3796764016151428, + "step": 3120 + }, + { + "epoch": 0.7196218584274844, + "grad_norm": 1.1855608567240021, + "learning_rate": 1.5198955091348463e-06, + "loss": 0.47820231318473816, + "step": 3121 + }, + { + "epoch": 0.7198524325570671, + "grad_norm": 1.3809241508956476, + "learning_rate": 1.5195697866159875e-06, + "loss": 0.4737284779548645, + "step": 3122 + }, + { + "epoch": 0.7200830066866497, + "grad_norm": 1.3019928778593748, + "learning_rate": 1.519243988572394e-06, + "loss": 0.44652169942855835, + "step": 3123 + }, + { + "epoch": 0.7203135808162324, + "grad_norm": 1.0393403987452434, + "learning_rate": 1.518918115051425e-06, + "loss": 0.42702072858810425, + "step": 3124 + }, + { + "epoch": 0.720544154945815, + "grad_norm": 1.3835329760109338, + "learning_rate": 1.5185921661004483e-06, + "loss": 0.5003541707992554, + "step": 3125 + }, + { + "epoch": 0.7207747290753977, + "grad_norm": 1.3444035589789487, + "learning_rate": 1.518266141766845e-06, + "loss": 0.5045102834701538, + "step": 3126 + }, + { + "epoch": 0.7210053032049804, + "grad_norm": 1.3069630488439725, + "learning_rate": 1.5179400420980052e-06, + "loss": 0.46619412302970886, + "step": 3127 + }, + { + "epoch": 0.7212358773345631, + "grad_norm": 1.7755918931491346, + "learning_rate": 1.5176138671413314e-06, + 
"loss": 0.5006855726242065, + "step": 3128 + }, + { + "epoch": 0.7214664514641457, + "grad_norm": 1.4202077937995432, + "learning_rate": 1.5172876169442362e-06, + "loss": 0.4394634962081909, + "step": 3129 + }, + { + "epoch": 0.7216970255937284, + "grad_norm": 1.203576429459206, + "learning_rate": 1.5169612915541428e-06, + "loss": 0.49311593174934387, + "step": 3130 + }, + { + "epoch": 0.721927599723311, + "grad_norm": 1.2610358507024448, + "learning_rate": 1.5166348910184868e-06, + "loss": 0.38406768441200256, + "step": 3131 + }, + { + "epoch": 0.7221581738528937, + "grad_norm": 1.52088025341024, + "learning_rate": 1.5163084153847132e-06, + "loss": 0.547613799571991, + "step": 3132 + }, + { + "epoch": 0.7223887479824763, + "grad_norm": 1.4599825671580298, + "learning_rate": 1.515981864700279e-06, + "loss": 0.43875589966773987, + "step": 3133 + }, + { + "epoch": 0.722619322112059, + "grad_norm": 1.3276172293945816, + "learning_rate": 1.5156552390126516e-06, + "loss": 0.41515982151031494, + "step": 3134 + }, + { + "epoch": 0.7228498962416416, + "grad_norm": 1.400170522869638, + "learning_rate": 1.5153285383693088e-06, + "loss": 0.43297481536865234, + "step": 3135 + }, + { + "epoch": 0.7230804703712244, + "grad_norm": 1.3346402467183769, + "learning_rate": 1.5150017628177408e-06, + "loss": 0.5059916377067566, + "step": 3136 + }, + { + "epoch": 0.723311044500807, + "grad_norm": 1.4474439218451525, + "learning_rate": 1.514674912405447e-06, + "loss": 0.4776325225830078, + "step": 3137 + }, + { + "epoch": 0.7235416186303897, + "grad_norm": 1.4332410620248028, + "learning_rate": 1.5143479871799381e-06, + "loss": 0.4925272464752197, + "step": 3138 + }, + { + "epoch": 0.7237721927599723, + "grad_norm": 0.9806444224416654, + "learning_rate": 1.5140209871887368e-06, + "loss": 0.3825960159301758, + "step": 3139 + }, + { + "epoch": 0.724002766889555, + "grad_norm": 1.811554812935443, + "learning_rate": 1.513693912479376e-06, + "loss": 0.5582098960876465, + "step": 3140 + }, + { 
+ "epoch": 0.7242333410191376, + "grad_norm": 1.4229587145535472, + "learning_rate": 1.5133667630993983e-06, + "loss": 0.4079757630825043, + "step": 3141 + }, + { + "epoch": 0.7244639151487203, + "grad_norm": 1.3307764336864334, + "learning_rate": 1.513039539096359e-06, + "loss": 0.4996449947357178, + "step": 3142 + }, + { + "epoch": 0.7246944892783029, + "grad_norm": 1.2360600034220603, + "learning_rate": 1.5127122405178233e-06, + "loss": 0.4822157323360443, + "step": 3143 + }, + { + "epoch": 0.7249250634078857, + "grad_norm": 1.2687974509229507, + "learning_rate": 1.512384867411367e-06, + "loss": 0.43123728036880493, + "step": 3144 + }, + { + "epoch": 0.7251556375374683, + "grad_norm": 1.2723246094506335, + "learning_rate": 1.5120574198245776e-06, + "loss": 0.4942808151245117, + "step": 3145 + }, + { + "epoch": 0.725386211667051, + "grad_norm": 1.1117112525626116, + "learning_rate": 1.5117298978050525e-06, + "loss": 0.49165093898773193, + "step": 3146 + }, + { + "epoch": 0.7256167857966336, + "grad_norm": 1.2668452294382095, + "learning_rate": 1.5114023014004008e-06, + "loss": 0.4700804352760315, + "step": 3147 + }, + { + "epoch": 0.7258473599262163, + "grad_norm": 1.9638712043686382, + "learning_rate": 1.5110746306582413e-06, + "loss": 0.4703143835067749, + "step": 3148 + }, + { + "epoch": 0.7260779340557989, + "grad_norm": 1.2418379131661055, + "learning_rate": 1.5107468856262048e-06, + "loss": 0.47312211990356445, + "step": 3149 + }, + { + "epoch": 0.7263085081853816, + "grad_norm": 1.3558937860977873, + "learning_rate": 1.5104190663519323e-06, + "loss": 0.49607813358306885, + "step": 3150 + }, + { + "epoch": 0.7265390823149642, + "grad_norm": 1.2747447528869889, + "learning_rate": 1.5100911728830754e-06, + "loss": 0.4401499629020691, + "step": 3151 + }, + { + "epoch": 0.7267696564445469, + "grad_norm": 1.3050498169083122, + "learning_rate": 1.5097632052672973e-06, + "loss": 0.4979579448699951, + "step": 3152 + }, + { + "epoch": 0.7270002305741295, + 
"grad_norm": 1.1477032098667286, + "learning_rate": 1.5094351635522706e-06, + "loss": 0.42917048931121826, + "step": 3153 + }, + { + "epoch": 0.7272308047037123, + "grad_norm": 1.2688450847611672, + "learning_rate": 1.50910704778568e-06, + "loss": 0.41664260625839233, + "step": 3154 + }, + { + "epoch": 0.7274613788332949, + "grad_norm": 1.4083630490412662, + "learning_rate": 1.5087788580152206e-06, + "loss": 0.5000253915786743, + "step": 3155 + }, + { + "epoch": 0.7276919529628776, + "grad_norm": 1.2424572303309531, + "learning_rate": 1.5084505942885976e-06, + "loss": 0.5075093507766724, + "step": 3156 + }, + { + "epoch": 0.7279225270924602, + "grad_norm": 1.319578470826436, + "learning_rate": 1.508122256653528e-06, + "loss": 0.44975680112838745, + "step": 3157 + }, + { + "epoch": 0.7281531012220429, + "grad_norm": 1.1450711263341298, + "learning_rate": 1.5077938451577383e-06, + "loss": 0.44494926929473877, + "step": 3158 + }, + { + "epoch": 0.7283836753516255, + "grad_norm": 1.3333716905743178, + "learning_rate": 1.5074653598489673e-06, + "loss": 0.5664352178573608, + "step": 3159 + }, + { + "epoch": 0.7286142494812082, + "grad_norm": 1.1840094617058035, + "learning_rate": 1.507136800774963e-06, + "loss": 0.5694705247879028, + "step": 3160 + }, + { + "epoch": 0.7288448236107908, + "grad_norm": 1.5658434570152957, + "learning_rate": 1.506808167983485e-06, + "loss": 0.5121151804924011, + "step": 3161 + }, + { + "epoch": 0.7290753977403736, + "grad_norm": 1.3559529766390859, + "learning_rate": 1.5064794615223034e-06, + "loss": 0.45935380458831787, + "step": 3162 + }, + { + "epoch": 0.7293059718699562, + "grad_norm": 1.2036749528520703, + "learning_rate": 1.506150681439199e-06, + "loss": 0.517521858215332, + "step": 3163 + }, + { + "epoch": 0.7295365459995389, + "grad_norm": 1.271352713883254, + "learning_rate": 1.5058218277819638e-06, + "loss": 0.5078546404838562, + "step": 3164 + }, + { + "epoch": 0.7297671201291215, + "grad_norm": 1.4877111530715366, + 
"learning_rate": 1.5054929005983992e-06, + "loss": 0.47892552614212036, + "step": 3165 + }, + { + "epoch": 0.7299976942587042, + "grad_norm": 1.5569470487033794, + "learning_rate": 1.5051638999363185e-06, + "loss": 0.48825597763061523, + "step": 3166 + }, + { + "epoch": 0.7302282683882868, + "grad_norm": 1.2181600327145499, + "learning_rate": 1.5048348258435457e-06, + "loss": 0.488031804561615, + "step": 3167 + }, + { + "epoch": 0.7304588425178695, + "grad_norm": 1.178638754387744, + "learning_rate": 1.5045056783679143e-06, + "loss": 0.4669504761695862, + "step": 3168 + }, + { + "epoch": 0.7306894166474521, + "grad_norm": 1.364305786110939, + "learning_rate": 1.5041764575572695e-06, + "loss": 0.45620614290237427, + "step": 3169 + }, + { + "epoch": 0.7309199907770348, + "grad_norm": 1.4607481202185084, + "learning_rate": 1.5038471634594667e-06, + "loss": 0.4271177649497986, + "step": 3170 + }, + { + "epoch": 0.7311505649066175, + "grad_norm": 1.4441980354968733, + "learning_rate": 1.5035177961223726e-06, + "loss": 0.5170531272888184, + "step": 3171 + }, + { + "epoch": 0.7313811390362002, + "grad_norm": 1.046719642579895, + "learning_rate": 1.5031883555938638e-06, + "loss": 0.4261493682861328, + "step": 3172 + }, + { + "epoch": 0.7316117131657828, + "grad_norm": 1.4357281868096983, + "learning_rate": 1.502858841921828e-06, + "loss": 0.4958994686603546, + "step": 3173 + }, + { + "epoch": 0.7318422872953655, + "grad_norm": 1.631538220078115, + "learning_rate": 1.502529255154163e-06, + "loss": 0.49798572063446045, + "step": 3174 + }, + { + "epoch": 0.7320728614249481, + "grad_norm": 1.3524076496726538, + "learning_rate": 1.502199595338778e-06, + "loss": 0.4067850708961487, + "step": 3175 + }, + { + "epoch": 0.7323034355545308, + "grad_norm": 1.2000506588677564, + "learning_rate": 1.5018698625235916e-06, + "loss": 0.4680994153022766, + "step": 3176 + }, + { + "epoch": 0.7325340096841134, + "grad_norm": 1.3054261583860276, + "learning_rate": 1.501540056756535e-06, + 
"loss": 0.49181580543518066, + "step": 3177 + }, + { + "epoch": 0.7327645838136961, + "grad_norm": 1.485479754545564, + "learning_rate": 1.501210178085548e-06, + "loss": 0.5425546169281006, + "step": 3178 + }, + { + "epoch": 0.7329951579432787, + "grad_norm": 1.1514309763496005, + "learning_rate": 1.500880226558582e-06, + "loss": 0.4869355261325836, + "step": 3179 + }, + { + "epoch": 0.7332257320728615, + "grad_norm": 1.5737536993523387, + "learning_rate": 1.500550202223599e-06, + "loss": 0.5157885551452637, + "step": 3180 + }, + { + "epoch": 0.7334563062024441, + "grad_norm": 1.4471157017235972, + "learning_rate": 1.5002201051285707e-06, + "loss": 0.528350293636322, + "step": 3181 + }, + { + "epoch": 0.7336868803320268, + "grad_norm": 1.0924579051997452, + "learning_rate": 1.499889935321481e-06, + "loss": 0.3963279128074646, + "step": 3182 + }, + { + "epoch": 0.7339174544616094, + "grad_norm": 1.0536411378011648, + "learning_rate": 1.499559692850323e-06, + "loss": 0.36777108907699585, + "step": 3183 + }, + { + "epoch": 0.7341480285911921, + "grad_norm": 1.3572066258310391, + "learning_rate": 1.4992293777631004e-06, + "loss": 0.4592905044555664, + "step": 3184 + }, + { + "epoch": 0.7343786027207747, + "grad_norm": 1.3801194879873266, + "learning_rate": 1.4988989901078285e-06, + "loss": 0.458257257938385, + "step": 3185 + }, + { + "epoch": 0.7346091768503574, + "grad_norm": 1.2823442631336313, + "learning_rate": 1.4985685299325316e-06, + "loss": 0.4844989478588104, + "step": 3186 + }, + { + "epoch": 0.73483975097994, + "grad_norm": 1.3019212093413413, + "learning_rate": 1.498237997285247e-06, + "loss": 0.381417453289032, + "step": 3187 + }, + { + "epoch": 0.7350703251095227, + "grad_norm": 1.267517645310936, + "learning_rate": 1.4979073922140196e-06, + "loss": 0.42452555894851685, + "step": 3188 + }, + { + "epoch": 0.7353008992391054, + "grad_norm": 1.2143530957836637, + "learning_rate": 1.4975767147669063e-06, + "loss": 0.4660685956478119, + "step": 3189 + }, + { + 
"epoch": 0.7355314733686881, + "grad_norm": 1.243568614271109, + "learning_rate": 1.4972459649919748e-06, + "loss": 0.4332653880119324, + "step": 3190 + }, + { + "epoch": 0.7357620474982707, + "grad_norm": 1.4818958085574696, + "learning_rate": 1.496915142937303e-06, + "loss": 0.5580132007598877, + "step": 3191 + }, + { + "epoch": 0.7359926216278534, + "grad_norm": 1.102415574688255, + "learning_rate": 1.4965842486509792e-06, + "loss": 0.43711793422698975, + "step": 3192 + }, + { + "epoch": 0.736223195757436, + "grad_norm": 1.1786805187530485, + "learning_rate": 1.496253282181102e-06, + "loss": 0.44969767332077026, + "step": 3193 + }, + { + "epoch": 0.7364537698870187, + "grad_norm": 1.5017804708887366, + "learning_rate": 1.4959222435757809e-06, + "loss": 0.5288668870925903, + "step": 3194 + }, + { + "epoch": 0.7366843440166013, + "grad_norm": 1.2442315862489326, + "learning_rate": 1.4955911328831353e-06, + "loss": 0.45993220806121826, + "step": 3195 + }, + { + "epoch": 0.736914918146184, + "grad_norm": 1.6618645292728147, + "learning_rate": 1.4952599501512963e-06, + "loss": 0.5360512733459473, + "step": 3196 + }, + { + "epoch": 0.7371454922757666, + "grad_norm": 1.2833906478614454, + "learning_rate": 1.4949286954284044e-06, + "loss": 0.3923282325267792, + "step": 3197 + }, + { + "epoch": 0.7373760664053494, + "grad_norm": 1.2830570803742403, + "learning_rate": 1.4945973687626103e-06, + "loss": 0.5051449537277222, + "step": 3198 + }, + { + "epoch": 0.737606640534932, + "grad_norm": 1.288727241344276, + "learning_rate": 1.4942659702020763e-06, + "loss": 0.5035187602043152, + "step": 3199 + }, + { + "epoch": 0.7378372146645147, + "grad_norm": 1.1929311231536464, + "learning_rate": 1.4939344997949742e-06, + "loss": 0.4922195076942444, + "step": 3200 + }, + { + "epoch": 0.7380677887940973, + "grad_norm": 1.1654414900260779, + "learning_rate": 1.4936029575894865e-06, + "loss": 0.49664247035980225, + "step": 3201 + }, + { + "epoch": 0.73829836292368, + "grad_norm": 
1.2090144084254086, + "learning_rate": 1.4932713436338065e-06, + "loss": 0.4240155816078186, + "step": 3202 + }, + { + "epoch": 0.7385289370532626, + "grad_norm": 1.150655085488804, + "learning_rate": 1.4929396579761376e-06, + "loss": 0.3830781579017639, + "step": 3203 + }, + { + "epoch": 0.7387595111828453, + "grad_norm": 1.2626520886498587, + "learning_rate": 1.4926079006646936e-06, + "loss": 0.37983447313308716, + "step": 3204 + }, + { + "epoch": 0.7389900853124279, + "grad_norm": 1.37294258180721, + "learning_rate": 1.4922760717476989e-06, + "loss": 0.4680769443511963, + "step": 3205 + }, + { + "epoch": 0.7392206594420107, + "grad_norm": 1.0992782157194299, + "learning_rate": 1.4919441712733878e-06, + "loss": 0.3801664710044861, + "step": 3206 + }, + { + "epoch": 0.7394512335715933, + "grad_norm": 1.2101909370157682, + "learning_rate": 1.4916121992900062e-06, + "loss": 0.5506627559661865, + "step": 3207 + }, + { + "epoch": 0.739681807701176, + "grad_norm": 1.4326210599966231, + "learning_rate": 1.4912801558458087e-06, + "loss": 0.4976215660572052, + "step": 3208 + }, + { + "epoch": 0.7399123818307586, + "grad_norm": 1.269851030633043, + "learning_rate": 1.4909480409890615e-06, + "loss": 0.42806485295295715, + "step": 3209 + }, + { + "epoch": 0.7401429559603413, + "grad_norm": 1.5738327378318604, + "learning_rate": 1.4906158547680413e-06, + "loss": 0.3850712180137634, + "step": 3210 + }, + { + "epoch": 0.7403735300899239, + "grad_norm": 1.1706966056418486, + "learning_rate": 1.4902835972310342e-06, + "loss": 0.4356945753097534, + "step": 3211 + }, + { + "epoch": 0.7406041042195066, + "grad_norm": 1.3196733008465567, + "learning_rate": 1.4899512684263373e-06, + "loss": 0.4806904196739197, + "step": 3212 + }, + { + "epoch": 0.7408346783490892, + "grad_norm": 1.6634902313002624, + "learning_rate": 1.489618868402258e-06, + "loss": 0.544597327709198, + "step": 3213 + }, + { + "epoch": 0.7410652524786719, + "grad_norm": 1.2400106880376924, + "learning_rate": 
1.4892863972071141e-06, + "loss": 0.39847469329833984, + "step": 3214 + }, + { + "epoch": 0.7412958266082545, + "grad_norm": 1.165782132875825, + "learning_rate": 1.4889538548892336e-06, + "loss": 0.4959847331047058, + "step": 3215 + }, + { + "epoch": 0.7415264007378373, + "grad_norm": 1.1727701470106202, + "learning_rate": 1.488621241496955e-06, + "loss": 0.3839089870452881, + "step": 3216 + }, + { + "epoch": 0.7417569748674199, + "grad_norm": 1.4119004491894294, + "learning_rate": 1.4882885570786266e-06, + "loss": 0.5187599658966064, + "step": 3217 + }, + { + "epoch": 0.7419875489970026, + "grad_norm": 1.1715648701346035, + "learning_rate": 1.4879558016826082e-06, + "loss": 0.45735663175582886, + "step": 3218 + }, + { + "epoch": 0.7422181231265852, + "grad_norm": 1.2093385209256575, + "learning_rate": 1.4876229753572687e-06, + "loss": 0.5635267496109009, + "step": 3219 + }, + { + "epoch": 0.7424486972561679, + "grad_norm": 1.5737635031230153, + "learning_rate": 1.4872900781509876e-06, + "loss": 0.5255833268165588, + "step": 3220 + }, + { + "epoch": 0.7426792713857505, + "grad_norm": 1.3608013352784492, + "learning_rate": 1.486957110112155e-06, + "loss": 0.4563497304916382, + "step": 3221 + }, + { + "epoch": 0.7429098455153332, + "grad_norm": 1.2494840959741684, + "learning_rate": 1.4866240712891714e-06, + "loss": 0.3737669885158539, + "step": 3222 + }, + { + "epoch": 0.7431404196449158, + "grad_norm": 1.3341042787752078, + "learning_rate": 1.4862909617304473e-06, + "loss": 0.48965659737586975, + "step": 3223 + }, + { + "epoch": 0.7433709937744986, + "grad_norm": 1.138792861067833, + "learning_rate": 1.4859577814844036e-06, + "loss": 0.40867483615875244, + "step": 3224 + }, + { + "epoch": 0.7436015679040812, + "grad_norm": 1.6873709244395776, + "learning_rate": 1.4856245305994711e-06, + "loss": 0.5870566368103027, + "step": 3225 + }, + { + "epoch": 0.7438321420336638, + "grad_norm": 1.9479920905112817, + "learning_rate": 1.4852912091240914e-06, + "loss": 
0.5424025654792786, + "step": 3226 + }, + { + "epoch": 0.7440627161632465, + "grad_norm": 1.3117337551828157, + "learning_rate": 1.4849578171067166e-06, + "loss": 0.5305285453796387, + "step": 3227 + }, + { + "epoch": 0.7442932902928291, + "grad_norm": 1.6524409541791285, + "learning_rate": 1.4846243545958078e-06, + "loss": 0.4189227819442749, + "step": 3228 + }, + { + "epoch": 0.7445238644224118, + "grad_norm": 1.3163917938675591, + "learning_rate": 1.4842908216398379e-06, + "loss": 0.44568121433258057, + "step": 3229 + }, + { + "epoch": 0.7447544385519944, + "grad_norm": 1.57546318763007, + "learning_rate": 1.4839572182872883e-06, + "loss": 0.5177523493766785, + "step": 3230 + }, + { + "epoch": 0.7449850126815771, + "grad_norm": 2.0231485633083213, + "learning_rate": 1.4836235445866528e-06, + "loss": 0.5100630521774292, + "step": 3231 + }, + { + "epoch": 0.7452155868111597, + "grad_norm": 1.2988766977840327, + "learning_rate": 1.4832898005864336e-06, + "loss": 0.45731791853904724, + "step": 3232 + }, + { + "epoch": 0.7454461609407425, + "grad_norm": 1.4418312758556044, + "learning_rate": 1.4829559863351437e-06, + "loss": 0.5161736011505127, + "step": 3233 + }, + { + "epoch": 0.7456767350703251, + "grad_norm": 1.2131599613200943, + "learning_rate": 1.4826221018813067e-06, + "loss": 0.4778611660003662, + "step": 3234 + }, + { + "epoch": 0.7459073091999078, + "grad_norm": 1.208766404583587, + "learning_rate": 1.482288147273456e-06, + "loss": 0.467506468296051, + "step": 3235 + }, + { + "epoch": 0.7461378833294904, + "grad_norm": 1.3564852786094337, + "learning_rate": 1.4819541225601352e-06, + "loss": 0.5061084032058716, + "step": 3236 + }, + { + "epoch": 0.7463684574590731, + "grad_norm": 1.3693293129226278, + "learning_rate": 1.4816200277898983e-06, + "loss": 0.5066365599632263, + "step": 3237 + }, + { + "epoch": 0.7465990315886557, + "grad_norm": 1.2091939411250054, + "learning_rate": 1.4812858630113093e-06, + "loss": 0.44285398721694946, + "step": 3238 + }, + { + 
"epoch": 0.7468296057182384, + "grad_norm": 1.3395886619598594, + "learning_rate": 1.4809516282729426e-06, + "loss": 0.5325936079025269, + "step": 3239 + }, + { + "epoch": 0.747060179847821, + "grad_norm": 1.2575363206535257, + "learning_rate": 1.4806173236233818e-06, + "loss": 0.37296950817108154, + "step": 3240 + }, + { + "epoch": 0.7472907539774037, + "grad_norm": 1.3466058050144787, + "learning_rate": 1.4802829491112228e-06, + "loss": 0.4596887230873108, + "step": 3241 + }, + { + "epoch": 0.7475213281069863, + "grad_norm": 1.4791727382559166, + "learning_rate": 1.4799485047850693e-06, + "loss": 0.4344385266304016, + "step": 3242 + }, + { + "epoch": 0.7477519022365691, + "grad_norm": 1.235031250671636, + "learning_rate": 1.4796139906935365e-06, + "loss": 0.458631306886673, + "step": 3243 + }, + { + "epoch": 0.7479824763661517, + "grad_norm": 1.3676048590005543, + "learning_rate": 1.4792794068852494e-06, + "loss": 0.5425032377243042, + "step": 3244 + }, + { + "epoch": 0.7482130504957344, + "grad_norm": 1.1764717045773245, + "learning_rate": 1.478944753408843e-06, + "loss": 0.4240065813064575, + "step": 3245 + }, + { + "epoch": 0.748443624625317, + "grad_norm": 1.3527342191314002, + "learning_rate": 1.478610030312963e-06, + "loss": 0.5533365607261658, + "step": 3246 + }, + { + "epoch": 0.7486741987548997, + "grad_norm": 1.4574041701217884, + "learning_rate": 1.4782752376462647e-06, + "loss": 0.4089345335960388, + "step": 3247 + }, + { + "epoch": 0.7489047728844823, + "grad_norm": 1.3793731191813918, + "learning_rate": 1.4779403754574131e-06, + "loss": 0.5098259449005127, + "step": 3248 + }, + { + "epoch": 0.749135347014065, + "grad_norm": 1.3041128935188901, + "learning_rate": 1.4776054437950842e-06, + "loss": 0.4615677297115326, + "step": 3249 + }, + { + "epoch": 0.7493659211436476, + "grad_norm": 1.3216071057711354, + "learning_rate": 1.4772704427079639e-06, + "loss": 0.460266649723053, + "step": 3250 + }, + { + "epoch": 0.7495964952732304, + "grad_norm": 
1.4054347579351087, + "learning_rate": 1.4769353722447476e-06, + "loss": 0.4727064371109009, + "step": 3251 + }, + { + "epoch": 0.749827069402813, + "grad_norm": 1.3954753679563598, + "learning_rate": 1.4766002324541411e-06, + "loss": 0.4733152985572815, + "step": 3252 + }, + { + "epoch": 0.7500576435323957, + "grad_norm": 1.408517900798552, + "learning_rate": 1.4762650233848609e-06, + "loss": 0.5055218935012817, + "step": 3253 + }, + { + "epoch": 0.7502882176619783, + "grad_norm": 1.3285058616446128, + "learning_rate": 1.4759297450856324e-06, + "loss": 0.6129124164581299, + "step": 3254 + }, + { + "epoch": 0.750518791791561, + "grad_norm": 1.6354094862337523, + "learning_rate": 1.4755943976051926e-06, + "loss": 0.46197545528411865, + "step": 3255 + }, + { + "epoch": 0.7507493659211436, + "grad_norm": 1.3239897164772563, + "learning_rate": 1.4752589809922868e-06, + "loss": 0.5227653980255127, + "step": 3256 + }, + { + "epoch": 0.7509799400507263, + "grad_norm": 1.4638577740242362, + "learning_rate": 1.4749234952956715e-06, + "loss": 0.5189518928527832, + "step": 3257 + }, + { + "epoch": 0.7512105141803089, + "grad_norm": 1.2059107130307087, + "learning_rate": 1.474587940564113e-06, + "loss": 0.4850584864616394, + "step": 3258 + }, + { + "epoch": 0.7514410883098916, + "grad_norm": 1.4809027704015267, + "learning_rate": 1.4742523168463876e-06, + "loss": 0.5218943357467651, + "step": 3259 + }, + { + "epoch": 0.7516716624394743, + "grad_norm": 1.130064311367936, + "learning_rate": 1.4739166241912814e-06, + "loss": 0.4311223030090332, + "step": 3260 + }, + { + "epoch": 0.751902236569057, + "grad_norm": 1.372801682112421, + "learning_rate": 1.473580862647591e-06, + "loss": 0.525306224822998, + "step": 3261 + }, + { + "epoch": 0.7521328106986396, + "grad_norm": 1.291063350632538, + "learning_rate": 1.4732450322641225e-06, + "loss": 0.506609320640564, + "step": 3262 + }, + { + "epoch": 0.7523633848282223, + "grad_norm": 1.4043846834415283, + "learning_rate": 
1.4729091330896926e-06, + "loss": 0.5477846264839172, + "step": 3263 + }, + { + "epoch": 0.7525939589578049, + "grad_norm": 1.1342853276703964, + "learning_rate": 1.4725731651731268e-06, + "loss": 0.48802629113197327, + "step": 3264 + }, + { + "epoch": 0.7528245330873876, + "grad_norm": 1.5090127096652195, + "learning_rate": 1.4722371285632626e-06, + "loss": 0.4774906635284424, + "step": 3265 + }, + { + "epoch": 0.7530551072169702, + "grad_norm": 1.4537920297241385, + "learning_rate": 1.4719010233089458e-06, + "loss": 0.4220488667488098, + "step": 3266 + }, + { + "epoch": 0.7532856813465529, + "grad_norm": 1.441465153643324, + "learning_rate": 1.4715648494590324e-06, + "loss": 0.43912187218666077, + "step": 3267 + }, + { + "epoch": 0.7535162554761355, + "grad_norm": 1.3653901674246531, + "learning_rate": 1.4712286070623892e-06, + "loss": 0.5302494764328003, + "step": 3268 + }, + { + "epoch": 0.7537468296057183, + "grad_norm": 1.3282339539348487, + "learning_rate": 1.4708922961678923e-06, + "loss": 0.4800306260585785, + "step": 3269 + }, + { + "epoch": 0.7539774037353009, + "grad_norm": 1.2634165352126685, + "learning_rate": 1.4705559168244275e-06, + "loss": 0.3993161618709564, + "step": 3270 + }, + { + "epoch": 0.7542079778648836, + "grad_norm": 1.446141365903489, + "learning_rate": 1.4702194690808916e-06, + "loss": 0.37037837505340576, + "step": 3271 + }, + { + "epoch": 0.7544385519944662, + "grad_norm": 1.3105522613811469, + "learning_rate": 1.4698829529861898e-06, + "loss": 0.44288602471351624, + "step": 3272 + }, + { + "epoch": 0.7546691261240489, + "grad_norm": 1.542566998549956, + "learning_rate": 1.469546368589239e-06, + "loss": 0.5480727553367615, + "step": 3273 + }, + { + "epoch": 0.7548997002536315, + "grad_norm": 1.5093924463506492, + "learning_rate": 1.4692097159389649e-06, + "loss": 0.4964104890823364, + "step": 3274 + }, + { + "epoch": 0.7551302743832142, + "grad_norm": 1.5912503319666471, + "learning_rate": 1.4688729950843033e-06, + "loss": 
0.4744144082069397, + "step": 3275 + }, + { + "epoch": 0.7553608485127968, + "grad_norm": 1.1258853516330976, + "learning_rate": 1.4685362060741997e-06, + "loss": 0.44675350189208984, + "step": 3276 + }, + { + "epoch": 0.7555914226423796, + "grad_norm": 1.4768191837188436, + "learning_rate": 1.46819934895761e-06, + "loss": 0.45261216163635254, + "step": 3277 + }, + { + "epoch": 0.7558219967719622, + "grad_norm": 1.3183121513891758, + "learning_rate": 1.4678624237835005e-06, + "loss": 0.4180977940559387, + "step": 3278 + }, + { + "epoch": 0.7560525709015449, + "grad_norm": 1.34629761070606, + "learning_rate": 1.4675254306008456e-06, + "loss": 0.39477843046188354, + "step": 3279 + }, + { + "epoch": 0.7562831450311275, + "grad_norm": 1.439585323315283, + "learning_rate": 1.467188369458631e-06, + "loss": 0.5033801198005676, + "step": 3280 + }, + { + "epoch": 0.7565137191607102, + "grad_norm": 1.3522884656136929, + "learning_rate": 1.4668512404058527e-06, + "loss": 0.5719846487045288, + "step": 3281 + }, + { + "epoch": 0.7567442932902928, + "grad_norm": 1.6993262990855147, + "learning_rate": 1.4665140434915147e-06, + "loss": 0.5198945999145508, + "step": 3282 + }, + { + "epoch": 0.7569748674198755, + "grad_norm": 1.6486008286234453, + "learning_rate": 1.4661767787646326e-06, + "loss": 0.4641912579536438, + "step": 3283 + }, + { + "epoch": 0.7572054415494581, + "grad_norm": 1.542363438136225, + "learning_rate": 1.4658394462742309e-06, + "loss": 0.44070225954055786, + "step": 3284 + }, + { + "epoch": 0.7574360156790408, + "grad_norm": 1.1923089532877131, + "learning_rate": 1.465502046069345e-06, + "loss": 0.4324581027030945, + "step": 3285 + }, + { + "epoch": 0.7576665898086234, + "grad_norm": 1.5168087965785, + "learning_rate": 1.4651645781990187e-06, + "loss": 0.5789060592651367, + "step": 3286 + }, + { + "epoch": 0.7578971639382062, + "grad_norm": 1.7886030443223944, + "learning_rate": 1.4648270427123068e-06, + "loss": 0.45642149448394775, + "step": 3287 + }, + { + 
"epoch": 0.7581277380677888, + "grad_norm": 1.222780244920245, + "learning_rate": 1.4644894396582732e-06, + "loss": 0.4587763547897339, + "step": 3288 + }, + { + "epoch": 0.7583583121973715, + "grad_norm": 1.570757900264253, + "learning_rate": 1.4641517690859924e-06, + "loss": 0.5472866892814636, + "step": 3289 + }, + { + "epoch": 0.7585888863269541, + "grad_norm": 1.4662287757114318, + "learning_rate": 1.4638140310445476e-06, + "loss": 0.5274207592010498, + "step": 3290 + }, + { + "epoch": 0.7588194604565368, + "grad_norm": 1.5317060576828687, + "learning_rate": 1.4634762255830326e-06, + "loss": 0.46280741691589355, + "step": 3291 + }, + { + "epoch": 0.7590500345861194, + "grad_norm": 1.357303550008307, + "learning_rate": 1.4631383527505515e-06, + "loss": 0.5395090579986572, + "step": 3292 + }, + { + "epoch": 0.7592806087157021, + "grad_norm": 1.3556569618907826, + "learning_rate": 1.4628004125962168e-06, + "loss": 0.49923229217529297, + "step": 3293 + }, + { + "epoch": 0.7595111828452847, + "grad_norm": 1.437270857620585, + "learning_rate": 1.462462405169152e-06, + "loss": 0.5414037108421326, + "step": 3294 + }, + { + "epoch": 0.7597417569748675, + "grad_norm": 1.2450139122326453, + "learning_rate": 1.4621243305184895e-06, + "loss": 0.4246688485145569, + "step": 3295 + }, + { + "epoch": 0.7599723311044501, + "grad_norm": 1.2346000309431113, + "learning_rate": 1.461786188693372e-06, + "loss": 0.4997994005680084, + "step": 3296 + }, + { + "epoch": 0.7602029052340328, + "grad_norm": 1.2539682682883548, + "learning_rate": 1.4614479797429523e-06, + "loss": 0.4571123719215393, + "step": 3297 + }, + { + "epoch": 0.7604334793636154, + "grad_norm": 1.3546747118119653, + "learning_rate": 1.4611097037163917e-06, + "loss": 0.5178083181381226, + "step": 3298 + }, + { + "epoch": 0.7606640534931981, + "grad_norm": 1.438807896221459, + "learning_rate": 1.4607713606628625e-06, + "loss": 0.538001298904419, + "step": 3299 + }, + { + "epoch": 0.7608946276227807, + "grad_norm": 
1.6495208547410056, + "learning_rate": 1.4604329506315464e-06, + "loss": 0.45941218733787537, + "step": 3300 + }, + { + "epoch": 0.7611252017523634, + "grad_norm": 1.469904127152949, + "learning_rate": 1.4600944736716344e-06, + "loss": 0.619648277759552, + "step": 3301 + }, + { + "epoch": 0.761355775881946, + "grad_norm": 1.3648924598961014, + "learning_rate": 1.4597559298323281e-06, + "loss": 0.4035170376300812, + "step": 3302 + }, + { + "epoch": 0.7615863500115287, + "grad_norm": 1.4623041349874883, + "learning_rate": 1.4594173191628374e-06, + "loss": 0.48657041788101196, + "step": 3303 + }, + { + "epoch": 0.7618169241411114, + "grad_norm": 1.3486514765257445, + "learning_rate": 1.4590786417123838e-06, + "loss": 0.43324801325798035, + "step": 3304 + }, + { + "epoch": 0.7620474982706941, + "grad_norm": 1.3543990457839288, + "learning_rate": 1.4587398975301968e-06, + "loss": 0.5020644664764404, + "step": 3305 + }, + { + "epoch": 0.7622780724002767, + "grad_norm": 1.4758408294809282, + "learning_rate": 1.4584010866655163e-06, + "loss": 0.4123230576515198, + "step": 3306 + }, + { + "epoch": 0.7625086465298594, + "grad_norm": 1.4629462638568174, + "learning_rate": 1.4580622091675925e-06, + "loss": 0.5110459327697754, + "step": 3307 + }, + { + "epoch": 0.762739220659442, + "grad_norm": 1.3128675599733384, + "learning_rate": 1.4577232650856842e-06, + "loss": 0.3956744074821472, + "step": 3308 + }, + { + "epoch": 0.7629697947890247, + "grad_norm": 1.028092913473986, + "learning_rate": 1.4573842544690602e-06, + "loss": 0.44418880343437195, + "step": 3309 + }, + { + "epoch": 0.7632003689186073, + "grad_norm": 1.2935675774179733, + "learning_rate": 1.4570451773669993e-06, + "loss": 0.46690821647644043, + "step": 3310 + }, + { + "epoch": 0.76343094304819, + "grad_norm": 1.7250402170715877, + "learning_rate": 1.45670603382879e-06, + "loss": 0.5631324052810669, + "step": 3311 + }, + { + "epoch": 0.7636615171777726, + "grad_norm": 1.3197309301962783, + "learning_rate": 
1.4563668239037301e-06, + "loss": 0.42355209589004517, + "step": 3312 + }, + { + "epoch": 0.7638920913073554, + "grad_norm": 1.1819135136971526, + "learning_rate": 1.4560275476411273e-06, + "loss": 0.4509078860282898, + "step": 3313 + }, + { + "epoch": 0.764122665436938, + "grad_norm": 1.2704317123198696, + "learning_rate": 1.4556882050902986e-06, + "loss": 0.48707491159439087, + "step": 3314 + }, + { + "epoch": 0.7643532395665207, + "grad_norm": 1.2817274130067733, + "learning_rate": 1.455348796300571e-06, + "loss": 0.4768955707550049, + "step": 3315 + }, + { + "epoch": 0.7645838136961033, + "grad_norm": 1.1995539933150834, + "learning_rate": 1.4550093213212812e-06, + "loss": 0.44231370091438293, + "step": 3316 + }, + { + "epoch": 0.764814387825686, + "grad_norm": 1.283098801050818, + "learning_rate": 1.4546697802017752e-06, + "loss": 0.41919445991516113, + "step": 3317 + }, + { + "epoch": 0.7650449619552686, + "grad_norm": 1.3370966440445557, + "learning_rate": 1.4543301729914086e-06, + "loss": 0.5004634857177734, + "step": 3318 + }, + { + "epoch": 0.7652755360848513, + "grad_norm": 1.3058062554730827, + "learning_rate": 1.4539904997395467e-06, + "loss": 0.5327651500701904, + "step": 3319 + }, + { + "epoch": 0.7655061102144339, + "grad_norm": 1.2690140519120048, + "learning_rate": 1.4536507604955647e-06, + "loss": 0.4571789801120758, + "step": 3320 + }, + { + "epoch": 0.7657366843440166, + "grad_norm": 1.4712336124149359, + "learning_rate": 1.4533109553088474e-06, + "loss": 0.3989352583885193, + "step": 3321 + }, + { + "epoch": 0.7659672584735993, + "grad_norm": 1.390525487190819, + "learning_rate": 1.452971084228788e-06, + "loss": 0.4661702513694763, + "step": 3322 + }, + { + "epoch": 0.766197832603182, + "grad_norm": 1.4525582608827485, + "learning_rate": 1.4526311473047911e-06, + "loss": 0.5007051825523376, + "step": 3323 + }, + { + "epoch": 0.7664284067327646, + "grad_norm": 1.4087277102322913, + "learning_rate": 1.4522911445862697e-06, + "loss": 
0.44391199946403503, + "step": 3324 + }, + { + "epoch": 0.7666589808623473, + "grad_norm": 1.5508781982933997, + "learning_rate": 1.4519510761226466e-06, + "loss": 0.48606377840042114, + "step": 3325 + }, + { + "epoch": 0.7668895549919299, + "grad_norm": 1.4942248011879364, + "learning_rate": 1.4516109419633543e-06, + "loss": 0.4831564426422119, + "step": 3326 + }, + { + "epoch": 0.7671201291215126, + "grad_norm": 1.2492238673667777, + "learning_rate": 1.4512707421578344e-06, + "loss": 0.5033055543899536, + "step": 3327 + }, + { + "epoch": 0.7673507032510952, + "grad_norm": 1.268639260981401, + "learning_rate": 1.4509304767555385e-06, + "loss": 0.40440869331359863, + "step": 3328 + }, + { + "epoch": 0.7675812773806779, + "grad_norm": 1.154540060885232, + "learning_rate": 1.4505901458059282e-06, + "loss": 0.4281578063964844, + "step": 3329 + }, + { + "epoch": 0.7678118515102605, + "grad_norm": 1.2646658661078, + "learning_rate": 1.4502497493584735e-06, + "loss": 0.45301395654678345, + "step": 3330 + }, + { + "epoch": 0.7680424256398433, + "grad_norm": 1.2708958618179473, + "learning_rate": 1.4499092874626545e-06, + "loss": 0.3971232771873474, + "step": 3331 + }, + { + "epoch": 0.7682729997694259, + "grad_norm": 1.470304815457328, + "learning_rate": 1.4495687601679607e-06, + "loss": 0.45382559299468994, + "step": 3332 + }, + { + "epoch": 0.7685035738990086, + "grad_norm": 1.5230375908041864, + "learning_rate": 1.4492281675238916e-06, + "loss": 0.4101349711418152, + "step": 3333 + }, + { + "epoch": 0.7687341480285912, + "grad_norm": 1.7708001369907398, + "learning_rate": 1.4488875095799555e-06, + "loss": 0.5322436690330505, + "step": 3334 + }, + { + "epoch": 0.7689647221581739, + "grad_norm": 1.4488936734065874, + "learning_rate": 1.4485467863856703e-06, + "loss": 0.5497866272926331, + "step": 3335 + }, + { + "epoch": 0.7691952962877565, + "grad_norm": 1.5286830910755105, + "learning_rate": 1.4482059979905642e-06, + "loss": 0.5088074207305908, + "step": 3336 + }, + { 
+ "epoch": 0.7694258704173391, + "grad_norm": 1.2530470288119384, + "learning_rate": 1.4478651444441736e-06, + "loss": 0.4444946050643921, + "step": 3337 + }, + { + "epoch": 0.7696564445469218, + "grad_norm": 1.1602955966590311, + "learning_rate": 1.4475242257960454e-06, + "loss": 0.41257357597351074, + "step": 3338 + }, + { + "epoch": 0.7698870186765044, + "grad_norm": 1.3512416855290101, + "learning_rate": 1.4471832420957356e-06, + "loss": 0.47933512926101685, + "step": 3339 + }, + { + "epoch": 0.7701175928060872, + "grad_norm": 1.204411185284335, + "learning_rate": 1.4468421933928093e-06, + "loss": 0.41331803798675537, + "step": 3340 + }, + { + "epoch": 0.7703481669356698, + "grad_norm": 1.3617384100749454, + "learning_rate": 1.4465010797368416e-06, + "loss": 0.5047392845153809, + "step": 3341 + }, + { + "epoch": 0.7705787410652525, + "grad_norm": 1.2651645489335748, + "learning_rate": 1.446159901177417e-06, + "loss": 0.5265953540802002, + "step": 3342 + }, + { + "epoch": 0.7708093151948351, + "grad_norm": 1.5538943468041178, + "learning_rate": 1.4458186577641285e-06, + "loss": 0.48366689682006836, + "step": 3343 + }, + { + "epoch": 0.7710398893244178, + "grad_norm": 1.3170443751716914, + "learning_rate": 1.4454773495465805e-06, + "loss": 0.4303058087825775, + "step": 3344 + }, + { + "epoch": 0.7712704634540004, + "grad_norm": 1.2782967712931992, + "learning_rate": 1.4451359765743845e-06, + "loss": 0.44936758279800415, + "step": 3345 + }, + { + "epoch": 0.7715010375835831, + "grad_norm": 1.1273529926323729, + "learning_rate": 1.4447945388971631e-06, + "loss": 0.37891095876693726, + "step": 3346 + }, + { + "epoch": 0.7717316117131657, + "grad_norm": 1.3818395750162065, + "learning_rate": 1.4444530365645477e-06, + "loss": 0.4958759546279907, + "step": 3347 + }, + { + "epoch": 0.7719621858427484, + "grad_norm": 1.2809802910956953, + "learning_rate": 1.4441114696261791e-06, + "loss": 0.5180525183677673, + "step": 3348 + }, + { + "epoch": 0.772192759972331, + 
"grad_norm": 1.3137706702012002, + "learning_rate": 1.4437698381317076e-06, + "loss": 0.4760133624076843, + "step": 3349 + }, + { + "epoch": 0.7724233341019138, + "grad_norm": 1.6019634089420207, + "learning_rate": 1.4434281421307923e-06, + "loss": 0.5095269680023193, + "step": 3350 + }, + { + "epoch": 0.7726539082314964, + "grad_norm": 1.3897770832286553, + "learning_rate": 1.443086381673103e-06, + "loss": 0.41132962703704834, + "step": 3351 + }, + { + "epoch": 0.7728844823610791, + "grad_norm": 2.1191686086439687, + "learning_rate": 1.442744556808317e-06, + "loss": 0.5617398023605347, + "step": 3352 + }, + { + "epoch": 0.7731150564906617, + "grad_norm": 1.3926070515875653, + "learning_rate": 1.4424026675861229e-06, + "loss": 0.4421590566635132, + "step": 3353 + }, + { + "epoch": 0.7733456306202444, + "grad_norm": 1.3079796762796725, + "learning_rate": 1.4420607140562175e-06, + "loss": 0.5533363223075867, + "step": 3354 + }, + { + "epoch": 0.773576204749827, + "grad_norm": 1.2259362177236217, + "learning_rate": 1.441718696268307e-06, + "loss": 0.3703731298446655, + "step": 3355 + }, + { + "epoch": 0.7738067788794097, + "grad_norm": 1.3132566837825874, + "learning_rate": 1.4413766142721074e-06, + "loss": 0.4078833758831024, + "step": 3356 + }, + { + "epoch": 0.7740373530089923, + "grad_norm": 1.3669338987803128, + "learning_rate": 1.4410344681173436e-06, + "loss": 0.47297823429107666, + "step": 3357 + }, + { + "epoch": 0.7742679271385751, + "grad_norm": 1.44476399239333, + "learning_rate": 1.4406922578537501e-06, + "loss": 0.4586789309978485, + "step": 3358 + }, + { + "epoch": 0.7744985012681577, + "grad_norm": 2.005996053014414, + "learning_rate": 1.440349983531071e-06, + "loss": 0.5284359455108643, + "step": 3359 + }, + { + "epoch": 0.7747290753977404, + "grad_norm": 1.453810263762319, + "learning_rate": 1.4400076451990585e-06, + "loss": 0.47153323888778687, + "step": 3360 + }, + { + "epoch": 0.774959649527323, + "grad_norm": 1.277395230723769, + "learning_rate": 
1.4396652429074758e-06, + "loss": 0.3862396478652954, + "step": 3361 + }, + { + "epoch": 0.7751902236569057, + "grad_norm": 1.4585054412515979, + "learning_rate": 1.4393227767060938e-06, + "loss": 0.48918354511260986, + "step": 3362 + }, + { + "epoch": 0.7754207977864883, + "grad_norm": 1.2680408475983538, + "learning_rate": 1.4389802466446942e-06, + "loss": 0.5541480779647827, + "step": 3363 + }, + { + "epoch": 0.775651371916071, + "grad_norm": 1.3507983643401953, + "learning_rate": 1.4386376527730665e-06, + "loss": 0.48972445726394653, + "step": 3364 + }, + { + "epoch": 0.7758819460456536, + "grad_norm": 1.7557497204808084, + "learning_rate": 1.4382949951410109e-06, + "loss": 0.5016083717346191, + "step": 3365 + }, + { + "epoch": 0.7761125201752364, + "grad_norm": 1.3196221720148595, + "learning_rate": 1.4379522737983351e-06, + "loss": 0.40227651596069336, + "step": 3366 + }, + { + "epoch": 0.776343094304819, + "grad_norm": 1.596207218013102, + "learning_rate": 1.4376094887948584e-06, + "loss": 0.42994722723960876, + "step": 3367 + }, + { + "epoch": 0.7765736684344017, + "grad_norm": 1.516975070106083, + "learning_rate": 1.4372666401804073e-06, + "loss": 0.5087350010871887, + "step": 3368 + }, + { + "epoch": 0.7768042425639843, + "grad_norm": 1.2618017709219296, + "learning_rate": 1.4369237280048186e-06, + "loss": 0.39419132471084595, + "step": 3369 + }, + { + "epoch": 0.777034816693567, + "grad_norm": 1.3456260179482487, + "learning_rate": 1.4365807523179376e-06, + "loss": 0.500682532787323, + "step": 3370 + }, + { + "epoch": 0.7772653908231496, + "grad_norm": 1.4316905894274476, + "learning_rate": 1.4362377131696198e-06, + "loss": 0.49243754148483276, + "step": 3371 + }, + { + "epoch": 0.7774959649527323, + "grad_norm": 1.4395314935622772, + "learning_rate": 1.4358946106097295e-06, + "loss": 0.5479283332824707, + "step": 3372 + }, + { + "epoch": 0.7777265390823149, + "grad_norm": 1.08521870178353, + "learning_rate": 1.4355514446881396e-06, + "loss": 
0.43217700719833374, + "step": 3373 + }, + { + "epoch": 0.7779571132118976, + "grad_norm": 1.292406809665349, + "learning_rate": 1.435208215454733e-06, + "loss": 0.5351289510726929, + "step": 3374 + }, + { + "epoch": 0.7781876873414802, + "grad_norm": 1.2023765125576906, + "learning_rate": 1.4348649229594016e-06, + "loss": 0.45523375272750854, + "step": 3375 + }, + { + "epoch": 0.778418261471063, + "grad_norm": 1.1345172738470508, + "learning_rate": 1.4345215672520465e-06, + "loss": 0.49811118841171265, + "step": 3376 + }, + { + "epoch": 0.7786488356006456, + "grad_norm": 1.3017016981868919, + "learning_rate": 1.434178148382578e-06, + "loss": 0.40621131658554077, + "step": 3377 + }, + { + "epoch": 0.7788794097302283, + "grad_norm": 1.322929743849566, + "learning_rate": 1.4338346664009152e-06, + "loss": 0.43339842557907104, + "step": 3378 + }, + { + "epoch": 0.7791099838598109, + "grad_norm": 1.4276417953872829, + "learning_rate": 1.433491121356987e-06, + "loss": 0.4397253096103668, + "step": 3379 + }, + { + "epoch": 0.7793405579893936, + "grad_norm": 1.3957946390360352, + "learning_rate": 1.433147513300731e-06, + "loss": 0.5146217942237854, + "step": 3380 + }, + { + "epoch": 0.7795711321189762, + "grad_norm": 1.3181842447854462, + "learning_rate": 1.432803842282094e-06, + "loss": 0.46328768134117126, + "step": 3381 + }, + { + "epoch": 0.7798017062485589, + "grad_norm": 1.4008272791948313, + "learning_rate": 1.432460108351032e-06, + "loss": 0.47743386030197144, + "step": 3382 + }, + { + "epoch": 0.7800322803781415, + "grad_norm": 1.4765555896470939, + "learning_rate": 1.4321163115575105e-06, + "loss": 0.467747300863266, + "step": 3383 + }, + { + "epoch": 0.7802628545077243, + "grad_norm": 1.2334202034705792, + "learning_rate": 1.431772451951504e-06, + "loss": 0.4269976019859314, + "step": 3384 + }, + { + "epoch": 0.7804934286373069, + "grad_norm": 1.4332482963337814, + "learning_rate": 1.4314285295829956e-06, + "loss": 0.5440881252288818, + "step": 3385 + }, + { + 
"epoch": 0.7807240027668896, + "grad_norm": 1.5634188347498899, + "learning_rate": 1.431084544501978e-06, + "loss": 0.42413994669914246, + "step": 3386 + }, + { + "epoch": 0.7809545768964722, + "grad_norm": 1.250472551312306, + "learning_rate": 1.4307404967584528e-06, + "loss": 0.5563687086105347, + "step": 3387 + }, + { + "epoch": 0.7811851510260549, + "grad_norm": 1.2530390736213655, + "learning_rate": 1.4303963864024314e-06, + "loss": 0.4822027087211609, + "step": 3388 + }, + { + "epoch": 0.7814157251556375, + "grad_norm": 1.265644144731409, + "learning_rate": 1.430052213483933e-06, + "loss": 0.5267205834388733, + "step": 3389 + }, + { + "epoch": 0.7816462992852202, + "grad_norm": 1.464631682134491, + "learning_rate": 1.4297079780529868e-06, + "loss": 0.49257054924964905, + "step": 3390 + }, + { + "epoch": 0.7818768734148028, + "grad_norm": 1.4967498256417051, + "learning_rate": 1.4293636801596314e-06, + "loss": 0.45225608348846436, + "step": 3391 + }, + { + "epoch": 0.7821074475443855, + "grad_norm": 1.3090966398510886, + "learning_rate": 1.4290193198539133e-06, + "loss": 0.4891412854194641, + "step": 3392 + }, + { + "epoch": 0.7823380216739682, + "grad_norm": 1.2913501590758174, + "learning_rate": 1.4286748971858893e-06, + "loss": 0.4411062002182007, + "step": 3393 + }, + { + "epoch": 0.7825685958035509, + "grad_norm": 1.3634871078304074, + "learning_rate": 1.4283304122056242e-06, + "loss": 0.4584164619445801, + "step": 3394 + }, + { + "epoch": 0.7827991699331335, + "grad_norm": 1.2884433704058607, + "learning_rate": 1.4279858649631928e-06, + "loss": 0.46913737058639526, + "step": 3395 + }, + { + "epoch": 0.7830297440627162, + "grad_norm": 1.320207574562506, + "learning_rate": 1.4276412555086786e-06, + "loss": 0.40582767128944397, + "step": 3396 + }, + { + "epoch": 0.7832603181922988, + "grad_norm": 1.4930886994867976, + "learning_rate": 1.4272965838921737e-06, + "loss": 0.5089453458786011, + "step": 3397 + }, + { + "epoch": 0.7834908923218815, + "grad_norm": 
1.3151641529095257, + "learning_rate": 1.4269518501637798e-06, + "loss": 0.4744444489479065, + "step": 3398 + }, + { + "epoch": 0.7837214664514641, + "grad_norm": 1.3271165993445435, + "learning_rate": 1.426607054373608e-06, + "loss": 0.49168163537979126, + "step": 3399 + }, + { + "epoch": 0.7839520405810468, + "grad_norm": 1.4774301348156431, + "learning_rate": 1.4262621965717768e-06, + "loss": 0.4423940181732178, + "step": 3400 + }, + { + "epoch": 0.7841826147106294, + "grad_norm": 1.541226385884193, + "learning_rate": 1.4259172768084152e-06, + "loss": 0.5138403177261353, + "step": 3401 + }, + { + "epoch": 0.7844131888402122, + "grad_norm": 1.5691210214340656, + "learning_rate": 1.425572295133661e-06, + "loss": 0.5248140096664429, + "step": 3402 + }, + { + "epoch": 0.7846437629697948, + "grad_norm": 1.4659537352972094, + "learning_rate": 1.4252272515976607e-06, + "loss": 0.39161059260368347, + "step": 3403 + }, + { + "epoch": 0.7848743370993775, + "grad_norm": 1.307338649596764, + "learning_rate": 1.4248821462505699e-06, + "loss": 0.46826744079589844, + "step": 3404 + }, + { + "epoch": 0.7851049112289601, + "grad_norm": 1.3428424961182877, + "learning_rate": 1.424536979142553e-06, + "loss": 0.4329161047935486, + "step": 3405 + }, + { + "epoch": 0.7853354853585428, + "grad_norm": 1.3831028347986385, + "learning_rate": 1.4241917503237834e-06, + "loss": 0.4691393971443176, + "step": 3406 + }, + { + "epoch": 0.7855660594881254, + "grad_norm": 1.819344171969547, + "learning_rate": 1.423846459844444e-06, + "loss": 0.5130072236061096, + "step": 3407 + }, + { + "epoch": 0.7857966336177081, + "grad_norm": 1.4381134289937085, + "learning_rate": 1.4235011077547264e-06, + "loss": 0.37478166818618774, + "step": 3408 + }, + { + "epoch": 0.7860272077472907, + "grad_norm": 1.1654669583674488, + "learning_rate": 1.4231556941048307e-06, + "loss": 0.46112769842147827, + "step": 3409 + }, + { + "epoch": 0.7862577818768735, + "grad_norm": 1.3711520199030207, + "learning_rate": 
1.422810218944966e-06, + "loss": 0.5095282793045044, + "step": 3410 + }, + { + "epoch": 0.7864883560064561, + "grad_norm": 1.4830709787042864, + "learning_rate": 1.422464682325351e-06, + "loss": 0.4182342290878296, + "step": 3411 + }, + { + "epoch": 0.7867189301360388, + "grad_norm": 1.4898619625675633, + "learning_rate": 1.422119084296213e-06, + "loss": 0.3892830014228821, + "step": 3412 + }, + { + "epoch": 0.7869495042656214, + "grad_norm": 1.655445800570714, + "learning_rate": 1.4217734249077877e-06, + "loss": 0.5294528603553772, + "step": 3413 + }, + { + "epoch": 0.7871800783952041, + "grad_norm": 1.501568458574139, + "learning_rate": 1.4214277042103208e-06, + "loss": 0.471803218126297, + "step": 3414 + }, + { + "epoch": 0.7874106525247867, + "grad_norm": 1.2078819401351728, + "learning_rate": 1.4210819222540662e-06, + "loss": 0.4363842010498047, + "step": 3415 + }, + { + "epoch": 0.7876412266543694, + "grad_norm": 1.191025232167839, + "learning_rate": 1.4207360790892867e-06, + "loss": 0.3834928870201111, + "step": 3416 + }, + { + "epoch": 0.787871800783952, + "grad_norm": 1.342904245190706, + "learning_rate": 1.4203901747662539e-06, + "loss": 0.4639194905757904, + "step": 3417 + }, + { + "epoch": 0.7881023749135347, + "grad_norm": 1.4526860275619324, + "learning_rate": 1.4200442093352486e-06, + "loss": 0.47130632400512695, + "step": 3418 + }, + { + "epoch": 0.7883329490431173, + "grad_norm": 1.2585342771790389, + "learning_rate": 1.4196981828465606e-06, + "loss": 0.4848192632198334, + "step": 3419 + }, + { + "epoch": 0.7885635231727001, + "grad_norm": 1.2424140051596944, + "learning_rate": 1.4193520953504884e-06, + "loss": 0.5137286186218262, + "step": 3420 + }, + { + "epoch": 0.7887940973022827, + "grad_norm": 1.4833943072924853, + "learning_rate": 1.4190059468973385e-06, + "loss": 0.47639960050582886, + "step": 3421 + }, + { + "epoch": 0.7890246714318654, + "grad_norm": 1.3974399628621321, + "learning_rate": 1.418659737537428e-06, + "loss": 
0.4300975799560547, + "step": 3422 + }, + { + "epoch": 0.789255245561448, + "grad_norm": 1.6248920549834995, + "learning_rate": 1.4183134673210817e-06, + "loss": 0.5669160485267639, + "step": 3423 + }, + { + "epoch": 0.7894858196910307, + "grad_norm": 1.3431432318053507, + "learning_rate": 1.4179671362986336e-06, + "loss": 0.4113837480545044, + "step": 3424 + }, + { + "epoch": 0.7897163938206133, + "grad_norm": 1.3611327690280945, + "learning_rate": 1.417620744520426e-06, + "loss": 0.4992315173149109, + "step": 3425 + }, + { + "epoch": 0.789946967950196, + "grad_norm": 1.6418572453635272, + "learning_rate": 1.417274292036811e-06, + "loss": 0.5556696653366089, + "step": 3426 + }, + { + "epoch": 0.7901775420797786, + "grad_norm": 1.367999541896107, + "learning_rate": 1.4169277788981485e-06, + "loss": 0.47911009192466736, + "step": 3427 + }, + { + "epoch": 0.7904081162093614, + "grad_norm": 1.2100320134669527, + "learning_rate": 1.416581205154808e-06, + "loss": 0.45395466685295105, + "step": 3428 + }, + { + "epoch": 0.790638690338944, + "grad_norm": 1.5386887400015699, + "learning_rate": 1.4162345708571674e-06, + "loss": 0.4404561519622803, + "step": 3429 + }, + { + "epoch": 0.7908692644685267, + "grad_norm": 1.3845404606780534, + "learning_rate": 1.4158878760556136e-06, + "loss": 0.5541578531265259, + "step": 3430 + }, + { + "epoch": 0.7910998385981093, + "grad_norm": 1.4234082473199938, + "learning_rate": 1.4155411208005422e-06, + "loss": 0.5517834424972534, + "step": 3431 + }, + { + "epoch": 0.791330412727692, + "grad_norm": 1.2851916229874634, + "learning_rate": 1.4151943051423574e-06, + "loss": 0.42650169134140015, + "step": 3432 + }, + { + "epoch": 0.7915609868572746, + "grad_norm": 1.7886227172970943, + "learning_rate": 1.414847429131472e-06, + "loss": 0.42724043130874634, + "step": 3433 + }, + { + "epoch": 0.7917915609868573, + "grad_norm": 1.3978336018588784, + "learning_rate": 1.414500492818309e-06, + "loss": 0.41757941246032715, + "step": 3434 + }, + { + 
"epoch": 0.7920221351164399, + "grad_norm": 1.4250040620354028, + "learning_rate": 1.4141534962532984e-06, + "loss": 0.47318267822265625, + "step": 3435 + }, + { + "epoch": 0.7922527092460226, + "grad_norm": 1.5092267765141392, + "learning_rate": 1.41380643948688e-06, + "loss": 0.5540967583656311, + "step": 3436 + }, + { + "epoch": 0.7924832833756053, + "grad_norm": 1.2943595959957308, + "learning_rate": 1.4134593225695013e-06, + "loss": 0.4459697902202606, + "step": 3437 + }, + { + "epoch": 0.792713857505188, + "grad_norm": 1.2950911274447663, + "learning_rate": 1.41311214555162e-06, + "loss": 0.5263698101043701, + "step": 3438 + }, + { + "epoch": 0.7929444316347706, + "grad_norm": 1.321260987570187, + "learning_rate": 1.4127649084837016e-06, + "loss": 0.40453940629959106, + "step": 3439 + }, + { + "epoch": 0.7931750057643533, + "grad_norm": 1.4138023773004598, + "learning_rate": 1.412417611416221e-06, + "loss": 0.3859207034111023, + "step": 3440 + }, + { + "epoch": 0.7934055798939359, + "grad_norm": 1.3373104076984894, + "learning_rate": 1.4120702543996603e-06, + "loss": 0.4604511260986328, + "step": 3441 + }, + { + "epoch": 0.7936361540235186, + "grad_norm": 1.2912472996688542, + "learning_rate": 1.411722837484512e-06, + "loss": 0.40292084217071533, + "step": 3442 + }, + { + "epoch": 0.7938667281531012, + "grad_norm": 1.3099743009304052, + "learning_rate": 1.4113753607212766e-06, + "loss": 0.40447625517845154, + "step": 3443 + }, + { + "epoch": 0.7940973022826839, + "grad_norm": 1.1711578682822494, + "learning_rate": 1.4110278241604635e-06, + "loss": 0.48472997546195984, + "step": 3444 + }, + { + "epoch": 0.7943278764122665, + "grad_norm": 1.304688924593958, + "learning_rate": 1.4106802278525902e-06, + "loss": 0.5404670238494873, + "step": 3445 + }, + { + "epoch": 0.7945584505418493, + "grad_norm": 1.2201185877258616, + "learning_rate": 1.4103325718481838e-06, + "loss": 0.5885064005851746, + "step": 3446 + }, + { + "epoch": 0.7947890246714319, + "grad_norm": 
1.2045708529585497, + "learning_rate": 1.4099848561977794e-06, + "loss": 0.47806939482688904, + "step": 3447 + }, + { + "epoch": 0.7950195988010145, + "grad_norm": 1.2183758256079422, + "learning_rate": 1.4096370809519213e-06, + "loss": 0.4247834086418152, + "step": 3448 + }, + { + "epoch": 0.7952501729305972, + "grad_norm": 1.4701805176850054, + "learning_rate": 1.409289246161162e-06, + "loss": 0.508902370929718, + "step": 3449 + }, + { + "epoch": 0.7954807470601798, + "grad_norm": 1.3709386014599791, + "learning_rate": 1.4089413518760626e-06, + "loss": 0.4866124987602234, + "step": 3450 + }, + { + "epoch": 0.7957113211897625, + "grad_norm": 1.4351510328158692, + "learning_rate": 1.408593398147193e-06, + "loss": 0.5168731212615967, + "step": 3451 + }, + { + "epoch": 0.7959418953193451, + "grad_norm": 1.257672253058261, + "learning_rate": 1.4082453850251326e-06, + "loss": 0.5039271712303162, + "step": 3452 + }, + { + "epoch": 0.7961724694489278, + "grad_norm": 1.3767040030777011, + "learning_rate": 1.4078973125604674e-06, + "loss": 0.3660929799079895, + "step": 3453 + }, + { + "epoch": 0.7964030435785104, + "grad_norm": 1.5330992916300397, + "learning_rate": 1.407549180803794e-06, + "loss": 0.514503538608551, + "step": 3454 + }, + { + "epoch": 0.7966336177080932, + "grad_norm": 1.5704286671243526, + "learning_rate": 1.4072009898057172e-06, + "loss": 0.4803028702735901, + "step": 3455 + }, + { + "epoch": 0.7968641918376758, + "grad_norm": 1.2332119133725918, + "learning_rate": 1.4068527396168492e-06, + "loss": 0.43116262555122375, + "step": 3456 + }, + { + "epoch": 0.7970947659672585, + "grad_norm": 1.522287028583898, + "learning_rate": 1.4065044302878125e-06, + "loss": 0.5009680986404419, + "step": 3457 + }, + { + "epoch": 0.7973253400968411, + "grad_norm": 1.1307500814268987, + "learning_rate": 1.406156061869237e-06, + "loss": 0.4047713875770569, + "step": 3458 + }, + { + "epoch": 0.7975559142264238, + "grad_norm": 1.348066090689188, + "learning_rate": 
1.4058076344117615e-06, + "loss": 0.5287230014801025, + "step": 3459 + }, + { + "epoch": 0.7977864883560064, + "grad_norm": 1.7810979263679612, + "learning_rate": 1.4054591479660335e-06, + "loss": 0.5602750778198242, + "step": 3460 + }, + { + "epoch": 0.7980170624855891, + "grad_norm": 1.0587308388288128, + "learning_rate": 1.4051106025827096e-06, + "loss": 0.4178144335746765, + "step": 3461 + }, + { + "epoch": 0.7982476366151717, + "grad_norm": 1.408691487644406, + "learning_rate": 1.4047619983124536e-06, + "loss": 0.5061960220336914, + "step": 3462 + }, + { + "epoch": 0.7984782107447544, + "grad_norm": 1.5043212480263244, + "learning_rate": 1.4044133352059392e-06, + "loss": 0.5091691017150879, + "step": 3463 + }, + { + "epoch": 0.798708784874337, + "grad_norm": 1.3793897642043385, + "learning_rate": 1.4040646133138478e-06, + "loss": 0.5100894570350647, + "step": 3464 + }, + { + "epoch": 0.7989393590039198, + "grad_norm": 1.2188849241203001, + "learning_rate": 1.4037158326868697e-06, + "loss": 0.47493505477905273, + "step": 3465 + }, + { + "epoch": 0.7991699331335024, + "grad_norm": 1.637846674977116, + "learning_rate": 1.4033669933757038e-06, + "loss": 0.5561350584030151, + "step": 3466 + }, + { + "epoch": 0.7994005072630851, + "grad_norm": 1.4971197328143675, + "learning_rate": 1.4030180954310574e-06, + "loss": 0.44552814960479736, + "step": 3467 + }, + { + "epoch": 0.7996310813926677, + "grad_norm": 1.219192969590734, + "learning_rate": 1.4026691389036465e-06, + "loss": 0.4624238908290863, + "step": 3468 + }, + { + "epoch": 0.7998616555222504, + "grad_norm": 1.348458578104898, + "learning_rate": 1.4023201238441951e-06, + "loss": 0.5424448251724243, + "step": 3469 + }, + { + "epoch": 0.800092229651833, + "grad_norm": 1.2410568882309463, + "learning_rate": 1.4019710503034367e-06, + "loss": 0.4629395008087158, + "step": 3470 + }, + { + "epoch": 0.8003228037814157, + "grad_norm": 1.3564725845833965, + "learning_rate": 1.401621918332112e-06, + "loss": 
0.4375717043876648, + "step": 3471 + }, + { + "epoch": 0.8005533779109983, + "grad_norm": 1.5212509367699154, + "learning_rate": 1.401272727980971e-06, + "loss": 0.4419640302658081, + "step": 3472 + }, + { + "epoch": 0.8007839520405811, + "grad_norm": 1.3621301015547722, + "learning_rate": 1.4009234793007724e-06, + "loss": 0.42077577114105225, + "step": 3473 + }, + { + "epoch": 0.8010145261701637, + "grad_norm": 1.394506766094276, + "learning_rate": 1.400574172342283e-06, + "loss": 0.3735182583332062, + "step": 3474 + }, + { + "epoch": 0.8012451002997464, + "grad_norm": 1.3325918102604086, + "learning_rate": 1.4002248071562778e-06, + "loss": 0.4263458251953125, + "step": 3475 + }, + { + "epoch": 0.801475674429329, + "grad_norm": 1.3278985843191269, + "learning_rate": 1.3998753837935406e-06, + "loss": 0.42377904057502747, + "step": 3476 + }, + { + "epoch": 0.8017062485589117, + "grad_norm": 1.4415172635554745, + "learning_rate": 1.399525902304864e-06, + "loss": 0.5017589330673218, + "step": 3477 + }, + { + "epoch": 0.8019368226884943, + "grad_norm": 1.2695777372701094, + "learning_rate": 1.3991763627410485e-06, + "loss": 0.41022592782974243, + "step": 3478 + }, + { + "epoch": 0.802167396818077, + "grad_norm": 1.6097549722001219, + "learning_rate": 1.3988267651529028e-06, + "loss": 0.49957793951034546, + "step": 3479 + }, + { + "epoch": 0.8023979709476596, + "grad_norm": 1.4695518489034636, + "learning_rate": 1.398477109591245e-06, + "loss": 0.5065722465515137, + "step": 3480 + }, + { + "epoch": 0.8026285450772424, + "grad_norm": 1.264735145451503, + "learning_rate": 1.398127396106901e-06, + "loss": 0.4353798031806946, + "step": 3481 + }, + { + "epoch": 0.802859119206825, + "grad_norm": 1.5800938751579423, + "learning_rate": 1.3977776247507049e-06, + "loss": 0.41438236832618713, + "step": 3482 + }, + { + "epoch": 0.8030896933364077, + "grad_norm": 1.2712154799989346, + "learning_rate": 1.3974277955734996e-06, + "loss": 0.4348248839378357, + "step": 3483 + }, + { + 
"epoch": 0.8033202674659903, + "grad_norm": 1.3020033760882643, + "learning_rate": 1.3970779086261363e-06, + "loss": 0.49369150400161743, + "step": 3484 + }, + { + "epoch": 0.803550841595573, + "grad_norm": 1.445427514378273, + "learning_rate": 1.396727963959475e-06, + "loss": 0.5694580078125, + "step": 3485 + }, + { + "epoch": 0.8037814157251556, + "grad_norm": 1.3859575121879733, + "learning_rate": 1.3963779616243834e-06, + "loss": 0.5357070565223694, + "step": 3486 + }, + { + "epoch": 0.8040119898547383, + "grad_norm": 1.3071217267808923, + "learning_rate": 1.3960279016717377e-06, + "loss": 0.41300907731056213, + "step": 3487 + }, + { + "epoch": 0.8042425639843209, + "grad_norm": 1.4713226080636248, + "learning_rate": 1.395677784152423e-06, + "loss": 0.5058030486106873, + "step": 3488 + }, + { + "epoch": 0.8044731381139036, + "grad_norm": 1.394990226330868, + "learning_rate": 1.3953276091173326e-06, + "loss": 0.5225522518157959, + "step": 3489 + }, + { + "epoch": 0.8047037122434862, + "grad_norm": 1.3669211701935395, + "learning_rate": 1.3949773766173675e-06, + "loss": 0.43893736600875854, + "step": 3490 + }, + { + "epoch": 0.804934286373069, + "grad_norm": 1.575168458794386, + "learning_rate": 1.3946270867034375e-06, + "loss": 0.4583659768104553, + "step": 3491 + }, + { + "epoch": 0.8051648605026516, + "grad_norm": 1.2728568882138123, + "learning_rate": 1.394276739426461e-06, + "loss": 0.49550747871398926, + "step": 3492 + }, + { + "epoch": 0.8053954346322343, + "grad_norm": 1.9438900883437185, + "learning_rate": 1.3939263348373648e-06, + "loss": 0.5637674331665039, + "step": 3493 + }, + { + "epoch": 0.8056260087618169, + "grad_norm": 1.3206034443977903, + "learning_rate": 1.3935758729870835e-06, + "loss": 0.4853670299053192, + "step": 3494 + }, + { + "epoch": 0.8058565828913996, + "grad_norm": 1.479029501570459, + "learning_rate": 1.3932253539265603e-06, + "loss": 0.4535500407218933, + "step": 3495 + }, + { + "epoch": 0.8060871570209822, + "grad_norm": 
1.4461411101486477, + "learning_rate": 1.3928747777067464e-06, + "loss": 0.4198870062828064, + "step": 3496 + }, + { + "epoch": 0.8063177311505649, + "grad_norm": 1.3336585529006162, + "learning_rate": 1.392524144378602e-06, + "loss": 0.45773670077323914, + "step": 3497 + }, + { + "epoch": 0.8065483052801475, + "grad_norm": 1.718264798623436, + "learning_rate": 1.3921734539930952e-06, + "loss": 0.45263248682022095, + "step": 3498 + }, + { + "epoch": 0.8067788794097303, + "grad_norm": 1.300886470112164, + "learning_rate": 1.3918227066012025e-06, + "loss": 0.473066508769989, + "step": 3499 + }, + { + "epoch": 0.8070094535393129, + "grad_norm": 1.1261914460441818, + "learning_rate": 1.3914719022539082e-06, + "loss": 0.35737159848213196, + "step": 3500 + }, + { + "epoch": 0.8072400276688956, + "grad_norm": 1.4095537979750905, + "learning_rate": 1.3911210410022054e-06, + "loss": 0.5162703394889832, + "step": 3501 + }, + { + "epoch": 0.8074706017984782, + "grad_norm": 1.494617165800155, + "learning_rate": 1.3907701228970955e-06, + "loss": 0.5347551703453064, + "step": 3502 + }, + { + "epoch": 0.8077011759280609, + "grad_norm": 1.7642790890319513, + "learning_rate": 1.390419147989588e-06, + "loss": 0.4889448881149292, + "step": 3503 + }, + { + "epoch": 0.8079317500576435, + "grad_norm": 1.380092267420659, + "learning_rate": 1.3900681163306999e-06, + "loss": 0.47468650341033936, + "step": 3504 + }, + { + "epoch": 0.8081623241872262, + "grad_norm": 1.4749480234582377, + "learning_rate": 1.3897170279714585e-06, + "loss": 0.43236857652664185, + "step": 3505 + }, + { + "epoch": 0.8083928983168088, + "grad_norm": 1.4419786763918543, + "learning_rate": 1.3893658829628974e-06, + "loss": 0.46778976917266846, + "step": 3506 + }, + { + "epoch": 0.8086234724463915, + "grad_norm": 1.353368455676612, + "learning_rate": 1.389014681356059e-06, + "loss": 0.49447667598724365, + "step": 3507 + }, + { + "epoch": 0.8088540465759742, + "grad_norm": 1.3574196281726325, + "learning_rate": 
1.388663423201994e-06, + "loss": 0.5221220254898071, + "step": 3508 + }, + { + "epoch": 0.8090846207055569, + "grad_norm": 1.8319434066548141, + "learning_rate": 1.3883121085517615e-06, + "loss": 0.5037325620651245, + "step": 3509 + }, + { + "epoch": 0.8093151948351395, + "grad_norm": 1.1547190760847952, + "learning_rate": 1.387960737456429e-06, + "loss": 0.46879589557647705, + "step": 3510 + }, + { + "epoch": 0.8095457689647222, + "grad_norm": 1.3552976314399992, + "learning_rate": 1.387609309967071e-06, + "loss": 0.44216716289520264, + "step": 3511 + }, + { + "epoch": 0.8097763430943048, + "grad_norm": 1.2016377736710804, + "learning_rate": 1.3872578261347716e-06, + "loss": 0.4525749981403351, + "step": 3512 + }, + { + "epoch": 0.8100069172238875, + "grad_norm": 1.3138421579944453, + "learning_rate": 1.3869062860106224e-06, + "loss": 0.44681644439697266, + "step": 3513 + }, + { + "epoch": 0.8102374913534701, + "grad_norm": 1.5030736189155554, + "learning_rate": 1.3865546896457233e-06, + "loss": 0.4162617325782776, + "step": 3514 + }, + { + "epoch": 0.8104680654830528, + "grad_norm": 1.4360914568156404, + "learning_rate": 1.3862030370911827e-06, + "loss": 0.5262776613235474, + "step": 3515 + }, + { + "epoch": 0.8106986396126354, + "grad_norm": 1.3010389916824352, + "learning_rate": 1.3858513283981163e-06, + "loss": 0.48102372884750366, + "step": 3516 + }, + { + "epoch": 0.8109292137422182, + "grad_norm": 1.41037363508679, + "learning_rate": 1.385499563617649e-06, + "loss": 0.46166497468948364, + "step": 3517 + }, + { + "epoch": 0.8111597878718008, + "grad_norm": 1.4145741054815544, + "learning_rate": 1.3851477428009133e-06, + "loss": 0.43523284792900085, + "step": 3518 + }, + { + "epoch": 0.8113903620013835, + "grad_norm": 1.3662294611202825, + "learning_rate": 1.3847958659990497e-06, + "loss": 0.5413048267364502, + "step": 3519 + }, + { + "epoch": 0.8116209361309661, + "grad_norm": 1.1462124150969017, + "learning_rate": 1.3844439332632073e-06, + "loss": 
0.4257383346557617, + "step": 3520 + }, + { + "epoch": 0.8118515102605488, + "grad_norm": 1.5928313905350753, + "learning_rate": 1.3840919446445427e-06, + "loss": 0.4812018871307373, + "step": 3521 + }, + { + "epoch": 0.8120820843901314, + "grad_norm": 1.5231442697754751, + "learning_rate": 1.3837399001942216e-06, + "loss": 0.4890254735946655, + "step": 3522 + }, + { + "epoch": 0.8123126585197141, + "grad_norm": 1.7091323269762855, + "learning_rate": 1.3833877999634166e-06, + "loss": 0.5079991817474365, + "step": 3523 + }, + { + "epoch": 0.8125432326492967, + "grad_norm": 1.6148941470526432, + "learning_rate": 1.3830356440033096e-06, + "loss": 0.44703438878059387, + "step": 3524 + }, + { + "epoch": 0.8127738067788794, + "grad_norm": 1.4685605039032132, + "learning_rate": 1.3826834323650898e-06, + "loss": 0.4218645989894867, + "step": 3525 + }, + { + "epoch": 0.813004380908462, + "grad_norm": 1.585977018929449, + "learning_rate": 1.3823311650999547e-06, + "loss": 0.4544546902179718, + "step": 3526 + }, + { + "epoch": 0.8132349550380448, + "grad_norm": 1.2954656146833265, + "learning_rate": 1.3819788422591099e-06, + "loss": 0.4978422224521637, + "step": 3527 + }, + { + "epoch": 0.8134655291676274, + "grad_norm": 1.3262250095489831, + "learning_rate": 1.3816264638937688e-06, + "loss": 0.42122140526771545, + "step": 3528 + }, + { + "epoch": 0.8136961032972101, + "grad_norm": 1.0995613789441223, + "learning_rate": 1.381274030055154e-06, + "loss": 0.45674729347229004, + "step": 3529 + }, + { + "epoch": 0.8139266774267927, + "grad_norm": 1.5614041042611542, + "learning_rate": 1.3809215407944947e-06, + "loss": 0.5075385570526123, + "step": 3530 + }, + { + "epoch": 0.8141572515563754, + "grad_norm": 1.4231357002591019, + "learning_rate": 1.380568996163029e-06, + "loss": 0.45952552556991577, + "step": 3531 + }, + { + "epoch": 0.814387825685958, + "grad_norm": 1.239122573849665, + "learning_rate": 1.3802163962120025e-06, + "loss": 0.5062624216079712, + "step": 3532 + }, + { + 
"epoch": 0.8146183998155407, + "grad_norm": 1.4910945652834293, + "learning_rate": 1.3798637409926698e-06, + "loss": 0.49294552206993103, + "step": 3533 + }, + { + "epoch": 0.8148489739451233, + "grad_norm": 1.347255149566569, + "learning_rate": 1.3795110305562926e-06, + "loss": 0.4389861822128296, + "step": 3534 + }, + { + "epoch": 0.8150795480747061, + "grad_norm": 1.5704776908584448, + "learning_rate": 1.3791582649541401e-06, + "loss": 0.47733181715011597, + "step": 3535 + }, + { + "epoch": 0.8153101222042887, + "grad_norm": 1.3661823105841888, + "learning_rate": 1.3788054442374918e-06, + "loss": 0.5007725358009338, + "step": 3536 + }, + { + "epoch": 0.8155406963338714, + "grad_norm": 1.617600694156108, + "learning_rate": 1.378452568457633e-06, + "loss": 0.4857913553714752, + "step": 3537 + }, + { + "epoch": 0.815771270463454, + "grad_norm": 1.4509204702050165, + "learning_rate": 1.3780996376658577e-06, + "loss": 0.5330549478530884, + "step": 3538 + }, + { + "epoch": 0.8160018445930367, + "grad_norm": 1.283827597345967, + "learning_rate": 1.3777466519134684e-06, + "loss": 0.45034217834472656, + "step": 3539 + }, + { + "epoch": 0.8162324187226193, + "grad_norm": 1.313177908039173, + "learning_rate": 1.3773936112517746e-06, + "loss": 0.4442213773727417, + "step": 3540 + }, + { + "epoch": 0.816462992852202, + "grad_norm": 1.479375223581317, + "learning_rate": 1.377040515732095e-06, + "loss": 0.5000369548797607, + "step": 3541 + }, + { + "epoch": 0.8166935669817846, + "grad_norm": 1.3177535399447533, + "learning_rate": 1.3766873654057551e-06, + "loss": 0.5117775797843933, + "step": 3542 + }, + { + "epoch": 0.8169241411113674, + "grad_norm": 1.4163300067502158, + "learning_rate": 1.3763341603240889e-06, + "loss": 0.431648850440979, + "step": 3543 + }, + { + "epoch": 0.81715471524095, + "grad_norm": 1.230235072546183, + "learning_rate": 1.3759809005384387e-06, + "loss": 0.39463019371032715, + "step": 3544 + }, + { + "epoch": 0.8173852893705327, + "grad_norm": 
1.4412595458793114, + "learning_rate": 1.375627586100154e-06, + "loss": 0.38739651441574097, + "step": 3545 + }, + { + "epoch": 0.8176158635001153, + "grad_norm": 1.1409525851258608, + "learning_rate": 1.3752742170605927e-06, + "loss": 0.3973360061645508, + "step": 3546 + }, + { + "epoch": 0.817846437629698, + "grad_norm": 1.3276328290635366, + "learning_rate": 1.3749207934711207e-06, + "loss": 0.4791724383831024, + "step": 3547 + }, + { + "epoch": 0.8180770117592806, + "grad_norm": 1.2963607541712077, + "learning_rate": 1.3745673153831114e-06, + "loss": 0.5245905518531799, + "step": 3548 + }, + { + "epoch": 0.8183075858888633, + "grad_norm": 1.4724838776986868, + "learning_rate": 1.3742137828479472e-06, + "loss": 0.5507007241249084, + "step": 3549 + }, + { + "epoch": 0.8185381600184459, + "grad_norm": 1.6416778504866436, + "learning_rate": 1.373860195917017e-06, + "loss": 0.4555748701095581, + "step": 3550 + }, + { + "epoch": 0.8187687341480286, + "grad_norm": 1.2633428656921684, + "learning_rate": 1.3735065546417182e-06, + "loss": 0.39309239387512207, + "step": 3551 + }, + { + "epoch": 0.8189993082776112, + "grad_norm": 1.205265119124541, + "learning_rate": 1.3731528590734564e-06, + "loss": 0.4984157681465149, + "step": 3552 + }, + { + "epoch": 0.819229882407194, + "grad_norm": 1.4373490041823445, + "learning_rate": 1.3727991092636448e-06, + "loss": 0.45853057503700256, + "step": 3553 + }, + { + "epoch": 0.8194604565367766, + "grad_norm": 1.427750473352885, + "learning_rate": 1.3724453052637043e-06, + "loss": 0.47412237524986267, + "step": 3554 + }, + { + "epoch": 0.8196910306663593, + "grad_norm": 1.5140095273509309, + "learning_rate": 1.3720914471250642e-06, + "loss": 0.46433544158935547, + "step": 3555 + }, + { + "epoch": 0.8199216047959419, + "grad_norm": 1.3530305082066354, + "learning_rate": 1.3717375348991612e-06, + "loss": 0.5773437023162842, + "step": 3556 + }, + { + "epoch": 0.8201521789255246, + "grad_norm": 1.519657617219548, + "learning_rate": 
1.37138356863744e-06, + "loss": 0.5943500995635986, + "step": 3557 + }, + { + "epoch": 0.8203827530551072, + "grad_norm": 1.1903323655602067, + "learning_rate": 1.3710295483913533e-06, + "loss": 0.4970731735229492, + "step": 3558 + }, + { + "epoch": 0.8206133271846898, + "grad_norm": 1.3936455952745408, + "learning_rate": 1.3706754742123611e-06, + "loss": 0.44726189970970154, + "step": 3559 + }, + { + "epoch": 0.8208439013142725, + "grad_norm": 1.257368755928624, + "learning_rate": 1.3703213461519325e-06, + "loss": 0.3980759382247925, + "step": 3560 + }, + { + "epoch": 0.8210744754438551, + "grad_norm": 1.510740752003684, + "learning_rate": 1.3699671642615434e-06, + "loss": 0.5521829724311829, + "step": 3561 + }, + { + "epoch": 0.8213050495734379, + "grad_norm": 1.4257916187791417, + "learning_rate": 1.3696129285926769e-06, + "loss": 0.42630624771118164, + "step": 3562 + }, + { + "epoch": 0.8215356237030205, + "grad_norm": 1.3813571407602123, + "learning_rate": 1.3692586391968254e-06, + "loss": 0.5060243606567383, + "step": 3563 + }, + { + "epoch": 0.8217661978326032, + "grad_norm": 1.553405319049413, + "learning_rate": 1.3689042961254884e-06, + "loss": 0.5803407430648804, + "step": 3564 + }, + { + "epoch": 0.8219967719621858, + "grad_norm": 1.1610478816524794, + "learning_rate": 1.3685498994301735e-06, + "loss": 0.4510403871536255, + "step": 3565 + }, + { + "epoch": 0.8222273460917685, + "grad_norm": 1.668001711945016, + "learning_rate": 1.3681954491623953e-06, + "loss": 0.5350467562675476, + "step": 3566 + }, + { + "epoch": 0.8224579202213511, + "grad_norm": 1.4589682016059282, + "learning_rate": 1.367840945373677e-06, + "loss": 0.5194679498672485, + "step": 3567 + }, + { + "epoch": 0.8226884943509338, + "grad_norm": 1.5164701950999842, + "learning_rate": 1.3674863881155495e-06, + "loss": 0.43574345111846924, + "step": 3568 + }, + { + "epoch": 0.8229190684805164, + "grad_norm": 1.2235692010100727, + "learning_rate": 1.367131777439551e-06, + "loss": 
0.43051451444625854, + "step": 3569 + }, + { + "epoch": 0.8231496426100992, + "grad_norm": 1.4294583851960962, + "learning_rate": 1.3667771133972278e-06, + "loss": 0.44449925422668457, + "step": 3570 + }, + { + "epoch": 0.8233802167396818, + "grad_norm": 1.4281775124274958, + "learning_rate": 1.3664223960401342e-06, + "loss": 0.4466608464717865, + "step": 3571 + }, + { + "epoch": 0.8236107908692645, + "grad_norm": 1.506734312309144, + "learning_rate": 1.3660676254198318e-06, + "loss": 0.6172389984130859, + "step": 3572 + }, + { + "epoch": 0.8238413649988471, + "grad_norm": 1.3071294444794341, + "learning_rate": 1.36571280158789e-06, + "loss": 0.3789742588996887, + "step": 3573 + }, + { + "epoch": 0.8240719391284298, + "grad_norm": 1.2713531694738989, + "learning_rate": 1.365357924595886e-06, + "loss": 0.3871726095676422, + "step": 3574 + }, + { + "epoch": 0.8243025132580124, + "grad_norm": 1.3659394637334186, + "learning_rate": 1.3650029944954047e-06, + "loss": 0.5464534759521484, + "step": 3575 + }, + { + "epoch": 0.8245330873875951, + "grad_norm": 1.4254183485118588, + "learning_rate": 1.3646480113380392e-06, + "loss": 0.4924513101577759, + "step": 3576 + }, + { + "epoch": 0.8247636615171777, + "grad_norm": 1.3350624286567714, + "learning_rate": 1.3642929751753896e-06, + "loss": 0.39648669958114624, + "step": 3577 + }, + { + "epoch": 0.8249942356467604, + "grad_norm": 1.155634552535419, + "learning_rate": 1.3639378860590642e-06, + "loss": 0.44139498472213745, + "step": 3578 + }, + { + "epoch": 0.825224809776343, + "grad_norm": 1.4016430263315434, + "learning_rate": 1.3635827440406784e-06, + "loss": 0.4477856159210205, + "step": 3579 + }, + { + "epoch": 0.8254553839059258, + "grad_norm": 1.2543072909410065, + "learning_rate": 1.363227549171856e-06, + "loss": 0.48722583055496216, + "step": 3580 + }, + { + "epoch": 0.8256859580355084, + "grad_norm": 1.5407337854642607, + "learning_rate": 1.3628723015042285e-06, + "loss": 0.44485795497894287, + "step": 3581 + }, + { 
+ "epoch": 0.8259165321650911, + "grad_norm": 1.481687909768813, + "learning_rate": 1.362517001089434e-06, + "loss": 0.510918140411377, + "step": 3582 + }, + { + "epoch": 0.8261471062946737, + "grad_norm": 1.4714123899535927, + "learning_rate": 1.3621616479791196e-06, + "loss": 0.5157535076141357, + "step": 3583 + }, + { + "epoch": 0.8263776804242564, + "grad_norm": 1.601097277197277, + "learning_rate": 1.361806242224939e-06, + "loss": 0.6120826005935669, + "step": 3584 + }, + { + "epoch": 0.826608254553839, + "grad_norm": 1.379062804125132, + "learning_rate": 1.3614507838785545e-06, + "loss": 0.47521674633026123, + "step": 3585 + }, + { + "epoch": 0.8268388286834217, + "grad_norm": 1.2544051986437676, + "learning_rate": 1.3610952729916352e-06, + "loss": 0.431441068649292, + "step": 3586 + }, + { + "epoch": 0.8270694028130043, + "grad_norm": 1.4333858511847595, + "learning_rate": 1.3607397096158587e-06, + "loss": 0.5168293118476868, + "step": 3587 + }, + { + "epoch": 0.8272999769425871, + "grad_norm": 1.4075386997192105, + "learning_rate": 1.3603840938029092e-06, + "loss": 0.47669821977615356, + "step": 3588 + }, + { + "epoch": 0.8275305510721697, + "grad_norm": 1.6345113020695277, + "learning_rate": 1.3600284256044791e-06, + "loss": 0.5170806050300598, + "step": 3589 + }, + { + "epoch": 0.8277611252017524, + "grad_norm": 1.3443972777893194, + "learning_rate": 1.359672705072269e-06, + "loss": 0.5578932762145996, + "step": 3590 + }, + { + "epoch": 0.827991699331335, + "grad_norm": 1.2931790064355784, + "learning_rate": 1.3593169322579855e-06, + "loss": 0.45000678300857544, + "step": 3591 + }, + { + "epoch": 0.8282222734609177, + "grad_norm": 1.7408157234389992, + "learning_rate": 1.3589611072133448e-06, + "loss": 0.47859635949134827, + "step": 3592 + }, + { + "epoch": 0.8284528475905003, + "grad_norm": 1.629320946493551, + "learning_rate": 1.3586052299900693e-06, + "loss": 0.5373919606208801, + "step": 3593 + }, + { + "epoch": 0.828683421720083, + "grad_norm": 
1.4093194136520946, + "learning_rate": 1.3582493006398888e-06, + "loss": 0.5461571216583252, + "step": 3594 + }, + { + "epoch": 0.8289139958496656, + "grad_norm": 1.4221547222488737, + "learning_rate": 1.357893319214542e-06, + "loss": 0.522891640663147, + "step": 3595 + }, + { + "epoch": 0.8291445699792483, + "grad_norm": 1.3931497044748549, + "learning_rate": 1.3575372857657739e-06, + "loss": 0.503441572189331, + "step": 3596 + }, + { + "epoch": 0.829375144108831, + "grad_norm": 1.4755218467347275, + "learning_rate": 1.357181200345338e-06, + "loss": 0.45475268363952637, + "step": 3597 + }, + { + "epoch": 0.8296057182384137, + "grad_norm": 1.3529340787561033, + "learning_rate": 1.3568250630049944e-06, + "loss": 0.4626728296279907, + "step": 3598 + }, + { + "epoch": 0.8298362923679963, + "grad_norm": 1.5106243497530205, + "learning_rate": 1.3564688737965118e-06, + "loss": 0.590618371963501, + "step": 3599 + }, + { + "epoch": 0.830066866497579, + "grad_norm": 1.1729232075760356, + "learning_rate": 1.3561126327716658e-06, + "loss": 0.4252029061317444, + "step": 3600 + }, + { + "epoch": 0.8302974406271616, + "grad_norm": 1.5093126003070163, + "learning_rate": 1.3557563399822396e-06, + "loss": 0.5741503238677979, + "step": 3601 + }, + { + "epoch": 0.8305280147567443, + "grad_norm": 1.346541706093541, + "learning_rate": 1.3553999954800236e-06, + "loss": 0.4591038227081299, + "step": 3602 + }, + { + "epoch": 0.8307585888863269, + "grad_norm": 1.5342817778823432, + "learning_rate": 1.3550435993168164e-06, + "loss": 0.5761657953262329, + "step": 3603 + }, + { + "epoch": 0.8309891630159096, + "grad_norm": 1.4873747737215213, + "learning_rate": 1.3546871515444239e-06, + "loss": 0.4835323691368103, + "step": 3604 + }, + { + "epoch": 0.8312197371454922, + "grad_norm": 1.3474153162620106, + "learning_rate": 1.3543306522146594e-06, + "loss": 0.6152533292770386, + "step": 3605 + }, + { + "epoch": 0.831450311275075, + "grad_norm": 1.7615931586989606, + "learning_rate": 
1.3539741013793431e-06, + "loss": 0.48106616735458374, + "step": 3606 + }, + { + "epoch": 0.8316808854046576, + "grad_norm": 1.3977429311647935, + "learning_rate": 1.3536174990903042e-06, + "loss": 0.48128771781921387, + "step": 3607 + }, + { + "epoch": 0.8319114595342403, + "grad_norm": 1.5624866131401935, + "learning_rate": 1.353260845399378e-06, + "loss": 0.4395609498023987, + "step": 3608 + }, + { + "epoch": 0.8321420336638229, + "grad_norm": 1.6243424583265862, + "learning_rate": 1.3529041403584076e-06, + "loss": 0.5298231840133667, + "step": 3609 + }, + { + "epoch": 0.8323726077934056, + "grad_norm": 1.610376085646533, + "learning_rate": 1.3525473840192436e-06, + "loss": 0.4694434404373169, + "step": 3610 + }, + { + "epoch": 0.8326031819229882, + "grad_norm": 1.3870293085196028, + "learning_rate": 1.3521905764337449e-06, + "loss": 0.4264890253543854, + "step": 3611 + }, + { + "epoch": 0.8328337560525709, + "grad_norm": 1.3900907609641087, + "learning_rate": 1.3518337176537762e-06, + "loss": 0.3266828656196594, + "step": 3612 + }, + { + "epoch": 0.8330643301821535, + "grad_norm": 1.548598004244933, + "learning_rate": 1.351476807731211e-06, + "loss": 0.5554935336112976, + "step": 3613 + }, + { + "epoch": 0.8332949043117363, + "grad_norm": 1.3139574983210685, + "learning_rate": 1.3511198467179295e-06, + "loss": 0.4375999867916107, + "step": 3614 + }, + { + "epoch": 0.8335254784413189, + "grad_norm": 1.3568296792682797, + "learning_rate": 1.35076283466582e-06, + "loss": 0.564457893371582, + "step": 3615 + }, + { + "epoch": 0.8337560525709016, + "grad_norm": 1.5648573569840147, + "learning_rate": 1.3504057716267776e-06, + "loss": 0.5141148567199707, + "step": 3616 + }, + { + "epoch": 0.8339866267004842, + "grad_norm": 1.2607282701974722, + "learning_rate": 1.350048657652705e-06, + "loss": 0.45514535903930664, + "step": 3617 + }, + { + "epoch": 0.8342172008300669, + "grad_norm": 1.298858308641179, + "learning_rate": 1.3496914927955122e-06, + "loss": 
0.5224772691726685, + "step": 3618 + }, + { + "epoch": 0.8344477749596495, + "grad_norm": 1.3773935543957632, + "learning_rate": 1.349334277107117e-06, + "loss": 0.45185205340385437, + "step": 3619 + }, + { + "epoch": 0.8346783490892322, + "grad_norm": 1.3400411570126707, + "learning_rate": 1.3489770106394444e-06, + "loss": 0.47232794761657715, + "step": 3620 + }, + { + "epoch": 0.8349089232188148, + "grad_norm": 1.3564585933268873, + "learning_rate": 1.3486196934444264e-06, + "loss": 0.44031190872192383, + "step": 3621 + }, + { + "epoch": 0.8351394973483975, + "grad_norm": 1.2921832515242213, + "learning_rate": 1.3482623255740028e-06, + "loss": 0.4594510793685913, + "step": 3622 + }, + { + "epoch": 0.8353700714779801, + "grad_norm": 1.3491628541071723, + "learning_rate": 1.347904907080121e-06, + "loss": 0.38726723194122314, + "step": 3623 + }, + { + "epoch": 0.8356006456075629, + "grad_norm": 1.4086239991990677, + "learning_rate": 1.3475474380147347e-06, + "loss": 0.544617772102356, + "step": 3624 + }, + { + "epoch": 0.8358312197371455, + "grad_norm": 1.5645995914963535, + "learning_rate": 1.347189918429806e-06, + "loss": 0.503423810005188, + "step": 3625 + }, + { + "epoch": 0.8360617938667282, + "grad_norm": 1.3950432339665733, + "learning_rate": 1.3468323483773038e-06, + "loss": 0.4395143985748291, + "step": 3626 + }, + { + "epoch": 0.8362923679963108, + "grad_norm": 1.6308000434387062, + "learning_rate": 1.346474727909205e-06, + "loss": 0.41464856266975403, + "step": 3627 + }, + { + "epoch": 0.8365229421258935, + "grad_norm": 1.4008674771220466, + "learning_rate": 1.346117057077493e-06, + "loss": 0.4782845079898834, + "step": 3628 + }, + { + "epoch": 0.8367535162554761, + "grad_norm": 1.2484540580184977, + "learning_rate": 1.345759335934159e-06, + "loss": 0.48308104276657104, + "step": 3629 + }, + { + "epoch": 0.8369840903850588, + "grad_norm": 1.3935764281095124, + "learning_rate": 1.345401564531201e-06, + "loss": 0.5759967565536499, + "step": 3630 + }, + { + 
"epoch": 0.8372146645146414, + "grad_norm": 1.421077506310717, + "learning_rate": 1.3450437429206256e-06, + "loss": 0.5900512337684631, + "step": 3631 + }, + { + "epoch": 0.8374452386442242, + "grad_norm": 1.3643346247687353, + "learning_rate": 1.3446858711544451e-06, + "loss": 0.4776286482810974, + "step": 3632 + }, + { + "epoch": 0.8376758127738068, + "grad_norm": 1.5796891796446009, + "learning_rate": 1.34432794928468e-06, + "loss": 0.5123563408851624, + "step": 3633 + }, + { + "epoch": 0.8379063869033895, + "grad_norm": 1.6272139775850447, + "learning_rate": 1.3439699773633574e-06, + "loss": 0.5505821108818054, + "step": 3634 + }, + { + "epoch": 0.8381369610329721, + "grad_norm": 1.4456391396483874, + "learning_rate": 1.343611955442513e-06, + "loss": 0.5525364875793457, + "step": 3635 + }, + { + "epoch": 0.8383675351625548, + "grad_norm": 1.1644228181066894, + "learning_rate": 1.3432538835741884e-06, + "loss": 0.44074952602386475, + "step": 3636 + }, + { + "epoch": 0.8385981092921374, + "grad_norm": 1.3792820862390651, + "learning_rate": 1.3428957618104331e-06, + "loss": 0.5488649606704712, + "step": 3637 + }, + { + "epoch": 0.8388286834217201, + "grad_norm": 1.159150884236996, + "learning_rate": 1.3425375902033034e-06, + "loss": 0.4427725672721863, + "step": 3638 + }, + { + "epoch": 0.8390592575513027, + "grad_norm": 1.5753495335559473, + "learning_rate": 1.3421793688048636e-06, + "loss": 0.5244250297546387, + "step": 3639 + }, + { + "epoch": 0.8392898316808854, + "grad_norm": 1.2853956216426152, + "learning_rate": 1.3418210976671845e-06, + "loss": 0.4684640169143677, + "step": 3640 + }, + { + "epoch": 0.839520405810468, + "grad_norm": 1.4767228704961965, + "learning_rate": 1.3414627768423449e-06, + "loss": 0.4518035054206848, + "step": 3641 + }, + { + "epoch": 0.8397509799400508, + "grad_norm": 1.5338085000094812, + "learning_rate": 1.34110440638243e-06, + "loss": 0.47504323720932007, + "step": 3642 + }, + { + "epoch": 0.8399815540696334, + "grad_norm": 
1.7182899921711987, + "learning_rate": 1.3407459863395326e-06, + "loss": 0.3835057020187378, + "step": 3643 + }, + { + "epoch": 0.8402121281992161, + "grad_norm": 1.4517538314936977, + "learning_rate": 1.3403875167657529e-06, + "loss": 0.4103546738624573, + "step": 3644 + }, + { + "epoch": 0.8404427023287987, + "grad_norm": 1.3338056576205999, + "learning_rate": 1.3400289977131974e-06, + "loss": 0.48064136505126953, + "step": 3645 + }, + { + "epoch": 0.8406732764583814, + "grad_norm": 1.5606949897639386, + "learning_rate": 1.3396704292339813e-06, + "loss": 0.49655234813690186, + "step": 3646 + }, + { + "epoch": 0.840903850587964, + "grad_norm": 1.3180737586627664, + "learning_rate": 1.3393118113802259e-06, + "loss": 0.5559303760528564, + "step": 3647 + }, + { + "epoch": 0.8411344247175467, + "grad_norm": 1.3902505896601203, + "learning_rate": 1.3389531442040599e-06, + "loss": 0.5173505544662476, + "step": 3648 + }, + { + "epoch": 0.8413649988471293, + "grad_norm": 1.4997400095057662, + "learning_rate": 1.338594427757619e-06, + "loss": 0.500524640083313, + "step": 3649 + }, + { + "epoch": 0.8415955729767121, + "grad_norm": 1.3017945585861477, + "learning_rate": 1.3382356620930467e-06, + "loss": 0.5167285203933716, + "step": 3650 + }, + { + "epoch": 0.8418261471062947, + "grad_norm": 1.4661199659605932, + "learning_rate": 1.3378768472624929e-06, + "loss": 0.5006825923919678, + "step": 3651 + }, + { + "epoch": 0.8420567212358774, + "grad_norm": 1.5253217794534257, + "learning_rate": 1.3375179833181153e-06, + "loss": 0.5421864986419678, + "step": 3652 + }, + { + "epoch": 0.84228729536546, + "grad_norm": 1.5304567180850979, + "learning_rate": 1.337159070312078e-06, + "loss": 0.4964475929737091, + "step": 3653 + }, + { + "epoch": 0.8425178694950427, + "grad_norm": 1.2795061721511742, + "learning_rate": 1.3368001082965528e-06, + "loss": 0.4020928144454956, + "step": 3654 + }, + { + "epoch": 0.8427484436246253, + "grad_norm": 1.3457912405228358, + "learning_rate": 
1.3364410973237183e-06, + "loss": 0.43009278178215027, + "step": 3655 + }, + { + "epoch": 0.842979017754208, + "grad_norm": 1.3663101783603413, + "learning_rate": 1.3360820374457608e-06, + "loss": 0.5939761400222778, + "step": 3656 + }, + { + "epoch": 0.8432095918837906, + "grad_norm": 1.3723718945789372, + "learning_rate": 1.335722928714873e-06, + "loss": 0.43889346718788147, + "step": 3657 + }, + { + "epoch": 0.8434401660133733, + "grad_norm": 1.510811137049935, + "learning_rate": 1.335363771183255e-06, + "loss": 0.5125945806503296, + "step": 3658 + }, + { + "epoch": 0.843670740142956, + "grad_norm": 1.2988273180041983, + "learning_rate": 1.3350045649031143e-06, + "loss": 0.516818642616272, + "step": 3659 + }, + { + "epoch": 0.8439013142725387, + "grad_norm": 1.2172726171902464, + "learning_rate": 1.3346453099266649e-06, + "loss": 0.5098299980163574, + "step": 3660 + }, + { + "epoch": 0.8441318884021213, + "grad_norm": 1.4809835823543989, + "learning_rate": 1.334286006306128e-06, + "loss": 0.46228134632110596, + "step": 3661 + }, + { + "epoch": 0.844362462531704, + "grad_norm": 1.518730905252404, + "learning_rate": 1.3339266540937324e-06, + "loss": 0.38364481925964355, + "step": 3662 + }, + { + "epoch": 0.8445930366612866, + "grad_norm": 1.2447229933483466, + "learning_rate": 1.3335672533417134e-06, + "loss": 0.4363073706626892, + "step": 3663 + }, + { + "epoch": 0.8448236107908693, + "grad_norm": 1.5445839123019949, + "learning_rate": 1.3332078041023133e-06, + "loss": 0.463603675365448, + "step": 3664 + }, + { + "epoch": 0.8450541849204519, + "grad_norm": 1.118250112497339, + "learning_rate": 1.3328483064277816e-06, + "loss": 0.4173084795475006, + "step": 3665 + }, + { + "epoch": 0.8452847590500346, + "grad_norm": 1.2905398126594152, + "learning_rate": 1.3324887603703756e-06, + "loss": 0.41451913118362427, + "step": 3666 + }, + { + "epoch": 0.8455153331796172, + "grad_norm": 1.3301474043831027, + "learning_rate": 1.3321291659823587e-06, + "loss": 
0.49418264627456665, + "step": 3667 + }, + { + "epoch": 0.8457459073092, + "grad_norm": 1.323747824550861, + "learning_rate": 1.3317695233160015e-06, + "loss": 0.48787444829940796, + "step": 3668 + }, + { + "epoch": 0.8459764814387826, + "grad_norm": 1.419516654753041, + "learning_rate": 1.3314098324235814e-06, + "loss": 0.484865665435791, + "step": 3669 + }, + { + "epoch": 0.8462070555683652, + "grad_norm": 1.4996660725713626, + "learning_rate": 1.3310500933573837e-06, + "loss": 0.44162076711654663, + "step": 3670 + }, + { + "epoch": 0.8464376296979479, + "grad_norm": 1.4496595059902684, + "learning_rate": 1.3306903061696999e-06, + "loss": 0.39880990982055664, + "step": 3671 + }, + { + "epoch": 0.8466682038275305, + "grad_norm": 1.596735486600776, + "learning_rate": 1.3303304709128288e-06, + "loss": 0.4405972957611084, + "step": 3672 + }, + { + "epoch": 0.8468987779571132, + "grad_norm": 1.8476371944591239, + "learning_rate": 1.3299705876390755e-06, + "loss": 0.4228917956352234, + "step": 3673 + }, + { + "epoch": 0.8471293520866958, + "grad_norm": 1.3245854918753257, + "learning_rate": 1.3296106564007532e-06, + "loss": 0.44533059000968933, + "step": 3674 + }, + { + "epoch": 0.8473599262162785, + "grad_norm": 1.324480419314636, + "learning_rate": 1.3292506772501816e-06, + "loss": 0.4672505855560303, + "step": 3675 + }, + { + "epoch": 0.8475905003458611, + "grad_norm": 1.5345690520656405, + "learning_rate": 1.3288906502396873e-06, + "loss": 0.5651025772094727, + "step": 3676 + }, + { + "epoch": 0.8478210744754439, + "grad_norm": 1.4113200785742674, + "learning_rate": 1.3285305754216034e-06, + "loss": 0.4877372086048126, + "step": 3677 + }, + { + "epoch": 0.8480516486050265, + "grad_norm": 1.6156626909271148, + "learning_rate": 1.3281704528482713e-06, + "loss": 0.43767499923706055, + "step": 3678 + }, + { + "epoch": 0.8482822227346092, + "grad_norm": 1.6309175000442955, + "learning_rate": 1.3278102825720376e-06, + "loss": 0.5077182650566101, + "step": 3679 + }, + { + 
"epoch": 0.8485127968641918, + "grad_norm": 1.5150502093819094, + "learning_rate": 1.3274500646452573e-06, + "loss": 0.4814456105232239, + "step": 3680 + }, + { + "epoch": 0.8487433709937745, + "grad_norm": 1.3626740483959299, + "learning_rate": 1.3270897991202913e-06, + "loss": 0.4454193115234375, + "step": 3681 + }, + { + "epoch": 0.8489739451233571, + "grad_norm": 1.1173863119708762, + "learning_rate": 1.3267294860495084e-06, + "loss": 0.3973482549190521, + "step": 3682 + }, + { + "epoch": 0.8492045192529398, + "grad_norm": 1.5337644837004238, + "learning_rate": 1.3263691254852834e-06, + "loss": 0.5115909576416016, + "step": 3683 + }, + { + "epoch": 0.8494350933825224, + "grad_norm": 1.2962888350788886, + "learning_rate": 1.3260087174799982e-06, + "loss": 0.4217768907546997, + "step": 3684 + }, + { + "epoch": 0.8496656675121051, + "grad_norm": 1.5676465439666392, + "learning_rate": 1.3256482620860414e-06, + "loss": 0.4462714195251465, + "step": 3685 + }, + { + "epoch": 0.8498962416416878, + "grad_norm": 1.278085511550712, + "learning_rate": 1.32528775935581e-06, + "loss": 0.4617312550544739, + "step": 3686 + }, + { + "epoch": 0.8501268157712705, + "grad_norm": 1.2760475898780375, + "learning_rate": 1.324927209341706e-06, + "loss": 0.4774616062641144, + "step": 3687 + }, + { + "epoch": 0.8503573899008531, + "grad_norm": 1.389927333157612, + "learning_rate": 1.3245666120961389e-06, + "loss": 0.38730189204216003, + "step": 3688 + }, + { + "epoch": 0.8505879640304358, + "grad_norm": 1.5164687032364252, + "learning_rate": 1.324205967671525e-06, + "loss": 0.45189517736434937, + "step": 3689 + }, + { + "epoch": 0.8508185381600184, + "grad_norm": 1.489462413187487, + "learning_rate": 1.3238452761202887e-06, + "loss": 0.4965584874153137, + "step": 3690 + }, + { + "epoch": 0.8510491122896011, + "grad_norm": 1.2283217886481297, + "learning_rate": 1.3234845374948591e-06, + "loss": 0.4409075975418091, + "step": 3691 + }, + { + "epoch": 0.8512796864191837, + "grad_norm": 
1.3545920303070538, + "learning_rate": 1.3231237518476737e-06, + "loss": 0.4457218647003174, + "step": 3692 + }, + { + "epoch": 0.8515102605487664, + "grad_norm": 1.2432481704868787, + "learning_rate": 1.3227629192311762e-06, + "loss": 0.42810603976249695, + "step": 3693 + }, + { + "epoch": 0.851740834678349, + "grad_norm": 1.3504737245283156, + "learning_rate": 1.3224020396978172e-06, + "loss": 0.40753173828125, + "step": 3694 + }, + { + "epoch": 0.8519714088079318, + "grad_norm": 1.5063309076640758, + "learning_rate": 1.3220411133000542e-06, + "loss": 0.5057830810546875, + "step": 3695 + }, + { + "epoch": 0.8522019829375144, + "grad_norm": 1.4625648008354504, + "learning_rate": 1.3216801400903515e-06, + "loss": 0.42498981952667236, + "step": 3696 + }, + { + "epoch": 0.8524325570670971, + "grad_norm": 1.736302707969947, + "learning_rate": 1.3213191201211806e-06, + "loss": 0.44985881447792053, + "step": 3697 + }, + { + "epoch": 0.8526631311966797, + "grad_norm": 1.5257289791960187, + "learning_rate": 1.3209580534450192e-06, + "loss": 0.39984816312789917, + "step": 3698 + }, + { + "epoch": 0.8528937053262624, + "grad_norm": 1.4859934204912078, + "learning_rate": 1.3205969401143516e-06, + "loss": 0.4773896038532257, + "step": 3699 + }, + { + "epoch": 0.853124279455845, + "grad_norm": 1.5299580963987478, + "learning_rate": 1.3202357801816698e-06, + "loss": 0.5699855089187622, + "step": 3700 + }, + { + "epoch": 0.8533548535854277, + "grad_norm": 1.5124437197630332, + "learning_rate": 1.3198745736994714e-06, + "loss": 0.4486675262451172, + "step": 3701 + }, + { + "epoch": 0.8535854277150103, + "grad_norm": 1.3641053506348044, + "learning_rate": 1.3195133207202625e-06, + "loss": 0.47909995913505554, + "step": 3702 + }, + { + "epoch": 0.853816001844593, + "grad_norm": 1.3267279385735278, + "learning_rate": 1.3191520212965542e-06, + "loss": 0.4356222450733185, + "step": 3703 + }, + { + "epoch": 0.8540465759741757, + "grad_norm": 1.5161594053893233, + "learning_rate": 
1.3187906754808646e-06, + "loss": 0.4734821319580078, + "step": 3704 + }, + { + "epoch": 0.8542771501037584, + "grad_norm": 1.1414361983546972, + "learning_rate": 1.3184292833257197e-06, + "loss": 0.4164031744003296, + "step": 3705 + }, + { + "epoch": 0.854507724233341, + "grad_norm": 1.5194682024268111, + "learning_rate": 1.3180678448836516e-06, + "loss": 0.505548357963562, + "step": 3706 + }, + { + "epoch": 0.8547382983629237, + "grad_norm": 1.4180879233512311, + "learning_rate": 1.3177063602071985e-06, + "loss": 0.4443202316761017, + "step": 3707 + }, + { + "epoch": 0.8549688724925063, + "grad_norm": 1.4808642334806548, + "learning_rate": 1.317344829348906e-06, + "loss": 0.4594070017337799, + "step": 3708 + }, + { + "epoch": 0.855199446622089, + "grad_norm": 1.595149298191138, + "learning_rate": 1.3169832523613265e-06, + "loss": 0.5346768498420715, + "step": 3709 + }, + { + "epoch": 0.8554300207516716, + "grad_norm": 1.4211934536480004, + "learning_rate": 1.3166216292970185e-06, + "loss": 0.44471168518066406, + "step": 3710 + }, + { + "epoch": 0.8556605948812543, + "grad_norm": 1.3967510109946715, + "learning_rate": 1.3162599602085482e-06, + "loss": 0.4414154589176178, + "step": 3711 + }, + { + "epoch": 0.855891169010837, + "grad_norm": 1.2591243363727789, + "learning_rate": 1.3158982451484873e-06, + "loss": 0.4267842769622803, + "step": 3712 + }, + { + "epoch": 0.8561217431404197, + "grad_norm": 1.5517519524370356, + "learning_rate": 1.315536484169415e-06, + "loss": 0.5282812118530273, + "step": 3713 + }, + { + "epoch": 0.8563523172700023, + "grad_norm": 1.3747848129200213, + "learning_rate": 1.3151746773239167e-06, + "loss": 0.3831692934036255, + "step": 3714 + }, + { + "epoch": 0.856582891399585, + "grad_norm": 1.3399055617764033, + "learning_rate": 1.3148128246645848e-06, + "loss": 0.4714779853820801, + "step": 3715 + }, + { + "epoch": 0.8568134655291676, + "grad_norm": 1.5957966977407376, + "learning_rate": 1.3144509262440185e-06, + "loss": 
0.515029788017273, + "step": 3716 + }, + { + "epoch": 0.8570440396587503, + "grad_norm": 1.6565005005078866, + "learning_rate": 1.314088982114823e-06, + "loss": 0.48407065868377686, + "step": 3717 + }, + { + "epoch": 0.8572746137883329, + "grad_norm": 1.2250893853794216, + "learning_rate": 1.3137269923296111e-06, + "loss": 0.4756847620010376, + "step": 3718 + }, + { + "epoch": 0.8575051879179156, + "grad_norm": 1.4417516161095163, + "learning_rate": 1.313364956941001e-06, + "loss": 0.47744277119636536, + "step": 3719 + }, + { + "epoch": 0.8577357620474982, + "grad_norm": 1.4540506451139732, + "learning_rate": 1.3130028760016187e-06, + "loss": 0.4967440366744995, + "step": 3720 + }, + { + "epoch": 0.857966336177081, + "grad_norm": 1.5755023694033539, + "learning_rate": 1.312640749564096e-06, + "loss": 0.44999921321868896, + "step": 3721 + }, + { + "epoch": 0.8581969103066636, + "grad_norm": 1.1829331105101752, + "learning_rate": 1.3122785776810723e-06, + "loss": 0.4454652667045593, + "step": 3722 + }, + { + "epoch": 0.8584274844362463, + "grad_norm": 1.220523426514953, + "learning_rate": 1.3119163604051923e-06, + "loss": 0.37483078241348267, + "step": 3723 + }, + { + "epoch": 0.8586580585658289, + "grad_norm": 1.45963624909142, + "learning_rate": 1.3115540977891076e-06, + "loss": 0.3732140064239502, + "step": 3724 + }, + { + "epoch": 0.8588886326954116, + "grad_norm": 1.5667872254799649, + "learning_rate": 1.3111917898854779e-06, + "loss": 0.5709421634674072, + "step": 3725 + }, + { + "epoch": 0.8591192068249942, + "grad_norm": 2.0482790256244514, + "learning_rate": 1.3108294367469677e-06, + "loss": 0.5301297307014465, + "step": 3726 + }, + { + "epoch": 0.8593497809545769, + "grad_norm": 1.2253994153188903, + "learning_rate": 1.3104670384262484e-06, + "loss": 0.45979735255241394, + "step": 3727 + }, + { + "epoch": 0.8595803550841595, + "grad_norm": 1.5172885339612137, + "learning_rate": 1.3101045949759985e-06, + "loss": 0.5051921606063843, + "step": 3728 + }, + { + 
"epoch": 0.8598109292137422, + "grad_norm": 1.5432212262669465, + "learning_rate": 1.309742106448903e-06, + "loss": 0.5057204365730286, + "step": 3729 + }, + { + "epoch": 0.8600415033433249, + "grad_norm": 1.3029916397805466, + "learning_rate": 1.3093795728976535e-06, + "loss": 0.4265059530735016, + "step": 3730 + }, + { + "epoch": 0.8602720774729076, + "grad_norm": 1.2392416355330595, + "learning_rate": 1.3090169943749473e-06, + "loss": 0.39166492223739624, + "step": 3731 + }, + { + "epoch": 0.8605026516024902, + "grad_norm": 1.4335892651385718, + "learning_rate": 1.308654370933489e-06, + "loss": 0.4321832060813904, + "step": 3732 + }, + { + "epoch": 0.8607332257320729, + "grad_norm": 1.4026009292758175, + "learning_rate": 1.3082917026259906e-06, + "loss": 0.5028939247131348, + "step": 3733 + }, + { + "epoch": 0.8609637998616555, + "grad_norm": 1.461263824354524, + "learning_rate": 1.3079289895051681e-06, + "loss": 0.4642373323440552, + "step": 3734 + }, + { + "epoch": 0.8611943739912382, + "grad_norm": 1.2616373488525174, + "learning_rate": 1.3075662316237464e-06, + "loss": 0.416348397731781, + "step": 3735 + }, + { + "epoch": 0.8614249481208208, + "grad_norm": 1.9156143459520234, + "learning_rate": 1.3072034290344556e-06, + "loss": 0.48442524671554565, + "step": 3736 + }, + { + "epoch": 0.8616555222504035, + "grad_norm": 1.4675369296005183, + "learning_rate": 1.3068405817900332e-06, + "loss": 0.46903935074806213, + "step": 3737 + }, + { + "epoch": 0.8618860963799861, + "grad_norm": 1.433982633948309, + "learning_rate": 1.3064776899432224e-06, + "loss": 0.48172008991241455, + "step": 3738 + }, + { + "epoch": 0.8621166705095689, + "grad_norm": 1.4697783322173945, + "learning_rate": 1.3061147535467734e-06, + "loss": 0.44460922479629517, + "step": 3739 + }, + { + "epoch": 0.8623472446391515, + "grad_norm": 1.4552688390934359, + "learning_rate": 1.3057517726534423e-06, + "loss": 0.4728608727455139, + "step": 3740 + }, + { + "epoch": 0.8625778187687342, + "grad_norm": 
1.2981084774118934, + "learning_rate": 1.3053887473159928e-06, + "loss": 0.36457544565200806, + "step": 3741 + }, + { + "epoch": 0.8628083928983168, + "grad_norm": 1.3219603285138386, + "learning_rate": 1.3050256775871936e-06, + "loss": 0.3753359317779541, + "step": 3742 + }, + { + "epoch": 0.8630389670278995, + "grad_norm": 1.71764180047156, + "learning_rate": 1.304662563519821e-06, + "loss": 0.38679057359695435, + "step": 3743 + }, + { + "epoch": 0.8632695411574821, + "grad_norm": 1.2517686459377946, + "learning_rate": 1.304299405166657e-06, + "loss": 0.5008635520935059, + "step": 3744 + }, + { + "epoch": 0.8635001152870648, + "grad_norm": 1.6524585351681906, + "learning_rate": 1.3039362025804903e-06, + "loss": 0.3723052740097046, + "step": 3745 + }, + { + "epoch": 0.8637306894166474, + "grad_norm": 1.4101013037777343, + "learning_rate": 1.3035729558141166e-06, + "loss": 0.4227592945098877, + "step": 3746 + }, + { + "epoch": 0.8639612635462302, + "grad_norm": 1.2385954175555658, + "learning_rate": 1.3032096649203369e-06, + "loss": 0.44072139263153076, + "step": 3747 + }, + { + "epoch": 0.8641918376758128, + "grad_norm": 1.330285491132409, + "learning_rate": 1.3028463299519594e-06, + "loss": 0.49321871995925903, + "step": 3748 + }, + { + "epoch": 0.8644224118053955, + "grad_norm": 1.1777120494442346, + "learning_rate": 1.3024829509617987e-06, + "loss": 0.3751382827758789, + "step": 3749 + }, + { + "epoch": 0.8646529859349781, + "grad_norm": 1.2092220891938048, + "learning_rate": 1.3021195280026755e-06, + "loss": 0.43967729806900024, + "step": 3750 + }, + { + "epoch": 0.8648835600645608, + "grad_norm": 1.2227774970491123, + "learning_rate": 1.3017560611274172e-06, + "loss": 0.4102880358695984, + "step": 3751 + }, + { + "epoch": 0.8651141341941434, + "grad_norm": 1.4524327131347594, + "learning_rate": 1.301392550388857e-06, + "loss": 0.5225233435630798, + "step": 3752 + }, + { + "epoch": 0.8653447083237261, + "grad_norm": 1.7121734467218848, + "learning_rate": 
1.3010289958398352e-06, + "loss": 0.6021677255630493, + "step": 3753 + }, + { + "epoch": 0.8655752824533087, + "grad_norm": 1.294116122042798, + "learning_rate": 1.300665397533198e-06, + "loss": 0.5031560063362122, + "step": 3754 + }, + { + "epoch": 0.8658058565828914, + "grad_norm": 1.2573123861588813, + "learning_rate": 1.300301755521798e-06, + "loss": 0.5406110286712646, + "step": 3755 + }, + { + "epoch": 0.866036430712474, + "grad_norm": 1.3123644187859618, + "learning_rate": 1.2999380698584945e-06, + "loss": 0.5359587669372559, + "step": 3756 + }, + { + "epoch": 0.8662670048420568, + "grad_norm": 1.4006997771166723, + "learning_rate": 1.2995743405961525e-06, + "loss": 0.46089720726013184, + "step": 3757 + }, + { + "epoch": 0.8664975789716394, + "grad_norm": 1.3064464980724229, + "learning_rate": 1.2992105677876444e-06, + "loss": 0.4611746668815613, + "step": 3758 + }, + { + "epoch": 0.8667281531012221, + "grad_norm": 1.3860871410802968, + "learning_rate": 1.2988467514858478e-06, + "loss": 0.47040778398513794, + "step": 3759 + }, + { + "epoch": 0.8669587272308047, + "grad_norm": 1.4624604845389892, + "learning_rate": 1.2984828917436469e-06, + "loss": 0.5118452310562134, + "step": 3760 + }, + { + "epoch": 0.8671893013603874, + "grad_norm": 1.3248325273306294, + "learning_rate": 1.2981189886139326e-06, + "loss": 0.42349302768707275, + "step": 3761 + }, + { + "epoch": 0.86741987548997, + "grad_norm": 1.4983666129317725, + "learning_rate": 1.2977550421496022e-06, + "loss": 0.4888027310371399, + "step": 3762 + }, + { + "epoch": 0.8676504496195527, + "grad_norm": 1.5557430857836938, + "learning_rate": 1.2973910524035587e-06, + "loss": 0.5637897849082947, + "step": 3763 + }, + { + "epoch": 0.8678810237491353, + "grad_norm": 1.2906063231523421, + "learning_rate": 1.2970270194287119e-06, + "loss": 0.4159572124481201, + "step": 3764 + }, + { + "epoch": 0.868111597878718, + "grad_norm": 1.613449710248156, + "learning_rate": 1.2966629432779775e-06, + "loss": 
0.4558612108230591, + "step": 3765 + }, + { + "epoch": 0.8683421720083007, + "grad_norm": 1.229959300374187, + "learning_rate": 1.2962988240042775e-06, + "loss": 0.4235115647315979, + "step": 3766 + }, + { + "epoch": 0.8685727461378834, + "grad_norm": 1.5042750051225975, + "learning_rate": 1.2959346616605404e-06, + "loss": 0.5096476078033447, + "step": 3767 + }, + { + "epoch": 0.868803320267466, + "grad_norm": 1.3849812365321899, + "learning_rate": 1.2955704562997013e-06, + "loss": 0.47097906470298767, + "step": 3768 + }, + { + "epoch": 0.8690338943970487, + "grad_norm": 1.2057643302548011, + "learning_rate": 1.2952062079747008e-06, + "loss": 0.4508157968521118, + "step": 3769 + }, + { + "epoch": 0.8692644685266313, + "grad_norm": 1.3904260388472953, + "learning_rate": 1.2948419167384864e-06, + "loss": 0.43800675868988037, + "step": 3770 + }, + { + "epoch": 0.869495042656214, + "grad_norm": 1.3552023829739699, + "learning_rate": 1.2944775826440108e-06, + "loss": 0.5512480735778809, + "step": 3771 + }, + { + "epoch": 0.8697256167857966, + "grad_norm": 1.4428129453899297, + "learning_rate": 1.2941132057442342e-06, + "loss": 0.4654430150985718, + "step": 3772 + }, + { + "epoch": 0.8699561909153793, + "grad_norm": 1.3297596373891312, + "learning_rate": 1.293748786092123e-06, + "loss": 0.5429458618164062, + "step": 3773 + }, + { + "epoch": 0.870186765044962, + "grad_norm": 1.7953090529311853, + "learning_rate": 1.2933843237406481e-06, + "loss": 0.415671169757843, + "step": 3774 + }, + { + "epoch": 0.8704173391745447, + "grad_norm": 1.3784118855195835, + "learning_rate": 1.2930198187427884e-06, + "loss": 0.4347325563430786, + "step": 3775 + }, + { + "epoch": 0.8706479133041273, + "grad_norm": 1.3858530201589612, + "learning_rate": 1.2926552711515287e-06, + "loss": 0.41997528076171875, + "step": 3776 + }, + { + "epoch": 0.87087848743371, + "grad_norm": 1.4475652450278216, + "learning_rate": 1.292290681019859e-06, + "loss": 0.45956090092658997, + "step": 3777 + }, + { + 
"epoch": 0.8711090615632926, + "grad_norm": 1.3318373392521217, + "learning_rate": 1.2919260484007767e-06, + "loss": 0.4615165889263153, + "step": 3778 + }, + { + "epoch": 0.8713396356928753, + "grad_norm": 1.5526291007190895, + "learning_rate": 1.2915613733472848e-06, + "loss": 0.3919866681098938, + "step": 3779 + }, + { + "epoch": 0.8715702098224579, + "grad_norm": 1.5182901628405527, + "learning_rate": 1.2911966559123922e-06, + "loss": 0.5324772000312805, + "step": 3780 + }, + { + "epoch": 0.8718007839520405, + "grad_norm": 1.4899431097732017, + "learning_rate": 1.2908318961491147e-06, + "loss": 0.4813354015350342, + "step": 3781 + }, + { + "epoch": 0.8720313580816232, + "grad_norm": 1.6904916219237236, + "learning_rate": 1.2904670941104735e-06, + "loss": 0.5617851614952087, + "step": 3782 + }, + { + "epoch": 0.8722619322112058, + "grad_norm": 1.5869523154671146, + "learning_rate": 1.2901022498494963e-06, + "loss": 0.5369905233383179, + "step": 3783 + }, + { + "epoch": 0.8724925063407886, + "grad_norm": 1.4103839502113327, + "learning_rate": 1.289737363419217e-06, + "loss": 0.469723641872406, + "step": 3784 + }, + { + "epoch": 0.8727230804703712, + "grad_norm": 1.5392452648373567, + "learning_rate": 1.2893724348726757e-06, + "loss": 0.5100580453872681, + "step": 3785 + }, + { + "epoch": 0.8729536545999539, + "grad_norm": 1.4522390007049084, + "learning_rate": 1.289007464262918e-06, + "loss": 0.3959219455718994, + "step": 3786 + }, + { + "epoch": 0.8731842287295365, + "grad_norm": 1.3370969443139462, + "learning_rate": 1.2886424516429967e-06, + "loss": 0.4237936735153198, + "step": 3787 + }, + { + "epoch": 0.8734148028591192, + "grad_norm": 1.6505369649722645, + "learning_rate": 1.2882773970659693e-06, + "loss": 0.4604552984237671, + "step": 3788 + }, + { + "epoch": 0.8736453769887018, + "grad_norm": 1.4408188813706955, + "learning_rate": 1.287912300584901e-06, + "loss": 0.4265769124031067, + "step": 3789 + }, + { + "epoch": 0.8738759511182845, + "grad_norm": 
1.185765484689313, + "learning_rate": 1.2875471622528617e-06, + "loss": 0.4644312262535095, + "step": 3790 + }, + { + "epoch": 0.8741065252478671, + "grad_norm": 1.5605966972230738, + "learning_rate": 1.2871819821229282e-06, + "loss": 0.5520300269126892, + "step": 3791 + }, + { + "epoch": 0.8743370993774499, + "grad_norm": 1.2172431342127952, + "learning_rate": 1.2868167602481831e-06, + "loss": 0.42350637912750244, + "step": 3792 + }, + { + "epoch": 0.8745676735070325, + "grad_norm": 1.3605025828289865, + "learning_rate": 1.2864514966817155e-06, + "loss": 0.5148683786392212, + "step": 3793 + }, + { + "epoch": 0.8747982476366152, + "grad_norm": 1.2825363473778824, + "learning_rate": 1.2860861914766191e-06, + "loss": 0.4506865441799164, + "step": 3794 + }, + { + "epoch": 0.8750288217661978, + "grad_norm": 1.240014068038836, + "learning_rate": 1.2857208446859957e-06, + "loss": 0.4042026996612549, + "step": 3795 + }, + { + "epoch": 0.8752593958957805, + "grad_norm": 1.749789157467437, + "learning_rate": 1.2853554563629521e-06, + "loss": 0.4601382613182068, + "step": 3796 + }, + { + "epoch": 0.8754899700253631, + "grad_norm": 1.1956968937229655, + "learning_rate": 1.2849900265606007e-06, + "loss": 0.3387809097766876, + "step": 3797 + }, + { + "epoch": 0.8757205441549458, + "grad_norm": 1.3296970918872935, + "learning_rate": 1.2846245553320604e-06, + "loss": 0.5295180082321167, + "step": 3798 + }, + { + "epoch": 0.8759511182845284, + "grad_norm": 1.518762035085977, + "learning_rate": 1.2842590427304564e-06, + "loss": 0.47733891010284424, + "step": 3799 + }, + { + "epoch": 0.8761816924141111, + "grad_norm": 1.3675518552119075, + "learning_rate": 1.2838934888089198e-06, + "loss": 0.46294957399368286, + "step": 3800 + }, + { + "epoch": 0.8764122665436938, + "grad_norm": 1.3892016156570253, + "learning_rate": 1.2835278936205877e-06, + "loss": 0.4638972580432892, + "step": 3801 + }, + { + "epoch": 0.8766428406732765, + "grad_norm": 1.2670627732920314, + "learning_rate": 
1.2831622572186027e-06, + "loss": 0.5078087449073792, + "step": 3802 + }, + { + "epoch": 0.8768734148028591, + "grad_norm": 1.2490466990727205, + "learning_rate": 1.2827965796561138e-06, + "loss": 0.49626827239990234, + "step": 3803 + }, + { + "epoch": 0.8771039889324418, + "grad_norm": 1.3784871825818807, + "learning_rate": 1.2824308609862758e-06, + "loss": 0.4857192635536194, + "step": 3804 + }, + { + "epoch": 0.8773345630620244, + "grad_norm": 1.5003545684747548, + "learning_rate": 1.2820651012622498e-06, + "loss": 0.5403131246566772, + "step": 3805 + }, + { + "epoch": 0.8775651371916071, + "grad_norm": 1.532730699853752, + "learning_rate": 1.2816993005372029e-06, + "loss": 0.519463837146759, + "step": 3806 + }, + { + "epoch": 0.8777957113211897, + "grad_norm": 1.648937105926222, + "learning_rate": 1.2813334588643077e-06, + "loss": 0.6038607954978943, + "step": 3807 + }, + { + "epoch": 0.8780262854507724, + "grad_norm": 1.5251750284604964, + "learning_rate": 1.280967576296743e-06, + "loss": 0.4892663359642029, + "step": 3808 + }, + { + "epoch": 0.878256859580355, + "grad_norm": 1.4437992115831912, + "learning_rate": 1.2806016528876934e-06, + "loss": 0.47872501611709595, + "step": 3809 + }, + { + "epoch": 0.8784874337099378, + "grad_norm": 1.401497704596745, + "learning_rate": 1.28023568869035e-06, + "loss": 0.4863993227481842, + "step": 3810 + }, + { + "epoch": 0.8787180078395204, + "grad_norm": 1.2319881889422357, + "learning_rate": 1.2798696837579088e-06, + "loss": 0.45241546630859375, + "step": 3811 + }, + { + "epoch": 0.8789485819691031, + "grad_norm": 1.26957816055566, + "learning_rate": 1.2795036381435728e-06, + "loss": 0.48720863461494446, + "step": 3812 + }, + { + "epoch": 0.8791791560986857, + "grad_norm": 1.4244000796725484, + "learning_rate": 1.2791375519005507e-06, + "loss": 0.49139827489852905, + "step": 3813 + }, + { + "epoch": 0.8794097302282684, + "grad_norm": 1.1021730064681352, + "learning_rate": 1.278771425082056e-06, + "loss": 
0.41915225982666016, + "step": 3814 + }, + { + "epoch": 0.879640304357851, + "grad_norm": 1.164668093587021, + "learning_rate": 1.2784052577413095e-06, + "loss": 0.41831016540527344, + "step": 3815 + }, + { + "epoch": 0.8798708784874337, + "grad_norm": 1.392466935090571, + "learning_rate": 1.2780390499315374e-06, + "loss": 0.49456197023391724, + "step": 3816 + }, + { + "epoch": 0.8801014526170163, + "grad_norm": 1.4645341817096265, + "learning_rate": 1.2776728017059714e-06, + "loss": 0.4656866192817688, + "step": 3817 + }, + { + "epoch": 0.880332026746599, + "grad_norm": 1.375452516729426, + "learning_rate": 1.2773065131178494e-06, + "loss": 0.449514776468277, + "step": 3818 + }, + { + "epoch": 0.8805626008761817, + "grad_norm": 1.320026502962018, + "learning_rate": 1.2769401842204156e-06, + "loss": 0.3762073516845703, + "step": 3819 + }, + { + "epoch": 0.8807931750057644, + "grad_norm": 1.6471923718834367, + "learning_rate": 1.2765738150669192e-06, + "loss": 0.5680521130561829, + "step": 3820 + }, + { + "epoch": 0.881023749135347, + "grad_norm": 1.227867578043664, + "learning_rate": 1.276207405710616e-06, + "loss": 0.35371482372283936, + "step": 3821 + }, + { + "epoch": 0.8812543232649297, + "grad_norm": 1.6584454245429339, + "learning_rate": 1.2758409562047669e-06, + "loss": 0.5145018100738525, + "step": 3822 + }, + { + "epoch": 0.8814848973945123, + "grad_norm": 1.4264603788288566, + "learning_rate": 1.2754744666026392e-06, + "loss": 0.5425234436988831, + "step": 3823 + }, + { + "epoch": 0.881715471524095, + "grad_norm": 1.605664005655016, + "learning_rate": 1.275107936957506e-06, + "loss": 0.48439931869506836, + "step": 3824 + }, + { + "epoch": 0.8819460456536776, + "grad_norm": 1.4836193722422002, + "learning_rate": 1.2747413673226462e-06, + "loss": 0.5177323818206787, + "step": 3825 + }, + { + "epoch": 0.8821766197832603, + "grad_norm": 1.4672524591279896, + "learning_rate": 1.2743747577513437e-06, + "loss": 0.4718499779701233, + "step": 3826 + }, + { + 
"epoch": 0.882407193912843, + "grad_norm": 1.3580668132517044, + "learning_rate": 1.27400810829689e-06, + "loss": 0.5140804648399353, + "step": 3827 + }, + { + "epoch": 0.8826377680424257, + "grad_norm": 1.2476007061260952, + "learning_rate": 1.2736414190125805e-06, + "loss": 0.4611731767654419, + "step": 3828 + }, + { + "epoch": 0.8828683421720083, + "grad_norm": 1.3574827964922753, + "learning_rate": 1.2732746899517175e-06, + "loss": 0.526127815246582, + "step": 3829 + }, + { + "epoch": 0.883098916301591, + "grad_norm": 1.3368001624765957, + "learning_rate": 1.2729079211676085e-06, + "loss": 0.4039766192436218, + "step": 3830 + }, + { + "epoch": 0.8833294904311736, + "grad_norm": 1.5033466347185125, + "learning_rate": 1.2725411127135676e-06, + "loss": 0.4232807159423828, + "step": 3831 + }, + { + "epoch": 0.8835600645607563, + "grad_norm": 1.2556638937655993, + "learning_rate": 1.2721742646429142e-06, + "loss": 0.48490262031555176, + "step": 3832 + }, + { + "epoch": 0.8837906386903389, + "grad_norm": 1.278298782194165, + "learning_rate": 1.2718073770089729e-06, + "loss": 0.4664677083492279, + "step": 3833 + }, + { + "epoch": 0.8840212128199216, + "grad_norm": 1.3387833207328181, + "learning_rate": 1.2714404498650742e-06, + "loss": 0.4402846097946167, + "step": 3834 + }, + { + "epoch": 0.8842517869495042, + "grad_norm": 1.195436797590032, + "learning_rate": 1.2710734832645555e-06, + "loss": 0.45942988991737366, + "step": 3835 + }, + { + "epoch": 0.884482361079087, + "grad_norm": 1.3235253441897963, + "learning_rate": 1.2707064772607587e-06, + "loss": 0.45924365520477295, + "step": 3836 + }, + { + "epoch": 0.8847129352086696, + "grad_norm": 1.2350134713864223, + "learning_rate": 1.270339431907032e-06, + "loss": 0.3877851963043213, + "step": 3837 + }, + { + "epoch": 0.8849435093382523, + "grad_norm": 1.381311043724791, + "learning_rate": 1.2699723472567288e-06, + "loss": 0.45364105701446533, + "step": 3838 + }, + { + "epoch": 0.8851740834678349, + "grad_norm": 
1.2798000201692457, + "learning_rate": 1.2696052233632089e-06, + "loss": 0.3527877926826477, + "step": 3839 + }, + { + "epoch": 0.8854046575974176, + "grad_norm": 1.7105597319107566, + "learning_rate": 1.2692380602798375e-06, + "loss": 0.499268501996994, + "step": 3840 + }, + { + "epoch": 0.8856352317270002, + "grad_norm": 1.2823188650483364, + "learning_rate": 1.2688708580599854e-06, + "loss": 0.39443689584732056, + "step": 3841 + }, + { + "epoch": 0.8858658058565829, + "grad_norm": 1.442355552170661, + "learning_rate": 1.268503616757029e-06, + "loss": 0.5262328386306763, + "step": 3842 + }, + { + "epoch": 0.8860963799861655, + "grad_norm": 1.4602798515117177, + "learning_rate": 1.2681363364243509e-06, + "loss": 0.4761236608028412, + "step": 3843 + }, + { + "epoch": 0.8863269541157482, + "grad_norm": 1.3806283660695482, + "learning_rate": 1.2677690171153391e-06, + "loss": 0.5173169374465942, + "step": 3844 + }, + { + "epoch": 0.8865575282453309, + "grad_norm": 1.4796905287439253, + "learning_rate": 1.2674016588833866e-06, + "loss": 0.5304574966430664, + "step": 3845 + }, + { + "epoch": 0.8867881023749136, + "grad_norm": 1.2451043989470143, + "learning_rate": 1.2670342617818925e-06, + "loss": 0.44707632064819336, + "step": 3846 + }, + { + "epoch": 0.8870186765044962, + "grad_norm": 1.4327430501013436, + "learning_rate": 1.2666668258642628e-06, + "loss": 0.44395360350608826, + "step": 3847 + }, + { + "epoch": 0.8872492506340789, + "grad_norm": 1.5382701800989709, + "learning_rate": 1.266299351183907e-06, + "loss": 0.4993078112602234, + "step": 3848 + }, + { + "epoch": 0.8874798247636615, + "grad_norm": 1.447761685140105, + "learning_rate": 1.2659318377942418e-06, + "loss": 0.4836229681968689, + "step": 3849 + }, + { + "epoch": 0.8877103988932442, + "grad_norm": 1.1586406035440977, + "learning_rate": 1.2655642857486885e-06, + "loss": 0.4898098111152649, + "step": 3850 + }, + { + "epoch": 0.8879409730228268, + "grad_norm": 1.4550595650341691, + "learning_rate": 
1.2651966951006753e-06, + "loss": 0.5117218494415283, + "step": 3851 + }, + { + "epoch": 0.8881715471524095, + "grad_norm": 1.1751749847019868, + "learning_rate": 1.2648290659036347e-06, + "loss": 0.3920857906341553, + "step": 3852 + }, + { + "epoch": 0.8884021212819921, + "grad_norm": 1.2103531492140316, + "learning_rate": 1.2644613982110055e-06, + "loss": 0.42527467012405396, + "step": 3853 + }, + { + "epoch": 0.8886326954115749, + "grad_norm": 1.4673474591941762, + "learning_rate": 1.2640936920762318e-06, + "loss": 0.5283650159835815, + "step": 3854 + }, + { + "epoch": 0.8888632695411575, + "grad_norm": 1.1384795561192926, + "learning_rate": 1.2637259475527634e-06, + "loss": 0.3976718783378601, + "step": 3855 + }, + { + "epoch": 0.8890938436707402, + "grad_norm": 1.3777221980377923, + "learning_rate": 1.2633581646940555e-06, + "loss": 0.3767106533050537, + "step": 3856 + }, + { + "epoch": 0.8893244178003228, + "grad_norm": 1.2421308508382682, + "learning_rate": 1.2629903435535695e-06, + "loss": 0.4002486765384674, + "step": 3857 + }, + { + "epoch": 0.8895549919299055, + "grad_norm": 1.7761729251417224, + "learning_rate": 1.2626224841847718e-06, + "loss": 0.3829443156719208, + "step": 3858 + }, + { + "epoch": 0.8897855660594881, + "grad_norm": 1.6906089339859913, + "learning_rate": 1.2622545866411342e-06, + "loss": 0.5338312983512878, + "step": 3859 + }, + { + "epoch": 0.8900161401890708, + "grad_norm": 1.3435755743208722, + "learning_rate": 1.2618866509761347e-06, + "loss": 0.49615299701690674, + "step": 3860 + }, + { + "epoch": 0.8902467143186534, + "grad_norm": 1.3772165276715471, + "learning_rate": 1.2615186772432562e-06, + "loss": 0.5080281496047974, + "step": 3861 + }, + { + "epoch": 0.8904772884482361, + "grad_norm": 1.3191602759544514, + "learning_rate": 1.2611506654959877e-06, + "loss": 0.4631335139274597, + "step": 3862 + }, + { + "epoch": 0.8907078625778188, + "grad_norm": 1.6754337710064344, + "learning_rate": 1.2607826157878232e-06, + "loss": 
0.5179207921028137, + "step": 3863 + }, + { + "epoch": 0.8909384367074015, + "grad_norm": 1.8689690583071528, + "learning_rate": 1.260414528172263e-06, + "loss": 0.5107406973838806, + "step": 3864 + }, + { + "epoch": 0.8911690108369841, + "grad_norm": 1.4263135964434357, + "learning_rate": 1.2600464027028112e-06, + "loss": 0.3719855844974518, + "step": 3865 + }, + { + "epoch": 0.8913995849665668, + "grad_norm": 1.2717821474296322, + "learning_rate": 1.2596782394329797e-06, + "loss": 0.4703129231929779, + "step": 3866 + }, + { + "epoch": 0.8916301590961494, + "grad_norm": 1.4971801597034615, + "learning_rate": 1.2593100384162842e-06, + "loss": 0.49239644408226013, + "step": 3867 + }, + { + "epoch": 0.8918607332257321, + "grad_norm": 1.505796830220407, + "learning_rate": 1.2589417997062468e-06, + "loss": 0.5194324851036072, + "step": 3868 + }, + { + "epoch": 0.8920913073553147, + "grad_norm": 1.2722329079463401, + "learning_rate": 1.2585735233563943e-06, + "loss": 0.4224633574485779, + "step": 3869 + }, + { + "epoch": 0.8923218814848974, + "grad_norm": 1.7020995758876771, + "learning_rate": 1.2582052094202594e-06, + "loss": 0.4377749562263489, + "step": 3870 + }, + { + "epoch": 0.89255245561448, + "grad_norm": 1.2037908365106704, + "learning_rate": 1.2578368579513809e-06, + "loss": 0.42847269773483276, + "step": 3871 + }, + { + "epoch": 0.8927830297440628, + "grad_norm": 1.4087908465200083, + "learning_rate": 1.2574684690033018e-06, + "loss": 0.5194802284240723, + "step": 3872 + }, + { + "epoch": 0.8930136038736454, + "grad_norm": 1.3553883811442613, + "learning_rate": 1.2571000426295716e-06, + "loss": 0.4401082396507263, + "step": 3873 + }, + { + "epoch": 0.8932441780032281, + "grad_norm": 1.5117708123403886, + "learning_rate": 1.2567315788837442e-06, + "loss": 0.38890570402145386, + "step": 3874 + }, + { + "epoch": 0.8934747521328107, + "grad_norm": 1.4931972330534145, + "learning_rate": 1.2563630778193802e-06, + "loss": 0.522612452507019, + "step": 3875 + }, + { + 
"epoch": 0.8937053262623934, + "grad_norm": 1.757870637645656, + "learning_rate": 1.2559945394900447e-06, + "loss": 0.516444981098175, + "step": 3876 + }, + { + "epoch": 0.893935900391976, + "grad_norm": 1.193092685346779, + "learning_rate": 1.255625963949308e-06, + "loss": 0.4084436297416687, + "step": 3877 + }, + { + "epoch": 0.8941664745215587, + "grad_norm": 1.4364911954858623, + "learning_rate": 1.2552573512507474e-06, + "loss": 0.4561755657196045, + "step": 3878 + }, + { + "epoch": 0.8943970486511413, + "grad_norm": 1.3498949478529019, + "learning_rate": 1.2548887014479435e-06, + "loss": 0.44372665882110596, + "step": 3879 + }, + { + "epoch": 0.894627622780724, + "grad_norm": 1.4181034577590674, + "learning_rate": 1.2545200145944837e-06, + "loss": 0.4714791774749756, + "step": 3880 + }, + { + "epoch": 0.8948581969103067, + "grad_norm": 1.506508633299638, + "learning_rate": 1.25415129074396e-06, + "loss": 0.48050814867019653, + "step": 3881 + }, + { + "epoch": 0.8950887710398894, + "grad_norm": 1.7788226663138391, + "learning_rate": 1.2537825299499708e-06, + "loss": 0.4078127145767212, + "step": 3882 + }, + { + "epoch": 0.895319345169472, + "grad_norm": 1.1273639481853348, + "learning_rate": 1.2534137322661187e-06, + "loss": 0.41556763648986816, + "step": 3883 + }, + { + "epoch": 0.8955499192990547, + "grad_norm": 1.2916565664076916, + "learning_rate": 1.2530448977460127e-06, + "loss": 0.3862306475639343, + "step": 3884 + }, + { + "epoch": 0.8957804934286373, + "grad_norm": 1.2417402269481763, + "learning_rate": 1.2526760264432656e-06, + "loss": 0.4071112871170044, + "step": 3885 + }, + { + "epoch": 0.89601106755822, + "grad_norm": 1.2074121865816745, + "learning_rate": 1.2523071184114978e-06, + "loss": 0.36956706643104553, + "step": 3886 + }, + { + "epoch": 0.8962416416878026, + "grad_norm": 1.5187969981751328, + "learning_rate": 1.251938173704333e-06, + "loss": 0.5087941884994507, + "step": 3887 + }, + { + "epoch": 0.8964722158173853, + "grad_norm": 
1.5300476571906632, + "learning_rate": 1.2515691923754017e-06, + "loss": 0.5636804103851318, + "step": 3888 + }, + { + "epoch": 0.896702789946968, + "grad_norm": 1.2028947296679213, + "learning_rate": 1.2512001744783383e-06, + "loss": 0.40899237990379333, + "step": 3889 + }, + { + "epoch": 0.8969333640765507, + "grad_norm": 1.2319974158201112, + "learning_rate": 1.2508311200667839e-06, + "loss": 0.3964187800884247, + "step": 3890 + }, + { + "epoch": 0.8971639382061333, + "grad_norm": 1.1881521968898023, + "learning_rate": 1.2504620291943838e-06, + "loss": 0.43190568685531616, + "step": 3891 + }, + { + "epoch": 0.897394512335716, + "grad_norm": 1.5323277954151004, + "learning_rate": 1.25009290191479e-06, + "loss": 0.5640079379081726, + "step": 3892 + }, + { + "epoch": 0.8976250864652986, + "grad_norm": 1.5228387521540339, + "learning_rate": 1.2497237382816577e-06, + "loss": 0.4969727396965027, + "step": 3893 + }, + { + "epoch": 0.8978556605948812, + "grad_norm": 1.438395912517929, + "learning_rate": 1.2493545383486497e-06, + "loss": 0.43710076808929443, + "step": 3894 + }, + { + "epoch": 0.8980862347244639, + "grad_norm": 1.217545409086522, + "learning_rate": 1.248985302169432e-06, + "loss": 0.4246212840080261, + "step": 3895 + }, + { + "epoch": 0.8983168088540465, + "grad_norm": 1.1837244532547113, + "learning_rate": 1.2486160297976776e-06, + "loss": 0.3812369108200073, + "step": 3896 + }, + { + "epoch": 0.8985473829836292, + "grad_norm": 2.1554879190255685, + "learning_rate": 1.248246721287063e-06, + "loss": 0.6407653093338013, + "step": 3897 + }, + { + "epoch": 0.8987779571132118, + "grad_norm": 1.6947319293322312, + "learning_rate": 1.247877376691272e-06, + "loss": 0.47748661041259766, + "step": 3898 + }, + { + "epoch": 0.8990085312427946, + "grad_norm": 1.5504399903750061, + "learning_rate": 1.2475079960639922e-06, + "loss": 0.5047964453697205, + "step": 3899 + }, + { + "epoch": 0.8992391053723772, + "grad_norm": 1.1781117181895115, + "learning_rate": 
1.2471385794589167e-06, + "loss": 0.37989485263824463, + "step": 3900 + }, + { + "epoch": 0.8994696795019599, + "grad_norm": 1.2955755733611327, + "learning_rate": 1.2467691269297437e-06, + "loss": 0.38857924938201904, + "step": 3901 + }, + { + "epoch": 0.8997002536315425, + "grad_norm": 1.2312069291338004, + "learning_rate": 1.2463996385301776e-06, + "loss": 0.45452386140823364, + "step": 3902 + }, + { + "epoch": 0.8999308277611252, + "grad_norm": 1.5565774035889273, + "learning_rate": 1.2460301143139267e-06, + "loss": 0.41920900344848633, + "step": 3903 + }, + { + "epoch": 0.9001614018907078, + "grad_norm": 1.542875547138451, + "learning_rate": 1.2456605543347051e-06, + "loss": 0.5979125499725342, + "step": 3904 + }, + { + "epoch": 0.9003919760202905, + "grad_norm": 1.5505304900467811, + "learning_rate": 1.2452909586462323e-06, + "loss": 0.5517082214355469, + "step": 3905 + }, + { + "epoch": 0.9006225501498731, + "grad_norm": 1.2381443535248697, + "learning_rate": 1.244921327302233e-06, + "loss": 0.4558248519897461, + "step": 3906 + }, + { + "epoch": 0.9008531242794559, + "grad_norm": 1.5503878716470787, + "learning_rate": 1.2445516603564362e-06, + "loss": 0.5637399554252625, + "step": 3907 + }, + { + "epoch": 0.9010836984090385, + "grad_norm": 1.2396897738245216, + "learning_rate": 1.2441819578625775e-06, + "loss": 0.5208043456077576, + "step": 3908 + }, + { + "epoch": 0.9013142725386212, + "grad_norm": 1.400218770913741, + "learning_rate": 1.243812219874396e-06, + "loss": 0.3901744484901428, + "step": 3909 + }, + { + "epoch": 0.9015448466682038, + "grad_norm": 1.4025338042989108, + "learning_rate": 1.2434424464456376e-06, + "loss": 0.5770972967147827, + "step": 3910 + }, + { + "epoch": 0.9017754207977865, + "grad_norm": 1.375223010916462, + "learning_rate": 1.2430726376300525e-06, + "loss": 0.3457295894622803, + "step": 3911 + }, + { + "epoch": 0.9020059949273691, + "grad_norm": 1.3118554362154196, + "learning_rate": 1.242702793481396e-06, + "loss": 
0.4487595558166504, + "step": 3912 + }, + { + "epoch": 0.9022365690569518, + "grad_norm": 1.2548104794507453, + "learning_rate": 1.2423329140534286e-06, + "loss": 0.4369876980781555, + "step": 3913 + }, + { + "epoch": 0.9024671431865344, + "grad_norm": 1.5693012853497335, + "learning_rate": 1.2419629993999165e-06, + "loss": 0.43154388666152954, + "step": 3914 + }, + { + "epoch": 0.9026977173161171, + "grad_norm": 1.313977531855456, + "learning_rate": 1.24159304957463e-06, + "loss": 0.4528294801712036, + "step": 3915 + }, + { + "epoch": 0.9029282914456997, + "grad_norm": 1.4152554930408472, + "learning_rate": 1.2412230646313452e-06, + "loss": 0.4204830527305603, + "step": 3916 + }, + { + "epoch": 0.9031588655752825, + "grad_norm": 1.3117655747531898, + "learning_rate": 1.2408530446238433e-06, + "loss": 0.46544623374938965, + "step": 3917 + }, + { + "epoch": 0.9033894397048651, + "grad_norm": 1.19103055945586, + "learning_rate": 1.2404829896059107e-06, + "loss": 0.39419203996658325, + "step": 3918 + }, + { + "epoch": 0.9036200138344478, + "grad_norm": 1.3085505059347724, + "learning_rate": 1.240112899631338e-06, + "loss": 0.4214451014995575, + "step": 3919 + }, + { + "epoch": 0.9038505879640304, + "grad_norm": 1.310156094815825, + "learning_rate": 1.239742774753922e-06, + "loss": 0.42385220527648926, + "step": 3920 + }, + { + "epoch": 0.9040811620936131, + "grad_norm": 1.4457769612459037, + "learning_rate": 1.2393726150274636e-06, + "loss": 0.5206592082977295, + "step": 3921 + }, + { + "epoch": 0.9043117362231957, + "grad_norm": 1.4602545667694231, + "learning_rate": 1.23900242050577e-06, + "loss": 0.4358803629875183, + "step": 3922 + }, + { + "epoch": 0.9045423103527784, + "grad_norm": 1.3596132034754325, + "learning_rate": 1.2386321912426524e-06, + "loss": 0.4525173306465149, + "step": 3923 + }, + { + "epoch": 0.904772884482361, + "grad_norm": 1.4736466426478543, + "learning_rate": 1.2382619272919273e-06, + "loss": 0.48877185583114624, + "step": 3924 + }, + { + 
"epoch": 0.9050034586119438, + "grad_norm": 1.152358955118646, + "learning_rate": 1.2378916287074162e-06, + "loss": 0.4401814341545105, + "step": 3925 + }, + { + "epoch": 0.9052340327415264, + "grad_norm": 1.337265572878916, + "learning_rate": 1.2375212955429459e-06, + "loss": 0.37818846106529236, + "step": 3926 + }, + { + "epoch": 0.9054646068711091, + "grad_norm": 1.285760527835995, + "learning_rate": 1.2371509278523482e-06, + "loss": 0.36472904682159424, + "step": 3927 + }, + { + "epoch": 0.9056951810006917, + "grad_norm": 1.2999097028645303, + "learning_rate": 1.2367805256894596e-06, + "loss": 0.5113309025764465, + "step": 3928 + }, + { + "epoch": 0.9059257551302744, + "grad_norm": 1.2052405163032573, + "learning_rate": 1.2364100891081218e-06, + "loss": 0.36074432730674744, + "step": 3929 + }, + { + "epoch": 0.906156329259857, + "grad_norm": 1.3493065976556424, + "learning_rate": 1.2360396181621819e-06, + "loss": 0.39177048206329346, + "step": 3930 + }, + { + "epoch": 0.9063869033894397, + "grad_norm": 1.3736058093352046, + "learning_rate": 1.2356691129054912e-06, + "loss": 0.4758113622665405, + "step": 3931 + }, + { + "epoch": 0.9066174775190223, + "grad_norm": 1.3614234520329223, + "learning_rate": 1.2352985733919065e-06, + "loss": 0.3840598464012146, + "step": 3932 + }, + { + "epoch": 0.906848051648605, + "grad_norm": 1.510763334369694, + "learning_rate": 1.2349279996752892e-06, + "loss": 0.5103816986083984, + "step": 3933 + }, + { + "epoch": 0.9070786257781877, + "grad_norm": 1.466046011323441, + "learning_rate": 1.234557391809507e-06, + "loss": 0.4175255298614502, + "step": 3934 + }, + { + "epoch": 0.9073091999077704, + "grad_norm": 2.627411026682294, + "learning_rate": 1.2341867498484302e-06, + "loss": 0.4504377245903015, + "step": 3935 + }, + { + "epoch": 0.907539774037353, + "grad_norm": 1.2868923632717955, + "learning_rate": 1.2338160738459355e-06, + "loss": 0.45868122577667236, + "step": 3936 + }, + { + "epoch": 0.9077703481669357, + "grad_norm": 
1.3231771761325972, + "learning_rate": 1.2334453638559054e-06, + "loss": 0.5161639451980591, + "step": 3937 + }, + { + "epoch": 0.9080009222965183, + "grad_norm": 1.5486748129834036, + "learning_rate": 1.2330746199322257e-06, + "loss": 0.44561630487442017, + "step": 3938 + }, + { + "epoch": 0.908231496426101, + "grad_norm": 1.595486700598371, + "learning_rate": 1.2327038421287876e-06, + "loss": 0.4780126214027405, + "step": 3939 + }, + { + "epoch": 0.9084620705556836, + "grad_norm": 1.2226582649026916, + "learning_rate": 1.2323330304994877e-06, + "loss": 0.505066990852356, + "step": 3940 + }, + { + "epoch": 0.9086926446852663, + "grad_norm": 1.3041405659013958, + "learning_rate": 1.2319621850982274e-06, + "loss": 0.5053813457489014, + "step": 3941 + }, + { + "epoch": 0.9089232188148489, + "grad_norm": 1.178162092657054, + "learning_rate": 1.2315913059789125e-06, + "loss": 0.3579134941101074, + "step": 3942 + }, + { + "epoch": 0.9091537929444317, + "grad_norm": 1.4949007072050957, + "learning_rate": 1.2312203931954543e-06, + "loss": 0.5703507661819458, + "step": 3943 + }, + { + "epoch": 0.9093843670740143, + "grad_norm": 1.4141867956521472, + "learning_rate": 1.2308494468017685e-06, + "loss": 0.4972035884857178, + "step": 3944 + }, + { + "epoch": 0.909614941203597, + "grad_norm": 1.8338477540837272, + "learning_rate": 1.230478466851776e-06, + "loss": 0.5528955459594727, + "step": 3945 + }, + { + "epoch": 0.9098455153331796, + "grad_norm": 1.4009292239467905, + "learning_rate": 1.2301074533994024e-06, + "loss": 0.4099786877632141, + "step": 3946 + }, + { + "epoch": 0.9100760894627623, + "grad_norm": 1.3414325662099453, + "learning_rate": 1.2297364064985786e-06, + "loss": 0.41020166873931885, + "step": 3947 + }, + { + "epoch": 0.9103066635923449, + "grad_norm": 1.4112377219226224, + "learning_rate": 1.2293653262032395e-06, + "loss": 0.4340355694293976, + "step": 3948 + }, + { + "epoch": 0.9105372377219276, + "grad_norm": 1.376446280407005, + "learning_rate": 
1.2289942125673261e-06, + "loss": 0.4369847774505615, + "step": 3949 + }, + { + "epoch": 0.9107678118515102, + "grad_norm": 1.4688076477466663, + "learning_rate": 1.228623065644783e-06, + "loss": 0.406423956155777, + "step": 3950 + }, + { + "epoch": 0.910998385981093, + "grad_norm": 1.4230223897567287, + "learning_rate": 1.22825188548956e-06, + "loss": 0.5081946849822998, + "step": 3951 + }, + { + "epoch": 0.9112289601106756, + "grad_norm": 1.7017899930713631, + "learning_rate": 1.2278806721556124e-06, + "loss": 0.43494492769241333, + "step": 3952 + }, + { + "epoch": 0.9114595342402583, + "grad_norm": 1.348884752431283, + "learning_rate": 1.2275094256968996e-06, + "loss": 0.35356831550598145, + "step": 3953 + }, + { + "epoch": 0.9116901083698409, + "grad_norm": 1.2260567341450548, + "learning_rate": 1.227138146167386e-06, + "loss": 0.36741551756858826, + "step": 3954 + }, + { + "epoch": 0.9119206824994236, + "grad_norm": 1.4686302016765889, + "learning_rate": 1.226766833621041e-06, + "loss": 0.491504430770874, + "step": 3955 + }, + { + "epoch": 0.9121512566290062, + "grad_norm": 1.266294151631501, + "learning_rate": 1.2263954881118384e-06, + "loss": 0.4558037519454956, + "step": 3956 + }, + { + "epoch": 0.9123818307585889, + "grad_norm": 1.398276341256052, + "learning_rate": 1.2260241096937571e-06, + "loss": 0.3941671848297119, + "step": 3957 + }, + { + "epoch": 0.9126124048881715, + "grad_norm": 1.7133993603535684, + "learning_rate": 1.2256526984207809e-06, + "loss": 0.40505191683769226, + "step": 3958 + }, + { + "epoch": 0.9128429790177542, + "grad_norm": 1.3369540241008888, + "learning_rate": 1.2252812543468982e-06, + "loss": 0.4669588804244995, + "step": 3959 + }, + { + "epoch": 0.9130735531473368, + "grad_norm": 1.6346862522902008, + "learning_rate": 1.2249097775261014e-06, + "loss": 0.535057544708252, + "step": 3960 + }, + { + "epoch": 0.9133041272769196, + "grad_norm": 1.465530924269544, + "learning_rate": 1.2245382680123898e-06, + "loss": 
0.5127478837966919, + "step": 3961 + }, + { + "epoch": 0.9135347014065022, + "grad_norm": 1.239878706419753, + "learning_rate": 1.224166725859765e-06, + "loss": 0.5004767179489136, + "step": 3962 + }, + { + "epoch": 0.9137652755360849, + "grad_norm": 1.3382850542269662, + "learning_rate": 1.2237951511222346e-06, + "loss": 0.47929924726486206, + "step": 3963 + }, + { + "epoch": 0.9139958496656675, + "grad_norm": 1.3650943807220162, + "learning_rate": 1.2234235438538109e-06, + "loss": 0.5619359016418457, + "step": 3964 + }, + { + "epoch": 0.9142264237952502, + "grad_norm": 2.173999313160228, + "learning_rate": 1.223051904108511e-06, + "loss": 0.44648507237434387, + "step": 3965 + }, + { + "epoch": 0.9144569979248328, + "grad_norm": 1.5081082363333118, + "learning_rate": 1.2226802319403562e-06, + "loss": 0.4451872706413269, + "step": 3966 + }, + { + "epoch": 0.9146875720544155, + "grad_norm": 1.1999813764066747, + "learning_rate": 1.222308527403373e-06, + "loss": 0.44295474886894226, + "step": 3967 + }, + { + "epoch": 0.9149181461839981, + "grad_norm": 1.4510785821223537, + "learning_rate": 1.221936790551592e-06, + "loss": 0.517430305480957, + "step": 3968 + }, + { + "epoch": 0.9151487203135809, + "grad_norm": 1.2648448897941866, + "learning_rate": 1.2215650214390493e-06, + "loss": 0.4819454252719879, + "step": 3969 + }, + { + "epoch": 0.9153792944431635, + "grad_norm": 1.40726836834287, + "learning_rate": 1.2211932201197855e-06, + "loss": 0.41739264130592346, + "step": 3970 + }, + { + "epoch": 0.9156098685727462, + "grad_norm": 1.214750449543567, + "learning_rate": 1.2208213866478452e-06, + "loss": 0.38833269476890564, + "step": 3971 + }, + { + "epoch": 0.9158404427023288, + "grad_norm": 1.4780394203565799, + "learning_rate": 1.2204495210772784e-06, + "loss": 0.48899054527282715, + "step": 3972 + }, + { + "epoch": 0.9160710168319115, + "grad_norm": 1.4236888721907983, + "learning_rate": 1.2200776234621395e-06, + "loss": 0.5201622247695923, + "step": 3973 + }, + { + 
"epoch": 0.9163015909614941, + "grad_norm": 1.4696703280770271, + "learning_rate": 1.219705693856488e-06, + "loss": 0.4105098843574524, + "step": 3974 + }, + { + "epoch": 0.9165321650910768, + "grad_norm": 1.2658629585457457, + "learning_rate": 1.2193337323143865e-06, + "loss": 0.45458245277404785, + "step": 3975 + }, + { + "epoch": 0.9167627392206594, + "grad_norm": 1.4906657502786624, + "learning_rate": 1.2189617388899049e-06, + "loss": 0.5013390779495239, + "step": 3976 + }, + { + "epoch": 0.9169933133502421, + "grad_norm": 1.3837275498584536, + "learning_rate": 1.218589713637115e-06, + "loss": 0.37065303325653076, + "step": 3977 + }, + { + "epoch": 0.9172238874798248, + "grad_norm": 1.4237915808433583, + "learning_rate": 1.218217656610095e-06, + "loss": 0.45158177614212036, + "step": 3978 + }, + { + "epoch": 0.9174544616094075, + "grad_norm": 1.3261399530988285, + "learning_rate": 1.2178455678629271e-06, + "loss": 0.4439426064491272, + "step": 3979 + }, + { + "epoch": 0.9176850357389901, + "grad_norm": 1.4056969202356144, + "learning_rate": 1.217473447449698e-06, + "loss": 0.42215704917907715, + "step": 3980 + }, + { + "epoch": 0.9179156098685728, + "grad_norm": 1.6572776500354818, + "learning_rate": 1.2171012954244991e-06, + "loss": 0.42273545265197754, + "step": 3981 + }, + { + "epoch": 0.9181461839981554, + "grad_norm": 1.5659197643503024, + "learning_rate": 1.216729111841427e-06, + "loss": 0.6045219898223877, + "step": 3982 + }, + { + "epoch": 0.9183767581277381, + "grad_norm": 1.318642532575583, + "learning_rate": 1.216356896754582e-06, + "loss": 0.49316874146461487, + "step": 3983 + }, + { + "epoch": 0.9186073322573207, + "grad_norm": 1.2984174252340932, + "learning_rate": 1.2159846502180692e-06, + "loss": 0.5222599506378174, + "step": 3984 + }, + { + "epoch": 0.9188379063869034, + "grad_norm": 1.21924477747188, + "learning_rate": 1.2156123722859988e-06, + "loss": 0.4513903856277466, + "step": 3985 + }, + { + "epoch": 0.919068480516486, + "grad_norm": 
1.5286242494549134, + "learning_rate": 1.2152400630124846e-06, + "loss": 0.4946150779724121, + "step": 3986 + }, + { + "epoch": 0.9192990546460688, + "grad_norm": 1.6287340554518628, + "learning_rate": 1.2148677224516458e-06, + "loss": 0.5482569336891174, + "step": 3987 + }, + { + "epoch": 0.9195296287756514, + "grad_norm": 1.4490082622042646, + "learning_rate": 1.2144953506576061e-06, + "loss": 0.457091361284256, + "step": 3988 + }, + { + "epoch": 0.9197602029052341, + "grad_norm": 1.378032718586854, + "learning_rate": 1.2141229476844933e-06, + "loss": 0.4262084364891052, + "step": 3989 + }, + { + "epoch": 0.9199907770348167, + "grad_norm": 1.2394422456854066, + "learning_rate": 1.2137505135864402e-06, + "loss": 0.4905529022216797, + "step": 3990 + }, + { + "epoch": 0.9202213511643994, + "grad_norm": 1.3246738813802295, + "learning_rate": 1.2133780484175833e-06, + "loss": 0.5001873970031738, + "step": 3991 + }, + { + "epoch": 0.920451925293982, + "grad_norm": 1.4663495799657225, + "learning_rate": 1.2130055522320647e-06, + "loss": 0.396418035030365, + "step": 3992 + }, + { + "epoch": 0.9206824994235647, + "grad_norm": 1.5742445852004807, + "learning_rate": 1.2126330250840302e-06, + "loss": 0.5743722915649414, + "step": 3993 + }, + { + "epoch": 0.9209130735531473, + "grad_norm": 1.720134285882963, + "learning_rate": 1.212260467027631e-06, + "loss": 0.5134707689285278, + "step": 3994 + }, + { + "epoch": 0.92114364768273, + "grad_norm": 1.2913764867867046, + "learning_rate": 1.2118878781170213e-06, + "loss": 0.4191853404045105, + "step": 3995 + }, + { + "epoch": 0.9213742218123127, + "grad_norm": 1.8061166260156263, + "learning_rate": 1.2115152584063613e-06, + "loss": 0.3430103063583374, + "step": 3996 + }, + { + "epoch": 0.9216047959418954, + "grad_norm": 1.491788048135039, + "learning_rate": 1.2111426079498147e-06, + "loss": 0.5229896903038025, + "step": 3997 + }, + { + "epoch": 0.921835370071478, + "grad_norm": 1.9288487767080142, + "learning_rate": 
1.2107699268015501e-06, + "loss": 0.5028181076049805, + "step": 3998 + }, + { + "epoch": 0.9220659442010607, + "grad_norm": 1.8323250729268132, + "learning_rate": 1.2103972150157407e-06, + "loss": 0.4662501811981201, + "step": 3999 + }, + { + "epoch": 0.9222965183306433, + "grad_norm": 1.7877363086665337, + "learning_rate": 1.2100244726465636e-06, + "loss": 0.5581385493278503, + "step": 4000 + }, + { + "epoch": 0.922527092460226, + "grad_norm": 1.5059656153682595, + "learning_rate": 1.2096516997482012e-06, + "loss": 0.3925841450691223, + "step": 4001 + }, + { + "epoch": 0.9227576665898086, + "grad_norm": 1.4478402824011334, + "learning_rate": 1.2092788963748393e-06, + "loss": 0.4021197557449341, + "step": 4002 + }, + { + "epoch": 0.9229882407193913, + "grad_norm": 1.5875480480080288, + "learning_rate": 1.2089060625806683e-06, + "loss": 0.5519800186157227, + "step": 4003 + }, + { + "epoch": 0.923218814848974, + "grad_norm": 1.4740215502095901, + "learning_rate": 1.2085331984198847e-06, + "loss": 0.4426038861274719, + "step": 4004 + }, + { + "epoch": 0.9234493889785566, + "grad_norm": 1.3127950735735558, + "learning_rate": 1.2081603039466872e-06, + "loss": 0.4370608925819397, + "step": 4005 + }, + { + "epoch": 0.9236799631081393, + "grad_norm": 1.6270244555647773, + "learning_rate": 1.2077873792152797e-06, + "loss": 0.5535042881965637, + "step": 4006 + }, + { + "epoch": 0.9239105372377219, + "grad_norm": 1.4254025319676356, + "learning_rate": 1.2074144242798708e-06, + "loss": 0.45786774158477783, + "step": 4007 + }, + { + "epoch": 0.9241411113673046, + "grad_norm": 1.305332226115227, + "learning_rate": 1.207041439194673e-06, + "loss": 0.38189244270324707, + "step": 4008 + }, + { + "epoch": 0.9243716854968872, + "grad_norm": 1.4825176983109143, + "learning_rate": 1.206668424013904e-06, + "loss": 0.48782190680503845, + "step": 4009 + }, + { + "epoch": 0.9246022596264699, + "grad_norm": 1.4182276344304934, + "learning_rate": 1.2062953787917852e-06, + "loss": 
0.46295344829559326, + "step": 4010 + }, + { + "epoch": 0.9248328337560525, + "grad_norm": 1.370453601452758, + "learning_rate": 1.205922303582542e-06, + "loss": 0.5205795764923096, + "step": 4011 + }, + { + "epoch": 0.9250634078856352, + "grad_norm": 1.431830816120071, + "learning_rate": 1.205549198440405e-06, + "loss": 0.47622987627983093, + "step": 4012 + }, + { + "epoch": 0.9252939820152178, + "grad_norm": 1.3190370245605134, + "learning_rate": 1.2051760634196091e-06, + "loss": 0.4826146960258484, + "step": 4013 + }, + { + "epoch": 0.9255245561448006, + "grad_norm": 1.608771307027525, + "learning_rate": 1.2048028985743928e-06, + "loss": 0.46193474531173706, + "step": 4014 + }, + { + "epoch": 0.9257551302743832, + "grad_norm": 1.4926107871852312, + "learning_rate": 1.2044297039589996e-06, + "loss": 0.523394763469696, + "step": 4015 + }, + { + "epoch": 0.9259857044039659, + "grad_norm": 1.3096026982819484, + "learning_rate": 1.2040564796276773e-06, + "loss": 0.3963446617126465, + "step": 4016 + }, + { + "epoch": 0.9262162785335485, + "grad_norm": 1.3803899653039033, + "learning_rate": 1.2036832256346774e-06, + "loss": 0.5016456842422485, + "step": 4017 + }, + { + "epoch": 0.9264468526631312, + "grad_norm": 1.2198633348825472, + "learning_rate": 1.2033099420342566e-06, + "loss": 0.47298160195350647, + "step": 4018 + }, + { + "epoch": 0.9266774267927138, + "grad_norm": 1.5448162104307424, + "learning_rate": 1.2029366288806748e-06, + "loss": 0.387129545211792, + "step": 4019 + }, + { + "epoch": 0.9269080009222965, + "grad_norm": 1.4210281769521962, + "learning_rate": 1.2025632862281976e-06, + "loss": 0.46101367473602295, + "step": 4020 + }, + { + "epoch": 0.9271385750518791, + "grad_norm": 1.364554371793265, + "learning_rate": 1.2021899141310938e-06, + "loss": 0.4242950677871704, + "step": 4021 + }, + { + "epoch": 0.9273691491814618, + "grad_norm": 1.5524341283687932, + "learning_rate": 1.201816512643637e-06, + "loss": 0.45983830094337463, + "step": 4022 + }, + { + 
"epoch": 0.9275997233110445, + "grad_norm": 1.3760025635830133, + "learning_rate": 1.2014430818201044e-06, + "loss": 0.39785802364349365, + "step": 4023 + }, + { + "epoch": 0.9278302974406272, + "grad_norm": 1.254017871701417, + "learning_rate": 1.2010696217147783e-06, + "loss": 0.39265739917755127, + "step": 4024 + }, + { + "epoch": 0.9280608715702098, + "grad_norm": 1.4761130221315304, + "learning_rate": 1.2006961323819455e-06, + "loss": 0.49783703684806824, + "step": 4025 + }, + { + "epoch": 0.9282914456997925, + "grad_norm": 1.3764899481486361, + "learning_rate": 1.2003226138758953e-06, + "loss": 0.4479181170463562, + "step": 4026 + }, + { + "epoch": 0.9285220198293751, + "grad_norm": 1.4404345233811269, + "learning_rate": 1.199949066250923e-06, + "loss": 0.5205901265144348, + "step": 4027 + }, + { + "epoch": 0.9287525939589578, + "grad_norm": 1.3718010528366764, + "learning_rate": 1.1995754895613277e-06, + "loss": 0.5163009762763977, + "step": 4028 + }, + { + "epoch": 0.9289831680885404, + "grad_norm": 1.6219891318512447, + "learning_rate": 1.1992018838614124e-06, + "loss": 0.5746268033981323, + "step": 4029 + }, + { + "epoch": 0.9292137422181231, + "grad_norm": 1.2896226756922917, + "learning_rate": 1.1988282492054844e-06, + "loss": 0.5306442975997925, + "step": 4030 + }, + { + "epoch": 0.9294443163477057, + "grad_norm": 1.1978686339854372, + "learning_rate": 1.198454585647855e-06, + "loss": 0.4219534993171692, + "step": 4031 + }, + { + "epoch": 0.9296748904772885, + "grad_norm": 1.3997557750947305, + "learning_rate": 1.1980808932428406e-06, + "loss": 0.4167936444282532, + "step": 4032 + }, + { + "epoch": 0.9299054646068711, + "grad_norm": 1.2271684703243566, + "learning_rate": 1.197707172044761e-06, + "loss": 0.42376089096069336, + "step": 4033 + }, + { + "epoch": 0.9301360387364538, + "grad_norm": 1.5370602561856461, + "learning_rate": 1.1973334221079398e-06, + "loss": 0.48729848861694336, + "step": 4034 + }, + { + "epoch": 0.9303666128660364, + 
"grad_norm": 1.2353226603771892, + "learning_rate": 1.1969596434867062e-06, + "loss": 0.45877987146377563, + "step": 4035 + }, + { + "epoch": 0.9305971869956191, + "grad_norm": 1.2531522631367908, + "learning_rate": 1.196585836235392e-06, + "loss": 0.504621684551239, + "step": 4036 + }, + { + "epoch": 0.9308277611252017, + "grad_norm": 1.202880043912139, + "learning_rate": 1.1962120004083342e-06, + "loss": 0.45170748233795166, + "step": 4037 + }, + { + "epoch": 0.9310583352547844, + "grad_norm": 1.3604906368473153, + "learning_rate": 1.1958381360598737e-06, + "loss": 0.3969152569770813, + "step": 4038 + }, + { + "epoch": 0.931288909384367, + "grad_norm": 1.2718279913855612, + "learning_rate": 1.1954642432443553e-06, + "loss": 0.4286048412322998, + "step": 4039 + }, + { + "epoch": 0.9315194835139498, + "grad_norm": 1.4261317138789782, + "learning_rate": 1.1950903220161284e-06, + "loss": 0.3755400776863098, + "step": 4040 + }, + { + "epoch": 0.9317500576435324, + "grad_norm": 1.7559058405972485, + "learning_rate": 1.1947163724295457e-06, + "loss": 0.553135871887207, + "step": 4041 + }, + { + "epoch": 0.9319806317731151, + "grad_norm": 1.3529681190465184, + "learning_rate": 1.194342394538965e-06, + "loss": 0.53995281457901, + "step": 4042 + }, + { + "epoch": 0.9322112059026977, + "grad_norm": 1.3239114086556873, + "learning_rate": 1.1939683883987476e-06, + "loss": 0.4405739903450012, + "step": 4043 + }, + { + "epoch": 0.9324417800322804, + "grad_norm": 1.4320084668753248, + "learning_rate": 1.1935943540632591e-06, + "loss": 0.5046489238739014, + "step": 4044 + }, + { + "epoch": 0.932672354161863, + "grad_norm": 1.63220562819442, + "learning_rate": 1.1932202915868694e-06, + "loss": 0.4699453115463257, + "step": 4045 + }, + { + "epoch": 0.9329029282914457, + "grad_norm": 1.791152379500816, + "learning_rate": 1.192846201023952e-06, + "loss": 0.5643539428710938, + "step": 4046 + }, + { + "epoch": 0.9331335024210283, + "grad_norm": 1.3213038373558907, + "learning_rate": 
1.192472082428885e-06, + "loss": 0.4423527121543884, + "step": 4047 + }, + { + "epoch": 0.933364076550611, + "grad_norm": 1.488626793530787, + "learning_rate": 1.1920979358560498e-06, + "loss": 0.4446362257003784, + "step": 4048 + }, + { + "epoch": 0.9335946506801936, + "grad_norm": 1.6284188135746005, + "learning_rate": 1.1917237613598332e-06, + "loss": 0.48347601294517517, + "step": 4049 + }, + { + "epoch": 0.9338252248097764, + "grad_norm": 1.339621886087554, + "learning_rate": 1.1913495589946243e-06, + "loss": 0.4736206531524658, + "step": 4050 + }, + { + "epoch": 0.934055798939359, + "grad_norm": 1.5821523477294297, + "learning_rate": 1.1909753288148181e-06, + "loss": 0.4896177053451538, + "step": 4051 + }, + { + "epoch": 0.9342863730689417, + "grad_norm": 1.3503870180183308, + "learning_rate": 1.1906010708748124e-06, + "loss": 0.3953405022621155, + "step": 4052 + }, + { + "epoch": 0.9345169471985243, + "grad_norm": 1.75805064255455, + "learning_rate": 1.1902267852290092e-06, + "loss": 0.30871689319610596, + "step": 4053 + }, + { + "epoch": 0.934747521328107, + "grad_norm": 1.4966149449301516, + "learning_rate": 1.1898524719318151e-06, + "loss": 0.44187474250793457, + "step": 4054 + }, + { + "epoch": 0.9349780954576896, + "grad_norm": 1.3440011557143472, + "learning_rate": 1.1894781310376396e-06, + "loss": 0.4069768488407135, + "step": 4055 + }, + { + "epoch": 0.9352086695872723, + "grad_norm": 1.2938244564986259, + "learning_rate": 1.1891037626008982e-06, + "loss": 0.36307692527770996, + "step": 4056 + }, + { + "epoch": 0.9354392437168549, + "grad_norm": 1.2107088826138788, + "learning_rate": 1.188729366676008e-06, + "loss": 0.38535594940185547, + "step": 4057 + }, + { + "epoch": 0.9356698178464377, + "grad_norm": 1.416105966319888, + "learning_rate": 1.1883549433173916e-06, + "loss": 0.46454256772994995, + "step": 4058 + }, + { + "epoch": 0.9359003919760203, + "grad_norm": 1.5618282514551205, + "learning_rate": 1.1879804925794752e-06, + "loss": 
0.48537465929985046, + "step": 4059 + }, + { + "epoch": 0.936130966105603, + "grad_norm": 1.4027831120439134, + "learning_rate": 1.1876060145166893e-06, + "loss": 0.4355062246322632, + "step": 4060 + }, + { + "epoch": 0.9363615402351856, + "grad_norm": 1.4619447190479122, + "learning_rate": 1.1872315091834676e-06, + "loss": 0.47248804569244385, + "step": 4061 + }, + { + "epoch": 0.9365921143647683, + "grad_norm": 1.4336627602293526, + "learning_rate": 1.1868569766342488e-06, + "loss": 0.4896939992904663, + "step": 4062 + }, + { + "epoch": 0.9368226884943509, + "grad_norm": 1.7008224797561309, + "learning_rate": 1.1864824169234744e-06, + "loss": 0.4259600043296814, + "step": 4063 + }, + { + "epoch": 0.9370532626239336, + "grad_norm": 1.4119659383453314, + "learning_rate": 1.186107830105591e-06, + "loss": 0.4228817820549011, + "step": 4064 + }, + { + "epoch": 0.9372838367535162, + "grad_norm": 1.4911543620584802, + "learning_rate": 1.1857332162350484e-06, + "loss": 0.44750750064849854, + "step": 4065 + }, + { + "epoch": 0.937514410883099, + "grad_norm": 1.4424129451647476, + "learning_rate": 1.1853585753663003e-06, + "loss": 0.49125558137893677, + "step": 4066 + }, + { + "epoch": 0.9377449850126816, + "grad_norm": 1.2540485430842725, + "learning_rate": 1.1849839075538048e-06, + "loss": 0.446805477142334, + "step": 4067 + }, + { + "epoch": 0.9379755591422643, + "grad_norm": 1.6527694351266196, + "learning_rate": 1.1846092128520235e-06, + "loss": 0.4516616463661194, + "step": 4068 + }, + { + "epoch": 0.9382061332718469, + "grad_norm": 1.2461495462560317, + "learning_rate": 1.1842344913154223e-06, + "loss": 0.5271207690238953, + "step": 4069 + }, + { + "epoch": 0.9384367074014296, + "grad_norm": 1.3340471888093621, + "learning_rate": 1.1838597429984702e-06, + "loss": 0.46718811988830566, + "step": 4070 + }, + { + "epoch": 0.9386672815310122, + "grad_norm": 1.6970586095771742, + "learning_rate": 1.1834849679556416e-06, + "loss": 0.4948880672454834, + "step": 4071 + }, + 
{ + "epoch": 0.9388978556605949, + "grad_norm": 1.570925891079885, + "learning_rate": 1.183110166241413e-06, + "loss": 0.5141744613647461, + "step": 4072 + }, + { + "epoch": 0.9391284297901775, + "grad_norm": 1.683475962747206, + "learning_rate": 1.1827353379102662e-06, + "loss": 0.43921130895614624, + "step": 4073 + }, + { + "epoch": 0.9393590039197602, + "grad_norm": 1.458461387708897, + "learning_rate": 1.182360483016686e-06, + "loss": 0.35931193828582764, + "step": 4074 + }, + { + "epoch": 0.9395895780493428, + "grad_norm": 1.4562814179425503, + "learning_rate": 1.1819856016151615e-06, + "loss": 0.4376310408115387, + "step": 4075 + }, + { + "epoch": 0.9398201521789256, + "grad_norm": 1.1615675527476144, + "learning_rate": 1.1816106937601856e-06, + "loss": 0.45419907569885254, + "step": 4076 + }, + { + "epoch": 0.9400507263085082, + "grad_norm": 1.447994335613413, + "learning_rate": 1.1812357595062545e-06, + "loss": 0.4077754616737366, + "step": 4077 + }, + { + "epoch": 0.9402813004380909, + "grad_norm": 1.4463033622550583, + "learning_rate": 1.1808607989078686e-06, + "loss": 0.5555585622787476, + "step": 4078 + }, + { + "epoch": 0.9405118745676735, + "grad_norm": 1.4616481074430372, + "learning_rate": 1.1804858120195334e-06, + "loss": 0.4566183090209961, + "step": 4079 + }, + { + "epoch": 0.9407424486972562, + "grad_norm": 1.3314435652232666, + "learning_rate": 1.180110798895756e-06, + "loss": 0.39149847626686096, + "step": 4080 + }, + { + "epoch": 0.9409730228268388, + "grad_norm": 1.3122400287018474, + "learning_rate": 1.1797357595910485e-06, + "loss": 0.42695966362953186, + "step": 4081 + }, + { + "epoch": 0.9412035969564215, + "grad_norm": 1.4264504061469645, + "learning_rate": 1.1793606941599266e-06, + "loss": 0.49673956632614136, + "step": 4082 + }, + { + "epoch": 0.9414341710860041, + "grad_norm": 1.3703442162376693, + "learning_rate": 1.17898560265691e-06, + "loss": 0.44765836000442505, + "step": 4083 + }, + { + "epoch": 0.9416647452155869, + 
"grad_norm": 1.2694691955405566, + "learning_rate": 1.1786104851365227e-06, + "loss": 0.40580642223358154, + "step": 4084 + }, + { + "epoch": 0.9418953193451695, + "grad_norm": 1.6554640938571203, + "learning_rate": 1.1782353416532907e-06, + "loss": 0.5389235019683838, + "step": 4085 + }, + { + "epoch": 0.9421258934747522, + "grad_norm": 1.4858385739097846, + "learning_rate": 1.1778601722617456e-06, + "loss": 0.5130764245986938, + "step": 4086 + }, + { + "epoch": 0.9423564676043348, + "grad_norm": 1.4406092108567712, + "learning_rate": 1.1774849770164218e-06, + "loss": 0.5031291842460632, + "step": 4087 + }, + { + "epoch": 0.9425870417339175, + "grad_norm": 1.474863885181778, + "learning_rate": 1.1771097559718581e-06, + "loss": 0.463434636592865, + "step": 4088 + }, + { + "epoch": 0.9428176158635001, + "grad_norm": 1.3059771334220434, + "learning_rate": 1.1767345091825962e-06, + "loss": 0.4249681234359741, + "step": 4089 + }, + { + "epoch": 0.9430481899930828, + "grad_norm": 1.322875104249168, + "learning_rate": 1.176359236703182e-06, + "loss": 0.39353805780410767, + "step": 4090 + }, + { + "epoch": 0.9432787641226654, + "grad_norm": 1.1645299347166784, + "learning_rate": 1.1759839385881657e-06, + "loss": 0.4554273188114166, + "step": 4091 + }, + { + "epoch": 0.9435093382522481, + "grad_norm": 1.5935626726835685, + "learning_rate": 1.1756086148921005e-06, + "loss": 0.6275606155395508, + "step": 4092 + }, + { + "epoch": 0.9437399123818307, + "grad_norm": 1.40548177481024, + "learning_rate": 1.1752332656695432e-06, + "loss": 0.5058892965316772, + "step": 4093 + }, + { + "epoch": 0.9439704865114135, + "grad_norm": 1.4618963991295721, + "learning_rate": 1.1748578909750547e-06, + "loss": 0.4318118095397949, + "step": 4094 + }, + { + "epoch": 0.9442010606409961, + "grad_norm": 1.5133013388223657, + "learning_rate": 1.1744824908631996e-06, + "loss": 0.4873964190483093, + "step": 4095 + }, + { + "epoch": 0.9444316347705788, + "grad_norm": 1.7199346017960337, + 
"learning_rate": 1.1741070653885467e-06, + "loss": 0.5026696920394897, + "step": 4096 + }, + { + "epoch": 0.9446622089001614, + "grad_norm": 1.1838920009196625, + "learning_rate": 1.1737316146056667e-06, + "loss": 0.4337490200996399, + "step": 4097 + }, + { + "epoch": 0.9448927830297441, + "grad_norm": 1.4841621540296046, + "learning_rate": 1.173356138569136e-06, + "loss": 0.4552634358406067, + "step": 4098 + }, + { + "epoch": 0.9451233571593267, + "grad_norm": 1.50340660176824, + "learning_rate": 1.1729806373335336e-06, + "loss": 0.4631303548812866, + "step": 4099 + }, + { + "epoch": 0.9453539312889094, + "grad_norm": 1.2840677998534646, + "learning_rate": 1.1726051109534424e-06, + "loss": 0.5004513263702393, + "step": 4100 + }, + { + "epoch": 0.945584505418492, + "grad_norm": 1.4218926297879624, + "learning_rate": 1.172229559483449e-06, + "loss": 0.4634668827056885, + "step": 4101 + }, + { + "epoch": 0.9458150795480748, + "grad_norm": 1.3580815662313042, + "learning_rate": 1.171853982978144e-06, + "loss": 0.4034295678138733, + "step": 4102 + }, + { + "epoch": 0.9460456536776574, + "grad_norm": 1.4066326558267837, + "learning_rate": 1.1714783814921206e-06, + "loss": 0.4981224536895752, + "step": 4103 + }, + { + "epoch": 0.9462762278072401, + "grad_norm": 1.637441573047362, + "learning_rate": 1.1711027550799767e-06, + "loss": 0.460249125957489, + "step": 4104 + }, + { + "epoch": 0.9465068019368227, + "grad_norm": 1.7282687422797383, + "learning_rate": 1.170727103796313e-06, + "loss": 0.4794936180114746, + "step": 4105 + }, + { + "epoch": 0.9467373760664054, + "grad_norm": 1.679442128589896, + "learning_rate": 1.170351427695735e-06, + "loss": 0.42724454402923584, + "step": 4106 + }, + { + "epoch": 0.946967950195988, + "grad_norm": 1.5092304593591768, + "learning_rate": 1.16997572683285e-06, + "loss": 0.4612593948841095, + "step": 4107 + }, + { + "epoch": 0.9471985243255707, + "grad_norm": 1.4462371891962704, + "learning_rate": 1.169600001262271e-06, + "loss": 
0.49512046575546265, + "step": 4108 + }, + { + "epoch": 0.9474290984551533, + "grad_norm": 1.382963972341291, + "learning_rate": 1.1692242510386124e-06, + "loss": 0.49438196420669556, + "step": 4109 + }, + { + "epoch": 0.947659672584736, + "grad_norm": 1.246967438511099, + "learning_rate": 1.1688484762164938e-06, + "loss": 0.4833865165710449, + "step": 4110 + }, + { + "epoch": 0.9478902467143187, + "grad_norm": 1.6394354229670154, + "learning_rate": 1.1684726768505385e-06, + "loss": 0.49647942185401917, + "step": 4111 + }, + { + "epoch": 0.9481208208439014, + "grad_norm": 1.3141370309593936, + "learning_rate": 1.1680968529953718e-06, + "loss": 0.4299147129058838, + "step": 4112 + }, + { + "epoch": 0.948351394973484, + "grad_norm": 1.2751791494481195, + "learning_rate": 1.167721004705624e-06, + "loss": 0.42613041400909424, + "step": 4113 + }, + { + "epoch": 0.9485819691030667, + "grad_norm": 1.5850112492057793, + "learning_rate": 1.1673451320359284e-06, + "loss": 0.3989883065223694, + "step": 4114 + }, + { + "epoch": 0.9488125432326493, + "grad_norm": 1.6195345588406382, + "learning_rate": 1.1669692350409222e-06, + "loss": 0.41362684965133667, + "step": 4115 + }, + { + "epoch": 0.9490431173622319, + "grad_norm": 1.3043186455514282, + "learning_rate": 1.1665933137752452e-06, + "loss": 0.3807048201560974, + "step": 4116 + }, + { + "epoch": 0.9492736914918146, + "grad_norm": 1.452270133487064, + "learning_rate": 1.1662173682935414e-06, + "loss": 0.3440876007080078, + "step": 4117 + }, + { + "epoch": 0.9495042656213972, + "grad_norm": 1.5051121617765968, + "learning_rate": 1.165841398650459e-06, + "loss": 0.43534499406814575, + "step": 4118 + }, + { + "epoch": 0.9497348397509799, + "grad_norm": 1.2124174426672352, + "learning_rate": 1.1654654049006484e-06, + "loss": 0.4900544285774231, + "step": 4119 + }, + { + "epoch": 0.9499654138805625, + "grad_norm": 1.4219346573372744, + "learning_rate": 1.1650893870987643e-06, + "loss": 0.5189288854598999, + "step": 4120 + }, + { 
+ "epoch": 0.9501959880101453, + "grad_norm": 1.5561303354373495, + "learning_rate": 1.1647133452994643e-06, + "loss": 0.587873101234436, + "step": 4121 + }, + { + "epoch": 0.9504265621397279, + "grad_norm": 1.2947612520331362, + "learning_rate": 1.1643372795574106e-06, + "loss": 0.4367108941078186, + "step": 4122 + }, + { + "epoch": 0.9506571362693106, + "grad_norm": 1.3855876287330298, + "learning_rate": 1.1639611899272679e-06, + "loss": 0.4121246635913849, + "step": 4123 + }, + { + "epoch": 0.9508877103988932, + "grad_norm": 1.371083137252789, + "learning_rate": 1.1635850764637042e-06, + "loss": 0.4993973672389984, + "step": 4124 + }, + { + "epoch": 0.9511182845284759, + "grad_norm": 1.3729377845652901, + "learning_rate": 1.163208939221392e-06, + "loss": 0.39145413041114807, + "step": 4125 + }, + { + "epoch": 0.9513488586580585, + "grad_norm": 1.5515816392895183, + "learning_rate": 1.1628327782550065e-06, + "loss": 0.45954760909080505, + "step": 4126 + }, + { + "epoch": 0.9515794327876412, + "grad_norm": 1.5137997254417062, + "learning_rate": 1.1624565936192263e-06, + "loss": 0.5159680843353271, + "step": 4127 + }, + { + "epoch": 0.9518100069172238, + "grad_norm": 1.5429829982679306, + "learning_rate": 1.1620803853687337e-06, + "loss": 0.4441346228122711, + "step": 4128 + }, + { + "epoch": 0.9520405810468066, + "grad_norm": 1.1994992888255296, + "learning_rate": 1.1617041535582144e-06, + "loss": 0.3842248320579529, + "step": 4129 + }, + { + "epoch": 0.9522711551763892, + "grad_norm": 1.5742838715827387, + "learning_rate": 1.1613278982423577e-06, + "loss": 0.5332437753677368, + "step": 4130 + }, + { + "epoch": 0.9525017293059719, + "grad_norm": 1.416443461852387, + "learning_rate": 1.160951619475856e-06, + "loss": 0.4265931248664856, + "step": 4131 + }, + { + "epoch": 0.9527323034355545, + "grad_norm": 1.344407559333665, + "learning_rate": 1.1605753173134052e-06, + "loss": 0.47442418336868286, + "step": 4132 + }, + { + "epoch": 0.9529628775651372, + "grad_norm": 
1.4385000789860496, + "learning_rate": 1.1601989918097044e-06, + "loss": 0.6128898859024048, + "step": 4133 + }, + { + "epoch": 0.9531934516947198, + "grad_norm": 1.3167710707989233, + "learning_rate": 1.159822643019457e-06, + "loss": 0.5347775220870972, + "step": 4134 + }, + { + "epoch": 0.9534240258243025, + "grad_norm": 1.1478699481046142, + "learning_rate": 1.1594462709973682e-06, + "loss": 0.39984625577926636, + "step": 4135 + }, + { + "epoch": 0.9536545999538851, + "grad_norm": 1.411910940206958, + "learning_rate": 1.1590698757981483e-06, + "loss": 0.5146951675415039, + "step": 4136 + }, + { + "epoch": 0.9538851740834678, + "grad_norm": 1.4057451726772026, + "learning_rate": 1.1586934574765097e-06, + "loss": 0.3589641749858856, + "step": 4137 + }, + { + "epoch": 0.9541157482130505, + "grad_norm": 1.4047870659239305, + "learning_rate": 1.1583170160871689e-06, + "loss": 0.428930401802063, + "step": 4138 + }, + { + "epoch": 0.9543463223426332, + "grad_norm": 1.3760779428564116, + "learning_rate": 1.1579405516848452e-06, + "loss": 0.46921080350875854, + "step": 4139 + }, + { + "epoch": 0.9545768964722158, + "grad_norm": 1.462957669946579, + "learning_rate": 1.1575640643242616e-06, + "loss": 0.39079514145851135, + "step": 4140 + }, + { + "epoch": 0.9548074706017985, + "grad_norm": 1.5322762323160557, + "learning_rate": 1.1571875540601443e-06, + "loss": 0.4475102424621582, + "step": 4141 + }, + { + "epoch": 0.9550380447313811, + "grad_norm": 1.3964952325110702, + "learning_rate": 1.1568110209472232e-06, + "loss": 0.43881016969680786, + "step": 4142 + }, + { + "epoch": 0.9552686188609638, + "grad_norm": 1.2846843095885363, + "learning_rate": 1.156434465040231e-06, + "loss": 0.4382214844226837, + "step": 4143 + }, + { + "epoch": 0.9554991929905464, + "grad_norm": 1.6590322564778253, + "learning_rate": 1.1560578863939037e-06, + "loss": 0.5390958786010742, + "step": 4144 + }, + { + "epoch": 0.9557297671201291, + "grad_norm": 1.2966408722030756, + "learning_rate": 
1.155681285062981e-06, + "loss": 0.4276137948036194, + "step": 4145 + }, + { + "epoch": 0.9559603412497117, + "grad_norm": 1.3756682316204962, + "learning_rate": 1.1553046611022058e-06, + "loss": 0.4541968107223511, + "step": 4146 + }, + { + "epoch": 0.9561909153792945, + "grad_norm": 1.4806679512404375, + "learning_rate": 1.1549280145663242e-06, + "loss": 0.43287473917007446, + "step": 4147 + }, + { + "epoch": 0.9564214895088771, + "grad_norm": 1.5507500145218385, + "learning_rate": 1.1545513455100855e-06, + "loss": 0.432822585105896, + "step": 4148 + }, + { + "epoch": 0.9566520636384598, + "grad_norm": 1.4662390355071035, + "learning_rate": 1.1541746539882424e-06, + "loss": 0.519271969795227, + "step": 4149 + }, + { + "epoch": 0.9568826377680424, + "grad_norm": 1.4521470663351335, + "learning_rate": 1.1537979400555506e-06, + "loss": 0.4158627390861511, + "step": 4150 + }, + { + "epoch": 0.9571132118976251, + "grad_norm": 1.4834584070713739, + "learning_rate": 1.1534212037667698e-06, + "loss": 0.42122989892959595, + "step": 4151 + }, + { + "epoch": 0.9573437860272077, + "grad_norm": 1.696588703842723, + "learning_rate": 1.1530444451766623e-06, + "loss": 0.4141794443130493, + "step": 4152 + }, + { + "epoch": 0.9575743601567904, + "grad_norm": 1.3149219500885996, + "learning_rate": 1.1526676643399933e-06, + "loss": 0.4935780167579651, + "step": 4153 + }, + { + "epoch": 0.957804934286373, + "grad_norm": 1.3661965645097156, + "learning_rate": 1.152290861311532e-06, + "loss": 0.5075733661651611, + "step": 4154 + }, + { + "epoch": 0.9580355084159557, + "grad_norm": 1.37824406851626, + "learning_rate": 1.151914036146051e-06, + "loss": 0.4852841794490814, + "step": 4155 + }, + { + "epoch": 0.9582660825455384, + "grad_norm": 1.2576277022731817, + "learning_rate": 1.151537188898325e-06, + "loss": 0.46114620566368103, + "step": 4156 + }, + { + "epoch": 0.9584966566751211, + "grad_norm": 1.6662322349225411, + "learning_rate": 1.1511603196231327e-06, + "loss": 
0.519254207611084, + "step": 4157 + }, + { + "epoch": 0.9587272308047037, + "grad_norm": 1.3283960828325414, + "learning_rate": 1.1507834283752562e-06, + "loss": 0.43635690212249756, + "step": 4158 + }, + { + "epoch": 0.9589578049342864, + "grad_norm": 1.3730336798021219, + "learning_rate": 1.1504065152094802e-06, + "loss": 0.48448023200035095, + "step": 4159 + }, + { + "epoch": 0.959188379063869, + "grad_norm": 1.320755520801986, + "learning_rate": 1.1500295801805927e-06, + "loss": 0.4461054801940918, + "step": 4160 + }, + { + "epoch": 0.9594189531934517, + "grad_norm": 1.3183810948385437, + "learning_rate": 1.1496526233433852e-06, + "loss": 0.44869595766067505, + "step": 4161 + }, + { + "epoch": 0.9596495273230343, + "grad_norm": 1.5137169599039804, + "learning_rate": 1.1492756447526524e-06, + "loss": 0.4592103660106659, + "step": 4162 + }, + { + "epoch": 0.959880101452617, + "grad_norm": 1.3625000210250673, + "learning_rate": 1.1488986444631918e-06, + "loss": 0.48352301120758057, + "step": 4163 + }, + { + "epoch": 0.9601106755821996, + "grad_norm": 1.2039059688900335, + "learning_rate": 1.1485216225298043e-06, + "loss": 0.44718503952026367, + "step": 4164 + }, + { + "epoch": 0.9603412497117824, + "grad_norm": 1.7796976813489804, + "learning_rate": 1.1481445790072933e-06, + "loss": 0.44659486413002014, + "step": 4165 + }, + { + "epoch": 0.960571823841365, + "grad_norm": 1.464260426957605, + "learning_rate": 1.1477675139504665e-06, + "loss": 0.5143063068389893, + "step": 4166 + }, + { + "epoch": 0.9608023979709477, + "grad_norm": 1.825014649582591, + "learning_rate": 1.1473904274141344e-06, + "loss": 0.6708887815475464, + "step": 4167 + }, + { + "epoch": 0.9610329721005303, + "grad_norm": 1.4397638416262573, + "learning_rate": 1.1470133194531094e-06, + "loss": 0.3889666199684143, + "step": 4168 + }, + { + "epoch": 0.961263546230113, + "grad_norm": 1.2805774485856607, + "learning_rate": 1.1466361901222086e-06, + "loss": 0.4610622227191925, + "step": 4169 + }, + { + 
"epoch": 0.9614941203596956, + "grad_norm": 1.4320030308850267, + "learning_rate": 1.1462590394762514e-06, + "loss": 0.46372538805007935, + "step": 4170 + }, + { + "epoch": 0.9617246944892783, + "grad_norm": 1.5638922992309852, + "learning_rate": 1.1458818675700607e-06, + "loss": 0.5197097063064575, + "step": 4171 + }, + { + "epoch": 0.9619552686188609, + "grad_norm": 1.2417860513603916, + "learning_rate": 1.145504674458462e-06, + "loss": 0.3849745988845825, + "step": 4172 + }, + { + "epoch": 0.9621858427484437, + "grad_norm": 1.5196854039542969, + "learning_rate": 1.1451274601962841e-06, + "loss": 0.4572817385196686, + "step": 4173 + }, + { + "epoch": 0.9624164168780263, + "grad_norm": 1.4154832612934123, + "learning_rate": 1.1447502248383594e-06, + "loss": 0.4383746385574341, + "step": 4174 + }, + { + "epoch": 0.962646991007609, + "grad_norm": 1.473681287130909, + "learning_rate": 1.1443729684395222e-06, + "loss": 0.5319672226905823, + "step": 4175 + }, + { + "epoch": 0.9628775651371916, + "grad_norm": 1.2307542062760268, + "learning_rate": 1.143995691054611e-06, + "loss": 0.4351249933242798, + "step": 4176 + }, + { + "epoch": 0.9631081392667743, + "grad_norm": 1.42416527435209, + "learning_rate": 1.1436183927384668e-06, + "loss": 0.5453774929046631, + "step": 4177 + }, + { + "epoch": 0.9633387133963569, + "grad_norm": 1.569291329857932, + "learning_rate": 1.1432410735459336e-06, + "loss": 0.5605905055999756, + "step": 4178 + }, + { + "epoch": 0.9635692875259396, + "grad_norm": 1.3825364023898294, + "learning_rate": 1.1428637335318587e-06, + "loss": 0.4556693434715271, + "step": 4179 + }, + { + "epoch": 0.9637998616555222, + "grad_norm": 1.316766347101971, + "learning_rate": 1.142486372751092e-06, + "loss": 0.45428892970085144, + "step": 4180 + }, + { + "epoch": 0.9640304357851049, + "grad_norm": 1.4252168865652697, + "learning_rate": 1.142108991258487e-06, + "loss": 0.4897412657737732, + "step": 4181 + }, + { + "epoch": 0.9642610099146876, + "grad_norm": 
1.984637391356181, + "learning_rate": 1.1417315891089004e-06, + "loss": 0.5478836894035339, + "step": 4182 + }, + { + "epoch": 0.9644915840442703, + "grad_norm": 1.4620834191298895, + "learning_rate": 1.1413541663571904e-06, + "loss": 0.42394131422042847, + "step": 4183 + }, + { + "epoch": 0.9647221581738529, + "grad_norm": 1.585175673978148, + "learning_rate": 1.1409767230582199e-06, + "loss": 0.5047104954719543, + "step": 4184 + }, + { + "epoch": 0.9649527323034356, + "grad_norm": 1.4749915601759833, + "learning_rate": 1.1405992592668538e-06, + "loss": 0.43985825777053833, + "step": 4185 + }, + { + "epoch": 0.9651833064330182, + "grad_norm": 1.3061643078097422, + "learning_rate": 1.1402217750379608e-06, + "loss": 0.4338407516479492, + "step": 4186 + }, + { + "epoch": 0.9654138805626009, + "grad_norm": 1.5404850502320075, + "learning_rate": 1.1398442704264118e-06, + "loss": 0.4532614052295685, + "step": 4187 + }, + { + "epoch": 0.9656444546921835, + "grad_norm": 1.2345047018331374, + "learning_rate": 1.1394667454870802e-06, + "loss": 0.4546123445034027, + "step": 4188 + }, + { + "epoch": 0.9658750288217662, + "grad_norm": 1.5321856096614175, + "learning_rate": 1.139089200274844e-06, + "loss": 0.44743451476097107, + "step": 4189 + }, + { + "epoch": 0.9661056029513488, + "grad_norm": 1.3411063865526411, + "learning_rate": 1.138711634844583e-06, + "loss": 0.4566968083381653, + "step": 4190 + }, + { + "epoch": 0.9663361770809316, + "grad_norm": 1.481468600614622, + "learning_rate": 1.13833404925118e-06, + "loss": 0.46385467052459717, + "step": 4191 + }, + { + "epoch": 0.9665667512105142, + "grad_norm": 1.2411450691863102, + "learning_rate": 1.137956443549521e-06, + "loss": 0.4614461660385132, + "step": 4192 + }, + { + "epoch": 0.9667973253400969, + "grad_norm": 1.3326432316915904, + "learning_rate": 1.1375788177944945e-06, + "loss": 0.4351955056190491, + "step": 4193 + }, + { + "epoch": 0.9670278994696795, + "grad_norm": 1.368161025215393, + "learning_rate": 
1.1372011720409927e-06, + "loss": 0.4172135591506958, + "step": 4194 + }, + { + "epoch": 0.9672584735992622, + "grad_norm": 1.6941620223477674, + "learning_rate": 1.1368235063439102e-06, + "loss": 0.5482916831970215, + "step": 4195 + }, + { + "epoch": 0.9674890477288448, + "grad_norm": 1.3508434751874687, + "learning_rate": 1.136445820758144e-06, + "loss": 0.4336891770362854, + "step": 4196 + }, + { + "epoch": 0.9677196218584275, + "grad_norm": 1.5072664158429512, + "learning_rate": 1.1360681153385956e-06, + "loss": 0.42612385749816895, + "step": 4197 + }, + { + "epoch": 0.9679501959880101, + "grad_norm": 1.5000454097568379, + "learning_rate": 1.135690390140167e-06, + "loss": 0.513736367225647, + "step": 4198 + }, + { + "epoch": 0.9681807701175928, + "grad_norm": 1.8279069537189752, + "learning_rate": 1.1353126452177656e-06, + "loss": 0.45551058650016785, + "step": 4199 + }, + { + "epoch": 0.9684113442471755, + "grad_norm": 1.3479770342549766, + "learning_rate": 1.1349348806262994e-06, + "loss": 0.45450061559677124, + "step": 4200 + }, + { + "epoch": 0.9686419183767582, + "grad_norm": 1.5942392384347237, + "learning_rate": 1.1345570964206807e-06, + "loss": 0.43962353467941284, + "step": 4201 + }, + { + "epoch": 0.9688724925063408, + "grad_norm": 1.4695533515040724, + "learning_rate": 1.1341792926558245e-06, + "loss": 0.5304821729660034, + "step": 4202 + }, + { + "epoch": 0.9691030666359235, + "grad_norm": 1.57215629996827, + "learning_rate": 1.1338014693866483e-06, + "loss": 0.6079045534133911, + "step": 4203 + }, + { + "epoch": 0.9693336407655061, + "grad_norm": 1.3451772860900804, + "learning_rate": 1.1334236266680724e-06, + "loss": 0.39895182847976685, + "step": 4204 + }, + { + "epoch": 0.9695642148950888, + "grad_norm": 1.4224201035305835, + "learning_rate": 1.1330457645550202e-06, + "loss": 0.5264945030212402, + "step": 4205 + }, + { + "epoch": 0.9697947890246714, + "grad_norm": 1.3209691457440123, + "learning_rate": 1.1326678831024178e-06, + "loss": 
0.4794533848762512, + "step": 4206 + }, + { + "epoch": 0.9700253631542541, + "grad_norm": 1.472204632290126, + "learning_rate": 1.1322899823651938e-06, + "loss": 0.42917680740356445, + "step": 4207 + }, + { + "epoch": 0.9702559372838367, + "grad_norm": 1.4163025348687577, + "learning_rate": 1.1319120623982804e-06, + "loss": 0.42155951261520386, + "step": 4208 + }, + { + "epoch": 0.9704865114134195, + "grad_norm": 1.455345134423215, + "learning_rate": 1.1315341232566121e-06, + "loss": 0.5119719505310059, + "step": 4209 + }, + { + "epoch": 0.9707170855430021, + "grad_norm": 1.4441630965274395, + "learning_rate": 1.1311561649951255e-06, + "loss": 0.5261529684066772, + "step": 4210 + }, + { + "epoch": 0.9709476596725848, + "grad_norm": 1.3046857195112773, + "learning_rate": 1.1307781876687609e-06, + "loss": 0.5133010149002075, + "step": 4211 + }, + { + "epoch": 0.9711782338021674, + "grad_norm": 1.4061037707348525, + "learning_rate": 1.1304001913324617e-06, + "loss": 0.5214196443557739, + "step": 4212 + }, + { + "epoch": 0.9714088079317501, + "grad_norm": 1.4191122003483587, + "learning_rate": 1.1300221760411732e-06, + "loss": 0.4665095806121826, + "step": 4213 + }, + { + "epoch": 0.9716393820613327, + "grad_norm": 1.2917310787961995, + "learning_rate": 1.1296441418498435e-06, + "loss": 0.44912537932395935, + "step": 4214 + }, + { + "epoch": 0.9718699561909154, + "grad_norm": 1.384060094796334, + "learning_rate": 1.1292660888134241e-06, + "loss": 0.48622840642929077, + "step": 4215 + }, + { + "epoch": 0.972100530320498, + "grad_norm": 1.3952506250953003, + "learning_rate": 1.128888016986868e-06, + "loss": 0.40099745988845825, + "step": 4216 + }, + { + "epoch": 0.9723311044500808, + "grad_norm": 1.6661609433762745, + "learning_rate": 1.1285099264251331e-06, + "loss": 0.4981631934642792, + "step": 4217 + }, + { + "epoch": 0.9725616785796634, + "grad_norm": 1.3061541456837051, + "learning_rate": 1.1281318171831778e-06, + "loss": 0.3902980387210846, + "step": 4218 + }, + { 
+ "epoch": 0.9727922527092461, + "grad_norm": 1.646940009523485, + "learning_rate": 1.1277536893159641e-06, + "loss": 0.5120723843574524, + "step": 4219 + }, + { + "epoch": 0.9730228268388287, + "grad_norm": 1.4050676349560098, + "learning_rate": 1.1273755428784568e-06, + "loss": 0.47908157110214233, + "step": 4220 + }, + { + "epoch": 0.9732534009684114, + "grad_norm": 1.3980215754858654, + "learning_rate": 1.126997377925624e-06, + "loss": 0.44935697317123413, + "step": 4221 + }, + { + "epoch": 0.973483975097994, + "grad_norm": 1.7936737063106103, + "learning_rate": 1.1266191945124345e-06, + "loss": 0.46883124113082886, + "step": 4222 + }, + { + "epoch": 0.9737145492275767, + "grad_norm": 1.3605023071963889, + "learning_rate": 1.1262409926938622e-06, + "loss": 0.41385799646377563, + "step": 4223 + }, + { + "epoch": 0.9739451233571593, + "grad_norm": 1.352097187992639, + "learning_rate": 1.1258627725248821e-06, + "loss": 0.5450118780136108, + "step": 4224 + }, + { + "epoch": 0.974175697486742, + "grad_norm": 1.3149598759310381, + "learning_rate": 1.1254845340604725e-06, + "loss": 0.4728820323944092, + "step": 4225 + }, + { + "epoch": 0.9744062716163246, + "grad_norm": 1.490906480143449, + "learning_rate": 1.1251062773556143e-06, + "loss": 0.5111296772956848, + "step": 4226 + }, + { + "epoch": 0.9746368457459073, + "grad_norm": 1.6529549144482583, + "learning_rate": 1.1247280024652908e-06, + "loss": 0.4538743793964386, + "step": 4227 + }, + { + "epoch": 0.97486741987549, + "grad_norm": 1.4130886870951611, + "learning_rate": 1.1243497094444877e-06, + "loss": 0.4917091131210327, + "step": 4228 + }, + { + "epoch": 0.9750979940050726, + "grad_norm": 1.387244231549714, + "learning_rate": 1.1239713983481945e-06, + "loss": 0.40376198291778564, + "step": 4229 + }, + { + "epoch": 0.9753285681346553, + "grad_norm": 1.4554658551428983, + "learning_rate": 1.1235930692314019e-06, + "loss": 0.5356566905975342, + "step": 4230 + }, + { + "epoch": 0.9755591422642379, + "grad_norm": 
1.4359135131794967, + "learning_rate": 1.123214722149104e-06, + "loss": 0.4374624490737915, + "step": 4231 + }, + { + "epoch": 0.9757897163938206, + "grad_norm": 1.4746549529981767, + "learning_rate": 1.1228363571562976e-06, + "loss": 0.4225429594516754, + "step": 4232 + }, + { + "epoch": 0.9760202905234032, + "grad_norm": 1.4500544144002923, + "learning_rate": 1.1224579743079819e-06, + "loss": 0.5389699935913086, + "step": 4233 + }, + { + "epoch": 0.9762508646529859, + "grad_norm": 1.39848035447059, + "learning_rate": 1.1220795736591584e-06, + "loss": 0.4925463795661926, + "step": 4234 + }, + { + "epoch": 0.9764814387825685, + "grad_norm": 1.2916834361485914, + "learning_rate": 1.1217011552648315e-06, + "loss": 0.4694328308105469, + "step": 4235 + }, + { + "epoch": 0.9767120129121513, + "grad_norm": 1.377557176325016, + "learning_rate": 1.1213227191800086e-06, + "loss": 0.39887624979019165, + "step": 4236 + }, + { + "epoch": 0.9769425870417339, + "grad_norm": 1.5555659299458584, + "learning_rate": 1.120944265459699e-06, + "loss": 0.4930388927459717, + "step": 4237 + }, + { + "epoch": 0.9771731611713166, + "grad_norm": 1.2486101676760866, + "learning_rate": 1.1205657941589143e-06, + "loss": 0.4595404863357544, + "step": 4238 + }, + { + "epoch": 0.9774037353008992, + "grad_norm": 1.4574273243269236, + "learning_rate": 1.1201873053326695e-06, + "loss": 0.44177496433258057, + "step": 4239 + }, + { + "epoch": 0.9776343094304819, + "grad_norm": 1.4308970126871865, + "learning_rate": 1.119808799035982e-06, + "loss": 0.47095373272895813, + "step": 4240 + }, + { + "epoch": 0.9778648835600645, + "grad_norm": 1.4049777741841016, + "learning_rate": 1.1194302753238716e-06, + "loss": 0.4649583697319031, + "step": 4241 + }, + { + "epoch": 0.9780954576896472, + "grad_norm": 1.5269711326381101, + "learning_rate": 1.1190517342513598e-06, + "loss": 0.44815266132354736, + "step": 4242 + }, + { + "epoch": 0.9783260318192298, + "grad_norm": 1.462868793648971, + "learning_rate": 
1.118673175873472e-06, + "loss": 0.4861665368080139, + "step": 4243 + }, + { + "epoch": 0.9785566059488126, + "grad_norm": 1.3395897424173215, + "learning_rate": 1.1182946002452354e-06, + "loss": 0.5196468830108643, + "step": 4244 + }, + { + "epoch": 0.9787871800783952, + "grad_norm": 1.5910002582718288, + "learning_rate": 1.11791600742168e-06, + "loss": 0.49746841192245483, + "step": 4245 + }, + { + "epoch": 0.9790177542079779, + "grad_norm": 1.2919062217717159, + "learning_rate": 1.1175373974578377e-06, + "loss": 0.4637739956378937, + "step": 4246 + }, + { + "epoch": 0.9792483283375605, + "grad_norm": 1.228394275609753, + "learning_rate": 1.1171587704087434e-06, + "loss": 0.46009692549705505, + "step": 4247 + }, + { + "epoch": 0.9794789024671432, + "grad_norm": 2.1569798034684706, + "learning_rate": 1.1167801263294346e-06, + "loss": 0.49036258459091187, + "step": 4248 + }, + { + "epoch": 0.9797094765967258, + "grad_norm": 1.395933426650918, + "learning_rate": 1.1164014652749509e-06, + "loss": 0.4730580449104309, + "step": 4249 + }, + { + "epoch": 0.9799400507263085, + "grad_norm": 1.618438538763921, + "learning_rate": 1.1160227873003345e-06, + "loss": 0.5029968023300171, + "step": 4250 + }, + { + "epoch": 0.9801706248558911, + "grad_norm": 1.4870951402562973, + "learning_rate": 1.1156440924606299e-06, + "loss": 0.5149805545806885, + "step": 4251 + }, + { + "epoch": 0.9804011989854738, + "grad_norm": 1.6248587467562292, + "learning_rate": 1.1152653808108845e-06, + "loss": 0.5017384886741638, + "step": 4252 + }, + { + "epoch": 0.9806317731150564, + "grad_norm": 1.486462967422998, + "learning_rate": 1.114886652406148e-06, + "loss": 0.47569048404693604, + "step": 4253 + }, + { + "epoch": 0.9808623472446392, + "grad_norm": 1.4476623501612873, + "learning_rate": 1.1145079073014722e-06, + "loss": 0.5127655863761902, + "step": 4254 + }, + { + "epoch": 0.9810929213742218, + "grad_norm": 1.4943063660203757, + "learning_rate": 1.1141291455519114e-06, + "loss": 
0.4014360308647156, + "step": 4255 + }, + { + "epoch": 0.9813234955038045, + "grad_norm": 1.4814879590427052, + "learning_rate": 1.1137503672125228e-06, + "loss": 0.43737465143203735, + "step": 4256 + }, + { + "epoch": 0.9815540696333871, + "grad_norm": 1.413525212350489, + "learning_rate": 1.1133715723383655e-06, + "loss": 0.4389764070510864, + "step": 4257 + }, + { + "epoch": 0.9817846437629698, + "grad_norm": 1.3532173754404184, + "learning_rate": 1.112992760984501e-06, + "loss": 0.5105381608009338, + "step": 4258 + }, + { + "epoch": 0.9820152178925524, + "grad_norm": 1.4052776017835835, + "learning_rate": 1.1126139332059937e-06, + "loss": 0.4393002688884735, + "step": 4259 + }, + { + "epoch": 0.9822457920221351, + "grad_norm": 1.3179147448132482, + "learning_rate": 1.1122350890579102e-06, + "loss": 0.541419267654419, + "step": 4260 + }, + { + "epoch": 0.9824763661517177, + "grad_norm": 1.5177150542407778, + "learning_rate": 1.1118562285953186e-06, + "loss": 0.4153546094894409, + "step": 4261 + }, + { + "epoch": 0.9827069402813005, + "grad_norm": 1.4649176443917427, + "learning_rate": 1.1114773518732907e-06, + "loss": 0.5060696601867676, + "step": 4262 + }, + { + "epoch": 0.9829375144108831, + "grad_norm": 1.6266321171712574, + "learning_rate": 1.1110984589468998e-06, + "loss": 0.5975456237792969, + "step": 4263 + }, + { + "epoch": 0.9831680885404658, + "grad_norm": 1.4920078622156363, + "learning_rate": 1.110719549871222e-06, + "loss": 0.5729621648788452, + "step": 4264 + }, + { + "epoch": 0.9833986626700484, + "grad_norm": 1.3838030985279757, + "learning_rate": 1.1103406247013356e-06, + "loss": 0.3948165476322174, + "step": 4265 + }, + { + "epoch": 0.9836292367996311, + "grad_norm": 1.3893062538653607, + "learning_rate": 1.1099616834923212e-06, + "loss": 0.41744932532310486, + "step": 4266 + }, + { + "epoch": 0.9838598109292137, + "grad_norm": 1.3638196246051946, + "learning_rate": 1.1095827262992611e-06, + "loss": 0.4701330065727234, + "step": 4267 + }, + { + 
"epoch": 0.9840903850587964, + "grad_norm": 1.4764746527882953, + "learning_rate": 1.109203753177242e-06, + "loss": 0.4841681718826294, + "step": 4268 + }, + { + "epoch": 0.984320959188379, + "grad_norm": 1.3604414964396274, + "learning_rate": 1.10882476418135e-06, + "loss": 0.4180435538291931, + "step": 4269 + }, + { + "epoch": 0.9845515333179617, + "grad_norm": 1.4211218067668543, + "learning_rate": 1.1084457593666758e-06, + "loss": 0.39362633228302, + "step": 4270 + }, + { + "epoch": 0.9847821074475444, + "grad_norm": 1.4239354595534417, + "learning_rate": 1.1080667387883116e-06, + "loss": 0.5192993879318237, + "step": 4271 + }, + { + "epoch": 0.9850126815771271, + "grad_norm": 1.5201720088447181, + "learning_rate": 1.1076877025013517e-06, + "loss": 0.48835504055023193, + "step": 4272 + }, + { + "epoch": 0.9852432557067097, + "grad_norm": 1.5142338003412266, + "learning_rate": 1.1073086505608925e-06, + "loss": 0.44442474842071533, + "step": 4273 + }, + { + "epoch": 0.9854738298362924, + "grad_norm": 1.3436041344969518, + "learning_rate": 1.1069295830220339e-06, + "loss": 0.4544455409049988, + "step": 4274 + }, + { + "epoch": 0.985704403965875, + "grad_norm": 1.5833831369807498, + "learning_rate": 1.106550499939876e-06, + "loss": 0.482341468334198, + "step": 4275 + }, + { + "epoch": 0.9859349780954577, + "grad_norm": 1.421534858967002, + "learning_rate": 1.1061714013695236e-06, + "loss": 0.5251357555389404, + "step": 4276 + }, + { + "epoch": 0.9861655522250403, + "grad_norm": 1.2537356796939523, + "learning_rate": 1.1057922873660819e-06, + "loss": 0.4538683295249939, + "step": 4277 + }, + { + "epoch": 0.986396126354623, + "grad_norm": 2.0128553783671728, + "learning_rate": 1.105413157984659e-06, + "loss": 0.5112448930740356, + "step": 4278 + }, + { + "epoch": 0.9866267004842056, + "grad_norm": 1.4914994042257563, + "learning_rate": 1.1050340132803654e-06, + "loss": 0.48863890767097473, + "step": 4279 + }, + { + "epoch": 0.9868572746137884, + "grad_norm": 
1.494741313695512, + "learning_rate": 1.1046548533083134e-06, + "loss": 0.43637439608573914, + "step": 4280 + }, + { + "epoch": 0.987087848743371, + "grad_norm": 1.5727176113962202, + "learning_rate": 1.104275678123618e-06, + "loss": 0.5231983065605164, + "step": 4281 + }, + { + "epoch": 0.9873184228729537, + "grad_norm": 1.7169447967595874, + "learning_rate": 1.1038964877813955e-06, + "loss": 0.46838122606277466, + "step": 4282 + }, + { + "epoch": 0.9875489970025363, + "grad_norm": 1.3537630033218837, + "learning_rate": 1.1035172823367658e-06, + "loss": 0.4330589473247528, + "step": 4283 + }, + { + "epoch": 0.987779571132119, + "grad_norm": 1.4178119046272273, + "learning_rate": 1.1031380618448501e-06, + "loss": 0.44962531328201294, + "step": 4284 + }, + { + "epoch": 0.9880101452617016, + "grad_norm": 1.3547255909489988, + "learning_rate": 1.1027588263607719e-06, + "loss": 0.44549795985221863, + "step": 4285 + }, + { + "epoch": 0.9882407193912843, + "grad_norm": 1.7082954293487662, + "learning_rate": 1.1023795759396568e-06, + "loss": 0.43510758876800537, + "step": 4286 + }, + { + "epoch": 0.9884712935208669, + "grad_norm": 1.3135837847563279, + "learning_rate": 1.1020003106366324e-06, + "loss": 0.4369906187057495, + "step": 4287 + }, + { + "epoch": 0.9887018676504497, + "grad_norm": 1.416650593568537, + "learning_rate": 1.1016210305068296e-06, + "loss": 0.42049574851989746, + "step": 4288 + }, + { + "epoch": 0.9889324417800323, + "grad_norm": 1.6285692706476314, + "learning_rate": 1.10124173560538e-06, + "loss": 0.449156790971756, + "step": 4289 + }, + { + "epoch": 0.989163015909615, + "grad_norm": 1.5784410678150576, + "learning_rate": 1.1008624259874177e-06, + "loss": 0.4736451506614685, + "step": 4290 + }, + { + "epoch": 0.9893935900391976, + "grad_norm": 1.3029401584123959, + "learning_rate": 1.10048310170808e-06, + "loss": 0.3988722860813141, + "step": 4291 + }, + { + "epoch": 0.9896241641687803, + "grad_norm": 1.4221756045070393, + "learning_rate": 
1.100103762822505e-06, + "loss": 0.44330862164497375, + "step": 4292 + }, + { + "epoch": 0.9898547382983629, + "grad_norm": 1.5471015099626197, + "learning_rate": 1.0997244093858336e-06, + "loss": 0.5294286608695984, + "step": 4293 + }, + { + "epoch": 0.9900853124279456, + "grad_norm": 1.3808712553027187, + "learning_rate": 1.0993450414532082e-06, + "loss": 0.463120698928833, + "step": 4294 + }, + { + "epoch": 0.9903158865575282, + "grad_norm": 1.294463919332552, + "learning_rate": 1.0989656590797747e-06, + "loss": 0.4481865167617798, + "step": 4295 + }, + { + "epoch": 0.9905464606871109, + "grad_norm": 1.4153337646078945, + "learning_rate": 1.0985862623206794e-06, + "loss": 0.4467630386352539, + "step": 4296 + }, + { + "epoch": 0.9907770348166935, + "grad_norm": 1.8865527079498654, + "learning_rate": 1.0982068512310717e-06, + "loss": 0.43485027551651, + "step": 4297 + }, + { + "epoch": 0.9910076089462763, + "grad_norm": 1.5277390713389145, + "learning_rate": 1.0978274258661032e-06, + "loss": 0.4556450843811035, + "step": 4298 + }, + { + "epoch": 0.9912381830758589, + "grad_norm": 1.4768070925377026, + "learning_rate": 1.0974479862809268e-06, + "loss": 0.48326122760772705, + "step": 4299 + }, + { + "epoch": 0.9914687572054416, + "grad_norm": 1.1782147993424035, + "learning_rate": 1.097068532530698e-06, + "loss": 0.42254534363746643, + "step": 4300 + }, + { + "epoch": 0.9916993313350242, + "grad_norm": 1.3623288149981243, + "learning_rate": 1.096689064670574e-06, + "loss": 0.4076887369155884, + "step": 4301 + }, + { + "epoch": 0.9919299054646069, + "grad_norm": 1.4246737986617306, + "learning_rate": 1.0963095827557146e-06, + "loss": 0.40615612268447876, + "step": 4302 + }, + { + "epoch": 0.9921604795941895, + "grad_norm": 1.391998245639926, + "learning_rate": 1.095930086841281e-06, + "loss": 0.47794467210769653, + "step": 4303 + }, + { + "epoch": 0.9923910537237722, + "grad_norm": 1.479591301344316, + "learning_rate": 1.0955505769824375e-06, + "loss": 
0.4927758574485779, + "step": 4304 + }, + { + "epoch": 0.9926216278533548, + "grad_norm": 1.1962407216416377, + "learning_rate": 1.0951710532343493e-06, + "loss": 0.40777790546417236, + "step": 4305 + }, + { + "epoch": 0.9928522019829376, + "grad_norm": 1.2781565166204398, + "learning_rate": 1.0947915156521837e-06, + "loss": 0.41996532678604126, + "step": 4306 + }, + { + "epoch": 0.9930827761125202, + "grad_norm": 1.3495931588969972, + "learning_rate": 1.0944119642911107e-06, + "loss": 0.4366680383682251, + "step": 4307 + }, + { + "epoch": 0.9933133502421029, + "grad_norm": 1.4609250216040512, + "learning_rate": 1.094032399206302e-06, + "loss": 0.5350530743598938, + "step": 4308 + }, + { + "epoch": 0.9935439243716855, + "grad_norm": 1.5545326791900604, + "learning_rate": 1.093652820452931e-06, + "loss": 0.5166209936141968, + "step": 4309 + }, + { + "epoch": 0.9937744985012682, + "grad_norm": 1.3624754056256652, + "learning_rate": 1.0932732280861734e-06, + "loss": 0.5104992389678955, + "step": 4310 + }, + { + "epoch": 0.9940050726308508, + "grad_norm": 1.293281056582964, + "learning_rate": 1.0928936221612068e-06, + "loss": 0.38249820470809937, + "step": 4311 + }, + { + "epoch": 0.9942356467604335, + "grad_norm": 1.5718744647134053, + "learning_rate": 1.0925140027332107e-06, + "loss": 0.4930746555328369, + "step": 4312 + }, + { + "epoch": 0.9944662208900161, + "grad_norm": 1.5006868919231642, + "learning_rate": 1.092134369857367e-06, + "loss": 0.46536654233932495, + "step": 4313 + }, + { + "epoch": 0.9946967950195988, + "grad_norm": 1.5384946564391833, + "learning_rate": 1.0917547235888582e-06, + "loss": 0.4591559171676636, + "step": 4314 + }, + { + "epoch": 0.9949273691491815, + "grad_norm": 1.609102883203802, + "learning_rate": 1.0913750639828709e-06, + "loss": 0.5034719705581665, + "step": 4315 + }, + { + "epoch": 0.9951579432787642, + "grad_norm": 1.3461654572756176, + "learning_rate": 1.0909953910945921e-06, + "loss": 0.5289135575294495, + "step": 4316 + }, + { 
+ "epoch": 0.9953885174083468, + "grad_norm": 1.5181970245510374, + "learning_rate": 1.090615704979211e-06, + "loss": 0.48736900091171265, + "step": 4317 + }, + { + "epoch": 0.9956190915379295, + "grad_norm": 1.347314123709775, + "learning_rate": 1.0902360056919186e-06, + "loss": 0.44812899827957153, + "step": 4318 + }, + { + "epoch": 0.9958496656675121, + "grad_norm": 1.717313100956624, + "learning_rate": 1.0898562932879083e-06, + "loss": 0.42837953567504883, + "step": 4319 + }, + { + "epoch": 0.9960802397970948, + "grad_norm": 1.3616068420969312, + "learning_rate": 1.089476567822375e-06, + "loss": 0.4946538805961609, + "step": 4320 + }, + { + "epoch": 0.9963108139266774, + "grad_norm": 1.3738772638549184, + "learning_rate": 1.089096829350516e-06, + "loss": 0.472694993019104, + "step": 4321 + }, + { + "epoch": 0.9965413880562601, + "grad_norm": 1.51102718471871, + "learning_rate": 1.0887170779275297e-06, + "loss": 0.546560525894165, + "step": 4322 + }, + { + "epoch": 0.9967719621858427, + "grad_norm": 1.7144585803126207, + "learning_rate": 1.088337313608617e-06, + "loss": 0.5098580718040466, + "step": 4323 + }, + { + "epoch": 0.9970025363154255, + "grad_norm": 1.4511718916783138, + "learning_rate": 1.0879575364489807e-06, + "loss": 0.4127371907234192, + "step": 4324 + }, + { + "epoch": 0.9972331104450081, + "grad_norm": 1.361622993253284, + "learning_rate": 1.0875777465038249e-06, + "loss": 0.41234201192855835, + "step": 4325 + }, + { + "epoch": 0.9974636845745908, + "grad_norm": 1.334187068919988, + "learning_rate": 1.087197943828356e-06, + "loss": 0.42657697200775146, + "step": 4326 + }, + { + "epoch": 0.9976942587041734, + "grad_norm": 1.5731685077464828, + "learning_rate": 1.0868181284777825e-06, + "loss": 0.5168975591659546, + "step": 4327 + }, + { + "epoch": 0.9979248328337561, + "grad_norm": 1.3417267376651396, + "learning_rate": 1.0864383005073142e-06, + "loss": 0.4712294340133667, + "step": 4328 + }, + { + "epoch": 0.9981554069633387, + "grad_norm": 
1.514146578387226, + "learning_rate": 1.0860584599721624e-06, + "loss": 0.4685649871826172, + "step": 4329 + }, + { + "epoch": 0.9983859810929214, + "grad_norm": 1.4104009699586146, + "learning_rate": 1.0856786069275417e-06, + "loss": 0.4699268937110901, + "step": 4330 + }, + { + "epoch": 0.998616555222504, + "grad_norm": 1.5072273981885642, + "learning_rate": 1.0852987414286669e-06, + "loss": 0.44216299057006836, + "step": 4331 + }, + { + "epoch": 0.9988471293520867, + "grad_norm": 1.489870947647978, + "learning_rate": 1.0849188635307558e-06, + "loss": 0.4374035894870758, + "step": 4332 + }, + { + "epoch": 0.9990777034816694, + "grad_norm": 1.396380314188184, + "learning_rate": 1.0845389732890269e-06, + "loss": 0.4538502097129822, + "step": 4333 + }, + { + "epoch": 0.9993082776112521, + "grad_norm": 1.5201233043344708, + "learning_rate": 1.0841590707587017e-06, + "loss": 0.4432523250579834, + "step": 4334 + }, + { + "epoch": 0.9995388517408347, + "grad_norm": 1.3401246835224159, + "learning_rate": 1.0837791559950026e-06, + "loss": 0.3614054322242737, + "step": 4335 + }, + { + "epoch": 0.9997694258704174, + "grad_norm": 1.5241184734301618, + "learning_rate": 1.0833992290531542e-06, + "loss": 0.5412651300430298, + "step": 4336 + }, + { + "epoch": 1.0, + "grad_norm": 1.3961487739465548, + "learning_rate": 1.0830192899883825e-06, + "loss": 0.43333327770233154, + "step": 4337 + }, + { + "epoch": 1.0002305741295827, + "grad_norm": 1.3739097269887006, + "learning_rate": 1.0826393388559156e-06, + "loss": 0.40433377027511597, + "step": 4338 + }, + { + "epoch": 1.0004611482591652, + "grad_norm": 1.5246903566917884, + "learning_rate": 1.0822593757109835e-06, + "loss": 0.49699902534484863, + "step": 4339 + }, + { + "epoch": 1.000691722388748, + "grad_norm": 1.4093275236950669, + "learning_rate": 1.0818794006088174e-06, + "loss": 0.4992629289627075, + "step": 4340 + }, + { + "epoch": 1.0009222965183306, + "grad_norm": 1.546985643456235, + "learning_rate": 
1.0814994136046503e-06, + "loss": 0.39532744884490967, + "step": 4341 + }, + { + "epoch": 1.0011528706479134, + "grad_norm": 1.4715614082094945, + "learning_rate": 1.0811194147537177e-06, + "loss": 0.48260024189949036, + "step": 4342 + }, + { + "epoch": 1.0013834447774959, + "grad_norm": 1.1813818983438111, + "learning_rate": 1.0807394041112562e-06, + "loss": 0.40896737575531006, + "step": 4343 + }, + { + "epoch": 1.0016140189070786, + "grad_norm": 1.373003199387245, + "learning_rate": 1.0803593817325037e-06, + "loss": 0.361757755279541, + "step": 4344 + }, + { + "epoch": 1.0018445930366613, + "grad_norm": 1.3113582417275997, + "learning_rate": 1.0799793476727006e-06, + "loss": 0.5524640083312988, + "step": 4345 + }, + { + "epoch": 1.002075167166244, + "grad_norm": 1.4504745740569693, + "learning_rate": 1.0795993019870891e-06, + "loss": 0.4798622727394104, + "step": 4346 + }, + { + "epoch": 1.0023057412958265, + "grad_norm": 1.1125620580650875, + "learning_rate": 1.079219244730912e-06, + "loss": 0.3408532440662384, + "step": 4347 + }, + { + "epoch": 1.0025363154254092, + "grad_norm": 1.6198320758392701, + "learning_rate": 1.0788391759594152e-06, + "loss": 0.4185452461242676, + "step": 4348 + }, + { + "epoch": 1.002766889554992, + "grad_norm": 1.4569047754589481, + "learning_rate": 1.078459095727845e-06, + "loss": 0.4656596779823303, + "step": 4349 + }, + { + "epoch": 1.0029974636845747, + "grad_norm": 1.2861299587948707, + "learning_rate": 1.07807900409145e-06, + "loss": 0.45649081468582153, + "step": 4350 + }, + { + "epoch": 1.0032280378141571, + "grad_norm": 1.4368410869138808, + "learning_rate": 1.0776989011054806e-06, + "loss": 0.4732903242111206, + "step": 4351 + }, + { + "epoch": 1.0034586119437399, + "grad_norm": 1.4875640347613817, + "learning_rate": 1.0773187868251882e-06, + "loss": 0.5313757658004761, + "step": 4352 + }, + { + "epoch": 1.0036891860733226, + "grad_norm": 1.7663418153227872, + "learning_rate": 1.0769386613058267e-06, + "loss": 
0.5373719334602356, + "step": 4353 + }, + { + "epoch": 1.0039197602029053, + "grad_norm": 1.4108655227977445, + "learning_rate": 1.076558524602651e-06, + "loss": 0.4530528783798218, + "step": 4354 + }, + { + "epoch": 1.0041503343324878, + "grad_norm": 2.0172927781638816, + "learning_rate": 1.076178376770918e-06, + "loss": 0.361511766910553, + "step": 4355 + }, + { + "epoch": 1.0043809084620705, + "grad_norm": 1.5430566364369291, + "learning_rate": 1.0757982178658857e-06, + "loss": 0.4260486364364624, + "step": 4356 + }, + { + "epoch": 1.0046114825916532, + "grad_norm": 1.4352564218347874, + "learning_rate": 1.0754180479428142e-06, + "loss": 0.4765712320804596, + "step": 4357 + }, + { + "epoch": 1.004842056721236, + "grad_norm": 1.408849526827852, + "learning_rate": 1.0750378670569652e-06, + "loss": 0.485443115234375, + "step": 4358 + }, + { + "epoch": 1.0050726308508184, + "grad_norm": 1.3833154190721015, + "learning_rate": 1.074657675263602e-06, + "loss": 0.5010418891906738, + "step": 4359 + }, + { + "epoch": 1.0053032049804012, + "grad_norm": 1.2138138176978153, + "learning_rate": 1.074277472617989e-06, + "loss": 0.42195719480514526, + "step": 4360 + }, + { + "epoch": 1.0055337791099839, + "grad_norm": 1.4341592826356415, + "learning_rate": 1.073897259175392e-06, + "loss": 0.48555606603622437, + "step": 4361 + }, + { + "epoch": 1.0057643532395666, + "grad_norm": 1.4030257216310642, + "learning_rate": 1.07351703499108e-06, + "loss": 0.4991112947463989, + "step": 4362 + }, + { + "epoch": 1.005994927369149, + "grad_norm": 1.365972754336138, + "learning_rate": 1.0731368001203217e-06, + "loss": 0.43016430735588074, + "step": 4363 + }, + { + "epoch": 1.0062255014987318, + "grad_norm": 1.635861674358112, + "learning_rate": 1.0727565546183883e-06, + "loss": 0.47147876024246216, + "step": 4364 + }, + { + "epoch": 1.0064560756283145, + "grad_norm": 1.4724107461573035, + "learning_rate": 1.0723762985405522e-06, + "loss": 0.4695407748222351, + "step": 4365 + }, + { + 
"epoch": 1.0066866497578972, + "grad_norm": 1.4167512288976294, + "learning_rate": 1.0719960319420878e-06, + "loss": 0.42666512727737427, + "step": 4366 + }, + { + "epoch": 1.0069172238874797, + "grad_norm": 1.4965231034133355, + "learning_rate": 1.0716157548782705e-06, + "loss": 0.5685237050056458, + "step": 4367 + }, + { + "epoch": 1.0071477980170624, + "grad_norm": 1.2856237164503312, + "learning_rate": 1.0712354674043774e-06, + "loss": 0.45181894302368164, + "step": 4368 + }, + { + "epoch": 1.0073783721466452, + "grad_norm": 1.479568259964695, + "learning_rate": 1.070855169575687e-06, + "loss": 0.4079795479774475, + "step": 4369 + }, + { + "epoch": 1.0076089462762279, + "grad_norm": 1.196685278300245, + "learning_rate": 1.0704748614474798e-06, + "loss": 0.4011094570159912, + "step": 4370 + }, + { + "epoch": 1.0078395204058104, + "grad_norm": 1.5280378960817975, + "learning_rate": 1.0700945430750373e-06, + "loss": 0.48842671513557434, + "step": 4371 + }, + { + "epoch": 1.008070094535393, + "grad_norm": 1.237232307792151, + "learning_rate": 1.0697142145136425e-06, + "loss": 0.5183907151222229, + "step": 4372 + }, + { + "epoch": 1.0083006686649758, + "grad_norm": 1.4080736997180416, + "learning_rate": 1.0693338758185797e-06, + "loss": 0.5022784471511841, + "step": 4373 + }, + { + "epoch": 1.0085312427945585, + "grad_norm": 1.5160750764739457, + "learning_rate": 1.0689535270451358e-06, + "loss": 0.500054121017456, + "step": 4374 + }, + { + "epoch": 1.008761816924141, + "grad_norm": 1.331407944528498, + "learning_rate": 1.068573168248598e-06, + "loss": 0.43674880266189575, + "step": 4375 + }, + { + "epoch": 1.0089923910537237, + "grad_norm": 1.3441260000045296, + "learning_rate": 1.068192799484255e-06, + "loss": 0.4272059202194214, + "step": 4376 + }, + { + "epoch": 1.0092229651833065, + "grad_norm": 1.3188087584834265, + "learning_rate": 1.0678124208073972e-06, + "loss": 0.41053932905197144, + "step": 4377 + }, + { + "epoch": 1.0094535393128892, + "grad_norm": 
1.3285405544041065, + "learning_rate": 1.0674320322733173e-06, + "loss": 0.4571593701839447, + "step": 4378 + }, + { + "epoch": 1.0096841134424717, + "grad_norm": 1.2947195973212757, + "learning_rate": 1.0670516339373081e-06, + "loss": 0.464965283870697, + "step": 4379 + }, + { + "epoch": 1.0099146875720544, + "grad_norm": 1.2757697611295247, + "learning_rate": 1.0666712258546639e-06, + "loss": 0.4086726903915405, + "step": 4380 + }, + { + "epoch": 1.010145261701637, + "grad_norm": 1.3664230084580502, + "learning_rate": 1.0662908080806815e-06, + "loss": 0.49988412857055664, + "step": 4381 + }, + { + "epoch": 1.0103758358312198, + "grad_norm": 1.33263070405775, + "learning_rate": 1.0659103806706587e-06, + "loss": 0.3976360559463501, + "step": 4382 + }, + { + "epoch": 1.0106064099608023, + "grad_norm": 1.3554444243435904, + "learning_rate": 1.065529943679894e-06, + "loss": 0.4500683546066284, + "step": 4383 + }, + { + "epoch": 1.010836984090385, + "grad_norm": 1.4532099828866123, + "learning_rate": 1.0651494971636875e-06, + "loss": 0.5617754459381104, + "step": 4384 + }, + { + "epoch": 1.0110675582199677, + "grad_norm": 1.2285766706051995, + "learning_rate": 1.0647690411773414e-06, + "loss": 0.4180886745452881, + "step": 4385 + }, + { + "epoch": 1.0112981323495505, + "grad_norm": 1.3797895213155087, + "learning_rate": 1.0643885757761588e-06, + "loss": 0.406663179397583, + "step": 4386 + }, + { + "epoch": 1.011528706479133, + "grad_norm": 1.2899676326462104, + "learning_rate": 1.0640081010154443e-06, + "loss": 0.4698946475982666, + "step": 4387 + }, + { + "epoch": 1.0117592806087157, + "grad_norm": 1.2421672055806043, + "learning_rate": 1.0636276169505034e-06, + "loss": 0.4845995306968689, + "step": 4388 + }, + { + "epoch": 1.0119898547382984, + "grad_norm": 1.7127723444190444, + "learning_rate": 1.0632471236366435e-06, + "loss": 0.5065066814422607, + "step": 4389 + }, + { + "epoch": 1.012220428867881, + "grad_norm": 1.5183614166838566, + "learning_rate": 
1.0628666211291735e-06, + "loss": 0.4302946925163269, + "step": 4390 + }, + { + "epoch": 1.0124510029974636, + "grad_norm": 1.682116735922279, + "learning_rate": 1.0624861094834029e-06, + "loss": 0.5772345066070557, + "step": 4391 + }, + { + "epoch": 1.0126815771270463, + "grad_norm": 1.3399536785573158, + "learning_rate": 1.0621055887546425e-06, + "loss": 0.5294336080551147, + "step": 4392 + }, + { + "epoch": 1.012912151256629, + "grad_norm": 1.1967430772955985, + "learning_rate": 1.0617250589982059e-06, + "loss": 0.5028249621391296, + "step": 4393 + }, + { + "epoch": 1.0131427253862118, + "grad_norm": 1.3120231857267954, + "learning_rate": 1.0613445202694065e-06, + "loss": 0.5072348713874817, + "step": 4394 + }, + { + "epoch": 1.0133732995157942, + "grad_norm": 1.3107230472369709, + "learning_rate": 1.060963972623559e-06, + "loss": 0.3632262945175171, + "step": 4395 + }, + { + "epoch": 1.013603873645377, + "grad_norm": 1.4739700660925632, + "learning_rate": 1.06058341611598e-06, + "loss": 0.419277161359787, + "step": 4396 + }, + { + "epoch": 1.0138344477749597, + "grad_norm": 1.4201089967708693, + "learning_rate": 1.060202850801988e-06, + "loss": 0.4056069850921631, + "step": 4397 + }, + { + "epoch": 1.0140650219045424, + "grad_norm": 1.4908298419223913, + "learning_rate": 1.0598222767369014e-06, + "loss": 0.5591505765914917, + "step": 4398 + }, + { + "epoch": 1.014295596034125, + "grad_norm": 1.2646885984398546, + "learning_rate": 1.0594416939760408e-06, + "loss": 0.38529443740844727, + "step": 4399 + }, + { + "epoch": 1.0145261701637076, + "grad_norm": 1.3255980825912217, + "learning_rate": 1.0590611025747272e-06, + "loss": 0.3609437644481659, + "step": 4400 + }, + { + "epoch": 1.0147567442932903, + "grad_norm": 1.3538282738769345, + "learning_rate": 1.058680502588284e-06, + "loss": 0.4849050045013428, + "step": 4401 + }, + { + "epoch": 1.014987318422873, + "grad_norm": 1.4516377120705455, + "learning_rate": 1.058299894072035e-06, + "loss": 0.39454251527786255, 
+ "step": 4402 + }, + { + "epoch": 1.0152178925524555, + "grad_norm": 1.5578248119945644, + "learning_rate": 1.0579192770813052e-06, + "loss": 0.39726459980010986, + "step": 4403 + }, + { + "epoch": 1.0154484666820383, + "grad_norm": 1.4398814364290877, + "learning_rate": 1.0575386516714218e-06, + "loss": 0.4730626940727234, + "step": 4404 + }, + { + "epoch": 1.015679040811621, + "grad_norm": 1.5842749126492264, + "learning_rate": 1.0571580178977123e-06, + "loss": 0.5436214804649353, + "step": 4405 + }, + { + "epoch": 1.0159096149412037, + "grad_norm": 1.4188700773135285, + "learning_rate": 1.0567773758155055e-06, + "loss": 0.4197273850440979, + "step": 4406 + }, + { + "epoch": 1.0161401890707862, + "grad_norm": 1.2873423308659837, + "learning_rate": 1.0563967254801316e-06, + "loss": 0.46460944414138794, + "step": 4407 + }, + { + "epoch": 1.016370763200369, + "grad_norm": 1.3771325056314752, + "learning_rate": 1.056016066946922e-06, + "loss": 0.3504630923271179, + "step": 4408 + }, + { + "epoch": 1.0166013373299516, + "grad_norm": 1.3484234762530152, + "learning_rate": 1.0556354002712098e-06, + "loss": 0.4620180130004883, + "step": 4409 + }, + { + "epoch": 1.0168319114595343, + "grad_norm": 1.414975730602458, + "learning_rate": 1.0552547255083283e-06, + "loss": 0.5642764568328857, + "step": 4410 + }, + { + "epoch": 1.0170624855891168, + "grad_norm": 1.3858649703726607, + "learning_rate": 1.054874042713612e-06, + "loss": 0.48283201456069946, + "step": 4411 + }, + { + "epoch": 1.0172930597186995, + "grad_norm": 1.3477248933257546, + "learning_rate": 1.0544933519423976e-06, + "loss": 0.5346091985702515, + "step": 4412 + }, + { + "epoch": 1.0175236338482823, + "grad_norm": 1.216774984460132, + "learning_rate": 1.0541126532500224e-06, + "loss": 0.4710259437561035, + "step": 4413 + }, + { + "epoch": 1.017754207977865, + "grad_norm": 1.6611025915045114, + "learning_rate": 1.0537319466918243e-06, + "loss": 0.535955548286438, + "step": 4414 + }, + { + "epoch": 
1.0179847821074475, + "grad_norm": 1.298601209078171, + "learning_rate": 1.0533512323231438e-06, + "loss": 0.4127902388572693, + "step": 4415 + }, + { + "epoch": 1.0182153562370302, + "grad_norm": 1.6222892430544704, + "learning_rate": 1.0529705101993203e-06, + "loss": 0.5209894180297852, + "step": 4416 + }, + { + "epoch": 1.018445930366613, + "grad_norm": 1.5702821211846574, + "learning_rate": 1.0525897803756967e-06, + "loss": 0.45600390434265137, + "step": 4417 + }, + { + "epoch": 1.0186765044961956, + "grad_norm": 1.6858904509627837, + "learning_rate": 1.0522090429076155e-06, + "loss": 0.5043426156044006, + "step": 4418 + }, + { + "epoch": 1.0189070786257781, + "grad_norm": 1.8442717417612486, + "learning_rate": 1.0518282978504207e-06, + "loss": 0.43386173248291016, + "step": 4419 + }, + { + "epoch": 1.0191376527553608, + "grad_norm": 1.4810433748538916, + "learning_rate": 1.0514475452594578e-06, + "loss": 0.44956767559051514, + "step": 4420 + }, + { + "epoch": 1.0193682268849436, + "grad_norm": 1.4162663845873593, + "learning_rate": 1.0510667851900726e-06, + "loss": 0.47164878249168396, + "step": 4421 + }, + { + "epoch": 1.0195988010145263, + "grad_norm": 1.3111398742961289, + "learning_rate": 1.0506860176976127e-06, + "loss": 0.4977136552333832, + "step": 4422 + }, + { + "epoch": 1.0198293751441088, + "grad_norm": 1.2272027402421368, + "learning_rate": 1.0503052428374264e-06, + "loss": 0.4344305396080017, + "step": 4423 + }, + { + "epoch": 1.0200599492736915, + "grad_norm": 1.4594484344103595, + "learning_rate": 1.049924460664863e-06, + "loss": 0.46536487340927124, + "step": 4424 + }, + { + "epoch": 1.0202905234032742, + "grad_norm": 1.5676489928965973, + "learning_rate": 1.0495436712352733e-06, + "loss": 0.4583844840526581, + "step": 4425 + }, + { + "epoch": 1.020521097532857, + "grad_norm": 1.3353943490467204, + "learning_rate": 1.049162874604009e-06, + "loss": 0.4098002314567566, + "step": 4426 + }, + { + "epoch": 1.0207516716624394, + "grad_norm": 
1.5212892459953231, + "learning_rate": 1.0487820708264227e-06, + "loss": 0.48168665170669556, + "step": 4427 + }, + { + "epoch": 1.0209822457920221, + "grad_norm": 1.575752706874104, + "learning_rate": 1.048401259957868e-06, + "loss": 0.5517562627792358, + "step": 4428 + }, + { + "epoch": 1.0212128199216048, + "grad_norm": 1.4762864972879257, + "learning_rate": 1.0480204420536998e-06, + "loss": 0.5131476521492004, + "step": 4429 + }, + { + "epoch": 1.0214433940511876, + "grad_norm": 1.3669237261259728, + "learning_rate": 1.0476396171692734e-06, + "loss": 0.4590519666671753, + "step": 4430 + }, + { + "epoch": 1.02167396818077, + "grad_norm": 1.6209541549743127, + "learning_rate": 1.0472587853599458e-06, + "loss": 0.5581461191177368, + "step": 4431 + }, + { + "epoch": 1.0219045423103528, + "grad_norm": 1.9464318549736228, + "learning_rate": 1.046877946681075e-06, + "loss": 0.4169657826423645, + "step": 4432 + }, + { + "epoch": 1.0221351164399355, + "grad_norm": 1.6990409231148407, + "learning_rate": 1.0464971011880195e-06, + "loss": 0.48135459423065186, + "step": 4433 + }, + { + "epoch": 1.0223656905695182, + "grad_norm": 1.5888684830629844, + "learning_rate": 1.046116248936139e-06, + "loss": 0.5116040706634521, + "step": 4434 + }, + { + "epoch": 1.0225962646991007, + "grad_norm": 1.2239425777755701, + "learning_rate": 1.0457353899807946e-06, + "loss": 0.4369809329509735, + "step": 4435 + }, + { + "epoch": 1.0228268388286834, + "grad_norm": 1.3094581394180187, + "learning_rate": 1.0453545243773474e-06, + "loss": 0.42936772108078003, + "step": 4436 + }, + { + "epoch": 1.0230574129582661, + "grad_norm": 1.4191745941139933, + "learning_rate": 1.0449736521811605e-06, + "loss": 0.3614712357521057, + "step": 4437 + }, + { + "epoch": 1.0232879870878488, + "grad_norm": 1.4958077731615864, + "learning_rate": 1.0445927734475977e-06, + "loss": 0.40728119015693665, + "step": 4438 + }, + { + "epoch": 1.0235185612174313, + "grad_norm": 1.6199665099354292, + "learning_rate": 
1.0442118882320233e-06, + "loss": 0.4940561056137085, + "step": 4439 + }, + { + "epoch": 1.023749135347014, + "grad_norm": 1.5292135898443935, + "learning_rate": 1.0438309965898027e-06, + "loss": 0.49529674649238586, + "step": 4440 + }, + { + "epoch": 1.0239797094765968, + "grad_norm": 1.3839632419664316, + "learning_rate": 1.0434500985763027e-06, + "loss": 0.4849408268928528, + "step": 4441 + }, + { + "epoch": 1.0242102836061795, + "grad_norm": 1.2306090654878221, + "learning_rate": 1.0430691942468903e-06, + "loss": 0.4121132791042328, + "step": 4442 + }, + { + "epoch": 1.024440857735762, + "grad_norm": 1.3788405992777184, + "learning_rate": 1.042688283656934e-06, + "loss": 0.4348478317260742, + "step": 4443 + }, + { + "epoch": 1.0246714318653447, + "grad_norm": 1.4946594419770094, + "learning_rate": 1.0423073668618033e-06, + "loss": 0.46817919611930847, + "step": 4444 + }, + { + "epoch": 1.0249020059949274, + "grad_norm": 1.4309128927667782, + "learning_rate": 1.041926443916868e-06, + "loss": 0.4422008991241455, + "step": 4445 + }, + { + "epoch": 1.02513258012451, + "grad_norm": 1.4766353003575698, + "learning_rate": 1.041545514877499e-06, + "loss": 0.5108183026313782, + "step": 4446 + }, + { + "epoch": 1.0253631542540926, + "grad_norm": 1.4287581583003561, + "learning_rate": 1.0411645797990685e-06, + "loss": 0.4759529232978821, + "step": 4447 + }, + { + "epoch": 1.0255937283836754, + "grad_norm": 1.4822019265627726, + "learning_rate": 1.040783638736949e-06, + "loss": 0.44447648525238037, + "step": 4448 + }, + { + "epoch": 1.025824302513258, + "grad_norm": 1.9820121270715096, + "learning_rate": 1.0404026917465144e-06, + "loss": 0.4558752477169037, + "step": 4449 + }, + { + "epoch": 1.0260548766428408, + "grad_norm": 1.5117188074263472, + "learning_rate": 1.0400217388831393e-06, + "loss": 0.4728459417819977, + "step": 4450 + }, + { + "epoch": 1.0262854507724233, + "grad_norm": 1.2832295949174854, + "learning_rate": 1.0396407802021985e-06, + "loss": 
0.4815519452095032, + "step": 4451 + }, + { + "epoch": 1.026516024902006, + "grad_norm": 1.493224641636315, + "learning_rate": 1.0392598157590685e-06, + "loss": 0.5173656344413757, + "step": 4452 + }, + { + "epoch": 1.0267465990315887, + "grad_norm": 1.389267472286255, + "learning_rate": 1.0388788456091267e-06, + "loss": 0.5280762910842896, + "step": 4453 + }, + { + "epoch": 1.0269771731611712, + "grad_norm": 1.3239342530675255, + "learning_rate": 1.0384978698077506e-06, + "loss": 0.4524118900299072, + "step": 4454 + }, + { + "epoch": 1.027207747290754, + "grad_norm": 1.3855017021962426, + "learning_rate": 1.0381168884103186e-06, + "loss": 0.4011715054512024, + "step": 4455 + }, + { + "epoch": 1.0274383214203366, + "grad_norm": 1.6664926632341406, + "learning_rate": 1.0377359014722108e-06, + "loss": 0.518020749092102, + "step": 4456 + }, + { + "epoch": 1.0276688955499194, + "grad_norm": 1.3443799803410221, + "learning_rate": 1.0373549090488073e-06, + "loss": 0.44726112484931946, + "step": 4457 + }, + { + "epoch": 1.0278994696795019, + "grad_norm": 1.5697915792497608, + "learning_rate": 1.0369739111954894e-06, + "loss": 0.5344264507293701, + "step": 4458 + }, + { + "epoch": 1.0281300438090846, + "grad_norm": 1.3300732692572412, + "learning_rate": 1.0365929079676387e-06, + "loss": 0.4902813732624054, + "step": 4459 + }, + { + "epoch": 1.0283606179386673, + "grad_norm": 1.6676294678142136, + "learning_rate": 1.0362118994206378e-06, + "loss": 0.38346555829048157, + "step": 4460 + }, + { + "epoch": 1.02859119206825, + "grad_norm": 1.4992112279059755, + "learning_rate": 1.0358308856098705e-06, + "loss": 0.4232872724533081, + "step": 4461 + }, + { + "epoch": 1.0288217661978325, + "grad_norm": 1.4973168899301483, + "learning_rate": 1.0354498665907207e-06, + "loss": 0.5184470415115356, + "step": 4462 + }, + { + "epoch": 1.0290523403274152, + "grad_norm": 1.3344202325848402, + "learning_rate": 1.0350688424185733e-06, + "loss": 0.4989054203033447, + "step": 4463 + }, + { + 
"epoch": 1.029282914456998, + "grad_norm": 1.4348006325476266, + "learning_rate": 1.0346878131488145e-06, + "loss": 0.5204064249992371, + "step": 4464 + }, + { + "epoch": 1.0295134885865806, + "grad_norm": 1.5066284997527284, + "learning_rate": 1.0343067788368307e-06, + "loss": 0.47872811555862427, + "step": 4465 + }, + { + "epoch": 1.0297440627161631, + "grad_norm": 1.4195028916227292, + "learning_rate": 1.0339257395380087e-06, + "loss": 0.4104915261268616, + "step": 4466 + }, + { + "epoch": 1.0299746368457459, + "grad_norm": 1.3696214178005537, + "learning_rate": 1.0335446953077366e-06, + "loss": 0.39327263832092285, + "step": 4467 + }, + { + "epoch": 1.0302052109753286, + "grad_norm": 1.4702497550106948, + "learning_rate": 1.033163646201403e-06, + "loss": 0.4395657777786255, + "step": 4468 + }, + { + "epoch": 1.0304357851049113, + "grad_norm": 1.419425725268843, + "learning_rate": 1.0327825922743976e-06, + "loss": 0.462537944316864, + "step": 4469 + }, + { + "epoch": 1.0306663592344938, + "grad_norm": 1.3686105119540095, + "learning_rate": 1.03240153358211e-06, + "loss": 0.4399976134300232, + "step": 4470 + }, + { + "epoch": 1.0308969333640765, + "grad_norm": 1.2004518913155955, + "learning_rate": 1.0320204701799311e-06, + "loss": 0.4289684593677521, + "step": 4471 + }, + { + "epoch": 1.0311275074936592, + "grad_norm": 1.700414177665105, + "learning_rate": 1.0316394021232524e-06, + "loss": 0.4771305322647095, + "step": 4472 + }, + { + "epoch": 1.031358081623242, + "grad_norm": 1.3381367861828992, + "learning_rate": 1.031258329467466e-06, + "loss": 0.4544849395751953, + "step": 4473 + }, + { + "epoch": 1.0315886557528244, + "grad_norm": 1.7319531178301495, + "learning_rate": 1.0308772522679646e-06, + "loss": 0.5362099409103394, + "step": 4474 + }, + { + "epoch": 1.0318192298824072, + "grad_norm": 1.564907240947497, + "learning_rate": 1.0304961705801413e-06, + "loss": 0.48966753482818604, + "step": 4475 + }, + { + "epoch": 1.0320498040119899, + "grad_norm": 
1.379783010020372, + "learning_rate": 1.0301150844593908e-06, + "loss": 0.3750344216823578, + "step": 4476 + }, + { + "epoch": 1.0322803781415726, + "grad_norm": 1.3651499470494945, + "learning_rate": 1.0297339939611076e-06, + "loss": 0.453983873128891, + "step": 4477 + }, + { + "epoch": 1.032510952271155, + "grad_norm": 1.837467998410361, + "learning_rate": 1.029352899140687e-06, + "loss": 0.5096027255058289, + "step": 4478 + }, + { + "epoch": 1.0327415264007378, + "grad_norm": 1.395622916901131, + "learning_rate": 1.028971800053525e-06, + "loss": 0.4387558698654175, + "step": 4479 + }, + { + "epoch": 1.0329721005303205, + "grad_norm": 1.324708629656248, + "learning_rate": 1.0285906967550184e-06, + "loss": 0.45710843801498413, + "step": 4480 + }, + { + "epoch": 1.0332026746599032, + "grad_norm": 1.631576144246761, + "learning_rate": 1.0282095893005643e-06, + "loss": 0.5258994102478027, + "step": 4481 + }, + { + "epoch": 1.0334332487894857, + "grad_norm": 1.320456527047697, + "learning_rate": 1.0278284777455603e-06, + "loss": 0.5037236213684082, + "step": 4482 + }, + { + "epoch": 1.0336638229190684, + "grad_norm": 1.3671446032683054, + "learning_rate": 1.027447362145405e-06, + "loss": 0.4730300307273865, + "step": 4483 + }, + { + "epoch": 1.0338943970486512, + "grad_norm": 1.5284074958618745, + "learning_rate": 1.0270662425554974e-06, + "loss": 0.4373326301574707, + "step": 4484 + }, + { + "epoch": 1.0341249711782339, + "grad_norm": 1.379045843622324, + "learning_rate": 1.0266851190312373e-06, + "loss": 0.3915579319000244, + "step": 4485 + }, + { + "epoch": 1.0343555453078164, + "grad_norm": 1.3482794503547837, + "learning_rate": 1.0263039916280247e-06, + "loss": 0.36588191986083984, + "step": 4486 + }, + { + "epoch": 1.034586119437399, + "grad_norm": 1.2333606023937755, + "learning_rate": 1.0259228604012602e-06, + "loss": 0.4287286400794983, + "step": 4487 + }, + { + "epoch": 1.0348166935669818, + "grad_norm": 1.3775270616642934, + "learning_rate": 
1.0255417254063454e-06, + "loss": 0.4405861496925354, + "step": 4488 + }, + { + "epoch": 1.0350472676965645, + "grad_norm": 1.443831892269548, + "learning_rate": 1.0251605866986818e-06, + "loss": 0.4859738349914551, + "step": 4489 + }, + { + "epoch": 1.035277841826147, + "grad_norm": 1.4103288990509777, + "learning_rate": 1.0247794443336722e-06, + "loss": 0.40879446268081665, + "step": 4490 + }, + { + "epoch": 1.0355084159557297, + "grad_norm": 1.4900612923986292, + "learning_rate": 1.024398298366719e-06, + "loss": 0.44872337579727173, + "step": 4491 + }, + { + "epoch": 1.0357389900853124, + "grad_norm": 1.3707597883324278, + "learning_rate": 1.0240171488532258e-06, + "loss": 0.41155117750167847, + "step": 4492 + }, + { + "epoch": 1.0359695642148952, + "grad_norm": 1.4935319402234073, + "learning_rate": 1.0236359958485966e-06, + "loss": 0.48941487073898315, + "step": 4493 + }, + { + "epoch": 1.0362001383444777, + "grad_norm": 1.3889526979110256, + "learning_rate": 1.0232548394082362e-06, + "loss": 0.4462544322013855, + "step": 4494 + }, + { + "epoch": 1.0364307124740604, + "grad_norm": 1.7635931454030804, + "learning_rate": 1.0228736795875487e-06, + "loss": 0.3791837692260742, + "step": 4495 + }, + { + "epoch": 1.036661286603643, + "grad_norm": 1.7988283203699307, + "learning_rate": 1.0224925164419404e-06, + "loss": 0.5037285685539246, + "step": 4496 + }, + { + "epoch": 1.0368918607332258, + "grad_norm": 1.5033654685782605, + "learning_rate": 1.0221113500268169e-06, + "loss": 0.4762890636920929, + "step": 4497 + }, + { + "epoch": 1.0371224348628083, + "grad_norm": 1.2678994584792878, + "learning_rate": 1.0217301803975844e-06, + "loss": 0.4673793315887451, + "step": 4498 + }, + { + "epoch": 1.037353008992391, + "grad_norm": 1.4491139066226089, + "learning_rate": 1.0213490076096501e-06, + "loss": 0.37522250413894653, + "step": 4499 + }, + { + "epoch": 1.0375835831219737, + "grad_norm": 1.4197729369573655, + "learning_rate": 1.020967831718421e-06, + "loss": 
0.4986375570297241, + "step": 4500 + }, + { + "epoch": 1.0378141572515565, + "grad_norm": 1.3424622189818292, + "learning_rate": 1.0205866527793053e-06, + "loss": 0.488337904214859, + "step": 4501 + }, + { + "epoch": 1.038044731381139, + "grad_norm": 1.2513264252251595, + "learning_rate": 1.0202054708477107e-06, + "loss": 0.37420767545700073, + "step": 4502 + }, + { + "epoch": 1.0382753055107217, + "grad_norm": 1.1901249454864467, + "learning_rate": 1.0198242859790465e-06, + "loss": 0.42453843355178833, + "step": 4503 + }, + { + "epoch": 1.0385058796403044, + "grad_norm": 1.5998980096348292, + "learning_rate": 1.0194430982287211e-06, + "loss": 0.4431978166103363, + "step": 4504 + }, + { + "epoch": 1.038736453769887, + "grad_norm": 1.2584649975167521, + "learning_rate": 1.0190619076521445e-06, + "loss": 0.5079195499420166, + "step": 4505 + }, + { + "epoch": 1.0389670278994696, + "grad_norm": 1.3630757915855334, + "learning_rate": 1.0186807143047263e-06, + "loss": 0.442915678024292, + "step": 4506 + }, + { + "epoch": 1.0391976020290523, + "grad_norm": 1.4946032354137926, + "learning_rate": 1.018299518241877e-06, + "loss": 0.4720972180366516, + "step": 4507 + }, + { + "epoch": 1.039428176158635, + "grad_norm": 1.407838633939113, + "learning_rate": 1.0179183195190073e-06, + "loss": 0.4637352526187897, + "step": 4508 + }, + { + "epoch": 1.0396587502882177, + "grad_norm": 1.3457342565284411, + "learning_rate": 1.0175371181915283e-06, + "loss": 0.4207759499549866, + "step": 4509 + }, + { + "epoch": 1.0398893244178002, + "grad_norm": 1.5872196626053143, + "learning_rate": 1.0171559143148514e-06, + "loss": 0.49227845668792725, + "step": 4510 + }, + { + "epoch": 1.040119898547383, + "grad_norm": 1.4565076836431372, + "learning_rate": 1.0167747079443884e-06, + "loss": 0.5006893873214722, + "step": 4511 + }, + { + "epoch": 1.0403504726769657, + "grad_norm": 1.4618469895611303, + "learning_rate": 1.016393499135552e-06, + "loss": 0.42048192024230957, + "step": 4512 + }, + { + 
"epoch": 1.0405810468065484, + "grad_norm": 1.5634742093932859, + "learning_rate": 1.0160122879437538e-06, + "loss": 0.5275895595550537, + "step": 4513 + }, + { + "epoch": 1.0408116209361309, + "grad_norm": 1.1544305266604897, + "learning_rate": 1.0156310744244073e-06, + "loss": 0.4677985906600952, + "step": 4514 + }, + { + "epoch": 1.0410421950657136, + "grad_norm": 1.422644417212902, + "learning_rate": 1.015249858632926e-06, + "loss": 0.5214150547981262, + "step": 4515 + }, + { + "epoch": 1.0412727691952963, + "grad_norm": 1.2418435857264525, + "learning_rate": 1.0148686406247232e-06, + "loss": 0.40790024399757385, + "step": 4516 + }, + { + "epoch": 1.041503343324879, + "grad_norm": 1.6199751141856524, + "learning_rate": 1.0144874204552125e-06, + "loss": 0.5943785309791565, + "step": 4517 + }, + { + "epoch": 1.0417339174544615, + "grad_norm": 1.531988684910503, + "learning_rate": 1.0141061981798086e-06, + "loss": 0.4590263366699219, + "step": 4518 + }, + { + "epoch": 1.0419644915840443, + "grad_norm": 1.3212940799821826, + "learning_rate": 1.0137249738539257e-06, + "loss": 0.4106098413467407, + "step": 4519 + }, + { + "epoch": 1.042195065713627, + "grad_norm": 1.4102973636174063, + "learning_rate": 1.013343747532979e-06, + "loss": 0.4730203151702881, + "step": 4520 + }, + { + "epoch": 1.0424256398432097, + "grad_norm": 1.2769276209650842, + "learning_rate": 1.0129625192723833e-06, + "loss": 0.43245944380760193, + "step": 4521 + }, + { + "epoch": 1.0426562139727922, + "grad_norm": 1.3088740452256564, + "learning_rate": 1.012581289127554e-06, + "loss": 0.40828272700309753, + "step": 4522 + }, + { + "epoch": 1.042886788102375, + "grad_norm": 1.5940499075267438, + "learning_rate": 1.0122000571539069e-06, + "loss": 0.4232874810695648, + "step": 4523 + }, + { + "epoch": 1.0431173622319576, + "grad_norm": 1.45477003479617, + "learning_rate": 1.0118188234068579e-06, + "loss": 0.43044984340667725, + "step": 4524 + }, + { + "epoch": 1.0433479363615403, + "grad_norm": 
1.6545172631907663, + "learning_rate": 1.011437587941823e-06, + "loss": 0.4502897262573242, + "step": 4525 + }, + { + "epoch": 1.0435785104911228, + "grad_norm": 2.0995258586192467, + "learning_rate": 1.0110563508142185e-06, + "loss": 0.5505340099334717, + "step": 4526 + }, + { + "epoch": 1.0438090846207055, + "grad_norm": 1.5629586322344833, + "learning_rate": 1.0106751120794617e-06, + "loss": 0.4026086628437042, + "step": 4527 + }, + { + "epoch": 1.0440396587502883, + "grad_norm": 1.5105039899180257, + "learning_rate": 1.0102938717929692e-06, + "loss": 0.3910222053527832, + "step": 4528 + }, + { + "epoch": 1.044270232879871, + "grad_norm": 1.6830902678008934, + "learning_rate": 1.009912630010158e-06, + "loss": 0.4134068191051483, + "step": 4529 + }, + { + "epoch": 1.0445008070094535, + "grad_norm": 1.4825250898714368, + "learning_rate": 1.0095313867864457e-06, + "loss": 0.4801563024520874, + "step": 4530 + }, + { + "epoch": 1.0447313811390362, + "grad_norm": 1.2424640239796358, + "learning_rate": 1.0091501421772495e-06, + "loss": 0.4269358515739441, + "step": 4531 + }, + { + "epoch": 1.044961955268619, + "grad_norm": 1.3485994976026512, + "learning_rate": 1.0087688962379877e-06, + "loss": 0.5300281047821045, + "step": 4532 + }, + { + "epoch": 1.0451925293982016, + "grad_norm": 1.6865287595757648, + "learning_rate": 1.0083876490240777e-06, + "loss": 0.4634189009666443, + "step": 4533 + }, + { + "epoch": 1.0454231035277841, + "grad_norm": 1.5187760856795984, + "learning_rate": 1.0080064005909379e-06, + "loss": 0.37037551403045654, + "step": 4534 + }, + { + "epoch": 1.0456536776573668, + "grad_norm": 1.2977267015714409, + "learning_rate": 1.0076251509939867e-06, + "loss": 0.4740016460418701, + "step": 4535 + }, + { + "epoch": 1.0458842517869495, + "grad_norm": 1.4686161726335998, + "learning_rate": 1.0072439002886426e-06, + "loss": 0.4824775159358978, + "step": 4536 + }, + { + "epoch": 1.0461148259165323, + "grad_norm": 1.4032368341998698, + "learning_rate": 
1.0068626485303242e-06, + "loss": 0.4891430735588074, + "step": 4537 + }, + { + "epoch": 1.0463454000461148, + "grad_norm": 1.440410031419601, + "learning_rate": 1.00648139577445e-06, + "loss": 0.48089975118637085, + "step": 4538 + }, + { + "epoch": 1.0465759741756975, + "grad_norm": 1.3280505427696812, + "learning_rate": 1.0061001420764395e-06, + "loss": 0.4353799521923065, + "step": 4539 + }, + { + "epoch": 1.0468065483052802, + "grad_norm": 1.5425308952951848, + "learning_rate": 1.0057188874917117e-06, + "loss": 0.4259982705116272, + "step": 4540 + }, + { + "epoch": 1.047037122434863, + "grad_norm": 1.502788920344227, + "learning_rate": 1.0053376320756852e-06, + "loss": 0.4400532841682434, + "step": 4541 + }, + { + "epoch": 1.0472676965644454, + "grad_norm": 1.398609267878258, + "learning_rate": 1.00495637588378e-06, + "loss": 0.48598533868789673, + "step": 4542 + }, + { + "epoch": 1.0474982706940281, + "grad_norm": 1.7261761893493324, + "learning_rate": 1.0045751189714153e-06, + "loss": 0.6310586929321289, + "step": 4543 + }, + { + "epoch": 1.0477288448236108, + "grad_norm": 1.4822203646620422, + "learning_rate": 1.0041938613940108e-06, + "loss": 0.49084293842315674, + "step": 4544 + }, + { + "epoch": 1.0479594189531936, + "grad_norm": 1.6167393331453148, + "learning_rate": 1.003812603206986e-06, + "loss": 0.5144428014755249, + "step": 4545 + }, + { + "epoch": 1.048189993082776, + "grad_norm": 1.4962485615696877, + "learning_rate": 1.0034313444657605e-06, + "loss": 0.4480917155742645, + "step": 4546 + }, + { + "epoch": 1.0484205672123588, + "grad_norm": 1.4833727438286728, + "learning_rate": 1.0030500852257545e-06, + "loss": 0.4505491852760315, + "step": 4547 + }, + { + "epoch": 1.0486511413419415, + "grad_norm": 1.3728340651335322, + "learning_rate": 1.0026688255423876e-06, + "loss": 0.3344930410385132, + "step": 4548 + }, + { + "epoch": 1.0488817154715242, + "grad_norm": 1.3493238342876126, + "learning_rate": 1.0022875654710801e-06, + "loss": 
0.4006739854812622, + "step": 4549 + }, + { + "epoch": 1.0491122896011067, + "grad_norm": 1.4777604777161095, + "learning_rate": 1.0019063050672517e-06, + "loss": 0.4815717935562134, + "step": 4550 + }, + { + "epoch": 1.0493428637306894, + "grad_norm": 1.4182246513528267, + "learning_rate": 1.0015250443863223e-06, + "loss": 0.4660469889640808, + "step": 4551 + }, + { + "epoch": 1.0495734378602721, + "grad_norm": 1.4298035442899577, + "learning_rate": 1.0011437834837125e-06, + "loss": 0.5233521461486816, + "step": 4552 + }, + { + "epoch": 1.0498040119898548, + "grad_norm": 1.7530768174577198, + "learning_rate": 1.0007625224148418e-06, + "loss": 0.6037864685058594, + "step": 4553 + }, + { + "epoch": 1.0500345861194373, + "grad_norm": 1.726860458569315, + "learning_rate": 1.000381261235131e-06, + "loss": 0.469952255487442, + "step": 4554 + }, + { + "epoch": 1.05026516024902, + "grad_norm": 1.302712404041117, + "learning_rate": 1e-06, + "loss": 0.4577752649784088, + "step": 4555 + }, + { + "epoch": 1.0504957343786028, + "grad_norm": 1.537724574807554, + "learning_rate": 9.996187387648692e-07, + "loss": 0.46796074509620667, + "step": 4556 + }, + { + "epoch": 1.0507263085081853, + "grad_norm": 1.3633141581703183, + "learning_rate": 9.992374775851583e-07, + "loss": 0.40709036588668823, + "step": 4557 + }, + { + "epoch": 1.050956882637768, + "grad_norm": 1.2121351653860253, + "learning_rate": 9.988562165162878e-07, + "loss": 0.3997795879840851, + "step": 4558 + }, + { + "epoch": 1.0511874567673507, + "grad_norm": 1.6938685288563167, + "learning_rate": 9.984749556136779e-07, + "loss": 0.4677845239639282, + "step": 4559 + }, + { + "epoch": 1.0514180308969334, + "grad_norm": 1.315537055431831, + "learning_rate": 9.980936949327487e-07, + "loss": 0.40411800146102905, + "step": 4560 + }, + { + "epoch": 1.0516486050265161, + "grad_norm": 1.3999939149032237, + "learning_rate": 9.9771243452892e-07, + "loss": 0.50546795129776, + "step": 4561 + }, + { + "epoch": 1.0518791791560986, + 
"grad_norm": 1.5468163611837324, + "learning_rate": 9.973311744576125e-07, + "loss": 0.4116637110710144, + "step": 4562 + }, + { + "epoch": 1.0521097532856813, + "grad_norm": 1.2997915019544943, + "learning_rate": 9.969499147742454e-07, + "loss": 0.4271109700202942, + "step": 4563 + }, + { + "epoch": 1.052340327415264, + "grad_norm": 1.1760164248835672, + "learning_rate": 9.965686555342396e-07, + "loss": 0.37195074558258057, + "step": 4564 + }, + { + "epoch": 1.0525709015448466, + "grad_norm": 1.6759945376385115, + "learning_rate": 9.96187396793014e-07, + "loss": 0.4020707607269287, + "step": 4565 + }, + { + "epoch": 1.0528014756744293, + "grad_norm": 1.5880882887273124, + "learning_rate": 9.95806138605989e-07, + "loss": 0.4980151951313019, + "step": 4566 + }, + { + "epoch": 1.053032049804012, + "grad_norm": 1.419377079967674, + "learning_rate": 9.95424881028585e-07, + "loss": 0.39553767442703247, + "step": 4567 + }, + { + "epoch": 1.0532626239335947, + "grad_norm": 1.3361167736969362, + "learning_rate": 9.9504362411622e-07, + "loss": 0.47618645429611206, + "step": 4568 + }, + { + "epoch": 1.0534931980631772, + "grad_norm": 1.6469408967264108, + "learning_rate": 9.94662367924315e-07, + "loss": 0.4613817036151886, + "step": 4569 + }, + { + "epoch": 1.05372377219276, + "grad_norm": 1.4563205269464143, + "learning_rate": 9.942811125082884e-07, + "loss": 0.35888034105300903, + "step": 4570 + }, + { + "epoch": 1.0539543463223426, + "grad_norm": 1.896669698951033, + "learning_rate": 9.938998579235606e-07, + "loss": 0.45810097455978394, + "step": 4571 + }, + { + "epoch": 1.0541849204519254, + "grad_norm": 1.4115626759758866, + "learning_rate": 9.935186042255499e-07, + "loss": 0.5351384878158569, + "step": 4572 + }, + { + "epoch": 1.0544154945815079, + "grad_norm": 1.4888165757644622, + "learning_rate": 9.931373514696759e-07, + "loss": 0.5261274576187134, + "step": 4573 + }, + { + "epoch": 1.0546460687110906, + "grad_norm": 1.368295507669899, + "learning_rate": 
9.927560997113573e-07, + "loss": 0.483295202255249, + "step": 4574 + }, + { + "epoch": 1.0548766428406733, + "grad_norm": 1.5639325535974613, + "learning_rate": 9.923748490060132e-07, + "loss": 0.5371580719947815, + "step": 4575 + }, + { + "epoch": 1.055107216970256, + "grad_norm": 1.8721225876517977, + "learning_rate": 9.919935994090622e-07, + "loss": 0.4863673746585846, + "step": 4576 + }, + { + "epoch": 1.0553377910998385, + "grad_norm": 1.5391981555318386, + "learning_rate": 9.916123509759224e-07, + "loss": 0.47929099202156067, + "step": 4577 + }, + { + "epoch": 1.0555683652294212, + "grad_norm": 1.3884034720788059, + "learning_rate": 9.912311037620126e-07, + "loss": 0.4687851667404175, + "step": 4578 + }, + { + "epoch": 1.055798939359004, + "grad_norm": 1.5841867302150618, + "learning_rate": 9.908498578227504e-07, + "loss": 0.5308720469474792, + "step": 4579 + }, + { + "epoch": 1.0560295134885866, + "grad_norm": 1.8691314272616926, + "learning_rate": 9.904686132135546e-07, + "loss": 0.45900580286979675, + "step": 4580 + }, + { + "epoch": 1.0562600876181691, + "grad_norm": 1.4586686619480431, + "learning_rate": 9.900873699898422e-07, + "loss": 0.49392157793045044, + "step": 4581 + }, + { + "epoch": 1.0564906617477519, + "grad_norm": 1.6139111586944341, + "learning_rate": 9.89706128207031e-07, + "loss": 0.47190070152282715, + "step": 4582 + }, + { + "epoch": 1.0567212358773346, + "grad_norm": 1.7781894650458763, + "learning_rate": 9.893248879205382e-07, + "loss": 0.4431575834751129, + "step": 4583 + }, + { + "epoch": 1.0569518100069173, + "grad_norm": 1.293421470994464, + "learning_rate": 9.889436491857814e-07, + "loss": 0.49873441457748413, + "step": 4584 + }, + { + "epoch": 1.0571823841364998, + "grad_norm": 1.4263954197349762, + "learning_rate": 9.885624120581772e-07, + "loss": 0.41190844774246216, + "step": 4585 + }, + { + "epoch": 1.0574129582660825, + "grad_norm": 1.5698735406284627, + "learning_rate": 9.881811765931423e-07, + "loss": 0.5164123773574829, + 
"step": 4586 + }, + { + "epoch": 1.0576435323956652, + "grad_norm": 1.5034141006108586, + "learning_rate": 9.877999428460933e-07, + "loss": 0.4141567349433899, + "step": 4587 + }, + { + "epoch": 1.057874106525248, + "grad_norm": 1.557658840701198, + "learning_rate": 9.87418710872446e-07, + "loss": 0.457628458738327, + "step": 4588 + }, + { + "epoch": 1.0581046806548304, + "grad_norm": 1.4732865673601758, + "learning_rate": 9.870374807276168e-07, + "loss": 0.41788995265960693, + "step": 4589 + }, + { + "epoch": 1.0583352547844131, + "grad_norm": 1.6240063497851516, + "learning_rate": 9.866562524670209e-07, + "loss": 0.5124667882919312, + "step": 4590 + }, + { + "epoch": 1.0585658289139959, + "grad_norm": 1.1619873853554898, + "learning_rate": 9.862750261460742e-07, + "loss": 0.4192196726799011, + "step": 4591 + }, + { + "epoch": 1.0587964030435786, + "grad_norm": 1.3804521479784477, + "learning_rate": 9.858938018201913e-07, + "loss": 0.4345153868198395, + "step": 4592 + }, + { + "epoch": 1.059026977173161, + "grad_norm": 1.3186049119261667, + "learning_rate": 9.855125795447874e-07, + "loss": 0.391804963350296, + "step": 4593 + }, + { + "epoch": 1.0592575513027438, + "grad_norm": 1.3394610780120433, + "learning_rate": 9.851313593752767e-07, + "loss": 0.3904710114002228, + "step": 4594 + }, + { + "epoch": 1.0594881254323265, + "grad_norm": 1.4234043935357816, + "learning_rate": 9.847501413670742e-07, + "loss": 0.37314411997795105, + "step": 4595 + }, + { + "epoch": 1.0597186995619092, + "grad_norm": 1.7572920451540888, + "learning_rate": 9.843689255755926e-07, + "loss": 0.5402779579162598, + "step": 4596 + }, + { + "epoch": 1.0599492736914917, + "grad_norm": 1.4688689617213957, + "learning_rate": 9.839877120562463e-07, + "loss": 0.4243565797805786, + "step": 4597 + }, + { + "epoch": 1.0601798478210744, + "grad_norm": 1.6330717694890693, + "learning_rate": 9.836065008644484e-07, + "loss": 0.4504585564136505, + "step": 4598 + }, + { + "epoch": 1.0604104219506572, + 
"grad_norm": 1.3073319656874434, + "learning_rate": 9.832252920556115e-07, + "loss": 0.46487870812416077, + "step": 4599 + }, + { + "epoch": 1.0606409960802399, + "grad_norm": 1.452752590173503, + "learning_rate": 9.828440856851487e-07, + "loss": 0.470059871673584, + "step": 4600 + }, + { + "epoch": 1.0608715702098224, + "grad_norm": 1.4580866952416336, + "learning_rate": 9.824628818084716e-07, + "loss": 0.4307391047477722, + "step": 4601 + }, + { + "epoch": 1.061102144339405, + "grad_norm": 1.545423985207434, + "learning_rate": 9.820816804809927e-07, + "loss": 0.49449142813682556, + "step": 4602 + }, + { + "epoch": 1.0613327184689878, + "grad_norm": 1.4803985945664777, + "learning_rate": 9.817004817581229e-07, + "loss": 0.4932701885700226, + "step": 4603 + }, + { + "epoch": 1.0615632925985705, + "grad_norm": 1.4502372729626234, + "learning_rate": 9.813192856952739e-07, + "loss": 0.49543553590774536, + "step": 4604 + }, + { + "epoch": 1.061793866728153, + "grad_norm": 1.1578379554584357, + "learning_rate": 9.809380923478554e-07, + "loss": 0.3906818926334381, + "step": 4605 + }, + { + "epoch": 1.0620244408577357, + "grad_norm": 1.4436425775524195, + "learning_rate": 9.80556901771279e-07, + "loss": 0.41667112708091736, + "step": 4606 + }, + { + "epoch": 1.0622550149873184, + "grad_norm": 1.475010908303335, + "learning_rate": 9.801757140209538e-07, + "loss": 0.36195361614227295, + "step": 4607 + }, + { + "epoch": 1.0624855891169012, + "grad_norm": 1.4053500417900708, + "learning_rate": 9.797945291522892e-07, + "loss": 0.4056081175804138, + "step": 4608 + }, + { + "epoch": 1.0627161632464837, + "grad_norm": 1.4310559040175581, + "learning_rate": 9.794133472206948e-07, + "loss": 0.5048736929893494, + "step": 4609 + }, + { + "epoch": 1.0629467373760664, + "grad_norm": 1.3896886111265523, + "learning_rate": 9.790321682815788e-07, + "loss": 0.4846169352531433, + "step": 4610 + }, + { + "epoch": 1.063177311505649, + "grad_norm": 1.3569892439901554, + "learning_rate": 
9.7865099239035e-07, + "loss": 0.5149316787719727, + "step": 4611 + }, + { + "epoch": 1.0634078856352318, + "grad_norm": 1.5344870466099163, + "learning_rate": 9.782698196024155e-07, + "loss": 0.3816874623298645, + "step": 4612 + }, + { + "epoch": 1.0636384597648143, + "grad_norm": 1.39688044025804, + "learning_rate": 9.77888649973183e-07, + "loss": 0.5469645261764526, + "step": 4613 + }, + { + "epoch": 1.063869033894397, + "grad_norm": 1.2954034757094786, + "learning_rate": 9.775074835580593e-07, + "loss": 0.42796647548675537, + "step": 4614 + }, + { + "epoch": 1.0640996080239797, + "grad_norm": 1.4924945772778404, + "learning_rate": 9.771263204124512e-07, + "loss": 0.4931715726852417, + "step": 4615 + }, + { + "epoch": 1.0643301821535625, + "grad_norm": 1.367565961969811, + "learning_rate": 9.767451605917641e-07, + "loss": 0.5435268878936768, + "step": 4616 + }, + { + "epoch": 1.064560756283145, + "grad_norm": 1.6066093331363582, + "learning_rate": 9.763640041514033e-07, + "loss": 0.46361953020095825, + "step": 4617 + }, + { + "epoch": 1.0647913304127277, + "grad_norm": 1.240667858579194, + "learning_rate": 9.759828511467743e-07, + "loss": 0.3742775619029999, + "step": 4618 + }, + { + "epoch": 1.0650219045423104, + "grad_norm": 1.5520509510364326, + "learning_rate": 9.75601701633281e-07, + "loss": 0.4060659408569336, + "step": 4619 + }, + { + "epoch": 1.065252478671893, + "grad_norm": 1.2052909018096978, + "learning_rate": 9.75220555666328e-07, + "loss": 0.45316505432128906, + "step": 4620 + }, + { + "epoch": 1.0654830528014756, + "grad_norm": 1.4180749825165042, + "learning_rate": 9.748394133013179e-07, + "loss": 0.4548850655555725, + "step": 4621 + }, + { + "epoch": 1.0657136269310583, + "grad_norm": 1.2793215690458788, + "learning_rate": 9.744582745936547e-07, + "loss": 0.5065705180168152, + "step": 4622 + }, + { + "epoch": 1.065944201060641, + "grad_norm": 1.4912306578981507, + "learning_rate": 9.740771395987395e-07, + "loss": 0.4114503860473633, + "step": 
4623 + }, + { + "epoch": 1.0661747751902237, + "grad_norm": 1.4280192292492455, + "learning_rate": 9.736960083719752e-07, + "loss": 0.4568501114845276, + "step": 4624 + }, + { + "epoch": 1.0664053493198062, + "grad_norm": 1.2972553921673455, + "learning_rate": 9.733148809687624e-07, + "loss": 0.49967026710510254, + "step": 4625 + }, + { + "epoch": 1.066635923449389, + "grad_norm": 1.4642812597554793, + "learning_rate": 9.729337574445025e-07, + "loss": 0.529681384563446, + "step": 4626 + }, + { + "epoch": 1.0668664975789717, + "grad_norm": 1.4791668180519966, + "learning_rate": 9.72552637854595e-07, + "loss": 0.4819791316986084, + "step": 4627 + }, + { + "epoch": 1.0670970717085544, + "grad_norm": 1.3549019355661691, + "learning_rate": 9.721715222544396e-07, + "loss": 0.4186001718044281, + "step": 4628 + }, + { + "epoch": 1.0673276458381369, + "grad_norm": 1.221767945169434, + "learning_rate": 9.717904106994359e-07, + "loss": 0.4442529082298279, + "step": 4629 + }, + { + "epoch": 1.0675582199677196, + "grad_norm": 1.886711265076429, + "learning_rate": 9.714093032449815e-07, + "loss": 0.4655953049659729, + "step": 4630 + }, + { + "epoch": 1.0677887940973023, + "grad_norm": 1.2641786187672595, + "learning_rate": 9.71028199946475e-07, + "loss": 0.45248714089393616, + "step": 4631 + }, + { + "epoch": 1.068019368226885, + "grad_norm": 1.547270813258376, + "learning_rate": 9.706471008593128e-07, + "loss": 0.4244336485862732, + "step": 4632 + }, + { + "epoch": 1.0682499423564675, + "grad_norm": 1.441914160495435, + "learning_rate": 9.702660060388923e-07, + "loss": 0.4396495819091797, + "step": 4633 + }, + { + "epoch": 1.0684805164860502, + "grad_norm": 1.3832490714301353, + "learning_rate": 9.698849155406089e-07, + "loss": 0.4504232406616211, + "step": 4634 + }, + { + "epoch": 1.068711090615633, + "grad_norm": 1.5660708185651993, + "learning_rate": 9.695038294198588e-07, + "loss": 0.40112000703811646, + "step": 4635 + }, + { + "epoch": 1.0689416647452157, + "grad_norm": 
1.5797332497697052, + "learning_rate": 9.691227477320357e-07, + "loss": 0.4511067271232605, + "step": 4636 + }, + { + "epoch": 1.0691722388747982, + "grad_norm": 1.4624732720511697, + "learning_rate": 9.687416705325342e-07, + "loss": 0.44541406631469727, + "step": 4637 + }, + { + "epoch": 1.069402813004381, + "grad_norm": 1.3872197811900322, + "learning_rate": 9.68360597876748e-07, + "loss": 0.5038847327232361, + "step": 4638 + }, + { + "epoch": 1.0696333871339636, + "grad_norm": 1.2356986255488158, + "learning_rate": 9.67979529820069e-07, + "loss": 0.41960060596466064, + "step": 4639 + }, + { + "epoch": 1.0698639612635463, + "grad_norm": 1.6121133741192841, + "learning_rate": 9.6759846641789e-07, + "loss": 0.49760064482688904, + "step": 4640 + }, + { + "epoch": 1.0700945353931288, + "grad_norm": 1.7920934015909264, + "learning_rate": 9.672174077256023e-07, + "loss": 0.46513333916664124, + "step": 4641 + }, + { + "epoch": 1.0703251095227115, + "grad_norm": 1.5128396951273724, + "learning_rate": 9.66836353798597e-07, + "loss": 0.41129356622695923, + "step": 4642 + }, + { + "epoch": 1.0705556836522943, + "grad_norm": 1.1803503202020598, + "learning_rate": 9.664553046922634e-07, + "loss": 0.5021853446960449, + "step": 4643 + }, + { + "epoch": 1.070786257781877, + "grad_norm": 1.7444146178498035, + "learning_rate": 9.660742604619912e-07, + "loss": 0.5184302926063538, + "step": 4644 + }, + { + "epoch": 1.0710168319114595, + "grad_norm": 1.8278981381437267, + "learning_rate": 9.65693221163169e-07, + "loss": 0.4793940484523773, + "step": 4645 + }, + { + "epoch": 1.0712474060410422, + "grad_norm": 1.6157027564363053, + "learning_rate": 9.653121868511854e-07, + "loss": 0.43454456329345703, + "step": 4646 + }, + { + "epoch": 1.071477980170625, + "grad_norm": 1.3605748894383922, + "learning_rate": 9.649311575814266e-07, + "loss": 0.49123185873031616, + "step": 4647 + }, + { + "epoch": 1.0717085543002076, + "grad_norm": 1.2316654311751212, + "learning_rate": 
9.645501334092792e-07, + "loss": 0.37020617723464966, + "step": 4648 + }, + { + "epoch": 1.0719391284297901, + "grad_norm": 1.3370776970957903, + "learning_rate": 9.641691143901296e-07, + "loss": 0.461778849363327, + "step": 4649 + }, + { + "epoch": 1.0721697025593728, + "grad_norm": 1.7402606402657241, + "learning_rate": 9.63788100579362e-07, + "loss": 0.46640273928642273, + "step": 4650 + }, + { + "epoch": 1.0724002766889555, + "grad_norm": 1.543123481033078, + "learning_rate": 9.634070920323614e-07, + "loss": 0.44978517293930054, + "step": 4651 + }, + { + "epoch": 1.0726308508185383, + "grad_norm": 1.5280216878422028, + "learning_rate": 9.630260888045103e-07, + "loss": 0.5070945024490356, + "step": 4652 + }, + { + "epoch": 1.0728614249481208, + "grad_norm": 1.3361545028178132, + "learning_rate": 9.626450909511926e-07, + "loss": 0.4513545334339142, + "step": 4653 + }, + { + "epoch": 1.0730919990777035, + "grad_norm": 1.2352969540055843, + "learning_rate": 9.622640985277889e-07, + "loss": 0.4430030584335327, + "step": 4654 + }, + { + "epoch": 1.0733225732072862, + "grad_norm": 1.7185507494111099, + "learning_rate": 9.618831115896814e-07, + "loss": 0.45619165897369385, + "step": 4655 + }, + { + "epoch": 1.073553147336869, + "grad_norm": 1.3452693944435885, + "learning_rate": 9.615021301922497e-07, + "loss": 0.411594033241272, + "step": 4656 + }, + { + "epoch": 1.0737837214664514, + "grad_norm": 1.696260647190632, + "learning_rate": 9.611211543908732e-07, + "loss": 0.5230164527893066, + "step": 4657 + }, + { + "epoch": 1.0740142955960341, + "grad_norm": 1.2546383850728546, + "learning_rate": 9.607401842409316e-07, + "loss": 0.45379406213760376, + "step": 4658 + }, + { + "epoch": 1.0742448697256168, + "grad_norm": 1.4465974878955368, + "learning_rate": 9.603592197978016e-07, + "loss": 0.47254839539527893, + "step": 4659 + }, + { + "epoch": 1.0744754438551993, + "grad_norm": 1.4899733507525732, + "learning_rate": 9.59978261116861e-07, + "loss": 0.3990492820739746, + 
"step": 4660 + }, + { + "epoch": 1.074706017984782, + "grad_norm": 1.2629235312972213, + "learning_rate": 9.595973082534855e-07, + "loss": 0.41671720147132874, + "step": 4661 + }, + { + "epoch": 1.0749365921143648, + "grad_norm": 1.3769486256402874, + "learning_rate": 9.59216361263051e-07, + "loss": 0.4269324839115143, + "step": 4662 + }, + { + "epoch": 1.0751671662439475, + "grad_norm": 1.7548425902665015, + "learning_rate": 9.588354202009314e-07, + "loss": 0.42989516258239746, + "step": 4663 + }, + { + "epoch": 1.0753977403735302, + "grad_norm": 1.5474664125691167, + "learning_rate": 9.584544851225008e-07, + "loss": 0.5224605798721313, + "step": 4664 + }, + { + "epoch": 1.0756283145031127, + "grad_norm": 1.393419713492626, + "learning_rate": 9.580735560831318e-07, + "loss": 0.3853871524333954, + "step": 4665 + }, + { + "epoch": 1.0758588886326954, + "grad_norm": 1.360242198109215, + "learning_rate": 9.576926331381968e-07, + "loss": 0.4460698366165161, + "step": 4666 + }, + { + "epoch": 1.0760894627622781, + "grad_norm": 1.524802030014046, + "learning_rate": 9.57311716343066e-07, + "loss": 0.45617812871932983, + "step": 4667 + }, + { + "epoch": 1.0763200368918606, + "grad_norm": 1.7079854681006486, + "learning_rate": 9.569308057531096e-07, + "loss": 0.5631355047225952, + "step": 4668 + }, + { + "epoch": 1.0765506110214433, + "grad_norm": 1.3155596598859882, + "learning_rate": 9.565499014236977e-07, + "loss": 0.4197179973125458, + "step": 4669 + }, + { + "epoch": 1.076781185151026, + "grad_norm": 1.5894301477582775, + "learning_rate": 9.561690034101973e-07, + "loss": 0.4262646436691284, + "step": 4670 + }, + { + "epoch": 1.0770117592806088, + "grad_norm": 1.4805271814916348, + "learning_rate": 9.557881117679768e-07, + "loss": 0.42719966173171997, + "step": 4671 + }, + { + "epoch": 1.0772423334101915, + "grad_norm": 1.3479731294807211, + "learning_rate": 9.554072265524022e-07, + "loss": 0.4278491735458374, + "step": 4672 + }, + { + "epoch": 1.077472907539774, + 
"grad_norm": 1.4324931591130032, + "learning_rate": 9.550263478188396e-07, + "loss": 0.3915478587150574, + "step": 4673 + }, + { + "epoch": 1.0777034816693567, + "grad_norm": 1.4807606218185139, + "learning_rate": 9.546454756226525e-07, + "loss": 0.4391477704048157, + "step": 4674 + }, + { + "epoch": 1.0779340557989394, + "grad_norm": 1.6230153652074522, + "learning_rate": 9.542646100192055e-07, + "loss": 0.47325795888900757, + "step": 4675 + }, + { + "epoch": 1.078164629928522, + "grad_norm": 1.3326185339285364, + "learning_rate": 9.538837510638607e-07, + "loss": 0.4698373079299927, + "step": 4676 + }, + { + "epoch": 1.0783952040581046, + "grad_norm": 1.5843176103578385, + "learning_rate": 9.535028988119805e-07, + "loss": 0.4252272844314575, + "step": 4677 + }, + { + "epoch": 1.0786257781876873, + "grad_norm": 1.4642476960881914, + "learning_rate": 9.531220533189253e-07, + "loss": 0.46726179122924805, + "step": 4678 + }, + { + "epoch": 1.07885635231727, + "grad_norm": 1.3792408296611596, + "learning_rate": 9.527412146400542e-07, + "loss": 0.46616411209106445, + "step": 4679 + }, + { + "epoch": 1.0790869264468528, + "grad_norm": 1.3938952826758202, + "learning_rate": 9.523603828307268e-07, + "loss": 0.5607181787490845, + "step": 4680 + }, + { + "epoch": 1.0793175005764353, + "grad_norm": 1.6234566687004295, + "learning_rate": 9.519795579463002e-07, + "loss": 0.5039520859718323, + "step": 4681 + }, + { + "epoch": 1.079548074706018, + "grad_norm": 1.6358698645091259, + "learning_rate": 9.515987400421322e-07, + "loss": 0.45532113313674927, + "step": 4682 + }, + { + "epoch": 1.0797786488356007, + "grad_norm": 1.3987490622653254, + "learning_rate": 9.512179291735772e-07, + "loss": 0.4198398292064667, + "step": 4683 + }, + { + "epoch": 1.0800092229651832, + "grad_norm": 2.0745649369110577, + "learning_rate": 9.508371253959909e-07, + "loss": 0.371380090713501, + "step": 4684 + }, + { + "epoch": 1.080239797094766, + "grad_norm": 1.6602368865180097, + "learning_rate": 
9.504563287647265e-07, + "loss": 0.44341978430747986, + "step": 4685 + }, + { + "epoch": 1.0804703712243486, + "grad_norm": 1.3233390600316475, + "learning_rate": 9.500755393351372e-07, + "loss": 0.4184574484825134, + "step": 4686 + }, + { + "epoch": 1.0807009453539314, + "grad_norm": 1.554478033670439, + "learning_rate": 9.496947571625739e-07, + "loss": 0.5584033727645874, + "step": 4687 + }, + { + "epoch": 1.0809315194835138, + "grad_norm": 1.4303675439776025, + "learning_rate": 9.493139823023874e-07, + "loss": 0.44405317306518555, + "step": 4688 + }, + { + "epoch": 1.0811620936130966, + "grad_norm": 1.5109921870756446, + "learning_rate": 9.489332148099277e-07, + "loss": 0.41137009859085083, + "step": 4689 + }, + { + "epoch": 1.0813926677426793, + "grad_norm": 1.5933695881826222, + "learning_rate": 9.485524547405424e-07, + "loss": 0.4831092357635498, + "step": 4690 + }, + { + "epoch": 1.081623241872262, + "grad_norm": 1.3224307777817799, + "learning_rate": 9.481717021495793e-07, + "loss": 0.41243845224380493, + "step": 4691 + }, + { + "epoch": 1.0818538160018445, + "grad_norm": 1.506253034871724, + "learning_rate": 9.477909570923844e-07, + "loss": 0.33649003505706787, + "step": 4692 + }, + { + "epoch": 1.0820843901314272, + "grad_norm": 1.3759728989311568, + "learning_rate": 9.474102196243033e-07, + "loss": 0.4959014654159546, + "step": 4693 + }, + { + "epoch": 1.08231496426101, + "grad_norm": 1.4717496348190642, + "learning_rate": 9.470294898006795e-07, + "loss": 0.43924248218536377, + "step": 4694 + }, + { + "epoch": 1.0825455383905926, + "grad_norm": 1.5425758669304555, + "learning_rate": 9.466487676768563e-07, + "loss": 0.4777243137359619, + "step": 4695 + }, + { + "epoch": 1.0827761125201751, + "grad_norm": 1.7258911046059784, + "learning_rate": 9.462680533081752e-07, + "loss": 0.4488077759742737, + "step": 4696 + }, + { + "epoch": 1.0830066866497579, + "grad_norm": 1.5375128445555653, + "learning_rate": 9.458873467499778e-07, + "loss": 0.5058270692825317, + 
"step": 4697 + }, + { + "epoch": 1.0832372607793406, + "grad_norm": 1.5052517610014813, + "learning_rate": 9.455066480576025e-07, + "loss": 0.4537619650363922, + "step": 4698 + }, + { + "epoch": 1.0834678349089233, + "grad_norm": 1.5194044905455244, + "learning_rate": 9.45125957286388e-07, + "loss": 0.4725874960422516, + "step": 4699 + }, + { + "epoch": 1.0836984090385058, + "grad_norm": 1.61840988882087, + "learning_rate": 9.447452744916722e-07, + "loss": 0.4967196583747864, + "step": 4700 + }, + { + "epoch": 1.0839289831680885, + "grad_norm": 1.3272496966479597, + "learning_rate": 9.443645997287902e-07, + "loss": 0.43682345747947693, + "step": 4701 + }, + { + "epoch": 1.0841595572976712, + "grad_norm": 1.4038050893134464, + "learning_rate": 9.439839330530781e-07, + "loss": 0.48844271898269653, + "step": 4702 + }, + { + "epoch": 1.084390131427254, + "grad_norm": 1.3581740542884078, + "learning_rate": 9.436032745198682e-07, + "loss": 0.43654918670654297, + "step": 4703 + }, + { + "epoch": 1.0846207055568364, + "grad_norm": 1.6070546851567389, + "learning_rate": 9.432226241844947e-07, + "loss": 0.5034382939338684, + "step": 4704 + }, + { + "epoch": 1.0848512796864191, + "grad_norm": 1.9516449815592325, + "learning_rate": 9.428419821022877e-07, + "loss": 0.5407527089118958, + "step": 4705 + }, + { + "epoch": 1.0850818538160019, + "grad_norm": 1.3188521673213394, + "learning_rate": 9.424613483285783e-07, + "loss": 0.4372078478336334, + "step": 4706 + }, + { + "epoch": 1.0853124279455846, + "grad_norm": 1.3673238165045705, + "learning_rate": 9.420807229186949e-07, + "loss": 0.5264855623245239, + "step": 4707 + }, + { + "epoch": 1.085543002075167, + "grad_norm": 1.2884056915833075, + "learning_rate": 9.417001059279652e-07, + "loss": 0.3810223937034607, + "step": 4708 + }, + { + "epoch": 1.0857735762047498, + "grad_norm": 1.318670262430079, + "learning_rate": 9.413194974117163e-07, + "loss": 0.368865430355072, + "step": 4709 + }, + { + "epoch": 1.0860041503343325, + 
"grad_norm": 1.3202107346651724, + "learning_rate": 9.409388974252729e-07, + "loss": 0.41845810413360596, + "step": 4710 + }, + { + "epoch": 1.0862347244639152, + "grad_norm": 1.4709870024189373, + "learning_rate": 9.405583060239594e-07, + "loss": 0.5185590982437134, + "step": 4711 + }, + { + "epoch": 1.0864652985934977, + "grad_norm": 1.7793671382372165, + "learning_rate": 9.401777232630983e-07, + "loss": 0.4848501682281494, + "step": 4712 + }, + { + "epoch": 1.0866958727230804, + "grad_norm": 1.5218788678149173, + "learning_rate": 9.397971491980119e-07, + "loss": 0.5581566691398621, + "step": 4713 + }, + { + "epoch": 1.0869264468526632, + "grad_norm": 1.475012350727374, + "learning_rate": 9.394165838840196e-07, + "loss": 0.42043447494506836, + "step": 4714 + }, + { + "epoch": 1.0871570209822459, + "grad_norm": 1.3731967040929853, + "learning_rate": 9.39036027376441e-07, + "loss": 0.45076289772987366, + "step": 4715 + }, + { + "epoch": 1.0873875951118284, + "grad_norm": 1.353578451117457, + "learning_rate": 9.386554797305934e-07, + "loss": 0.3650796413421631, + "step": 4716 + }, + { + "epoch": 1.087618169241411, + "grad_norm": 1.436571768450736, + "learning_rate": 9.38274941001794e-07, + "loss": 0.4837912321090698, + "step": 4717 + }, + { + "epoch": 1.0878487433709938, + "grad_norm": 1.5272898845570653, + "learning_rate": 9.378944112453574e-07, + "loss": 0.41277679800987244, + "step": 4718 + }, + { + "epoch": 1.0880793175005765, + "grad_norm": 1.7344713328668464, + "learning_rate": 9.375138905165973e-07, + "loss": 0.48409390449523926, + "step": 4719 + }, + { + "epoch": 1.088309891630159, + "grad_norm": 1.360949967282617, + "learning_rate": 9.371333788708268e-07, + "loss": 0.3952450752258301, + "step": 4720 + }, + { + "epoch": 1.0885404657597417, + "grad_norm": 1.6450358552008089, + "learning_rate": 9.367528763633563e-07, + "loss": 0.42314866185188293, + "step": 4721 + }, + { + "epoch": 1.0887710398893244, + "grad_norm": 1.492846868063658, + "learning_rate": 
9.363723830494966e-07, + "loss": 0.5322449207305908, + "step": 4722 + }, + { + "epoch": 1.0890016140189072, + "grad_norm": 1.3552869600155872, + "learning_rate": 9.359918989845557e-07, + "loss": 0.42307883501052856, + "step": 4723 + }, + { + "epoch": 1.0892321881484897, + "grad_norm": 1.3481901437941268, + "learning_rate": 9.356114242238413e-07, + "loss": 0.39321061968803406, + "step": 4724 + }, + { + "epoch": 1.0894627622780724, + "grad_norm": 1.6333273110158268, + "learning_rate": 9.352309588226585e-07, + "loss": 0.5064421892166138, + "step": 4725 + }, + { + "epoch": 1.089693336407655, + "grad_norm": 1.4475724274606394, + "learning_rate": 9.348505028363125e-07, + "loss": 0.44825220108032227, + "step": 4726 + }, + { + "epoch": 1.0899239105372378, + "grad_norm": 1.384316241889946, + "learning_rate": 9.344700563201065e-07, + "loss": 0.4323306679725647, + "step": 4727 + }, + { + "epoch": 1.0901544846668203, + "grad_norm": 1.3254947105842285, + "learning_rate": 9.340896193293414e-07, + "loss": 0.44907987117767334, + "step": 4728 + }, + { + "epoch": 1.090385058796403, + "grad_norm": 1.3161326376052391, + "learning_rate": 9.337091919193185e-07, + "loss": 0.416559636592865, + "step": 4729 + }, + { + "epoch": 1.0906156329259857, + "grad_norm": 1.6044534711260028, + "learning_rate": 9.33328774145336e-07, + "loss": 0.5361836552619934, + "step": 4730 + }, + { + "epoch": 1.0908462070555685, + "grad_norm": 1.3742080048163032, + "learning_rate": 9.329483660626922e-07, + "loss": 0.4815465211868286, + "step": 4731 + }, + { + "epoch": 1.091076781185151, + "grad_norm": 1.4553535934080677, + "learning_rate": 9.325679677266826e-07, + "loss": 0.5205050110816956, + "step": 4732 + }, + { + "epoch": 1.0913073553147337, + "grad_norm": 1.9887709257052897, + "learning_rate": 9.321875791926028e-07, + "loss": 0.4830896258354187, + "step": 4733 + }, + { + "epoch": 1.0915379294443164, + "grad_norm": 1.3739860439026885, + "learning_rate": 9.318072005157451e-07, + "loss": 0.4394579827785492, + 
"step": 4734 + }, + { + "epoch": 1.091768503573899, + "grad_norm": 1.6664317769247758, + "learning_rate": 9.314268317514022e-07, + "loss": 0.4614049792289734, + "step": 4735 + }, + { + "epoch": 1.0919990777034816, + "grad_norm": 1.5989711566807139, + "learning_rate": 9.31046472954864e-07, + "loss": 0.5123867988586426, + "step": 4736 + }, + { + "epoch": 1.0922296518330643, + "grad_norm": 1.879970895540274, + "learning_rate": 9.306661241814204e-07, + "loss": 0.43548035621643066, + "step": 4737 + }, + { + "epoch": 1.092460225962647, + "grad_norm": 1.4190205685105515, + "learning_rate": 9.302857854863579e-07, + "loss": 0.4102709889411926, + "step": 4738 + }, + { + "epoch": 1.0926908000922297, + "grad_norm": 1.7007344632271022, + "learning_rate": 9.299054569249628e-07, + "loss": 0.46276605129241943, + "step": 4739 + }, + { + "epoch": 1.0929213742218122, + "grad_norm": 1.5950261365712695, + "learning_rate": 9.295251385525204e-07, + "loss": 0.47700244188308716, + "step": 4740 + }, + { + "epoch": 1.093151948351395, + "grad_norm": 1.5081940540312389, + "learning_rate": 9.29144830424313e-07, + "loss": 0.5492758750915527, + "step": 4741 + }, + { + "epoch": 1.0933825224809777, + "grad_norm": 1.6521559747103167, + "learning_rate": 9.287645325956228e-07, + "loss": 0.3846803307533264, + "step": 4742 + }, + { + "epoch": 1.0936130966105604, + "grad_norm": 1.4300122822608972, + "learning_rate": 9.283842451217294e-07, + "loss": 0.47237372398376465, + "step": 4743 + }, + { + "epoch": 1.0938436707401429, + "grad_norm": 1.6996074936661776, + "learning_rate": 9.280039680579122e-07, + "loss": 0.4651675820350647, + "step": 4744 + }, + { + "epoch": 1.0940742448697256, + "grad_norm": 1.6397662048344088, + "learning_rate": 9.276237014594476e-07, + "loss": 0.5472640991210938, + "step": 4745 + }, + { + "epoch": 1.0943048189993083, + "grad_norm": 1.3158004626748314, + "learning_rate": 9.272434453816117e-07, + "loss": 0.45672351121902466, + "step": 4746 + }, + { + "epoch": 1.094535393128891, + 
"grad_norm": 1.4246135812847533, + "learning_rate": 9.268631998796785e-07, + "loss": 0.4589729905128479, + "step": 4747 + }, + { + "epoch": 1.0947659672584735, + "grad_norm": 1.4398967186683822, + "learning_rate": 9.264829650089201e-07, + "loss": 0.45882588624954224, + "step": 4748 + }, + { + "epoch": 1.0949965413880562, + "grad_norm": 1.8586265213095916, + "learning_rate": 9.26102740824608e-07, + "loss": 0.6183863282203674, + "step": 4749 + }, + { + "epoch": 1.095227115517639, + "grad_norm": 1.4631882562588927, + "learning_rate": 9.257225273820112e-07, + "loss": 0.4512014389038086, + "step": 4750 + }, + { + "epoch": 1.0954576896472217, + "grad_norm": 1.5706161838979387, + "learning_rate": 9.253423247363983e-07, + "loss": 0.5006139874458313, + "step": 4751 + }, + { + "epoch": 1.0956882637768042, + "grad_norm": 1.4110458948787974, + "learning_rate": 9.249621329430346e-07, + "loss": 0.5394018888473511, + "step": 4752 + }, + { + "epoch": 1.095918837906387, + "grad_norm": 1.5150959480945791, + "learning_rate": 9.245819520571858e-07, + "loss": 0.35523056983947754, + "step": 4753 + }, + { + "epoch": 1.0961494120359696, + "grad_norm": 1.3819812548856059, + "learning_rate": 9.242017821341143e-07, + "loss": 0.44379743933677673, + "step": 4754 + }, + { + "epoch": 1.0963799861655523, + "grad_norm": 1.6129174796361336, + "learning_rate": 9.238216232290821e-07, + "loss": 0.4190908968448639, + "step": 4755 + }, + { + "epoch": 1.0966105602951348, + "grad_norm": 1.6222067534589701, + "learning_rate": 9.234414753973488e-07, + "loss": 0.44818970561027527, + "step": 4756 + }, + { + "epoch": 1.0968411344247175, + "grad_norm": 1.4925644141379035, + "learning_rate": 9.230613386941734e-07, + "loss": 0.4134204685688019, + "step": 4757 + }, + { + "epoch": 1.0970717085543003, + "grad_norm": 1.2148478016107016, + "learning_rate": 9.226812131748118e-07, + "loss": 0.3554952144622803, + "step": 4758 + }, + { + "epoch": 1.097302282683883, + "grad_norm": 1.674922299722459, + "learning_rate": 
9.223010988945194e-07, + "loss": 0.522594690322876, + "step": 4759 + }, + { + "epoch": 1.0975328568134655, + "grad_norm": 1.4320622438584156, + "learning_rate": 9.219209959085502e-07, + "loss": 0.44814133644104004, + "step": 4760 + }, + { + "epoch": 1.0977634309430482, + "grad_norm": 1.4723286174250931, + "learning_rate": 9.215409042721551e-07, + "loss": 0.42479634284973145, + "step": 4761 + }, + { + "epoch": 1.097994005072631, + "grad_norm": 1.5414891522514993, + "learning_rate": 9.211608240405849e-07, + "loss": 0.4384934902191162, + "step": 4762 + }, + { + "epoch": 1.0982245792022136, + "grad_norm": 1.4811013868533904, + "learning_rate": 9.207807552690878e-07, + "loss": 0.5378658771514893, + "step": 4763 + }, + { + "epoch": 1.098455153331796, + "grad_norm": 1.4445039209024981, + "learning_rate": 9.204006980129111e-07, + "loss": 0.5071386694908142, + "step": 4764 + }, + { + "epoch": 1.0986857274613788, + "grad_norm": 1.5460474623164162, + "learning_rate": 9.200206523272992e-07, + "loss": 0.46085822582244873, + "step": 4765 + }, + { + "epoch": 1.0989163015909615, + "grad_norm": 1.544747382675103, + "learning_rate": 9.196406182674964e-07, + "loss": 0.5083057880401611, + "step": 4766 + }, + { + "epoch": 1.0991468757205443, + "grad_norm": 1.2845065354356755, + "learning_rate": 9.192605958887438e-07, + "loss": 0.48307740688323975, + "step": 4767 + }, + { + "epoch": 1.0993774498501268, + "grad_norm": 1.8405581264672015, + "learning_rate": 9.188805852462824e-07, + "loss": 0.5195509791374207, + "step": 4768 + }, + { + "epoch": 1.0996080239797095, + "grad_norm": 1.5537273798526559, + "learning_rate": 9.185005863953498e-07, + "loss": 0.5161266326904297, + "step": 4769 + }, + { + "epoch": 1.0998385981092922, + "grad_norm": 1.5985708455901557, + "learning_rate": 9.181205993911827e-07, + "loss": 0.4757764935493469, + "step": 4770 + }, + { + "epoch": 1.1000691722388747, + "grad_norm": 1.5307887938016926, + "learning_rate": 9.177406242890167e-07, + "loss": 0.4071381688117981, + 
"step": 4771 + }, + { + "epoch": 1.1002997463684574, + "grad_norm": 1.3525378547606768, + "learning_rate": 9.173606611440842e-07, + "loss": 0.4794449210166931, + "step": 4772 + }, + { + "epoch": 1.1005303204980401, + "grad_norm": 1.3205547171467464, + "learning_rate": 9.169807100116175e-07, + "loss": 0.4678712487220764, + "step": 4773 + }, + { + "epoch": 1.1007608946276228, + "grad_norm": 1.2863487713029464, + "learning_rate": 9.166007709468456e-07, + "loss": 0.43200960755348206, + "step": 4774 + }, + { + "epoch": 1.1009914687572055, + "grad_norm": 1.8114336882311408, + "learning_rate": 9.162208440049974e-07, + "loss": 0.49283260107040405, + "step": 4775 + }, + { + "epoch": 1.101222042886788, + "grad_norm": 1.2265456496064566, + "learning_rate": 9.158409292412982e-07, + "loss": 0.4430215358734131, + "step": 4776 + }, + { + "epoch": 1.1014526170163708, + "grad_norm": 1.282698473472426, + "learning_rate": 9.154610267109731e-07, + "loss": 0.4529581069946289, + "step": 4777 + }, + { + "epoch": 1.1016831911459535, + "grad_norm": 1.3698366211761768, + "learning_rate": 9.150811364692446e-07, + "loss": 0.3872554302215576, + "step": 4778 + }, + { + "epoch": 1.101913765275536, + "grad_norm": 1.4034579683870105, + "learning_rate": 9.147012585713331e-07, + "loss": 0.466983437538147, + "step": 4779 + }, + { + "epoch": 1.1021443394051187, + "grad_norm": 1.3799350437064777, + "learning_rate": 9.143213930724587e-07, + "loss": 0.4841456115245819, + "step": 4780 + }, + { + "epoch": 1.1023749135347014, + "grad_norm": 2.083063073101601, + "learning_rate": 9.139415400278376e-07, + "loss": 0.4506613612174988, + "step": 4781 + }, + { + "epoch": 1.1026054876642841, + "grad_norm": 1.5047320834529434, + "learning_rate": 9.135616994926861e-07, + "loss": 0.428241491317749, + "step": 4782 + }, + { + "epoch": 1.1028360617938668, + "grad_norm": 1.3329992006000018, + "learning_rate": 9.131818715222175e-07, + "loss": 0.46940821409225464, + "step": 4783 + }, + { + "epoch": 1.1030666359234493, + 
"grad_norm": 1.5416614978551508, + "learning_rate": 9.12802056171644e-07, + "loss": 0.4527658224105835, + "step": 4784 + }, + { + "epoch": 1.103297210053032, + "grad_norm": 1.3412511641642377, + "learning_rate": 9.124222534961749e-07, + "loss": 0.3284989893436432, + "step": 4785 + }, + { + "epoch": 1.1035277841826148, + "grad_norm": 1.497248247266052, + "learning_rate": 9.120424635510193e-07, + "loss": 0.448346883058548, + "step": 4786 + }, + { + "epoch": 1.1037583583121973, + "grad_norm": 1.5413647461227613, + "learning_rate": 9.116626863913826e-07, + "loss": 0.4625587463378906, + "step": 4787 + }, + { + "epoch": 1.10398893244178, + "grad_norm": 1.398727589269655, + "learning_rate": 9.112829220724703e-07, + "loss": 0.37891942262649536, + "step": 4788 + }, + { + "epoch": 1.1042195065713627, + "grad_norm": 1.510309439727558, + "learning_rate": 9.109031706494841e-07, + "loss": 0.48719239234924316, + "step": 4789 + }, + { + "epoch": 1.1044500807009454, + "grad_norm": 1.695631911449914, + "learning_rate": 9.105234321776247e-07, + "loss": 0.5341615676879883, + "step": 4790 + }, + { + "epoch": 1.1046806548305281, + "grad_norm": 1.30752453253924, + "learning_rate": 9.101437067120918e-07, + "loss": 0.36677777767181396, + "step": 4791 + }, + { + "epoch": 1.1049112289601106, + "grad_norm": 1.3000512165603213, + "learning_rate": 9.097639943080813e-07, + "loss": 0.4348159432411194, + "step": 4792 + }, + { + "epoch": 1.1051418030896933, + "grad_norm": 1.3763164723830184, + "learning_rate": 9.093842950207891e-07, + "loss": 0.44912683963775635, + "step": 4793 + }, + { + "epoch": 1.105372377219276, + "grad_norm": 1.655048045877048, + "learning_rate": 9.090046089054077e-07, + "loss": 0.5576057434082031, + "step": 4794 + }, + { + "epoch": 1.1056029513488586, + "grad_norm": 1.4655907130631036, + "learning_rate": 9.08624936017129e-07, + "loss": 0.43964770436286926, + "step": 4795 + }, + { + "epoch": 1.1058335254784413, + "grad_norm": 1.3648059541391266, + "learning_rate": 
9.082452764111415e-07, + "loss": 0.4285386800765991, + "step": 4796 + }, + { + "epoch": 1.106064099608024, + "grad_norm": 1.6322901017927212, + "learning_rate": 9.078656301426332e-07, + "loss": 0.4257868230342865, + "step": 4797 + }, + { + "epoch": 1.1062946737376067, + "grad_norm": 1.9314022304382554, + "learning_rate": 9.074859972667895e-07, + "loss": 0.4540346562862396, + "step": 4798 + }, + { + "epoch": 1.1065252478671892, + "grad_norm": 1.6801359554397164, + "learning_rate": 9.071063778387933e-07, + "loss": 0.5273457765579224, + "step": 4799 + }, + { + "epoch": 1.106755821996772, + "grad_norm": 1.4107980839711056, + "learning_rate": 9.067267719138268e-07, + "loss": 0.391310453414917, + "step": 4800 + }, + { + "epoch": 1.1069863961263546, + "grad_norm": 1.4182050274963418, + "learning_rate": 9.063471795470691e-07, + "loss": 0.47945383191108704, + "step": 4801 + }, + { + "epoch": 1.1072169702559373, + "grad_norm": 1.7087277476088294, + "learning_rate": 9.05967600793698e-07, + "loss": 0.49561476707458496, + "step": 4802 + }, + { + "epoch": 1.1074475443855198, + "grad_norm": 1.3070252929290396, + "learning_rate": 9.05588035708889e-07, + "loss": 0.4505256414413452, + "step": 4803 + }, + { + "epoch": 1.1076781185151026, + "grad_norm": 1.6864844579974707, + "learning_rate": 9.052084843478164e-07, + "loss": 0.37591490149497986, + "step": 4804 + }, + { + "epoch": 1.1079086926446853, + "grad_norm": 1.486226704077577, + "learning_rate": 9.048289467656508e-07, + "loss": 0.478586345911026, + "step": 4805 + }, + { + "epoch": 1.108139266774268, + "grad_norm": 1.3819959446941394, + "learning_rate": 9.044494230175625e-07, + "loss": 0.4373725354671478, + "step": 4806 + }, + { + "epoch": 1.1083698409038505, + "grad_norm": 1.4091791216138099, + "learning_rate": 9.040699131587186e-07, + "loss": 0.3976345360279083, + "step": 4807 + }, + { + "epoch": 1.1086004150334332, + "grad_norm": 1.3848852740812903, + "learning_rate": 9.036904172442857e-07, + "loss": 0.44611310958862305, + 
"step": 4808 + }, + { + "epoch": 1.108830989163016, + "grad_norm": 1.3117584806534919, + "learning_rate": 9.033109353294262e-07, + "loss": 0.40816667675971985, + "step": 4809 + }, + { + "epoch": 1.1090615632925986, + "grad_norm": 1.359605756890841, + "learning_rate": 9.029314674693023e-07, + "loss": 0.37462317943573, + "step": 4810 + }, + { + "epoch": 1.1092921374221811, + "grad_norm": 1.3641846963299056, + "learning_rate": 9.025520137190735e-07, + "loss": 0.3856509327888489, + "step": 4811 + }, + { + "epoch": 1.1095227115517639, + "grad_norm": 1.5740711616700624, + "learning_rate": 9.021725741338969e-07, + "loss": 0.4728443920612335, + "step": 4812 + }, + { + "epoch": 1.1097532856813466, + "grad_norm": 2.0717537833557773, + "learning_rate": 9.017931487689282e-07, + "loss": 0.4614938795566559, + "step": 4813 + }, + { + "epoch": 1.1099838598109293, + "grad_norm": 1.4925546437709947, + "learning_rate": 9.014137376793203e-07, + "loss": 0.4137331247329712, + "step": 4814 + }, + { + "epoch": 1.1102144339405118, + "grad_norm": 1.2481779358565226, + "learning_rate": 9.010343409202255e-07, + "loss": 0.42436620593070984, + "step": 4815 + }, + { + "epoch": 1.1104450080700945, + "grad_norm": 1.3339513565407848, + "learning_rate": 9.006549585467916e-07, + "loss": 0.43592822551727295, + "step": 4816 + }, + { + "epoch": 1.1106755821996772, + "grad_norm": 1.3742872645989155, + "learning_rate": 9.002755906141666e-07, + "loss": 0.45627349615097046, + "step": 4817 + }, + { + "epoch": 1.11090615632926, + "grad_norm": 1.819907938722267, + "learning_rate": 8.998962371774953e-07, + "loss": 0.5103771686553955, + "step": 4818 + }, + { + "epoch": 1.1111367304588424, + "grad_norm": 1.4418115437773273, + "learning_rate": 8.995168982919201e-07, + "loss": 0.470276802778244, + "step": 4819 + }, + { + "epoch": 1.1113673045884251, + "grad_norm": 1.3186176277536419, + "learning_rate": 8.991375740125823e-07, + "loss": 0.49486416578292847, + "step": 4820 + }, + { + "epoch": 1.1115978787180079, + 
"grad_norm": 1.143316450397621, + "learning_rate": 8.987582643946201e-07, + "loss": 0.338329017162323, + "step": 4821 + }, + { + "epoch": 1.1118284528475906, + "grad_norm": 1.4885392176771477, + "learning_rate": 8.983789694931706e-07, + "loss": 0.38252198696136475, + "step": 4822 + }, + { + "epoch": 1.112059026977173, + "grad_norm": 1.4537319037859584, + "learning_rate": 8.979996893633675e-07, + "loss": 0.47691571712493896, + "step": 4823 + }, + { + "epoch": 1.1122896011067558, + "grad_norm": 1.41954873904419, + "learning_rate": 8.976204240603433e-07, + "loss": 0.40156808495521545, + "step": 4824 + }, + { + "epoch": 1.1125201752363385, + "grad_norm": 1.312743475511893, + "learning_rate": 8.97241173639228e-07, + "loss": 0.3837090730667114, + "step": 4825 + }, + { + "epoch": 1.1127507493659212, + "grad_norm": 1.6300077035939553, + "learning_rate": 8.968619381551499e-07, + "loss": 0.5094380378723145, + "step": 4826 + }, + { + "epoch": 1.1129813234955037, + "grad_norm": 1.4389159508234053, + "learning_rate": 8.964827176632339e-07, + "loss": 0.48674100637435913, + "step": 4827 + }, + { + "epoch": 1.1132118976250864, + "grad_norm": 1.7742534070601, + "learning_rate": 8.961035122186045e-07, + "loss": 0.49288761615753174, + "step": 4828 + }, + { + "epoch": 1.1134424717546691, + "grad_norm": 1.4156686622304593, + "learning_rate": 8.957243218763824e-07, + "loss": 0.42933952808380127, + "step": 4829 + }, + { + "epoch": 1.1136730458842519, + "grad_norm": 1.838762036908513, + "learning_rate": 8.953451466916866e-07, + "loss": 0.39244914054870605, + "step": 4830 + }, + { + "epoch": 1.1139036200138344, + "grad_norm": 1.3776049792093739, + "learning_rate": 8.949659867196348e-07, + "loss": 0.44688090682029724, + "step": 4831 + }, + { + "epoch": 1.114134194143417, + "grad_norm": 1.6923430022628052, + "learning_rate": 8.945868420153409e-07, + "loss": 0.5388743877410889, + "step": 4832 + }, + { + "epoch": 1.1143647682729998, + "grad_norm": 1.6108426528928312, + "learning_rate": 
8.942077126339182e-07, + "loss": 0.4320666193962097, + "step": 4833 + }, + { + "epoch": 1.1145953424025825, + "grad_norm": 1.3700008221476991, + "learning_rate": 8.938285986304762e-07, + "loss": 0.37623411417007446, + "step": 4834 + }, + { + "epoch": 1.114825916532165, + "grad_norm": 1.4274453986312428, + "learning_rate": 8.93449500060124e-07, + "loss": 0.4743962287902832, + "step": 4835 + }, + { + "epoch": 1.1150564906617477, + "grad_norm": 1.4687481503878526, + "learning_rate": 8.930704169779663e-07, + "loss": 0.4833221435546875, + "step": 4836 + }, + { + "epoch": 1.1152870647913304, + "grad_norm": 1.580828459296504, + "learning_rate": 8.926913494391074e-07, + "loss": 0.48811084032058716, + "step": 4837 + }, + { + "epoch": 1.1155176389209132, + "grad_norm": 1.4663777441823886, + "learning_rate": 8.923122974986487e-07, + "loss": 0.42525774240493774, + "step": 4838 + }, + { + "epoch": 1.1157482130504957, + "grad_norm": 1.4773669175093567, + "learning_rate": 8.919332612116884e-07, + "loss": 0.4347909688949585, + "step": 4839 + }, + { + "epoch": 1.1159787871800784, + "grad_norm": 1.9619203877260345, + "learning_rate": 8.915542406333241e-07, + "loss": 0.5085601806640625, + "step": 4840 + }, + { + "epoch": 1.116209361309661, + "grad_norm": 1.4214902735687815, + "learning_rate": 8.911752358186497e-07, + "loss": 0.4620482325553894, + "step": 4841 + }, + { + "epoch": 1.1164399354392438, + "grad_norm": 1.3147570239530335, + "learning_rate": 8.907962468227582e-07, + "loss": 0.44923216104507446, + "step": 4842 + }, + { + "epoch": 1.1166705095688263, + "grad_norm": 1.6422580107908513, + "learning_rate": 8.904172737007386e-07, + "loss": 0.547439694404602, + "step": 4843 + }, + { + "epoch": 1.116901083698409, + "grad_norm": 1.7769022711207687, + "learning_rate": 8.900383165076789e-07, + "loss": 0.4609268307685852, + "step": 4844 + }, + { + "epoch": 1.1171316578279917, + "grad_norm": 1.4046866803141593, + "learning_rate": 8.896593752986642e-07, + "loss": 0.41780030727386475, + 
"step": 4845 + }, + { + "epoch": 1.1173622319575744, + "grad_norm": 1.3641825367692086, + "learning_rate": 8.89280450128778e-07, + "loss": 0.506212592124939, + "step": 4846 + }, + { + "epoch": 1.117592806087157, + "grad_norm": 1.4049897839890735, + "learning_rate": 8.889015410531001e-07, + "loss": 0.4436545968055725, + "step": 4847 + }, + { + "epoch": 1.1178233802167397, + "grad_norm": 1.3856199735325436, + "learning_rate": 8.885226481267093e-07, + "loss": 0.4473826289176941, + "step": 4848 + }, + { + "epoch": 1.1180539543463224, + "grad_norm": 1.42622736433257, + "learning_rate": 8.881437714046815e-07, + "loss": 0.43499836325645447, + "step": 4849 + }, + { + "epoch": 1.118284528475905, + "grad_norm": 1.5927469786677344, + "learning_rate": 8.877649109420899e-07, + "loss": 0.522705078125, + "step": 4850 + }, + { + "epoch": 1.1185151026054876, + "grad_norm": 1.5596781330511842, + "learning_rate": 8.873860667940064e-07, + "loss": 0.42146036028862, + "step": 4851 + }, + { + "epoch": 1.1187456767350703, + "grad_norm": 1.649425162171124, + "learning_rate": 8.870072390154989e-07, + "loss": 0.5875130891799927, + "step": 4852 + }, + { + "epoch": 1.118976250864653, + "grad_norm": 1.6372722830693418, + "learning_rate": 8.866284276616345e-07, + "loss": 0.5187985301017761, + "step": 4853 + }, + { + "epoch": 1.1192068249942357, + "grad_norm": 2.6266893474509474, + "learning_rate": 8.86249632787477e-07, + "loss": 0.46115952730178833, + "step": 4854 + }, + { + "epoch": 1.1194373991238182, + "grad_norm": 1.4714921061709185, + "learning_rate": 8.858708544480886e-07, + "loss": 0.4926493167877197, + "step": 4855 + }, + { + "epoch": 1.119667973253401, + "grad_norm": 1.5525331026142626, + "learning_rate": 8.854920926985278e-07, + "loss": 0.44512006640434265, + "step": 4856 + }, + { + "epoch": 1.1198985473829837, + "grad_norm": 1.5145408688074757, + "learning_rate": 8.85113347593852e-07, + "loss": 0.45973241329193115, + "step": 4857 + }, + { + "epoch": 1.1201291215125664, + "grad_norm": 
1.5400172209521554, + "learning_rate": 8.847346191891157e-07, + "loss": 0.4915385842323303, + "step": 4858 + }, + { + "epoch": 1.1203596956421489, + "grad_norm": 1.4900152202768027, + "learning_rate": 8.843559075393701e-07, + "loss": 0.4457864463329315, + "step": 4859 + }, + { + "epoch": 1.1205902697717316, + "grad_norm": 1.3414730221020197, + "learning_rate": 8.839772126996658e-07, + "loss": 0.4782453775405884, + "step": 4860 + }, + { + "epoch": 1.1208208439013143, + "grad_norm": 1.3591384899787133, + "learning_rate": 8.835985347250492e-07, + "loss": 0.42789584398269653, + "step": 4861 + }, + { + "epoch": 1.121051418030897, + "grad_norm": 1.8532602863182117, + "learning_rate": 8.832198736705657e-07, + "loss": 0.49990910291671753, + "step": 4862 + }, + { + "epoch": 1.1212819921604795, + "grad_norm": 1.4158258863269764, + "learning_rate": 8.828412295912566e-07, + "loss": 0.3735005855560303, + "step": 4863 + }, + { + "epoch": 1.1215125662900622, + "grad_norm": 1.3744374187815367, + "learning_rate": 8.824626025421624e-07, + "loss": 0.402673602104187, + "step": 4864 + }, + { + "epoch": 1.121743140419645, + "grad_norm": 1.57241412674585, + "learning_rate": 8.820839925783198e-07, + "loss": 0.4675491452217102, + "step": 4865 + }, + { + "epoch": 1.1219737145492277, + "grad_norm": 2.0200104658377254, + "learning_rate": 8.817053997547645e-07, + "loss": 0.5098662376403809, + "step": 4866 + }, + { + "epoch": 1.1222042886788102, + "grad_norm": 1.3880207155981488, + "learning_rate": 8.813268241265278e-07, + "loss": 0.44478029012680054, + "step": 4867 + }, + { + "epoch": 1.1224348628083929, + "grad_norm": 1.4983402004688406, + "learning_rate": 8.809482657486401e-07, + "loss": 0.410754919052124, + "step": 4868 + }, + { + "epoch": 1.1226654369379756, + "grad_norm": 1.193726420763111, + "learning_rate": 8.805697246761288e-07, + "loss": 0.4198191165924072, + "step": 4869 + }, + { + "epoch": 1.1228960110675583, + "grad_norm": 1.6015778378598091, + "learning_rate": 
8.801912009640178e-07, + "loss": 0.5399911403656006, + "step": 4870 + }, + { + "epoch": 1.1231265851971408, + "grad_norm": 1.3209581029003303, + "learning_rate": 8.798126946673305e-07, + "loss": 0.3879680633544922, + "step": 4871 + }, + { + "epoch": 1.1233571593267235, + "grad_norm": 1.7893299917127135, + "learning_rate": 8.794342058410856e-07, + "loss": 0.4629073739051819, + "step": 4872 + }, + { + "epoch": 1.1235877334563062, + "grad_norm": 1.25180398717926, + "learning_rate": 8.790557345403013e-07, + "loss": 0.42299884557724, + "step": 4873 + }, + { + "epoch": 1.123818307585889, + "grad_norm": 1.5467146262725529, + "learning_rate": 8.786772808199912e-07, + "loss": 0.509437620639801, + "step": 4874 + }, + { + "epoch": 1.1240488817154715, + "grad_norm": 1.3436359029840506, + "learning_rate": 8.782988447351684e-07, + "loss": 0.4682687222957611, + "step": 4875 + }, + { + "epoch": 1.1242794558450542, + "grad_norm": 1.2884743737928093, + "learning_rate": 8.779204263408416e-07, + "loss": 0.41155606508255005, + "step": 4876 + }, + { + "epoch": 1.124510029974637, + "grad_norm": 1.6449136860944156, + "learning_rate": 8.775420256920182e-07, + "loss": 0.4705810844898224, + "step": 4877 + }, + { + "epoch": 1.1247406041042196, + "grad_norm": 1.4648471947605348, + "learning_rate": 8.771636428437022e-07, + "loss": 0.36571264266967773, + "step": 4878 + }, + { + "epoch": 1.124971178233802, + "grad_norm": 1.1768139651906544, + "learning_rate": 8.76785277850896e-07, + "loss": 0.36618396639823914, + "step": 4879 + }, + { + "epoch": 1.1252017523633848, + "grad_norm": 1.5334328638730685, + "learning_rate": 8.764069307685983e-07, + "loss": 0.4861210584640503, + "step": 4880 + }, + { + "epoch": 1.1254323264929675, + "grad_norm": 1.457839206264918, + "learning_rate": 8.760286016518056e-07, + "loss": 0.43346846103668213, + "step": 4881 + }, + { + "epoch": 1.12566290062255, + "grad_norm": 1.28421921022301, + "learning_rate": 8.756502905555123e-07, + "loss": 0.40088707208633423, + "step": 
4882 + }, + { + "epoch": 1.1258934747521328, + "grad_norm": 1.4643062187844458, + "learning_rate": 8.752719975347092e-07, + "loss": 0.4088619649410248, + "step": 4883 + }, + { + "epoch": 1.1261240488817155, + "grad_norm": 1.5527291710325282, + "learning_rate": 8.748937226443857e-07, + "loss": 0.4988909661769867, + "step": 4884 + }, + { + "epoch": 1.1263546230112982, + "grad_norm": 1.5377239167998313, + "learning_rate": 8.745154659395271e-07, + "loss": 0.47022196650505066, + "step": 4885 + }, + { + "epoch": 1.126585197140881, + "grad_norm": 1.3259626220698026, + "learning_rate": 8.741372274751178e-07, + "loss": 0.45005398988723755, + "step": 4886 + }, + { + "epoch": 1.1268157712704634, + "grad_norm": 1.5001674672720546, + "learning_rate": 8.737590073061376e-07, + "loss": 0.4632537364959717, + "step": 4887 + }, + { + "epoch": 1.1270463454000461, + "grad_norm": 1.2983235840008036, + "learning_rate": 8.733808054875653e-07, + "loss": 0.41034963726997375, + "step": 4888 + }, + { + "epoch": 1.1272769195296288, + "grad_norm": 1.423352740140202, + "learning_rate": 8.730026220743765e-07, + "loss": 0.5169668793678284, + "step": 4889 + }, + { + "epoch": 1.1275074936592113, + "grad_norm": 1.46630659535839, + "learning_rate": 8.726244571215431e-07, + "loss": 0.44972485303878784, + "step": 4890 + }, + { + "epoch": 1.127738067788794, + "grad_norm": 1.5712937661942725, + "learning_rate": 8.722463106840361e-07, + "loss": 0.4854368567466736, + "step": 4891 + }, + { + "epoch": 1.1279686419183768, + "grad_norm": 1.0525840961962005, + "learning_rate": 8.718681828168223e-07, + "loss": 0.3029147982597351, + "step": 4892 + }, + { + "epoch": 1.1281992160479595, + "grad_norm": 1.5856241308624208, + "learning_rate": 8.714900735748671e-07, + "loss": 0.4770504832267761, + "step": 4893 + }, + { + "epoch": 1.1284297901775422, + "grad_norm": 1.3799690323722245, + "learning_rate": 8.711119830131317e-07, + "loss": 0.48508110642433167, + "step": 4894 + }, + { + "epoch": 1.1286603643071247, + 
"grad_norm": 1.4227656672873528, + "learning_rate": 8.707339111865761e-07, + "loss": 0.43302488327026367, + "step": 4895 + }, + { + "epoch": 1.1288909384367074, + "grad_norm": 1.3481652076868464, + "learning_rate": 8.703558581501563e-07, + "loss": 0.5720575451850891, + "step": 4896 + }, + { + "epoch": 1.1291215125662901, + "grad_norm": 1.1736572520471924, + "learning_rate": 8.69977823958827e-07, + "loss": 0.48236098885536194, + "step": 4897 + }, + { + "epoch": 1.1293520866958726, + "grad_norm": 1.6539784416028527, + "learning_rate": 8.69599808667538e-07, + "loss": 0.48531901836395264, + "step": 4898 + }, + { + "epoch": 1.1295826608254553, + "grad_norm": 1.390226643422974, + "learning_rate": 8.69221812331239e-07, + "loss": 0.4150174856185913, + "step": 4899 + }, + { + "epoch": 1.129813234955038, + "grad_norm": 1.4594360531114157, + "learning_rate": 8.688438350048748e-07, + "loss": 0.4729560911655426, + "step": 4900 + }, + { + "epoch": 1.1300438090846208, + "grad_norm": 1.5805161631694824, + "learning_rate": 8.684658767433881e-07, + "loss": 0.5081748962402344, + "step": 4901 + }, + { + "epoch": 1.1302743832142035, + "grad_norm": 1.3577399194161552, + "learning_rate": 8.680879376017197e-07, + "loss": 0.4552333354949951, + "step": 4902 + }, + { + "epoch": 1.130504957343786, + "grad_norm": 1.666206186626053, + "learning_rate": 8.67710017634806e-07, + "loss": 0.4784387946128845, + "step": 4903 + }, + { + "epoch": 1.1307355314733687, + "grad_norm": 1.7781011363806714, + "learning_rate": 8.673321168975823e-07, + "loss": 0.46922338008880615, + "step": 4904 + }, + { + "epoch": 1.1309661056029514, + "grad_norm": 1.414520843561148, + "learning_rate": 8.669542354449797e-07, + "loss": 0.38181525468826294, + "step": 4905 + }, + { + "epoch": 1.131196679732534, + "grad_norm": 1.409807627526861, + "learning_rate": 8.665763733319278e-07, + "loss": 0.4729689359664917, + "step": 4906 + }, + { + "epoch": 1.1314272538621166, + "grad_norm": 1.3128859029806206, + "learning_rate": 
8.661985306133517e-07, + "loss": 0.3934294581413269, + "step": 4907 + }, + { + "epoch": 1.1316578279916993, + "grad_norm": 1.1525332387894895, + "learning_rate": 8.658207073441754e-07, + "loss": 0.40270352363586426, + "step": 4908 + }, + { + "epoch": 1.131888402121282, + "grad_norm": 1.245477282269021, + "learning_rate": 8.654429035793196e-07, + "loss": 0.43291163444519043, + "step": 4909 + }, + { + "epoch": 1.1321189762508648, + "grad_norm": 1.8011937733870678, + "learning_rate": 8.650651193737009e-07, + "loss": 0.5054877996444702, + "step": 4910 + }, + { + "epoch": 1.1323495503804473, + "grad_norm": 1.4188548576207016, + "learning_rate": 8.646873547822347e-07, + "loss": 0.5043776035308838, + "step": 4911 + }, + { + "epoch": 1.13258012451003, + "grad_norm": 1.511127988179462, + "learning_rate": 8.643096098598328e-07, + "loss": 0.4246225953102112, + "step": 4912 + }, + { + "epoch": 1.1328106986396127, + "grad_norm": 1.3198976342579845, + "learning_rate": 8.639318846614048e-07, + "loss": 0.4514849781990051, + "step": 4913 + }, + { + "epoch": 1.1330412727691952, + "grad_norm": 1.5409054507370947, + "learning_rate": 8.635541792418557e-07, + "loss": 0.4780477285385132, + "step": 4914 + }, + { + "epoch": 1.133271846898778, + "grad_norm": 1.4447509965410514, + "learning_rate": 8.631764936560899e-07, + "loss": 0.47164270281791687, + "step": 4915 + }, + { + "epoch": 1.1335024210283606, + "grad_norm": 1.4642572467177732, + "learning_rate": 8.62798827959007e-07, + "loss": 0.5462276339530945, + "step": 4916 + }, + { + "epoch": 1.1337329951579433, + "grad_norm": 1.3611348332418316, + "learning_rate": 8.624211822055055e-07, + "loss": 0.37229591608047485, + "step": 4917 + }, + { + "epoch": 1.133963569287526, + "grad_norm": 1.6004056206114348, + "learning_rate": 8.620435564504791e-07, + "loss": 0.46595901250839233, + "step": 4918 + }, + { + "epoch": 1.1341941434171086, + "grad_norm": 1.899603419019246, + "learning_rate": 8.616659507488201e-07, + "loss": 0.4645708203315735, + 
"step": 4919 + }, + { + "epoch": 1.1344247175466913, + "grad_norm": 1.3014565799840314, + "learning_rate": 8.612883651554173e-07, + "loss": 0.4309888482093811, + "step": 4920 + }, + { + "epoch": 1.134655291676274, + "grad_norm": 1.2254662174184374, + "learning_rate": 8.60910799725156e-07, + "loss": 0.4000548720359802, + "step": 4921 + }, + { + "epoch": 1.1348858658058565, + "grad_norm": 1.2990272231335294, + "learning_rate": 8.6053325451292e-07, + "loss": 0.41321274638175964, + "step": 4922 + }, + { + "epoch": 1.1351164399354392, + "grad_norm": 1.7479036509525407, + "learning_rate": 8.601557295735884e-07, + "loss": 0.38982951641082764, + "step": 4923 + }, + { + "epoch": 1.135347014065022, + "grad_norm": 1.3265126570648142, + "learning_rate": 8.597782249620394e-07, + "loss": 0.44623300433158875, + "step": 4924 + }, + { + "epoch": 1.1355775881946046, + "grad_norm": 1.6004563551212632, + "learning_rate": 8.594007407331458e-07, + "loss": 0.46876993775367737, + "step": 4925 + }, + { + "epoch": 1.1358081623241871, + "grad_norm": 1.4785026933128127, + "learning_rate": 8.590232769417803e-07, + "loss": 0.41345149278640747, + "step": 4926 + }, + { + "epoch": 1.1360387364537698, + "grad_norm": 1.6712340860086734, + "learning_rate": 8.586458336428095e-07, + "loss": 0.4199402332305908, + "step": 4927 + }, + { + "epoch": 1.1362693105833526, + "grad_norm": 1.5807454346525946, + "learning_rate": 8.582684108910998e-07, + "loss": 0.4424753785133362, + "step": 4928 + }, + { + "epoch": 1.1364998847129353, + "grad_norm": 1.5318763722061228, + "learning_rate": 8.57891008741513e-07, + "loss": 0.5066598057746887, + "step": 4929 + }, + { + "epoch": 1.1367304588425178, + "grad_norm": 1.409045447069904, + "learning_rate": 8.575136272489081e-07, + "loss": 0.45959407091140747, + "step": 4930 + }, + { + "epoch": 1.1369610329721005, + "grad_norm": 1.191773933725539, + "learning_rate": 8.571362664681415e-07, + "loss": 0.4579051733016968, + "step": 4931 + }, + { + "epoch": 1.1371916071016832, + 
"grad_norm": 1.4061203144708347, + "learning_rate": 8.567589264540665e-07, + "loss": 0.5125559568405151, + "step": 4932 + }, + { + "epoch": 1.137422181231266, + "grad_norm": 1.484125992313306, + "learning_rate": 8.563816072615335e-07, + "loss": 0.4236595630645752, + "step": 4933 + }, + { + "epoch": 1.1376527553608484, + "grad_norm": 1.3909472723060943, + "learning_rate": 8.56004308945389e-07, + "loss": 0.40187013149261475, + "step": 4934 + }, + { + "epoch": 1.1378833294904311, + "grad_norm": 1.7306785223672838, + "learning_rate": 8.556270315604778e-07, + "loss": 0.5069487690925598, + "step": 4935 + }, + { + "epoch": 1.1381139036200139, + "grad_norm": 1.2666499948179348, + "learning_rate": 8.552497751616406e-07, + "loss": 0.4032680094242096, + "step": 4936 + }, + { + "epoch": 1.1383444777495966, + "grad_norm": 1.5147949059405765, + "learning_rate": 8.548725398037158e-07, + "loss": 0.4745323061943054, + "step": 4937 + }, + { + "epoch": 1.138575051879179, + "grad_norm": 1.6025857024716508, + "learning_rate": 8.544953255415379e-07, + "loss": 0.5203470587730408, + "step": 4938 + }, + { + "epoch": 1.1388056260087618, + "grad_norm": 1.3018365690111693, + "learning_rate": 8.541181324299392e-07, + "loss": 0.3780457079410553, + "step": 4939 + }, + { + "epoch": 1.1390362001383445, + "grad_norm": 1.4908739703097478, + "learning_rate": 8.537409605237486e-07, + "loss": 0.4544069766998291, + "step": 4940 + }, + { + "epoch": 1.1392667742679272, + "grad_norm": 1.3726631913286653, + "learning_rate": 8.533638098777914e-07, + "loss": 0.3692469000816345, + "step": 4941 + }, + { + "epoch": 1.1394973483975097, + "grad_norm": 1.7461198015621147, + "learning_rate": 8.529866805468907e-07, + "loss": 0.4733508825302124, + "step": 4942 + }, + { + "epoch": 1.1397279225270924, + "grad_norm": 1.7055847796006547, + "learning_rate": 8.526095725858658e-07, + "loss": 0.5165152549743652, + "step": 4943 + }, + { + "epoch": 1.1399584966566751, + "grad_norm": 1.5781652989183093, + "learning_rate": 
8.522324860495336e-07, + "loss": 0.40220290422439575, + "step": 4944 + }, + { + "epoch": 1.1401890707862579, + "grad_norm": 1.676524129553008, + "learning_rate": 8.518554209927066e-07, + "loss": 0.511976957321167, + "step": 4945 + }, + { + "epoch": 1.1404196449158404, + "grad_norm": 1.4578766238891505, + "learning_rate": 8.514783774701959e-07, + "loss": 0.4472247362136841, + "step": 4946 + }, + { + "epoch": 1.140650219045423, + "grad_norm": 1.3731717985494665, + "learning_rate": 8.51101355536808e-07, + "loss": 0.4368797242641449, + "step": 4947 + }, + { + "epoch": 1.1408807931750058, + "grad_norm": 1.3383514367818596, + "learning_rate": 8.507243552473476e-07, + "loss": 0.3794320225715637, + "step": 4948 + }, + { + "epoch": 1.1411113673045885, + "grad_norm": 1.7604514892248042, + "learning_rate": 8.50347376656615e-07, + "loss": 0.5229817628860474, + "step": 4949 + }, + { + "epoch": 1.141341941434171, + "grad_norm": 1.4803188344976619, + "learning_rate": 8.499704198194075e-07, + "loss": 0.4771326780319214, + "step": 4950 + }, + { + "epoch": 1.1415725155637537, + "grad_norm": 1.406078110966921, + "learning_rate": 8.495934847905201e-07, + "loss": 0.45151978731155396, + "step": 4951 + }, + { + "epoch": 1.1418030896933364, + "grad_norm": 1.3579359781108167, + "learning_rate": 8.492165716247439e-07, + "loss": 0.3963208496570587, + "step": 4952 + }, + { + "epoch": 1.1420336638229192, + "grad_norm": 1.2797227148111936, + "learning_rate": 8.488396803768675e-07, + "loss": 0.37465882301330566, + "step": 4953 + }, + { + "epoch": 1.1422642379525016, + "grad_norm": 1.7257432451816517, + "learning_rate": 8.484628111016752e-07, + "loss": 0.437372088432312, + "step": 4954 + }, + { + "epoch": 1.1424948120820844, + "grad_norm": 1.3198726990576308, + "learning_rate": 8.480859638539492e-07, + "loss": 0.40495651960372925, + "step": 4955 + }, + { + "epoch": 1.142725386211667, + "grad_norm": 1.5937176142563847, + "learning_rate": 8.477091386884677e-07, + "loss": 0.5346927642822266, + 
"step": 4956 + }, + { + "epoch": 1.1429559603412498, + "grad_norm": 1.7035083737998966, + "learning_rate": 8.473323356600068e-07, + "loss": 0.42448925971984863, + "step": 4957 + }, + { + "epoch": 1.1431865344708323, + "grad_norm": 1.4329878189218077, + "learning_rate": 8.469555548233378e-07, + "loss": 0.4715193808078766, + "step": 4958 + }, + { + "epoch": 1.143417108600415, + "grad_norm": 1.5249370547485697, + "learning_rate": 8.465787962332301e-07, + "loss": 0.4721440076828003, + "step": 4959 + }, + { + "epoch": 1.1436476827299977, + "grad_norm": 1.4963659204960478, + "learning_rate": 8.462020599444495e-07, + "loss": 0.5478333234786987, + "step": 4960 + }, + { + "epoch": 1.1438782568595804, + "grad_norm": 1.5534391969085817, + "learning_rate": 8.458253460117577e-07, + "loss": 0.4005582928657532, + "step": 4961 + }, + { + "epoch": 1.144108830989163, + "grad_norm": 1.4816205297794078, + "learning_rate": 8.454486544899146e-07, + "loss": 0.43886178731918335, + "step": 4962 + }, + { + "epoch": 1.1443394051187457, + "grad_norm": 1.2296294541393762, + "learning_rate": 8.450719854336758e-07, + "loss": 0.4404095709323883, + "step": 4963 + }, + { + "epoch": 1.1445699792483284, + "grad_norm": 1.5412493838775327, + "learning_rate": 8.446953388977943e-07, + "loss": 0.5386335849761963, + "step": 4964 + }, + { + "epoch": 1.144800553377911, + "grad_norm": 1.5969922474986569, + "learning_rate": 8.44318714937019e-07, + "loss": 0.4576258659362793, + "step": 4965 + }, + { + "epoch": 1.1450311275074936, + "grad_norm": 1.2968718824878773, + "learning_rate": 8.439421136060964e-07, + "loss": 0.4619024991989136, + "step": 4966 + }, + { + "epoch": 1.1452617016370763, + "grad_norm": 1.4106895392209726, + "learning_rate": 8.435655349597689e-07, + "loss": 0.4071081876754761, + "step": 4967 + }, + { + "epoch": 1.145492275766659, + "grad_norm": 1.3534750631649812, + "learning_rate": 8.431889790527769e-07, + "loss": 0.4605948328971863, + "step": 4968 + }, + { + "epoch": 1.1457228498962417, + 
"grad_norm": 1.4715761177473734, + "learning_rate": 8.428124459398554e-07, + "loss": 0.46706438064575195, + "step": 4969 + }, + { + "epoch": 1.1459534240258242, + "grad_norm": 1.480784825415981, + "learning_rate": 8.424359356757383e-07, + "loss": 0.39674657583236694, + "step": 4970 + }, + { + "epoch": 1.146183998155407, + "grad_norm": 1.4606371633345823, + "learning_rate": 8.42059448315155e-07, + "loss": 0.4421246647834778, + "step": 4971 + }, + { + "epoch": 1.1464145722849897, + "grad_norm": 1.6921922922853865, + "learning_rate": 8.416829839128312e-07, + "loss": 0.5220682621002197, + "step": 4972 + }, + { + "epoch": 1.1466451464145724, + "grad_norm": 1.338254387958773, + "learning_rate": 8.413065425234904e-07, + "loss": 0.40189129114151, + "step": 4973 + }, + { + "epoch": 1.1468757205441549, + "grad_norm": 1.3011913252808138, + "learning_rate": 8.409301242018517e-07, + "loss": 0.448421835899353, + "step": 4974 + }, + { + "epoch": 1.1471062946737376, + "grad_norm": 1.5996651322296722, + "learning_rate": 8.405537290026318e-07, + "loss": 0.49476757645606995, + "step": 4975 + }, + { + "epoch": 1.1473368688033203, + "grad_norm": 1.4573872381246367, + "learning_rate": 8.401773569805431e-07, + "loss": 0.3888528347015381, + "step": 4976 + }, + { + "epoch": 1.1475674429329028, + "grad_norm": 1.4760563096119323, + "learning_rate": 8.398010081902956e-07, + "loss": 0.49057653546333313, + "step": 4977 + }, + { + "epoch": 1.1477980170624855, + "grad_norm": 1.3851559333900214, + "learning_rate": 8.39424682686595e-07, + "loss": 0.41700610518455505, + "step": 4978 + }, + { + "epoch": 1.1480285911920682, + "grad_norm": 1.5382531029836037, + "learning_rate": 8.390483805241441e-07, + "loss": 0.4801902770996094, + "step": 4979 + }, + { + "epoch": 1.148259165321651, + "grad_norm": 1.5691797878096674, + "learning_rate": 8.386721017576426e-07, + "loss": 0.5438926219940186, + "step": 4980 + }, + { + "epoch": 1.1484897394512337, + "grad_norm": 1.3886510011393631, + "learning_rate": 
8.382958464417857e-07, + "loss": 0.3991735577583313, + "step": 4981 + }, + { + "epoch": 1.1487203135808162, + "grad_norm": 1.5064271527131172, + "learning_rate": 8.379196146312664e-07, + "loss": 0.4918370246887207, + "step": 4982 + }, + { + "epoch": 1.1489508877103989, + "grad_norm": 1.713149481922198, + "learning_rate": 8.375434063807737e-07, + "loss": 0.5280467867851257, + "step": 4983 + }, + { + "epoch": 1.1491814618399816, + "grad_norm": 1.2990876069782782, + "learning_rate": 8.371672217449936e-07, + "loss": 0.4186179041862488, + "step": 4984 + }, + { + "epoch": 1.149412035969564, + "grad_norm": 1.3742464834005608, + "learning_rate": 8.367910607786079e-07, + "loss": 0.3698224723339081, + "step": 4985 + }, + { + "epoch": 1.1496426100991468, + "grad_norm": 1.4766762383505605, + "learning_rate": 8.364149235362956e-07, + "loss": 0.45402267575263977, + "step": 4986 + }, + { + "epoch": 1.1498731842287295, + "grad_norm": 1.530758978566143, + "learning_rate": 8.36038810072732e-07, + "loss": 0.5145484209060669, + "step": 4987 + }, + { + "epoch": 1.1501037583583122, + "grad_norm": 1.2257671687651395, + "learning_rate": 8.356627204425893e-07, + "loss": 0.4293951392173767, + "step": 4988 + }, + { + "epoch": 1.150334332487895, + "grad_norm": 1.5415847348488914, + "learning_rate": 8.352866547005354e-07, + "loss": 0.3916272521018982, + "step": 4989 + }, + { + "epoch": 1.1505649066174775, + "grad_norm": 1.6777087516004896, + "learning_rate": 8.349106129012357e-07, + "loss": 0.40171611309051514, + "step": 4990 + }, + { + "epoch": 1.1507954807470602, + "grad_norm": 1.5767244212385862, + "learning_rate": 8.345345950993518e-07, + "loss": 0.49580252170562744, + "step": 4991 + }, + { + "epoch": 1.151026054876643, + "grad_norm": 1.491822308561489, + "learning_rate": 8.34158601349541e-07, + "loss": 0.4521256685256958, + "step": 4992 + }, + { + "epoch": 1.1512566290062254, + "grad_norm": 1.5317445246777317, + "learning_rate": 8.337826317064585e-07, + "loss": 0.3920813798904419, + 
"step": 4993 + }, + { + "epoch": 1.151487203135808, + "grad_norm": 1.4336055128806646, + "learning_rate": 8.334066862247547e-07, + "loss": 0.4263145923614502, + "step": 4994 + }, + { + "epoch": 1.1517177772653908, + "grad_norm": 1.513949850078891, + "learning_rate": 8.330307649590779e-07, + "loss": 0.4746140241622925, + "step": 4995 + }, + { + "epoch": 1.1519483513949735, + "grad_norm": 1.6708741885004843, + "learning_rate": 8.326548679640713e-07, + "loss": 0.37520158290863037, + "step": 4996 + }, + { + "epoch": 1.1521789255245563, + "grad_norm": 1.4060610690176367, + "learning_rate": 8.322789952943759e-07, + "loss": 0.4481951892375946, + "step": 4997 + }, + { + "epoch": 1.1524094996541387, + "grad_norm": 1.4336851088246751, + "learning_rate": 8.319031470046281e-07, + "loss": 0.40319859981536865, + "step": 4998 + }, + { + "epoch": 1.1526400737837215, + "grad_norm": 1.805948160607668, + "learning_rate": 8.315273231494615e-07, + "loss": 0.47720152139663696, + "step": 4999 + }, + { + "epoch": 1.1528706479133042, + "grad_norm": 1.2994404231083814, + "learning_rate": 8.311515237835063e-07, + "loss": 0.4027557969093323, + "step": 5000 + }, + { + "epoch": 1.1531012220428867, + "grad_norm": 1.5346692874582604, + "learning_rate": 8.307757489613878e-07, + "loss": 0.3939552307128906, + "step": 5001 + }, + { + "epoch": 1.1533317961724694, + "grad_norm": 1.541801101637957, + "learning_rate": 8.303999987377295e-07, + "loss": 0.379425585269928, + "step": 5002 + }, + { + "epoch": 1.153562370302052, + "grad_norm": 1.3222707927494204, + "learning_rate": 8.300242731671499e-07, + "loss": 0.46231499314308167, + "step": 5003 + }, + { + "epoch": 1.1537929444316348, + "grad_norm": 1.5623820882279815, + "learning_rate": 8.296485723042654e-07, + "loss": 0.4639621675014496, + "step": 5004 + }, + { + "epoch": 1.1540235185612175, + "grad_norm": 1.4577901713449948, + "learning_rate": 8.29272896203687e-07, + "loss": 0.49264025688171387, + "step": 5005 + }, + { + "epoch": 1.1542540926908, + 
"grad_norm": 1.2796677798690286, + "learning_rate": 8.288972449200233e-07, + "loss": 0.4145156145095825, + "step": 5006 + }, + { + "epoch": 1.1544846668203828, + "grad_norm": 1.3338594060824709, + "learning_rate": 8.285216185078792e-07, + "loss": 0.39693811535835266, + "step": 5007 + }, + { + "epoch": 1.1547152409499655, + "grad_norm": 1.356694069152444, + "learning_rate": 8.281460170218561e-07, + "loss": 0.46224820613861084, + "step": 5008 + }, + { + "epoch": 1.154945815079548, + "grad_norm": 1.5380330607680774, + "learning_rate": 8.277704405165506e-07, + "loss": 0.48868128657341003, + "step": 5009 + }, + { + "epoch": 1.1551763892091307, + "grad_norm": 1.4024811483349113, + "learning_rate": 8.273948890465574e-07, + "loss": 0.5127776265144348, + "step": 5010 + }, + { + "epoch": 1.1554069633387134, + "grad_norm": 1.4092381840768406, + "learning_rate": 8.270193626664665e-07, + "loss": 0.4039389491081238, + "step": 5011 + }, + { + "epoch": 1.1556375374682961, + "grad_norm": 1.5807780806971976, + "learning_rate": 8.266438614308641e-07, + "loss": 0.4224502444267273, + "step": 5012 + }, + { + "epoch": 1.1558681115978788, + "grad_norm": 1.42726619115002, + "learning_rate": 8.262683853943335e-07, + "loss": 0.4392918050289154, + "step": 5013 + }, + { + "epoch": 1.1560986857274613, + "grad_norm": 1.5001771531608157, + "learning_rate": 8.258929346114534e-07, + "loss": 0.5055289268493652, + "step": 5014 + }, + { + "epoch": 1.156329259857044, + "grad_norm": 1.3839083181087675, + "learning_rate": 8.255175091368003e-07, + "loss": 0.43851351737976074, + "step": 5015 + }, + { + "epoch": 1.1565598339866268, + "grad_norm": 1.576893376736649, + "learning_rate": 8.251421090249451e-07, + "loss": 0.4557814598083496, + "step": 5016 + }, + { + "epoch": 1.1567904081162093, + "grad_norm": 1.2994912796642604, + "learning_rate": 8.247667343304568e-07, + "loss": 0.4288882613182068, + "step": 5017 + }, + { + "epoch": 1.157020982245792, + "grad_norm": 1.4237104241903844, + "learning_rate": 
8.243913851078994e-07, + "loss": 0.42711886763572693, + "step": 5018 + }, + { + "epoch": 1.1572515563753747, + "grad_norm": 1.8597293679946851, + "learning_rate": 8.240160614118342e-07, + "loss": 0.515809953212738, + "step": 5019 + }, + { + "epoch": 1.1574821305049574, + "grad_norm": 1.828777504717114, + "learning_rate": 8.236407632968182e-07, + "loss": 0.5754632949829102, + "step": 5020 + }, + { + "epoch": 1.1577127046345401, + "grad_norm": 1.553176542229762, + "learning_rate": 8.232654908174038e-07, + "loss": 0.4601830244064331, + "step": 5021 + }, + { + "epoch": 1.1579432787641226, + "grad_norm": 1.500802040492981, + "learning_rate": 8.228902440281422e-07, + "loss": 0.4740797281265259, + "step": 5022 + }, + { + "epoch": 1.1581738528937053, + "grad_norm": 1.688304974088827, + "learning_rate": 8.225150229835781e-07, + "loss": 0.4066367745399475, + "step": 5023 + }, + { + "epoch": 1.158404427023288, + "grad_norm": 1.357187761009418, + "learning_rate": 8.221398277382546e-07, + "loss": 0.4664362668991089, + "step": 5024 + }, + { + "epoch": 1.1586350011528705, + "grad_norm": 1.3912425171719864, + "learning_rate": 8.217646583467093e-07, + "loss": 0.5204637050628662, + "step": 5025 + }, + { + "epoch": 1.1588655752824533, + "grad_norm": 1.4227227145637968, + "learning_rate": 8.213895148634775e-07, + "loss": 0.4991419017314911, + "step": 5026 + }, + { + "epoch": 1.159096149412036, + "grad_norm": 1.2844880437163813, + "learning_rate": 8.210143973430896e-07, + "loss": 0.40420424938201904, + "step": 5027 + }, + { + "epoch": 1.1593267235416187, + "grad_norm": 1.4946107412544847, + "learning_rate": 8.206393058400736e-07, + "loss": 0.523331880569458, + "step": 5028 + }, + { + "epoch": 1.1595572976712014, + "grad_norm": 1.4908780499938201, + "learning_rate": 8.202642404089516e-07, + "loss": 0.5019216537475586, + "step": 5029 + }, + { + "epoch": 1.159787871800784, + "grad_norm": 1.6451488656605473, + "learning_rate": 8.198892011042442e-07, + "loss": 0.522672712802887, + "step": 
5030 + }, + { + "epoch": 1.1600184459303666, + "grad_norm": 1.505727418733034, + "learning_rate": 8.195141879804668e-07, + "loss": 0.418377548456192, + "step": 5031 + }, + { + "epoch": 1.1602490200599493, + "grad_norm": 1.5635210393713965, + "learning_rate": 8.191392010921312e-07, + "loss": 0.4914432764053345, + "step": 5032 + }, + { + "epoch": 1.1604795941895318, + "grad_norm": 1.3929576184838368, + "learning_rate": 8.187642404937459e-07, + "loss": 0.42149683833122253, + "step": 5033 + }, + { + "epoch": 1.1607101683191146, + "grad_norm": 1.6811040317548793, + "learning_rate": 8.183893062398145e-07, + "loss": 0.5637058019638062, + "step": 5034 + }, + { + "epoch": 1.1609407424486973, + "grad_norm": 1.2252559322458123, + "learning_rate": 8.180143983848387e-07, + "loss": 0.49930211901664734, + "step": 5035 + }, + { + "epoch": 1.16117131657828, + "grad_norm": 1.626369547940987, + "learning_rate": 8.176395169833139e-07, + "loss": 0.4217071235179901, + "step": 5036 + }, + { + "epoch": 1.1614018907078625, + "grad_norm": 1.9654976691842632, + "learning_rate": 8.172646620897336e-07, + "loss": 0.4208733141422272, + "step": 5037 + }, + { + "epoch": 1.1616324648374452, + "grad_norm": 1.434216808832, + "learning_rate": 8.168898337585866e-07, + "loss": 0.42970529198646545, + "step": 5038 + }, + { + "epoch": 1.161863038967028, + "grad_norm": 1.429859410744686, + "learning_rate": 8.165150320443584e-07, + "loss": 0.49482622742652893, + "step": 5039 + }, + { + "epoch": 1.1620936130966106, + "grad_norm": 1.2888747437309156, + "learning_rate": 8.161402570015297e-07, + "loss": 0.4106384217739105, + "step": 5040 + }, + { + "epoch": 1.1623241872261931, + "grad_norm": 1.8632515092828725, + "learning_rate": 8.157655086845778e-07, + "loss": 0.4550397992134094, + "step": 5041 + }, + { + "epoch": 1.1625547613557758, + "grad_norm": 1.4636128502892785, + "learning_rate": 8.153907871479768e-07, + "loss": 0.5144504308700562, + "step": 5042 + }, + { + "epoch": 1.1627853354853586, + "grad_norm": 
1.4308354935014596, + "learning_rate": 8.150160924461953e-07, + "loss": 0.3970009684562683, + "step": 5043 + }, + { + "epoch": 1.1630159096149413, + "grad_norm": 1.4674063038688332, + "learning_rate": 8.146414246336998e-07, + "loss": 0.45825856924057007, + "step": 5044 + }, + { + "epoch": 1.1632464837445238, + "grad_norm": 1.6850972190756333, + "learning_rate": 8.142667837649515e-07, + "loss": 0.4515247344970703, + "step": 5045 + }, + { + "epoch": 1.1634770578741065, + "grad_norm": 1.347770803032681, + "learning_rate": 8.13892169894409e-07, + "loss": 0.41265833377838135, + "step": 5046 + }, + { + "epoch": 1.1637076320036892, + "grad_norm": 1.4117996459358377, + "learning_rate": 8.135175830765254e-07, + "loss": 0.39820557832717896, + "step": 5047 + }, + { + "epoch": 1.163938206133272, + "grad_norm": 1.4272016239744356, + "learning_rate": 8.131430233657514e-07, + "loss": 0.41528987884521484, + "step": 5048 + }, + { + "epoch": 1.1641687802628544, + "grad_norm": 1.3404996701874776, + "learning_rate": 8.127684908165323e-07, + "loss": 0.4453636407852173, + "step": 5049 + }, + { + "epoch": 1.1643993543924371, + "grad_norm": 1.846029547761043, + "learning_rate": 8.123939854833107e-07, + "loss": 0.45008519291877747, + "step": 5050 + }, + { + "epoch": 1.1646299285220199, + "grad_norm": 1.7254544812081525, + "learning_rate": 8.120195074205249e-07, + "loss": 0.456550657749176, + "step": 5051 + }, + { + "epoch": 1.1648605026516026, + "grad_norm": 1.4455041595835194, + "learning_rate": 8.116450566826086e-07, + "loss": 0.44465887546539307, + "step": 5052 + }, + { + "epoch": 1.165091076781185, + "grad_norm": 1.4606872040412728, + "learning_rate": 8.112706333239923e-07, + "loss": 0.4769172668457031, + "step": 5053 + }, + { + "epoch": 1.1653216509107678, + "grad_norm": 1.5800176181940382, + "learning_rate": 8.108962373991019e-07, + "loss": 0.42662739753723145, + "step": 5054 + }, + { + "epoch": 1.1655522250403505, + "grad_norm": 1.533727299161298, + "learning_rate": 
8.105218689623603e-07, + "loss": 0.4923250079154968, + "step": 5055 + }, + { + "epoch": 1.1657827991699332, + "grad_norm": 1.5783599756682145, + "learning_rate": 8.10147528068185e-07, + "loss": 0.42462587356567383, + "step": 5056 + }, + { + "epoch": 1.1660133732995157, + "grad_norm": 1.3458818448335859, + "learning_rate": 8.097732147709908e-07, + "loss": 0.47610223293304443, + "step": 5057 + }, + { + "epoch": 1.1662439474290984, + "grad_norm": 1.6207397386125497, + "learning_rate": 8.093989291251875e-07, + "loss": 0.47519630193710327, + "step": 5058 + }, + { + "epoch": 1.1664745215586811, + "grad_norm": 1.3901575117179885, + "learning_rate": 8.090246711851819e-07, + "loss": 0.38865840435028076, + "step": 5059 + }, + { + "epoch": 1.1667050956882639, + "grad_norm": 1.271312682478528, + "learning_rate": 8.086504410053757e-07, + "loss": 0.39990776777267456, + "step": 5060 + }, + { + "epoch": 1.1669356698178464, + "grad_norm": 1.4665951386644982, + "learning_rate": 8.082762386401669e-07, + "loss": 0.4330836534500122, + "step": 5061 + }, + { + "epoch": 1.167166243947429, + "grad_norm": 1.286707043518209, + "learning_rate": 8.079020641439504e-07, + "loss": 0.4285934865474701, + "step": 5062 + }, + { + "epoch": 1.1673968180770118, + "grad_norm": 1.7499199825760443, + "learning_rate": 8.075279175711152e-07, + "loss": 0.3900645077228546, + "step": 5063 + }, + { + "epoch": 1.1676273922065945, + "grad_norm": 1.3606445329404238, + "learning_rate": 8.07153798976048e-07, + "loss": 0.48145759105682373, + "step": 5064 + }, + { + "epoch": 1.167857966336177, + "grad_norm": 1.7592322949259351, + "learning_rate": 8.067797084131305e-07, + "loss": 0.4239045977592468, + "step": 5065 + }, + { + "epoch": 1.1680885404657597, + "grad_norm": 1.7501505795878665, + "learning_rate": 8.064056459367409e-07, + "loss": 0.55517578125, + "step": 5066 + }, + { + "epoch": 1.1683191145953424, + "grad_norm": 1.588400616006081, + "learning_rate": 8.060316116012524e-07, + "loss": 0.4956046938896179, + 
"step": 5067 + }, + { + "epoch": 1.1685496887249252, + "grad_norm": 1.3607022789051413, + "learning_rate": 8.05657605461035e-07, + "loss": 0.4051878750324249, + "step": 5068 + }, + { + "epoch": 1.1687802628545076, + "grad_norm": 1.6471264462607456, + "learning_rate": 8.052836275704541e-07, + "loss": 0.47389912605285645, + "step": 5069 + }, + { + "epoch": 1.1690108369840904, + "grad_norm": 1.3462872241997197, + "learning_rate": 8.049096779838717e-07, + "loss": 0.5023842453956604, + "step": 5070 + }, + { + "epoch": 1.169241411113673, + "grad_norm": 1.3943514778037218, + "learning_rate": 8.045357567556449e-07, + "loss": 0.4895137548446655, + "step": 5071 + }, + { + "epoch": 1.1694719852432558, + "grad_norm": 1.5328176046123796, + "learning_rate": 8.041618639401264e-07, + "loss": 0.47874224185943604, + "step": 5072 + }, + { + "epoch": 1.1697025593728383, + "grad_norm": 1.4666773972258982, + "learning_rate": 8.037879995916659e-07, + "loss": 0.4784395694732666, + "step": 5073 + }, + { + "epoch": 1.169933133502421, + "grad_norm": 1.4433652991816976, + "learning_rate": 8.034141637646079e-07, + "loss": 0.45289772748947144, + "step": 5074 + }, + { + "epoch": 1.1701637076320037, + "grad_norm": 1.931933746015264, + "learning_rate": 8.030403565132942e-07, + "loss": 0.5375204682350159, + "step": 5075 + }, + { + "epoch": 1.1703942817615864, + "grad_norm": 1.4956339972756536, + "learning_rate": 8.026665778920602e-07, + "loss": 0.45003899931907654, + "step": 5076 + }, + { + "epoch": 1.170624855891169, + "grad_norm": 1.348037979358877, + "learning_rate": 8.022928279552392e-07, + "loss": 0.4236389994621277, + "step": 5077 + }, + { + "epoch": 1.1708554300207517, + "grad_norm": 1.3333943245649609, + "learning_rate": 8.019191067571592e-07, + "loss": 0.43182557821273804, + "step": 5078 + }, + { + "epoch": 1.1710860041503344, + "grad_norm": 1.7521692166476222, + "learning_rate": 8.01545414352145e-07, + "loss": 0.5171953439712524, + "step": 5079 + }, + { + "epoch": 1.171316578279917, + 
"grad_norm": 1.5319548219026522, + "learning_rate": 8.011717507945157e-07, + "loss": 0.5084770321846008, + "step": 5080 + }, + { + "epoch": 1.1715471524094996, + "grad_norm": 1.6342595542262888, + "learning_rate": 8.007981161385876e-07, + "loss": 0.4685532748699188, + "step": 5081 + }, + { + "epoch": 1.1717777265390823, + "grad_norm": 1.5086552244362486, + "learning_rate": 8.004245104386724e-07, + "loss": 0.4647448658943176, + "step": 5082 + }, + { + "epoch": 1.172008300668665, + "grad_norm": 1.4914913702780284, + "learning_rate": 8.000509337490768e-07, + "loss": 0.4038098454475403, + "step": 5083 + }, + { + "epoch": 1.1722388747982477, + "grad_norm": 1.435384500623052, + "learning_rate": 7.996773861241047e-07, + "loss": 0.4153759479522705, + "step": 5084 + }, + { + "epoch": 1.1724694489278302, + "grad_norm": 1.5573715225755111, + "learning_rate": 7.993038676180545e-07, + "loss": 0.4569447636604309, + "step": 5085 + }, + { + "epoch": 1.172700023057413, + "grad_norm": 1.4307958679817, + "learning_rate": 7.989303782852215e-07, + "loss": 0.4419426918029785, + "step": 5086 + }, + { + "epoch": 1.1729305971869957, + "grad_norm": 1.4177391878017933, + "learning_rate": 7.985569181798955e-07, + "loss": 0.3902894854545593, + "step": 5087 + }, + { + "epoch": 1.1731611713165782, + "grad_norm": 1.3935681641299988, + "learning_rate": 7.981834873563631e-07, + "loss": 0.4066358208656311, + "step": 5088 + }, + { + "epoch": 1.1733917454461609, + "grad_norm": 1.579270038843054, + "learning_rate": 7.978100858689059e-07, + "loss": 0.4589639902114868, + "step": 5089 + }, + { + "epoch": 1.1736223195757436, + "grad_norm": 1.5868805646941586, + "learning_rate": 7.974367137718024e-07, + "loss": 0.4431188106536865, + "step": 5090 + }, + { + "epoch": 1.1738528937053263, + "grad_norm": 1.3420666663317198, + "learning_rate": 7.970633711193252e-07, + "loss": 0.43412742018699646, + "step": 5091 + }, + { + "epoch": 1.174083467834909, + "grad_norm": 1.360898150528172, + "learning_rate": 
7.966900579657435e-07, + "loss": 0.40296387672424316, + "step": 5092 + }, + { + "epoch": 1.1743140419644915, + "grad_norm": 1.4702894316239854, + "learning_rate": 7.963167743653228e-07, + "loss": 0.4814741611480713, + "step": 5093 + }, + { + "epoch": 1.1745446160940742, + "grad_norm": 1.7678935112109417, + "learning_rate": 7.959435203723228e-07, + "loss": 0.4412423372268677, + "step": 5094 + }, + { + "epoch": 1.174775190223657, + "grad_norm": 1.698823813376211, + "learning_rate": 7.955702960410006e-07, + "loss": 0.49773266911506653, + "step": 5095 + }, + { + "epoch": 1.1750057643532394, + "grad_norm": 1.445996901779518, + "learning_rate": 7.951971014256073e-07, + "loss": 0.4657529592514038, + "step": 5096 + }, + { + "epoch": 1.1752363384828222, + "grad_norm": 1.4844953949134, + "learning_rate": 7.94823936580391e-07, + "loss": 0.4062782824039459, + "step": 5097 + }, + { + "epoch": 1.1754669126124049, + "grad_norm": 1.3280643963390701, + "learning_rate": 7.944508015595948e-07, + "loss": 0.4154980182647705, + "step": 5098 + }, + { + "epoch": 1.1756974867419876, + "grad_norm": 1.3235405382692107, + "learning_rate": 7.940776964174582e-07, + "loss": 0.4724680185317993, + "step": 5099 + }, + { + "epoch": 1.1759280608715703, + "grad_norm": 1.4212228031547876, + "learning_rate": 7.937046212082149e-07, + "loss": 0.48808538913726807, + "step": 5100 + }, + { + "epoch": 1.1761586350011528, + "grad_norm": 1.3949555418133748, + "learning_rate": 7.933315759860959e-07, + "loss": 0.4985845983028412, + "step": 5101 + }, + { + "epoch": 1.1763892091307355, + "grad_norm": 1.2192149824969183, + "learning_rate": 7.92958560805327e-07, + "loss": 0.3735587000846863, + "step": 5102 + }, + { + "epoch": 1.1766197832603182, + "grad_norm": 1.3793872147262238, + "learning_rate": 7.925855757201294e-07, + "loss": 0.4198414385318756, + "step": 5103 + }, + { + "epoch": 1.1768503573899007, + "grad_norm": 1.7231390796467927, + "learning_rate": 7.922126207847204e-07, + "loss": 0.41973787546157837, + 
"step": 5104 + }, + { + "epoch": 1.1770809315194835, + "grad_norm": 1.8258365265115961, + "learning_rate": 7.918396960533128e-07, + "loss": 0.5179545283317566, + "step": 5105 + }, + { + "epoch": 1.1773115056490662, + "grad_norm": 1.5757377934881964, + "learning_rate": 7.914668015801153e-07, + "loss": 0.4917227625846863, + "step": 5106 + }, + { + "epoch": 1.1775420797786489, + "grad_norm": 1.5132865673859617, + "learning_rate": 7.910939374193312e-07, + "loss": 0.41775548458099365, + "step": 5107 + }, + { + "epoch": 1.1777726539082316, + "grad_norm": 1.484971286444874, + "learning_rate": 7.907211036251608e-07, + "loss": 0.45468997955322266, + "step": 5108 + }, + { + "epoch": 1.178003228037814, + "grad_norm": 1.292166499414124, + "learning_rate": 7.903483002517988e-07, + "loss": 0.3749620318412781, + "step": 5109 + }, + { + "epoch": 1.1782338021673968, + "grad_norm": 1.3945828421286317, + "learning_rate": 7.899755273534365e-07, + "loss": 0.48940956592559814, + "step": 5110 + }, + { + "epoch": 1.1784643762969795, + "grad_norm": 1.3575927994558319, + "learning_rate": 7.896027849842594e-07, + "loss": 0.4561386704444885, + "step": 5111 + }, + { + "epoch": 1.178694950426562, + "grad_norm": 1.4968176209501343, + "learning_rate": 7.892300731984498e-07, + "loss": 0.441898375749588, + "step": 5112 + }, + { + "epoch": 1.1789255245561447, + "grad_norm": 1.7617220832230103, + "learning_rate": 7.888573920501856e-07, + "loss": 0.43445056676864624, + "step": 5113 + }, + { + "epoch": 1.1791560986857275, + "grad_norm": 1.4680500200302005, + "learning_rate": 7.884847415936389e-07, + "loss": 0.42653167247772217, + "step": 5114 + }, + { + "epoch": 1.1793866728153102, + "grad_norm": 1.3867120793190437, + "learning_rate": 7.881121218829787e-07, + "loss": 0.42003321647644043, + "step": 5115 + }, + { + "epoch": 1.179617246944893, + "grad_norm": 1.613544333660259, + "learning_rate": 7.87739532972369e-07, + "loss": 0.4920128881931305, + "step": 5116 + }, + { + "epoch": 1.1798478210744754, + 
"grad_norm": 1.430783098871577, + "learning_rate": 7.873669749159697e-07, + "loss": 0.49529707431793213, + "step": 5117 + }, + { + "epoch": 1.180078395204058, + "grad_norm": 1.4915607575501106, + "learning_rate": 7.869944477679351e-07, + "loss": 0.4813005328178406, + "step": 5118 + }, + { + "epoch": 1.1803089693336408, + "grad_norm": 1.4923304237688, + "learning_rate": 7.866219515824168e-07, + "loss": 0.47239556908607483, + "step": 5119 + }, + { + "epoch": 1.1805395434632233, + "grad_norm": 1.7203098580351979, + "learning_rate": 7.862494864135596e-07, + "loss": 0.4808405935764313, + "step": 5120 + }, + { + "epoch": 1.180770117592806, + "grad_norm": 1.5206410201181635, + "learning_rate": 7.858770523155066e-07, + "loss": 0.44946521520614624, + "step": 5121 + }, + { + "epoch": 1.1810006917223888, + "grad_norm": 1.8958199353441048, + "learning_rate": 7.85504649342394e-07, + "loss": 0.5344874858856201, + "step": 5122 + }, + { + "epoch": 1.1812312658519715, + "grad_norm": 1.729692211161555, + "learning_rate": 7.851322775483542e-07, + "loss": 0.49354079365730286, + "step": 5123 + }, + { + "epoch": 1.1814618399815542, + "grad_norm": 1.6407900723292905, + "learning_rate": 7.847599369875155e-07, + "loss": 0.414085328578949, + "step": 5124 + }, + { + "epoch": 1.1816924141111367, + "grad_norm": 1.51838750003237, + "learning_rate": 7.843876277140013e-07, + "loss": 0.4638150632381439, + "step": 5125 + }, + { + "epoch": 1.1819229882407194, + "grad_norm": 1.5309477954820934, + "learning_rate": 7.84015349781931e-07, + "loss": 0.39239877462387085, + "step": 5126 + }, + { + "epoch": 1.1821535623703021, + "grad_norm": 1.456140160914471, + "learning_rate": 7.83643103245418e-07, + "loss": 0.46846455335617065, + "step": 5127 + }, + { + "epoch": 1.1823841364998846, + "grad_norm": 1.7368044200229882, + "learning_rate": 7.832708881585729e-07, + "loss": 0.5257229804992676, + "step": 5128 + }, + { + "epoch": 1.1826147106294673, + "grad_norm": 1.246852967804398, + "learning_rate": 
7.828987045755006e-07, + "loss": 0.3858698904514313, + "step": 5129 + }, + { + "epoch": 1.18284528475905, + "grad_norm": 1.526790126487461, + "learning_rate": 7.82526552550302e-07, + "loss": 0.48664575815200806, + "step": 5130 + }, + { + "epoch": 1.1830758588886328, + "grad_norm": 1.4370667079865387, + "learning_rate": 7.821544321370731e-07, + "loss": 0.5246836543083191, + "step": 5131 + }, + { + "epoch": 1.1833064330182155, + "grad_norm": 1.6695741670894575, + "learning_rate": 7.817823433899049e-07, + "loss": 0.5538516640663147, + "step": 5132 + }, + { + "epoch": 1.183537007147798, + "grad_norm": 1.5154692060299837, + "learning_rate": 7.814102863628852e-07, + "loss": 0.4563618302345276, + "step": 5133 + }, + { + "epoch": 1.1837675812773807, + "grad_norm": 1.6013623117191365, + "learning_rate": 7.810382611100952e-07, + "loss": 0.48093757033348083, + "step": 5134 + }, + { + "epoch": 1.1839981554069634, + "grad_norm": 1.4079128694512013, + "learning_rate": 7.806662676856133e-07, + "loss": 0.41152772307395935, + "step": 5135 + }, + { + "epoch": 1.184228729536546, + "grad_norm": 1.470828934761741, + "learning_rate": 7.802943061435121e-07, + "loss": 0.4429926574230194, + "step": 5136 + }, + { + "epoch": 1.1844593036661286, + "grad_norm": 1.6844871985058756, + "learning_rate": 7.799223765378604e-07, + "loss": 0.5795058012008667, + "step": 5137 + }, + { + "epoch": 1.1846898777957113, + "grad_norm": 1.3964078038325152, + "learning_rate": 7.795504789227214e-07, + "loss": 0.43219637870788574, + "step": 5138 + }, + { + "epoch": 1.184920451925294, + "grad_norm": 1.3120429368988666, + "learning_rate": 7.791786133521547e-07, + "loss": 0.472915917634964, + "step": 5139 + }, + { + "epoch": 1.1851510260548768, + "grad_norm": 1.8547533260703066, + "learning_rate": 7.788067798802144e-07, + "loss": 0.609251081943512, + "step": 5140 + }, + { + "epoch": 1.1853816001844593, + "grad_norm": 1.5647854614729606, + "learning_rate": 7.784349785609506e-07, + "loss": 0.5051882266998291, + 
"step": 5141 + }, + { + "epoch": 1.185612174314042, + "grad_norm": 1.8256847598733492, + "learning_rate": 7.780632094484081e-07, + "loss": 0.5062044858932495, + "step": 5142 + }, + { + "epoch": 1.1858427484436247, + "grad_norm": 1.6792228276022907, + "learning_rate": 7.77691472596627e-07, + "loss": 0.48717936873435974, + "step": 5143 + }, + { + "epoch": 1.1860733225732072, + "grad_norm": 1.4962691739334948, + "learning_rate": 7.773197680596439e-07, + "loss": 0.4755759537220001, + "step": 5144 + }, + { + "epoch": 1.18630389670279, + "grad_norm": 1.5701944534084074, + "learning_rate": 7.769480958914889e-07, + "loss": 0.4549487829208374, + "step": 5145 + }, + { + "epoch": 1.1865344708323726, + "grad_norm": 1.3416043214582947, + "learning_rate": 7.765764561461891e-07, + "loss": 0.39759546518325806, + "step": 5146 + }, + { + "epoch": 1.1867650449619553, + "grad_norm": 1.7321999626139561, + "learning_rate": 7.762048488777654e-07, + "loss": 0.5151915550231934, + "step": 5147 + }, + { + "epoch": 1.1869956190915378, + "grad_norm": 1.739537041268416, + "learning_rate": 7.758332741402351e-07, + "loss": 0.4555166959762573, + "step": 5148 + }, + { + "epoch": 1.1872261932211206, + "grad_norm": 1.246823148309275, + "learning_rate": 7.754617319876102e-07, + "loss": 0.3639993667602539, + "step": 5149 + }, + { + "epoch": 1.1874567673507033, + "grad_norm": 1.4228626603425891, + "learning_rate": 7.750902224738984e-07, + "loss": 0.4158916473388672, + "step": 5150 + }, + { + "epoch": 1.187687341480286, + "grad_norm": 1.5159845507016538, + "learning_rate": 7.747187456531021e-07, + "loss": 0.44933754205703735, + "step": 5151 + }, + { + "epoch": 1.1879179156098685, + "grad_norm": 1.1574431418082898, + "learning_rate": 7.74347301579219e-07, + "loss": 0.35436397790908813, + "step": 5152 + }, + { + "epoch": 1.1881484897394512, + "grad_norm": 1.7559371420298944, + "learning_rate": 7.73975890306243e-07, + "loss": 0.40650928020477295, + "step": 5153 + }, + { + "epoch": 1.188379063869034, + 
"grad_norm": 1.655955114095899, + "learning_rate": 7.736045118881615e-07, + "loss": 0.424211710691452, + "step": 5154 + }, + { + "epoch": 1.1886096379986166, + "grad_norm": 1.386370427214692, + "learning_rate": 7.73233166378959e-07, + "loss": 0.38909512758255005, + "step": 5155 + }, + { + "epoch": 1.1888402121281991, + "grad_norm": 1.6273556393891413, + "learning_rate": 7.728618538326139e-07, + "loss": 0.4452083110809326, + "step": 5156 + }, + { + "epoch": 1.1890707862577818, + "grad_norm": 1.7325341862894768, + "learning_rate": 7.724905743031005e-07, + "loss": 0.45061540603637695, + "step": 5157 + }, + { + "epoch": 1.1893013603873646, + "grad_norm": 1.875195364158454, + "learning_rate": 7.721193278443875e-07, + "loss": 0.5301374197006226, + "step": 5158 + }, + { + "epoch": 1.1895319345169473, + "grad_norm": 1.32653936253781, + "learning_rate": 7.717481145104398e-07, + "loss": 0.4386521577835083, + "step": 5159 + }, + { + "epoch": 1.1897625086465298, + "grad_norm": 1.5893013583646332, + "learning_rate": 7.713769343552169e-07, + "loss": 0.447623074054718, + "step": 5160 + }, + { + "epoch": 1.1899930827761125, + "grad_norm": 1.4757184491338362, + "learning_rate": 7.71005787432674e-07, + "loss": 0.44326454401016235, + "step": 5161 + }, + { + "epoch": 1.1902236569056952, + "grad_norm": 1.4868394681814385, + "learning_rate": 7.706346737967603e-07, + "loss": 0.564007043838501, + "step": 5162 + }, + { + "epoch": 1.190454231035278, + "grad_norm": 1.4497565739191507, + "learning_rate": 7.702635935014213e-07, + "loss": 0.5338540077209473, + "step": 5163 + }, + { + "epoch": 1.1906848051648604, + "grad_norm": 1.5430964424900424, + "learning_rate": 7.698925466005977e-07, + "loss": 0.45307862758636475, + "step": 5164 + }, + { + "epoch": 1.1909153792944431, + "grad_norm": 1.4703583168080245, + "learning_rate": 7.69521533148224e-07, + "loss": 0.5383142232894897, + "step": 5165 + }, + { + "epoch": 1.1911459534240258, + "grad_norm": 1.46357622305891, + "learning_rate": 
7.691505531982316e-07, + "loss": 0.3794770836830139, + "step": 5166 + }, + { + "epoch": 1.1913765275536086, + "grad_norm": 1.73725405615964, + "learning_rate": 7.687796068045455e-07, + "loss": 0.4633198082447052, + "step": 5167 + }, + { + "epoch": 1.191607101683191, + "grad_norm": 1.4824242158713679, + "learning_rate": 7.684086940210875e-07, + "loss": 0.5080294609069824, + "step": 5168 + }, + { + "epoch": 1.1918376758127738, + "grad_norm": 1.4742940614632714, + "learning_rate": 7.680378149017724e-07, + "loss": 0.3952289819717407, + "step": 5169 + }, + { + "epoch": 1.1920682499423565, + "grad_norm": 1.6284523488523228, + "learning_rate": 7.676669695005122e-07, + "loss": 0.4518551528453827, + "step": 5170 + }, + { + "epoch": 1.1922988240719392, + "grad_norm": 1.3915500318606786, + "learning_rate": 7.672961578712125e-07, + "loss": 0.4752943515777588, + "step": 5171 + }, + { + "epoch": 1.1925293982015217, + "grad_norm": 1.4424968675316805, + "learning_rate": 7.669253800677744e-07, + "loss": 0.5059680342674255, + "step": 5172 + }, + { + "epoch": 1.1927599723311044, + "grad_norm": 1.4513506332822887, + "learning_rate": 7.665546361440949e-07, + "loss": 0.47073960304260254, + "step": 5173 + }, + { + "epoch": 1.1929905464606871, + "grad_norm": 1.6974826094600077, + "learning_rate": 7.661839261540644e-07, + "loss": 0.5851496458053589, + "step": 5174 + }, + { + "epoch": 1.1932211205902699, + "grad_norm": 1.4255244135326766, + "learning_rate": 7.658132501515701e-07, + "loss": 0.44255387783050537, + "step": 5175 + }, + { + "epoch": 1.1934516947198524, + "grad_norm": 1.7360033352973823, + "learning_rate": 7.654426081904931e-07, + "loss": 0.543785810470581, + "step": 5176 + }, + { + "epoch": 1.193682268849435, + "grad_norm": 1.697289945139709, + "learning_rate": 7.650720003247107e-07, + "loss": 0.503501296043396, + "step": 5177 + }, + { + "epoch": 1.1939128429790178, + "grad_norm": 1.6448034142146566, + "learning_rate": 7.647014266080935e-07, + "loss": 0.43894368410110474, + 
"step": 5178 + }, + { + "epoch": 1.1941434171086005, + "grad_norm": 1.9780925681836061, + "learning_rate": 7.643308870945088e-07, + "loss": 0.5014036297798157, + "step": 5179 + }, + { + "epoch": 1.194373991238183, + "grad_norm": 1.3813934145743847, + "learning_rate": 7.639603818378178e-07, + "loss": 0.4859309196472168, + "step": 5180 + }, + { + "epoch": 1.1946045653677657, + "grad_norm": 1.611175852060371, + "learning_rate": 7.635899108918781e-07, + "loss": 0.40631920099258423, + "step": 5181 + }, + { + "epoch": 1.1948351394973484, + "grad_norm": 1.923584573200039, + "learning_rate": 7.632194743105405e-07, + "loss": 0.5206565856933594, + "step": 5182 + }, + { + "epoch": 1.1950657136269311, + "grad_norm": 1.659582338573284, + "learning_rate": 7.628490721476517e-07, + "loss": 0.5052351355552673, + "step": 5183 + }, + { + "epoch": 1.1952962877565136, + "grad_norm": 1.3967739180573415, + "learning_rate": 7.624787044570543e-07, + "loss": 0.4921465516090393, + "step": 5184 + }, + { + "epoch": 1.1955268618860964, + "grad_norm": 1.2706689377506823, + "learning_rate": 7.621083712925839e-07, + "loss": 0.3307859003543854, + "step": 5185 + }, + { + "epoch": 1.195757436015679, + "grad_norm": 1.5942715812711645, + "learning_rate": 7.617380727080728e-07, + "loss": 0.4276743531227112, + "step": 5186 + }, + { + "epoch": 1.1959880101452618, + "grad_norm": 1.434739100338101, + "learning_rate": 7.613678087573475e-07, + "loss": 0.5065702795982361, + "step": 5187 + }, + { + "epoch": 1.1962185842748443, + "grad_norm": 1.2918886211693255, + "learning_rate": 7.609975794942301e-07, + "loss": 0.3588709533214569, + "step": 5188 + }, + { + "epoch": 1.196449158404427, + "grad_norm": 1.4907134183008088, + "learning_rate": 7.606273849725362e-07, + "loss": 0.4296506941318512, + "step": 5189 + }, + { + "epoch": 1.1966797325340097, + "grad_norm": 1.5501182036176049, + "learning_rate": 7.602572252460782e-07, + "loss": 0.517792820930481, + "step": 5190 + }, + { + "epoch": 1.1969103066635924, + 
"grad_norm": 1.6883448687359832, + "learning_rate": 7.598871003686619e-07, + "loss": 0.38939881324768066, + "step": 5191 + }, + { + "epoch": 1.197140880793175, + "grad_norm": 1.5288548185908284, + "learning_rate": 7.595170103940896e-07, + "loss": 0.5759290456771851, + "step": 5192 + }, + { + "epoch": 1.1973714549227576, + "grad_norm": 1.975229876516129, + "learning_rate": 7.591469553761569e-07, + "loss": 0.4705851078033447, + "step": 5193 + }, + { + "epoch": 1.1976020290523404, + "grad_norm": 1.4820736709912923, + "learning_rate": 7.587769353686548e-07, + "loss": 0.5137619972229004, + "step": 5194 + }, + { + "epoch": 1.197832603181923, + "grad_norm": 1.426346211238444, + "learning_rate": 7.584069504253701e-07, + "loss": 0.43207496404647827, + "step": 5195 + }, + { + "epoch": 1.1980631773115056, + "grad_norm": 1.7446559629267169, + "learning_rate": 7.580370006000835e-07, + "loss": 0.3976139426231384, + "step": 5196 + }, + { + "epoch": 1.1982937514410883, + "grad_norm": 1.3117053560833851, + "learning_rate": 7.576670859465715e-07, + "loss": 0.41323673725128174, + "step": 5197 + }, + { + "epoch": 1.198524325570671, + "grad_norm": 1.5110343718270132, + "learning_rate": 7.57297206518604e-07, + "loss": 0.404024600982666, + "step": 5198 + }, + { + "epoch": 1.1987548997002535, + "grad_norm": 1.3684281900258655, + "learning_rate": 7.569273623699475e-07, + "loss": 0.4010540843009949, + "step": 5199 + }, + { + "epoch": 1.1989854738298362, + "grad_norm": 1.5739020793077496, + "learning_rate": 7.565575535543623e-07, + "loss": 0.44299256801605225, + "step": 5200 + }, + { + "epoch": 1.199216047959419, + "grad_norm": 1.5204166282494558, + "learning_rate": 7.561877801256041e-07, + "loss": 0.5217546820640564, + "step": 5201 + }, + { + "epoch": 1.1994466220890017, + "grad_norm": 1.868873770331591, + "learning_rate": 7.558180421374229e-07, + "loss": 0.5192688703536987, + "step": 5202 + }, + { + "epoch": 1.1996771962185844, + "grad_norm": 1.5743910950617057, + "learning_rate": 
7.554483396435637e-07, + "loss": 0.38272884488105774, + "step": 5203 + }, + { + "epoch": 1.1999077703481669, + "grad_norm": 1.4246723536184043, + "learning_rate": 7.550786726977673e-07, + "loss": 0.474464476108551, + "step": 5204 + }, + { + "epoch": 1.2001383444777496, + "grad_norm": 1.6360159300410695, + "learning_rate": 7.547090413537676e-07, + "loss": 0.540134072303772, + "step": 5205 + }, + { + "epoch": 1.2003689186073323, + "grad_norm": 1.4752644193711169, + "learning_rate": 7.543394456652948e-07, + "loss": 0.4662882089614868, + "step": 5206 + }, + { + "epoch": 1.2005994927369148, + "grad_norm": 1.6858064119472538, + "learning_rate": 7.539698856860732e-07, + "loss": 0.440970778465271, + "step": 5207 + }, + { + "epoch": 1.2008300668664975, + "grad_norm": 1.3786365004169476, + "learning_rate": 7.536003614698225e-07, + "loss": 0.41787397861480713, + "step": 5208 + }, + { + "epoch": 1.2010606409960802, + "grad_norm": 1.4726677497641942, + "learning_rate": 7.532308730702561e-07, + "loss": 0.5503408908843994, + "step": 5209 + }, + { + "epoch": 1.201291215125663, + "grad_norm": 1.4739960164302617, + "learning_rate": 7.528614205410833e-07, + "loss": 0.43713903427124023, + "step": 5210 + }, + { + "epoch": 1.2015217892552457, + "grad_norm": 1.5362481289460599, + "learning_rate": 7.524920039360076e-07, + "loss": 0.4145667552947998, + "step": 5211 + }, + { + "epoch": 1.2017523633848282, + "grad_norm": 1.4800845890771783, + "learning_rate": 7.521226233087279e-07, + "loss": 0.4307587146759033, + "step": 5212 + }, + { + "epoch": 1.2019829375144109, + "grad_norm": 1.436182742461266, + "learning_rate": 7.517532787129369e-07, + "loss": 0.43784570693969727, + "step": 5213 + }, + { + "epoch": 1.2022135116439936, + "grad_norm": 1.3395031095564736, + "learning_rate": 7.513839702023226e-07, + "loss": 0.40003830194473267, + "step": 5214 + }, + { + "epoch": 1.202444085773576, + "grad_norm": 1.4786298792735793, + "learning_rate": 7.510146978305682e-07, + "loss": 0.4880738854408264, + 
"step": 5215 + }, + { + "epoch": 1.2026746599031588, + "grad_norm": 1.31895753202322, + "learning_rate": 7.506454616513505e-07, + "loss": 0.39548349380493164, + "step": 5216 + }, + { + "epoch": 1.2029052340327415, + "grad_norm": 1.5189592384869435, + "learning_rate": 7.502762617183425e-07, + "loss": 0.4060090184211731, + "step": 5217 + }, + { + "epoch": 1.2031358081623242, + "grad_norm": 1.6902238907281657, + "learning_rate": 7.499070980852101e-07, + "loss": 0.44657808542251587, + "step": 5218 + }, + { + "epoch": 1.203366382291907, + "grad_norm": 1.553015362629627, + "learning_rate": 7.495379708056161e-07, + "loss": 0.5283595323562622, + "step": 5219 + }, + { + "epoch": 1.2035969564214895, + "grad_norm": 1.5940858647104894, + "learning_rate": 7.49168879933216e-07, + "loss": 0.4424205422401428, + "step": 5220 + }, + { + "epoch": 1.2038275305510722, + "grad_norm": 1.4929497446465205, + "learning_rate": 7.487998255216619e-07, + "loss": 0.4998319745063782, + "step": 5221 + }, + { + "epoch": 1.2040581046806549, + "grad_norm": 1.3437939609448373, + "learning_rate": 7.484308076245987e-07, + "loss": 0.3821876645088196, + "step": 5222 + }, + { + "epoch": 1.2042886788102374, + "grad_norm": 1.4227177114495277, + "learning_rate": 7.480618262956669e-07, + "loss": 0.4567919373512268, + "step": 5223 + }, + { + "epoch": 1.20451925293982, + "grad_norm": 1.4207326358395804, + "learning_rate": 7.476928815885026e-07, + "loss": 0.4561428427696228, + "step": 5224 + }, + { + "epoch": 1.2047498270694028, + "grad_norm": 1.5720016799439587, + "learning_rate": 7.473239735567344e-07, + "loss": 0.4384823739528656, + "step": 5225 + }, + { + "epoch": 1.2049804011989855, + "grad_norm": 1.518914607229236, + "learning_rate": 7.469551022539877e-07, + "loss": 0.42840123176574707, + "step": 5226 + }, + { + "epoch": 1.2052109753285682, + "grad_norm": 1.4031825092609558, + "learning_rate": 7.465862677338812e-07, + "loss": 0.39553213119506836, + "step": 5227 + }, + { + "epoch": 1.2054415494581507, + 
"grad_norm": 1.521464998921144, + "learning_rate": 7.462174700500295e-07, + "loss": 0.4325043559074402, + "step": 5228 + }, + { + "epoch": 1.2056721235877335, + "grad_norm": 1.7451009485961195, + "learning_rate": 7.4584870925604e-07, + "loss": 0.5004623532295227, + "step": 5229 + }, + { + "epoch": 1.2059026977173162, + "grad_norm": 1.6975060246760258, + "learning_rate": 7.454799854055165e-07, + "loss": 0.42296791076660156, + "step": 5230 + }, + { + "epoch": 1.2061332718468987, + "grad_norm": 1.7859122255595659, + "learning_rate": 7.451112985520565e-07, + "loss": 0.45638370513916016, + "step": 5231 + }, + { + "epoch": 1.2063638459764814, + "grad_norm": 1.9018837416313183, + "learning_rate": 7.447426487492528e-07, + "loss": 0.5134493112564087, + "step": 5232 + }, + { + "epoch": 1.206594420106064, + "grad_norm": 1.382989024686568, + "learning_rate": 7.443740360506918e-07, + "loss": 0.4132578372955322, + "step": 5233 + }, + { + "epoch": 1.2068249942356468, + "grad_norm": 1.321784021070878, + "learning_rate": 7.440054605099552e-07, + "loss": 0.4363224506378174, + "step": 5234 + }, + { + "epoch": 1.2070555683652295, + "grad_norm": 1.4395608486144074, + "learning_rate": 7.4363692218062e-07, + "loss": 0.44970041513442993, + "step": 5235 + }, + { + "epoch": 1.207286142494812, + "grad_norm": 1.3219627332758312, + "learning_rate": 7.432684211162556e-07, + "loss": 0.39787235856056213, + "step": 5236 + }, + { + "epoch": 1.2075167166243947, + "grad_norm": 1.694639970069785, + "learning_rate": 7.428999573704284e-07, + "loss": 0.46057572960853577, + "step": 5237 + }, + { + "epoch": 1.2077472907539775, + "grad_norm": 1.3954230269661139, + "learning_rate": 7.42531530996698e-07, + "loss": 0.46754559874534607, + "step": 5238 + }, + { + "epoch": 1.20797786488356, + "grad_norm": 1.4060087118514482, + "learning_rate": 7.42163142048619e-07, + "loss": 0.5072697401046753, + "step": 5239 + }, + { + "epoch": 1.2082084390131427, + "grad_norm": 1.5355585762921151, + "learning_rate": 
7.417947905797403e-07, + "loss": 0.4691959023475647, + "step": 5240 + }, + { + "epoch": 1.2084390131427254, + "grad_norm": 1.4596733170422231, + "learning_rate": 7.414264766436056e-07, + "loss": 0.43248072266578674, + "step": 5241 + }, + { + "epoch": 1.208669587272308, + "grad_norm": 1.8386458599943265, + "learning_rate": 7.410582002937534e-07, + "loss": 0.4748457968235016, + "step": 5242 + }, + { + "epoch": 1.2089001614018908, + "grad_norm": 1.413498638420547, + "learning_rate": 7.406899615837157e-07, + "loss": 0.4682820439338684, + "step": 5243 + }, + { + "epoch": 1.2091307355314733, + "grad_norm": 1.3788557575990639, + "learning_rate": 7.403217605670205e-07, + "loss": 0.41747021675109863, + "step": 5244 + }, + { + "epoch": 1.209361309661056, + "grad_norm": 1.5523861247321795, + "learning_rate": 7.399535972971886e-07, + "loss": 0.4968727231025696, + "step": 5245 + }, + { + "epoch": 1.2095918837906388, + "grad_norm": 1.6255626899279143, + "learning_rate": 7.395854718277372e-07, + "loss": 0.486778199672699, + "step": 5246 + }, + { + "epoch": 1.2098224579202213, + "grad_norm": 1.938770231002498, + "learning_rate": 7.392173842121765e-07, + "loss": 0.5153725147247314, + "step": 5247 + }, + { + "epoch": 1.210053032049804, + "grad_norm": 1.6258479412197122, + "learning_rate": 7.388493345040123e-07, + "loss": 0.42352354526519775, + "step": 5248 + }, + { + "epoch": 1.2102836061793867, + "grad_norm": 1.477454043811349, + "learning_rate": 7.384813227567437e-07, + "loss": 0.363994300365448, + "step": 5249 + }, + { + "epoch": 1.2105141803089694, + "grad_norm": 1.3450193947115454, + "learning_rate": 7.381133490238654e-07, + "loss": 0.44195863604545593, + "step": 5250 + }, + { + "epoch": 1.2107447544385521, + "grad_norm": 1.6510262733932026, + "learning_rate": 7.377454133588657e-07, + "loss": 0.5031026601791382, + "step": 5251 + }, + { + "epoch": 1.2109753285681346, + "grad_norm": 1.1126223170422647, + "learning_rate": 7.373775158152284e-07, + "loss": 0.3900304436683655, + 
"step": 5252 + }, + { + "epoch": 1.2112059026977173, + "grad_norm": 1.4718461813811798, + "learning_rate": 7.370096564464308e-07, + "loss": 0.406912624835968, + "step": 5253 + }, + { + "epoch": 1.2114364768273, + "grad_norm": 1.2742945351379469, + "learning_rate": 7.366418353059445e-07, + "loss": 0.407238632440567, + "step": 5254 + }, + { + "epoch": 1.2116670509568825, + "grad_norm": 2.3145771276343625, + "learning_rate": 7.36274052447237e-07, + "loss": 0.5605549216270447, + "step": 5255 + }, + { + "epoch": 1.2118976250864653, + "grad_norm": 1.7547311772877803, + "learning_rate": 7.359063079237684e-07, + "loss": 0.5016111731529236, + "step": 5256 + }, + { + "epoch": 1.212128199216048, + "grad_norm": 1.31999939383151, + "learning_rate": 7.355386017889946e-07, + "loss": 0.38812315464019775, + "step": 5257 + }, + { + "epoch": 1.2123587733456307, + "grad_norm": 1.5177330463551633, + "learning_rate": 7.35170934096365e-07, + "loss": 0.46022963523864746, + "step": 5258 + }, + { + "epoch": 1.2125893474752132, + "grad_norm": 1.4118628857930515, + "learning_rate": 7.348033048993246e-07, + "loss": 0.40029624104499817, + "step": 5259 + }, + { + "epoch": 1.212819921604796, + "grad_norm": 1.4051430521275825, + "learning_rate": 7.344357142513111e-07, + "loss": 0.4331943392753601, + "step": 5260 + }, + { + "epoch": 1.2130504957343786, + "grad_norm": 1.565074125850335, + "learning_rate": 7.340681622057582e-07, + "loss": 0.43757596611976624, + "step": 5261 + }, + { + "epoch": 1.2132810698639613, + "grad_norm": 1.7743971563599887, + "learning_rate": 7.337006488160931e-07, + "loss": 0.49733203649520874, + "step": 5262 + }, + { + "epoch": 1.2135116439935438, + "grad_norm": 1.341577967095045, + "learning_rate": 7.333331741357373e-07, + "loss": 0.35552018880844116, + "step": 5263 + }, + { + "epoch": 1.2137422181231265, + "grad_norm": 1.6321675762702066, + "learning_rate": 7.329657382181074e-07, + "loss": 0.4102798104286194, + "step": 5264 + }, + { + "epoch": 1.2139727922527093, + 
"grad_norm": 1.4184297160567871, + "learning_rate": 7.325983411166136e-07, + "loss": 0.4517349600791931, + "step": 5265 + }, + { + "epoch": 1.214203366382292, + "grad_norm": 1.6427775893660324, + "learning_rate": 7.322309828846613e-07, + "loss": 0.48924458026885986, + "step": 5266 + }, + { + "epoch": 1.2144339405118745, + "grad_norm": 1.4030974508932201, + "learning_rate": 7.31863663575649e-07, + "loss": 0.38971561193466187, + "step": 5267 + }, + { + "epoch": 1.2146645146414572, + "grad_norm": 1.6155044970268224, + "learning_rate": 7.31496383242971e-07, + "loss": 0.6503559350967407, + "step": 5268 + }, + { + "epoch": 1.21489508877104, + "grad_norm": 1.6905359606856467, + "learning_rate": 7.311291419400146e-07, + "loss": 0.4615272879600525, + "step": 5269 + }, + { + "epoch": 1.2151256629006226, + "grad_norm": 1.6629441467357413, + "learning_rate": 7.307619397201625e-07, + "loss": 0.3793429732322693, + "step": 5270 + }, + { + "epoch": 1.2153562370302051, + "grad_norm": 1.3076578533376795, + "learning_rate": 7.303947766367909e-07, + "loss": 0.48186585307121277, + "step": 5271 + }, + { + "epoch": 1.2155868111597878, + "grad_norm": 1.4243590091370186, + "learning_rate": 7.300276527432713e-07, + "loss": 0.4051778018474579, + "step": 5272 + }, + { + "epoch": 1.2158173852893706, + "grad_norm": 1.6820510248806995, + "learning_rate": 7.296605680929684e-07, + "loss": 0.43364250659942627, + "step": 5273 + }, + { + "epoch": 1.2160479594189533, + "grad_norm": 1.6130796939421093, + "learning_rate": 7.292935227392414e-07, + "loss": 0.4893898367881775, + "step": 5274 + }, + { + "epoch": 1.2162785335485358, + "grad_norm": 1.240780138685616, + "learning_rate": 7.289265167354448e-07, + "loss": 0.43125462532043457, + "step": 5275 + }, + { + "epoch": 1.2165091076781185, + "grad_norm": 1.6108443522760163, + "learning_rate": 7.285595501349258e-07, + "loss": 0.4086509943008423, + "step": 5276 + }, + { + "epoch": 1.2167396818077012, + "grad_norm": 1.838256686394942, + "learning_rate": 
7.281926229910274e-07, + "loss": 0.5176471471786499, + "step": 5277 + }, + { + "epoch": 1.216970255937284, + "grad_norm": 1.8145364687667531, + "learning_rate": 7.278257353570857e-07, + "loss": 0.4783210754394531, + "step": 5278 + }, + { + "epoch": 1.2172008300668664, + "grad_norm": 1.5012148176529632, + "learning_rate": 7.274588872864322e-07, + "loss": 0.4847145080566406, + "step": 5279 + }, + { + "epoch": 1.2174314041964491, + "grad_norm": 1.4076947828029491, + "learning_rate": 7.270920788323911e-07, + "loss": 0.4691849946975708, + "step": 5280 + }, + { + "epoch": 1.2176619783260318, + "grad_norm": 1.8729494542899485, + "learning_rate": 7.267253100482824e-07, + "loss": 0.5755687952041626, + "step": 5281 + }, + { + "epoch": 1.2178925524556146, + "grad_norm": 1.3639853941099451, + "learning_rate": 7.263585809874193e-07, + "loss": 0.42995721101760864, + "step": 5282 + }, + { + "epoch": 1.218123126585197, + "grad_norm": 1.4560966669318844, + "learning_rate": 7.259918917031101e-07, + "loss": 0.501590371131897, + "step": 5283 + }, + { + "epoch": 1.2183537007147798, + "grad_norm": 1.5326641731074693, + "learning_rate": 7.256252422486563e-07, + "loss": 0.5499469041824341, + "step": 5284 + }, + { + "epoch": 1.2185842748443625, + "grad_norm": 1.7075536366613502, + "learning_rate": 7.25258632677354e-07, + "loss": 0.4567297399044037, + "step": 5285 + }, + { + "epoch": 1.2188148489739452, + "grad_norm": 1.3251311548344207, + "learning_rate": 7.248920630424942e-07, + "loss": 0.4046020805835724, + "step": 5286 + }, + { + "epoch": 1.2190454231035277, + "grad_norm": 1.4721989927884918, + "learning_rate": 7.245255333973608e-07, + "loss": 0.3534840941429138, + "step": 5287 + }, + { + "epoch": 1.2192759972331104, + "grad_norm": 1.4151850401024268, + "learning_rate": 7.241590437952331e-07, + "loss": 0.45795637369155884, + "step": 5288 + }, + { + "epoch": 1.2195065713626931, + "grad_norm": 1.4921564176260302, + "learning_rate": 7.237925942893839e-07, + "loss": 0.3984150290489197, + 
"step": 5289 + }, + { + "epoch": 1.2197371454922759, + "grad_norm": 1.5617581917582364, + "learning_rate": 7.234261849330807e-07, + "loss": 0.46833336353302, + "step": 5290 + }, + { + "epoch": 1.2199677196218583, + "grad_norm": 1.6200691445613622, + "learning_rate": 7.230598157795842e-07, + "loss": 0.5395709276199341, + "step": 5291 + }, + { + "epoch": 1.220198293751441, + "grad_norm": 1.300141768975315, + "learning_rate": 7.226934868821505e-07, + "loss": 0.4556152820587158, + "step": 5292 + }, + { + "epoch": 1.2204288678810238, + "grad_norm": 1.5916352600329198, + "learning_rate": 7.223271982940287e-07, + "loss": 0.49564266204833984, + "step": 5293 + }, + { + "epoch": 1.2206594420106065, + "grad_norm": 1.5492667362910795, + "learning_rate": 7.219609500684625e-07, + "loss": 0.5389127731323242, + "step": 5294 + }, + { + "epoch": 1.220890016140189, + "grad_norm": 1.3125997254034645, + "learning_rate": 7.215947422586905e-07, + "loss": 0.48815661668777466, + "step": 5295 + }, + { + "epoch": 1.2211205902697717, + "grad_norm": 1.6576709424363434, + "learning_rate": 7.21228574917944e-07, + "loss": 0.4204339385032654, + "step": 5296 + }, + { + "epoch": 1.2213511643993544, + "grad_norm": 1.2807688149232648, + "learning_rate": 7.208624480994494e-07, + "loss": 0.39993199706077576, + "step": 5297 + }, + { + "epoch": 1.2215817385289371, + "grad_norm": 1.7420778835945019, + "learning_rate": 7.204963618564268e-07, + "loss": 0.5679433941841125, + "step": 5298 + }, + { + "epoch": 1.2218123126585196, + "grad_norm": 1.819503614929131, + "learning_rate": 7.201303162420913e-07, + "loss": 0.46620815992355347, + "step": 5299 + }, + { + "epoch": 1.2220428867881024, + "grad_norm": 1.4667553556365653, + "learning_rate": 7.1976431130965e-07, + "loss": 0.44684547185897827, + "step": 5300 + }, + { + "epoch": 1.222273460917685, + "grad_norm": 1.6182813529173974, + "learning_rate": 7.193983471123066e-07, + "loss": 0.4518858790397644, + "step": 5301 + }, + { + "epoch": 1.2225040350472678, + 
"grad_norm": 1.497058969625444, + "learning_rate": 7.190324237032569e-07, + "loss": 0.3966304659843445, + "step": 5302 + }, + { + "epoch": 1.2227346091768503, + "grad_norm": 1.7688402904846452, + "learning_rate": 7.186665411356925e-07, + "loss": 0.5541782379150391, + "step": 5303 + }, + { + "epoch": 1.222965183306433, + "grad_norm": 1.5748150394963076, + "learning_rate": 7.183006994627972e-07, + "loss": 0.3986799120903015, + "step": 5304 + }, + { + "epoch": 1.2231957574360157, + "grad_norm": 1.3179167901427211, + "learning_rate": 7.1793489873775e-07, + "loss": 0.485867977142334, + "step": 5305 + }, + { + "epoch": 1.2234263315655984, + "grad_norm": 1.6264368495030206, + "learning_rate": 7.175691390137244e-07, + "loss": 0.40187692642211914, + "step": 5306 + }, + { + "epoch": 1.223656905695181, + "grad_norm": 1.5085798270078894, + "learning_rate": 7.172034203438864e-07, + "loss": 0.4679393172264099, + "step": 5307 + }, + { + "epoch": 1.2238874798247636, + "grad_norm": 1.3178949369734356, + "learning_rate": 7.168377427813974e-07, + "loss": 0.512301504611969, + "step": 5308 + }, + { + "epoch": 1.2241180539543464, + "grad_norm": 1.4684075358167812, + "learning_rate": 7.164721063794122e-07, + "loss": 0.5340646505355835, + "step": 5309 + }, + { + "epoch": 1.224348628083929, + "grad_norm": 1.6528941936609833, + "learning_rate": 7.1610651119108e-07, + "loss": 0.4757506847381592, + "step": 5310 + }, + { + "epoch": 1.2245792022135116, + "grad_norm": 1.5982652868975813, + "learning_rate": 7.157409572695434e-07, + "loss": 0.5697519779205322, + "step": 5311 + }, + { + "epoch": 1.2248097763430943, + "grad_norm": 1.4427165421847559, + "learning_rate": 7.153754446679395e-07, + "loss": 0.47521811723709106, + "step": 5312 + }, + { + "epoch": 1.225040350472677, + "grad_norm": 1.4092560589123113, + "learning_rate": 7.150099734393997e-07, + "loss": 0.40484973788261414, + "step": 5313 + }, + { + "epoch": 1.2252709246022597, + "grad_norm": 1.4095470452598946, + "learning_rate": 
7.146445436370481e-07, + "loss": 0.4465969204902649, + "step": 5314 + }, + { + "epoch": 1.2255014987318422, + "grad_norm": 1.5543895211488108, + "learning_rate": 7.142791553140044e-07, + "loss": 0.44878089427948, + "step": 5315 + }, + { + "epoch": 1.225732072861425, + "grad_norm": 1.657847170962442, + "learning_rate": 7.139138085233809e-07, + "loss": 0.5049536228179932, + "step": 5316 + }, + { + "epoch": 1.2259626469910077, + "grad_norm": 1.377588971885486, + "learning_rate": 7.135485033182847e-07, + "loss": 0.42945951223373413, + "step": 5317 + }, + { + "epoch": 1.2261932211205901, + "grad_norm": 1.607627236207016, + "learning_rate": 7.131832397518167e-07, + "loss": 0.4668564200401306, + "step": 5318 + }, + { + "epoch": 1.2264237952501729, + "grad_norm": 1.640684584420395, + "learning_rate": 7.128180178770718e-07, + "loss": 0.4691551625728607, + "step": 5319 + }, + { + "epoch": 1.2266543693797556, + "grad_norm": 1.4653351758865718, + "learning_rate": 7.124528377471382e-07, + "loss": 0.4306211769580841, + "step": 5320 + }, + { + "epoch": 1.2268849435093383, + "grad_norm": 1.7130888177954928, + "learning_rate": 7.120876994150991e-07, + "loss": 0.4986322522163391, + "step": 5321 + }, + { + "epoch": 1.227115517638921, + "grad_norm": 1.4775997138779564, + "learning_rate": 7.117226029340304e-07, + "loss": 0.4058566093444824, + "step": 5322 + }, + { + "epoch": 1.2273460917685035, + "grad_norm": 1.3729187298835452, + "learning_rate": 7.113575483570036e-07, + "loss": 0.390174925327301, + "step": 5323 + }, + { + "epoch": 1.2275766658980862, + "grad_norm": 1.3070483816242904, + "learning_rate": 7.109925357370821e-07, + "loss": 0.38822996616363525, + "step": 5324 + }, + { + "epoch": 1.227807240027669, + "grad_norm": 1.3599088173875424, + "learning_rate": 7.106275651273244e-07, + "loss": 0.47792741656303406, + "step": 5325 + }, + { + "epoch": 1.2280378141572514, + "grad_norm": 1.52666177684785, + "learning_rate": 7.102626365807833e-07, + "loss": 0.5332789421081543, + "step": 
5326 + }, + { + "epoch": 1.2282683882868342, + "grad_norm": 1.4337525635961101, + "learning_rate": 7.098977501505036e-07, + "loss": 0.5325096845626831, + "step": 5327 + }, + { + "epoch": 1.2284989624164169, + "grad_norm": 1.6185088994304762, + "learning_rate": 7.095329058895267e-07, + "loss": 0.4184231162071228, + "step": 5328 + }, + { + "epoch": 1.2287295365459996, + "grad_norm": 1.7570013482364435, + "learning_rate": 7.091681038508852e-07, + "loss": 0.43037641048431396, + "step": 5329 + }, + { + "epoch": 1.2289601106755823, + "grad_norm": 1.5067774692843796, + "learning_rate": 7.088033440876078e-07, + "loss": 0.4466821551322937, + "step": 5330 + }, + { + "epoch": 1.2291906848051648, + "grad_norm": 1.5083021571464743, + "learning_rate": 7.084386266527151e-07, + "loss": 0.35853004455566406, + "step": 5331 + }, + { + "epoch": 1.2294212589347475, + "grad_norm": 1.542402337323393, + "learning_rate": 7.080739515992231e-07, + "loss": 0.44986268877983093, + "step": 5332 + }, + { + "epoch": 1.2296518330643302, + "grad_norm": 1.7104999289185845, + "learning_rate": 7.07709318980141e-07, + "loss": 0.3563602566719055, + "step": 5333 + }, + { + "epoch": 1.2298824071939127, + "grad_norm": 1.5401970805558025, + "learning_rate": 7.073447288484715e-07, + "loss": 0.4505435824394226, + "step": 5334 + }, + { + "epoch": 1.2301129813234954, + "grad_norm": 1.3508208021904817, + "learning_rate": 7.069801812572116e-07, + "loss": 0.4477807283401489, + "step": 5335 + }, + { + "epoch": 1.2303435554530782, + "grad_norm": 1.5084663891676386, + "learning_rate": 7.066156762593518e-07, + "loss": 0.4470565915107727, + "step": 5336 + }, + { + "epoch": 1.2305741295826609, + "grad_norm": 1.4627780913359043, + "learning_rate": 7.062512139078773e-07, + "loss": 0.4236464500427246, + "step": 5337 + }, + { + "epoch": 1.2308047037122436, + "grad_norm": 1.3002436810863733, + "learning_rate": 7.058867942557655e-07, + "loss": 0.3221476376056671, + "step": 5338 + }, + { + "epoch": 1.231035277841826, + 
"grad_norm": 1.818660153327524, + "learning_rate": 7.055224173559891e-07, + "loss": 0.502305269241333, + "step": 5339 + }, + { + "epoch": 1.2312658519714088, + "grad_norm": 1.655814956644188, + "learning_rate": 7.051580832615136e-07, + "loss": 0.5121853351593018, + "step": 5340 + }, + { + "epoch": 1.2314964261009915, + "grad_norm": 1.713071870874518, + "learning_rate": 7.047937920252991e-07, + "loss": 0.5468438863754272, + "step": 5341 + }, + { + "epoch": 1.231727000230574, + "grad_norm": 1.2030374980808431, + "learning_rate": 7.044295437002985e-07, + "loss": 0.5026402473449707, + "step": 5342 + }, + { + "epoch": 1.2319575743601567, + "grad_norm": 1.9445671085046203, + "learning_rate": 7.040653383394596e-07, + "loss": 0.5205342173576355, + "step": 5343 + }, + { + "epoch": 1.2321881484897395, + "grad_norm": 1.5970504229179872, + "learning_rate": 7.037011759957228e-07, + "loss": 0.5184727311134338, + "step": 5344 + }, + { + "epoch": 1.2324187226193222, + "grad_norm": 1.3779493729990695, + "learning_rate": 7.033370567220227e-07, + "loss": 0.414316862821579, + "step": 5345 + }, + { + "epoch": 1.2326492967489049, + "grad_norm": 1.4260441300832385, + "learning_rate": 7.029729805712885e-07, + "loss": 0.42133980989456177, + "step": 5346 + }, + { + "epoch": 1.2328798708784874, + "grad_norm": 1.8139584962445312, + "learning_rate": 7.026089475964414e-07, + "loss": 0.4888553321361542, + "step": 5347 + }, + { + "epoch": 1.23311044500807, + "grad_norm": 1.3419182130591616, + "learning_rate": 7.022449578503979e-07, + "loss": 0.4702431857585907, + "step": 5348 + }, + { + "epoch": 1.2333410191376528, + "grad_norm": 1.7237576970327266, + "learning_rate": 7.018810113860672e-07, + "loss": 0.5312628746032715, + "step": 5349 + }, + { + "epoch": 1.2335715932672353, + "grad_norm": 1.3183810824607851, + "learning_rate": 7.015171082563533e-07, + "loss": 0.5297777056694031, + "step": 5350 + }, + { + "epoch": 1.233802167396818, + "grad_norm": 1.4423147751678271, + "learning_rate": 
7.011532485141524e-07, + "loss": 0.5172504782676697, + "step": 5351 + }, + { + "epoch": 1.2340327415264007, + "grad_norm": 1.4663357988839691, + "learning_rate": 7.007894322123556e-07, + "loss": 0.4288995862007141, + "step": 5352 + }, + { + "epoch": 1.2342633156559835, + "grad_norm": 1.373863251988179, + "learning_rate": 7.004256594038475e-07, + "loss": 0.4194108247756958, + "step": 5353 + }, + { + "epoch": 1.2344938897855662, + "grad_norm": 1.6567765897983155, + "learning_rate": 7.000619301415056e-07, + "loss": 0.48825979232788086, + "step": 5354 + }, + { + "epoch": 1.2347244639151487, + "grad_norm": 1.5674749005570563, + "learning_rate": 6.99698244478202e-07, + "loss": 0.4721163213253021, + "step": 5355 + }, + { + "epoch": 1.2349550380447314, + "grad_norm": 1.4292932334311201, + "learning_rate": 6.993346024668019e-07, + "loss": 0.5104520916938782, + "step": 5356 + }, + { + "epoch": 1.235185612174314, + "grad_norm": 1.757397862406759, + "learning_rate": 6.98971004160165e-07, + "loss": 0.5257378816604614, + "step": 5357 + }, + { + "epoch": 1.2354161863038966, + "grad_norm": 1.5756368498047397, + "learning_rate": 6.986074496111429e-07, + "loss": 0.5624911785125732, + "step": 5358 + }, + { + "epoch": 1.2356467604334793, + "grad_norm": 1.4832170020848512, + "learning_rate": 6.982439388725828e-07, + "loss": 0.5186502933502197, + "step": 5359 + }, + { + "epoch": 1.235877334563062, + "grad_norm": 1.4333093290057806, + "learning_rate": 6.978804719973241e-07, + "loss": 0.42711856961250305, + "step": 5360 + }, + { + "epoch": 1.2361079086926448, + "grad_norm": 1.5710112274218073, + "learning_rate": 6.975170490382013e-07, + "loss": 0.525848388671875, + "step": 5361 + }, + { + "epoch": 1.2363384828222275, + "grad_norm": 1.475742371846223, + "learning_rate": 6.971536700480405e-07, + "loss": 0.41279107332229614, + "step": 5362 + }, + { + "epoch": 1.23656905695181, + "grad_norm": 1.381610773190275, + "learning_rate": 6.967903350796632e-07, + "loss": 0.38868075609207153, + "step": 
5363 + }, + { + "epoch": 1.2367996310813927, + "grad_norm": 1.2852056850014901, + "learning_rate": 6.964270441858837e-07, + "loss": 0.41875284910202026, + "step": 5364 + }, + { + "epoch": 1.2370302052109754, + "grad_norm": 1.6506819982730945, + "learning_rate": 6.960637974195096e-07, + "loss": 0.4754808843135834, + "step": 5365 + }, + { + "epoch": 1.237260779340558, + "grad_norm": 1.367170455716087, + "learning_rate": 6.957005948333434e-07, + "loss": 0.5073249340057373, + "step": 5366 + }, + { + "epoch": 1.2374913534701406, + "grad_norm": 1.4682970250918908, + "learning_rate": 6.953374364801792e-07, + "loss": 0.4545915126800537, + "step": 5367 + }, + { + "epoch": 1.2377219275997233, + "grad_norm": 1.4664699450973697, + "learning_rate": 6.949743224128064e-07, + "loss": 0.42797422409057617, + "step": 5368 + }, + { + "epoch": 1.237952501729306, + "grad_norm": 1.7409270878989862, + "learning_rate": 6.946112526840071e-07, + "loss": 0.570556104183197, + "step": 5369 + }, + { + "epoch": 1.2381830758588885, + "grad_norm": 1.21807525986395, + "learning_rate": 6.942482273465577e-07, + "loss": 0.3866136074066162, + "step": 5370 + }, + { + "epoch": 1.2384136499884713, + "grad_norm": 1.385922338157159, + "learning_rate": 6.938852464532267e-07, + "loss": 0.3716529309749603, + "step": 5371 + }, + { + "epoch": 1.238644224118054, + "grad_norm": 1.5756601150848535, + "learning_rate": 6.935223100567776e-07, + "loss": 0.4781096577644348, + "step": 5372 + }, + { + "epoch": 1.2388747982476367, + "grad_norm": 1.5023911555765588, + "learning_rate": 6.931594182099671e-07, + "loss": 0.4262877106666565, + "step": 5373 + }, + { + "epoch": 1.2391053723772192, + "grad_norm": 1.6023295142223875, + "learning_rate": 6.927965709655444e-07, + "loss": 0.49859267473220825, + "step": 5374 + }, + { + "epoch": 1.239335946506802, + "grad_norm": 1.8550612096678925, + "learning_rate": 6.924337683762539e-07, + "loss": 0.4710119664669037, + "step": 5375 + }, + { + "epoch": 1.2395665206363846, + "grad_norm": 
1.518585467890365, + "learning_rate": 6.92071010494832e-07, + "loss": 0.4974974989891052, + "step": 5376 + }, + { + "epoch": 1.2397970947659673, + "grad_norm": 2.029509938602293, + "learning_rate": 6.917082973740098e-07, + "loss": 0.4118514657020569, + "step": 5377 + }, + { + "epoch": 1.2400276688955498, + "grad_norm": 1.391922482329176, + "learning_rate": 6.913456290665106e-07, + "loss": 0.4223165214061737, + "step": 5378 + }, + { + "epoch": 1.2402582430251325, + "grad_norm": 1.5760276199817416, + "learning_rate": 6.909830056250526e-07, + "loss": 0.4896865487098694, + "step": 5379 + }, + { + "epoch": 1.2404888171547153, + "grad_norm": 1.35318854532684, + "learning_rate": 6.906204271023463e-07, + "loss": 0.36112266778945923, + "step": 5380 + }, + { + "epoch": 1.240719391284298, + "grad_norm": 1.4255868593911465, + "learning_rate": 6.902578935510969e-07, + "loss": 0.4665502905845642, + "step": 5381 + }, + { + "epoch": 1.2409499654138805, + "grad_norm": 1.6036447338223971, + "learning_rate": 6.898954050240013e-07, + "loss": 0.46059858798980713, + "step": 5382 + }, + { + "epoch": 1.2411805395434632, + "grad_norm": 1.4844055015741944, + "learning_rate": 6.895329615737515e-07, + "loss": 0.46149420738220215, + "step": 5383 + }, + { + "epoch": 1.241411113673046, + "grad_norm": 1.5602784439666317, + "learning_rate": 6.891705632530327e-07, + "loss": 0.42226743698120117, + "step": 5384 + }, + { + "epoch": 1.2416416878026286, + "grad_norm": 1.4308699177023212, + "learning_rate": 6.88808210114522e-07, + "loss": 0.45789939165115356, + "step": 5385 + }, + { + "epoch": 1.2418722619322111, + "grad_norm": 1.5754200685163184, + "learning_rate": 6.884459022108922e-07, + "loss": 0.44569891691207886, + "step": 5386 + }, + { + "epoch": 1.2421028360617938, + "grad_norm": 1.4099412845136035, + "learning_rate": 6.880836395948078e-07, + "loss": 0.3971112370491028, + "step": 5387 + }, + { + "epoch": 1.2423334101913766, + "grad_norm": 1.6636550459216706, + "learning_rate": 
6.877214223189278e-07, + "loss": 0.46052566170692444, + "step": 5388 + }, + { + "epoch": 1.2425639843209593, + "grad_norm": 1.2735689149473257, + "learning_rate": 6.873592504359037e-07, + "loss": 0.42730599641799927, + "step": 5389 + }, + { + "epoch": 1.2427945584505418, + "grad_norm": 1.5806143555224212, + "learning_rate": 6.869971239983814e-07, + "loss": 0.4391734004020691, + "step": 5390 + }, + { + "epoch": 1.2430251325801245, + "grad_norm": 1.5314248582389964, + "learning_rate": 6.866350430589989e-07, + "loss": 0.4523593485355377, + "step": 5391 + }, + { + "epoch": 1.2432557067097072, + "grad_norm": 1.587550694342246, + "learning_rate": 6.86273007670389e-07, + "loss": 0.5398315787315369, + "step": 5392 + }, + { + "epoch": 1.24348628083929, + "grad_norm": 1.2298139407771986, + "learning_rate": 6.859110178851767e-07, + "loss": 0.40480807423591614, + "step": 5393 + }, + { + "epoch": 1.2437168549688724, + "grad_norm": 1.4233815325100456, + "learning_rate": 6.855490737559816e-07, + "loss": 0.42483675479888916, + "step": 5394 + }, + { + "epoch": 1.2439474290984551, + "grad_norm": 1.611497963721617, + "learning_rate": 6.851871753354153e-07, + "loss": 0.39951619505882263, + "step": 5395 + }, + { + "epoch": 1.2441780032280378, + "grad_norm": 1.5084898015563448, + "learning_rate": 6.848253226760833e-07, + "loss": 0.48650771379470825, + "step": 5396 + }, + { + "epoch": 1.2444085773576206, + "grad_norm": 1.5899141960647352, + "learning_rate": 6.844635158305853e-07, + "loss": 0.5377830266952515, + "step": 5397 + }, + { + "epoch": 1.244639151487203, + "grad_norm": 1.667763606347776, + "learning_rate": 6.841017548515127e-07, + "loss": 0.4365614950656891, + "step": 5398 + }, + { + "epoch": 1.2448697256167858, + "grad_norm": 1.2560105349082187, + "learning_rate": 6.837400397914519e-07, + "loss": 0.39739400148391724, + "step": 5399 + }, + { + "epoch": 1.2451002997463685, + "grad_norm": 1.3287360038901976, + "learning_rate": 6.833783707029812e-07, + "loss": 0.4005683660507202, + 
"step": 5400 + }, + { + "epoch": 1.2453308738759512, + "grad_norm": 1.6646043641444999, + "learning_rate": 6.830167476386737e-07, + "loss": 0.5635108351707458, + "step": 5401 + }, + { + "epoch": 1.2455614480055337, + "grad_norm": 1.6642180514990483, + "learning_rate": 6.82655170651094e-07, + "loss": 0.4332388639450073, + "step": 5402 + }, + { + "epoch": 1.2457920221351164, + "grad_norm": 1.525164084943155, + "learning_rate": 6.822936397928015e-07, + "loss": 0.47506433725357056, + "step": 5403 + }, + { + "epoch": 1.2460225962646991, + "grad_norm": 1.600563207739989, + "learning_rate": 6.819321551163486e-07, + "loss": 0.5081777572631836, + "step": 5404 + }, + { + "epoch": 1.2462531703942819, + "grad_norm": 1.6650056699718765, + "learning_rate": 6.815707166742801e-07, + "loss": 0.4038957953453064, + "step": 5405 + }, + { + "epoch": 1.2464837445238643, + "grad_norm": 1.759676797230376, + "learning_rate": 6.812093245191354e-07, + "loss": 0.4665706753730774, + "step": 5406 + }, + { + "epoch": 1.246714318653447, + "grad_norm": 1.8957165771048585, + "learning_rate": 6.808479787034459e-07, + "loss": 0.45610785484313965, + "step": 5407 + }, + { + "epoch": 1.2469448927830298, + "grad_norm": 1.443572019443965, + "learning_rate": 6.804866792797377e-07, + "loss": 0.4334958493709564, + "step": 5408 + }, + { + "epoch": 1.2471754669126125, + "grad_norm": 1.4719822396111175, + "learning_rate": 6.801254263005283e-07, + "loss": 0.5505996942520142, + "step": 5409 + }, + { + "epoch": 1.247406041042195, + "grad_norm": 1.5261896109132582, + "learning_rate": 6.797642198183303e-07, + "loss": 0.5589424967765808, + "step": 5410 + }, + { + "epoch": 1.2476366151717777, + "grad_norm": 1.892082521677576, + "learning_rate": 6.794030598856483e-07, + "loss": 0.48142847418785095, + "step": 5411 + }, + { + "epoch": 1.2478671893013604, + "grad_norm": 1.6606812394072976, + "learning_rate": 6.790419465549811e-07, + "loss": 0.5549830198287964, + "step": 5412 + }, + { + "epoch": 1.2480977634309431, + 
"grad_norm": 1.6097248774465256, + "learning_rate": 6.786808798788193e-07, + "loss": 0.5974072217941284, + "step": 5413 + }, + { + "epoch": 1.2483283375605256, + "grad_norm": 1.3333137403479542, + "learning_rate": 6.783198599096484e-07, + "loss": 0.38189029693603516, + "step": 5414 + }, + { + "epoch": 1.2485589116901084, + "grad_norm": 1.4543286006354934, + "learning_rate": 6.779588866999459e-07, + "loss": 0.41150039434432983, + "step": 5415 + }, + { + "epoch": 1.248789485819691, + "grad_norm": 1.451215833026304, + "learning_rate": 6.775979603021828e-07, + "loss": 0.4291636645793915, + "step": 5416 + }, + { + "epoch": 1.2490200599492738, + "grad_norm": 1.2798211834451962, + "learning_rate": 6.772370807688242e-07, + "loss": 0.45324140787124634, + "step": 5417 + }, + { + "epoch": 1.2492506340788563, + "grad_norm": 1.3895968147090427, + "learning_rate": 6.768762481523262e-07, + "loss": 0.4748731851577759, + "step": 5418 + }, + { + "epoch": 1.249481208208439, + "grad_norm": 1.618628812481624, + "learning_rate": 6.765154625051408e-07, + "loss": 0.43602505326271057, + "step": 5419 + }, + { + "epoch": 1.2497117823380217, + "grad_norm": 1.4027608933739075, + "learning_rate": 6.761547238797112e-07, + "loss": 0.49135684967041016, + "step": 5420 + }, + { + "epoch": 1.2499423564676044, + "grad_norm": 1.6315360373382408, + "learning_rate": 6.757940323284747e-07, + "loss": 0.47508272528648376, + "step": 5421 + }, + { + "epoch": 1.250172930597187, + "grad_norm": 1.612865868213556, + "learning_rate": 6.754333879038611e-07, + "loss": 0.399259090423584, + "step": 5422 + }, + { + "epoch": 1.2504035047267696, + "grad_norm": 1.6878741312884291, + "learning_rate": 6.750727906582941e-07, + "loss": 0.426364004611969, + "step": 5423 + }, + { + "epoch": 1.2506340788563524, + "grad_norm": 1.4584807010931917, + "learning_rate": 6.747122406441903e-07, + "loss": 0.4641951322555542, + "step": 5424 + }, + { + "epoch": 1.250864652985935, + "grad_norm": 1.3880451781756755, + "learning_rate": 
6.743517379139585e-07, + "loss": 0.35008323192596436, + "step": 5425 + }, + { + "epoch": 1.2510952271155176, + "grad_norm": 1.4485633708895984, + "learning_rate": 6.739912825200022e-07, + "loss": 0.49627771973609924, + "step": 5426 + }, + { + "epoch": 1.2513258012451003, + "grad_norm": 1.628398042874366, + "learning_rate": 6.736308745147168e-07, + "loss": 0.4926851987838745, + "step": 5427 + }, + { + "epoch": 1.251556375374683, + "grad_norm": 1.622960147434406, + "learning_rate": 6.732705139504917e-07, + "loss": 0.44777536392211914, + "step": 5428 + }, + { + "epoch": 1.2517869495042655, + "grad_norm": 1.6523545202218224, + "learning_rate": 6.729102008797085e-07, + "loss": 0.39160430431365967, + "step": 5429 + }, + { + "epoch": 1.2520175236338482, + "grad_norm": 1.5184849781676724, + "learning_rate": 6.725499353547426e-07, + "loss": 0.4585273861885071, + "step": 5430 + }, + { + "epoch": 1.252248097763431, + "grad_norm": 1.5327675196324342, + "learning_rate": 6.721897174279621e-07, + "loss": 0.5245224237442017, + "step": 5431 + }, + { + "epoch": 1.2524786718930137, + "grad_norm": 1.5257069000403813, + "learning_rate": 6.718295471517288e-07, + "loss": 0.4217349886894226, + "step": 5432 + }, + { + "epoch": 1.2527092460225964, + "grad_norm": 1.4826939266004133, + "learning_rate": 6.714694245783963e-07, + "loss": 0.4944193661212921, + "step": 5433 + }, + { + "epoch": 1.2529398201521789, + "grad_norm": 1.387839760206308, + "learning_rate": 6.711093497603127e-07, + "loss": 0.5058057904243469, + "step": 5434 + }, + { + "epoch": 1.2531703942817616, + "grad_norm": 1.381621888753065, + "learning_rate": 6.707493227498186e-07, + "loss": 0.45669037103652954, + "step": 5435 + }, + { + "epoch": 1.2534009684113443, + "grad_norm": 1.5997486257834712, + "learning_rate": 6.703893435992469e-07, + "loss": 0.4248945116996765, + "step": 5436 + }, + { + "epoch": 1.2536315425409268, + "grad_norm": 1.6056111266165571, + "learning_rate": 6.700294123609249e-07, + "loss": 0.3984343707561493, + 
"step": 5437 + }, + { + "epoch": 1.2538621166705095, + "grad_norm": 1.5349078061254786, + "learning_rate": 6.696695290871715e-07, + "loss": 0.435299813747406, + "step": 5438 + }, + { + "epoch": 1.2540926908000922, + "grad_norm": 1.6277363060500583, + "learning_rate": 6.693096938303002e-07, + "loss": 0.4225304126739502, + "step": 5439 + }, + { + "epoch": 1.254323264929675, + "grad_norm": 1.6495416759002697, + "learning_rate": 6.689499066426161e-07, + "loss": 0.4686669111251831, + "step": 5440 + }, + { + "epoch": 1.2545538390592577, + "grad_norm": 1.5168957851404996, + "learning_rate": 6.685901675764186e-07, + "loss": 0.45163553953170776, + "step": 5441 + }, + { + "epoch": 1.2547844131888402, + "grad_norm": 1.3593822737620262, + "learning_rate": 6.682304766839986e-07, + "loss": 0.44223567843437195, + "step": 5442 + }, + { + "epoch": 1.2550149873184229, + "grad_norm": 1.5363469724843986, + "learning_rate": 6.678708340176413e-07, + "loss": 0.4008648991584778, + "step": 5443 + }, + { + "epoch": 1.2552455614480056, + "grad_norm": 1.4199248627467993, + "learning_rate": 6.675112396296245e-07, + "loss": 0.4500792324542999, + "step": 5444 + }, + { + "epoch": 1.255476135577588, + "grad_norm": 1.490145734356762, + "learning_rate": 6.671516935722183e-07, + "loss": 0.42558690905570984, + "step": 5445 + }, + { + "epoch": 1.2557067097071708, + "grad_norm": 1.7098682543926618, + "learning_rate": 6.667921958976871e-07, + "loss": 0.4676043391227722, + "step": 5446 + }, + { + "epoch": 1.2559372838367535, + "grad_norm": 1.8041492407407758, + "learning_rate": 6.664327466582869e-07, + "loss": 0.44114184379577637, + "step": 5447 + }, + { + "epoch": 1.2561678579663362, + "grad_norm": 1.6102069805165957, + "learning_rate": 6.660733459062679e-07, + "loss": 0.33865463733673096, + "step": 5448 + }, + { + "epoch": 1.256398432095919, + "grad_norm": 1.8619975614063338, + "learning_rate": 6.65713993693872e-07, + "loss": 0.5397414565086365, + "step": 5449 + }, + { + "epoch": 1.2566290062255014, + 
"grad_norm": 1.4730562973077854, + "learning_rate": 6.653546900733352e-07, + "loss": 0.49249517917633057, + "step": 5450 + }, + { + "epoch": 1.2568595803550842, + "grad_norm": 1.5757041605280757, + "learning_rate": 6.649954350968855e-07, + "loss": 0.5438433885574341, + "step": 5451 + }, + { + "epoch": 1.2570901544846669, + "grad_norm": 1.4727448576353426, + "learning_rate": 6.646362288167448e-07, + "loss": 0.43725037574768066, + "step": 5452 + }, + { + "epoch": 1.2573207286142494, + "grad_norm": 1.5159104216766552, + "learning_rate": 6.642770712851269e-07, + "loss": 0.5369226336479187, + "step": 5453 + }, + { + "epoch": 1.257551302743832, + "grad_norm": 1.4915531986930697, + "learning_rate": 6.63917962554239e-07, + "loss": 0.45022842288017273, + "step": 5454 + }, + { + "epoch": 1.2577818768734148, + "grad_norm": 1.6219974371712227, + "learning_rate": 6.635589026762818e-07, + "loss": 0.42483362555503845, + "step": 5455 + }, + { + "epoch": 1.2580124510029975, + "grad_norm": 1.4115832140490556, + "learning_rate": 6.631998917034474e-07, + "loss": 0.4909497797489166, + "step": 5456 + }, + { + "epoch": 1.2582430251325802, + "grad_norm": 1.3159817254483799, + "learning_rate": 6.628409296879223e-07, + "loss": 0.4927433431148529, + "step": 5457 + }, + { + "epoch": 1.2584735992621627, + "grad_norm": 1.550356576361105, + "learning_rate": 6.624820166818847e-07, + "loss": 0.4452761113643646, + "step": 5458 + }, + { + "epoch": 1.2587041733917455, + "grad_norm": 1.5683413746620685, + "learning_rate": 6.62123152737507e-07, + "loss": 0.4637982249259949, + "step": 5459 + }, + { + "epoch": 1.2589347475213282, + "grad_norm": 1.3293268937895057, + "learning_rate": 6.617643379069532e-07, + "loss": 0.3189438581466675, + "step": 5460 + }, + { + "epoch": 1.2591653216509107, + "grad_norm": 1.3296675722252447, + "learning_rate": 6.614055722423808e-07, + "loss": 0.420698881149292, + "step": 5461 + }, + { + "epoch": 1.2593958957804934, + "grad_norm": 1.5202476608747133, + "learning_rate": 
6.610468557959398e-07, + "loss": 0.5187642574310303, + "step": 5462 + }, + { + "epoch": 1.259626469910076, + "grad_norm": 1.4954844764147424, + "learning_rate": 6.606881886197741e-07, + "loss": 0.48519381880760193, + "step": 5463 + }, + { + "epoch": 1.2598570440396588, + "grad_norm": 1.4755140585184632, + "learning_rate": 6.60329570766019e-07, + "loss": 0.3930806815624237, + "step": 5464 + }, + { + "epoch": 1.2600876181692415, + "grad_norm": 1.8617928902566707, + "learning_rate": 6.599710022868027e-07, + "loss": 0.4890612065792084, + "step": 5465 + }, + { + "epoch": 1.260318192298824, + "grad_norm": 1.2781262224531547, + "learning_rate": 6.596124832342476e-07, + "loss": 0.4202774465084076, + "step": 5466 + }, + { + "epoch": 1.2605487664284067, + "grad_norm": 1.5196012608537903, + "learning_rate": 6.592540136604674e-07, + "loss": 0.5053761005401611, + "step": 5467 + }, + { + "epoch": 1.2607793405579895, + "grad_norm": 1.4874107682553572, + "learning_rate": 6.588955936175702e-07, + "loss": 0.4827175736427307, + "step": 5468 + }, + { + "epoch": 1.261009914687572, + "grad_norm": 1.4659080652243894, + "learning_rate": 6.585372231576551e-07, + "loss": 0.45179229974746704, + "step": 5469 + }, + { + "epoch": 1.2612404888171547, + "grad_norm": 1.3781712796058982, + "learning_rate": 6.581789023328155e-07, + "loss": 0.4024949073791504, + "step": 5470 + }, + { + "epoch": 1.2614710629467374, + "grad_norm": 1.7288759385339574, + "learning_rate": 6.578206311951363e-07, + "loss": 0.48839491605758667, + "step": 5471 + }, + { + "epoch": 1.26170163707632, + "grad_norm": 1.4778086795689929, + "learning_rate": 6.574624097966968e-07, + "loss": 0.45897620916366577, + "step": 5472 + }, + { + "epoch": 1.2619322112059028, + "grad_norm": 1.5548512112712307, + "learning_rate": 6.571042381895671e-07, + "loss": 0.48471882939338684, + "step": 5473 + }, + { + "epoch": 1.2621627853354853, + "grad_norm": 2.0045804163216414, + "learning_rate": 6.567461164258117e-07, + "loss": 0.44159913063049316, + 
"step": 5474 + }, + { + "epoch": 1.262393359465068, + "grad_norm": 1.5752243442253915, + "learning_rate": 6.563880445574872e-07, + "loss": 0.39186012744903564, + "step": 5475 + }, + { + "epoch": 1.2626239335946507, + "grad_norm": 1.818057995697113, + "learning_rate": 6.560300226366425e-07, + "loss": 0.5332233905792236, + "step": 5476 + }, + { + "epoch": 1.2628545077242332, + "grad_norm": 1.350222227503923, + "learning_rate": 6.556720507153201e-07, + "loss": 0.4252084195613861, + "step": 5477 + }, + { + "epoch": 1.263085081853816, + "grad_norm": 1.4204993118440263, + "learning_rate": 6.553141288455548e-07, + "loss": 0.36927711963653564, + "step": 5478 + }, + { + "epoch": 1.2633156559833987, + "grad_norm": 1.5676826878414558, + "learning_rate": 6.549562570793745e-07, + "loss": 0.4405602216720581, + "step": 5479 + }, + { + "epoch": 1.2635462301129814, + "grad_norm": 1.5245742985153417, + "learning_rate": 6.545984354687986e-07, + "loss": 0.5691590309143066, + "step": 5480 + }, + { + "epoch": 1.2637768042425641, + "grad_norm": 1.468644623890153, + "learning_rate": 6.542406640658411e-07, + "loss": 0.3750354051589966, + "step": 5481 + }, + { + "epoch": 1.2640073783721466, + "grad_norm": 1.5266320276968284, + "learning_rate": 6.538829429225068e-07, + "loss": 0.47816041111946106, + "step": 5482 + }, + { + "epoch": 1.2642379525017293, + "grad_norm": 1.4911563737024116, + "learning_rate": 6.535252720907951e-07, + "loss": 0.42470186948776245, + "step": 5483 + }, + { + "epoch": 1.264468526631312, + "grad_norm": 1.4256480441382235, + "learning_rate": 6.531676516226961e-07, + "loss": 0.37356555461883545, + "step": 5484 + }, + { + "epoch": 1.2646991007608945, + "grad_norm": 1.4604810104028516, + "learning_rate": 6.528100815701942e-07, + "loss": 0.4895293116569519, + "step": 5485 + }, + { + "epoch": 1.2649296748904773, + "grad_norm": 1.9575945537740915, + "learning_rate": 6.524525619852656e-07, + "loss": 0.4963725805282593, + "step": 5486 + }, + { + "epoch": 1.26516024902006, + 
"grad_norm": 1.7629474018170985, + "learning_rate": 6.520950929198792e-07, + "loss": 0.5443764925003052, + "step": 5487 + }, + { + "epoch": 1.2653908231496427, + "grad_norm": 1.2536482779264142, + "learning_rate": 6.517376744259972e-07, + "loss": 0.400549054145813, + "step": 5488 + }, + { + "epoch": 1.2656213972792254, + "grad_norm": 1.8850482793273033, + "learning_rate": 6.513803065555736e-07, + "loss": 0.46384042501449585, + "step": 5489 + }, + { + "epoch": 1.265851971408808, + "grad_norm": 1.4893040501119004, + "learning_rate": 6.510229893605556e-07, + "loss": 0.5044240951538086, + "step": 5490 + }, + { + "epoch": 1.2660825455383906, + "grad_norm": 1.477450831039122, + "learning_rate": 6.506657228928827e-07, + "loss": 0.4544214904308319, + "step": 5491 + }, + { + "epoch": 1.2663131196679733, + "grad_norm": 1.441487086349296, + "learning_rate": 6.503085072044878e-07, + "loss": 0.36688071489334106, + "step": 5492 + }, + { + "epoch": 1.2665436937975558, + "grad_norm": 1.4594163949727883, + "learning_rate": 6.499513423472951e-07, + "loss": 0.4058225154876709, + "step": 5493 + }, + { + "epoch": 1.2667742679271385, + "grad_norm": 1.4647938941101153, + "learning_rate": 6.495942283732225e-07, + "loss": 0.36429229378700256, + "step": 5494 + }, + { + "epoch": 1.2670048420567213, + "grad_norm": 1.7674965095028434, + "learning_rate": 6.492371653341802e-07, + "loss": 0.47116899490356445, + "step": 5495 + }, + { + "epoch": 1.267235416186304, + "grad_norm": 1.4923904627456126, + "learning_rate": 6.488801532820706e-07, + "loss": 0.4437965750694275, + "step": 5496 + }, + { + "epoch": 1.2674659903158867, + "grad_norm": 1.5533994295939695, + "learning_rate": 6.485231922687893e-07, + "loss": 0.4810328483581543, + "step": 5497 + }, + { + "epoch": 1.2676965644454692, + "grad_norm": 1.4632129166419525, + "learning_rate": 6.481662823462238e-07, + "loss": 0.362907350063324, + "step": 5498 + }, + { + "epoch": 1.267927138575052, + "grad_norm": 1.375729756251652, + "learning_rate": 
6.478094235662554e-07, + "loss": 0.43647170066833496, + "step": 5499 + }, + { + "epoch": 1.2681577127046346, + "grad_norm": 1.422215620145209, + "learning_rate": 6.474526159807563e-07, + "loss": 0.4566631317138672, + "step": 5500 + }, + { + "epoch": 1.2683882868342171, + "grad_norm": 1.5097982290449063, + "learning_rate": 6.470958596415925e-07, + "loss": 0.3940081298351288, + "step": 5501 + }, + { + "epoch": 1.2686188609637998, + "grad_norm": 1.617526881385646, + "learning_rate": 6.46739154600622e-07, + "loss": 0.5275603532791138, + "step": 5502 + }, + { + "epoch": 1.2688494350933825, + "grad_norm": 1.846449658895825, + "learning_rate": 6.463825009096959e-07, + "loss": 0.42546436190605164, + "step": 5503 + }, + { + "epoch": 1.2690800092229653, + "grad_norm": 1.6068032996774941, + "learning_rate": 6.460258986206566e-07, + "loss": 0.3833821713924408, + "step": 5504 + }, + { + "epoch": 1.2693105833525478, + "grad_norm": 1.4806797403979666, + "learning_rate": 6.456693477853408e-07, + "loss": 0.5056046843528748, + "step": 5505 + }, + { + "epoch": 1.2695411574821305, + "grad_norm": 1.6345259734279236, + "learning_rate": 6.453128484555764e-07, + "loss": 0.3544192910194397, + "step": 5506 + }, + { + "epoch": 1.2697717316117132, + "grad_norm": 1.684231386275673, + "learning_rate": 6.449564006831836e-07, + "loss": 0.47164130210876465, + "step": 5507 + }, + { + "epoch": 1.2700023057412957, + "grad_norm": 1.3334241214641123, + "learning_rate": 6.446000045199765e-07, + "loss": 0.4580638110637665, + "step": 5508 + }, + { + "epoch": 1.2702328798708784, + "grad_norm": 1.2809631136030655, + "learning_rate": 6.442436600177606e-07, + "loss": 0.45945844054222107, + "step": 5509 + }, + { + "epoch": 1.2704634540004611, + "grad_norm": 1.447660138842985, + "learning_rate": 6.438873672283343e-07, + "loss": 0.5539910793304443, + "step": 5510 + }, + { + "epoch": 1.2706940281300438, + "grad_norm": 1.6550705344684873, + "learning_rate": 6.43531126203488e-07, + "loss": 0.4661790132522583, + 
"step": 5511 + }, + { + "epoch": 1.2709246022596266, + "grad_norm": 1.7015547164246037, + "learning_rate": 6.431749369950057e-07, + "loss": 0.3781178891658783, + "step": 5512 + }, + { + "epoch": 1.271155176389209, + "grad_norm": 1.571227420481097, + "learning_rate": 6.428187996546621e-07, + "loss": 0.4858461618423462, + "step": 5513 + }, + { + "epoch": 1.2713857505187918, + "grad_norm": 1.5308384830726272, + "learning_rate": 6.424627142342262e-07, + "loss": 0.5003963708877563, + "step": 5514 + }, + { + "epoch": 1.2716163246483745, + "grad_norm": 1.3605664168425382, + "learning_rate": 6.421066807854584e-07, + "loss": 0.4620795249938965, + "step": 5515 + }, + { + "epoch": 1.271846898777957, + "grad_norm": 1.385915858471925, + "learning_rate": 6.417506993601114e-07, + "loss": 0.43998581171035767, + "step": 5516 + }, + { + "epoch": 1.2720774729075397, + "grad_norm": 1.6777446711260993, + "learning_rate": 6.413947700099311e-07, + "loss": 0.5204107165336609, + "step": 5517 + }, + { + "epoch": 1.2723080470371224, + "grad_norm": 1.5515853600398104, + "learning_rate": 6.410388927866551e-07, + "loss": 0.46675950288772583, + "step": 5518 + }, + { + "epoch": 1.2725386211667051, + "grad_norm": 1.4020610518461032, + "learning_rate": 6.406830677420146e-07, + "loss": 0.4002436101436615, + "step": 5519 + }, + { + "epoch": 1.2727691952962878, + "grad_norm": 1.6847281008342299, + "learning_rate": 6.403272949277312e-07, + "loss": 0.4051012396812439, + "step": 5520 + }, + { + "epoch": 1.2729997694258703, + "grad_norm": 1.4780078562694616, + "learning_rate": 6.399715743955209e-07, + "loss": 0.4847797751426697, + "step": 5521 + }, + { + "epoch": 1.273230343555453, + "grad_norm": 1.6389704995828815, + "learning_rate": 6.396159061970907e-07, + "loss": 0.4742053151130676, + "step": 5522 + }, + { + "epoch": 1.2734609176850358, + "grad_norm": 1.4123933831310747, + "learning_rate": 6.392602903841415e-07, + "loss": 0.44291001558303833, + "step": 5523 + }, + { + "epoch": 1.2736914918146183, + 
"grad_norm": 1.438016627678946, + "learning_rate": 6.389047270083646e-07, + "loss": 0.38993996381759644, + "step": 5524 + }, + { + "epoch": 1.273922065944201, + "grad_norm": 1.5621491080936318, + "learning_rate": 6.385492161214454e-07, + "loss": 0.5045995116233826, + "step": 5525 + }, + { + "epoch": 1.2741526400737837, + "grad_norm": 1.4769511790871679, + "learning_rate": 6.381937577750611e-07, + "loss": 0.4377788305282593, + "step": 5526 + }, + { + "epoch": 1.2743832142033664, + "grad_norm": 1.470801087764595, + "learning_rate": 6.378383520208806e-07, + "loss": 0.5363353490829468, + "step": 5527 + }, + { + "epoch": 1.2746137883329491, + "grad_norm": 1.340047582641372, + "learning_rate": 6.374829989105661e-07, + "loss": 0.42230546474456787, + "step": 5528 + }, + { + "epoch": 1.2748443624625316, + "grad_norm": 1.2882420810653734, + "learning_rate": 6.371276984957715e-07, + "loss": 0.39565908908843994, + "step": 5529 + }, + { + "epoch": 1.2750749365921143, + "grad_norm": 1.3633189139651096, + "learning_rate": 6.36772450828144e-07, + "loss": 0.4375323951244354, + "step": 5530 + }, + { + "epoch": 1.275305510721697, + "grad_norm": 1.5028848525750826, + "learning_rate": 6.364172559593215e-07, + "loss": 0.4901241660118103, + "step": 5531 + }, + { + "epoch": 1.2755360848512796, + "grad_norm": 1.3653729298225772, + "learning_rate": 6.360621139409359e-07, + "loss": 0.4108780026435852, + "step": 5532 + }, + { + "epoch": 1.2757666589808623, + "grad_norm": 1.4800363393725149, + "learning_rate": 6.357070248246102e-07, + "loss": 0.43631279468536377, + "step": 5533 + }, + { + "epoch": 1.275997233110445, + "grad_norm": 1.5982504223136969, + "learning_rate": 6.353519886619607e-07, + "loss": 0.4623757004737854, + "step": 5534 + }, + { + "epoch": 1.2762278072400277, + "grad_norm": 1.5284512936045929, + "learning_rate": 6.349970055045954e-07, + "loss": 0.41303062438964844, + "step": 5535 + }, + { + "epoch": 1.2764583813696104, + "grad_norm": 1.7689201212047627, + "learning_rate": 
6.34642075404114e-07, + "loss": 0.5157878994941711, + "step": 5536 + }, + { + "epoch": 1.276688955499193, + "grad_norm": 1.6093049161057067, + "learning_rate": 6.342871984121103e-07, + "loss": 0.41295093297958374, + "step": 5537 + }, + { + "epoch": 1.2769195296287756, + "grad_norm": 1.4185213028911483, + "learning_rate": 6.339323745801682e-07, + "loss": 0.4636460542678833, + "step": 5538 + }, + { + "epoch": 1.2771501037583584, + "grad_norm": 1.44057433861511, + "learning_rate": 6.335776039598659e-07, + "loss": 0.45273804664611816, + "step": 5539 + }, + { + "epoch": 1.2773806778879409, + "grad_norm": 1.7212686324453035, + "learning_rate": 6.332228866027721e-07, + "loss": 0.4562758803367615, + "step": 5540 + }, + { + "epoch": 1.2776112520175236, + "grad_norm": 1.5821328258880776, + "learning_rate": 6.328682225604491e-07, + "loss": 0.3162837326526642, + "step": 5541 + }, + { + "epoch": 1.2778418261471063, + "grad_norm": 1.4226618207277133, + "learning_rate": 6.325136118844504e-07, + "loss": 0.48594871163368225, + "step": 5542 + }, + { + "epoch": 1.278072400276689, + "grad_norm": 1.398820126458318, + "learning_rate": 6.321590546263231e-07, + "loss": 0.4346798360347748, + "step": 5543 + }, + { + "epoch": 1.2783029744062717, + "grad_norm": 1.7945463027279862, + "learning_rate": 6.318045508376046e-07, + "loss": 0.5133204460144043, + "step": 5544 + }, + { + "epoch": 1.2785335485358542, + "grad_norm": 1.6462955147402891, + "learning_rate": 6.314501005698266e-07, + "loss": 0.40679338574409485, + "step": 5545 + }, + { + "epoch": 1.278764122665437, + "grad_norm": 1.341754342655084, + "learning_rate": 6.310957038745117e-07, + "loss": 0.363874614238739, + "step": 5546 + }, + { + "epoch": 1.2789946967950196, + "grad_norm": 1.3013776361069782, + "learning_rate": 6.307413608031746e-07, + "loss": 0.43020665645599365, + "step": 5547 + }, + { + "epoch": 1.2792252709246021, + "grad_norm": 1.301444097702827, + "learning_rate": 6.303870714073233e-07, + "loss": 0.5280083417892456, + 
"step": 5548 + }, + { + "epoch": 1.2794558450541849, + "grad_norm": 1.803757705570539, + "learning_rate": 6.300328357384568e-07, + "loss": 0.4584185481071472, + "step": 5549 + }, + { + "epoch": 1.2796864191837676, + "grad_norm": 1.4682285924702114, + "learning_rate": 6.296786538480675e-07, + "loss": 0.4068162441253662, + "step": 5550 + }, + { + "epoch": 1.2799169933133503, + "grad_norm": 1.361515758715701, + "learning_rate": 6.293245257876387e-07, + "loss": 0.4336085915565491, + "step": 5551 + }, + { + "epoch": 1.280147567442933, + "grad_norm": 1.4906971509519245, + "learning_rate": 6.289704516086468e-07, + "loss": 0.4932886064052582, + "step": 5552 + }, + { + "epoch": 1.2803781415725155, + "grad_norm": 1.3660207414526373, + "learning_rate": 6.2861643136256e-07, + "loss": 0.437292218208313, + "step": 5553 + }, + { + "epoch": 1.2806087157020982, + "grad_norm": 1.5017461161180483, + "learning_rate": 6.28262465100839e-07, + "loss": 0.4131085276603699, + "step": 5554 + }, + { + "epoch": 1.280839289831681, + "grad_norm": 1.441603184912447, + "learning_rate": 6.27908552874936e-07, + "loss": 0.4146266579627991, + "step": 5555 + }, + { + "epoch": 1.2810698639612634, + "grad_norm": 1.6115588407174422, + "learning_rate": 6.275546947362957e-07, + "loss": 0.4778539538383484, + "step": 5556 + }, + { + "epoch": 1.2813004380908461, + "grad_norm": 1.4722189673341872, + "learning_rate": 6.272008907363555e-07, + "loss": 0.3989019989967346, + "step": 5557 + }, + { + "epoch": 1.2815310122204289, + "grad_norm": 1.5188067628601776, + "learning_rate": 6.268471409265436e-07, + "loss": 0.4433528184890747, + "step": 5558 + }, + { + "epoch": 1.2817615863500116, + "grad_norm": 1.4551631195697798, + "learning_rate": 6.264934453582817e-07, + "loss": 0.46929931640625, + "step": 5559 + }, + { + "epoch": 1.2819921604795943, + "grad_norm": 1.749202490253535, + "learning_rate": 6.261398040829829e-07, + "loss": 0.4908202886581421, + "step": 5560 + }, + { + "epoch": 1.2822227346091768, + "grad_norm": 
1.766310768413501, + "learning_rate": 6.257862171520528e-07, + "loss": 0.44195377826690674, + "step": 5561 + }, + { + "epoch": 1.2824533087387595, + "grad_norm": 1.8716445464357578, + "learning_rate": 6.254326846168882e-07, + "loss": 0.548696756362915, + "step": 5562 + }, + { + "epoch": 1.2826838828683422, + "grad_norm": 1.6355324229757326, + "learning_rate": 6.250792065288794e-07, + "loss": 0.4015994668006897, + "step": 5563 + }, + { + "epoch": 1.2829144569979247, + "grad_norm": 1.5798153885574688, + "learning_rate": 6.247257829394074e-07, + "loss": 0.4281688928604126, + "step": 5564 + }, + { + "epoch": 1.2831450311275074, + "grad_norm": 1.2159971773233473, + "learning_rate": 6.243724138998462e-07, + "loss": 0.37623634934425354, + "step": 5565 + }, + { + "epoch": 1.2833756052570902, + "grad_norm": 1.7282596196498647, + "learning_rate": 6.240190994615617e-07, + "loss": 0.4753819704055786, + "step": 5566 + }, + { + "epoch": 1.2836061793866729, + "grad_norm": 1.8092084567061366, + "learning_rate": 6.236658396759111e-07, + "loss": 0.4584893584251404, + "step": 5567 + }, + { + "epoch": 1.2838367535162556, + "grad_norm": 1.598249680169706, + "learning_rate": 6.23312634594245e-07, + "loss": 0.445067435503006, + "step": 5568 + }, + { + "epoch": 1.284067327645838, + "grad_norm": 1.402901275205923, + "learning_rate": 6.229594842679049e-07, + "loss": 0.4209640920162201, + "step": 5569 + }, + { + "epoch": 1.2842979017754208, + "grad_norm": 1.3481434606649714, + "learning_rate": 6.226063887482254e-07, + "loss": 0.34620141983032227, + "step": 5570 + }, + { + "epoch": 1.2845284759050035, + "grad_norm": 1.2702834444597235, + "learning_rate": 6.222533480865315e-07, + "loss": 0.43683767318725586, + "step": 5571 + }, + { + "epoch": 1.284759050034586, + "grad_norm": 1.5394879174992184, + "learning_rate": 6.219003623341421e-07, + "loss": 0.45881450176239014, + "step": 5572 + }, + { + "epoch": 1.2849896241641687, + "grad_norm": 1.2015099259152706, + "learning_rate": 
6.215474315423667e-07, + "loss": 0.40115928649902344, + "step": 5573 + }, + { + "epoch": 1.2852201982937514, + "grad_norm": 1.5480428253925462, + "learning_rate": 6.211945557625082e-07, + "loss": 0.4181373119354248, + "step": 5574 + }, + { + "epoch": 1.2854507724233342, + "grad_norm": 1.6874872010842208, + "learning_rate": 6.208417350458598e-07, + "loss": 0.4743300676345825, + "step": 5575 + }, + { + "epoch": 1.2856813465529169, + "grad_norm": 1.6331906817141153, + "learning_rate": 6.204889694437077e-07, + "loss": 0.4236707091331482, + "step": 5576 + }, + { + "epoch": 1.2859119206824994, + "grad_norm": 1.1887995996963334, + "learning_rate": 6.201362590073305e-07, + "loss": 0.4105497896671295, + "step": 5577 + }, + { + "epoch": 1.286142494812082, + "grad_norm": 1.3982883240902815, + "learning_rate": 6.197836037879973e-07, + "loss": 0.4164474606513977, + "step": 5578 + }, + { + "epoch": 1.2863730689416648, + "grad_norm": 1.648111600369129, + "learning_rate": 6.19431003836971e-07, + "loss": 0.49809616804122925, + "step": 5579 + }, + { + "epoch": 1.2866036430712473, + "grad_norm": 1.608787056057215, + "learning_rate": 6.19078459205505e-07, + "loss": 0.4902994632720947, + "step": 5580 + }, + { + "epoch": 1.28683421720083, + "grad_norm": 1.336430500063446, + "learning_rate": 6.18725969944846e-07, + "loss": 0.3697085380554199, + "step": 5581 + }, + { + "epoch": 1.2870647913304127, + "grad_norm": 1.353359914681952, + "learning_rate": 6.183735361062309e-07, + "loss": 0.446627140045166, + "step": 5582 + }, + { + "epoch": 1.2872953654599955, + "grad_norm": 1.590519620379444, + "learning_rate": 6.180211577408901e-07, + "loss": 0.39521220326423645, + "step": 5583 + }, + { + "epoch": 1.2875259395895782, + "grad_norm": 1.7929636253307002, + "learning_rate": 6.176688349000452e-07, + "loss": 0.6308573484420776, + "step": 5584 + }, + { + "epoch": 1.2877565137191607, + "grad_norm": 1.5017758457543093, + "learning_rate": 6.173165676349102e-07, + "loss": 0.4558343291282654, + "step": 
5585 + }, + { + "epoch": 1.2879870878487434, + "grad_norm": 1.4546689222111522, + "learning_rate": 6.169643559966906e-07, + "loss": 0.5487015247344971, + "step": 5586 + }, + { + "epoch": 1.288217661978326, + "grad_norm": 1.3949279502201517, + "learning_rate": 6.166122000365834e-07, + "loss": 0.39074039459228516, + "step": 5587 + }, + { + "epoch": 1.2884482361079086, + "grad_norm": 1.4687466147876906, + "learning_rate": 6.162600998057787e-07, + "loss": 0.5136120915412903, + "step": 5588 + }, + { + "epoch": 1.2886788102374913, + "grad_norm": 1.5457442901158343, + "learning_rate": 6.159080553554572e-07, + "loss": 0.5344336628913879, + "step": 5589 + }, + { + "epoch": 1.288909384367074, + "grad_norm": 1.5840783894802135, + "learning_rate": 6.15556066736793e-07, + "loss": 0.5204205513000488, + "step": 5590 + }, + { + "epoch": 1.2891399584966567, + "grad_norm": 1.588345092971114, + "learning_rate": 6.152041340009504e-07, + "loss": 0.4768211245536804, + "step": 5591 + }, + { + "epoch": 1.2893705326262395, + "grad_norm": 2.0914169507965936, + "learning_rate": 6.148522571990868e-07, + "loss": 0.44098299741744995, + "step": 5592 + }, + { + "epoch": 1.289601106755822, + "grad_norm": 1.6411833405865308, + "learning_rate": 6.145004363823509e-07, + "loss": 0.5038055181503296, + "step": 5593 + }, + { + "epoch": 1.2898316808854047, + "grad_norm": 1.6256634474518743, + "learning_rate": 6.141486716018837e-07, + "loss": 0.417998343706131, + "step": 5594 + }, + { + "epoch": 1.2900622550149874, + "grad_norm": 1.755327490864145, + "learning_rate": 6.137969629088174e-07, + "loss": 0.48858124017715454, + "step": 5595 + }, + { + "epoch": 1.2902928291445699, + "grad_norm": 1.6236287189755654, + "learning_rate": 6.134453103542765e-07, + "loss": 0.46988582611083984, + "step": 5596 + }, + { + "epoch": 1.2905234032741526, + "grad_norm": 1.4715150644247719, + "learning_rate": 6.130937139893779e-07, + "loss": 0.5100589394569397, + "step": 5597 + }, + { + "epoch": 1.2907539774037353, + 
"grad_norm": 1.861124742863941, + "learning_rate": 6.127421738652286e-07, + "loss": 0.490558922290802, + "step": 5598 + }, + { + "epoch": 1.290984551533318, + "grad_norm": 1.624496792014592, + "learning_rate": 6.123906900329291e-07, + "loss": 0.4749597907066345, + "step": 5599 + }, + { + "epoch": 1.2912151256629008, + "grad_norm": 1.4155787175262067, + "learning_rate": 6.12039262543571e-07, + "loss": 0.5006792545318604, + "step": 5600 + }, + { + "epoch": 1.2914456997924832, + "grad_norm": 1.6772265070157861, + "learning_rate": 6.116878914482384e-07, + "loss": 0.46902909874916077, + "step": 5601 + }, + { + "epoch": 1.291676273922066, + "grad_norm": 1.4563548131763813, + "learning_rate": 6.113365767980059e-07, + "loss": 0.46765559911727905, + "step": 5602 + }, + { + "epoch": 1.2919068480516487, + "grad_norm": 1.4143636586875892, + "learning_rate": 6.10985318643941e-07, + "loss": 0.45960646867752075, + "step": 5603 + }, + { + "epoch": 1.2921374221812312, + "grad_norm": 1.578129032516793, + "learning_rate": 6.106341170371024e-07, + "loss": 0.4067912697792053, + "step": 5604 + }, + { + "epoch": 1.292367996310814, + "grad_norm": 1.653263856685772, + "learning_rate": 6.102829720285414e-07, + "loss": 0.45004114508628845, + "step": 5605 + }, + { + "epoch": 1.2925985704403966, + "grad_norm": 1.698803058368325, + "learning_rate": 6.099318836692999e-07, + "loss": 0.5086014270782471, + "step": 5606 + }, + { + "epoch": 1.2928291445699793, + "grad_norm": 1.5400277013654406, + "learning_rate": 6.095808520104122e-07, + "loss": 0.49985191226005554, + "step": 5607 + }, + { + "epoch": 1.293059718699562, + "grad_norm": 1.5622376081366391, + "learning_rate": 6.092298771029047e-07, + "loss": 0.5066381096839905, + "step": 5608 + }, + { + "epoch": 1.2932902928291445, + "grad_norm": 1.5786958248418999, + "learning_rate": 6.088789589977947e-07, + "loss": 0.49626559019088745, + "step": 5609 + }, + { + "epoch": 1.2935208669587273, + "grad_norm": 1.6542820345168319, + "learning_rate": 
6.085280977460921e-07, + "loss": 0.4837498962879181, + "step": 5610 + }, + { + "epoch": 1.29375144108831, + "grad_norm": 1.3607897650960659, + "learning_rate": 6.081772933987977e-07, + "loss": 0.41308102011680603, + "step": 5611 + }, + { + "epoch": 1.2939820152178925, + "grad_norm": 1.4026215025684987, + "learning_rate": 6.078265460069048e-07, + "loss": 0.4453086853027344, + "step": 5612 + }, + { + "epoch": 1.2942125893474752, + "grad_norm": 1.5506248233039113, + "learning_rate": 6.074758556213976e-07, + "loss": 0.4700174927711487, + "step": 5613 + }, + { + "epoch": 1.294443163477058, + "grad_norm": 1.6021152444285431, + "learning_rate": 6.071252222932537e-07, + "loss": 0.578227162361145, + "step": 5614 + }, + { + "epoch": 1.2946737376066406, + "grad_norm": 1.3711009132002785, + "learning_rate": 6.067746460734398e-07, + "loss": 0.36468571424484253, + "step": 5615 + }, + { + "epoch": 1.2949043117362231, + "grad_norm": 1.7197393040240752, + "learning_rate": 6.064241270129166e-07, + "loss": 0.4793199896812439, + "step": 5616 + }, + { + "epoch": 1.2951348858658058, + "grad_norm": 1.4731744493442007, + "learning_rate": 6.060736651626355e-07, + "loss": 0.40342214703559875, + "step": 5617 + }, + { + "epoch": 1.2953654599953885, + "grad_norm": 1.2868571274228024, + "learning_rate": 6.05723260573539e-07, + "loss": 0.4212435185909271, + "step": 5618 + }, + { + "epoch": 1.295596034124971, + "grad_norm": 1.592545901664945, + "learning_rate": 6.053729132965626e-07, + "loss": 0.44668713212013245, + "step": 5619 + }, + { + "epoch": 1.2958266082545538, + "grad_norm": 1.3590289444558108, + "learning_rate": 6.050226233826326e-07, + "loss": 0.5159831643104553, + "step": 5620 + }, + { + "epoch": 1.2960571823841365, + "grad_norm": 1.792827614220507, + "learning_rate": 6.046723908826676e-07, + "loss": 0.5091866850852966, + "step": 5621 + }, + { + "epoch": 1.2962877565137192, + "grad_norm": 1.3636713576072057, + "learning_rate": 6.043222158475767e-07, + "loss": 0.34838563203811646, + 
"step": 5622 + }, + { + "epoch": 1.296518330643302, + "grad_norm": 1.679394698956229, + "learning_rate": 6.039720983282621e-07, + "loss": 0.46576952934265137, + "step": 5623 + }, + { + "epoch": 1.2967489047728844, + "grad_norm": 1.5739745386461328, + "learning_rate": 6.036220383756163e-07, + "loss": 0.4971234202384949, + "step": 5624 + }, + { + "epoch": 1.2969794789024671, + "grad_norm": 1.3832811037885837, + "learning_rate": 6.03272036040525e-07, + "loss": 0.4792482256889343, + "step": 5625 + }, + { + "epoch": 1.2972100530320498, + "grad_norm": 1.5438407741127544, + "learning_rate": 6.029220913738636e-07, + "loss": 0.45584213733673096, + "step": 5626 + }, + { + "epoch": 1.2974406271616323, + "grad_norm": 2.1628056802136686, + "learning_rate": 6.025722044265004e-07, + "loss": 0.5094096064567566, + "step": 5627 + }, + { + "epoch": 1.297671201291215, + "grad_norm": 1.2707985126710273, + "learning_rate": 6.022223752492954e-07, + "loss": 0.33178865909576416, + "step": 5628 + }, + { + "epoch": 1.2979017754207978, + "grad_norm": 1.4977758648466553, + "learning_rate": 6.018726038930991e-07, + "loss": 0.4955121874809265, + "step": 5629 + }, + { + "epoch": 1.2981323495503805, + "grad_norm": 1.9087861970540962, + "learning_rate": 6.01522890408755e-07, + "loss": 0.46253639459609985, + "step": 5630 + }, + { + "epoch": 1.2983629236799632, + "grad_norm": 1.725580686624441, + "learning_rate": 6.011732348470971e-07, + "loss": 0.4760236442089081, + "step": 5631 + }, + { + "epoch": 1.2985934978095457, + "grad_norm": 1.487451213133888, + "learning_rate": 6.008236372589516e-07, + "loss": 0.44413092732429504, + "step": 5632 + }, + { + "epoch": 1.2988240719391284, + "grad_norm": 1.5710401716420814, + "learning_rate": 6.004740976951358e-07, + "loss": 0.5431559681892395, + "step": 5633 + }, + { + "epoch": 1.2990546460687111, + "grad_norm": 1.448678008923642, + "learning_rate": 6.001246162064592e-07, + "loss": 0.41276806592941284, + "step": 5634 + }, + { + "epoch": 1.2992852201982936, + 
"grad_norm": 1.8698453553316883, + "learning_rate": 5.997751928437219e-07, + "loss": 0.3998986482620239, + "step": 5635 + }, + { + "epoch": 1.2995157943278763, + "grad_norm": 1.7019145009400753, + "learning_rate": 5.994258276577169e-07, + "loss": 0.47741782665252686, + "step": 5636 + }, + { + "epoch": 1.299746368457459, + "grad_norm": 1.8471752326794122, + "learning_rate": 5.990765206992277e-07, + "loss": 0.4294115900993347, + "step": 5637 + }, + { + "epoch": 1.2999769425870418, + "grad_norm": 1.2676173155963009, + "learning_rate": 5.987272720190288e-07, + "loss": 0.4717773199081421, + "step": 5638 + }, + { + "epoch": 1.3002075167166245, + "grad_norm": 1.4764264012124577, + "learning_rate": 5.983780816678881e-07, + "loss": 0.5169499516487122, + "step": 5639 + }, + { + "epoch": 1.300438090846207, + "grad_norm": 1.3402196455719508, + "learning_rate": 5.980289496965634e-07, + "loss": 0.3796359598636627, + "step": 5640 + }, + { + "epoch": 1.3006686649757897, + "grad_norm": 1.439771899645747, + "learning_rate": 5.976798761558048e-07, + "loss": 0.44377613067626953, + "step": 5641 + }, + { + "epoch": 1.3008992391053724, + "grad_norm": 1.4787491173073983, + "learning_rate": 5.973308610963534e-07, + "loss": 0.46863383054733276, + "step": 5642 + }, + { + "epoch": 1.301129813234955, + "grad_norm": 1.6231703309548882, + "learning_rate": 5.969819045689426e-07, + "loss": 0.5437184572219849, + "step": 5643 + }, + { + "epoch": 1.3013603873645376, + "grad_norm": 1.3526724102376106, + "learning_rate": 5.96633006624296e-07, + "loss": 0.4487720727920532, + "step": 5644 + }, + { + "epoch": 1.3015909614941203, + "grad_norm": 1.4099594164441491, + "learning_rate": 5.962841673131305e-07, + "loss": 0.42834270000457764, + "step": 5645 + }, + { + "epoch": 1.301821535623703, + "grad_norm": 1.6303538612123332, + "learning_rate": 5.959353866861525e-07, + "loss": 0.5242533087730408, + "step": 5646 + }, + { + "epoch": 1.3020521097532858, + "grad_norm": 1.467793467454458, + "learning_rate": 
5.955866647940609e-07, + "loss": 0.4529950022697449, + "step": 5647 + }, + { + "epoch": 1.3022826838828683, + "grad_norm": 1.704233159172443, + "learning_rate": 5.952380016875465e-07, + "loss": 0.41109561920166016, + "step": 5648 + }, + { + "epoch": 1.302513258012451, + "grad_norm": 2.1978948521850237, + "learning_rate": 5.948893974172904e-07, + "loss": 0.5468418598175049, + "step": 5649 + }, + { + "epoch": 1.3027438321420337, + "grad_norm": 1.6524182777322811, + "learning_rate": 5.945408520339663e-07, + "loss": 0.4594927430152893, + "step": 5650 + }, + { + "epoch": 1.3029744062716162, + "grad_norm": 1.8822005278969978, + "learning_rate": 5.941923655882383e-07, + "loss": 0.5011999011039734, + "step": 5651 + }, + { + "epoch": 1.303204980401199, + "grad_norm": 1.3940543055361847, + "learning_rate": 5.938439381307632e-07, + "loss": 0.519101083278656, + "step": 5652 + }, + { + "epoch": 1.3034355545307816, + "grad_norm": 1.3048743953658823, + "learning_rate": 5.934955697121875e-07, + "loss": 0.521979570388794, + "step": 5653 + }, + { + "epoch": 1.3036661286603644, + "grad_norm": 1.5140544105240696, + "learning_rate": 5.931472603831507e-07, + "loss": 0.5969122648239136, + "step": 5654 + }, + { + "epoch": 1.303896702789947, + "grad_norm": 1.6283257057537612, + "learning_rate": 5.927990101942826e-07, + "loss": 0.47013232111930847, + "step": 5655 + }, + { + "epoch": 1.3041272769195296, + "grad_norm": 1.485470149052559, + "learning_rate": 5.924508191962059e-07, + "loss": 0.4135271906852722, + "step": 5656 + }, + { + "epoch": 1.3043578510491123, + "grad_norm": 1.6826248484124529, + "learning_rate": 5.921026874395327e-07, + "loss": 0.45639151334762573, + "step": 5657 + }, + { + "epoch": 1.304588425178695, + "grad_norm": 1.4851105420204929, + "learning_rate": 5.917546149748676e-07, + "loss": 0.4047633409500122, + "step": 5658 + }, + { + "epoch": 1.3048189993082775, + "grad_norm": 1.470073094956581, + "learning_rate": 5.91406601852807e-07, + "loss": 0.4352290630340576, + "step": 
5659 + }, + { + "epoch": 1.3050495734378602, + "grad_norm": 1.569723084578139, + "learning_rate": 5.910586481239375e-07, + "loss": 0.4912130534648895, + "step": 5660 + }, + { + "epoch": 1.305280147567443, + "grad_norm": 1.4302762159123064, + "learning_rate": 5.907107538388383e-07, + "loss": 0.4114433526992798, + "step": 5661 + }, + { + "epoch": 1.3055107216970256, + "grad_norm": 1.6307461117750972, + "learning_rate": 5.903629190480786e-07, + "loss": 0.4230955243110657, + "step": 5662 + }, + { + "epoch": 1.3057412958266084, + "grad_norm": 1.525164874833489, + "learning_rate": 5.900151438022205e-07, + "loss": 0.5020648241043091, + "step": 5663 + }, + { + "epoch": 1.3059718699561909, + "grad_norm": 1.6834639607808413, + "learning_rate": 5.89667428151816e-07, + "loss": 0.48636388778686523, + "step": 5664 + }, + { + "epoch": 1.3062024440857736, + "grad_norm": 1.376635193773143, + "learning_rate": 5.893197721474099e-07, + "loss": 0.412000447511673, + "step": 5665 + }, + { + "epoch": 1.3064330182153563, + "grad_norm": 1.8328035722486296, + "learning_rate": 5.889721758395369e-07, + "loss": 0.3584952652454376, + "step": 5666 + }, + { + "epoch": 1.3066635923449388, + "grad_norm": 1.599166825150926, + "learning_rate": 5.886246392787234e-07, + "loss": 0.4538918733596802, + "step": 5667 + }, + { + "epoch": 1.3068941664745215, + "grad_norm": 1.3551701558323133, + "learning_rate": 5.882771625154883e-07, + "loss": 0.478498637676239, + "step": 5668 + }, + { + "epoch": 1.3071247406041042, + "grad_norm": 1.5353917292288828, + "learning_rate": 5.879297456003398e-07, + "loss": 0.49535906314849854, + "step": 5669 + }, + { + "epoch": 1.307355314733687, + "grad_norm": 1.4516733372645705, + "learning_rate": 5.875823885837793e-07, + "loss": 0.48975661396980286, + "step": 5670 + }, + { + "epoch": 1.3075858888632697, + "grad_norm": 1.675865776424194, + "learning_rate": 5.87235091516298e-07, + "loss": 0.4870087802410126, + "step": 5671 + }, + { + "epoch": 1.3078164629928521, + "grad_norm": 
1.5358758810801338, + "learning_rate": 5.8688785444838e-07, + "loss": 0.43411481380462646, + "step": 5672 + }, + { + "epoch": 1.3080470371224349, + "grad_norm": 1.5956307221574964, + "learning_rate": 5.865406774304986e-07, + "loss": 0.5108835697174072, + "step": 5673 + }, + { + "epoch": 1.3082776112520176, + "grad_norm": 1.6165992027891032, + "learning_rate": 5.861935605131202e-07, + "loss": 0.47449198365211487, + "step": 5674 + }, + { + "epoch": 1.3085081853816, + "grad_norm": 1.8165499378032328, + "learning_rate": 5.858465037467014e-07, + "loss": 0.5550234913825989, + "step": 5675 + }, + { + "epoch": 1.3087387595111828, + "grad_norm": 1.5758581559369806, + "learning_rate": 5.854995071816911e-07, + "loss": 0.4548208713531494, + "step": 5676 + }, + { + "epoch": 1.3089693336407655, + "grad_norm": 1.4849539841305146, + "learning_rate": 5.851525708685279e-07, + "loss": 0.5176935195922852, + "step": 5677 + }, + { + "epoch": 1.3091999077703482, + "grad_norm": 1.5664760566663032, + "learning_rate": 5.848056948576428e-07, + "loss": 0.4460016191005707, + "step": 5678 + }, + { + "epoch": 1.309430481899931, + "grad_norm": 1.808203061607658, + "learning_rate": 5.84458879199458e-07, + "loss": 0.5344464182853699, + "step": 5679 + }, + { + "epoch": 1.3096610560295134, + "grad_norm": 1.3109840468073877, + "learning_rate": 5.841121239443863e-07, + "loss": 0.48601672053337097, + "step": 5680 + }, + { + "epoch": 1.3098916301590962, + "grad_norm": 1.3467689115963568, + "learning_rate": 5.837654291428327e-07, + "loss": 0.46849286556243896, + "step": 5681 + }, + { + "epoch": 1.3101222042886789, + "grad_norm": 1.2665516862618484, + "learning_rate": 5.834187948451918e-07, + "loss": 0.4353019893169403, + "step": 5682 + }, + { + "epoch": 1.3103527784182614, + "grad_norm": 1.7099740749541261, + "learning_rate": 5.830722211018516e-07, + "loss": 0.5345665812492371, + "step": 5683 + }, + { + "epoch": 1.310583352547844, + "grad_norm": 1.4659221660940824, + "learning_rate": 
5.827257079631886e-07, + "loss": 0.4060036540031433, + "step": 5684 + }, + { + "epoch": 1.3108139266774268, + "grad_norm": 1.3640742579072, + "learning_rate": 5.823792554795738e-07, + "loss": 0.43724536895751953, + "step": 5685 + }, + { + "epoch": 1.3110445008070095, + "grad_norm": 1.550163679413481, + "learning_rate": 5.820328637013665e-07, + "loss": 0.4600690007209778, + "step": 5686 + }, + { + "epoch": 1.3112750749365922, + "grad_norm": 1.5199243554334652, + "learning_rate": 5.816865326789182e-07, + "loss": 0.4352531433105469, + "step": 5687 + }, + { + "epoch": 1.3115056490661747, + "grad_norm": 1.4575114943022274, + "learning_rate": 5.813402624625722e-07, + "loss": 0.39384984970092773, + "step": 5688 + }, + { + "epoch": 1.3117362231957574, + "grad_norm": 1.329194110980277, + "learning_rate": 5.809940531026616e-07, + "loss": 0.44367098808288574, + "step": 5689 + }, + { + "epoch": 1.3119667973253402, + "grad_norm": 1.4497223943190725, + "learning_rate": 5.806479046495123e-07, + "loss": 0.4757416546344757, + "step": 5690 + }, + { + "epoch": 1.3121973714549227, + "grad_norm": 1.5821654764353048, + "learning_rate": 5.803018171534396e-07, + "loss": 0.521708607673645, + "step": 5691 + }, + { + "epoch": 1.3124279455845054, + "grad_norm": 1.3510537988002305, + "learning_rate": 5.799557906647514e-07, + "loss": 0.4127439260482788, + "step": 5692 + }, + { + "epoch": 1.312658519714088, + "grad_norm": 1.4570205213875538, + "learning_rate": 5.79609825233746e-07, + "loss": 0.4809693396091461, + "step": 5693 + }, + { + "epoch": 1.3128890938436708, + "grad_norm": 1.2590938015478794, + "learning_rate": 5.792639209107134e-07, + "loss": 0.5075684189796448, + "step": 5694 + }, + { + "epoch": 1.3131196679732535, + "grad_norm": 1.3738792104421846, + "learning_rate": 5.789180777459336e-07, + "loss": 0.416393518447876, + "step": 5695 + }, + { + "epoch": 1.313350242102836, + "grad_norm": 1.4282126857493198, + "learning_rate": 5.78572295789679e-07, + "loss": 0.4456642270088196, + "step": 
5696 + }, + { + "epoch": 1.3135808162324187, + "grad_norm": 1.327521871832615, + "learning_rate": 5.782265750922124e-07, + "loss": 0.4757812023162842, + "step": 5697 + }, + { + "epoch": 1.3138113903620015, + "grad_norm": 1.6103197546493997, + "learning_rate": 5.778809157037872e-07, + "loss": 0.5081768035888672, + "step": 5698 + }, + { + "epoch": 1.314041964491584, + "grad_norm": 1.6849043068796357, + "learning_rate": 5.775353176746489e-07, + "loss": 0.4604584872722626, + "step": 5699 + }, + { + "epoch": 1.3142725386211667, + "grad_norm": 1.3964100189157245, + "learning_rate": 5.771897810550339e-07, + "loss": 0.4153773784637451, + "step": 5700 + }, + { + "epoch": 1.3145031127507494, + "grad_norm": 1.5346514188080242, + "learning_rate": 5.768443058951695e-07, + "loss": 0.5194085836410522, + "step": 5701 + }, + { + "epoch": 1.314733686880332, + "grad_norm": 1.6610989574168062, + "learning_rate": 5.764988922452733e-07, + "loss": 0.4398482143878937, + "step": 5702 + }, + { + "epoch": 1.3149642610099148, + "grad_norm": 1.747178590910114, + "learning_rate": 5.761535401555558e-07, + "loss": 0.5148836374282837, + "step": 5703 + }, + { + "epoch": 1.3151948351394973, + "grad_norm": 1.8977812861580863, + "learning_rate": 5.758082496762163e-07, + "loss": 0.533142626285553, + "step": 5704 + }, + { + "epoch": 1.31542540926908, + "grad_norm": 1.3488739739710767, + "learning_rate": 5.754630208574473e-07, + "loss": 0.4059423804283142, + "step": 5705 + }, + { + "epoch": 1.3156559833986627, + "grad_norm": 1.3213051571946475, + "learning_rate": 5.751178537494302e-07, + "loss": 0.4685533940792084, + "step": 5706 + }, + { + "epoch": 1.3158865575282452, + "grad_norm": 1.5403217644159128, + "learning_rate": 5.747727484023392e-07, + "loss": 0.4454694986343384, + "step": 5707 + }, + { + "epoch": 1.316117131657828, + "grad_norm": 1.481350859430692, + "learning_rate": 5.74427704866339e-07, + "loss": 0.4058796167373657, + "step": 5708 + }, + { + "epoch": 1.3163477057874107, + "grad_norm": 
1.3294270142641733, + "learning_rate": 5.740827231915847e-07, + "loss": 0.3891766369342804, + "step": 5709 + }, + { + "epoch": 1.3165782799169934, + "grad_norm": 1.5072356875610937, + "learning_rate": 5.737378034282235e-07, + "loss": 0.47912657260894775, + "step": 5710 + }, + { + "epoch": 1.316808854046576, + "grad_norm": 1.5228549079910219, + "learning_rate": 5.733929456263922e-07, + "loss": 0.4221952557563782, + "step": 5711 + }, + { + "epoch": 1.3170394281761586, + "grad_norm": 1.5405159904484362, + "learning_rate": 5.730481498362202e-07, + "loss": 0.39018404483795166, + "step": 5712 + }, + { + "epoch": 1.3172700023057413, + "grad_norm": 1.6184406292698126, + "learning_rate": 5.727034161078262e-07, + "loss": 0.5388307571411133, + "step": 5713 + }, + { + "epoch": 1.317500576435324, + "grad_norm": 1.5278965195377916, + "learning_rate": 5.723587444913216e-07, + "loss": 0.3243408501148224, + "step": 5714 + }, + { + "epoch": 1.3177311505649065, + "grad_norm": 1.6496814482710773, + "learning_rate": 5.720141350368072e-07, + "loss": 0.46480363607406616, + "step": 5715 + }, + { + "epoch": 1.3179617246944892, + "grad_norm": 1.6265951465013608, + "learning_rate": 5.716695877943757e-07, + "loss": 0.5286417603492737, + "step": 5716 + }, + { + "epoch": 1.318192298824072, + "grad_norm": 1.455901542591345, + "learning_rate": 5.71325102814111e-07, + "loss": 0.4170069694519043, + "step": 5717 + }, + { + "epoch": 1.3184228729536547, + "grad_norm": 1.5051159019770526, + "learning_rate": 5.709806801460867e-07, + "loss": 0.5738973617553711, + "step": 5718 + }, + { + "epoch": 1.3186534470832374, + "grad_norm": 1.4473352410585376, + "learning_rate": 5.706363198403689e-07, + "loss": 0.5309658050537109, + "step": 5719 + }, + { + "epoch": 1.31888402121282, + "grad_norm": 1.588487236125564, + "learning_rate": 5.70292021947013e-07, + "loss": 0.4569379389286041, + "step": 5720 + }, + { + "epoch": 1.3191145953424026, + "grad_norm": 1.5641598702256398, + "learning_rate": 5.699477865160674e-07, 
+ "loss": 0.46686258912086487, + "step": 5721 + }, + { + "epoch": 1.3193451694719853, + "grad_norm": 1.551220703032623, + "learning_rate": 5.696036135975688e-07, + "loss": 0.5333213806152344, + "step": 5722 + }, + { + "epoch": 1.3195757436015678, + "grad_norm": 1.6027893782611593, + "learning_rate": 5.69259503241547e-07, + "loss": 0.3519536256790161, + "step": 5723 + }, + { + "epoch": 1.3198063177311505, + "grad_norm": 1.5104260104986362, + "learning_rate": 5.689154554980218e-07, + "loss": 0.4763161242008209, + "step": 5724 + }, + { + "epoch": 1.3200368918607333, + "grad_norm": 1.5061315373489772, + "learning_rate": 5.685714704170044e-07, + "loss": 0.43600207567214966, + "step": 5725 + }, + { + "epoch": 1.320267465990316, + "grad_norm": 1.4992417251350876, + "learning_rate": 5.682275480484958e-07, + "loss": 0.41991305351257324, + "step": 5726 + }, + { + "epoch": 1.3204980401198987, + "grad_norm": 1.663551629444692, + "learning_rate": 5.678836884424894e-07, + "loss": 0.44275131821632385, + "step": 5727 + }, + { + "epoch": 1.3207286142494812, + "grad_norm": 1.65999947024113, + "learning_rate": 5.675398916489682e-07, + "loss": 0.4339372515678406, + "step": 5728 + }, + { + "epoch": 1.320959188379064, + "grad_norm": 1.484455134036602, + "learning_rate": 5.671961577179062e-07, + "loss": 0.4462248384952545, + "step": 5729 + }, + { + "epoch": 1.3211897625086464, + "grad_norm": 1.4704913213821902, + "learning_rate": 5.668524866992693e-07, + "loss": 0.36548441648483276, + "step": 5730 + }, + { + "epoch": 1.321420336638229, + "grad_norm": 1.5370532211440713, + "learning_rate": 5.665088786430129e-07, + "loss": 0.4709678888320923, + "step": 5731 + }, + { + "epoch": 1.3216509107678118, + "grad_norm": 1.4993066403144744, + "learning_rate": 5.661653335990848e-07, + "loss": 0.40125030279159546, + "step": 5732 + }, + { + "epoch": 1.3218814848973945, + "grad_norm": 1.8517319571144346, + "learning_rate": 5.658218516174218e-07, + "loss": 0.5288605690002441, + "step": 5733 + }, + { + 
"epoch": 1.3221120590269773, + "grad_norm": 1.2954018601150643, + "learning_rate": 5.654784327479534e-07, + "loss": 0.41306072473526, + "step": 5734 + }, + { + "epoch": 1.3223426331565598, + "grad_norm": 1.3199807449430407, + "learning_rate": 5.651350770405983e-07, + "loss": 0.34327009320259094, + "step": 5735 + }, + { + "epoch": 1.3225732072861425, + "grad_norm": 1.4524630442098247, + "learning_rate": 5.647917845452671e-07, + "loss": 0.5055800080299377, + "step": 5736 + }, + { + "epoch": 1.3228037814157252, + "grad_norm": 1.7153085926535214, + "learning_rate": 5.644485553118609e-07, + "loss": 0.45496249198913574, + "step": 5737 + }, + { + "epoch": 1.3230343555453077, + "grad_norm": 1.6142993934275558, + "learning_rate": 5.641053893902708e-07, + "loss": 0.4626169502735138, + "step": 5738 + }, + { + "epoch": 1.3232649296748904, + "grad_norm": 1.3569624734396053, + "learning_rate": 5.637622868303802e-07, + "loss": 0.46621328592300415, + "step": 5739 + }, + { + "epoch": 1.3234955038044731, + "grad_norm": 1.5833136701466524, + "learning_rate": 5.634192476820623e-07, + "loss": 0.47793662548065186, + "step": 5740 + }, + { + "epoch": 1.3237260779340558, + "grad_norm": 1.5367680790773321, + "learning_rate": 5.630762719951816e-07, + "loss": 0.42578715085983276, + "step": 5741 + }, + { + "epoch": 1.3239566520636386, + "grad_norm": 1.7421270871218182, + "learning_rate": 5.627333598195927e-07, + "loss": 0.3146113157272339, + "step": 5742 + }, + { + "epoch": 1.324187226193221, + "grad_norm": 1.376620002714832, + "learning_rate": 5.623905112051417e-07, + "loss": 0.39731544256210327, + "step": 5743 + }, + { + "epoch": 1.3244178003228038, + "grad_norm": 1.6655684412604148, + "learning_rate": 5.620477262016647e-07, + "loss": 0.3755846619606018, + "step": 5744 + }, + { + "epoch": 1.3246483744523865, + "grad_norm": 1.5953907301532468, + "learning_rate": 5.617050048589896e-07, + "loss": 0.43060415983200073, + "step": 5745 + }, + { + "epoch": 1.324878948581969, + "grad_norm": 
1.54564820857706, + "learning_rate": 5.613623472269334e-07, + "loss": 0.4213481545448303, + "step": 5746 + }, + { + "epoch": 1.3251095227115517, + "grad_norm": 1.2422408749001486, + "learning_rate": 5.610197533553057e-07, + "loss": 0.3923456072807312, + "step": 5747 + }, + { + "epoch": 1.3253400968411344, + "grad_norm": 1.6088447345623693, + "learning_rate": 5.606772232939061e-07, + "loss": 0.42293328046798706, + "step": 5748 + }, + { + "epoch": 1.3255706709707171, + "grad_norm": 1.596682526932072, + "learning_rate": 5.603347570925242e-07, + "loss": 0.4545479118824005, + "step": 5749 + }, + { + "epoch": 1.3258012451002998, + "grad_norm": 1.4262513090332916, + "learning_rate": 5.599923548009416e-07, + "loss": 0.3969312310218811, + "step": 5750 + }, + { + "epoch": 1.3260318192298823, + "grad_norm": 1.687653911460881, + "learning_rate": 5.59650016468929e-07, + "loss": 0.4296644330024719, + "step": 5751 + }, + { + "epoch": 1.326262393359465, + "grad_norm": 1.4928189267328964, + "learning_rate": 5.5930774214625e-07, + "loss": 0.43291348218917847, + "step": 5752 + }, + { + "epoch": 1.3264929674890478, + "grad_norm": 1.4463941028108167, + "learning_rate": 5.589655318826564e-07, + "loss": 0.47684454917907715, + "step": 5753 + }, + { + "epoch": 1.3267235416186303, + "grad_norm": 1.3515496302725483, + "learning_rate": 5.586233857278924e-07, + "loss": 0.48520004749298096, + "step": 5754 + }, + { + "epoch": 1.326954115748213, + "grad_norm": 1.6127441732883512, + "learning_rate": 5.582813037316926e-07, + "loss": 0.4434587359428406, + "step": 5755 + }, + { + "epoch": 1.3271846898777957, + "grad_norm": 1.7808352880972456, + "learning_rate": 5.579392859437825e-07, + "loss": 0.47306808829307556, + "step": 5756 + }, + { + "epoch": 1.3274152640073784, + "grad_norm": 1.5663021335869645, + "learning_rate": 5.575973324138772e-07, + "loss": 0.4349653720855713, + "step": 5757 + }, + { + "epoch": 1.3276458381369611, + "grad_norm": 1.2914359149982935, + "learning_rate": 
5.572554431916829e-07, + "loss": 0.31277602910995483, + "step": 5758 + }, + { + "epoch": 1.3278764122665436, + "grad_norm": 1.5658319454866303, + "learning_rate": 5.569136183268974e-07, + "loss": 0.4281114637851715, + "step": 5759 + }, + { + "epoch": 1.3281069863961263, + "grad_norm": 1.2867721627127386, + "learning_rate": 5.565718578692076e-07, + "loss": 0.45071113109588623, + "step": 5760 + }, + { + "epoch": 1.328337560525709, + "grad_norm": 1.4460147363867, + "learning_rate": 5.562301618682927e-07, + "loss": 0.426133394241333, + "step": 5761 + }, + { + "epoch": 1.3285681346552916, + "grad_norm": 1.3630920926710801, + "learning_rate": 5.558885303738209e-07, + "loss": 0.3882424235343933, + "step": 5762 + }, + { + "epoch": 1.3287987087848743, + "grad_norm": 1.3878174095068123, + "learning_rate": 5.55546963435452e-07, + "loss": 0.4706958532333374, + "step": 5763 + }, + { + "epoch": 1.329029282914457, + "grad_norm": 1.9122348340273743, + "learning_rate": 5.552054611028365e-07, + "loss": 0.4868433475494385, + "step": 5764 + }, + { + "epoch": 1.3292598570440397, + "grad_norm": 1.4411048310630292, + "learning_rate": 5.548640234256154e-07, + "loss": 0.41839566826820374, + "step": 5765 + }, + { + "epoch": 1.3294904311736224, + "grad_norm": 1.9627530346102546, + "learning_rate": 5.545226504534195e-07, + "loss": 0.4088629484176636, + "step": 5766 + }, + { + "epoch": 1.329721005303205, + "grad_norm": 1.3819218540316194, + "learning_rate": 5.541813422358715e-07, + "loss": 0.34617769718170166, + "step": 5767 + }, + { + "epoch": 1.3299515794327876, + "grad_norm": 1.5711021474470717, + "learning_rate": 5.538400988225835e-07, + "loss": 0.5098900198936462, + "step": 5768 + }, + { + "epoch": 1.3301821535623704, + "grad_norm": 1.5683015797269382, + "learning_rate": 5.534989202631586e-07, + "loss": 0.4294108748435974, + "step": 5769 + }, + { + "epoch": 1.3304127276919528, + "grad_norm": 1.3488716534216894, + "learning_rate": 5.531578066071907e-07, + "loss": 0.42205139994621277, + 
"step": 5770 + }, + { + "epoch": 1.3306433018215356, + "grad_norm": 1.8657910300729754, + "learning_rate": 5.528167579042645e-07, + "loss": 0.5009530186653137, + "step": 5771 + }, + { + "epoch": 1.3308738759511183, + "grad_norm": 1.468249228101101, + "learning_rate": 5.524757742039545e-07, + "loss": 0.554497241973877, + "step": 5772 + }, + { + "epoch": 1.331104450080701, + "grad_norm": 1.711116822757576, + "learning_rate": 5.521348555558263e-07, + "loss": 0.3514432907104492, + "step": 5773 + }, + { + "epoch": 1.3313350242102837, + "grad_norm": 1.4224522574801144, + "learning_rate": 5.51794002009436e-07, + "loss": 0.4712038040161133, + "step": 5774 + }, + { + "epoch": 1.3315655983398662, + "grad_norm": 1.6288850118765847, + "learning_rate": 5.514532136143295e-07, + "loss": 0.48556071519851685, + "step": 5775 + }, + { + "epoch": 1.331796172469449, + "grad_norm": 1.42798680480441, + "learning_rate": 5.511124904200448e-07, + "loss": 0.43158456683158875, + "step": 5776 + }, + { + "epoch": 1.3320267465990316, + "grad_norm": 1.8128360066016722, + "learning_rate": 5.507718324761085e-07, + "loss": 0.5376255512237549, + "step": 5777 + }, + { + "epoch": 1.3322573207286141, + "grad_norm": 1.446480187929883, + "learning_rate": 5.504312398320392e-07, + "loss": 0.3800685405731201, + "step": 5778 + }, + { + "epoch": 1.3324878948581969, + "grad_norm": 1.3675185316121448, + "learning_rate": 5.500907125373458e-07, + "loss": 0.4015260338783264, + "step": 5779 + }, + { + "epoch": 1.3327184689877796, + "grad_norm": 1.7400186621828952, + "learning_rate": 5.497502506415266e-07, + "loss": 0.42762285470962524, + "step": 5780 + }, + { + "epoch": 1.3329490431173623, + "grad_norm": 1.4501572722598215, + "learning_rate": 5.494098541940719e-07, + "loss": 0.4467644691467285, + "step": 5781 + }, + { + "epoch": 1.333179617246945, + "grad_norm": 1.9298171674754279, + "learning_rate": 5.490695232444613e-07, + "loss": 0.42699599266052246, + "step": 5782 + }, + { + "epoch": 1.3334101913765275, + 
"grad_norm": 1.6654850032985582, + "learning_rate": 5.487292578421659e-07, + "loss": 0.586537778377533, + "step": 5783 + }, + { + "epoch": 1.3336407655061102, + "grad_norm": 1.761605169999467, + "learning_rate": 5.48389058036646e-07, + "loss": 0.4525066018104553, + "step": 5784 + }, + { + "epoch": 1.333871339635693, + "grad_norm": 1.4697934550209713, + "learning_rate": 5.480489238773535e-07, + "loss": 0.40520548820495605, + "step": 5785 + }, + { + "epoch": 1.3341019137652754, + "grad_norm": 1.7127717596843188, + "learning_rate": 5.477088554137304e-07, + "loss": 0.3910450339317322, + "step": 5786 + }, + { + "epoch": 1.3343324878948581, + "grad_norm": 1.781985995356997, + "learning_rate": 5.473688526952087e-07, + "loss": 0.45285511016845703, + "step": 5787 + }, + { + "epoch": 1.3345630620244409, + "grad_norm": 1.3079701521023397, + "learning_rate": 5.47028915771212e-07, + "loss": 0.39207279682159424, + "step": 5788 + }, + { + "epoch": 1.3347936361540236, + "grad_norm": 1.3401224496215014, + "learning_rate": 5.466890446911527e-07, + "loss": 0.40281063318252563, + "step": 5789 + }, + { + "epoch": 1.3350242102836063, + "grad_norm": 1.5855589292084546, + "learning_rate": 5.463492395044354e-07, + "loss": 0.5087814927101135, + "step": 5790 + }, + { + "epoch": 1.3352547844131888, + "grad_norm": 1.6443172906836578, + "learning_rate": 5.460095002604532e-07, + "loss": 0.47597891092300415, + "step": 5791 + }, + { + "epoch": 1.3354853585427715, + "grad_norm": 1.656230003127049, + "learning_rate": 5.456698270085917e-07, + "loss": 0.5722953677177429, + "step": 5792 + }, + { + "epoch": 1.3357159326723542, + "grad_norm": 1.6424947586218923, + "learning_rate": 5.45330219798225e-07, + "loss": 0.5133349299430847, + "step": 5793 + }, + { + "epoch": 1.3359465068019367, + "grad_norm": 1.5413030595202453, + "learning_rate": 5.449906786787187e-07, + "loss": 0.46230804920196533, + "step": 5794 + }, + { + "epoch": 1.3361770809315194, + "grad_norm": 1.6839619437291453, + "learning_rate": 
5.446512036994286e-07, + "loss": 0.42002394795417786, + "step": 5795 + }, + { + "epoch": 1.3364076550611022, + "grad_norm": 1.46623243210155, + "learning_rate": 5.443117949097013e-07, + "loss": 0.42281097173690796, + "step": 5796 + }, + { + "epoch": 1.3366382291906849, + "grad_norm": 1.4476698476010996, + "learning_rate": 5.439724523588726e-07, + "loss": 0.511898398399353, + "step": 5797 + }, + { + "epoch": 1.3368688033202676, + "grad_norm": 1.4307520026731049, + "learning_rate": 5.4363317609627e-07, + "loss": 0.4475559592247009, + "step": 5798 + }, + { + "epoch": 1.33709937744985, + "grad_norm": 1.509864957359139, + "learning_rate": 5.432939661712103e-07, + "loss": 0.4872414469718933, + "step": 5799 + }, + { + "epoch": 1.3373299515794328, + "grad_norm": 1.3480605234272842, + "learning_rate": 5.429548226330009e-07, + "loss": 0.40401679277420044, + "step": 5800 + }, + { + "epoch": 1.3375605257090155, + "grad_norm": 2.083088707198395, + "learning_rate": 5.426157455309399e-07, + "loss": 0.43559926748275757, + "step": 5801 + }, + { + "epoch": 1.337791099838598, + "grad_norm": 1.6000855398004097, + "learning_rate": 5.422767349143158e-07, + "loss": 0.44283759593963623, + "step": 5802 + }, + { + "epoch": 1.3380216739681807, + "grad_norm": 1.310277684226626, + "learning_rate": 5.419377908324077e-07, + "loss": 0.3770032525062561, + "step": 5803 + }, + { + "epoch": 1.3382522480977634, + "grad_norm": 1.3856773934136148, + "learning_rate": 5.415989133344834e-07, + "loss": 0.4497501850128174, + "step": 5804 + }, + { + "epoch": 1.3384828222273462, + "grad_norm": 1.49195449044666, + "learning_rate": 5.412601024698033e-07, + "loss": 0.5008253455162048, + "step": 5805 + }, + { + "epoch": 1.3387133963569289, + "grad_norm": 1.3694796854029274, + "learning_rate": 5.409213582876162e-07, + "loss": 0.46178537607192993, + "step": 5806 + }, + { + "epoch": 1.3389439704865114, + "grad_norm": 1.1951838089282807, + "learning_rate": 5.405826808371625e-07, + "loss": 0.39843931794166565, + 
"step": 5807 + }, + { + "epoch": 1.339174544616094, + "grad_norm": 1.4243934050525646, + "learning_rate": 5.402440701676724e-07, + "loss": 0.4829174280166626, + "step": 5808 + }, + { + "epoch": 1.3394051187456768, + "grad_norm": 1.0859530853021675, + "learning_rate": 5.399055263283656e-07, + "loss": 0.36173316836357117, + "step": 5809 + }, + { + "epoch": 1.3396356928752593, + "grad_norm": 1.5741135880130834, + "learning_rate": 5.395670493684536e-07, + "loss": 0.400304913520813, + "step": 5810 + }, + { + "epoch": 1.339866267004842, + "grad_norm": 1.507879612413509, + "learning_rate": 5.392286393371372e-07, + "loss": 0.4536975622177124, + "step": 5811 + }, + { + "epoch": 1.3400968411344247, + "grad_norm": 1.7310508291395992, + "learning_rate": 5.388902962836084e-07, + "loss": 0.6474577188491821, + "step": 5812 + }, + { + "epoch": 1.3403274152640074, + "grad_norm": 1.6348182443046517, + "learning_rate": 5.385520202570477e-07, + "loss": 0.48008009791374207, + "step": 5813 + }, + { + "epoch": 1.3405579893935902, + "grad_norm": 1.6214175923335088, + "learning_rate": 5.38213811306628e-07, + "loss": 0.4518657326698303, + "step": 5814 + }, + { + "epoch": 1.3407885635231727, + "grad_norm": 1.280530895656809, + "learning_rate": 5.378756694815105e-07, + "loss": 0.449008584022522, + "step": 5815 + }, + { + "epoch": 1.3410191376527554, + "grad_norm": 1.689898643370083, + "learning_rate": 5.375375948308483e-07, + "loss": 0.5448319315910339, + "step": 5816 + }, + { + "epoch": 1.341249711782338, + "grad_norm": 1.5166178678578832, + "learning_rate": 5.371995874037832e-07, + "loss": 0.5078369379043579, + "step": 5817 + }, + { + "epoch": 1.3414802859119206, + "grad_norm": 1.611364899344997, + "learning_rate": 5.368616472494482e-07, + "loss": 0.508685290813446, + "step": 5818 + }, + { + "epoch": 1.3417108600415033, + "grad_norm": 1.3809568946566115, + "learning_rate": 5.365237744169672e-07, + "loss": 0.4166705012321472, + "step": 5819 + }, + { + "epoch": 1.341941434171086, + 
"grad_norm": 1.432431964622234, + "learning_rate": 5.361859689554524e-07, + "loss": 0.4741361737251282, + "step": 5820 + }, + { + "epoch": 1.3421720083006687, + "grad_norm": 1.5546451283342237, + "learning_rate": 5.358482309140079e-07, + "loss": 0.36658185720443726, + "step": 5821 + }, + { + "epoch": 1.3424025824302515, + "grad_norm": 1.9632157270552801, + "learning_rate": 5.355105603417267e-07, + "loss": 0.38921263813972473, + "step": 5822 + }, + { + "epoch": 1.342633156559834, + "grad_norm": 1.9732368197118861, + "learning_rate": 5.351729572876935e-07, + "loss": 0.5553977489471436, + "step": 5823 + }, + { + "epoch": 1.3428637306894167, + "grad_norm": 1.4618484003422054, + "learning_rate": 5.348354218009813e-07, + "loss": 0.3968391418457031, + "step": 5824 + }, + { + "epoch": 1.3430943048189994, + "grad_norm": 1.4937275325292458, + "learning_rate": 5.344979539306549e-07, + "loss": 0.4289783239364624, + "step": 5825 + }, + { + "epoch": 1.3433248789485819, + "grad_norm": 1.313862309148984, + "learning_rate": 5.341605537257686e-07, + "loss": 0.45359861850738525, + "step": 5826 + }, + { + "epoch": 1.3435554530781646, + "grad_norm": 1.366684570776694, + "learning_rate": 5.338232212353675e-07, + "loss": 0.3571642339229584, + "step": 5827 + }, + { + "epoch": 1.3437860272077473, + "grad_norm": 1.1954938252676188, + "learning_rate": 5.334859565084855e-07, + "loss": 0.3784096837043762, + "step": 5828 + }, + { + "epoch": 1.34401660133733, + "grad_norm": 1.5372749019268697, + "learning_rate": 5.331487595941475e-07, + "loss": 0.44996407628059387, + "step": 5829 + }, + { + "epoch": 1.3442471754669127, + "grad_norm": 1.4793854978740197, + "learning_rate": 5.32811630541369e-07, + "loss": 0.4466405510902405, + "step": 5830 + }, + { + "epoch": 1.3444777495964952, + "grad_norm": 1.3432081322840168, + "learning_rate": 5.324745693991545e-07, + "loss": 0.34488850831985474, + "step": 5831 + }, + { + "epoch": 1.344708323726078, + "grad_norm": 1.589654871057016, + "learning_rate": 
5.321375762164999e-07, + "loss": 0.5530165433883667, + "step": 5832 + }, + { + "epoch": 1.3449388978556607, + "grad_norm": 1.6555576202053326, + "learning_rate": 5.318006510423898e-07, + "loss": 0.40732342004776, + "step": 5833 + }, + { + "epoch": 1.3451694719852432, + "grad_norm": 1.5528027430812303, + "learning_rate": 5.314637939258002e-07, + "loss": 0.3364611566066742, + "step": 5834 + }, + { + "epoch": 1.3454000461148259, + "grad_norm": 1.4557702222082582, + "learning_rate": 5.311270049156966e-07, + "loss": 0.43964290618896484, + "step": 5835 + }, + { + "epoch": 1.3456306202444086, + "grad_norm": 1.5963363545263636, + "learning_rate": 5.30790284061035e-07, + "loss": 0.5203431844711304, + "step": 5836 + }, + { + "epoch": 1.3458611943739913, + "grad_norm": 1.356219303149177, + "learning_rate": 5.304536314107607e-07, + "loss": 0.4779793620109558, + "step": 5837 + }, + { + "epoch": 1.346091768503574, + "grad_norm": 1.4030454651132978, + "learning_rate": 5.301170470138102e-07, + "loss": 0.4769410490989685, + "step": 5838 + }, + { + "epoch": 1.3463223426331565, + "grad_norm": 1.5437367488200047, + "learning_rate": 5.297805309191089e-07, + "loss": 0.42390304803848267, + "step": 5839 + }, + { + "epoch": 1.3465529167627392, + "grad_norm": 1.6498587295444291, + "learning_rate": 5.294440831755727e-07, + "loss": 0.5550302863121033, + "step": 5840 + }, + { + "epoch": 1.3467834908923217, + "grad_norm": 1.5927381474044073, + "learning_rate": 5.291077038321078e-07, + "loss": 0.4897978901863098, + "step": 5841 + }, + { + "epoch": 1.3470140650219045, + "grad_norm": 1.5707311912828865, + "learning_rate": 5.287713929376105e-07, + "loss": 0.4014284610748291, + "step": 5842 + }, + { + "epoch": 1.3472446391514872, + "grad_norm": 1.61036503253005, + "learning_rate": 5.284351505409675e-07, + "loss": 0.4299513101577759, + "step": 5843 + }, + { + "epoch": 1.34747521328107, + "grad_norm": 1.382725158348277, + "learning_rate": 5.280989766910541e-07, + "loss": 0.44863104820251465, + "step": 
5844 + }, + { + "epoch": 1.3477057874106526, + "grad_norm": 1.4391517424186664, + "learning_rate": 5.277628714367374e-07, + "loss": 0.41933274269104004, + "step": 5845 + }, + { + "epoch": 1.347936361540235, + "grad_norm": 1.5110585127257306, + "learning_rate": 5.274268348268729e-07, + "loss": 0.48257556557655334, + "step": 5846 + }, + { + "epoch": 1.3481669356698178, + "grad_norm": 1.6840388322451993, + "learning_rate": 5.270908669103078e-07, + "loss": 0.435384064912796, + "step": 5847 + }, + { + "epoch": 1.3483975097994005, + "grad_norm": 1.502056490079635, + "learning_rate": 5.267549677358775e-07, + "loss": 0.43291670083999634, + "step": 5848 + }, + { + "epoch": 1.348628083928983, + "grad_norm": 2.07427587572329, + "learning_rate": 5.264191373524089e-07, + "loss": 0.4584086537361145, + "step": 5849 + }, + { + "epoch": 1.3488586580585658, + "grad_norm": 1.4212548389061759, + "learning_rate": 5.260833758087187e-07, + "loss": 0.44879037141799927, + "step": 5850 + }, + { + "epoch": 1.3490892321881485, + "grad_norm": 1.4876230861981237, + "learning_rate": 5.257476831536124e-07, + "loss": 0.48467326164245605, + "step": 5851 + }, + { + "epoch": 1.3493198063177312, + "grad_norm": 1.4803329007154076, + "learning_rate": 5.254120594358871e-07, + "loss": 0.4126189947128296, + "step": 5852 + }, + { + "epoch": 1.349550380447314, + "grad_norm": 1.494164620045959, + "learning_rate": 5.250765047043284e-07, + "loss": 0.5592546463012695, + "step": 5853 + }, + { + "epoch": 1.3497809545768964, + "grad_norm": 1.2572079660485564, + "learning_rate": 5.247410190077134e-07, + "loss": 0.3269529342651367, + "step": 5854 + }, + { + "epoch": 1.3500115287064791, + "grad_norm": 1.4784058003593112, + "learning_rate": 5.244056023948075e-07, + "loss": 0.42812949419021606, + "step": 5855 + }, + { + "epoch": 1.3502421028360618, + "grad_norm": 1.643847647603701, + "learning_rate": 5.240702549143676e-07, + "loss": 0.4266297221183777, + "step": 5856 + }, + { + "epoch": 1.3504726769656443, + 
"grad_norm": 1.6490610440384348, + "learning_rate": 5.237349766151392e-07, + "loss": 0.43848085403442383, + "step": 5857 + }, + { + "epoch": 1.350703251095227, + "grad_norm": 1.5778355488021025, + "learning_rate": 5.233997675458588e-07, + "loss": 0.47512906789779663, + "step": 5858 + }, + { + "epoch": 1.3509338252248098, + "grad_norm": 1.4893970639177625, + "learning_rate": 5.230646277552527e-07, + "loss": 0.3484492897987366, + "step": 5859 + }, + { + "epoch": 1.3511643993543925, + "grad_norm": 1.5529244445697006, + "learning_rate": 5.227295572920363e-07, + "loss": 0.48915669322013855, + "step": 5860 + }, + { + "epoch": 1.3513949734839752, + "grad_norm": 1.687195391171769, + "learning_rate": 5.223945562049159e-07, + "loss": 0.415932834148407, + "step": 5861 + }, + { + "epoch": 1.3516255476135577, + "grad_norm": 1.8036222540660396, + "learning_rate": 5.220596245425869e-07, + "loss": 0.47945982217788696, + "step": 5862 + }, + { + "epoch": 1.3518561217431404, + "grad_norm": 1.7032993247582504, + "learning_rate": 5.217247623537356e-07, + "loss": 0.4322330951690674, + "step": 5863 + }, + { + "epoch": 1.3520866958727231, + "grad_norm": 1.7271334098970212, + "learning_rate": 5.213899696870369e-07, + "loss": 0.4608469605445862, + "step": 5864 + }, + { + "epoch": 1.3523172700023056, + "grad_norm": 1.4726583260713841, + "learning_rate": 5.210552465911566e-07, + "loss": 0.5108528137207031, + "step": 5865 + }, + { + "epoch": 1.3525478441318883, + "grad_norm": 1.3172906919344538, + "learning_rate": 5.207205931147502e-07, + "loss": 0.37947285175323486, + "step": 5866 + }, + { + "epoch": 1.352778418261471, + "grad_norm": 1.5825329658520386, + "learning_rate": 5.203860093064635e-07, + "loss": 0.49094486236572266, + "step": 5867 + }, + { + "epoch": 1.3530089923910538, + "grad_norm": 1.7057097538270483, + "learning_rate": 5.200514952149308e-07, + "loss": 0.34238702058792114, + "step": 5868 + }, + { + "epoch": 1.3532395665206365, + "grad_norm": 1.4815052827701158, + "learning_rate": 
5.197170508887774e-07, + "loss": 0.46390393376350403, + "step": 5869 + }, + { + "epoch": 1.353470140650219, + "grad_norm": 1.517083535949924, + "learning_rate": 5.193826763766183e-07, + "loss": 0.44219160079956055, + "step": 5870 + }, + { + "epoch": 1.3537007147798017, + "grad_norm": 1.2444078580604416, + "learning_rate": 5.190483717270578e-07, + "loss": 0.42801350355148315, + "step": 5871 + }, + { + "epoch": 1.3539312889093844, + "grad_norm": 1.5276855271974423, + "learning_rate": 5.187141369886906e-07, + "loss": 0.43861454725265503, + "step": 5872 + }, + { + "epoch": 1.354161863038967, + "grad_norm": 1.3684710867849712, + "learning_rate": 5.183799722101014e-07, + "loss": 0.4381449222564697, + "step": 5873 + }, + { + "epoch": 1.3543924371685496, + "grad_norm": 1.6990772878337996, + "learning_rate": 5.180458774398646e-07, + "loss": 0.4341619610786438, + "step": 5874 + }, + { + "epoch": 1.3546230112981323, + "grad_norm": 1.5170997767832792, + "learning_rate": 5.177118527265437e-07, + "loss": 0.4376588463783264, + "step": 5875 + }, + { + "epoch": 1.354853585427715, + "grad_norm": 1.4712846387139202, + "learning_rate": 5.173778981186932e-07, + "loss": 0.38568538427352905, + "step": 5876 + }, + { + "epoch": 1.3550841595572978, + "grad_norm": 1.4162179235966152, + "learning_rate": 5.170440136648561e-07, + "loss": 0.44178056716918945, + "step": 5877 + }, + { + "epoch": 1.3553147336868803, + "grad_norm": 1.434763306400174, + "learning_rate": 5.167101994135665e-07, + "loss": 0.49847882986068726, + "step": 5878 + }, + { + "epoch": 1.355545307816463, + "grad_norm": 1.3114035605969607, + "learning_rate": 5.163764554133476e-07, + "loss": 0.33697545528411865, + "step": 5879 + }, + { + "epoch": 1.3557758819460457, + "grad_norm": 1.9314852987462174, + "learning_rate": 5.160427817127117e-07, + "loss": 0.5216578841209412, + "step": 5880 + }, + { + "epoch": 1.3560064560756282, + "grad_norm": 1.5367735086016923, + "learning_rate": 5.157091783601624e-07, + "loss": 0.5101301670074463, 
+ "step": 5881 + }, + { + "epoch": 1.356237030205211, + "grad_norm": 1.4437708354871932, + "learning_rate": 5.15375645404192e-07, + "loss": 0.47876495122909546, + "step": 5882 + }, + { + "epoch": 1.3564676043347936, + "grad_norm": 1.413429948502146, + "learning_rate": 5.150421828932837e-07, + "loss": 0.4656233787536621, + "step": 5883 + }, + { + "epoch": 1.3566981784643763, + "grad_norm": 1.4503708847221477, + "learning_rate": 5.147087908759082e-07, + "loss": 0.4392930269241333, + "step": 5884 + }, + { + "epoch": 1.356928752593959, + "grad_norm": 1.6187538312851866, + "learning_rate": 5.143754694005289e-07, + "loss": 0.5044047832489014, + "step": 5885 + }, + { + "epoch": 1.3571593267235416, + "grad_norm": 1.3914560087628793, + "learning_rate": 5.140422185155964e-07, + "loss": 0.4345476031303406, + "step": 5886 + }, + { + "epoch": 1.3573899008531243, + "grad_norm": 1.768236932460398, + "learning_rate": 5.137090382695528e-07, + "loss": 0.49207669496536255, + "step": 5887 + }, + { + "epoch": 1.357620474982707, + "grad_norm": 1.531417533887488, + "learning_rate": 5.133759287108286e-07, + "loss": 0.4054356813430786, + "step": 5888 + }, + { + "epoch": 1.3578510491122895, + "grad_norm": 1.9704323937726442, + "learning_rate": 5.130428898878449e-07, + "loss": 0.5436004400253296, + "step": 5889 + }, + { + "epoch": 1.3580816232418722, + "grad_norm": 1.521959500035041, + "learning_rate": 5.127099218490127e-07, + "loss": 0.4832550287246704, + "step": 5890 + }, + { + "epoch": 1.358312197371455, + "grad_norm": 1.4438750839498624, + "learning_rate": 5.123770246427315e-07, + "loss": 0.38890475034713745, + "step": 5891 + }, + { + "epoch": 1.3585427715010376, + "grad_norm": 1.3028583829520697, + "learning_rate": 5.12044198317392e-07, + "loss": 0.49784210324287415, + "step": 5892 + }, + { + "epoch": 1.3587733456306204, + "grad_norm": 1.5058620289816076, + "learning_rate": 5.117114429213732e-07, + "loss": 0.5033924579620361, + "step": 5893 + }, + { + "epoch": 1.3590039197602028, + 
"grad_norm": 1.5069016697055244, + "learning_rate": 5.113787585030454e-07, + "loss": 0.4857698678970337, + "step": 5894 + }, + { + "epoch": 1.3592344938897856, + "grad_norm": 1.6430229342698937, + "learning_rate": 5.110461451107663e-07, + "loss": 0.4269944429397583, + "step": 5895 + }, + { + "epoch": 1.3594650680193683, + "grad_norm": 1.5554523008644683, + "learning_rate": 5.107136027928858e-07, + "loss": 0.44045162200927734, + "step": 5896 + }, + { + "epoch": 1.3596956421489508, + "grad_norm": 1.6719472262672752, + "learning_rate": 5.103811315977418e-07, + "loss": 0.5223391056060791, + "step": 5897 + }, + { + "epoch": 1.3599262162785335, + "grad_norm": 1.6234993813736853, + "learning_rate": 5.100487315736627e-07, + "loss": 0.45988473296165466, + "step": 5898 + }, + { + "epoch": 1.3601567904081162, + "grad_norm": 1.3494964030299075, + "learning_rate": 5.097164027689661e-07, + "loss": 0.46342164278030396, + "step": 5899 + }, + { + "epoch": 1.360387364537699, + "grad_norm": 1.6151646749241875, + "learning_rate": 5.093841452319588e-07, + "loss": 0.48150479793548584, + "step": 5900 + }, + { + "epoch": 1.3606179386672816, + "grad_norm": 1.3258214555354595, + "learning_rate": 5.090519590109386e-07, + "loss": 0.3971351981163025, + "step": 5901 + }, + { + "epoch": 1.3608485127968641, + "grad_norm": 1.755266254483419, + "learning_rate": 5.087198441541914e-07, + "loss": 0.44869956374168396, + "step": 5902 + }, + { + "epoch": 1.3610790869264469, + "grad_norm": 1.4425507935259798, + "learning_rate": 5.083878007099943e-07, + "loss": 0.3402775526046753, + "step": 5903 + }, + { + "epoch": 1.3613096610560296, + "grad_norm": 1.3415772700158808, + "learning_rate": 5.080558287266119e-07, + "loss": 0.4031033515930176, + "step": 5904 + }, + { + "epoch": 1.361540235185612, + "grad_norm": 1.6435607583739225, + "learning_rate": 5.077239282523012e-07, + "loss": 0.493259459733963, + "step": 5905 + }, + { + "epoch": 1.3617708093151948, + "grad_norm": 1.4120722192098578, + "learning_rate": 
5.073920993353063e-07, + "loss": 0.39178919792175293, + "step": 5906 + }, + { + "epoch": 1.3620013834447775, + "grad_norm": 1.6684880889475469, + "learning_rate": 5.070603420238624e-07, + "loss": 0.5091253519058228, + "step": 5907 + }, + { + "epoch": 1.3622319575743602, + "grad_norm": 1.3497137288112562, + "learning_rate": 5.067286563661934e-07, + "loss": 0.416462779045105, + "step": 5908 + }, + { + "epoch": 1.362462531703943, + "grad_norm": 1.7821137618482668, + "learning_rate": 5.063970424105137e-07, + "loss": 0.5018768310546875, + "step": 5909 + }, + { + "epoch": 1.3626931058335254, + "grad_norm": 1.4656990143163084, + "learning_rate": 5.060655002050262e-07, + "loss": 0.5512624979019165, + "step": 5910 + }, + { + "epoch": 1.3629236799631081, + "grad_norm": 1.3507263825947706, + "learning_rate": 5.057340297979241e-07, + "loss": 0.3953768014907837, + "step": 5911 + }, + { + "epoch": 1.3631542540926909, + "grad_norm": 1.2807145092132266, + "learning_rate": 5.054026312373896e-07, + "loss": 0.4355456233024597, + "step": 5912 + }, + { + "epoch": 1.3633848282222734, + "grad_norm": 1.7515987196576535, + "learning_rate": 5.050713045715955e-07, + "loss": 0.4826827645301819, + "step": 5913 + }, + { + "epoch": 1.363615402351856, + "grad_norm": 1.5075633708078446, + "learning_rate": 5.047400498487035e-07, + "loss": 0.47084230184555054, + "step": 5914 + }, + { + "epoch": 1.3638459764814388, + "grad_norm": 1.750968751768445, + "learning_rate": 5.044088671168644e-07, + "loss": 0.5273452997207642, + "step": 5915 + }, + { + "epoch": 1.3640765506110215, + "grad_norm": 1.484245498844297, + "learning_rate": 5.040777564242194e-07, + "loss": 0.44878947734832764, + "step": 5916 + }, + { + "epoch": 1.3643071247406042, + "grad_norm": 1.5815904358854045, + "learning_rate": 5.03746717818898e-07, + "loss": 0.47986388206481934, + "step": 5917 + }, + { + "epoch": 1.3645376988701867, + "grad_norm": 1.4148899602283196, + "learning_rate": 5.034157513490211e-07, + "loss": 0.4807628393173218, + 
"step": 5918 + }, + { + "epoch": 1.3647682729997694, + "grad_norm": 1.3747301384734179, + "learning_rate": 5.030848570626969e-07, + "loss": 0.46027708053588867, + "step": 5919 + }, + { + "epoch": 1.3649988471293522, + "grad_norm": 1.517934310152821, + "learning_rate": 5.027540350080249e-07, + "loss": 0.3803088963031769, + "step": 5920 + }, + { + "epoch": 1.3652294212589347, + "grad_norm": 1.7239494972976075, + "learning_rate": 5.024232852330939e-07, + "loss": 0.5530920028686523, + "step": 5921 + }, + { + "epoch": 1.3654599953885174, + "grad_norm": 1.7183928961648565, + "learning_rate": 5.020926077859805e-07, + "loss": 0.45984846353530884, + "step": 5922 + }, + { + "epoch": 1.3656905695181, + "grad_norm": 1.5752429840016822, + "learning_rate": 5.017620027147533e-07, + "loss": 0.4448089301586151, + "step": 5923 + }, + { + "epoch": 1.3659211436476828, + "grad_norm": 1.713335636587649, + "learning_rate": 5.01431470067468e-07, + "loss": 0.4226706326007843, + "step": 5924 + }, + { + "epoch": 1.3661517177772655, + "grad_norm": 1.9953320185051966, + "learning_rate": 5.011010098921718e-07, + "loss": 0.5243814587593079, + "step": 5925 + }, + { + "epoch": 1.366382291906848, + "grad_norm": 1.6278540239253128, + "learning_rate": 5.007706222368995e-07, + "loss": 0.5733383893966675, + "step": 5926 + }, + { + "epoch": 1.3666128660364307, + "grad_norm": 1.373199955472141, + "learning_rate": 5.00440307149677e-07, + "loss": 0.4583539366722107, + "step": 5927 + }, + { + "epoch": 1.3668434401660134, + "grad_norm": 1.5871148090703988, + "learning_rate": 5.001100646785186e-07, + "loss": 0.474712610244751, + "step": 5928 + }, + { + "epoch": 1.367074014295596, + "grad_norm": 1.6888872351824356, + "learning_rate": 4.997798948714291e-07, + "loss": 0.3995950222015381, + "step": 5929 + }, + { + "epoch": 1.3673045884251787, + "grad_norm": 1.7317310910620232, + "learning_rate": 4.994497977764011e-07, + "loss": 0.4236767888069153, + "step": 5930 + }, + { + "epoch": 1.3675351625547614, + 
"grad_norm": 1.6853541022393534, + "learning_rate": 4.991197734414178e-07, + "loss": 0.4972396492958069, + "step": 5931 + }, + { + "epoch": 1.367765736684344, + "grad_norm": 1.503037819471691, + "learning_rate": 4.98789821914452e-07, + "loss": 0.444613516330719, + "step": 5932 + }, + { + "epoch": 1.3679963108139268, + "grad_norm": 1.6912958330957677, + "learning_rate": 4.984599432434649e-07, + "loss": 0.4955690801143646, + "step": 5933 + }, + { + "epoch": 1.3682268849435093, + "grad_norm": 1.559115794882019, + "learning_rate": 4.981301374764084e-07, + "loss": 0.4983398914337158, + "step": 5934 + }, + { + "epoch": 1.368457459073092, + "grad_norm": 1.5588186216828477, + "learning_rate": 4.978004046612223e-07, + "loss": 0.45190921425819397, + "step": 5935 + }, + { + "epoch": 1.3686880332026747, + "grad_norm": 1.757499738470118, + "learning_rate": 4.974707448458369e-07, + "loss": 0.5014151334762573, + "step": 5936 + }, + { + "epoch": 1.3689186073322572, + "grad_norm": 1.5399509659752455, + "learning_rate": 4.971411580781719e-07, + "loss": 0.3868405818939209, + "step": 5937 + }, + { + "epoch": 1.36914918146184, + "grad_norm": 1.42775142494789, + "learning_rate": 4.968116444061363e-07, + "loss": 0.4093654155731201, + "step": 5938 + }, + { + "epoch": 1.3693797555914227, + "grad_norm": 1.318689202230345, + "learning_rate": 4.964822038776276e-07, + "loss": 0.3945506513118744, + "step": 5939 + }, + { + "epoch": 1.3696103297210054, + "grad_norm": 1.5874458283663229, + "learning_rate": 4.961528365405333e-07, + "loss": 0.3645547330379486, + "step": 5940 + }, + { + "epoch": 1.369840903850588, + "grad_norm": 1.760752800086673, + "learning_rate": 4.958235424427309e-07, + "loss": 0.36679786443710327, + "step": 5941 + }, + { + "epoch": 1.3700714779801706, + "grad_norm": 1.5458160371079348, + "learning_rate": 4.954943216320861e-07, + "loss": 0.4892774820327759, + "step": 5942 + }, + { + "epoch": 1.3703020521097533, + "grad_norm": 1.4817693224477149, + "learning_rate": 
4.951651741564544e-07, + "loss": 0.40406349301338196, + "step": 5943 + }, + { + "epoch": 1.370532626239336, + "grad_norm": 1.277384097830529, + "learning_rate": 4.948361000636812e-07, + "loss": 0.4219849407672882, + "step": 5944 + }, + { + "epoch": 1.3707632003689185, + "grad_norm": 1.7190062313169097, + "learning_rate": 4.945070994016008e-07, + "loss": 0.5329363346099854, + "step": 5945 + }, + { + "epoch": 1.3709937744985012, + "grad_norm": 1.5495655705207303, + "learning_rate": 4.941781722180361e-07, + "loss": 0.42577850818634033, + "step": 5946 + }, + { + "epoch": 1.371224348628084, + "grad_norm": 1.3916296167797302, + "learning_rate": 4.938493185608008e-07, + "loss": 0.4157155156135559, + "step": 5947 + }, + { + "epoch": 1.3714549227576667, + "grad_norm": 1.5016286739703502, + "learning_rate": 4.935205384776965e-07, + "loss": 0.46491485834121704, + "step": 5948 + }, + { + "epoch": 1.3716854968872494, + "grad_norm": 1.6766694792768029, + "learning_rate": 4.931918320165151e-07, + "loss": 0.39582759141921997, + "step": 5949 + }, + { + "epoch": 1.3719160710168319, + "grad_norm": 1.3277840228822322, + "learning_rate": 4.928631992250371e-07, + "loss": 0.4380473792552948, + "step": 5950 + }, + { + "epoch": 1.3721466451464146, + "grad_norm": 1.5358043238579873, + "learning_rate": 4.925346401510327e-07, + "loss": 0.5044572949409485, + "step": 5951 + }, + { + "epoch": 1.372377219275997, + "grad_norm": 1.6172521688559274, + "learning_rate": 4.922061548422617e-07, + "loss": 0.4808889627456665, + "step": 5952 + }, + { + "epoch": 1.3726077934055798, + "grad_norm": 1.370713689883329, + "learning_rate": 4.91877743346472e-07, + "loss": 0.4215632677078247, + "step": 5953 + }, + { + "epoch": 1.3728383675351625, + "grad_norm": 1.4640509349497177, + "learning_rate": 4.915494057114025e-07, + "loss": 0.4999268651008606, + "step": 5954 + }, + { + "epoch": 1.3730689416647452, + "grad_norm": 1.593000178254792, + "learning_rate": 4.912211419847793e-07, + "loss": 0.476152241230011, + 
"step": 5955 + }, + { + "epoch": 1.373299515794328, + "grad_norm": 1.5436036358421792, + "learning_rate": 4.908929522143201e-07, + "loss": 0.4253045320510864, + "step": 5956 + }, + { + "epoch": 1.3735300899239105, + "grad_norm": 1.6726587032262756, + "learning_rate": 4.905648364477293e-07, + "loss": 0.4251098036766052, + "step": 5957 + }, + { + "epoch": 1.3737606640534932, + "grad_norm": 1.5635582188699524, + "learning_rate": 4.902367947327029e-07, + "loss": 0.3820844888687134, + "step": 5958 + }, + { + "epoch": 1.373991238183076, + "grad_norm": 1.5563353591748068, + "learning_rate": 4.899088271169245e-07, + "loss": 0.4725508689880371, + "step": 5959 + }, + { + "epoch": 1.3742218123126584, + "grad_norm": 1.4545077693536257, + "learning_rate": 4.895809336480675e-07, + "loss": 0.48313626646995544, + "step": 5960 + }, + { + "epoch": 1.374452386442241, + "grad_norm": 1.6596316713803083, + "learning_rate": 4.892531143737952e-07, + "loss": 0.5344939231872559, + "step": 5961 + }, + { + "epoch": 1.3746829605718238, + "grad_norm": 1.7551620350578117, + "learning_rate": 4.889253693417585e-07, + "loss": 0.4305552840232849, + "step": 5962 + }, + { + "epoch": 1.3749135347014065, + "grad_norm": 1.4302106398553562, + "learning_rate": 4.885976985995996e-07, + "loss": 0.3564034700393677, + "step": 5963 + }, + { + "epoch": 1.3751441088309893, + "grad_norm": 1.4796542999179279, + "learning_rate": 4.882701021949475e-07, + "loss": 0.5498751997947693, + "step": 5964 + }, + { + "epoch": 1.3753746829605717, + "grad_norm": 1.5956710623028654, + "learning_rate": 4.879425801754226e-07, + "loss": 0.4489964246749878, + "step": 5965 + }, + { + "epoch": 1.3756052570901545, + "grad_norm": 1.7595842751992934, + "learning_rate": 4.87615132588633e-07, + "loss": 0.4142688810825348, + "step": 5966 + }, + { + "epoch": 1.3758358312197372, + "grad_norm": 1.483255834477138, + "learning_rate": 4.872877594821767e-07, + "loss": 0.3823632597923279, + "step": 5967 + }, + { + "epoch": 1.3760664053493197, + 
"grad_norm": 1.603982795420405, + "learning_rate": 4.869604609036408e-07, + "loss": 0.39014697074890137, + "step": 5968 + }, + { + "epoch": 1.3762969794789024, + "grad_norm": 1.5363032345717058, + "learning_rate": 4.866332369006016e-07, + "loss": 0.3907933235168457, + "step": 5969 + }, + { + "epoch": 1.376527553608485, + "grad_norm": 1.5125931439342233, + "learning_rate": 4.863060875206244e-07, + "loss": 0.3872087001800537, + "step": 5970 + }, + { + "epoch": 1.3767581277380678, + "grad_norm": 1.5847290584713085, + "learning_rate": 4.85979012811263e-07, + "loss": 0.40380537509918213, + "step": 5971 + }, + { + "epoch": 1.3769887018676505, + "grad_norm": 1.3127541034285726, + "learning_rate": 4.856520128200621e-07, + "loss": 0.39867663383483887, + "step": 5972 + }, + { + "epoch": 1.377219275997233, + "grad_norm": 1.7829413941875683, + "learning_rate": 4.853250875945534e-07, + "loss": 0.5337423086166382, + "step": 5973 + }, + { + "epoch": 1.3774498501268158, + "grad_norm": 1.4903518724810052, + "learning_rate": 4.849982371822593e-07, + "loss": 0.3824300765991211, + "step": 5974 + }, + { + "epoch": 1.3776804242563985, + "grad_norm": 1.4611697760932394, + "learning_rate": 4.846714616306907e-07, + "loss": 0.3613823652267456, + "step": 5975 + }, + { + "epoch": 1.377910998385981, + "grad_norm": 1.5701851835478555, + "learning_rate": 4.843447609873484e-07, + "loss": 0.5040241479873657, + "step": 5976 + }, + { + "epoch": 1.3781415725155637, + "grad_norm": 1.5801365248176698, + "learning_rate": 4.840181352997207e-07, + "loss": 0.4639400243759155, + "step": 5977 + }, + { + "epoch": 1.3783721466451464, + "grad_norm": 1.730401874176074, + "learning_rate": 4.836915846152867e-07, + "loss": 0.503246009349823, + "step": 5978 + }, + { + "epoch": 1.3786027207747291, + "grad_norm": 1.6695377873006745, + "learning_rate": 4.833651089815135e-07, + "loss": 0.3974607586860657, + "step": 5979 + }, + { + "epoch": 1.3788332949043118, + "grad_norm": 1.556324884896908, + "learning_rate": 
4.830387084458573e-07, + "loss": 0.43200844526290894, + "step": 5980 + }, + { + "epoch": 1.3790638690338943, + "grad_norm": 1.8355646307086506, + "learning_rate": 4.827123830557644e-07, + "loss": 0.547272801399231, + "step": 5981 + }, + { + "epoch": 1.379294443163477, + "grad_norm": 1.5723785141918243, + "learning_rate": 4.823861328586688e-07, + "loss": 0.4509696960449219, + "step": 5982 + }, + { + "epoch": 1.3795250172930598, + "grad_norm": 1.53889123165165, + "learning_rate": 4.820599579019946e-07, + "loss": 0.46022483706474304, + "step": 5983 + }, + { + "epoch": 1.3797555914226423, + "grad_norm": 1.5251655198087088, + "learning_rate": 4.817338582331548e-07, + "loss": 0.40973198413848877, + "step": 5984 + }, + { + "epoch": 1.379986165552225, + "grad_norm": 1.6235538954137896, + "learning_rate": 4.814078338995515e-07, + "loss": 0.39012736082077026, + "step": 5985 + }, + { + "epoch": 1.3802167396818077, + "grad_norm": 1.6954879615528178, + "learning_rate": 4.810818849485749e-07, + "loss": 0.40657323598861694, + "step": 5986 + }, + { + "epoch": 1.3804473138113904, + "grad_norm": 1.4158383607530642, + "learning_rate": 4.80756011427606e-07, + "loss": 0.38662189245224, + "step": 5987 + }, + { + "epoch": 1.3806778879409731, + "grad_norm": 1.629559894183336, + "learning_rate": 4.804302133840126e-07, + "loss": 0.4888705015182495, + "step": 5988 + }, + { + "epoch": 1.3809084620705556, + "grad_norm": 1.4732586688358036, + "learning_rate": 4.801044908651537e-07, + "loss": 0.4559556245803833, + "step": 5989 + }, + { + "epoch": 1.3811390362001383, + "grad_norm": 1.773370569584542, + "learning_rate": 4.797788439183757e-07, + "loss": 0.40912386775016785, + "step": 5990 + }, + { + "epoch": 1.381369610329721, + "grad_norm": 1.3364334005028415, + "learning_rate": 4.794532725910152e-07, + "loss": 0.3848627209663391, + "step": 5991 + }, + { + "epoch": 1.3816001844593035, + "grad_norm": 1.3860556916017956, + "learning_rate": 4.791277769303975e-07, + "loss": 0.4995359778404236, + 
"step": 5992 + }, + { + "epoch": 1.3818307585888863, + "grad_norm": 1.3898521995378452, + "learning_rate": 4.788023569838356e-07, + "loss": 0.38717859983444214, + "step": 5993 + }, + { + "epoch": 1.382061332718469, + "grad_norm": 1.7766923949498086, + "learning_rate": 4.784770127986339e-07, + "loss": 0.39855217933654785, + "step": 5994 + }, + { + "epoch": 1.3822919068480517, + "grad_norm": 1.337680228597258, + "learning_rate": 4.781517444220835e-07, + "loss": 0.38494858145713806, + "step": 5995 + }, + { + "epoch": 1.3825224809776344, + "grad_norm": 1.4735802599680248, + "learning_rate": 4.778265519014661e-07, + "loss": 0.44064784049987793, + "step": 5996 + }, + { + "epoch": 1.382753055107217, + "grad_norm": 1.8926413264660993, + "learning_rate": 4.775014352840512e-07, + "loss": 0.39377373456954956, + "step": 5997 + }, + { + "epoch": 1.3829836292367996, + "grad_norm": 1.5108151654480286, + "learning_rate": 4.771763946170979e-07, + "loss": 0.45127296447753906, + "step": 5998 + }, + { + "epoch": 1.3832142033663823, + "grad_norm": 1.4916107560429466, + "learning_rate": 4.768514299478545e-07, + "loss": 0.4999358654022217, + "step": 5999 + }, + { + "epoch": 1.3834447774959648, + "grad_norm": 1.7185286370183794, + "learning_rate": 4.7652654132355784e-07, + "loss": 0.49552851915359497, + "step": 6000 + }, + { + "epoch": 1.3836753516255476, + "grad_norm": 1.7765151369959267, + "learning_rate": 4.762017287914338e-07, + "loss": 0.49196135997772217, + "step": 6001 + }, + { + "epoch": 1.3839059257551303, + "grad_norm": 1.6417248034868954, + "learning_rate": 4.758769923986966e-07, + "loss": 0.3870600461959839, + "step": 6002 + }, + { + "epoch": 1.384136499884713, + "grad_norm": 1.6104154654929026, + "learning_rate": 4.7555233219255074e-07, + "loss": 0.4585425853729248, + "step": 6003 + }, + { + "epoch": 1.3843670740142957, + "grad_norm": 1.3699827425500786, + "learning_rate": 4.752277482201882e-07, + "loss": 0.4332588315010071, + "step": 6004 + }, + { + "epoch": 
1.3845976481438782, + "grad_norm": 1.6005942921335146, + "learning_rate": 4.749032405287913e-07, + "loss": 0.4386274814605713, + "step": 6005 + }, + { + "epoch": 1.384828222273461, + "grad_norm": 1.430715117905666, + "learning_rate": 4.745788091655295e-07, + "loss": 0.5064895749092102, + "step": 6006 + }, + { + "epoch": 1.3850587964030436, + "grad_norm": 1.470846994377081, + "learning_rate": 4.7425445417756295e-07, + "loss": 0.4441327452659607, + "step": 6007 + }, + { + "epoch": 1.3852893705326261, + "grad_norm": 1.6191746478584856, + "learning_rate": 4.7393017561203965e-07, + "loss": 0.4415687918663025, + "step": 6008 + }, + { + "epoch": 1.3855199446622088, + "grad_norm": 1.4021203224812295, + "learning_rate": 4.736059735160973e-07, + "loss": 0.4668382704257965, + "step": 6009 + }, + { + "epoch": 1.3857505187917916, + "grad_norm": 1.6079029250549948, + "learning_rate": 4.732818479368615e-07, + "loss": 0.3981805443763733, + "step": 6010 + }, + { + "epoch": 1.3859810929213743, + "grad_norm": 1.4448652226463723, + "learning_rate": 4.7295779892144694e-07, + "loss": 0.4465348720550537, + "step": 6011 + }, + { + "epoch": 1.386211667050957, + "grad_norm": 1.7530840597871544, + "learning_rate": 4.7263382651695805e-07, + "loss": 0.4844682812690735, + "step": 6012 + }, + { + "epoch": 1.3864422411805395, + "grad_norm": 1.417618664232542, + "learning_rate": 4.723099307704868e-07, + "loss": 0.4261378347873688, + "step": 6013 + }, + { + "epoch": 1.3866728153101222, + "grad_norm": 1.4997543603341101, + "learning_rate": 4.7198611172911506e-07, + "loss": 0.457815945148468, + "step": 6014 + }, + { + "epoch": 1.386903389439705, + "grad_norm": 1.570655771567204, + "learning_rate": 4.7166236943991333e-07, + "loss": 0.46352216601371765, + "step": 6015 + }, + { + "epoch": 1.3871339635692874, + "grad_norm": 1.486567492766103, + "learning_rate": 4.7133870394994104e-07, + "loss": 0.4166485667228699, + "step": 6016 + }, + { + "epoch": 1.3873645376988701, + "grad_norm": 1.6982826579565595, + 
"learning_rate": 4.710151153062456e-07, + "loss": 0.405789852142334, + "step": 6017 + }, + { + "epoch": 1.3875951118284529, + "grad_norm": 1.7459761562612983, + "learning_rate": 4.7069160355586456e-07, + "loss": 0.47718119621276855, + "step": 6018 + }, + { + "epoch": 1.3878256859580356, + "grad_norm": 1.5824023496617, + "learning_rate": 4.7036816874582307e-07, + "loss": 0.5040356516838074, + "step": 6019 + }, + { + "epoch": 1.3880562600876183, + "grad_norm": 1.5657039890557007, + "learning_rate": 4.700448109231362e-07, + "loss": 0.45093637704849243, + "step": 6020 + }, + { + "epoch": 1.3882868342172008, + "grad_norm": 1.4929438188817195, + "learning_rate": 4.6972153013480666e-07, + "loss": 0.5363638997077942, + "step": 6021 + }, + { + "epoch": 1.3885174083467835, + "grad_norm": 1.6076509313088967, + "learning_rate": 4.6939832642782684e-07, + "loss": 0.4917050004005432, + "step": 6022 + }, + { + "epoch": 1.3887479824763662, + "grad_norm": 1.692377103708349, + "learning_rate": 4.690751998491782e-07, + "loss": 0.43033331632614136, + "step": 6023 + }, + { + "epoch": 1.3889785566059487, + "grad_norm": 1.5272594017885164, + "learning_rate": 4.6875215044582973e-07, + "loss": 0.36168330907821655, + "step": 6024 + }, + { + "epoch": 1.3892091307355314, + "grad_norm": 1.693805471797637, + "learning_rate": 4.6842917826474047e-07, + "loss": 0.48347967863082886, + "step": 6025 + }, + { + "epoch": 1.3894397048651141, + "grad_norm": 1.332022962916858, + "learning_rate": 4.681062833528572e-07, + "loss": 0.4493439495563507, + "step": 6026 + }, + { + "epoch": 1.3896702789946969, + "grad_norm": 1.4842335012941816, + "learning_rate": 4.677834657571165e-07, + "loss": 0.385773628950119, + "step": 6027 + }, + { + "epoch": 1.3899008531242796, + "grad_norm": 1.396017775513053, + "learning_rate": 4.674607255244426e-07, + "loss": 0.4254469573497772, + "step": 6028 + }, + { + "epoch": 1.390131427253862, + "grad_norm": 1.6964811881797437, + "learning_rate": 4.671380627017497e-07, + "loss": 
0.5070454478263855, + "step": 6029 + }, + { + "epoch": 1.3903620013834448, + "grad_norm": 1.4647574188657595, + "learning_rate": 4.668154773359394e-07, + "loss": 0.44099801778793335, + "step": 6030 + }, + { + "epoch": 1.3905925755130275, + "grad_norm": 1.6731498815474952, + "learning_rate": 4.6649296947390314e-07, + "loss": 0.4965481162071228, + "step": 6031 + }, + { + "epoch": 1.39082314964261, + "grad_norm": 1.6621123973009748, + "learning_rate": 4.6617053916252116e-07, + "loss": 0.4085753262042999, + "step": 6032 + }, + { + "epoch": 1.3910537237721927, + "grad_norm": 1.473260966023028, + "learning_rate": 4.6584818644866106e-07, + "loss": 0.3768424391746521, + "step": 6033 + }, + { + "epoch": 1.3912842979017754, + "grad_norm": 1.7152094772871185, + "learning_rate": 4.6552591137918087e-07, + "loss": 0.4330044388771057, + "step": 6034 + }, + { + "epoch": 1.3915148720313582, + "grad_norm": 1.5907700374750249, + "learning_rate": 4.6520371400092584e-07, + "loss": 0.4669216275215149, + "step": 6035 + }, + { + "epoch": 1.3917454461609409, + "grad_norm": 1.8634085835731031, + "learning_rate": 4.648815943607314e-07, + "loss": 0.5491182208061218, + "step": 6036 + }, + { + "epoch": 1.3919760202905234, + "grad_norm": 1.439715262819595, + "learning_rate": 4.6455955250542e-07, + "loss": 0.4842255413532257, + "step": 6037 + }, + { + "epoch": 1.392206594420106, + "grad_norm": 1.598726710739168, + "learning_rate": 4.6423758848180427e-07, + "loss": 0.45479631423950195, + "step": 6038 + }, + { + "epoch": 1.3924371685496888, + "grad_norm": 1.5770365297702393, + "learning_rate": 4.6391570233668486e-07, + "loss": 0.4209587574005127, + "step": 6039 + }, + { + "epoch": 1.3926677426792713, + "grad_norm": 1.4722680740741498, + "learning_rate": 4.6359389411685145e-07, + "loss": 0.5061464905738831, + "step": 6040 + }, + { + "epoch": 1.392898316808854, + "grad_norm": 1.5166334201375402, + "learning_rate": 4.6327216386908196e-07, + "loss": 0.39443570375442505, + "step": 6041 + }, + { + 
"epoch": 1.3931288909384367, + "grad_norm": 1.6936024892202146, + "learning_rate": 4.6295051164014256e-07, + "loss": 0.4784463942050934, + "step": 6042 + }, + { + "epoch": 1.3933594650680194, + "grad_norm": 1.623401531095956, + "learning_rate": 4.6262893747678957e-07, + "loss": 0.41256606578826904, + "step": 6043 + }, + { + "epoch": 1.3935900391976022, + "grad_norm": 1.430742297932055, + "learning_rate": 4.623074414257662e-07, + "loss": 0.4507666230201721, + "step": 6044 + }, + { + "epoch": 1.3938206133271847, + "grad_norm": 1.4646678303979026, + "learning_rate": 4.6198602353380545e-07, + "loss": 0.3783376216888428, + "step": 6045 + }, + { + "epoch": 1.3940511874567674, + "grad_norm": 1.5485119918407955, + "learning_rate": 4.616646838476289e-07, + "loss": 0.47854840755462646, + "step": 6046 + }, + { + "epoch": 1.39428176158635, + "grad_norm": 1.506150277535636, + "learning_rate": 4.6134342241394685e-07, + "loss": 0.47121208906173706, + "step": 6047 + }, + { + "epoch": 1.3945123357159326, + "grad_norm": 1.4779397331062858, + "learning_rate": 4.610222392794569e-07, + "loss": 0.5211559534072876, + "step": 6048 + }, + { + "epoch": 1.3947429098455153, + "grad_norm": 2.0522570691736606, + "learning_rate": 4.6070113449084747e-07, + "loss": 0.5846370458602905, + "step": 6049 + }, + { + "epoch": 1.394973483975098, + "grad_norm": 1.6651959806589232, + "learning_rate": 4.6038010809479365e-07, + "loss": 0.4787401854991913, + "step": 6050 + }, + { + "epoch": 1.3952040581046807, + "grad_norm": 1.336725780471279, + "learning_rate": 4.600591601379596e-07, + "loss": 0.36429738998413086, + "step": 6051 + }, + { + "epoch": 1.3954346322342635, + "grad_norm": 1.606284081701607, + "learning_rate": 4.597382906669992e-07, + "loss": 0.49923771619796753, + "step": 6052 + }, + { + "epoch": 1.395665206363846, + "grad_norm": 1.5476584348847333, + "learning_rate": 4.5941749972855326e-07, + "loss": 0.408005028963089, + "step": 6053 + }, + { + "epoch": 1.3958957804934287, + "grad_norm": 
1.72927604568786, + "learning_rate": 4.590967873692523e-07, + "loss": 0.4524402618408203, + "step": 6054 + }, + { + "epoch": 1.3961263546230114, + "grad_norm": 1.5041096845532136, + "learning_rate": 4.587761536357152e-07, + "loss": 0.5264980792999268, + "step": 6055 + }, + { + "epoch": 1.3963569287525939, + "grad_norm": 1.6066275699787076, + "learning_rate": 4.5845559857454976e-07, + "loss": 0.5324279069900513, + "step": 6056 + }, + { + "epoch": 1.3965875028821766, + "grad_norm": 1.4996065290876746, + "learning_rate": 4.581351222323511e-07, + "loss": 0.5197574496269226, + "step": 6057 + }, + { + "epoch": 1.3968180770117593, + "grad_norm": 1.6418756331716369, + "learning_rate": 4.578147246557043e-07, + "loss": 0.4549001157283783, + "step": 6058 + }, + { + "epoch": 1.397048651141342, + "grad_norm": 1.374490396915421, + "learning_rate": 4.5749440589118183e-07, + "loss": 0.38597673177719116, + "step": 6059 + }, + { + "epoch": 1.3972792252709247, + "grad_norm": 1.3707652210777583, + "learning_rate": 4.57174165985346e-07, + "loss": 0.4104316532611847, + "step": 6060 + }, + { + "epoch": 1.3975097994005072, + "grad_norm": 1.7242255092716443, + "learning_rate": 4.5685400498474614e-07, + "loss": 0.5241787433624268, + "step": 6061 + }, + { + "epoch": 1.39774037353009, + "grad_norm": 1.668574015144598, + "learning_rate": 4.565339229359213e-07, + "loss": 0.5033289790153503, + "step": 6062 + }, + { + "epoch": 1.3979709476596724, + "grad_norm": 1.3309384356199967, + "learning_rate": 4.5621391988539894e-07, + "loss": 0.436188280582428, + "step": 6063 + }, + { + "epoch": 1.3982015217892552, + "grad_norm": 1.4783680897212301, + "learning_rate": 4.5589399587969414e-07, + "loss": 0.3885838985443115, + "step": 6064 + }, + { + "epoch": 1.3984320959188379, + "grad_norm": 1.6395174483956128, + "learning_rate": 4.555741509653116e-07, + "loss": 0.5140193104743958, + "step": 6065 + }, + { + "epoch": 1.3986626700484206, + "grad_norm": 1.360236032045127, + "learning_rate": 
4.552543851887436e-07, + "loss": 0.41084468364715576, + "step": 6066 + }, + { + "epoch": 1.3988932441780033, + "grad_norm": 1.417896120601143, + "learning_rate": 4.549346985964718e-07, + "loss": 0.3606417179107666, + "step": 6067 + }, + { + "epoch": 1.3991238183075858, + "grad_norm": 1.5212574193639694, + "learning_rate": 4.546150912349653e-07, + "loss": 0.48518556356430054, + "step": 6068 + }, + { + "epoch": 1.3993543924371685, + "grad_norm": 1.6821671640024862, + "learning_rate": 4.5429556315068264e-07, + "loss": 0.5394424200057983, + "step": 6069 + }, + { + "epoch": 1.3995849665667512, + "grad_norm": 1.3734997636022714, + "learning_rate": 4.539761143900708e-07, + "loss": 0.40272367000579834, + "step": 6070 + }, + { + "epoch": 1.3998155406963337, + "grad_norm": 1.6175896107942709, + "learning_rate": 4.536567449995641e-07, + "loss": 0.4279879331588745, + "step": 6071 + }, + { + "epoch": 1.4000461148259165, + "grad_norm": 1.4620694447822713, + "learning_rate": 4.5333745502558695e-07, + "loss": 0.48560982942581177, + "step": 6072 + }, + { + "epoch": 1.4002766889554992, + "grad_norm": 1.7184355426607418, + "learning_rate": 4.530182445145506e-07, + "loss": 0.49256429076194763, + "step": 6073 + }, + { + "epoch": 1.4005072630850819, + "grad_norm": 1.4236944961072253, + "learning_rate": 4.5269911351285614e-07, + "loss": 0.5015553832054138, + "step": 6074 + }, + { + "epoch": 1.4007378372146646, + "grad_norm": 1.4505255602543088, + "learning_rate": 4.5238006206689204e-07, + "loss": 0.4313800632953644, + "step": 6075 + }, + { + "epoch": 1.400968411344247, + "grad_norm": 1.311079736416616, + "learning_rate": 4.520610902230363e-07, + "loss": 0.3440586030483246, + "step": 6076 + }, + { + "epoch": 1.4011989854738298, + "grad_norm": 1.4064686390113332, + "learning_rate": 4.517421980276538e-07, + "loss": 0.43868017196655273, + "step": 6077 + }, + { + "epoch": 1.4014295596034125, + "grad_norm": 1.6307364330463041, + "learning_rate": 4.5142338552709923e-07, + "loss": 
0.5581029057502747, + "step": 6078 + }, + { + "epoch": 1.401660133732995, + "grad_norm": 1.6962393590938891, + "learning_rate": 4.5110465276771524e-07, + "loss": 0.4543154835700989, + "step": 6079 + }, + { + "epoch": 1.4018907078625777, + "grad_norm": 1.5554679193557313, + "learning_rate": 4.507859997958333e-07, + "loss": 0.5229466557502747, + "step": 6080 + }, + { + "epoch": 1.4021212819921605, + "grad_norm": 1.5285075075955497, + "learning_rate": 4.504674266577724e-07, + "loss": 0.46781739592552185, + "step": 6081 + }, + { + "epoch": 1.4023518561217432, + "grad_norm": 1.6198419428344395, + "learning_rate": 4.5014893339983993e-07, + "loss": 0.48040711879730225, + "step": 6082 + }, + { + "epoch": 1.402582430251326, + "grad_norm": 1.5279313939865138, + "learning_rate": 4.49830520068333e-07, + "loss": 0.5039708018302917, + "step": 6083 + }, + { + "epoch": 1.4028130043809084, + "grad_norm": 1.4998739241266676, + "learning_rate": 4.495121867095354e-07, + "loss": 0.43496155738830566, + "step": 6084 + }, + { + "epoch": 1.403043578510491, + "grad_norm": 1.3838778339679694, + "learning_rate": 4.4919393336972045e-07, + "loss": 0.4603109061717987, + "step": 6085 + }, + { + "epoch": 1.4032741526400738, + "grad_norm": 1.476085268646584, + "learning_rate": 4.488757600951496e-07, + "loss": 0.4571962356567383, + "step": 6086 + }, + { + "epoch": 1.4035047267696563, + "grad_norm": 1.4791952167701867, + "learning_rate": 4.485576669320729e-07, + "loss": 0.46302443742752075, + "step": 6087 + }, + { + "epoch": 1.403735300899239, + "grad_norm": 1.675302072516594, + "learning_rate": 4.482396539267275e-07, + "loss": 0.39066869020462036, + "step": 6088 + }, + { + "epoch": 1.4039658750288218, + "grad_norm": 1.704176039322231, + "learning_rate": 4.4792172112534076e-07, + "loss": 0.4797130823135376, + "step": 6089 + }, + { + "epoch": 1.4041964491584045, + "grad_norm": 1.5835144658620484, + "learning_rate": 4.4760386857412704e-07, + "loss": 0.4578198492527008, + "step": 6090 + }, + { + 
"epoch": 1.4044270232879872, + "grad_norm": 1.3987211085891795, + "learning_rate": 4.472860963192889e-07, + "loss": 0.40768736600875854, + "step": 6091 + }, + { + "epoch": 1.4046575974175697, + "grad_norm": 1.4530633567004236, + "learning_rate": 4.4696840440701846e-07, + "loss": 0.4201413094997406, + "step": 6092 + }, + { + "epoch": 1.4048881715471524, + "grad_norm": 1.3648395822246437, + "learning_rate": 4.466507928834951e-07, + "loss": 0.45901796221733093, + "step": 6093 + }, + { + "epoch": 1.4051187456767351, + "grad_norm": 1.6465847208416895, + "learning_rate": 4.463332617948874e-07, + "loss": 0.4699435830116272, + "step": 6094 + }, + { + "epoch": 1.4053493198063176, + "grad_norm": 1.4755445259366653, + "learning_rate": 4.46015811187351e-07, + "loss": 0.4526669383049011, + "step": 6095 + }, + { + "epoch": 1.4055798939359003, + "grad_norm": 1.5721685230021194, + "learning_rate": 4.456984411070313e-07, + "loss": 0.46754884719848633, + "step": 6096 + }, + { + "epoch": 1.405810468065483, + "grad_norm": 2.1874728205075495, + "learning_rate": 4.453811516000604e-07, + "loss": 0.5119268894195557, + "step": 6097 + }, + { + "epoch": 1.4060410421950658, + "grad_norm": 2.056110026644097, + "learning_rate": 4.4506394271256043e-07, + "loss": 0.42980802059173584, + "step": 6098 + }, + { + "epoch": 1.4062716163246485, + "grad_norm": 1.5339161636381375, + "learning_rate": 4.447468144906401e-07, + "loss": 0.5895063281059265, + "step": 6099 + }, + { + "epoch": 1.406502190454231, + "grad_norm": 1.3796241305160553, + "learning_rate": 4.4442976698039803e-07, + "loss": 0.42768803238868713, + "step": 6100 + }, + { + "epoch": 1.4067327645838137, + "grad_norm": 1.608854909074267, + "learning_rate": 4.4411280022791943e-07, + "loss": 0.44234544038772583, + "step": 6101 + }, + { + "epoch": 1.4069633387133964, + "grad_norm": 1.3028889839673445, + "learning_rate": 4.437959142792791e-07, + "loss": 0.4382736086845398, + "step": 6102 + }, + { + "epoch": 1.407193912842979, + "grad_norm": 
1.6088674485493302, + "learning_rate": 4.4347910918054e-07, + "loss": 0.47603681683540344, + "step": 6103 + }, + { + "epoch": 1.4074244869725616, + "grad_norm": 1.8816511615485159, + "learning_rate": 4.431623849777522e-07, + "loss": 0.5562035441398621, + "step": 6104 + }, + { + "epoch": 1.4076550611021443, + "grad_norm": 2.2517510056002763, + "learning_rate": 4.4284574171695535e-07, + "loss": 0.4153141677379608, + "step": 6105 + }, + { + "epoch": 1.407885635231727, + "grad_norm": 1.2534764690727898, + "learning_rate": 4.425291794441762e-07, + "loss": 0.4825887680053711, + "step": 6106 + }, + { + "epoch": 1.4081162093613098, + "grad_norm": 1.4829126230878127, + "learning_rate": 4.4221269820543104e-07, + "loss": 0.4853668808937073, + "step": 6107 + }, + { + "epoch": 1.4083467834908923, + "grad_norm": 1.6140810272295893, + "learning_rate": 4.418962980467229e-07, + "loss": 0.5615251064300537, + "step": 6108 + }, + { + "epoch": 1.408577357620475, + "grad_norm": 1.8397680714752904, + "learning_rate": 4.4157997901404396e-07, + "loss": 0.38605546951293945, + "step": 6109 + }, + { + "epoch": 1.4088079317500577, + "grad_norm": 1.412066772348378, + "learning_rate": 4.412637411533745e-07, + "loss": 0.41582173109054565, + "step": 6110 + }, + { + "epoch": 1.4090385058796402, + "grad_norm": 1.4963267141581975, + "learning_rate": 4.4094758451068327e-07, + "loss": 0.38091376423835754, + "step": 6111 + }, + { + "epoch": 1.409269080009223, + "grad_norm": 1.5465721612260863, + "learning_rate": 4.4063150913192635e-07, + "loss": 0.43319058418273926, + "step": 6112 + }, + { + "epoch": 1.4094996541388056, + "grad_norm": 1.2123497825560654, + "learning_rate": 4.403155150630484e-07, + "loss": 0.43207013607025146, + "step": 6113 + }, + { + "epoch": 1.4097302282683883, + "grad_norm": 1.7217391258871346, + "learning_rate": 4.399996023499829e-07, + "loss": 0.43750250339508057, + "step": 6114 + }, + { + "epoch": 1.409960802397971, + "grad_norm": 1.5123653802002535, + "learning_rate": 
4.3968377103865016e-07, + "loss": 0.44084444642066956, + "step": 6115 + }, + { + "epoch": 1.4101913765275536, + "grad_norm": 1.4135580211481893, + "learning_rate": 4.3936802117495997e-07, + "loss": 0.4752010405063629, + "step": 6116 + }, + { + "epoch": 1.4104219506571363, + "grad_norm": 1.384945744446678, + "learning_rate": 4.390523528048098e-07, + "loss": 0.39239025115966797, + "step": 6117 + }, + { + "epoch": 1.410652524786719, + "grad_norm": 1.7179287290824201, + "learning_rate": 4.387367659740856e-07, + "loss": 0.46021080017089844, + "step": 6118 + }, + { + "epoch": 1.4108830989163015, + "grad_norm": 1.3751290560349647, + "learning_rate": 4.3842126072866014e-07, + "loss": 0.4079766571521759, + "step": 6119 + }, + { + "epoch": 1.4111136730458842, + "grad_norm": 1.5182170234243058, + "learning_rate": 4.381058371143964e-07, + "loss": 0.4922672510147095, + "step": 6120 + }, + { + "epoch": 1.411344247175467, + "grad_norm": 1.5200373777326295, + "learning_rate": 4.377904951771438e-07, + "loss": 0.3950929045677185, + "step": 6121 + }, + { + "epoch": 1.4115748213050496, + "grad_norm": 1.6189013836504815, + "learning_rate": 4.374752349627402e-07, + "loss": 0.503406286239624, + "step": 6122 + }, + { + "epoch": 1.4118053954346323, + "grad_norm": 1.724327270706253, + "learning_rate": 4.3716005651701215e-07, + "loss": 0.49198317527770996, + "step": 6123 + }, + { + "epoch": 1.4120359695642148, + "grad_norm": 1.424527206510087, + "learning_rate": 4.368449598857742e-07, + "loss": 0.47396305203437805, + "step": 6124 + }, + { + "epoch": 1.4122665436937976, + "grad_norm": 1.7537535213801698, + "learning_rate": 4.365299451148291e-07, + "loss": 0.5248152017593384, + "step": 6125 + }, + { + "epoch": 1.4124971178233803, + "grad_norm": 1.310814657820865, + "learning_rate": 4.362150122499666e-07, + "loss": 0.44327419996261597, + "step": 6126 + }, + { + "epoch": 1.4127276919529628, + "grad_norm": 1.5885906377106098, + "learning_rate": 4.3590016133696626e-07, + "loss": 
0.4628877639770508, + "step": 6127 + }, + { + "epoch": 1.4129582660825455, + "grad_norm": 1.5166490469327556, + "learning_rate": 4.355853924215942e-07, + "loss": 0.5277193188667297, + "step": 6128 + }, + { + "epoch": 1.4131888402121282, + "grad_norm": 1.6202759290555122, + "learning_rate": 4.3527070554960577e-07, + "loss": 0.4675426781177521, + "step": 6129 + }, + { + "epoch": 1.413419414341711, + "grad_norm": 1.668904355836008, + "learning_rate": 4.349561007667433e-07, + "loss": 0.3762160539627075, + "step": 6130 + }, + { + "epoch": 1.4136499884712936, + "grad_norm": 1.5686457690092273, + "learning_rate": 4.346415781187385e-07, + "loss": 0.4797256588935852, + "step": 6131 + }, + { + "epoch": 1.4138805626008761, + "grad_norm": 1.283129438483415, + "learning_rate": 4.3432713765130967e-07, + "loss": 0.4348931312561035, + "step": 6132 + }, + { + "epoch": 1.4141111367304589, + "grad_norm": 1.72495987311985, + "learning_rate": 4.3401277941016435e-07, + "loss": 0.5080585479736328, + "step": 6133 + }, + { + "epoch": 1.4143417108600416, + "grad_norm": 1.5083246190317607, + "learning_rate": 4.33698503440998e-07, + "loss": 0.40223604440689087, + "step": 6134 + }, + { + "epoch": 1.414572284989624, + "grad_norm": 1.5888336584861464, + "learning_rate": 4.3338430978949315e-07, + "loss": 0.4460202753543854, + "step": 6135 + }, + { + "epoch": 1.4148028591192068, + "grad_norm": 1.6992292342961226, + "learning_rate": 4.3307019850132167e-07, + "loss": 0.5814889669418335, + "step": 6136 + }, + { + "epoch": 1.4150334332487895, + "grad_norm": 1.366462724450419, + "learning_rate": 4.3275616962214214e-07, + "loss": 0.39237886667251587, + "step": 6137 + }, + { + "epoch": 1.4152640073783722, + "grad_norm": 1.8844588932900945, + "learning_rate": 4.324422231976025e-07, + "loss": 0.4621772766113281, + "step": 6138 + }, + { + "epoch": 1.415494581507955, + "grad_norm": 1.2090393738968102, + "learning_rate": 4.3212835927333745e-07, + "loss": 0.3722139596939087, + "step": 6139 + }, + { + "epoch": 
1.4157251556375374, + "grad_norm": 1.4849768206374545, + "learning_rate": 4.3181457789497055e-07, + "loss": 0.5007534623146057, + "step": 6140 + }, + { + "epoch": 1.4159557297671201, + "grad_norm": 1.603501037396303, + "learning_rate": 4.315008791081135e-07, + "loss": 0.470672607421875, + "step": 6141 + }, + { + "epoch": 1.4161863038967029, + "grad_norm": 1.6882048347200689, + "learning_rate": 4.3118726295836495e-07, + "loss": 0.5196114778518677, + "step": 6142 + }, + { + "epoch": 1.4164168780262854, + "grad_norm": 1.686399785386393, + "learning_rate": 4.3087372949131275e-07, + "loss": 0.4606804847717285, + "step": 6143 + }, + { + "epoch": 1.416647452155868, + "grad_norm": 1.2427386262927842, + "learning_rate": 4.3056027875253156e-07, + "loss": 0.3926661014556885, + "step": 6144 + }, + { + "epoch": 1.4168780262854508, + "grad_norm": 1.5075319697699416, + "learning_rate": 4.3024691078758536e-07, + "loss": 0.4570828080177307, + "step": 6145 + }, + { + "epoch": 1.4171086004150335, + "grad_norm": 1.4876286685500335, + "learning_rate": 4.299336256420245e-07, + "loss": 0.398615300655365, + "step": 6146 + }, + { + "epoch": 1.4173391745446162, + "grad_norm": 1.5413174329970663, + "learning_rate": 4.2962042336138873e-07, + "loss": 0.47571802139282227, + "step": 6147 + }, + { + "epoch": 1.4175697486741987, + "grad_norm": 1.5960399575320494, + "learning_rate": 4.2930730399120487e-07, + "loss": 0.4266431927680969, + "step": 6148 + }, + { + "epoch": 1.4178003228037814, + "grad_norm": 1.5511638894349447, + "learning_rate": 4.289942675769886e-07, + "loss": 0.47870057821273804, + "step": 6149 + }, + { + "epoch": 1.4180308969333641, + "grad_norm": 1.3514029969532406, + "learning_rate": 4.2868131416424223e-07, + "loss": 0.3947669267654419, + "step": 6150 + }, + { + "epoch": 1.4182614710629466, + "grad_norm": 1.6045441623823578, + "learning_rate": 4.283684437984573e-07, + "loss": 0.49074164032936096, + "step": 6151 + }, + { + "epoch": 1.4184920451925294, + "grad_norm": 
1.5267380397937564, + "learning_rate": 4.280556565251123e-07, + "loss": 0.5540445446968079, + "step": 6152 + }, + { + "epoch": 1.418722619322112, + "grad_norm": 1.4292058799019856, + "learning_rate": 4.2774295238967386e-07, + "loss": 0.4898286461830139, + "step": 6153 + }, + { + "epoch": 1.4189531934516948, + "grad_norm": 1.5872207462828773, + "learning_rate": 4.2743033143759733e-07, + "loss": 0.5432708859443665, + "step": 6154 + }, + { + "epoch": 1.4191837675812775, + "grad_norm": 1.811563729099354, + "learning_rate": 4.2711779371432445e-07, + "loss": 0.4438853859901428, + "step": 6155 + }, + { + "epoch": 1.41941434171086, + "grad_norm": 1.4197202159023756, + "learning_rate": 4.268053392652863e-07, + "loss": 0.4885905385017395, + "step": 6156 + }, + { + "epoch": 1.4196449158404427, + "grad_norm": 2.10234923243058, + "learning_rate": 4.264929681359013e-07, + "loss": 0.4465547204017639, + "step": 6157 + }, + { + "epoch": 1.4198754899700254, + "grad_norm": 1.5987256760741122, + "learning_rate": 4.2618068037157594e-07, + "loss": 0.4392780661582947, + "step": 6158 + }, + { + "epoch": 1.420106064099608, + "grad_norm": 1.7421664904589054, + "learning_rate": 4.258684760177039e-07, + "loss": 0.4501269459724426, + "step": 6159 + }, + { + "epoch": 1.4203366382291907, + "grad_norm": 1.399976858224263, + "learning_rate": 4.2555635511966783e-07, + "loss": 0.38439738750457764, + "step": 6160 + }, + { + "epoch": 1.4205672123587734, + "grad_norm": 1.4211214514262747, + "learning_rate": 4.2524431772283743e-07, + "loss": 0.4679202437400818, + "step": 6161 + }, + { + "epoch": 1.420797786488356, + "grad_norm": 1.3094843029172225, + "learning_rate": 4.2493236387257e-07, + "loss": 0.33505773544311523, + "step": 6162 + }, + { + "epoch": 1.4210283606179388, + "grad_norm": 1.7083049967506945, + "learning_rate": 4.246204936142116e-07, + "loss": 0.39141514897346497, + "step": 6163 + }, + { + "epoch": 1.4212589347475213, + "grad_norm": 1.5786326298364493, + "learning_rate": 
4.243087069930958e-07, + "loss": 0.49278295040130615, + "step": 6164 + }, + { + "epoch": 1.421489508877104, + "grad_norm": 2.2314439595882214, + "learning_rate": 4.239970040545442e-07, + "loss": 0.44093143939971924, + "step": 6165 + }, + { + "epoch": 1.4217200830066867, + "grad_norm": 1.5138193694081605, + "learning_rate": 4.236853848438654e-07, + "loss": 0.3840683102607727, + "step": 6166 + }, + { + "epoch": 1.4219506571362692, + "grad_norm": 1.7654139979291832, + "learning_rate": 4.23373849406357e-07, + "loss": 0.49814748764038086, + "step": 6167 + }, + { + "epoch": 1.422181231265852, + "grad_norm": 1.672205831624779, + "learning_rate": 4.2306239778730314e-07, + "loss": 0.37481504678726196, + "step": 6168 + }, + { + "epoch": 1.4224118053954347, + "grad_norm": 1.6089555356775624, + "learning_rate": 4.227510300319772e-07, + "loss": 0.3936859965324402, + "step": 6169 + }, + { + "epoch": 1.4226423795250174, + "grad_norm": 1.6958111197730896, + "learning_rate": 4.224397461856389e-07, + "loss": 0.4448816478252411, + "step": 6170 + }, + { + "epoch": 1.4228729536546, + "grad_norm": 1.7506080980818486, + "learning_rate": 4.22128546293537e-07, + "loss": 0.5494886040687561, + "step": 6171 + }, + { + "epoch": 1.4231035277841826, + "grad_norm": 1.6093955633210433, + "learning_rate": 4.218174304009078e-07, + "loss": 0.4532161355018616, + "step": 6172 + }, + { + "epoch": 1.4233341019137653, + "grad_norm": 1.5423276922709723, + "learning_rate": 4.215063985529743e-07, + "loss": 0.4771450161933899, + "step": 6173 + }, + { + "epoch": 1.4235646760433478, + "grad_norm": 1.4359456178719159, + "learning_rate": 4.211954507949491e-07, + "loss": 0.40784329175949097, + "step": 6174 + }, + { + "epoch": 1.4237952501729305, + "grad_norm": 1.6548161498628766, + "learning_rate": 4.208845871720308e-07, + "loss": 0.5336268544197083, + "step": 6175 + }, + { + "epoch": 1.4240258243025132, + "grad_norm": 1.495644640745375, + "learning_rate": 4.205738077294072e-07, + "loss": 0.44641751050949097, + 
"step": 6176 + }, + { + "epoch": 1.424256398432096, + "grad_norm": 1.650188328042211, + "learning_rate": 4.2026311251225264e-07, + "loss": 0.4370793104171753, + "step": 6177 + }, + { + "epoch": 1.4244869725616787, + "grad_norm": 1.5423618719597711, + "learning_rate": 4.1995250156573046e-07, + "loss": 0.4290730953216553, + "step": 6178 + }, + { + "epoch": 1.4247175466912612, + "grad_norm": 1.8757556733756044, + "learning_rate": 4.196419749349904e-07, + "loss": 0.5021491646766663, + "step": 6179 + }, + { + "epoch": 1.4249481208208439, + "grad_norm": 1.4243786827618563, + "learning_rate": 4.193315326651711e-07, + "loss": 0.3880186080932617, + "step": 6180 + }, + { + "epoch": 1.4251786949504266, + "grad_norm": 1.6032235222838507, + "learning_rate": 4.1902117480139876e-07, + "loss": 0.46498721837997437, + "step": 6181 + }, + { + "epoch": 1.425409269080009, + "grad_norm": 1.6074916356613946, + "learning_rate": 4.187109013887863e-07, + "loss": 0.45799821615219116, + "step": 6182 + }, + { + "epoch": 1.4256398432095918, + "grad_norm": 1.7936327965955485, + "learning_rate": 4.1840071247243594e-07, + "loss": 0.47459733486175537, + "step": 6183 + }, + { + "epoch": 1.4258704173391745, + "grad_norm": 1.7628830057109544, + "learning_rate": 4.18090608097436e-07, + "loss": 0.47636276483535767, + "step": 6184 + }, + { + "epoch": 1.4261009914687572, + "grad_norm": 1.4575388433663756, + "learning_rate": 4.17780588308864e-07, + "loss": 0.4710165858268738, + "step": 6185 + }, + { + "epoch": 1.42633156559834, + "grad_norm": 1.6068491390352067, + "learning_rate": 4.174706531517836e-07, + "loss": 0.4222904443740845, + "step": 6186 + }, + { + "epoch": 1.4265621397279225, + "grad_norm": 1.6136307494472921, + "learning_rate": 4.171608026712476e-07, + "loss": 0.43496620655059814, + "step": 6187 + }, + { + "epoch": 1.4267927138575052, + "grad_norm": 1.6637888441260775, + "learning_rate": 4.1685103691229597e-07, + "loss": 0.5178344249725342, + "step": 6188 + }, + { + "epoch": 1.4270232879870879, 
+ "grad_norm": 1.2438461713878222, + "learning_rate": 4.1654135591995644e-07, + "loss": 0.4033231735229492, + "step": 6189 + }, + { + "epoch": 1.4272538621166704, + "grad_norm": 1.6711330724791171, + "learning_rate": 4.162317597392436e-07, + "loss": 0.3368793725967407, + "step": 6190 + }, + { + "epoch": 1.427484436246253, + "grad_norm": 1.6185157962363963, + "learning_rate": 4.159222484151612e-07, + "loss": 0.44133609533309937, + "step": 6191 + }, + { + "epoch": 1.4277150103758358, + "grad_norm": 1.4778493402771002, + "learning_rate": 4.1561282199269944e-07, + "loss": 0.431888222694397, + "step": 6192 + }, + { + "epoch": 1.4279455845054185, + "grad_norm": 1.6042487363335018, + "learning_rate": 4.1530348051683615e-07, + "loss": 0.4319697618484497, + "step": 6193 + }, + { + "epoch": 1.4281761586350012, + "grad_norm": 2.1012743912812986, + "learning_rate": 4.1499422403253783e-07, + "loss": 0.5468018054962158, + "step": 6194 + }, + { + "epoch": 1.4284067327645837, + "grad_norm": 1.5851271799276925, + "learning_rate": 4.1468505258475784e-07, + "loss": 0.5083246231079102, + "step": 6195 + }, + { + "epoch": 1.4286373068941665, + "grad_norm": 1.5639019523203612, + "learning_rate": 4.1437596621843774e-07, + "loss": 0.3767821788787842, + "step": 6196 + }, + { + "epoch": 1.4288678810237492, + "grad_norm": 1.7459586887034657, + "learning_rate": 4.140669649785058e-07, + "loss": 0.5210238099098206, + "step": 6197 + }, + { + "epoch": 1.4290984551533317, + "grad_norm": 1.7429606479800976, + "learning_rate": 4.1375804890987907e-07, + "loss": 0.4498119354248047, + "step": 6198 + }, + { + "epoch": 1.4293290292829144, + "grad_norm": 1.8267093368864302, + "learning_rate": 4.134492180574609e-07, + "loss": 0.5093557238578796, + "step": 6199 + }, + { + "epoch": 1.429559603412497, + "grad_norm": 1.422406352052411, + "learning_rate": 4.131404724661438e-07, + "loss": 0.4745742082595825, + "step": 6200 + }, + { + "epoch": 1.4297901775420798, + "grad_norm": 1.506088588333767, + 
"learning_rate": 4.128318121808068e-07, + "loss": 0.45697301626205444, + "step": 6201 + }, + { + "epoch": 1.4300207516716625, + "grad_norm": 1.7309660786915744, + "learning_rate": 4.125232372463161e-07, + "loss": 0.4690994918346405, + "step": 6202 + }, + { + "epoch": 1.430251325801245, + "grad_norm": 1.6241026421208185, + "learning_rate": 4.1221474770752696e-07, + "loss": 0.49369046092033386, + "step": 6203 + }, + { + "epoch": 1.4304818999308277, + "grad_norm": 1.573925179309737, + "learning_rate": 4.1190634360928113e-07, + "loss": 0.5137126445770264, + "step": 6204 + }, + { + "epoch": 1.4307124740604105, + "grad_norm": 1.492371449937338, + "learning_rate": 4.1159802499640883e-07, + "loss": 0.43663549423217773, + "step": 6205 + }, + { + "epoch": 1.430943048189993, + "grad_norm": 1.373244593865611, + "learning_rate": 4.112897919137265e-07, + "loss": 0.40197718143463135, + "step": 6206 + }, + { + "epoch": 1.4311736223195757, + "grad_norm": 1.782636444844866, + "learning_rate": 4.1098164440603967e-07, + "loss": 0.5537480115890503, + "step": 6207 + }, + { + "epoch": 1.4314041964491584, + "grad_norm": 1.415124349915093, + "learning_rate": 4.1067358251814e-07, + "loss": 0.36077365279197693, + "step": 6208 + }, + { + "epoch": 1.4316347705787411, + "grad_norm": 1.8848844116732066, + "learning_rate": 4.103656062948081e-07, + "loss": 0.5421038866043091, + "step": 6209 + }, + { + "epoch": 1.4318653447083238, + "grad_norm": 1.5989095555214856, + "learning_rate": 4.100577157808107e-07, + "loss": 0.4330317974090576, + "step": 6210 + }, + { + "epoch": 1.4320959188379063, + "grad_norm": 1.5778977933757077, + "learning_rate": 4.0974991102090315e-07, + "loss": 0.4734618067741394, + "step": 6211 + }, + { + "epoch": 1.432326492967489, + "grad_norm": 1.7307541730622933, + "learning_rate": 4.0944219205982853e-07, + "loss": 0.4664125442504883, + "step": 6212 + }, + { + "epoch": 1.4325570670970718, + "grad_norm": 1.5163510968488794, + "learning_rate": 4.09134558942316e-07, + "loss": 
0.5214053988456726, + "step": 6213 + }, + { + "epoch": 1.4327876412266543, + "grad_norm": 1.4446024999002893, + "learning_rate": 4.08827011713084e-07, + "loss": 0.4694370627403259, + "step": 6214 + }, + { + "epoch": 1.433018215356237, + "grad_norm": 1.4399092047479434, + "learning_rate": 4.0851955041683674e-07, + "loss": 0.46517378091812134, + "step": 6215 + }, + { + "epoch": 1.4332487894858197, + "grad_norm": 1.589744461016997, + "learning_rate": 4.0821217509826766e-07, + "loss": 0.49152523279190063, + "step": 6216 + }, + { + "epoch": 1.4334793636154024, + "grad_norm": 1.3335404796705832, + "learning_rate": 4.0790488580205616e-07, + "loss": 0.4272884726524353, + "step": 6217 + }, + { + "epoch": 1.4337099377449851, + "grad_norm": 1.7167989658225775, + "learning_rate": 4.075976825728703e-07, + "loss": 0.4585829973220825, + "step": 6218 + }, + { + "epoch": 1.4339405118745676, + "grad_norm": 1.4284884424474726, + "learning_rate": 4.07290565455365e-07, + "loss": 0.33463186025619507, + "step": 6219 + }, + { + "epoch": 1.4341710860041503, + "grad_norm": 1.618873724040505, + "learning_rate": 4.0698353449418344e-07, + "loss": 0.4228953719139099, + "step": 6220 + }, + { + "epoch": 1.434401660133733, + "grad_norm": 1.688194150248175, + "learning_rate": 4.066765897339547e-07, + "loss": 0.5336583256721497, + "step": 6221 + }, + { + "epoch": 1.4346322342633155, + "grad_norm": 1.590308662997971, + "learning_rate": 4.063697312192972e-07, + "loss": 0.4779771864414215, + "step": 6222 + }, + { + "epoch": 1.4348628083928983, + "grad_norm": 1.4786534556099964, + "learning_rate": 4.060629589948155e-07, + "loss": 0.35226666927337646, + "step": 6223 + }, + { + "epoch": 1.435093382522481, + "grad_norm": 1.7110004239307235, + "learning_rate": 4.0575627310510174e-07, + "loss": 0.5006309747695923, + "step": 6224 + }, + { + "epoch": 1.4353239566520637, + "grad_norm": 1.5102552970375984, + "learning_rate": 4.0544967359473645e-07, + "loss": 0.3925382196903229, + "step": 6225 + }, + { + "epoch": 
1.4355545307816464, + "grad_norm": 1.4323897305301354, + "learning_rate": 4.0514316050828643e-07, + "loss": 0.3443659543991089, + "step": 6226 + }, + { + "epoch": 1.435785104911229, + "grad_norm": 1.3832333833383677, + "learning_rate": 4.048367338903067e-07, + "loss": 0.35585030913352966, + "step": 6227 + }, + { + "epoch": 1.4360156790408116, + "grad_norm": 1.551815991519559, + "learning_rate": 4.045303937853395e-07, + "loss": 0.4147206246852875, + "step": 6228 + }, + { + "epoch": 1.4362462531703943, + "grad_norm": 1.2817256800052734, + "learning_rate": 4.0422414023791486e-07, + "loss": 0.4475427567958832, + "step": 6229 + }, + { + "epoch": 1.4364768272999768, + "grad_norm": 1.3842198366935599, + "learning_rate": 4.0391797329254897e-07, + "loss": 0.5235386490821838, + "step": 6230 + }, + { + "epoch": 1.4367074014295595, + "grad_norm": 1.4929978689012695, + "learning_rate": 4.036118929937472e-07, + "loss": 0.3543087840080261, + "step": 6231 + }, + { + "epoch": 1.4369379755591423, + "grad_norm": 1.793735853632873, + "learning_rate": 4.03305899386001e-07, + "loss": 0.4718255400657654, + "step": 6232 + }, + { + "epoch": 1.437168549688725, + "grad_norm": 1.338180352532036, + "learning_rate": 4.0299999251378924e-07, + "loss": 0.41239792108535767, + "step": 6233 + }, + { + "epoch": 1.4373991238183077, + "grad_norm": 1.5900128771725797, + "learning_rate": 4.026941724215791e-07, + "loss": 0.4241238236427307, + "step": 6234 + }, + { + "epoch": 1.4376296979478902, + "grad_norm": 1.4625134538700348, + "learning_rate": 4.0238843915382435e-07, + "loss": 0.43678992986679077, + "step": 6235 + }, + { + "epoch": 1.437860272077473, + "grad_norm": 1.3845075397304552, + "learning_rate": 4.0208279275496706e-07, + "loss": 0.4304202198982239, + "step": 6236 + }, + { + "epoch": 1.4380908462070556, + "grad_norm": 1.4379971371115365, + "learning_rate": 4.0177723326943516e-07, + "loss": 0.4297143816947937, + "step": 6237 + }, + { + "epoch": 1.4383214203366381, + "grad_norm": 
1.4713452003345164, + "learning_rate": 4.0147176074164557e-07, + "loss": 0.4823951721191406, + "step": 6238 + }, + { + "epoch": 1.4385519944662208, + "grad_norm": 1.4766475893290447, + "learning_rate": 4.0116637521600104e-07, + "loss": 0.41384291648864746, + "step": 6239 + }, + { + "epoch": 1.4387825685958036, + "grad_norm": 1.4772189735738515, + "learning_rate": 4.008610767368933e-07, + "loss": 0.5725995898246765, + "step": 6240 + }, + { + "epoch": 1.4390131427253863, + "grad_norm": 1.580155865045121, + "learning_rate": 4.0055586534869976e-07, + "loss": 0.5222553014755249, + "step": 6241 + }, + { + "epoch": 1.439243716854969, + "grad_norm": 1.3886146191032183, + "learning_rate": 4.002507410957864e-07, + "loss": 0.33871912956237793, + "step": 6242 + }, + { + "epoch": 1.4394742909845515, + "grad_norm": 1.6215524550661136, + "learning_rate": 3.9994570402250647e-07, + "loss": 0.423028826713562, + "step": 6243 + }, + { + "epoch": 1.4397048651141342, + "grad_norm": 1.5682836985778081, + "learning_rate": 3.996407541731994e-07, + "loss": 0.4235682785511017, + "step": 6244 + }, + { + "epoch": 1.439935439243717, + "grad_norm": 1.231022526448631, + "learning_rate": 3.993358915921936e-07, + "loss": 0.43758147954940796, + "step": 6245 + }, + { + "epoch": 1.4401660133732994, + "grad_norm": 1.4111669631590298, + "learning_rate": 3.9903111632380314e-07, + "loss": 0.4462485611438751, + "step": 6246 + }, + { + "epoch": 1.4403965875028821, + "grad_norm": 1.4290246546090093, + "learning_rate": 3.9872642841233086e-07, + "loss": 0.4650310277938843, + "step": 6247 + }, + { + "epoch": 1.4406271616324648, + "grad_norm": 1.4998946903017614, + "learning_rate": 3.984218279020656e-07, + "loss": 0.36653342843055725, + "step": 6248 + }, + { + "epoch": 1.4408577357620476, + "grad_norm": 1.4936296304301175, + "learning_rate": 3.9811731483728483e-07, + "loss": 0.4102433919906616, + "step": 6249 + }, + { + "epoch": 1.4410883098916303, + "grad_norm": 1.6065631349936378, + "learning_rate": 
3.9781288926225187e-07, + "loss": 0.46611371636390686, + "step": 6250 + }, + { + "epoch": 1.4413188840212128, + "grad_norm": 1.4339333577964222, + "learning_rate": 3.9750855122121854e-07, + "loss": 0.39757978916168213, + "step": 6251 + }, + { + "epoch": 1.4415494581507955, + "grad_norm": 1.762654016187883, + "learning_rate": 3.972043007584236e-07, + "loss": 0.3736093044281006, + "step": 6252 + }, + { + "epoch": 1.4417800322803782, + "grad_norm": 1.463877920104907, + "learning_rate": 3.9690013791809243e-07, + "loss": 0.4907599091529846, + "step": 6253 + }, + { + "epoch": 1.4420106064099607, + "grad_norm": 1.8306810417206691, + "learning_rate": 3.965960627444387e-07, + "loss": 0.4852679967880249, + "step": 6254 + }, + { + "epoch": 1.4422411805395434, + "grad_norm": 1.379992571943406, + "learning_rate": 3.962920752816622e-07, + "loss": 0.3681846261024475, + "step": 6255 + }, + { + "epoch": 1.4424717546691261, + "grad_norm": 1.3930271555712797, + "learning_rate": 3.9598817557395136e-07, + "loss": 0.36029407382011414, + "step": 6256 + }, + { + "epoch": 1.4427023287987089, + "grad_norm": 1.5468752557100751, + "learning_rate": 3.9568436366548044e-07, + "loss": 0.4156547486782074, + "step": 6257 + }, + { + "epoch": 1.4429329029282916, + "grad_norm": 1.2893479866141693, + "learning_rate": 3.9538063960041155e-07, + "loss": 0.417999804019928, + "step": 6258 + }, + { + "epoch": 1.443163477057874, + "grad_norm": 1.5873772931626444, + "learning_rate": 3.9507700342289454e-07, + "loss": 0.34347790479660034, + "step": 6259 + }, + { + "epoch": 1.4433940511874568, + "grad_norm": 1.6747174695424258, + "learning_rate": 3.9477345517706606e-07, + "loss": 0.5093958973884583, + "step": 6260 + }, + { + "epoch": 1.4436246253170395, + "grad_norm": 1.3786087360846342, + "learning_rate": 3.9446999490704935e-07, + "loss": 0.45406264066696167, + "step": 6261 + }, + { + "epoch": 1.443855199446622, + "grad_norm": 1.4643807349818905, + "learning_rate": 3.941666226569561e-07, + "loss": 
0.35074740648269653, + "step": 6262 + }, + { + "epoch": 1.4440857735762047, + "grad_norm": 1.9209061652207753, + "learning_rate": 3.9386333847088414e-07, + "loss": 0.4588093161582947, + "step": 6263 + }, + { + "epoch": 1.4443163477057874, + "grad_norm": 1.706957598822881, + "learning_rate": 3.935601423929187e-07, + "loss": 0.5431508421897888, + "step": 6264 + }, + { + "epoch": 1.4445469218353701, + "grad_norm": 2.1293944579193744, + "learning_rate": 3.9325703446713253e-07, + "loss": 0.5942284464836121, + "step": 6265 + }, + { + "epoch": 1.4447774959649529, + "grad_norm": 1.563688512589723, + "learning_rate": 3.929540147375856e-07, + "loss": 0.45533287525177, + "step": 6266 + }, + { + "epoch": 1.4450080700945354, + "grad_norm": 1.4069649860322977, + "learning_rate": 3.926510832483252e-07, + "loss": 0.41154634952545166, + "step": 6267 + }, + { + "epoch": 1.445238644224118, + "grad_norm": 1.7442081379649044, + "learning_rate": 3.923482400433847e-07, + "loss": 0.548882246017456, + "step": 6268 + }, + { + "epoch": 1.4454692183537008, + "grad_norm": 1.6064445647457797, + "learning_rate": 3.9204548516678635e-07, + "loss": 0.4062466621398926, + "step": 6269 + }, + { + "epoch": 1.4456997924832833, + "grad_norm": 1.4970160030578672, + "learning_rate": 3.917428186625378e-07, + "loss": 0.39035165309906006, + "step": 6270 + }, + { + "epoch": 1.445930366612866, + "grad_norm": 1.647666751716306, + "learning_rate": 3.9144024057463545e-07, + "loss": 0.44899889826774597, + "step": 6271 + }, + { + "epoch": 1.4461609407424487, + "grad_norm": 1.6865824844286113, + "learning_rate": 3.911377509470616e-07, + "loss": 0.5676968097686768, + "step": 6272 + }, + { + "epoch": 1.4463915148720314, + "grad_norm": 1.5001442753287921, + "learning_rate": 3.9083534982378596e-07, + "loss": 0.5157150626182556, + "step": 6273 + }, + { + "epoch": 1.4466220890016142, + "grad_norm": 1.3999116109701921, + "learning_rate": 3.9053303724876595e-07, + "loss": 0.4405839443206787, + "step": 6274 + }, + { + 
"epoch": 1.4468526631311966, + "grad_norm": 1.4027072316284976, + "learning_rate": 3.9023081326594564e-07, + "loss": 0.4184240400791168, + "step": 6275 + }, + { + "epoch": 1.4470832372607794, + "grad_norm": 1.4676581347164595, + "learning_rate": 3.8992867791925687e-07, + "loss": 0.46825113892555237, + "step": 6276 + }, + { + "epoch": 1.447313811390362, + "grad_norm": 1.5974669468558875, + "learning_rate": 3.896266312526174e-07, + "loss": 0.39870697259902954, + "step": 6277 + }, + { + "epoch": 1.4475443855199446, + "grad_norm": 1.5056097224989398, + "learning_rate": 3.893246733099332e-07, + "loss": 0.5021681785583496, + "step": 6278 + }, + { + "epoch": 1.4477749596495273, + "grad_norm": 1.6448123845050522, + "learning_rate": 3.890228041350966e-07, + "loss": 0.5453378558158875, + "step": 6279 + }, + { + "epoch": 1.44800553377911, + "grad_norm": 1.6411917622938994, + "learning_rate": 3.887210237719877e-07, + "loss": 0.4488704800605774, + "step": 6280 + }, + { + "epoch": 1.4482361079086927, + "grad_norm": 1.5018657352386517, + "learning_rate": 3.8841933226447274e-07, + "loss": 0.45669007301330566, + "step": 6281 + }, + { + "epoch": 1.4484666820382754, + "grad_norm": 1.704954137797073, + "learning_rate": 3.881177296564061e-07, + "loss": 0.43954944610595703, + "step": 6282 + }, + { + "epoch": 1.448697256167858, + "grad_norm": 1.3077525799414271, + "learning_rate": 3.8781621599162896e-07, + "loss": 0.39490729570388794, + "step": 6283 + }, + { + "epoch": 1.4489278302974407, + "grad_norm": 1.8875404119821422, + "learning_rate": 3.875147913139688e-07, + "loss": 0.44206392765045166, + "step": 6284 + }, + { + "epoch": 1.4491584044270232, + "grad_norm": 1.5003627073617865, + "learning_rate": 3.872134556672415e-07, + "loss": 0.3874932527542114, + "step": 6285 + }, + { + "epoch": 1.4493889785566059, + "grad_norm": 1.616983828039009, + "learning_rate": 3.8691220909524847e-07, + "loss": 0.4762042760848999, + "step": 6286 + }, + { + "epoch": 1.4496195526861886, + "grad_norm": 
1.4983771405139852, + "learning_rate": 3.8661105164177955e-07, + "loss": 0.45220378041267395, + "step": 6287 + }, + { + "epoch": 1.4498501268157713, + "grad_norm": 1.5182044259213916, + "learning_rate": 3.863099833506105e-07, + "loss": 0.48711973428726196, + "step": 6288 + }, + { + "epoch": 1.450080700945354, + "grad_norm": 1.795485740865634, + "learning_rate": 3.8600900426550495e-07, + "loss": 0.3985457420349121, + "step": 6289 + }, + { + "epoch": 1.4503112750749365, + "grad_norm": 1.8111920220274738, + "learning_rate": 3.8570811443021324e-07, + "loss": 0.4626576006412506, + "step": 6290 + }, + { + "epoch": 1.4505418492045192, + "grad_norm": 1.3056530217454654, + "learning_rate": 3.8540731388847303e-07, + "loss": 0.49909156560897827, + "step": 6291 + }, + { + "epoch": 1.450772423334102, + "grad_norm": 1.6088418800938844, + "learning_rate": 3.8510660268400853e-07, + "loss": 0.47779160737991333, + "step": 6292 + }, + { + "epoch": 1.4510029974636844, + "grad_norm": 1.7546373602134575, + "learning_rate": 3.8480598086053073e-07, + "loss": 0.41273951530456543, + "step": 6293 + }, + { + "epoch": 1.4512335715932672, + "grad_norm": 1.372334717947673, + "learning_rate": 3.8450544846173873e-07, + "loss": 0.49659836292266846, + "step": 6294 + }, + { + "epoch": 1.4514641457228499, + "grad_norm": 1.5745738888755318, + "learning_rate": 3.842050055313174e-07, + "loss": 0.48864418268203735, + "step": 6295 + }, + { + "epoch": 1.4516947198524326, + "grad_norm": 1.5511685453466029, + "learning_rate": 3.8390465211293964e-07, + "loss": 0.4437263011932373, + "step": 6296 + }, + { + "epoch": 1.4519252939820153, + "grad_norm": 1.425822828962689, + "learning_rate": 3.83604388250264e-07, + "loss": 0.4785847067832947, + "step": 6297 + }, + { + "epoch": 1.4521558681115978, + "grad_norm": 1.4667204310824673, + "learning_rate": 3.8330421398693815e-07, + "loss": 0.4376726746559143, + "step": 6298 + }, + { + "epoch": 1.4523864422411805, + "grad_norm": 1.3570227959381094, + "learning_rate": 
3.8300412936659456e-07, + "loss": 0.39121049642562866, + "step": 6299 + }, + { + "epoch": 1.4526170163707632, + "grad_norm": 1.3658035995507571, + "learning_rate": 3.827041344328541e-07, + "loss": 0.4635738730430603, + "step": 6300 + }, + { + "epoch": 1.4528475905003457, + "grad_norm": 2.0304852722065068, + "learning_rate": 3.8240422922932345e-07, + "loss": 0.502306342124939, + "step": 6301 + }, + { + "epoch": 1.4530781646299284, + "grad_norm": 1.4029845821737765, + "learning_rate": 3.8210441379959765e-07, + "loss": 0.4401247799396515, + "step": 6302 + }, + { + "epoch": 1.4533087387595112, + "grad_norm": 1.3861824238158087, + "learning_rate": 3.8180468818725744e-07, + "loss": 0.5291532874107361, + "step": 6303 + }, + { + "epoch": 1.4535393128890939, + "grad_norm": 1.6276608547131342, + "learning_rate": 3.8150505243587074e-07, + "loss": 0.44658181071281433, + "step": 6304 + }, + { + "epoch": 1.4537698870186766, + "grad_norm": 1.6458326531407963, + "learning_rate": 3.8120550658899284e-07, + "loss": 0.45127803087234497, + "step": 6305 + }, + { + "epoch": 1.454000461148259, + "grad_norm": 1.492007208083286, + "learning_rate": 3.809060506901659e-07, + "loss": 0.42187097668647766, + "step": 6306 + }, + { + "epoch": 1.4542310352778418, + "grad_norm": 1.5038936507089915, + "learning_rate": 3.806066847829191e-07, + "loss": 0.3573130667209625, + "step": 6307 + }, + { + "epoch": 1.4544616094074245, + "grad_norm": 1.9148379623538745, + "learning_rate": 3.8030740891076775e-07, + "loss": 0.4350733757019043, + "step": 6308 + }, + { + "epoch": 1.454692183537007, + "grad_norm": 1.541900067739278, + "learning_rate": 3.8000822311721526e-07, + "loss": 0.48514148592948914, + "step": 6309 + }, + { + "epoch": 1.4549227576665897, + "grad_norm": 1.4827947959124368, + "learning_rate": 3.797091274457507e-07, + "loss": 0.41036373376846313, + "step": 6310 + }, + { + "epoch": 1.4551533317961725, + "grad_norm": 1.494922453363639, + "learning_rate": 3.7941012193985113e-07, + "loss": 
0.4141424298286438, + "step": 6311 + }, + { + "epoch": 1.4553839059257552, + "grad_norm": 1.273366480801725, + "learning_rate": 3.7911120664297947e-07, + "loss": 0.4465962052345276, + "step": 6312 + }, + { + "epoch": 1.455614480055338, + "grad_norm": 1.5781844793110138, + "learning_rate": 3.7881238159858653e-07, + "loss": 0.42370718717575073, + "step": 6313 + }, + { + "epoch": 1.4558450541849204, + "grad_norm": 1.5971127849956464, + "learning_rate": 3.785136468501098e-07, + "loss": 0.5199419260025024, + "step": 6314 + }, + { + "epoch": 1.456075628314503, + "grad_norm": 1.617344004292436, + "learning_rate": 3.782150024409727e-07, + "loss": 0.4802842140197754, + "step": 6315 + }, + { + "epoch": 1.4563062024440858, + "grad_norm": 1.24431475405318, + "learning_rate": 3.77916448414587e-07, + "loss": 0.4640405476093292, + "step": 6316 + }, + { + "epoch": 1.4565367765736683, + "grad_norm": 1.4636172678889559, + "learning_rate": 3.776179848143497e-07, + "loss": 0.4338728189468384, + "step": 6317 + }, + { + "epoch": 1.456767350703251, + "grad_norm": 2.139264242241595, + "learning_rate": 3.7731961168364644e-07, + "loss": 0.42709267139434814, + "step": 6318 + }, + { + "epoch": 1.4569979248328337, + "grad_norm": 1.6617712318798017, + "learning_rate": 3.7702132906584784e-07, + "loss": 0.4985729455947876, + "step": 6319 + }, + { + "epoch": 1.4572284989624165, + "grad_norm": 1.441274937368423, + "learning_rate": 3.7672313700431277e-07, + "loss": 0.46335911750793457, + "step": 6320 + }, + { + "epoch": 1.4574590730919992, + "grad_norm": 1.416712646344965, + "learning_rate": 3.7642503554238657e-07, + "loss": 0.39897364377975464, + "step": 6321 + }, + { + "epoch": 1.4576896472215817, + "grad_norm": 1.7524170106258121, + "learning_rate": 3.761270247234014e-07, + "loss": 0.4338347017765045, + "step": 6322 + }, + { + "epoch": 1.4579202213511644, + "grad_norm": 1.5421394568485456, + "learning_rate": 3.7582910459067607e-07, + "loss": 0.4619752764701843, + "step": 6323 + }, + { + "epoch": 
1.458150795480747, + "grad_norm": 1.6592584693059589, + "learning_rate": 3.7553127518751583e-07, + "loss": 0.4676104784011841, + "step": 6324 + }, + { + "epoch": 1.4583813696103296, + "grad_norm": 1.495504668484879, + "learning_rate": 3.752335365572138e-07, + "loss": 0.37536361813545227, + "step": 6325 + }, + { + "epoch": 1.4586119437399123, + "grad_norm": 1.5747560176376743, + "learning_rate": 3.749358887430487e-07, + "loss": 0.4389209449291229, + "step": 6326 + }, + { + "epoch": 1.458842517869495, + "grad_norm": 1.561809426616513, + "learning_rate": 3.746383317882874e-07, + "loss": 0.44722115993499756, + "step": 6327 + }, + { + "epoch": 1.4590730919990778, + "grad_norm": 1.8177515516918266, + "learning_rate": 3.743408657361821e-07, + "loss": 0.39179277420043945, + "step": 6328 + }, + { + "epoch": 1.4593036661286605, + "grad_norm": 1.5511886302037754, + "learning_rate": 3.7404349062997275e-07, + "loss": 0.4704967737197876, + "step": 6329 + }, + { + "epoch": 1.459534240258243, + "grad_norm": 1.4679557991806869, + "learning_rate": 3.737462065128859e-07, + "loss": 0.4294360876083374, + "step": 6330 + }, + { + "epoch": 1.4597648143878257, + "grad_norm": 1.5082268745032619, + "learning_rate": 3.734490134281353e-07, + "loss": 0.5070170760154724, + "step": 6331 + }, + { + "epoch": 1.4599953885174084, + "grad_norm": 1.4285887900302483, + "learning_rate": 3.7315191141892013e-07, + "loss": 0.3670409023761749, + "step": 6332 + }, + { + "epoch": 1.460225962646991, + "grad_norm": 1.4866250279072872, + "learning_rate": 3.7285490052842785e-07, + "loss": 0.5043025016784668, + "step": 6333 + }, + { + "epoch": 1.4604565367765736, + "grad_norm": 1.5557807366245089, + "learning_rate": 3.725579807998316e-07, + "loss": 0.43942689895629883, + "step": 6334 + }, + { + "epoch": 1.4606871109061563, + "grad_norm": 1.61242194971354, + "learning_rate": 3.7226115227629164e-07, + "loss": 0.3444882035255432, + "step": 6335 + }, + { + "epoch": 1.460917685035739, + "grad_norm": 1.4093154726677697, 
+ "learning_rate": 3.71964415000955e-07, + "loss": 0.3994483947753906, + "step": 6336 + }, + { + "epoch": 1.4611482591653218, + "grad_norm": 1.799524270186483, + "learning_rate": 3.7166776901695564e-07, + "loss": 0.3581928014755249, + "step": 6337 + }, + { + "epoch": 1.4613788332949043, + "grad_norm": 1.4094806965107296, + "learning_rate": 3.7137121436741423e-07, + "loss": 0.4068276286125183, + "step": 6338 + }, + { + "epoch": 1.461609407424487, + "grad_norm": 1.5430920931361498, + "learning_rate": 3.710747510954376e-07, + "loss": 0.4140080213546753, + "step": 6339 + }, + { + "epoch": 1.4618399815540697, + "grad_norm": 1.5667918006300834, + "learning_rate": 3.707783792441201e-07, + "loss": 0.4328460097312927, + "step": 6340 + }, + { + "epoch": 1.4620705556836522, + "grad_norm": 1.7344820768552758, + "learning_rate": 3.704820988565419e-07, + "loss": 0.49252209067344666, + "step": 6341 + }, + { + "epoch": 1.462301129813235, + "grad_norm": 1.4564646974830249, + "learning_rate": 3.7018590997577093e-07, + "loss": 0.43051671981811523, + "step": 6342 + }, + { + "epoch": 1.4625317039428176, + "grad_norm": 1.5901870751351228, + "learning_rate": 3.698898126448605e-07, + "loss": 0.5131059288978577, + "step": 6343 + }, + { + "epoch": 1.4627622780724003, + "grad_norm": 2.025312431684147, + "learning_rate": 3.6959380690685185e-07, + "loss": 0.4633597731590271, + "step": 6344 + }, + { + "epoch": 1.462992852201983, + "grad_norm": 1.5138095102076332, + "learning_rate": 3.6929789280477265e-07, + "loss": 0.3603428602218628, + "step": 6345 + }, + { + "epoch": 1.4632234263315655, + "grad_norm": 1.4981993836978438, + "learning_rate": 3.6900207038163633e-07, + "loss": 0.5337490439414978, + "step": 6346 + }, + { + "epoch": 1.4634540004611483, + "grad_norm": 1.8305905685338713, + "learning_rate": 3.687063396804444e-07, + "loss": 0.4940665066242218, + "step": 6347 + }, + { + "epoch": 1.463684574590731, + "grad_norm": 2.012256207996667, + "learning_rate": 3.6841070074418367e-07, + "loss": 
0.45664387941360474, + "step": 6348 + }, + { + "epoch": 1.4639151487203135, + "grad_norm": 1.6965611532451377, + "learning_rate": 3.681151536158289e-07, + "loss": 0.4546254277229309, + "step": 6349 + }, + { + "epoch": 1.4641457228498962, + "grad_norm": 1.4760234786987596, + "learning_rate": 3.6781969833834015e-07, + "loss": 0.37474149465560913, + "step": 6350 + }, + { + "epoch": 1.464376296979479, + "grad_norm": 1.473821341410815, + "learning_rate": 3.675243349546655e-07, + "loss": 0.38016337156295776, + "step": 6351 + }, + { + "epoch": 1.4646068711090616, + "grad_norm": 1.3725937182091388, + "learning_rate": 3.672290635077384e-07, + "loss": 0.46079233288764954, + "step": 6352 + }, + { + "epoch": 1.4648374452386443, + "grad_norm": 1.754716547965532, + "learning_rate": 3.669338840404799e-07, + "loss": 0.39382117986679077, + "step": 6353 + }, + { + "epoch": 1.4650680193682268, + "grad_norm": 1.5018040161914972, + "learning_rate": 3.6663879659579766e-07, + "loss": 0.4502074718475342, + "step": 6354 + }, + { + "epoch": 1.4652985934978096, + "grad_norm": 1.4446726503170868, + "learning_rate": 3.663438012165848e-07, + "loss": 0.38199833035469055, + "step": 6355 + }, + { + "epoch": 1.4655291676273923, + "grad_norm": 1.4760781012903512, + "learning_rate": 3.660488979457228e-07, + "loss": 0.4340086579322815, + "step": 6356 + }, + { + "epoch": 1.4657597417569748, + "grad_norm": 1.7005769563076596, + "learning_rate": 3.65754086826078e-07, + "loss": 0.5425105094909668, + "step": 6357 + }, + { + "epoch": 1.4659903158865575, + "grad_norm": 1.4480393161895644, + "learning_rate": 3.654593679005048e-07, + "loss": 0.4671604633331299, + "step": 6358 + }, + { + "epoch": 1.4662208900161402, + "grad_norm": 1.6404775976624013, + "learning_rate": 3.6516474121184317e-07, + "loss": 0.4608290195465088, + "step": 6359 + }, + { + "epoch": 1.466451464145723, + "grad_norm": 1.9415349791307541, + "learning_rate": 3.6487020680292023e-07, + "loss": 0.5272650122642517, + "step": 6360 + }, + { + 
"epoch": 1.4666820382753056, + "grad_norm": 1.4115666654764834, + "learning_rate": 3.645757647165495e-07, + "loss": 0.40990152955055237, + "step": 6361 + }, + { + "epoch": 1.4669126124048881, + "grad_norm": 1.405277693008717, + "learning_rate": 3.6428141499553166e-07, + "loss": 0.4723639488220215, + "step": 6362 + }, + { + "epoch": 1.4671431865344708, + "grad_norm": 1.7789473556982454, + "learning_rate": 3.639871576826529e-07, + "loss": 0.5115963220596313, + "step": 6363 + }, + { + "epoch": 1.4673737606640536, + "grad_norm": 1.669989973617769, + "learning_rate": 3.636929928206862e-07, + "loss": 0.44548431038856506, + "step": 6364 + }, + { + "epoch": 1.467604334793636, + "grad_norm": 1.5904330694852653, + "learning_rate": 3.633989204523922e-07, + "loss": 0.48599356412887573, + "step": 6365 + }, + { + "epoch": 1.4678349089232188, + "grad_norm": 1.4664661517676485, + "learning_rate": 3.631049406205164e-07, + "loss": 0.463236004114151, + "step": 6366 + }, + { + "epoch": 1.4680654830528015, + "grad_norm": 1.7238002544119735, + "learning_rate": 3.6281105336779225e-07, + "loss": 0.4840255379676819, + "step": 6367 + }, + { + "epoch": 1.4682960571823842, + "grad_norm": 1.5727046676978498, + "learning_rate": 3.6251725873693926e-07, + "loss": 0.39191675186157227, + "step": 6368 + }, + { + "epoch": 1.468526631311967, + "grad_norm": 1.4333992251496341, + "learning_rate": 3.622235567706637e-07, + "loss": 0.5161769986152649, + "step": 6369 + }, + { + "epoch": 1.4687572054415494, + "grad_norm": 1.811820117175508, + "learning_rate": 3.6192994751165764e-07, + "loss": 0.4579160213470459, + "step": 6370 + }, + { + "epoch": 1.4689877795711321, + "grad_norm": 1.5348364339019953, + "learning_rate": 3.616364310026006e-07, + "loss": 0.4254727363586426, + "step": 6371 + }, + { + "epoch": 1.4692183537007149, + "grad_norm": 1.60846510703603, + "learning_rate": 3.613430072861575e-07, + "loss": 0.3614911139011383, + "step": 6372 + }, + { + "epoch": 1.4694489278302973, + "grad_norm": 
1.332197813540827, + "learning_rate": 3.610496764049814e-07, + "loss": 0.4501386284828186, + "step": 6373 + }, + { + "epoch": 1.46967950195988, + "grad_norm": 1.4207205401720155, + "learning_rate": 3.607564384017102e-07, + "loss": 0.4988802671432495, + "step": 6374 + }, + { + "epoch": 1.4699100760894628, + "grad_norm": 1.5751788296655767, + "learning_rate": 3.6046329331896907e-07, + "loss": 0.4277713894844055, + "step": 6375 + }, + { + "epoch": 1.4701406502190455, + "grad_norm": 1.5414838298104503, + "learning_rate": 3.601702411993697e-07, + "loss": 0.5007919073104858, + "step": 6376 + }, + { + "epoch": 1.4703712243486282, + "grad_norm": 1.5705777345927519, + "learning_rate": 3.5987728208551015e-07, + "loss": 0.4857282042503357, + "step": 6377 + }, + { + "epoch": 1.4706017984782107, + "grad_norm": 1.3913774043642957, + "learning_rate": 3.595844160199756e-07, + "loss": 0.45752188563346863, + "step": 6378 + }, + { + "epoch": 1.4708323726077934, + "grad_norm": 1.3374827793978188, + "learning_rate": 3.592916430453361e-07, + "loss": 0.4364059269428253, + "step": 6379 + }, + { + "epoch": 1.4710629467373761, + "grad_norm": 1.4896729369612345, + "learning_rate": 3.589989632041501e-07, + "loss": 0.48765695095062256, + "step": 6380 + }, + { + "epoch": 1.4712935208669586, + "grad_norm": 1.8321401665511103, + "learning_rate": 3.5870637653896087e-07, + "loss": 0.5505347847938538, + "step": 6381 + }, + { + "epoch": 1.4715240949965414, + "grad_norm": 1.5940287914496154, + "learning_rate": 3.584138830922994e-07, + "loss": 0.4468069076538086, + "step": 6382 + }, + { + "epoch": 1.471754669126124, + "grad_norm": 1.2639532856264213, + "learning_rate": 3.5812148290668186e-07, + "loss": 0.4050968289375305, + "step": 6383 + }, + { + "epoch": 1.4719852432557068, + "grad_norm": 1.6709771008348266, + "learning_rate": 3.578291760246122e-07, + "loss": 0.47324883937835693, + "step": 6384 + }, + { + "epoch": 1.4722158173852895, + "grad_norm": 1.646291535207369, + "learning_rate": 
3.5753696248858025e-07, + "loss": 0.4431450366973877, + "step": 6385 + }, + { + "epoch": 1.472446391514872, + "grad_norm": 1.3398593447687968, + "learning_rate": 3.5724484234106166e-07, + "loss": 0.4599822163581848, + "step": 6386 + }, + { + "epoch": 1.4726769656444547, + "grad_norm": 1.6764694987177748, + "learning_rate": 3.5695281562451964e-07, + "loss": 0.3655046224594116, + "step": 6387 + }, + { + "epoch": 1.4729075397740374, + "grad_norm": 1.925765064850511, + "learning_rate": 3.5666088238140267e-07, + "loss": 0.4543811082839966, + "step": 6388 + }, + { + "epoch": 1.47313811390362, + "grad_norm": 1.7682119668466059, + "learning_rate": 3.563690426541469e-07, + "loss": 0.45380568504333496, + "step": 6389 + }, + { + "epoch": 1.4733686880332026, + "grad_norm": 1.3928278789748259, + "learning_rate": 3.5607729648517336e-07, + "loss": 0.3640294373035431, + "step": 6390 + }, + { + "epoch": 1.4735992621627854, + "grad_norm": 1.4826659174775283, + "learning_rate": 3.557856439168907e-07, + "loss": 0.39890235662460327, + "step": 6391 + }, + { + "epoch": 1.473829836292368, + "grad_norm": 1.7657939773449876, + "learning_rate": 3.5549408499169374e-07, + "loss": 0.47551727294921875, + "step": 6392 + }, + { + "epoch": 1.4740604104219508, + "grad_norm": 1.5946717850777934, + "learning_rate": 3.5520261975196364e-07, + "loss": 0.43851834535598755, + "step": 6393 + }, + { + "epoch": 1.4742909845515333, + "grad_norm": 1.7160257871535318, + "learning_rate": 3.549112482400676e-07, + "loss": 0.45289307832717896, + "step": 6394 + }, + { + "epoch": 1.474521558681116, + "grad_norm": 1.660677297447299, + "learning_rate": 3.546199704983591e-07, + "loss": 0.5229180455207825, + "step": 6395 + }, + { + "epoch": 1.4747521328106985, + "grad_norm": 1.5089259577077747, + "learning_rate": 3.5432878656917884e-07, + "loss": 0.47332310676574707, + "step": 6396 + }, + { + "epoch": 1.4749827069402812, + "grad_norm": 1.402371205517633, + "learning_rate": 3.540376964948529e-07, + "loss": 
0.4079092741012573, + "step": 6397 + }, + { + "epoch": 1.475213281069864, + "grad_norm": 1.607654850710184, + "learning_rate": 3.5374670031769484e-07, + "loss": 0.43366020917892456, + "step": 6398 + }, + { + "epoch": 1.4754438551994467, + "grad_norm": 1.6067458113996615, + "learning_rate": 3.5345579808000294e-07, + "loss": 0.45040106773376465, + "step": 6399 + }, + { + "epoch": 1.4756744293290294, + "grad_norm": 1.584960802510298, + "learning_rate": 3.531649898240634e-07, + "loss": 0.4409756064414978, + "step": 6400 + }, + { + "epoch": 1.4759050034586119, + "grad_norm": 1.5204759785794038, + "learning_rate": 3.528742755921481e-07, + "loss": 0.4141521751880646, + "step": 6401 + }, + { + "epoch": 1.4761355775881946, + "grad_norm": 1.6363482264143396, + "learning_rate": 3.525836554265156e-07, + "loss": 0.4697296619415283, + "step": 6402 + }, + { + "epoch": 1.4763661517177773, + "grad_norm": 1.3771953803345143, + "learning_rate": 3.5229312936941013e-07, + "loss": 0.4369434714317322, + "step": 6403 + }, + { + "epoch": 1.4765967258473598, + "grad_norm": 1.3415133870830294, + "learning_rate": 3.5200269746306224e-07, + "loss": 0.4197359085083008, + "step": 6404 + }, + { + "epoch": 1.4768272999769425, + "grad_norm": 1.8249279231813902, + "learning_rate": 3.5171235974968996e-07, + "loss": 0.495933473110199, + "step": 6405 + }, + { + "epoch": 1.4770578741065252, + "grad_norm": 1.3638396377453934, + "learning_rate": 3.51422116271496e-07, + "loss": 0.4177231192588806, + "step": 6406 + }, + { + "epoch": 1.477288448236108, + "grad_norm": 1.5336568107147823, + "learning_rate": 3.511319670706705e-07, + "loss": 0.5366500020027161, + "step": 6407 + }, + { + "epoch": 1.4775190223656907, + "grad_norm": 1.5479295323166011, + "learning_rate": 3.508419121893897e-07, + "loss": 0.3900446891784668, + "step": 6408 + }, + { + "epoch": 1.4777495964952732, + "grad_norm": 1.8223854522009124, + "learning_rate": 3.5055195166981646e-07, + "loss": 0.40877431631088257, + "step": 6409 + }, + { + 
"epoch": 1.4779801706248559, + "grad_norm": 1.3594177124317366, + "learning_rate": 3.502620855540985e-07, + "loss": 0.4381163716316223, + "step": 6410 + }, + { + "epoch": 1.4782107447544386, + "grad_norm": 1.2256800281998605, + "learning_rate": 3.4997231388437167e-07, + "loss": 0.3449817895889282, + "step": 6411 + }, + { + "epoch": 1.478441318884021, + "grad_norm": 1.4879818959728963, + "learning_rate": 3.4968263670275653e-07, + "loss": 0.4879523515701294, + "step": 6412 + }, + { + "epoch": 1.4786718930136038, + "grad_norm": 1.5651020351069762, + "learning_rate": 3.493930540513613e-07, + "loss": 0.3781365156173706, + "step": 6413 + }, + { + "epoch": 1.4789024671431865, + "grad_norm": 1.6645622352676888, + "learning_rate": 3.49103565972279e-07, + "loss": 0.4505656361579895, + "step": 6414 + }, + { + "epoch": 1.4791330412727692, + "grad_norm": 1.4565716791756764, + "learning_rate": 3.4881417250759006e-07, + "loss": 0.4285612106323242, + "step": 6415 + }, + { + "epoch": 1.479363615402352, + "grad_norm": 1.5357416036601346, + "learning_rate": 3.48524873699361e-07, + "loss": 0.5285177826881409, + "step": 6416 + }, + { + "epoch": 1.4795941895319344, + "grad_norm": 1.6484784065232339, + "learning_rate": 3.482356695896437e-07, + "loss": 0.4504782259464264, + "step": 6417 + }, + { + "epoch": 1.4798247636615172, + "grad_norm": 1.5658620514352724, + "learning_rate": 3.4794656022047765e-07, + "loss": 0.45295125246047974, + "step": 6418 + }, + { + "epoch": 1.4800553377910999, + "grad_norm": 1.3627022105594853, + "learning_rate": 3.47657545633887e-07, + "loss": 0.35889285802841187, + "step": 6419 + }, + { + "epoch": 1.4802859119206824, + "grad_norm": 1.5560865897069756, + "learning_rate": 3.4736862587188384e-07, + "loss": 0.49129703640937805, + "step": 6420 + }, + { + "epoch": 1.480516486050265, + "grad_norm": 1.6626930717329957, + "learning_rate": 3.4707980097646474e-07, + "loss": 0.5018036365509033, + "step": 6421 + }, + { + "epoch": 1.4807470601798478, + "grad_norm": 
1.6557207215915222, + "learning_rate": 3.46791070989614e-07, + "loss": 0.48743095993995667, + "step": 6422 + }, + { + "epoch": 1.4809776343094305, + "grad_norm": 1.5043027194300391, + "learning_rate": 3.46502435953301e-07, + "loss": 0.4876127243041992, + "step": 6423 + }, + { + "epoch": 1.4812082084390132, + "grad_norm": 1.971149486413709, + "learning_rate": 3.462138959094818e-07, + "loss": 0.517420768737793, + "step": 6424 + }, + { + "epoch": 1.4814387825685957, + "grad_norm": 1.8274785313456325, + "learning_rate": 3.4592545090009907e-07, + "loss": 0.49587076902389526, + "step": 6425 + }, + { + "epoch": 1.4816693566981785, + "grad_norm": 1.5362037346917286, + "learning_rate": 3.4563710096708063e-07, + "loss": 0.43007123470306396, + "step": 6426 + }, + { + "epoch": 1.4818999308277612, + "grad_norm": 1.358212427456112, + "learning_rate": 3.4534884615234163e-07, + "loss": 0.41231095790863037, + "step": 6427 + }, + { + "epoch": 1.4821305049573437, + "grad_norm": 1.6451517308598724, + "learning_rate": 3.450606864977822e-07, + "loss": 0.4454977512359619, + "step": 6428 + }, + { + "epoch": 1.4823610790869264, + "grad_norm": 1.3739971676037328, + "learning_rate": 3.447726220452899e-07, + "loss": 0.4432292878627777, + "step": 6429 + }, + { + "epoch": 1.482591653216509, + "grad_norm": 1.6222705799101154, + "learning_rate": 3.444846528367372e-07, + "loss": 0.47547852993011475, + "step": 6430 + }, + { + "epoch": 1.4828222273460918, + "grad_norm": 1.522255385470065, + "learning_rate": 3.441967789139837e-07, + "loss": 0.45712774991989136, + "step": 6431 + }, + { + "epoch": 1.4830528014756745, + "grad_norm": 2.2700209255759107, + "learning_rate": 3.439090003188748e-07, + "loss": 0.4485551714897156, + "step": 6432 + }, + { + "epoch": 1.483283375605257, + "grad_norm": 1.4019614855782472, + "learning_rate": 3.4362131709324225e-07, + "loss": 0.5157139301300049, + "step": 6433 + }, + { + "epoch": 1.4835139497348397, + "grad_norm": 1.6970431173839349, + "learning_rate": 
3.4333372927890346e-07, + "loss": 0.3786337375640869, + "step": 6434 + }, + { + "epoch": 1.4837445238644225, + "grad_norm": 1.430215191007922, + "learning_rate": 3.430462369176619e-07, + "loss": 0.444644033908844, + "step": 6435 + }, + { + "epoch": 1.483975097994005, + "grad_norm": 1.5213084700296855, + "learning_rate": 3.427588400513082e-07, + "loss": 0.450777530670166, + "step": 6436 + }, + { + "epoch": 1.4842056721235877, + "grad_norm": 1.6553650689166306, + "learning_rate": 3.424715387216176e-07, + "loss": 0.4547499418258667, + "step": 6437 + }, + { + "epoch": 1.4844362462531704, + "grad_norm": 1.3603667716838959, + "learning_rate": 3.4218433297035274e-07, + "loss": 0.41394394636154175, + "step": 6438 + }, + { + "epoch": 1.484666820382753, + "grad_norm": 1.3921623882761025, + "learning_rate": 3.4189722283926194e-07, + "loss": 0.46392822265625, + "step": 6439 + }, + { + "epoch": 1.4848973945123358, + "grad_norm": 1.3499969732544597, + "learning_rate": 3.416102083700797e-07, + "loss": 0.443311870098114, + "step": 6440 + }, + { + "epoch": 1.4851279686419183, + "grad_norm": 1.3830140570978715, + "learning_rate": 3.4132328960452594e-07, + "loss": 0.49744826555252075, + "step": 6441 + }, + { + "epoch": 1.485358542771501, + "grad_norm": 1.5191431970911358, + "learning_rate": 3.4103646658430787e-07, + "loss": 0.3906005620956421, + "step": 6442 + }, + { + "epoch": 1.4855891169010838, + "grad_norm": 1.3526583076340324, + "learning_rate": 3.407497393511175e-07, + "loss": 0.4236280918121338, + "step": 6443 + }, + { + "epoch": 1.4858196910306662, + "grad_norm": 1.6787824686307624, + "learning_rate": 3.4046310794663403e-07, + "loss": 0.5457645654678345, + "step": 6444 + }, + { + "epoch": 1.486050265160249, + "grad_norm": 1.7325001007084588, + "learning_rate": 3.4017657241252217e-07, + "loss": 0.541573703289032, + "step": 6445 + }, + { + "epoch": 1.4862808392898317, + "grad_norm": 1.9081537369674455, + "learning_rate": 3.398901327904322e-07, + "loss": 0.496945858001709, + 
"step": 6446 + }, + { + "epoch": 1.4865114134194144, + "grad_norm": 1.5413856714091914, + "learning_rate": 3.3960378912200136e-07, + "loss": 0.46119701862335205, + "step": 6447 + }, + { + "epoch": 1.4867419875489971, + "grad_norm": 1.8976464043536114, + "learning_rate": 3.3931754144885284e-07, + "loss": 0.5169441103935242, + "step": 6448 + }, + { + "epoch": 1.4869725616785796, + "grad_norm": 1.7130869588848308, + "learning_rate": 3.390313898125957e-07, + "loss": 0.525173544883728, + "step": 6449 + }, + { + "epoch": 1.4872031358081623, + "grad_norm": 1.6684348208587065, + "learning_rate": 3.3874533425482457e-07, + "loss": 0.46877139806747437, + "step": 6450 + }, + { + "epoch": 1.487433709937745, + "grad_norm": 1.6810644095850389, + "learning_rate": 3.3845937481712096e-07, + "loss": 0.49436479806900024, + "step": 6451 + }, + { + "epoch": 1.4876642840673275, + "grad_norm": 1.2950679928032611, + "learning_rate": 3.3817351154105145e-07, + "loss": 0.40879231691360474, + "step": 6452 + }, + { + "epoch": 1.4878948581969103, + "grad_norm": 1.5253823933458253, + "learning_rate": 3.378877444681697e-07, + "loss": 0.5060825347900391, + "step": 6453 + }, + { + "epoch": 1.488125432326493, + "grad_norm": 1.4561081118713566, + "learning_rate": 3.3760207364001434e-07, + "loss": 0.4875546097755432, + "step": 6454 + }, + { + "epoch": 1.4883560064560757, + "grad_norm": 1.5036556031092911, + "learning_rate": 3.373164990981108e-07, + "loss": 0.3791916072368622, + "step": 6455 + }, + { + "epoch": 1.4885865805856584, + "grad_norm": 1.4585716739422292, + "learning_rate": 3.370310208839704e-07, + "loss": 0.46757322549819946, + "step": 6456 + }, + { + "epoch": 1.488817154715241, + "grad_norm": 1.4061567541704671, + "learning_rate": 3.3674563903908994e-07, + "loss": 0.4334050416946411, + "step": 6457 + }, + { + "epoch": 1.4890477288448236, + "grad_norm": 1.4217577265821555, + "learning_rate": 3.3646035360495294e-07, + "loss": 0.4408720135688782, + "step": 6458 + }, + { + "epoch": 
1.4892783029744063, + "grad_norm": 1.637938092148249, + "learning_rate": 3.3617516462302795e-07, + "loss": 0.46556228399276733, + "step": 6459 + }, + { + "epoch": 1.4895088771039888, + "grad_norm": 1.3694379850190115, + "learning_rate": 3.3589007213477096e-07, + "loss": 0.5212184190750122, + "step": 6460 + }, + { + "epoch": 1.4897394512335715, + "grad_norm": 1.6425370019041445, + "learning_rate": 3.35605076181622e-07, + "loss": 0.5340084433555603, + "step": 6461 + }, + { + "epoch": 1.4899700253631543, + "grad_norm": 1.4674031830711234, + "learning_rate": 3.353201768050088e-07, + "loss": 0.38049495220184326, + "step": 6462 + }, + { + "epoch": 1.490200599492737, + "grad_norm": 1.5849611777401629, + "learning_rate": 3.350353740463442e-07, + "loss": 0.5480734705924988, + "step": 6463 + }, + { + "epoch": 1.4904311736223197, + "grad_norm": 1.4050939080217109, + "learning_rate": 3.3475066794702756e-07, + "loss": 0.4179231524467468, + "step": 6464 + }, + { + "epoch": 1.4906617477519022, + "grad_norm": 1.8331951463468434, + "learning_rate": 3.3446605854844335e-07, + "loss": 0.5380987524986267, + "step": 6465 + }, + { + "epoch": 1.490892321881485, + "grad_norm": 1.4221970681414315, + "learning_rate": 3.3418154589196226e-07, + "loss": 0.41146454215049744, + "step": 6466 + }, + { + "epoch": 1.4911228960110676, + "grad_norm": 1.5814296524447065, + "learning_rate": 3.3389713001894157e-07, + "loss": 0.4586387276649475, + "step": 6467 + }, + { + "epoch": 1.4913534701406501, + "grad_norm": 1.1757977126470995, + "learning_rate": 3.336128109707236e-07, + "loss": 0.4023931920528412, + "step": 6468 + }, + { + "epoch": 1.4915840442702328, + "grad_norm": 1.6673237012516164, + "learning_rate": 3.333285887886373e-07, + "loss": 0.5373448133468628, + "step": 6469 + }, + { + "epoch": 1.4918146183998156, + "grad_norm": 1.4523946751037105, + "learning_rate": 3.330444635139971e-07, + "loss": 0.4413643479347229, + "step": 6470 + }, + { + "epoch": 1.4920451925293983, + "grad_norm": 
1.3734904271626787, + "learning_rate": 3.3276043518810327e-07, + "loss": 0.399494469165802, + "step": 6471 + }, + { + "epoch": 1.492275766658981, + "grad_norm": 1.4170973987364872, + "learning_rate": 3.3247650385224256e-07, + "loss": 0.4353644847869873, + "step": 6472 + }, + { + "epoch": 1.4925063407885635, + "grad_norm": 1.7462483377307876, + "learning_rate": 3.3219266954768743e-07, + "loss": 0.5231607556343079, + "step": 6473 + }, + { + "epoch": 1.4927369149181462, + "grad_norm": 1.55800999194994, + "learning_rate": 3.3190893231569596e-07, + "loss": 0.414408802986145, + "step": 6474 + }, + { + "epoch": 1.492967489047729, + "grad_norm": 1.6408204727748315, + "learning_rate": 3.3162529219751155e-07, + "loss": 0.3921009302139282, + "step": 6475 + }, + { + "epoch": 1.4931980631773114, + "grad_norm": 1.6197044883986413, + "learning_rate": 3.3134174923436506e-07, + "loss": 0.4317164421081543, + "step": 6476 + }, + { + "epoch": 1.4934286373068941, + "grad_norm": 1.5697343564549593, + "learning_rate": 3.3105830346747175e-07, + "loss": 0.46302181482315063, + "step": 6477 + }, + { + "epoch": 1.4936592114364768, + "grad_norm": 1.464087037907405, + "learning_rate": 3.307749549380335e-07, + "loss": 0.45704615116119385, + "step": 6478 + }, + { + "epoch": 1.4938897855660596, + "grad_norm": 1.5032451370482525, + "learning_rate": 3.304917036872379e-07, + "loss": 0.45455485582351685, + "step": 6479 + }, + { + "epoch": 1.4941203596956423, + "grad_norm": 1.5465084069557762, + "learning_rate": 3.302085497562588e-07, + "loss": 0.41939157247543335, + "step": 6480 + }, + { + "epoch": 1.4943509338252248, + "grad_norm": 1.3682263746176198, + "learning_rate": 3.2992549318625487e-07, + "loss": 0.4109286367893219, + "step": 6481 + }, + { + "epoch": 1.4945815079548075, + "grad_norm": 2.0164734849697, + "learning_rate": 3.2964253401837173e-07, + "loss": 0.44710463285446167, + "step": 6482 + }, + { + "epoch": 1.4948120820843902, + "grad_norm": 1.6884711291100036, + "learning_rate": 
3.2935967229373986e-07, + "loss": 0.4330691695213318, + "step": 6483 + }, + { + "epoch": 1.4950426562139727, + "grad_norm": 1.4066891595951536, + "learning_rate": 3.2907690805347667e-07, + "loss": 0.41174834966659546, + "step": 6484 + }, + { + "epoch": 1.4952732303435554, + "grad_norm": 1.5235589172624593, + "learning_rate": 3.2879424133868406e-07, + "loss": 0.4368870258331299, + "step": 6485 + }, + { + "epoch": 1.4955038044731381, + "grad_norm": 1.581699276196859, + "learning_rate": 3.2851167219045107e-07, + "loss": 0.5155518651008606, + "step": 6486 + }, + { + "epoch": 1.4957343786027208, + "grad_norm": 1.4965040692694338, + "learning_rate": 3.282292006498522e-07, + "loss": 0.47015419602394104, + "step": 6487 + }, + { + "epoch": 1.4959649527323036, + "grad_norm": 1.4271101962383341, + "learning_rate": 3.2794682675794684e-07, + "loss": 0.41059884428977966, + "step": 6488 + }, + { + "epoch": 1.496195526861886, + "grad_norm": 1.7728377181019612, + "learning_rate": 3.2766455055578157e-07, + "loss": 0.4864136278629303, + "step": 6489 + }, + { + "epoch": 1.4964261009914688, + "grad_norm": 1.1780419841322618, + "learning_rate": 3.2738237208438744e-07, + "loss": 0.3599165976047516, + "step": 6490 + }, + { + "epoch": 1.4966566751210515, + "grad_norm": 1.4373611771192503, + "learning_rate": 3.2710029138478267e-07, + "loss": 0.4734029769897461, + "step": 6491 + }, + { + "epoch": 1.496887249250634, + "grad_norm": 1.5053587105753783, + "learning_rate": 3.268183084979699e-07, + "loss": 0.46739861369132996, + "step": 6492 + }, + { + "epoch": 1.4971178233802167, + "grad_norm": 1.745789102022849, + "learning_rate": 3.265364234649387e-07, + "loss": 0.46794670820236206, + "step": 6493 + }, + { + "epoch": 1.4973483975097994, + "grad_norm": 1.6683012395243093, + "learning_rate": 3.262546363266635e-07, + "loss": 0.463203489780426, + "step": 6494 + }, + { + "epoch": 1.4975789716393821, + "grad_norm": 1.4489172807794646, + "learning_rate": 3.2597294712410504e-07, + "loss": 
0.4495059847831726, + "step": 6495 + }, + { + "epoch": 1.4978095457689649, + "grad_norm": 1.464704014292867, + "learning_rate": 3.256913558982101e-07, + "loss": 0.43549245595932007, + "step": 6496 + }, + { + "epoch": 1.4980401198985474, + "grad_norm": 1.552183908593376, + "learning_rate": 3.254098626899102e-07, + "loss": 0.40582704544067383, + "step": 6497 + }, + { + "epoch": 1.49827069402813, + "grad_norm": 1.527774566610999, + "learning_rate": 3.251284675401238e-07, + "loss": 0.3720378279685974, + "step": 6498 + }, + { + "epoch": 1.4985012681577128, + "grad_norm": 1.4814613073983138, + "learning_rate": 3.24847170489754e-07, + "loss": 0.42694520950317383, + "step": 6499 + }, + { + "epoch": 1.4987318422872953, + "grad_norm": 1.4768231117771715, + "learning_rate": 3.2456597157969066e-07, + "loss": 0.442158043384552, + "step": 6500 + }, + { + "epoch": 1.498962416416878, + "grad_norm": 1.4765054194953837, + "learning_rate": 3.2428487085080846e-07, + "loss": 0.44245558977127075, + "step": 6501 + }, + { + "epoch": 1.4991929905464607, + "grad_norm": 1.3559485373971267, + "learning_rate": 3.240038683439684e-07, + "loss": 0.4127236008644104, + "step": 6502 + }, + { + "epoch": 1.4994235646760434, + "grad_norm": 1.4985576311709152, + "learning_rate": 3.237229641000171e-07, + "loss": 0.4262787103652954, + "step": 6503 + }, + { + "epoch": 1.4996541388056261, + "grad_norm": 1.6706445028718073, + "learning_rate": 3.2344215815978714e-07, + "loss": 0.4181264042854309, + "step": 6504 + }, + { + "epoch": 1.4998847129352086, + "grad_norm": 1.6044294628436637, + "learning_rate": 3.2316145056409616e-07, + "loss": 0.4416937530040741, + "step": 6505 + }, + { + "epoch": 1.5001152870647914, + "grad_norm": 1.8850023720212492, + "learning_rate": 3.228808413537476e-07, + "loss": 0.4901489019393921, + "step": 6506 + }, + { + "epoch": 1.5003458611943739, + "grad_norm": 1.3996173090866784, + "learning_rate": 3.2260033056953153e-07, + "loss": 0.37932026386260986, + "step": 6507 + }, + { + 
"epoch": 1.5005764353239566, + "grad_norm": 1.649923361135509, + "learning_rate": 3.223199182522223e-07, + "loss": 0.4680899381637573, + "step": 6508 + }, + { + "epoch": 1.5008070094535393, + "grad_norm": 1.6955418693371036, + "learning_rate": 3.2203960444258105e-07, + "loss": 0.508334219455719, + "step": 6509 + }, + { + "epoch": 1.501037583583122, + "grad_norm": 2.0480591557575685, + "learning_rate": 3.2175938918135415e-07, + "loss": 0.3386784791946411, + "step": 6510 + }, + { + "epoch": 1.5012681577127047, + "grad_norm": 1.860117074212897, + "learning_rate": 3.214792725092741e-07, + "loss": 0.4315892457962036, + "step": 6511 + }, + { + "epoch": 1.5014987318422874, + "grad_norm": 1.4533616152071933, + "learning_rate": 3.211992544670582e-07, + "loss": 0.3709627389907837, + "step": 6512 + }, + { + "epoch": 1.50172930597187, + "grad_norm": 1.6433224440752017, + "learning_rate": 3.2091933509541023e-07, + "loss": 0.5260987877845764, + "step": 6513 + }, + { + "epoch": 1.5019598801014526, + "grad_norm": 1.5201640514539732, + "learning_rate": 3.20639514435019e-07, + "loss": 0.5379073619842529, + "step": 6514 + }, + { + "epoch": 1.5021904542310351, + "grad_norm": 1.2867052063244526, + "learning_rate": 3.2035979252655976e-07, + "loss": 0.47530391812324524, + "step": 6515 + }, + { + "epoch": 1.5024210283606179, + "grad_norm": 1.5201328820105404, + "learning_rate": 3.200801694106926e-07, + "loss": 0.459227979183197, + "step": 6516 + }, + { + "epoch": 1.5026516024902006, + "grad_norm": 1.5330729417783509, + "learning_rate": 3.19800645128063e-07, + "loss": 0.4867238998413086, + "step": 6517 + }, + { + "epoch": 1.5028821766197833, + "grad_norm": 1.4246709864782185, + "learning_rate": 3.195212197193039e-07, + "loss": 0.38478928804397583, + "step": 6518 + }, + { + "epoch": 1.503112750749366, + "grad_norm": 1.625989812299007, + "learning_rate": 3.192418932250316e-07, + "loss": 0.3938423991203308, + "step": 6519 + }, + { + "epoch": 1.5033433248789487, + "grad_norm": 
1.8227844221564524, + "learning_rate": 3.1896266568584975e-07, + "loss": 0.457303911447525, + "step": 6520 + }, + { + "epoch": 1.5035738990085312, + "grad_norm": 1.5422494994233005, + "learning_rate": 3.1868353714234607e-07, + "loss": 0.5007269382476807, + "step": 6521 + }, + { + "epoch": 1.503804473138114, + "grad_norm": 1.4891205198132078, + "learning_rate": 3.1840450763509576e-07, + "loss": 0.3878381848335266, + "step": 6522 + }, + { + "epoch": 1.5040350472676964, + "grad_norm": 1.798955261342233, + "learning_rate": 3.181255772046575e-07, + "loss": 0.488269567489624, + "step": 6523 + }, + { + "epoch": 1.5042656213972792, + "grad_norm": 1.4981578078592954, + "learning_rate": 3.1784674589157767e-07, + "loss": 0.41664889454841614, + "step": 6524 + }, + { + "epoch": 1.5044961955268619, + "grad_norm": 1.6014375227212925, + "learning_rate": 3.175680137363863e-07, + "loss": 0.4862533509731293, + "step": 6525 + }, + { + "epoch": 1.5047267696564446, + "grad_norm": 1.599713126186934, + "learning_rate": 3.172893807796004e-07, + "loss": 0.4629037380218506, + "step": 6526 + }, + { + "epoch": 1.5049573437860273, + "grad_norm": 1.6094632634811818, + "learning_rate": 3.1701084706172245e-07, + "loss": 0.46300196647644043, + "step": 6527 + }, + { + "epoch": 1.50518791791561, + "grad_norm": 1.4186362500626026, + "learning_rate": 3.1673241262323934e-07, + "loss": 0.40698888897895813, + "step": 6528 + }, + { + "epoch": 1.5054184920451925, + "grad_norm": 1.484473947418196, + "learning_rate": 3.1645407750462514e-07, + "loss": 0.4344380497932434, + "step": 6529 + }, + { + "epoch": 1.5056490661747752, + "grad_norm": 1.6200348544461498, + "learning_rate": 3.1617584174633806e-07, + "loss": 0.49757128953933716, + "step": 6530 + }, + { + "epoch": 1.5058796403043577, + "grad_norm": 1.6256839483530447, + "learning_rate": 3.15897705388823e-07, + "loss": 0.4506916105747223, + "step": 6531 + }, + { + "epoch": 1.5061102144339404, + "grad_norm": 1.5009759227514647, + "learning_rate": 
3.156196684725093e-07, + "loss": 0.3941146731376648, + "step": 6532 + }, + { + "epoch": 1.5063407885635232, + "grad_norm": 1.9065405733956409, + "learning_rate": 3.153417310378127e-07, + "loss": 0.5400820374488831, + "step": 6533 + }, + { + "epoch": 1.5065713626931059, + "grad_norm": 1.774411964329925, + "learning_rate": 3.1506389312513435e-07, + "loss": 0.4418470859527588, + "step": 6534 + }, + { + "epoch": 1.5068019368226886, + "grad_norm": 1.3196915654196755, + "learning_rate": 3.1478615477486113e-07, + "loss": 0.3897334933280945, + "step": 6535 + }, + { + "epoch": 1.5070325109522713, + "grad_norm": 1.5772083777596413, + "learning_rate": 3.145085160273647e-07, + "loss": 0.4923437833786011, + "step": 6536 + }, + { + "epoch": 1.5072630850818538, + "grad_norm": 1.575539005736493, + "learning_rate": 3.142309769230025e-07, + "loss": 0.41996920108795166, + "step": 6537 + }, + { + "epoch": 1.5074936592114365, + "grad_norm": 1.5634954618427415, + "learning_rate": 3.1395353750211806e-07, + "loss": 0.38584667444229126, + "step": 6538 + }, + { + "epoch": 1.507724233341019, + "grad_norm": 1.5469052539454182, + "learning_rate": 3.136761978050395e-07, + "loss": 0.5093455910682678, + "step": 6539 + }, + { + "epoch": 1.5079548074706017, + "grad_norm": 1.8844111555093896, + "learning_rate": 3.1339895787208126e-07, + "loss": 0.5592935681343079, + "step": 6540 + }, + { + "epoch": 1.5081853816001844, + "grad_norm": 1.7670191671756568, + "learning_rate": 3.1312181774354306e-07, + "loss": 0.38311779499053955, + "step": 6541 + }, + { + "epoch": 1.5084159557297672, + "grad_norm": 1.6894588927823573, + "learning_rate": 3.1284477745971025e-07, + "loss": 0.4422299265861511, + "step": 6542 + }, + { + "epoch": 1.5086465298593499, + "grad_norm": 1.5653024747826005, + "learning_rate": 3.125678370608528e-07, + "loss": 0.5097527503967285, + "step": 6543 + }, + { + "epoch": 1.5088771039889326, + "grad_norm": 1.4635088499535702, + "learning_rate": 3.1229099658722747e-07, + "loss": 
0.42586642503738403, + "step": 6544 + }, + { + "epoch": 1.509107678118515, + "grad_norm": 1.7853929312810684, + "learning_rate": 3.120142560790755e-07, + "loss": 0.5006861686706543, + "step": 6545 + }, + { + "epoch": 1.5093382522480978, + "grad_norm": 1.292111562170076, + "learning_rate": 3.117376155766237e-07, + "loss": 0.4361686706542969, + "step": 6546 + }, + { + "epoch": 1.5095688263776803, + "grad_norm": 1.4890005224956508, + "learning_rate": 3.11461075120085e-07, + "loss": 0.45466339588165283, + "step": 6547 + }, + { + "epoch": 1.509799400507263, + "grad_norm": 1.4657261766322067, + "learning_rate": 3.1118463474965697e-07, + "loss": 0.39591068029403687, + "step": 6548 + }, + { + "epoch": 1.5100299746368457, + "grad_norm": 1.669083463008409, + "learning_rate": 3.1090829450552316e-07, + "loss": 0.4672427475452423, + "step": 6549 + }, + { + "epoch": 1.5102605487664285, + "grad_norm": 1.6273442700037082, + "learning_rate": 3.1063205442785234e-07, + "loss": 0.4785880148410797, + "step": 6550 + }, + { + "epoch": 1.5104911228960112, + "grad_norm": 1.3915985235576667, + "learning_rate": 3.103559145567994e-07, + "loss": 0.441936731338501, + "step": 6551 + }, + { + "epoch": 1.510721697025594, + "grad_norm": 1.5501390159164539, + "learning_rate": 3.1007987493250334e-07, + "loss": 0.49719512462615967, + "step": 6552 + }, + { + "epoch": 1.5109522711551764, + "grad_norm": 1.7806538694012621, + "learning_rate": 3.098039355950899e-07, + "loss": 0.40702491998672485, + "step": 6553 + }, + { + "epoch": 1.511182845284759, + "grad_norm": 1.4605232780084745, + "learning_rate": 3.0952809658466896e-07, + "loss": 0.44754648208618164, + "step": 6554 + }, + { + "epoch": 1.5114134194143416, + "grad_norm": 1.7119927234849008, + "learning_rate": 3.0925235794133717e-07, + "loss": 0.5370102524757385, + "step": 6555 + }, + { + "epoch": 1.5116439935439243, + "grad_norm": 1.4781444883115034, + "learning_rate": 3.089767197051755e-07, + "loss": 0.46693646907806396, + "step": 6556 + }, + { + 
"epoch": 1.511874567673507, + "grad_norm": 1.3940905139236526, + "learning_rate": 3.0870118191625084e-07, + "loss": 0.3887597322463989, + "step": 6557 + }, + { + "epoch": 1.5121051418030897, + "grad_norm": 1.509297997221229, + "learning_rate": 3.0842574461461577e-07, + "loss": 0.4783397912979126, + "step": 6558 + }, + { + "epoch": 1.5123357159326725, + "grad_norm": 2.254982960205746, + "learning_rate": 3.081504078403073e-07, + "loss": 0.5305588245391846, + "step": 6559 + }, + { + "epoch": 1.5125662900622552, + "grad_norm": 1.867807225680096, + "learning_rate": 3.078751716333492e-07, + "loss": 0.45315784215927124, + "step": 6560 + }, + { + "epoch": 1.5127968641918377, + "grad_norm": 1.6356411182801975, + "learning_rate": 3.0760003603374897e-07, + "loss": 0.4805132746696472, + "step": 6561 + }, + { + "epoch": 1.5130274383214202, + "grad_norm": 1.5579254915377003, + "learning_rate": 3.0732500108150104e-07, + "loss": 0.4956076145172119, + "step": 6562 + }, + { + "epoch": 1.5132580124510029, + "grad_norm": 1.6872988549232402, + "learning_rate": 3.07050066816584e-07, + "loss": 0.3629196882247925, + "step": 6563 + }, + { + "epoch": 1.5134885865805856, + "grad_norm": 1.4271734684348691, + "learning_rate": 3.067752332789626e-07, + "loss": 0.43240371346473694, + "step": 6564 + }, + { + "epoch": 1.5137191607101683, + "grad_norm": 1.4730845718882644, + "learning_rate": 3.065005005085869e-07, + "loss": 0.4933302402496338, + "step": 6565 + }, + { + "epoch": 1.513949734839751, + "grad_norm": 1.5594123406832316, + "learning_rate": 3.0622586854539155e-07, + "loss": 0.47905197739601135, + "step": 6566 + }, + { + "epoch": 1.5141803089693338, + "grad_norm": 1.3120965583955209, + "learning_rate": 3.059513374292978e-07, + "loss": 0.4245232343673706, + "step": 6567 + }, + { + "epoch": 1.5144108830989162, + "grad_norm": 1.6401225191596096, + "learning_rate": 3.0567690720021077e-07, + "loss": 0.40526312589645386, + "step": 6568 + }, + { + "epoch": 1.514641457228499, + "grad_norm": 
1.7208705138340397, + "learning_rate": 3.0540257789802227e-07, + "loss": 0.5808804631233215, + "step": 6569 + }, + { + "epoch": 1.5148720313580815, + "grad_norm": 1.791338069752229, + "learning_rate": 3.0512834956260836e-07, + "loss": 0.44997286796569824, + "step": 6570 + }, + { + "epoch": 1.5151026054876642, + "grad_norm": 1.6800897456169108, + "learning_rate": 3.048542222338315e-07, + "loss": 0.44051581621170044, + "step": 6571 + }, + { + "epoch": 1.515333179617247, + "grad_norm": 1.525217042834723, + "learning_rate": 3.045801959515382e-07, + "loss": 0.5113236308097839, + "step": 6572 + }, + { + "epoch": 1.5155637537468296, + "grad_norm": 1.5439102757372205, + "learning_rate": 3.0430627075556125e-07, + "loss": 0.554703950881958, + "step": 6573 + }, + { + "epoch": 1.5157943278764123, + "grad_norm": 1.600156572288611, + "learning_rate": 3.0403244668571847e-07, + "loss": 0.3819808065891266, + "step": 6574 + }, + { + "epoch": 1.516024902005995, + "grad_norm": 1.4872928405937125, + "learning_rate": 3.037587237818133e-07, + "loss": 0.47970864176750183, + "step": 6575 + }, + { + "epoch": 1.5162554761355775, + "grad_norm": 1.4776778157711579, + "learning_rate": 3.0348510208363386e-07, + "loss": 0.4296469986438751, + "step": 6576 + }, + { + "epoch": 1.5164860502651603, + "grad_norm": 1.462836798021035, + "learning_rate": 3.032115816309535e-07, + "loss": 0.4372752904891968, + "step": 6577 + }, + { + "epoch": 1.5167166243947428, + "grad_norm": 1.673613757204577, + "learning_rate": 3.029381624635318e-07, + "loss": 0.4711950719356537, + "step": 6578 + }, + { + "epoch": 1.5169471985243255, + "grad_norm": 1.3932522433513406, + "learning_rate": 3.026648446211124e-07, + "loss": 0.4448170065879822, + "step": 6579 + }, + { + "epoch": 1.5171777726539082, + "grad_norm": 1.6184181695445041, + "learning_rate": 3.02391628143425e-07, + "loss": 0.4527873992919922, + "step": 6580 + }, + { + "epoch": 1.517408346783491, + "grad_norm": 1.6799725255249693, + "learning_rate": 
3.0211851307018463e-07, + "loss": 0.453765332698822, + "step": 6581 + }, + { + "epoch": 1.5176389209130736, + "grad_norm": 1.686193810125547, + "learning_rate": 3.018454994410915e-07, + "loss": 0.46818265318870544, + "step": 6582 + }, + { + "epoch": 1.5178694950426563, + "grad_norm": 1.6601834563107158, + "learning_rate": 3.0157258729583026e-07, + "loss": 0.38551369309425354, + "step": 6583 + }, + { + "epoch": 1.5181000691722388, + "grad_norm": 1.2759146716130436, + "learning_rate": 3.012997766740721e-07, + "loss": 0.3651260733604431, + "step": 6584 + }, + { + "epoch": 1.5183306433018215, + "grad_norm": 1.4942378521466573, + "learning_rate": 3.010270676154726e-07, + "loss": 0.36894726753234863, + "step": 6585 + }, + { + "epoch": 1.518561217431404, + "grad_norm": 1.5163949110289714, + "learning_rate": 3.007544601596722e-07, + "loss": 0.42595791816711426, + "step": 6586 + }, + { + "epoch": 1.5187917915609868, + "grad_norm": 1.9011368495730705, + "learning_rate": 3.004819543462979e-07, + "loss": 0.4916795492172241, + "step": 6587 + }, + { + "epoch": 1.5190223656905695, + "grad_norm": 3.958756092482824, + "learning_rate": 3.0020955021496073e-07, + "loss": 0.5098932385444641, + "step": 6588 + }, + { + "epoch": 1.5192529398201522, + "grad_norm": 1.7429564765653418, + "learning_rate": 2.9993724780525796e-07, + "loss": 0.6336305737495422, + "step": 6589 + }, + { + "epoch": 1.519483513949735, + "grad_norm": 1.6454779446539551, + "learning_rate": 2.996650471567709e-07, + "loss": 0.4911893606185913, + "step": 6590 + }, + { + "epoch": 1.5197140880793176, + "grad_norm": 1.6053455149976412, + "learning_rate": 2.9939294830906727e-07, + "loss": 0.4388008117675781, + "step": 6591 + }, + { + "epoch": 1.5199446622089001, + "grad_norm": 1.4960203678707569, + "learning_rate": 2.991209513016986e-07, + "loss": 0.392263799905777, + "step": 6592 + }, + { + "epoch": 1.5201752363384828, + "grad_norm": 1.4101720949081316, + "learning_rate": 2.988490561742032e-07, + "loss": 
0.36495402455329895, + "step": 6593 + }, + { + "epoch": 1.5204058104680653, + "grad_norm": 1.6817212910549741, + "learning_rate": 2.985772629661032e-07, + "loss": 0.5280855298042297, + "step": 6594 + }, + { + "epoch": 1.520636384597648, + "grad_norm": 1.4575719708434207, + "learning_rate": 2.9830557171690693e-07, + "loss": 0.43953752517700195, + "step": 6595 + }, + { + "epoch": 1.5208669587272308, + "grad_norm": 1.261754251016282, + "learning_rate": 2.980339824661071e-07, + "loss": 0.41361862421035767, + "step": 6596 + }, + { + "epoch": 1.5210975328568135, + "grad_norm": 1.4525947923531464, + "learning_rate": 2.977624952531821e-07, + "loss": 0.39955854415893555, + "step": 6597 + }, + { + "epoch": 1.5213281069863962, + "grad_norm": 1.664684863463753, + "learning_rate": 2.9749111011759565e-07, + "loss": 0.505165696144104, + "step": 6598 + }, + { + "epoch": 1.521558681115979, + "grad_norm": 1.5619432117854901, + "learning_rate": 2.9721982709879566e-07, + "loss": 0.4388153851032257, + "step": 6599 + }, + { + "epoch": 1.5217892552455614, + "grad_norm": 1.454152411615684, + "learning_rate": 2.969486462362167e-07, + "loss": 0.4479100704193115, + "step": 6600 + }, + { + "epoch": 1.5220198293751441, + "grad_norm": 1.4345831092951191, + "learning_rate": 2.9667756756927686e-07, + "loss": 0.4005380868911743, + "step": 6601 + }, + { + "epoch": 1.5222504035047266, + "grad_norm": 1.707280681236192, + "learning_rate": 2.9640659113738087e-07, + "loss": 0.43774881958961487, + "step": 6602 + }, + { + "epoch": 1.5224809776343093, + "grad_norm": 1.5608510724785551, + "learning_rate": 2.9613571697991725e-07, + "loss": 0.4449707865715027, + "step": 6603 + }, + { + "epoch": 1.522711551763892, + "grad_norm": 1.6567386639534631, + "learning_rate": 2.958649451362606e-07, + "loss": 0.454499751329422, + "step": 6604 + }, + { + "epoch": 1.5229421258934748, + "grad_norm": 1.2977143159727098, + "learning_rate": 2.955942756457707e-07, + "loss": 0.35601305961608887, + "step": 6605 + }, + { + 
"epoch": 1.5231727000230575, + "grad_norm": 1.6684183476509384, + "learning_rate": 2.9532370854779143e-07, + "loss": 0.5252523422241211, + "step": 6606 + }, + { + "epoch": 1.5234032741526402, + "grad_norm": 1.3731317276647081, + "learning_rate": 2.950532438816531e-07, + "loss": 0.4311884939670563, + "step": 6607 + }, + { + "epoch": 1.5236338482822227, + "grad_norm": 1.5784692430456444, + "learning_rate": 2.9478288168667e-07, + "loss": 0.43956485390663147, + "step": 6608 + }, + { + "epoch": 1.5238644224118054, + "grad_norm": 1.4213527447836085, + "learning_rate": 2.9451262200214235e-07, + "loss": 0.400115430355072, + "step": 6609 + }, + { + "epoch": 1.524094996541388, + "grad_norm": 1.6612091081011793, + "learning_rate": 2.942424648673548e-07, + "loss": 0.41738802194595337, + "step": 6610 + }, + { + "epoch": 1.5243255706709706, + "grad_norm": 1.5951584459105572, + "learning_rate": 2.939724103215776e-07, + "loss": 0.412765771150589, + "step": 6611 + }, + { + "epoch": 1.5245561448005533, + "grad_norm": 1.6739308031441762, + "learning_rate": 2.937024584040659e-07, + "loss": 0.44869422912597656, + "step": 6612 + }, + { + "epoch": 1.524786718930136, + "grad_norm": 1.5443554211834334, + "learning_rate": 2.934326091540603e-07, + "loss": 0.39191997051239014, + "step": 6613 + }, + { + "epoch": 1.5250172930597188, + "grad_norm": 1.307209963924962, + "learning_rate": 2.9316286261078547e-07, + "loss": 0.36575692892074585, + "step": 6614 + }, + { + "epoch": 1.5252478671893015, + "grad_norm": 1.5775953874602453, + "learning_rate": 2.9289321881345254e-07, + "loss": 0.49928778409957886, + "step": 6615 + }, + { + "epoch": 1.525478441318884, + "grad_norm": 1.5029437064522762, + "learning_rate": 2.926236778012565e-07, + "loss": 0.49619296193122864, + "step": 6616 + }, + { + "epoch": 1.5257090154484667, + "grad_norm": 1.5175956935877304, + "learning_rate": 2.923542396133777e-07, + "loss": 0.4614447355270386, + "step": 6617 + }, + { + "epoch": 1.5259395895780492, + "grad_norm": 
1.5326379965687464, + "learning_rate": 2.9208490428898213e-07, + "loss": 0.43820804357528687, + "step": 6618 + }, + { + "epoch": 1.526170163707632, + "grad_norm": 1.7297859153701105, + "learning_rate": 2.9181567186722e-07, + "loss": 0.46856528520584106, + "step": 6619 + }, + { + "epoch": 1.5264007378372146, + "grad_norm": 1.5560178508678546, + "learning_rate": 2.915465423872272e-07, + "loss": 0.45428818464279175, + "step": 6620 + }, + { + "epoch": 1.5266313119667974, + "grad_norm": 1.765757281110695, + "learning_rate": 2.912775158881243e-07, + "loss": 0.44715386629104614, + "step": 6621 + }, + { + "epoch": 1.52686188609638, + "grad_norm": 1.845941311143575, + "learning_rate": 2.9100859240901764e-07, + "loss": 0.537441611289978, + "step": 6622 + }, + { + "epoch": 1.5270924602259628, + "grad_norm": 2.100811269468338, + "learning_rate": 2.9073977198899714e-07, + "loss": 0.4430112838745117, + "step": 6623 + }, + { + "epoch": 1.5273230343555453, + "grad_norm": 1.625928583733216, + "learning_rate": 2.904710546671392e-07, + "loss": 0.41713255643844604, + "step": 6624 + }, + { + "epoch": 1.527553608485128, + "grad_norm": 1.639578198355071, + "learning_rate": 2.9020244048250396e-07, + "loss": 0.4313931465148926, + "step": 6625 + }, + { + "epoch": 1.5277841826147105, + "grad_norm": 1.617455818460061, + "learning_rate": 2.899339294741379e-07, + "loss": 0.5038034319877625, + "step": 6626 + }, + { + "epoch": 1.5280147567442932, + "grad_norm": 1.6017224429954546, + "learning_rate": 2.8966552168107127e-07, + "loss": 0.45088762044906616, + "step": 6627 + }, + { + "epoch": 1.528245330873876, + "grad_norm": 1.6027378992570083, + "learning_rate": 2.8939721714232e-07, + "loss": 0.40857064723968506, + "step": 6628 + }, + { + "epoch": 1.5284759050034586, + "grad_norm": 1.5432592985198028, + "learning_rate": 2.891290158968853e-07, + "loss": 0.43766242265701294, + "step": 6629 + }, + { + "epoch": 1.5287064791330414, + "grad_norm": 1.6663524119863393, + "learning_rate": 
2.888609179837523e-07, + "loss": 0.45986247062683105, + "step": 6630 + }, + { + "epoch": 1.528937053262624, + "grad_norm": 1.5102818288035118, + "learning_rate": 2.8859292344189236e-07, + "loss": 0.4681728482246399, + "step": 6631 + }, + { + "epoch": 1.5291676273922066, + "grad_norm": 1.4009274503220306, + "learning_rate": 2.883250323102605e-07, + "loss": 0.36730295419692993, + "step": 6632 + }, + { + "epoch": 1.5293982015217893, + "grad_norm": 1.6785355662696937, + "learning_rate": 2.880572446277982e-07, + "loss": 0.43494418263435364, + "step": 6633 + }, + { + "epoch": 1.5296287756513718, + "grad_norm": 1.6257441783659756, + "learning_rate": 2.877895604334305e-07, + "loss": 0.49145790934562683, + "step": 6634 + }, + { + "epoch": 1.5298593497809545, + "grad_norm": 1.4638603112091872, + "learning_rate": 2.875219797660681e-07, + "loss": 0.4166264832019806, + "step": 6635 + }, + { + "epoch": 1.5300899239105372, + "grad_norm": 1.3504636181719787, + "learning_rate": 2.8725450266460704e-07, + "loss": 0.4336514472961426, + "step": 6636 + }, + { + "epoch": 1.53032049804012, + "grad_norm": 1.6796430942391267, + "learning_rate": 2.869871291679271e-07, + "loss": 0.44186240434646606, + "step": 6637 + }, + { + "epoch": 1.5305510721697027, + "grad_norm": 1.4751166079505253, + "learning_rate": 2.867198593148945e-07, + "loss": 0.40619733929634094, + "step": 6638 + }, + { + "epoch": 1.5307816462992854, + "grad_norm": 1.4034694689938345, + "learning_rate": 2.864526931443588e-07, + "loss": 0.45552101731300354, + "step": 6639 + }, + { + "epoch": 1.5310122204288679, + "grad_norm": 1.3563039501008287, + "learning_rate": 2.861856306951562e-07, + "loss": 0.45153865218162537, + "step": 6640 + }, + { + "epoch": 1.5312427945584506, + "grad_norm": 1.5793746333655185, + "learning_rate": 2.859186720061061e-07, + "loss": 0.5146148204803467, + "step": 6641 + }, + { + "epoch": 1.531473368688033, + "grad_norm": 1.5627792728055054, + "learning_rate": 2.856518171160143e-07, + "loss": 
0.4566080868244171, + "step": 6642 + }, + { + "epoch": 1.5317039428176158, + "grad_norm": 1.93802928616596, + "learning_rate": 2.853850660636703e-07, + "loss": 0.4390585124492645, + "step": 6643 + }, + { + "epoch": 1.5319345169471985, + "grad_norm": 1.7734959004013588, + "learning_rate": 2.851184188878493e-07, + "loss": 0.5508195757865906, + "step": 6644 + }, + { + "epoch": 1.5321650910767812, + "grad_norm": 1.6721581584041076, + "learning_rate": 2.8485187562731126e-07, + "loss": 0.47640183568000793, + "step": 6645 + }, + { + "epoch": 1.532395665206364, + "grad_norm": 1.421769874384772, + "learning_rate": 2.8458543632080123e-07, + "loss": 0.4511566758155823, + "step": 6646 + }, + { + "epoch": 1.5326262393359467, + "grad_norm": 1.5003089507123706, + "learning_rate": 2.843191010070486e-07, + "loss": 0.414367139339447, + "step": 6647 + }, + { + "epoch": 1.5328568134655292, + "grad_norm": 1.5192326893049226, + "learning_rate": 2.840528697247674e-07, + "loss": 0.4611589312553406, + "step": 6648 + }, + { + "epoch": 1.5330873875951119, + "grad_norm": 1.6397285440449882, + "learning_rate": 2.8378674251265787e-07, + "loss": 0.4675883948802948, + "step": 6649 + }, + { + "epoch": 1.5333179617246944, + "grad_norm": 1.6281144487220143, + "learning_rate": 2.835207194094036e-07, + "loss": 0.49039095640182495, + "step": 6650 + }, + { + "epoch": 1.533548535854277, + "grad_norm": 1.6636356702139277, + "learning_rate": 2.832548004536741e-07, + "loss": 0.45641693472862244, + "step": 6651 + }, + { + "epoch": 1.5337791099838598, + "grad_norm": 1.7323507398911224, + "learning_rate": 2.829889856841233e-07, + "loss": 0.4858587682247162, + "step": 6652 + }, + { + "epoch": 1.5340096841134425, + "grad_norm": 1.3640056940377991, + "learning_rate": 2.8272327513939055e-07, + "loss": 0.3640017807483673, + "step": 6653 + }, + { + "epoch": 1.5342402582430252, + "grad_norm": 1.5342226074105705, + "learning_rate": 2.8245766885809865e-07, + "loss": 0.42915207147598267, + "step": 6654 + }, + { + 
"epoch": 1.534470832372608, + "grad_norm": 1.5250515427099394, + "learning_rate": 2.8219216687885707e-07, + "loss": 0.5041407346725464, + "step": 6655 + }, + { + "epoch": 1.5347014065021904, + "grad_norm": 1.479165849869464, + "learning_rate": 2.8192676924025885e-07, + "loss": 0.4748334288597107, + "step": 6656 + }, + { + "epoch": 1.5349319806317732, + "grad_norm": 1.5854109757101433, + "learning_rate": 2.8166147598088173e-07, + "loss": 0.4745975136756897, + "step": 6657 + }, + { + "epoch": 1.5351625547613557, + "grad_norm": 1.6430139570672564, + "learning_rate": 2.813962871392893e-07, + "loss": 0.49246084690093994, + "step": 6658 + }, + { + "epoch": 1.5353931288909384, + "grad_norm": 1.3796442061928538, + "learning_rate": 2.8113120275402936e-07, + "loss": 0.47876033186912537, + "step": 6659 + }, + { + "epoch": 1.535623703020521, + "grad_norm": 1.6460545742229191, + "learning_rate": 2.808662228636348e-07, + "loss": 0.5244987607002258, + "step": 6660 + }, + { + "epoch": 1.5358542771501038, + "grad_norm": 1.6433381019004774, + "learning_rate": 2.8060134750662277e-07, + "loss": 0.44661569595336914, + "step": 6661 + }, + { + "epoch": 1.5360848512796865, + "grad_norm": 1.4583799872096337, + "learning_rate": 2.8033657672149615e-07, + "loss": 0.4508060812950134, + "step": 6662 + }, + { + "epoch": 1.5363154254092692, + "grad_norm": 1.3497148067649773, + "learning_rate": 2.8007191054674117e-07, + "loss": 0.4657326340675354, + "step": 6663 + }, + { + "epoch": 1.5365459995388517, + "grad_norm": 1.4227603766742651, + "learning_rate": 2.798073490208307e-07, + "loss": 0.495077520608902, + "step": 6664 + }, + { + "epoch": 1.5367765736684345, + "grad_norm": 1.4557135691757939, + "learning_rate": 2.795428921822206e-07, + "loss": 0.40721309185028076, + "step": 6665 + }, + { + "epoch": 1.537007147798017, + "grad_norm": 1.4109014285343175, + "learning_rate": 2.7927854006935315e-07, + "loss": 0.3279367685317993, + "step": 6666 + }, + { + "epoch": 1.5372377219275997, + "grad_norm": 
1.6893419118169095, + "learning_rate": 2.790142927206538e-07, + "loss": 0.4849242866039276, + "step": 6667 + }, + { + "epoch": 1.5374682960571824, + "grad_norm": 1.7502055418971636, + "learning_rate": 2.7875015017453394e-07, + "loss": 0.45151397585868835, + "step": 6668 + }, + { + "epoch": 1.537698870186765, + "grad_norm": 1.7275509884274352, + "learning_rate": 2.784861124693898e-07, + "loss": 0.43480992317199707, + "step": 6669 + }, + { + "epoch": 1.5379294443163478, + "grad_norm": 1.606181868361543, + "learning_rate": 2.782221796436012e-07, + "loss": 0.48764440417289734, + "step": 6670 + }, + { + "epoch": 1.5381600184459305, + "grad_norm": 1.5345831310523104, + "learning_rate": 2.7795835173553407e-07, + "loss": 0.4164161682128906, + "step": 6671 + }, + { + "epoch": 1.538390592575513, + "grad_norm": 1.8060994369656536, + "learning_rate": 2.7769462878353777e-07, + "loss": 0.49934858083724976, + "step": 6672 + }, + { + "epoch": 1.5386211667050955, + "grad_norm": 1.4004311994850918, + "learning_rate": 2.77431010825948e-07, + "loss": 0.4877321124076843, + "step": 6673 + }, + { + "epoch": 1.5388517408346782, + "grad_norm": 1.7442704894714258, + "learning_rate": 2.771674979010834e-07, + "loss": 0.44518858194351196, + "step": 6674 + }, + { + "epoch": 1.539082314964261, + "grad_norm": 1.4902795732558884, + "learning_rate": 2.769040900472488e-07, + "loss": 0.4237474203109741, + "step": 6675 + }, + { + "epoch": 1.5393128890938437, + "grad_norm": 1.8818051716593445, + "learning_rate": 2.7664078730273335e-07, + "loss": 0.45270341634750366, + "step": 6676 + }, + { + "epoch": 1.5395434632234264, + "grad_norm": 1.9777420597791724, + "learning_rate": 2.7637758970581004e-07, + "loss": 0.3866819739341736, + "step": 6677 + }, + { + "epoch": 1.539774037353009, + "grad_norm": 1.709571144624541, + "learning_rate": 2.7611449729473825e-07, + "loss": 0.4384220838546753, + "step": 6678 + }, + { + "epoch": 1.5400046114825916, + "grad_norm": 1.523752237168306, + "learning_rate": 
2.758515101077602e-07, + "loss": 0.4462182819843292, + "step": 6679 + }, + { + "epoch": 1.5402351856121743, + "grad_norm": 1.6129576485586044, + "learning_rate": 2.755886281831046e-07, + "loss": 0.3927033245563507, + "step": 6680 + }, + { + "epoch": 1.5404657597417568, + "grad_norm": 1.7095013933604486, + "learning_rate": 2.7532585155898314e-07, + "loss": 0.4678634703159332, + "step": 6681 + }, + { + "epoch": 1.5406963338713395, + "grad_norm": 1.4524055684149206, + "learning_rate": 2.750631802735935e-07, + "loss": 0.4165131151676178, + "step": 6682 + }, + { + "epoch": 1.5409269080009222, + "grad_norm": 1.1494402193253566, + "learning_rate": 2.748006143651178e-07, + "loss": 0.3705793023109436, + "step": 6683 + }, + { + "epoch": 1.541157482130505, + "grad_norm": 1.5819526439113667, + "learning_rate": 2.745381538717226e-07, + "loss": 0.5428882837295532, + "step": 6684 + }, + { + "epoch": 1.5413880562600877, + "grad_norm": 1.6426127293668795, + "learning_rate": 2.742757988315589e-07, + "loss": 0.4116673171520233, + "step": 6685 + }, + { + "epoch": 1.5416186303896704, + "grad_norm": 1.4540567592422353, + "learning_rate": 2.740135492827631e-07, + "loss": 0.4617515802383423, + "step": 6686 + }, + { + "epoch": 1.541849204519253, + "grad_norm": 1.6140828940427878, + "learning_rate": 2.737514052634555e-07, + "loss": 0.5002453923225403, + "step": 6687 + }, + { + "epoch": 1.5420797786488356, + "grad_norm": 1.4130856063185002, + "learning_rate": 2.734893668117412e-07, + "loss": 0.46029362082481384, + "step": 6688 + }, + { + "epoch": 1.542310352778418, + "grad_norm": 1.4809565956171882, + "learning_rate": 2.732274339657107e-07, + "loss": 0.40502026677131653, + "step": 6689 + }, + { + "epoch": 1.5425409269080008, + "grad_norm": 1.6538580711421296, + "learning_rate": 2.7296560676343803e-07, + "loss": 0.5267831087112427, + "step": 6690 + }, + { + "epoch": 1.5427715010375835, + "grad_norm": 1.3087993674480496, + "learning_rate": 2.727038852429826e-07, + "loss": 0.3464335799217224, + 
"step": 6691 + }, + { + "epoch": 1.5430020751671663, + "grad_norm": 1.5384863769893498, + "learning_rate": 2.7244226944238847e-07, + "loss": 0.36635881662368774, + "step": 6692 + }, + { + "epoch": 1.543232649296749, + "grad_norm": 1.7314925345176482, + "learning_rate": 2.7218075939968435e-07, + "loss": 0.4567757844924927, + "step": 6693 + }, + { + "epoch": 1.5434632234263317, + "grad_norm": 1.9452957704897642, + "learning_rate": 2.719193551528827e-07, + "loss": 0.539220929145813, + "step": 6694 + }, + { + "epoch": 1.5436937975559142, + "grad_norm": 1.653206530012829, + "learning_rate": 2.71658056739982e-07, + "loss": 0.48553818464279175, + "step": 6695 + }, + { + "epoch": 1.543924371685497, + "grad_norm": 1.5040526715775615, + "learning_rate": 2.7139686419896424e-07, + "loss": 0.48564499616622925, + "step": 6696 + }, + { + "epoch": 1.5441549458150794, + "grad_norm": 1.3502417010865393, + "learning_rate": 2.7113577756779616e-07, + "loss": 0.4163014590740204, + "step": 6697 + }, + { + "epoch": 1.544385519944662, + "grad_norm": 1.864828438533457, + "learning_rate": 2.708747968844296e-07, + "loss": 0.5686431527137756, + "step": 6698 + }, + { + "epoch": 1.5446160940742448, + "grad_norm": 1.8608147536494253, + "learning_rate": 2.706139221868008e-07, + "loss": 0.5365211963653564, + "step": 6699 + }, + { + "epoch": 1.5448466682038275, + "grad_norm": 1.5480523179756653, + "learning_rate": 2.7035315351283084e-07, + "loss": 0.4147397577762604, + "step": 6700 + }, + { + "epoch": 1.5450772423334103, + "grad_norm": 1.5279455451058772, + "learning_rate": 2.7009249090042454e-07, + "loss": 0.3938590884208679, + "step": 6701 + }, + { + "epoch": 1.545307816462993, + "grad_norm": 1.726862148896079, + "learning_rate": 2.698319343874722e-07, + "loss": 0.3521370589733124, + "step": 6702 + }, + { + "epoch": 1.5455383905925755, + "grad_norm": 1.6305887024948476, + "learning_rate": 2.69571484011848e-07, + "loss": 0.430014967918396, + "step": 6703 + }, + { + "epoch": 1.5457689647221582, + 
"grad_norm": 1.636933956561892, + "learning_rate": 2.6931113981141164e-07, + "loss": 0.4697108864784241, + "step": 6704 + }, + { + "epoch": 1.5459995388517407, + "grad_norm": 1.5552943329509785, + "learning_rate": 2.69050901824006e-07, + "loss": 0.46567851305007935, + "step": 6705 + }, + { + "epoch": 1.5462301129813234, + "grad_norm": 1.620367133120872, + "learning_rate": 2.6879077008745986e-07, + "loss": 0.46061819791793823, + "step": 6706 + }, + { + "epoch": 1.5464606871109061, + "grad_norm": 1.5411435279833592, + "learning_rate": 2.6853074463958614e-07, + "loss": 0.568658709526062, + "step": 6707 + }, + { + "epoch": 1.5466912612404888, + "grad_norm": 1.3834999667432357, + "learning_rate": 2.682708255181815e-07, + "loss": 0.42816412448883057, + "step": 6708 + }, + { + "epoch": 1.5469218353700716, + "grad_norm": 1.576410551372393, + "learning_rate": 2.6801101276102866e-07, + "loss": 0.42515552043914795, + "step": 6709 + }, + { + "epoch": 1.5471524094996543, + "grad_norm": 1.5447523266389376, + "learning_rate": 2.677513064058932e-07, + "loss": 0.46513399481773376, + "step": 6710 + }, + { + "epoch": 1.5473829836292368, + "grad_norm": 1.3853944144224488, + "learning_rate": 2.6749170649052675e-07, + "loss": 0.4194756746292114, + "step": 6711 + }, + { + "epoch": 1.5476135577588195, + "grad_norm": 1.4035563039276318, + "learning_rate": 2.672322130526643e-07, + "loss": 0.4456541836261749, + "step": 6712 + }, + { + "epoch": 1.547844131888402, + "grad_norm": 1.5113453932130136, + "learning_rate": 2.669728261300264e-07, + "loss": 0.493444561958313, + "step": 6713 + }, + { + "epoch": 1.5480747060179847, + "grad_norm": 1.582884732282312, + "learning_rate": 2.6671354576031645e-07, + "loss": 0.47202616930007935, + "step": 6714 + }, + { + "epoch": 1.5483052801475674, + "grad_norm": 1.824788636144565, + "learning_rate": 2.66454371981225e-07, + "loss": 0.4584811329841614, + "step": 6715 + }, + { + "epoch": 1.5485358542771501, + "grad_norm": 1.3167028831683925, + "learning_rate": 
2.6619530483042485e-07, + "loss": 0.4072091579437256, + "step": 6716 + }, + { + "epoch": 1.5487664284067328, + "grad_norm": 1.5656021898929726, + "learning_rate": 2.6593634434557365e-07, + "loss": 0.49742361903190613, + "step": 6717 + }, + { + "epoch": 1.5489970025363156, + "grad_norm": 1.6686846450785309, + "learning_rate": 2.6567749056431467e-07, + "loss": 0.49291643500328064, + "step": 6718 + }, + { + "epoch": 1.549227576665898, + "grad_norm": 1.5234565390584587, + "learning_rate": 2.6541874352427427e-07, + "loss": 0.5210362076759338, + "step": 6719 + }, + { + "epoch": 1.5494581507954808, + "grad_norm": 1.523136615036839, + "learning_rate": 2.651601032630645e-07, + "loss": 0.4489557147026062, + "step": 6720 + }, + { + "epoch": 1.5496887249250633, + "grad_norm": 1.515706035484409, + "learning_rate": 2.649015698182808e-07, + "loss": 0.4417908191680908, + "step": 6721 + }, + { + "epoch": 1.549919299054646, + "grad_norm": 1.5123745571810647, + "learning_rate": 2.6464314322750404e-07, + "loss": 0.45177266001701355, + "step": 6722 + }, + { + "epoch": 1.5501498731842287, + "grad_norm": 1.5422888438788165, + "learning_rate": 2.6438482352829896e-07, + "loss": 0.37720638513565063, + "step": 6723 + }, + { + "epoch": 1.5503804473138114, + "grad_norm": 1.5572735157633186, + "learning_rate": 2.641266107582153e-07, + "loss": 0.5108897089958191, + "step": 6724 + }, + { + "epoch": 1.5506110214433941, + "grad_norm": 1.5098940840101445, + "learning_rate": 2.638685049547863e-07, + "loss": 0.449248731136322, + "step": 6725 + }, + { + "epoch": 1.5508415955729768, + "grad_norm": 1.4667668469814954, + "learning_rate": 2.636105061555309e-07, + "loss": 0.4692652225494385, + "step": 6726 + }, + { + "epoch": 1.5510721697025593, + "grad_norm": 1.5150559633489926, + "learning_rate": 2.6335261439795153e-07, + "loss": 0.49128347635269165, + "step": 6727 + }, + { + "epoch": 1.551302743832142, + "grad_norm": 1.5725646817979666, + "learning_rate": 2.630948297195351e-07, + "loss": 
0.4618053436279297, + "step": 6728 + }, + { + "epoch": 1.5515333179617246, + "grad_norm": 1.5786249232029208, + "learning_rate": 2.6283715215775336e-07, + "loss": 0.4342828094959259, + "step": 6729 + }, + { + "epoch": 1.5517638920913073, + "grad_norm": 1.5592983853420144, + "learning_rate": 2.625795817500626e-07, + "loss": 0.5214434862136841, + "step": 6730 + }, + { + "epoch": 1.55199446622089, + "grad_norm": 1.521395946192631, + "learning_rate": 2.623221185339034e-07, + "loss": 0.4873029589653015, + "step": 6731 + }, + { + "epoch": 1.5522250403504727, + "grad_norm": 1.5014817933254478, + "learning_rate": 2.6206476254670007e-07, + "loss": 0.4510548412799835, + "step": 6732 + }, + { + "epoch": 1.5524556144800554, + "grad_norm": 1.5931454307395074, + "learning_rate": 2.6180751382586265e-07, + "loss": 0.4832548499107361, + "step": 6733 + }, + { + "epoch": 1.5526861886096381, + "grad_norm": 1.8273040799326088, + "learning_rate": 2.6155037240878406e-07, + "loss": 0.5438823699951172, + "step": 6734 + }, + { + "epoch": 1.5529167627392206, + "grad_norm": 1.488758610712305, + "learning_rate": 2.6129333833284315e-07, + "loss": 0.4967566728591919, + "step": 6735 + }, + { + "epoch": 1.5531473368688034, + "grad_norm": 1.419700158234616, + "learning_rate": 2.610364116354018e-07, + "loss": 0.5187437534332275, + "step": 6736 + }, + { + "epoch": 1.5533779109983858, + "grad_norm": 1.3624978155475462, + "learning_rate": 2.607795923538072e-07, + "loss": 0.4199862480163574, + "step": 6737 + }, + { + "epoch": 1.5536084851279686, + "grad_norm": 1.463828508781327, + "learning_rate": 2.6052288052539084e-07, + "loss": 0.5009325742721558, + "step": 6738 + }, + { + "epoch": 1.5538390592575513, + "grad_norm": 1.5361155892650822, + "learning_rate": 2.602662761874679e-07, + "loss": 0.48698678612709045, + "step": 6739 + }, + { + "epoch": 1.554069633387134, + "grad_norm": 1.4600353762817446, + "learning_rate": 2.6000977937733905e-07, + "loss": 0.4845883846282959, + "step": 6740 + }, + { + "epoch": 
1.5543002075167167, + "grad_norm": 1.6153802807658302, + "learning_rate": 2.59753390132288e-07, + "loss": 0.512161135673523, + "step": 6741 + }, + { + "epoch": 1.5545307816462994, + "grad_norm": 1.756231295082545, + "learning_rate": 2.5949710848958415e-07, + "loss": 0.42334964871406555, + "step": 6742 + }, + { + "epoch": 1.554761355775882, + "grad_norm": 1.2927501946290025, + "learning_rate": 2.592409344864801e-07, + "loss": 0.3781980276107788, + "step": 6743 + }, + { + "epoch": 1.5549919299054646, + "grad_norm": 1.5363470406300028, + "learning_rate": 2.5898486816021394e-07, + "loss": 0.4989853501319885, + "step": 6744 + }, + { + "epoch": 1.5552225040350471, + "grad_norm": 1.5873964925893267, + "learning_rate": 2.5872890954800676e-07, + "loss": 0.45715585350990295, + "step": 6745 + }, + { + "epoch": 1.5554530781646299, + "grad_norm": 1.3499060893753405, + "learning_rate": 2.5847305868706515e-07, + "loss": 0.5025684833526611, + "step": 6746 + }, + { + "epoch": 1.5556836522942126, + "grad_norm": 1.5290460697986008, + "learning_rate": 2.5821731561457994e-07, + "loss": 0.47298115491867065, + "step": 6747 + }, + { + "epoch": 1.5559142264237953, + "grad_norm": 1.4250590830459762, + "learning_rate": 2.5796168036772524e-07, + "loss": 0.45412957668304443, + "step": 6748 + }, + { + "epoch": 1.556144800553378, + "grad_norm": 1.6230149340497857, + "learning_rate": 2.5770615298366107e-07, + "loss": 0.3958669602870941, + "step": 6749 + }, + { + "epoch": 1.5563753746829607, + "grad_norm": 1.4992477100706287, + "learning_rate": 2.574507334995302e-07, + "loss": 0.4748396873474121, + "step": 6750 + }, + { + "epoch": 1.5566059488125432, + "grad_norm": 2.1473408883216534, + "learning_rate": 2.5719542195246093e-07, + "loss": 0.4741169810295105, + "step": 6751 + }, + { + "epoch": 1.556836522942126, + "grad_norm": 1.5072269547692108, + "learning_rate": 2.569402183795648e-07, + "loss": 0.4362972378730774, + "step": 6752 + }, + { + "epoch": 1.5570670970717084, + "grad_norm": 
1.5695384848079892, + "learning_rate": 2.5668512281793873e-07, + "loss": 0.48013412952423096, + "step": 6753 + }, + { + "epoch": 1.5572976712012911, + "grad_norm": 1.4514603270444408, + "learning_rate": 2.564301353046634e-07, + "loss": 0.4728567600250244, + "step": 6754 + }, + { + "epoch": 1.5575282453308739, + "grad_norm": 1.7592773476195727, + "learning_rate": 2.56175255876804e-07, + "loss": 0.4304337501525879, + "step": 6755 + }, + { + "epoch": 1.5577588194604566, + "grad_norm": 1.5275686028016913, + "learning_rate": 2.5592048457140926e-07, + "loss": 0.43467870354652405, + "step": 6756 + }, + { + "epoch": 1.5579893935900393, + "grad_norm": 1.9596482130933712, + "learning_rate": 2.556658214255134e-07, + "loss": 0.3912844657897949, + "step": 6757 + }, + { + "epoch": 1.558219967719622, + "grad_norm": 1.5284327791141838, + "learning_rate": 2.5541126647613397e-07, + "loss": 0.4462862014770508, + "step": 6758 + }, + { + "epoch": 1.5584505418492045, + "grad_norm": 1.5847675751494867, + "learning_rate": 2.551568197602729e-07, + "loss": 0.43929487466812134, + "step": 6759 + }, + { + "epoch": 1.5586811159787872, + "grad_norm": 1.5077581986013873, + "learning_rate": 2.549024813149169e-07, + "loss": 0.44473958015441895, + "step": 6760 + }, + { + "epoch": 1.5589116901083697, + "grad_norm": 1.5536876763085832, + "learning_rate": 2.546482511770365e-07, + "loss": 0.5159727931022644, + "step": 6761 + }, + { + "epoch": 1.5591422642379524, + "grad_norm": 1.7371461951042986, + "learning_rate": 2.5439412938358696e-07, + "loss": 0.3975204825401306, + "step": 6762 + }, + { + "epoch": 1.5593728383675352, + "grad_norm": 1.493493619365051, + "learning_rate": 2.54140115971507e-07, + "loss": 0.5198286175727844, + "step": 6763 + }, + { + "epoch": 1.5596034124971179, + "grad_norm": 1.4309109790386, + "learning_rate": 2.5388621097772046e-07, + "loss": 0.4815763831138611, + "step": 6764 + }, + { + "epoch": 1.5598339866267006, + "grad_norm": 1.3803469238514527, + "learning_rate": 
2.5363241443913454e-07, + "loss": 0.365215539932251, + "step": 6765 + }, + { + "epoch": 1.5600645607562833, + "grad_norm": 1.6088793691676593, + "learning_rate": 2.533787263926417e-07, + "loss": 0.486020028591156, + "step": 6766 + }, + { + "epoch": 1.5602951348858658, + "grad_norm": 1.5355383857513338, + "learning_rate": 2.5312514687511766e-07, + "loss": 0.38536715507507324, + "step": 6767 + }, + { + "epoch": 1.5605257090154485, + "grad_norm": 1.649862765507334, + "learning_rate": 2.528716759234227e-07, + "loss": 0.44713371992111206, + "step": 6768 + }, + { + "epoch": 1.560756283145031, + "grad_norm": 1.868794454538197, + "learning_rate": 2.5261831357440154e-07, + "loss": 0.4122806489467621, + "step": 6769 + }, + { + "epoch": 1.5609868572746137, + "grad_norm": 1.6234940940069353, + "learning_rate": 2.523650598648829e-07, + "loss": 0.40514320135116577, + "step": 6770 + }, + { + "epoch": 1.5612174314041964, + "grad_norm": 1.4417973525561176, + "learning_rate": 2.5211191483168027e-07, + "loss": 0.4273102283477783, + "step": 6771 + }, + { + "epoch": 1.5614480055337792, + "grad_norm": 1.4229504510118502, + "learning_rate": 2.5185887851159005e-07, + "loss": 0.4774209260940552, + "step": 6772 + }, + { + "epoch": 1.5616785796633619, + "grad_norm": 1.583645566960067, + "learning_rate": 2.5160595094139436e-07, + "loss": 0.3928600549697876, + "step": 6773 + }, + { + "epoch": 1.5619091537929446, + "grad_norm": 1.6757793450729852, + "learning_rate": 2.5135313215785816e-07, + "loss": 0.4414944052696228, + "step": 6774 + }, + { + "epoch": 1.562139727922527, + "grad_norm": 1.733143939427008, + "learning_rate": 2.5110042219773176e-07, + "loss": 0.36133646965026855, + "step": 6775 + }, + { + "epoch": 1.5623703020521098, + "grad_norm": 1.8443586806925936, + "learning_rate": 2.508478210977486e-07, + "loss": 0.44824904203414917, + "step": 6776 + }, + { + "epoch": 1.5626008761816923, + "grad_norm": 1.1693439456079453, + "learning_rate": 2.5059532889462707e-07, + "loss": 
0.3699820637702942, + "step": 6777 + }, + { + "epoch": 1.562831450311275, + "grad_norm": 1.9309547773144982, + "learning_rate": 2.5034294562506976e-07, + "loss": 0.4809808135032654, + "step": 6778 + }, + { + "epoch": 1.5630620244408577, + "grad_norm": 1.7665230327633363, + "learning_rate": 2.5009067132576256e-07, + "loss": 0.487751841545105, + "step": 6779 + }, + { + "epoch": 1.5632925985704405, + "grad_norm": 1.5839144124062823, + "learning_rate": 2.4983850603337675e-07, + "loss": 0.47932374477386475, + "step": 6780 + }, + { + "epoch": 1.5635231727000232, + "grad_norm": 1.4782012523005248, + "learning_rate": 2.495864497845663e-07, + "loss": 0.42852234840393066, + "step": 6781 + }, + { + "epoch": 1.5637537468296059, + "grad_norm": 1.4802387383863571, + "learning_rate": 2.49334502615971e-07, + "loss": 0.4392131567001343, + "step": 6782 + }, + { + "epoch": 1.5639843209591884, + "grad_norm": 1.5042475261036963, + "learning_rate": 2.4908266456421323e-07, + "loss": 0.45050233602523804, + "step": 6783 + }, + { + "epoch": 1.5642148950887709, + "grad_norm": 1.4962883173938244, + "learning_rate": 2.488309356659004e-07, + "loss": 0.45328110456466675, + "step": 6784 + }, + { + "epoch": 1.5644454692183536, + "grad_norm": 1.451199382042834, + "learning_rate": 2.4857931595762403e-07, + "loss": 0.3851325511932373, + "step": 6785 + }, + { + "epoch": 1.5646760433479363, + "grad_norm": 1.5269726027188475, + "learning_rate": 2.4832780547595976e-07, + "loss": 0.4096960127353668, + "step": 6786 + }, + { + "epoch": 1.564906617477519, + "grad_norm": 1.4158017969205454, + "learning_rate": 2.480764042574669e-07, + "loss": 0.4439825117588043, + "step": 6787 + }, + { + "epoch": 1.5651371916071017, + "grad_norm": 1.5084778231824414, + "learning_rate": 2.4782511233868895e-07, + "loss": 0.4259459972381592, + "step": 6788 + }, + { + "epoch": 1.5653677657366845, + "grad_norm": 1.6383230301383533, + "learning_rate": 2.475739297561542e-07, + "loss": 0.4701216220855713, + "step": 6789 + }, + { + 
"epoch": 1.565598339866267, + "grad_norm": 1.4707071600317903, + "learning_rate": 2.473228565463742e-07, + "loss": 0.4435737133026123, + "step": 6790 + }, + { + "epoch": 1.5658289139958497, + "grad_norm": 1.4361527011832544, + "learning_rate": 2.4707189274584537e-07, + "loss": 0.4476662278175354, + "step": 6791 + }, + { + "epoch": 1.5660594881254322, + "grad_norm": 1.8319243980176085, + "learning_rate": 2.468210383910474e-07, + "loss": 0.4399911165237427, + "step": 6792 + }, + { + "epoch": 1.5662900622550149, + "grad_norm": 1.5617800363149925, + "learning_rate": 2.465702935184446e-07, + "loss": 0.4206039309501648, + "step": 6793 + }, + { + "epoch": 1.5665206363845976, + "grad_norm": 1.5998109403316092, + "learning_rate": 2.463196581644855e-07, + "loss": 0.44936686754226685, + "step": 6794 + }, + { + "epoch": 1.5667512105141803, + "grad_norm": 1.4750351364947134, + "learning_rate": 2.4606913236560277e-07, + "loss": 0.39926016330718994, + "step": 6795 + }, + { + "epoch": 1.566981784643763, + "grad_norm": 1.607414705164721, + "learning_rate": 2.4581871615821216e-07, + "loss": 0.4338487982749939, + "step": 6796 + }, + { + "epoch": 1.5672123587733457, + "grad_norm": 1.6693881073802184, + "learning_rate": 2.455684095787148e-07, + "loss": 0.5047430992126465, + "step": 6797 + }, + { + "epoch": 1.5674429329029282, + "grad_norm": 1.623571142038879, + "learning_rate": 2.4531821266349504e-07, + "loss": 0.46082550287246704, + "step": 6798 + }, + { + "epoch": 1.567673507032511, + "grad_norm": 1.5687485332342288, + "learning_rate": 2.450681254489214e-07, + "loss": 0.44586509466171265, + "step": 6799 + }, + { + "epoch": 1.5679040811620935, + "grad_norm": 1.6011741376497353, + "learning_rate": 2.4481814797134657e-07, + "loss": 0.5167746543884277, + "step": 6800 + }, + { + "epoch": 1.5681346552916762, + "grad_norm": 1.4074512111564024, + "learning_rate": 2.4456828026710753e-07, + "loss": 0.44062116742134094, + "step": 6801 + }, + { + "epoch": 1.5683652294212589, + "grad_norm": 
1.718295945554571, + "learning_rate": 2.4431852237252524e-07, + "loss": 0.5096040368080139, + "step": 6802 + }, + { + "epoch": 1.5685958035508416, + "grad_norm": 1.3369851313651875, + "learning_rate": 2.440688743239042e-07, + "loss": 0.44234153628349304, + "step": 6803 + }, + { + "epoch": 1.5688263776804243, + "grad_norm": 1.7878168925295264, + "learning_rate": 2.4381933615753357e-07, + "loss": 0.431011825799942, + "step": 6804 + }, + { + "epoch": 1.569056951810007, + "grad_norm": 1.5221569168970472, + "learning_rate": 2.435699079096858e-07, + "loss": 0.4903266131877899, + "step": 6805 + }, + { + "epoch": 1.5692875259395895, + "grad_norm": 1.4830626229942445, + "learning_rate": 2.433205896166185e-07, + "loss": 0.4698626399040222, + "step": 6806 + }, + { + "epoch": 1.5695181000691723, + "grad_norm": 1.7678576287420633, + "learning_rate": 2.4307138131457184e-07, + "loss": 0.37576574087142944, + "step": 6807 + }, + { + "epoch": 1.5697486741987547, + "grad_norm": 1.442601981615427, + "learning_rate": 2.4282228303977113e-07, + "loss": 0.47068172693252563, + "step": 6808 + }, + { + "epoch": 1.5699792483283375, + "grad_norm": 1.5121414961596256, + "learning_rate": 2.425732948284257e-07, + "loss": 0.45246315002441406, + "step": 6809 + }, + { + "epoch": 1.5702098224579202, + "grad_norm": 1.670746435704044, + "learning_rate": 2.423244167167278e-07, + "loss": 0.4746376574039459, + "step": 6810 + }, + { + "epoch": 1.570440396587503, + "grad_norm": 1.6491072802367082, + "learning_rate": 2.420756487408551e-07, + "loss": 0.413469135761261, + "step": 6811 + }, + { + "epoch": 1.5706709707170856, + "grad_norm": 1.4392614299059656, + "learning_rate": 2.418269909369678e-07, + "loss": 0.3567890226840973, + "step": 6812 + }, + { + "epoch": 1.5709015448466683, + "grad_norm": 1.9034789277869502, + "learning_rate": 2.415784433412116e-07, + "loss": 0.4676034450531006, + "step": 6813 + }, + { + "epoch": 1.5711321189762508, + "grad_norm": 1.5100461636177536, + "learning_rate": 
2.4133000598971477e-07, + "loss": 0.429337739944458, + "step": 6814 + }, + { + "epoch": 1.5713626931058335, + "grad_norm": 1.657098818036463, + "learning_rate": 2.4108167891859065e-07, + "loss": 0.35861289501190186, + "step": 6815 + }, + { + "epoch": 1.571593267235416, + "grad_norm": 1.7985300174152374, + "learning_rate": 2.4083346216393564e-07, + "loss": 0.43728363513946533, + "step": 6816 + }, + { + "epoch": 1.5718238413649988, + "grad_norm": 1.6655671112295587, + "learning_rate": 2.405853557618308e-07, + "loss": 0.44594380259513855, + "step": 6817 + }, + { + "epoch": 1.5720544154945815, + "grad_norm": 1.430621764890317, + "learning_rate": 2.403373597483414e-07, + "loss": 0.36871337890625, + "step": 6818 + }, + { + "epoch": 1.5722849896241642, + "grad_norm": 1.4284927159530842, + "learning_rate": 2.400894741595152e-07, + "loss": 0.3769477307796478, + "step": 6819 + }, + { + "epoch": 1.572515563753747, + "grad_norm": 1.6803573488891066, + "learning_rate": 2.3984169903138583e-07, + "loss": 0.503145694732666, + "step": 6820 + }, + { + "epoch": 1.5727461378833296, + "grad_norm": 1.552866324250783, + "learning_rate": 2.395940343999691e-07, + "loss": 0.4082655906677246, + "step": 6821 + }, + { + "epoch": 1.5729767120129121, + "grad_norm": 1.4215190376699491, + "learning_rate": 2.3934648030126625e-07, + "loss": 0.4106418192386627, + "step": 6822 + }, + { + "epoch": 1.5732072861424948, + "grad_norm": 1.663561714777188, + "learning_rate": 2.390990367712613e-07, + "loss": 0.45363783836364746, + "step": 6823 + }, + { + "epoch": 1.5734378602720773, + "grad_norm": 1.4253235303875884, + "learning_rate": 2.388517038459227e-07, + "loss": 0.4416825473308563, + "step": 6824 + }, + { + "epoch": 1.57366843440166, + "grad_norm": 1.5727508875619094, + "learning_rate": 2.3860448156120304e-07, + "loss": 0.5106863379478455, + "step": 6825 + }, + { + "epoch": 1.5738990085312428, + "grad_norm": 1.431151413456896, + "learning_rate": 2.3835736995303879e-07, + "loss": 0.4618466794490814, + 
"step": 6826 + }, + { + "epoch": 1.5741295826608255, + "grad_norm": 1.6611294255159201, + "learning_rate": 2.381103690573495e-07, + "loss": 0.414678692817688, + "step": 6827 + }, + { + "epoch": 1.5743601567904082, + "grad_norm": 1.3583782134926532, + "learning_rate": 2.3786347891004e-07, + "loss": 0.39774662256240845, + "step": 6828 + }, + { + "epoch": 1.574590730919991, + "grad_norm": 1.3689702631653482, + "learning_rate": 2.376166995469977e-07, + "loss": 0.4513537287712097, + "step": 6829 + }, + { + "epoch": 1.5748213050495734, + "grad_norm": 1.5433747348092586, + "learning_rate": 2.3737003100409447e-07, + "loss": 0.44062697887420654, + "step": 6830 + }, + { + "epoch": 1.5750518791791561, + "grad_norm": 1.6549219639884087, + "learning_rate": 2.3712347331718617e-07, + "loss": 0.42305582761764526, + "step": 6831 + }, + { + "epoch": 1.5752824533087386, + "grad_norm": 1.628456252942963, + "learning_rate": 2.3687702652211262e-07, + "loss": 0.46731626987457275, + "step": 6832 + }, + { + "epoch": 1.5755130274383213, + "grad_norm": 1.569042371408869, + "learning_rate": 2.3663069065469753e-07, + "loss": 0.4926149845123291, + "step": 6833 + }, + { + "epoch": 1.575743601567904, + "grad_norm": 1.8433451746214373, + "learning_rate": 2.3638446575074777e-07, + "loss": 0.49002933502197266, + "step": 6834 + }, + { + "epoch": 1.5759741756974868, + "grad_norm": 1.9286763636552064, + "learning_rate": 2.3613835184605523e-07, + "loss": 0.47110694646835327, + "step": 6835 + }, + { + "epoch": 1.5762047498270695, + "grad_norm": 1.7003781450027053, + "learning_rate": 2.3589234897639444e-07, + "loss": 0.4257816672325134, + "step": 6836 + }, + { + "epoch": 1.5764353239566522, + "grad_norm": 1.4515610553726317, + "learning_rate": 2.3564645717752506e-07, + "loss": 0.4031051695346832, + "step": 6837 + }, + { + "epoch": 1.5766658980862347, + "grad_norm": 1.7208107126331553, + "learning_rate": 2.3540067648518957e-07, + "loss": 0.5077808499336243, + "step": 6838 + }, + { + "epoch": 
1.5768964722158174, + "grad_norm": 1.4184547433402042, + "learning_rate": 2.3515500693511449e-07, + "loss": 0.3877585232257843, + "step": 6839 + }, + { + "epoch": 1.5771270463454, + "grad_norm": 1.6806127701824354, + "learning_rate": 2.3490944856301064e-07, + "loss": 0.4356805682182312, + "step": 6840 + }, + { + "epoch": 1.5773576204749826, + "grad_norm": 1.5102184976880006, + "learning_rate": 2.346640014045723e-07, + "loss": 0.46679362654685974, + "step": 6841 + }, + { + "epoch": 1.5775881946045653, + "grad_norm": 1.4361079018846885, + "learning_rate": 2.3441866549547817e-07, + "loss": 0.4837648272514343, + "step": 6842 + }, + { + "epoch": 1.577818768734148, + "grad_norm": 1.5395603940472438, + "learning_rate": 2.341734408713897e-07, + "loss": 0.42723533511161804, + "step": 6843 + }, + { + "epoch": 1.5780493428637308, + "grad_norm": 1.7296429757269751, + "learning_rate": 2.3392832756795322e-07, + "loss": 0.3680928647518158, + "step": 6844 + }, + { + "epoch": 1.5782799169933135, + "grad_norm": 1.3398871717628533, + "learning_rate": 2.3368332562079797e-07, + "loss": 0.434980571269989, + "step": 6845 + }, + { + "epoch": 1.578510491122896, + "grad_norm": 1.5976407072584213, + "learning_rate": 2.3343843506553805e-07, + "loss": 0.45552271604537964, + "step": 6846 + }, + { + "epoch": 1.5787410652524787, + "grad_norm": 1.5496903398620734, + "learning_rate": 2.331936559377702e-07, + "loss": 0.4292616844177246, + "step": 6847 + }, + { + "epoch": 1.5789716393820612, + "grad_norm": 1.6907239258434268, + "learning_rate": 2.3294898827307573e-07, + "loss": 0.5025339126586914, + "step": 6848 + }, + { + "epoch": 1.579202213511644, + "grad_norm": 1.434142265629081, + "learning_rate": 2.3270443210701996e-07, + "loss": 0.47567370533943176, + "step": 6849 + }, + { + "epoch": 1.5794327876412266, + "grad_norm": 1.9792768486961878, + "learning_rate": 2.3245998747515095e-07, + "loss": 0.5435467958450317, + "step": 6850 + }, + { + "epoch": 1.5796633617708093, + "grad_norm": 
1.2141081677893035, + "learning_rate": 2.3221565441300194e-07, + "loss": 0.4409145712852478, + "step": 6851 + }, + { + "epoch": 1.579893935900392, + "grad_norm": 1.3643265195449554, + "learning_rate": 2.3197143295608845e-07, + "loss": 0.40482181310653687, + "step": 6852 + }, + { + "epoch": 1.5801245100299748, + "grad_norm": 1.8983898955785605, + "learning_rate": 2.317273231399113e-07, + "loss": 0.40231794118881226, + "step": 6853 + }, + { + "epoch": 1.5803550841595573, + "grad_norm": 1.3860542767537625, + "learning_rate": 2.314833249999535e-07, + "loss": 0.43245166540145874, + "step": 6854 + }, + { + "epoch": 1.58058565828914, + "grad_norm": 1.5386782332278715, + "learning_rate": 2.3123943857168315e-07, + "loss": 0.40237659215927124, + "step": 6855 + }, + { + "epoch": 1.5808162324187225, + "grad_norm": 1.7869361833965254, + "learning_rate": 2.309956638905517e-07, + "loss": 0.48900318145751953, + "step": 6856 + }, + { + "epoch": 1.5810468065483052, + "grad_norm": 1.482622476685355, + "learning_rate": 2.3075200099199422e-07, + "loss": 0.42364567518234253, + "step": 6857 + }, + { + "epoch": 1.581277380677888, + "grad_norm": 1.6159587255295897, + "learning_rate": 2.3050844991142958e-07, + "loss": 0.4658735990524292, + "step": 6858 + }, + { + "epoch": 1.5815079548074706, + "grad_norm": 1.4775627716781476, + "learning_rate": 2.3026501068426007e-07, + "loss": 0.42268991470336914, + "step": 6859 + }, + { + "epoch": 1.5817385289370534, + "grad_norm": 1.4348002511722773, + "learning_rate": 2.3002168334587247e-07, + "loss": 0.44876742362976074, + "step": 6860 + }, + { + "epoch": 1.581969103066636, + "grad_norm": 1.5171591869453156, + "learning_rate": 2.2977846793163646e-07, + "loss": 0.42540132999420166, + "step": 6861 + }, + { + "epoch": 1.5821996771962186, + "grad_norm": 1.4296859038074168, + "learning_rate": 2.2953536447690636e-07, + "loss": 0.48768138885498047, + "step": 6862 + }, + { + "epoch": 1.5824302513258013, + "grad_norm": 1.5445046236967466, + "learning_rate": 
2.292923730170192e-07, + "loss": 0.42905953526496887, + "step": 6863 + }, + { + "epoch": 1.5826608254553838, + "grad_norm": 1.4472242985886439, + "learning_rate": 2.2904949358729653e-07, + "loss": 0.4103778004646301, + "step": 6864 + }, + { + "epoch": 1.5828913995849665, + "grad_norm": 1.5180272333652802, + "learning_rate": 2.2880672622304331e-07, + "loss": 0.39303290843963623, + "step": 6865 + }, + { + "epoch": 1.5831219737145492, + "grad_norm": 1.4702183686842207, + "learning_rate": 2.2856407095954843e-07, + "loss": 0.5087130069732666, + "step": 6866 + }, + { + "epoch": 1.583352547844132, + "grad_norm": 1.5644640444387603, + "learning_rate": 2.283215278320839e-07, + "loss": 0.33117055892944336, + "step": 6867 + }, + { + "epoch": 1.5835831219737146, + "grad_norm": 1.7090383225203818, + "learning_rate": 2.280790968759063e-07, + "loss": 0.41781488060951233, + "step": 6868 + }, + { + "epoch": 1.5838136961032974, + "grad_norm": 1.4121975925065597, + "learning_rate": 2.2783677812625523e-07, + "loss": 0.5104382634162903, + "step": 6869 + }, + { + "epoch": 1.5840442702328799, + "grad_norm": 1.5723614045021508, + "learning_rate": 2.2759457161835372e-07, + "loss": 0.3987969160079956, + "step": 6870 + }, + { + "epoch": 1.5842748443624626, + "grad_norm": 1.705658009146651, + "learning_rate": 2.2735247738740936e-07, + "loss": 0.4723064601421356, + "step": 6871 + }, + { + "epoch": 1.584505418492045, + "grad_norm": 1.707721278006975, + "learning_rate": 2.2711049546861293e-07, + "loss": 0.3942141830921173, + "step": 6872 + }, + { + "epoch": 1.5847359926216278, + "grad_norm": 1.5657011191058785, + "learning_rate": 2.268686258971393e-07, + "loss": 0.38271787762641907, + "step": 6873 + }, + { + "epoch": 1.5849665667512105, + "grad_norm": 1.3977071321322045, + "learning_rate": 2.2662686870814607e-07, + "loss": 0.4944665729999542, + "step": 6874 + }, + { + "epoch": 1.5851971408807932, + "grad_norm": 1.7910306093530013, + "learning_rate": 2.2638522393677562e-07, + "loss": 
0.46695005893707275, + "step": 6875 + }, + { + "epoch": 1.585427715010376, + "grad_norm": 1.7074115790208728, + "learning_rate": 2.2614369161815295e-07, + "loss": 0.4620080888271332, + "step": 6876 + }, + { + "epoch": 1.5856582891399587, + "grad_norm": 1.6877087434684872, + "learning_rate": 2.2590227178738776e-07, + "loss": 0.5650279521942139, + "step": 6877 + }, + { + "epoch": 1.5858888632695411, + "grad_norm": 1.3471081039016284, + "learning_rate": 2.2566096447957227e-07, + "loss": 0.3556622564792633, + "step": 6878 + }, + { + "epoch": 1.5861194373991239, + "grad_norm": 1.3889188451731431, + "learning_rate": 2.254197697297834e-07, + "loss": 0.4978718161582947, + "step": 6879 + }, + { + "epoch": 1.5863500115287064, + "grad_norm": 1.375490517958548, + "learning_rate": 2.2517868757308146e-07, + "loss": 0.4759003520011902, + "step": 6880 + }, + { + "epoch": 1.586580585658289, + "grad_norm": 1.579013983466932, + "learning_rate": 2.2493771804450945e-07, + "loss": 0.5078370571136475, + "step": 6881 + }, + { + "epoch": 1.5868111597878718, + "grad_norm": 1.3607586792133322, + "learning_rate": 2.2469686117909547e-07, + "loss": 0.4188239276409149, + "step": 6882 + }, + { + "epoch": 1.5870417339174545, + "grad_norm": 1.3488510335317552, + "learning_rate": 2.2445611701184997e-07, + "loss": 0.4075232744216919, + "step": 6883 + }, + { + "epoch": 1.5872723080470372, + "grad_norm": 1.5004910712339554, + "learning_rate": 2.2421548557776794e-07, + "loss": 0.3643442988395691, + "step": 6884 + }, + { + "epoch": 1.58750288217662, + "grad_norm": 1.4193604715362476, + "learning_rate": 2.2397496691182716e-07, + "loss": 0.38767147064208984, + "step": 6885 + }, + { + "epoch": 1.5877334563062024, + "grad_norm": 1.6373352976605955, + "learning_rate": 2.2373456104899e-07, + "loss": 0.4874354600906372, + "step": 6886 + }, + { + "epoch": 1.5879640304357852, + "grad_norm": 1.5573200679287742, + "learning_rate": 2.2349426802420134e-07, + "loss": 0.46412762999534607, + "step": 6887 + }, + { + 
"epoch": 1.5881946045653677, + "grad_norm": 1.3720639419051985, + "learning_rate": 2.2325408787239054e-07, + "loss": 0.4299372434616089, + "step": 6888 + }, + { + "epoch": 1.5884251786949504, + "grad_norm": 1.6309152140238423, + "learning_rate": 2.230140206284703e-07, + "loss": 0.3962220549583435, + "step": 6889 + }, + { + "epoch": 1.588655752824533, + "grad_norm": 1.617512400235996, + "learning_rate": 2.2277406632733653e-07, + "loss": 0.5048998594284058, + "step": 6890 + }, + { + "epoch": 1.5888863269541158, + "grad_norm": 2.0443646004817024, + "learning_rate": 2.2253422500386932e-07, + "loss": 0.35463857650756836, + "step": 6891 + }, + { + "epoch": 1.5891169010836985, + "grad_norm": 1.5696832175175914, + "learning_rate": 2.2229449669293165e-07, + "loss": 0.3969672620296478, + "step": 6892 + }, + { + "epoch": 1.5893474752132812, + "grad_norm": 1.5166803382402412, + "learning_rate": 2.22054881429371e-07, + "loss": 0.36300575733184814, + "step": 6893 + }, + { + "epoch": 1.5895780493428637, + "grad_norm": 1.41057555150973, + "learning_rate": 2.2181537924801729e-07, + "loss": 0.45796507596969604, + "step": 6894 + }, + { + "epoch": 1.5898086234724462, + "grad_norm": 1.556089643432737, + "learning_rate": 2.2157599018368488e-07, + "loss": 0.42725688219070435, + "step": 6895 + }, + { + "epoch": 1.590039197602029, + "grad_norm": 1.8436048050065164, + "learning_rate": 2.213367142711714e-07, + "loss": 0.4959419369697571, + "step": 6896 + }, + { + "epoch": 1.5902697717316117, + "grad_norm": 1.6607109480306586, + "learning_rate": 2.2109755154525821e-07, + "loss": 0.3707115948200226, + "step": 6897 + }, + { + "epoch": 1.5905003458611944, + "grad_norm": 1.4025605906760028, + "learning_rate": 2.2085850204070989e-07, + "loss": 0.3647577166557312, + "step": 6898 + }, + { + "epoch": 1.590730919990777, + "grad_norm": 1.505368584241417, + "learning_rate": 2.2061956579227447e-07, + "loss": 0.42227697372436523, + "step": 6899 + }, + { + "epoch": 1.5909614941203598, + "grad_norm": 
1.508703122498175, + "learning_rate": 2.2038074283468412e-07, + "loss": 0.41736292839050293, + "step": 6900 + }, + { + "epoch": 1.5911920682499423, + "grad_norm": 1.6418039973045746, + "learning_rate": 2.201420332026538e-07, + "loss": 0.46005967259407043, + "step": 6901 + }, + { + "epoch": 1.591422642379525, + "grad_norm": 1.4328523009517202, + "learning_rate": 2.1990343693088243e-07, + "loss": 0.3572643995285034, + "step": 6902 + }, + { + "epoch": 1.5916532165091075, + "grad_norm": 1.744760153255399, + "learning_rate": 2.196649540540527e-07, + "loss": 0.5321012735366821, + "step": 6903 + }, + { + "epoch": 1.5918837906386902, + "grad_norm": 1.5415731453823578, + "learning_rate": 2.194265846068305e-07, + "loss": 0.4913836419582367, + "step": 6904 + }, + { + "epoch": 1.592114364768273, + "grad_norm": 1.7016363411577065, + "learning_rate": 2.1918832862386493e-07, + "loss": 0.37674903869628906, + "step": 6905 + }, + { + "epoch": 1.5923449388978557, + "grad_norm": 1.5772289300833298, + "learning_rate": 2.1895018613978934e-07, + "loss": 0.4385930001735687, + "step": 6906 + }, + { + "epoch": 1.5925755130274384, + "grad_norm": 2.224743671968565, + "learning_rate": 2.1871215718921964e-07, + "loss": 0.5219674706459045, + "step": 6907 + }, + { + "epoch": 1.592806087157021, + "grad_norm": 1.5215408344839954, + "learning_rate": 2.1847424180675622e-07, + "loss": 0.4241113066673279, + "step": 6908 + }, + { + "epoch": 1.5930366612866036, + "grad_norm": 1.4296843598144484, + "learning_rate": 2.1823644002698237e-07, + "loss": 0.4008786082267761, + "step": 6909 + }, + { + "epoch": 1.5932672354161863, + "grad_norm": 1.5021365471039205, + "learning_rate": 2.179987518844645e-07, + "loss": 0.3333933651447296, + "step": 6910 + }, + { + "epoch": 1.5934978095457688, + "grad_norm": 1.652596855301234, + "learning_rate": 2.1776117741375343e-07, + "loss": 0.48857730627059937, + "step": 6911 + }, + { + "epoch": 1.5937283836753515, + "grad_norm": 1.4724322236306013, + "learning_rate": 
2.1752371664938306e-07, + "loss": 0.37393617630004883, + "step": 6912 + }, + { + "epoch": 1.5939589578049342, + "grad_norm": 1.4102085657254086, + "learning_rate": 2.172863696258709e-07, + "loss": 0.5365080833435059, + "step": 6913 + }, + { + "epoch": 1.594189531934517, + "grad_norm": 1.7683912421422305, + "learning_rate": 2.1704913637771705e-07, + "loss": 0.49318936467170715, + "step": 6914 + }, + { + "epoch": 1.5944201060640997, + "grad_norm": 1.8200372673393599, + "learning_rate": 2.1681201693940666e-07, + "loss": 0.37682920694351196, + "step": 6915 + }, + { + "epoch": 1.5946506801936824, + "grad_norm": 1.4120260343966702, + "learning_rate": 2.1657501134540657e-07, + "loss": 0.4894877076148987, + "step": 6916 + }, + { + "epoch": 1.5948812543232649, + "grad_norm": 1.5895963005275906, + "learning_rate": 2.1633811963016869e-07, + "loss": 0.4200783967971802, + "step": 6917 + }, + { + "epoch": 1.5951118284528476, + "grad_norm": 1.7361608161591027, + "learning_rate": 2.1610134182812702e-07, + "loss": 0.3953052759170532, + "step": 6918 + }, + { + "epoch": 1.59534240258243, + "grad_norm": 1.4727518091374385, + "learning_rate": 2.158646779736999e-07, + "loss": 0.4006558656692505, + "step": 6919 + }, + { + "epoch": 1.5955729767120128, + "grad_norm": 1.7355475804217702, + "learning_rate": 2.1562812810128906e-07, + "loss": 0.3749210238456726, + "step": 6920 + }, + { + "epoch": 1.5958035508415955, + "grad_norm": 1.5378158592599445, + "learning_rate": 2.1539169224527887e-07, + "loss": 0.4688538610935211, + "step": 6921 + }, + { + "epoch": 1.5960341249711782, + "grad_norm": 1.590308500795848, + "learning_rate": 2.151553704400383e-07, + "loss": 0.4483727216720581, + "step": 6922 + }, + { + "epoch": 1.596264699100761, + "grad_norm": 1.589431373760787, + "learning_rate": 2.149191627199185e-07, + "loss": 0.5118253827095032, + "step": 6923 + }, + { + "epoch": 1.5964952732303437, + "grad_norm": 1.644731800905039, + "learning_rate": 2.1468306911925525e-07, + "loss": 
0.43641170859336853, + "step": 6924 + }, + { + "epoch": 1.5967258473599262, + "grad_norm": 1.4755114053374785, + "learning_rate": 2.1444708967236657e-07, + "loss": 0.38253384828567505, + "step": 6925 + }, + { + "epoch": 1.596956421489509, + "grad_norm": 1.5638213373412855, + "learning_rate": 2.1421122441355476e-07, + "loss": 0.43674635887145996, + "step": 6926 + }, + { + "epoch": 1.5971869956190914, + "grad_norm": 1.3940207891491625, + "learning_rate": 2.1397547337710519e-07, + "loss": 0.37392908334732056, + "step": 6927 + }, + { + "epoch": 1.597417569748674, + "grad_norm": 1.5097907813025324, + "learning_rate": 2.13739836597287e-07, + "loss": 0.4531250298023224, + "step": 6928 + }, + { + "epoch": 1.5976481438782568, + "grad_norm": 1.3308296891253455, + "learning_rate": 2.13504314108352e-07, + "loss": 0.38579899072647095, + "step": 6929 + }, + { + "epoch": 1.5978787180078395, + "grad_norm": 1.8618083111554995, + "learning_rate": 2.1326890594453563e-07, + "loss": 0.5215288400650024, + "step": 6930 + }, + { + "epoch": 1.5981092921374223, + "grad_norm": 1.6019249166669218, + "learning_rate": 2.130336121400572e-07, + "loss": 0.4396743178367615, + "step": 6931 + }, + { + "epoch": 1.598339866267005, + "grad_norm": 1.5371889029106374, + "learning_rate": 2.127984327291188e-07, + "loss": 0.5068432688713074, + "step": 6932 + }, + { + "epoch": 1.5985704403965875, + "grad_norm": 1.7855756215277538, + "learning_rate": 2.1256336774590643e-07, + "loss": 0.48809194564819336, + "step": 6933 + }, + { + "epoch": 1.5988010145261702, + "grad_norm": 1.4166815561679078, + "learning_rate": 2.123284172245885e-07, + "loss": 0.4191613793373108, + "step": 6934 + }, + { + "epoch": 1.5990315886557527, + "grad_norm": 1.5763678308245206, + "learning_rate": 2.1209358119931843e-07, + "loss": 0.41901010274887085, + "step": 6935 + }, + { + "epoch": 1.5992621627853354, + "grad_norm": 1.8296822391624505, + "learning_rate": 2.1185885970423133e-07, + "loss": 0.5046913623809814, + "step": 6936 + }, + { + 
"epoch": 1.5994927369149181, + "grad_norm": 2.1559492699976492, + "learning_rate": 2.1162425277344675e-07, + "loss": 0.5113730430603027, + "step": 6937 + }, + { + "epoch": 1.5997233110445008, + "grad_norm": 1.520077424866564, + "learning_rate": 2.1138976044106672e-07, + "loss": 0.34129637479782104, + "step": 6938 + }, + { + "epoch": 1.5999538851740835, + "grad_norm": 1.5890047902961466, + "learning_rate": 2.1115538274117762e-07, + "loss": 0.4492289423942566, + "step": 6939 + }, + { + "epoch": 1.6001844593036663, + "grad_norm": 1.5532375131614289, + "learning_rate": 2.1092111970784833e-07, + "loss": 0.41002708673477173, + "step": 6940 + }, + { + "epoch": 1.6004150334332488, + "grad_norm": 1.887817008406582, + "learning_rate": 2.1068697137513113e-07, + "loss": 0.5444740056991577, + "step": 6941 + }, + { + "epoch": 1.6006456075628315, + "grad_norm": 1.518981510824895, + "learning_rate": 2.1045293777706196e-07, + "loss": 0.3489699959754944, + "step": 6942 + }, + { + "epoch": 1.600876181692414, + "grad_norm": 1.5115486172446684, + "learning_rate": 2.1021901894766025e-07, + "loss": 0.41807419061660767, + "step": 6943 + }, + { + "epoch": 1.6011067558219967, + "grad_norm": 1.7376028221450257, + "learning_rate": 2.0998521492092857e-07, + "loss": 0.41074657440185547, + "step": 6944 + }, + { + "epoch": 1.6013373299515794, + "grad_norm": 1.370751011576157, + "learning_rate": 2.097515257308521e-07, + "loss": 0.4085312485694885, + "step": 6945 + }, + { + "epoch": 1.6015679040811621, + "grad_norm": 1.6632563260665783, + "learning_rate": 2.095179514114006e-07, + "loss": 0.42699170112609863, + "step": 6946 + }, + { + "epoch": 1.6017984782107448, + "grad_norm": 1.6347540938108835, + "learning_rate": 2.0928449199652597e-07, + "loss": 0.40041583776474, + "step": 6947 + }, + { + "epoch": 1.6020290523403276, + "grad_norm": 1.385214375087801, + "learning_rate": 2.090511475201643e-07, + "loss": 0.47465208172798157, + "step": 6948 + }, + { + "epoch": 1.60225962646991, + "grad_norm": 
1.5233208405026366, + "learning_rate": 2.0881791801623405e-07, + "loss": 0.4338058829307556, + "step": 6949 + }, + { + "epoch": 1.6024902005994928, + "grad_norm": 1.857588116409586, + "learning_rate": 2.0858480351863794e-07, + "loss": 0.5398772954940796, + "step": 6950 + }, + { + "epoch": 1.6027207747290753, + "grad_norm": 1.41461865858101, + "learning_rate": 2.0835180406126151e-07, + "loss": 0.40750259160995483, + "step": 6951 + }, + { + "epoch": 1.602951348858658, + "grad_norm": 1.6330208123854022, + "learning_rate": 2.0811891967797336e-07, + "loss": 0.4365716278553009, + "step": 6952 + }, + { + "epoch": 1.6031819229882407, + "grad_norm": 1.395812913626374, + "learning_rate": 2.078861504026258e-07, + "loss": 0.41537174582481384, + "step": 6953 + }, + { + "epoch": 1.6034124971178234, + "grad_norm": 1.331855885968294, + "learning_rate": 2.0765349626905394e-07, + "loss": 0.3687853217124939, + "step": 6954 + }, + { + "epoch": 1.6036430712474061, + "grad_norm": 1.4291699726024594, + "learning_rate": 2.074209573110769e-07, + "loss": 0.48866790533065796, + "step": 6955 + }, + { + "epoch": 1.6038736453769888, + "grad_norm": 1.7541297686576787, + "learning_rate": 2.0718853356249588e-07, + "loss": 0.4618760347366333, + "step": 6956 + }, + { + "epoch": 1.6041042195065713, + "grad_norm": 1.820272898606224, + "learning_rate": 2.0695622505709654e-07, + "loss": 0.365873247385025, + "step": 6957 + }, + { + "epoch": 1.604334793636154, + "grad_norm": 1.7127779412462347, + "learning_rate": 2.0672403182864706e-07, + "loss": 0.4346495270729065, + "step": 6958 + }, + { + "epoch": 1.6045653677657365, + "grad_norm": 1.4385774019168192, + "learning_rate": 2.0649195391089935e-07, + "loss": 0.3995724618434906, + "step": 6959 + }, + { + "epoch": 1.6047959418953193, + "grad_norm": 1.890499669463449, + "learning_rate": 2.062599913375882e-07, + "loss": 0.4628515839576721, + "step": 6960 + }, + { + "epoch": 1.605026516024902, + "grad_norm": 1.8491035226730044, + "learning_rate": 
2.060281441424314e-07, + "loss": 0.39776262640953064, + "step": 6961 + }, + { + "epoch": 1.6052570901544847, + "grad_norm": 1.6838333142700899, + "learning_rate": 2.057964123591307e-07, + "loss": 0.4622994065284729, + "step": 6962 + }, + { + "epoch": 1.6054876642840674, + "grad_norm": 1.3806987670969462, + "learning_rate": 2.0556479602137033e-07, + "loss": 0.4028933048248291, + "step": 6963 + }, + { + "epoch": 1.6057182384136501, + "grad_norm": 1.592137730506949, + "learning_rate": 2.0533329516281838e-07, + "loss": 0.46639660000801086, + "step": 6964 + }, + { + "epoch": 1.6059488125432326, + "grad_norm": 1.3243378898371028, + "learning_rate": 2.0510190981712537e-07, + "loss": 0.4063863158226013, + "step": 6965 + }, + { + "epoch": 1.6061793866728153, + "grad_norm": 1.6927530193908227, + "learning_rate": 2.0487064001792586e-07, + "loss": 0.471376895904541, + "step": 6966 + }, + { + "epoch": 1.6064099608023978, + "grad_norm": 1.5262354616100662, + "learning_rate": 2.0463948579883727e-07, + "loss": 0.5094102025032043, + "step": 6967 + }, + { + "epoch": 1.6066405349319806, + "grad_norm": 1.613731344454896, + "learning_rate": 2.0440844719346039e-07, + "loss": 0.3922441005706787, + "step": 6968 + }, + { + "epoch": 1.6068711090615633, + "grad_norm": 1.7524315605420397, + "learning_rate": 2.0417752423537882e-07, + "loss": 0.47777149081230164, + "step": 6969 + }, + { + "epoch": 1.607101683191146, + "grad_norm": 2.2487851564601065, + "learning_rate": 2.0394671695815924e-07, + "loss": 0.5780138969421387, + "step": 6970 + }, + { + "epoch": 1.6073322573207287, + "grad_norm": 1.6028588432287403, + "learning_rate": 2.0371602539535237e-07, + "loss": 0.43968862295150757, + "step": 6971 + }, + { + "epoch": 1.6075628314503114, + "grad_norm": 1.877374036184133, + "learning_rate": 2.0348544958049096e-07, + "loss": 0.5204722881317139, + "step": 6972 + }, + { + "epoch": 1.607793405579894, + "grad_norm": 1.5207193577135807, + "learning_rate": 2.0325498954709198e-07, + "loss": 
0.3944805860519409, + "step": 6973 + }, + { + "epoch": 1.6080239797094766, + "grad_norm": 1.454235622222141, + "learning_rate": 2.0302464532865505e-07, + "loss": 0.42686349153518677, + "step": 6974 + }, + { + "epoch": 1.6082545538390591, + "grad_norm": 1.5958289830519565, + "learning_rate": 2.027944169586633e-07, + "loss": 0.3860762119293213, + "step": 6975 + }, + { + "epoch": 1.6084851279686418, + "grad_norm": 1.880005605643703, + "learning_rate": 2.0256430447058215e-07, + "loss": 0.5570458769798279, + "step": 6976 + }, + { + "epoch": 1.6087157020982246, + "grad_norm": 1.8351241687154358, + "learning_rate": 2.0233430789786132e-07, + "loss": 0.4556728005409241, + "step": 6977 + }, + { + "epoch": 1.6089462762278073, + "grad_norm": 1.4746534507162423, + "learning_rate": 2.0210442727393285e-07, + "loss": 0.48365700244903564, + "step": 6978 + }, + { + "epoch": 1.60917685035739, + "grad_norm": 1.7835628524046172, + "learning_rate": 2.018746626322124e-07, + "loss": 0.4456971287727356, + "step": 6979 + }, + { + "epoch": 1.6094074244869727, + "grad_norm": 1.6700237073697568, + "learning_rate": 2.0164501400609835e-07, + "loss": 0.41877123713493347, + "step": 6980 + }, + { + "epoch": 1.6096379986165552, + "grad_norm": 1.3803715462197303, + "learning_rate": 2.0141548142897246e-07, + "loss": 0.4073547124862671, + "step": 6981 + }, + { + "epoch": 1.609868572746138, + "grad_norm": 1.5181775501419725, + "learning_rate": 2.0118606493420021e-07, + "loss": 0.4987693727016449, + "step": 6982 + }, + { + "epoch": 1.6100991468757204, + "grad_norm": 1.603543806365415, + "learning_rate": 2.0095676455512878e-07, + "loss": 0.4391751289367676, + "step": 6983 + }, + { + "epoch": 1.6103297210053031, + "grad_norm": 1.4062982467603231, + "learning_rate": 2.0072758032508996e-07, + "loss": 0.409262478351593, + "step": 6984 + }, + { + "epoch": 1.6105602951348859, + "grad_norm": 1.353394057864669, + "learning_rate": 2.0049851227739744e-07, + "loss": 0.38653457164764404, + "step": 6985 + }, + { + 
"epoch": 1.6107908692644686, + "grad_norm": 1.9189325963312815, + "learning_rate": 2.0026956044534914e-07, + "loss": 0.4824348986148834, + "step": 6986 + }, + { + "epoch": 1.6110214433940513, + "grad_norm": 1.7037748706735498, + "learning_rate": 2.00040724862225e-07, + "loss": 0.45774850249290466, + "step": 6987 + }, + { + "epoch": 1.611252017523634, + "grad_norm": 1.5419477618151842, + "learning_rate": 1.9981200556128906e-07, + "loss": 0.45437830686569214, + "step": 6988 + }, + { + "epoch": 1.6114825916532165, + "grad_norm": 1.4581568342693196, + "learning_rate": 1.9958340257578753e-07, + "loss": 0.4563155770301819, + "step": 6989 + }, + { + "epoch": 1.6117131657827992, + "grad_norm": 1.7363246075229848, + "learning_rate": 1.9935491593895048e-07, + "loss": 0.5786794424057007, + "step": 6990 + }, + { + "epoch": 1.6119437399123817, + "grad_norm": 1.6120161181322603, + "learning_rate": 1.991265456839909e-07, + "loss": 0.5290218591690063, + "step": 6991 + }, + { + "epoch": 1.6121743140419644, + "grad_norm": 1.607774677113548, + "learning_rate": 1.9889829184410434e-07, + "loss": 0.3456650376319885, + "step": 6992 + }, + { + "epoch": 1.6124048881715471, + "grad_norm": 1.414142582496391, + "learning_rate": 1.9867015445247015e-07, + "loss": 0.40869832038879395, + "step": 6993 + }, + { + "epoch": 1.6126354623011299, + "grad_norm": 2.3563881452147992, + "learning_rate": 1.9844213354225004e-07, + "loss": 0.49926644563674927, + "step": 6994 + }, + { + "epoch": 1.6128660364307126, + "grad_norm": 1.904270429684393, + "learning_rate": 1.9821422914658957e-07, + "loss": 0.4874018132686615, + "step": 6995 + }, + { + "epoch": 1.6130966105602953, + "grad_norm": 1.872252891476363, + "learning_rate": 1.9798644129861654e-07, + "loss": 0.4228810667991638, + "step": 6996 + }, + { + "epoch": 1.6133271846898778, + "grad_norm": 1.4437194678200662, + "learning_rate": 1.9775877003144237e-07, + "loss": 0.4309043884277344, + "step": 6997 + }, + { + "epoch": 1.6135577588194605, + "grad_norm": 
1.6133739556944033, + "learning_rate": 1.9753121537816142e-07, + "loss": 0.3917756676673889, + "step": 6998 + }, + { + "epoch": 1.613788332949043, + "grad_norm": 1.492105866056543, + "learning_rate": 1.9730377737185145e-07, + "loss": 0.4074435830116272, + "step": 6999 + }, + { + "epoch": 1.6140189070786257, + "grad_norm": 1.7474889804918834, + "learning_rate": 1.9707645604557243e-07, + "loss": 0.4581322968006134, + "step": 7000 + }, + { + "epoch": 1.6142494812082084, + "grad_norm": 1.5240615238309698, + "learning_rate": 1.9684925143236776e-07, + "loss": 0.4479151666164398, + "step": 7001 + }, + { + "epoch": 1.6144800553377912, + "grad_norm": 1.4379805154063257, + "learning_rate": 1.966221635652643e-07, + "loss": 0.3378838300704956, + "step": 7002 + }, + { + "epoch": 1.6147106294673739, + "grad_norm": 1.6755517427089033, + "learning_rate": 1.96395192477271e-07, + "loss": 0.3383278250694275, + "step": 7003 + }, + { + "epoch": 1.6149412035969566, + "grad_norm": 1.5430108527415651, + "learning_rate": 1.9616833820138091e-07, + "loss": 0.5164717435836792, + "step": 7004 + }, + { + "epoch": 1.615171777726539, + "grad_norm": 1.6927378959186403, + "learning_rate": 1.9594160077056932e-07, + "loss": 0.4548792243003845, + "step": 7005 + }, + { + "epoch": 1.6154023518561216, + "grad_norm": 1.608730816141968, + "learning_rate": 1.9571498021779531e-07, + "loss": 0.41074928641319275, + "step": 7006 + }, + { + "epoch": 1.6156329259857043, + "grad_norm": 1.5384399915677613, + "learning_rate": 1.9548847657599976e-07, + "loss": 0.4156193137168884, + "step": 7007 + }, + { + "epoch": 1.615863500115287, + "grad_norm": 1.742725966102226, + "learning_rate": 1.95262089878108e-07, + "loss": 0.4602770209312439, + "step": 7008 + }, + { + "epoch": 1.6160940742448697, + "grad_norm": 1.5880816009582301, + "learning_rate": 1.9503582015702713e-07, + "loss": 0.4911346733570099, + "step": 7009 + }, + { + "epoch": 1.6163246483744524, + "grad_norm": 1.5007140709934312, + "learning_rate": 
1.9480966744564764e-07, + "loss": 0.394087553024292, + "step": 7010 + }, + { + "epoch": 1.6165552225040352, + "grad_norm": 1.5836059389854649, + "learning_rate": 1.9458363177684367e-07, + "loss": 0.4845706820487976, + "step": 7011 + }, + { + "epoch": 1.6167857966336177, + "grad_norm": 1.7088454795128305, + "learning_rate": 1.9435771318347116e-07, + "loss": 0.49142736196517944, + "step": 7012 + }, + { + "epoch": 1.6170163707632004, + "grad_norm": 1.3798831769041013, + "learning_rate": 1.9413191169836996e-07, + "loss": 0.4408283829689026, + "step": 7013 + }, + { + "epoch": 1.6172469448927829, + "grad_norm": 1.6476950016993046, + "learning_rate": 1.9390622735436268e-07, + "loss": 0.6088640689849854, + "step": 7014 + }, + { + "epoch": 1.6174775190223656, + "grad_norm": 1.912745817268737, + "learning_rate": 1.93680660184255e-07, + "loss": 0.5208842158317566, + "step": 7015 + }, + { + "epoch": 1.6177080931519483, + "grad_norm": 1.7742607180865566, + "learning_rate": 1.9345521022083488e-07, + "loss": 0.5652821660041809, + "step": 7016 + }, + { + "epoch": 1.617938667281531, + "grad_norm": 1.5895189074949856, + "learning_rate": 1.9322987749687437e-07, + "loss": 0.4861832857131958, + "step": 7017 + }, + { + "epoch": 1.6181692414111137, + "grad_norm": 1.5693969535816144, + "learning_rate": 1.930046620451272e-07, + "loss": 0.39583832025527954, + "step": 7018 + }, + { + "epoch": 1.6183998155406965, + "grad_norm": 1.6283824576887038, + "learning_rate": 1.927795638983313e-07, + "loss": 0.5638653039932251, + "step": 7019 + }, + { + "epoch": 1.618630389670279, + "grad_norm": 1.7595661530223012, + "learning_rate": 1.9255458308920648e-07, + "loss": 0.4737275242805481, + "step": 7020 + }, + { + "epoch": 1.6188609637998617, + "grad_norm": 1.3807112997659796, + "learning_rate": 1.923297196504563e-07, + "loss": 0.4526802897453308, + "step": 7021 + }, + { + "epoch": 1.6190915379294442, + "grad_norm": 1.5519742811018764, + "learning_rate": 1.9210497361476708e-07, + "loss": 
0.40800565481185913, + "step": 7022 + }, + { + "epoch": 1.6193221120590269, + "grad_norm": 1.3169867108502276, + "learning_rate": 1.9188034501480744e-07, + "loss": 0.39532414078712463, + "step": 7023 + }, + { + "epoch": 1.6195526861886096, + "grad_norm": 1.3982522966659368, + "learning_rate": 1.9165583388322993e-07, + "loss": 0.40236538648605347, + "step": 7024 + }, + { + "epoch": 1.6197832603181923, + "grad_norm": 1.4838960013292628, + "learning_rate": 1.91431440252669e-07, + "loss": 0.4421047866344452, + "step": 7025 + }, + { + "epoch": 1.620013834447775, + "grad_norm": 1.5688320926864374, + "learning_rate": 1.9120716415574322e-07, + "loss": 0.4149084687232971, + "step": 7026 + }, + { + "epoch": 1.6202444085773577, + "grad_norm": 1.8747733544619556, + "learning_rate": 1.9098300562505264e-07, + "loss": 0.4186127185821533, + "step": 7027 + }, + { + "epoch": 1.6204749827069402, + "grad_norm": 1.5276498671204974, + "learning_rate": 1.9075896469318132e-07, + "loss": 0.4649406671524048, + "step": 7028 + }, + { + "epoch": 1.620705556836523, + "grad_norm": 1.5217002126023946, + "learning_rate": 1.9053504139269593e-07, + "loss": 0.43240052461624146, + "step": 7029 + }, + { + "epoch": 1.6209361309661054, + "grad_norm": 1.7731525747902717, + "learning_rate": 1.9031123575614628e-07, + "loss": 0.4874862730503082, + "step": 7030 + }, + { + "epoch": 1.6211667050956882, + "grad_norm": 1.6133636879972175, + "learning_rate": 1.900875478160644e-07, + "loss": 0.3771815896034241, + "step": 7031 + }, + { + "epoch": 1.6213972792252709, + "grad_norm": 1.548316338784864, + "learning_rate": 1.898639776049653e-07, + "loss": 0.49882376194000244, + "step": 7032 + }, + { + "epoch": 1.6216278533548536, + "grad_norm": 1.5189621230999546, + "learning_rate": 1.896405251553479e-07, + "loss": 0.3813830614089966, + "step": 7033 + }, + { + "epoch": 1.6218584274844363, + "grad_norm": 1.588790821712345, + "learning_rate": 1.8941719049969272e-07, + "loss": 0.41883599758148193, + "step": 7034 + }, + { + 
"epoch": 1.622089001614019, + "grad_norm": 1.4271058877816405, + "learning_rate": 1.8919397367046409e-07, + "loss": 0.42194586992263794, + "step": 7035 + }, + { + "epoch": 1.6223195757436015, + "grad_norm": 1.5957469997065072, + "learning_rate": 1.889708747001084e-07, + "loss": 0.36967700719833374, + "step": 7036 + }, + { + "epoch": 1.6225501498731842, + "grad_norm": 1.4373460175753532, + "learning_rate": 1.887478936210556e-07, + "loss": 0.4493946433067322, + "step": 7037 + }, + { + "epoch": 1.6227807240027667, + "grad_norm": 1.6526676224310628, + "learning_rate": 1.8852503046571833e-07, + "loss": 0.42121458053588867, + "step": 7038 + }, + { + "epoch": 1.6230112981323495, + "grad_norm": 1.430632776113786, + "learning_rate": 1.8830228526649207e-07, + "loss": 0.4529588222503662, + "step": 7039 + }, + { + "epoch": 1.6232418722619322, + "grad_norm": 1.537552702708545, + "learning_rate": 1.88079658055755e-07, + "loss": 0.387844443321228, + "step": 7040 + }, + { + "epoch": 1.623472446391515, + "grad_norm": 1.4872655198554567, + "learning_rate": 1.8785714886586802e-07, + "loss": 0.49954158067703247, + "step": 7041 + }, + { + "epoch": 1.6237030205210976, + "grad_norm": 1.3845875929093436, + "learning_rate": 1.8763475772917548e-07, + "loss": 0.4016296863555908, + "step": 7042 + }, + { + "epoch": 1.6239335946506803, + "grad_norm": 1.5208389143205874, + "learning_rate": 1.8741248467800362e-07, + "loss": 0.358657568693161, + "step": 7043 + }, + { + "epoch": 1.6241641687802628, + "grad_norm": 1.471037478852436, + "learning_rate": 1.8719032974466264e-07, + "loss": 0.434385746717453, + "step": 7044 + }, + { + "epoch": 1.6243947429098455, + "grad_norm": 1.4705602216948914, + "learning_rate": 1.8696829296144466e-07, + "loss": 0.4658992886543274, + "step": 7045 + }, + { + "epoch": 1.624625317039428, + "grad_norm": 1.8724382429627917, + "learning_rate": 1.8674637436062545e-07, + "loss": 0.5438188910484314, + "step": 7046 + }, + { + "epoch": 1.6248558911690107, + "grad_norm": 
1.9024479318941907, + "learning_rate": 1.8652457397446254e-07, + "loss": 0.47364577651023865, + "step": 7047 + }, + { + "epoch": 1.6250864652985935, + "grad_norm": 1.386287471529149, + "learning_rate": 1.8630289183519733e-07, + "loss": 0.3664509654045105, + "step": 7048 + }, + { + "epoch": 1.6253170394281762, + "grad_norm": 1.5676786934992741, + "learning_rate": 1.8608132797505317e-07, + "loss": 0.4226282835006714, + "step": 7049 + }, + { + "epoch": 1.625547613557759, + "grad_norm": 1.4581751590991685, + "learning_rate": 1.8585988242623706e-07, + "loss": 0.47477972507476807, + "step": 7050 + }, + { + "epoch": 1.6257781876873416, + "grad_norm": 2.082606809210874, + "learning_rate": 1.8563855522093786e-07, + "loss": 0.5372269749641418, + "step": 7051 + }, + { + "epoch": 1.626008761816924, + "grad_norm": 1.3565872618977541, + "learning_rate": 1.8541734639132788e-07, + "loss": 0.37929385900497437, + "step": 7052 + }, + { + "epoch": 1.6262393359465068, + "grad_norm": 1.5119164625864447, + "learning_rate": 1.8519625596956244e-07, + "loss": 0.4029538631439209, + "step": 7053 + }, + { + "epoch": 1.6264699100760893, + "grad_norm": 1.5739338248608081, + "learning_rate": 1.8497528398777874e-07, + "loss": 0.3932439982891083, + "step": 7054 + }, + { + "epoch": 1.626700484205672, + "grad_norm": 1.5806776566898322, + "learning_rate": 1.847544304780978e-07, + "loss": 0.45190152525901794, + "step": 7055 + }, + { + "epoch": 1.6269310583352548, + "grad_norm": 1.8629994959724827, + "learning_rate": 1.8453369547262242e-07, + "loss": 0.4852195382118225, + "step": 7056 + }, + { + "epoch": 1.6271616324648375, + "grad_norm": 1.608209634523461, + "learning_rate": 1.8431307900343918e-07, + "loss": 0.41676801443099976, + "step": 7057 + }, + { + "epoch": 1.6273922065944202, + "grad_norm": 1.388166685170728, + "learning_rate": 1.8409258110261626e-07, + "loss": 0.44374561309814453, + "step": 7058 + }, + { + "epoch": 1.627622780724003, + "grad_norm": 1.5975340281654677, + "learning_rate": 
1.838722018022061e-07, + "loss": 0.4348192811012268, + "step": 7059 + }, + { + "epoch": 1.6278533548535854, + "grad_norm": 1.626194256762104, + "learning_rate": 1.836519411342422e-07, + "loss": 0.46572640538215637, + "step": 7060 + }, + { + "epoch": 1.6280839289831681, + "grad_norm": 1.4985871084379754, + "learning_rate": 1.8343179913074214e-07, + "loss": 0.4633631408214569, + "step": 7061 + }, + { + "epoch": 1.6283145031127506, + "grad_norm": 1.3260867645697678, + "learning_rate": 1.8321177582370605e-07, + "loss": 0.44420552253723145, + "step": 7062 + }, + { + "epoch": 1.6285450772423333, + "grad_norm": 1.8207040168707305, + "learning_rate": 1.8299187124511594e-07, + "loss": 0.5628370046615601, + "step": 7063 + }, + { + "epoch": 1.628775651371916, + "grad_norm": 1.7448936691285617, + "learning_rate": 1.8277208542693778e-07, + "loss": 0.5342314839363098, + "step": 7064 + }, + { + "epoch": 1.6290062255014988, + "grad_norm": 1.529076197622531, + "learning_rate": 1.82552418401119e-07, + "loss": 0.440934419631958, + "step": 7065 + }, + { + "epoch": 1.6292367996310815, + "grad_norm": 1.4532572456773438, + "learning_rate": 1.823328701995912e-07, + "loss": 0.45218637585639954, + "step": 7066 + }, + { + "epoch": 1.6294673737606642, + "grad_norm": 1.456173637640115, + "learning_rate": 1.8211344085426716e-07, + "loss": 0.4059211015701294, + "step": 7067 + }, + { + "epoch": 1.6296979478902467, + "grad_norm": 2.0474805024349876, + "learning_rate": 1.818941303970435e-07, + "loss": 0.5036444067955017, + "step": 7068 + }, + { + "epoch": 1.6299285220198294, + "grad_norm": 1.6421868165266436, + "learning_rate": 1.8167493885979935e-07, + "loss": 0.5034196972846985, + "step": 7069 + }, + { + "epoch": 1.630159096149412, + "grad_norm": 1.5247456374523982, + "learning_rate": 1.8145586627439645e-07, + "loss": 0.4199259281158447, + "step": 7070 + }, + { + "epoch": 1.6303896702789946, + "grad_norm": 1.5913722133067008, + "learning_rate": 1.8123691267267915e-07, + "loss": 
0.5439015626907349, + "step": 7071 + }, + { + "epoch": 1.6306202444085773, + "grad_norm": 1.6181852234306913, + "learning_rate": 1.810180780864743e-07, + "loss": 0.4349868893623352, + "step": 7072 + }, + { + "epoch": 1.63085081853816, + "grad_norm": 1.5299206997440553, + "learning_rate": 1.807993625475921e-07, + "loss": 0.39939552545547485, + "step": 7073 + }, + { + "epoch": 1.6310813926677428, + "grad_norm": 1.575600412629914, + "learning_rate": 1.8058076608782468e-07, + "loss": 0.43073540925979614, + "step": 7074 + }, + { + "epoch": 1.6313119667973255, + "grad_norm": 1.6461603718238804, + "learning_rate": 1.8036228873894744e-07, + "loss": 0.4735824465751648, + "step": 7075 + }, + { + "epoch": 1.631542540926908, + "grad_norm": 1.466337846989889, + "learning_rate": 1.8014393053271836e-07, + "loss": 0.42971551418304443, + "step": 7076 + }, + { + "epoch": 1.6317731150564907, + "grad_norm": 1.694502155411865, + "learning_rate": 1.7992569150087823e-07, + "loss": 0.48593759536743164, + "step": 7077 + }, + { + "epoch": 1.6320036891860732, + "grad_norm": 1.55292324755966, + "learning_rate": 1.7970757167514973e-07, + "loss": 0.530194878578186, + "step": 7078 + }, + { + "epoch": 1.632234263315656, + "grad_norm": 1.7324585048939796, + "learning_rate": 1.794895710872394e-07, + "loss": 0.43393629789352417, + "step": 7079 + }, + { + "epoch": 1.6324648374452386, + "grad_norm": 1.5827349286667418, + "learning_rate": 1.7927168976883556e-07, + "loss": 0.4211798906326294, + "step": 7080 + }, + { + "epoch": 1.6326954115748213, + "grad_norm": 1.5939322533043618, + "learning_rate": 1.790539277516091e-07, + "loss": 0.39001476764678955, + "step": 7081 + }, + { + "epoch": 1.632925985704404, + "grad_norm": 1.6028280785725797, + "learning_rate": 1.788362850672146e-07, + "loss": 0.4360283613204956, + "step": 7082 + }, + { + "epoch": 1.6331565598339868, + "grad_norm": 1.6516207153980025, + "learning_rate": 1.7861876174728807e-07, + "loss": 0.47754842042922974, + "step": 7083 + }, + { + 
"epoch": 1.6333871339635693, + "grad_norm": 1.634690883802538, + "learning_rate": 1.7840135782344888e-07, + "loss": 0.35193490982055664, + "step": 7084 + }, + { + "epoch": 1.633617708093152, + "grad_norm": 1.2825662437681398, + "learning_rate": 1.7818407332729912e-07, + "loss": 0.39997392892837524, + "step": 7085 + }, + { + "epoch": 1.6338482822227345, + "grad_norm": 1.324570823301632, + "learning_rate": 1.7796690829042328e-07, + "loss": 0.3255331218242645, + "step": 7086 + }, + { + "epoch": 1.6340788563523172, + "grad_norm": 1.424074701555127, + "learning_rate": 1.777498627443882e-07, + "loss": 0.47072282433509827, + "step": 7087 + }, + { + "epoch": 1.6343094304819, + "grad_norm": 1.5293726959445282, + "learning_rate": 1.775329367207441e-07, + "loss": 0.4231484830379486, + "step": 7088 + }, + { + "epoch": 1.6345400046114826, + "grad_norm": 1.4406985915809287, + "learning_rate": 1.7731613025102276e-07, + "loss": 0.37112197279930115, + "step": 7089 + }, + { + "epoch": 1.6347705787410653, + "grad_norm": 1.5117815815493545, + "learning_rate": 1.7709944336673986e-07, + "loss": 0.5772623419761658, + "step": 7090 + }, + { + "epoch": 1.635001152870648, + "grad_norm": 1.4205344879838042, + "learning_rate": 1.7688287609939244e-07, + "loss": 0.45922917127609253, + "step": 7091 + }, + { + "epoch": 1.6352317270002306, + "grad_norm": 1.6262912271430976, + "learning_rate": 1.7666642848046098e-07, + "loss": 0.42784950137138367, + "step": 7092 + }, + { + "epoch": 1.6354623011298133, + "grad_norm": 1.585709168390131, + "learning_rate": 1.7645010054140873e-07, + "loss": 0.4676967263221741, + "step": 7093 + }, + { + "epoch": 1.6356928752593958, + "grad_norm": 1.4782811209898545, + "learning_rate": 1.7623389231368046e-07, + "loss": 0.434337317943573, + "step": 7094 + }, + { + "epoch": 1.6359234493889785, + "grad_norm": 1.512954791126533, + "learning_rate": 1.760178038287048e-07, + "loss": 0.4667350947856903, + "step": 7095 + }, + { + "epoch": 1.6361540235185612, + "grad_norm": 
1.3397712801467159, + "learning_rate": 1.7580183511789204e-07, + "loss": 0.42233705520629883, + "step": 7096 + }, + { + "epoch": 1.636384597648144, + "grad_norm": 1.5093056460018237, + "learning_rate": 1.7558598621263565e-07, + "loss": 0.4488460421562195, + "step": 7097 + }, + { + "epoch": 1.6366151717777266, + "grad_norm": 1.6708888950919063, + "learning_rate": 1.753702571443112e-07, + "loss": 0.4264194667339325, + "step": 7098 + }, + { + "epoch": 1.6368457459073094, + "grad_norm": 1.414729354018089, + "learning_rate": 1.7515464794427715e-07, + "loss": 0.32695144414901733, + "step": 7099 + }, + { + "epoch": 1.6370763200368919, + "grad_norm": 2.0744464699438825, + "learning_rate": 1.7493915864387487e-07, + "loss": 0.3573018014431, + "step": 7100 + }, + { + "epoch": 1.6373068941664746, + "grad_norm": 1.4506197336511393, + "learning_rate": 1.7472378927442732e-07, + "loss": 0.4545198082923889, + "step": 7101 + }, + { + "epoch": 1.637537468296057, + "grad_norm": 1.59875503504847, + "learning_rate": 1.7450853986724123e-07, + "loss": 0.42589202523231506, + "step": 7102 + }, + { + "epoch": 1.6377680424256398, + "grad_norm": 1.5169081767342318, + "learning_rate": 1.742934104536048e-07, + "loss": 0.4403502345085144, + "step": 7103 + }, + { + "epoch": 1.6379986165552225, + "grad_norm": 1.7606747961526963, + "learning_rate": 1.7407840106478955e-07, + "loss": 0.4262208938598633, + "step": 7104 + }, + { + "epoch": 1.6382291906848052, + "grad_norm": 1.6000265796951778, + "learning_rate": 1.7386351173204905e-07, + "loss": 0.4706578254699707, + "step": 7105 + }, + { + "epoch": 1.638459764814388, + "grad_norm": 1.4657752166922586, + "learning_rate": 1.7364874248661986e-07, + "loss": 0.4526079297065735, + "step": 7106 + }, + { + "epoch": 1.6386903389439706, + "grad_norm": 1.7833403214487409, + "learning_rate": 1.734340933597207e-07, + "loss": 0.42836326360702515, + "step": 7107 + }, + { + "epoch": 1.6389209130735531, + "grad_norm": 1.4453465477500804, + "learning_rate": 
1.7321956438255292e-07, + "loss": 0.42680823802948, + "step": 7108 + }, + { + "epoch": 1.6391514872031359, + "grad_norm": 1.3964828689114657, + "learning_rate": 1.7300515558630068e-07, + "loss": 0.38365036249160767, + "step": 7109 + }, + { + "epoch": 1.6393820613327184, + "grad_norm": 1.4748773918598719, + "learning_rate": 1.7279086700213063e-07, + "loss": 0.4153991937637329, + "step": 7110 + }, + { + "epoch": 1.639612635462301, + "grad_norm": 1.5777502702437645, + "learning_rate": 1.7257669866119163e-07, + "loss": 0.42257291078567505, + "step": 7111 + }, + { + "epoch": 1.6398432095918838, + "grad_norm": 1.7309640190055833, + "learning_rate": 1.7236265059461498e-07, + "loss": 0.34990063309669495, + "step": 7112 + }, + { + "epoch": 1.6400737837214665, + "grad_norm": 1.3939407429934887, + "learning_rate": 1.72148722833515e-07, + "loss": 0.44848760962486267, + "step": 7113 + }, + { + "epoch": 1.6403043578510492, + "grad_norm": 1.4649667660689574, + "learning_rate": 1.7193491540898808e-07, + "loss": 0.4649186134338379, + "step": 7114 + }, + { + "epoch": 1.640534931980632, + "grad_norm": 1.5050161434573055, + "learning_rate": 1.7172122835211333e-07, + "loss": 0.480952650308609, + "step": 7115 + }, + { + "epoch": 1.6407655061102144, + "grad_norm": 1.6101365826637175, + "learning_rate": 1.7150766169395235e-07, + "loss": 0.4669501483440399, + "step": 7116 + }, + { + "epoch": 1.6409960802397972, + "grad_norm": 1.486994174732026, + "learning_rate": 1.7129421546554957e-07, + "loss": 0.4273250102996826, + "step": 7117 + }, + { + "epoch": 1.6412266543693796, + "grad_norm": 1.8106380448833757, + "learning_rate": 1.71080889697931e-07, + "loss": 0.47923076152801514, + "step": 7118 + }, + { + "epoch": 1.6414572284989624, + "grad_norm": 1.5033931180120297, + "learning_rate": 1.708676844221061e-07, + "loss": 0.42801159620285034, + "step": 7119 + }, + { + "epoch": 1.641687802628545, + "grad_norm": 1.4792875147029159, + "learning_rate": 1.7065459966906636e-07, + "loss": 
0.39929044246673584, + "step": 7120 + }, + { + "epoch": 1.6419183767581278, + "grad_norm": 1.4727601001923896, + "learning_rate": 1.7044163546978553e-07, + "loss": 0.4919764995574951, + "step": 7121 + }, + { + "epoch": 1.6421489508877105, + "grad_norm": 1.5018740505050776, + "learning_rate": 1.702287918552202e-07, + "loss": 0.45943617820739746, + "step": 7122 + }, + { + "epoch": 1.642379525017293, + "grad_norm": 1.5202994857697039, + "learning_rate": 1.7001606885630948e-07, + "loss": 0.48078954219818115, + "step": 7123 + }, + { + "epoch": 1.6426100991468757, + "grad_norm": 1.406204806461001, + "learning_rate": 1.6980346650397505e-07, + "loss": 0.4217113256454468, + "step": 7124 + }, + { + "epoch": 1.6428406732764582, + "grad_norm": 1.479814078881505, + "learning_rate": 1.6959098482912037e-07, + "loss": 0.4643937051296234, + "step": 7125 + }, + { + "epoch": 1.643071247406041, + "grad_norm": 1.6157838326637273, + "learning_rate": 1.6937862386263212e-07, + "loss": 0.43977001309394836, + "step": 7126 + }, + { + "epoch": 1.6433018215356237, + "grad_norm": 1.4653862858165947, + "learning_rate": 1.6916638363537882e-07, + "loss": 0.3872392177581787, + "step": 7127 + }, + { + "epoch": 1.6435323956652064, + "grad_norm": 1.4668608493131068, + "learning_rate": 1.6895426417821213e-07, + "loss": 0.44625502824783325, + "step": 7128 + }, + { + "epoch": 1.643762969794789, + "grad_norm": 1.6445652935798991, + "learning_rate": 1.6874226552196523e-07, + "loss": 0.36836186051368713, + "step": 7129 + }, + { + "epoch": 1.6439935439243718, + "grad_norm": 1.5181829131466213, + "learning_rate": 1.6853038769745465e-07, + "loss": 0.35491907596588135, + "step": 7130 + }, + { + "epoch": 1.6442241180539543, + "grad_norm": 1.5107933584098798, + "learning_rate": 1.6831863073547913e-07, + "loss": 0.5210527181625366, + "step": 7131 + }, + { + "epoch": 1.644454692183537, + "grad_norm": 1.5854667470103982, + "learning_rate": 1.6810699466681932e-07, + "loss": 0.3805693984031677, + "step": 7132 + }, + { 
+ "epoch": 1.6446852663131195, + "grad_norm": 1.8089883418272688, + "learning_rate": 1.6789547952223893e-07, + "loss": 0.5768346786499023, + "step": 7133 + }, + { + "epoch": 1.6449158404427022, + "grad_norm": 1.8423402992377882, + "learning_rate": 1.6768408533248356e-07, + "loss": 0.46465635299682617, + "step": 7134 + }, + { + "epoch": 1.645146414572285, + "grad_norm": 1.8710111931219464, + "learning_rate": 1.674728121282819e-07, + "loss": 0.43119215965270996, + "step": 7135 + }, + { + "epoch": 1.6453769887018677, + "grad_norm": 1.4436891948188744, + "learning_rate": 1.6726165994034402e-07, + "loss": 0.42814093828201294, + "step": 7136 + }, + { + "epoch": 1.6456075628314504, + "grad_norm": 1.5822684467576347, + "learning_rate": 1.6705062879936382e-07, + "loss": 0.41762328147888184, + "step": 7137 + }, + { + "epoch": 1.645838136961033, + "grad_norm": 2.059560914873905, + "learning_rate": 1.668397187360161e-07, + "loss": 0.42717012763023376, + "step": 7138 + }, + { + "epoch": 1.6460687110906156, + "grad_norm": 1.3692759576709286, + "learning_rate": 1.666289297809591e-07, + "loss": 0.37660926580429077, + "step": 7139 + }, + { + "epoch": 1.6462992852201983, + "grad_norm": 1.689926156627043, + "learning_rate": 1.664182619648331e-07, + "loss": 0.3905887007713318, + "step": 7140 + }, + { + "epoch": 1.6465298593497808, + "grad_norm": 1.5648955881343065, + "learning_rate": 1.6620771531826117e-07, + "loss": 0.4848547577857971, + "step": 7141 + }, + { + "epoch": 1.6467604334793635, + "grad_norm": 1.5642509939041707, + "learning_rate": 1.659972898718479e-07, + "loss": 0.37895438075065613, + "step": 7142 + }, + { + "epoch": 1.6469910076089462, + "grad_norm": 1.6050388867308452, + "learning_rate": 1.6578698565618075e-07, + "loss": 0.46770527958869934, + "step": 7143 + }, + { + "epoch": 1.647221581738529, + "grad_norm": 1.705579614415488, + "learning_rate": 1.6557680270182995e-07, + "loss": 0.44138044118881226, + "step": 7144 + }, + { + "epoch": 1.6474521558681117, + "grad_norm": 
1.7922951246817975, + "learning_rate": 1.6536674103934734e-07, + "loss": 0.3681126832962036, + "step": 7145 + }, + { + "epoch": 1.6476827299976944, + "grad_norm": 1.454313444949356, + "learning_rate": 1.651568006992675e-07, + "loss": 0.4410884380340576, + "step": 7146 + }, + { + "epoch": 1.6479133041272769, + "grad_norm": 1.444668904765709, + "learning_rate": 1.6494698171210776e-07, + "loss": 0.4161960482597351, + "step": 7147 + }, + { + "epoch": 1.6481438782568596, + "grad_norm": 1.6873012096950248, + "learning_rate": 1.647372841083674e-07, + "loss": 0.4912784695625305, + "step": 7148 + }, + { + "epoch": 1.648374452386442, + "grad_norm": 1.8457570973340096, + "learning_rate": 1.6452770791852766e-07, + "loss": 0.5137985944747925, + "step": 7149 + }, + { + "epoch": 1.6486050265160248, + "grad_norm": 1.845102008062213, + "learning_rate": 1.6431825317305303e-07, + "loss": 0.43644070625305176, + "step": 7150 + }, + { + "epoch": 1.6488356006456075, + "grad_norm": 1.508191131690363, + "learning_rate": 1.6410891990238973e-07, + "loss": 0.4319378733634949, + "step": 7151 + }, + { + "epoch": 1.6490661747751902, + "grad_norm": 1.6137067673031091, + "learning_rate": 1.6389970813696619e-07, + "loss": 0.474090039730072, + "step": 7152 + }, + { + "epoch": 1.649296748904773, + "grad_norm": 1.656766330100741, + "learning_rate": 1.6369061790719375e-07, + "loss": 0.40291503071784973, + "step": 7153 + }, + { + "epoch": 1.6495273230343557, + "grad_norm": 1.5434308580585603, + "learning_rate": 1.6348164924346562e-07, + "loss": 0.51482754945755, + "step": 7154 + }, + { + "epoch": 1.6497578971639382, + "grad_norm": 1.421069671161851, + "learning_rate": 1.632728021761579e-07, + "loss": 0.35308974981307983, + "step": 7155 + }, + { + "epoch": 1.6499884712935209, + "grad_norm": 1.7501565194944115, + "learning_rate": 1.6306407673562815e-07, + "loss": 0.5269055366516113, + "step": 7156 + }, + { + "epoch": 1.6502190454231034, + "grad_norm": 1.4775332310798848, + "learning_rate": 
1.6285547295221724e-07, + "loss": 0.41290512681007385, + "step": 7157 + }, + { + "epoch": 1.650449619552686, + "grad_norm": 1.4513808656924674, + "learning_rate": 1.6264699085624721e-07, + "loss": 0.39930522441864014, + "step": 7158 + }, + { + "epoch": 1.6506801936822688, + "grad_norm": 1.475028134913826, + "learning_rate": 1.6243863047802365e-07, + "loss": 0.4617648422718048, + "step": 7159 + }, + { + "epoch": 1.6509107678118515, + "grad_norm": 1.6583284073308129, + "learning_rate": 1.6223039184783337e-07, + "loss": 0.4618498980998993, + "step": 7160 + }, + { + "epoch": 1.6511413419414342, + "grad_norm": 1.5177380348824272, + "learning_rate": 1.6202227499594635e-07, + "loss": 0.43138834834098816, + "step": 7161 + }, + { + "epoch": 1.651371916071017, + "grad_norm": 1.9944130162827052, + "learning_rate": 1.618142799526141e-07, + "loss": 0.5330632925033569, + "step": 7162 + }, + { + "epoch": 1.6516024902005995, + "grad_norm": 1.4381555357456468, + "learning_rate": 1.6160640674807103e-07, + "loss": 0.45410698652267456, + "step": 7163 + }, + { + "epoch": 1.6518330643301822, + "grad_norm": 1.52256812211894, + "learning_rate": 1.6139865541253384e-07, + "loss": 0.4216715693473816, + "step": 7164 + }, + { + "epoch": 1.6520636384597647, + "grad_norm": 1.6818151368938485, + "learning_rate": 1.6119102597620083e-07, + "loss": 0.3738868832588196, + "step": 7165 + }, + { + "epoch": 1.6522942125893474, + "grad_norm": 1.587335339212439, + "learning_rate": 1.609835184692535e-07, + "loss": 0.44595998525619507, + "step": 7166 + }, + { + "epoch": 1.65252478671893, + "grad_norm": 1.8461813575956394, + "learning_rate": 1.6077613292185466e-07, + "loss": 0.5446096062660217, + "step": 7167 + }, + { + "epoch": 1.6527553608485128, + "grad_norm": 1.5661326715584178, + "learning_rate": 1.605688693641505e-07, + "loss": 0.47280746698379517, + "step": 7168 + }, + { + "epoch": 1.6529859349780955, + "grad_norm": 1.6260653553703972, + "learning_rate": 1.6036172782626823e-07, + "loss": 
0.5280133485794067, + "step": 7169 + }, + { + "epoch": 1.6532165091076783, + "grad_norm": 1.6507744528919734, + "learning_rate": 1.6015470833831835e-07, + "loss": 0.4659959375858307, + "step": 7170 + }, + { + "epoch": 1.6534470832372608, + "grad_norm": 1.5548632331284282, + "learning_rate": 1.5994781093039335e-07, + "loss": 0.5196797251701355, + "step": 7171 + }, + { + "epoch": 1.6536776573668435, + "grad_norm": 1.298650586457363, + "learning_rate": 1.597410356325676e-07, + "loss": 0.41855669021606445, + "step": 7172 + }, + { + "epoch": 1.653908231496426, + "grad_norm": 1.6301682003715197, + "learning_rate": 1.5953438247489814e-07, + "loss": 0.43063706159591675, + "step": 7173 + }, + { + "epoch": 1.6541388056260087, + "grad_norm": 1.556025937846025, + "learning_rate": 1.59327851487424e-07, + "loss": 0.3954850435256958, + "step": 7174 + }, + { + "epoch": 1.6543693797555914, + "grad_norm": 1.6096102290125367, + "learning_rate": 1.591214427001667e-07, + "loss": 0.4497464895248413, + "step": 7175 + }, + { + "epoch": 1.6545999538851741, + "grad_norm": 1.573427243133678, + "learning_rate": 1.5891515614312967e-07, + "loss": 0.47012704610824585, + "step": 7176 + }, + { + "epoch": 1.6548305280147568, + "grad_norm": 1.345166831078004, + "learning_rate": 1.5870899184629872e-07, + "loss": 0.399054616689682, + "step": 7177 + }, + { + "epoch": 1.6550611021443395, + "grad_norm": 1.68897296856965, + "learning_rate": 1.5850294983964208e-07, + "loss": 0.41277164220809937, + "step": 7178 + }, + { + "epoch": 1.655291676273922, + "grad_norm": 1.6410807386564468, + "learning_rate": 1.5829703015311013e-07, + "loss": 0.4735640287399292, + "step": 7179 + }, + { + "epoch": 1.6555222504035048, + "grad_norm": 1.5414168893805387, + "learning_rate": 1.5809123281663516e-07, + "loss": 0.4244140386581421, + "step": 7180 + }, + { + "epoch": 1.6557528245330873, + "grad_norm": 1.6196858148033184, + "learning_rate": 1.5788555786013212e-07, + "loss": 0.4291320741176605, + "step": 7181 + }, + { + 
"epoch": 1.65598339866267, + "grad_norm": 1.8656270771434302, + "learning_rate": 1.576800053134979e-07, + "loss": 0.3965643048286438, + "step": 7182 + }, + { + "epoch": 1.6562139727922527, + "grad_norm": 1.5939688831505687, + "learning_rate": 1.5747457520661123e-07, + "loss": 0.4087764620780945, + "step": 7183 + }, + { + "epoch": 1.6564445469218354, + "grad_norm": 1.523375144006796, + "learning_rate": 1.5726926756933411e-07, + "loss": 0.4207920432090759, + "step": 7184 + }, + { + "epoch": 1.6566751210514181, + "grad_norm": 1.757376584691626, + "learning_rate": 1.570640824315095e-07, + "loss": 0.34311753511428833, + "step": 7185 + }, + { + "epoch": 1.6569056951810008, + "grad_norm": 2.079059544313622, + "learning_rate": 1.5685901982296345e-07, + "loss": 0.44728145003318787, + "step": 7186 + }, + { + "epoch": 1.6571362693105833, + "grad_norm": 1.6933442739443483, + "learning_rate": 1.5665407977350386e-07, + "loss": 0.38300156593322754, + "step": 7187 + }, + { + "epoch": 1.657366843440166, + "grad_norm": 1.4613322908312483, + "learning_rate": 1.56449262312921e-07, + "loss": 0.32724204659461975, + "step": 7188 + }, + { + "epoch": 1.6575974175697485, + "grad_norm": 1.5277123552551555, + "learning_rate": 1.562445674709868e-07, + "loss": 0.4812743067741394, + "step": 7189 + }, + { + "epoch": 1.6578279916993313, + "grad_norm": 1.279031260784297, + "learning_rate": 1.5603999527745615e-07, + "loss": 0.3974485397338867, + "step": 7190 + }, + { + "epoch": 1.658058565828914, + "grad_norm": 1.729819799365075, + "learning_rate": 1.5583554576206536e-07, + "loss": 0.5058138370513916, + "step": 7191 + }, + { + "epoch": 1.6582891399584967, + "grad_norm": 1.451214505055382, + "learning_rate": 1.5563121895453323e-07, + "loss": 0.4442358613014221, + "step": 7192 + }, + { + "epoch": 1.6585197140880794, + "grad_norm": 1.6317499919466611, + "learning_rate": 1.5542701488456077e-07, + "loss": 0.35400623083114624, + "step": 7193 + }, + { + "epoch": 1.6587502882176621, + "grad_norm": 
1.8335890419904581, + "learning_rate": 1.5522293358183125e-07, + "loss": 0.5046352744102478, + "step": 7194 + }, + { + "epoch": 1.6589808623472446, + "grad_norm": 1.8150914477063191, + "learning_rate": 1.5501897507601015e-07, + "loss": 0.45344769954681396, + "step": 7195 + }, + { + "epoch": 1.6592114364768273, + "grad_norm": 1.7111771949579255, + "learning_rate": 1.548151393967444e-07, + "loss": 0.4251500368118286, + "step": 7196 + }, + { + "epoch": 1.6594420106064098, + "grad_norm": 1.4323459769713944, + "learning_rate": 1.5461142657366399e-07, + "loss": 0.3728788495063782, + "step": 7197 + }, + { + "epoch": 1.6596725847359926, + "grad_norm": 1.5246938682723656, + "learning_rate": 1.5440783663638036e-07, + "loss": 0.3143829107284546, + "step": 7198 + }, + { + "epoch": 1.6599031588655753, + "grad_norm": 1.3416076020806418, + "learning_rate": 1.5420436961448758e-07, + "loss": 0.5070813894271851, + "step": 7199 + }, + { + "epoch": 1.660133732995158, + "grad_norm": 1.2380684135092845, + "learning_rate": 1.5400102553756145e-07, + "loss": 0.3644014000892639, + "step": 7200 + }, + { + "epoch": 1.6603643071247407, + "grad_norm": 2.973338937285917, + "learning_rate": 1.5379780443516023e-07, + "loss": 0.4120270609855652, + "step": 7201 + }, + { + "epoch": 1.6605948812543234, + "grad_norm": 1.6150469405356445, + "learning_rate": 1.5359470633682425e-07, + "loss": 0.4327865242958069, + "step": 7202 + }, + { + "epoch": 1.660825455383906, + "grad_norm": 2.011470811225138, + "learning_rate": 1.5339173127207562e-07, + "loss": 0.626624584197998, + "step": 7203 + }, + { + "epoch": 1.6610560295134886, + "grad_norm": 1.6601868604564274, + "learning_rate": 1.5318887927041913e-07, + "loss": 0.45536088943481445, + "step": 7204 + }, + { + "epoch": 1.6612866036430711, + "grad_norm": 1.6789895391694964, + "learning_rate": 1.52986150361341e-07, + "loss": 0.5306276082992554, + "step": 7205 + }, + { + "epoch": 1.6615171777726538, + "grad_norm": 1.5374267124283623, + "learning_rate": 
1.5278354457431043e-07, + "loss": 0.4263244867324829, + "step": 7206 + }, + { + "epoch": 1.6617477519022366, + "grad_norm": 1.5390387444640852, + "learning_rate": 1.5258106193877762e-07, + "loss": 0.4578266143798828, + "step": 7207 + }, + { + "epoch": 1.6619783260318193, + "grad_norm": 1.4963429405053086, + "learning_rate": 1.5237870248417605e-07, + "loss": 0.5120365619659424, + "step": 7208 + }, + { + "epoch": 1.662208900161402, + "grad_norm": 1.7987725718508283, + "learning_rate": 1.521764662399202e-07, + "loss": 0.4491463005542755, + "step": 7209 + }, + { + "epoch": 1.6624394742909847, + "grad_norm": 1.588713571736857, + "learning_rate": 1.5197435323540752e-07, + "loss": 0.4810635447502136, + "step": 7210 + }, + { + "epoch": 1.6626700484205672, + "grad_norm": 1.549550087406024, + "learning_rate": 1.5177236350001722e-07, + "loss": 0.4250200390815735, + "step": 7211 + }, + { + "epoch": 1.66290062255015, + "grad_norm": 1.8619243359226805, + "learning_rate": 1.515704970631102e-07, + "loss": 0.49981385469436646, + "step": 7212 + }, + { + "epoch": 1.6631311966797324, + "grad_norm": 1.621928409701738, + "learning_rate": 1.5136875395403027e-07, + "loss": 0.40204358100891113, + "step": 7213 + }, + { + "epoch": 1.6633617708093151, + "grad_norm": 1.504987607563178, + "learning_rate": 1.5116713420210236e-07, + "loss": 0.514127254486084, + "step": 7214 + }, + { + "epoch": 1.6635923449388978, + "grad_norm": 1.8745773841611948, + "learning_rate": 1.509656378366343e-07, + "loss": 0.5119338631629944, + "step": 7215 + }, + { + "epoch": 1.6638229190684806, + "grad_norm": 1.6137446017437618, + "learning_rate": 1.507642648869153e-07, + "loss": 0.45031970739364624, + "step": 7216 + }, + { + "epoch": 1.6640534931980633, + "grad_norm": 1.427878863576358, + "learning_rate": 1.5056301538221716e-07, + "loss": 0.4503582715988159, + "step": 7217 + }, + { + "epoch": 1.664284067327646, + "grad_norm": 1.4651953746761925, + "learning_rate": 1.503618893517935e-07, + "loss": 0.38793227076530457, 
+ "step": 7218 + }, + { + "epoch": 1.6645146414572285, + "grad_norm": 1.4683280962315126, + "learning_rate": 1.5016088682488026e-07, + "loss": 0.4446987211704254, + "step": 7219 + }, + { + "epoch": 1.6647452155868112, + "grad_norm": 1.7835855909787117, + "learning_rate": 1.4996000783069485e-07, + "loss": 0.4687119722366333, + "step": 7220 + }, + { + "epoch": 1.6649757897163937, + "grad_norm": 1.6205230957470973, + "learning_rate": 1.4975925239843734e-07, + "loss": 0.48283010721206665, + "step": 7221 + }, + { + "epoch": 1.6652063638459764, + "grad_norm": 1.630894562773258, + "learning_rate": 1.4955862055728941e-07, + "loss": 0.510201632976532, + "step": 7222 + }, + { + "epoch": 1.6654369379755591, + "grad_norm": 1.4932233099831633, + "learning_rate": 1.4935811233641471e-07, + "loss": 0.4070482850074768, + "step": 7223 + }, + { + "epoch": 1.6656675121051419, + "grad_norm": 1.5683915035975688, + "learning_rate": 1.4915772776495948e-07, + "loss": 0.44347989559173584, + "step": 7224 + }, + { + "epoch": 1.6658980862347246, + "grad_norm": 1.6817444257008654, + "learning_rate": 1.4895746687205147e-07, + "loss": 0.4160166382789612, + "step": 7225 + }, + { + "epoch": 1.6661286603643073, + "grad_norm": 1.5428277862719844, + "learning_rate": 1.4875732968680098e-07, + "loss": 0.39939236640930176, + "step": 7226 + }, + { + "epoch": 1.6663592344938898, + "grad_norm": 1.8461591057744162, + "learning_rate": 1.4855731623829936e-07, + "loss": 0.4604174494743347, + "step": 7227 + }, + { + "epoch": 1.6665898086234725, + "grad_norm": 1.5963571116977615, + "learning_rate": 1.4835742655562134e-07, + "loss": 0.4691208004951477, + "step": 7228 + }, + { + "epoch": 1.666820382753055, + "grad_norm": 1.358957710417088, + "learning_rate": 1.481576606678222e-07, + "loss": 0.4146147668361664, + "step": 7229 + }, + { + "epoch": 1.6670509568826377, + "grad_norm": 1.4681059084163257, + "learning_rate": 1.4795801860394041e-07, + "loss": 0.4064391255378723, + "step": 7230 + }, + { + "epoch": 
1.6672815310122204, + "grad_norm": 1.233349352710464, + "learning_rate": 1.4775850039299587e-07, + "loss": 0.3696960210800171, + "step": 7231 + }, + { + "epoch": 1.6675121051418031, + "grad_norm": 1.763624641268307, + "learning_rate": 1.4755910606399023e-07, + "loss": 0.4356287121772766, + "step": 7232 + }, + { + "epoch": 1.6677426792713859, + "grad_norm": 1.6119962512147328, + "learning_rate": 1.473598356459078e-07, + "loss": 0.39327436685562134, + "step": 7233 + }, + { + "epoch": 1.6679732534009684, + "grad_norm": 1.4528281796334948, + "learning_rate": 1.4716068916771452e-07, + "loss": 0.4722225069999695, + "step": 7234 + }, + { + "epoch": 1.668203827530551, + "grad_norm": 1.3954919737652625, + "learning_rate": 1.4696166665835852e-07, + "loss": 0.3645583987236023, + "step": 7235 + }, + { + "epoch": 1.6684344016601336, + "grad_norm": 1.628738998914794, + "learning_rate": 1.4676276814676935e-07, + "loss": 0.4153117537498474, + "step": 7236 + }, + { + "epoch": 1.6686649757897163, + "grad_norm": 1.2987847859472657, + "learning_rate": 1.4656399366185933e-07, + "loss": 0.3470612168312073, + "step": 7237 + }, + { + "epoch": 1.668895549919299, + "grad_norm": 1.424067964832139, + "learning_rate": 1.4636534323252203e-07, + "loss": 0.3934207260608673, + "step": 7238 + }, + { + "epoch": 1.6691261240488817, + "grad_norm": 1.6191654953115664, + "learning_rate": 1.4616681688763355e-07, + "loss": 0.35530412197113037, + "step": 7239 + }, + { + "epoch": 1.6693566981784644, + "grad_norm": 1.5867473768730196, + "learning_rate": 1.4596841465605136e-07, + "loss": 0.5218726396560669, + "step": 7240 + }, + { + "epoch": 1.6695872723080472, + "grad_norm": 1.9070671037743527, + "learning_rate": 1.4577013656661542e-07, + "loss": 0.4287494421005249, + "step": 7241 + }, + { + "epoch": 1.6698178464376296, + "grad_norm": 2.099754040079973, + "learning_rate": 1.4557198264814775e-07, + "loss": 0.5161805152893066, + "step": 7242 + }, + { + "epoch": 1.6700484205672124, + "grad_norm": 
1.485709070131558, + "learning_rate": 1.4537395292945153e-07, + "loss": 0.4843006730079651, + "step": 7243 + }, + { + "epoch": 1.6702789946967949, + "grad_norm": 1.416657421952009, + "learning_rate": 1.4517604743931288e-07, + "loss": 0.526993989944458, + "step": 7244 + }, + { + "epoch": 1.6705095688263776, + "grad_norm": 1.318696888956493, + "learning_rate": 1.4497826620649888e-07, + "loss": 0.43705734610557556, + "step": 7245 + }, + { + "epoch": 1.6707401429559603, + "grad_norm": 1.626300355229789, + "learning_rate": 1.4478060925975942e-07, + "loss": 0.6001747846603394, + "step": 7246 + }, + { + "epoch": 1.670970717085543, + "grad_norm": 1.6701240840694564, + "learning_rate": 1.4458307662782564e-07, + "loss": 0.4041635990142822, + "step": 7247 + }, + { + "epoch": 1.6712012912151257, + "grad_norm": 1.6291301094782007, + "learning_rate": 1.4438566833941112e-07, + "loss": 0.4425908923149109, + "step": 7248 + }, + { + "epoch": 1.6714318653447084, + "grad_norm": 1.8234242321709921, + "learning_rate": 1.4418838442321102e-07, + "loss": 0.5202267169952393, + "step": 7249 + }, + { + "epoch": 1.671662439474291, + "grad_norm": 1.3646967283137599, + "learning_rate": 1.4399122490790293e-07, + "loss": 0.44352006912231445, + "step": 7250 + }, + { + "epoch": 1.6718930136038737, + "grad_norm": 1.5745296606833632, + "learning_rate": 1.4379418982214542e-07, + "loss": 0.4757179021835327, + "step": 7251 + }, + { + "epoch": 1.6721235877334562, + "grad_norm": 2.0125776677757825, + "learning_rate": 1.4359727919457998e-07, + "loss": 0.4748988747596741, + "step": 7252 + }, + { + "epoch": 1.6723541618630389, + "grad_norm": 1.4390886859105494, + "learning_rate": 1.434004930538294e-07, + "loss": 0.4280398190021515, + "step": 7253 + }, + { + "epoch": 1.6725847359926216, + "grad_norm": 1.5844583735943714, + "learning_rate": 1.4320383142849834e-07, + "loss": 0.4959871172904968, + "step": 7254 + }, + { + "epoch": 1.6728153101222043, + "grad_norm": 1.6551218088905322, + "learning_rate": 
1.4300729434717396e-07, + "loss": 0.506413996219635, + "step": 7255 + }, + { + "epoch": 1.673045884251787, + "grad_norm": 1.5894513628120581, + "learning_rate": 1.4281088183842448e-07, + "loss": 0.4723675847053528, + "step": 7256 + }, + { + "epoch": 1.6732764583813697, + "grad_norm": 1.5735532616627814, + "learning_rate": 1.4261459393080076e-07, + "loss": 0.41801339387893677, + "step": 7257 + }, + { + "epoch": 1.6735070325109522, + "grad_norm": 1.651784117733762, + "learning_rate": 1.424184306528351e-07, + "loss": 0.4463369846343994, + "step": 7258 + }, + { + "epoch": 1.673737606640535, + "grad_norm": 1.6205372576102755, + "learning_rate": 1.422223920330421e-07, + "loss": 0.4167429506778717, + "step": 7259 + }, + { + "epoch": 1.6739681807701174, + "grad_norm": 1.448285732733219, + "learning_rate": 1.420264780999174e-07, + "loss": 0.48808401823043823, + "step": 7260 + }, + { + "epoch": 1.6741987548997002, + "grad_norm": 1.7994342785579152, + "learning_rate": 1.4183068888193973e-07, + "loss": 0.515659749507904, + "step": 7261 + }, + { + "epoch": 1.6744293290292829, + "grad_norm": 1.6582236339460064, + "learning_rate": 1.416350244075688e-07, + "loss": 0.4393026530742645, + "step": 7262 + }, + { + "epoch": 1.6746599031588656, + "grad_norm": 1.6750398739214198, + "learning_rate": 1.4143948470524602e-07, + "loss": 0.35053056478500366, + "step": 7263 + }, + { + "epoch": 1.6748904772884483, + "grad_norm": 1.1872706234379884, + "learning_rate": 1.4124406980339532e-07, + "loss": 0.35598453879356384, + "step": 7264 + }, + { + "epoch": 1.675121051418031, + "grad_norm": 1.747342634360751, + "learning_rate": 1.410487797304224e-07, + "loss": 0.47989165782928467, + "step": 7265 + }, + { + "epoch": 1.6753516255476135, + "grad_norm": 1.4767801179152846, + "learning_rate": 1.408536145147148e-07, + "loss": 0.4621499180793762, + "step": 7266 + }, + { + "epoch": 1.6755821996771962, + "grad_norm": 1.4469255776490486, + "learning_rate": 1.4065857418464122e-07, + "loss": 
0.40567925572395325, + "step": 7267 + }, + { + "epoch": 1.6758127738067787, + "grad_norm": 2.121901896007684, + "learning_rate": 1.4046365876855326e-07, + "loss": 0.38889849185943604, + "step": 7268 + }, + { + "epoch": 1.6760433479363614, + "grad_norm": 1.8036845925466258, + "learning_rate": 1.4026886829478345e-07, + "loss": 0.516187846660614, + "step": 7269 + }, + { + "epoch": 1.6762739220659442, + "grad_norm": 1.3670995724086425, + "learning_rate": 1.4007420279164706e-07, + "loss": 0.4007910192012787, + "step": 7270 + }, + { + "epoch": 1.6765044961955269, + "grad_norm": 1.4513245632029468, + "learning_rate": 1.3987966228744007e-07, + "loss": 0.4426886737346649, + "step": 7271 + }, + { + "epoch": 1.6767350703251096, + "grad_norm": 1.7767592903800882, + "learning_rate": 1.3968524681044114e-07, + "loss": 0.46890369057655334, + "step": 7272 + }, + { + "epoch": 1.6769656444546923, + "grad_norm": 1.714201330640179, + "learning_rate": 1.3949095638891096e-07, + "loss": 0.510369598865509, + "step": 7273 + }, + { + "epoch": 1.6771962185842748, + "grad_norm": 1.697492362317676, + "learning_rate": 1.3929679105109106e-07, + "loss": 0.47810226678848267, + "step": 7274 + }, + { + "epoch": 1.6774267927138575, + "grad_norm": 1.6234301902278867, + "learning_rate": 1.3910275082520572e-07, + "loss": 0.48592591285705566, + "step": 7275 + }, + { + "epoch": 1.67765736684344, + "grad_norm": 1.5107060260742486, + "learning_rate": 1.3890883573946021e-07, + "loss": 0.4664943814277649, + "step": 7276 + }, + { + "epoch": 1.6778879409730227, + "grad_norm": 1.6514095493299281, + "learning_rate": 1.3871504582204263e-07, + "loss": 0.47146645188331604, + "step": 7277 + }, + { + "epoch": 1.6781185151026055, + "grad_norm": 1.615997642769361, + "learning_rate": 1.3852138110112166e-07, + "loss": 0.5171671509742737, + "step": 7278 + }, + { + "epoch": 1.6783490892321882, + "grad_norm": 1.8275491234958787, + "learning_rate": 1.3832784160484913e-07, + "loss": 0.45887336134910583, + "step": 7279 + }, + { 
+ "epoch": 1.678579663361771, + "grad_norm": 1.494861700798582, + "learning_rate": 1.3813442736135728e-07, + "loss": 0.4363539516925812, + "step": 7280 + }, + { + "epoch": 1.6788102374913536, + "grad_norm": 2.0171892009876147, + "learning_rate": 1.379411383987612e-07, + "loss": 0.4626097083091736, + "step": 7281 + }, + { + "epoch": 1.679040811620936, + "grad_norm": 1.8196525383976765, + "learning_rate": 1.3774797474515766e-07, + "loss": 0.5939204096794128, + "step": 7282 + }, + { + "epoch": 1.6792713857505188, + "grad_norm": 1.6878435890648014, + "learning_rate": 1.3755493642862437e-07, + "loss": 0.5463666915893555, + "step": 7283 + }, + { + "epoch": 1.6795019598801013, + "grad_norm": 1.622691460463702, + "learning_rate": 1.3736202347722182e-07, + "loss": 0.3634001910686493, + "step": 7284 + }, + { + "epoch": 1.679732534009684, + "grad_norm": 1.6327202188647956, + "learning_rate": 1.3716923591899166e-07, + "loss": 0.39512360095977783, + "step": 7285 + }, + { + "epoch": 1.6799631081392667, + "grad_norm": 1.3361978857608434, + "learning_rate": 1.3697657378195772e-07, + "loss": 0.3858473300933838, + "step": 7286 + }, + { + "epoch": 1.6801936822688495, + "grad_norm": 1.4527844976472322, + "learning_rate": 1.36784037094125e-07, + "loss": 0.473757266998291, + "step": 7287 + }, + { + "epoch": 1.6804242563984322, + "grad_norm": 1.410877918262981, + "learning_rate": 1.3659162588348107e-07, + "loss": 0.41679126024246216, + "step": 7288 + }, + { + "epoch": 1.680654830528015, + "grad_norm": 1.7135792249847552, + "learning_rate": 1.363993401779946e-07, + "loss": 0.4267998933792114, + "step": 7289 + }, + { + "epoch": 1.6808854046575974, + "grad_norm": 1.6476835268765473, + "learning_rate": 1.3620718000561648e-07, + "loss": 0.5453667044639587, + "step": 7290 + }, + { + "epoch": 1.68111597878718, + "grad_norm": 1.4347316593862658, + "learning_rate": 1.3601514539427895e-07, + "loss": 0.3882933259010315, + "step": 7291 + }, + { + "epoch": 1.6813465529167626, + "grad_norm": 
1.7177796725752086, + "learning_rate": 1.3582323637189653e-07, + "loss": 0.5565635561943054, + "step": 7292 + }, + { + "epoch": 1.6815771270463453, + "grad_norm": 1.448665873125515, + "learning_rate": 1.356314529663647e-07, + "loss": 0.49807024002075195, + "step": 7293 + }, + { + "epoch": 1.681807701175928, + "grad_norm": 1.5449122885779156, + "learning_rate": 1.3543979520556116e-07, + "loss": 0.40868130326271057, + "step": 7294 + }, + { + "epoch": 1.6820382753055108, + "grad_norm": 1.4045709349742252, + "learning_rate": 1.352482631173455e-07, + "loss": 0.46088406443595886, + "step": 7295 + }, + { + "epoch": 1.6822688494350935, + "grad_norm": 1.7658846162202777, + "learning_rate": 1.3505685672955869e-07, + "loss": 0.44346722960472107, + "step": 7296 + }, + { + "epoch": 1.6824994235646762, + "grad_norm": 1.3703801713050607, + "learning_rate": 1.348655760700239e-07, + "loss": 0.36585044860839844, + "step": 7297 + }, + { + "epoch": 1.6827299976942587, + "grad_norm": 1.8199719530329925, + "learning_rate": 1.3467442116654536e-07, + "loss": 0.46082472801208496, + "step": 7298 + }, + { + "epoch": 1.6829605718238414, + "grad_norm": 1.8043564550526412, + "learning_rate": 1.3448339204690974e-07, + "loss": 0.5011709928512573, + "step": 7299 + }, + { + "epoch": 1.683191145953424, + "grad_norm": 2.1355217293891378, + "learning_rate": 1.3429248873888454e-07, + "loss": 0.4382838010787964, + "step": 7300 + }, + { + "epoch": 1.6834217200830066, + "grad_norm": 1.4118543770807777, + "learning_rate": 1.3410171127022008e-07, + "loss": 0.35204610228538513, + "step": 7301 + }, + { + "epoch": 1.6836522942125893, + "grad_norm": 1.3718001359049319, + "learning_rate": 1.3391105966864745e-07, + "loss": 0.3915257453918457, + "step": 7302 + }, + { + "epoch": 1.683882868342172, + "grad_norm": 1.4102637825932318, + "learning_rate": 1.3372053396187967e-07, + "loss": 0.3945339322090149, + "step": 7303 + }, + { + "epoch": 1.6841134424717548, + "grad_norm": 1.7911618298179695, + "learning_rate": 
1.335301341776117e-07, + "loss": 0.48783642053604126, + "step": 7304 + }, + { + "epoch": 1.6843440166013375, + "grad_norm": 1.745012134293522, + "learning_rate": 1.333398603435203e-07, + "loss": 0.49026161432266235, + "step": 7305 + }, + { + "epoch": 1.68457459073092, + "grad_norm": 1.9699708710220791, + "learning_rate": 1.3314971248726358e-07, + "loss": 0.5035061836242676, + "step": 7306 + }, + { + "epoch": 1.6848051648605027, + "grad_norm": 1.7602149086036532, + "learning_rate": 1.3295969063648126e-07, + "loss": 0.5452826023101807, + "step": 7307 + }, + { + "epoch": 1.6850357389900852, + "grad_norm": 1.7088858518580703, + "learning_rate": 1.3276979481879524e-07, + "loss": 0.4609105885028839, + "step": 7308 + }, + { + "epoch": 1.685266313119668, + "grad_norm": 1.6869514802612067, + "learning_rate": 1.3258002506180855e-07, + "loss": 0.5799046754837036, + "step": 7309 + }, + { + "epoch": 1.6854968872492506, + "grad_norm": 1.6691103426337504, + "learning_rate": 1.3239038139310644e-07, + "loss": 0.42096465826034546, + "step": 7310 + }, + { + "epoch": 1.6857274613788333, + "grad_norm": 1.9781377178498367, + "learning_rate": 1.3220086384025508e-07, + "loss": 0.4741813540458679, + "step": 7311 + }, + { + "epoch": 1.685958035508416, + "grad_norm": 1.5972207301313162, + "learning_rate": 1.3201147243080302e-07, + "loss": 0.4872191250324249, + "step": 7312 + }, + { + "epoch": 1.6861886096379988, + "grad_norm": 1.7767879845396581, + "learning_rate": 1.3182220719228054e-07, + "loss": 0.5210198163986206, + "step": 7313 + }, + { + "epoch": 1.6864191837675813, + "grad_norm": 1.932834262840403, + "learning_rate": 1.3163306815219878e-07, + "loss": 0.4873948395252228, + "step": 7314 + }, + { + "epoch": 1.686649757897164, + "grad_norm": 1.723686253702064, + "learning_rate": 1.3144405533805136e-07, + "loss": 0.46856212615966797, + "step": 7315 + }, + { + "epoch": 1.6868803320267465, + "grad_norm": 1.549399332710726, + "learning_rate": 1.3125516877731279e-07, + "loss": 
0.3931645154953003, + "step": 7316 + }, + { + "epoch": 1.6871109061563292, + "grad_norm": 1.5988122745666866, + "learning_rate": 1.3106640849744023e-07, + "loss": 0.4473317861557007, + "step": 7317 + }, + { + "epoch": 1.687341480285912, + "grad_norm": 1.5841372684708825, + "learning_rate": 1.3087777452587124e-07, + "loss": 0.4499043822288513, + "step": 7318 + }, + { + "epoch": 1.6875720544154946, + "grad_norm": 1.6054649930580802, + "learning_rate": 1.30689266890026e-07, + "loss": 0.4992508292198181, + "step": 7319 + }, + { + "epoch": 1.6878026285450773, + "grad_norm": 1.426896936128743, + "learning_rate": 1.305008856173061e-07, + "loss": 0.4684743583202362, + "step": 7320 + }, + { + "epoch": 1.68803320267466, + "grad_norm": 1.7876602073965717, + "learning_rate": 1.303126307350948e-07, + "loss": 0.5543930530548096, + "step": 7321 + }, + { + "epoch": 1.6882637768042426, + "grad_norm": 1.3482084944505501, + "learning_rate": 1.3012450227075655e-07, + "loss": 0.3812211751937866, + "step": 7322 + }, + { + "epoch": 1.6884943509338253, + "grad_norm": 2.079165248146425, + "learning_rate": 1.299365002516377e-07, + "loss": 0.5455845594406128, + "step": 7323 + }, + { + "epoch": 1.6887249250634078, + "grad_norm": 1.3768890960712863, + "learning_rate": 1.2974862470506654e-07, + "loss": 0.4256778657436371, + "step": 7324 + }, + { + "epoch": 1.6889554991929905, + "grad_norm": 1.9468423520002898, + "learning_rate": 1.2956087565835228e-07, + "loss": 0.4973354637622833, + "step": 7325 + }, + { + "epoch": 1.6891860733225732, + "grad_norm": 1.5779840439512345, + "learning_rate": 1.2937325313878666e-07, + "loss": 0.5141343474388123, + "step": 7326 + }, + { + "epoch": 1.689416647452156, + "grad_norm": 1.5179632497576485, + "learning_rate": 1.2918575717364178e-07, + "loss": 0.3872978687286377, + "step": 7327 + }, + { + "epoch": 1.6896472215817386, + "grad_norm": 1.3857087225021212, + "learning_rate": 1.2899838779017292e-07, + "loss": 0.4333486557006836, + "step": 7328 + }, + { + "epoch": 
1.6898777957113214, + "grad_norm": 1.5624646221048997, + "learning_rate": 1.2881114501561553e-07, + "loss": 0.42979496717453003, + "step": 7329 + }, + { + "epoch": 1.6901083698409038, + "grad_norm": 1.6512939392276094, + "learning_rate": 1.2862402887718771e-07, + "loss": 0.43296414613723755, + "step": 7330 + }, + { + "epoch": 1.6903389439704866, + "grad_norm": 1.4822998528875215, + "learning_rate": 1.2843703940208816e-07, + "loss": 0.41763681173324585, + "step": 7331 + }, + { + "epoch": 1.690569518100069, + "grad_norm": 1.4433304691783968, + "learning_rate": 1.2825017661749814e-07, + "loss": 0.4531592130661011, + "step": 7332 + }, + { + "epoch": 1.6908000922296518, + "grad_norm": 1.5515786608723572, + "learning_rate": 1.2806344055057995e-07, + "loss": 0.4608149826526642, + "step": 7333 + }, + { + "epoch": 1.6910306663592345, + "grad_norm": 1.5678716271625897, + "learning_rate": 1.2787683122847726e-07, + "loss": 0.4298786520957947, + "step": 7334 + }, + { + "epoch": 1.6912612404888172, + "grad_norm": 1.5882305453896473, + "learning_rate": 1.2769034867831586e-07, + "loss": 0.4404297471046448, + "step": 7335 + }, + { + "epoch": 1.6914918146184, + "grad_norm": 1.590662947019878, + "learning_rate": 1.2750399292720281e-07, + "loss": 0.3857702910900116, + "step": 7336 + }, + { + "epoch": 1.6917223887479826, + "grad_norm": 1.5092920813034143, + "learning_rate": 1.2731776400222716e-07, + "loss": 0.351214200258255, + "step": 7337 + }, + { + "epoch": 1.6919529628775651, + "grad_norm": 1.6618460717985095, + "learning_rate": 1.2713166193045854e-07, + "loss": 0.4711484909057617, + "step": 7338 + }, + { + "epoch": 1.6921835370071479, + "grad_norm": 1.605912014604012, + "learning_rate": 1.2694568673894946e-07, + "loss": 0.4819946587085724, + "step": 7339 + }, + { + "epoch": 1.6924141111367303, + "grad_norm": 1.5366035327097678, + "learning_rate": 1.267598384547327e-07, + "loss": 0.39870262145996094, + "step": 7340 + }, + { + "epoch": 1.692644685266313, + "grad_norm": 
1.410709311062986, + "learning_rate": 1.265741171048237e-07, + "loss": 0.4775997996330261, + "step": 7341 + }, + { + "epoch": 1.6928752593958958, + "grad_norm": 1.5031428119722987, + "learning_rate": 1.2638852271621836e-07, + "loss": 0.4166836738586426, + "step": 7342 + }, + { + "epoch": 1.6931058335254785, + "grad_norm": 1.362546283009112, + "learning_rate": 1.2620305531589514e-07, + "loss": 0.396761953830719, + "step": 7343 + }, + { + "epoch": 1.6933364076550612, + "grad_norm": 1.5811036971551204, + "learning_rate": 1.260177149308136e-07, + "loss": 0.36929184198379517, + "step": 7344 + }, + { + "epoch": 1.6935669817846437, + "grad_norm": 1.6142308009439483, + "learning_rate": 1.2583250158791459e-07, + "loss": 0.4664369821548462, + "step": 7345 + }, + { + "epoch": 1.6937975559142264, + "grad_norm": 1.4490673957983151, + "learning_rate": 1.2564741531412115e-07, + "loss": 0.40877625346183777, + "step": 7346 + }, + { + "epoch": 1.694028130043809, + "grad_norm": 1.3363670323915413, + "learning_rate": 1.254624561363369e-07, + "loss": 0.4282684922218323, + "step": 7347 + }, + { + "epoch": 1.6942587041733916, + "grad_norm": 1.7781191335343183, + "learning_rate": 1.2527762408144805e-07, + "loss": 0.5430412292480469, + "step": 7348 + }, + { + "epoch": 1.6944892783029744, + "grad_norm": 1.7384245962384524, + "learning_rate": 1.2509291917632147e-07, + "loss": 0.45990923047065735, + "step": 7349 + }, + { + "epoch": 1.694719852432557, + "grad_norm": 1.5699544039589348, + "learning_rate": 1.2490834144780593e-07, + "loss": 0.38062262535095215, + "step": 7350 + }, + { + "epoch": 1.6949504265621398, + "grad_norm": 1.5427808320923257, + "learning_rate": 1.2472389092273172e-07, + "loss": 0.4704701900482178, + "step": 7351 + }, + { + "epoch": 1.6951810006917225, + "grad_norm": 1.3215044901700805, + "learning_rate": 1.2453956762791084e-07, + "loss": 0.4439951181411743, + "step": 7352 + }, + { + "epoch": 1.695411574821305, + "grad_norm": 1.6827848110964911, + "learning_rate": 
1.2435537159013632e-07, + "loss": 0.49405014514923096, + "step": 7353 + }, + { + "epoch": 1.6956421489508877, + "grad_norm": 1.4071924274505998, + "learning_rate": 1.2417130283618282e-07, + "loss": 0.4282076060771942, + "step": 7354 + }, + { + "epoch": 1.6958727230804702, + "grad_norm": 1.4129187553888694, + "learning_rate": 1.2398736139280687e-07, + "loss": 0.43492811918258667, + "step": 7355 + }, + { + "epoch": 1.696103297210053, + "grad_norm": 1.550272919478409, + "learning_rate": 1.238035472867458e-07, + "loss": 0.37239378690719604, + "step": 7356 + }, + { + "epoch": 1.6963338713396356, + "grad_norm": 1.2721176079849843, + "learning_rate": 1.236198605447194e-07, + "loss": 0.39911961555480957, + "step": 7357 + }, + { + "epoch": 1.6965644454692184, + "grad_norm": 1.911188398718987, + "learning_rate": 1.2343630119342786e-07, + "loss": 0.4962255656719208, + "step": 7358 + }, + { + "epoch": 1.696795019598801, + "grad_norm": 1.3131623819116638, + "learning_rate": 1.2325286925955358e-07, + "loss": 0.37414759397506714, + "step": 7359 + }, + { + "epoch": 1.6970255937283838, + "grad_norm": 1.5092759235813635, + "learning_rate": 1.230695647697604e-07, + "loss": 0.41224929690361023, + "step": 7360 + }, + { + "epoch": 1.6972561678579663, + "grad_norm": 1.3964295729715615, + "learning_rate": 1.228863877506936e-07, + "loss": 0.43184489011764526, + "step": 7361 + }, + { + "epoch": 1.697486741987549, + "grad_norm": 1.6991026917946972, + "learning_rate": 1.227033382289795e-07, + "loss": 0.4741829037666321, + "step": 7362 + }, + { + "epoch": 1.6977173161171315, + "grad_norm": 1.677947901828469, + "learning_rate": 1.2252041623122643e-07, + "loss": 0.43224620819091797, + "step": 7363 + }, + { + "epoch": 1.6979478902467142, + "grad_norm": 1.678576477296345, + "learning_rate": 1.2233762178402386e-07, + "loss": 0.46645525097846985, + "step": 7364 + }, + { + "epoch": 1.698178464376297, + "grad_norm": 1.4201537921120515, + "learning_rate": 1.2215495491394256e-07, + "loss": 
0.4237707555294037, + "step": 7365 + }, + { + "epoch": 1.6984090385058797, + "grad_norm": 1.3069690432597363, + "learning_rate": 1.2197241564753535e-07, + "loss": 0.36378395557403564, + "step": 7366 + }, + { + "epoch": 1.6986396126354624, + "grad_norm": 1.6387935949488672, + "learning_rate": 1.21790004011336e-07, + "loss": 0.4564269185066223, + "step": 7367 + }, + { + "epoch": 1.698870186765045, + "grad_norm": 1.3009015849639454, + "learning_rate": 1.2160772003186027e-07, + "loss": 0.4492420256137848, + "step": 7368 + }, + { + "epoch": 1.6991007608946276, + "grad_norm": 1.6097888974991954, + "learning_rate": 1.214255637356043e-07, + "loss": 0.515146017074585, + "step": 7369 + }, + { + "epoch": 1.6993313350242103, + "grad_norm": 1.5565943453492384, + "learning_rate": 1.2124353514904707e-07, + "loss": 0.41473329067230225, + "step": 7370 + }, + { + "epoch": 1.6995619091537928, + "grad_norm": 1.6571527829218886, + "learning_rate": 1.210616342986477e-07, + "loss": 0.4408412575721741, + "step": 7371 + }, + { + "epoch": 1.6997924832833755, + "grad_norm": 1.6546450900594125, + "learning_rate": 1.208798612108477e-07, + "loss": 0.5370820760726929, + "step": 7372 + }, + { + "epoch": 1.7000230574129582, + "grad_norm": 1.502975927661507, + "learning_rate": 1.206982159120693e-07, + "loss": 0.46518170833587646, + "step": 7373 + }, + { + "epoch": 1.700253631542541, + "grad_norm": 1.5801444025292624, + "learning_rate": 1.205166984287167e-07, + "loss": 0.45063477754592896, + "step": 7374 + }, + { + "epoch": 1.7004842056721237, + "grad_norm": 1.4109266758667123, + "learning_rate": 1.2033530878717546e-07, + "loss": 0.47391965985298157, + "step": 7375 + }, + { + "epoch": 1.7007147798017064, + "grad_norm": 1.680591382104731, + "learning_rate": 1.2015404701381205e-07, + "loss": 0.45812156796455383, + "step": 7376 + }, + { + "epoch": 1.7009453539312889, + "grad_norm": 1.7661450796417113, + "learning_rate": 1.1997291313497503e-07, + "loss": 0.5174708366394043, + "step": 7377 + }, + { + 
"epoch": 1.7011759280608716, + "grad_norm": 1.2379321910437706, + "learning_rate": 1.1979190717699373e-07, + "loss": 0.3412814736366272, + "step": 7378 + }, + { + "epoch": 1.701406502190454, + "grad_norm": 1.6619687091053885, + "learning_rate": 1.196110291661796e-07, + "loss": 0.41912171244621277, + "step": 7379 + }, + { + "epoch": 1.7016370763200368, + "grad_norm": 1.7384039938738447, + "learning_rate": 1.1943027912882464e-07, + "loss": 0.5569772720336914, + "step": 7380 + }, + { + "epoch": 1.7018676504496195, + "grad_norm": 1.309448309717786, + "learning_rate": 1.1924965709120304e-07, + "loss": 0.40875375270843506, + "step": 7381 + }, + { + "epoch": 1.7020982245792022, + "grad_norm": 1.5803953469974217, + "learning_rate": 1.1906916307956983e-07, + "loss": 0.46906760334968567, + "step": 7382 + }, + { + "epoch": 1.702328798708785, + "grad_norm": 1.2850228520937832, + "learning_rate": 1.1888879712016165e-07, + "loss": 0.40830397605895996, + "step": 7383 + }, + { + "epoch": 1.7025593728383677, + "grad_norm": 1.4770811279187035, + "learning_rate": 1.1870855923919687e-07, + "loss": 0.4051646590232849, + "step": 7384 + }, + { + "epoch": 1.7027899469679502, + "grad_norm": 1.696009847928002, + "learning_rate": 1.1852844946287432e-07, + "loss": 0.5042610764503479, + "step": 7385 + }, + { + "epoch": 1.7030205210975329, + "grad_norm": 1.6262740295484197, + "learning_rate": 1.183484678173754e-07, + "loss": 0.5304923057556152, + "step": 7386 + }, + { + "epoch": 1.7032510952271154, + "grad_norm": 1.2604579461831944, + "learning_rate": 1.1816861432886171e-07, + "loss": 0.443366676568985, + "step": 7387 + }, + { + "epoch": 1.703481669356698, + "grad_norm": 1.3836719865657088, + "learning_rate": 1.1798888902347714e-07, + "loss": 0.4527779817581177, + "step": 7388 + }, + { + "epoch": 1.7037122434862808, + "grad_norm": 1.3616715508883823, + "learning_rate": 1.1780929192734634e-07, + "loss": 0.4277183413505554, + "step": 7389 + }, + { + "epoch": 1.7039428176158635, + "grad_norm": 
1.3714415020573154, + "learning_rate": 1.1762982306657577e-07, + "loss": 0.4908677637577057, + "step": 7390 + }, + { + "epoch": 1.7041733917454462, + "grad_norm": 1.4373179697113392, + "learning_rate": 1.1745048246725286e-07, + "loss": 0.398892879486084, + "step": 7391 + }, + { + "epoch": 1.704403965875029, + "grad_norm": 1.801155926723525, + "learning_rate": 1.1727127015544691e-07, + "loss": 0.4654615521430969, + "step": 7392 + }, + { + "epoch": 1.7046345400046115, + "grad_norm": 1.6258673974312492, + "learning_rate": 1.1709218615720806e-07, + "loss": 0.4850313663482666, + "step": 7393 + }, + { + "epoch": 1.7048651141341942, + "grad_norm": 1.3854283292952871, + "learning_rate": 1.1691323049856772e-07, + "loss": 0.4036976099014282, + "step": 7394 + }, + { + "epoch": 1.7050956882637767, + "grad_norm": 1.6824325261066553, + "learning_rate": 1.167344032055394e-07, + "loss": 0.39174383878707886, + "step": 7395 + }, + { + "epoch": 1.7053262623933594, + "grad_norm": 1.49190685623753, + "learning_rate": 1.1655570430411699e-07, + "loss": 0.44915109872817993, + "step": 7396 + }, + { + "epoch": 1.705556836522942, + "grad_norm": 1.4487302731781821, + "learning_rate": 1.1637713382027636e-07, + "loss": 0.4720522165298462, + "step": 7397 + }, + { + "epoch": 1.7057874106525248, + "grad_norm": 1.5236154065511855, + "learning_rate": 1.1619869177997455e-07, + "loss": 0.4452325105667114, + "step": 7398 + }, + { + "epoch": 1.7060179847821075, + "grad_norm": 1.489108876491428, + "learning_rate": 1.1602037820915023e-07, + "loss": 0.4009271562099457, + "step": 7399 + }, + { + "epoch": 1.7062485589116902, + "grad_norm": 1.3320502296097492, + "learning_rate": 1.1584219313372257e-07, + "loss": 0.37518051266670227, + "step": 7400 + }, + { + "epoch": 1.7064791330412727, + "grad_norm": 1.5361245639590775, + "learning_rate": 1.1566413657959295e-07, + "loss": 0.42883241176605225, + "step": 7401 + }, + { + "epoch": 1.7067097071708555, + "grad_norm": 1.5311391941499002, + "learning_rate": 
1.1548620857264346e-07, + "loss": 0.4597551226615906, + "step": 7402 + }, + { + "epoch": 1.706940281300438, + "grad_norm": 1.4815045613998048, + "learning_rate": 1.1530840913873797e-07, + "loss": 0.5491876006126404, + "step": 7403 + }, + { + "epoch": 1.7071708554300207, + "grad_norm": 1.8810828492754625, + "learning_rate": 1.1513073830372122e-07, + "loss": 0.5632074475288391, + "step": 7404 + }, + { + "epoch": 1.7074014295596034, + "grad_norm": 1.557196455612015, + "learning_rate": 1.1495319609341947e-07, + "loss": 0.5251858234405518, + "step": 7405 + }, + { + "epoch": 1.707632003689186, + "grad_norm": 1.7979639485315768, + "learning_rate": 1.1477578253364028e-07, + "loss": 0.5388965606689453, + "step": 7406 + }, + { + "epoch": 1.7078625778187688, + "grad_norm": 1.7322317596816112, + "learning_rate": 1.145984976501726e-07, + "loss": 0.4429551959037781, + "step": 7407 + }, + { + "epoch": 1.7080931519483515, + "grad_norm": 1.5048923212213088, + "learning_rate": 1.144213414687868e-07, + "loss": 0.4702358841896057, + "step": 7408 + }, + { + "epoch": 1.708323726077934, + "grad_norm": 1.616629635802576, + "learning_rate": 1.1424431401523382e-07, + "loss": 0.4506569504737854, + "step": 7409 + }, + { + "epoch": 1.7085543002075168, + "grad_norm": 1.5722880063833475, + "learning_rate": 1.1406741531524689e-07, + "loss": 0.384244441986084, + "step": 7410 + }, + { + "epoch": 1.7087848743370992, + "grad_norm": 1.6254816299222574, + "learning_rate": 1.1389064539453952e-07, + "loss": 0.4642629027366638, + "step": 7411 + }, + { + "epoch": 1.709015448466682, + "grad_norm": 1.5180284715923413, + "learning_rate": 1.1371400427880761e-07, + "loss": 0.4568482041358948, + "step": 7412 + }, + { + "epoch": 1.7092460225962647, + "grad_norm": 1.6058744016500281, + "learning_rate": 1.135374919937272e-07, + "loss": 0.536895215511322, + "step": 7413 + }, + { + "epoch": 1.7094765967258474, + "grad_norm": 1.6944575711634469, + "learning_rate": 1.1336110856495628e-07, + "loss": 0.49696239829063416, 
+ "step": 7414 + }, + { + "epoch": 1.7097071708554301, + "grad_norm": 1.802031783829704, + "learning_rate": 1.1318485401813438e-07, + "loss": 0.3857358694076538, + "step": 7415 + }, + { + "epoch": 1.7099377449850128, + "grad_norm": 1.5410848248596472, + "learning_rate": 1.1300872837888121e-07, + "loss": 0.38111335039138794, + "step": 7416 + }, + { + "epoch": 1.7101683191145953, + "grad_norm": 1.6014644101172142, + "learning_rate": 1.1283273167279906e-07, + "loss": 0.4255755543708801, + "step": 7417 + }, + { + "epoch": 1.710398893244178, + "grad_norm": 1.6646696692039435, + "learning_rate": 1.1265686392547024e-07, + "loss": 0.5048757791519165, + "step": 7418 + }, + { + "epoch": 1.7106294673737605, + "grad_norm": 1.6262992093918878, + "learning_rate": 1.1248112516245944e-07, + "loss": 0.5402916073799133, + "step": 7419 + }, + { + "epoch": 1.7108600415033433, + "grad_norm": 1.6105931834922984, + "learning_rate": 1.1230551540931165e-07, + "loss": 0.3617591857910156, + "step": 7420 + }, + { + "epoch": 1.711090615632926, + "grad_norm": 1.584818843359006, + "learning_rate": 1.1213003469155369e-07, + "loss": 0.4636116921901703, + "step": 7421 + }, + { + "epoch": 1.7113211897625087, + "grad_norm": 1.7626797404606351, + "learning_rate": 1.1195468303469346e-07, + "loss": 0.4675198495388031, + "step": 7422 + }, + { + "epoch": 1.7115517638920914, + "grad_norm": 1.6024517382949015, + "learning_rate": 1.1177946046422038e-07, + "loss": 0.48491787910461426, + "step": 7423 + }, + { + "epoch": 1.7117823380216741, + "grad_norm": 1.5413352133121294, + "learning_rate": 1.1160436700560449e-07, + "loss": 0.3898283839225769, + "step": 7424 + }, + { + "epoch": 1.7120129121512566, + "grad_norm": 1.5514584947710022, + "learning_rate": 1.1142940268429735e-07, + "loss": 0.41522908210754395, + "step": 7425 + }, + { + "epoch": 1.7122434862808393, + "grad_norm": 1.430903522239028, + "learning_rate": 1.1125456752573215e-07, + "loss": 0.4681985378265381, + "step": 7426 + }, + { + "epoch": 
1.7124740604104218, + "grad_norm": 1.8962296460852388, + "learning_rate": 1.1107986155532245e-07, + "loss": 0.4788553714752197, + "step": 7427 + }, + { + "epoch": 1.7127046345400045, + "grad_norm": 1.5072364623848036, + "learning_rate": 1.1090528479846406e-07, + "loss": 0.43853843212127686, + "step": 7428 + }, + { + "epoch": 1.7129352086695873, + "grad_norm": 1.542463594674994, + "learning_rate": 1.107308372805329e-07, + "loss": 0.3736591637134552, + "step": 7429 + }, + { + "epoch": 1.71316578279917, + "grad_norm": 1.8237435289536401, + "learning_rate": 1.1055651902688712e-07, + "loss": 0.5770819783210754, + "step": 7430 + }, + { + "epoch": 1.7133963569287527, + "grad_norm": 1.7972828104133267, + "learning_rate": 1.1038233006286558e-07, + "loss": 0.5906555652618408, + "step": 7431 + }, + { + "epoch": 1.7136269310583354, + "grad_norm": 1.396062928601261, + "learning_rate": 1.1020827041378844e-07, + "loss": 0.4621407389640808, + "step": 7432 + }, + { + "epoch": 1.713857505187918, + "grad_norm": 1.6487194571266346, + "learning_rate": 1.1003434010495705e-07, + "loss": 0.4203164279460907, + "step": 7433 + }, + { + "epoch": 1.7140880793175006, + "grad_norm": 1.59720117870823, + "learning_rate": 1.0986053916165373e-07, + "loss": 0.4607565104961395, + "step": 7434 + }, + { + "epoch": 1.7143186534470831, + "grad_norm": 1.4411738322949479, + "learning_rate": 1.0968686760914248e-07, + "loss": 0.47256794571876526, + "step": 7435 + }, + { + "epoch": 1.7145492275766658, + "grad_norm": 2.1203032230505414, + "learning_rate": 1.0951332547266778e-07, + "loss": 0.479513943195343, + "step": 7436 + }, + { + "epoch": 1.7147798017062486, + "grad_norm": 1.7633354860000339, + "learning_rate": 1.0933991277745614e-07, + "loss": 0.47687965631484985, + "step": 7437 + }, + { + "epoch": 1.7150103758358313, + "grad_norm": 1.6696730348311766, + "learning_rate": 1.091666295487147e-07, + "loss": 0.45799845457077026, + "step": 7438 + }, + { + "epoch": 1.715240949965414, + "grad_norm": 
1.4765505689651048, + "learning_rate": 1.089934758116322e-07, + "loss": 0.43398863077163696, + "step": 7439 + }, + { + "epoch": 1.7154715240949967, + "grad_norm": 1.627580558092534, + "learning_rate": 1.0882045159137788e-07, + "loss": 0.4098217189311981, + "step": 7440 + }, + { + "epoch": 1.7157020982245792, + "grad_norm": 1.8062601643320504, + "learning_rate": 1.086475569131029e-07, + "loss": 0.49889707565307617, + "step": 7441 + }, + { + "epoch": 1.715932672354162, + "grad_norm": 1.4613353368332702, + "learning_rate": 1.0847479180193897e-07, + "loss": 0.4187192916870117, + "step": 7442 + }, + { + "epoch": 1.7161632464837444, + "grad_norm": 2.068945016126778, + "learning_rate": 1.0830215628299954e-07, + "loss": 0.44331133365631104, + "step": 7443 + }, + { + "epoch": 1.7163938206133271, + "grad_norm": 1.6773749938074582, + "learning_rate": 1.0812965038137856e-07, + "loss": 0.4888196587562561, + "step": 7444 + }, + { + "epoch": 1.7166243947429098, + "grad_norm": 1.6578617629701122, + "learning_rate": 1.0795727412215183e-07, + "loss": 0.4884798228740692, + "step": 7445 + }, + { + "epoch": 1.7168549688724926, + "grad_norm": 1.5723023883356735, + "learning_rate": 1.07785027530376e-07, + "loss": 0.45655232667922974, + "step": 7446 + }, + { + "epoch": 1.7170855430020753, + "grad_norm": 1.685893884498356, + "learning_rate": 1.0761291063108857e-07, + "loss": 0.3086237907409668, + "step": 7447 + }, + { + "epoch": 1.717316117131658, + "grad_norm": 1.5738053973393145, + "learning_rate": 1.0744092344930888e-07, + "loss": 0.4279823899269104, + "step": 7448 + }, + { + "epoch": 1.7175466912612405, + "grad_norm": 1.7221029802689058, + "learning_rate": 1.072690660100366e-07, + "loss": 0.4241681396961212, + "step": 7449 + }, + { + "epoch": 1.7177772653908232, + "grad_norm": 1.7874830878272077, + "learning_rate": 1.070973383382533e-07, + "loss": 0.47086501121520996, + "step": 7450 + }, + { + "epoch": 1.7180078395204057, + "grad_norm": 1.3780373187479635, + "learning_rate": 
1.0692574045892099e-07, + "loss": 0.43798619508743286, + "step": 7451 + }, + { + "epoch": 1.7182384136499884, + "grad_norm": 1.7289936352675708, + "learning_rate": 1.0675427239698354e-07, + "loss": 0.5781964659690857, + "step": 7452 + }, + { + "epoch": 1.7184689877795711, + "grad_norm": 1.4621228929512655, + "learning_rate": 1.0658293417736508e-07, + "loss": 0.4850879907608032, + "step": 7453 + }, + { + "epoch": 1.7186995619091539, + "grad_norm": 1.3236244677460836, + "learning_rate": 1.064117258249717e-07, + "loss": 0.40468811988830566, + "step": 7454 + }, + { + "epoch": 1.7189301360387366, + "grad_norm": 1.7069112900372936, + "learning_rate": 1.0624064736469052e-07, + "loss": 0.4054880142211914, + "step": 7455 + }, + { + "epoch": 1.719160710168319, + "grad_norm": 1.7589002706519377, + "learning_rate": 1.0606969882138894e-07, + "loss": 0.38633522391319275, + "step": 7456 + }, + { + "epoch": 1.7193912842979018, + "grad_norm": 1.6917357500409704, + "learning_rate": 1.0589888021991644e-07, + "loss": 0.4287499785423279, + "step": 7457 + }, + { + "epoch": 1.7196218584274843, + "grad_norm": 1.613018561241669, + "learning_rate": 1.0572819158510316e-07, + "loss": 0.49269533157348633, + "step": 7458 + }, + { + "epoch": 1.719852432557067, + "grad_norm": 1.4600608769783265, + "learning_rate": 1.0555763294176045e-07, + "loss": 0.38874679803848267, + "step": 7459 + }, + { + "epoch": 1.7200830066866497, + "grad_norm": 1.5663184097893508, + "learning_rate": 1.0538720431468051e-07, + "loss": 0.4381089508533478, + "step": 7460 + }, + { + "epoch": 1.7203135808162324, + "grad_norm": 1.6242553694361792, + "learning_rate": 1.0521690572863706e-07, + "loss": 0.4550422430038452, + "step": 7461 + }, + { + "epoch": 1.7205441549458151, + "grad_norm": 1.5017985009159773, + "learning_rate": 1.0504673720838476e-07, + "loss": 0.5173785090446472, + "step": 7462 + }, + { + "epoch": 1.7207747290753979, + "grad_norm": 1.4906138636113029, + "learning_rate": 1.0487669877865945e-07, + "loss": 
0.5082184076309204, + "step": 7463 + }, + { + "epoch": 1.7210053032049804, + "grad_norm": 1.7383580581523643, + "learning_rate": 1.0470679046417786e-07, + "loss": 0.49810969829559326, + "step": 7464 + }, + { + "epoch": 1.721235877334563, + "grad_norm": 1.7302456540952424, + "learning_rate": 1.0453701228963751e-07, + "loss": 0.47808337211608887, + "step": 7465 + }, + { + "epoch": 1.7214664514641456, + "grad_norm": 1.6093569631380469, + "learning_rate": 1.0436736427971782e-07, + "loss": 0.5100537538528442, + "step": 7466 + }, + { + "epoch": 1.7216970255937283, + "grad_norm": 1.5019138408689112, + "learning_rate": 1.0419784645907858e-07, + "loss": 0.44948023557662964, + "step": 7467 + }, + { + "epoch": 1.721927599723311, + "grad_norm": 1.3792836042899619, + "learning_rate": 1.040284588523611e-07, + "loss": 0.4653180241584778, + "step": 7468 + }, + { + "epoch": 1.7221581738528937, + "grad_norm": 1.901421358760061, + "learning_rate": 1.0385920148418737e-07, + "loss": 0.4930723309516907, + "step": 7469 + }, + { + "epoch": 1.7223887479824764, + "grad_norm": 1.5964124799736943, + "learning_rate": 1.036900743791611e-07, + "loss": 0.48883867263793945, + "step": 7470 + }, + { + "epoch": 1.7226193221120591, + "grad_norm": 1.27924002772244, + "learning_rate": 1.0352107756186624e-07, + "loss": 0.4030319154262543, + "step": 7471 + }, + { + "epoch": 1.7228498962416416, + "grad_norm": 1.8060139526740588, + "learning_rate": 1.033522110568683e-07, + "loss": 0.4174875319004059, + "step": 7472 + }, + { + "epoch": 1.7230804703712244, + "grad_norm": 1.731157383735833, + "learning_rate": 1.0318347488871371e-07, + "loss": 0.5152361392974854, + "step": 7473 + }, + { + "epoch": 1.7233110445008069, + "grad_norm": 1.3983774946509473, + "learning_rate": 1.0301486908193014e-07, + "loss": 0.43221428990364075, + "step": 7474 + }, + { + "epoch": 1.7235416186303896, + "grad_norm": 1.6931290113673243, + "learning_rate": 1.0284639366102598e-07, + "loss": 0.4239969849586487, + "step": 7475 + }, + { + 
"epoch": 1.7237721927599723, + "grad_norm": 1.5094560861426634, + "learning_rate": 1.0267804865049068e-07, + "loss": 0.5171400904655457, + "step": 7476 + }, + { + "epoch": 1.724002766889555, + "grad_norm": 1.3913671775557208, + "learning_rate": 1.0250983407479518e-07, + "loss": 0.45670178532600403, + "step": 7477 + }, + { + "epoch": 1.7242333410191377, + "grad_norm": 1.3489970844922, + "learning_rate": 1.0234174995839107e-07, + "loss": 0.36458373069763184, + "step": 7478 + }, + { + "epoch": 1.7244639151487204, + "grad_norm": 1.6926167509742018, + "learning_rate": 1.0217379632571122e-07, + "loss": 0.4940750002861023, + "step": 7479 + }, + { + "epoch": 1.724694489278303, + "grad_norm": 1.3742895139526408, + "learning_rate": 1.0200597320116911e-07, + "loss": 0.43453872203826904, + "step": 7480 + }, + { + "epoch": 1.7249250634078857, + "grad_norm": 1.4325916198137496, + "learning_rate": 1.0183828060915989e-07, + "loss": 0.49255162477493286, + "step": 7481 + }, + { + "epoch": 1.7251556375374681, + "grad_norm": 1.5551839406586245, + "learning_rate": 1.0167071857405906e-07, + "loss": 0.46221014857292175, + "step": 7482 + }, + { + "epoch": 1.7253862116670509, + "grad_norm": 1.6044214909369097, + "learning_rate": 1.015032871202236e-07, + "loss": 0.43426087498664856, + "step": 7483 + }, + { + "epoch": 1.7256167857966336, + "grad_norm": 1.3471292376409894, + "learning_rate": 1.0133598627199136e-07, + "loss": 0.45327985286712646, + "step": 7484 + }, + { + "epoch": 1.7258473599262163, + "grad_norm": 1.7300792096053668, + "learning_rate": 1.011688160536811e-07, + "loss": 0.4691676199436188, + "step": 7485 + }, + { + "epoch": 1.726077934055799, + "grad_norm": 1.7168424748125397, + "learning_rate": 1.0100177648959296e-07, + "loss": 0.5080254077911377, + "step": 7486 + }, + { + "epoch": 1.7263085081853817, + "grad_norm": 1.3360541862160926, + "learning_rate": 1.008348676040075e-07, + "loss": 0.34122025966644287, + "step": 7487 + }, + { + "epoch": 1.7265390823149642, + "grad_norm": 
1.650892930499383, + "learning_rate": 1.0066808942118699e-07, + "loss": 0.44408074021339417, + "step": 7488 + }, + { + "epoch": 1.726769656444547, + "grad_norm": 1.4603224951411022, + "learning_rate": 1.0050144196537402e-07, + "loss": 0.3777790665626526, + "step": 7489 + }, + { + "epoch": 1.7270002305741294, + "grad_norm": 1.6365267437093343, + "learning_rate": 1.0033492526079279e-07, + "loss": 0.48730146884918213, + "step": 7490 + }, + { + "epoch": 1.7272308047037122, + "grad_norm": 1.5792338555913825, + "learning_rate": 1.001685393316477e-07, + "loss": 0.35903626680374146, + "step": 7491 + }, + { + "epoch": 1.7274613788332949, + "grad_norm": 1.3953813288199584, + "learning_rate": 1.0000228420212509e-07, + "loss": 0.37729373574256897, + "step": 7492 + }, + { + "epoch": 1.7276919529628776, + "grad_norm": 1.6314801226105193, + "learning_rate": 9.98361598963916e-08, + "loss": 0.4388326406478882, + "step": 7493 + }, + { + "epoch": 1.7279225270924603, + "grad_norm": 1.4829220781258674, + "learning_rate": 9.967016643859527e-08, + "loss": 0.45095232129096985, + "step": 7494 + }, + { + "epoch": 1.728153101222043, + "grad_norm": 1.5130736602015042, + "learning_rate": 9.95043038528649e-08, + "loss": 0.4736475944519043, + "step": 7495 + }, + { + "epoch": 1.7283836753516255, + "grad_norm": 1.6393405202034401, + "learning_rate": 9.933857216330999e-08, + "loss": 0.2984190285205841, + "step": 7496 + }, + { + "epoch": 1.7286142494812082, + "grad_norm": 1.5993261500159095, + "learning_rate": 9.91729713940218e-08, + "loss": 0.45391780138015747, + "step": 7497 + }, + { + "epoch": 1.7288448236107907, + "grad_norm": 1.732905558263472, + "learning_rate": 9.900750156907157e-08, + "loss": 0.5150727033615112, + "step": 7498 + }, + { + "epoch": 1.7290753977403734, + "grad_norm": 1.372519788443724, + "learning_rate": 9.884216271251256e-08, + "loss": 0.41298598051071167, + "step": 7499 + }, + { + "epoch": 1.7293059718699562, + "grad_norm": 1.5310483983437806, + "learning_rate": 
9.86769548483779e-08, + "loss": 0.4820541441440582, + "step": 7500 + }, + { + "epoch": 1.7295365459995389, + "grad_norm": 1.4103659952581913, + "learning_rate": 9.85118780006825e-08, + "loss": 0.4148511290550232, + "step": 7501 + }, + { + "epoch": 1.7297671201291216, + "grad_norm": 1.535383378975012, + "learning_rate": 9.834693219342183e-08, + "loss": 0.39676210284233093, + "step": 7502 + }, + { + "epoch": 1.7299976942587043, + "grad_norm": 1.3969764743432636, + "learning_rate": 9.818211745057292e-08, + "loss": 0.3665908873081207, + "step": 7503 + }, + { + "epoch": 1.7302282683882868, + "grad_norm": 1.5255452230855382, + "learning_rate": 9.801743379609274e-08, + "loss": 0.39340025186538696, + "step": 7504 + }, + { + "epoch": 1.7304588425178695, + "grad_norm": 1.4673439514671116, + "learning_rate": 9.785288125391977e-08, + "loss": 0.4677412807941437, + "step": 7505 + }, + { + "epoch": 1.730689416647452, + "grad_norm": 1.8421716352805986, + "learning_rate": 9.768845984797369e-08, + "loss": 0.49413764476776123, + "step": 7506 + }, + { + "epoch": 1.7309199907770347, + "grad_norm": 2.1097980684598223, + "learning_rate": 9.752416960215437e-08, + "loss": 0.5312438607215881, + "step": 7507 + }, + { + "epoch": 1.7311505649066175, + "grad_norm": 1.408973464564324, + "learning_rate": 9.736001054034338e-08, + "loss": 0.38522863388061523, + "step": 7508 + }, + { + "epoch": 1.7313811390362002, + "grad_norm": 1.4496862609377634, + "learning_rate": 9.719598268640283e-08, + "loss": 0.49167078733444214, + "step": 7509 + }, + { + "epoch": 1.7316117131657829, + "grad_norm": 1.7071655256469307, + "learning_rate": 9.7032086064176e-08, + "loss": 0.4465949535369873, + "step": 7510 + }, + { + "epoch": 1.7318422872953656, + "grad_norm": 1.580755639233498, + "learning_rate": 9.686832069748663e-08, + "loss": 0.4627634882926941, + "step": 7511 + }, + { + "epoch": 1.732072861424948, + "grad_norm": 1.5945960217093318, + "learning_rate": 9.670468661013998e-08, + "loss": 0.4188409447669983, + 
"step": 7512 + }, + { + "epoch": 1.7323034355545308, + "grad_norm": 1.6767285085334622, + "learning_rate": 9.654118382592146e-08, + "loss": 0.5775213241577148, + "step": 7513 + }, + { + "epoch": 1.7325340096841133, + "grad_norm": 1.4889326648746473, + "learning_rate": 9.637781236859843e-08, + "loss": 0.43912672996520996, + "step": 7514 + }, + { + "epoch": 1.732764583813696, + "grad_norm": 1.677177851910315, + "learning_rate": 9.62145722619182e-08, + "loss": 0.5364755392074585, + "step": 7515 + }, + { + "epoch": 1.7329951579432787, + "grad_norm": 1.5135890648676678, + "learning_rate": 9.605146352960935e-08, + "loss": 0.4832648038864136, + "step": 7516 + }, + { + "epoch": 1.7332257320728615, + "grad_norm": 1.640472153194824, + "learning_rate": 9.588848619538182e-08, + "loss": 0.36932459473609924, + "step": 7517 + }, + { + "epoch": 1.7334563062024442, + "grad_norm": 1.4731235594964114, + "learning_rate": 9.57256402829254e-08, + "loss": 0.43458276987075806, + "step": 7518 + }, + { + "epoch": 1.733686880332027, + "grad_norm": 1.457966513875051, + "learning_rate": 9.556292581591196e-08, + "loss": 0.41533568501472473, + "step": 7519 + }, + { + "epoch": 1.7339174544616094, + "grad_norm": 1.4363289807621746, + "learning_rate": 9.540034281799325e-08, + "loss": 0.45898690819740295, + "step": 7520 + }, + { + "epoch": 1.734148028591192, + "grad_norm": 1.610315429506808, + "learning_rate": 9.523789131280279e-08, + "loss": 0.3321181535720825, + "step": 7521 + }, + { + "epoch": 1.7343786027207746, + "grad_norm": 1.5824862936232118, + "learning_rate": 9.507557132395416e-08, + "loss": 0.3926161229610443, + "step": 7522 + }, + { + "epoch": 1.7346091768503573, + "grad_norm": 1.264710302836967, + "learning_rate": 9.491338287504247e-08, + "loss": 0.41051846742630005, + "step": 7523 + }, + { + "epoch": 1.73483975097994, + "grad_norm": 1.3604853902379428, + "learning_rate": 9.47513259896432e-08, + "loss": 0.4440652132034302, + "step": 7524 + }, + { + "epoch": 1.7350703251095227, + 
"grad_norm": 1.5933781203678954, + "learning_rate": 9.458940069131304e-08, + "loss": 0.5175125598907471, + "step": 7525 + }, + { + "epoch": 1.7353008992391055, + "grad_norm": 1.4535445480892137, + "learning_rate": 9.442760700358987e-08, + "loss": 0.45521751046180725, + "step": 7526 + }, + { + "epoch": 1.7355314733686882, + "grad_norm": 1.5707484811695662, + "learning_rate": 9.426594494999151e-08, + "loss": 0.5133911967277527, + "step": 7527 + }, + { + "epoch": 1.7357620474982707, + "grad_norm": 1.8770278394623805, + "learning_rate": 9.410441455401752e-08, + "loss": 0.4397609233856201, + "step": 7528 + }, + { + "epoch": 1.7359926216278534, + "grad_norm": 3.7292879258339693, + "learning_rate": 9.394301583914765e-08, + "loss": 0.4503510594367981, + "step": 7529 + }, + { + "epoch": 1.7362231957574359, + "grad_norm": 1.5909450336667472, + "learning_rate": 9.378174882884327e-08, + "loss": 0.44119834899902344, + "step": 7530 + }, + { + "epoch": 1.7364537698870186, + "grad_norm": 1.5959659498105105, + "learning_rate": 9.362061354654583e-08, + "loss": 0.46257996559143066, + "step": 7531 + }, + { + "epoch": 1.7366843440166013, + "grad_norm": 1.4727698319610416, + "learning_rate": 9.345961001567792e-08, + "loss": 0.4468308687210083, + "step": 7532 + }, + { + "epoch": 1.736914918146184, + "grad_norm": 1.329652616869682, + "learning_rate": 9.32987382596433e-08, + "loss": 0.3837989568710327, + "step": 7533 + }, + { + "epoch": 1.7371454922757668, + "grad_norm": 1.7149798865191848, + "learning_rate": 9.313799830182644e-08, + "loss": 0.4224961996078491, + "step": 7534 + }, + { + "epoch": 1.7373760664053495, + "grad_norm": 1.3527154365554523, + "learning_rate": 9.297739016559225e-08, + "loss": 0.37379956245422363, + "step": 7535 + }, + { + "epoch": 1.737606640534932, + "grad_norm": 1.3983736958193809, + "learning_rate": 9.281691387428658e-08, + "loss": 0.4204242527484894, + "step": 7536 + }, + { + "epoch": 1.7378372146645147, + "grad_norm": 1.550547566194999, + "learning_rate": 
9.265656945123678e-08, + "loss": 0.5270572900772095, + "step": 7537 + }, + { + "epoch": 1.7380677887940972, + "grad_norm": 1.6826850331086136, + "learning_rate": 9.249635691975e-08, + "loss": 0.44208282232284546, + "step": 7538 + }, + { + "epoch": 1.73829836292368, + "grad_norm": 1.158547237110862, + "learning_rate": 9.233627630311502e-08, + "loss": 0.32514283061027527, + "step": 7539 + }, + { + "epoch": 1.7385289370532626, + "grad_norm": 1.42135951118167, + "learning_rate": 9.217632762460126e-08, + "loss": 0.35472434759140015, + "step": 7540 + }, + { + "epoch": 1.7387595111828453, + "grad_norm": 1.9134735814581072, + "learning_rate": 9.201651090745888e-08, + "loss": 0.5034215450286865, + "step": 7541 + }, + { + "epoch": 1.738990085312428, + "grad_norm": 1.4950522917395752, + "learning_rate": 9.185682617491863e-08, + "loss": 0.4779762029647827, + "step": 7542 + }, + { + "epoch": 1.7392206594420108, + "grad_norm": 1.7544463226218252, + "learning_rate": 9.169727345019263e-08, + "loss": 0.4964079260826111, + "step": 7543 + }, + { + "epoch": 1.7394512335715933, + "grad_norm": 1.8208500448761544, + "learning_rate": 9.153785275647319e-08, + "loss": 0.5125068426132202, + "step": 7544 + }, + { + "epoch": 1.739681807701176, + "grad_norm": 1.369096268264849, + "learning_rate": 9.13785641169339e-08, + "loss": 0.39051756262779236, + "step": 7545 + }, + { + "epoch": 1.7399123818307585, + "grad_norm": 1.6132499721446665, + "learning_rate": 9.121940755472901e-08, + "loss": 0.45951950550079346, + "step": 7546 + }, + { + "epoch": 1.7401429559603412, + "grad_norm": 1.402513218333582, + "learning_rate": 9.106038309299302e-08, + "loss": 0.42676979303359985, + "step": 7547 + }, + { + "epoch": 1.740373530089924, + "grad_norm": 1.6248647623340229, + "learning_rate": 9.090149075484255e-08, + "loss": 0.3585033416748047, + "step": 7548 + }, + { + "epoch": 1.7406041042195066, + "grad_norm": 1.5204418845888263, + "learning_rate": 9.074273056337366e-08, + "loss": 0.4613775312900543, + "step": 
7549 + }, + { + "epoch": 1.7408346783490893, + "grad_norm": 1.5756472296671777, + "learning_rate": 9.058410254166415e-08, + "loss": 0.48934412002563477, + "step": 7550 + }, + { + "epoch": 1.741065252478672, + "grad_norm": 2.3682357853653895, + "learning_rate": 9.042560671277177e-08, + "loss": 0.5749069452285767, + "step": 7551 + }, + { + "epoch": 1.7412958266082545, + "grad_norm": 1.4990310296288942, + "learning_rate": 9.026724309973588e-08, + "loss": 0.4760423004627228, + "step": 7552 + }, + { + "epoch": 1.7415264007378373, + "grad_norm": 1.38070744019409, + "learning_rate": 9.010901172557594e-08, + "loss": 0.43080049753189087, + "step": 7553 + }, + { + "epoch": 1.7417569748674198, + "grad_norm": 1.4636238536042068, + "learning_rate": 8.99509126132928e-08, + "loss": 0.44850271940231323, + "step": 7554 + }, + { + "epoch": 1.7419875489970025, + "grad_norm": 1.5357653243690434, + "learning_rate": 8.979294578586738e-08, + "loss": 0.34593498706817627, + "step": 7555 + }, + { + "epoch": 1.7422181231265852, + "grad_norm": 1.3635590695208566, + "learning_rate": 8.963511126626188e-08, + "loss": 0.3738324046134949, + "step": 7556 + }, + { + "epoch": 1.742448697256168, + "grad_norm": 1.6262402635208488, + "learning_rate": 8.947740907741952e-08, + "loss": 0.47988662123680115, + "step": 7557 + }, + { + "epoch": 1.7426792713857506, + "grad_norm": 1.904530616299084, + "learning_rate": 8.931983924226338e-08, + "loss": 0.5863034725189209, + "step": 7558 + }, + { + "epoch": 1.7429098455153333, + "grad_norm": 1.497315511162884, + "learning_rate": 8.916240178369827e-08, + "loss": 0.38455232977867126, + "step": 7559 + }, + { + "epoch": 1.7431404196449158, + "grad_norm": 1.711133818053075, + "learning_rate": 8.900509672460899e-08, + "loss": 0.3919760584831238, + "step": 7560 + }, + { + "epoch": 1.7433709937744986, + "grad_norm": 1.8876361089943499, + "learning_rate": 8.884792408786169e-08, + "loss": 0.4090653657913208, + "step": 7561 + }, + { + "epoch": 1.743601567904081, + 
"grad_norm": 1.458591423296693, + "learning_rate": 8.869088389630264e-08, + "loss": 0.42597073316574097, + "step": 7562 + }, + { + "epoch": 1.7438321420336638, + "grad_norm": 1.4410906971279085, + "learning_rate": 8.853397617275959e-08, + "loss": 0.38760805130004883, + "step": 7563 + }, + { + "epoch": 1.7440627161632465, + "grad_norm": 1.3930314463175644, + "learning_rate": 8.837720094004042e-08, + "loss": 0.3753165900707245, + "step": 7564 + }, + { + "epoch": 1.7442932902928292, + "grad_norm": 1.4708100181524995, + "learning_rate": 8.822055822093432e-08, + "loss": 0.5169536471366882, + "step": 7565 + }, + { + "epoch": 1.744523864422412, + "grad_norm": 1.436339252382814, + "learning_rate": 8.806404803821077e-08, + "loss": 0.3886902332305908, + "step": 7566 + }, + { + "epoch": 1.7447544385519944, + "grad_norm": 1.7378167101447366, + "learning_rate": 8.790767041461977e-08, + "loss": 0.48971402645111084, + "step": 7567 + }, + { + "epoch": 1.7449850126815771, + "grad_norm": 1.3555756556469605, + "learning_rate": 8.775142537289282e-08, + "loss": 0.4656449556350708, + "step": 7568 + }, + { + "epoch": 1.7452155868111596, + "grad_norm": 1.24689144854066, + "learning_rate": 8.75953129357414e-08, + "loss": 0.43197786808013916, + "step": 7569 + }, + { + "epoch": 1.7454461609407423, + "grad_norm": 1.6584429086506909, + "learning_rate": 8.743933312585816e-08, + "loss": 0.5062606930732727, + "step": 7570 + }, + { + "epoch": 1.745676735070325, + "grad_norm": 1.714345013647294, + "learning_rate": 8.728348596591639e-08, + "loss": 0.5489983558654785, + "step": 7571 + }, + { + "epoch": 1.7459073091999078, + "grad_norm": 1.4457283500823468, + "learning_rate": 8.712777147857031e-08, + "loss": 0.4351652264595032, + "step": 7572 + }, + { + "epoch": 1.7461378833294905, + "grad_norm": 2.160367880410759, + "learning_rate": 8.697218968645403e-08, + "loss": 0.5096884965896606, + "step": 7573 + }, + { + "epoch": 1.7463684574590732, + "grad_norm": 1.2837319415683648, + "learning_rate": 
8.681674061218347e-08, + "loss": 0.3127269744873047, + "step": 7574 + }, + { + "epoch": 1.7465990315886557, + "grad_norm": 1.8378362837335938, + "learning_rate": 8.666142427835443e-08, + "loss": 0.4738629460334778, + "step": 7575 + }, + { + "epoch": 1.7468296057182384, + "grad_norm": 1.5090024147723615, + "learning_rate": 8.650624070754375e-08, + "loss": 0.46921902894973755, + "step": 7576 + }, + { + "epoch": 1.747060179847821, + "grad_norm": 1.578667567709185, + "learning_rate": 8.635118992230906e-08, + "loss": 0.5296987891197205, + "step": 7577 + }, + { + "epoch": 1.7472907539774036, + "grad_norm": 1.1732895039201416, + "learning_rate": 8.619627194518819e-08, + "loss": 0.3522387742996216, + "step": 7578 + }, + { + "epoch": 1.7475213281069863, + "grad_norm": 1.550879536093582, + "learning_rate": 8.604148679870049e-08, + "loss": 0.42747724056243896, + "step": 7579 + }, + { + "epoch": 1.747751902236569, + "grad_norm": 1.535695568842986, + "learning_rate": 8.588683450534528e-08, + "loss": 0.399990439414978, + "step": 7580 + }, + { + "epoch": 1.7479824763661518, + "grad_norm": 1.688266581429453, + "learning_rate": 8.573231508760315e-08, + "loss": 0.48220518231391907, + "step": 7581 + }, + { + "epoch": 1.7482130504957345, + "grad_norm": 1.8452105924711204, + "learning_rate": 8.557792856793455e-08, + "loss": 0.5227106213569641, + "step": 7582 + }, + { + "epoch": 1.748443624625317, + "grad_norm": 1.596076015195143, + "learning_rate": 8.542367496878178e-08, + "loss": 0.5436732769012451, + "step": 7583 + }, + { + "epoch": 1.7486741987548997, + "grad_norm": 1.5781135040763308, + "learning_rate": 8.526955431256644e-08, + "loss": 0.48398053646087646, + "step": 7584 + }, + { + "epoch": 1.7489047728844822, + "grad_norm": 1.8109008330023073, + "learning_rate": 8.511556662169217e-08, + "loss": 0.5727924108505249, + "step": 7585 + }, + { + "epoch": 1.749135347014065, + "grad_norm": 1.7451913815699138, + "learning_rate": 8.496171191854229e-08, + "loss": 0.48077693581581116, + 
"step": 7586 + }, + { + "epoch": 1.7493659211436476, + "grad_norm": 1.4513314868999736, + "learning_rate": 8.480799022548113e-08, + "loss": 0.45447635650634766, + "step": 7587 + }, + { + "epoch": 1.7495964952732304, + "grad_norm": 1.7305734402801412, + "learning_rate": 8.465440156485392e-08, + "loss": 0.4605486989021301, + "step": 7588 + }, + { + "epoch": 1.749827069402813, + "grad_norm": 1.6087138586576477, + "learning_rate": 8.450094595898604e-08, + "loss": 0.4229927062988281, + "step": 7589 + }, + { + "epoch": 1.7500576435323958, + "grad_norm": 1.371495589643338, + "learning_rate": 8.434762343018408e-08, + "loss": 0.43005260825157166, + "step": 7590 + }, + { + "epoch": 1.7502882176619783, + "grad_norm": 1.739761797548497, + "learning_rate": 8.41944340007349e-08, + "loss": 0.47446098923683167, + "step": 7591 + }, + { + "epoch": 1.750518791791561, + "grad_norm": 1.6084919754115274, + "learning_rate": 8.40413776929062e-08, + "loss": 0.40554216504096985, + "step": 7592 + }, + { + "epoch": 1.7507493659211435, + "grad_norm": 1.2363538330087616, + "learning_rate": 8.38884545289461e-08, + "loss": 0.4144189953804016, + "step": 7593 + }, + { + "epoch": 1.7509799400507262, + "grad_norm": 1.6677815347140812, + "learning_rate": 8.373566453108361e-08, + "loss": 0.449351966381073, + "step": 7594 + }, + { + "epoch": 1.751210514180309, + "grad_norm": 1.8357616333643774, + "learning_rate": 8.358300772152849e-08, + "loss": 0.4584103226661682, + "step": 7595 + }, + { + "epoch": 1.7514410883098916, + "grad_norm": 1.6545876792386258, + "learning_rate": 8.343048412247066e-08, + "loss": 0.4739362895488739, + "step": 7596 + }, + { + "epoch": 1.7516716624394744, + "grad_norm": 1.3684829539670578, + "learning_rate": 8.327809375608131e-08, + "loss": 0.3970356583595276, + "step": 7597 + }, + { + "epoch": 1.751902236569057, + "grad_norm": 1.390074068538192, + "learning_rate": 8.312583664451157e-08, + "loss": 0.4298238754272461, + "step": 7598 + }, + { + "epoch": 1.7521328106986396, + 
"grad_norm": 1.5218432452457022, + "learning_rate": 8.297371280989385e-08, + "loss": 0.4920361340045929, + "step": 7599 + }, + { + "epoch": 1.7523633848282223, + "grad_norm": 1.6001856104794878, + "learning_rate": 8.282172227434059e-08, + "loss": 0.5035870671272278, + "step": 7600 + }, + { + "epoch": 1.7525939589578048, + "grad_norm": 1.8053658495544915, + "learning_rate": 8.266986505994555e-08, + "loss": 0.373248815536499, + "step": 7601 + }, + { + "epoch": 1.7528245330873875, + "grad_norm": 2.0338367024251345, + "learning_rate": 8.25181411887822e-08, + "loss": 0.48491543531417847, + "step": 7602 + }, + { + "epoch": 1.7530551072169702, + "grad_norm": 1.6403088167242337, + "learning_rate": 8.236655068290554e-08, + "loss": 0.4298476576805115, + "step": 7603 + }, + { + "epoch": 1.753285681346553, + "grad_norm": 1.5503246605292686, + "learning_rate": 8.221509356435064e-08, + "loss": 0.48804932832717896, + "step": 7604 + }, + { + "epoch": 1.7535162554761357, + "grad_norm": 1.595278442494436, + "learning_rate": 8.206376985513353e-08, + "loss": 0.467857301235199, + "step": 7605 + }, + { + "epoch": 1.7537468296057184, + "grad_norm": 1.8978537163965867, + "learning_rate": 8.19125795772504e-08, + "loss": 0.48995548486709595, + "step": 7606 + }, + { + "epoch": 1.7539774037353009, + "grad_norm": 1.488521983097995, + "learning_rate": 8.176152275267823e-08, + "loss": 0.4459487795829773, + "step": 7607 + }, + { + "epoch": 1.7542079778648836, + "grad_norm": 1.4326042778667836, + "learning_rate": 8.1610599403375e-08, + "loss": 0.5054866671562195, + "step": 7608 + }, + { + "epoch": 1.754438551994466, + "grad_norm": 1.4563884146816763, + "learning_rate": 8.145980955127862e-08, + "loss": 0.46223869919776917, + "step": 7609 + }, + { + "epoch": 1.7546691261240488, + "grad_norm": 1.696768225081691, + "learning_rate": 8.1309153218308e-08, + "loss": 0.4743426442146301, + "step": 7610 + }, + { + "epoch": 1.7548997002536315, + "grad_norm": 1.7623915082520603, + "learning_rate": 
8.115863042636262e-08, + "loss": 0.40808072686195374, + "step": 7611 + }, + { + "epoch": 1.7551302743832142, + "grad_norm": 1.3859431275297254, + "learning_rate": 8.100824119732263e-08, + "loss": 0.4452321231365204, + "step": 7612 + }, + { + "epoch": 1.755360848512797, + "grad_norm": 1.556764426976114, + "learning_rate": 8.085798555304824e-08, + "loss": 0.4211857318878174, + "step": 7613 + }, + { + "epoch": 1.7555914226423797, + "grad_norm": 1.5080375348033017, + "learning_rate": 8.070786351538117e-08, + "loss": 0.3356667757034302, + "step": 7614 + }, + { + "epoch": 1.7558219967719622, + "grad_norm": 1.7842469682737618, + "learning_rate": 8.055787510614287e-08, + "loss": 0.4636021852493286, + "step": 7615 + }, + { + "epoch": 1.7560525709015449, + "grad_norm": 1.624229543588168, + "learning_rate": 8.040802034713546e-08, + "loss": 0.4066168963909149, + "step": 7616 + }, + { + "epoch": 1.7562831450311274, + "grad_norm": 1.4896510438449921, + "learning_rate": 8.025829926014216e-08, + "loss": 0.426937460899353, + "step": 7617 + }, + { + "epoch": 1.75651371916071, + "grad_norm": 1.838065393231424, + "learning_rate": 8.010871186692625e-08, + "loss": 0.464493989944458, + "step": 7618 + }, + { + "epoch": 1.7567442932902928, + "grad_norm": 1.7522078931434732, + "learning_rate": 7.995925818923222e-08, + "loss": 0.44130605459213257, + "step": 7619 + }, + { + "epoch": 1.7569748674198755, + "grad_norm": 1.6877219329526134, + "learning_rate": 7.980993824878402e-08, + "loss": 0.5241909027099609, + "step": 7620 + }, + { + "epoch": 1.7572054415494582, + "grad_norm": 1.605603526262718, + "learning_rate": 7.96607520672874e-08, + "loss": 0.45450860261917114, + "step": 7621 + }, + { + "epoch": 1.757436015679041, + "grad_norm": 1.6393742771356723, + "learning_rate": 7.951169966642757e-08, + "loss": 0.443767786026001, + "step": 7622 + }, + { + "epoch": 1.7576665898086234, + "grad_norm": 1.5258486167332923, + "learning_rate": 7.936278106787131e-08, + "loss": 0.3951075077056885, + "step": 
7623 + }, + { + "epoch": 1.7578971639382062, + "grad_norm": 1.8216713225734935, + "learning_rate": 7.921399629326509e-08, + "loss": 0.44628477096557617, + "step": 7624 + }, + { + "epoch": 1.7581277380677887, + "grad_norm": 1.7421703870668572, + "learning_rate": 7.906534536423648e-08, + "loss": 0.38743889331817627, + "step": 7625 + }, + { + "epoch": 1.7583583121973714, + "grad_norm": 1.4726686928375068, + "learning_rate": 7.891682830239311e-08, + "loss": 0.4338032007217407, + "step": 7626 + }, + { + "epoch": 1.758588886326954, + "grad_norm": 1.7605246972541082, + "learning_rate": 7.876844512932367e-08, + "loss": 0.47387874126434326, + "step": 7627 + }, + { + "epoch": 1.7588194604565368, + "grad_norm": 1.6222674378421518, + "learning_rate": 7.86201958665973e-08, + "loss": 0.4082717299461365, + "step": 7628 + }, + { + "epoch": 1.7590500345861195, + "grad_norm": 1.462169761343313, + "learning_rate": 7.847208053576326e-08, + "loss": 0.4254682958126068, + "step": 7629 + }, + { + "epoch": 1.7592806087157022, + "grad_norm": 1.319688989297758, + "learning_rate": 7.832409915835181e-08, + "loss": 0.3572045564651489, + "step": 7630 + }, + { + "epoch": 1.7595111828452847, + "grad_norm": 1.398732808330898, + "learning_rate": 7.817625175587328e-08, + "loss": 0.39110279083251953, + "step": 7631 + }, + { + "epoch": 1.7597417569748675, + "grad_norm": 2.455493892116574, + "learning_rate": 7.802853834981926e-08, + "loss": 0.49292176961898804, + "step": 7632 + }, + { + "epoch": 1.75997233110445, + "grad_norm": 1.460109162216243, + "learning_rate": 7.78809589616608e-08, + "loss": 0.4271275997161865, + "step": 7633 + }, + { + "epoch": 1.7602029052340327, + "grad_norm": 1.5973984242111468, + "learning_rate": 7.77335136128503e-08, + "loss": 0.470772922039032, + "step": 7634 + }, + { + "epoch": 1.7604334793636154, + "grad_norm": 1.5415713448452681, + "learning_rate": 7.758620232482083e-08, + "loss": 0.4872988760471344, + "step": 7635 + }, + { + "epoch": 1.760664053493198, + "grad_norm": 
1.2959777480648245, + "learning_rate": 7.743902511898492e-08, + "loss": 0.4300990104675293, + "step": 7636 + }, + { + "epoch": 1.7608946276227808, + "grad_norm": 1.4331560277043864, + "learning_rate": 7.729198201673682e-08, + "loss": 0.4524795711040497, + "step": 7637 + }, + { + "epoch": 1.7611252017523635, + "grad_norm": 1.580884966063861, + "learning_rate": 7.714507303945028e-08, + "loss": 0.4673241376876831, + "step": 7638 + }, + { + "epoch": 1.761355775881946, + "grad_norm": 1.7656151539321776, + "learning_rate": 7.699829820848048e-08, + "loss": 0.5171443223953247, + "step": 7639 + }, + { + "epoch": 1.7615863500115287, + "grad_norm": 1.5721911288259287, + "learning_rate": 7.68516575451621e-08, + "loss": 0.44416171312332153, + "step": 7640 + }, + { + "epoch": 1.7618169241411112, + "grad_norm": 1.8596688405579505, + "learning_rate": 7.670515107081122e-08, + "loss": 0.4456225633621216, + "step": 7641 + }, + { + "epoch": 1.762047498270694, + "grad_norm": 1.427384194238264, + "learning_rate": 7.65587788067239e-08, + "loss": 0.5235984921455383, + "step": 7642 + }, + { + "epoch": 1.7622780724002767, + "grad_norm": 1.5098894741733768, + "learning_rate": 7.641254077417702e-08, + "loss": 0.4957311749458313, + "step": 7643 + }, + { + "epoch": 1.7625086465298594, + "grad_norm": 1.9524483698152115, + "learning_rate": 7.626643699442748e-08, + "loss": 0.48401015996932983, + "step": 7644 + }, + { + "epoch": 1.762739220659442, + "grad_norm": 1.5925905896008645, + "learning_rate": 7.612046748871326e-08, + "loss": 0.5440249443054199, + "step": 7645 + }, + { + "epoch": 1.7629697947890248, + "grad_norm": 1.5363697612706335, + "learning_rate": 7.597463227825229e-08, + "loss": 0.3922181725502014, + "step": 7646 + }, + { + "epoch": 1.7632003689186073, + "grad_norm": 1.7121602067196948, + "learning_rate": 7.582893138424318e-08, + "loss": 0.4679541289806366, + "step": 7647 + }, + { + "epoch": 1.76343094304819, + "grad_norm": 1.63738592997542, + "learning_rate": 7.568336482786508e-08, + 
"loss": 0.4461076557636261, + "step": 7648 + }, + { + "epoch": 1.7636615171777725, + "grad_norm": 1.769800706819883, + "learning_rate": 7.553793263027752e-08, + "loss": 0.4028201997280121, + "step": 7649 + }, + { + "epoch": 1.7638920913073552, + "grad_norm": 1.6924130336118084, + "learning_rate": 7.53926348126206e-08, + "loss": 0.47307640314102173, + "step": 7650 + }, + { + "epoch": 1.764122665436938, + "grad_norm": 1.7236868707009407, + "learning_rate": 7.524747139601473e-08, + "loss": 0.4763333201408386, + "step": 7651 + }, + { + "epoch": 1.7643532395665207, + "grad_norm": 1.5475351462285587, + "learning_rate": 7.510244240156127e-08, + "loss": 0.5062815546989441, + "step": 7652 + }, + { + "epoch": 1.7645838136961034, + "grad_norm": 1.4648234779945293, + "learning_rate": 7.495754785034114e-08, + "loss": 0.38344740867614746, + "step": 7653 + }, + { + "epoch": 1.7648143878256861, + "grad_norm": 1.5630602768230752, + "learning_rate": 7.48127877634166e-08, + "loss": 0.36255425214767456, + "step": 7654 + }, + { + "epoch": 1.7650449619552686, + "grad_norm": 1.4144647369682326, + "learning_rate": 7.466816216182969e-08, + "loss": 0.4136468172073364, + "step": 7655 + }, + { + "epoch": 1.7652755360848513, + "grad_norm": 1.5589028620208925, + "learning_rate": 7.452367106660351e-08, + "loss": 0.4294041395187378, + "step": 7656 + }, + { + "epoch": 1.7655061102144338, + "grad_norm": 1.5271012787948486, + "learning_rate": 7.437931449874101e-08, + "loss": 0.3865356147289276, + "step": 7657 + }, + { + "epoch": 1.7657366843440165, + "grad_norm": 1.5355711497321805, + "learning_rate": 7.42350924792261e-08, + "loss": 0.44538289308547974, + "step": 7658 + }, + { + "epoch": 1.7659672584735993, + "grad_norm": 1.6285566114230512, + "learning_rate": 7.409100502902299e-08, + "loss": 0.4943844676017761, + "step": 7659 + }, + { + "epoch": 1.766197832603182, + "grad_norm": 1.759721404059002, + "learning_rate": 7.394705216907582e-08, + "loss": 0.41705092787742615, + "step": 7660 + }, + { + 
"epoch": 1.7664284067327647, + "grad_norm": 1.4175389623557053, + "learning_rate": 7.380323392031018e-08, + "loss": 0.4304206967353821, + "step": 7661 + }, + { + "epoch": 1.7666589808623474, + "grad_norm": 1.3933381760031749, + "learning_rate": 7.365955030363102e-08, + "loss": 0.4830179214477539, + "step": 7662 + }, + { + "epoch": 1.76688955499193, + "grad_norm": 1.51616499834235, + "learning_rate": 7.351600133992452e-08, + "loss": 0.47749078273773193, + "step": 7663 + }, + { + "epoch": 1.7671201291215126, + "grad_norm": 1.4074934707168656, + "learning_rate": 7.337258705005667e-08, + "loss": 0.3899204730987549, + "step": 7664 + }, + { + "epoch": 1.7673507032510951, + "grad_norm": 1.4123867126002758, + "learning_rate": 7.322930745487443e-08, + "loss": 0.4621524214744568, + "step": 7665 + }, + { + "epoch": 1.7675812773806778, + "grad_norm": 1.725639837898645, + "learning_rate": 7.308616257520506e-08, + "loss": 0.5305047035217285, + "step": 7666 + }, + { + "epoch": 1.7678118515102605, + "grad_norm": 2.1356750734168646, + "learning_rate": 7.294315243185578e-08, + "loss": 0.5894631147384644, + "step": 7667 + }, + { + "epoch": 1.7680424256398433, + "grad_norm": 1.5389151696841823, + "learning_rate": 7.280027704561498e-08, + "loss": 0.38509970903396606, + "step": 7668 + }, + { + "epoch": 1.768272999769426, + "grad_norm": 1.7309245548099654, + "learning_rate": 7.265753643725048e-08, + "loss": 0.45494410395622253, + "step": 7669 + }, + { + "epoch": 1.7685035738990087, + "grad_norm": 1.7035489800713894, + "learning_rate": 7.251493062751169e-08, + "loss": 0.4819248914718628, + "step": 7670 + }, + { + "epoch": 1.7687341480285912, + "grad_norm": 1.4325571648838293, + "learning_rate": 7.237245963712724e-08, + "loss": 0.43286386132240295, + "step": 7671 + }, + { + "epoch": 1.768964722158174, + "grad_norm": 1.3036122364237743, + "learning_rate": 7.223012348680724e-08, + "loss": 0.4285479187965393, + "step": 7672 + }, + { + "epoch": 1.7691952962877564, + "grad_norm": 
1.6598071005655777, + "learning_rate": 7.208792219724124e-08, + "loss": 0.42678505182266235, + "step": 7673 + }, + { + "epoch": 1.7694258704173391, + "grad_norm": 1.647090361621967, + "learning_rate": 7.194585578909995e-08, + "loss": 0.47091686725616455, + "step": 7674 + }, + { + "epoch": 1.7696564445469218, + "grad_norm": 1.5115484466399114, + "learning_rate": 7.180392428303394e-08, + "loss": 0.41932445764541626, + "step": 7675 + }, + { + "epoch": 1.7698870186765046, + "grad_norm": 1.2463006271885857, + "learning_rate": 7.166212769967483e-08, + "loss": 0.4043616056442261, + "step": 7676 + }, + { + "epoch": 1.7701175928060873, + "grad_norm": 1.5310666660883137, + "learning_rate": 7.15204660596338e-08, + "loss": 0.395826518535614, + "step": 7677 + }, + { + "epoch": 1.7703481669356698, + "grad_norm": 1.4874807127430703, + "learning_rate": 7.13789393835027e-08, + "loss": 0.4684498906135559, + "step": 7678 + }, + { + "epoch": 1.7705787410652525, + "grad_norm": 1.8560085011670902, + "learning_rate": 7.12375476918542e-08, + "loss": 0.4713285565376282, + "step": 7679 + }, + { + "epoch": 1.770809315194835, + "grad_norm": 1.487262641155755, + "learning_rate": 7.109629100524073e-08, + "loss": 0.47559499740600586, + "step": 7680 + }, + { + "epoch": 1.7710398893244177, + "grad_norm": 1.5741914036439861, + "learning_rate": 7.095516934419554e-08, + "loss": 0.5364210605621338, + "step": 7681 + }, + { + "epoch": 1.7712704634540004, + "grad_norm": 1.942648846069337, + "learning_rate": 7.081418272923212e-08, + "loss": 0.5731894969940186, + "step": 7682 + }, + { + "epoch": 1.7715010375835831, + "grad_norm": 1.7006107903804015, + "learning_rate": 7.067333118084428e-08, + "loss": 0.4287458062171936, + "step": 7683 + }, + { + "epoch": 1.7717316117131658, + "grad_norm": 1.5575643616743255, + "learning_rate": 7.053261471950612e-08, + "loss": 0.3849913775920868, + "step": 7684 + }, + { + "epoch": 1.7719621858427486, + "grad_norm": 1.4243498094919005, + "learning_rate": 
7.039203336567245e-08, + "loss": 0.4933156371116638, + "step": 7685 + }, + { + "epoch": 1.772192759972331, + "grad_norm": 1.897795122632639, + "learning_rate": 7.025158713977808e-08, + "loss": 0.5185002088546753, + "step": 7686 + }, + { + "epoch": 1.7724233341019138, + "grad_norm": 1.634847266537775, + "learning_rate": 7.011127606223799e-08, + "loss": 0.514995276927948, + "step": 7687 + }, + { + "epoch": 1.7726539082314963, + "grad_norm": 1.5845868665458605, + "learning_rate": 6.99711001534481e-08, + "loss": 0.4362761676311493, + "step": 7688 + }, + { + "epoch": 1.772884482361079, + "grad_norm": 1.699858455397738, + "learning_rate": 6.983105943378431e-08, + "loss": 0.44117432832717896, + "step": 7689 + }, + { + "epoch": 1.7731150564906617, + "grad_norm": 1.5875521204144505, + "learning_rate": 6.969115392360325e-08, + "loss": 0.4940808415412903, + "step": 7690 + }, + { + "epoch": 1.7733456306202444, + "grad_norm": 1.9046624573594293, + "learning_rate": 6.955138364324109e-08, + "loss": 0.4322758913040161, + "step": 7691 + }, + { + "epoch": 1.7735762047498271, + "grad_norm": 1.467450936859881, + "learning_rate": 6.941174861301536e-08, + "loss": 0.3867933750152588, + "step": 7692 + }, + { + "epoch": 1.7738067788794099, + "grad_norm": 1.6321329987514115, + "learning_rate": 6.927224885322302e-08, + "loss": 0.4380000829696655, + "step": 7693 + }, + { + "epoch": 1.7740373530089923, + "grad_norm": 1.7183023620516549, + "learning_rate": 6.913288438414222e-08, + "loss": 0.46499723196029663, + "step": 7694 + }, + { + "epoch": 1.774267927138575, + "grad_norm": 1.6625572218896962, + "learning_rate": 6.89936552260304e-08, + "loss": 0.4845675230026245, + "step": 7695 + }, + { + "epoch": 1.7744985012681576, + "grad_norm": 1.3920222388819354, + "learning_rate": 6.88545613991266e-08, + "loss": 0.3755526542663574, + "step": 7696 + }, + { + "epoch": 1.7747290753977403, + "grad_norm": 1.358162383242242, + "learning_rate": 6.871560292364887e-08, + "loss": 0.4765484929084778, + "step": 
7697 + }, + { + "epoch": 1.774959649527323, + "grad_norm": 1.5701618596645643, + "learning_rate": 6.857677981979659e-08, + "loss": 0.4176154136657715, + "step": 7698 + }, + { + "epoch": 1.7751902236569057, + "grad_norm": 1.5881043143352427, + "learning_rate": 6.84380921077492e-08, + "loss": 0.410483717918396, + "step": 7699 + }, + { + "epoch": 1.7754207977864884, + "grad_norm": 1.876508092569716, + "learning_rate": 6.829953980766612e-08, + "loss": 0.5188060998916626, + "step": 7700 + }, + { + "epoch": 1.7756513719160711, + "grad_norm": 1.5514145308665186, + "learning_rate": 6.816112293968745e-08, + "loss": 0.47039783000946045, + "step": 7701 + }, + { + "epoch": 1.7758819460456536, + "grad_norm": 1.6296649452825585, + "learning_rate": 6.802284152393345e-08, + "loss": 0.5367648601531982, + "step": 7702 + }, + { + "epoch": 1.7761125201752364, + "grad_norm": 1.55513001656084, + "learning_rate": 6.78846955805048e-08, + "loss": 0.500449538230896, + "step": 7703 + }, + { + "epoch": 1.7763430943048188, + "grad_norm": 1.5060722099238588, + "learning_rate": 6.774668512948234e-08, + "loss": 0.4579819440841675, + "step": 7704 + }, + { + "epoch": 1.7765736684344016, + "grad_norm": 1.7824280377613644, + "learning_rate": 6.760881019092712e-08, + "loss": 0.41459107398986816, + "step": 7705 + }, + { + "epoch": 1.7768042425639843, + "grad_norm": 1.7900526752813857, + "learning_rate": 6.747107078488112e-08, + "loss": 0.46020573377609253, + "step": 7706 + }, + { + "epoch": 1.777034816693567, + "grad_norm": 1.7709884076088374, + "learning_rate": 6.733346693136566e-08, + "loss": 0.48069459199905396, + "step": 7707 + }, + { + "epoch": 1.7772653908231497, + "grad_norm": 1.4499402707441236, + "learning_rate": 6.719599865038328e-08, + "loss": 0.3514458239078522, + "step": 7708 + }, + { + "epoch": 1.7774959649527324, + "grad_norm": 1.7044500533180955, + "learning_rate": 6.705866596191601e-08, + "loss": 0.4696041941642761, + "step": 7709 + }, + { + "epoch": 1.777726539082315, + "grad_norm": 
1.6058185659780073, + "learning_rate": 6.692146888592675e-08, + "loss": 0.45286083221435547, + "step": 7710 + }, + { + "epoch": 1.7779571132118976, + "grad_norm": 1.8525271361461533, + "learning_rate": 6.678440744235848e-08, + "loss": 0.4659677743911743, + "step": 7711 + }, + { + "epoch": 1.7781876873414801, + "grad_norm": 1.5770202034991272, + "learning_rate": 6.664748165113432e-08, + "loss": 0.4030906558036804, + "step": 7712 + }, + { + "epoch": 1.7784182614710629, + "grad_norm": 1.4781448065809968, + "learning_rate": 6.651069153215804e-08, + "loss": 0.4878493547439575, + "step": 7713 + }, + { + "epoch": 1.7786488356006456, + "grad_norm": 2.5716911461046115, + "learning_rate": 6.637403710531352e-08, + "loss": 0.4651924669742584, + "step": 7714 + }, + { + "epoch": 1.7788794097302283, + "grad_norm": 1.5268258649377473, + "learning_rate": 6.623751839046455e-08, + "loss": 0.37795954942703247, + "step": 7715 + }, + { + "epoch": 1.779109983859811, + "grad_norm": 1.8617699048987524, + "learning_rate": 6.610113540745577e-08, + "loss": 0.5722923278808594, + "step": 7716 + }, + { + "epoch": 1.7793405579893937, + "grad_norm": 2.039919155814789, + "learning_rate": 6.59648881761118e-08, + "loss": 0.46933984756469727, + "step": 7717 + }, + { + "epoch": 1.7795711321189762, + "grad_norm": 1.7692714186594531, + "learning_rate": 6.582877671623732e-08, + "loss": 0.5066707134246826, + "step": 7718 + }, + { + "epoch": 1.779801706248559, + "grad_norm": 1.5518843020711044, + "learning_rate": 6.569280104761787e-08, + "loss": 0.5064150094985962, + "step": 7719 + }, + { + "epoch": 1.7800322803781414, + "grad_norm": 1.4858522723338492, + "learning_rate": 6.555696119001853e-08, + "loss": 0.408633828163147, + "step": 7720 + }, + { + "epoch": 1.7802628545077241, + "grad_norm": 1.9460802080180855, + "learning_rate": 6.542125716318514e-08, + "loss": 0.4960691034793854, + "step": 7721 + }, + { + "epoch": 1.7804934286373069, + "grad_norm": 1.609433139750494, + "learning_rate": 
6.528568898684373e-08, + "loss": 0.4275667071342468, + "step": 7722 + }, + { + "epoch": 1.7807240027668896, + "grad_norm": 1.5242191505097453, + "learning_rate": 6.515025668070062e-08, + "loss": 0.5309962630271912, + "step": 7723 + }, + { + "epoch": 1.7809545768964723, + "grad_norm": 1.3218748644597216, + "learning_rate": 6.501496026444197e-08, + "loss": 0.42067253589630127, + "step": 7724 + }, + { + "epoch": 1.781185151026055, + "grad_norm": 1.5205678956011466, + "learning_rate": 6.487979975773484e-08, + "loss": 0.43419337272644043, + "step": 7725 + }, + { + "epoch": 1.7814157251556375, + "grad_norm": 1.728456021255068, + "learning_rate": 6.474477518022592e-08, + "loss": 0.46563541889190674, + "step": 7726 + }, + { + "epoch": 1.7816462992852202, + "grad_norm": 1.2994636821353438, + "learning_rate": 6.460988655154232e-08, + "loss": 0.4233010411262512, + "step": 7727 + }, + { + "epoch": 1.7818768734148027, + "grad_norm": 1.5541073736247684, + "learning_rate": 6.447513389129155e-08, + "loss": 0.47119754552841187, + "step": 7728 + }, + { + "epoch": 1.7821074475443854, + "grad_norm": 1.7457851161988949, + "learning_rate": 6.434051721906142e-08, + "loss": 0.5227707624435425, + "step": 7729 + }, + { + "epoch": 1.7823380216739682, + "grad_norm": 1.6453844551794445, + "learning_rate": 6.42060365544198e-08, + "loss": 0.4521239399909973, + "step": 7730 + }, + { + "epoch": 1.7825685958035509, + "grad_norm": 1.5739071323130231, + "learning_rate": 6.407169191691464e-08, + "loss": 0.36693084239959717, + "step": 7731 + }, + { + "epoch": 1.7827991699331336, + "grad_norm": 1.9032214424835083, + "learning_rate": 6.393748332607463e-08, + "loss": 0.43610745668411255, + "step": 7732 + }, + { + "epoch": 1.7830297440627163, + "grad_norm": 1.4784257370105836, + "learning_rate": 6.380341080140794e-08, + "loss": 0.4471576511859894, + "step": 7733 + }, + { + "epoch": 1.7832603181922988, + "grad_norm": 1.61284007349941, + "learning_rate": 6.366947436240367e-08, + "loss": 0.48119011521339417, 
+ "step": 7734 + }, + { + "epoch": 1.7834908923218815, + "grad_norm": 1.4393647934894105, + "learning_rate": 6.353567402853055e-08, + "loss": 0.44503623247146606, + "step": 7735 + }, + { + "epoch": 1.783721466451464, + "grad_norm": 1.3430253886827939, + "learning_rate": 6.340200981923804e-08, + "loss": 0.3350965678691864, + "step": 7736 + }, + { + "epoch": 1.7839520405810467, + "grad_norm": 1.4031838686370632, + "learning_rate": 6.326848175395572e-08, + "loss": 0.4814649224281311, + "step": 7737 + }, + { + "epoch": 1.7841826147106294, + "grad_norm": 1.3042254858214102, + "learning_rate": 6.313508985209281e-08, + "loss": 0.42114442586898804, + "step": 7738 + }, + { + "epoch": 1.7844131888402122, + "grad_norm": 1.4924201661244643, + "learning_rate": 6.30018341330396e-08, + "loss": 0.5044004917144775, + "step": 7739 + }, + { + "epoch": 1.7846437629697949, + "grad_norm": 1.7211591431218773, + "learning_rate": 6.286871461616594e-08, + "loss": 0.46084678173065186, + "step": 7740 + }, + { + "epoch": 1.7848743370993776, + "grad_norm": 1.8074380950640034, + "learning_rate": 6.273573132082222e-08, + "loss": 0.5159536600112915, + "step": 7741 + }, + { + "epoch": 1.78510491122896, + "grad_norm": 2.6340339816007394, + "learning_rate": 6.260288426633875e-08, + "loss": 0.4394105076789856, + "step": 7742 + }, + { + "epoch": 1.7853354853585428, + "grad_norm": 1.415651636415873, + "learning_rate": 6.247017347202643e-08, + "loss": 0.39798909425735474, + "step": 7743 + }, + { + "epoch": 1.7855660594881253, + "grad_norm": 1.439083218855293, + "learning_rate": 6.23375989571756e-08, + "loss": 0.3865649104118347, + "step": 7744 + }, + { + "epoch": 1.785796633617708, + "grad_norm": 1.3172940172138528, + "learning_rate": 6.220516074105808e-08, + "loss": 0.3641304671764374, + "step": 7745 + }, + { + "epoch": 1.7860272077472907, + "grad_norm": 1.7148086023867872, + "learning_rate": 6.207285884292468e-08, + "loss": 0.5025773644447327, + "step": 7746 + }, + { + "epoch": 1.7862577818768735, + 
"grad_norm": 1.5237733931532715, + "learning_rate": 6.194069328200669e-08, + "loss": 0.4289078414440155, + "step": 7747 + }, + { + "epoch": 1.7864883560064562, + "grad_norm": 1.5368409458369108, + "learning_rate": 6.180866407751595e-08, + "loss": 0.37442147731781006, + "step": 7748 + }, + { + "epoch": 1.7867189301360389, + "grad_norm": 1.6962674881863276, + "learning_rate": 6.167677124864412e-08, + "loss": 0.4975471794605255, + "step": 7749 + }, + { + "epoch": 1.7869495042656214, + "grad_norm": 1.7290797112616507, + "learning_rate": 6.154501481456331e-08, + "loss": 0.42754751443862915, + "step": 7750 + }, + { + "epoch": 1.787180078395204, + "grad_norm": 1.508949301788889, + "learning_rate": 6.141339479442542e-08, + "loss": 0.40203964710235596, + "step": 7751 + }, + { + "epoch": 1.7874106525247866, + "grad_norm": 1.6453479393381845, + "learning_rate": 6.128191120736293e-08, + "loss": 0.46465349197387695, + "step": 7752 + }, + { + "epoch": 1.7876412266543693, + "grad_norm": 1.527112166022553, + "learning_rate": 6.11505640724882e-08, + "loss": 0.43915730714797974, + "step": 7753 + }, + { + "epoch": 1.787871800783952, + "grad_norm": 1.6855929805801586, + "learning_rate": 6.101935340889419e-08, + "loss": 0.5205652713775635, + "step": 7754 + }, + { + "epoch": 1.7881023749135347, + "grad_norm": 1.8024849017160496, + "learning_rate": 6.088827923565321e-08, + "loss": 0.39400190114974976, + "step": 7755 + }, + { + "epoch": 1.7883329490431175, + "grad_norm": 1.585632228373493, + "learning_rate": 6.075734157181855e-08, + "loss": 0.48021531105041504, + "step": 7756 + }, + { + "epoch": 1.7885635231727002, + "grad_norm": 1.313118747015303, + "learning_rate": 6.062654043642334e-08, + "loss": 0.42780327796936035, + "step": 7757 + }, + { + "epoch": 1.7887940973022827, + "grad_norm": 1.5444008946931698, + "learning_rate": 6.049587584848059e-08, + "loss": 0.4307866096496582, + "step": 7758 + }, + { + "epoch": 1.7890246714318654, + "grad_norm": 1.8803266889221286, + "learning_rate": 
6.036534782698377e-08, + "loss": 0.4258533716201782, + "step": 7759 + }, + { + "epoch": 1.7892552455614479, + "grad_norm": 1.7033971690196206, + "learning_rate": 6.02349563909067e-08, + "loss": 0.5159060955047607, + "step": 7760 + }, + { + "epoch": 1.7894858196910306, + "grad_norm": 1.4016246032179807, + "learning_rate": 6.0104701559203e-08, + "loss": 0.4407171308994293, + "step": 7761 + }, + { + "epoch": 1.7897163938206133, + "grad_norm": 1.4060175796774192, + "learning_rate": 5.99745833508063e-08, + "loss": 0.40273964405059814, + "step": 7762 + }, + { + "epoch": 1.789946967950196, + "grad_norm": 1.5929040194351833, + "learning_rate": 5.984460178463102e-08, + "loss": 0.42018163204193115, + "step": 7763 + }, + { + "epoch": 1.7901775420797787, + "grad_norm": 1.5421517490968868, + "learning_rate": 5.971475687957084e-08, + "loss": 0.519807755947113, + "step": 7764 + }, + { + "epoch": 1.7904081162093615, + "grad_norm": 1.4320196013314206, + "learning_rate": 5.9585048654500535e-08, + "loss": 0.42557477951049805, + "step": 7765 + }, + { + "epoch": 1.790638690338944, + "grad_norm": 1.520426042431449, + "learning_rate": 5.9455477128273924e-08, + "loss": 0.39568305015563965, + "step": 7766 + }, + { + "epoch": 1.7908692644685267, + "grad_norm": 1.566797519717712, + "learning_rate": 5.932604231972593e-08, + "loss": 0.43125781416893005, + "step": 7767 + }, + { + "epoch": 1.7910998385981092, + "grad_norm": 1.5764190405770546, + "learning_rate": 5.919674424767129e-08, + "loss": 0.46194958686828613, + "step": 7768 + }, + { + "epoch": 1.791330412727692, + "grad_norm": 1.3811294262508054, + "learning_rate": 5.906758293090441e-08, + "loss": 0.40115779638290405, + "step": 7769 + }, + { + "epoch": 1.7915609868572746, + "grad_norm": 1.4511176958262644, + "learning_rate": 5.893855838820061e-08, + "loss": 0.46589648723602295, + "step": 7770 + }, + { + "epoch": 1.7917915609868573, + "grad_norm": 1.4613820552852321, + "learning_rate": 5.880967063831455e-08, + "loss": 0.3540228605270386, + 
"step": 7771 + }, + { + "epoch": 1.79202213511644, + "grad_norm": 1.3900736631273891, + "learning_rate": 5.868091969998168e-08, + "loss": 0.4324638545513153, + "step": 7772 + }, + { + "epoch": 1.7922527092460228, + "grad_norm": 1.426811730253004, + "learning_rate": 5.855230559191693e-08, + "loss": 0.4301075339317322, + "step": 7773 + }, + { + "epoch": 1.7924832833756053, + "grad_norm": 1.4903234676277026, + "learning_rate": 5.842382833281612e-08, + "loss": 0.4496096670627594, + "step": 7774 + }, + { + "epoch": 1.792713857505188, + "grad_norm": 1.7119132871592322, + "learning_rate": 5.8295487941354195e-08, + "loss": 0.4554907977581024, + "step": 7775 + }, + { + "epoch": 1.7929444316347705, + "grad_norm": 1.6357284914311145, + "learning_rate": 5.816728443618701e-08, + "loss": 0.5020148158073425, + "step": 7776 + }, + { + "epoch": 1.7931750057643532, + "grad_norm": 1.5886767874513543, + "learning_rate": 5.803921783595045e-08, + "loss": 0.4073353409767151, + "step": 7777 + }, + { + "epoch": 1.793405579893936, + "grad_norm": 1.7806143022342438, + "learning_rate": 5.791128815925983e-08, + "loss": 0.4995894432067871, + "step": 7778 + }, + { + "epoch": 1.7936361540235186, + "grad_norm": 1.4290018525481676, + "learning_rate": 5.778349542471139e-08, + "loss": 0.5383706092834473, + "step": 7779 + }, + { + "epoch": 1.7938667281531013, + "grad_norm": 1.5928372327878688, + "learning_rate": 5.765583965088083e-08, + "loss": 0.4206235408782959, + "step": 7780 + }, + { + "epoch": 1.794097302282684, + "grad_norm": 1.516533597399375, + "learning_rate": 5.752832085632453e-08, + "loss": 0.49053555727005005, + "step": 7781 + }, + { + "epoch": 1.7943278764122665, + "grad_norm": 1.4761016261714877, + "learning_rate": 5.740093905957832e-08, + "loss": 0.4372660517692566, + "step": 7782 + }, + { + "epoch": 1.7945584505418493, + "grad_norm": 1.364372499711938, + "learning_rate": 5.727369427915851e-08, + "loss": 0.40125733613967896, + "step": 7783 + }, + { + "epoch": 1.7947890246714318, + 
"grad_norm": 1.5421908029736124, + "learning_rate": 5.714658653356153e-08, + "loss": 0.3595162034034729, + "step": 7784 + }, + { + "epoch": 1.7950195988010145, + "grad_norm": 1.4909078230640012, + "learning_rate": 5.7019615841263915e-08, + "loss": 0.42618101835250854, + "step": 7785 + }, + { + "epoch": 1.7952501729305972, + "grad_norm": 1.2890347032019704, + "learning_rate": 5.6892782220721694e-08, + "loss": 0.39135509729385376, + "step": 7786 + }, + { + "epoch": 1.79548074706018, + "grad_norm": 1.2930421412734876, + "learning_rate": 5.6766085690372004e-08, + "loss": 0.3792929947376251, + "step": 7787 + }, + { + "epoch": 1.7957113211897626, + "grad_norm": 2.137954515105217, + "learning_rate": 5.6639526268631e-08, + "loss": 0.5193231105804443, + "step": 7788 + }, + { + "epoch": 1.7959418953193451, + "grad_norm": 1.3992061535387368, + "learning_rate": 5.6513103973895415e-08, + "loss": 0.3896862268447876, + "step": 7789 + }, + { + "epoch": 1.7961724694489278, + "grad_norm": 1.6107653457361368, + "learning_rate": 5.638681882454211e-08, + "loss": 0.5345273017883301, + "step": 7790 + }, + { + "epoch": 1.7964030435785103, + "grad_norm": 1.597285051654587, + "learning_rate": 5.626067083892794e-08, + "loss": 0.4297627806663513, + "step": 7791 + }, + { + "epoch": 1.796633617708093, + "grad_norm": 1.8890048408663909, + "learning_rate": 5.6134660035389914e-08, + "loss": 0.3176969587802887, + "step": 7792 + }, + { + "epoch": 1.7968641918376758, + "grad_norm": 1.684652354437091, + "learning_rate": 5.600878643224471e-08, + "loss": 0.5449323654174805, + "step": 7793 + }, + { + "epoch": 1.7970947659672585, + "grad_norm": 1.3924882582172304, + "learning_rate": 5.588305004778959e-08, + "loss": 0.38096293807029724, + "step": 7794 + }, + { + "epoch": 1.7973253400968412, + "grad_norm": 1.6284420500901806, + "learning_rate": 5.575745090030137e-08, + "loss": 0.3917475938796997, + "step": 7795 + }, + { + "epoch": 1.797555914226424, + "grad_norm": 1.8012275849309003, + "learning_rate": 
5.563198900803734e-08, + "loss": 0.41522616147994995, + "step": 7796 + }, + { + "epoch": 1.7977864883560064, + "grad_norm": 1.4000666419018515, + "learning_rate": 5.550666438923468e-08, + "loss": 0.46558207273483276, + "step": 7797 + }, + { + "epoch": 1.7980170624855891, + "grad_norm": 1.4562091239424864, + "learning_rate": 5.538147706211038e-08, + "loss": 0.43256324529647827, + "step": 7798 + }, + { + "epoch": 1.7982476366151716, + "grad_norm": 1.5167378404298808, + "learning_rate": 5.5256427044861666e-08, + "loss": 0.37302178144454956, + "step": 7799 + }, + { + "epoch": 1.7984782107447543, + "grad_norm": 1.7103098772379584, + "learning_rate": 5.5131514355666095e-08, + "loss": 0.5247504711151123, + "step": 7800 + }, + { + "epoch": 1.798708784874337, + "grad_norm": 1.3345270008803303, + "learning_rate": 5.5006739012680934e-08, + "loss": 0.3906348943710327, + "step": 7801 + }, + { + "epoch": 1.7989393590039198, + "grad_norm": 1.863821074304618, + "learning_rate": 5.488210103404345e-08, + "loss": 0.5293325185775757, + "step": 7802 + }, + { + "epoch": 1.7991699331335025, + "grad_norm": 1.8021445170106478, + "learning_rate": 5.4757600437871146e-08, + "loss": 0.4189381003379822, + "step": 7803 + }, + { + "epoch": 1.7994005072630852, + "grad_norm": 1.4161978936431723, + "learning_rate": 5.4633237242261207e-08, + "loss": 0.40476128458976746, + "step": 7804 + }, + { + "epoch": 1.7996310813926677, + "grad_norm": 1.6288403815954717, + "learning_rate": 5.45090114652913e-08, + "loss": 0.3908376097679138, + "step": 7805 + }, + { + "epoch": 1.7998616555222504, + "grad_norm": 1.4731211435711635, + "learning_rate": 5.438492312501885e-08, + "loss": 0.42332786321640015, + "step": 7806 + }, + { + "epoch": 1.800092229651833, + "grad_norm": 1.2492034971721793, + "learning_rate": 5.426097223948123e-08, + "loss": 0.3398321866989136, + "step": 7807 + }, + { + "epoch": 1.8003228037814156, + "grad_norm": 1.410970674481118, + "learning_rate": 5.413715882669623e-08, + "loss": 
0.4610673189163208, + "step": 7808 + }, + { + "epoch": 1.8005533779109983, + "grad_norm": 1.4416956666235687, + "learning_rate": 5.401348290466112e-08, + "loss": 0.4149124026298523, + "step": 7809 + }, + { + "epoch": 1.800783952040581, + "grad_norm": 1.4475278396115219, + "learning_rate": 5.388994449135376e-08, + "loss": 0.47464168071746826, + "step": 7810 + }, + { + "epoch": 1.8010145261701638, + "grad_norm": 1.4581354291230397, + "learning_rate": 5.376654360473121e-08, + "loss": 0.4530913829803467, + "step": 7811 + }, + { + "epoch": 1.8012451002997465, + "grad_norm": 1.7198902838066041, + "learning_rate": 5.364328026273157e-08, + "loss": 0.5577078461647034, + "step": 7812 + }, + { + "epoch": 1.801475674429329, + "grad_norm": 1.828526033611825, + "learning_rate": 5.3520154483272075e-08, + "loss": 0.4772539436817169, + "step": 7813 + }, + { + "epoch": 1.8017062485589117, + "grad_norm": 1.690066578469317, + "learning_rate": 5.339716628425039e-08, + "loss": 0.5387610197067261, + "step": 7814 + }, + { + "epoch": 1.8019368226884942, + "grad_norm": 1.7130913599502742, + "learning_rate": 5.327431568354401e-08, + "loss": 0.4505125880241394, + "step": 7815 + }, + { + "epoch": 1.802167396818077, + "grad_norm": 1.5145450098970203, + "learning_rate": 5.3151602699010867e-08, + "loss": 0.43021589517593384, + "step": 7816 + }, + { + "epoch": 1.8023979709476596, + "grad_norm": 1.6184493194868252, + "learning_rate": 5.3029027348488244e-08, + "loss": 0.44107457995414734, + "step": 7817 + }, + { + "epoch": 1.8026285450772424, + "grad_norm": 1.6224833006548345, + "learning_rate": 5.2906589649793666e-08, + "loss": 0.42265504598617554, + "step": 7818 + }, + { + "epoch": 1.802859119206825, + "grad_norm": 1.3828256021454344, + "learning_rate": 5.2784289620724895e-08, + "loss": 0.4814263582229614, + "step": 7819 + }, + { + "epoch": 1.8030896933364078, + "grad_norm": 1.3840958899744187, + "learning_rate": 5.2662127279059275e-08, + "loss": 0.4255106747150421, + "step": 7820 + }, + { + 
"epoch": 1.8033202674659903, + "grad_norm": 1.3789211684549096, + "learning_rate": 5.2540102642554593e-08, + "loss": 0.43405312299728394, + "step": 7821 + }, + { + "epoch": 1.803550841595573, + "grad_norm": 1.5062041567676776, + "learning_rate": 5.2418215728948004e-08, + "loss": 0.3986097574234009, + "step": 7822 + }, + { + "epoch": 1.8037814157251555, + "grad_norm": 1.7653469724585684, + "learning_rate": 5.2296466555957205e-08, + "loss": 0.4988093972206116, + "step": 7823 + }, + { + "epoch": 1.8040119898547382, + "grad_norm": 1.6382094442265007, + "learning_rate": 5.217485514127973e-08, + "loss": 0.5290527939796448, + "step": 7824 + }, + { + "epoch": 1.804242563984321, + "grad_norm": 1.4794199807921353, + "learning_rate": 5.205338150259308e-08, + "loss": 0.3705815076828003, + "step": 7825 + }, + { + "epoch": 1.8044731381139036, + "grad_norm": 1.3872232407887637, + "learning_rate": 5.193204565755449e-08, + "loss": 0.37735384702682495, + "step": 7826 + }, + { + "epoch": 1.8047037122434864, + "grad_norm": 1.38875357732027, + "learning_rate": 5.1810847623801504e-08, + "loss": 0.39033758640289307, + "step": 7827 + }, + { + "epoch": 1.804934286373069, + "grad_norm": 1.5105458662939806, + "learning_rate": 5.168978741895147e-08, + "loss": 0.4669237732887268, + "step": 7828 + }, + { + "epoch": 1.8051648605026516, + "grad_norm": 1.6910832171163468, + "learning_rate": 5.156886506060154e-08, + "loss": 0.5178482532501221, + "step": 7829 + }, + { + "epoch": 1.8053954346322343, + "grad_norm": 1.4473544670706617, + "learning_rate": 5.14480805663291e-08, + "loss": 0.44134122133255005, + "step": 7830 + }, + { + "epoch": 1.8056260087618168, + "grad_norm": 1.5836257156251672, + "learning_rate": 5.132743395369144e-08, + "loss": 0.44371920824050903, + "step": 7831 + }, + { + "epoch": 1.8058565828913995, + "grad_norm": 1.513244295553376, + "learning_rate": 5.1206925240225964e-08, + "loss": 0.43268662691116333, + "step": 7832 + }, + { + "epoch": 1.8060871570209822, + "grad_norm": 
1.736730853895812, + "learning_rate": 5.1086554443449445e-08, + "loss": 0.5035665035247803, + "step": 7833 + }, + { + "epoch": 1.806317731150565, + "grad_norm": 1.3694047806165788, + "learning_rate": 5.0966321580859336e-08, + "loss": 0.4987141191959381, + "step": 7834 + }, + { + "epoch": 1.8065483052801476, + "grad_norm": 1.816085685560109, + "learning_rate": 5.0846226669932437e-08, + "loss": 0.5951617956161499, + "step": 7835 + }, + { + "epoch": 1.8067788794097304, + "grad_norm": 1.464038827862328, + "learning_rate": 5.072626972812599e-08, + "loss": 0.4710814654827118, + "step": 7836 + }, + { + "epoch": 1.8070094535393129, + "grad_norm": 1.6196482413694708, + "learning_rate": 5.060645077287662e-08, + "loss": 0.5173348188400269, + "step": 7837 + }, + { + "epoch": 1.8072400276688956, + "grad_norm": 1.4170272466334293, + "learning_rate": 5.048676982160161e-08, + "loss": 0.49508416652679443, + "step": 7838 + }, + { + "epoch": 1.807470601798478, + "grad_norm": 1.7639395740589152, + "learning_rate": 5.03672268916977e-08, + "loss": 0.4535290598869324, + "step": 7839 + }, + { + "epoch": 1.8077011759280608, + "grad_norm": 1.7696762607003815, + "learning_rate": 5.024782200054145e-08, + "loss": 0.5337553024291992, + "step": 7840 + }, + { + "epoch": 1.8079317500576435, + "grad_norm": 1.6346280356935987, + "learning_rate": 5.012855516548986e-08, + "loss": 0.47118210792541504, + "step": 7841 + }, + { + "epoch": 1.8081623241872262, + "grad_norm": 1.504680600844573, + "learning_rate": 5.0009426403879283e-08, + "loss": 0.4458848237991333, + "step": 7842 + }, + { + "epoch": 1.808392898316809, + "grad_norm": 1.5297682575974059, + "learning_rate": 4.9890435733026536e-08, + "loss": 0.5055558681488037, + "step": 7843 + }, + { + "epoch": 1.8086234724463917, + "grad_norm": 1.4365609441585347, + "learning_rate": 4.9771583170228006e-08, + "loss": 0.43715038895606995, + "step": 7844 + }, + { + "epoch": 1.8088540465759742, + "grad_norm": 1.545411862707653, + "learning_rate": 
4.96528687327602e-08, + "loss": 0.427906334400177, + "step": 7845 + }, + { + "epoch": 1.8090846207055569, + "grad_norm": 1.6703597275780244, + "learning_rate": 4.953429243787932e-08, + "loss": 0.48160994052886963, + "step": 7846 + }, + { + "epoch": 1.8093151948351394, + "grad_norm": 1.3261658854233023, + "learning_rate": 4.941585430282158e-08, + "loss": 0.40856754779815674, + "step": 7847 + }, + { + "epoch": 1.809545768964722, + "grad_norm": 1.3569384823756985, + "learning_rate": 4.929755434480354e-08, + "loss": 0.40482330322265625, + "step": 7848 + }, + { + "epoch": 1.8097763430943048, + "grad_norm": 1.530544362283251, + "learning_rate": 4.9179392581021e-08, + "loss": 0.4286755323410034, + "step": 7849 + }, + { + "epoch": 1.8100069172238875, + "grad_norm": 1.5805205551700128, + "learning_rate": 4.906136902864999e-08, + "loss": 0.4436051547527313, + "step": 7850 + }, + { + "epoch": 1.8102374913534702, + "grad_norm": 1.5320309451669083, + "learning_rate": 4.8943483704846465e-08, + "loss": 0.41794437170028687, + "step": 7851 + }, + { + "epoch": 1.810468065483053, + "grad_norm": 1.4506407579843814, + "learning_rate": 4.8825736626746384e-08, + "loss": 0.4308912754058838, + "step": 7852 + }, + { + "epoch": 1.8106986396126354, + "grad_norm": 1.5274898640972132, + "learning_rate": 4.870812781146516e-08, + "loss": 0.43090081214904785, + "step": 7853 + }, + { + "epoch": 1.8109292137422182, + "grad_norm": 1.3117483081436436, + "learning_rate": 4.859065727609857e-08, + "loss": 0.4329320192337036, + "step": 7854 + }, + { + "epoch": 1.8111597878718007, + "grad_norm": 1.266199300666261, + "learning_rate": 4.8473325037722276e-08, + "loss": 0.3162953853607178, + "step": 7855 + }, + { + "epoch": 1.8113903620013834, + "grad_norm": 1.4534333887380995, + "learning_rate": 4.835613111339165e-08, + "loss": 0.37513065338134766, + "step": 7856 + }, + { + "epoch": 1.811620936130966, + "grad_norm": 1.494207838495638, + "learning_rate": 4.823907552014195e-08, + "loss": 0.4120938181877136, + 
"step": 7857 + }, + { + "epoch": 1.8118515102605488, + "grad_norm": 1.555741011782435, + "learning_rate": 4.8122158274988555e-08, + "loss": 0.4295421242713928, + "step": 7858 + }, + { + "epoch": 1.8120820843901315, + "grad_norm": 1.4697042695976983, + "learning_rate": 4.8005379394926435e-08, + "loss": 0.44738203287124634, + "step": 7859 + }, + { + "epoch": 1.8123126585197142, + "grad_norm": 1.7388489283467792, + "learning_rate": 4.7888738896930456e-08, + "loss": 0.447609007358551, + "step": 7860 + }, + { + "epoch": 1.8125432326492967, + "grad_norm": 1.6367328188270214, + "learning_rate": 4.777223679795561e-08, + "loss": 0.38288167119026184, + "step": 7861 + }, + { + "epoch": 1.8127738067788794, + "grad_norm": 1.5566909994885838, + "learning_rate": 4.765587311493668e-08, + "loss": 0.5003981590270996, + "step": 7862 + }, + { + "epoch": 1.813004380908462, + "grad_norm": 1.5140425774804767, + "learning_rate": 4.7539647864788476e-08, + "loss": 0.5244492888450623, + "step": 7863 + }, + { + "epoch": 1.8132349550380447, + "grad_norm": 1.4098788698269693, + "learning_rate": 4.742356106440526e-08, + "loss": 0.505184531211853, + "step": 7864 + }, + { + "epoch": 1.8134655291676274, + "grad_norm": 2.493869291024891, + "learning_rate": 4.7307612730661636e-08, + "loss": 0.5364291071891785, + "step": 7865 + }, + { + "epoch": 1.81369610329721, + "grad_norm": 1.5655893218937025, + "learning_rate": 4.719180288041158e-08, + "loss": 0.4370742738246918, + "step": 7866 + }, + { + "epoch": 1.8139266774267928, + "grad_norm": 1.3233268572547954, + "learning_rate": 4.7076131530489505e-08, + "loss": 0.37784355878829956, + "step": 7867 + }, + { + "epoch": 1.8141572515563755, + "grad_norm": 1.6040150628213576, + "learning_rate": 4.6960598697709294e-08, + "loss": 0.5184513330459595, + "step": 7868 + }, + { + "epoch": 1.814387825685958, + "grad_norm": 1.6174173359265467, + "learning_rate": 4.6845204398864743e-08, + "loss": 0.41221511363983154, + "step": 7869 + }, + { + "epoch": 
1.8146183998155407, + "grad_norm": 1.960596641519608, + "learning_rate": 4.672994865072965e-08, + "loss": 0.43040651082992554, + "step": 7870 + }, + { + "epoch": 1.8148489739451232, + "grad_norm": 1.887961823292038, + "learning_rate": 4.6614831470057625e-08, + "loss": 0.4681999385356903, + "step": 7871 + }, + { + "epoch": 1.815079548074706, + "grad_norm": 1.5463001442495705, + "learning_rate": 4.649985287358227e-08, + "loss": 0.49752098321914673, + "step": 7872 + }, + { + "epoch": 1.8153101222042887, + "grad_norm": 1.4528059880154254, + "learning_rate": 4.6385012878016663e-08, + "loss": 0.4621706008911133, + "step": 7873 + }, + { + "epoch": 1.8155406963338714, + "grad_norm": 1.339046035541834, + "learning_rate": 4.627031150005401e-08, + "loss": 0.4359724521636963, + "step": 7874 + }, + { + "epoch": 1.815771270463454, + "grad_norm": 1.4288119410903932, + "learning_rate": 4.6155748756367294e-08, + "loss": 0.4901214838027954, + "step": 7875 + }, + { + "epoch": 1.8160018445930368, + "grad_norm": 1.7234395975437273, + "learning_rate": 4.604132466360955e-08, + "loss": 0.5012428760528564, + "step": 7876 + }, + { + "epoch": 1.8162324187226193, + "grad_norm": 1.6768636456338364, + "learning_rate": 4.592703923841323e-08, + "loss": 0.5048446655273438, + "step": 7877 + }, + { + "epoch": 1.816462992852202, + "grad_norm": 1.5761086054200695, + "learning_rate": 4.5812892497390955e-08, + "loss": 0.5025140047073364, + "step": 7878 + }, + { + "epoch": 1.8166935669817845, + "grad_norm": 1.5593886228823222, + "learning_rate": 4.5698884457135324e-08, + "loss": 0.4456709623336792, + "step": 7879 + }, + { + "epoch": 1.8169241411113672, + "grad_norm": 1.4583950124069596, + "learning_rate": 4.5585015134218196e-08, + "loss": 0.38283586502075195, + "step": 7880 + }, + { + "epoch": 1.81715471524095, + "grad_norm": 1.5479198908902716, + "learning_rate": 4.5471284545192004e-08, + "loss": 0.3458648920059204, + "step": 7881 + }, + { + "epoch": 1.8173852893705327, + "grad_norm": 
1.7126815699296334, + "learning_rate": 4.53576927065884e-08, + "loss": 0.4609532952308655, + "step": 7882 + }, + { + "epoch": 1.8176158635001154, + "grad_norm": 1.238404719965568, + "learning_rate": 4.524423963491919e-08, + "loss": 0.4250793159008026, + "step": 7883 + }, + { + "epoch": 1.817846437629698, + "grad_norm": 1.7276559977997992, + "learning_rate": 4.513092534667584e-08, + "loss": 0.41343796253204346, + "step": 7884 + }, + { + "epoch": 1.8180770117592806, + "grad_norm": 1.5863495927207087, + "learning_rate": 4.5017749858329736e-08, + "loss": 0.46575528383255005, + "step": 7885 + }, + { + "epoch": 1.8183075858888633, + "grad_norm": 1.7387493602059383, + "learning_rate": 4.4904713186332156e-08, + "loss": 0.47052180767059326, + "step": 7886 + }, + { + "epoch": 1.8185381600184458, + "grad_norm": 1.4938009961123744, + "learning_rate": 4.479181534711429e-08, + "loss": 0.42979568243026733, + "step": 7887 + }, + { + "epoch": 1.8187687341480285, + "grad_norm": 1.4298617258142596, + "learning_rate": 4.46790563570868e-08, + "loss": 0.4278537929058075, + "step": 7888 + }, + { + "epoch": 1.8189993082776112, + "grad_norm": 1.6571154898401685, + "learning_rate": 4.456643623264022e-08, + "loss": 0.45380616188049316, + "step": 7889 + }, + { + "epoch": 1.819229882407194, + "grad_norm": 1.6141969165708208, + "learning_rate": 4.445395499014526e-08, + "loss": 0.46085125207901, + "step": 7890 + }, + { + "epoch": 1.8194604565367767, + "grad_norm": 1.7363894486391924, + "learning_rate": 4.434161264595204e-08, + "loss": 0.47558531165122986, + "step": 7891 + }, + { + "epoch": 1.8196910306663594, + "grad_norm": 1.552212209885486, + "learning_rate": 4.4229409216390845e-08, + "loss": 0.42082321643829346, + "step": 7892 + }, + { + "epoch": 1.819921604795942, + "grad_norm": 1.6844917452185877, + "learning_rate": 4.411734471777129e-08, + "loss": 0.40222978591918945, + "step": 7893 + }, + { + "epoch": 1.8201521789255246, + "grad_norm": 1.7385505168528088, + "learning_rate": 
4.400541916638323e-08, + "loss": 0.39737701416015625, + "step": 7894 + }, + { + "epoch": 1.820382753055107, + "grad_norm": 1.6976347614290264, + "learning_rate": 4.389363257849632e-08, + "loss": 0.46538835763931274, + "step": 7895 + }, + { + "epoch": 1.8206133271846898, + "grad_norm": 2.034464057065236, + "learning_rate": 4.378198497035979e-08, + "loss": 0.4994567036628723, + "step": 7896 + }, + { + "epoch": 1.8208439013142725, + "grad_norm": 1.517699554285521, + "learning_rate": 4.367047635820264e-08, + "loss": 0.4574298858642578, + "step": 7897 + }, + { + "epoch": 1.8210744754438553, + "grad_norm": 1.7361916973448048, + "learning_rate": 4.3559106758234044e-08, + "loss": 0.4716116786003113, + "step": 7898 + }, + { + "epoch": 1.821305049573438, + "grad_norm": 1.7495776361282012, + "learning_rate": 4.344787618664247e-08, + "loss": 0.35549741983413696, + "step": 7899 + }, + { + "epoch": 1.8215356237030205, + "grad_norm": 1.673931935617008, + "learning_rate": 4.3336784659596226e-08, + "loss": 0.44955599308013916, + "step": 7900 + }, + { + "epoch": 1.8217661978326032, + "grad_norm": 1.2588104675314307, + "learning_rate": 4.322583219324394e-08, + "loss": 0.4047467112541199, + "step": 7901 + }, + { + "epoch": 1.8219967719621857, + "grad_norm": 1.3892625958432285, + "learning_rate": 4.3115018803713596e-08, + "loss": 0.40367889404296875, + "step": 7902 + }, + { + "epoch": 1.8222273460917684, + "grad_norm": 1.3189968956301878, + "learning_rate": 4.3004344507113096e-08, + "loss": 0.32705235481262207, + "step": 7903 + }, + { + "epoch": 1.8224579202213511, + "grad_norm": 1.3777118561947166, + "learning_rate": 4.2893809319529794e-08, + "loss": 0.3845488727092743, + "step": 7904 + }, + { + "epoch": 1.8226884943509338, + "grad_norm": 1.4977030222677208, + "learning_rate": 4.2783413257031495e-08, + "loss": 0.49070197343826294, + "step": 7905 + }, + { + "epoch": 1.8229190684805165, + "grad_norm": 1.729181630904155, + "learning_rate": 4.267315633566493e-08, + "loss": 
0.550437867641449, + "step": 7906 + }, + { + "epoch": 1.8231496426100993, + "grad_norm": 1.6119404797366197, + "learning_rate": 4.25630385714576e-08, + "loss": 0.5042926073074341, + "step": 7907 + }, + { + "epoch": 1.8233802167396818, + "grad_norm": 1.5956788246532367, + "learning_rate": 4.245305998041571e-08, + "loss": 0.48839205503463745, + "step": 7908 + }, + { + "epoch": 1.8236107908692645, + "grad_norm": 1.6028821186444346, + "learning_rate": 4.234322057852602e-08, + "loss": 0.4754030108451843, + "step": 7909 + }, + { + "epoch": 1.823841364998847, + "grad_norm": 1.5406282114264656, + "learning_rate": 4.223352038175487e-08, + "loss": 0.394174188375473, + "step": 7910 + }, + { + "epoch": 1.8240719391284297, + "grad_norm": 1.3144512253416945, + "learning_rate": 4.2123959406048183e-08, + "loss": 0.39882469177246094, + "step": 7911 + }, + { + "epoch": 1.8243025132580124, + "grad_norm": 1.3036980510979261, + "learning_rate": 4.201453766733176e-08, + "loss": 0.4611927270889282, + "step": 7912 + }, + { + "epoch": 1.8245330873875951, + "grad_norm": 1.3717750651706109, + "learning_rate": 4.190525518151122e-08, + "loss": 0.4164184331893921, + "step": 7913 + }, + { + "epoch": 1.8247636615171778, + "grad_norm": 1.7048234275294294, + "learning_rate": 4.179611196447186e-08, + "loss": 0.41586828231811523, + "step": 7914 + }, + { + "epoch": 1.8249942356467606, + "grad_norm": 1.486464242852147, + "learning_rate": 4.168710803207864e-08, + "loss": 0.4707748591899872, + "step": 7915 + }, + { + "epoch": 1.825224809776343, + "grad_norm": 1.6925426332325308, + "learning_rate": 4.157824340017657e-08, + "loss": 0.4235571622848511, + "step": 7916 + }, + { + "epoch": 1.8254553839059258, + "grad_norm": 1.5746767320284107, + "learning_rate": 4.146951808458998e-08, + "loss": 0.3761681914329529, + "step": 7917 + }, + { + "epoch": 1.8256859580355083, + "grad_norm": 1.9541083814793623, + "learning_rate": 4.136093210112346e-08, + "loss": 0.45545494556427, + "step": 7918 + }, + { + "epoch": 
1.825916532165091, + "grad_norm": 1.4946968371557119, + "learning_rate": 4.1252485465561035e-08, + "loss": 0.4154251515865326, + "step": 7919 + }, + { + "epoch": 1.8261471062946737, + "grad_norm": 1.4442817043721163, + "learning_rate": 4.114417819366633e-08, + "loss": 0.3664330244064331, + "step": 7920 + }, + { + "epoch": 1.8263776804242564, + "grad_norm": 1.4915985489350694, + "learning_rate": 4.10360103011832e-08, + "loss": 0.4527730643749237, + "step": 7921 + }, + { + "epoch": 1.8266082545538391, + "grad_norm": 1.6683615123339999, + "learning_rate": 4.092798180383461e-08, + "loss": 0.5245767831802368, + "step": 7922 + }, + { + "epoch": 1.8268388286834218, + "grad_norm": 1.6122193238326974, + "learning_rate": 4.0820092717323894e-08, + "loss": 0.39781343936920166, + "step": 7923 + }, + { + "epoch": 1.8270694028130043, + "grad_norm": 1.592304216861808, + "learning_rate": 4.071234305733362e-08, + "loss": 0.4173957109451294, + "step": 7924 + }, + { + "epoch": 1.827299976942587, + "grad_norm": 1.7592031102615102, + "learning_rate": 4.0604732839526256e-08, + "loss": 0.38840869069099426, + "step": 7925 + }, + { + "epoch": 1.8275305510721696, + "grad_norm": 1.777360398097105, + "learning_rate": 4.0497262079544294e-08, + "loss": 0.4107547998428345, + "step": 7926 + }, + { + "epoch": 1.8277611252017523, + "grad_norm": 1.5475583296259725, + "learning_rate": 4.038993079300956e-08, + "loss": 0.41102874279022217, + "step": 7927 + }, + { + "epoch": 1.827991699331335, + "grad_norm": 1.4229533643496446, + "learning_rate": 4.028273899552381e-08, + "loss": 0.3393939733505249, + "step": 7928 + }, + { + "epoch": 1.8282222734609177, + "grad_norm": 1.4844610719466356, + "learning_rate": 4.017568670266835e-08, + "loss": 0.42469024658203125, + "step": 7929 + }, + { + "epoch": 1.8284528475905004, + "grad_norm": 1.316542585504155, + "learning_rate": 4.006877393000441e-08, + "loss": 0.4869099259376526, + "step": 7930 + }, + { + "epoch": 1.8286834217200831, + "grad_norm": 1.3905230120628338, 
+ "learning_rate": 3.996200069307265e-08, + "loss": 0.4463779926300049, + "step": 7931 + }, + { + "epoch": 1.8289139958496656, + "grad_norm": 1.908726864953878, + "learning_rate": 3.985536700739378e-08, + "loss": 0.429579496383667, + "step": 7932 + }, + { + "epoch": 1.8291445699792483, + "grad_norm": 1.555687929117211, + "learning_rate": 3.9748872888468065e-08, + "loss": 0.38837558031082153, + "step": 7933 + }, + { + "epoch": 1.8293751441088308, + "grad_norm": 1.467502995951613, + "learning_rate": 3.964251835177568e-08, + "loss": 0.4444499909877777, + "step": 7934 + }, + { + "epoch": 1.8296057182384136, + "grad_norm": 1.5836026531003116, + "learning_rate": 3.953630341277603e-08, + "loss": 0.5216259360313416, + "step": 7935 + }, + { + "epoch": 1.8298362923679963, + "grad_norm": 1.316614330242316, + "learning_rate": 3.943022808690888e-08, + "loss": 0.46454817056655884, + "step": 7936 + }, + { + "epoch": 1.830066866497579, + "grad_norm": 1.5390661326727673, + "learning_rate": 3.9324292389593005e-08, + "loss": 0.38960570096969604, + "step": 7937 + }, + { + "epoch": 1.8302974406271617, + "grad_norm": 1.2960127878271992, + "learning_rate": 3.9218496336227426e-08, + "loss": 0.3318006992340088, + "step": 7938 + }, + { + "epoch": 1.8305280147567444, + "grad_norm": 1.501585055160058, + "learning_rate": 3.9112839942190725e-08, + "loss": 0.41555076837539673, + "step": 7939 + }, + { + "epoch": 1.830758588886327, + "grad_norm": 1.4035625255113318, + "learning_rate": 3.900732322284095e-08, + "loss": 0.4296320080757141, + "step": 7940 + }, + { + "epoch": 1.8309891630159096, + "grad_norm": 1.6738155247978692, + "learning_rate": 3.8901946193516055e-08, + "loss": 0.4416658282279968, + "step": 7941 + }, + { + "epoch": 1.8312197371454921, + "grad_norm": 1.885789179393057, + "learning_rate": 3.8796708869533676e-08, + "loss": 0.4539029598236084, + "step": 7942 + }, + { + "epoch": 1.8314503112750748, + "grad_norm": 1.4867619575158202, + "learning_rate": 3.869161126619136e-08, + "loss": 
0.4526992440223694, + "step": 7943 + }, + { + "epoch": 1.8316808854046576, + "grad_norm": 1.5927522884216676, + "learning_rate": 3.8586653398765766e-08, + "loss": 0.3991963863372803, + "step": 7944 + }, + { + "epoch": 1.8319114595342403, + "grad_norm": 1.4460483349984772, + "learning_rate": 3.848183528251381e-08, + "loss": 0.44474589824676514, + "step": 7945 + }, + { + "epoch": 1.832142033663823, + "grad_norm": 1.7969739964524274, + "learning_rate": 3.837715693267174e-08, + "loss": 0.5022028684616089, + "step": 7946 + }, + { + "epoch": 1.8323726077934057, + "grad_norm": 1.6274178723126447, + "learning_rate": 3.8272618364455836e-08, + "loss": 0.4839058518409729, + "step": 7947 + }, + { + "epoch": 1.8326031819229882, + "grad_norm": 1.7924980398771633, + "learning_rate": 3.8168219593061376e-08, + "loss": 0.3580874800682068, + "step": 7948 + }, + { + "epoch": 1.832833756052571, + "grad_norm": 1.6096517551702718, + "learning_rate": 3.806396063366424e-08, + "loss": 0.4350799024105072, + "step": 7949 + }, + { + "epoch": 1.8330643301821534, + "grad_norm": 1.3546161389632028, + "learning_rate": 3.79598415014194e-08, + "loss": 0.4386145770549774, + "step": 7950 + }, + { + "epoch": 1.8332949043117361, + "grad_norm": 1.4421267919386862, + "learning_rate": 3.785586221146142e-08, + "loss": 0.5122627019882202, + "step": 7951 + }, + { + "epoch": 1.8335254784413189, + "grad_norm": 1.3507016201924953, + "learning_rate": 3.77520227789051e-08, + "loss": 0.41197121143341064, + "step": 7952 + }, + { + "epoch": 1.8337560525709016, + "grad_norm": 1.7729553069577912, + "learning_rate": 3.764832321884426e-08, + "loss": 0.5508084297180176, + "step": 7953 + }, + { + "epoch": 1.8339866267004843, + "grad_norm": 1.3788371713361898, + "learning_rate": 3.754476354635283e-08, + "loss": 0.40791934728622437, + "step": 7954 + }, + { + "epoch": 1.834217200830067, + "grad_norm": 1.4693932480728087, + "learning_rate": 3.7441343776484113e-08, + "loss": 0.3880457878112793, + "step": 7955 + }, + { + 
"epoch": 1.8344477749596495, + "grad_norm": 1.4561569110121497, + "learning_rate": 3.7338063924271304e-08, + "loss": 0.40519118309020996, + "step": 7956 + }, + { + "epoch": 1.8346783490892322, + "grad_norm": 1.4799489730655653, + "learning_rate": 3.723492400472716e-08, + "loss": 0.46081095933914185, + "step": 7957 + }, + { + "epoch": 1.8349089232188147, + "grad_norm": 1.3167338346767847, + "learning_rate": 3.713192403284438e-08, + "loss": 0.3946321904659271, + "step": 7958 + }, + { + "epoch": 1.8351394973483974, + "grad_norm": 1.743632986191688, + "learning_rate": 3.702906402359474e-08, + "loss": 0.4699859023094177, + "step": 7959 + }, + { + "epoch": 1.8353700714779801, + "grad_norm": 1.4691817330554993, + "learning_rate": 3.692634399192995e-08, + "loss": 0.43031781911849976, + "step": 7960 + }, + { + "epoch": 1.8356006456075629, + "grad_norm": 1.5694622813964751, + "learning_rate": 3.6823763952781636e-08, + "loss": 0.4072418212890625, + "step": 7961 + }, + { + "epoch": 1.8358312197371456, + "grad_norm": 1.7009922761684866, + "learning_rate": 3.672132392106053e-08, + "loss": 0.40659528970718384, + "step": 7962 + }, + { + "epoch": 1.8360617938667283, + "grad_norm": 1.2845193385628964, + "learning_rate": 3.661902391165772e-08, + "loss": 0.41279205679893494, + "step": 7963 + }, + { + "epoch": 1.8362923679963108, + "grad_norm": 1.407521764327922, + "learning_rate": 3.65168639394432e-08, + "loss": 0.43887826800346375, + "step": 7964 + }, + { + "epoch": 1.8365229421258935, + "grad_norm": 1.585883988281566, + "learning_rate": 3.6414844019267196e-08, + "loss": 0.46111762523651123, + "step": 7965 + }, + { + "epoch": 1.836753516255476, + "grad_norm": 1.5089060420061358, + "learning_rate": 3.63129641659593e-08, + "loss": 0.42694801092147827, + "step": 7966 + }, + { + "epoch": 1.8369840903850587, + "grad_norm": 1.563222995065882, + "learning_rate": 3.6211224394328775e-08, + "loss": 0.4674855172634125, + "step": 7967 + }, + { + "epoch": 1.8372146645146414, + "grad_norm": 
1.6612957725595774, + "learning_rate": 3.610962471916435e-08, + "loss": 0.48998844623565674, + "step": 7968 + }, + { + "epoch": 1.8374452386442242, + "grad_norm": 1.517118505836267, + "learning_rate": 3.600816515523486e-08, + "loss": 0.4162273406982422, + "step": 7969 + }, + { + "epoch": 1.8376758127738069, + "grad_norm": 1.6498845355681542, + "learning_rate": 3.5906845717288304e-08, + "loss": 0.4446166753768921, + "step": 7970 + }, + { + "epoch": 1.8379063869033896, + "grad_norm": 1.6723175784368125, + "learning_rate": 3.580566642005245e-08, + "loss": 0.4782527983188629, + "step": 7971 + }, + { + "epoch": 1.838136961032972, + "grad_norm": 1.667138689471541, + "learning_rate": 3.570462727823476e-08, + "loss": 0.43014609813690186, + "step": 7972 + }, + { + "epoch": 1.8383675351625548, + "grad_norm": 1.5808858327085533, + "learning_rate": 3.560372830652225e-08, + "loss": 0.5155357122421265, + "step": 7973 + }, + { + "epoch": 1.8385981092921373, + "grad_norm": 1.4181681095350445, + "learning_rate": 3.5502969519581984e-08, + "loss": 0.4231104254722595, + "step": 7974 + }, + { + "epoch": 1.83882868342172, + "grad_norm": 1.8426199170185766, + "learning_rate": 3.540235093205979e-08, + "loss": 0.529877245426178, + "step": 7975 + }, + { + "epoch": 1.8390592575513027, + "grad_norm": 1.5632800597633676, + "learning_rate": 3.530187255858186e-08, + "loss": 0.4841991662979126, + "step": 7976 + }, + { + "epoch": 1.8392898316808854, + "grad_norm": 1.5770240615602402, + "learning_rate": 3.520153441375362e-08, + "loss": 0.40202534198760986, + "step": 7977 + }, + { + "epoch": 1.8395204058104682, + "grad_norm": 1.4104759549786023, + "learning_rate": 3.51013365121603e-08, + "loss": 0.398551344871521, + "step": 7978 + }, + { + "epoch": 1.8397509799400509, + "grad_norm": 1.5102819529399165, + "learning_rate": 3.500127886836668e-08, + "loss": 0.49139225482940674, + "step": 7979 + }, + { + "epoch": 1.8399815540696334, + "grad_norm": 1.7659081046335245, + "learning_rate": 
3.4901361496917135e-08, + "loss": 0.4708287715911865, + "step": 7980 + }, + { + "epoch": 1.840212128199216, + "grad_norm": 1.3491474153090526, + "learning_rate": 3.4801584412335714e-08, + "loss": 0.4174381494522095, + "step": 7981 + }, + { + "epoch": 1.8404427023287986, + "grad_norm": 1.6453019064878467, + "learning_rate": 3.470194762912593e-08, + "loss": 0.535778284072876, + "step": 7982 + }, + { + "epoch": 1.8406732764583813, + "grad_norm": 1.7228199406120377, + "learning_rate": 3.4602451161771186e-08, + "loss": 0.540034294128418, + "step": 7983 + }, + { + "epoch": 1.840903850587964, + "grad_norm": 1.794022377740068, + "learning_rate": 3.450309502473403e-08, + "loss": 0.4399121403694153, + "step": 7984 + }, + { + "epoch": 1.8411344247175467, + "grad_norm": 1.6932512977389786, + "learning_rate": 3.4403879232457134e-08, + "loss": 0.5011022686958313, + "step": 7985 + }, + { + "epoch": 1.8413649988471295, + "grad_norm": 1.580497796669037, + "learning_rate": 3.4304803799362405e-08, + "loss": 0.392477810382843, + "step": 7986 + }, + { + "epoch": 1.8415955729767122, + "grad_norm": 1.5439573803469637, + "learning_rate": 3.420586873985132e-08, + "loss": 0.4734686315059662, + "step": 7987 + }, + { + "epoch": 1.8418261471062947, + "grad_norm": 1.3285059669744466, + "learning_rate": 3.410707406830537e-08, + "loss": 0.37347573041915894, + "step": 7988 + }, + { + "epoch": 1.8420567212358774, + "grad_norm": 1.6328708193086845, + "learning_rate": 3.400841979908531e-08, + "loss": 0.38837599754333496, + "step": 7989 + }, + { + "epoch": 1.8422872953654599, + "grad_norm": 1.6277616294407593, + "learning_rate": 3.390990594653142e-08, + "loss": 0.38598424196243286, + "step": 7990 + }, + { + "epoch": 1.8425178694950426, + "grad_norm": 1.584379501910531, + "learning_rate": 3.381153252496371e-08, + "loss": 0.48508739471435547, + "step": 7991 + }, + { + "epoch": 1.8427484436246253, + "grad_norm": 1.609395355542375, + "learning_rate": 3.3713299548681736e-08, + "loss": 0.41946491599082947, 
+ "step": 7992 + }, + { + "epoch": 1.842979017754208, + "grad_norm": 1.4959274640542461, + "learning_rate": 3.3615207031964744e-08, + "loss": 0.4803915023803711, + "step": 7993 + }, + { + "epoch": 1.8432095918837907, + "grad_norm": 1.3835076847275678, + "learning_rate": 3.351725498907143e-08, + "loss": 0.39463797211647034, + "step": 7994 + }, + { + "epoch": 1.8434401660133735, + "grad_norm": 1.5742658557245284, + "learning_rate": 3.341944343424008e-08, + "loss": 0.43345123529434204, + "step": 7995 + }, + { + "epoch": 1.843670740142956, + "grad_norm": 1.7826616989180466, + "learning_rate": 3.332177238168854e-08, + "loss": 0.5164570212364197, + "step": 7996 + }, + { + "epoch": 1.8439013142725387, + "grad_norm": 1.71354580792071, + "learning_rate": 3.322424184561445e-08, + "loss": 0.5313355922698975, + "step": 7997 + }, + { + "epoch": 1.8441318884021212, + "grad_norm": 1.901316143248936, + "learning_rate": 3.3126851840194815e-08, + "loss": 0.4488258361816406, + "step": 7998 + }, + { + "epoch": 1.8443624625317039, + "grad_norm": 1.479116299891256, + "learning_rate": 3.30296023795863e-08, + "loss": 0.5122581720352173, + "step": 7999 + }, + { + "epoch": 1.8445930366612866, + "grad_norm": 1.4735639536720297, + "learning_rate": 3.293249347792493e-08, + "loss": 0.4619610905647278, + "step": 8000 + }, + { + "epoch": 1.8448236107908693, + "grad_norm": 1.3540260330438945, + "learning_rate": 3.2835525149326636e-08, + "loss": 0.4214603006839752, + "step": 8001 + }, + { + "epoch": 1.845054184920452, + "grad_norm": 1.4074387483331625, + "learning_rate": 3.2738697407886485e-08, + "loss": 0.40279510617256165, + "step": 8002 + }, + { + "epoch": 1.8452847590500348, + "grad_norm": 1.4474967943141424, + "learning_rate": 3.264201026767977e-08, + "loss": 0.4797242283821106, + "step": 8003 + }, + { + "epoch": 1.8455153331796172, + "grad_norm": 1.3554973222515974, + "learning_rate": 3.254546374276057e-08, + "loss": 0.3833237588405609, + "step": 8004 + }, + { + "epoch": 1.8457459073092, + 
"grad_norm": 1.4594426546625732, + "learning_rate": 3.244905784716323e-08, + "loss": 0.41461342573165894, + "step": 8005 + }, + { + "epoch": 1.8459764814387825, + "grad_norm": 1.5177617199741877, + "learning_rate": 3.235279259490109e-08, + "loss": 0.592107892036438, + "step": 8006 + }, + { + "epoch": 1.8462070555683652, + "grad_norm": 1.684042887917187, + "learning_rate": 3.2256667999967405e-08, + "loss": 0.39025670289993286, + "step": 8007 + }, + { + "epoch": 1.846437629697948, + "grad_norm": 1.286539298720562, + "learning_rate": 3.2160684076334766e-08, + "loss": 0.40197378396987915, + "step": 8008 + }, + { + "epoch": 1.8466682038275306, + "grad_norm": 1.8155125046022762, + "learning_rate": 3.206484083795558e-08, + "loss": 0.4013815224170685, + "step": 8009 + }, + { + "epoch": 1.8468987779571133, + "grad_norm": 1.5762142363003944, + "learning_rate": 3.1969138298761356e-08, + "loss": 0.45386412739753723, + "step": 8010 + }, + { + "epoch": 1.8471293520866958, + "grad_norm": 1.8756892627173425, + "learning_rate": 3.187357647266353e-08, + "loss": 0.43034985661506653, + "step": 8011 + }, + { + "epoch": 1.8473599262162785, + "grad_norm": 1.6730495727197179, + "learning_rate": 3.177815537355322e-08, + "loss": 0.4346637725830078, + "step": 8012 + }, + { + "epoch": 1.847590500345861, + "grad_norm": 1.8461631710642654, + "learning_rate": 3.1682875015300535e-08, + "loss": 0.5203511118888855, + "step": 8013 + }, + { + "epoch": 1.8478210744754437, + "grad_norm": 1.5817324628827356, + "learning_rate": 3.1587735411755636e-08, + "loss": 0.37658393383026123, + "step": 8014 + }, + { + "epoch": 1.8480516486050265, + "grad_norm": 1.6304961028131815, + "learning_rate": 3.149273657674789e-08, + "loss": 0.5473518371582031, + "step": 8015 + }, + { + "epoch": 1.8482822227346092, + "grad_norm": 1.800633884327913, + "learning_rate": 3.1397878524086484e-08, + "loss": 0.5171597599983215, + "step": 8016 + }, + { + "epoch": 1.848512796864192, + "grad_norm": 1.585245081928725, + "learning_rate": 
3.130316126755983e-08, + "loss": 0.46588706970214844, + "step": 8017 + }, + { + "epoch": 1.8487433709937746, + "grad_norm": 1.496582071882617, + "learning_rate": 3.1208584820936244e-08, + "loss": 0.5571366548538208, + "step": 8018 + }, + { + "epoch": 1.848973945123357, + "grad_norm": 1.5249372170069353, + "learning_rate": 3.111414919796318e-08, + "loss": 0.45803195238113403, + "step": 8019 + }, + { + "epoch": 1.8492045192529398, + "grad_norm": 1.4834943043987898, + "learning_rate": 3.1019854412367875e-08, + "loss": 0.4732629060745239, + "step": 8020 + }, + { + "epoch": 1.8494350933825223, + "grad_norm": 1.7625144420898597, + "learning_rate": 3.092570047785714e-08, + "loss": 0.5268767476081848, + "step": 8021 + }, + { + "epoch": 1.849665667512105, + "grad_norm": 1.5017810734056087, + "learning_rate": 3.0831687408117035e-08, + "loss": 0.5179537534713745, + "step": 8022 + }, + { + "epoch": 1.8498962416416878, + "grad_norm": 1.7406452748153565, + "learning_rate": 3.073781521681351e-08, + "loss": 0.5110389590263367, + "step": 8023 + }, + { + "epoch": 1.8501268157712705, + "grad_norm": 1.442631804804713, + "learning_rate": 3.064408391759154e-08, + "loss": 0.4078633189201355, + "step": 8024 + }, + { + "epoch": 1.8503573899008532, + "grad_norm": 1.6619024740283894, + "learning_rate": 3.055049352407624e-08, + "loss": 0.4632648229598999, + "step": 8025 + }, + { + "epoch": 1.850587964030436, + "grad_norm": 1.577432813868154, + "learning_rate": 3.0457044049871705e-08, + "loss": 0.41569265723228455, + "step": 8026 + }, + { + "epoch": 1.8508185381600184, + "grad_norm": 1.3795657287644, + "learning_rate": 3.036373550856186e-08, + "loss": 0.4105853736400604, + "step": 8027 + }, + { + "epoch": 1.8510491122896011, + "grad_norm": 1.6584799060214424, + "learning_rate": 3.027056791370996e-08, + "loss": 0.4415978789329529, + "step": 8028 + }, + { + "epoch": 1.8512796864191836, + "grad_norm": 1.571030596092026, + "learning_rate": 3.017754127885908e-08, + "loss": 0.3990614414215088, + 
"step": 8029 + }, + { + "epoch": 1.8515102605487663, + "grad_norm": 1.5323241652532567, + "learning_rate": 3.0084655617531376e-08, + "loss": 0.42349040508270264, + "step": 8030 + }, + { + "epoch": 1.851740834678349, + "grad_norm": 1.4436112405033301, + "learning_rate": 2.9991910943228725e-08, + "loss": 0.4687228798866272, + "step": 8031 + }, + { + "epoch": 1.8519714088079318, + "grad_norm": 1.91227305815919, + "learning_rate": 2.989930726943268e-08, + "loss": 0.6091229915618896, + "step": 8032 + }, + { + "epoch": 1.8522019829375145, + "grad_norm": 1.527659992048368, + "learning_rate": 2.980684460960381e-08, + "loss": 0.43401795625686646, + "step": 8033 + }, + { + "epoch": 1.8524325570670972, + "grad_norm": 1.521615388244922, + "learning_rate": 2.9714522977182688e-08, + "loss": 0.47280481457710266, + "step": 8034 + }, + { + "epoch": 1.8526631311966797, + "grad_norm": 1.6019291161476, + "learning_rate": 2.962234238558925e-08, + "loss": 0.5078729391098022, + "step": 8035 + }, + { + "epoch": 1.8528937053262624, + "grad_norm": 1.8353491661496104, + "learning_rate": 2.9530302848223e-08, + "loss": 0.4279085695743561, + "step": 8036 + }, + { + "epoch": 1.853124279455845, + "grad_norm": 1.4587208506754334, + "learning_rate": 2.9438404378462455e-08, + "loss": 0.3720093369483948, + "step": 8037 + }, + { + "epoch": 1.8533548535854276, + "grad_norm": 1.810026420285634, + "learning_rate": 2.934664698966627e-08, + "loss": 0.26778513193130493, + "step": 8038 + }, + { + "epoch": 1.8535854277150103, + "grad_norm": 1.569617242169025, + "learning_rate": 2.9255030695172324e-08, + "loss": 0.47606828808784485, + "step": 8039 + }, + { + "epoch": 1.853816001844593, + "grad_norm": 1.8330928647910023, + "learning_rate": 2.9163555508297632e-08, + "loss": 0.437153160572052, + "step": 8040 + }, + { + "epoch": 1.8540465759741758, + "grad_norm": 1.3219241142527494, + "learning_rate": 2.907222144233945e-08, + "loss": 0.408009797334671, + "step": 8041 + }, + { + "epoch": 1.8542771501037585, + 
"grad_norm": 1.3761080217774861, + "learning_rate": 2.8981028510573824e-08, + "loss": 0.3435688018798828, + "step": 8042 + }, + { + "epoch": 1.854507724233341, + "grad_norm": 1.881646492298394, + "learning_rate": 2.8889976726256705e-08, + "loss": 0.4829018712043762, + "step": 8043 + }, + { + "epoch": 1.8547382983629237, + "grad_norm": 1.5758694223281, + "learning_rate": 2.879906610262339e-08, + "loss": 0.44579288363456726, + "step": 8044 + }, + { + "epoch": 1.8549688724925062, + "grad_norm": 1.3922554430382053, + "learning_rate": 2.8708296652888764e-08, + "loss": 0.4952869415283203, + "step": 8045 + }, + { + "epoch": 1.855199446622089, + "grad_norm": 1.4450922871815606, + "learning_rate": 2.8617668390246818e-08, + "loss": 0.4870997965335846, + "step": 8046 + }, + { + "epoch": 1.8554300207516716, + "grad_norm": 1.5651252792966914, + "learning_rate": 2.8527181327871465e-08, + "loss": 0.5009135603904724, + "step": 8047 + }, + { + "epoch": 1.8556605948812543, + "grad_norm": 1.3977550991376733, + "learning_rate": 2.8436835478915954e-08, + "loss": 0.4837114214897156, + "step": 8048 + }, + { + "epoch": 1.855891169010837, + "grad_norm": 1.6474653449248091, + "learning_rate": 2.8346630856512897e-08, + "loss": 0.47955578565597534, + "step": 8049 + }, + { + "epoch": 1.8561217431404198, + "grad_norm": 1.705788106947518, + "learning_rate": 2.8256567473774363e-08, + "loss": 0.4882965385913849, + "step": 8050 + }, + { + "epoch": 1.8563523172700023, + "grad_norm": 1.5940097685845425, + "learning_rate": 2.8166645343792094e-08, + "loss": 0.4542367458343506, + "step": 8051 + }, + { + "epoch": 1.856582891399585, + "grad_norm": 1.5880265061576002, + "learning_rate": 2.8076864479637198e-08, + "loss": 0.4506416916847229, + "step": 8052 + }, + { + "epoch": 1.8568134655291675, + "grad_norm": 1.699970116686096, + "learning_rate": 2.798722489436012e-08, + "loss": 0.5043084025382996, + "step": 8053 + }, + { + "epoch": 1.8570440396587502, + "grad_norm": 1.397398070036947, + "learning_rate": 
2.78977266009911e-08, + "loss": 0.3711032271385193, + "step": 8054 + }, + { + "epoch": 1.857274613788333, + "grad_norm": 1.3008294527362816, + "learning_rate": 2.7808369612539405e-08, + "loss": 0.33371198177337646, + "step": 8055 + }, + { + "epoch": 1.8575051879179156, + "grad_norm": 1.7364482681056421, + "learning_rate": 2.771915394199409e-08, + "loss": 0.5328178405761719, + "step": 8056 + }, + { + "epoch": 1.8577357620474984, + "grad_norm": 1.925308909381556, + "learning_rate": 2.7630079602323443e-08, + "loss": 0.4615975618362427, + "step": 8057 + }, + { + "epoch": 1.857966336177081, + "grad_norm": 1.506605490676224, + "learning_rate": 2.754114660647533e-08, + "loss": 0.4667460024356842, + "step": 8058 + }, + { + "epoch": 1.8581969103066636, + "grad_norm": 1.7246190337812906, + "learning_rate": 2.745235496737719e-08, + "loss": 0.483825147151947, + "step": 8059 + }, + { + "epoch": 1.8584274844362463, + "grad_norm": 1.7802094460466942, + "learning_rate": 2.736370469793592e-08, + "loss": 0.4376814365386963, + "step": 8060 + }, + { + "epoch": 1.8586580585658288, + "grad_norm": 1.4605341926622646, + "learning_rate": 2.7275195811037432e-08, + "loss": 0.4862465262413025, + "step": 8061 + }, + { + "epoch": 1.8588886326954115, + "grad_norm": 1.6497121576486102, + "learning_rate": 2.718682831954744e-08, + "loss": 0.48104172945022583, + "step": 8062 + }, + { + "epoch": 1.8591192068249942, + "grad_norm": 1.3643295104524422, + "learning_rate": 2.709860223631122e-08, + "loss": 0.43358030915260315, + "step": 8063 + }, + { + "epoch": 1.859349780954577, + "grad_norm": 1.3052220670178016, + "learning_rate": 2.701051757415307e-08, + "loss": 0.44614607095718384, + "step": 8064 + }, + { + "epoch": 1.8595803550841596, + "grad_norm": 1.8220525339474862, + "learning_rate": 2.6922574345877303e-08, + "loss": 0.49824249744415283, + "step": 8065 + }, + { + "epoch": 1.8598109292137424, + "grad_norm": 1.3314333068504594, + "learning_rate": 2.683477256426714e-08, + "loss": 0.39621901512145996, 
+ "step": 8066 + }, + { + "epoch": 1.8600415033433249, + "grad_norm": 1.3391032368154236, + "learning_rate": 2.6747112242085478e-08, + "loss": 0.40166205167770386, + "step": 8067 + }, + { + "epoch": 1.8602720774729076, + "grad_norm": 1.720101921843303, + "learning_rate": 2.6659593392074575e-08, + "loss": 0.4249534606933594, + "step": 8068 + }, + { + "epoch": 1.86050265160249, + "grad_norm": 1.3203085704476971, + "learning_rate": 2.6572216026956473e-08, + "loss": 0.4015510678291321, + "step": 8069 + }, + { + "epoch": 1.8607332257320728, + "grad_norm": 1.8982655978960439, + "learning_rate": 2.6484980159432236e-08, + "loss": 0.4691264033317566, + "step": 8070 + }, + { + "epoch": 1.8609637998616555, + "grad_norm": 1.6363630573411998, + "learning_rate": 2.639788580218216e-08, + "loss": 0.5095053315162659, + "step": 8071 + }, + { + "epoch": 1.8611943739912382, + "grad_norm": 1.707433776183968, + "learning_rate": 2.6310932967866794e-08, + "loss": 0.4658794403076172, + "step": 8072 + }, + { + "epoch": 1.861424948120821, + "grad_norm": 1.7622547433521365, + "learning_rate": 2.622412166912513e-08, + "loss": 0.495827853679657, + "step": 8073 + }, + { + "epoch": 1.8616555222504036, + "grad_norm": 1.6584095706736666, + "learning_rate": 2.6137451918576413e-08, + "loss": 0.43652772903442383, + "step": 8074 + }, + { + "epoch": 1.8618860963799861, + "grad_norm": 1.410927084601702, + "learning_rate": 2.6050923728818784e-08, + "loss": 0.4636423587799072, + "step": 8075 + }, + { + "epoch": 1.8621166705095689, + "grad_norm": 1.6137478822178715, + "learning_rate": 2.5964537112430186e-08, + "loss": 0.4572441577911377, + "step": 8076 + }, + { + "epoch": 1.8623472446391514, + "grad_norm": 1.5268149737583054, + "learning_rate": 2.587829208196757e-08, + "loss": 0.4549320340156555, + "step": 8077 + }, + { + "epoch": 1.862577818768734, + "grad_norm": 1.4757300368438027, + "learning_rate": 2.5792188649967795e-08, + "loss": 0.46412795782089233, + "step": 8078 + }, + { + "epoch": 
1.8628083928983168, + "grad_norm": 1.566100546942984, + "learning_rate": 2.570622682894652e-08, + "loss": 0.40059781074523926, + "step": 8079 + }, + { + "epoch": 1.8630389670278995, + "grad_norm": 1.8382248312833556, + "learning_rate": 2.5620406631399416e-08, + "loss": 0.5396246910095215, + "step": 8080 + }, + { + "epoch": 1.8632695411574822, + "grad_norm": 1.630240250521673, + "learning_rate": 2.553472806980128e-08, + "loss": 0.4793856143951416, + "step": 8081 + }, + { + "epoch": 1.863500115287065, + "grad_norm": 1.7081981493499068, + "learning_rate": 2.5449191156606264e-08, + "loss": 0.4428815543651581, + "step": 8082 + }, + { + "epoch": 1.8637306894166474, + "grad_norm": 1.3161952024113066, + "learning_rate": 2.5363795904248086e-08, + "loss": 0.4024256467819214, + "step": 8083 + }, + { + "epoch": 1.8639612635462302, + "grad_norm": 1.7334425937535092, + "learning_rate": 2.5278542325139818e-08, + "loss": 0.4868123531341553, + "step": 8084 + }, + { + "epoch": 1.8641918376758126, + "grad_norm": 1.8199560965911645, + "learning_rate": 2.519343043167399e-08, + "loss": 0.602108359336853, + "step": 8085 + }, + { + "epoch": 1.8644224118053954, + "grad_norm": 1.8527423308196338, + "learning_rate": 2.510846023622237e-08, + "loss": 0.4500008225440979, + "step": 8086 + }, + { + "epoch": 1.864652985934978, + "grad_norm": 1.4521386296534855, + "learning_rate": 2.502363175113642e-08, + "loss": 0.3894640803337097, + "step": 8087 + }, + { + "epoch": 1.8648835600645608, + "grad_norm": 1.471988486213167, + "learning_rate": 2.493894498874649e-08, + "loss": 0.4525550305843353, + "step": 8088 + }, + { + "epoch": 1.8651141341941435, + "grad_norm": 1.362693221908779, + "learning_rate": 2.485439996136296e-08, + "loss": 0.3908608555793762, + "step": 8089 + }, + { + "epoch": 1.8653447083237262, + "grad_norm": 1.5537540661666722, + "learning_rate": 2.4769996681275106e-08, + "loss": 0.4551984667778015, + "step": 8090 + }, + { + "epoch": 1.8655752824533087, + "grad_norm": 1.3331466559033927, + 
"learning_rate": 2.468573516075201e-08, + "loss": 0.34474045038223267, + "step": 8091 + }, + { + "epoch": 1.8658058565828914, + "grad_norm": 1.675344505563735, + "learning_rate": 2.4601615412041755e-08, + "loss": 0.41480594873428345, + "step": 8092 + }, + { + "epoch": 1.866036430712474, + "grad_norm": 1.6368782805002868, + "learning_rate": 2.4517637447372007e-08, + "loss": 0.5043104887008667, + "step": 8093 + }, + { + "epoch": 1.8662670048420567, + "grad_norm": 1.7139805676568358, + "learning_rate": 2.4433801278950007e-08, + "loss": 0.4467152953147888, + "step": 8094 + }, + { + "epoch": 1.8664975789716394, + "grad_norm": 1.5274424401661542, + "learning_rate": 2.4350106918962e-08, + "loss": 0.454445481300354, + "step": 8095 + }, + { + "epoch": 1.866728153101222, + "grad_norm": 1.5661075903861215, + "learning_rate": 2.426655437957392e-08, + "loss": 0.4639291763305664, + "step": 8096 + }, + { + "epoch": 1.8669587272308048, + "grad_norm": 1.6251687636184629, + "learning_rate": 2.418314367293084e-08, + "loss": 0.46178731322288513, + "step": 8097 + }, + { + "epoch": 1.8671893013603875, + "grad_norm": 1.5047265923361783, + "learning_rate": 2.4099874811157383e-08, + "loss": 0.43832290172576904, + "step": 8098 + }, + { + "epoch": 1.86741987548997, + "grad_norm": 1.569040322283118, + "learning_rate": 2.4016747806357652e-08, + "loss": 0.4586114287376404, + "step": 8099 + }, + { + "epoch": 1.8676504496195527, + "grad_norm": 1.403368540081911, + "learning_rate": 2.3933762670614978e-08, + "loss": 0.37975889444351196, + "step": 8100 + }, + { + "epoch": 1.8678810237491352, + "grad_norm": 1.6666819300781532, + "learning_rate": 2.3850919415992042e-08, + "loss": 0.4579748511314392, + "step": 8101 + }, + { + "epoch": 1.868111597878718, + "grad_norm": 1.5976733248377182, + "learning_rate": 2.3768218054530775e-08, + "loss": 0.5120238661766052, + "step": 8102 + }, + { + "epoch": 1.8683421720083007, + "grad_norm": 1.47865092584181, + "learning_rate": 2.3685658598253e-08, + "loss": 
0.41514822840690613, + "step": 8103 + }, + { + "epoch": 1.8685727461378834, + "grad_norm": 1.6132937806442644, + "learning_rate": 2.360324105915934e-08, + "loss": 0.49480026960372925, + "step": 8104 + }, + { + "epoch": 1.868803320267466, + "grad_norm": 1.516759878457302, + "learning_rate": 2.352096544922999e-08, + "loss": 0.41115111112594604, + "step": 8105 + }, + { + "epoch": 1.8690338943970488, + "grad_norm": 1.8593225608723183, + "learning_rate": 2.3438831780424607e-08, + "loss": 0.44793501496315, + "step": 8106 + }, + { + "epoch": 1.8692644685266313, + "grad_norm": 2.087747863463927, + "learning_rate": 2.3356840064682305e-08, + "loss": 0.4197582006454468, + "step": 8107 + }, + { + "epoch": 1.869495042656214, + "grad_norm": 1.3708560469219937, + "learning_rate": 2.3274990313921218e-08, + "loss": 0.3654597997665405, + "step": 8108 + }, + { + "epoch": 1.8697256167857965, + "grad_norm": 1.6733057347639861, + "learning_rate": 2.319328254003927e-08, + "loss": 0.5105487704277039, + "step": 8109 + }, + { + "epoch": 1.8699561909153792, + "grad_norm": 1.6787548385436994, + "learning_rate": 2.3111716754913192e-08, + "loss": 0.5202287435531616, + "step": 8110 + }, + { + "epoch": 1.870186765044962, + "grad_norm": 1.5305524386936447, + "learning_rate": 2.303029297039949e-08, + "loss": 0.4475836753845215, + "step": 8111 + }, + { + "epoch": 1.8704173391745447, + "grad_norm": 1.579007380002247, + "learning_rate": 2.2949011198334144e-08, + "loss": 0.5010285973548889, + "step": 8112 + }, + { + "epoch": 1.8706479133041274, + "grad_norm": 1.4473541177707174, + "learning_rate": 2.286787145053204e-08, + "loss": 0.41949477791786194, + "step": 8113 + }, + { + "epoch": 1.87087848743371, + "grad_norm": 1.3276801089952157, + "learning_rate": 2.2786873738787738e-08, + "loss": 0.38505449891090393, + "step": 8114 + }, + { + "epoch": 1.8711090615632926, + "grad_norm": 1.8776948972547884, + "learning_rate": 2.2706018074875043e-08, + "loss": 0.4854990839958191, + "step": 8115 + }, + { + 
"epoch": 1.8713396356928753, + "grad_norm": 1.3982424394333428, + "learning_rate": 2.2625304470547336e-08, + "loss": 0.3846585154533386, + "step": 8116 + }, + { + "epoch": 1.8715702098224578, + "grad_norm": 1.7499321509858707, + "learning_rate": 2.2544732937537003e-08, + "loss": 0.48948657512664795, + "step": 8117 + }, + { + "epoch": 1.8718007839520405, + "grad_norm": 2.062408637955344, + "learning_rate": 2.2464303487555902e-08, + "loss": 0.5571197867393494, + "step": 8118 + }, + { + "epoch": 1.8720313580816232, + "grad_norm": 1.6301482456607912, + "learning_rate": 2.2384016132295345e-08, + "loss": 0.514819324016571, + "step": 8119 + }, + { + "epoch": 1.872261932211206, + "grad_norm": 1.5677432247071832, + "learning_rate": 2.230387088342589e-08, + "loss": 0.4411713182926178, + "step": 8120 + }, + { + "epoch": 1.8724925063407887, + "grad_norm": 1.4508146354194726, + "learning_rate": 2.2223867752597437e-08, + "loss": 0.4494340717792511, + "step": 8121 + }, + { + "epoch": 1.8727230804703712, + "grad_norm": 1.6205003929883524, + "learning_rate": 2.2144006751439236e-08, + "loss": 0.4186316132545471, + "step": 8122 + }, + { + "epoch": 1.8729536545999539, + "grad_norm": 1.5017815147990925, + "learning_rate": 2.2064287891560007e-08, + "loss": 0.45932692289352417, + "step": 8123 + }, + { + "epoch": 1.8731842287295364, + "grad_norm": 1.475598332139336, + "learning_rate": 2.1984711184547477e-08, + "loss": 0.4095005989074707, + "step": 8124 + }, + { + "epoch": 1.873414802859119, + "grad_norm": 1.4633944208901333, + "learning_rate": 2.1905276641969284e-08, + "loss": 0.3822292685508728, + "step": 8125 + }, + { + "epoch": 1.8736453769887018, + "grad_norm": 1.5993925787143786, + "learning_rate": 2.1825984275371633e-08, + "loss": 0.41837501525878906, + "step": 8126 + }, + { + "epoch": 1.8738759511182845, + "grad_norm": 1.6176173713553115, + "learning_rate": 2.1746834096280752e-08, + "loss": 0.3903341591358185, + "step": 8127 + }, + { + "epoch": 1.8741065252478672, + "grad_norm": 
1.4079834631265329, + "learning_rate": 2.166782611620177e-08, + "loss": 0.4760533571243286, + "step": 8128 + }, + { + "epoch": 1.87433709937745, + "grad_norm": 1.4208864897990974, + "learning_rate": 2.1588960346619388e-08, + "loss": 0.43960827589035034, + "step": 8129 + }, + { + "epoch": 1.8745676735070325, + "grad_norm": 1.7654096006141957, + "learning_rate": 2.151023679899755e-08, + "loss": 0.47941142320632935, + "step": 8130 + }, + { + "epoch": 1.8747982476366152, + "grad_norm": 1.41048993466122, + "learning_rate": 2.143165548477943e-08, + "loss": 0.4467000961303711, + "step": 8131 + }, + { + "epoch": 1.8750288217661977, + "grad_norm": 1.4796609851220597, + "learning_rate": 2.1353216415387788e-08, + "loss": 0.42472416162490845, + "step": 8132 + }, + { + "epoch": 1.8752593958957804, + "grad_norm": 1.9200971165248846, + "learning_rate": 2.1274919602224273e-08, + "loss": 0.5127208232879639, + "step": 8133 + }, + { + "epoch": 1.875489970025363, + "grad_norm": 1.8325759046238386, + "learning_rate": 2.119676505667045e-08, + "loss": 0.5362575650215149, + "step": 8134 + }, + { + "epoch": 1.8757205441549458, + "grad_norm": 1.2983178226172876, + "learning_rate": 2.111875279008657e-08, + "loss": 0.4025413990020752, + "step": 8135 + }, + { + "epoch": 1.8759511182845285, + "grad_norm": 1.5647543555868217, + "learning_rate": 2.1040882813812667e-08, + "loss": 0.49126237630844116, + "step": 8136 + }, + { + "epoch": 1.8761816924141113, + "grad_norm": 1.64373423682739, + "learning_rate": 2.096315513916791e-08, + "loss": 0.40609198808670044, + "step": 8137 + }, + { + "epoch": 1.8764122665436938, + "grad_norm": 1.4881317882345182, + "learning_rate": 2.0885569777450707e-08, + "loss": 0.47826945781707764, + "step": 8138 + }, + { + "epoch": 1.8766428406732765, + "grad_norm": 1.4578062807690564, + "learning_rate": 2.0808126739939035e-08, + "loss": 0.39987948536872864, + "step": 8139 + }, + { + "epoch": 1.876873414802859, + "grad_norm": 1.6010627164873539, + "learning_rate": 
2.0730826037890003e-08, + "loss": 0.5727471113204956, + "step": 8140 + }, + { + "epoch": 1.8771039889324417, + "grad_norm": 1.3737495035065335, + "learning_rate": 2.0653667682540066e-08, + "loss": 0.4772847294807434, + "step": 8141 + }, + { + "epoch": 1.8773345630620244, + "grad_norm": 1.54097710668183, + "learning_rate": 2.0576651685104697e-08, + "loss": 0.3258974552154541, + "step": 8142 + }, + { + "epoch": 1.8775651371916071, + "grad_norm": 1.4067173519179077, + "learning_rate": 2.049977805677938e-08, + "loss": 0.5220766067504883, + "step": 8143 + }, + { + "epoch": 1.8777957113211898, + "grad_norm": 1.2918102910413813, + "learning_rate": 2.0423046808738077e-08, + "loss": 0.39550334215164185, + "step": 8144 + }, + { + "epoch": 1.8780262854507725, + "grad_norm": 2.3983596335767334, + "learning_rate": 2.034645795213463e-08, + "loss": 0.4487137198448181, + "step": 8145 + }, + { + "epoch": 1.878256859580355, + "grad_norm": 1.3947776950768658, + "learning_rate": 2.0270011498102147e-08, + "loss": 0.3363339304924011, + "step": 8146 + }, + { + "epoch": 1.8784874337099378, + "grad_norm": 1.5333942075668883, + "learning_rate": 2.019370745775273e-08, + "loss": 0.5161975026130676, + "step": 8147 + }, + { + "epoch": 1.8787180078395203, + "grad_norm": 1.4587907721196531, + "learning_rate": 2.011754584217784e-08, + "loss": 0.359643816947937, + "step": 8148 + }, + { + "epoch": 1.878948581969103, + "grad_norm": 1.3696377552673178, + "learning_rate": 2.0041526662448625e-08, + "loss": 0.4472349286079407, + "step": 8149 + }, + { + "epoch": 1.8791791560986857, + "grad_norm": 1.6693442042315434, + "learning_rate": 1.9965649929615135e-08, + "loss": 0.40363550186157227, + "step": 8150 + }, + { + "epoch": 1.8794097302282684, + "grad_norm": 1.7598833036688746, + "learning_rate": 1.9889915654706656e-08, + "loss": 0.46063172817230225, + "step": 8151 + }, + { + "epoch": 1.8796403043578511, + "grad_norm": 1.6348416553504144, + "learning_rate": 1.981432384873205e-08, + "loss": 
0.4478832483291626, + "step": 8152 + }, + { + "epoch": 1.8798708784874338, + "grad_norm": 1.7016857171242656, + "learning_rate": 1.9738874522679304e-08, + "loss": 0.3438538908958435, + "step": 8153 + }, + { + "epoch": 1.8801014526170163, + "grad_norm": 2.2031337611169435, + "learning_rate": 1.966356768751598e-08, + "loss": 0.6035101413726807, + "step": 8154 + }, + { + "epoch": 1.880332026746599, + "grad_norm": 1.6642481554824737, + "learning_rate": 1.958840335418832e-08, + "loss": 0.42533814907073975, + "step": 8155 + }, + { + "epoch": 1.8805626008761815, + "grad_norm": 1.5825430260849223, + "learning_rate": 1.9513381533622587e-08, + "loss": 0.4117417633533478, + "step": 8156 + }, + { + "epoch": 1.8807931750057643, + "grad_norm": 1.6218701576707837, + "learning_rate": 1.943850223672361e-08, + "loss": 0.4353973865509033, + "step": 8157 + }, + { + "epoch": 1.881023749135347, + "grad_norm": 1.5613174256794196, + "learning_rate": 1.9363765474376125e-08, + "loss": 0.46115410327911377, + "step": 8158 + }, + { + "epoch": 1.8812543232649297, + "grad_norm": 1.4415196194001674, + "learning_rate": 1.9289171257443782e-08, + "loss": 0.3851476311683655, + "step": 8159 + }, + { + "epoch": 1.8814848973945124, + "grad_norm": 1.5586436794771006, + "learning_rate": 1.921471959676957e-08, + "loss": 0.4786919355392456, + "step": 8160 + }, + { + "epoch": 1.8817154715240951, + "grad_norm": 1.6398537249529117, + "learning_rate": 1.914041050317583e-08, + "loss": 0.4427906274795532, + "step": 8161 + }, + { + "epoch": 1.8819460456536776, + "grad_norm": 1.495606046913042, + "learning_rate": 1.906624398746415e-08, + "loss": 0.37774696946144104, + "step": 8162 + }, + { + "epoch": 1.8821766197832603, + "grad_norm": 1.5733237369323263, + "learning_rate": 1.8992220060415343e-08, + "loss": 0.43793195486068726, + "step": 8163 + }, + { + "epoch": 1.8824071939128428, + "grad_norm": 1.2904039749569203, + "learning_rate": 1.8918338732789587e-08, + "loss": 0.3869394063949585, + "step": 8164 + }, + { + 
"epoch": 1.8826377680424256, + "grad_norm": 1.9325019962539283, + "learning_rate": 1.8844600015326283e-08, + "loss": 0.4963928461074829, + "step": 8165 + }, + { + "epoch": 1.8828683421720083, + "grad_norm": 1.5945637624217548, + "learning_rate": 1.8771003918743978e-08, + "loss": 0.45727187395095825, + "step": 8166 + }, + { + "epoch": 1.883098916301591, + "grad_norm": 1.8455372682093192, + "learning_rate": 1.8697550453740884e-08, + "loss": 0.4878919720649719, + "step": 8167 + }, + { + "epoch": 1.8833294904311737, + "grad_norm": 1.7826396913976752, + "learning_rate": 1.862423963099391e-08, + "loss": 0.5376998782157898, + "step": 8168 + }, + { + "epoch": 1.8835600645607564, + "grad_norm": 1.4765870494853872, + "learning_rate": 1.8551071461159638e-08, + "loss": 0.4534180760383606, + "step": 8169 + }, + { + "epoch": 1.883790638690339, + "grad_norm": 1.561114582514347, + "learning_rate": 1.847804595487379e-08, + "loss": 0.43389183282852173, + "step": 8170 + }, + { + "epoch": 1.8840212128199216, + "grad_norm": 1.535519375075225, + "learning_rate": 1.8405163122751532e-08, + "loss": 0.4833742678165436, + "step": 8171 + }, + { + "epoch": 1.8842517869495041, + "grad_norm": 1.622186588307033, + "learning_rate": 1.833242297538695e-08, + "loss": 0.49344220757484436, + "step": 8172 + }, + { + "epoch": 1.8844823610790868, + "grad_norm": 1.4984978840285303, + "learning_rate": 1.8259825523353478e-08, + "loss": 0.49290287494659424, + "step": 8173 + }, + { + "epoch": 1.8847129352086696, + "grad_norm": 1.3380486770022888, + "learning_rate": 1.8187370777204115e-08, + "loss": 0.3971661627292633, + "step": 8174 + }, + { + "epoch": 1.8849435093382523, + "grad_norm": 1.5640300636460862, + "learning_rate": 1.811505874747066e-08, + "loss": 0.4984559416770935, + "step": 8175 + }, + { + "epoch": 1.885174083467835, + "grad_norm": 1.5865101985098036, + "learning_rate": 1.804288944466459e-08, + "loss": 0.38448822498321533, + "step": 8176 + }, + { + "epoch": 1.8854046575974177, + "grad_norm": 
1.9477188873182039, + "learning_rate": 1.7970862879276406e-08, + "loss": 0.5468838214874268, + "step": 8177 + }, + { + "epoch": 1.8856352317270002, + "grad_norm": 1.4768596083300787, + "learning_rate": 1.7898979061775844e-08, + "loss": 0.46132227778434753, + "step": 8178 + }, + { + "epoch": 1.885865805856583, + "grad_norm": 1.436520509516384, + "learning_rate": 1.782723800261199e-08, + "loss": 0.4636603593826294, + "step": 8179 + }, + { + "epoch": 1.8860963799861654, + "grad_norm": 1.5429934177783204, + "learning_rate": 1.7755639712213057e-08, + "loss": 0.5302075147628784, + "step": 8180 + }, + { + "epoch": 1.8863269541157481, + "grad_norm": 1.6563780466455296, + "learning_rate": 1.7684184200986718e-08, + "loss": 0.4817178249359131, + "step": 8181 + }, + { + "epoch": 1.8865575282453309, + "grad_norm": 1.4897334937072715, + "learning_rate": 1.7612871479319668e-08, + "loss": 0.4535263180732727, + "step": 8182 + }, + { + "epoch": 1.8867881023749136, + "grad_norm": 1.6029244875460678, + "learning_rate": 1.7541701557577837e-08, + "loss": 0.5260534286499023, + "step": 8183 + }, + { + "epoch": 1.8870186765044963, + "grad_norm": 1.4065276330082377, + "learning_rate": 1.7470674446106614e-08, + "loss": 0.4526366591453552, + "step": 8184 + }, + { + "epoch": 1.887249250634079, + "grad_norm": 1.663451618032215, + "learning_rate": 1.7399790155230632e-08, + "loss": 0.4721973240375519, + "step": 8185 + }, + { + "epoch": 1.8874798247636615, + "grad_norm": 1.6510288712519465, + "learning_rate": 1.7329048695253422e-08, + "loss": 0.4331268072128296, + "step": 8186 + }, + { + "epoch": 1.8877103988932442, + "grad_norm": 1.9623503418050199, + "learning_rate": 1.7258450076458097e-08, + "loss": 0.5175650119781494, + "step": 8187 + }, + { + "epoch": 1.8879409730228267, + "grad_norm": 1.3640756960267433, + "learning_rate": 1.718799430910678e-08, + "loss": 0.45537033677101135, + "step": 8188 + }, + { + "epoch": 1.8881715471524094, + "grad_norm": 1.540072753548263, + "learning_rate": 
1.7117681403441054e-08, + "loss": 0.5055547952651978, + "step": 8189 + }, + { + "epoch": 1.8884021212819921, + "grad_norm": 1.5849214553434074, + "learning_rate": 1.7047511369681522e-08, + "loss": 0.45514553785324097, + "step": 8190 + }, + { + "epoch": 1.8886326954115749, + "grad_norm": 1.4821599822935887, + "learning_rate": 1.6977484218028136e-08, + "loss": 0.44227129220962524, + "step": 8191 + }, + { + "epoch": 1.8888632695411576, + "grad_norm": 1.7163429603820965, + "learning_rate": 1.690759995866009e-08, + "loss": 0.4916682839393616, + "step": 8192 + }, + { + "epoch": 1.8890938436707403, + "grad_norm": 1.8219225402151713, + "learning_rate": 1.683785860173559e-08, + "loss": 0.48626652359962463, + "step": 8193 + }, + { + "epoch": 1.8893244178003228, + "grad_norm": 1.491517373721971, + "learning_rate": 1.676826015739252e-08, + "loss": 0.39982378482818604, + "step": 8194 + }, + { + "epoch": 1.8895549919299055, + "grad_norm": 1.8710391095575285, + "learning_rate": 1.6698804635747576e-08, + "loss": 0.49218645691871643, + "step": 8195 + }, + { + "epoch": 1.889785566059488, + "grad_norm": 1.5127362254029266, + "learning_rate": 1.6629492046896897e-08, + "loss": 0.38896578550338745, + "step": 8196 + }, + { + "epoch": 1.8900161401890707, + "grad_norm": 1.5870268370960243, + "learning_rate": 1.6560322400915538e-08, + "loss": 0.4217762053012848, + "step": 8197 + }, + { + "epoch": 1.8902467143186534, + "grad_norm": 1.5231528042475502, + "learning_rate": 1.6491295707858343e-08, + "loss": 0.4020112156867981, + "step": 8198 + }, + { + "epoch": 1.8904772884482361, + "grad_norm": 2.1189678944561954, + "learning_rate": 1.6422411977758843e-08, + "loss": 0.4630794823169708, + "step": 8199 + }, + { + "epoch": 1.8907078625778189, + "grad_norm": 1.526138087578761, + "learning_rate": 1.6353671220629917e-08, + "loss": 0.3673272132873535, + "step": 8200 + }, + { + "epoch": 1.8909384367074016, + "grad_norm": 1.4930616058109705, + "learning_rate": 1.6285073446463903e-08, + "loss": 
0.4677228331565857, + "step": 8201 + }, + { + "epoch": 1.891169010836984, + "grad_norm": 1.718939922651036, + "learning_rate": 1.621661866523216e-08, + "loss": 0.4532579183578491, + "step": 8202 + }, + { + "epoch": 1.8913995849665668, + "grad_norm": 1.4990742550855458, + "learning_rate": 1.6148306886885287e-08, + "loss": 0.3011256456375122, + "step": 8203 + }, + { + "epoch": 1.8916301590961493, + "grad_norm": 1.731114486954807, + "learning_rate": 1.6080138121352892e-08, + "loss": 0.43071651458740234, + "step": 8204 + }, + { + "epoch": 1.891860733225732, + "grad_norm": 1.4183554819693576, + "learning_rate": 1.6012112378544272e-08, + "loss": 0.3180675506591797, + "step": 8205 + }, + { + "epoch": 1.8920913073553147, + "grad_norm": 1.6038525214828652, + "learning_rate": 1.594422966834741e-08, + "loss": 0.35130774974823, + "step": 8206 + }, + { + "epoch": 1.8923218814848974, + "grad_norm": 1.388613528735296, + "learning_rate": 1.587649000062996e-08, + "loss": 0.4953269958496094, + "step": 8207 + }, + { + "epoch": 1.8925524556144802, + "grad_norm": 1.5668590048532676, + "learning_rate": 1.5808893385238388e-08, + "loss": 0.3713166415691376, + "step": 8208 + }, + { + "epoch": 1.8927830297440629, + "grad_norm": 1.4824855259294067, + "learning_rate": 1.5741439831998827e-08, + "loss": 0.4273546040058136, + "step": 8209 + }, + { + "epoch": 1.8930136038736454, + "grad_norm": 1.8212221910711959, + "learning_rate": 1.5674129350715994e-08, + "loss": 0.45312386751174927, + "step": 8210 + }, + { + "epoch": 1.893244178003228, + "grad_norm": 1.4687276423683582, + "learning_rate": 1.560696195117439e-08, + "loss": 0.40246695280075073, + "step": 8211 + }, + { + "epoch": 1.8934747521328106, + "grad_norm": 1.9323139227263069, + "learning_rate": 1.5539937643137325e-08, + "loss": 0.5229366421699524, + "step": 8212 + }, + { + "epoch": 1.8937053262623933, + "grad_norm": 1.4419033757005335, + "learning_rate": 1.5473056436347554e-08, + "loss": 0.43834251165390015, + "step": 8213 + }, + { + 
"epoch": 1.893935900391976, + "grad_norm": 1.5176292463299432, + "learning_rate": 1.540631834052697e-08, + "loss": 0.4423528015613556, + "step": 8214 + }, + { + "epoch": 1.8941664745215587, + "grad_norm": 1.6176606345399394, + "learning_rate": 1.5339723365376478e-08, + "loss": 0.49888452887535095, + "step": 8215 + }, + { + "epoch": 1.8943970486511414, + "grad_norm": 1.7422668701695732, + "learning_rate": 1.5273271520576448e-08, + "loss": 0.44023919105529785, + "step": 8216 + }, + { + "epoch": 1.8946276227807242, + "grad_norm": 1.5430241161700802, + "learning_rate": 1.5206962815786262e-08, + "loss": 0.4733201861381531, + "step": 8217 + }, + { + "epoch": 1.8948581969103067, + "grad_norm": 1.992567039765999, + "learning_rate": 1.5140797260644768e-08, + "loss": 0.5393285751342773, + "step": 8218 + }, + { + "epoch": 1.8950887710398894, + "grad_norm": 1.5439154792235448, + "learning_rate": 1.507477486476949e-08, + "loss": 0.4240071773529053, + "step": 8219 + }, + { + "epoch": 1.8953193451694719, + "grad_norm": 1.4272355688005478, + "learning_rate": 1.5008895637757647e-08, + "loss": 0.42983078956604004, + "step": 8220 + }, + { + "epoch": 1.8955499192990546, + "grad_norm": 1.470069283076572, + "learning_rate": 1.4943159589185462e-08, + "loss": 0.47513502836227417, + "step": 8221 + }, + { + "epoch": 1.8957804934286373, + "grad_norm": 1.49966428795426, + "learning_rate": 1.4877566728608293e-08, + "loss": 0.41938167810440063, + "step": 8222 + }, + { + "epoch": 1.89601106755822, + "grad_norm": 1.513306290399523, + "learning_rate": 1.4812117065560625e-08, + "loss": 0.44817137718200684, + "step": 8223 + }, + { + "epoch": 1.8962416416878027, + "grad_norm": 1.6563869108965783, + "learning_rate": 1.4746810609556292e-08, + "loss": 0.46840909123420715, + "step": 8224 + }, + { + "epoch": 1.8964722158173855, + "grad_norm": 1.4822882914533433, + "learning_rate": 1.4681647370088369e-08, + "loss": 0.377409964799881, + "step": 8225 + }, + { + "epoch": 1.896702789946968, + "grad_norm": 
1.595495246407856, + "learning_rate": 1.4616627356628831e-08, + "loss": 0.41149425506591797, + "step": 8226 + }, + { + "epoch": 1.8969333640765507, + "grad_norm": 1.548113444870098, + "learning_rate": 1.455175057862923e-08, + "loss": 0.39183878898620605, + "step": 8227 + }, + { + "epoch": 1.8971639382061332, + "grad_norm": 1.3643453838150799, + "learning_rate": 1.448701704551969e-08, + "loss": 0.3629387617111206, + "step": 8228 + }, + { + "epoch": 1.8973945123357159, + "grad_norm": 1.6546771139251113, + "learning_rate": 1.4422426766710239e-08, + "loss": 0.4007713794708252, + "step": 8229 + }, + { + "epoch": 1.8976250864652986, + "grad_norm": 1.648419698601457, + "learning_rate": 1.4357979751589477e-08, + "loss": 0.42354586720466614, + "step": 8230 + }, + { + "epoch": 1.8978556605948813, + "grad_norm": 1.9683167812350795, + "learning_rate": 1.429367600952558e-08, + "loss": 0.5321829319000244, + "step": 8231 + }, + { + "epoch": 1.898086234724464, + "grad_norm": 1.5240649560541817, + "learning_rate": 1.4229515549865845e-08, + "loss": 0.4840988218784332, + "step": 8232 + }, + { + "epoch": 1.8983168088540465, + "grad_norm": 1.6587626955063286, + "learning_rate": 1.4165498381936369e-08, + "loss": 0.5006803870201111, + "step": 8233 + }, + { + "epoch": 1.8985473829836292, + "grad_norm": 1.855334923621547, + "learning_rate": 1.4101624515042821e-08, + "loss": 0.40582865476608276, + "step": 8234 + }, + { + "epoch": 1.8987779571132117, + "grad_norm": 1.6458084674224973, + "learning_rate": 1.4037893958469993e-08, + "loss": 0.38199514150619507, + "step": 8235 + }, + { + "epoch": 1.8990085312427945, + "grad_norm": 1.4513711417071327, + "learning_rate": 1.3974306721481699e-08, + "loss": 0.39234936237335205, + "step": 8236 + }, + { + "epoch": 1.8992391053723772, + "grad_norm": 1.661857153956049, + "learning_rate": 1.391086281332099e-08, + "loss": 0.42211759090423584, + "step": 8237 + }, + { + "epoch": 1.8994696795019599, + "grad_norm": 1.5171507269414566, + "learning_rate": 
1.3847562243210043e-08, + "loss": 0.4519961476325989, + "step": 8238 + }, + { + "epoch": 1.8997002536315426, + "grad_norm": 1.618394005210342, + "learning_rate": 1.3784405020350276e-08, + "loss": 0.4795762896537781, + "step": 8239 + }, + { + "epoch": 1.8999308277611253, + "grad_norm": 1.5749927795923588, + "learning_rate": 1.3721391153922235e-08, + "loss": 0.4549542963504791, + "step": 8240 + }, + { + "epoch": 1.9001614018907078, + "grad_norm": 1.759482125374446, + "learning_rate": 1.3658520653085703e-08, + "loss": 0.5253233313560486, + "step": 8241 + }, + { + "epoch": 1.9003919760202905, + "grad_norm": 1.4274315163192688, + "learning_rate": 1.3595793526979371e-08, + "loss": 0.44850921630859375, + "step": 8242 + }, + { + "epoch": 1.900622550149873, + "grad_norm": 1.5448941620644567, + "learning_rate": 1.35332097847215e-08, + "loss": 0.4416281580924988, + "step": 8243 + }, + { + "epoch": 1.9008531242794557, + "grad_norm": 1.932595440608825, + "learning_rate": 1.3470769435409036e-08, + "loss": 0.5567417740821838, + "step": 8244 + }, + { + "epoch": 1.9010836984090385, + "grad_norm": 1.4810071060864598, + "learning_rate": 1.3408472488118383e-08, + "loss": 0.43554848432540894, + "step": 8245 + }, + { + "epoch": 1.9013142725386212, + "grad_norm": 1.6729713604736038, + "learning_rate": 1.3346318951905077e-08, + "loss": 0.4219995141029358, + "step": 8246 + }, + { + "epoch": 1.901544846668204, + "grad_norm": 1.5600368865419485, + "learning_rate": 1.328430883580367e-08, + "loss": 0.45862913131713867, + "step": 8247 + }, + { + "epoch": 1.9017754207977866, + "grad_norm": 1.5932092717655322, + "learning_rate": 1.3222442148828172e-08, + "loss": 0.5026064515113831, + "step": 8248 + }, + { + "epoch": 1.902005994927369, + "grad_norm": 1.6308659122795583, + "learning_rate": 1.316071889997139e-08, + "loss": 0.46948713064193726, + "step": 8249 + }, + { + "epoch": 1.9022365690569518, + "grad_norm": 1.5718314790268124, + "learning_rate": 1.3099139098205258e-08, + "loss": 
0.4263686537742615, + "step": 8250 + }, + { + "epoch": 1.9024671431865343, + "grad_norm": 1.516002170215572, + "learning_rate": 1.3037702752481394e-08, + "loss": 0.4652191400527954, + "step": 8251 + }, + { + "epoch": 1.902697717316117, + "grad_norm": 1.553138573631746, + "learning_rate": 1.2976409871729987e-08, + "loss": 0.4918743371963501, + "step": 8252 + }, + { + "epoch": 1.9029282914456997, + "grad_norm": 1.4916920711393407, + "learning_rate": 1.2915260464860466e-08, + "loss": 0.5297696590423584, + "step": 8253 + }, + { + "epoch": 1.9031588655752825, + "grad_norm": 1.7049232652010609, + "learning_rate": 1.2854254540761722e-08, + "loss": 0.5320281982421875, + "step": 8254 + }, + { + "epoch": 1.9033894397048652, + "grad_norm": 1.6403951625522013, + "learning_rate": 1.2793392108301437e-08, + "loss": 0.4424601197242737, + "step": 8255 + }, + { + "epoch": 1.903620013834448, + "grad_norm": 1.7301429652605729, + "learning_rate": 1.2732673176326758e-08, + "loss": 0.4811365008354187, + "step": 8256 + }, + { + "epoch": 1.9038505879640304, + "grad_norm": 1.4707627617860477, + "learning_rate": 1.2672097753663624e-08, + "loss": 0.3744504451751709, + "step": 8257 + }, + { + "epoch": 1.904081162093613, + "grad_norm": 1.4178929694153364, + "learning_rate": 1.2611665849117326e-08, + "loss": 0.4703986644744873, + "step": 8258 + }, + { + "epoch": 1.9043117362231956, + "grad_norm": 1.7267205141598052, + "learning_rate": 1.255137747147228e-08, + "loss": 0.5431181192398071, + "step": 8259 + }, + { + "epoch": 1.9045423103527783, + "grad_norm": 1.8088892551764337, + "learning_rate": 1.2491232629492143e-08, + "loss": 0.5066450238227844, + "step": 8260 + }, + { + "epoch": 1.904772884482361, + "grad_norm": 1.4945728049455276, + "learning_rate": 1.2431231331919368e-08, + "loss": 0.4374620020389557, + "step": 8261 + }, + { + "epoch": 1.9050034586119438, + "grad_norm": 1.5574450804582989, + "learning_rate": 1.2371373587475753e-08, + "loss": 0.3628976345062256, + "step": 8262 + }, + { + 
"epoch": 1.9052340327415265, + "grad_norm": 1.6159357629155715, + "learning_rate": 1.231165940486234e-08, + "loss": 0.43471890687942505, + "step": 8263 + }, + { + "epoch": 1.9054646068711092, + "grad_norm": 1.4892272896008858, + "learning_rate": 1.2252088792759074e-08, + "loss": 0.5038785934448242, + "step": 8264 + }, + { + "epoch": 1.9056951810006917, + "grad_norm": 1.388813738509663, + "learning_rate": 1.2192661759825363e-08, + "loss": 0.44022035598754883, + "step": 8265 + }, + { + "epoch": 1.9059257551302744, + "grad_norm": 1.8473214990080156, + "learning_rate": 1.2133378314699294e-08, + "loss": 0.4924722909927368, + "step": 8266 + }, + { + "epoch": 1.906156329259857, + "grad_norm": 1.525292247487046, + "learning_rate": 1.2074238465998532e-08, + "loss": 0.3824247121810913, + "step": 8267 + }, + { + "epoch": 1.9063869033894396, + "grad_norm": 1.821466956277618, + "learning_rate": 1.2015242222319422e-08, + "loss": 0.47094473242759705, + "step": 8268 + }, + { + "epoch": 1.9066174775190223, + "grad_norm": 1.7313158547849, + "learning_rate": 1.1956389592237881e-08, + "loss": 0.5653735399246216, + "step": 8269 + }, + { + "epoch": 1.906848051648605, + "grad_norm": 1.7620428814203788, + "learning_rate": 1.1897680584308512e-08, + "loss": 0.4763476848602295, + "step": 8270 + }, + { + "epoch": 1.9070786257781878, + "grad_norm": 1.5194232107831984, + "learning_rate": 1.1839115207065487e-08, + "loss": 0.3845449686050415, + "step": 8271 + }, + { + "epoch": 1.9073091999077705, + "grad_norm": 1.5881713237890829, + "learning_rate": 1.1780693469021775e-08, + "loss": 0.43071988224983215, + "step": 8272 + }, + { + "epoch": 1.907539774037353, + "grad_norm": 1.4466344827167648, + "learning_rate": 1.172241537866947e-08, + "loss": 0.43860751390457153, + "step": 8273 + }, + { + "epoch": 1.9077703481669357, + "grad_norm": 1.7623171007667486, + "learning_rate": 1.1664280944480132e-08, + "loss": 0.5077678561210632, + "step": 8274 + }, + { + "epoch": 1.9080009222965182, + "grad_norm": 
1.4297374268054954, + "learning_rate": 1.1606290174903888e-08, + "loss": 0.3832993805408478, + "step": 8275 + }, + { + "epoch": 1.908231496426101, + "grad_norm": 1.629527864713481, + "learning_rate": 1.1548443078370551e-08, + "loss": 0.48003530502319336, + "step": 8276 + }, + { + "epoch": 1.9084620705556836, + "grad_norm": 1.5503547776003848, + "learning_rate": 1.1490739663288618e-08, + "loss": 0.6109439134597778, + "step": 8277 + }, + { + "epoch": 1.9086926446852663, + "grad_norm": 1.9064677948637023, + "learning_rate": 1.1433179938045823e-08, + "loss": 0.4559859037399292, + "step": 8278 + }, + { + "epoch": 1.908923218814849, + "grad_norm": 1.4670877218502, + "learning_rate": 1.137576391100925e-08, + "loss": 0.3935600221157074, + "step": 8279 + }, + { + "epoch": 1.9091537929444318, + "grad_norm": 1.6460426557554972, + "learning_rate": 1.1318491590524782e-08, + "loss": 0.44477611780166626, + "step": 8280 + }, + { + "epoch": 1.9093843670740143, + "grad_norm": 1.652813391764361, + "learning_rate": 1.1261362984917533e-08, + "loss": 0.47065627574920654, + "step": 8281 + }, + { + "epoch": 1.909614941203597, + "grad_norm": 1.567401132156008, + "learning_rate": 1.1204378102491862e-08, + "loss": 0.44851434230804443, + "step": 8282 + }, + { + "epoch": 1.9098455153331795, + "grad_norm": 1.6119259284309502, + "learning_rate": 1.1147536951530923e-08, + "loss": 0.38606488704681396, + "step": 8283 + }, + { + "epoch": 1.9100760894627622, + "grad_norm": 1.7145601291142103, + "learning_rate": 1.1090839540297103e-08, + "loss": 0.5400182008743286, + "step": 8284 + }, + { + "epoch": 1.910306663592345, + "grad_norm": 1.5193110263706777, + "learning_rate": 1.1034285877032146e-08, + "loss": 0.4225059449672699, + "step": 8285 + }, + { + "epoch": 1.9105372377219276, + "grad_norm": 1.8787563951518915, + "learning_rate": 1.0977875969956584e-08, + "loss": 0.5111556649208069, + "step": 8286 + }, + { + "epoch": 1.9107678118515103, + "grad_norm": 1.583999151547768, + "learning_rate": 
1.0921609827270196e-08, + "loss": 0.40596213936805725, + "step": 8287 + }, + { + "epoch": 1.910998385981093, + "grad_norm": 1.619272502884341, + "learning_rate": 1.0865487457151768e-08, + "loss": 0.47917360067367554, + "step": 8288 + }, + { + "epoch": 1.9112289601106756, + "grad_norm": 1.8556422558472565, + "learning_rate": 1.0809508867759331e-08, + "loss": 0.45154574513435364, + "step": 8289 + }, + { + "epoch": 1.9114595342402583, + "grad_norm": 1.7391028962680364, + "learning_rate": 1.0753674067229935e-08, + "loss": 0.5024373531341553, + "step": 8290 + }, + { + "epoch": 1.9116901083698408, + "grad_norm": 1.6003253992080113, + "learning_rate": 1.069798306367975e-08, + "loss": 0.5084686875343323, + "step": 8291 + }, + { + "epoch": 1.9119206824994235, + "grad_norm": 1.5906220140950642, + "learning_rate": 1.064243586520408e-08, + "loss": 0.3947920501232147, + "step": 8292 + }, + { + "epoch": 1.9121512566290062, + "grad_norm": 1.5037329879323602, + "learning_rate": 1.0587032479877023e-08, + "loss": 0.5011960864067078, + "step": 8293 + }, + { + "epoch": 1.912381830758589, + "grad_norm": 1.6116996984750152, + "learning_rate": 1.0531772915752247e-08, + "loss": 0.43622612953186035, + "step": 8294 + }, + { + "epoch": 1.9126124048881716, + "grad_norm": 1.664400790122745, + "learning_rate": 1.0476657180862325e-08, + "loss": 0.380764365196228, + "step": 8295 + }, + { + "epoch": 1.9128429790177544, + "grad_norm": 1.59176785573853, + "learning_rate": 1.042168528321874e-08, + "loss": 0.4183109700679779, + "step": 8296 + }, + { + "epoch": 1.9130735531473368, + "grad_norm": 1.7993335153125511, + "learning_rate": 1.036685723081221e-08, + "loss": 0.4221222698688507, + "step": 8297 + }, + { + "epoch": 1.9133041272769196, + "grad_norm": 1.7816315005923467, + "learning_rate": 1.0312173031612804e-08, + "loss": 0.543656051158905, + "step": 8298 + }, + { + "epoch": 1.913534701406502, + "grad_norm": 1.5681621709441897, + "learning_rate": 1.0257632693569052e-08, + "loss": 
0.48872441053390503, + "step": 8299 + }, + { + "epoch": 1.9137652755360848, + "grad_norm": 1.5640812032082956, + "learning_rate": 1.0203236224609169e-08, + "loss": 0.5447995662689209, + "step": 8300 + }, + { + "epoch": 1.9139958496656675, + "grad_norm": 1.4954141524676323, + "learning_rate": 1.0148983632640162e-08, + "loss": 0.39448055624961853, + "step": 8301 + }, + { + "epoch": 1.9142264237952502, + "grad_norm": 1.755968676337724, + "learning_rate": 1.009487492554828e-08, + "loss": 0.44735193252563477, + "step": 8302 + }, + { + "epoch": 1.914456997924833, + "grad_norm": 1.6151813931913763, + "learning_rate": 1.0040910111198786e-08, + "loss": 0.4747859537601471, + "step": 8303 + }, + { + "epoch": 1.9146875720544156, + "grad_norm": 1.6130507888649155, + "learning_rate": 9.987089197435739e-09, + "loss": 0.5120220184326172, + "step": 8304 + }, + { + "epoch": 1.9149181461839981, + "grad_norm": 1.6267491510418168, + "learning_rate": 9.933412192082991e-09, + "loss": 0.3889455795288086, + "step": 8305 + }, + { + "epoch": 1.9151487203135809, + "grad_norm": 1.497355606160038, + "learning_rate": 9.879879102942635e-09, + "loss": 0.36584073305130005, + "step": 8306 + }, + { + "epoch": 1.9153792944431633, + "grad_norm": 2.0010610263228643, + "learning_rate": 9.826489937796556e-09, + "loss": 0.6259280443191528, + "step": 8307 + }, + { + "epoch": 1.915609868572746, + "grad_norm": 1.780257440356438, + "learning_rate": 9.773244704405104e-09, + "loss": 0.45160970091819763, + "step": 8308 + }, + { + "epoch": 1.9158404427023288, + "grad_norm": 1.559258218463348, + "learning_rate": 9.720143410508309e-09, + "loss": 0.47028589248657227, + "step": 8309 + }, + { + "epoch": 1.9160710168319115, + "grad_norm": 1.7146410364961069, + "learning_rate": 9.667186063824773e-09, + "loss": 0.3850802183151245, + "step": 8310 + }, + { + "epoch": 1.9163015909614942, + "grad_norm": 1.69252010891113, + "learning_rate": 9.614372672052451e-09, + "loss": 0.4134417772293091, + "step": 8311 + }, + { + "epoch": 
1.916532165091077, + "grad_norm": 1.4197660481073355, + "learning_rate": 9.561703242868425e-09, + "loss": 0.5340328216552734, + "step": 8312 + }, + { + "epoch": 1.9167627392206594, + "grad_norm": 1.5089395557239718, + "learning_rate": 9.509177783928569e-09, + "loss": 0.4580942392349243, + "step": 8313 + }, + { + "epoch": 1.9169933133502421, + "grad_norm": 1.559427035261756, + "learning_rate": 9.45679630286811e-09, + "loss": 0.4227365553379059, + "step": 8314 + }, + { + "epoch": 1.9172238874798246, + "grad_norm": 1.462151537342571, + "learning_rate": 9.404558807301065e-09, + "loss": 0.42711400985717773, + "step": 8315 + }, + { + "epoch": 1.9174544616094074, + "grad_norm": 1.6466969798320865, + "learning_rate": 9.352465304820811e-09, + "loss": 0.41088467836380005, + "step": 8316 + }, + { + "epoch": 1.91768503573899, + "grad_norm": 1.7161905508950221, + "learning_rate": 9.30051580299962e-09, + "loss": 0.4669058918952942, + "step": 8317 + }, + { + "epoch": 1.9179156098685728, + "grad_norm": 1.8956617878589224, + "learning_rate": 9.248710309388896e-09, + "loss": 0.34129124879837036, + "step": 8318 + }, + { + "epoch": 1.9181461839981555, + "grad_norm": 1.6346151888813216, + "learning_rate": 9.19704883151906e-09, + "loss": 0.5538367033004761, + "step": 8319 + }, + { + "epoch": 1.9183767581277382, + "grad_norm": 1.8993289351204807, + "learning_rate": 9.145531376899773e-09, + "loss": 0.4591939151287079, + "step": 8320 + }, + { + "epoch": 1.9186073322573207, + "grad_norm": 1.531598340011727, + "learning_rate": 9.094157953019376e-09, + "loss": 0.38709723949432373, + "step": 8321 + }, + { + "epoch": 1.9188379063869034, + "grad_norm": 1.7947823187484588, + "learning_rate": 9.042928567345787e-09, + "loss": 0.503919780254364, + "step": 8322 + }, + { + "epoch": 1.919068480516486, + "grad_norm": 1.6367087262197295, + "learning_rate": 8.991843227325491e-09, + "loss": 0.510110080242157, + "step": 8323 + }, + { + "epoch": 1.9192990546460686, + "grad_norm": 1.6066272425773898, + 
"learning_rate": 8.940901940384437e-09, + "loss": 0.5100687146186829, + "step": 8324 + }, + { + "epoch": 1.9195296287756514, + "grad_norm": 1.513750458500578, + "learning_rate": 8.89010471392726e-09, + "loss": 0.44701308012008667, + "step": 8325 + }, + { + "epoch": 1.919760202905234, + "grad_norm": 1.563320875474341, + "learning_rate": 8.83945155533794e-09, + "loss": 0.4657078981399536, + "step": 8326 + }, + { + "epoch": 1.9199907770348168, + "grad_norm": 1.9297827676028427, + "learning_rate": 8.788942471979588e-09, + "loss": 0.510329008102417, + "step": 8327 + }, + { + "epoch": 1.9202213511643995, + "grad_norm": 1.471307451139604, + "learning_rate": 8.738577471193997e-09, + "loss": 0.5373008847236633, + "step": 8328 + }, + { + "epoch": 1.920451925293982, + "grad_norm": 1.9012550118721963, + "learning_rate": 8.688356560302313e-09, + "loss": 0.46517014503479004, + "step": 8329 + }, + { + "epoch": 1.9206824994235647, + "grad_norm": 1.6705233787528915, + "learning_rate": 8.638279746604582e-09, + "loss": 0.3993692398071289, + "step": 8330 + }, + { + "epoch": 1.9209130735531472, + "grad_norm": 1.366585505535673, + "learning_rate": 8.588347037380095e-09, + "loss": 0.42480504512786865, + "step": 8331 + }, + { + "epoch": 1.92114364768273, + "grad_norm": 1.7413386006663227, + "learning_rate": 8.538558439887044e-09, + "loss": 0.44433218240737915, + "step": 8332 + }, + { + "epoch": 1.9213742218123127, + "grad_norm": 1.59463524320548, + "learning_rate": 8.488913961362643e-09, + "loss": 0.4645090103149414, + "step": 8333 + }, + { + "epoch": 1.9216047959418954, + "grad_norm": 1.7690127959905497, + "learning_rate": 8.439413609023227e-09, + "loss": 0.47265806794166565, + "step": 8334 + }, + { + "epoch": 1.921835370071478, + "grad_norm": 1.6930025984848287, + "learning_rate": 8.390057390064265e-09, + "loss": 0.46389561891555786, + "step": 8335 + }, + { + "epoch": 1.9220659442010608, + "grad_norm": 1.8286869444988214, + "learning_rate": 8.340845311660127e-09, + "loss": 
0.45355337858200073, + "step": 8336 + }, + { + "epoch": 1.9222965183306433, + "grad_norm": 1.6861508362464954, + "learning_rate": 8.291777380964315e-09, + "loss": 0.47136229276657104, + "step": 8337 + }, + { + "epoch": 1.922527092460226, + "grad_norm": 1.7162470073135112, + "learning_rate": 8.242853605109234e-09, + "loss": 0.4914461374282837, + "step": 8338 + }, + { + "epoch": 1.9227576665898085, + "grad_norm": 1.5896610300054894, + "learning_rate": 8.194073991206641e-09, + "loss": 0.48298412561416626, + "step": 8339 + }, + { + "epoch": 1.9229882407193912, + "grad_norm": 1.591559243664797, + "learning_rate": 8.145438546346971e-09, + "loss": 0.5316052436828613, + "step": 8340 + }, + { + "epoch": 1.923218814848974, + "grad_norm": 1.530763445371585, + "learning_rate": 8.09694727760002e-09, + "loss": 0.45742303133010864, + "step": 8341 + }, + { + "epoch": 1.9234493889785567, + "grad_norm": 1.800664891434664, + "learning_rate": 8.048600192014365e-09, + "loss": 0.41579365730285645, + "step": 8342 + }, + { + "epoch": 1.9236799631081394, + "grad_norm": 1.4284255731817002, + "learning_rate": 8.000397296617834e-09, + "loss": 0.37775835394859314, + "step": 8343 + }, + { + "epoch": 1.9239105372377219, + "grad_norm": 1.7051685129810905, + "learning_rate": 7.95233859841704e-09, + "loss": 0.4720783531665802, + "step": 8344 + }, + { + "epoch": 1.9241411113673046, + "grad_norm": 1.608380789109436, + "learning_rate": 7.904424104398067e-09, + "loss": 0.5015095472335815, + "step": 8345 + }, + { + "epoch": 1.924371685496887, + "grad_norm": 1.5886093342032406, + "learning_rate": 7.856653821525672e-09, + "loss": 0.6053783893585205, + "step": 8346 + }, + { + "epoch": 1.9246022596264698, + "grad_norm": 1.71106607476921, + "learning_rate": 7.809027756743635e-09, + "loss": 0.47775521874427795, + "step": 8347 + }, + { + "epoch": 1.9248328337560525, + "grad_norm": 1.559597916397487, + "learning_rate": 7.761545916974976e-09, + "loss": 0.36487245559692383, + "step": 8348 + }, + { + "epoch": 
1.9250634078856352, + "grad_norm": 1.6596969619350017, + "learning_rate": 7.714208309121617e-09, + "loss": 0.48085975646972656, + "step": 8349 + }, + { + "epoch": 1.925293982015218, + "grad_norm": 1.6156245324091865, + "learning_rate": 7.667014940064609e-09, + "loss": 0.48800790309906006, + "step": 8350 + }, + { + "epoch": 1.9255245561448007, + "grad_norm": 1.654653168113963, + "learning_rate": 7.61996581666402e-09, + "loss": 0.5294181704521179, + "step": 8351 + }, + { + "epoch": 1.9257551302743832, + "grad_norm": 1.4725020612800932, + "learning_rate": 7.573060945758936e-09, + "loss": 0.44024503231048584, + "step": 8352 + }, + { + "epoch": 1.9259857044039659, + "grad_norm": 1.8377372608503795, + "learning_rate": 7.526300334167235e-09, + "loss": 0.4359186887741089, + "step": 8353 + }, + { + "epoch": 1.9262162785335484, + "grad_norm": 1.6594669465231893, + "learning_rate": 7.479683988686259e-09, + "loss": 0.4803895652294159, + "step": 8354 + }, + { + "epoch": 1.926446852663131, + "grad_norm": 1.5824042504509404, + "learning_rate": 7.433211916092141e-09, + "loss": 0.43153274059295654, + "step": 8355 + }, + { + "epoch": 1.9266774267927138, + "grad_norm": 1.812737055881384, + "learning_rate": 7.386884123140036e-09, + "loss": 0.38263070583343506, + "step": 8356 + }, + { + "epoch": 1.9269080009222965, + "grad_norm": 1.42789662226475, + "learning_rate": 7.340700616564e-09, + "loss": 0.42121192812919617, + "step": 8357 + }, + { + "epoch": 1.9271385750518792, + "grad_norm": 1.6902764865159838, + "learning_rate": 7.294661403077662e-09, + "loss": 0.46008965373039246, + "step": 8358 + }, + { + "epoch": 1.927369149181462, + "grad_norm": 1.5923895901686829, + "learning_rate": 7.248766489372893e-09, + "loss": 0.48495203256607056, + "step": 8359 + }, + { + "epoch": 1.9275997233110445, + "grad_norm": 1.6833123633851883, + "learning_rate": 7.203015882121244e-09, + "loss": 0.5004169940948486, + "step": 8360 + }, + { + "epoch": 1.9278302974406272, + "grad_norm": 1.4732497687996942, + 
"learning_rate": 7.15740958797284e-09, + "loss": 0.5660319328308105, + "step": 8361 + }, + { + "epoch": 1.9280608715702097, + "grad_norm": 1.588922332622674, + "learning_rate": 7.111947613557268e-09, + "loss": 0.43854010105133057, + "step": 8362 + }, + { + "epoch": 1.9282914456997924, + "grad_norm": 2.093362311602714, + "learning_rate": 7.066629965482574e-09, + "loss": 0.44730937480926514, + "step": 8363 + }, + { + "epoch": 1.928522019829375, + "grad_norm": 1.6568658526601971, + "learning_rate": 7.021456650336377e-09, + "loss": 0.45642590522766113, + "step": 8364 + }, + { + "epoch": 1.9287525939589578, + "grad_norm": 1.9173353497487595, + "learning_rate": 6.976427674684871e-09, + "loss": 0.5613523721694946, + "step": 8365 + }, + { + "epoch": 1.9289831680885405, + "grad_norm": 1.7976713831697748, + "learning_rate": 6.931543045073706e-09, + "loss": 0.4231454133987427, + "step": 8366 + }, + { + "epoch": 1.9292137422181233, + "grad_norm": 1.9184335289270926, + "learning_rate": 6.886802768027223e-09, + "loss": 0.464144766330719, + "step": 8367 + }, + { + "epoch": 1.9294443163477057, + "grad_norm": 1.6282751196601715, + "learning_rate": 6.8422068500487705e-09, + "loss": 0.4303344488143921, + "step": 8368 + }, + { + "epoch": 1.9296748904772885, + "grad_norm": 1.5717538042291814, + "learning_rate": 6.797755297620944e-09, + "loss": 0.4333549737930298, + "step": 8369 + }, + { + "epoch": 1.929905464606871, + "grad_norm": 1.5673646456508366, + "learning_rate": 6.753448117205241e-09, + "loss": 0.4656146466732025, + "step": 8370 + }, + { + "epoch": 1.9301360387364537, + "grad_norm": 2.0556236314521077, + "learning_rate": 6.709285315242063e-09, + "loss": 0.3823866844177246, + "step": 8371 + }, + { + "epoch": 1.9303666128660364, + "grad_norm": 1.5412445917312292, + "learning_rate": 6.665266898150946e-09, + "loss": 0.4552363157272339, + "step": 8372 + }, + { + "epoch": 1.930597186995619, + "grad_norm": 1.5304233694461045, + "learning_rate": 6.6213928723304335e-09, + "loss": 
0.48757460713386536, + "step": 8373 + }, + { + "epoch": 1.9308277611252018, + "grad_norm": 1.0877844091844102, + "learning_rate": 6.577663244158094e-09, + "loss": 0.3263235092163086, + "step": 8374 + }, + { + "epoch": 1.9310583352547845, + "grad_norm": 1.6065207890727204, + "learning_rate": 6.534078019990397e-09, + "loss": 0.510450541973114, + "step": 8375 + }, + { + "epoch": 1.931288909384367, + "grad_norm": 1.4737968731950963, + "learning_rate": 6.490637206162941e-09, + "loss": 0.37407904863357544, + "step": 8376 + }, + { + "epoch": 1.9315194835139498, + "grad_norm": 1.5691906942234775, + "learning_rate": 6.4473408089902315e-09, + "loss": 0.4216376543045044, + "step": 8377 + }, + { + "epoch": 1.9317500576435322, + "grad_norm": 1.647678033925203, + "learning_rate": 6.404188834766011e-09, + "loss": 0.41611379384994507, + "step": 8378 + }, + { + "epoch": 1.931980631773115, + "grad_norm": 1.6406917387427478, + "learning_rate": 6.361181289762596e-09, + "loss": 0.5301774740219116, + "step": 8379 + }, + { + "epoch": 1.9322112059026977, + "grad_norm": 1.457780743812755, + "learning_rate": 6.3183181802317635e-09, + "loss": 0.43767407536506653, + "step": 8380 + }, + { + "epoch": 1.9324417800322804, + "grad_norm": 1.5497586314138279, + "learning_rate": 6.275599512404084e-09, + "loss": 0.417082279920578, + "step": 8381 + }, + { + "epoch": 1.9326723541618631, + "grad_norm": 1.646560289289956, + "learning_rate": 6.233025292489147e-09, + "loss": 0.41670864820480347, + "step": 8382 + }, + { + "epoch": 1.9329029282914458, + "grad_norm": 1.4085441335066406, + "learning_rate": 6.190595526675446e-09, + "loss": 0.48778587579727173, + "step": 8383 + }, + { + "epoch": 1.9331335024210283, + "grad_norm": 1.39299487584749, + "learning_rate": 6.148310221130604e-09, + "loss": 0.44433802366256714, + "step": 8384 + }, + { + "epoch": 1.933364076550611, + "grad_norm": 1.7057166388160585, + "learning_rate": 6.106169382001369e-09, + "loss": 0.46826764941215515, + "step": 8385 + }, + { + "epoch": 
1.9335946506801935, + "grad_norm": 1.6832081073908207, + "learning_rate": 6.064173015413177e-09, + "loss": 0.5509334802627563, + "step": 8386 + }, + { + "epoch": 1.9338252248097763, + "grad_norm": 1.4200036599053338, + "learning_rate": 6.022321127470698e-09, + "loss": 0.4436245560646057, + "step": 8387 + }, + { + "epoch": 1.934055798939359, + "grad_norm": 1.4658061886752614, + "learning_rate": 5.9806137242574e-09, + "loss": 0.3577145040035248, + "step": 8388 + }, + { + "epoch": 1.9342863730689417, + "grad_norm": 1.3485508447539643, + "learning_rate": 5.939050811835988e-09, + "loss": 0.39893999695777893, + "step": 8389 + }, + { + "epoch": 1.9345169471985244, + "grad_norm": 1.4373848732418595, + "learning_rate": 5.897632396248075e-09, + "loss": 0.4109868109226227, + "step": 8390 + }, + { + "epoch": 1.9347475213281071, + "grad_norm": 1.6148537069486861, + "learning_rate": 5.85635848351429e-09, + "loss": 0.4193134307861328, + "step": 8391 + }, + { + "epoch": 1.9349780954576896, + "grad_norm": 1.774944389887914, + "learning_rate": 5.8152290796340545e-09, + "loss": 0.44189178943634033, + "step": 8392 + }, + { + "epoch": 1.9352086695872723, + "grad_norm": 1.7653802191556502, + "learning_rate": 5.774244190586141e-09, + "loss": 0.5014302730560303, + "step": 8393 + }, + { + "epoch": 1.9354392437168548, + "grad_norm": 1.5565367331009852, + "learning_rate": 5.733403822328009e-09, + "loss": 0.4962024688720703, + "step": 8394 + }, + { + "epoch": 1.9356698178464375, + "grad_norm": 1.585877874844532, + "learning_rate": 5.69270798079613e-09, + "loss": 0.45495474338531494, + "step": 8395 + }, + { + "epoch": 1.9359003919760203, + "grad_norm": 1.4665884192601668, + "learning_rate": 5.652156671906105e-09, + "loss": 0.49062758684158325, + "step": 8396 + }, + { + "epoch": 1.936130966105603, + "grad_norm": 1.6573434385643893, + "learning_rate": 5.611749901552554e-09, + "loss": 0.45899879932403564, + "step": 8397 + }, + { + "epoch": 1.9363615402351857, + "grad_norm": 1.511951038657192, + 
"learning_rate": 5.57148767560911e-09, + "loss": 0.47287002205848694, + "step": 8398 + }, + { + "epoch": 1.9365921143647684, + "grad_norm": 1.5970704539129832, + "learning_rate": 5.531369999927982e-09, + "loss": 0.439136266708374, + "step": 8399 + }, + { + "epoch": 1.936822688494351, + "grad_norm": 1.2795152915391526, + "learning_rate": 5.4913968803410594e-09, + "loss": 0.3920954465866089, + "step": 8400 + }, + { + "epoch": 1.9370532626239336, + "grad_norm": 1.254790295470771, + "learning_rate": 5.451568322658473e-09, + "loss": 0.4608895480632782, + "step": 8401 + }, + { + "epoch": 1.9372838367535161, + "grad_norm": 1.4389672316514175, + "learning_rate": 5.4118843326699246e-09, + "loss": 0.4617875814437866, + "step": 8402 + }, + { + "epoch": 1.9375144108830988, + "grad_norm": 1.8398027260263112, + "learning_rate": 5.372344916143912e-09, + "loss": 0.5293254852294922, + "step": 8403 + }, + { + "epoch": 1.9377449850126816, + "grad_norm": 1.2603762011573385, + "learning_rate": 5.332950078827725e-09, + "loss": 0.3935343623161316, + "step": 8404 + }, + { + "epoch": 1.9379755591422643, + "grad_norm": 1.3159194137267558, + "learning_rate": 5.293699826447895e-09, + "loss": 0.4612414240837097, + "step": 8405 + }, + { + "epoch": 1.938206133271847, + "grad_norm": 1.5616222982589931, + "learning_rate": 5.254594164709858e-09, + "loss": 0.4779428243637085, + "step": 8406 + }, + { + "epoch": 1.9384367074014297, + "grad_norm": 1.3393838173044101, + "learning_rate": 5.215633099298067e-09, + "loss": 0.37436819076538086, + "step": 8407 + }, + { + "epoch": 1.9386672815310122, + "grad_norm": 1.5367283978531407, + "learning_rate": 5.1768166358757695e-09, + "loss": 0.458698570728302, + "step": 8408 + }, + { + "epoch": 1.938897855660595, + "grad_norm": 1.52395102556278, + "learning_rate": 5.1381447800854515e-09, + "loss": 0.39365172386169434, + "step": 8409 + }, + { + "epoch": 1.9391284297901774, + "grad_norm": 1.6915141620999796, + "learning_rate": 5.099617537548284e-09, + "loss": 
0.46358722448349, + "step": 8410 + }, + { + "epoch": 1.9393590039197601, + "grad_norm": 1.4920931037664487, + "learning_rate": 5.061234913864898e-09, + "loss": 0.4286697506904602, + "step": 8411 + }, + { + "epoch": 1.9395895780493428, + "grad_norm": 1.2865245997479036, + "learning_rate": 5.022996914614275e-09, + "loss": 0.4925898015499115, + "step": 8412 + }, + { + "epoch": 1.9398201521789256, + "grad_norm": 1.5226712255874009, + "learning_rate": 4.984903545354857e-09, + "loss": 0.46924275159835815, + "step": 8413 + }, + { + "epoch": 1.9400507263085083, + "grad_norm": 1.5857623247989538, + "learning_rate": 4.946954811623994e-09, + "loss": 0.5326268672943115, + "step": 8414 + }, + { + "epoch": 1.940281300438091, + "grad_norm": 1.5901041586459477, + "learning_rate": 4.909150718937716e-09, + "loss": 0.4367690682411194, + "step": 8415 + }, + { + "epoch": 1.9405118745676735, + "grad_norm": 1.5390541996103484, + "learning_rate": 4.8714912727914055e-09, + "loss": 0.45579224824905396, + "step": 8416 + }, + { + "epoch": 1.9407424486972562, + "grad_norm": 1.5246826105956603, + "learning_rate": 4.8339764786590186e-09, + "loss": 0.4420431852340698, + "step": 8417 + }, + { + "epoch": 1.9409730228268387, + "grad_norm": 1.7713819487127218, + "learning_rate": 4.79660634199397e-09, + "loss": 0.4175274670124054, + "step": 8418 + }, + { + "epoch": 1.9412035969564214, + "grad_norm": 1.4046803968065067, + "learning_rate": 4.759380868228246e-09, + "loss": 0.41451364755630493, + "step": 8419 + }, + { + "epoch": 1.9414341710860041, + "grad_norm": 1.5394804899846177, + "learning_rate": 4.722300062772966e-09, + "loss": 0.4211805462837219, + "step": 8420 + }, + { + "epoch": 1.9416647452155869, + "grad_norm": 1.5805052208208792, + "learning_rate": 4.68536393101826e-09, + "loss": 0.4458296000957489, + "step": 8421 + }, + { + "epoch": 1.9418953193451696, + "grad_norm": 1.8263114249420374, + "learning_rate": 4.648572478333057e-09, + "loss": 0.6226488351821899, + "step": 8422 + }, + { + "epoch": 
1.9421258934747523, + "grad_norm": 1.467298573422793, + "learning_rate": 4.611925710065523e-09, + "loss": 0.343037486076355, + "step": 8423 + }, + { + "epoch": 1.9423564676043348, + "grad_norm": 1.4279799784372957, + "learning_rate": 4.575423631542397e-09, + "loss": 0.42478299140930176, + "step": 8424 + }, + { + "epoch": 1.9425870417339175, + "grad_norm": 1.4809253602160373, + "learning_rate": 4.539066248069878e-09, + "loss": 0.4467424750328064, + "step": 8425 + }, + { + "epoch": 1.9428176158635, + "grad_norm": 1.5230213064501263, + "learning_rate": 4.50285356493274e-09, + "loss": 0.4598960876464844, + "step": 8426 + }, + { + "epoch": 1.9430481899930827, + "grad_norm": 1.767389183054306, + "learning_rate": 4.466785587394883e-09, + "loss": 0.43005913496017456, + "step": 8427 + }, + { + "epoch": 1.9432787641226654, + "grad_norm": 1.6819998310369073, + "learning_rate": 4.430862320699114e-09, + "loss": 0.4259253740310669, + "step": 8428 + }, + { + "epoch": 1.9435093382522481, + "grad_norm": 1.4809575809160866, + "learning_rate": 4.395083770067476e-09, + "loss": 0.4275285601615906, + "step": 8429 + }, + { + "epoch": 1.9437399123818309, + "grad_norm": 1.5009509074634573, + "learning_rate": 4.3594499407003656e-09, + "loss": 0.42151302099227905, + "step": 8430 + }, + { + "epoch": 1.9439704865114136, + "grad_norm": 1.2121055184272223, + "learning_rate": 4.3239608377778625e-09, + "loss": 0.41727957129478455, + "step": 8431 + }, + { + "epoch": 1.944201060640996, + "grad_norm": 1.6993320655678226, + "learning_rate": 4.288616466458395e-09, + "loss": 0.5026905536651611, + "step": 8432 + }, + { + "epoch": 1.9444316347705788, + "grad_norm": 1.7732059667125062, + "learning_rate": 4.2534168318798524e-09, + "loss": 0.5170408487319946, + "step": 8433 + }, + { + "epoch": 1.9446622089001613, + "grad_norm": 1.4027101607713113, + "learning_rate": 4.21836193915881e-09, + "loss": 0.3918447196483612, + "step": 8434 + }, + { + "epoch": 1.944892783029744, + "grad_norm": 1.6652823795220828, + 
"learning_rate": 4.183451793390747e-09, + "loss": 0.49871906638145447, + "step": 8435 + }, + { + "epoch": 1.9451233571593267, + "grad_norm": 1.4696705484226025, + "learning_rate": 4.1486863996502694e-09, + "loss": 0.43729400634765625, + "step": 8436 + }, + { + "epoch": 1.9453539312889094, + "grad_norm": 1.6971586346839116, + "learning_rate": 4.114065762990781e-09, + "loss": 0.49198442697525024, + "step": 8437 + }, + { + "epoch": 1.9455845054184921, + "grad_norm": 1.7555960999646751, + "learning_rate": 4.079589888444923e-09, + "loss": 0.48610788583755493, + "step": 8438 + }, + { + "epoch": 1.9458150795480749, + "grad_norm": 1.4385738810997333, + "learning_rate": 4.045258781024019e-09, + "loss": 0.43962734937667847, + "step": 8439 + }, + { + "epoch": 1.9460456536776574, + "grad_norm": 1.5800303425440292, + "learning_rate": 4.011072445718522e-09, + "loss": 0.3320704400539398, + "step": 8440 + }, + { + "epoch": 1.94627622780724, + "grad_norm": 1.6634559640737916, + "learning_rate": 3.977030887497568e-09, + "loss": 0.4773918092250824, + "step": 8441 + }, + { + "epoch": 1.9465068019368226, + "grad_norm": 1.6386159776295786, + "learning_rate": 3.9431341113096425e-09, + "loss": 0.424363911151886, + "step": 8442 + }, + { + "epoch": 1.9467373760664053, + "grad_norm": 1.9939094308024221, + "learning_rate": 3.9093821220818055e-09, + "loss": 0.5321601033210754, + "step": 8443 + }, + { + "epoch": 1.946967950195988, + "grad_norm": 1.7091737329216896, + "learning_rate": 3.875774924720465e-09, + "loss": 0.48579344153404236, + "step": 8444 + }, + { + "epoch": 1.9471985243255707, + "grad_norm": 1.4617398717494952, + "learning_rate": 3.842312524110603e-09, + "loss": 0.39313316345214844, + "step": 8445 + }, + { + "epoch": 1.9474290984551534, + "grad_norm": 1.6233833617742501, + "learning_rate": 3.8089949251163264e-09, + "loss": 0.522427499294281, + "step": 8446 + }, + { + "epoch": 1.9476596725847362, + "grad_norm": 1.601217744469266, + "learning_rate": 3.775822132580875e-09, + "loss": 
0.3822653889656067, + "step": 8447 + }, + { + "epoch": 1.9478902467143187, + "grad_norm": 1.5787465509087006, + "learning_rate": 3.7427941513259454e-09, + "loss": 0.4322483241558075, + "step": 8448 + }, + { + "epoch": 1.9481208208439014, + "grad_norm": 1.6934897718136162, + "learning_rate": 3.7099109861528087e-09, + "loss": 0.4862939715385437, + "step": 8449 + }, + { + "epoch": 1.9483513949734839, + "grad_norm": 1.5875963080752307, + "learning_rate": 3.6771726418410863e-09, + "loss": 0.45388323068618774, + "step": 8450 + }, + { + "epoch": 1.9485819691030666, + "grad_norm": 1.5187043160616758, + "learning_rate": 3.644579123149749e-09, + "loss": 0.3937215805053711, + "step": 8451 + }, + { + "epoch": 1.9488125432326493, + "grad_norm": 1.5446261991465484, + "learning_rate": 3.6121304348165628e-09, + "loss": 0.46887993812561035, + "step": 8452 + }, + { + "epoch": 1.949043117362232, + "grad_norm": 1.763834546986469, + "learning_rate": 3.5798265815584204e-09, + "loss": 0.4444226026535034, + "step": 8453 + }, + { + "epoch": 1.9492736914918147, + "grad_norm": 1.639572253352884, + "learning_rate": 3.5476675680709e-09, + "loss": 0.4938625991344452, + "step": 8454 + }, + { + "epoch": 1.9495042656213972, + "grad_norm": 1.456362188758518, + "learning_rate": 3.5156533990285953e-09, + "loss": 0.37632471323013306, + "step": 8455 + }, + { + "epoch": 1.94973483975098, + "grad_norm": 1.8608548289842328, + "learning_rate": 3.483784079085117e-09, + "loss": 0.4345025420188904, + "step": 8456 + }, + { + "epoch": 1.9499654138805624, + "grad_norm": 1.4598938490767328, + "learning_rate": 3.4520596128729818e-09, + "loss": 0.3721727132797241, + "step": 8457 + }, + { + "epoch": 1.9501959880101452, + "grad_norm": 1.6409042038383927, + "learning_rate": 3.4204800050037232e-09, + "loss": 0.4871670603752136, + "step": 8458 + }, + { + "epoch": 1.9504265621397279, + "grad_norm": 1.8307964169711943, + "learning_rate": 3.38904526006778e-09, + "loss": 0.578133225440979, + "step": 8459 + }, + { + "epoch": 
1.9506571362693106, + "grad_norm": 1.5202457315236042, + "learning_rate": 3.357755382634386e-09, + "loss": 0.4721870422363281, + "step": 8460 + }, + { + "epoch": 1.9508877103988933, + "grad_norm": 1.798795599183991, + "learning_rate": 3.3266103772519037e-09, + "loss": 0.4569184184074402, + "step": 8461 + }, + { + "epoch": 1.951118284528476, + "grad_norm": 1.7311036262190431, + "learning_rate": 3.2956102484477112e-09, + "loss": 0.48763811588287354, + "step": 8462 + }, + { + "epoch": 1.9513488586580585, + "grad_norm": 1.5898725581558353, + "learning_rate": 3.264755000727759e-09, + "loss": 0.45957818627357483, + "step": 8463 + }, + { + "epoch": 1.9515794327876412, + "grad_norm": 1.661536076059429, + "learning_rate": 3.234044638577238e-09, + "loss": 0.49398598074913025, + "step": 8464 + }, + { + "epoch": 1.9518100069172237, + "grad_norm": 1.8367269278410805, + "learning_rate": 3.2034791664603544e-09, + "loss": 0.48884931206703186, + "step": 8465 + }, + { + "epoch": 1.9520405810468064, + "grad_norm": 1.4322798652039197, + "learning_rate": 3.173058588819999e-09, + "loss": 0.45171886682510376, + "step": 8466 + }, + { + "epoch": 1.9522711551763892, + "grad_norm": 1.7896431151356735, + "learning_rate": 3.142782910077968e-09, + "loss": 0.45110028982162476, + "step": 8467 + }, + { + "epoch": 1.9525017293059719, + "grad_norm": 1.6339596386172939, + "learning_rate": 3.1126521346354074e-09, + "loss": 0.4602523446083069, + "step": 8468 + }, + { + "epoch": 1.9527323034355546, + "grad_norm": 1.4993439724695443, + "learning_rate": 3.082666266872036e-09, + "loss": 0.3908727169036865, + "step": 8469 + }, + { + "epoch": 1.9529628775651373, + "grad_norm": 1.6588394319404383, + "learning_rate": 3.0528253111464786e-09, + "loss": 0.4886831045150757, + "step": 8470 + }, + { + "epoch": 1.9531934516947198, + "grad_norm": 1.8142188930520524, + "learning_rate": 3.023129271796598e-09, + "loss": 0.4407721161842346, + "step": 8471 + }, + { + "epoch": 1.9534240258243025, + "grad_norm": 
1.545809203271424, + "learning_rate": 2.9935781531389425e-09, + "loss": 0.46958622336387634, + "step": 8472 + }, + { + "epoch": 1.953654599953885, + "grad_norm": 1.5632050072309709, + "learning_rate": 2.964171959469075e-09, + "loss": 0.4642796516418457, + "step": 8473 + }, + { + "epoch": 1.9538851740834677, + "grad_norm": 1.5522529280671595, + "learning_rate": 2.9349106950613545e-09, + "loss": 0.5124588012695312, + "step": 8474 + }, + { + "epoch": 1.9541157482130505, + "grad_norm": 1.7441462887025347, + "learning_rate": 2.9057943641693784e-09, + "loss": 0.516730546951294, + "step": 8475 + }, + { + "epoch": 1.9543463223426332, + "grad_norm": 1.6015713883307108, + "learning_rate": 2.876822971025428e-09, + "loss": 0.47847944498062134, + "step": 8476 + }, + { + "epoch": 1.9545768964722159, + "grad_norm": 1.9133896423438201, + "learning_rate": 2.8479965198408007e-09, + "loss": 0.5167095065116882, + "step": 8477 + }, + { + "epoch": 1.9548074706017986, + "grad_norm": 1.4489948600651796, + "learning_rate": 2.819315014805812e-09, + "loss": 0.40728163719177246, + "step": 8478 + }, + { + "epoch": 1.955038044731381, + "grad_norm": 1.4413821780207463, + "learning_rate": 2.790778460089349e-09, + "loss": 0.49741852283477783, + "step": 8479 + }, + { + "epoch": 1.9552686188609638, + "grad_norm": 1.3759130199865537, + "learning_rate": 2.7623868598397603e-09, + "loss": 0.33847475051879883, + "step": 8480 + }, + { + "epoch": 1.9554991929905463, + "grad_norm": 1.6995475203184411, + "learning_rate": 2.734140218183856e-09, + "loss": 0.39727652072906494, + "step": 8481 + }, + { + "epoch": 1.955729767120129, + "grad_norm": 1.7012108842781224, + "learning_rate": 2.706038539227795e-09, + "loss": 0.40332260727882385, + "step": 8482 + }, + { + "epoch": 1.9559603412497117, + "grad_norm": 1.3388931691886075, + "learning_rate": 2.6780818270562e-09, + "loss": 0.40296924114227295, + "step": 8483 + }, + { + "epoch": 1.9561909153792945, + "grad_norm": 1.4889010944404621, + "learning_rate": 
2.650270085732931e-09, + "loss": 0.4253476858139038, + "step": 8484 + }, + { + "epoch": 1.9564214895088772, + "grad_norm": 1.5794301308382195, + "learning_rate": 2.6226033193007535e-09, + "loss": 0.448941171169281, + "step": 8485 + }, + { + "epoch": 1.95665206363846, + "grad_norm": 1.9411463996799059, + "learning_rate": 2.59508153178134e-09, + "loss": 0.48213180899620056, + "step": 8486 + }, + { + "epoch": 1.9568826377680424, + "grad_norm": 1.6243019689896288, + "learning_rate": 2.5677047271752683e-09, + "loss": 0.48886558413505554, + "step": 8487 + }, + { + "epoch": 1.957113211897625, + "grad_norm": 1.4212209484619593, + "learning_rate": 2.5404729094619103e-09, + "loss": 0.49786341190338135, + "step": 8488 + }, + { + "epoch": 1.9573437860272076, + "grad_norm": 2.1312601270605365, + "learning_rate": 2.5133860825997667e-09, + "loss": 0.4487866163253784, + "step": 8489 + }, + { + "epoch": 1.9575743601567903, + "grad_norm": 1.7672945087914924, + "learning_rate": 2.486444250526243e-09, + "loss": 0.46193206310272217, + "step": 8490 + }, + { + "epoch": 1.957804934286373, + "grad_norm": 1.5923899778865398, + "learning_rate": 2.459647417157429e-09, + "loss": 0.44729042053222656, + "step": 8491 + }, + { + "epoch": 1.9580355084159557, + "grad_norm": 1.8298057614969963, + "learning_rate": 2.432995586388764e-09, + "loss": 0.4646851718425751, + "step": 8492 + }, + { + "epoch": 1.9582660825455385, + "grad_norm": 1.6514495959092017, + "learning_rate": 2.40648876209415e-09, + "loss": 0.49538400769233704, + "step": 8493 + }, + { + "epoch": 1.9584966566751212, + "grad_norm": 1.7330889796307278, + "learning_rate": 2.3801269481267262e-09, + "loss": 0.5548783540725708, + "step": 8494 + }, + { + "epoch": 1.9587272308047037, + "grad_norm": 1.65108674708811, + "learning_rate": 2.3539101483184277e-09, + "loss": 0.4390280544757843, + "step": 8495 + }, + { + "epoch": 1.9589578049342864, + "grad_norm": 1.323831070791993, + "learning_rate": 2.327838366480095e-09, + "loss": 0.3079942464828491, 
+ "step": 8496 + }, + { + "epoch": 1.959188379063869, + "grad_norm": 2.030408303723105, + "learning_rate": 2.301911606401585e-09, + "loss": 0.5199894309043884, + "step": 8497 + }, + { + "epoch": 1.9594189531934516, + "grad_norm": 1.6402740340647268, + "learning_rate": 2.276129871851662e-09, + "loss": 0.3403523564338684, + "step": 8498 + }, + { + "epoch": 1.9596495273230343, + "grad_norm": 1.785907762491574, + "learning_rate": 2.2504931665777714e-09, + "loss": 0.49699991941452026, + "step": 8499 + }, + { + "epoch": 1.959880101452617, + "grad_norm": 1.5969429106714301, + "learning_rate": 2.2250014943066e-09, + "loss": 0.4178547263145447, + "step": 8500 + } + ], + "logging_steps": 1, + "max_steps": 8674, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2934193071882240.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8500/training_args.bin b/checkpoint-8500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9d22a9f5260d66a35a24391e4e9c5ae1d42e2bf --- /dev/null +++ b/checkpoint-8500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b4d99570e121a32da71712aa554f3b32e79266529670ac42e5a5b8fc07e99d +size 6968 diff --git a/checkpoint-8500/zero_to_fp32.py b/checkpoint-8500/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-8500/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = 
os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = 
state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." 
+ ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = 
len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + 
print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + 
if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = 
zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. + """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = 
torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = 
GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for 
training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pseudo tensor instead of torch tensor, which is more memory efficient. + Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.items(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it is no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model``: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-8600/README.md b/checkpoint-8600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-8600/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-8600/adapter_config.json b/checkpoint-8600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f36e32e61c434af152644134a13070b69334e6c --- /dev/null +++ b/checkpoint-8600/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.0.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.4.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.18.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.8.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.6.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.24.mlp.down_proj", + "layers.17.mlp.gate_proj", + "layers.21.mlp.down_proj", + "layers.15.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.26.mlp.up_proj", + "q_proj", + "layers.20.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.8.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.0.mlp.gate_proj", + "layers.12.mlp.gate_proj", + "layers.2.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.25.mlp.up_proj", + "k_proj", + "layers.1.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.9.mlp.up_proj", + "o_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.gate_proj", + 
"layers.6.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.16.mlp.gate_proj", + "layers.21.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.27.mlp.up_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.23.mlp.gate_proj", + "layers.10.mlp.down_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-8600/adapter_model.safetensors b/checkpoint-8600/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..526e111f07f8368eadef1871a9c2734c5196271e --- /dev/null +++ b/checkpoint-8600/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6290e0b2b1251148ee58c2b47b218f485c97f95319117557aa222f9b7ca70cdf +size 40428088 diff --git a/checkpoint-8600/chat_template.jinja b/checkpoint-8600/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-8600/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set 
video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8600/global_step8600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-8600/global_step8600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09ec6ea46a5b4113a2129ea81dd78c2e97bc454a --- /dev/null +++ b/checkpoint-8600/global_step8600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc318cae88f07b9519daba7237fb7d71b7198ea59b20ebbcea998c8f948d8056 +size 242224880 diff --git a/checkpoint-8600/global_step8600/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-8600/global_step8600/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a429a1063f3e169437082971e83c69a9ada88fd --- /dev/null +++ b/checkpoint-8600/global_step8600/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f075d95c92c7749bf6e51b7cbbd76a4398c551ca0d0b0eaa79d6910254aeea +size 460630 diff --git a/checkpoint-8600/latest b/checkpoint-8600/latest new file mode 100644 index 0000000000000000000000000000000000000000..bd2f68de4142a438c6f7e9678de46c517960b066 --- /dev/null +++ b/checkpoint-8600/latest @@ -0,0 +1 @@ +global_step8600 \ No newline at end of file diff --git a/checkpoint-8600/processor_config.json b/checkpoint-8600/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-8600/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-8600/rng_state.pth b/checkpoint-8600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fcf11c9b78de2c2c55fdfc44daef09cd9181c14 --- /dev/null +++ b/checkpoint-8600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc398a73e46bca50defc25b4467441315246a33383a5d6c80985d238e57127f +size 14244 diff --git a/checkpoint-8600/scheduler.pt b/checkpoint-8600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..83bcb7cd144949bfca446417cd7fad99083ec5e5 --- /dev/null +++ b/checkpoint-8600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e1f1a93aae5d12e948145f1c69d9b77b1a1408df74307af855dda0830efcff +size 
1000 diff --git a/checkpoint-8600/tokenizer.json b/checkpoint-8600/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-8600/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-8600/tokenizer_config.json b/checkpoint-8600/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-8600/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-8600/trainer_state.json b/checkpoint-8600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8a21da648b7684d21c6ccb1d3fa40d10494fd9cb --- /dev/null +++ b/checkpoint-8600/trainer_state.json @@ -0,0 +1,60234 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.982937514410883, + "eval_steps": 500, + "global_step": 8600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00023057412958266084, + "grad_norm": 
0.5456158480642083, + "learning_rate": 0.0, + "loss": 1.2793785333633423, + "step": 1 + }, + { + "epoch": 0.0004611482591653217, + "grad_norm": 0.5348414425588685, + "learning_rate": 4.6082949308755755e-09, + "loss": 1.2810249328613281, + "step": 2 + }, + { + "epoch": 0.0006917223887479825, + "grad_norm": 0.5742665952103186, + "learning_rate": 9.216589861751151e-09, + "loss": 1.5180970430374146, + "step": 3 + }, + { + "epoch": 0.0009222965183306433, + "grad_norm": 0.47570843593061296, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.2771815061569214, + "step": 4 + }, + { + "epoch": 0.001152870647913304, + "grad_norm": 0.6179854753010914, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.6275714635849, + "step": 5 + }, + { + "epoch": 0.001383444777495965, + "grad_norm": 0.5728287935763549, + "learning_rate": 2.304147465437788e-08, + "loss": 1.4852838516235352, + "step": 6 + }, + { + "epoch": 0.0016140189070786258, + "grad_norm": 0.7402806033919309, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.3845010995864868, + "step": 7 + }, + { + "epoch": 0.0018445930366612867, + "grad_norm": 0.5357861516775319, + "learning_rate": 3.225806451612903e-08, + "loss": 1.2716574668884277, + "step": 8 + }, + { + "epoch": 0.0020751671662439476, + "grad_norm": 0.49378309074438254, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.4046194553375244, + "step": 9 + }, + { + "epoch": 0.002305741295826608, + "grad_norm": 0.5231726157264511, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.4988269805908203, + "step": 10 + }, + { + "epoch": 0.002536315425409269, + "grad_norm": 0.5469518790093721, + "learning_rate": 4.608294930875576e-08, + "loss": 1.3523340225219727, + "step": 11 + }, + { + "epoch": 0.00276688955499193, + "grad_norm": 0.5125117134786147, + "learning_rate": 5.069124423963134e-08, + "loss": 1.3664941787719727, + "step": 12 + }, + { + "epoch": 0.0029974636845745907, + "grad_norm": 0.5526794406387441, + "learning_rate": 5.529953917050691e-08, + "loss": 
1.4892609119415283, + "step": 13 + }, + { + "epoch": 0.0032280378141572516, + "grad_norm": 0.5197262159341672, + "learning_rate": 5.990783410138249e-08, + "loss": 1.305836796760559, + "step": 14 + }, + { + "epoch": 0.0034586119437399125, + "grad_norm": 0.5214120337499729, + "learning_rate": 6.451612903225806e-08, + "loss": 1.3458774089813232, + "step": 15 + }, + { + "epoch": 0.0036891860733225734, + "grad_norm": 0.5249821302153419, + "learning_rate": 6.912442396313364e-08, + "loss": 1.4305222034454346, + "step": 16 + }, + { + "epoch": 0.003919760202905234, + "grad_norm": 0.48597332722440695, + "learning_rate": 7.373271889400921e-08, + "loss": 1.4247705936431885, + "step": 17 + }, + { + "epoch": 0.004150334332487895, + "grad_norm": 0.5492563451667527, + "learning_rate": 7.834101382488478e-08, + "loss": 1.4151098728179932, + "step": 18 + }, + { + "epoch": 0.004380908462070556, + "grad_norm": 0.4931832122178826, + "learning_rate": 8.294930875576037e-08, + "loss": 1.4633708000183105, + "step": 19 + }, + { + "epoch": 0.004611482591653216, + "grad_norm": 0.4601872454406169, + "learning_rate": 8.755760368663594e-08, + "loss": 1.2271082401275635, + "step": 20 + }, + { + "epoch": 0.004842056721235877, + "grad_norm": 0.5482366075993729, + "learning_rate": 9.216589861751152e-08, + "loss": 1.493757724761963, + "step": 21 + }, + { + "epoch": 0.005072630850818538, + "grad_norm": 0.5190439230451068, + "learning_rate": 9.677419354838709e-08, + "loss": 1.446916103363037, + "step": 22 + }, + { + "epoch": 0.005303204980401199, + "grad_norm": 0.5010656217784003, + "learning_rate": 1.0138248847926267e-07, + "loss": 1.4575269222259521, + "step": 23 + }, + { + "epoch": 0.00553377910998386, + "grad_norm": 0.5983934917725938, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.5000505447387695, + "step": 24 + }, + { + "epoch": 0.005764353239566521, + "grad_norm": 0.5264341016273323, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.32895827293396, + "step": 25 + }, + { + "epoch": 
0.005994927369149181, + "grad_norm": 0.5507902323042685, + "learning_rate": 1.152073732718894e-07, + "loss": 1.479337215423584, + "step": 26 + }, + { + "epoch": 0.006225501498731842, + "grad_norm": 0.4597707182389027, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.3543293476104736, + "step": 27 + }, + { + "epoch": 0.006456075628314503, + "grad_norm": 0.4984681813259071, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.3075106143951416, + "step": 28 + }, + { + "epoch": 0.006686649757897164, + "grad_norm": 0.540668752320374, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.2077248096466064, + "step": 29 + }, + { + "epoch": 0.006917223887479825, + "grad_norm": 0.5053904313535789, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2841781377792358, + "step": 30 + }, + { + "epoch": 0.0071477980170624855, + "grad_norm": 0.5007265235886551, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.4022557735443115, + "step": 31 + }, + { + "epoch": 0.007378372146645147, + "grad_norm": 0.5376464155945276, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.4971141815185547, + "step": 32 + }, + { + "epoch": 0.007608946276227807, + "grad_norm": 0.49485432736210644, + "learning_rate": 1.4746543778801842e-07, + "loss": 1.3699426651000977, + "step": 33 + }, + { + "epoch": 0.007839520405810468, + "grad_norm": 0.602690054138726, + "learning_rate": 1.52073732718894e-07, + "loss": 1.466570258140564, + "step": 34 + }, + { + "epoch": 0.008070094535393129, + "grad_norm": 0.544784030735669, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.3031455278396606, + "step": 35 + }, + { + "epoch": 0.00830066866497579, + "grad_norm": 0.5516628365932859, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.3989369869232178, + "step": 36 + }, + { + "epoch": 0.00853124279455845, + "grad_norm": 0.5375908894429152, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.41139817237854, + "step": 37 + }, + { + "epoch": 0.008761816924141111, + "grad_norm": 
0.4923010186613349, + "learning_rate": 1.705069124423963e-07, + "loss": 1.305363655090332, + "step": 38 + }, + { + "epoch": 0.008992391053723773, + "grad_norm": 0.5782996548067549, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3931915760040283, + "step": 39 + }, + { + "epoch": 0.009222965183306432, + "grad_norm": 0.5425552369520273, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.4728009700775146, + "step": 40 + }, + { + "epoch": 0.009453539312889093, + "grad_norm": 0.5162050268750099, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.4165544509887695, + "step": 41 + }, + { + "epoch": 0.009684113442471755, + "grad_norm": 0.509079818266607, + "learning_rate": 1.889400921658986e-07, + "loss": 1.3693115711212158, + "step": 42 + }, + { + "epoch": 0.009914687572054416, + "grad_norm": 0.5804116282906935, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.468721866607666, + "step": 43 + }, + { + "epoch": 0.010145261701637076, + "grad_norm": 0.5466645633601509, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.4732704162597656, + "step": 44 + }, + { + "epoch": 0.010375835831219737, + "grad_norm": 0.4534942899185725, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.2579209804534912, + "step": 45 + }, + { + "epoch": 0.010606409960802398, + "grad_norm": 0.4766380716605293, + "learning_rate": 2.073732718894009e-07, + "loss": 1.3587429523468018, + "step": 46 + }, + { + "epoch": 0.010836984090385058, + "grad_norm": 0.5409254453286721, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.368800401687622, + "step": 47 + }, + { + "epoch": 0.01106755821996772, + "grad_norm": 0.5103994243466702, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.2960132360458374, + "step": 48 + }, + { + "epoch": 0.01129813234955038, + "grad_norm": 0.47493679434319974, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.3035235404968262, + "step": 49 + }, + { + "epoch": 0.011528706479133042, + "grad_norm": 0.5271868916321076, + "learning_rate": 
2.2580645161290322e-07, + "loss": 1.5074443817138672, + "step": 50 + }, + { + "epoch": 0.011759280608715702, + "grad_norm": 0.5381217045242119, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4689760208129883, + "step": 51 + }, + { + "epoch": 0.011989854738298363, + "grad_norm": 0.4629483381608022, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.3542251586914062, + "step": 52 + }, + { + "epoch": 0.012220428867881024, + "grad_norm": 0.4592532760230554, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.3521728515625, + "step": 53 + }, + { + "epoch": 0.012451002997463684, + "grad_norm": 0.5030837073491258, + "learning_rate": 2.442396313364055e-07, + "loss": 1.3577494621276855, + "step": 54 + }, + { + "epoch": 0.012681577127046345, + "grad_norm": 0.5438911836333451, + "learning_rate": 2.488479262672811e-07, + "loss": 1.459476351737976, + "step": 55 + }, + { + "epoch": 0.012912151256629006, + "grad_norm": 0.52516269169267, + "learning_rate": 2.534562211981567e-07, + "loss": 1.484410047531128, + "step": 56 + }, + { + "epoch": 0.013142725386211668, + "grad_norm": 0.5188914022486312, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3589065074920654, + "step": 57 + }, + { + "epoch": 0.013373299515794327, + "grad_norm": 0.5619229477118247, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.3558262586593628, + "step": 58 + }, + { + "epoch": 0.013603873645376989, + "grad_norm": 0.5534574014271282, + "learning_rate": 2.672811059907834e-07, + "loss": 1.5165367126464844, + "step": 59 + }, + { + "epoch": 0.01383444777495965, + "grad_norm": 0.47598313164662104, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.3051776885986328, + "step": 60 + }, + { + "epoch": 0.01406502190454231, + "grad_norm": 0.45011107968146047, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.2916524410247803, + "step": 61 + }, + { + "epoch": 0.014295596034124971, + "grad_norm": 0.513792634149487, + "learning_rate": 2.8110599078341015e-07, + "loss": 1.440261721611023, + 
"step": 62 + }, + { + "epoch": 0.014526170163707632, + "grad_norm": 0.5424492375693261, + "learning_rate": 2.857142857142857e-07, + "loss": 1.3422625064849854, + "step": 63 + }, + { + "epoch": 0.014756744293290294, + "grad_norm": 0.4598784526258713, + "learning_rate": 2.903225806451613e-07, + "loss": 1.374439001083374, + "step": 64 + }, + { + "epoch": 0.014987318422872953, + "grad_norm": 0.5339252174305668, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.4382294416427612, + "step": 65 + }, + { + "epoch": 0.015217892552455614, + "grad_norm": 0.5302645203365586, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.3971002101898193, + "step": 66 + }, + { + "epoch": 0.015448466682038276, + "grad_norm": 0.5711144083332746, + "learning_rate": 3.04147465437788e-07, + "loss": 1.376272439956665, + "step": 67 + }, + { + "epoch": 0.015679040811620935, + "grad_norm": 0.5016109357973636, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.3135097026824951, + "step": 68 + }, + { + "epoch": 0.015909614941203597, + "grad_norm": 0.5041882505031982, + "learning_rate": 3.133640552995391e-07, + "loss": 1.2688875198364258, + "step": 69 + }, + { + "epoch": 0.016140189070786258, + "grad_norm": 0.544108037399583, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.4380691051483154, + "step": 70 + }, + { + "epoch": 0.01637076320036892, + "grad_norm": 0.5634345795303867, + "learning_rate": 3.225806451612903e-07, + "loss": 1.319260835647583, + "step": 71 + }, + { + "epoch": 0.01660133732995158, + "grad_norm": 0.5352869486400713, + "learning_rate": 3.271889400921659e-07, + "loss": 1.4083738327026367, + "step": 72 + }, + { + "epoch": 0.01683191145953424, + "grad_norm": 0.5524091199068598, + "learning_rate": 3.317972350230415e-07, + "loss": 1.4904775619506836, + "step": 73 + }, + { + "epoch": 0.0170624855891169, + "grad_norm": 0.5488563092854116, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.4534616470336914, + "step": 74 + }, + { + "epoch": 0.01729305971869956, + 
"grad_norm": 0.621117268365485, + "learning_rate": 3.410138248847926e-07, + "loss": 1.6545689105987549, + "step": 75 + }, + { + "epoch": 0.017523633848282223, + "grad_norm": 0.4834761822798673, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.2267192602157593, + "step": 76 + }, + { + "epoch": 0.017754207977864884, + "grad_norm": 0.5801091305703396, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.4207227230072021, + "step": 77 + }, + { + "epoch": 0.017984782107447545, + "grad_norm": 0.5253671028782199, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.4952092170715332, + "step": 78 + }, + { + "epoch": 0.018215356237030206, + "grad_norm": 0.4832223487637491, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2932121753692627, + "step": 79 + }, + { + "epoch": 0.018445930366612864, + "grad_norm": 0.5623376259320272, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.3855851888656616, + "step": 80 + }, + { + "epoch": 0.018676504496195526, + "grad_norm": 0.45682252121341854, + "learning_rate": 3.686635944700461e-07, + "loss": 1.3645650148391724, + "step": 81 + }, + { + "epoch": 0.018907078625778187, + "grad_norm": 0.49579660369860507, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.322283387184143, + "step": 82 + }, + { + "epoch": 0.01913765275536085, + "grad_norm": 0.5177315365924456, + "learning_rate": 3.778801843317972e-07, + "loss": 1.3363629579544067, + "step": 83 + }, + { + "epoch": 0.01936822688494351, + "grad_norm": 0.616201260540867, + "learning_rate": 3.824884792626728e-07, + "loss": 1.553279161453247, + "step": 84 + }, + { + "epoch": 0.01959880101452617, + "grad_norm": 0.5198473540371843, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.4434814453125, + "step": 85 + }, + { + "epoch": 0.019829375144108832, + "grad_norm": 0.5923570018189629, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.5134285688400269, + "step": 86 + }, + { + "epoch": 0.02005994927369149, + "grad_norm": 0.5850924486743854, + "learning_rate": 
3.963133640552995e-07, + "loss": 1.4244651794433594, + "step": 87 + }, + { + "epoch": 0.02029052340327415, + "grad_norm": 0.560105193358992, + "learning_rate": 4.009216589861751e-07, + "loss": 1.4571855068206787, + "step": 88 + }, + { + "epoch": 0.020521097532856813, + "grad_norm": 0.48108556089196525, + "learning_rate": 4.055299539170507e-07, + "loss": 1.2940685749053955, + "step": 89 + }, + { + "epoch": 0.020751671662439474, + "grad_norm": 0.5203979535892653, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3537572622299194, + "step": 90 + }, + { + "epoch": 0.020982245792022135, + "grad_norm": 0.5791117780548783, + "learning_rate": 4.147465437788018e-07, + "loss": 1.524500846862793, + "step": 91 + }, + { + "epoch": 0.021212819921604797, + "grad_norm": 0.4890632694429427, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.4414368867874146, + "step": 92 + }, + { + "epoch": 0.021443394051187458, + "grad_norm": 0.49954451696473423, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.284010887145996, + "step": 93 + }, + { + "epoch": 0.021673968180770116, + "grad_norm": 0.6088073736973271, + "learning_rate": 4.285714285714285e-07, + "loss": 1.5901892185211182, + "step": 94 + }, + { + "epoch": 0.021904542310352777, + "grad_norm": 0.5856129890195899, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.4408211708068848, + "step": 95 + }, + { + "epoch": 0.02213511643993544, + "grad_norm": 0.49571353442310634, + "learning_rate": 4.377880184331797e-07, + "loss": 1.2293554544448853, + "step": 96 + }, + { + "epoch": 0.0223656905695181, + "grad_norm": 0.570508723127356, + "learning_rate": 4.423963133640553e-07, + "loss": 1.4144377708435059, + "step": 97 + }, + { + "epoch": 0.02259626469910076, + "grad_norm": 0.5952794755762669, + "learning_rate": 4.4700460829493084e-07, + "loss": 1.359034776687622, + "step": 98 + }, + { + "epoch": 0.022826838828683423, + "grad_norm": 0.5878914385748992, + "learning_rate": 4.5161290322580644e-07, + "loss": 1.3299517631530762, + 
"step": 99 + }, + { + "epoch": 0.023057412958266084, + "grad_norm": 0.5039341997298462, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.3072423934936523, + "step": 100 + }, + { + "epoch": 0.023287987087848742, + "grad_norm": 0.6205508042108064, + "learning_rate": 4.608294930875576e-07, + "loss": 1.5683096647262573, + "step": 101 + }, + { + "epoch": 0.023518561217431403, + "grad_norm": 0.6300075069307655, + "learning_rate": 4.654377880184331e-07, + "loss": 1.6294015645980835, + "step": 102 + }, + { + "epoch": 0.023749135347014064, + "grad_norm": 0.5245849244619794, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.424511194229126, + "step": 103 + }, + { + "epoch": 0.023979709476596726, + "grad_norm": 0.5471205081131801, + "learning_rate": 4.746543778801843e-07, + "loss": 1.4169164896011353, + "step": 104 + }, + { + "epoch": 0.024210283606179387, + "grad_norm": 0.5854813174619509, + "learning_rate": 4.792626728110599e-07, + "loss": 1.3933480978012085, + "step": 105 + }, + { + "epoch": 0.02444085773576205, + "grad_norm": 0.6166413586526565, + "learning_rate": 4.838709677419355e-07, + "loss": 1.488750696182251, + "step": 106 + }, + { + "epoch": 0.02467143186534471, + "grad_norm": 0.6052025315612124, + "learning_rate": 4.88479262672811e-07, + "loss": 1.4852150678634644, + "step": 107 + }, + { + "epoch": 0.024902005994927368, + "grad_norm": 0.5750922845804657, + "learning_rate": 4.930875576036866e-07, + "loss": 1.4256765842437744, + "step": 108 + }, + { + "epoch": 0.02513258012451003, + "grad_norm": 0.5231547313189364, + "learning_rate": 4.976958525345622e-07, + "loss": 1.3063642978668213, + "step": 109 + }, + { + "epoch": 0.02536315425409269, + "grad_norm": 0.5734263022927267, + "learning_rate": 5.023041474654378e-07, + "loss": 1.549802303314209, + "step": 110 + }, + { + "epoch": 0.02559372838367535, + "grad_norm": 0.5041709928346361, + "learning_rate": 5.069124423963134e-07, + "loss": 1.301950454711914, + "step": 111 + }, + { + "epoch": 
0.025824302513258013, + "grad_norm": 0.5567596794280206, + "learning_rate": 5.11520737327189e-07, + "loss": 1.3025325536727905, + "step": 112 + }, + { + "epoch": 0.026054876642840674, + "grad_norm": 0.5369405016436734, + "learning_rate": 5.161290322580645e-07, + "loss": 1.40749192237854, + "step": 113 + }, + { + "epoch": 0.026285450772423335, + "grad_norm": 0.5208396194792263, + "learning_rate": 5.2073732718894e-07, + "loss": 1.3216793537139893, + "step": 114 + }, + { + "epoch": 0.026516024902005993, + "grad_norm": 0.5052494958784187, + "learning_rate": 5.253456221198155e-07, + "loss": 1.3189308643341064, + "step": 115 + }, + { + "epoch": 0.026746599031588655, + "grad_norm": 0.5632602249643789, + "learning_rate": 5.299539170506912e-07, + "loss": 1.430384635925293, + "step": 116 + }, + { + "epoch": 0.026977173161171316, + "grad_norm": 0.5516062364182813, + "learning_rate": 5.345622119815668e-07, + "loss": 1.4081478118896484, + "step": 117 + }, + { + "epoch": 0.027207747290753977, + "grad_norm": 0.6385508559977366, + "learning_rate": 5.391705069124423e-07, + "loss": 1.434388518333435, + "step": 118 + }, + { + "epoch": 0.02743832142033664, + "grad_norm": 0.6138756203209041, + "learning_rate": 5.437788018433179e-07, + "loss": 1.4139282703399658, + "step": 119 + }, + { + "epoch": 0.0276688955499193, + "grad_norm": 0.5683069275087388, + "learning_rate": 5.483870967741935e-07, + "loss": 1.4511487483978271, + "step": 120 + }, + { + "epoch": 0.02789946967950196, + "grad_norm": 0.6423215590072974, + "learning_rate": 5.529953917050691e-07, + "loss": 1.5713481903076172, + "step": 121 + }, + { + "epoch": 0.02813004380908462, + "grad_norm": 0.5705917499340588, + "learning_rate": 5.576036866359447e-07, + "loss": 1.4315730333328247, + "step": 122 + }, + { + "epoch": 0.02836061793866728, + "grad_norm": 0.5316898536625556, + "learning_rate": 5.622119815668203e-07, + "loss": 1.3283708095550537, + "step": 123 + }, + { + "epoch": 0.028591192068249942, + "grad_norm": 0.6184222176453401, 
+ "learning_rate": 5.668202764976958e-07, + "loss": 1.4329016208648682, + "step": 124 + }, + { + "epoch": 0.028821766197832603, + "grad_norm": 0.5872933055537319, + "learning_rate": 5.714285714285714e-07, + "loss": 1.444648265838623, + "step": 125 + }, + { + "epoch": 0.029052340327415264, + "grad_norm": 0.5205647887621043, + "learning_rate": 5.760368663594469e-07, + "loss": 1.3584785461425781, + "step": 126 + }, + { + "epoch": 0.029282914456997926, + "grad_norm": 0.5687232002808722, + "learning_rate": 5.806451612903226e-07, + "loss": 1.2815918922424316, + "step": 127 + }, + { + "epoch": 0.029513488586580587, + "grad_norm": 0.5252774303203537, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3332037925720215, + "step": 128 + }, + { + "epoch": 0.029744062716163245, + "grad_norm": 0.5694649769044726, + "learning_rate": 5.898617511520737e-07, + "loss": 1.4522390365600586, + "step": 129 + }, + { + "epoch": 0.029974636845745906, + "grad_norm": 0.5607244925516301, + "learning_rate": 5.944700460829493e-07, + "loss": 1.4362024068832397, + "step": 130 + }, + { + "epoch": 0.030205210975328568, + "grad_norm": 0.5432906779366606, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3271276950836182, + "step": 131 + }, + { + "epoch": 0.03043578510491123, + "grad_norm": 0.6175056690394787, + "learning_rate": 6.036866359447004e-07, + "loss": 1.5936369895935059, + "step": 132 + }, + { + "epoch": 0.03066635923449389, + "grad_norm": 0.5887629397700789, + "learning_rate": 6.08294930875576e-07, + "loss": 1.4786381721496582, + "step": 133 + }, + { + "epoch": 0.03089693336407655, + "grad_norm": 0.5490770556101789, + "learning_rate": 6.129032258064516e-07, + "loss": 1.3499064445495605, + "step": 134 + }, + { + "epoch": 0.031127507493659213, + "grad_norm": 0.583021664079577, + "learning_rate": 6.175115207373271e-07, + "loss": 1.4434795379638672, + "step": 135 + }, + { + "epoch": 0.03135808162324187, + "grad_norm": 0.6037371306112707, + "learning_rate": 6.221198156682027e-07, + 
"loss": 1.4064602851867676, + "step": 136 + }, + { + "epoch": 0.03158865575282453, + "grad_norm": 0.5005511365111003, + "learning_rate": 6.267281105990782e-07, + "loss": 1.3325507640838623, + "step": 137 + }, + { + "epoch": 0.03181922988240719, + "grad_norm": 0.516984621863849, + "learning_rate": 6.313364055299539e-07, + "loss": 1.2584879398345947, + "step": 138 + }, + { + "epoch": 0.032049804011989855, + "grad_norm": 0.5401703370709408, + "learning_rate": 6.359447004608295e-07, + "loss": 1.3754582405090332, + "step": 139 + }, + { + "epoch": 0.032280378141572516, + "grad_norm": 0.5773695778497429, + "learning_rate": 6.40552995391705e-07, + "loss": 1.2700412273406982, + "step": 140 + }, + { + "epoch": 0.03251095227115518, + "grad_norm": 0.580045410672373, + "learning_rate": 6.451612903225806e-07, + "loss": 1.395858645439148, + "step": 141 + }, + { + "epoch": 0.03274152640073784, + "grad_norm": 0.6146943532430481, + "learning_rate": 6.497695852534562e-07, + "loss": 1.402890682220459, + "step": 142 + }, + { + "epoch": 0.0329721005303205, + "grad_norm": 0.5736524878471048, + "learning_rate": 6.543778801843318e-07, + "loss": 1.5405397415161133, + "step": 143 + }, + { + "epoch": 0.03320267465990316, + "grad_norm": 0.5418174501474893, + "learning_rate": 6.589861751152074e-07, + "loss": 1.2394921779632568, + "step": 144 + }, + { + "epoch": 0.03343324878948582, + "grad_norm": 0.6276742940359161, + "learning_rate": 6.63594470046083e-07, + "loss": 1.453255295753479, + "step": 145 + }, + { + "epoch": 0.03366382291906848, + "grad_norm": 0.6191808042065741, + "learning_rate": 6.682027649769585e-07, + "loss": 1.3661112785339355, + "step": 146 + }, + { + "epoch": 0.03389439704865114, + "grad_norm": 0.5260230971069313, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2952282428741455, + "step": 147 + }, + { + "epoch": 0.0341249711782338, + "grad_norm": 0.6693704726704671, + "learning_rate": 6.774193548387096e-07, + "loss": 1.396565318107605, + "step": 148 + }, + { + "epoch": 
0.03435554530781646, + "grad_norm": 0.5881355966882998, + "learning_rate": 6.820276497695853e-07, + "loss": 1.3207082748413086, + "step": 149 + }, + { + "epoch": 0.03458611943739912, + "grad_norm": 0.5727010424261832, + "learning_rate": 6.866359447004608e-07, + "loss": 1.4085125923156738, + "step": 150 + }, + { + "epoch": 0.034816693566981784, + "grad_norm": 0.6667208730018341, + "learning_rate": 6.912442396313363e-07, + "loss": 1.5698528289794922, + "step": 151 + }, + { + "epoch": 0.035047267696564445, + "grad_norm": 0.5847511619477141, + "learning_rate": 6.958525345622119e-07, + "loss": 1.4091004133224487, + "step": 152 + }, + { + "epoch": 0.035277841826147106, + "grad_norm": 0.5143540253572731, + "learning_rate": 7.004608294930875e-07, + "loss": 1.2392504215240479, + "step": 153 + }, + { + "epoch": 0.03550841595572977, + "grad_norm": 0.6061996419355483, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3355891704559326, + "step": 154 + }, + { + "epoch": 0.03573899008531243, + "grad_norm": 0.5654677060773288, + "learning_rate": 7.096774193548387e-07, + "loss": 1.330599308013916, + "step": 155 + }, + { + "epoch": 0.03596956421489509, + "grad_norm": 0.5625277163359125, + "learning_rate": 7.142857142857143e-07, + "loss": 1.344653844833374, + "step": 156 + }, + { + "epoch": 0.03620013834447775, + "grad_norm": 0.5693935421186345, + "learning_rate": 7.188940092165898e-07, + "loss": 1.341560959815979, + "step": 157 + }, + { + "epoch": 0.03643071247406041, + "grad_norm": 0.5761507210889462, + "learning_rate": 7.235023041474654e-07, + "loss": 1.2242077589035034, + "step": 158 + }, + { + "epoch": 0.036661286603643074, + "grad_norm": 0.61477283253827, + "learning_rate": 7.281105990783409e-07, + "loss": 1.2858202457427979, + "step": 159 + }, + { + "epoch": 0.03689186073322573, + "grad_norm": 0.6410836439864531, + "learning_rate": 7.327188940092166e-07, + "loss": 1.479524850845337, + "step": 160 + }, + { + "epoch": 0.03712243486280839, + "grad_norm": 0.5918139936623208, + 
"learning_rate": 7.373271889400922e-07, + "loss": 1.43915855884552, + "step": 161 + }, + { + "epoch": 0.03735300899239105, + "grad_norm": 0.6478814183526712, + "learning_rate": 7.419354838709677e-07, + "loss": 1.3939034938812256, + "step": 162 + }, + { + "epoch": 0.03758358312197371, + "grad_norm": 0.6065250961726126, + "learning_rate": 7.465437788018433e-07, + "loss": 1.2733443975448608, + "step": 163 + }, + { + "epoch": 0.037814157251556374, + "grad_norm": 0.5670760124517911, + "learning_rate": 7.511520737327189e-07, + "loss": 1.3436474800109863, + "step": 164 + }, + { + "epoch": 0.038044731381139035, + "grad_norm": 0.622037546591312, + "learning_rate": 7.557603686635944e-07, + "loss": 1.4250465631484985, + "step": 165 + }, + { + "epoch": 0.0382753055107217, + "grad_norm": 0.607298640184171, + "learning_rate": 7.603686635944701e-07, + "loss": 1.4244422912597656, + "step": 166 + }, + { + "epoch": 0.03850587964030436, + "grad_norm": 0.6986289389542176, + "learning_rate": 7.649769585253457e-07, + "loss": 1.5487544536590576, + "step": 167 + }, + { + "epoch": 0.03873645376988702, + "grad_norm": 0.5793907792629099, + "learning_rate": 7.695852534562211e-07, + "loss": 1.3282281160354614, + "step": 168 + }, + { + "epoch": 0.03896702789946968, + "grad_norm": 0.5428953608010194, + "learning_rate": 7.741935483870967e-07, + "loss": 1.2823774814605713, + "step": 169 + }, + { + "epoch": 0.03919760202905234, + "grad_norm": 0.5889853233557574, + "learning_rate": 7.788018433179722e-07, + "loss": 1.2402329444885254, + "step": 170 + }, + { + "epoch": 0.039428176158635, + "grad_norm": 0.6219537569729359, + "learning_rate": 7.834101382488479e-07, + "loss": 1.3755587339401245, + "step": 171 + }, + { + "epoch": 0.039658750288217665, + "grad_norm": 0.5509851701904478, + "learning_rate": 7.880184331797235e-07, + "loss": 1.3403921127319336, + "step": 172 + }, + { + "epoch": 0.039889324417800326, + "grad_norm": 0.5971512014225002, + "learning_rate": 7.92626728110599e-07, + "loss": 
1.3742129802703857, + "step": 173 + }, + { + "epoch": 0.04011989854738298, + "grad_norm": 0.7068161569826883, + "learning_rate": 7.972350230414746e-07, + "loss": 1.6444599628448486, + "step": 174 + }, + { + "epoch": 0.04035047267696564, + "grad_norm": 0.6019721571978455, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3891929388046265, + "step": 175 + }, + { + "epoch": 0.0405810468065483, + "grad_norm": 0.5520157347061957, + "learning_rate": 8.064516129032257e-07, + "loss": 1.2279409170150757, + "step": 176 + }, + { + "epoch": 0.040811620936130964, + "grad_norm": 0.6346481492269727, + "learning_rate": 8.110599078341014e-07, + "loss": 1.4576997756958008, + "step": 177 + }, + { + "epoch": 0.041042195065713626, + "grad_norm": 0.612489332435889, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3585199117660522, + "step": 178 + }, + { + "epoch": 0.04127276919529629, + "grad_norm": 0.5908354773562909, + "learning_rate": 8.202764976958525e-07, + "loss": 1.3056905269622803, + "step": 179 + }, + { + "epoch": 0.04150334332487895, + "grad_norm": 0.5749600887070265, + "learning_rate": 8.248847926267281e-07, + "loss": 1.3029698133468628, + "step": 180 + }, + { + "epoch": 0.04173391745446161, + "grad_norm": 0.6598409427706357, + "learning_rate": 8.294930875576036e-07, + "loss": 1.4368736743927002, + "step": 181 + }, + { + "epoch": 0.04196449158404427, + "grad_norm": 0.5781034108869284, + "learning_rate": 8.341013824884793e-07, + "loss": 1.3243422508239746, + "step": 182 + }, + { + "epoch": 0.04219506571362693, + "grad_norm": 0.5206395827762466, + "learning_rate": 8.387096774193549e-07, + "loss": 1.232081413269043, + "step": 183 + }, + { + "epoch": 0.042425639843209594, + "grad_norm": 0.656527379150416, + "learning_rate": 8.433179723502303e-07, + "loss": 1.4601390361785889, + "step": 184 + }, + { + "epoch": 0.042656213972792255, + "grad_norm": 0.7159376690159417, + "learning_rate": 8.479262672811059e-07, + "loss": 1.3778860569000244, + "step": 185 + }, + { + "epoch": 
0.042886788102374916, + "grad_norm": 0.590059263278645, + "learning_rate": 8.525345622119815e-07, + "loss": 1.3235092163085938, + "step": 186 + }, + { + "epoch": 0.04311736223195758, + "grad_norm": 0.6886704124574455, + "learning_rate": 8.57142857142857e-07, + "loss": 1.4480581283569336, + "step": 187 + }, + { + "epoch": 0.04334793636154023, + "grad_norm": 0.6346582437238362, + "learning_rate": 8.617511520737327e-07, + "loss": 1.4530816078186035, + "step": 188 + }, + { + "epoch": 0.04357851049112289, + "grad_norm": 0.6767670706852607, + "learning_rate": 8.663594470046083e-07, + "loss": 1.4447407722473145, + "step": 189 + }, + { + "epoch": 0.043809084620705555, + "grad_norm": 0.6049885392306779, + "learning_rate": 8.709677419354838e-07, + "loss": 1.3610244989395142, + "step": 190 + }, + { + "epoch": 0.044039658750288216, + "grad_norm": 0.6415008170468611, + "learning_rate": 8.755760368663594e-07, + "loss": 1.4084277153015137, + "step": 191 + }, + { + "epoch": 0.04427023287987088, + "grad_norm": 0.579530872526008, + "learning_rate": 8.801843317972349e-07, + "loss": 1.3652758598327637, + "step": 192 + }, + { + "epoch": 0.04450080700945354, + "grad_norm": 0.7106489880805067, + "learning_rate": 8.847926267281106e-07, + "loss": 1.4791496992111206, + "step": 193 + }, + { + "epoch": 0.0447313811390362, + "grad_norm": 0.6211187249917176, + "learning_rate": 8.894009216589862e-07, + "loss": 1.3958008289337158, + "step": 194 + }, + { + "epoch": 0.04496195526861886, + "grad_norm": 0.700016972508283, + "learning_rate": 8.940092165898617e-07, + "loss": 1.4134410619735718, + "step": 195 + }, + { + "epoch": 0.04519252939820152, + "grad_norm": 0.6911089974612981, + "learning_rate": 8.986175115207373e-07, + "loss": 1.4062776565551758, + "step": 196 + }, + { + "epoch": 0.045423103527784184, + "grad_norm": 0.6823334536756955, + "learning_rate": 9.032258064516129e-07, + "loss": 1.375224232673645, + "step": 197 + }, + { + "epoch": 0.045653677657366845, + "grad_norm": 0.6003343488972004, 
+ "learning_rate": 9.078341013824884e-07, + "loss": 1.2440606355667114, + "step": 198 + }, + { + "epoch": 0.045884251786949506, + "grad_norm": 0.6737684280449967, + "learning_rate": 9.124423963133641e-07, + "loss": 1.4068349599838257, + "step": 199 + }, + { + "epoch": 0.04611482591653217, + "grad_norm": 0.6181499859340271, + "learning_rate": 9.170506912442397e-07, + "loss": 1.3797581195831299, + "step": 200 + }, + { + "epoch": 0.04634540004611483, + "grad_norm": 0.6445170966825345, + "learning_rate": 9.216589861751152e-07, + "loss": 1.4441678524017334, + "step": 201 + }, + { + "epoch": 0.046575974175697483, + "grad_norm": 0.6677276378953197, + "learning_rate": 9.262672811059907e-07, + "loss": 1.4727370738983154, + "step": 202 + }, + { + "epoch": 0.046806548305280145, + "grad_norm": 0.7032332117559357, + "learning_rate": 9.308755760368662e-07, + "loss": 1.448495864868164, + "step": 203 + }, + { + "epoch": 0.047037122434862806, + "grad_norm": 0.674429398641426, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3727293014526367, + "step": 204 + }, + { + "epoch": 0.04726769656444547, + "grad_norm": 0.6701259318687961, + "learning_rate": 9.400921658986175e-07, + "loss": 1.4234352111816406, + "step": 205 + }, + { + "epoch": 0.04749827069402813, + "grad_norm": 0.5974678653003657, + "learning_rate": 9.44700460829493e-07, + "loss": 1.2407056093215942, + "step": 206 + }, + { + "epoch": 0.04772884482361079, + "grad_norm": 0.672276356974357, + "learning_rate": 9.493087557603686e-07, + "loss": 1.3502311706542969, + "step": 207 + }, + { + "epoch": 0.04795941895319345, + "grad_norm": 0.7465400676066979, + "learning_rate": 9.539170506912442e-07, + "loss": 1.4618254899978638, + "step": 208 + }, + { + "epoch": 0.04818999308277611, + "grad_norm": 0.681303163705478, + "learning_rate": 9.585253456221198e-07, + "loss": 1.3624317646026611, + "step": 209 + }, + { + "epoch": 0.048420567212358774, + "grad_norm": 0.7608712138693399, + "learning_rate": 9.631336405529954e-07, + "loss": 
1.512046456336975, + "step": 210 + }, + { + "epoch": 0.048651141341941435, + "grad_norm": 0.6018077766578277, + "learning_rate": 9.67741935483871e-07, + "loss": 1.2896164655685425, + "step": 211 + }, + { + "epoch": 0.0488817154715241, + "grad_norm": 0.7063578249182565, + "learning_rate": 9.723502304147466e-07, + "loss": 1.5507850646972656, + "step": 212 + }, + { + "epoch": 0.04911228960110676, + "grad_norm": 0.7081498572564182, + "learning_rate": 9.76958525345622e-07, + "loss": 1.425408124923706, + "step": 213 + }, + { + "epoch": 0.04934286373068942, + "grad_norm": 0.7025877080602252, + "learning_rate": 9.815668202764976e-07, + "loss": 1.347771406173706, + "step": 214 + }, + { + "epoch": 0.04957343786027208, + "grad_norm": 0.7201983919068122, + "learning_rate": 9.861751152073732e-07, + "loss": 1.4044904708862305, + "step": 215 + }, + { + "epoch": 0.049804011989854735, + "grad_norm": 0.7045020078596302, + "learning_rate": 9.907834101382488e-07, + "loss": 1.3507332801818848, + "step": 216 + }, + { + "epoch": 0.050034586119437396, + "grad_norm": 0.6820424993070572, + "learning_rate": 9.953917050691244e-07, + "loss": 1.3022946119308472, + "step": 217 + }, + { + "epoch": 0.05026516024902006, + "grad_norm": 0.6561516180690095, + "learning_rate": 1e-06, + "loss": 1.284754991531372, + "step": 218 + }, + { + "epoch": 0.05049573437860272, + "grad_norm": 0.6003085662526402, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.2985923290252686, + "step": 219 + }, + { + "epoch": 0.05072630850818538, + "grad_norm": 0.6214608767923379, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.3855717182159424, + "step": 220 + }, + { + "epoch": 0.05095688263776804, + "grad_norm": 0.675694738994849, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.357919692993164, + "step": 221 + }, + { + "epoch": 0.0511874567673507, + "grad_norm": 0.6736529895786637, + "learning_rate": 1.0184331797235021e-06, + "loss": 1.2818949222564697, + "step": 222 + }, + { + "epoch": 
0.051418030896933364, + "grad_norm": 0.6226203332882617, + "learning_rate": 1.023041474654378e-06, + "loss": 1.2488511800765991, + "step": 223 + }, + { + "epoch": 0.051648605026516026, + "grad_norm": 0.7420146271711324, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.3824148178100586, + "step": 224 + }, + { + "epoch": 0.05187917915609869, + "grad_norm": 0.6473939851836901, + "learning_rate": 1.032258064516129e-06, + "loss": 1.3114633560180664, + "step": 225 + }, + { + "epoch": 0.05210975328568135, + "grad_norm": 0.6372141360329365, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.272273063659668, + "step": 226 + }, + { + "epoch": 0.05234032741526401, + "grad_norm": 0.8216490037105428, + "learning_rate": 1.04147465437788e-06, + "loss": 1.5072649717330933, + "step": 227 + }, + { + "epoch": 0.05257090154484667, + "grad_norm": 0.7183581578734374, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.4087142944335938, + "step": 228 + }, + { + "epoch": 0.05280147567442933, + "grad_norm": 0.8332625481322393, + "learning_rate": 1.050691244239631e-06, + "loss": 1.4866605997085571, + "step": 229 + }, + { + "epoch": 0.05303204980401199, + "grad_norm": 0.6315632875144884, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.3377184867858887, + "step": 230 + }, + { + "epoch": 0.05326262393359465, + "grad_norm": 0.6695801561741619, + "learning_rate": 1.0599078341013825e-06, + "loss": 1.4009103775024414, + "step": 231 + }, + { + "epoch": 0.05349319806317731, + "grad_norm": 0.7832755910275336, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.4878556728363037, + "step": 232 + }, + { + "epoch": 0.05372377219275997, + "grad_norm": 0.7218421394327601, + "learning_rate": 1.0691244239631337e-06, + "loss": 1.4002021551132202, + "step": 233 + }, + { + "epoch": 0.05395434632234263, + "grad_norm": 0.6918832056192313, + "learning_rate": 1.073732718894009e-06, + "loss": 1.337146520614624, + "step": 234 + }, + { + "epoch": 0.05418492045192529, + "grad_norm": 
0.7101215642172168, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.4084792137145996, + "step": 235 + }, + { + "epoch": 0.054415494581507955, + "grad_norm": 0.8413614642264606, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.4131449460983276, + "step": 236 + }, + { + "epoch": 0.054646068711090616, + "grad_norm": 0.6587637953772119, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.1869292259216309, + "step": 237 + }, + { + "epoch": 0.05487664284067328, + "grad_norm": 0.7608337119634553, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.3970961570739746, + "step": 238 + }, + { + "epoch": 0.05510721697025594, + "grad_norm": 0.7677503323555195, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2682442665100098, + "step": 239 + }, + { + "epoch": 0.0553377910998386, + "grad_norm": 0.6546621813731868, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2983934879302979, + "step": 240 + }, + { + "epoch": 0.05556836522942126, + "grad_norm": 0.7451544478647047, + "learning_rate": 1.1059907834101382e-06, + "loss": 1.3980869054794312, + "step": 241 + }, + { + "epoch": 0.05579893935900392, + "grad_norm": 0.6116475273591584, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.3068631887435913, + "step": 242 + }, + { + "epoch": 0.056029513488586584, + "grad_norm": 0.7974654782353883, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.5353353023529053, + "step": 243 + }, + { + "epoch": 0.05626008761816924, + "grad_norm": 0.663054900024182, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.290163278579712, + "step": 244 + }, + { + "epoch": 0.0564906617477519, + "grad_norm": 0.6761997400626832, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.3671848773956299, + "step": 245 + }, + { + "epoch": 0.05672123587733456, + "grad_norm": 0.6294209937786865, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.3020408153533936, + "step": 246 + }, + { + "epoch": 0.05695181000691722, + "grad_norm": 0.7207247726421506, + "learning_rate": 
1.1336405529953916e-06, + "loss": 1.3159775733947754, + "step": 247 + }, + { + "epoch": 0.057182384136499884, + "grad_norm": 0.6708051542823367, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.3163995742797852, + "step": 248 + }, + { + "epoch": 0.057412958266082545, + "grad_norm": 0.8019994049858626, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.5215930938720703, + "step": 249 + }, + { + "epoch": 0.057643532395665206, + "grad_norm": 0.6559479072990889, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.2870161533355713, + "step": 250 + }, + { + "epoch": 0.05787410652524787, + "grad_norm": 0.7147869966218979, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.2624198198318481, + "step": 251 + }, + { + "epoch": 0.05810468065483053, + "grad_norm": 0.7319832858668294, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.2778981924057007, + "step": 252 + }, + { + "epoch": 0.05833525478441319, + "grad_norm": 0.6564800467165074, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.1934442520141602, + "step": 253 + }, + { + "epoch": 0.05856582891399585, + "grad_norm": 0.7291335446235057, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.3840088844299316, + "step": 254 + }, + { + "epoch": 0.05879640304357851, + "grad_norm": 0.7017610521536986, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.373002290725708, + "step": 255 + }, + { + "epoch": 0.059026977173161174, + "grad_norm": 0.6853330554611681, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.3614685535430908, + "step": 256 + }, + { + "epoch": 0.059257551302743836, + "grad_norm": 0.7170055632885292, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.3525335788726807, + "step": 257 + }, + { + "epoch": 0.05948812543232649, + "grad_norm": 0.7471586447698318, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.3806469440460205, + "step": 258 + }, + { + "epoch": 0.05971869956190915, + "grad_norm": 0.7262354481718393, + "learning_rate": 1.1889400921658985e-06, + "loss": 
1.372736930847168, + "step": 259 + }, + { + "epoch": 0.05994927369149181, + "grad_norm": 0.7470794959515278, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.309061050415039, + "step": 260 + }, + { + "epoch": 0.060179847821074474, + "grad_norm": 0.7217295951903909, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.3500525951385498, + "step": 261 + }, + { + "epoch": 0.060410421950657135, + "grad_norm": 0.7498906773328822, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.4197357892990112, + "step": 262 + }, + { + "epoch": 0.0606409960802398, + "grad_norm": 0.9553336191863615, + "learning_rate": 1.207373271889401e-06, + "loss": 1.6454131603240967, + "step": 263 + }, + { + "epoch": 0.06087157020982246, + "grad_norm": 0.7361372249879211, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.269604206085205, + "step": 264 + }, + { + "epoch": 0.06110214433940512, + "grad_norm": 0.6596823046141973, + "learning_rate": 1.216589861751152e-06, + "loss": 1.2358057498931885, + "step": 265 + }, + { + "epoch": 0.06133271846898778, + "grad_norm": 0.7203751630823346, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2713422775268555, + "step": 266 + }, + { + "epoch": 0.06156329259857044, + "grad_norm": 0.7033446179657081, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.225820779800415, + "step": 267 + }, + { + "epoch": 0.0617938667281531, + "grad_norm": 0.6900817599997362, + "learning_rate": 1.2304147465437787e-06, + "loss": 1.279617190361023, + "step": 268 + }, + { + "epoch": 0.062024440857735764, + "grad_norm": 0.6800159728233099, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.2081385850906372, + "step": 269 + }, + { + "epoch": 0.062255014987318426, + "grad_norm": 0.7378639399050563, + "learning_rate": 1.23963133640553e-06, + "loss": 1.3121249675750732, + "step": 270 + }, + { + "epoch": 0.06248558911690109, + "grad_norm": 0.7497904685097676, + "learning_rate": 1.2442396313364054e-06, + "loss": 1.28495454788208, + "step": 271 + }, + { + 
"epoch": 0.06271616324648374, + "grad_norm": 0.7749777957183016, + "learning_rate": 1.248847926267281e-06, + "loss": 1.3837053775787354, + "step": 272 + }, + { + "epoch": 0.0629467373760664, + "grad_norm": 0.7210838772374344, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.2119230031967163, + "step": 273 + }, + { + "epoch": 0.06317731150564906, + "grad_norm": 0.7143072591295863, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.323190450668335, + "step": 274 + }, + { + "epoch": 0.06340788563523173, + "grad_norm": 0.7546501032980093, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.4300715923309326, + "step": 275 + }, + { + "epoch": 0.06363845976481439, + "grad_norm": 0.7154461007442852, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1680996417999268, + "step": 276 + }, + { + "epoch": 0.06386903389439705, + "grad_norm": 0.8088364505140268, + "learning_rate": 1.271889400921659e-06, + "loss": 1.3980211019515991, + "step": 277 + }, + { + "epoch": 0.06409960802397971, + "grad_norm": 0.7801914373505492, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.40798020362854, + "step": 278 + }, + { + "epoch": 0.06433018215356237, + "grad_norm": 0.7237186405433459, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2535033226013184, + "step": 279 + }, + { + "epoch": 0.06456075628314503, + "grad_norm": 0.7779219570683336, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.3866907358169556, + "step": 280 + }, + { + "epoch": 0.0647913304127277, + "grad_norm": 0.7036374523288562, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.1985647678375244, + "step": 281 + }, + { + "epoch": 0.06502190454231035, + "grad_norm": 0.8186126171093759, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.3741936683654785, + "step": 282 + }, + { + "epoch": 0.06525247867189302, + "grad_norm": 0.7795060457073558, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.3684422969818115, + "step": 283 + }, + { + "epoch": 0.06548305280147568, + "grad_norm": 
0.7685811594695469, + "learning_rate": 1.304147465437788e-06, + "loss": 1.3792086839675903, + "step": 284 + }, + { + "epoch": 0.06571362693105834, + "grad_norm": 0.8541112738893439, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.3252873420715332, + "step": 285 + }, + { + "epoch": 0.065944201060641, + "grad_norm": 0.7272989570317888, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.1918525695800781, + "step": 286 + }, + { + "epoch": 0.06617477519022366, + "grad_norm": 0.8825171015262823, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3760654926300049, + "step": 287 + }, + { + "epoch": 0.06640534931980632, + "grad_norm": 0.8100539272477522, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.3452839851379395, + "step": 288 + }, + { + "epoch": 0.06663592344938898, + "grad_norm": 0.7635396360128843, + "learning_rate": 1.327188940092166e-06, + "loss": 1.321220874786377, + "step": 289 + }, + { + "epoch": 0.06686649757897165, + "grad_norm": 0.724002123288283, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.222012996673584, + "step": 290 + }, + { + "epoch": 0.0670970717085543, + "grad_norm": 0.7939713970528558, + "learning_rate": 1.336405529953917e-06, + "loss": 1.3209044933319092, + "step": 291 + }, + { + "epoch": 0.06732764583813695, + "grad_norm": 0.834643855588948, + "learning_rate": 1.3410138248847927e-06, + "loss": 1.3250432014465332, + "step": 292 + }, + { + "epoch": 0.06755821996771962, + "grad_norm": 0.6522445861220314, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.1738805770874023, + "step": 293 + }, + { + "epoch": 0.06778879409730228, + "grad_norm": 0.7430324759377445, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.238675832748413, + "step": 294 + }, + { + "epoch": 0.06801936822688494, + "grad_norm": 0.6872443402637277, + "learning_rate": 1.354838709677419e-06, + "loss": 1.2162814140319824, + "step": 295 + }, + { + "epoch": 0.0682499423564676, + "grad_norm": 0.7451321254668013, + "learning_rate": 
1.359447004608295e-06, + "loss": 1.2087210416793823, + "step": 296 + }, + { + "epoch": 0.06848051648605026, + "grad_norm": 0.7183129418570579, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.2657420635223389, + "step": 297 + }, + { + "epoch": 0.06871109061563292, + "grad_norm": 0.8828866176671843, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.496249794960022, + "step": 298 + }, + { + "epoch": 0.06894166474521558, + "grad_norm": 0.7852198432087445, + "learning_rate": 1.3732718894009217e-06, + "loss": 1.2698930501937866, + "step": 299 + }, + { + "epoch": 0.06917223887479824, + "grad_norm": 0.723866375282328, + "learning_rate": 1.377880184331797e-06, + "loss": 1.2088165283203125, + "step": 300 + }, + { + "epoch": 0.0694028130043809, + "grad_norm": 0.764377981893855, + "learning_rate": 1.3824884792626727e-06, + "loss": 1.392000436782837, + "step": 301 + }, + { + "epoch": 0.06963338713396357, + "grad_norm": 0.7252481501169622, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.366544485092163, + "step": 302 + }, + { + "epoch": 0.06986396126354623, + "grad_norm": 0.7900814443800929, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.3276031017303467, + "step": 303 + }, + { + "epoch": 0.07009453539312889, + "grad_norm": 0.7000339586583599, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1413768529891968, + "step": 304 + }, + { + "epoch": 0.07032510952271155, + "grad_norm": 0.7903483195817192, + "learning_rate": 1.400921658986175e-06, + "loss": 1.2958520650863647, + "step": 305 + }, + { + "epoch": 0.07055568365229421, + "grad_norm": 0.7651988170590107, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.3514549732208252, + "step": 306 + }, + { + "epoch": 0.07078625778187687, + "grad_norm": 0.767117117462576, + "learning_rate": 1.410138248847926e-06, + "loss": 1.332120418548584, + "step": 307 + }, + { + "epoch": 0.07101683191145954, + "grad_norm": 0.8380945550826328, + "learning_rate": 1.4147465437788018e-06, + "loss": 1.282820463180542, + 
"step": 308 + }, + { + "epoch": 0.0712474060410422, + "grad_norm": 0.7478573370757386, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.3927665948867798, + "step": 309 + }, + { + "epoch": 0.07147798017062486, + "grad_norm": 0.7471336867744233, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.2459386587142944, + "step": 310 + }, + { + "epoch": 0.07170855430020752, + "grad_norm": 0.715680538211599, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.1996700763702393, + "step": 311 + }, + { + "epoch": 0.07193912842979018, + "grad_norm": 0.7466366577926873, + "learning_rate": 1.433179723502304e-06, + "loss": 1.1007883548736572, + "step": 312 + }, + { + "epoch": 0.07216970255937284, + "grad_norm": 0.6505103448142013, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.211327314376831, + "step": 313 + }, + { + "epoch": 0.0724002766889555, + "grad_norm": 0.7475198907178121, + "learning_rate": 1.4423963133640554e-06, + "loss": 1.314349889755249, + "step": 314 + }, + { + "epoch": 0.07263085081853816, + "grad_norm": 0.7782372886671983, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.2270662784576416, + "step": 315 + }, + { + "epoch": 0.07286142494812083, + "grad_norm": 0.7521500862086049, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.1802537441253662, + "step": 316 + }, + { + "epoch": 0.07309199907770349, + "grad_norm": 0.7684137773026678, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.275806188583374, + "step": 317 + }, + { + "epoch": 0.07332257320728615, + "grad_norm": 0.789590997753613, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.2713148593902588, + "step": 318 + }, + { + "epoch": 0.07355314733686881, + "grad_norm": 0.8345280857312554, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.3091093301773071, + "step": 319 + }, + { + "epoch": 0.07378372146645146, + "grad_norm": 0.7108154017524825, + "learning_rate": 1.4700460829493086e-06, + "loss": 1.1274672746658325, + "step": 320 + }, + { + "epoch": 
0.07401429559603412, + "grad_norm": 0.7137227522476419, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.236955165863037, + "step": 321 + }, + { + "epoch": 0.07424486972561678, + "grad_norm": 0.7825967305477171, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.2561366558074951, + "step": 322 + }, + { + "epoch": 0.07447544385519944, + "grad_norm": 0.7250730413423113, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.1229519844055176, + "step": 323 + }, + { + "epoch": 0.0747060179847821, + "grad_norm": 0.7688658143017724, + "learning_rate": 1.4884792626728112e-06, + "loss": 1.200115442276001, + "step": 324 + }, + { + "epoch": 0.07493659211436476, + "grad_norm": 0.7499295220603182, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1930850744247437, + "step": 325 + }, + { + "epoch": 0.07516716624394743, + "grad_norm": 0.8209913282027874, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.3204331398010254, + "step": 326 + }, + { + "epoch": 0.07539774037353009, + "grad_norm": 0.7429612395335268, + "learning_rate": 1.5023041474654377e-06, + "loss": 1.109247088432312, + "step": 327 + }, + { + "epoch": 0.07562831450311275, + "grad_norm": 0.7097388789784923, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.1239254474639893, + "step": 328 + }, + { + "epoch": 0.07585888863269541, + "grad_norm": 0.7867677832004493, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.22686767578125, + "step": 329 + }, + { + "epoch": 0.07608946276227807, + "grad_norm": 0.8425243281826544, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2846856117248535, + "step": 330 + }, + { + "epoch": 0.07632003689186073, + "grad_norm": 0.7611030204070008, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.1720764636993408, + "step": 331 + }, + { + "epoch": 0.0765506110214434, + "grad_norm": 0.6783089545901869, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.05867338180542, + "step": 332 + }, + { + "epoch": 0.07678118515102605, + "grad_norm": 
0.781197296597327, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.2652220726013184, + "step": 333 + }, + { + "epoch": 0.07701175928060872, + "grad_norm": 0.7674267376615101, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.1367218494415283, + "step": 334 + }, + { + "epoch": 0.07724233341019138, + "grad_norm": 0.7149265599125916, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.169439673423767, + "step": 335 + }, + { + "epoch": 0.07747290753977404, + "grad_norm": 0.8284832797024527, + "learning_rate": 1.543778801843318e-06, + "loss": 1.265104055404663, + "step": 336 + }, + { + "epoch": 0.0777034816693567, + "grad_norm": 0.6605498491920537, + "learning_rate": 1.5483870967741935e-06, + "loss": 1.059098243713379, + "step": 337 + }, + { + "epoch": 0.07793405579893936, + "grad_norm": 0.8255024678570093, + "learning_rate": 1.552995391705069e-06, + "loss": 1.0998419523239136, + "step": 338 + }, + { + "epoch": 0.07816462992852202, + "grad_norm": 0.8285993940213782, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1361349821090698, + "step": 339 + }, + { + "epoch": 0.07839520405810468, + "grad_norm": 0.7677612111698353, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.1051890850067139, + "step": 340 + }, + { + "epoch": 0.07862577818768735, + "grad_norm": 0.8204078401725609, + "learning_rate": 1.5668202764976959e-06, + "loss": 1.1675043106079102, + "step": 341 + }, + { + "epoch": 0.07885635231727, + "grad_norm": 0.8428908363907526, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.180741786956787, + "step": 342 + }, + { + "epoch": 0.07908692644685267, + "grad_norm": 0.8559354133772745, + "learning_rate": 1.576036866359447e-06, + "loss": 1.241147518157959, + "step": 343 + }, + { + "epoch": 0.07931750057643533, + "grad_norm": 0.848204694935563, + "learning_rate": 1.5806451612903224e-06, + "loss": 1.2831401824951172, + "step": 344 + }, + { + "epoch": 0.07954807470601799, + "grad_norm": 0.7281233645086155, + "learning_rate": 
1.585253456221198e-06, + "loss": 1.2328094244003296, + "step": 345 + }, + { + "epoch": 0.07977864883560065, + "grad_norm": 0.7932743453051899, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.296494960784912, + "step": 346 + }, + { + "epoch": 0.08000922296518331, + "grad_norm": 0.7368517201206619, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.1802153587341309, + "step": 347 + }, + { + "epoch": 0.08023979709476596, + "grad_norm": 0.8829436639082808, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.2387690544128418, + "step": 348 + }, + { + "epoch": 0.08047037122434862, + "grad_norm": 0.8002618721063425, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.1307916641235352, + "step": 349 + }, + { + "epoch": 0.08070094535393128, + "grad_norm": 0.8185303488247757, + "learning_rate": 1.608294930875576e-06, + "loss": 1.117497444152832, + "step": 350 + }, + { + "epoch": 0.08093151948351394, + "grad_norm": 0.7524331692605707, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1360805034637451, + "step": 351 + }, + { + "epoch": 0.0811620936130966, + "grad_norm": 0.7626049955851422, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.1756231784820557, + "step": 352 + }, + { + "epoch": 0.08139266774267927, + "grad_norm": 0.7605864356179197, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.0260417461395264, + "step": 353 + }, + { + "epoch": 0.08162324187226193, + "grad_norm": 0.6949706544727091, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0863536596298218, + "step": 354 + }, + { + "epoch": 0.08185381600184459, + "grad_norm": 0.7427032746567218, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0529779195785522, + "step": 355 + }, + { + "epoch": 0.08208439013142725, + "grad_norm": 0.7626426518406405, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.0374994277954102, + "step": 356 + }, + { + "epoch": 0.08231496426100991, + "grad_norm": 0.7762352327056515, + "learning_rate": 1.640552995391705e-06, + "loss": 
1.153419017791748, + "step": 357 + }, + { + "epoch": 0.08254553839059257, + "grad_norm": 0.7455681546697154, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.0155376195907593, + "step": 358 + }, + { + "epoch": 0.08277611252017524, + "grad_norm": 0.779838920397346, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1288530826568604, + "step": 359 + }, + { + "epoch": 0.0830066866497579, + "grad_norm": 0.8920666311969824, + "learning_rate": 1.6543778801843317e-06, + "loss": 1.1493456363677979, + "step": 360 + }, + { + "epoch": 0.08323726077934056, + "grad_norm": 0.8383114858680324, + "learning_rate": 1.6589861751152071e-06, + "loss": 1.1064895391464233, + "step": 361 + }, + { + "epoch": 0.08346783490892322, + "grad_norm": 0.752156167882629, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0102828741073608, + "step": 362 + }, + { + "epoch": 0.08369840903850588, + "grad_norm": 0.8341451005387022, + "learning_rate": 1.6682027649769585e-06, + "loss": 1.0750138759613037, + "step": 363 + }, + { + "epoch": 0.08392898316808854, + "grad_norm": 0.8504953523340792, + "learning_rate": 1.672811059907834e-06, + "loss": 1.1611195802688599, + "step": 364 + }, + { + "epoch": 0.0841595572976712, + "grad_norm": 0.8228646683486963, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.2799829244613647, + "step": 365 + }, + { + "epoch": 0.08439013142725386, + "grad_norm": 0.9626273899315478, + "learning_rate": 1.682027649769585e-06, + "loss": 1.2427947521209717, + "step": 366 + }, + { + "epoch": 0.08462070555683653, + "grad_norm": 0.724553415716276, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0379959344863892, + "step": 367 + }, + { + "epoch": 0.08485127968641919, + "grad_norm": 0.7173602639018404, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8439304828643799, + "step": 368 + }, + { + "epoch": 0.08508185381600185, + "grad_norm": 0.8477542480910312, + "learning_rate": 1.6958525345622119e-06, + "loss": 1.1249288320541382, + "step": 369 + }, + { + 
"epoch": 0.08531242794558451, + "grad_norm": 0.8715705993798011, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.186207890510559, + "step": 370 + }, + { + "epoch": 0.08554300207516717, + "grad_norm": 0.9990300341847143, + "learning_rate": 1.705069124423963e-06, + "loss": 1.1181306838989258, + "step": 371 + }, + { + "epoch": 0.08577357620474983, + "grad_norm": 0.8792678686182055, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9828017950057983, + "step": 372 + }, + { + "epoch": 0.0860041503343325, + "grad_norm": 0.7710250186072433, + "learning_rate": 1.714285714285714e-06, + "loss": 1.1158804893493652, + "step": 373 + }, + { + "epoch": 0.08623472446391516, + "grad_norm": 0.9602707019706166, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.1771481037139893, + "step": 374 + }, + { + "epoch": 0.08646529859349782, + "grad_norm": 0.8137176951163696, + "learning_rate": 1.7235023041474655e-06, + "loss": 1.1378540992736816, + "step": 375 + }, + { + "epoch": 0.08669587272308046, + "grad_norm": 0.819557644912057, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.2011152505874634, + "step": 376 + }, + { + "epoch": 0.08692644685266313, + "grad_norm": 0.8779923853134601, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0932848453521729, + "step": 377 + }, + { + "epoch": 0.08715702098224579, + "grad_norm": 0.7579888078286682, + "learning_rate": 1.737327188940092e-06, + "loss": 1.0530626773834229, + "step": 378 + }, + { + "epoch": 0.08738759511182845, + "grad_norm": 0.8123881302713649, + "learning_rate": 1.7419354838709676e-06, + "loss": 1.09238600730896, + "step": 379 + }, + { + "epoch": 0.08761816924141111, + "grad_norm": 0.8179032370650432, + "learning_rate": 1.7465437788018434e-06, + "loss": 1.10097336769104, + "step": 380 + }, + { + "epoch": 0.08784874337099377, + "grad_norm": 0.9066182701404021, + "learning_rate": 1.7511520737327188e-06, + "loss": 1.1483392715454102, + "step": 381 + }, + { + "epoch": 0.08807931750057643, + "grad_norm": 
0.7929757896387074, + "learning_rate": 1.7557603686635944e-06, + "loss": 0.9776606559753418, + "step": 382 + }, + { + "epoch": 0.08830989163015909, + "grad_norm": 0.7070713392242878, + "learning_rate": 1.7603686635944698e-06, + "loss": 0.9363219738006592, + "step": 383 + }, + { + "epoch": 0.08854046575974175, + "grad_norm": 0.8829017901239412, + "learning_rate": 1.7649769585253456e-06, + "loss": 1.1259841918945312, + "step": 384 + }, + { + "epoch": 0.08877103988932442, + "grad_norm": 0.8379913612296851, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.0652339458465576, + "step": 385 + }, + { + "epoch": 0.08900161401890708, + "grad_norm": 0.9016264696692738, + "learning_rate": 1.7741935483870966e-06, + "loss": 1.1088197231292725, + "step": 386 + }, + { + "epoch": 0.08923218814848974, + "grad_norm": 0.8434226175443441, + "learning_rate": 1.7788018433179724e-06, + "loss": 1.0171717405319214, + "step": 387 + }, + { + "epoch": 0.0894627622780724, + "grad_norm": 0.893116506697827, + "learning_rate": 1.7834101382488478e-06, + "loss": 1.0391405820846558, + "step": 388 + }, + { + "epoch": 0.08969333640765506, + "grad_norm": 0.9558704899064524, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9970325231552124, + "step": 389 + }, + { + "epoch": 0.08992391053723772, + "grad_norm": 0.8304308575964876, + "learning_rate": 1.792626728110599e-06, + "loss": 1.1427147388458252, + "step": 390 + }, + { + "epoch": 0.09015448466682038, + "grad_norm": 0.8319398781501527, + "learning_rate": 1.7972350230414746e-06, + "loss": 0.8830767273902893, + "step": 391 + }, + { + "epoch": 0.09038505879640304, + "grad_norm": 0.8983385232838542, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0469788312911987, + "step": 392 + }, + { + "epoch": 0.0906156329259857, + "grad_norm": 1.0033385350969977, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.022156834602356, + "step": 393 + }, + { + "epoch": 0.09084620705556837, + "grad_norm": 0.8626168210196775, + "learning_rate": 
1.8110599078341013e-06, + "loss": 1.0723674297332764, + "step": 394 + }, + { + "epoch": 0.09107678118515103, + "grad_norm": 0.8060308252194399, + "learning_rate": 1.8156682027649767e-06, + "loss": 0.9089772701263428, + "step": 395 + }, + { + "epoch": 0.09130735531473369, + "grad_norm": 0.8875270675183294, + "learning_rate": 1.8202764976958525e-06, + "loss": 1.1029877662658691, + "step": 396 + }, + { + "epoch": 0.09153792944431635, + "grad_norm": 0.94113090982248, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.998812198638916, + "step": 397 + }, + { + "epoch": 0.09176850357389901, + "grad_norm": 1.0016962443263888, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.116652250289917, + "step": 398 + }, + { + "epoch": 0.09199907770348167, + "grad_norm": 0.8575568562545252, + "learning_rate": 1.8341013824884793e-06, + "loss": 1.0071923732757568, + "step": 399 + }, + { + "epoch": 0.09222965183306434, + "grad_norm": 0.9758059413772218, + "learning_rate": 1.8387096774193547e-06, + "loss": 1.0713586807250977, + "step": 400 + }, + { + "epoch": 0.092460225962647, + "grad_norm": 0.8883854169226675, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0897400379180908, + "step": 401 + }, + { + "epoch": 0.09269080009222966, + "grad_norm": 0.9342253113098401, + "learning_rate": 1.8479262672811061e-06, + "loss": 0.9571444392204285, + "step": 402 + }, + { + "epoch": 0.09292137422181232, + "grad_norm": 0.9173411430110425, + "learning_rate": 1.8525345622119815e-06, + "loss": 0.9822309017181396, + "step": 403 + }, + { + "epoch": 0.09315194835139497, + "grad_norm": 0.8821702665182305, + "learning_rate": 1.857142857142857e-06, + "loss": 1.0010900497436523, + "step": 404 + }, + { + "epoch": 0.09338252248097763, + "grad_norm": 0.8417761058687274, + "learning_rate": 1.8617511520737325e-06, + "loss": 0.8548961877822876, + "step": 405 + }, + { + "epoch": 0.09361309661056029, + "grad_norm": 0.9390158571311362, + "learning_rate": 1.8663594470046083e-06, + "loss": 
1.0856781005859375, + "step": 406 + }, + { + "epoch": 0.09384367074014295, + "grad_norm": 0.9100547740927183, + "learning_rate": 1.8709677419354837e-06, + "loss": 1.0913856029510498, + "step": 407 + }, + { + "epoch": 0.09407424486972561, + "grad_norm": 1.0379606890495185, + "learning_rate": 1.8755760368663593e-06, + "loss": 0.9409916400909424, + "step": 408 + }, + { + "epoch": 0.09430481899930827, + "grad_norm": 0.9523962354053698, + "learning_rate": 1.880184331797235e-06, + "loss": 0.9950551390647888, + "step": 409 + }, + { + "epoch": 0.09453539312889093, + "grad_norm": 0.861704297563458, + "learning_rate": 1.8847926267281104e-06, + "loss": 0.9915211200714111, + "step": 410 + }, + { + "epoch": 0.0947659672584736, + "grad_norm": 0.9290893256356082, + "learning_rate": 1.889400921658986e-06, + "loss": 1.0381574630737305, + "step": 411 + }, + { + "epoch": 0.09499654138805626, + "grad_norm": 0.9228539253940193, + "learning_rate": 1.8940092165898616e-06, + "loss": 0.8911284804344177, + "step": 412 + }, + { + "epoch": 0.09522711551763892, + "grad_norm": 0.9426577567548815, + "learning_rate": 1.8986175115207372e-06, + "loss": 0.8757172226905823, + "step": 413 + }, + { + "epoch": 0.09545768964722158, + "grad_norm": 0.7971911677154941, + "learning_rate": 1.9032258064516128e-06, + "loss": 0.8362075090408325, + "step": 414 + }, + { + "epoch": 0.09568826377680424, + "grad_norm": 0.9051810749284879, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.906524658203125, + "step": 415 + }, + { + "epoch": 0.0959188379063869, + "grad_norm": 0.9304511138009018, + "learning_rate": 1.912442396313364e-06, + "loss": 1.100447654724121, + "step": 416 + }, + { + "epoch": 0.09614941203596956, + "grad_norm": 0.8321943001479206, + "learning_rate": 1.9170506912442396e-06, + "loss": 0.9658455848693848, + "step": 417 + }, + { + "epoch": 0.09637998616555223, + "grad_norm": 0.9393736008547379, + "learning_rate": 1.921658986175115e-06, + "loss": 0.971304714679718, + "step": 418 + }, + { + "epoch": 
0.09661056029513489, + "grad_norm": 0.8792304256570437, + "learning_rate": 1.926267281105991e-06, + "loss": 0.916153073310852, + "step": 419 + }, + { + "epoch": 0.09684113442471755, + "grad_norm": 0.960700719296913, + "learning_rate": 1.930875576036866e-06, + "loss": 0.9166572093963623, + "step": 420 + }, + { + "epoch": 0.09707170855430021, + "grad_norm": 0.8385154496673872, + "learning_rate": 1.935483870967742e-06, + "loss": 0.8754867315292358, + "step": 421 + }, + { + "epoch": 0.09730228268388287, + "grad_norm": 0.8951117289542856, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9507668018341064, + "step": 422 + }, + { + "epoch": 0.09753285681346553, + "grad_norm": 1.0251554467069826, + "learning_rate": 1.944700460829493e-06, + "loss": 0.8977904319763184, + "step": 423 + }, + { + "epoch": 0.0977634309430482, + "grad_norm": 0.8433365129133346, + "learning_rate": 1.9493087557603686e-06, + "loss": 0.8359580039978027, + "step": 424 + }, + { + "epoch": 0.09799400507263085, + "grad_norm": 0.8653781711190967, + "learning_rate": 1.953917050691244e-06, + "loss": 0.8928875923156738, + "step": 425 + }, + { + "epoch": 0.09822457920221352, + "grad_norm": 1.016156538051323, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9031360149383545, + "step": 426 + }, + { + "epoch": 0.09845515333179618, + "grad_norm": 0.9535004151409068, + "learning_rate": 1.963133640552995e-06, + "loss": 0.9135938286781311, + "step": 427 + }, + { + "epoch": 0.09868572746137884, + "grad_norm": 0.9913179989235431, + "learning_rate": 1.967741935483871e-06, + "loss": 0.8978056907653809, + "step": 428 + }, + { + "epoch": 0.0989163015909615, + "grad_norm": 0.7393338474601954, + "learning_rate": 1.9723502304147463e-06, + "loss": 0.8236517906188965, + "step": 429 + }, + { + "epoch": 0.09914687572054416, + "grad_norm": 0.9578937542491764, + "learning_rate": 1.976958525345622e-06, + "loss": 0.8279497027397156, + "step": 430 + }, + { + "epoch": 0.09937744985012681, + "grad_norm": 0.8687224271614162, 
+ "learning_rate": 1.9815668202764975e-06, + "loss": 0.9273175001144409, + "step": 431 + }, + { + "epoch": 0.09960802397970947, + "grad_norm": 0.9008857811722423, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8990100622177124, + "step": 432 + }, + { + "epoch": 0.09983859810929213, + "grad_norm": 0.9051637314581525, + "learning_rate": 1.9907834101382487e-06, + "loss": 0.9221487045288086, + "step": 433 + }, + { + "epoch": 0.10006917223887479, + "grad_norm": 0.8468556051112544, + "learning_rate": 1.995391705069124e-06, + "loss": 0.7376757264137268, + "step": 434 + }, + { + "epoch": 0.10029974636845745, + "grad_norm": 0.8651656722450953, + "learning_rate": 2e-06, + "loss": 0.8496265411376953, + "step": 435 + }, + { + "epoch": 0.10053032049804012, + "grad_norm": 0.8177327534577133, + "learning_rate": 1.9999999273199326e-06, + "loss": 0.73260897397995, + "step": 436 + }, + { + "epoch": 0.10076089462762278, + "grad_norm": 1.2545811776233549, + "learning_rate": 1.999999709279741e-06, + "loss": 0.9583776593208313, + "step": 437 + }, + { + "epoch": 0.10099146875720544, + "grad_norm": 0.7771019547302918, + "learning_rate": 1.9999993458794573e-06, + "loss": 0.810507595539093, + "step": 438 + }, + { + "epoch": 0.1012220428867881, + "grad_norm": 0.8756547566965167, + "learning_rate": 1.9999988371191337e-06, + "loss": 0.7957329750061035, + "step": 439 + }, + { + "epoch": 0.10145261701637076, + "grad_norm": 0.8325539024899065, + "learning_rate": 1.9999981829988444e-06, + "loss": 0.8141027688980103, + "step": 440 + }, + { + "epoch": 0.10168319114595342, + "grad_norm": 0.9256731752358246, + "learning_rate": 1.9999973835186847e-06, + "loss": 0.8454669117927551, + "step": 441 + }, + { + "epoch": 0.10191376527553608, + "grad_norm": 0.9086105801784582, + "learning_rate": 1.9999964386787706e-06, + "loss": 0.7966687679290771, + "step": 442 + }, + { + "epoch": 0.10214433940511874, + "grad_norm": 0.8420803725442093, + "learning_rate": 1.9999953484792394e-06, + "loss": 
0.8623852133750916, + "step": 443 + }, + { + "epoch": 0.1023749135347014, + "grad_norm": 0.976279238987049, + "learning_rate": 1.9999941129202494e-06, + "loss": 0.9604165554046631, + "step": 444 + }, + { + "epoch": 0.10260548766428407, + "grad_norm": 0.8427059790049124, + "learning_rate": 1.999992732001981e-06, + "loss": 0.7461415529251099, + "step": 445 + }, + { + "epoch": 0.10283606179386673, + "grad_norm": 0.8066869506045082, + "learning_rate": 1.9999912057246342e-06, + "loss": 0.7243722677230835, + "step": 446 + }, + { + "epoch": 0.10306663592344939, + "grad_norm": 0.8507773615519725, + "learning_rate": 1.999989534088431e-06, + "loss": 0.8466402292251587, + "step": 447 + }, + { + "epoch": 0.10329721005303205, + "grad_norm": 0.9504023717644374, + "learning_rate": 1.9999877170936142e-06, + "loss": 0.8062578439712524, + "step": 448 + }, + { + "epoch": 0.10352778418261471, + "grad_norm": 0.8134117517887439, + "learning_rate": 1.9999857547404484e-06, + "loss": 0.8979625701904297, + "step": 449 + }, + { + "epoch": 0.10375835831219737, + "grad_norm": 0.7889840834274454, + "learning_rate": 1.999983647029219e-06, + "loss": 0.7970046401023865, + "step": 450 + }, + { + "epoch": 0.10398893244178004, + "grad_norm": 0.8933195109789729, + "learning_rate": 1.999981393960231e-06, + "loss": 0.9027936458587646, + "step": 451 + }, + { + "epoch": 0.1042195065713627, + "grad_norm": 0.9428128689196352, + "learning_rate": 1.9999789955338133e-06, + "loss": 0.8347916007041931, + "step": 452 + }, + { + "epoch": 0.10445008070094536, + "grad_norm": 0.7636783217821816, + "learning_rate": 1.9999764517503146e-06, + "loss": 0.7856979370117188, + "step": 453 + }, + { + "epoch": 0.10468065483052802, + "grad_norm": 0.8588750023960529, + "learning_rate": 1.9999737626101037e-06, + "loss": 0.8370383381843567, + "step": 454 + }, + { + "epoch": 0.10491122896011068, + "grad_norm": 0.7607065236764231, + "learning_rate": 1.9999709281135718e-06, + "loss": 0.8629742860794067, + "step": 455 + }, + { + 
"epoch": 0.10514180308969334, + "grad_norm": 0.7031266959727278, + "learning_rate": 1.9999679482611315e-06, + "loss": 0.8187414407730103, + "step": 456 + }, + { + "epoch": 0.105372377219276, + "grad_norm": 0.7996485745988237, + "learning_rate": 1.9999648230532156e-06, + "loss": 0.8169279098510742, + "step": 457 + }, + { + "epoch": 0.10560295134885866, + "grad_norm": 0.7291726430068795, + "learning_rate": 1.999961552490278e-06, + "loss": 0.7186012268066406, + "step": 458 + }, + { + "epoch": 0.10583352547844131, + "grad_norm": 0.8814433348597316, + "learning_rate": 1.9999581365727947e-06, + "loss": 0.8088201284408569, + "step": 459 + }, + { + "epoch": 0.10606409960802397, + "grad_norm": 0.8945815471698739, + "learning_rate": 1.999954575301262e-06, + "loss": 0.7067796587944031, + "step": 460 + }, + { + "epoch": 0.10629467373760663, + "grad_norm": 0.8727386643724712, + "learning_rate": 1.9999508686761974e-06, + "loss": 0.8839461803436279, + "step": 461 + }, + { + "epoch": 0.1065252478671893, + "grad_norm": 0.7752145606049893, + "learning_rate": 1.99994701669814e-06, + "loss": 0.750046968460083, + "step": 462 + }, + { + "epoch": 0.10675582199677196, + "grad_norm": 0.8246620057663118, + "learning_rate": 1.999943019367649e-06, + "loss": 0.7954964637756348, + "step": 463 + }, + { + "epoch": 0.10698639612635462, + "grad_norm": 0.8139454190246876, + "learning_rate": 1.9999388766853065e-06, + "loss": 0.7178900241851807, + "step": 464 + }, + { + "epoch": 0.10721697025593728, + "grad_norm": 0.7775108685144316, + "learning_rate": 1.999934588651714e-06, + "loss": 0.7583869695663452, + "step": 465 + }, + { + "epoch": 0.10744754438551994, + "grad_norm": 0.7294165374555056, + "learning_rate": 1.999930155267495e-06, + "loss": 0.8068876266479492, + "step": 466 + }, + { + "epoch": 0.1076781185151026, + "grad_norm": 0.7396884936816651, + "learning_rate": 1.9999255765332946e-06, + "loss": 0.7507776021957397, + "step": 467 + }, + { + "epoch": 0.10790869264468526, + "grad_norm": 
0.7418847797451098, + "learning_rate": 1.999920852449777e-06, + "loss": 0.7719494104385376, + "step": 468 + }, + { + "epoch": 0.10813926677426793, + "grad_norm": 0.7666886626519035, + "learning_rate": 1.99991598301763e-06, + "loss": 0.7420990467071533, + "step": 469 + }, + { + "epoch": 0.10836984090385059, + "grad_norm": 0.7701810012003275, + "learning_rate": 1.9999109682375606e-06, + "loss": 0.7152374386787415, + "step": 470 + }, + { + "epoch": 0.10860041503343325, + "grad_norm": 0.6850973266115482, + "learning_rate": 1.9999058081102985e-06, + "loss": 0.7971220016479492, + "step": 471 + }, + { + "epoch": 0.10883098916301591, + "grad_norm": 0.7306176016482578, + "learning_rate": 1.9999005026365936e-06, + "loss": 0.774874746799469, + "step": 472 + }, + { + "epoch": 0.10906156329259857, + "grad_norm": 0.8957955356096076, + "learning_rate": 1.999895051817216e-06, + "loss": 0.7567731142044067, + "step": 473 + }, + { + "epoch": 0.10929213742218123, + "grad_norm": 0.9679087986333686, + "learning_rate": 1.99988945565296e-06, + "loss": 0.7221060991287231, + "step": 474 + }, + { + "epoch": 0.1095227115517639, + "grad_norm": 0.7758710632294333, + "learning_rate": 1.9998837141446378e-06, + "loss": 0.8064852952957153, + "step": 475 + }, + { + "epoch": 0.10975328568134655, + "grad_norm": 0.7342367942239104, + "learning_rate": 1.9998778272930842e-06, + "loss": 0.7329462766647339, + "step": 476 + }, + { + "epoch": 0.10998385981092922, + "grad_norm": 0.6944047501493505, + "learning_rate": 1.999871795099155e-06, + "loss": 0.715752363204956, + "step": 477 + }, + { + "epoch": 0.11021443394051188, + "grad_norm": 1.250464562888065, + "learning_rate": 1.9998656175637265e-06, + "loss": 0.8702882528305054, + "step": 478 + }, + { + "epoch": 0.11044500807009454, + "grad_norm": 0.9132853105204283, + "learning_rate": 1.9998592946876976e-06, + "loss": 0.8559622764587402, + "step": 479 + }, + { + "epoch": 0.1106755821996772, + "grad_norm": 1.0302853941011325, + "learning_rate": 
1.999852826471987e-06, + "loss": 0.910442590713501, + "step": 480 + }, + { + "epoch": 0.11090615632925986, + "grad_norm": 0.7658983046756905, + "learning_rate": 1.9998462129175347e-06, + "loss": 0.8159372806549072, + "step": 481 + }, + { + "epoch": 0.11113673045884252, + "grad_norm": 0.6814545269174561, + "learning_rate": 1.9998394540253022e-06, + "loss": 0.8120635747909546, + "step": 482 + }, + { + "epoch": 0.11136730458842518, + "grad_norm": 0.9382461503301303, + "learning_rate": 1.999832549796272e-06, + "loss": 0.7867682576179504, + "step": 483 + }, + { + "epoch": 0.11159787871800785, + "grad_norm": 0.7285854274509946, + "learning_rate": 1.999825500231448e-06, + "loss": 0.695517897605896, + "step": 484 + }, + { + "epoch": 0.1118284528475905, + "grad_norm": 0.7426222297635688, + "learning_rate": 1.999818305331854e-06, + "loss": 0.8402971029281616, + "step": 485 + }, + { + "epoch": 0.11205902697717317, + "grad_norm": 0.9496598665654408, + "learning_rate": 1.9998109650985372e-06, + "loss": 0.7987074851989746, + "step": 486 + }, + { + "epoch": 0.11228960110675582, + "grad_norm": 0.7601824170608918, + "learning_rate": 1.9998034795325634e-06, + "loss": 0.6525362133979797, + "step": 487 + }, + { + "epoch": 0.11252017523633848, + "grad_norm": 0.6649425764525309, + "learning_rate": 1.999795848635021e-06, + "loss": 0.6218863725662231, + "step": 488 + }, + { + "epoch": 0.11275074936592114, + "grad_norm": 0.6793237780262881, + "learning_rate": 1.99978807240702e-06, + "loss": 0.7225729823112488, + "step": 489 + }, + { + "epoch": 0.1129813234955038, + "grad_norm": 0.7289774462660574, + "learning_rate": 1.9997801508496893e-06, + "loss": 0.7553551197052002, + "step": 490 + }, + { + "epoch": 0.11321189762508646, + "grad_norm": 0.7070554840091658, + "learning_rate": 1.999772083964182e-06, + "loss": 0.6695772409439087, + "step": 491 + }, + { + "epoch": 0.11344247175466912, + "grad_norm": 0.7937000317220514, + "learning_rate": 1.999763871751669e-06, + "loss": 0.7683162689208984, + 
"step": 492 + }, + { + "epoch": 0.11367304588425178, + "grad_norm": 0.7958897510308529, + "learning_rate": 1.9997555142133457e-06, + "loss": 0.7761441469192505, + "step": 493 + }, + { + "epoch": 0.11390362001383444, + "grad_norm": 0.8391915745578431, + "learning_rate": 1.999747011350426e-06, + "loss": 0.7204692959785461, + "step": 494 + }, + { + "epoch": 0.1141341941434171, + "grad_norm": 0.6535908344557003, + "learning_rate": 1.999738363164146e-06, + "loss": 0.6960519552230835, + "step": 495 + }, + { + "epoch": 0.11436476827299977, + "grad_norm": 0.669834933810116, + "learning_rate": 1.999729569655763e-06, + "loss": 0.7502788305282593, + "step": 496 + }, + { + "epoch": 0.11459534240258243, + "grad_norm": 0.7119093873273127, + "learning_rate": 1.999720630826555e-06, + "loss": 0.7649067640304565, + "step": 497 + }, + { + "epoch": 0.11482591653216509, + "grad_norm": 0.865452520980124, + "learning_rate": 1.9997115466778214e-06, + "loss": 0.6867918968200684, + "step": 498 + }, + { + "epoch": 0.11505649066174775, + "grad_norm": 0.7725462530919065, + "learning_rate": 1.9997023172108828e-06, + "loss": 0.7324330806732178, + "step": 499 + }, + { + "epoch": 0.11528706479133041, + "grad_norm": 0.7493898462804314, + "learning_rate": 1.999692942427081e-06, + "loss": 0.7452527284622192, + "step": 500 + }, + { + "epoch": 0.11551763892091307, + "grad_norm": 0.8849003751162662, + "learning_rate": 1.9996834223277775e-06, + "loss": 0.8311381340026855, + "step": 501 + }, + { + "epoch": 0.11574821305049574, + "grad_norm": 0.7698737492516583, + "learning_rate": 1.999673756914358e-06, + "loss": 0.6955340504646301, + "step": 502 + }, + { + "epoch": 0.1159787871800784, + "grad_norm": 0.9035827861690212, + "learning_rate": 1.999663946188226e-06, + "loss": 0.802892804145813, + "step": 503 + }, + { + "epoch": 0.11620936130966106, + "grad_norm": 0.9827928009523055, + "learning_rate": 1.9996539901508086e-06, + "loss": 0.8307123184204102, + "step": 504 + }, + { + "epoch": 0.11643993543924372, + 
"grad_norm": 0.7167523084062808, + "learning_rate": 1.9996438888035525e-06, + "loss": 0.7604272365570068, + "step": 505 + }, + { + "epoch": 0.11667050956882638, + "grad_norm": 0.7887244154559485, + "learning_rate": 1.9996336421479256e-06, + "loss": 0.798006534576416, + "step": 506 + }, + { + "epoch": 0.11690108369840904, + "grad_norm": 0.9102232519285063, + "learning_rate": 1.999623250185418e-06, + "loss": 0.7342728972434998, + "step": 507 + }, + { + "epoch": 0.1171316578279917, + "grad_norm": 0.689331248687117, + "learning_rate": 1.9996127129175402e-06, + "loss": 0.7659468650817871, + "step": 508 + }, + { + "epoch": 0.11736223195757436, + "grad_norm": 0.9057052272338976, + "learning_rate": 1.999602030345824e-06, + "loss": 0.6467913389205933, + "step": 509 + }, + { + "epoch": 0.11759280608715703, + "grad_norm": 0.9026632882900626, + "learning_rate": 1.9995912024718214e-06, + "loss": 0.8207371234893799, + "step": 510 + }, + { + "epoch": 0.11782338021673969, + "grad_norm": 0.6427345565408408, + "learning_rate": 1.999580229297108e-06, + "loss": 0.6865919232368469, + "step": 511 + }, + { + "epoch": 0.11805395434632235, + "grad_norm": 0.9123825063372557, + "learning_rate": 1.999569110823277e-06, + "loss": 0.7367759346961975, + "step": 512 + }, + { + "epoch": 0.11828452847590501, + "grad_norm": 0.7732312467631449, + "learning_rate": 1.9995578470519455e-06, + "loss": 0.678460955619812, + "step": 513 + }, + { + "epoch": 0.11851510260548767, + "grad_norm": 0.9273893139854266, + "learning_rate": 1.999546437984751e-06, + "loss": 0.7442954182624817, + "step": 514 + }, + { + "epoch": 0.11874567673507032, + "grad_norm": 0.7064385006159516, + "learning_rate": 1.9995348836233515e-06, + "loss": 0.6881241798400879, + "step": 515 + }, + { + "epoch": 0.11897625086465298, + "grad_norm": 0.7494917485319132, + "learning_rate": 1.9995231839694267e-06, + "loss": 0.6957181692123413, + "step": 516 + }, + { + "epoch": 0.11920682499423564, + "grad_norm": 1.0228956088069594, + "learning_rate": 
1.9995113390246773e-06, + "loss": 0.655665934085846, + "step": 517 + }, + { + "epoch": 0.1194373991238183, + "grad_norm": 0.8789756041062182, + "learning_rate": 1.9994993487908245e-06, + "loss": 0.8156173229217529, + "step": 518 + }, + { + "epoch": 0.11966797325340096, + "grad_norm": 0.8973364358315123, + "learning_rate": 1.9994872132696125e-06, + "loss": 0.7063135504722595, + "step": 519 + }, + { + "epoch": 0.11989854738298363, + "grad_norm": 0.91785396837973, + "learning_rate": 1.9994749324628046e-06, + "loss": 0.694409966468811, + "step": 520 + }, + { + "epoch": 0.12012912151256629, + "grad_norm": 0.7331348179727938, + "learning_rate": 1.9994625063721852e-06, + "loss": 0.8167020082473755, + "step": 521 + }, + { + "epoch": 0.12035969564214895, + "grad_norm": 0.9326590546614593, + "learning_rate": 1.9994499349995615e-06, + "loss": 0.7214051485061646, + "step": 522 + }, + { + "epoch": 0.12059026977173161, + "grad_norm": 0.8993621490561152, + "learning_rate": 1.999437218346761e-06, + "loss": 0.8798317909240723, + "step": 523 + }, + { + "epoch": 0.12082084390131427, + "grad_norm": 0.6552492075288662, + "learning_rate": 1.9994243564156316e-06, + "loss": 0.684230387210846, + "step": 524 + }, + { + "epoch": 0.12105141803089693, + "grad_norm": 0.9112132053465716, + "learning_rate": 1.999411349208043e-06, + "loss": 0.7519755363464355, + "step": 525 + }, + { + "epoch": 0.1212819921604796, + "grad_norm": 0.8052315425352758, + "learning_rate": 1.9993981967258857e-06, + "loss": 0.8420398235321045, + "step": 526 + }, + { + "epoch": 0.12151256629006225, + "grad_norm": 0.7105743668928439, + "learning_rate": 1.999384898971073e-06, + "loss": 0.8349270820617676, + "step": 527 + }, + { + "epoch": 0.12174314041964492, + "grad_norm": 1.0983006521395142, + "learning_rate": 1.999371455945536e-06, + "loss": 0.794980525970459, + "step": 528 + }, + { + "epoch": 0.12197371454922758, + "grad_norm": 1.1816598770476783, + "learning_rate": 1.9993578676512294e-06, + "loss": 0.666529655456543, + 
"step": 529 + }, + { + "epoch": 0.12220428867881024, + "grad_norm": 0.7564948773505585, + "learning_rate": 1.999344134090129e-06, + "loss": 0.7356991767883301, + "step": 530 + }, + { + "epoch": 0.1224348628083929, + "grad_norm": 0.8210277180950322, + "learning_rate": 1.9993302552642305e-06, + "loss": 0.6289858818054199, + "step": 531 + }, + { + "epoch": 0.12266543693797556, + "grad_norm": 0.7570779839057131, + "learning_rate": 1.9993162311755516e-06, + "loss": 0.706937313079834, + "step": 532 + }, + { + "epoch": 0.12289601106755822, + "grad_norm": 0.8676215771749471, + "learning_rate": 1.99930206182613e-06, + "loss": 0.7265158891677856, + "step": 533 + }, + { + "epoch": 0.12312658519714088, + "grad_norm": 0.7802472371537522, + "learning_rate": 1.999287747218027e-06, + "loss": 0.6575910449028015, + "step": 534 + }, + { + "epoch": 0.12335715932672355, + "grad_norm": 0.6298254280489823, + "learning_rate": 1.999273287353322e-06, + "loss": 0.6696841716766357, + "step": 535 + }, + { + "epoch": 0.1235877334563062, + "grad_norm": 1.071079002554872, + "learning_rate": 1.9992586822341177e-06, + "loss": 0.7749101519584656, + "step": 536 + }, + { + "epoch": 0.12381830758588887, + "grad_norm": 0.9432884782892066, + "learning_rate": 1.9992439318625367e-06, + "loss": 0.6880518198013306, + "step": 537 + }, + { + "epoch": 0.12404888171547153, + "grad_norm": 0.7827285978985046, + "learning_rate": 1.999229036240723e-06, + "loss": 0.6871178150177002, + "step": 538 + }, + { + "epoch": 0.12427945584505419, + "grad_norm": 0.7976778538474537, + "learning_rate": 1.999213995370842e-06, + "loss": 0.5867285132408142, + "step": 539 + }, + { + "epoch": 0.12451002997463685, + "grad_norm": 0.9357527236724963, + "learning_rate": 1.99919880925508e-06, + "loss": 0.8276966214179993, + "step": 540 + }, + { + "epoch": 0.12474060410421951, + "grad_norm": 1.0175450529032033, + "learning_rate": 1.9991834778956445e-06, + "loss": 0.7710754871368408, + "step": 541 + }, + { + "epoch": 0.12497117823380217, + 
"grad_norm": 0.9390745817535735, + "learning_rate": 1.9991680012947642e-06, + "loss": 0.7753217816352844, + "step": 542 + }, + { + "epoch": 0.12520175236338482, + "grad_norm": 0.8094522929040034, + "learning_rate": 1.9991523794546886e-06, + "loss": 0.7906090617179871, + "step": 543 + }, + { + "epoch": 0.12543232649296748, + "grad_norm": 0.9340000664605023, + "learning_rate": 1.9991366123776885e-06, + "loss": 0.7199760675430298, + "step": 544 + }, + { + "epoch": 0.12566290062255014, + "grad_norm": 0.7023452308433018, + "learning_rate": 1.9991207000660556e-06, + "loss": 0.671667218208313, + "step": 545 + }, + { + "epoch": 0.1258934747521328, + "grad_norm": 0.8347026711317173, + "learning_rate": 1.9991046425221036e-06, + "loss": 0.7289182543754578, + "step": 546 + }, + { + "epoch": 0.12612404888171547, + "grad_norm": 0.7827652568460417, + "learning_rate": 1.999088439748166e-06, + "loss": 0.6894270181655884, + "step": 547 + }, + { + "epoch": 0.12635462301129813, + "grad_norm": 0.7280796152072353, + "learning_rate": 1.9990720917465983e-06, + "loss": 0.5861620306968689, + "step": 548 + }, + { + "epoch": 0.1265851971408808, + "grad_norm": 0.9057106564897087, + "learning_rate": 1.999055598519777e-06, + "loss": 0.7082245349884033, + "step": 549 + }, + { + "epoch": 0.12681577127046345, + "grad_norm": 0.9647506404446157, + "learning_rate": 1.999038960070099e-06, + "loss": 0.6746149659156799, + "step": 550 + }, + { + "epoch": 0.1270463454000461, + "grad_norm": 0.8620899067636014, + "learning_rate": 1.999022176399983e-06, + "loss": 0.7791188955307007, + "step": 551 + }, + { + "epoch": 0.12727691952962877, + "grad_norm": 0.7157725370776972, + "learning_rate": 1.999005247511869e-06, + "loss": 0.6371017694473267, + "step": 552 + }, + { + "epoch": 0.12750749365921143, + "grad_norm": 1.0373263968991309, + "learning_rate": 1.9989881734082182e-06, + "loss": 0.7006558179855347, + "step": 553 + }, + { + "epoch": 0.1277380677887941, + "grad_norm": 1.0670128946400503, + "learning_rate": 
1.9989709540915115e-06, + "loss": 0.7011476755142212, + "step": 554 + }, + { + "epoch": 0.12796864191837676, + "grad_norm": 0.7293348024241428, + "learning_rate": 1.998953589564252e-06, + "loss": 0.6518280506134033, + "step": 555 + }, + { + "epoch": 0.12819921604795942, + "grad_norm": 1.013490270581775, + "learning_rate": 1.9989360798289646e-06, + "loss": 0.703351616859436, + "step": 556 + }, + { + "epoch": 0.12842979017754208, + "grad_norm": 0.9007382613729068, + "learning_rate": 1.998918424888194e-06, + "loss": 0.7498817443847656, + "step": 557 + }, + { + "epoch": 0.12866036430712474, + "grad_norm": 0.7936147649672419, + "learning_rate": 1.998900624744507e-06, + "loss": 0.647042989730835, + "step": 558 + }, + { + "epoch": 0.1288909384367074, + "grad_norm": 1.058658035724676, + "learning_rate": 1.99888267940049e-06, + "loss": 0.7519131898880005, + "step": 559 + }, + { + "epoch": 0.12912151256629006, + "grad_norm": 0.9392201849899589, + "learning_rate": 1.9988645888587524e-06, + "loss": 0.8416757583618164, + "step": 560 + }, + { + "epoch": 0.12935208669587273, + "grad_norm": 0.7856467653874107, + "learning_rate": 1.9988463531219238e-06, + "loss": 0.7044156193733215, + "step": 561 + }, + { + "epoch": 0.1295826608254554, + "grad_norm": 0.7712707168267965, + "learning_rate": 1.9988279721926547e-06, + "loss": 0.5429179668426514, + "step": 562 + }, + { + "epoch": 0.12981323495503805, + "grad_norm": 0.8186921939471294, + "learning_rate": 1.9988094460736173e-06, + "loss": 0.6146735548973083, + "step": 563 + }, + { + "epoch": 0.1300438090846207, + "grad_norm": 0.8439852070799176, + "learning_rate": 1.9987907747675038e-06, + "loss": 0.7544587850570679, + "step": 564 + }, + { + "epoch": 0.13027438321420337, + "grad_norm": 0.9760725928946941, + "learning_rate": 1.998771958277029e-06, + "loss": 0.7344266772270203, + "step": 565 + }, + { + "epoch": 0.13050495734378603, + "grad_norm": 0.8485941936610121, + "learning_rate": 1.9987529966049276e-06, + "loss": 0.6952091455459595, + 
"step": 566 + }, + { + "epoch": 0.1307355314733687, + "grad_norm": 0.7996168239987546, + "learning_rate": 1.9987338897539563e-06, + "loss": 0.6164644956588745, + "step": 567 + }, + { + "epoch": 0.13096610560295135, + "grad_norm": 1.04815525718601, + "learning_rate": 1.998714637726892e-06, + "loss": 0.7554208636283875, + "step": 568 + }, + { + "epoch": 0.13119667973253402, + "grad_norm": 0.97358719596577, + "learning_rate": 1.9986952405265336e-06, + "loss": 0.6640980243682861, + "step": 569 + }, + { + "epoch": 0.13142725386211668, + "grad_norm": 0.8089360786109361, + "learning_rate": 1.9986756981557005e-06, + "loss": 0.6947968006134033, + "step": 570 + }, + { + "epoch": 0.13165782799169934, + "grad_norm": 0.8239726316605849, + "learning_rate": 1.9986560106172332e-06, + "loss": 0.5987592935562134, + "step": 571 + }, + { + "epoch": 0.131888402121282, + "grad_norm": 0.709030479654625, + "learning_rate": 1.9986361779139944e-06, + "loss": 0.5830701589584351, + "step": 572 + }, + { + "epoch": 0.13211897625086466, + "grad_norm": 1.1719328645727012, + "learning_rate": 1.9986162000488655e-06, + "loss": 0.6589827537536621, + "step": 573 + }, + { + "epoch": 0.13234955038044732, + "grad_norm": 0.795778409153881, + "learning_rate": 1.9985960770247514e-06, + "loss": 0.7761766910552979, + "step": 574 + }, + { + "epoch": 0.13258012451002998, + "grad_norm": 0.8403074018612, + "learning_rate": 1.998575808844577e-06, + "loss": 0.6817613244056702, + "step": 575 + }, + { + "epoch": 0.13281069863961265, + "grad_norm": 0.8817998372104671, + "learning_rate": 1.998555395511289e-06, + "loss": 0.553085207939148, + "step": 576 + }, + { + "epoch": 0.1330412727691953, + "grad_norm": 0.6885856342268037, + "learning_rate": 1.998534837027854e-06, + "loss": 0.6500711441040039, + "step": 577 + }, + { + "epoch": 0.13327184689877797, + "grad_norm": 1.046231764034874, + "learning_rate": 1.9985141333972605e-06, + "loss": 0.7818950414657593, + "step": 578 + }, + { + "epoch": 0.13350242102836063, + 
"grad_norm": 0.7987907466299384, + "learning_rate": 1.9984932846225178e-06, + "loss": 0.7030247449874878, + "step": 579 + }, + { + "epoch": 0.1337329951579433, + "grad_norm": 0.7031460051202854, + "learning_rate": 1.9984722907066572e-06, + "loss": 0.6336206197738647, + "step": 580 + }, + { + "epoch": 0.13396356928752595, + "grad_norm": 0.8178681347907562, + "learning_rate": 1.9984511516527295e-06, + "loss": 0.7483044862747192, + "step": 581 + }, + { + "epoch": 0.1341941434171086, + "grad_norm": 0.8070808524670383, + "learning_rate": 1.9984298674638084e-06, + "loss": 0.7124725580215454, + "step": 582 + }, + { + "epoch": 0.13442471754669127, + "grad_norm": 0.8209937510618921, + "learning_rate": 1.998408438142987e-06, + "loss": 0.623436450958252, + "step": 583 + }, + { + "epoch": 0.1346552916762739, + "grad_norm": 0.8592886051949084, + "learning_rate": 1.9983868636933804e-06, + "loss": 0.646303653717041, + "step": 584 + }, + { + "epoch": 0.13488586580585657, + "grad_norm": 0.715391883952278, + "learning_rate": 1.998365144118125e-06, + "loss": 0.6349619626998901, + "step": 585 + }, + { + "epoch": 0.13511643993543923, + "grad_norm": 0.842094849315078, + "learning_rate": 1.9983432794203778e-06, + "loss": 0.5222466588020325, + "step": 586 + }, + { + "epoch": 0.1353470140650219, + "grad_norm": 0.7893129778630776, + "learning_rate": 1.998321269603317e-06, + "loss": 0.7210453152656555, + "step": 587 + }, + { + "epoch": 0.13557758819460455, + "grad_norm": 0.8260995902689467, + "learning_rate": 1.998299114670142e-06, + "loss": 0.6829872131347656, + "step": 588 + }, + { + "epoch": 0.13580816232418721, + "grad_norm": 0.714861095640182, + "learning_rate": 1.998276814624073e-06, + "loss": 0.6493744254112244, + "step": 589 + }, + { + "epoch": 0.13603873645376988, + "grad_norm": 0.8350239344719634, + "learning_rate": 1.998254369468352e-06, + "loss": 0.6885819435119629, + "step": 590 + }, + { + "epoch": 0.13626931058335254, + "grad_norm": 0.7070632175859811, + "learning_rate": 
1.9982317792062415e-06, + "loss": 0.6393503546714783, + "step": 591 + }, + { + "epoch": 0.1364998847129352, + "grad_norm": 1.010551624947432, + "learning_rate": 1.998209043841025e-06, + "loss": 0.7243417501449585, + "step": 592 + }, + { + "epoch": 0.13673045884251786, + "grad_norm": 0.693273868923859, + "learning_rate": 1.9981861633760073e-06, + "loss": 0.5955190658569336, + "step": 593 + }, + { + "epoch": 0.13696103297210052, + "grad_norm": 0.89841301134605, + "learning_rate": 1.9981631378145147e-06, + "loss": 0.6907675862312317, + "step": 594 + }, + { + "epoch": 0.13719160710168318, + "grad_norm": 1.022542216960162, + "learning_rate": 1.9981399671598938e-06, + "loss": 0.8540418148040771, + "step": 595 + }, + { + "epoch": 0.13742218123126584, + "grad_norm": 0.850573072747265, + "learning_rate": 1.9981166514155128e-06, + "loss": 0.6558555364608765, + "step": 596 + }, + { + "epoch": 0.1376527553608485, + "grad_norm": 0.9448807343375427, + "learning_rate": 1.9980931905847607e-06, + "loss": 0.6902164220809937, + "step": 597 + }, + { + "epoch": 0.13788332949043117, + "grad_norm": 1.240663469028779, + "learning_rate": 1.9980695846710485e-06, + "loss": 0.7090387344360352, + "step": 598 + }, + { + "epoch": 0.13811390362001383, + "grad_norm": 0.8847772852436644, + "learning_rate": 1.9980458336778067e-06, + "loss": 0.5913621187210083, + "step": 599 + }, + { + "epoch": 0.1383444777495965, + "grad_norm": 0.864647475805302, + "learning_rate": 1.998021937608488e-06, + "loss": 0.6742709279060364, + "step": 600 + }, + { + "epoch": 0.13857505187917915, + "grad_norm": 0.9253166862332501, + "learning_rate": 1.997997896466566e-06, + "loss": 0.7156273126602173, + "step": 601 + }, + { + "epoch": 0.1388056260087618, + "grad_norm": 0.7104566809406643, + "learning_rate": 1.9979737102555358e-06, + "loss": 0.6039655208587646, + "step": 602 + }, + { + "epoch": 0.13903620013834447, + "grad_norm": 0.7521323143425293, + "learning_rate": 1.9979493789789123e-06, + "loss": 0.6437175273895264, + 
"step": 603 + }, + { + "epoch": 0.13926677426792713, + "grad_norm": 0.7922747435817725, + "learning_rate": 1.9979249026402327e-06, + "loss": 0.6037663221359253, + "step": 604 + }, + { + "epoch": 0.1394973483975098, + "grad_norm": 0.8526913554693543, + "learning_rate": 1.9979002812430544e-06, + "loss": 0.6014829874038696, + "step": 605 + }, + { + "epoch": 0.13972792252709246, + "grad_norm": 0.9960319429386536, + "learning_rate": 1.9978755147909575e-06, + "loss": 0.5644428133964539, + "step": 606 + }, + { + "epoch": 0.13995849665667512, + "grad_norm": 0.7146930597248379, + "learning_rate": 1.997850603287541e-06, + "loss": 0.5483256578445435, + "step": 607 + }, + { + "epoch": 0.14018907078625778, + "grad_norm": 0.941628560636658, + "learning_rate": 1.9978255467364264e-06, + "loss": 0.6323236227035522, + "step": 608 + }, + { + "epoch": 0.14041964491584044, + "grad_norm": 0.8661204864695959, + "learning_rate": 1.9978003451412563e-06, + "loss": 0.677186131477356, + "step": 609 + }, + { + "epoch": 0.1406502190454231, + "grad_norm": 0.7467694215725664, + "learning_rate": 1.9977749985056934e-06, + "loss": 0.6768285036087036, + "step": 610 + }, + { + "epoch": 0.14088079317500576, + "grad_norm": 0.6978429335446755, + "learning_rate": 1.997749506833422e-06, + "loss": 0.5347047448158264, + "step": 611 + }, + { + "epoch": 0.14111136730458843, + "grad_norm": 0.8856138167235749, + "learning_rate": 1.9977238701281484e-06, + "loss": 0.7459336519241333, + "step": 612 + }, + { + "epoch": 0.1413419414341711, + "grad_norm": 0.7081494897690513, + "learning_rate": 1.9976980883935982e-06, + "loss": 0.6617337465286255, + "step": 613 + }, + { + "epoch": 0.14157251556375375, + "grad_norm": 0.766248846701343, + "learning_rate": 1.9976721616335197e-06, + "loss": 0.6214765310287476, + "step": 614 + }, + { + "epoch": 0.1418030896933364, + "grad_norm": 0.9664061776833217, + "learning_rate": 1.9976460898516814e-06, + "loss": 0.7468793392181396, + "step": 615 + }, + { + "epoch": 0.14203366382291907, 
+ "grad_norm": 0.9401860990707812, + "learning_rate": 1.9976198730518733e-06, + "loss": 0.676013708114624, + "step": 616 + }, + { + "epoch": 0.14226423795250173, + "grad_norm": 0.7984359669803877, + "learning_rate": 1.9975935112379057e-06, + "loss": 0.6350057125091553, + "step": 617 + }, + { + "epoch": 0.1424948120820844, + "grad_norm": 0.7941645196610473, + "learning_rate": 1.997567004413611e-06, + "loss": 0.6743426322937012, + "step": 618 + }, + { + "epoch": 0.14272538621166705, + "grad_norm": 0.9456320720036326, + "learning_rate": 1.9975403525828423e-06, + "loss": 0.5894836187362671, + "step": 619 + }, + { + "epoch": 0.14295596034124972, + "grad_norm": 1.1964423414511856, + "learning_rate": 1.9975135557494735e-06, + "loss": 0.7142415046691895, + "step": 620 + }, + { + "epoch": 0.14318653447083238, + "grad_norm": 0.7973360588907056, + "learning_rate": 1.9974866139174e-06, + "loss": 0.6402454972267151, + "step": 621 + }, + { + "epoch": 0.14341710860041504, + "grad_norm": 0.8197617379148621, + "learning_rate": 1.997459527090538e-06, + "loss": 0.6870661973953247, + "step": 622 + }, + { + "epoch": 0.1436476827299977, + "grad_norm": 0.9660987988063562, + "learning_rate": 1.9974322952728247e-06, + "loss": 0.5526704788208008, + "step": 623 + }, + { + "epoch": 0.14387825685958036, + "grad_norm": 0.8373386744091922, + "learning_rate": 1.9974049184682186e-06, + "loss": 0.6712762117385864, + "step": 624 + }, + { + "epoch": 0.14410883098916302, + "grad_norm": 0.8330659804365839, + "learning_rate": 1.997377396680699e-06, + "loss": 0.6064080595970154, + "step": 625 + }, + { + "epoch": 0.14433940511874568, + "grad_norm": 0.7758896299152315, + "learning_rate": 1.997349729914267e-06, + "loss": 0.5540767908096313, + "step": 626 + }, + { + "epoch": 0.14456997924832835, + "grad_norm": 0.7444906414234538, + "learning_rate": 1.997321918172944e-06, + "loss": 0.52143394947052, + "step": 627 + }, + { + "epoch": 0.144800553377911, + "grad_norm": 0.8091707705607726, + "learning_rate": 
1.9972939614607723e-06, + "loss": 0.7708792686462402, + "step": 628 + }, + { + "epoch": 0.14503112750749367, + "grad_norm": 1.0019252225174067, + "learning_rate": 1.997265859781816e-06, + "loss": 0.706872284412384, + "step": 629 + }, + { + "epoch": 0.14526170163707633, + "grad_norm": 0.7978488701627702, + "learning_rate": 1.99723761314016e-06, + "loss": 0.6643307209014893, + "step": 630 + }, + { + "epoch": 0.145492275766659, + "grad_norm": 1.0319728160628425, + "learning_rate": 1.9972092215399107e-06, + "loss": 0.6582880020141602, + "step": 631 + }, + { + "epoch": 0.14572284989624165, + "grad_norm": 0.7041979367649327, + "learning_rate": 1.997180684985194e-06, + "loss": 0.5704749822616577, + "step": 632 + }, + { + "epoch": 0.1459534240258243, + "grad_norm": 0.9160954038448087, + "learning_rate": 1.997152003480159e-06, + "loss": 0.6021866798400879, + "step": 633 + }, + { + "epoch": 0.14618399815540697, + "grad_norm": 1.0186739140184302, + "learning_rate": 1.9971231770289745e-06, + "loss": 0.6980762481689453, + "step": 634 + }, + { + "epoch": 0.14641457228498964, + "grad_norm": 0.9102171344238382, + "learning_rate": 1.9970942056358307e-06, + "loss": 0.6252140998840332, + "step": 635 + }, + { + "epoch": 0.1466451464145723, + "grad_norm": 0.8257085970836279, + "learning_rate": 1.9970650893049384e-06, + "loss": 0.5938589572906494, + "step": 636 + }, + { + "epoch": 0.14687572054415496, + "grad_norm": 0.7561297866548697, + "learning_rate": 1.997035828040531e-06, + "loss": 0.48420464992523193, + "step": 637 + }, + { + "epoch": 0.14710629467373762, + "grad_norm": 1.1749911282917564, + "learning_rate": 1.997006421846861e-06, + "loss": 0.6917499303817749, + "step": 638 + }, + { + "epoch": 0.14733686880332028, + "grad_norm": 0.9636395596462505, + "learning_rate": 1.9969768707282034e-06, + "loss": 0.7040522694587708, + "step": 639 + }, + { + "epoch": 0.14756744293290291, + "grad_norm": 0.7956128694692409, + "learning_rate": 1.9969471746888535e-06, + "loss": 0.6131860017776489, 
+ "step": 640 + }, + { + "epoch": 0.14779801706248558, + "grad_norm": 0.8000550155014501, + "learning_rate": 1.996917333733128e-06, + "loss": 0.7042062282562256, + "step": 641 + }, + { + "epoch": 0.14802859119206824, + "grad_norm": 0.9440344299424565, + "learning_rate": 1.9968873478653647e-06, + "loss": 0.6729326844215393, + "step": 642 + }, + { + "epoch": 0.1482591653216509, + "grad_norm": 0.8065631083250541, + "learning_rate": 1.996857217089922e-06, + "loss": 0.5801228880882263, + "step": 643 + }, + { + "epoch": 0.14848973945123356, + "grad_norm": 0.9584481605552773, + "learning_rate": 1.99682694141118e-06, + "loss": 0.6657989025115967, + "step": 644 + }, + { + "epoch": 0.14872031358081622, + "grad_norm": 0.8276892521273487, + "learning_rate": 1.9967965208335395e-06, + "loss": 0.5915562510490417, + "step": 645 + }, + { + "epoch": 0.14895088771039888, + "grad_norm": 0.8005079741579677, + "learning_rate": 1.9967659553614225e-06, + "loss": 0.6651759147644043, + "step": 646 + }, + { + "epoch": 0.14918146183998154, + "grad_norm": 0.785500734493462, + "learning_rate": 1.996735244999272e-06, + "loss": 0.625860333442688, + "step": 647 + }, + { + "epoch": 0.1494120359695642, + "grad_norm": 0.891334856659417, + "learning_rate": 1.996704389751552e-06, + "loss": 0.5731238126754761, + "step": 648 + }, + { + "epoch": 0.14964261009914687, + "grad_norm": 0.8662032133236818, + "learning_rate": 1.996673389622748e-06, + "loss": 0.6233615875244141, + "step": 649 + }, + { + "epoch": 0.14987318422872953, + "grad_norm": 0.7037223780792468, + "learning_rate": 1.9966422446173655e-06, + "loss": 0.5294947028160095, + "step": 650 + }, + { + "epoch": 0.1501037583583122, + "grad_norm": 0.8024689158972043, + "learning_rate": 1.996610954739932e-06, + "loss": 0.6234334707260132, + "step": 651 + }, + { + "epoch": 0.15033433248789485, + "grad_norm": 0.9863259301950934, + "learning_rate": 1.996579519994996e-06, + "loss": 0.5800126194953918, + "step": 652 + }, + { + "epoch": 0.1505649066174775, + 
"grad_norm": 0.9145794705086053, + "learning_rate": 1.9965479403871268e-06, + "loss": 0.7072441577911377, + "step": 653 + }, + { + "epoch": 0.15079548074706017, + "grad_norm": 0.8604804316966843, + "learning_rate": 1.996516215920915e-06, + "loss": 0.6350210309028625, + "step": 654 + }, + { + "epoch": 0.15102605487664283, + "grad_norm": 0.8272551438363688, + "learning_rate": 1.996484346600971e-06, + "loss": 0.6098944544792175, + "step": 655 + }, + { + "epoch": 0.1512566290062255, + "grad_norm": 0.7942772112843086, + "learning_rate": 1.996452332431929e-06, + "loss": 0.6593213081359863, + "step": 656 + }, + { + "epoch": 0.15148720313580816, + "grad_norm": 1.0870788996229426, + "learning_rate": 1.9964201734184413e-06, + "loss": 0.6997909545898438, + "step": 657 + }, + { + "epoch": 0.15171777726539082, + "grad_norm": 0.8320533396880808, + "learning_rate": 1.996387869565183e-06, + "loss": 0.5672277212142944, + "step": 658 + }, + { + "epoch": 0.15194835139497348, + "grad_norm": 0.8777194103988153, + "learning_rate": 1.99635542087685e-06, + "loss": 0.5835613012313843, + "step": 659 + }, + { + "epoch": 0.15217892552455614, + "grad_norm": 1.0025309187744094, + "learning_rate": 1.9963228273581587e-06, + "loss": 0.6001917123794556, + "step": 660 + }, + { + "epoch": 0.1524094996541388, + "grad_norm": 0.9582174045063777, + "learning_rate": 1.996290089013847e-06, + "loss": 0.6421242356300354, + "step": 661 + }, + { + "epoch": 0.15264007378372146, + "grad_norm": 0.8996449559898986, + "learning_rate": 1.996257205848674e-06, + "loss": 0.6888365745544434, + "step": 662 + }, + { + "epoch": 0.15287064791330413, + "grad_norm": 0.8017642329752841, + "learning_rate": 1.9962241778674193e-06, + "loss": 0.6694042682647705, + "step": 663 + }, + { + "epoch": 0.1531012220428868, + "grad_norm": 0.8362235694997654, + "learning_rate": 1.9961910050748836e-06, + "loss": 0.6754042506217957, + "step": 664 + }, + { + "epoch": 0.15333179617246945, + "grad_norm": 0.9429947161447709, + "learning_rate": 
1.9961576874758893e-06, + "loss": 0.576134979724884, + "step": 665 + }, + { + "epoch": 0.1535623703020521, + "grad_norm": 0.8634505888713511, + "learning_rate": 1.9961242250752796e-06, + "loss": 0.6548957824707031, + "step": 666 + }, + { + "epoch": 0.15379294443163477, + "grad_norm": 0.8494612034918267, + "learning_rate": 1.9960906178779183e-06, + "loss": 0.553372859954834, + "step": 667 + }, + { + "epoch": 0.15402351856121743, + "grad_norm": 0.8776559544848238, + "learning_rate": 1.9960568658886904e-06, + "loss": 0.6749063730239868, + "step": 668 + }, + { + "epoch": 0.1542540926908001, + "grad_norm": 0.8490449157821316, + "learning_rate": 1.9960229691125023e-06, + "loss": 0.6083666086196899, + "step": 669 + }, + { + "epoch": 0.15448466682038275, + "grad_norm": 0.9102216407598661, + "learning_rate": 1.995988927554281e-06, + "loss": 0.6468017101287842, + "step": 670 + }, + { + "epoch": 0.15471524094996542, + "grad_norm": 0.9054463862187181, + "learning_rate": 1.995954741218976e-06, + "loss": 0.7095121145248413, + "step": 671 + }, + { + "epoch": 0.15494581507954808, + "grad_norm": 0.8984210973740085, + "learning_rate": 1.995920410111555e-06, + "loss": 0.7167302966117859, + "step": 672 + }, + { + "epoch": 0.15517638920913074, + "grad_norm": 0.9754903087688545, + "learning_rate": 1.995885934237009e-06, + "loss": 0.6563462018966675, + "step": 673 + }, + { + "epoch": 0.1554069633387134, + "grad_norm": 0.7833661271069817, + "learning_rate": 1.9958513136003495e-06, + "loss": 0.638554573059082, + "step": 674 + }, + { + "epoch": 0.15563753746829606, + "grad_norm": 1.1119382875058637, + "learning_rate": 1.995816548206609e-06, + "loss": 0.7051291465759277, + "step": 675 + }, + { + "epoch": 0.15586811159787872, + "grad_norm": 0.879000690907415, + "learning_rate": 1.995781638060841e-06, + "loss": 0.6292394399642944, + "step": 676 + }, + { + "epoch": 0.15609868572746138, + "grad_norm": 0.7328696227145686, + "learning_rate": 1.99574658316812e-06, + "loss": 0.5266016721725464, + 
"step": 677 + }, + { + "epoch": 0.15632925985704405, + "grad_norm": 0.8021809147598078, + "learning_rate": 1.9957113835335415e-06, + "loss": 0.6059033870697021, + "step": 678 + }, + { + "epoch": 0.1565598339866267, + "grad_norm": 1.0012445200078677, + "learning_rate": 1.995676039162222e-06, + "loss": 0.5252447128295898, + "step": 679 + }, + { + "epoch": 0.15679040811620937, + "grad_norm": 0.9661534967224599, + "learning_rate": 1.9956405500593e-06, + "loss": 0.5963196754455566, + "step": 680 + }, + { + "epoch": 0.15702098224579203, + "grad_norm": 1.1191160767100459, + "learning_rate": 1.9956049162299322e-06, + "loss": 0.7262317538261414, + "step": 681 + }, + { + "epoch": 0.1572515563753747, + "grad_norm": 0.6929567178003186, + "learning_rate": 1.995569137679301e-06, + "loss": 0.6701623201370239, + "step": 682 + }, + { + "epoch": 0.15748213050495735, + "grad_norm": 1.1067508842107727, + "learning_rate": 1.9955332144126048e-06, + "loss": 0.6201569437980652, + "step": 683 + }, + { + "epoch": 0.15771270463454, + "grad_norm": 0.8729576302308473, + "learning_rate": 1.9954971464350673e-06, + "loss": 0.5338399410247803, + "step": 684 + }, + { + "epoch": 0.15794327876412267, + "grad_norm": 1.0541267316046437, + "learning_rate": 1.99546093375193e-06, + "loss": 0.6784210205078125, + "step": 685 + }, + { + "epoch": 0.15817385289370534, + "grad_norm": 0.7386088048688241, + "learning_rate": 1.9954245763684574e-06, + "loss": 0.6752813458442688, + "step": 686 + }, + { + "epoch": 0.158404427023288, + "grad_norm": 0.92655840240498, + "learning_rate": 1.9953880742899344e-06, + "loss": 0.6734355688095093, + "step": 687 + }, + { + "epoch": 0.15863500115287066, + "grad_norm": 1.0183777461857344, + "learning_rate": 1.995351427521667e-06, + "loss": 0.4857062101364136, + "step": 688 + }, + { + "epoch": 0.15886557528245332, + "grad_norm": 1.0292686670210065, + "learning_rate": 1.995314636068982e-06, + "loss": 0.6014343500137329, + "step": 689 + }, + { + "epoch": 0.15909614941203598, + 
"grad_norm": 0.6804392354384567, + "learning_rate": 1.995277699937227e-06, + "loss": 0.571649432182312, + "step": 690 + }, + { + "epoch": 0.15932672354161864, + "grad_norm": 0.8504096595688001, + "learning_rate": 1.9952406191317717e-06, + "loss": 0.5195556879043579, + "step": 691 + }, + { + "epoch": 0.1595572976712013, + "grad_norm": 1.0458950135227758, + "learning_rate": 1.995203393658006e-06, + "loss": 0.6520895957946777, + "step": 692 + }, + { + "epoch": 0.15978787180078396, + "grad_norm": 0.8415432435774023, + "learning_rate": 1.995166023521341e-06, + "loss": 0.7223460674285889, + "step": 693 + }, + { + "epoch": 0.16001844593036663, + "grad_norm": 0.9976828679541363, + "learning_rate": 1.9951285087272085e-06, + "loss": 0.5540120005607605, + "step": 694 + }, + { + "epoch": 0.1602490200599493, + "grad_norm": 0.9583028785849829, + "learning_rate": 1.995090849281062e-06, + "loss": 0.6539945602416992, + "step": 695 + }, + { + "epoch": 0.16047959418953192, + "grad_norm": 0.6996553037894581, + "learning_rate": 1.995053045188376e-06, + "loss": 0.595169186592102, + "step": 696 + }, + { + "epoch": 0.16071016831911458, + "grad_norm": 0.7841493951031693, + "learning_rate": 1.995015096454645e-06, + "loss": 0.564440131187439, + "step": 697 + }, + { + "epoch": 0.16094074244869724, + "grad_norm": 0.8288568147288248, + "learning_rate": 1.9949770030853857e-06, + "loss": 0.5934277772903442, + "step": 698 + }, + { + "epoch": 0.1611713165782799, + "grad_norm": 0.8284586150514878, + "learning_rate": 1.9949387650861353e-06, + "loss": 0.5645352602005005, + "step": 699 + }, + { + "epoch": 0.16140189070786257, + "grad_norm": 0.7431587516594325, + "learning_rate": 1.9949003824624517e-06, + "loss": 0.6437552571296692, + "step": 700 + }, + { + "epoch": 0.16163246483744523, + "grad_norm": 0.9720884796741701, + "learning_rate": 1.9948618552199147e-06, + "loss": 0.7052004337310791, + "step": 701 + }, + { + "epoch": 0.1618630389670279, + "grad_norm": 0.869867046800395, + "learning_rate": 
1.994823183364124e-06, + "loss": 0.6547686457633972, + "step": 702 + }, + { + "epoch": 0.16209361309661055, + "grad_norm": 0.8852938288883528, + "learning_rate": 1.994784366900702e-06, + "loss": 0.582744836807251, + "step": 703 + }, + { + "epoch": 0.1623241872261932, + "grad_norm": 0.9493941174588165, + "learning_rate": 1.99474540583529e-06, + "loss": 0.6668936014175415, + "step": 704 + }, + { + "epoch": 0.16255476135577587, + "grad_norm": 0.8294615633120708, + "learning_rate": 1.994706300173552e-06, + "loss": 0.6076918840408325, + "step": 705 + }, + { + "epoch": 0.16278533548535853, + "grad_norm": 0.8313694025786441, + "learning_rate": 1.994667049921172e-06, + "loss": 0.5053621530532837, + "step": 706 + }, + { + "epoch": 0.1630159096149412, + "grad_norm": 0.7898437620774408, + "learning_rate": 1.994627655083856e-06, + "loss": 0.5480915904045105, + "step": 707 + }, + { + "epoch": 0.16324648374452386, + "grad_norm": 0.8758549357955973, + "learning_rate": 1.99458811566733e-06, + "loss": 0.5851327776908875, + "step": 708 + }, + { + "epoch": 0.16347705787410652, + "grad_norm": 0.8484239464634123, + "learning_rate": 1.9945484316773415e-06, + "loss": 0.7058213949203491, + "step": 709 + }, + { + "epoch": 0.16370763200368918, + "grad_norm": 1.019538936894149, + "learning_rate": 1.9945086031196588e-06, + "loss": 0.6900246739387512, + "step": 710 + }, + { + "epoch": 0.16393820613327184, + "grad_norm": 0.9247299002550031, + "learning_rate": 1.994468630000072e-06, + "loss": 0.6088757514953613, + "step": 711 + }, + { + "epoch": 0.1641687802628545, + "grad_norm": 0.82117755294185, + "learning_rate": 1.9944285123243908e-06, + "loss": 0.6167945861816406, + "step": 712 + }, + { + "epoch": 0.16439935439243716, + "grad_norm": 0.8171354955480022, + "learning_rate": 1.994388250098447e-06, + "loss": 0.5842427015304565, + "step": 713 + }, + { + "epoch": 0.16462992852201982, + "grad_norm": 1.0833616769520091, + "learning_rate": 1.9943478433280937e-06, + "loss": 0.6709132194519043, + 
"step": 714 + }, + { + "epoch": 0.1648605026516025, + "grad_norm": 0.9486447603343945, + "learning_rate": 1.994307292019204e-06, + "loss": 0.5600479245185852, + "step": 715 + }, + { + "epoch": 0.16509107678118515, + "grad_norm": 0.9425877157645439, + "learning_rate": 1.994266596177672e-06, + "loss": 0.59420245885849, + "step": 716 + }, + { + "epoch": 0.1653216509107678, + "grad_norm": 0.8878954538957776, + "learning_rate": 1.994225755809414e-06, + "loss": 0.6098697185516357, + "step": 717 + }, + { + "epoch": 0.16555222504035047, + "grad_norm": 0.9792435497913993, + "learning_rate": 1.994184770920366e-06, + "loss": 0.5626084804534912, + "step": 718 + }, + { + "epoch": 0.16578279916993313, + "grad_norm": 0.827415177568412, + "learning_rate": 1.9941436415164854e-06, + "loss": 0.633317232131958, + "step": 719 + }, + { + "epoch": 0.1660133732995158, + "grad_norm": 0.7458775266643737, + "learning_rate": 1.994102367603752e-06, + "loss": 0.6629287004470825, + "step": 720 + }, + { + "epoch": 0.16624394742909845, + "grad_norm": 0.8804838237561229, + "learning_rate": 1.994060949188164e-06, + "loss": 0.6281176805496216, + "step": 721 + }, + { + "epoch": 0.16647452155868112, + "grad_norm": 0.7448717784104247, + "learning_rate": 1.994019386275743e-06, + "loss": 0.49195849895477295, + "step": 722 + }, + { + "epoch": 0.16670509568826378, + "grad_norm": 0.8001133040698483, + "learning_rate": 1.9939776788725295e-06, + "loss": 0.5165697932243347, + "step": 723 + }, + { + "epoch": 0.16693566981784644, + "grad_norm": 0.7747636914973149, + "learning_rate": 1.9939358269845867e-06, + "loss": 0.6294844150543213, + "step": 724 + }, + { + "epoch": 0.1671662439474291, + "grad_norm": 0.944854174617811, + "learning_rate": 1.9938938306179986e-06, + "loss": 0.6117822527885437, + "step": 725 + }, + { + "epoch": 0.16739681807701176, + "grad_norm": 0.8223415721013929, + "learning_rate": 1.9938516897788693e-06, + "loss": 0.5904515981674194, + "step": 726 + }, + { + "epoch": 0.16762739220659442, + 
"grad_norm": 0.9451811550082199, + "learning_rate": 1.9938094044733247e-06, + "loss": 0.5453853011131287, + "step": 727 + }, + { + "epoch": 0.16785796633617708, + "grad_norm": 1.0093698810967915, + "learning_rate": 1.9937669747075107e-06, + "loss": 0.6724731922149658, + "step": 728 + }, + { + "epoch": 0.16808854046575974, + "grad_norm": 0.8787203913390783, + "learning_rate": 1.993724400487596e-06, + "loss": 0.4844778776168823, + "step": 729 + }, + { + "epoch": 0.1683191145953424, + "grad_norm": 1.0150110817624924, + "learning_rate": 1.9936816818197682e-06, + "loss": 0.6666063070297241, + "step": 730 + }, + { + "epoch": 0.16854968872492507, + "grad_norm": 0.8363215992575103, + "learning_rate": 1.9936388187102374e-06, + "loss": 0.49354803562164307, + "step": 731 + }, + { + "epoch": 0.16878026285450773, + "grad_norm": 1.011739420494133, + "learning_rate": 1.993595811165234e-06, + "loss": 0.6587027311325073, + "step": 732 + }, + { + "epoch": 0.1690108369840904, + "grad_norm": 0.8706809761457309, + "learning_rate": 1.9935526591910095e-06, + "loss": 0.5618065595626831, + "step": 733 + }, + { + "epoch": 0.16924141111367305, + "grad_norm": 1.0230867510580486, + "learning_rate": 1.993509362793837e-06, + "loss": 0.6332052946090698, + "step": 734 + }, + { + "epoch": 0.1694719852432557, + "grad_norm": 0.8938300688074264, + "learning_rate": 1.9934659219800095e-06, + "loss": 0.5888797044754028, + "step": 735 + }, + { + "epoch": 0.16970255937283837, + "grad_norm": 0.9600504381358347, + "learning_rate": 1.9934223367558418e-06, + "loss": 0.6995177865028381, + "step": 736 + }, + { + "epoch": 0.16993313350242104, + "grad_norm": 0.8183852978697493, + "learning_rate": 1.9933786071276693e-06, + "loss": 0.6117641925811768, + "step": 737 + }, + { + "epoch": 0.1701637076320037, + "grad_norm": 0.8824726889784998, + "learning_rate": 1.9933347331018487e-06, + "loss": 0.7138235569000244, + "step": 738 + }, + { + "epoch": 0.17039428176158636, + "grad_norm": 0.9234925675447027, + 
"learning_rate": 1.993290714684758e-06, + "loss": 0.6139661073684692, + "step": 739 + }, + { + "epoch": 0.17062485589116902, + "grad_norm": 0.9457487351494172, + "learning_rate": 1.9932465518827945e-06, + "loss": 0.6998997926712036, + "step": 740 + }, + { + "epoch": 0.17085543002075168, + "grad_norm": 0.8625145077640682, + "learning_rate": 1.9932022447023787e-06, + "loss": 0.5736757516860962, + "step": 741 + }, + { + "epoch": 0.17108600415033434, + "grad_norm": 0.7768775382949296, + "learning_rate": 1.993157793149951e-06, + "loss": 0.6069833040237427, + "step": 742 + }, + { + "epoch": 0.171316578279917, + "grad_norm": 0.9368489446003049, + "learning_rate": 1.9931131972319726e-06, + "loss": 0.618720531463623, + "step": 743 + }, + { + "epoch": 0.17154715240949966, + "grad_norm": 1.1182101771495103, + "learning_rate": 1.9930684569549263e-06, + "loss": 0.6918530464172363, + "step": 744 + }, + { + "epoch": 0.17177772653908233, + "grad_norm": 0.9107072762217621, + "learning_rate": 1.993023572325315e-06, + "loss": 0.5303134322166443, + "step": 745 + }, + { + "epoch": 0.172008300668665, + "grad_norm": 1.163525853024132, + "learning_rate": 1.9929785433496637e-06, + "loss": 0.5017606019973755, + "step": 746 + }, + { + "epoch": 0.17223887479824765, + "grad_norm": 0.8248835281602814, + "learning_rate": 1.9929333700345176e-06, + "loss": 0.5683910846710205, + "step": 747 + }, + { + "epoch": 0.1724694489278303, + "grad_norm": 1.024957040527593, + "learning_rate": 1.992888052386443e-06, + "loss": 0.7594112157821655, + "step": 748 + }, + { + "epoch": 0.17270002305741297, + "grad_norm": 0.8415419064063624, + "learning_rate": 1.9928425904120272e-06, + "loss": 0.5817109942436218, + "step": 749 + }, + { + "epoch": 0.17293059718699563, + "grad_norm": 0.9772344685918459, + "learning_rate": 1.9927969841178785e-06, + "loss": 0.74810391664505, + "step": 750 + }, + { + "epoch": 0.17316117131657827, + "grad_norm": 0.7709842631317299, + "learning_rate": 1.992751233510627e-06, + "loss": 
0.5620408654212952, + "step": 751 + }, + { + "epoch": 0.17339174544616093, + "grad_norm": 0.9147017514524429, + "learning_rate": 1.9927053385969224e-06, + "loss": 0.5661174654960632, + "step": 752 + }, + { + "epoch": 0.1736223195757436, + "grad_norm": 0.8721149149743948, + "learning_rate": 1.992659299383436e-06, + "loss": 0.6170656681060791, + "step": 753 + }, + { + "epoch": 0.17385289370532625, + "grad_norm": 0.8946316220934861, + "learning_rate": 1.99261311587686e-06, + "loss": 0.6399837136268616, + "step": 754 + }, + { + "epoch": 0.1740834678349089, + "grad_norm": 0.7741035474142021, + "learning_rate": 1.992566788083908e-06, + "loss": 0.646568775177002, + "step": 755 + }, + { + "epoch": 0.17431404196449157, + "grad_norm": 0.8936741351690501, + "learning_rate": 1.992520316011314e-06, + "loss": 0.6836358904838562, + "step": 756 + }, + { + "epoch": 0.17454461609407423, + "grad_norm": 0.8304614027509832, + "learning_rate": 1.9924736996658327e-06, + "loss": 0.7077229619026184, + "step": 757 + }, + { + "epoch": 0.1747751902236569, + "grad_norm": 0.87551528703017, + "learning_rate": 1.9924269390542408e-06, + "loss": 0.5127657651901245, + "step": 758 + }, + { + "epoch": 0.17500576435323956, + "grad_norm": 0.9006786249451013, + "learning_rate": 1.992380034183336e-06, + "loss": 0.49244552850723267, + "step": 759 + }, + { + "epoch": 0.17523633848282222, + "grad_norm": 0.8017561502743571, + "learning_rate": 1.9923329850599353e-06, + "loss": 0.6145986318588257, + "step": 760 + }, + { + "epoch": 0.17546691261240488, + "grad_norm": 1.0163805424999015, + "learning_rate": 1.9922857916908784e-06, + "loss": 0.5233397483825684, + "step": 761 + }, + { + "epoch": 0.17569748674198754, + "grad_norm": 0.9596772303146165, + "learning_rate": 1.992238454083025e-06, + "loss": 0.6296844482421875, + "step": 762 + }, + { + "epoch": 0.1759280608715702, + "grad_norm": 0.7860963753584104, + "learning_rate": 1.9921909722432565e-06, + "loss": 0.5274437665939331, + "step": 763 + }, + { + "epoch": 
0.17615863500115286, + "grad_norm": 0.8930810667791799, + "learning_rate": 1.9921433461784744e-06, + "loss": 0.6365554332733154, + "step": 764 + }, + { + "epoch": 0.17638920913073552, + "grad_norm": 0.9611521576454714, + "learning_rate": 1.992095575895602e-06, + "loss": 0.6256603002548218, + "step": 765 + }, + { + "epoch": 0.17661978326031819, + "grad_norm": 0.9488006285824869, + "learning_rate": 1.9920476614015827e-06, + "loss": 0.6914918422698975, + "step": 766 + }, + { + "epoch": 0.17685035738990085, + "grad_norm": 0.9925839476608436, + "learning_rate": 1.9919996027033823e-06, + "loss": 0.618436336517334, + "step": 767 + }, + { + "epoch": 0.1770809315194835, + "grad_norm": 1.0637307823847924, + "learning_rate": 1.9919513998079857e-06, + "loss": 0.7496027946472168, + "step": 768 + }, + { + "epoch": 0.17731150564906617, + "grad_norm": 0.873569070894671, + "learning_rate": 1.9919030527224e-06, + "loss": 0.6188616752624512, + "step": 769 + }, + { + "epoch": 0.17754207977864883, + "grad_norm": 0.9573370107752551, + "learning_rate": 1.991854561453653e-06, + "loss": 0.6525505185127258, + "step": 770 + }, + { + "epoch": 0.1777726539082315, + "grad_norm": 0.8791752874309303, + "learning_rate": 1.9918059260087933e-06, + "loss": 0.6302521228790283, + "step": 771 + }, + { + "epoch": 0.17800322803781415, + "grad_norm": 0.7767159097983319, + "learning_rate": 1.9917571463948905e-06, + "loss": 0.48817628622055054, + "step": 772 + }, + { + "epoch": 0.17823380216739682, + "grad_norm": 0.9997756560425097, + "learning_rate": 1.9917082226190357e-06, + "loss": 0.7571396231651306, + "step": 773 + }, + { + "epoch": 0.17846437629697948, + "grad_norm": 0.9019653117383005, + "learning_rate": 1.99165915468834e-06, + "loss": 0.6416890025138855, + "step": 774 + }, + { + "epoch": 0.17869495042656214, + "grad_norm": 0.9030141776784474, + "learning_rate": 1.9916099426099357e-06, + "loss": 0.5668659210205078, + "step": 775 + }, + { + "epoch": 0.1789255245561448, + "grad_norm": 
0.8616948701360102, + "learning_rate": 1.991560586390977e-06, + "loss": 0.5491495132446289, + "step": 776 + }, + { + "epoch": 0.17915609868572746, + "grad_norm": 0.8461739489170892, + "learning_rate": 1.991511086038637e-06, + "loss": 0.5596655607223511, + "step": 777 + }, + { + "epoch": 0.17938667281531012, + "grad_norm": 0.948797979696852, + "learning_rate": 1.991461441560113e-06, + "loss": 0.606618344783783, + "step": 778 + }, + { + "epoch": 0.17961724694489278, + "grad_norm": 0.8682290862864503, + "learning_rate": 1.9914116529626195e-06, + "loss": 0.6534444093704224, + "step": 779 + }, + { + "epoch": 0.17984782107447544, + "grad_norm": 0.7942772802909244, + "learning_rate": 1.9913617202533956e-06, + "loss": 0.6566994190216064, + "step": 780 + }, + { + "epoch": 0.1800783952040581, + "grad_norm": 0.8753236598884384, + "learning_rate": 1.9913116434396976e-06, + "loss": 0.6745898723602295, + "step": 781 + }, + { + "epoch": 0.18030896933364077, + "grad_norm": 0.8904483654623074, + "learning_rate": 1.991261422528806e-06, + "loss": 0.6260639429092407, + "step": 782 + }, + { + "epoch": 0.18053954346322343, + "grad_norm": 1.095081708934966, + "learning_rate": 1.9912110575280203e-06, + "loss": 0.6937930583953857, + "step": 783 + }, + { + "epoch": 0.1807701175928061, + "grad_norm": 0.7535766751550929, + "learning_rate": 1.991160548444662e-06, + "loss": 0.5220614671707153, + "step": 784 + }, + { + "epoch": 0.18100069172238875, + "grad_norm": 1.0171096783148863, + "learning_rate": 1.9911098952860725e-06, + "loss": 0.630463719367981, + "step": 785 + }, + { + "epoch": 0.1812312658519714, + "grad_norm": 0.9064677619585607, + "learning_rate": 1.9910590980596154e-06, + "loss": 0.5476818084716797, + "step": 786 + }, + { + "epoch": 0.18146183998155407, + "grad_norm": 0.8827497683061851, + "learning_rate": 1.9910081567726745e-06, + "loss": 0.619910478591919, + "step": 787 + }, + { + "epoch": 0.18169241411113674, + "grad_norm": 0.9583246792904453, + "learning_rate": 
1.990957071432654e-06, + "loss": 0.759405255317688, + "step": 788 + }, + { + "epoch": 0.1819229882407194, + "grad_norm": 0.9249642030902185, + "learning_rate": 1.9909058420469808e-06, + "loss": 0.6093606948852539, + "step": 789 + }, + { + "epoch": 0.18215356237030206, + "grad_norm": 1.0777393301256872, + "learning_rate": 1.9908544686231e-06, + "loss": 0.5358198285102844, + "step": 790 + }, + { + "epoch": 0.18238413649988472, + "grad_norm": 0.8619190562873736, + "learning_rate": 1.9908029511684806e-06, + "loss": 0.577926754951477, + "step": 791 + }, + { + "epoch": 0.18261471062946738, + "grad_norm": 1.0298704295501269, + "learning_rate": 1.990751289690611e-06, + "loss": 0.6232448816299438, + "step": 792 + }, + { + "epoch": 0.18284528475905004, + "grad_norm": 0.9837349749201401, + "learning_rate": 1.9906994841970005e-06, + "loss": 0.5461868047714233, + "step": 793 + }, + { + "epoch": 0.1830758588886327, + "grad_norm": 0.9430576362377001, + "learning_rate": 1.9906475346951793e-06, + "loss": 0.6074671745300293, + "step": 794 + }, + { + "epoch": 0.18330643301821536, + "grad_norm": 0.9936839742941572, + "learning_rate": 1.990595441192699e-06, + "loss": 0.7101696729660034, + "step": 795 + }, + { + "epoch": 0.18353700714779803, + "grad_norm": 0.950260898814123, + "learning_rate": 1.9905432036971318e-06, + "loss": 0.6507722735404968, + "step": 796 + }, + { + "epoch": 0.1837675812773807, + "grad_norm": 0.8942288113166778, + "learning_rate": 1.9904908222160715e-06, + "loss": 0.6497524380683899, + "step": 797 + }, + { + "epoch": 0.18399815540696335, + "grad_norm": 0.9396678930556792, + "learning_rate": 1.9904382967571315e-06, + "loss": 0.6359415054321289, + "step": 798 + }, + { + "epoch": 0.184228729536546, + "grad_norm": 0.8070326036364724, + "learning_rate": 1.9903856273279475e-06, + "loss": 0.6062989234924316, + "step": 799 + }, + { + "epoch": 0.18445930366612867, + "grad_norm": 0.9626677000162343, + "learning_rate": 1.9903328139361753e-06, + "loss": 0.5872690677642822, + 
"step": 800 + }, + { + "epoch": 0.18468987779571133, + "grad_norm": 0.7985705265040473, + "learning_rate": 1.9902798565894917e-06, + "loss": 0.541993260383606, + "step": 801 + }, + { + "epoch": 0.184920451925294, + "grad_norm": 0.9775943406877085, + "learning_rate": 1.9902267552955948e-06, + "loss": 0.6509004235267639, + "step": 802 + }, + { + "epoch": 0.18515102605487666, + "grad_norm": 1.032367389635004, + "learning_rate": 1.9901735100622034e-06, + "loss": 0.6994458436965942, + "step": 803 + }, + { + "epoch": 0.18538160018445932, + "grad_norm": 0.723727027388961, + "learning_rate": 1.9901201208970574e-06, + "loss": 0.5426214933395386, + "step": 804 + }, + { + "epoch": 0.18561217431404198, + "grad_norm": 0.9494744349432898, + "learning_rate": 1.9900665878079172e-06, + "loss": 0.5889894366264343, + "step": 805 + }, + { + "epoch": 0.18584274844362464, + "grad_norm": 0.8565255265724333, + "learning_rate": 1.990012910802564e-06, + "loss": 0.6455902457237244, + "step": 806 + }, + { + "epoch": 0.18607332257320727, + "grad_norm": 0.8487813974117321, + "learning_rate": 1.989959089888801e-06, + "loss": 0.6336048245429993, + "step": 807 + }, + { + "epoch": 0.18630389670278993, + "grad_norm": 0.8414189962242138, + "learning_rate": 1.9899051250744517e-06, + "loss": 0.6091762781143188, + "step": 808 + }, + { + "epoch": 0.1865344708323726, + "grad_norm": 0.9439572961008054, + "learning_rate": 1.9898510163673594e-06, + "loss": 0.5551953315734863, + "step": 809 + }, + { + "epoch": 0.18676504496195526, + "grad_norm": 1.0494491780231465, + "learning_rate": 1.9897967637753907e-06, + "loss": 0.6441607475280762, + "step": 810 + }, + { + "epoch": 0.18699561909153792, + "grad_norm": 0.886313339848662, + "learning_rate": 1.989742367306431e-06, + "loss": 0.5766205787658691, + "step": 811 + }, + { + "epoch": 0.18722619322112058, + "grad_norm": 0.8129745295139125, + "learning_rate": 1.9896878269683872e-06, + "loss": 0.624677836894989, + "step": 812 + }, + { + "epoch": 0.18745676735070324, + 
"grad_norm": 1.0883386432883795, + "learning_rate": 1.9896331427691878e-06, + "loss": 0.5942056775093079, + "step": 813 + }, + { + "epoch": 0.1876873414802859, + "grad_norm": 0.9421668652395382, + "learning_rate": 1.989578314716781e-06, + "loss": 0.5194109082221985, + "step": 814 + }, + { + "epoch": 0.18791791560986856, + "grad_norm": 0.9041080200693152, + "learning_rate": 1.9895233428191375e-06, + "loss": 0.5851193070411682, + "step": 815 + }, + { + "epoch": 0.18814848973945122, + "grad_norm": 0.7963655717285544, + "learning_rate": 1.989468227084248e-06, + "loss": 0.5596088171005249, + "step": 816 + }, + { + "epoch": 0.18837906386903389, + "grad_norm": 0.9364254304069746, + "learning_rate": 1.989412967520123e-06, + "loss": 0.608109712600708, + "step": 817 + }, + { + "epoch": 0.18860963799861655, + "grad_norm": 0.8927696059217924, + "learning_rate": 1.9893575641347957e-06, + "loss": 0.6488924026489258, + "step": 818 + }, + { + "epoch": 0.1888402121281992, + "grad_norm": 0.9447086482881396, + "learning_rate": 1.9893020169363202e-06, + "loss": 0.6668595671653748, + "step": 819 + }, + { + "epoch": 0.18907078625778187, + "grad_norm": 0.9937318511996248, + "learning_rate": 1.9892463259327702e-06, + "loss": 0.6516261696815491, + "step": 820 + }, + { + "epoch": 0.18930136038736453, + "grad_norm": 1.0796549259081865, + "learning_rate": 1.9891904911322408e-06, + "loss": 0.5960654020309448, + "step": 821 + }, + { + "epoch": 0.1895319345169472, + "grad_norm": 0.7909478658460368, + "learning_rate": 1.989134512542848e-06, + "loss": 0.5836078524589539, + "step": 822 + }, + { + "epoch": 0.18976250864652985, + "grad_norm": 0.8238472267757905, + "learning_rate": 1.98907839017273e-06, + "loss": 0.6233468651771545, + "step": 823 + }, + { + "epoch": 0.18999308277611252, + "grad_norm": 0.9807541829716023, + "learning_rate": 1.989022124030043e-06, + "loss": 0.6228024363517761, + "step": 824 + }, + { + "epoch": 0.19022365690569518, + "grad_norm": 0.8131035743107407, + "learning_rate": 
1.9889657141229674e-06, + "loss": 0.5549489259719849, + "step": 825 + }, + { + "epoch": 0.19045423103527784, + "grad_norm": 1.04900407843417, + "learning_rate": 1.988909160459703e-06, + "loss": 0.572743833065033, + "step": 826 + }, + { + "epoch": 0.1906848051648605, + "grad_norm": 0.9532449351501632, + "learning_rate": 1.988852463048469e-06, + "loss": 0.5483371019363403, + "step": 827 + }, + { + "epoch": 0.19091537929444316, + "grad_norm": 0.8589634934665029, + "learning_rate": 1.988795621897508e-06, + "loss": 0.6489086151123047, + "step": 828 + }, + { + "epoch": 0.19114595342402582, + "grad_norm": 0.8093738620503291, + "learning_rate": 1.9887386370150823e-06, + "loss": 0.5885359644889832, + "step": 829 + }, + { + "epoch": 0.19137652755360848, + "grad_norm": 1.1233507395706857, + "learning_rate": 1.988681508409475e-06, + "loss": 0.5725297927856445, + "step": 830 + }, + { + "epoch": 0.19160710168319114, + "grad_norm": 0.9186016287497916, + "learning_rate": 1.9886242360889907e-06, + "loss": 0.5165927410125732, + "step": 831 + }, + { + "epoch": 0.1918376758127738, + "grad_norm": 0.9873812028582082, + "learning_rate": 1.988566820061954e-06, + "loss": 0.4909062385559082, + "step": 832 + }, + { + "epoch": 0.19206824994235647, + "grad_norm": 0.8524339429885558, + "learning_rate": 1.988509260336711e-06, + "loss": 0.6611230373382568, + "step": 833 + }, + { + "epoch": 0.19229882407193913, + "grad_norm": 0.8054213393470881, + "learning_rate": 1.9884515569216296e-06, + "loss": 0.5702481269836426, + "step": 834 + }, + { + "epoch": 0.1925293982015218, + "grad_norm": 1.0204414620630202, + "learning_rate": 1.988393709825096e-06, + "loss": 0.5923126935958862, + "step": 835 + }, + { + "epoch": 0.19275997233110445, + "grad_norm": 0.9055032000924194, + "learning_rate": 1.98833571905552e-06, + "loss": 0.6054497957229614, + "step": 836 + }, + { + "epoch": 0.1929905464606871, + "grad_norm": 0.9248140875126212, + "learning_rate": 1.9882775846213305e-06, + "loss": 0.6688513159751892, + 
"step": 837 + }, + { + "epoch": 0.19322112059026977, + "grad_norm": 1.0273808455254545, + "learning_rate": 1.988219306530978e-06, + "loss": 0.5898394584655762, + "step": 838 + }, + { + "epoch": 0.19345169471985244, + "grad_norm": 0.9751112903331337, + "learning_rate": 1.9881608847929345e-06, + "loss": 0.575627326965332, + "step": 839 + }, + { + "epoch": 0.1936822688494351, + "grad_norm": 0.8673669914525766, + "learning_rate": 1.9881023194156913e-06, + "loss": 0.5392276048660278, + "step": 840 + }, + { + "epoch": 0.19391284297901776, + "grad_norm": 0.8706508008641746, + "learning_rate": 1.9880436104077624e-06, + "loss": 0.5464376211166382, + "step": 841 + }, + { + "epoch": 0.19414341710860042, + "grad_norm": 1.1088629334080236, + "learning_rate": 1.9879847577776804e-06, + "loss": 0.5483032464981079, + "step": 842 + }, + { + "epoch": 0.19437399123818308, + "grad_norm": 1.088158010228094, + "learning_rate": 1.9879257615340016e-06, + "loss": 0.583878219127655, + "step": 843 + }, + { + "epoch": 0.19460456536776574, + "grad_norm": 0.903659297701254, + "learning_rate": 1.9878666216853005e-06, + "loss": 0.5646623373031616, + "step": 844 + }, + { + "epoch": 0.1948351394973484, + "grad_norm": 0.8893037043091606, + "learning_rate": 1.9878073382401745e-06, + "loss": 0.4785343408584595, + "step": 845 + }, + { + "epoch": 0.19506571362693106, + "grad_norm": 0.8306997774077053, + "learning_rate": 1.987747911207241e-06, + "loss": 0.6247695684432983, + "step": 846 + }, + { + "epoch": 0.19529628775651373, + "grad_norm": 0.8871051444384922, + "learning_rate": 1.9876883405951377e-06, + "loss": 0.5686244368553162, + "step": 847 + }, + { + "epoch": 0.1955268618860964, + "grad_norm": 1.0693338597203925, + "learning_rate": 1.9876286264125242e-06, + "loss": 0.5887250900268555, + "step": 848 + }, + { + "epoch": 0.19575743601567905, + "grad_norm": 1.009687803574172, + "learning_rate": 1.9875687686680808e-06, + "loss": 0.6225967407226562, + "step": 849 + }, + { + "epoch": 0.1959880101452617, + 
"grad_norm": 0.8424215047754778, + "learning_rate": 1.987508767370508e-06, + "loss": 0.4695369601249695, + "step": 850 + }, + { + "epoch": 0.19621858427484437, + "grad_norm": 1.0270923710251258, + "learning_rate": 1.9874486225285276e-06, + "loss": 0.5248171091079712, + "step": 851 + }, + { + "epoch": 0.19644915840442703, + "grad_norm": 1.0947189066196994, + "learning_rate": 1.9873883341508825e-06, + "loss": 0.573886513710022, + "step": 852 + }, + { + "epoch": 0.1966797325340097, + "grad_norm": 0.980074050730982, + "learning_rate": 1.9873279022463365e-06, + "loss": 0.5309966802597046, + "step": 853 + }, + { + "epoch": 0.19691030666359235, + "grad_norm": 1.2273525906968545, + "learning_rate": 1.987267326823673e-06, + "loss": 0.7115850448608398, + "step": 854 + }, + { + "epoch": 0.19714088079317502, + "grad_norm": 1.65154587276706, + "learning_rate": 1.9872066078916984e-06, + "loss": 0.6970044374465942, + "step": 855 + }, + { + "epoch": 0.19737145492275768, + "grad_norm": 1.0520569639047552, + "learning_rate": 1.987145745459238e-06, + "loss": 0.5956458449363708, + "step": 856 + }, + { + "epoch": 0.19760202905234034, + "grad_norm": 0.8621512966256671, + "learning_rate": 1.9870847395351395e-06, + "loss": 0.6200698614120483, + "step": 857 + }, + { + "epoch": 0.197832603181923, + "grad_norm": 0.8987981187104104, + "learning_rate": 1.98702359012827e-06, + "loss": 0.6552712321281433, + "step": 858 + }, + { + "epoch": 0.19806317731150566, + "grad_norm": 0.8832934653512269, + "learning_rate": 1.986962297247519e-06, + "loss": 0.5995951294898987, + "step": 859 + }, + { + "epoch": 0.19829375144108832, + "grad_norm": 1.0415029103173328, + "learning_rate": 1.9869008609017946e-06, + "loss": 0.5903854966163635, + "step": 860 + }, + { + "epoch": 0.19852432557067098, + "grad_norm": 0.7946410320386238, + "learning_rate": 1.986839281100029e-06, + "loss": 0.49756956100463867, + "step": 861 + }, + { + "epoch": 0.19875489970025362, + "grad_norm": 0.8989937288923138, + "learning_rate": 
1.986777557851172e-06, + "loss": 0.6726386547088623, + "step": 862 + }, + { + "epoch": 0.19898547382983628, + "grad_norm": 1.066877002121069, + "learning_rate": 1.9867156911641963e-06, + "loss": 0.5941756963729858, + "step": 863 + }, + { + "epoch": 0.19921604795941894, + "grad_norm": 1.1426428571577942, + "learning_rate": 1.986653681048095e-06, + "loss": 0.6148152351379395, + "step": 864 + }, + { + "epoch": 0.1994466220890016, + "grad_norm": 0.8574337846446602, + "learning_rate": 1.9865915275118815e-06, + "loss": 0.5484675765037537, + "step": 865 + }, + { + "epoch": 0.19967719621858426, + "grad_norm": 1.279305752369778, + "learning_rate": 1.986529230564591e-06, + "loss": 0.5835011601448059, + "step": 866 + }, + { + "epoch": 0.19990777034816692, + "grad_norm": 1.2828587747963143, + "learning_rate": 1.9864667902152785e-06, + "loss": 0.5505619049072266, + "step": 867 + }, + { + "epoch": 0.20013834447774959, + "grad_norm": 0.978792866059614, + "learning_rate": 1.986404206473021e-06, + "loss": 0.6170759797096252, + "step": 868 + }, + { + "epoch": 0.20036891860733225, + "grad_norm": 0.9063283607010307, + "learning_rate": 1.9863414793469144e-06, + "loss": 0.6302823424339294, + "step": 869 + }, + { + "epoch": 0.2005994927369149, + "grad_norm": 0.9919923586713045, + "learning_rate": 1.9862786088460778e-06, + "loss": 0.6265357732772827, + "step": 870 + }, + { + "epoch": 0.20083006686649757, + "grad_norm": 0.8288163853607481, + "learning_rate": 1.9862155949796497e-06, + "loss": 0.5346760749816895, + "step": 871 + }, + { + "epoch": 0.20106064099608023, + "grad_norm": 1.0613032711669241, + "learning_rate": 1.98615243775679e-06, + "loss": 0.5480276346206665, + "step": 872 + }, + { + "epoch": 0.2012912151256629, + "grad_norm": 1.0504212966242243, + "learning_rate": 1.986089137186679e-06, + "loss": 0.615007758140564, + "step": 873 + }, + { + "epoch": 0.20152178925524555, + "grad_norm": 1.0424303204478471, + "learning_rate": 1.986025693278518e-06, + "loss": 0.598671555519104, + 
"step": 874 + }, + { + "epoch": 0.20175236338482821, + "grad_norm": 1.1162570964298844, + "learning_rate": 1.98596210604153e-06, + "loss": 0.6029553413391113, + "step": 875 + }, + { + "epoch": 0.20198293751441088, + "grad_norm": 0.9723766835428509, + "learning_rate": 1.985898375484957e-06, + "loss": 0.6854428052902222, + "step": 876 + }, + { + "epoch": 0.20221351164399354, + "grad_norm": 0.7502030102171089, + "learning_rate": 1.9858345016180636e-06, + "loss": 0.5032496452331543, + "step": 877 + }, + { + "epoch": 0.2024440857735762, + "grad_norm": 0.910423493721141, + "learning_rate": 1.9857704844501343e-06, + "loss": 0.5521007776260376, + "step": 878 + }, + { + "epoch": 0.20267465990315886, + "grad_norm": 0.9861926154372014, + "learning_rate": 1.9857063239904742e-06, + "loss": 0.6473567485809326, + "step": 879 + }, + { + "epoch": 0.20290523403274152, + "grad_norm": 0.9973567674127126, + "learning_rate": 1.9856420202484103e-06, + "loss": 0.528810977935791, + "step": 880 + }, + { + "epoch": 0.20313580816232418, + "grad_norm": 1.0663389238750165, + "learning_rate": 1.9855775732332898e-06, + "loss": 0.681857705116272, + "step": 881 + }, + { + "epoch": 0.20336638229190684, + "grad_norm": 0.9199566615284357, + "learning_rate": 1.9855129829544805e-06, + "loss": 0.6510526537895203, + "step": 882 + }, + { + "epoch": 0.2035969564214895, + "grad_norm": 1.0847608945381821, + "learning_rate": 1.985448249421371e-06, + "loss": 0.5690885782241821, + "step": 883 + }, + { + "epoch": 0.20382753055107217, + "grad_norm": 0.9067033263808438, + "learning_rate": 1.985383372643371e-06, + "loss": 0.6451331973075867, + "step": 884 + }, + { + "epoch": 0.20405810468065483, + "grad_norm": 0.7596187493834748, + "learning_rate": 1.9853183526299117e-06, + "loss": 0.493961900472641, + "step": 885 + }, + { + "epoch": 0.2042886788102375, + "grad_norm": 1.031307930072274, + "learning_rate": 1.9852531893904434e-06, + "loss": 0.5390207767486572, + "step": 886 + }, + { + "epoch": 0.20451925293982015, + 
"grad_norm": 0.9671201783822709, + "learning_rate": 1.9851878829344395e-06, + "loss": 0.5976558923721313, + "step": 887 + }, + { + "epoch": 0.2047498270694028, + "grad_norm": 0.9832697265495778, + "learning_rate": 1.9851224332713917e-06, + "loss": 0.539776623249054, + "step": 888 + }, + { + "epoch": 0.20498040119898547, + "grad_norm": 1.1606849770347532, + "learning_rate": 1.9850568404108144e-06, + "loss": 0.6791383624076843, + "step": 889 + }, + { + "epoch": 0.20521097532856813, + "grad_norm": 1.1599404347752247, + "learning_rate": 1.984991104362242e-06, + "loss": 0.6195741891860962, + "step": 890 + }, + { + "epoch": 0.2054415494581508, + "grad_norm": 1.0295013801913249, + "learning_rate": 1.9849252251352303e-06, + "loss": 0.5792666673660278, + "step": 891 + }, + { + "epoch": 0.20567212358773346, + "grad_norm": 0.7871401361859056, + "learning_rate": 1.984859202739355e-06, + "loss": 0.5633316040039062, + "step": 892 + }, + { + "epoch": 0.20590269771731612, + "grad_norm": 0.9078754261167402, + "learning_rate": 1.9847930371842137e-06, + "loss": 0.6152814626693726, + "step": 893 + }, + { + "epoch": 0.20613327184689878, + "grad_norm": 1.0024181714804654, + "learning_rate": 1.9847267284794234e-06, + "loss": 0.5584526658058167, + "step": 894 + }, + { + "epoch": 0.20636384597648144, + "grad_norm": 0.9442571191896375, + "learning_rate": 1.9846602766346235e-06, + "loss": 0.5526787042617798, + "step": 895 + }, + { + "epoch": 0.2065944201060641, + "grad_norm": 1.114741515810547, + "learning_rate": 1.984593681659473e-06, + "loss": 0.6851564049720764, + "step": 896 + }, + { + "epoch": 0.20682499423564676, + "grad_norm": 0.9529867069899208, + "learning_rate": 1.9845269435636524e-06, + "loss": 0.6012386083602905, + "step": 897 + }, + { + "epoch": 0.20705556836522943, + "grad_norm": 0.9587418141612076, + "learning_rate": 1.9844600623568626e-06, + "loss": 0.5515716075897217, + "step": 898 + }, + { + "epoch": 0.2072861424948121, + "grad_norm": 1.0489716310270325, + "learning_rate": 
1.9843930380488255e-06, + "loss": 0.6534323692321777, + "step": 899 + }, + { + "epoch": 0.20751671662439475, + "grad_norm": 0.9795829214559992, + "learning_rate": 1.9843258706492836e-06, + "loss": 0.726966381072998, + "step": 900 + }, + { + "epoch": 0.2077472907539774, + "grad_norm": 1.0154014646465384, + "learning_rate": 1.984258560168001e-06, + "loss": 0.6692399978637695, + "step": 901 + }, + { + "epoch": 0.20797786488356007, + "grad_norm": 0.8361205321250001, + "learning_rate": 1.9841911066147614e-06, + "loss": 0.5815941095352173, + "step": 902 + }, + { + "epoch": 0.20820843901314273, + "grad_norm": 0.8093430372283338, + "learning_rate": 1.98412350999937e-06, + "loss": 0.4850257933139801, + "step": 903 + }, + { + "epoch": 0.2084390131427254, + "grad_norm": 0.9321751727050823, + "learning_rate": 1.9840557703316524e-06, + "loss": 0.7309345006942749, + "step": 904 + }, + { + "epoch": 0.20866958727230805, + "grad_norm": 0.9487721653557605, + "learning_rate": 1.9839878876214556e-06, + "loss": 0.6246342658996582, + "step": 905 + }, + { + "epoch": 0.20890016140189072, + "grad_norm": 0.923401773715514, + "learning_rate": 1.983919861878647e-06, + "loss": 0.503870964050293, + "step": 906 + }, + { + "epoch": 0.20913073553147338, + "grad_norm": 0.9277576649885639, + "learning_rate": 1.9838516931131147e-06, + "loss": 0.5316766500473022, + "step": 907 + }, + { + "epoch": 0.20936130966105604, + "grad_norm": 0.9488124820166146, + "learning_rate": 1.983783381334768e-06, + "loss": 0.5707069039344788, + "step": 908 + }, + { + "epoch": 0.2095918837906387, + "grad_norm": 1.1481758251998657, + "learning_rate": 1.983714926553536e-06, + "loss": 0.5482156276702881, + "step": 909 + }, + { + "epoch": 0.20982245792022136, + "grad_norm": 0.8868748652499737, + "learning_rate": 1.98364632877937e-06, + "loss": 0.45747748017311096, + "step": 910 + }, + { + "epoch": 0.21005303204980402, + "grad_norm": 1.070435205795932, + "learning_rate": 1.9835775880222414e-06, + "loss": 0.5599262118339539, + 
"step": 911 + }, + { + "epoch": 0.21028360617938668, + "grad_norm": 0.8833178195747919, + "learning_rate": 1.9835087042921416e-06, + "loss": 0.5115377902984619, + "step": 912 + }, + { + "epoch": 0.21051418030896935, + "grad_norm": 1.0026720443060566, + "learning_rate": 1.9834396775990846e-06, + "loss": 0.6577836275100708, + "step": 913 + }, + { + "epoch": 0.210744754438552, + "grad_norm": 1.0996458728397183, + "learning_rate": 1.9833705079531033e-06, + "loss": 0.4979211091995239, + "step": 914 + }, + { + "epoch": 0.21097532856813467, + "grad_norm": 0.9038590231228891, + "learning_rate": 1.983301195364252e-06, + "loss": 0.5052670240402222, + "step": 915 + }, + { + "epoch": 0.21120590269771733, + "grad_norm": 0.9375736925419242, + "learning_rate": 1.9832317398426076e-06, + "loss": 0.5480808019638062, + "step": 916 + }, + { + "epoch": 0.2114364768273, + "grad_norm": 1.1234174619828885, + "learning_rate": 1.983162141398264e-06, + "loss": 0.5328841209411621, + "step": 917 + }, + { + "epoch": 0.21166705095688262, + "grad_norm": 1.0661654042909894, + "learning_rate": 1.98309240004134e-06, + "loss": 0.5572643280029297, + "step": 918 + }, + { + "epoch": 0.21189762508646529, + "grad_norm": 0.7370595537346776, + "learning_rate": 1.983022515781972e-06, + "loss": 0.5180699825286865, + "step": 919 + }, + { + "epoch": 0.21212819921604795, + "grad_norm": 0.9467461169752135, + "learning_rate": 1.9829524886303182e-06, + "loss": 0.5031566619873047, + "step": 920 + }, + { + "epoch": 0.2123587733456306, + "grad_norm": 1.0924744776428812, + "learning_rate": 1.9828823185965587e-06, + "loss": 0.6579925417900085, + "step": 921 + }, + { + "epoch": 0.21258934747521327, + "grad_norm": 1.0635734753276387, + "learning_rate": 1.982812005690893e-06, + "loss": 0.6107230186462402, + "step": 922 + }, + { + "epoch": 0.21281992160479593, + "grad_norm": 0.8209241554677639, + "learning_rate": 1.982741549923542e-06, + "loss": 0.5244725942611694, + "step": 923 + }, + { + "epoch": 0.2130504957343786, + 
"grad_norm": 0.8970249012108504, + "learning_rate": 1.9826709513047466e-06, + "loss": 0.5857048630714417, + "step": 924 + }, + { + "epoch": 0.21328106986396125, + "grad_norm": 1.1702999413512643, + "learning_rate": 1.9826002098447694e-06, + "loss": 0.6417914628982544, + "step": 925 + }, + { + "epoch": 0.21351164399354391, + "grad_norm": 1.025740647317304, + "learning_rate": 1.982529325553893e-06, + "loss": 0.6062248945236206, + "step": 926 + }, + { + "epoch": 0.21374221812312658, + "grad_norm": 0.8397411976395659, + "learning_rate": 1.982458298442422e-06, + "loss": 0.4870455265045166, + "step": 927 + }, + { + "epoch": 0.21397279225270924, + "grad_norm": 0.8931294029793581, + "learning_rate": 1.9823871285206802e-06, + "loss": 0.6552037000656128, + "step": 928 + }, + { + "epoch": 0.2142033663822919, + "grad_norm": 0.9703019761386622, + "learning_rate": 1.9823158157990133e-06, + "loss": 0.531679093837738, + "step": 929 + }, + { + "epoch": 0.21443394051187456, + "grad_norm": 1.2664544243150397, + "learning_rate": 1.982244360287787e-06, + "loss": 0.516847550868988, + "step": 930 + }, + { + "epoch": 0.21466451464145722, + "grad_norm": 0.810392988957607, + "learning_rate": 1.982172761997388e-06, + "loss": 0.47147709131240845, + "step": 931 + }, + { + "epoch": 0.21489508877103988, + "grad_norm": 0.8771741979565738, + "learning_rate": 1.982101020938224e-06, + "loss": 0.627938985824585, + "step": 932 + }, + { + "epoch": 0.21512566290062254, + "grad_norm": 1.0257080856710215, + "learning_rate": 1.9820291371207233e-06, + "loss": 0.639348030090332, + "step": 933 + }, + { + "epoch": 0.2153562370302052, + "grad_norm": 0.9702705556217962, + "learning_rate": 1.9819571105553354e-06, + "loss": 0.6480363607406616, + "step": 934 + }, + { + "epoch": 0.21558681115978787, + "grad_norm": 0.9260617050921398, + "learning_rate": 1.9818849412525293e-06, + "loss": 0.5776711702346802, + "step": 935 + }, + { + "epoch": 0.21581738528937053, + "grad_norm": 0.9042487017557694, + "learning_rate": 
1.9818126292227957e-06, + "loss": 0.5891472101211548, + "step": 936 + }, + { + "epoch": 0.2160479594189532, + "grad_norm": 0.8905401941241984, + "learning_rate": 1.9817401744766465e-06, + "loss": 0.5977755784988403, + "step": 937 + }, + { + "epoch": 0.21627853354853585, + "grad_norm": 0.8626457448308078, + "learning_rate": 1.981667577024613e-06, + "loss": 0.5263733863830566, + "step": 938 + }, + { + "epoch": 0.2165091076781185, + "grad_norm": 1.0627291912482457, + "learning_rate": 1.9815948368772484e-06, + "loss": 0.5440605878829956, + "step": 939 + }, + { + "epoch": 0.21673968180770117, + "grad_norm": 0.9629159186929203, + "learning_rate": 1.9815219540451263e-06, + "loss": 0.5140440464019775, + "step": 940 + }, + { + "epoch": 0.21697025593728383, + "grad_norm": 1.0494365886675714, + "learning_rate": 1.9814489285388402e-06, + "loss": 0.6741353273391724, + "step": 941 + }, + { + "epoch": 0.2172008300668665, + "grad_norm": 1.1329427006993176, + "learning_rate": 1.981375760369006e-06, + "loss": 0.6243258714675903, + "step": 942 + }, + { + "epoch": 0.21743140419644916, + "grad_norm": 1.1054961559311265, + "learning_rate": 1.981302449546259e-06, + "loss": 0.6363699436187744, + "step": 943 + }, + { + "epoch": 0.21766197832603182, + "grad_norm": 0.9214231813217233, + "learning_rate": 1.981228996081256e-06, + "loss": 0.5849490165710449, + "step": 944 + }, + { + "epoch": 0.21789255245561448, + "grad_norm": 0.8824229032075002, + "learning_rate": 1.9811553999846736e-06, + "loss": 0.43679118156433105, + "step": 945 + }, + { + "epoch": 0.21812312658519714, + "grad_norm": 0.8524209104471582, + "learning_rate": 1.9810816612672104e-06, + "loss": 0.5575870275497437, + "step": 946 + }, + { + "epoch": 0.2183537007147798, + "grad_norm": 1.2313981009960802, + "learning_rate": 1.9810077799395846e-06, + "loss": 0.5288122296333313, + "step": 947 + }, + { + "epoch": 0.21858427484436246, + "grad_norm": 0.9413824588491826, + "learning_rate": 1.9809337560125357e-06, + "loss": 
0.5618559718132019, + "step": 948 + }, + { + "epoch": 0.21881484897394513, + "grad_norm": 0.900237395227137, + "learning_rate": 1.980859589496824e-06, + "loss": 0.6346654891967773, + "step": 949 + }, + { + "epoch": 0.2190454231035278, + "grad_norm": 0.7859619018047411, + "learning_rate": 1.98078528040323e-06, + "loss": 0.5456810593605042, + "step": 950 + }, + { + "epoch": 0.21927599723311045, + "grad_norm": 1.096845447650345, + "learning_rate": 1.980710828742556e-06, + "loss": 0.6463650465011597, + "step": 951 + }, + { + "epoch": 0.2195065713626931, + "grad_norm": 0.8708852946707265, + "learning_rate": 1.980636234525624e-06, + "loss": 0.5013638734817505, + "step": 952 + }, + { + "epoch": 0.21973714549227577, + "grad_norm": 1.0813749561311563, + "learning_rate": 1.9805614977632763e-06, + "loss": 0.6522110104560852, + "step": 953 + }, + { + "epoch": 0.21996771962185843, + "grad_norm": 1.1282712003155921, + "learning_rate": 1.9804866184663775e-06, + "loss": 0.5864803791046143, + "step": 954 + }, + { + "epoch": 0.2201982937514411, + "grad_norm": 1.0131587624930238, + "learning_rate": 1.9804115966458116e-06, + "loss": 0.5261500477790833, + "step": 955 + }, + { + "epoch": 0.22042886788102375, + "grad_norm": 0.9727651996633074, + "learning_rate": 1.980336432312484e-06, + "loss": 0.585462212562561, + "step": 956 + }, + { + "epoch": 0.22065944201060642, + "grad_norm": 0.913173290527313, + "learning_rate": 1.9802611254773207e-06, + "loss": 0.5889539122581482, + "step": 957 + }, + { + "epoch": 0.22089001614018908, + "grad_norm": 0.9844451118331555, + "learning_rate": 1.980185676151268e-06, + "loss": 0.665162205696106, + "step": 958 + }, + { + "epoch": 0.22112059026977174, + "grad_norm": 0.9378356304402508, + "learning_rate": 1.9801100843452935e-06, + "loss": 0.5344980359077454, + "step": 959 + }, + { + "epoch": 0.2213511643993544, + "grad_norm": 0.9210142542004092, + "learning_rate": 1.980034350070385e-06, + "loss": 0.6301499009132385, + "step": 960 + }, + { + "epoch": 
0.22158173852893706, + "grad_norm": 1.0404902143094334, + "learning_rate": 1.9799584733375512e-06, + "loss": 0.5114584565162659, + "step": 961 + }, + { + "epoch": 0.22181231265851972, + "grad_norm": 1.0168872016124533, + "learning_rate": 1.979882454157822e-06, + "loss": 0.5199861526489258, + "step": 962 + }, + { + "epoch": 0.22204288678810238, + "grad_norm": 1.1826380086118446, + "learning_rate": 1.9798062925422472e-06, + "loss": 0.5336212515830994, + "step": 963 + }, + { + "epoch": 0.22227346091768505, + "grad_norm": 1.0189277044162137, + "learning_rate": 1.9797299885018977e-06, + "loss": 0.535847544670105, + "step": 964 + }, + { + "epoch": 0.2225040350472677, + "grad_norm": 1.1943664941065335, + "learning_rate": 1.979653542047865e-06, + "loss": 0.6234130859375, + "step": 965 + }, + { + "epoch": 0.22273460917685037, + "grad_norm": 0.9414245062598806, + "learning_rate": 1.979576953191262e-06, + "loss": 0.5017205476760864, + "step": 966 + }, + { + "epoch": 0.22296518330643303, + "grad_norm": 0.8271602877368085, + "learning_rate": 1.9795002219432204e-06, + "loss": 0.4982973337173462, + "step": 967 + }, + { + "epoch": 0.2231957574360157, + "grad_norm": 1.0821521338057418, + "learning_rate": 1.979423348314895e-06, + "loss": 0.47946417331695557, + "step": 968 + }, + { + "epoch": 0.22342633156559835, + "grad_norm": 0.9333636639659694, + "learning_rate": 1.97934633231746e-06, + "loss": 0.5431856513023376, + "step": 969 + }, + { + "epoch": 0.223656905695181, + "grad_norm": 1.010615347342822, + "learning_rate": 1.9792691739621097e-06, + "loss": 0.5355685949325562, + "step": 970 + }, + { + "epoch": 0.22388747982476367, + "grad_norm": 0.9115391310212676, + "learning_rate": 1.979191873260061e-06, + "loss": 0.6103906631469727, + "step": 971 + }, + { + "epoch": 0.22411805395434634, + "grad_norm": 0.9295016548118124, + "learning_rate": 1.9791144302225493e-06, + "loss": 0.538421094417572, + "step": 972 + }, + { + "epoch": 0.224348628083929, + "grad_norm": 1.2200934433979187, + 
"learning_rate": 1.9790368448608322e-06, + "loss": 0.6068445444107056, + "step": 973 + }, + { + "epoch": 0.22457920221351163, + "grad_norm": 0.8606144159525476, + "learning_rate": 1.9789591171861874e-06, + "loss": 0.463737815618515, + "step": 974 + }, + { + "epoch": 0.2248097763430943, + "grad_norm": 1.0217946560153375, + "learning_rate": 1.9788812472099135e-06, + "loss": 0.6588588953018188, + "step": 975 + }, + { + "epoch": 0.22504035047267695, + "grad_norm": 1.0288343828209117, + "learning_rate": 1.9788032349433297e-06, + "loss": 0.678712010383606, + "step": 976 + }, + { + "epoch": 0.22527092460225961, + "grad_norm": 1.1695805252394589, + "learning_rate": 1.9787250803977757e-06, + "loss": 0.6397948265075684, + "step": 977 + }, + { + "epoch": 0.22550149873184228, + "grad_norm": 1.029054993282064, + "learning_rate": 1.978646783584612e-06, + "loss": 0.5422782897949219, + "step": 978 + }, + { + "epoch": 0.22573207286142494, + "grad_norm": 0.9969509169785887, + "learning_rate": 1.9785683445152204e-06, + "loss": 0.5314444303512573, + "step": 979 + }, + { + "epoch": 0.2259626469910076, + "grad_norm": 1.0816366548169771, + "learning_rate": 1.9784897632010026e-06, + "loss": 0.6260710954666138, + "step": 980 + }, + { + "epoch": 0.22619322112059026, + "grad_norm": 1.6140506138107567, + "learning_rate": 1.9784110396533804e-06, + "loss": 0.6765384078025818, + "step": 981 + }, + { + "epoch": 0.22642379525017292, + "grad_norm": 0.9741870993027198, + "learning_rate": 1.9783321738837983e-06, + "loss": 0.6716702580451965, + "step": 982 + }, + { + "epoch": 0.22665436937975558, + "grad_norm": 0.9800524570597025, + "learning_rate": 1.978253165903719e-06, + "loss": 0.5537375211715698, + "step": 983 + }, + { + "epoch": 0.22688494350933824, + "grad_norm": 1.2650751897909203, + "learning_rate": 1.9781740157246285e-06, + "loss": 0.525878369808197, + "step": 984 + }, + { + "epoch": 0.2271155176389209, + "grad_norm": 1.1285639712327624, + "learning_rate": 1.978094723358031e-06, + "loss": 
0.6349027156829834, + "step": 985 + }, + { + "epoch": 0.22734609176850357, + "grad_norm": 0.9922350297605812, + "learning_rate": 1.9780152888154525e-06, + "loss": 0.5777440071105957, + "step": 986 + }, + { + "epoch": 0.22757666589808623, + "grad_norm": 0.8792919247604332, + "learning_rate": 1.9779357121084402e-06, + "loss": 0.6181483268737793, + "step": 987 + }, + { + "epoch": 0.2278072400276689, + "grad_norm": 1.113677830579263, + "learning_rate": 1.9778559932485606e-06, + "loss": 0.6364198923110962, + "step": 988 + }, + { + "epoch": 0.22803781415725155, + "grad_norm": 1.0528039871957056, + "learning_rate": 1.9777761322474024e-06, + "loss": 0.623460054397583, + "step": 989 + }, + { + "epoch": 0.2282683882868342, + "grad_norm": 1.0042426162492055, + "learning_rate": 1.977696129116574e-06, + "loss": 0.504749059677124, + "step": 990 + }, + { + "epoch": 0.22849896241641687, + "grad_norm": 0.9462650071116105, + "learning_rate": 1.9776159838677048e-06, + "loss": 0.5228890180587769, + "step": 991 + }, + { + "epoch": 0.22872953654599953, + "grad_norm": 0.983638268661895, + "learning_rate": 1.977535696512444e-06, + "loss": 0.5765929222106934, + "step": 992 + }, + { + "epoch": 0.2289601106755822, + "grad_norm": 1.0000819039461677, + "learning_rate": 1.977455267062463e-06, + "loss": 0.5165348052978516, + "step": 993 + }, + { + "epoch": 0.22919068480516486, + "grad_norm": 1.0528189784184039, + "learning_rate": 1.9773746955294525e-06, + "loss": 0.6056735515594482, + "step": 994 + }, + { + "epoch": 0.22942125893474752, + "grad_norm": 1.0625954437167437, + "learning_rate": 1.9772939819251245e-06, + "loss": 0.5430403351783752, + "step": 995 + }, + { + "epoch": 0.22965183306433018, + "grad_norm": 1.2611536344776966, + "learning_rate": 1.977213126261212e-06, + "loss": 0.5710945129394531, + "step": 996 + }, + { + "epoch": 0.22988240719391284, + "grad_norm": 0.9590894945496666, + "learning_rate": 1.977132128549468e-06, + "loss": 0.5189366936683655, + "step": 997 + }, + { + "epoch": 
0.2301129813234955, + "grad_norm": 1.229825794085491, + "learning_rate": 1.977050988801666e-06, + "loss": 0.6578037738800049, + "step": 998 + }, + { + "epoch": 0.23034355545307816, + "grad_norm": 1.0761110723698188, + "learning_rate": 1.9769697070296006e-06, + "loss": 0.5787034034729004, + "step": 999 + }, + { + "epoch": 0.23057412958266083, + "grad_norm": 1.0414208441736372, + "learning_rate": 1.976888283245087e-06, + "loss": 0.5169408321380615, + "step": 1000 + }, + { + "epoch": 0.2308047037122435, + "grad_norm": 1.1228864795023747, + "learning_rate": 1.976806717459961e-06, + "loss": 0.6326704025268555, + "step": 1001 + }, + { + "epoch": 0.23103527784182615, + "grad_norm": 1.2998118201322668, + "learning_rate": 1.9767250096860785e-06, + "loss": 0.5188414454460144, + "step": 1002 + }, + { + "epoch": 0.2312658519714088, + "grad_norm": 0.9684429634366722, + "learning_rate": 1.9766431599353173e-06, + "loss": 0.5788798928260803, + "step": 1003 + }, + { + "epoch": 0.23149642610099147, + "grad_norm": 1.011079377555661, + "learning_rate": 1.976561168219575e-06, + "loss": 0.5513355731964111, + "step": 1004 + }, + { + "epoch": 0.23172700023057413, + "grad_norm": 0.9242770139183195, + "learning_rate": 1.97647903455077e-06, + "loss": 0.5810542106628418, + "step": 1005 + }, + { + "epoch": 0.2319575743601568, + "grad_norm": 0.9036081245550505, + "learning_rate": 1.9763967589408407e-06, + "loss": 0.6541746854782104, + "step": 1006 + }, + { + "epoch": 0.23218814848973945, + "grad_norm": 0.972339176589073, + "learning_rate": 1.976314341401747e-06, + "loss": 0.48837774991989136, + "step": 1007 + }, + { + "epoch": 0.23241872261932212, + "grad_norm": 1.0622732331560878, + "learning_rate": 1.976231781945469e-06, + "loss": 0.514664888381958, + "step": 1008 + }, + { + "epoch": 0.23264929674890478, + "grad_norm": 1.1476741578183667, + "learning_rate": 1.976149080584008e-06, + "loss": 0.48295027017593384, + "step": 1009 + }, + { + "epoch": 0.23287987087848744, + "grad_norm": 
0.9532553897028984, + "learning_rate": 1.9760662373293847e-06, + "loss": 0.5975791811943054, + "step": 1010 + }, + { + "epoch": 0.2331104450080701, + "grad_norm": 1.0101722687438028, + "learning_rate": 1.9759832521936424e-06, + "loss": 0.4810718297958374, + "step": 1011 + }, + { + "epoch": 0.23334101913765276, + "grad_norm": 0.8377461102160731, + "learning_rate": 1.9759001251888425e-06, + "loss": 0.5984642505645752, + "step": 1012 + }, + { + "epoch": 0.23357159326723542, + "grad_norm": 1.1428510363276687, + "learning_rate": 1.975816856327069e-06, + "loss": 0.600128710269928, + "step": 1013 + }, + { + "epoch": 0.23380216739681808, + "grad_norm": 0.976646115631477, + "learning_rate": 1.9757334456204263e-06, + "loss": 0.5036175847053528, + "step": 1014 + }, + { + "epoch": 0.23403274152640074, + "grad_norm": 0.781296299293608, + "learning_rate": 1.975649893081038e-06, + "loss": 0.49270063638687134, + "step": 1015 + }, + { + "epoch": 0.2342633156559834, + "grad_norm": 1.0782515218974933, + "learning_rate": 1.97556619872105e-06, + "loss": 0.5337218642234802, + "step": 1016 + }, + { + "epoch": 0.23449388978556607, + "grad_norm": 1.279305397178248, + "learning_rate": 1.9754823625526277e-06, + "loss": 0.5263136625289917, + "step": 1017 + }, + { + "epoch": 0.23472446391514873, + "grad_norm": 1.1321753640293293, + "learning_rate": 1.975398384587958e-06, + "loss": 0.6271284818649292, + "step": 1018 + }, + { + "epoch": 0.2349550380447314, + "grad_norm": 0.9524936816808555, + "learning_rate": 1.975314264839248e-06, + "loss": 0.7009197473526001, + "step": 1019 + }, + { + "epoch": 0.23518561217431405, + "grad_norm": 1.0291281498015452, + "learning_rate": 1.9752300033187248e-06, + "loss": 0.5781605839729309, + "step": 1020 + }, + { + "epoch": 0.2354161863038967, + "grad_norm": 1.0439195983844425, + "learning_rate": 1.9751456000386367e-06, + "loss": 0.549934446811676, + "step": 1021 + }, + { + "epoch": 0.23564676043347937, + "grad_norm": 1.1313488046553661, + "learning_rate": 
1.9750610550112535e-06, + "loss": 0.5856816172599792, + "step": 1022 + }, + { + "epoch": 0.23587733456306204, + "grad_norm": 1.1355877980298148, + "learning_rate": 1.9749763682488638e-06, + "loss": 0.6225322484970093, + "step": 1023 + }, + { + "epoch": 0.2361079086926447, + "grad_norm": 0.8829653489765357, + "learning_rate": 1.9748915397637775e-06, + "loss": 0.5533155202865601, + "step": 1024 + }, + { + "epoch": 0.23633848282222736, + "grad_norm": 0.9964032830251005, + "learning_rate": 1.974806569568326e-06, + "loss": 0.4960908889770508, + "step": 1025 + }, + { + "epoch": 0.23656905695181002, + "grad_norm": 1.0642112431572752, + "learning_rate": 1.97472145767486e-06, + "loss": 0.5960450768470764, + "step": 1026 + }, + { + "epoch": 0.23679963108139268, + "grad_norm": 1.0609331852795814, + "learning_rate": 1.9746362040957517e-06, + "loss": 0.5653714537620544, + "step": 1027 + }, + { + "epoch": 0.23703020521097534, + "grad_norm": 0.9636699324332547, + "learning_rate": 1.9745508088433936e-06, + "loss": 0.6400578022003174, + "step": 1028 + }, + { + "epoch": 0.23726077934055798, + "grad_norm": 1.0105210896498236, + "learning_rate": 1.9744652719301987e-06, + "loss": 0.5459057092666626, + "step": 1029 + }, + { + "epoch": 0.23749135347014064, + "grad_norm": 1.0859828591491134, + "learning_rate": 1.9743795933686005e-06, + "loss": 0.46735280752182007, + "step": 1030 + }, + { + "epoch": 0.2377219275997233, + "grad_norm": 0.9440768334185448, + "learning_rate": 1.9742937731710533e-06, + "loss": 0.526339590549469, + "step": 1031 + }, + { + "epoch": 0.23795250172930596, + "grad_norm": 1.013077702945683, + "learning_rate": 1.9742078113500323e-06, + "loss": 0.5976641178131104, + "step": 1032 + }, + { + "epoch": 0.23818307585888862, + "grad_norm": 0.9655038700233691, + "learning_rate": 1.9741217079180325e-06, + "loss": 0.5331728458404541, + "step": 1033 + }, + { + "epoch": 0.23841364998847128, + "grad_norm": 0.9368079955738086, + "learning_rate": 1.9740354628875696e-06, + "loss": 
0.5743261575698853, + "step": 1034 + }, + { + "epoch": 0.23864422411805394, + "grad_norm": 0.9982653104570526, + "learning_rate": 1.973949076271181e-06, + "loss": 0.54700767993927, + "step": 1035 + }, + { + "epoch": 0.2388747982476366, + "grad_norm": 0.8919318869448586, + "learning_rate": 1.9738625480814235e-06, + "loss": 0.5483411550521851, + "step": 1036 + }, + { + "epoch": 0.23910537237721927, + "grad_norm": 0.9314153856468148, + "learning_rate": 1.973775878330875e-06, + "loss": 0.5677193403244019, + "step": 1037 + }, + { + "epoch": 0.23933594650680193, + "grad_norm": 0.9867371078797748, + "learning_rate": 1.973689067032133e-06, + "loss": 0.5092767477035522, + "step": 1038 + }, + { + "epoch": 0.2395665206363846, + "grad_norm": 0.9526587430164372, + "learning_rate": 1.973602114197818e-06, + "loss": 0.5618614554405212, + "step": 1039 + }, + { + "epoch": 0.23979709476596725, + "grad_norm": 1.1304270434054837, + "learning_rate": 1.9735150198405677e-06, + "loss": 0.5601966977119446, + "step": 1040 + }, + { + "epoch": 0.2400276688955499, + "grad_norm": 1.2376653334727166, + "learning_rate": 1.973427783973043e-06, + "loss": 0.5945397019386292, + "step": 1041 + }, + { + "epoch": 0.24025824302513257, + "grad_norm": 1.084452486357135, + "learning_rate": 1.9733404066079253e-06, + "loss": 0.42448002099990845, + "step": 1042 + }, + { + "epoch": 0.24048881715471523, + "grad_norm": 1.0671556472806993, + "learning_rate": 1.9732528877579146e-06, + "loss": 0.5237313508987427, + "step": 1043 + }, + { + "epoch": 0.2407193912842979, + "grad_norm": 1.085642930506958, + "learning_rate": 1.973165227435733e-06, + "loss": 0.6006743907928467, + "step": 1044 + }, + { + "epoch": 0.24094996541388056, + "grad_norm": 0.9267133414742948, + "learning_rate": 1.973077425654123e-06, + "loss": 0.547584056854248, + "step": 1045 + }, + { + "epoch": 0.24118053954346322, + "grad_norm": 1.0824218376223906, + "learning_rate": 1.972989482425847e-06, + "loss": 0.5472346544265747, + "step": 1046 + }, + { + 
"epoch": 0.24141111367304588, + "grad_norm": 1.1106806941355478, + "learning_rate": 1.972901397763689e-06, + "loss": 0.5962260365486145, + "step": 1047 + }, + { + "epoch": 0.24164168780262854, + "grad_norm": 0.9770536598072448, + "learning_rate": 1.9728131716804525e-06, + "loss": 0.561386227607727, + "step": 1048 + }, + { + "epoch": 0.2418722619322112, + "grad_norm": 1.2169602038706573, + "learning_rate": 1.9727248041889624e-06, + "loss": 0.46618524193763733, + "step": 1049 + }, + { + "epoch": 0.24210283606179386, + "grad_norm": 0.9641011081185654, + "learning_rate": 1.9726362953020643e-06, + "loss": 0.4684019088745117, + "step": 1050 + }, + { + "epoch": 0.24233341019137652, + "grad_norm": 1.1116892767931694, + "learning_rate": 1.9725476450326227e-06, + "loss": 0.5670303106307983, + "step": 1051 + }, + { + "epoch": 0.2425639843209592, + "grad_norm": 1.0413794589983083, + "learning_rate": 1.9724588533935246e-06, + "loss": 0.5451534986495972, + "step": 1052 + }, + { + "epoch": 0.24279455845054185, + "grad_norm": 1.3028651104025368, + "learning_rate": 1.9723699203976766e-06, + "loss": 0.578605592250824, + "step": 1053 + }, + { + "epoch": 0.2430251325801245, + "grad_norm": 1.072521418141734, + "learning_rate": 1.972280846058006e-06, + "loss": 0.5844857692718506, + "step": 1054 + }, + { + "epoch": 0.24325570670970717, + "grad_norm": 0.8882845471690917, + "learning_rate": 1.9721916303874603e-06, + "loss": 0.5152320861816406, + "step": 1055 + }, + { + "epoch": 0.24348628083928983, + "grad_norm": 0.994596822062513, + "learning_rate": 1.9721022733990087e-06, + "loss": 0.5108952522277832, + "step": 1056 + }, + { + "epoch": 0.2437168549688725, + "grad_norm": 1.2179028657479944, + "learning_rate": 1.97201277510564e-06, + "loss": 0.6345964670181274, + "step": 1057 + }, + { + "epoch": 0.24394742909845515, + "grad_norm": 1.0322609868377797, + "learning_rate": 1.9719231355203627e-06, + "loss": 0.6699639558792114, + "step": 1058 + }, + { + "epoch": 0.24417800322803782, + 
"grad_norm": 1.0786593444912098, + "learning_rate": 1.971833354656208e-06, + "loss": 0.5426750779151917, + "step": 1059 + }, + { + "epoch": 0.24440857735762048, + "grad_norm": 0.9469348439661489, + "learning_rate": 1.9717434325262253e-06, + "loss": 0.45797908306121826, + "step": 1060 + }, + { + "epoch": 0.24463915148720314, + "grad_norm": 0.9212142090514559, + "learning_rate": 1.9716533691434872e-06, + "loss": 0.46754708886146545, + "step": 1061 + }, + { + "epoch": 0.2448697256167858, + "grad_norm": 1.0419375830533737, + "learning_rate": 1.9715631645210838e-06, + "loss": 0.6593209505081177, + "step": 1062 + }, + { + "epoch": 0.24510029974636846, + "grad_norm": 0.8714440933836988, + "learning_rate": 1.9714728186721287e-06, + "loss": 0.5634866952896118, + "step": 1063 + }, + { + "epoch": 0.24533087387595112, + "grad_norm": 1.3414429697713321, + "learning_rate": 1.971382331609753e-06, + "loss": 0.5066277980804443, + "step": 1064 + }, + { + "epoch": 0.24556144800553378, + "grad_norm": 0.9735373407478976, + "learning_rate": 1.9712917033471113e-06, + "loss": 0.5721756219863892, + "step": 1065 + }, + { + "epoch": 0.24579202213511644, + "grad_norm": 0.9116883309182201, + "learning_rate": 1.9712009338973765e-06, + "loss": 0.5188664197921753, + "step": 1066 + }, + { + "epoch": 0.2460225962646991, + "grad_norm": 1.1314636983505006, + "learning_rate": 1.9711100232737434e-06, + "loss": 0.4879762828350067, + "step": 1067 + }, + { + "epoch": 0.24625317039428177, + "grad_norm": 1.2412816829375237, + "learning_rate": 1.971018971489426e-06, + "loss": 0.5169111490249634, + "step": 1068 + }, + { + "epoch": 0.24648374452386443, + "grad_norm": 1.2239551353327036, + "learning_rate": 1.9709277785576605e-06, + "loss": 0.7341418862342834, + "step": 1069 + }, + { + "epoch": 0.2467143186534471, + "grad_norm": 0.9353793197150668, + "learning_rate": 1.970836444491702e-06, + "loss": 0.48676228523254395, + "step": 1070 + }, + { + "epoch": 0.24694489278302975, + "grad_norm": 1.1049152340951753, + 
"learning_rate": 1.9707449693048277e-06, + "loss": 0.5594040751457214, + "step": 1071 + }, + { + "epoch": 0.2471754669126124, + "grad_norm": 1.1275772388460679, + "learning_rate": 1.970653353010334e-06, + "loss": 0.575579047203064, + "step": 1072 + }, + { + "epoch": 0.24740604104219507, + "grad_norm": 0.9990792550863451, + "learning_rate": 1.9705615956215375e-06, + "loss": 0.5212938189506531, + "step": 1073 + }, + { + "epoch": 0.24763661517177774, + "grad_norm": 1.2242480620016798, + "learning_rate": 1.970469697151777e-06, + "loss": 0.49838072061538696, + "step": 1074 + }, + { + "epoch": 0.2478671893013604, + "grad_norm": 1.0069439526224342, + "learning_rate": 1.9703776576144106e-06, + "loss": 0.505547285079956, + "step": 1075 + }, + { + "epoch": 0.24809776343094306, + "grad_norm": 0.9320138812686547, + "learning_rate": 1.970285477022817e-06, + "loss": 0.5236082077026367, + "step": 1076 + }, + { + "epoch": 0.24832833756052572, + "grad_norm": 1.1096851604663263, + "learning_rate": 1.9701931553903963e-06, + "loss": 0.5417677760124207, + "step": 1077 + }, + { + "epoch": 0.24855891169010838, + "grad_norm": 1.4437484296393372, + "learning_rate": 1.9701006927305676e-06, + "loss": 0.624547004699707, + "step": 1078 + }, + { + "epoch": 0.24878948581969104, + "grad_norm": 1.1814609406249081, + "learning_rate": 1.9700080890567713e-06, + "loss": 0.7127759456634521, + "step": 1079 + }, + { + "epoch": 0.2490200599492737, + "grad_norm": 1.1432146079503174, + "learning_rate": 1.9699153443824686e-06, + "loss": 0.44590264558792114, + "step": 1080 + }, + { + "epoch": 0.24925063407885636, + "grad_norm": 0.9565451374538135, + "learning_rate": 1.9698224587211407e-06, + "loss": 0.6311746835708618, + "step": 1081 + }, + { + "epoch": 0.24948120820843903, + "grad_norm": 0.870591902169041, + "learning_rate": 1.9697294320862898e-06, + "loss": 0.4837970733642578, + "step": 1082 + }, + { + "epoch": 0.2497117823380217, + "grad_norm": 0.8760016768814028, + "learning_rate": 1.969636264491438e-06, 
+ "loss": 0.5749634504318237, + "step": 1083 + }, + { + "epoch": 0.24994235646760435, + "grad_norm": 0.9733867387062589, + "learning_rate": 1.9695429559501283e-06, + "loss": 0.5002774000167847, + "step": 1084 + }, + { + "epoch": 0.250172930597187, + "grad_norm": 0.9904270135981337, + "learning_rate": 1.9694495064759236e-06, + "loss": 0.5407592058181763, + "step": 1085 + }, + { + "epoch": 0.25040350472676964, + "grad_norm": 0.9112103184885231, + "learning_rate": 1.969355916082408e-06, + "loss": 0.5557315349578857, + "step": 1086 + }, + { + "epoch": 0.2506340788563523, + "grad_norm": 1.073902907739282, + "learning_rate": 1.9692621847831865e-06, + "loss": 0.4710160493850708, + "step": 1087 + }, + { + "epoch": 0.25086465298593497, + "grad_norm": 0.946965380647112, + "learning_rate": 1.969168312591883e-06, + "loss": 0.5935187339782715, + "step": 1088 + }, + { + "epoch": 0.2510952271155176, + "grad_norm": 0.9849357353961209, + "learning_rate": 1.969074299522143e-06, + "loss": 0.5358916521072388, + "step": 1089 + }, + { + "epoch": 0.2513258012451003, + "grad_norm": 0.9196749680008564, + "learning_rate": 1.968980145587632e-06, + "loss": 0.40736621618270874, + "step": 1090 + }, + { + "epoch": 0.25155637537468295, + "grad_norm": 0.8048789415521217, + "learning_rate": 1.968885850802037e-06, + "loss": 0.4986698627471924, + "step": 1091 + }, + { + "epoch": 0.2517869495042656, + "grad_norm": 0.9340127152994311, + "learning_rate": 1.968791415179064e-06, + "loss": 0.5547258853912354, + "step": 1092 + }, + { + "epoch": 0.2520175236338483, + "grad_norm": 1.0477998347740531, + "learning_rate": 1.96869683873244e-06, + "loss": 0.5187167525291443, + "step": 1093 + }, + { + "epoch": 0.25224809776343093, + "grad_norm": 0.9456931065936238, + "learning_rate": 1.9686021214759136e-06, + "loss": 0.560575008392334, + "step": 1094 + }, + { + "epoch": 0.2524786718930136, + "grad_norm": 1.0595767044992972, + "learning_rate": 1.968507263423252e-06, + "loss": 0.6441233158111572, + "step": 1095 + }, 
+ { + "epoch": 0.25270924602259626, + "grad_norm": 1.1650850474563572, + "learning_rate": 1.9684122645882446e-06, + "loss": 0.6693669557571411, + "step": 1096 + }, + { + "epoch": 0.2529398201521789, + "grad_norm": 0.9107773905688578, + "learning_rate": 1.9683171249846992e-06, + "loss": 0.4713742434978485, + "step": 1097 + }, + { + "epoch": 0.2531703942817616, + "grad_norm": 1.0855755163203802, + "learning_rate": 1.9682218446264466e-06, + "loss": 0.5393046140670776, + "step": 1098 + }, + { + "epoch": 0.25340096841134424, + "grad_norm": 0.8304628447343301, + "learning_rate": 1.968126423527336e-06, + "loss": 0.44416874647140503, + "step": 1099 + }, + { + "epoch": 0.2536315425409269, + "grad_norm": 0.8560775526129268, + "learning_rate": 1.9680308617012383e-06, + "loss": 0.486186683177948, + "step": 1100 + }, + { + "epoch": 0.25386211667050956, + "grad_norm": 0.8812542184427957, + "learning_rate": 1.9679351591620446e-06, + "loss": 0.5523893237113953, + "step": 1101 + }, + { + "epoch": 0.2540926908000922, + "grad_norm": 0.9964866126205207, + "learning_rate": 1.967839315923665e-06, + "loss": 0.49889492988586426, + "step": 1102 + }, + { + "epoch": 0.2543232649296749, + "grad_norm": 1.1438608764608638, + "learning_rate": 1.9677433320000325e-06, + "loss": 0.6084630489349365, + "step": 1103 + }, + { + "epoch": 0.25455383905925755, + "grad_norm": 0.9684259335546852, + "learning_rate": 1.967647207405099e-06, + "loss": 0.5458555221557617, + "step": 1104 + }, + { + "epoch": 0.2547844131888402, + "grad_norm": 1.3299718075912128, + "learning_rate": 1.9675509421528367e-06, + "loss": 0.5453877449035645, + "step": 1105 + }, + { + "epoch": 0.25501498731842287, + "grad_norm": 1.0404901274691463, + "learning_rate": 1.9674545362572393e-06, + "loss": 0.5226954221725464, + "step": 1106 + }, + { + "epoch": 0.25524556144800553, + "grad_norm": 1.0740163604419912, + "learning_rate": 1.96735798973232e-06, + "loss": 0.5736720561981201, + "step": 1107 + }, + { + "epoch": 0.2554761355775882, + 
"grad_norm": 0.9184855028566775, + "learning_rate": 1.9672613025921135e-06, + "loss": 0.5474177598953247, + "step": 1108 + }, + { + "epoch": 0.25570670970717085, + "grad_norm": 1.2485055919980548, + "learning_rate": 1.967164474850673e-06, + "loss": 0.5146498084068298, + "step": 1109 + }, + { + "epoch": 0.2559372838367535, + "grad_norm": 1.1137167951471605, + "learning_rate": 1.967067506522075e-06, + "loss": 0.6319057941436768, + "step": 1110 + }, + { + "epoch": 0.2561678579663362, + "grad_norm": 0.9087550652455604, + "learning_rate": 1.9669703976204136e-06, + "loss": 0.44495588541030884, + "step": 1111 + }, + { + "epoch": 0.25639843209591884, + "grad_norm": 0.9108509097161608, + "learning_rate": 1.9668731481598052e-06, + "loss": 0.5331558585166931, + "step": 1112 + }, + { + "epoch": 0.2566290062255015, + "grad_norm": 0.9795245602848469, + "learning_rate": 1.9667757581543856e-06, + "loss": 0.5409468412399292, + "step": 1113 + }, + { + "epoch": 0.25685958035508416, + "grad_norm": 1.054007279778104, + "learning_rate": 1.9666782276183112e-06, + "loss": 0.5743308663368225, + "step": 1114 + }, + { + "epoch": 0.2570901544846668, + "grad_norm": 1.004577427685411, + "learning_rate": 1.96658055656576e-06, + "loss": 0.5612793564796448, + "step": 1115 + }, + { + "epoch": 0.2573207286142495, + "grad_norm": 0.9750416454144903, + "learning_rate": 1.9664827450109285e-06, + "loss": 0.554356575012207, + "step": 1116 + }, + { + "epoch": 0.25755130274383214, + "grad_norm": 0.9682247695156199, + "learning_rate": 1.9663847929680352e-06, + "loss": 0.5999840497970581, + "step": 1117 + }, + { + "epoch": 0.2577818768734148, + "grad_norm": 1.0370889815397122, + "learning_rate": 1.9662867004513184e-06, + "loss": 0.5152497291564941, + "step": 1118 + }, + { + "epoch": 0.25801245100299747, + "grad_norm": 1.098663296506931, + "learning_rate": 1.966188467475036e-06, + "loss": 0.6333990097045898, + "step": 1119 + }, + { + "epoch": 0.25824302513258013, + "grad_norm": 0.9734180757824468, + 
"learning_rate": 1.9660900940534685e-06, + "loss": 0.5826340913772583, + "step": 1120 + }, + { + "epoch": 0.2584735992621628, + "grad_norm": 1.0258650855361047, + "learning_rate": 1.965991580200915e-06, + "loss": 0.5968586206436157, + "step": 1121 + }, + { + "epoch": 0.25870417339174545, + "grad_norm": 1.1400845768454182, + "learning_rate": 1.9658929259316945e-06, + "loss": 0.6164212226867676, + "step": 1122 + }, + { + "epoch": 0.2589347475213281, + "grad_norm": 0.9979393096335119, + "learning_rate": 1.9657941312601487e-06, + "loss": 0.6115970611572266, + "step": 1123 + }, + { + "epoch": 0.2591653216509108, + "grad_norm": 1.0595728674513747, + "learning_rate": 1.9656951962006376e-06, + "loss": 0.5490012168884277, + "step": 1124 + }, + { + "epoch": 0.25939589578049344, + "grad_norm": 0.9502072685023252, + "learning_rate": 1.9655961207675425e-06, + "loss": 0.6350439786911011, + "step": 1125 + }, + { + "epoch": 0.2596264699100761, + "grad_norm": 1.0657411847577343, + "learning_rate": 1.965496904975266e-06, + "loss": 0.5667803287506104, + "step": 1126 + }, + { + "epoch": 0.25985704403965876, + "grad_norm": 1.1821679518558437, + "learning_rate": 1.9653975488382287e-06, + "loss": 0.6443949937820435, + "step": 1127 + }, + { + "epoch": 0.2600876181692414, + "grad_norm": 0.9716559479774245, + "learning_rate": 1.965298052370874e-06, + "loss": 0.6085849404335022, + "step": 1128 + }, + { + "epoch": 0.2603181922988241, + "grad_norm": 1.0823001356947075, + "learning_rate": 1.9651984155876644e-06, + "loss": 0.6633332967758179, + "step": 1129 + }, + { + "epoch": 0.26054876642840674, + "grad_norm": 1.2848504053653516, + "learning_rate": 1.965098638503083e-06, + "loss": 0.5997219085693359, + "step": 1130 + }, + { + "epoch": 0.2607793405579894, + "grad_norm": 1.0454096533900064, + "learning_rate": 1.9649987211316333e-06, + "loss": 0.5425878167152405, + "step": 1131 + }, + { + "epoch": 0.26100991468757206, + "grad_norm": 1.1511928917305188, + "learning_rate": 1.9648986634878397e-06, + 
"loss": 0.5894105434417725, + "step": 1132 + }, + { + "epoch": 0.2612404888171547, + "grad_norm": 1.0098199878370706, + "learning_rate": 1.9647984655862464e-06, + "loss": 0.5967395901679993, + "step": 1133 + }, + { + "epoch": 0.2614710629467374, + "grad_norm": 1.026032503619318, + "learning_rate": 1.964698127441418e-06, + "loss": 0.5129253268241882, + "step": 1134 + }, + { + "epoch": 0.26170163707632005, + "grad_norm": 0.8680242413092717, + "learning_rate": 1.96459764906794e-06, + "loss": 0.4503140449523926, + "step": 1135 + }, + { + "epoch": 0.2619322112059027, + "grad_norm": 1.3487730716398616, + "learning_rate": 1.964497030480418e-06, + "loss": 0.5533326864242554, + "step": 1136 + }, + { + "epoch": 0.26216278533548537, + "grad_norm": 1.020191268815397, + "learning_rate": 1.9643962716934776e-06, + "loss": 0.695278525352478, + "step": 1137 + }, + { + "epoch": 0.26239335946506803, + "grad_norm": 1.0637915159693183, + "learning_rate": 1.9642953727217654e-06, + "loss": 0.5198212265968323, + "step": 1138 + }, + { + "epoch": 0.2626239335946507, + "grad_norm": 0.8691408428805534, + "learning_rate": 1.9641943335799476e-06, + "loss": 0.4348503351211548, + "step": 1139 + }, + { + "epoch": 0.26285450772423335, + "grad_norm": 1.075781292907759, + "learning_rate": 1.9640931542827116e-06, + "loss": 0.5241343975067139, + "step": 1140 + }, + { + "epoch": 0.263085081853816, + "grad_norm": 1.1170175690927264, + "learning_rate": 1.9639918348447654e-06, + "loss": 0.6621984839439392, + "step": 1141 + }, + { + "epoch": 0.2633156559833987, + "grad_norm": 0.9797970310895017, + "learning_rate": 1.9638903752808358e-06, + "loss": 0.6091395020484924, + "step": 1142 + }, + { + "epoch": 0.26354623011298134, + "grad_norm": 1.358580155566318, + "learning_rate": 1.963788775605671e-06, + "loss": 0.4857162833213806, + "step": 1143 + }, + { + "epoch": 0.263776804242564, + "grad_norm": 1.155872598215321, + "learning_rate": 1.9636870358340408e-06, + "loss": 0.5912413597106934, + "step": 1144 + }, + { 
+ "epoch": 0.26400737837214666, + "grad_norm": 0.9493926626803307, + "learning_rate": 1.9635851559807326e-06, + "loss": 0.6006268858909607, + "step": 1145 + }, + { + "epoch": 0.2642379525017293, + "grad_norm": 1.0095494395510323, + "learning_rate": 1.9634831360605567e-06, + "loss": 0.5580735802650452, + "step": 1146 + }, + { + "epoch": 0.264468526631312, + "grad_norm": 1.09443652681985, + "learning_rate": 1.9633809760883423e-06, + "loss": 0.5554602146148682, + "step": 1147 + }, + { + "epoch": 0.26469910076089465, + "grad_norm": 1.0073361110439816, + "learning_rate": 1.9632786760789393e-06, + "loss": 0.5648301839828491, + "step": 1148 + }, + { + "epoch": 0.2649296748904773, + "grad_norm": 0.9958775096480507, + "learning_rate": 1.9631762360472186e-06, + "loss": 0.5317412614822388, + "step": 1149 + }, + { + "epoch": 0.26516024902005997, + "grad_norm": 0.8377541227122274, + "learning_rate": 1.96307365600807e-06, + "loss": 0.5608310699462891, + "step": 1150 + }, + { + "epoch": 0.26539082314964263, + "grad_norm": 0.9709108194630034, + "learning_rate": 1.962970935976405e-06, + "loss": 0.49922698736190796, + "step": 1151 + }, + { + "epoch": 0.2656213972792253, + "grad_norm": 1.0372577064435262, + "learning_rate": 1.9628680759671556e-06, + "loss": 0.5840054750442505, + "step": 1152 + }, + { + "epoch": 0.26585197140880795, + "grad_norm": 1.1264168952681184, + "learning_rate": 1.9627650759952727e-06, + "loss": 0.6038475632667542, + "step": 1153 + }, + { + "epoch": 0.2660825455383906, + "grad_norm": 0.969212515968761, + "learning_rate": 1.9626619360757284e-06, + "loss": 0.5923193097114563, + "step": 1154 + }, + { + "epoch": 0.2663131196679733, + "grad_norm": 1.1606889211687668, + "learning_rate": 1.962558656223516e-06, + "loss": 0.5278598666191101, + "step": 1155 + }, + { + "epoch": 0.26654369379755594, + "grad_norm": 0.9873103600473375, + "learning_rate": 1.9624552364536472e-06, + "loss": 0.47691023349761963, + "step": 1156 + }, + { + "epoch": 0.2667742679271386, + 
"grad_norm": 0.9087676067471127, + "learning_rate": 1.962351676781156e-06, + "loss": 0.5801899433135986, + "step": 1157 + }, + { + "epoch": 0.26700484205672126, + "grad_norm": 1.253961482177072, + "learning_rate": 1.962247977221095e-06, + "loss": 0.5170506238937378, + "step": 1158 + }, + { + "epoch": 0.2672354161863039, + "grad_norm": 1.0951542684812736, + "learning_rate": 1.9621441377885387e-06, + "loss": 0.6114981174468994, + "step": 1159 + }, + { + "epoch": 0.2674659903158866, + "grad_norm": 1.0027892727643062, + "learning_rate": 1.9620401584985807e-06, + "loss": 0.6377004384994507, + "step": 1160 + }, + { + "epoch": 0.26769656444546924, + "grad_norm": 0.9961094597216124, + "learning_rate": 1.9619360393663356e-06, + "loss": 0.6177431344985962, + "step": 1161 + }, + { + "epoch": 0.2679271385750519, + "grad_norm": 1.1384478708718946, + "learning_rate": 1.9618317804069384e-06, + "loss": 0.579784095287323, + "step": 1162 + }, + { + "epoch": 0.26815771270463457, + "grad_norm": 0.8744752952973797, + "learning_rate": 1.9617273816355444e-06, + "loss": 0.6078776121139526, + "step": 1163 + }, + { + "epoch": 0.2683882868342172, + "grad_norm": 0.9801356210694869, + "learning_rate": 1.961622843067328e-06, + "loss": 0.5583093166351318, + "step": 1164 + }, + { + "epoch": 0.2686188609637999, + "grad_norm": 0.8741287294678143, + "learning_rate": 1.961518164717486e-06, + "loss": 0.46033143997192383, + "step": 1165 + }, + { + "epoch": 0.26884943509338255, + "grad_norm": 1.250568820610365, + "learning_rate": 1.961413346601234e-06, + "loss": 0.5637123584747314, + "step": 1166 + }, + { + "epoch": 0.2690800092229652, + "grad_norm": 1.0360456860810905, + "learning_rate": 1.9613083887338085e-06, + "loss": 0.5943595170974731, + "step": 1167 + }, + { + "epoch": 0.2693105833525478, + "grad_norm": 1.0495419121458136, + "learning_rate": 1.961203291130466e-06, + "loss": 0.5440319776535034, + "step": 1168 + }, + { + "epoch": 0.2695411574821305, + "grad_norm": 0.9704830315061433, + 
"learning_rate": 1.961098053806484e-06, + "loss": 0.5665608048439026, + "step": 1169 + }, + { + "epoch": 0.26977173161171314, + "grad_norm": 1.0522625707521382, + "learning_rate": 1.960992676777159e-06, + "loss": 0.5707683563232422, + "step": 1170 + }, + { + "epoch": 0.2700023057412958, + "grad_norm": 1.034604689259721, + "learning_rate": 1.9608871600578093e-06, + "loss": 0.5447777509689331, + "step": 1171 + }, + { + "epoch": 0.27023287987087846, + "grad_norm": 1.1920689559592121, + "learning_rate": 1.9607815036637726e-06, + "loss": 0.5598857402801514, + "step": 1172 + }, + { + "epoch": 0.2704634540004611, + "grad_norm": 1.208701571232948, + "learning_rate": 1.960675707610407e-06, + "loss": 0.558403491973877, + "step": 1173 + }, + { + "epoch": 0.2706940281300438, + "grad_norm": 1.3006493228897391, + "learning_rate": 1.960569771913091e-06, + "loss": 0.6696962118148804, + "step": 1174 + }, + { + "epoch": 0.27092460225962645, + "grad_norm": 1.0597715788538418, + "learning_rate": 1.960463696587224e-06, + "loss": 0.519884467124939, + "step": 1175 + }, + { + "epoch": 0.2711551763892091, + "grad_norm": 1.0090714718428708, + "learning_rate": 1.9603574816482243e-06, + "loss": 0.6440261602401733, + "step": 1176 + }, + { + "epoch": 0.27138575051879177, + "grad_norm": 1.1163188497552168, + "learning_rate": 1.9602511271115317e-06, + "loss": 0.48713982105255127, + "step": 1177 + }, + { + "epoch": 0.27161632464837443, + "grad_norm": 0.9570997011710476, + "learning_rate": 1.960144632992606e-06, + "loss": 0.5257129073143005, + "step": 1178 + }, + { + "epoch": 0.2718468987779571, + "grad_norm": 1.3308862733434774, + "learning_rate": 1.9600379993069272e-06, + "loss": 0.5220426917076111, + "step": 1179 + }, + { + "epoch": 0.27207747290753975, + "grad_norm": 1.0690404222828096, + "learning_rate": 1.9599312260699955e-06, + "loss": 0.569817304611206, + "step": 1180 + }, + { + "epoch": 0.2723080470371224, + "grad_norm": 1.0650857331550394, + "learning_rate": 1.9598243132973317e-06, + 
"loss": 0.4370031952857971, + "step": 1181 + }, + { + "epoch": 0.2725386211667051, + "grad_norm": 1.125403283606087, + "learning_rate": 1.959717261004476e-06, + "loss": 0.6060882210731506, + "step": 1182 + }, + { + "epoch": 0.27276919529628774, + "grad_norm": 0.9065361051198069, + "learning_rate": 1.9596100692069905e-06, + "loss": 0.5830891132354736, + "step": 1183 + }, + { + "epoch": 0.2729997694258704, + "grad_norm": 1.4570032441462188, + "learning_rate": 1.9595027379204556e-06, + "loss": 0.5689493417739868, + "step": 1184 + }, + { + "epoch": 0.27323034355545306, + "grad_norm": 1.3244280690129522, + "learning_rate": 1.9593952671604735e-06, + "loss": 0.5550887584686279, + "step": 1185 + }, + { + "epoch": 0.2734609176850357, + "grad_norm": 1.0207521269848765, + "learning_rate": 1.9592876569426665e-06, + "loss": 0.48127567768096924, + "step": 1186 + }, + { + "epoch": 0.2736914918146184, + "grad_norm": 1.071211669612227, + "learning_rate": 1.9591799072826764e-06, + "loss": 0.640753984451294, + "step": 1187 + }, + { + "epoch": 0.27392206594420104, + "grad_norm": 1.1730143666350425, + "learning_rate": 1.959072018196165e-06, + "loss": 0.5266000032424927, + "step": 1188 + }, + { + "epoch": 0.2741526400737837, + "grad_norm": 0.927867514508325, + "learning_rate": 1.958963989698817e-06, + "loss": 0.5586614608764648, + "step": 1189 + }, + { + "epoch": 0.27438321420336637, + "grad_norm": 1.1860842675481242, + "learning_rate": 1.9588558218063336e-06, + "loss": 0.5937967896461487, + "step": 1190 + }, + { + "epoch": 0.274613788332949, + "grad_norm": 1.3761930600193095, + "learning_rate": 1.958747514534439e-06, + "loss": 0.5887218713760376, + "step": 1191 + }, + { + "epoch": 0.2748443624625317, + "grad_norm": 1.0541442430853707, + "learning_rate": 1.9586390678988766e-06, + "loss": 0.5151614546775818, + "step": 1192 + }, + { + "epoch": 0.27507493659211435, + "grad_norm": 0.9782419657689414, + "learning_rate": 1.95853048191541e-06, + "loss": 0.5392748713493347, + "step": 1193 + }, 
+ { + "epoch": 0.275305510721697, + "grad_norm": 1.330179141409128, + "learning_rate": 1.9584217565998237e-06, + "loss": 0.5649560689926147, + "step": 1194 + }, + { + "epoch": 0.2755360848512797, + "grad_norm": 1.0628047614804303, + "learning_rate": 1.9583128919679213e-06, + "loss": 0.4888305962085724, + "step": 1195 + }, + { + "epoch": 0.27576665898086233, + "grad_norm": 0.8838567368205815, + "learning_rate": 1.9582038880355282e-06, + "loss": 0.5026978850364685, + "step": 1196 + }, + { + "epoch": 0.275997233110445, + "grad_norm": 1.094585503881071, + "learning_rate": 1.9580947448184887e-06, + "loss": 0.5358047485351562, + "step": 1197 + }, + { + "epoch": 0.27622780724002766, + "grad_norm": 1.0838231861798517, + "learning_rate": 1.957985462332668e-06, + "loss": 0.6145739555358887, + "step": 1198 + }, + { + "epoch": 0.2764583813696103, + "grad_norm": 1.1469394336927528, + "learning_rate": 1.957876040593952e-06, + "loss": 0.5155332684516907, + "step": 1199 + }, + { + "epoch": 0.276688955499193, + "grad_norm": 0.9936014396625975, + "learning_rate": 1.957766479618245e-06, + "loss": 0.48794522881507874, + "step": 1200 + }, + { + "epoch": 0.27691952962877564, + "grad_norm": 1.135029138979863, + "learning_rate": 1.957656779421474e-06, + "loss": 0.5851761102676392, + "step": 1201 + }, + { + "epoch": 0.2771501037583583, + "grad_norm": 1.0236207003793518, + "learning_rate": 1.957546940019584e-06, + "loss": 0.603874683380127, + "step": 1202 + }, + { + "epoch": 0.27738067788794096, + "grad_norm": 1.0658787224753152, + "learning_rate": 1.9574369614285426e-06, + "loss": 0.5022559762001038, + "step": 1203 + }, + { + "epoch": 0.2776112520175236, + "grad_norm": 1.4179237341040045, + "learning_rate": 1.9573268436643347e-06, + "loss": 0.6469730138778687, + "step": 1204 + }, + { + "epoch": 0.2778418261471063, + "grad_norm": 0.9207501665109726, + "learning_rate": 1.9572165867429685e-06, + "loss": 0.49918532371520996, + "step": 1205 + }, + { + "epoch": 0.27807240027668895, + 
"grad_norm": 0.9656836684424259, + "learning_rate": 1.95710619068047e-06, + "loss": 0.48623788356781006, + "step": 1206 + }, + { + "epoch": 0.2783029744062716, + "grad_norm": 0.9837814076450196, + "learning_rate": 1.956995655492887e-06, + "loss": 0.4868438243865967, + "step": 1207 + }, + { + "epoch": 0.27853354853585427, + "grad_norm": 1.3533879485069031, + "learning_rate": 1.9568849811962862e-06, + "loss": 0.5989904403686523, + "step": 1208 + }, + { + "epoch": 0.27876412266543693, + "grad_norm": 1.3345070230968985, + "learning_rate": 1.956774167806756e-06, + "loss": 0.5125104188919067, + "step": 1209 + }, + { + "epoch": 0.2789946967950196, + "grad_norm": 1.0305365483781255, + "learning_rate": 1.956663215340404e-06, + "loss": 0.5126978158950806, + "step": 1210 + }, + { + "epoch": 0.27922527092460225, + "grad_norm": 0.9524616726362105, + "learning_rate": 1.9565521238133576e-06, + "loss": 0.5009375810623169, + "step": 1211 + }, + { + "epoch": 0.2794558450541849, + "grad_norm": 1.0762476710184214, + "learning_rate": 1.956440893241766e-06, + "loss": 0.5601603984832764, + "step": 1212 + }, + { + "epoch": 0.2796864191837676, + "grad_norm": 1.2962045971613827, + "learning_rate": 1.956329523641797e-06, + "loss": 0.6310690641403198, + "step": 1213 + }, + { + "epoch": 0.27991699331335024, + "grad_norm": 1.0395130987242733, + "learning_rate": 1.95621801502964e-06, + "loss": 0.498830646276474, + "step": 1214 + }, + { + "epoch": 0.2801475674429329, + "grad_norm": 1.0547121574701517, + "learning_rate": 1.9561063674215036e-06, + "loss": 0.6612650156021118, + "step": 1215 + }, + { + "epoch": 0.28037814157251556, + "grad_norm": 1.0369778810130763, + "learning_rate": 1.9559945808336166e-06, + "loss": 0.5651615858078003, + "step": 1216 + }, + { + "epoch": 0.2806087157020982, + "grad_norm": 1.0028009497915646, + "learning_rate": 1.955882655282229e-06, + "loss": 0.5675203800201416, + "step": 1217 + }, + { + "epoch": 0.2808392898316809, + "grad_norm": 1.0910384567165883, + 
"learning_rate": 1.9557705907836095e-06, + "loss": 0.5691455006599426, + "step": 1218 + }, + { + "epoch": 0.28106986396126354, + "grad_norm": 1.2440322291047097, + "learning_rate": 1.955658387354048e-06, + "loss": 0.6018673181533813, + "step": 1219 + }, + { + "epoch": 0.2813004380908462, + "grad_norm": 0.8594681913500082, + "learning_rate": 1.955546045009855e-06, + "loss": 0.5188831090927124, + "step": 1220 + }, + { + "epoch": 0.28153101222042887, + "grad_norm": 0.9611802055135819, + "learning_rate": 1.9554335637673596e-06, + "loss": 0.5161044597625732, + "step": 1221 + }, + { + "epoch": 0.28176158635001153, + "grad_norm": 1.0764912433641416, + "learning_rate": 1.9553209436429132e-06, + "loss": 0.5651452541351318, + "step": 1222 + }, + { + "epoch": 0.2819921604795942, + "grad_norm": 1.0362033432012678, + "learning_rate": 1.9552081846528858e-06, + "loss": 0.5763273239135742, + "step": 1223 + }, + { + "epoch": 0.28222273460917685, + "grad_norm": 1.0512305083546745, + "learning_rate": 1.9550952868136677e-06, + "loss": 0.6379664540290833, + "step": 1224 + }, + { + "epoch": 0.2824533087387595, + "grad_norm": 0.966358468685478, + "learning_rate": 1.95498225014167e-06, + "loss": 0.4021342396736145, + "step": 1225 + }, + { + "epoch": 0.2826838828683422, + "grad_norm": 1.3065298085361052, + "learning_rate": 1.954869074653324e-06, + "loss": 0.49230247735977173, + "step": 1226 + }, + { + "epoch": 0.28291445699792483, + "grad_norm": 0.9198430971109288, + "learning_rate": 1.954755760365081e-06, + "loss": 0.5921554565429688, + "step": 1227 + }, + { + "epoch": 0.2831450311275075, + "grad_norm": 1.2338068239582654, + "learning_rate": 1.954642307293412e-06, + "loss": 0.6495868563652039, + "step": 1228 + }, + { + "epoch": 0.28337560525709016, + "grad_norm": 1.0310593371372254, + "learning_rate": 1.954528715454808e-06, + "loss": 0.5699795484542847, + "step": 1229 + }, + { + "epoch": 0.2836061793866728, + "grad_norm": 1.3462988930710962, + "learning_rate": 1.9544149848657816e-06, + 
"loss": 0.582231879234314, + "step": 1230 + }, + { + "epoch": 0.2838367535162555, + "grad_norm": 1.0033811085419764, + "learning_rate": 1.9543011155428647e-06, + "loss": 0.5952359437942505, + "step": 1231 + }, + { + "epoch": 0.28406732764583814, + "grad_norm": 1.150479906025031, + "learning_rate": 1.9541871075026092e-06, + "loss": 0.646816611289978, + "step": 1232 + }, + { + "epoch": 0.2842979017754208, + "grad_norm": 1.2509776515814615, + "learning_rate": 1.9540729607615866e-06, + "loss": 0.5781043767929077, + "step": 1233 + }, + { + "epoch": 0.28452847590500346, + "grad_norm": 1.1718295930905136, + "learning_rate": 1.95395867533639e-06, + "loss": 0.609764814376831, + "step": 1234 + }, + { + "epoch": 0.2847590500345861, + "grad_norm": 1.2826152398089232, + "learning_rate": 1.9538442512436325e-06, + "loss": 0.4673759341239929, + "step": 1235 + }, + { + "epoch": 0.2849896241641688, + "grad_norm": 1.1343052125955835, + "learning_rate": 1.953729688499946e-06, + "loss": 0.6310999393463135, + "step": 1236 + }, + { + "epoch": 0.28522019829375145, + "grad_norm": 1.075568996273352, + "learning_rate": 1.953614987121983e-06, + "loss": 0.5103853344917297, + "step": 1237 + }, + { + "epoch": 0.2854507724233341, + "grad_norm": 1.1329951189185654, + "learning_rate": 1.9535001471264178e-06, + "loss": 0.5735328197479248, + "step": 1238 + }, + { + "epoch": 0.28568134655291677, + "grad_norm": 1.010063337652323, + "learning_rate": 1.953385168529942e-06, + "loss": 0.5617454051971436, + "step": 1239 + }, + { + "epoch": 0.28591192068249943, + "grad_norm": 1.1392481671873862, + "learning_rate": 1.9532700513492705e-06, + "loss": 0.49873489141464233, + "step": 1240 + }, + { + "epoch": 0.2861424948120821, + "grad_norm": 0.9923008758606798, + "learning_rate": 1.9531547956011353e-06, + "loss": 0.49185073375701904, + "step": 1241 + }, + { + "epoch": 0.28637306894166475, + "grad_norm": 1.1119890456844754, + "learning_rate": 1.9530394013022907e-06, + "loss": 0.6016734838485718, + "step": 1242 + 
}, + { + "epoch": 0.2866036430712474, + "grad_norm": 0.984310677257317, + "learning_rate": 1.9529238684695105e-06, + "loss": 0.5922054052352905, + "step": 1243 + }, + { + "epoch": 0.2868342172008301, + "grad_norm": 1.2933601588161594, + "learning_rate": 1.952808197119588e-06, + "loss": 0.6498355269432068, + "step": 1244 + }, + { + "epoch": 0.28706479133041274, + "grad_norm": 1.106145681286101, + "learning_rate": 1.9526923872693382e-06, + "loss": 0.5564426183700562, + "step": 1245 + }, + { + "epoch": 0.2872953654599954, + "grad_norm": 1.0410162813090216, + "learning_rate": 1.9525764389355945e-06, + "loss": 0.6144154071807861, + "step": 1246 + }, + { + "epoch": 0.28752593958957806, + "grad_norm": 0.9304288925500919, + "learning_rate": 1.9524603521352116e-06, + "loss": 0.5958914756774902, + "step": 1247 + }, + { + "epoch": 0.2877565137191607, + "grad_norm": 1.167763375182377, + "learning_rate": 1.952344126885063e-06, + "loss": 0.5471549034118652, + "step": 1248 + }, + { + "epoch": 0.2879870878487434, + "grad_norm": 1.0658282088084226, + "learning_rate": 1.952227763202044e-06, + "loss": 0.5512329936027527, + "step": 1249 + }, + { + "epoch": 0.28821766197832605, + "grad_norm": 0.9336952567830841, + "learning_rate": 1.9521112611030695e-06, + "loss": 0.5545130968093872, + "step": 1250 + }, + { + "epoch": 0.2884482361079087, + "grad_norm": 0.9540157404500241, + "learning_rate": 1.9519946206050734e-06, + "loss": 0.5409479737281799, + "step": 1251 + }, + { + "epoch": 0.28867881023749137, + "grad_norm": 1.0425656776824677, + "learning_rate": 1.9518778417250114e-06, + "loss": 0.5248778462409973, + "step": 1252 + }, + { + "epoch": 0.28890938436707403, + "grad_norm": 1.1108036883068904, + "learning_rate": 1.951760924479858e-06, + "loss": 0.4985620975494385, + "step": 1253 + }, + { + "epoch": 0.2891399584966567, + "grad_norm": 1.1956376798663733, + "learning_rate": 1.951643868886608e-06, + "loss": 0.5470424890518188, + "step": 1254 + }, + { + "epoch": 0.28937053262623935, + 
"grad_norm": 0.830517770820401, + "learning_rate": 1.9515266749622776e-06, + "loss": 0.5082905292510986, + "step": 1255 + }, + { + "epoch": 0.289601106755822, + "grad_norm": 1.1321002460273393, + "learning_rate": 1.9514093427239013e-06, + "loss": 0.5734596252441406, + "step": 1256 + }, + { + "epoch": 0.2898316808854047, + "grad_norm": 1.133005147672039, + "learning_rate": 1.951291872188535e-06, + "loss": 0.4727100431919098, + "step": 1257 + }, + { + "epoch": 0.29006225501498734, + "grad_norm": 1.044180363768592, + "learning_rate": 1.951174263373254e-06, + "loss": 0.6727551221847534, + "step": 1258 + }, + { + "epoch": 0.29029282914457, + "grad_norm": 0.9491498247436025, + "learning_rate": 1.9510565162951534e-06, + "loss": 0.5225725173950195, + "step": 1259 + }, + { + "epoch": 0.29052340327415266, + "grad_norm": 0.9861385624887246, + "learning_rate": 1.95093863097135e-06, + "loss": 0.46537530422210693, + "step": 1260 + }, + { + "epoch": 0.2907539774037353, + "grad_norm": 1.0433291271591505, + "learning_rate": 1.950820607418979e-06, + "loss": 0.4729498624801636, + "step": 1261 + }, + { + "epoch": 0.290984551533318, + "grad_norm": 1.0319083654914931, + "learning_rate": 1.950702445655196e-06, + "loss": 0.519434928894043, + "step": 1262 + }, + { + "epoch": 0.29121512566290064, + "grad_norm": 1.0839075745171884, + "learning_rate": 1.9505841456971784e-06, + "loss": 0.5487297177314758, + "step": 1263 + }, + { + "epoch": 0.2914456997924833, + "grad_norm": 0.9970964597897494, + "learning_rate": 1.9504657075621207e-06, + "loss": 0.6228574514389038, + "step": 1264 + }, + { + "epoch": 0.29167627392206597, + "grad_norm": 1.076219157850212, + "learning_rate": 1.95034713126724e-06, + "loss": 0.486205518245697, + "step": 1265 + }, + { + "epoch": 0.2919068480516486, + "grad_norm": 1.220321517878089, + "learning_rate": 1.950228416829772e-06, + "loss": 0.6465567350387573, + "step": 1266 + }, + { + "epoch": 0.2921374221812313, + "grad_norm": 1.0227736343783316, + "learning_rate": 
1.9501095642669735e-06, + "loss": 0.5160506963729858, + "step": 1267 + }, + { + "epoch": 0.29236799631081395, + "grad_norm": 1.0494858452172506, + "learning_rate": 1.9499905735961206e-06, + "loss": 0.47334107756614685, + "step": 1268 + }, + { + "epoch": 0.2925985704403966, + "grad_norm": 1.1563719640673416, + "learning_rate": 1.9498714448345103e-06, + "loss": 0.46453380584716797, + "step": 1269 + }, + { + "epoch": 0.29282914456997927, + "grad_norm": 0.9754273704287023, + "learning_rate": 1.9497521779994582e-06, + "loss": 0.5617728233337402, + "step": 1270 + }, + { + "epoch": 0.29305971869956193, + "grad_norm": 1.3129160300173046, + "learning_rate": 1.9496327731083026e-06, + "loss": 0.6129153966903687, + "step": 1271 + }, + { + "epoch": 0.2932902928291446, + "grad_norm": 1.2949114738936178, + "learning_rate": 1.9495132301783983e-06, + "loss": 0.4903183579444885, + "step": 1272 + }, + { + "epoch": 0.29352086695872726, + "grad_norm": 1.1167146830002543, + "learning_rate": 1.9493935492271235e-06, + "loss": 0.5087980628013611, + "step": 1273 + }, + { + "epoch": 0.2937514410883099, + "grad_norm": 1.0447162269466075, + "learning_rate": 1.949273730271874e-06, + "loss": 0.5102910399436951, + "step": 1274 + }, + { + "epoch": 0.2939820152178926, + "grad_norm": 1.0971342006057034, + "learning_rate": 1.9491537733300674e-06, + "loss": 0.5581132769584656, + "step": 1275 + }, + { + "epoch": 0.29421258934747524, + "grad_norm": 1.0166201989797772, + "learning_rate": 1.949033678419141e-06, + "loss": 0.5668213367462158, + "step": 1276 + }, + { + "epoch": 0.2944431634770579, + "grad_norm": 1.1646263878722904, + "learning_rate": 1.9489134455565503e-06, + "loss": 0.5352080464363098, + "step": 1277 + }, + { + "epoch": 0.29467373760664056, + "grad_norm": 1.0375138174364513, + "learning_rate": 1.948793074759774e-06, + "loss": 0.47343915700912476, + "step": 1278 + }, + { + "epoch": 0.29490431173622317, + "grad_norm": 1.2395532163204355, + "learning_rate": 1.9486725660463084e-06, + "loss": 
0.5169435143470764, + "step": 1279 + }, + { + "epoch": 0.29513488586580583, + "grad_norm": 1.2035025560649288, + "learning_rate": 1.9485519194336707e-06, + "loss": 0.4801402688026428, + "step": 1280 + }, + { + "epoch": 0.2953654599953885, + "grad_norm": 1.2115883619737033, + "learning_rate": 1.9484311349393984e-06, + "loss": 0.6537381410598755, + "step": 1281 + }, + { + "epoch": 0.29559603412497115, + "grad_norm": 0.9306094110342265, + "learning_rate": 1.9483102125810483e-06, + "loss": 0.5160089135169983, + "step": 1282 + }, + { + "epoch": 0.2958266082545538, + "grad_norm": 1.0525832312633145, + "learning_rate": 1.9481891523761985e-06, + "loss": 0.5332320332527161, + "step": 1283 + }, + { + "epoch": 0.2960571823841365, + "grad_norm": 0.9112280719646961, + "learning_rate": 1.9480679543424453e-06, + "loss": 0.5076215267181396, + "step": 1284 + }, + { + "epoch": 0.29628775651371914, + "grad_norm": 1.1265706213450601, + "learning_rate": 1.947946618497407e-06, + "loss": 0.607105016708374, + "step": 1285 + }, + { + "epoch": 0.2965183306433018, + "grad_norm": 1.076771624610464, + "learning_rate": 1.9478251448587203e-06, + "loss": 0.6265846490859985, + "step": 1286 + }, + { + "epoch": 0.29674890477288446, + "grad_norm": 1.164803442921585, + "learning_rate": 1.9477035334440426e-06, + "loss": 0.5313390493392944, + "step": 1287 + }, + { + "epoch": 0.2969794789024671, + "grad_norm": 1.0583207692233336, + "learning_rate": 1.947581784271052e-06, + "loss": 0.5059833526611328, + "step": 1288 + }, + { + "epoch": 0.2972100530320498, + "grad_norm": 1.171630953302918, + "learning_rate": 1.9474598973574455e-06, + "loss": 0.5550922155380249, + "step": 1289 + }, + { + "epoch": 0.29744062716163244, + "grad_norm": 0.9941233964259298, + "learning_rate": 1.947337872720941e-06, + "loss": 0.5594801306724548, + "step": 1290 + }, + { + "epoch": 0.2976712012912151, + "grad_norm": 1.1672729516761162, + "learning_rate": 1.9472157103792753e-06, + "loss": 0.6404933333396912, + "step": 1291 + }, + { + 
"epoch": 0.29790177542079777, + "grad_norm": 1.216836258446271, + "learning_rate": 1.947093410350206e-06, + "loss": 0.5884830355644226, + "step": 1292 + }, + { + "epoch": 0.2981323495503804, + "grad_norm": 1.313520165154308, + "learning_rate": 1.9469709726515114e-06, + "loss": 0.5723487138748169, + "step": 1293 + }, + { + "epoch": 0.2983629236799631, + "grad_norm": 1.047985941483805, + "learning_rate": 1.946848397300989e-06, + "loss": 0.5298895239830017, + "step": 1294 + }, + { + "epoch": 0.29859349780954575, + "grad_norm": 1.009793366380185, + "learning_rate": 1.9467256843164557e-06, + "loss": 0.6118877530097961, + "step": 1295 + }, + { + "epoch": 0.2988240719391284, + "grad_norm": 1.2369344702112195, + "learning_rate": 1.9466028337157498e-06, + "loss": 0.6014599800109863, + "step": 1296 + }, + { + "epoch": 0.29905464606871107, + "grad_norm": 0.9889478752374168, + "learning_rate": 1.9464798455167278e-06, + "loss": 0.5861071944236755, + "step": 1297 + }, + { + "epoch": 0.29928522019829373, + "grad_norm": 1.238998066636259, + "learning_rate": 1.9463567197372684e-06, + "loss": 0.5863409042358398, + "step": 1298 + }, + { + "epoch": 0.2995157943278764, + "grad_norm": 1.217300214744882, + "learning_rate": 1.9462334563952687e-06, + "loss": 0.6576352119445801, + "step": 1299 + }, + { + "epoch": 0.29974636845745906, + "grad_norm": 1.074029788035818, + "learning_rate": 1.9461100555086463e-06, + "loss": 0.5458395481109619, + "step": 1300 + }, + { + "epoch": 0.2999769425870417, + "grad_norm": 1.2759220903954522, + "learning_rate": 1.945986517095339e-06, + "loss": 0.48430997133255005, + "step": 1301 + }, + { + "epoch": 0.3002075167166244, + "grad_norm": 1.2436119574902915, + "learning_rate": 1.945862841173304e-06, + "loss": 0.4212522506713867, + "step": 1302 + }, + { + "epoch": 0.30043809084620704, + "grad_norm": 1.1823128908009017, + "learning_rate": 1.9457390277605188e-06, + "loss": 0.5671685934066772, + "step": 1303 + }, + { + "epoch": 0.3006686649757897, + "grad_norm": 
1.0831721181422946, + "learning_rate": 1.945615076874981e-06, + "loss": 0.5350982546806335, + "step": 1304 + }, + { + "epoch": 0.30089923910537236, + "grad_norm": 0.9247033101108441, + "learning_rate": 1.9454909885347088e-06, + "loss": 0.45792657136917114, + "step": 1305 + }, + { + "epoch": 0.301129813234955, + "grad_norm": 1.0473073919925908, + "learning_rate": 1.9453667627577387e-06, + "loss": 0.5644106864929199, + "step": 1306 + }, + { + "epoch": 0.3013603873645377, + "grad_norm": 1.3332547603439018, + "learning_rate": 1.945242399562129e-06, + "loss": 0.554198145866394, + "step": 1307 + }, + { + "epoch": 0.30159096149412035, + "grad_norm": 0.9232575644574793, + "learning_rate": 1.9451178989659565e-06, + "loss": 0.5073474049568176, + "step": 1308 + }, + { + "epoch": 0.301821535623703, + "grad_norm": 1.0206284762622284, + "learning_rate": 1.944993260987319e-06, + "loss": 0.569359302520752, + "step": 1309 + }, + { + "epoch": 0.30205210975328567, + "grad_norm": 1.0382686851233573, + "learning_rate": 1.944868485644334e-06, + "loss": 0.5011791586875916, + "step": 1310 + }, + { + "epoch": 0.30228268388286833, + "grad_norm": 0.9869955270819804, + "learning_rate": 1.9447435729551384e-06, + "loss": 0.41121986508369446, + "step": 1311 + }, + { + "epoch": 0.302513258012451, + "grad_norm": 1.3489170954309295, + "learning_rate": 1.9446185229378896e-06, + "loss": 0.5615876913070679, + "step": 1312 + }, + { + "epoch": 0.30274383214203365, + "grad_norm": 1.2244043366760826, + "learning_rate": 1.9444933356107652e-06, + "loss": 0.5450695157051086, + "step": 1313 + }, + { + "epoch": 0.3029744062716163, + "grad_norm": 1.0371383598149113, + "learning_rate": 1.9443680109919626e-06, + "loss": 0.522222101688385, + "step": 1314 + }, + { + "epoch": 0.303204980401199, + "grad_norm": 0.9638880730108786, + "learning_rate": 1.9442425490996984e-06, + "loss": 0.5081876516342163, + "step": 1315 + }, + { + "epoch": 0.30343555453078164, + "grad_norm": 1.1506604859779093, + "learning_rate": 
1.9441169499522104e-06, + "loss": 0.4955870509147644, + "step": 1316 + }, + { + "epoch": 0.3036661286603643, + "grad_norm": 1.0185303369767542, + "learning_rate": 1.9439912135677553e-06, + "loss": 0.5098991990089417, + "step": 1317 + }, + { + "epoch": 0.30389670278994696, + "grad_norm": 0.9949182918503017, + "learning_rate": 1.94386533996461e-06, + "loss": 0.5686191320419312, + "step": 1318 + }, + { + "epoch": 0.3041272769195296, + "grad_norm": 1.180090494573931, + "learning_rate": 1.943739329161072e-06, + "loss": 0.606401264667511, + "step": 1319 + }, + { + "epoch": 0.3043578510491123, + "grad_norm": 1.0411002752171188, + "learning_rate": 1.9436131811754576e-06, + "loss": 0.49249163269996643, + "step": 1320 + }, + { + "epoch": 0.30458842517869494, + "grad_norm": 1.1079741007732102, + "learning_rate": 1.9434868960261047e-06, + "loss": 0.5373499989509583, + "step": 1321 + }, + { + "epoch": 0.3048189993082776, + "grad_norm": 1.4236897413447511, + "learning_rate": 1.943360473731369e-06, + "loss": 0.4568977355957031, + "step": 1322 + }, + { + "epoch": 0.30504957343786027, + "grad_norm": 1.034905077800575, + "learning_rate": 1.943233914309628e-06, + "loss": 0.562126636505127, + "step": 1323 + }, + { + "epoch": 0.3052801475674429, + "grad_norm": 1.343019932527111, + "learning_rate": 1.943107217779278e-06, + "loss": 0.5795382261276245, + "step": 1324 + }, + { + "epoch": 0.3055107216970256, + "grad_norm": 0.9852538064889438, + "learning_rate": 1.942980384158736e-06, + "loss": 0.5671530365943909, + "step": 1325 + }, + { + "epoch": 0.30574129582660825, + "grad_norm": 0.8981413519731547, + "learning_rate": 1.942853413466438e-06, + "loss": 0.5511401891708374, + "step": 1326 + }, + { + "epoch": 0.3059718699561909, + "grad_norm": 1.1491379693233763, + "learning_rate": 1.942726305720841e-06, + "loss": 0.5712149739265442, + "step": 1327 + }, + { + "epoch": 0.3062024440857736, + "grad_norm": 1.171535283311252, + "learning_rate": 1.9425990609404215e-06, + "loss": 0.5181496739387512, 
+ "step": 1328 + }, + { + "epoch": 0.30643301821535623, + "grad_norm": 1.1968505005842098, + "learning_rate": 1.9424716791436753e-06, + "loss": 0.5758726596832275, + "step": 1329 + }, + { + "epoch": 0.3066635923449389, + "grad_norm": 0.9714627365066287, + "learning_rate": 1.942344160349119e-06, + "loss": 0.5757049322128296, + "step": 1330 + }, + { + "epoch": 0.30689416647452156, + "grad_norm": 0.9271633895158528, + "learning_rate": 1.9422165045752886e-06, + "loss": 0.47352534532546997, + "step": 1331 + }, + { + "epoch": 0.3071247406041042, + "grad_norm": 1.1418817146577889, + "learning_rate": 1.94208871184074e-06, + "loss": 0.5940845012664795, + "step": 1332 + }, + { + "epoch": 0.3073553147336869, + "grad_norm": 1.0590875448509756, + "learning_rate": 1.9419607821640496e-06, + "loss": 0.5225652456283569, + "step": 1333 + }, + { + "epoch": 0.30758588886326954, + "grad_norm": 1.0803440664833228, + "learning_rate": 1.9418327155638126e-06, + "loss": 0.5253404378890991, + "step": 1334 + }, + { + "epoch": 0.3078164629928522, + "grad_norm": 0.9995333811538123, + "learning_rate": 1.941704512058646e-06, + "loss": 0.5637744665145874, + "step": 1335 + }, + { + "epoch": 0.30804703712243486, + "grad_norm": 0.9947267518967771, + "learning_rate": 1.941576171667184e-06, + "loss": 0.48273587226867676, + "step": 1336 + }, + { + "epoch": 0.3082776112520175, + "grad_norm": 0.9569882979404835, + "learning_rate": 1.9414476944080833e-06, + "loss": 0.5989019870758057, + "step": 1337 + }, + { + "epoch": 0.3085081853816002, + "grad_norm": 1.1125936950721667, + "learning_rate": 1.9413190803000183e-06, + "loss": 0.5231547951698303, + "step": 1338 + }, + { + "epoch": 0.30873875951118285, + "grad_norm": 1.0300527191348772, + "learning_rate": 1.9411903293616853e-06, + "loss": 0.5125160217285156, + "step": 1339 + }, + { + "epoch": 0.3089693336407655, + "grad_norm": 1.251133475270548, + "learning_rate": 1.9410614416117993e-06, + "loss": 0.50664883852005, + "step": 1340 + }, + { + "epoch": 
0.30919990777034817, + "grad_norm": 1.063411016331963, + "learning_rate": 1.9409324170690955e-06, + "loss": 0.5555824637413025, + "step": 1341 + }, + { + "epoch": 0.30943048189993083, + "grad_norm": 0.9621002533491156, + "learning_rate": 1.940803255752329e-06, + "loss": 0.5182096362113953, + "step": 1342 + }, + { + "epoch": 0.3096610560295135, + "grad_norm": 1.0359415249922332, + "learning_rate": 1.940673957680274e-06, + "loss": 0.5202751159667969, + "step": 1343 + }, + { + "epoch": 0.30989163015909615, + "grad_norm": 0.9908809268815285, + "learning_rate": 1.940544522871726e-06, + "loss": 0.49791598320007324, + "step": 1344 + }, + { + "epoch": 0.3101222042886788, + "grad_norm": 0.990495096784543, + "learning_rate": 1.9404149513454995e-06, + "loss": 0.48691657185554504, + "step": 1345 + }, + { + "epoch": 0.3103527784182615, + "grad_norm": 1.0649987362093034, + "learning_rate": 1.9402852431204293e-06, + "loss": 0.5726481676101685, + "step": 1346 + }, + { + "epoch": 0.31058335254784414, + "grad_norm": 0.9750258824279312, + "learning_rate": 1.940155398215369e-06, + "loss": 0.5443148016929626, + "step": 1347 + }, + { + "epoch": 0.3108139266774268, + "grad_norm": 1.1005441671416878, + "learning_rate": 1.9400254166491935e-06, + "loss": 0.5767767429351807, + "step": 1348 + }, + { + "epoch": 0.31104450080700946, + "grad_norm": 1.059167179602632, + "learning_rate": 1.9398952984407967e-06, + "loss": 0.5208882689476013, + "step": 1349 + }, + { + "epoch": 0.3112750749365921, + "grad_norm": 0.8304820941291429, + "learning_rate": 1.939765043609093e-06, + "loss": 0.5152548551559448, + "step": 1350 + }, + { + "epoch": 0.3115056490661748, + "grad_norm": 1.1875548530259965, + "learning_rate": 1.939634652173016e-06, + "loss": 0.42542198300361633, + "step": 1351 + }, + { + "epoch": 0.31173622319575744, + "grad_norm": 1.1424220130032787, + "learning_rate": 1.9395041241515197e-06, + "loss": 0.6471734046936035, + "step": 1352 + }, + { + "epoch": 0.3119667973253401, + "grad_norm": 
1.1191897598164906, + "learning_rate": 1.9393734595635767e-06, + "loss": 0.6257486343383789, + "step": 1353 + }, + { + "epoch": 0.31219737145492277, + "grad_norm": 1.1348942815080005, + "learning_rate": 1.9392426584281815e-06, + "loss": 0.562118649482727, + "step": 1354 + }, + { + "epoch": 0.31242794558450543, + "grad_norm": 1.223083488663697, + "learning_rate": 1.939111720764347e-06, + "loss": 0.5602811574935913, + "step": 1355 + }, + { + "epoch": 0.3126585197140881, + "grad_norm": 1.041642546930775, + "learning_rate": 1.9389806465911056e-06, + "loss": 0.54469895362854, + "step": 1356 + }, + { + "epoch": 0.31288909384367075, + "grad_norm": 1.159034123821878, + "learning_rate": 1.9388494359275115e-06, + "loss": 0.5262914896011353, + "step": 1357 + }, + { + "epoch": 0.3131196679732534, + "grad_norm": 1.184281074720895, + "learning_rate": 1.938718088792637e-06, + "loss": 0.6137207746505737, + "step": 1358 + }, + { + "epoch": 0.3133502421028361, + "grad_norm": 1.0740150522099046, + "learning_rate": 1.9385866052055744e-06, + "loss": 0.5792986750602722, + "step": 1359 + }, + { + "epoch": 0.31358081623241874, + "grad_norm": 0.9946259290534466, + "learning_rate": 1.938454985185437e-06, + "loss": 0.4953799843788147, + "step": 1360 + }, + { + "epoch": 0.3138113903620014, + "grad_norm": 1.2906978669163651, + "learning_rate": 1.938323228751356e-06, + "loss": 0.5722379684448242, + "step": 1361 + }, + { + "epoch": 0.31404196449158406, + "grad_norm": 0.9996513214249106, + "learning_rate": 1.938191335922484e-06, + "loss": 0.513651967048645, + "step": 1362 + }, + { + "epoch": 0.3142725386211667, + "grad_norm": 1.0509635344773647, + "learning_rate": 1.9380593067179935e-06, + "loss": 0.4911235272884369, + "step": 1363 + }, + { + "epoch": 0.3145031127507494, + "grad_norm": 1.0029036193486218, + "learning_rate": 1.9379271411570753e-06, + "loss": 0.5478678941726685, + "step": 1364 + }, + { + "epoch": 0.31473368688033204, + "grad_norm": 0.8901015021428158, + "learning_rate": 
1.9377948392589417e-06, + "loss": 0.46698129177093506, + "step": 1365 + }, + { + "epoch": 0.3149642610099147, + "grad_norm": 1.3327357773387452, + "learning_rate": 1.9376624010428243e-06, + "loss": 0.5081343650817871, + "step": 1366 + }, + { + "epoch": 0.31519483513949736, + "grad_norm": 1.1172038301784757, + "learning_rate": 1.9375298265279735e-06, + "loss": 0.583903431892395, + "step": 1367 + }, + { + "epoch": 0.31542540926908, + "grad_norm": 1.0403870552320973, + "learning_rate": 1.937397115733661e-06, + "loss": 0.5249435901641846, + "step": 1368 + }, + { + "epoch": 0.3156559833986627, + "grad_norm": 1.184866053048378, + "learning_rate": 1.9372642686791777e-06, + "loss": 0.5463817119598389, + "step": 1369 + }, + { + "epoch": 0.31588655752824535, + "grad_norm": 1.2179956171685966, + "learning_rate": 1.9371312853838338e-06, + "loss": 0.4634520709514618, + "step": 1370 + }, + { + "epoch": 0.316117131657828, + "grad_norm": 1.2606144259751904, + "learning_rate": 1.93699816586696e-06, + "loss": 0.6018840074539185, + "step": 1371 + }, + { + "epoch": 0.31634770578741067, + "grad_norm": 1.1911067691024062, + "learning_rate": 1.9368649101479072e-06, + "loss": 0.5507885813713074, + "step": 1372 + }, + { + "epoch": 0.31657827991699333, + "grad_norm": 0.9991148637431415, + "learning_rate": 1.9367315182460442e-06, + "loss": 0.5520491600036621, + "step": 1373 + }, + { + "epoch": 0.316808854046576, + "grad_norm": 1.2455223208218802, + "learning_rate": 1.936597990180762e-06, + "loss": 0.5410347580909729, + "step": 1374 + }, + { + "epoch": 0.31703942817615866, + "grad_norm": 1.6049117927004484, + "learning_rate": 1.9364643259714694e-06, + "loss": 0.5771749019622803, + "step": 1375 + }, + { + "epoch": 0.3172700023057413, + "grad_norm": 1.123905862633382, + "learning_rate": 1.9363305256375965e-06, + "loss": 0.5071828365325928, + "step": 1376 + }, + { + "epoch": 0.317500576435324, + "grad_norm": 1.1240180544134455, + "learning_rate": 1.936196589198592e-06, + "loss": 
0.558908224105835, + "step": 1377 + }, + { + "epoch": 0.31773115056490664, + "grad_norm": 1.1984781772064843, + "learning_rate": 1.9360625166739256e-06, + "loss": 0.5509803295135498, + "step": 1378 + }, + { + "epoch": 0.3179617246944893, + "grad_norm": 1.1703050385431384, + "learning_rate": 1.935928308083085e-06, + "loss": 0.5333945155143738, + "step": 1379 + }, + { + "epoch": 0.31819229882407196, + "grad_norm": 1.2141630137674275, + "learning_rate": 1.93579396344558e-06, + "loss": 0.5337819457054138, + "step": 1380 + }, + { + "epoch": 0.3184228729536546, + "grad_norm": 1.161230429960398, + "learning_rate": 1.9356594827809387e-06, + "loss": 0.5286899209022522, + "step": 1381 + }, + { + "epoch": 0.3186534470832373, + "grad_norm": 1.3042082103630104, + "learning_rate": 1.9355248661087083e-06, + "loss": 0.5915369987487793, + "step": 1382 + }, + { + "epoch": 0.31888402121281995, + "grad_norm": 1.2725859277548193, + "learning_rate": 1.9353901134484575e-06, + "loss": 0.5843492746353149, + "step": 1383 + }, + { + "epoch": 0.3191145953424026, + "grad_norm": 1.0723106790063142, + "learning_rate": 1.935255224819774e-06, + "loss": 0.5015528202056885, + "step": 1384 + }, + { + "epoch": 0.31934516947198527, + "grad_norm": 1.2053658641154292, + "learning_rate": 1.935120200242265e-06, + "loss": 0.5650957822799683, + "step": 1385 + }, + { + "epoch": 0.31957574360156793, + "grad_norm": 0.9993056241167617, + "learning_rate": 1.9349850397355576e-06, + "loss": 0.5452740788459778, + "step": 1386 + }, + { + "epoch": 0.3198063177311506, + "grad_norm": 1.138341645042275, + "learning_rate": 1.934849743319299e-06, + "loss": 0.5069071054458618, + "step": 1387 + }, + { + "epoch": 0.32003689186073325, + "grad_norm": 1.3097523217194937, + "learning_rate": 1.934714311013156e-06, + "loss": 0.5350260734558105, + "step": 1388 + }, + { + "epoch": 0.3202674659903159, + "grad_norm": 1.065882395696928, + "learning_rate": 1.9345787428368146e-06, + "loss": 0.6002014875411987, + "step": 1389 + }, + { + 
"epoch": 0.3204980401198986, + "grad_norm": 1.0951548438177328, + "learning_rate": 1.9344430388099813e-06, + "loss": 0.5111383199691772, + "step": 1390 + }, + { + "epoch": 0.3207286142494812, + "grad_norm": 1.3896947100609738, + "learning_rate": 1.934307198952382e-06, + "loss": 0.6029741168022156, + "step": 1391 + }, + { + "epoch": 0.32095918837906384, + "grad_norm": 1.0076386708324083, + "learning_rate": 1.9341712232837628e-06, + "loss": 0.48339328169822693, + "step": 1392 + }, + { + "epoch": 0.3211897625086465, + "grad_norm": 1.5017597017671664, + "learning_rate": 1.9340351118238882e-06, + "loss": 0.6080894470214844, + "step": 1393 + }, + { + "epoch": 0.32142033663822916, + "grad_norm": 1.1935202429445742, + "learning_rate": 1.9338988645925444e-06, + "loss": 0.46375036239624023, + "step": 1394 + }, + { + "epoch": 0.3216509107678118, + "grad_norm": 1.2397479694281224, + "learning_rate": 1.9337624816095357e-06, + "loss": 0.5974088907241821, + "step": 1395 + }, + { + "epoch": 0.3218814848973945, + "grad_norm": 1.4525926184759388, + "learning_rate": 1.9336259628946865e-06, + "loss": 0.5759298801422119, + "step": 1396 + }, + { + "epoch": 0.32211205902697715, + "grad_norm": 1.0361695525185906, + "learning_rate": 1.9334893084678417e-06, + "loss": 0.6050859689712524, + "step": 1397 + }, + { + "epoch": 0.3223426331565598, + "grad_norm": 1.1306650773102374, + "learning_rate": 1.9333525183488657e-06, + "loss": 0.5879993438720703, + "step": 1398 + }, + { + "epoch": 0.32257320728614247, + "grad_norm": 1.055350398289763, + "learning_rate": 1.933215592557642e-06, + "loss": 0.5496323108673096, + "step": 1399 + }, + { + "epoch": 0.32280378141572513, + "grad_norm": 1.2847712135798797, + "learning_rate": 1.9330785311140732e-06, + "loss": 0.48447534441947937, + "step": 1400 + }, + { + "epoch": 0.3230343555453078, + "grad_norm": 1.2583031445613762, + "learning_rate": 1.932941334038084e-06, + "loss": 0.5687322020530701, + "step": 1401 + }, + { + "epoch": 0.32326492967489046, + 
"grad_norm": 1.1545356458260727, + "learning_rate": 1.9328040013496166e-06, + "loss": 0.4070928990840912, + "step": 1402 + }, + { + "epoch": 0.3234955038044731, + "grad_norm": 0.9643847324304846, + "learning_rate": 1.9326665330686344e-06, + "loss": 0.5131539106369019, + "step": 1403 + }, + { + "epoch": 0.3237260779340558, + "grad_norm": 1.0846567553359194, + "learning_rate": 1.932528929215119e-06, + "loss": 0.47571802139282227, + "step": 1404 + }, + { + "epoch": 0.32395665206363844, + "grad_norm": 1.095169764239565, + "learning_rate": 1.9323911898090728e-06, + "loss": 0.5676391124725342, + "step": 1405 + }, + { + "epoch": 0.3241872261932211, + "grad_norm": 1.0653010445083047, + "learning_rate": 1.9322533148705177e-06, + "loss": 0.5464721322059631, + "step": 1406 + }, + { + "epoch": 0.32441780032280376, + "grad_norm": 1.044728614529827, + "learning_rate": 1.9321153044194953e-06, + "loss": 0.6130954027175903, + "step": 1407 + }, + { + "epoch": 0.3246483744523864, + "grad_norm": 1.6513732337511444, + "learning_rate": 1.9319771584760666e-06, + "loss": 0.6058028936386108, + "step": 1408 + }, + { + "epoch": 0.3248789485819691, + "grad_norm": 1.1251884535657009, + "learning_rate": 1.9318388770603123e-06, + "loss": 0.5326286554336548, + "step": 1409 + }, + { + "epoch": 0.32510952271155175, + "grad_norm": 1.2184625691329178, + "learning_rate": 1.9317004601923337e-06, + "loss": 0.6046053767204285, + "step": 1410 + }, + { + "epoch": 0.3253400968411344, + "grad_norm": 1.058617017669887, + "learning_rate": 1.931561907892251e-06, + "loss": 0.4597975015640259, + "step": 1411 + }, + { + "epoch": 0.32557067097071707, + "grad_norm": 1.1843983331118075, + "learning_rate": 1.9314232201802035e-06, + "loss": 0.6024897694587708, + "step": 1412 + }, + { + "epoch": 0.32580124510029973, + "grad_norm": 1.037552834044261, + "learning_rate": 1.9312843970763512e-06, + "loss": 0.45463523268699646, + "step": 1413 + }, + { + "epoch": 0.3260318192298824, + "grad_norm": 0.9412245310618959, + 
"learning_rate": 1.9311454386008736e-06, + "loss": 0.512498140335083, + "step": 1414 + }, + { + "epoch": 0.32626239335946505, + "grad_norm": 0.8929271577435476, + "learning_rate": 1.9310063447739695e-06, + "loss": 0.4851795434951782, + "step": 1415 + }, + { + "epoch": 0.3264929674890477, + "grad_norm": 1.1131717345806365, + "learning_rate": 1.930867115615858e-06, + "loss": 0.5464169979095459, + "step": 1416 + }, + { + "epoch": 0.3267235416186304, + "grad_norm": 0.9649299588738096, + "learning_rate": 1.930727751146777e-06, + "loss": 0.5614463090896606, + "step": 1417 + }, + { + "epoch": 0.32695411574821304, + "grad_norm": 1.1279163828506724, + "learning_rate": 1.930588251386985e-06, + "loss": 0.635399341583252, + "step": 1418 + }, + { + "epoch": 0.3271846898777957, + "grad_norm": 1.0116750083389472, + "learning_rate": 1.9304486163567588e-06, + "loss": 0.4862840175628662, + "step": 1419 + }, + { + "epoch": 0.32741526400737836, + "grad_norm": 1.3810849020281415, + "learning_rate": 1.930308846076397e-06, + "loss": 0.6548877954483032, + "step": 1420 + }, + { + "epoch": 0.327645838136961, + "grad_norm": 0.9726550652757486, + "learning_rate": 1.9301689405662154e-06, + "loss": 0.5781031250953674, + "step": 1421 + }, + { + "epoch": 0.3278764122665437, + "grad_norm": 1.0075078554250574, + "learning_rate": 1.930028899846552e-06, + "loss": 0.4945180118083954, + "step": 1422 + }, + { + "epoch": 0.32810698639612634, + "grad_norm": 1.1661473529435082, + "learning_rate": 1.9298887239377623e-06, + "loss": 0.548690915107727, + "step": 1423 + }, + { + "epoch": 0.328337560525709, + "grad_norm": 1.0120278252177992, + "learning_rate": 1.929748412860222e-06, + "loss": 0.44515126943588257, + "step": 1424 + }, + { + "epoch": 0.32856813465529167, + "grad_norm": 0.8968526552864172, + "learning_rate": 1.9296079666343273e-06, + "loss": 0.433849573135376, + "step": 1425 + }, + { + "epoch": 0.3287987087848743, + "grad_norm": 1.185097032812299, + "learning_rate": 1.9294673852804938e-06, + "loss": 
0.5600666403770447, + "step": 1426 + }, + { + "epoch": 0.329029282914457, + "grad_norm": 1.1490365285996864, + "learning_rate": 1.9293266688191555e-06, + "loss": 0.5302737355232239, + "step": 1427 + }, + { + "epoch": 0.32925985704403965, + "grad_norm": 1.1854633228597617, + "learning_rate": 1.929185817270768e-06, + "loss": 0.5590239763259888, + "step": 1428 + }, + { + "epoch": 0.3294904311736223, + "grad_norm": 0.9322915581005059, + "learning_rate": 1.929044830655804e-06, + "loss": 0.43225252628326416, + "step": 1429 + }, + { + "epoch": 0.329721005303205, + "grad_norm": 1.0987581728513967, + "learning_rate": 1.9289037089947595e-06, + "loss": 0.4932950735092163, + "step": 1430 + }, + { + "epoch": 0.32995157943278763, + "grad_norm": 1.1539316791656467, + "learning_rate": 1.9287624523081457e-06, + "loss": 0.48358941078186035, + "step": 1431 + }, + { + "epoch": 0.3301821535623703, + "grad_norm": 1.1348341469716536, + "learning_rate": 1.928621060616497e-06, + "loss": 0.48359012603759766, + "step": 1432 + }, + { + "epoch": 0.33041272769195296, + "grad_norm": 0.9278501695529541, + "learning_rate": 1.9284795339403663e-06, + "loss": 0.48462390899658203, + "step": 1433 + }, + { + "epoch": 0.3306433018215356, + "grad_norm": 1.439376655816269, + "learning_rate": 1.9283378723003253e-06, + "loss": 0.5167088508605957, + "step": 1434 + }, + { + "epoch": 0.3308738759511183, + "grad_norm": 1.0184323306356053, + "learning_rate": 1.928196075716966e-06, + "loss": 0.47352856397628784, + "step": 1435 + }, + { + "epoch": 0.33110445008070094, + "grad_norm": 0.9676467825700396, + "learning_rate": 1.9280541442109e-06, + "loss": 0.5013144016265869, + "step": 1436 + }, + { + "epoch": 0.3313350242102836, + "grad_norm": 1.1746874818237374, + "learning_rate": 1.927912077802759e-06, + "loss": 0.5061586499214172, + "step": 1437 + }, + { + "epoch": 0.33156559833986626, + "grad_norm": 1.3055289684633111, + "learning_rate": 1.9277698765131927e-06, + "loss": 0.5718814134597778, + "step": 1438 + }, + { 
+ "epoch": 0.3317961724694489, + "grad_norm": 1.147604660511156, + "learning_rate": 1.9276275403628727e-06, + "loss": 0.47547006607055664, + "step": 1439 + }, + { + "epoch": 0.3320267465990316, + "grad_norm": 1.1585259805283974, + "learning_rate": 1.9274850693724884e-06, + "loss": 0.5387942790985107, + "step": 1440 + }, + { + "epoch": 0.33225732072861425, + "grad_norm": 1.013907046172662, + "learning_rate": 1.9273424635627494e-06, + "loss": 0.524285078048706, + "step": 1441 + }, + { + "epoch": 0.3324878948581969, + "grad_norm": 1.1737357855070976, + "learning_rate": 1.927199722954385e-06, + "loss": 0.5073943138122559, + "step": 1442 + }, + { + "epoch": 0.33271846898777957, + "grad_norm": 1.2047946851654725, + "learning_rate": 1.927056847568144e-06, + "loss": 0.4609600007534027, + "step": 1443 + }, + { + "epoch": 0.33294904311736223, + "grad_norm": 1.0416538135601094, + "learning_rate": 1.926913837424795e-06, + "loss": 0.4861013889312744, + "step": 1444 + }, + { + "epoch": 0.3331796172469449, + "grad_norm": 1.0835107342484427, + "learning_rate": 1.9267706925451253e-06, + "loss": 0.5255436897277832, + "step": 1445 + }, + { + "epoch": 0.33341019137652755, + "grad_norm": 1.4634923921780199, + "learning_rate": 1.9266274129499434e-06, + "loss": 0.6673840880393982, + "step": 1446 + }, + { + "epoch": 0.3336407655061102, + "grad_norm": 0.9656915858584796, + "learning_rate": 1.9264839986600757e-06, + "loss": 0.38582634925842285, + "step": 1447 + }, + { + "epoch": 0.3338713396356929, + "grad_norm": 0.9567963925410773, + "learning_rate": 1.926340449696369e-06, + "loss": 0.4597562253475189, + "step": 1448 + }, + { + "epoch": 0.33410191376527554, + "grad_norm": 1.130778436617546, + "learning_rate": 1.92619676607969e-06, + "loss": 0.5901148319244385, + "step": 1449 + }, + { + "epoch": 0.3343324878948582, + "grad_norm": 1.2252206522255358, + "learning_rate": 1.9260529478309242e-06, + "loss": 0.49872028827667236, + "step": 1450 + }, + { + "epoch": 0.33456306202444086, + 
"grad_norm": 0.9242619738807548, + "learning_rate": 1.925908994970977e-06, + "loss": 0.4611232578754425, + "step": 1451 + }, + { + "epoch": 0.3347936361540235, + "grad_norm": 1.1122995891321772, + "learning_rate": 1.9257649075207738e-06, + "loss": 0.5671408176422119, + "step": 1452 + }, + { + "epoch": 0.3350242102836062, + "grad_norm": 1.2073453603933548, + "learning_rate": 1.925620685501259e-06, + "loss": 0.4892054498195648, + "step": 1453 + }, + { + "epoch": 0.33525478441318884, + "grad_norm": 1.1748595063207394, + "learning_rate": 1.9254763289333966e-06, + "loss": 0.5506503582000732, + "step": 1454 + }, + { + "epoch": 0.3354853585427715, + "grad_norm": 1.4352362120603241, + "learning_rate": 1.9253318378381702e-06, + "loss": 0.6233078241348267, + "step": 1455 + }, + { + "epoch": 0.33571593267235417, + "grad_norm": 1.2159230168553836, + "learning_rate": 1.9251872122365835e-06, + "loss": 0.5551373958587646, + "step": 1456 + }, + { + "epoch": 0.33594650680193683, + "grad_norm": 1.0308435059717576, + "learning_rate": 1.925042452149659e-06, + "loss": 0.5561612844467163, + "step": 1457 + }, + { + "epoch": 0.3361770809315195, + "grad_norm": 1.0286600789295617, + "learning_rate": 1.924897557598439e-06, + "loss": 0.613766074180603, + "step": 1458 + }, + { + "epoch": 0.33640765506110215, + "grad_norm": 1.092154153863493, + "learning_rate": 1.9247525286039852e-06, + "loss": 0.5767652988433838, + "step": 1459 + }, + { + "epoch": 0.3366382291906848, + "grad_norm": 1.1221153049255785, + "learning_rate": 1.9246073651873795e-06, + "loss": 0.49292564392089844, + "step": 1460 + }, + { + "epoch": 0.3368688033202675, + "grad_norm": 1.2909262812986786, + "learning_rate": 1.9244620673697224e-06, + "loss": 0.5901867151260376, + "step": 1461 + }, + { + "epoch": 0.33709937744985013, + "grad_norm": 1.1013040204716718, + "learning_rate": 1.924316635172135e-06, + "loss": 0.5543808937072754, + "step": 1462 + }, + { + "epoch": 0.3373299515794328, + "grad_norm": 1.3433064818976315, + 
"learning_rate": 1.9241710686157568e-06, + "loss": 0.528805136680603, + "step": 1463 + }, + { + "epoch": 0.33756052570901546, + "grad_norm": 1.2569454583762516, + "learning_rate": 1.924025367721748e-06, + "loss": 0.6396733522415161, + "step": 1464 + }, + { + "epoch": 0.3377910998385981, + "grad_norm": 0.9764691877916688, + "learning_rate": 1.9238795325112867e-06, + "loss": 0.5558862686157227, + "step": 1465 + }, + { + "epoch": 0.3380216739681808, + "grad_norm": 1.2329860923893396, + "learning_rate": 1.9237335630055724e-06, + "loss": 0.5863986015319824, + "step": 1466 + }, + { + "epoch": 0.33825224809776344, + "grad_norm": 1.0929132974739206, + "learning_rate": 1.923587459225823e-06, + "loss": 0.5636321306228638, + "step": 1467 + }, + { + "epoch": 0.3384828222273461, + "grad_norm": 1.1286586205882263, + "learning_rate": 1.923441221193276e-06, + "loss": 0.6065811514854431, + "step": 1468 + }, + { + "epoch": 0.33871339635692876, + "grad_norm": 1.4147716425908794, + "learning_rate": 1.9232948489291886e-06, + "loss": 0.580939769744873, + "step": 1469 + }, + { + "epoch": 0.3389439704865114, + "grad_norm": 1.1018333541876169, + "learning_rate": 1.9231483424548377e-06, + "loss": 0.5429994463920593, + "step": 1470 + }, + { + "epoch": 0.3391745446160941, + "grad_norm": 1.1834314239894592, + "learning_rate": 1.92300170179152e-06, + "loss": 0.5090892910957336, + "step": 1471 + }, + { + "epoch": 0.33940511874567675, + "grad_norm": 1.053685812356228, + "learning_rate": 1.9228549269605498e-06, + "loss": 0.5280312299728394, + "step": 1472 + }, + { + "epoch": 0.3396356928752594, + "grad_norm": 0.992641626439364, + "learning_rate": 1.9227080179832634e-06, + "loss": 0.5098810195922852, + "step": 1473 + }, + { + "epoch": 0.33986626700484207, + "grad_norm": 1.110706876976592, + "learning_rate": 1.922560974881015e-06, + "loss": 0.4554474353790283, + "step": 1474 + }, + { + "epoch": 0.34009684113442473, + "grad_norm": 1.042826154870894, + "learning_rate": 1.9224137976751793e-06, + 
"loss": 0.4492517113685608, + "step": 1475 + }, + { + "epoch": 0.3403274152640074, + "grad_norm": 1.3050966518961793, + "learning_rate": 1.9222664863871495e-06, + "loss": 0.47606343030929565, + "step": 1476 + }, + { + "epoch": 0.34055798939359005, + "grad_norm": 1.331553847580159, + "learning_rate": 1.9221190410383394e-06, + "loss": 0.5939435362815857, + "step": 1477 + }, + { + "epoch": 0.3407885635231727, + "grad_norm": 1.0156905582890146, + "learning_rate": 1.921971461650181e-06, + "loss": 0.5418350696563721, + "step": 1478 + }, + { + "epoch": 0.3410191376527554, + "grad_norm": 1.258400628812999, + "learning_rate": 1.9218237482441265e-06, + "loss": 0.5307733416557312, + "step": 1479 + }, + { + "epoch": 0.34124971178233804, + "grad_norm": 1.097634429758053, + "learning_rate": 1.9216759008416483e-06, + "loss": 0.5102016925811768, + "step": 1480 + }, + { + "epoch": 0.3414802859119207, + "grad_norm": 1.6070497683125828, + "learning_rate": 1.9215279194642366e-06, + "loss": 0.5043876767158508, + "step": 1481 + }, + { + "epoch": 0.34171086004150336, + "grad_norm": 1.0925329335071103, + "learning_rate": 1.9213798041334025e-06, + "loss": 0.5365253686904907, + "step": 1482 + }, + { + "epoch": 0.341941434171086, + "grad_norm": 1.1923005853358424, + "learning_rate": 1.921231554870676e-06, + "loss": 0.4938368797302246, + "step": 1483 + }, + { + "epoch": 0.3421720083006687, + "grad_norm": 1.0865439416616147, + "learning_rate": 1.921083171697607e-06, + "loss": 0.5274159908294678, + "step": 1484 + }, + { + "epoch": 0.34240258243025135, + "grad_norm": 1.1913792015364102, + "learning_rate": 1.9209346546357637e-06, + "loss": 0.4720276892185211, + "step": 1485 + }, + { + "epoch": 0.342633156559834, + "grad_norm": 0.9383641214181552, + "learning_rate": 1.920786003706735e-06, + "loss": 0.42276352643966675, + "step": 1486 + }, + { + "epoch": 0.34286373068941667, + "grad_norm": 1.0581324959121157, + "learning_rate": 1.920637218932129e-06, + "loss": 0.5319294333457947, + "step": 1487 + 
}, + { + "epoch": 0.34309430481899933, + "grad_norm": 1.1819330354237378, + "learning_rate": 1.920488300333572e-06, + "loss": 0.5197560787200928, + "step": 1488 + }, + { + "epoch": 0.343324878948582, + "grad_norm": 1.5013538667422215, + "learning_rate": 1.9203392479327127e-06, + "loss": 0.550025463104248, + "step": 1489 + }, + { + "epoch": 0.34355545307816465, + "grad_norm": 1.0981284345294107, + "learning_rate": 1.920190061751216e-06, + "loss": 0.50255286693573, + "step": 1490 + }, + { + "epoch": 0.3437860272077473, + "grad_norm": 1.1895622589876538, + "learning_rate": 1.9200407418107678e-06, + "loss": 0.5952906608581543, + "step": 1491 + }, + { + "epoch": 0.34401660133733, + "grad_norm": 0.9421522918126589, + "learning_rate": 1.9198912881330737e-06, + "loss": 0.48161056637763977, + "step": 1492 + }, + { + "epoch": 0.34424717546691264, + "grad_norm": 1.177243819966174, + "learning_rate": 1.919741700739858e-06, + "loss": 0.5490972995758057, + "step": 1493 + }, + { + "epoch": 0.3444777495964953, + "grad_norm": 1.4788962836499655, + "learning_rate": 1.9195919796528647e-06, + "loss": 0.45651519298553467, + "step": 1494 + }, + { + "epoch": 0.34470832372607796, + "grad_norm": 1.2203060266370191, + "learning_rate": 1.919442124893857e-06, + "loss": 0.5318460464477539, + "step": 1495 + }, + { + "epoch": 0.3449388978556606, + "grad_norm": 1.0748079339537138, + "learning_rate": 1.9192921364846187e-06, + "loss": 0.5052516460418701, + "step": 1496 + }, + { + "epoch": 0.3451694719852433, + "grad_norm": 1.3171544150804408, + "learning_rate": 1.9191420144469515e-06, + "loss": 0.6653434038162231, + "step": 1497 + }, + { + "epoch": 0.34540004611482594, + "grad_norm": 0.962422061512943, + "learning_rate": 1.9189917588026774e-06, + "loss": 0.47182875871658325, + "step": 1498 + }, + { + "epoch": 0.3456306202444086, + "grad_norm": 1.0305251609345925, + "learning_rate": 1.9188413695736376e-06, + "loss": 0.5257801413536072, + "step": 1499 + }, + { + "epoch": 0.34586119437399127, + 
"grad_norm": 1.1090254531285808, + "learning_rate": 1.918690846781692e-06, + "loss": 0.565075695514679, + "step": 1500 + }, + { + "epoch": 0.3460917685035739, + "grad_norm": 1.1909717210416553, + "learning_rate": 1.9185401904487214e-06, + "loss": 0.49737876653671265, + "step": 1501 + }, + { + "epoch": 0.34632234263315653, + "grad_norm": 1.021716441788736, + "learning_rate": 1.918389400596625e-06, + "loss": 0.5136237144470215, + "step": 1502 + }, + { + "epoch": 0.3465529167627392, + "grad_norm": 1.011829912931323, + "learning_rate": 1.9182384772473216e-06, + "loss": 0.5122819542884827, + "step": 1503 + }, + { + "epoch": 0.34678349089232186, + "grad_norm": 1.1232586653417744, + "learning_rate": 1.91808742042275e-06, + "loss": 0.4586041271686554, + "step": 1504 + }, + { + "epoch": 0.3470140650219045, + "grad_norm": 1.0599756649712084, + "learning_rate": 1.9179362301448666e-06, + "loss": 0.49752146005630493, + "step": 1505 + }, + { + "epoch": 0.3472446391514872, + "grad_norm": 1.0110535685015802, + "learning_rate": 1.917784906435649e-06, + "loss": 0.4423530101776123, + "step": 1506 + }, + { + "epoch": 0.34747521328106984, + "grad_norm": 1.2828635133632034, + "learning_rate": 1.9176334493170946e-06, + "loss": 0.4979468882083893, + "step": 1507 + }, + { + "epoch": 0.3477057874106525, + "grad_norm": 1.0086748218378025, + "learning_rate": 1.9174818588112178e-06, + "loss": 0.5229524374008179, + "step": 1508 + }, + { + "epoch": 0.34793636154023516, + "grad_norm": 1.006104946386604, + "learning_rate": 1.9173301349400546e-06, + "loss": 0.47884654998779297, + "step": 1509 + }, + { + "epoch": 0.3481669356698178, + "grad_norm": 1.161430061405767, + "learning_rate": 1.9171782777256594e-06, + "loss": 0.5204922556877136, + "step": 1510 + }, + { + "epoch": 0.3483975097994005, + "grad_norm": 1.1268415177845295, + "learning_rate": 1.917026287190106e-06, + "loss": 0.5077674984931946, + "step": 1511 + }, + { + "epoch": 0.34862808392898315, + "grad_norm": 0.9750269271228661, + 
"learning_rate": 1.9168741633554885e-06, + "loss": 0.4171299934387207, + "step": 1512 + }, + { + "epoch": 0.3488586580585658, + "grad_norm": 1.065613083459404, + "learning_rate": 1.9167219062439187e-06, + "loss": 0.5228694081306458, + "step": 1513 + }, + { + "epoch": 0.34908923218814847, + "grad_norm": 1.188410464922724, + "learning_rate": 1.916569515877529e-06, + "loss": 0.5496635437011719, + "step": 1514 + }, + { + "epoch": 0.34931980631773113, + "grad_norm": 0.969674279609777, + "learning_rate": 1.9164169922784716e-06, + "loss": 0.5197573900222778, + "step": 1515 + }, + { + "epoch": 0.3495503804473138, + "grad_norm": 1.3265152215611398, + "learning_rate": 1.9162643354689163e-06, + "loss": 0.5726813077926636, + "step": 1516 + }, + { + "epoch": 0.34978095457689645, + "grad_norm": 1.0368094455843846, + "learning_rate": 1.916111545471054e-06, + "loss": 0.53382408618927, + "step": 1517 + }, + { + "epoch": 0.3500115287064791, + "grad_norm": 1.0676291023728657, + "learning_rate": 1.915958622307094e-06, + "loss": 0.5535515546798706, + "step": 1518 + }, + { + "epoch": 0.3502421028360618, + "grad_norm": 1.183098293067818, + "learning_rate": 1.9158055659992648e-06, + "loss": 0.5295307040214539, + "step": 1519 + }, + { + "epoch": 0.35047267696564444, + "grad_norm": 1.3231709310936663, + "learning_rate": 1.9156523765698158e-06, + "loss": 0.5397933125495911, + "step": 1520 + }, + { + "epoch": 0.3507032510952271, + "grad_norm": 1.217082341703879, + "learning_rate": 1.915499054041014e-06, + "loss": 0.5614666938781738, + "step": 1521 + }, + { + "epoch": 0.35093382522480976, + "grad_norm": 1.155125291987811, + "learning_rate": 1.915345598435146e-06, + "loss": 0.5321720838546753, + "step": 1522 + }, + { + "epoch": 0.3511643993543924, + "grad_norm": 1.172353935810673, + "learning_rate": 1.9151920097745185e-06, + "loss": 0.51869797706604, + "step": 1523 + }, + { + "epoch": 0.3513949734839751, + "grad_norm": 1.0936179296558388, + "learning_rate": 1.9150382880814577e-06, + "loss": 
0.58238685131073, + "step": 1524 + }, + { + "epoch": 0.35162554761355774, + "grad_norm": 1.135142968184709, + "learning_rate": 1.914884433378308e-06, + "loss": 0.5617767572402954, + "step": 1525 + }, + { + "epoch": 0.3518561217431404, + "grad_norm": 0.9232400306777988, + "learning_rate": 1.9147304456874336e-06, + "loss": 0.5207428932189941, + "step": 1526 + }, + { + "epoch": 0.35208669587272307, + "grad_norm": 1.0829138732821308, + "learning_rate": 1.914576325031218e-06, + "loss": 0.5929840207099915, + "step": 1527 + }, + { + "epoch": 0.3523172700023057, + "grad_norm": 1.0372438860332964, + "learning_rate": 1.914422071432065e-06, + "loss": 0.510567307472229, + "step": 1528 + }, + { + "epoch": 0.3525478441318884, + "grad_norm": 1.2529291445912578, + "learning_rate": 1.914267684912397e-06, + "loss": 0.5524177551269531, + "step": 1529 + }, + { + "epoch": 0.35277841826147105, + "grad_norm": 1.0844290023080794, + "learning_rate": 1.9141131654946548e-06, + "loss": 0.5622289180755615, + "step": 1530 + }, + { + "epoch": 0.3530089923910537, + "grad_norm": 1.1655531028574153, + "learning_rate": 1.9139585132012995e-06, + "loss": 0.5085979700088501, + "step": 1531 + }, + { + "epoch": 0.35323956652063637, + "grad_norm": 1.0367412290626608, + "learning_rate": 1.9138037280548117e-06, + "loss": 0.47232770919799805, + "step": 1532 + }, + { + "epoch": 0.35347014065021903, + "grad_norm": 1.3584148636864177, + "learning_rate": 1.913648810077691e-06, + "loss": 0.535300612449646, + "step": 1533 + }, + { + "epoch": 0.3537007147798017, + "grad_norm": 1.1457507125445123, + "learning_rate": 1.9134937592924562e-06, + "loss": 0.4351940155029297, + "step": 1534 + }, + { + "epoch": 0.35393128890938436, + "grad_norm": 0.9891980196576595, + "learning_rate": 1.9133385757216456e-06, + "loss": 0.4691917896270752, + "step": 1535 + }, + { + "epoch": 0.354161863038967, + "grad_norm": 1.03905005054118, + "learning_rate": 1.9131832593878167e-06, + "loss": 0.4911034107208252, + "step": 1536 + }, + { + 
"epoch": 0.3543924371685497, + "grad_norm": 0.9599946260153974, + "learning_rate": 1.9130278103135458e-06, + "loss": 0.3954068422317505, + "step": 1537 + }, + { + "epoch": 0.35462301129813234, + "grad_norm": 1.2512488183212185, + "learning_rate": 1.9128722285214297e-06, + "loss": 0.5541605949401855, + "step": 1538 + }, + { + "epoch": 0.354853585427715, + "grad_norm": 1.2362059407886639, + "learning_rate": 1.9127165140340832e-06, + "loss": 0.5719314217567444, + "step": 1539 + }, + { + "epoch": 0.35508415955729766, + "grad_norm": 1.342530930822934, + "learning_rate": 1.9125606668741418e-06, + "loss": 0.60889732837677, + "step": 1540 + }, + { + "epoch": 0.3553147336868803, + "grad_norm": 1.2098741685807175, + "learning_rate": 1.9124046870642587e-06, + "loss": 0.5247465968132019, + "step": 1541 + }, + { + "epoch": 0.355545307816463, + "grad_norm": 1.3096766952611592, + "learning_rate": 1.912248574627107e-06, + "loss": 0.5681591033935547, + "step": 1542 + }, + { + "epoch": 0.35577588194604565, + "grad_norm": 1.0008372683888578, + "learning_rate": 1.91209232958538e-06, + "loss": 0.5995845794677734, + "step": 1543 + }, + { + "epoch": 0.3560064560756283, + "grad_norm": 1.0463229098086306, + "learning_rate": 1.9119359519617893e-06, + "loss": 0.514456033706665, + "step": 1544 + }, + { + "epoch": 0.35623703020521097, + "grad_norm": 1.0680000709528683, + "learning_rate": 1.9117794417790657e-06, + "loss": 0.45192602276802063, + "step": 1545 + }, + { + "epoch": 0.35646760433479363, + "grad_norm": 1.042670075197141, + "learning_rate": 1.911622799059959e-06, + "loss": 0.5529573559761047, + "step": 1546 + }, + { + "epoch": 0.3566981784643763, + "grad_norm": 1.2129822836493795, + "learning_rate": 1.9114660238272403e-06, + "loss": 0.4544152021408081, + "step": 1547 + }, + { + "epoch": 0.35692875259395895, + "grad_norm": 1.516629148023364, + "learning_rate": 1.9113091161036974e-06, + "loss": 0.5676225423812866, + "step": 1548 + }, + { + "epoch": 0.3571593267235416, + "grad_norm": 
1.1320627323756525, + "learning_rate": 1.9111520759121384e-06, + "loss": 0.5571830868721008, + "step": 1549 + }, + { + "epoch": 0.3573899008531243, + "grad_norm": 1.1377531274302592, + "learning_rate": 1.910994903275391e-06, + "loss": 0.5091487765312195, + "step": 1550 + }, + { + "epoch": 0.35762047498270694, + "grad_norm": 1.107456889270875, + "learning_rate": 1.9108375982163015e-06, + "loss": 0.5484684705734253, + "step": 1551 + }, + { + "epoch": 0.3578510491122896, + "grad_norm": 1.261905478374622, + "learning_rate": 1.9106801607577364e-06, + "loss": 0.49742424488067627, + "step": 1552 + }, + { + "epoch": 0.35808162324187226, + "grad_norm": 1.2341261046425518, + "learning_rate": 1.9105225909225804e-06, + "loss": 0.5871520638465881, + "step": 1553 + }, + { + "epoch": 0.3583121973714549, + "grad_norm": 1.2329576492287886, + "learning_rate": 1.910364888733738e-06, + "loss": 0.5096076726913452, + "step": 1554 + }, + { + "epoch": 0.3585427715010376, + "grad_norm": 1.3375416968847058, + "learning_rate": 1.910207054214133e-06, + "loss": 0.7168693542480469, + "step": 1555 + }, + { + "epoch": 0.35877334563062024, + "grad_norm": 1.126707169388949, + "learning_rate": 1.910049087386707e-06, + "loss": 0.5603561997413635, + "step": 1556 + }, + { + "epoch": 0.3590039197602029, + "grad_norm": 1.299433383477777, + "learning_rate": 1.909890988274424e-06, + "loss": 0.5857734680175781, + "step": 1557 + }, + { + "epoch": 0.35923449388978557, + "grad_norm": 1.040543925807462, + "learning_rate": 1.9097327569002642e-06, + "loss": 0.5612708926200867, + "step": 1558 + }, + { + "epoch": 0.35946506801936823, + "grad_norm": 1.146949414139332, + "learning_rate": 1.909574393287228e-06, + "loss": 0.5264564752578735, + "step": 1559 + }, + { + "epoch": 0.3596956421489509, + "grad_norm": 0.9390137754415148, + "learning_rate": 1.9094158974583357e-06, + "loss": 0.4163395166397095, + "step": 1560 + }, + { + "epoch": 0.35992621627853355, + "grad_norm": 1.0884801214343747, + "learning_rate": 
1.909257269436626e-06, + "loss": 0.483236163854599, + "step": 1561 + }, + { + "epoch": 0.3601567904081162, + "grad_norm": 1.0086049535834347, + "learning_rate": 1.9090985092451572e-06, + "loss": 0.48892003297805786, + "step": 1562 + }, + { + "epoch": 0.3603873645376989, + "grad_norm": 1.0090138133688373, + "learning_rate": 1.908939616907007e-06, + "loss": 0.45310860872268677, + "step": 1563 + }, + { + "epoch": 0.36061793866728153, + "grad_norm": 1.0130833457744266, + "learning_rate": 1.908780592445271e-06, + "loss": 0.5242425799369812, + "step": 1564 + }, + { + "epoch": 0.3608485127968642, + "grad_norm": 1.0425805251353624, + "learning_rate": 1.9086214358830663e-06, + "loss": 0.47026845812797546, + "step": 1565 + }, + { + "epoch": 0.36107908692644686, + "grad_norm": 1.2209406413770176, + "learning_rate": 1.9084621472435267e-06, + "loss": 0.5783924460411072, + "step": 1566 + }, + { + "epoch": 0.3613096610560295, + "grad_norm": 1.0139793238266448, + "learning_rate": 1.9083027265498073e-06, + "loss": 0.5534437894821167, + "step": 1567 + }, + { + "epoch": 0.3615402351856122, + "grad_norm": 1.27522834837266, + "learning_rate": 1.9081431738250815e-06, + "loss": 0.49131953716278076, + "step": 1568 + }, + { + "epoch": 0.36177080931519484, + "grad_norm": 1.0466765845853998, + "learning_rate": 1.9079834890925412e-06, + "loss": 0.4798020124435425, + "step": 1569 + }, + { + "epoch": 0.3620013834447775, + "grad_norm": 1.1201181573638213, + "learning_rate": 1.9078236723753987e-06, + "loss": 0.4928893446922302, + "step": 1570 + }, + { + "epoch": 0.36223195757436016, + "grad_norm": 0.884047440430311, + "learning_rate": 1.9076637236968847e-06, + "loss": 0.4483630657196045, + "step": 1571 + }, + { + "epoch": 0.3624625317039428, + "grad_norm": 1.0983581542959335, + "learning_rate": 1.90750364308025e-06, + "loss": 0.593490481376648, + "step": 1572 + }, + { + "epoch": 0.3626931058335255, + "grad_norm": 1.1430514811975505, + "learning_rate": 1.9073434305487631e-06, + "loss": 
0.5944634675979614, + "step": 1573 + }, + { + "epoch": 0.36292367996310815, + "grad_norm": 1.003698560447405, + "learning_rate": 1.9071830861257134e-06, + "loss": 0.5010452270507812, + "step": 1574 + }, + { + "epoch": 0.3631542540926908, + "grad_norm": 1.0687566975761509, + "learning_rate": 1.9070226098344078e-06, + "loss": 0.5128473043441772, + "step": 1575 + }, + { + "epoch": 0.36338482822227347, + "grad_norm": 1.0854169038402666, + "learning_rate": 1.9068620016981733e-06, + "loss": 0.6256363987922668, + "step": 1576 + }, + { + "epoch": 0.36361540235185613, + "grad_norm": 1.0796360454107574, + "learning_rate": 1.9067012617403565e-06, + "loss": 0.5502322912216187, + "step": 1577 + }, + { + "epoch": 0.3638459764814388, + "grad_norm": 1.2842731628323776, + "learning_rate": 1.906540389984322e-06, + "loss": 0.5756800174713135, + "step": 1578 + }, + { + "epoch": 0.36407655061102145, + "grad_norm": 1.135643566986845, + "learning_rate": 1.9063793864534543e-06, + "loss": 0.5131359696388245, + "step": 1579 + }, + { + "epoch": 0.3643071247406041, + "grad_norm": 0.9714084254330834, + "learning_rate": 1.9062182511711567e-06, + "loss": 0.5776810646057129, + "step": 1580 + }, + { + "epoch": 0.3645376988701868, + "grad_norm": 1.0973639487789169, + "learning_rate": 1.9060569841608523e-06, + "loss": 0.49460822343826294, + "step": 1581 + }, + { + "epoch": 0.36476827299976944, + "grad_norm": 0.942012419923591, + "learning_rate": 1.9058955854459823e-06, + "loss": 0.5031022429466248, + "step": 1582 + }, + { + "epoch": 0.3649988471293521, + "grad_norm": 1.2106661637014209, + "learning_rate": 1.9057340550500082e-06, + "loss": 0.4957816004753113, + "step": 1583 + }, + { + "epoch": 0.36522942125893476, + "grad_norm": 0.9363710565312526, + "learning_rate": 1.9055723929964102e-06, + "loss": 0.47861093282699585, + "step": 1584 + }, + { + "epoch": 0.3654599953885174, + "grad_norm": 1.027272725701274, + "learning_rate": 1.9054105993086868e-06, + "loss": 0.44517919421195984, + "step": 1585 + }, 
+ { + "epoch": 0.3656905695181001, + "grad_norm": 1.1724343492985738, + "learning_rate": 1.9052486740103568e-06, + "loss": 0.46661484241485596, + "step": 1586 + }, + { + "epoch": 0.36592114364768275, + "grad_norm": 0.9788001147307338, + "learning_rate": 1.9050866171249575e-06, + "loss": 0.517694890499115, + "step": 1587 + }, + { + "epoch": 0.3661517177772654, + "grad_norm": 1.1284193922698917, + "learning_rate": 1.904924428676046e-06, + "loss": 0.49465644359588623, + "step": 1588 + }, + { + "epoch": 0.36638229190684807, + "grad_norm": 1.0036913999315975, + "learning_rate": 1.9047621086871971e-06, + "loss": 0.41830652952194214, + "step": 1589 + }, + { + "epoch": 0.36661286603643073, + "grad_norm": 1.1944977036427056, + "learning_rate": 1.9045996571820067e-06, + "loss": 0.5540663003921509, + "step": 1590 + }, + { + "epoch": 0.3668434401660134, + "grad_norm": 1.072580109375711, + "learning_rate": 1.9044370741840882e-06, + "loss": 0.5619527101516724, + "step": 1591 + }, + { + "epoch": 0.36707401429559605, + "grad_norm": 1.1509533440805209, + "learning_rate": 1.9042743597170746e-06, + "loss": 0.5086055994033813, + "step": 1592 + }, + { + "epoch": 0.3673045884251787, + "grad_norm": 1.050425223739088, + "learning_rate": 1.9041115138046183e-06, + "loss": 0.5839927196502686, + "step": 1593 + }, + { + "epoch": 0.3675351625547614, + "grad_norm": 1.0464789939377692, + "learning_rate": 1.9039485364703904e-06, + "loss": 0.508616030216217, + "step": 1594 + }, + { + "epoch": 0.36776573668434404, + "grad_norm": 1.15877506638183, + "learning_rate": 1.903785427738082e-06, + "loss": 0.46514832973480225, + "step": 1595 + }, + { + "epoch": 0.3679963108139267, + "grad_norm": 1.525284603977575, + "learning_rate": 1.9036221876314016e-06, + "loss": 0.42142176628112793, + "step": 1596 + }, + { + "epoch": 0.36822688494350936, + "grad_norm": 1.3114380851226077, + "learning_rate": 1.9034588161740786e-06, + "loss": 0.42195791006088257, + "step": 1597 + }, + { + "epoch": 0.368457459073092, + 
"grad_norm": 1.0276642661247686, + "learning_rate": 1.9032953133898601e-06, + "loss": 0.46705931425094604, + "step": 1598 + }, + { + "epoch": 0.3686880332026747, + "grad_norm": 1.1002100436754347, + "learning_rate": 1.9031316793025134e-06, + "loss": 0.4741164743900299, + "step": 1599 + }, + { + "epoch": 0.36891860733225734, + "grad_norm": 1.269728601723268, + "learning_rate": 1.902967913935824e-06, + "loss": 0.49730339646339417, + "step": 1600 + }, + { + "epoch": 0.36914918146184, + "grad_norm": 0.9594474153361355, + "learning_rate": 1.902804017313597e-06, + "loss": 0.47678127884864807, + "step": 1601 + }, + { + "epoch": 0.36937975559142266, + "grad_norm": 1.1964394586929104, + "learning_rate": 1.9026399894596565e-06, + "loss": 0.4954279661178589, + "step": 1602 + }, + { + "epoch": 0.3696103297210053, + "grad_norm": 0.9685506818723637, + "learning_rate": 1.9024758303978456e-06, + "loss": 0.5115381479263306, + "step": 1603 + }, + { + "epoch": 0.369840903850588, + "grad_norm": 1.0632901548704432, + "learning_rate": 1.9023115401520264e-06, + "loss": 0.6147117614746094, + "step": 1604 + }, + { + "epoch": 0.37007147798017065, + "grad_norm": 1.4566806194426465, + "learning_rate": 1.9021471187460802e-06, + "loss": 0.5334371328353882, + "step": 1605 + }, + { + "epoch": 0.3703020521097533, + "grad_norm": 1.2820059739478686, + "learning_rate": 1.9019825662039073e-06, + "loss": 0.4702361226081848, + "step": 1606 + }, + { + "epoch": 0.37053262623933597, + "grad_norm": 1.1889012346736458, + "learning_rate": 1.901817882549427e-06, + "loss": 0.5049586892127991, + "step": 1607 + }, + { + "epoch": 0.37076320036891863, + "grad_norm": 1.2055092488358514, + "learning_rate": 1.901653067806578e-06, + "loss": 0.5063170194625854, + "step": 1608 + }, + { + "epoch": 0.3709937744985013, + "grad_norm": 1.1599393359430212, + "learning_rate": 1.9014881219993175e-06, + "loss": 0.540824294090271, + "step": 1609 + }, + { + "epoch": 0.37122434862808396, + "grad_norm": 1.372148291928607, + 
"learning_rate": 1.901323045151622e-06, + "loss": 0.4744170904159546, + "step": 1610 + }, + { + "epoch": 0.3714549227576666, + "grad_norm": 1.2144026597364277, + "learning_rate": 1.9011578372874876e-06, + "loss": 0.5090929269790649, + "step": 1611 + }, + { + "epoch": 0.3716854968872493, + "grad_norm": 1.0610635938586983, + "learning_rate": 1.9009924984309284e-06, + "loss": 0.3886772394180298, + "step": 1612 + }, + { + "epoch": 0.3719160710168319, + "grad_norm": 1.1192663585328575, + "learning_rate": 1.9008270286059782e-06, + "loss": 0.4976482391357422, + "step": 1613 + }, + { + "epoch": 0.37214664514641455, + "grad_norm": 1.0577168176218985, + "learning_rate": 1.9006614278366898e-06, + "loss": 0.4629209041595459, + "step": 1614 + }, + { + "epoch": 0.3723772192759972, + "grad_norm": 1.0381238100092287, + "learning_rate": 1.9004956961471352e-06, + "loss": 0.49334412813186646, + "step": 1615 + }, + { + "epoch": 0.37260779340557987, + "grad_norm": 1.2336018114177745, + "learning_rate": 1.9003298335614047e-06, + "loss": 0.614592432975769, + "step": 1616 + }, + { + "epoch": 0.37283836753516253, + "grad_norm": 0.9895019344615126, + "learning_rate": 1.9001638401036082e-06, + "loss": 0.5339843034744263, + "step": 1617 + }, + { + "epoch": 0.3730689416647452, + "grad_norm": 0.9743667038154072, + "learning_rate": 1.8999977157978749e-06, + "loss": 0.5516937375068665, + "step": 1618 + }, + { + "epoch": 0.37329951579432785, + "grad_norm": 1.2149293301312265, + "learning_rate": 1.8998314606683522e-06, + "loss": 0.5034124255180359, + "step": 1619 + }, + { + "epoch": 0.3735300899239105, + "grad_norm": 0.9412969527830801, + "learning_rate": 1.8996650747392073e-06, + "loss": 0.49766790866851807, + "step": 1620 + }, + { + "epoch": 0.3737606640534932, + "grad_norm": 1.1063112007683722, + "learning_rate": 1.899498558034626e-06, + "loss": 0.6662446856498718, + "step": 1621 + }, + { + "epoch": 0.37399123818307584, + "grad_norm": 1.3692241861945424, + "learning_rate": 
1.8993319105788129e-06, + "loss": 0.5416747331619263, + "step": 1622 + }, + { + "epoch": 0.3742218123126585, + "grad_norm": 1.2377768970666951, + "learning_rate": 1.8991651323959922e-06, + "loss": 0.5137313604354858, + "step": 1623 + }, + { + "epoch": 0.37445238644224116, + "grad_norm": 1.0509326993065755, + "learning_rate": 1.8989982235104072e-06, + "loss": 0.566002607345581, + "step": 1624 + }, + { + "epoch": 0.3746829605718238, + "grad_norm": 1.314391237074608, + "learning_rate": 1.8988311839463188e-06, + "loss": 0.5201380252838135, + "step": 1625 + }, + { + "epoch": 0.3749135347014065, + "grad_norm": 1.2844709164103703, + "learning_rate": 1.8986640137280087e-06, + "loss": 0.5103918313980103, + "step": 1626 + }, + { + "epoch": 0.37514410883098914, + "grad_norm": 1.081063959726764, + "learning_rate": 1.8984967128797763e-06, + "loss": 0.47900843620300293, + "step": 1627 + }, + { + "epoch": 0.3753746829605718, + "grad_norm": 1.0524739811683044, + "learning_rate": 1.898329281425941e-06, + "loss": 0.42991960048675537, + "step": 1628 + }, + { + "epoch": 0.37560525709015447, + "grad_norm": 1.2087969734027784, + "learning_rate": 1.89816171939084e-06, + "loss": 0.5707317590713501, + "step": 1629 + }, + { + "epoch": 0.3758358312197371, + "grad_norm": 1.0714171850017424, + "learning_rate": 1.8979940267988309e-06, + "loss": 0.565521240234375, + "step": 1630 + }, + { + "epoch": 0.3760664053493198, + "grad_norm": 1.2721353238917528, + "learning_rate": 1.8978262036742888e-06, + "loss": 0.6584400534629822, + "step": 1631 + }, + { + "epoch": 0.37629697947890245, + "grad_norm": 1.1181726564305359, + "learning_rate": 1.897658250041609e-06, + "loss": 0.4749317169189453, + "step": 1632 + }, + { + "epoch": 0.3765275536084851, + "grad_norm": 1.3732616000652873, + "learning_rate": 1.8974901659252048e-06, + "loss": 0.5495604872703552, + "step": 1633 + }, + { + "epoch": 0.37675812773806777, + "grad_norm": 1.6408199477459455, + "learning_rate": 1.8973219513495094e-06, + "loss": 
0.465708464384079, + "step": 1634 + }, + { + "epoch": 0.37698870186765043, + "grad_norm": 1.1887777428919946, + "learning_rate": 1.8971536063389742e-06, + "loss": 0.4599069058895111, + "step": 1635 + }, + { + "epoch": 0.3772192759972331, + "grad_norm": 1.1348638946303797, + "learning_rate": 1.89698513091807e-06, + "loss": 0.4716145694255829, + "step": 1636 + }, + { + "epoch": 0.37744985012681576, + "grad_norm": 0.990973234996169, + "learning_rate": 1.8968165251112863e-06, + "loss": 0.594079852104187, + "step": 1637 + }, + { + "epoch": 0.3776804242563984, + "grad_norm": 1.3300173886007076, + "learning_rate": 1.8966477889431317e-06, + "loss": 0.4588915705680847, + "step": 1638 + }, + { + "epoch": 0.3779109983859811, + "grad_norm": 1.5111913527277292, + "learning_rate": 1.8964789224381337e-06, + "loss": 0.5236901044845581, + "step": 1639 + }, + { + "epoch": 0.37814157251556374, + "grad_norm": 1.067104402214014, + "learning_rate": 1.8963099256208388e-06, + "loss": 0.4954737424850464, + "step": 1640 + }, + { + "epoch": 0.3783721466451464, + "grad_norm": 1.066408318154628, + "learning_rate": 1.8961407985158125e-06, + "loss": 0.4194701910018921, + "step": 1641 + }, + { + "epoch": 0.37860272077472906, + "grad_norm": 0.9999478144515371, + "learning_rate": 1.8959715411476388e-06, + "loss": 0.5368303060531616, + "step": 1642 + }, + { + "epoch": 0.3788332949043117, + "grad_norm": 1.2178837934755509, + "learning_rate": 1.8958021535409214e-06, + "loss": 0.5181677341461182, + "step": 1643 + }, + { + "epoch": 0.3790638690338944, + "grad_norm": 1.0342390187480546, + "learning_rate": 1.8956326357202821e-06, + "loss": 0.4755169749259949, + "step": 1644 + }, + { + "epoch": 0.37929444316347705, + "grad_norm": 1.1097461588236448, + "learning_rate": 1.8954629877103625e-06, + "loss": 0.5460895299911499, + "step": 1645 + }, + { + "epoch": 0.3795250172930597, + "grad_norm": 1.090972908814234, + "learning_rate": 1.8952932095358224e-06, + "loss": 0.47811684012413025, + "step": 1646 + }, + { + 
"epoch": 0.37975559142264237, + "grad_norm": 1.1794844360929688, + "learning_rate": 1.8951233012213405e-06, + "loss": 0.5791733860969543, + "step": 1647 + }, + { + "epoch": 0.37998616555222503, + "grad_norm": 1.1163036430533217, + "learning_rate": 1.8949532627916151e-06, + "loss": 0.4996911585330963, + "step": 1648 + }, + { + "epoch": 0.3802167396818077, + "grad_norm": 1.3190959058791496, + "learning_rate": 1.8947830942713628e-06, + "loss": 0.6108353137969971, + "step": 1649 + }, + { + "epoch": 0.38044731381139035, + "grad_norm": 1.2084081721604487, + "learning_rate": 1.8946127956853195e-06, + "loss": 0.5303040742874146, + "step": 1650 + }, + { + "epoch": 0.380677887940973, + "grad_norm": 1.0581391679258725, + "learning_rate": 1.8944423670582397e-06, + "loss": 0.4651896357536316, + "step": 1651 + }, + { + "epoch": 0.3809084620705557, + "grad_norm": 1.1464415021916683, + "learning_rate": 1.8942718084148969e-06, + "loss": 0.6321637630462646, + "step": 1652 + }, + { + "epoch": 0.38113903620013834, + "grad_norm": 1.1535120052175352, + "learning_rate": 1.8941011197800836e-06, + "loss": 0.5124787092208862, + "step": 1653 + }, + { + "epoch": 0.381369610329721, + "grad_norm": 1.2712538370269149, + "learning_rate": 1.893930301178611e-06, + "loss": 0.5779180526733398, + "step": 1654 + }, + { + "epoch": 0.38160018445930366, + "grad_norm": 1.2579128550158534, + "learning_rate": 1.8937593526353096e-06, + "loss": 0.5723867416381836, + "step": 1655 + }, + { + "epoch": 0.3818307585888863, + "grad_norm": 1.0216965854263103, + "learning_rate": 1.8935882741750281e-06, + "loss": 0.4312398433685303, + "step": 1656 + }, + { + "epoch": 0.382061332718469, + "grad_norm": 1.7195703110538068, + "learning_rate": 1.893417065822635e-06, + "loss": 0.6503756046295166, + "step": 1657 + }, + { + "epoch": 0.38229190684805164, + "grad_norm": 1.2691180997694498, + "learning_rate": 1.8932457276030166e-06, + "loss": 0.508478045463562, + "step": 1658 + }, + { + "epoch": 0.3825224809776343, + "grad_norm": 
0.9328619594784499, + "learning_rate": 1.8930742595410792e-06, + "loss": 0.46552446484565735, + "step": 1659 + }, + { + "epoch": 0.38275305510721697, + "grad_norm": 0.983497277362264, + "learning_rate": 1.8929026616617467e-06, + "loss": 0.4739278256893158, + "step": 1660 + }, + { + "epoch": 0.3829836292367996, + "grad_norm": 1.2642164913655083, + "learning_rate": 1.8927309339899634e-06, + "loss": 0.5584233403205872, + "step": 1661 + }, + { + "epoch": 0.3832142033663823, + "grad_norm": 1.0681648876128738, + "learning_rate": 1.8925590765506911e-06, + "loss": 0.6155074238777161, + "step": 1662 + }, + { + "epoch": 0.38344477749596495, + "grad_norm": 1.1479148469369402, + "learning_rate": 1.8923870893689112e-06, + "loss": 0.5253106951713562, + "step": 1663 + }, + { + "epoch": 0.3836753516255476, + "grad_norm": 1.2179992400932398, + "learning_rate": 1.8922149724696238e-06, + "loss": 0.4190565347671509, + "step": 1664 + }, + { + "epoch": 0.3839059257551303, + "grad_norm": 1.124098215736467, + "learning_rate": 1.892042725877848e-06, + "loss": 0.5263853073120117, + "step": 1665 + }, + { + "epoch": 0.38413649988471293, + "grad_norm": 1.0385777204325046, + "learning_rate": 1.8918703496186214e-06, + "loss": 0.4492432773113251, + "step": 1666 + }, + { + "epoch": 0.3843670740142956, + "grad_norm": 1.3356308613758272, + "learning_rate": 1.8916978437170004e-06, + "loss": 0.49745023250579834, + "step": 1667 + }, + { + "epoch": 0.38459764814387826, + "grad_norm": 1.2023114319635457, + "learning_rate": 1.891525208198061e-06, + "loss": 0.6003707647323608, + "step": 1668 + }, + { + "epoch": 0.3848282222734609, + "grad_norm": 1.6371184982518272, + "learning_rate": 1.8913524430868973e-06, + "loss": 0.5430049300193787, + "step": 1669 + }, + { + "epoch": 0.3850587964030436, + "grad_norm": 1.0715049923324578, + "learning_rate": 1.8911795484086222e-06, + "loss": 0.5561289191246033, + "step": 1670 + }, + { + "epoch": 0.38528937053262624, + "grad_norm": 1.1416350409171048, + "learning_rate": 
1.8910065241883678e-06, + "loss": 0.5488184690475464, + "step": 1671 + }, + { + "epoch": 0.3855199446622089, + "grad_norm": 1.0082475661815067, + "learning_rate": 1.890833370451285e-06, + "loss": 0.46347010135650635, + "step": 1672 + }, + { + "epoch": 0.38575051879179156, + "grad_norm": 1.0668592703569681, + "learning_rate": 1.8906600872225438e-06, + "loss": 0.553687334060669, + "step": 1673 + }, + { + "epoch": 0.3859810929213742, + "grad_norm": 1.1035800532005071, + "learning_rate": 1.8904866745273323e-06, + "loss": 0.46162208914756775, + "step": 1674 + }, + { + "epoch": 0.3862116670509569, + "grad_norm": 1.076914158561248, + "learning_rate": 1.8903131323908576e-06, + "loss": 0.4478996992111206, + "step": 1675 + }, + { + "epoch": 0.38644224118053955, + "grad_norm": 1.1488135535707533, + "learning_rate": 1.8901394608383463e-06, + "loss": 0.5857031345367432, + "step": 1676 + }, + { + "epoch": 0.3866728153101222, + "grad_norm": 1.5929334393746841, + "learning_rate": 1.8899656598950432e-06, + "loss": 0.592833399772644, + "step": 1677 + }, + { + "epoch": 0.38690338943970487, + "grad_norm": 1.0232228390237461, + "learning_rate": 1.8897917295862117e-06, + "loss": 0.6007786989212036, + "step": 1678 + }, + { + "epoch": 0.38713396356928753, + "grad_norm": 1.109869111259485, + "learning_rate": 1.8896176699371343e-06, + "loss": 0.5248164534568787, + "step": 1679 + }, + { + "epoch": 0.3873645376988702, + "grad_norm": 0.856016560201164, + "learning_rate": 1.8894434809731128e-06, + "loss": 0.43112409114837646, + "step": 1680 + }, + { + "epoch": 0.38759511182845285, + "grad_norm": 1.318795823918729, + "learning_rate": 1.8892691627194673e-06, + "loss": 0.56545090675354, + "step": 1681 + }, + { + "epoch": 0.3878256859580355, + "grad_norm": 1.1470159881146635, + "learning_rate": 1.8890947152015363e-06, + "loss": 0.6287904977798462, + "step": 1682 + }, + { + "epoch": 0.3880562600876182, + "grad_norm": 1.155806897456587, + "learning_rate": 1.8889201384446775e-06, + "loss": 
0.48461633920669556, + "step": 1683 + }, + { + "epoch": 0.38828683421720084, + "grad_norm": 1.2251476021613918, + "learning_rate": 1.888745432474268e-06, + "loss": 0.5089331865310669, + "step": 1684 + }, + { + "epoch": 0.3885174083467835, + "grad_norm": 0.9661641286318025, + "learning_rate": 1.8885705973157027e-06, + "loss": 0.4805281162261963, + "step": 1685 + }, + { + "epoch": 0.38874798247636616, + "grad_norm": 1.070887780603473, + "learning_rate": 1.8883956329943955e-06, + "loss": 0.5243096947669983, + "step": 1686 + }, + { + "epoch": 0.3889785566059488, + "grad_norm": 1.240979728566986, + "learning_rate": 1.8882205395357795e-06, + "loss": 0.5808781981468201, + "step": 1687 + }, + { + "epoch": 0.3892091307355315, + "grad_norm": 1.2574299318006046, + "learning_rate": 1.8880453169653063e-06, + "loss": 0.5397018194198608, + "step": 1688 + }, + { + "epoch": 0.38943970486511414, + "grad_norm": 1.182945649827907, + "learning_rate": 1.8878699653084462e-06, + "loss": 0.4475638270378113, + "step": 1689 + }, + { + "epoch": 0.3896702789946968, + "grad_norm": 1.3095447574792232, + "learning_rate": 1.8876944845906884e-06, + "loss": 0.6212958693504333, + "step": 1690 + }, + { + "epoch": 0.38990085312427947, + "grad_norm": 1.1726349359481907, + "learning_rate": 1.8875188748375407e-06, + "loss": 0.44465404748916626, + "step": 1691 + }, + { + "epoch": 0.39013142725386213, + "grad_norm": 1.2650698772045321, + "learning_rate": 1.8873431360745297e-06, + "loss": 0.5711641311645508, + "step": 1692 + }, + { + "epoch": 0.3903620013834448, + "grad_norm": 1.2039233000565408, + "learning_rate": 1.8871672683272012e-06, + "loss": 0.4527866244316101, + "step": 1693 + }, + { + "epoch": 0.39059257551302745, + "grad_norm": 1.515756125658867, + "learning_rate": 1.8869912716211188e-06, + "loss": 0.6242899894714355, + "step": 1694 + }, + { + "epoch": 0.3908231496426101, + "grad_norm": 1.6198907712835393, + "learning_rate": 1.8868151459818656e-06, + "loss": 0.6294416189193726, + "step": 1695 + }, 
+ { + "epoch": 0.3910537237721928, + "grad_norm": 1.2238875456694314, + "learning_rate": 1.8866388914350435e-06, + "loss": 0.49869638681411743, + "step": 1696 + }, + { + "epoch": 0.39128429790177544, + "grad_norm": 1.1755814842525432, + "learning_rate": 1.886462508006273e-06, + "loss": 0.5456752777099609, + "step": 1697 + }, + { + "epoch": 0.3915148720313581, + "grad_norm": 1.0114016306766007, + "learning_rate": 1.8862859957211926e-06, + "loss": 0.4197172224521637, + "step": 1698 + }, + { + "epoch": 0.39174544616094076, + "grad_norm": 1.0278658872450297, + "learning_rate": 1.8861093546054603e-06, + "loss": 0.5012276768684387, + "step": 1699 + }, + { + "epoch": 0.3919760202905234, + "grad_norm": 1.2065880303446173, + "learning_rate": 1.8859325846847531e-06, + "loss": 0.48108845949172974, + "step": 1700 + }, + { + "epoch": 0.3922065944201061, + "grad_norm": 1.1190986847477769, + "learning_rate": 1.885755685984766e-06, + "loss": 0.48592355847358704, + "step": 1701 + }, + { + "epoch": 0.39243716854968874, + "grad_norm": 1.136053467553038, + "learning_rate": 1.8855786585312132e-06, + "loss": 0.5744791030883789, + "step": 1702 + }, + { + "epoch": 0.3926677426792714, + "grad_norm": 1.1435558229801501, + "learning_rate": 1.8854015023498273e-06, + "loss": 0.5378769040107727, + "step": 1703 + }, + { + "epoch": 0.39289831680885406, + "grad_norm": 1.0710678493453967, + "learning_rate": 1.8852242174663594e-06, + "loss": 0.5630123615264893, + "step": 1704 + }, + { + "epoch": 0.3931288909384367, + "grad_norm": 1.0913466409725974, + "learning_rate": 1.8850468039065806e-06, + "loss": 0.5247849225997925, + "step": 1705 + }, + { + "epoch": 0.3933594650680194, + "grad_norm": 1.282307381217427, + "learning_rate": 1.884869261696279e-06, + "loss": 0.5679286122322083, + "step": 1706 + }, + { + "epoch": 0.39359003919760205, + "grad_norm": 1.0140902583392881, + "learning_rate": 1.8846915908612622e-06, + "loss": 0.4505179524421692, + "step": 1707 + }, + { + "epoch": 0.3938206133271847, + 
"grad_norm": 1.233342858229108, + "learning_rate": 1.8845137914273566e-06, + "loss": 0.6077077388763428, + "step": 1708 + }, + { + "epoch": 0.39405118745676737, + "grad_norm": 1.1523756442286543, + "learning_rate": 1.8843358634204069e-06, + "loss": 0.4703037738800049, + "step": 1709 + }, + { + "epoch": 0.39428176158635003, + "grad_norm": 1.3467147447696661, + "learning_rate": 1.8841578068662773e-06, + "loss": 0.6085091829299927, + "step": 1710 + }, + { + "epoch": 0.3945123357159327, + "grad_norm": 1.3769264461225226, + "learning_rate": 1.8839796217908498e-06, + "loss": 0.6075730919837952, + "step": 1711 + }, + { + "epoch": 0.39474290984551536, + "grad_norm": 1.4068518720273175, + "learning_rate": 1.8838013082200252e-06, + "loss": 0.581851601600647, + "step": 1712 + }, + { + "epoch": 0.394973483975098, + "grad_norm": 0.9365976129961602, + "learning_rate": 1.8836228661797234e-06, + "loss": 0.555284857749939, + "step": 1713 + }, + { + "epoch": 0.3952040581046807, + "grad_norm": 1.205134330479215, + "learning_rate": 1.8834442956958832e-06, + "loss": 0.5342675447463989, + "step": 1714 + }, + { + "epoch": 0.39543463223426334, + "grad_norm": 1.2329889286532099, + "learning_rate": 1.8832655967944605e-06, + "loss": 0.47501081228256226, + "step": 1715 + }, + { + "epoch": 0.395665206363846, + "grad_norm": 1.1350943426800137, + "learning_rate": 1.8830867695014323e-06, + "loss": 0.592293918132782, + "step": 1716 + }, + { + "epoch": 0.39589578049342866, + "grad_norm": 1.2591938264724012, + "learning_rate": 1.8829078138427921e-06, + "loss": 0.5903242826461792, + "step": 1717 + }, + { + "epoch": 0.3961263546230113, + "grad_norm": 1.203385992389072, + "learning_rate": 1.882728729844553e-06, + "loss": 0.5292568206787109, + "step": 1718 + }, + { + "epoch": 0.396356928752594, + "grad_norm": 1.070652075724697, + "learning_rate": 1.8825495175327468e-06, + "loss": 0.5748786926269531, + "step": 1719 + }, + { + "epoch": 0.39658750288217665, + "grad_norm": 1.230421737483, + "learning_rate": 
1.8823701769334242e-06, + "loss": 0.6191601753234863, + "step": 1720 + }, + { + "epoch": 0.3968180770117593, + "grad_norm": 1.180452919869617, + "learning_rate": 1.8821907080726535e-06, + "loss": 0.5569231510162354, + "step": 1721 + }, + { + "epoch": 0.39704865114134197, + "grad_norm": 1.291275382361216, + "learning_rate": 1.882011110976523e-06, + "loss": 0.5103349089622498, + "step": 1722 + }, + { + "epoch": 0.39727922527092463, + "grad_norm": 1.1952555855906501, + "learning_rate": 1.8818313856711382e-06, + "loss": 0.4981175363063812, + "step": 1723 + }, + { + "epoch": 0.39750979940050724, + "grad_norm": 1.5157833486690673, + "learning_rate": 1.8816515321826248e-06, + "loss": 0.5429514050483704, + "step": 1724 + }, + { + "epoch": 0.3977403735300899, + "grad_norm": 1.1377768164918185, + "learning_rate": 1.8814715505371254e-06, + "loss": 0.5318386554718018, + "step": 1725 + }, + { + "epoch": 0.39797094765967256, + "grad_norm": 1.0451576127270763, + "learning_rate": 1.881291440760803e-06, + "loss": 0.47451460361480713, + "step": 1726 + }, + { + "epoch": 0.3982015217892552, + "grad_norm": 1.2815255131055066, + "learning_rate": 1.8811112028798384e-06, + "loss": 0.5141372680664062, + "step": 1727 + }, + { + "epoch": 0.3984320959188379, + "grad_norm": 1.0864089006893662, + "learning_rate": 1.8809308369204302e-06, + "loss": 0.4950217008590698, + "step": 1728 + }, + { + "epoch": 0.39866267004842054, + "grad_norm": 0.9530925154379366, + "learning_rate": 1.880750342908797e-06, + "loss": 0.4961693286895752, + "step": 1729 + }, + { + "epoch": 0.3988932441780032, + "grad_norm": 1.1860643451162984, + "learning_rate": 1.8805697208711752e-06, + "loss": 0.43443650007247925, + "step": 1730 + }, + { + "epoch": 0.39912381830758586, + "grad_norm": 1.1332453377909741, + "learning_rate": 1.8803889708338203e-06, + "loss": 0.6116896867752075, + "step": 1731 + }, + { + "epoch": 0.3993543924371685, + "grad_norm": 0.9403622624868753, + "learning_rate": 1.8802080928230062e-06, + "loss": 
0.46244728565216064, + "step": 1732 + }, + { + "epoch": 0.3995849665667512, + "grad_norm": 1.3180964068285155, + "learning_rate": 1.880027086865025e-06, + "loss": 0.5728162527084351, + "step": 1733 + }, + { + "epoch": 0.39981554069633385, + "grad_norm": 1.1310284579414278, + "learning_rate": 1.8798459529861876e-06, + "loss": 0.4472135901451111, + "step": 1734 + }, + { + "epoch": 0.4000461148259165, + "grad_norm": 1.4100215542732757, + "learning_rate": 1.8796646912128246e-06, + "loss": 0.5862090587615967, + "step": 1735 + }, + { + "epoch": 0.40027668895549917, + "grad_norm": 1.428537555998266, + "learning_rate": 1.8794833015712831e-06, + "loss": 0.6406301259994507, + "step": 1736 + }, + { + "epoch": 0.40050726308508183, + "grad_norm": 1.3320783455965834, + "learning_rate": 1.8793017840879306e-06, + "loss": 0.5865743160247803, + "step": 1737 + }, + { + "epoch": 0.4007378372146645, + "grad_norm": 1.2736301947050057, + "learning_rate": 1.8791201387891524e-06, + "loss": 0.5521814823150635, + "step": 1738 + }, + { + "epoch": 0.40096841134424716, + "grad_norm": 0.9710129928143749, + "learning_rate": 1.8789383657013522e-06, + "loss": 0.40027791261672974, + "step": 1739 + }, + { + "epoch": 0.4011989854738298, + "grad_norm": 1.213730124395359, + "learning_rate": 1.8787564648509528e-06, + "loss": 0.5594751238822937, + "step": 1740 + }, + { + "epoch": 0.4014295596034125, + "grad_norm": 1.2077878384788876, + "learning_rate": 1.8785744362643955e-06, + "loss": 0.5029730796813965, + "step": 1741 + }, + { + "epoch": 0.40166013373299514, + "grad_norm": 1.086599940670418, + "learning_rate": 1.8783922799681397e-06, + "loss": 0.6089034676551819, + "step": 1742 + }, + { + "epoch": 0.4018907078625778, + "grad_norm": 1.178028157014987, + "learning_rate": 1.8782099959886639e-06, + "loss": 0.5238372683525085, + "step": 1743 + }, + { + "epoch": 0.40212128199216046, + "grad_norm": 1.0430681899893623, + "learning_rate": 1.8780275843524643e-06, + "loss": 0.47281232476234436, + "step": 1744 + }, 
+ { + "epoch": 0.4023518561217431, + "grad_norm": 1.0603667709126336, + "learning_rate": 1.8778450450860571e-06, + "loss": 0.44885876774787903, + "step": 1745 + }, + { + "epoch": 0.4025824302513258, + "grad_norm": 1.1187549409367323, + "learning_rate": 1.8776623782159762e-06, + "loss": 0.5915139317512512, + "step": 1746 + }, + { + "epoch": 0.40281300438090845, + "grad_norm": 1.6743224234561098, + "learning_rate": 1.8774795837687736e-06, + "loss": 0.49341484904289246, + "step": 1747 + }, + { + "epoch": 0.4030435785104911, + "grad_norm": 1.1133076324661322, + "learning_rate": 1.8772966617710205e-06, + "loss": 0.43253493309020996, + "step": 1748 + }, + { + "epoch": 0.40327415264007377, + "grad_norm": 1.2596810310862556, + "learning_rate": 1.8771136122493064e-06, + "loss": 0.48660045862197876, + "step": 1749 + }, + { + "epoch": 0.40350472676965643, + "grad_norm": 1.158836920018239, + "learning_rate": 1.8769304352302396e-06, + "loss": 0.4493838846683502, + "step": 1750 + }, + { + "epoch": 0.4037353008992391, + "grad_norm": 1.1033409495303377, + "learning_rate": 1.8767471307404464e-06, + "loss": 0.5656435489654541, + "step": 1751 + }, + { + "epoch": 0.40396587502882175, + "grad_norm": 1.1945430976561655, + "learning_rate": 1.876563698806572e-06, + "loss": 0.48047327995300293, + "step": 1752 + }, + { + "epoch": 0.4041964491584044, + "grad_norm": 1.117811372759575, + "learning_rate": 1.8763801394552806e-06, + "loss": 0.5314204692840576, + "step": 1753 + }, + { + "epoch": 0.4044270232879871, + "grad_norm": 1.212293607312766, + "learning_rate": 1.876196452713254e-06, + "loss": 0.5436627864837646, + "step": 1754 + }, + { + "epoch": 0.40465759741756974, + "grad_norm": 1.1748084841171984, + "learning_rate": 1.8760126386071933e-06, + "loss": 0.5383991599082947, + "step": 1755 + }, + { + "epoch": 0.4048881715471524, + "grad_norm": 1.1737559222863878, + "learning_rate": 1.8758286971638171e-06, + "loss": 0.48271507024765015, + "step": 1756 + }, + { + "epoch": 0.40511874567673506, + 
"grad_norm": 1.0323965631837329, + "learning_rate": 1.8756446284098638e-06, + "loss": 0.5920745134353638, + "step": 1757 + }, + { + "epoch": 0.4053493198063177, + "grad_norm": 1.1254236464300211, + "learning_rate": 1.875460432372089e-06, + "loss": 0.4467526078224182, + "step": 1758 + }, + { + "epoch": 0.4055798939359004, + "grad_norm": 0.9503211623796617, + "learning_rate": 1.875276109077268e-06, + "loss": 0.425409734249115, + "step": 1759 + }, + { + "epoch": 0.40581046806548304, + "grad_norm": 1.1318149217921376, + "learning_rate": 1.8750916585521938e-06, + "loss": 0.4911944568157196, + "step": 1760 + }, + { + "epoch": 0.4060410421950657, + "grad_norm": 1.5865124774001016, + "learning_rate": 1.8749070808236787e-06, + "loss": 0.49605780839920044, + "step": 1761 + }, + { + "epoch": 0.40627161632464837, + "grad_norm": 1.322640956813398, + "learning_rate": 1.874722375918552e-06, + "loss": 0.5582889914512634, + "step": 1762 + }, + { + "epoch": 0.406502190454231, + "grad_norm": 1.0487904765861873, + "learning_rate": 1.874537543863663e-06, + "loss": 0.4867294132709503, + "step": 1763 + }, + { + "epoch": 0.4067327645838137, + "grad_norm": 1.062364022734449, + "learning_rate": 1.8743525846858787e-06, + "loss": 0.5050587058067322, + "step": 1764 + }, + { + "epoch": 0.40696333871339635, + "grad_norm": 1.0581562602291477, + "learning_rate": 1.8741674984120852e-06, + "loss": 0.4380977749824524, + "step": 1765 + }, + { + "epoch": 0.407193912842979, + "grad_norm": 1.326690473297383, + "learning_rate": 1.8739822850691865e-06, + "loss": 0.5159280300140381, + "step": 1766 + }, + { + "epoch": 0.4074244869725617, + "grad_norm": 1.3542586293022822, + "learning_rate": 1.8737969446841046e-06, + "loss": 0.6999780535697937, + "step": 1767 + }, + { + "epoch": 0.40765506110214433, + "grad_norm": 1.110421221417803, + "learning_rate": 1.8736114772837816e-06, + "loss": 0.5844931602478027, + "step": 1768 + }, + { + "epoch": 0.407885635231727, + "grad_norm": 1.2621793403708754, + 
"learning_rate": 1.8734258828951764e-06, + "loss": 0.5078610181808472, + "step": 1769 + }, + { + "epoch": 0.40811620936130966, + "grad_norm": 1.1260800835324682, + "learning_rate": 1.8732401615452673e-06, + "loss": 0.564793586730957, + "step": 1770 + }, + { + "epoch": 0.4083467834908923, + "grad_norm": 1.2906459398399637, + "learning_rate": 1.8730543132610506e-06, + "loss": 0.6145100593566895, + "step": 1771 + }, + { + "epoch": 0.408577357620475, + "grad_norm": 1.181953537531204, + "learning_rate": 1.8728683380695414e-06, + "loss": 0.45434027910232544, + "step": 1772 + }, + { + "epoch": 0.40880793175005764, + "grad_norm": 1.0716516851559217, + "learning_rate": 1.872682235997773e-06, + "loss": 0.4917553961277008, + "step": 1773 + }, + { + "epoch": 0.4090385058796403, + "grad_norm": 1.0983534367258283, + "learning_rate": 1.872496007072797e-06, + "loss": 0.5677252411842346, + "step": 1774 + }, + { + "epoch": 0.40926908000922296, + "grad_norm": 1.042591224606922, + "learning_rate": 1.872309651321684e-06, + "loss": 0.5516688823699951, + "step": 1775 + }, + { + "epoch": 0.4094996541388056, + "grad_norm": 0.9746786592567609, + "learning_rate": 1.8721231687715227e-06, + "loss": 0.46755337715148926, + "step": 1776 + }, + { + "epoch": 0.4097302282683883, + "grad_norm": 1.3130136596789415, + "learning_rate": 1.8719365594494202e-06, + "loss": 0.6575521230697632, + "step": 1777 + }, + { + "epoch": 0.40996080239797095, + "grad_norm": 1.147271087293654, + "learning_rate": 1.8717498233825019e-06, + "loss": 0.6088716983795166, + "step": 1778 + }, + { + "epoch": 0.4101913765275536, + "grad_norm": 0.9692417840942277, + "learning_rate": 1.8715629605979118e-06, + "loss": 0.39476478099823, + "step": 1779 + }, + { + "epoch": 0.41042195065713627, + "grad_norm": 1.1915743629339146, + "learning_rate": 1.8713759711228123e-06, + "loss": 0.4893898665904999, + "step": 1780 + }, + { + "epoch": 0.41065252478671893, + "grad_norm": 1.298092223223541, + "learning_rate": 1.8711888549843842e-06, + 
"loss": 0.5077828764915466, + "step": 1781 + }, + { + "epoch": 0.4108830989163016, + "grad_norm": 1.0084481520460131, + "learning_rate": 1.8710016122098269e-06, + "loss": 0.5212582349777222, + "step": 1782 + }, + { + "epoch": 0.41111367304588425, + "grad_norm": 1.1325685052130308, + "learning_rate": 1.870814242826358e-06, + "loss": 0.5135321617126465, + "step": 1783 + }, + { + "epoch": 0.4113442471754669, + "grad_norm": 1.3281766258765773, + "learning_rate": 1.8706267468612133e-06, + "loss": 0.5398930311203003, + "step": 1784 + }, + { + "epoch": 0.4115748213050496, + "grad_norm": 1.3736547238310808, + "learning_rate": 1.8704391243416477e-06, + "loss": 0.49205562472343445, + "step": 1785 + }, + { + "epoch": 0.41180539543463224, + "grad_norm": 1.1386437791047925, + "learning_rate": 1.8702513752949335e-06, + "loss": 0.5145718455314636, + "step": 1786 + }, + { + "epoch": 0.4120359695642149, + "grad_norm": 0.9532031818658743, + "learning_rate": 1.8700634997483622e-06, + "loss": 0.4868374466896057, + "step": 1787 + }, + { + "epoch": 0.41226654369379756, + "grad_norm": 1.3881400467911258, + "learning_rate": 1.8698754977292435e-06, + "loss": 0.5409311652183533, + "step": 1788 + }, + { + "epoch": 0.4124971178233802, + "grad_norm": 1.307800898328953, + "learning_rate": 1.8696873692649052e-06, + "loss": 0.5476658344268799, + "step": 1789 + }, + { + "epoch": 0.4127276919529629, + "grad_norm": 1.251951597359409, + "learning_rate": 1.8694991143826937e-06, + "loss": 0.5545511245727539, + "step": 1790 + }, + { + "epoch": 0.41295826608254554, + "grad_norm": 1.1923559975321376, + "learning_rate": 1.869310733109974e-06, + "loss": 0.5479267835617065, + "step": 1791 + }, + { + "epoch": 0.4131888402121282, + "grad_norm": 1.1567279350887396, + "learning_rate": 1.8691222254741289e-06, + "loss": 0.5261585712432861, + "step": 1792 + }, + { + "epoch": 0.41341941434171087, + "grad_norm": 1.035636889065738, + "learning_rate": 1.8689335915025599e-06, + "loss": 0.5478091239929199, + "step": 1793 
+ }, + { + "epoch": 0.41364998847129353, + "grad_norm": 1.5699808716332777, + "learning_rate": 1.8687448312226872e-06, + "loss": 0.6739054322242737, + "step": 1794 + }, + { + "epoch": 0.4138805626008762, + "grad_norm": 1.2236857571837823, + "learning_rate": 1.8685559446619487e-06, + "loss": 0.613865315914154, + "step": 1795 + }, + { + "epoch": 0.41411113673045885, + "grad_norm": 1.0357788562325108, + "learning_rate": 1.8683669318478012e-06, + "loss": 0.3936721384525299, + "step": 1796 + }, + { + "epoch": 0.4143417108600415, + "grad_norm": 1.2330991076599302, + "learning_rate": 1.8681777928077197e-06, + "loss": 0.5508556365966797, + "step": 1797 + }, + { + "epoch": 0.4145722849896242, + "grad_norm": 1.1597942164225867, + "learning_rate": 1.867988527569197e-06, + "loss": 0.47734567523002625, + "step": 1798 + }, + { + "epoch": 0.41480285911920683, + "grad_norm": 1.0741273588884312, + "learning_rate": 1.8677991361597449e-06, + "loss": 0.46847039461135864, + "step": 1799 + }, + { + "epoch": 0.4150334332487895, + "grad_norm": 1.0364595457718502, + "learning_rate": 1.8676096186068937e-06, + "loss": 0.5202786326408386, + "step": 1800 + }, + { + "epoch": 0.41526400737837216, + "grad_norm": 1.2972392907268704, + "learning_rate": 1.8674199749381914e-06, + "loss": 0.5144700407981873, + "step": 1801 + }, + { + "epoch": 0.4154945815079548, + "grad_norm": 1.1959128972921023, + "learning_rate": 1.8672302051812048e-06, + "loss": 0.4394092559814453, + "step": 1802 + }, + { + "epoch": 0.4157251556375375, + "grad_norm": 1.159378410595036, + "learning_rate": 1.8670403093635185e-06, + "loss": 0.5017338991165161, + "step": 1803 + }, + { + "epoch": 0.41595572976712014, + "grad_norm": 1.173120824085894, + "learning_rate": 1.8668502875127366e-06, + "loss": 0.409381628036499, + "step": 1804 + }, + { + "epoch": 0.4161863038967028, + "grad_norm": 1.0538601271665184, + "learning_rate": 1.8666601396564795e-06, + "loss": 0.5193957090377808, + "step": 1805 + }, + { + "epoch": 0.41641687802628546, 
+ "grad_norm": 1.1338279816499315, + "learning_rate": 1.8664698658223882e-06, + "loss": 0.5933586359024048, + "step": 1806 + }, + { + "epoch": 0.4166474521558681, + "grad_norm": 1.1304820859227924, + "learning_rate": 1.8662794660381204e-06, + "loss": 0.5283366441726685, + "step": 1807 + }, + { + "epoch": 0.4168780262854508, + "grad_norm": 1.118558214164988, + "learning_rate": 1.8660889403313526e-06, + "loss": 0.5063748359680176, + "step": 1808 + }, + { + "epoch": 0.41710860041503345, + "grad_norm": 1.087893149342631, + "learning_rate": 1.86589828872978e-06, + "loss": 0.6386028528213501, + "step": 1809 + }, + { + "epoch": 0.4173391745446161, + "grad_norm": 1.0041938541729358, + "learning_rate": 1.8657075112611153e-06, + "loss": 0.4618440270423889, + "step": 1810 + }, + { + "epoch": 0.41756974867419877, + "grad_norm": 1.3214046412105014, + "learning_rate": 1.8655166079530903e-06, + "loss": 0.4523535966873169, + "step": 1811 + }, + { + "epoch": 0.41780032280378143, + "grad_norm": 1.0747078557029888, + "learning_rate": 1.8653255788334544e-06, + "loss": 0.501311719417572, + "step": 1812 + }, + { + "epoch": 0.4180308969333641, + "grad_norm": 1.112333239244982, + "learning_rate": 1.865134423929976e-06, + "loss": 0.5504614114761353, + "step": 1813 + }, + { + "epoch": 0.41826147106294675, + "grad_norm": 1.0979124892402103, + "learning_rate": 1.864943143270441e-06, + "loss": 0.44275063276290894, + "step": 1814 + }, + { + "epoch": 0.4184920451925294, + "grad_norm": 1.2558217334961832, + "learning_rate": 1.8647517368826545e-06, + "loss": 0.5628173351287842, + "step": 1815 + }, + { + "epoch": 0.4187226193221121, + "grad_norm": 1.032119999950418, + "learning_rate": 1.864560204794439e-06, + "loss": 0.489221453666687, + "step": 1816 + }, + { + "epoch": 0.41895319345169474, + "grad_norm": 1.2211401188891802, + "learning_rate": 1.8643685470336355e-06, + "loss": 0.5440137386322021, + "step": 1817 + }, + { + "epoch": 0.4191837675812774, + "grad_norm": 1.169073111073683, + 
"learning_rate": 1.8641767636281035e-06, + "loss": 0.4518952965736389, + "step": 1818 + }, + { + "epoch": 0.41941434171086006, + "grad_norm": 1.3403542594346476, + "learning_rate": 1.8639848546057209e-06, + "loss": 0.591090977191925, + "step": 1819 + }, + { + "epoch": 0.4196449158404427, + "grad_norm": 1.1775626126130905, + "learning_rate": 1.8637928199943836e-06, + "loss": 0.5622411966323853, + "step": 1820 + }, + { + "epoch": 0.4198754899700254, + "grad_norm": 1.1913164061698733, + "learning_rate": 1.8636006598220052e-06, + "loss": 0.5086779594421387, + "step": 1821 + }, + { + "epoch": 0.42010606409960805, + "grad_norm": 1.1334153574078034, + "learning_rate": 1.8634083741165188e-06, + "loss": 0.5055384635925293, + "step": 1822 + }, + { + "epoch": 0.4203366382291907, + "grad_norm": 1.129676706405598, + "learning_rate": 1.863215962905875e-06, + "loss": 0.5076277852058411, + "step": 1823 + }, + { + "epoch": 0.42056721235877337, + "grad_norm": 1.2637764937692704, + "learning_rate": 1.8630234262180424e-06, + "loss": 0.5378403067588806, + "step": 1824 + }, + { + "epoch": 0.42079778648835603, + "grad_norm": 1.0886873342980177, + "learning_rate": 1.8628307640810083e-06, + "loss": 0.6133165955543518, + "step": 1825 + }, + { + "epoch": 0.4210283606179387, + "grad_norm": 1.1726755470049002, + "learning_rate": 1.8626379765227782e-06, + "loss": 0.4978156089782715, + "step": 1826 + }, + { + "epoch": 0.42125893474752135, + "grad_norm": 1.0651427070474233, + "learning_rate": 1.8624450635713759e-06, + "loss": 0.43159037828445435, + "step": 1827 + }, + { + "epoch": 0.421489508877104, + "grad_norm": 1.0498543002649237, + "learning_rate": 1.8622520252548424e-06, + "loss": 0.48821642994880676, + "step": 1828 + }, + { + "epoch": 0.4217200830066867, + "grad_norm": 1.016883491579865, + "learning_rate": 1.8620588616012387e-06, + "loss": 0.4666696786880493, + "step": 1829 + }, + { + "epoch": 0.42195065713626934, + "grad_norm": 1.3621906870852534, + "learning_rate": 1.8618655726386425e-06, 
+ "loss": 0.5278067588806152, + "step": 1830 + }, + { + "epoch": 0.422181231265852, + "grad_norm": 1.0791230542588068, + "learning_rate": 1.8616721583951512e-06, + "loss": 0.4357749819755554, + "step": 1831 + }, + { + "epoch": 0.42241180539543466, + "grad_norm": 1.2299213864410639, + "learning_rate": 1.8614786188988782e-06, + "loss": 0.5388439893722534, + "step": 1832 + }, + { + "epoch": 0.4226423795250173, + "grad_norm": 1.4108572710321559, + "learning_rate": 1.8612849541779573e-06, + "loss": 0.5443956255912781, + "step": 1833 + }, + { + "epoch": 0.4228729536546, + "grad_norm": 1.2641105463427431, + "learning_rate": 1.86109116426054e-06, + "loss": 0.5614160895347595, + "step": 1834 + }, + { + "epoch": 0.4231035277841826, + "grad_norm": 1.2744746751945835, + "learning_rate": 1.8608972491747943e-06, + "loss": 0.45780229568481445, + "step": 1835 + }, + { + "epoch": 0.42333410191376525, + "grad_norm": 1.4638598184796152, + "learning_rate": 1.8607032089489088e-06, + "loss": 0.6354867219924927, + "step": 1836 + }, + { + "epoch": 0.4235646760433479, + "grad_norm": 1.2548140048045007, + "learning_rate": 1.860509043611089e-06, + "loss": 0.5172948241233826, + "step": 1837 + }, + { + "epoch": 0.42379525017293057, + "grad_norm": 1.1235697857312772, + "learning_rate": 1.8603147531895586e-06, + "loss": 0.4353157877922058, + "step": 1838 + }, + { + "epoch": 0.42402582430251323, + "grad_norm": 1.1680682893696177, + "learning_rate": 1.8601203377125599e-06, + "loss": 0.4971036911010742, + "step": 1839 + }, + { + "epoch": 0.4242563984320959, + "grad_norm": 1.0750331417799794, + "learning_rate": 1.859925797208353e-06, + "loss": 0.5037736296653748, + "step": 1840 + }, + { + "epoch": 0.42448697256167855, + "grad_norm": 1.052234823772871, + "learning_rate": 1.8597311317052165e-06, + "loss": 0.4480808675289154, + "step": 1841 + }, + { + "epoch": 0.4247175466912612, + "grad_norm": 1.2441100874175304, + "learning_rate": 1.8595363412314468e-06, + "loss": 0.5102680325508118, + "step": 1842 + 
}, + { + "epoch": 0.4249481208208439, + "grad_norm": 1.1806961844163353, + "learning_rate": 1.8593414258153585e-06, + "loss": 0.5979090929031372, + "step": 1843 + }, + { + "epoch": 0.42517869495042654, + "grad_norm": 1.0776260642041309, + "learning_rate": 1.8591463854852854e-06, + "loss": 0.4616047143936157, + "step": 1844 + }, + { + "epoch": 0.4254092690800092, + "grad_norm": 1.0059742827824252, + "learning_rate": 1.8589512202695773e-06, + "loss": 0.4893925189971924, + "step": 1845 + }, + { + "epoch": 0.42563984320959186, + "grad_norm": 1.0527785435538273, + "learning_rate": 1.8587559301966045e-06, + "loss": 0.49619823694229126, + "step": 1846 + }, + { + "epoch": 0.4258704173391745, + "grad_norm": 1.0558967393125807, + "learning_rate": 1.858560515294754e-06, + "loss": 0.5205181837081909, + "step": 1847 + }, + { + "epoch": 0.4261009914687572, + "grad_norm": 1.3589791827910958, + "learning_rate": 1.8583649755924315e-06, + "loss": 0.5910394191741943, + "step": 1848 + }, + { + "epoch": 0.42633156559833985, + "grad_norm": 1.0092224062378152, + "learning_rate": 1.8581693111180603e-06, + "loss": 0.4916709363460541, + "step": 1849 + }, + { + "epoch": 0.4265621397279225, + "grad_norm": 1.261654259944108, + "learning_rate": 1.8579735219000824e-06, + "loss": 0.5728994011878967, + "step": 1850 + }, + { + "epoch": 0.42679271385750517, + "grad_norm": 1.162885813109175, + "learning_rate": 1.857777607966958e-06, + "loss": 0.49620527029037476, + "step": 1851 + }, + { + "epoch": 0.42702328798708783, + "grad_norm": 1.2230754640158692, + "learning_rate": 1.8575815693471649e-06, + "loss": 0.5100233554840088, + "step": 1852 + }, + { + "epoch": 0.4272538621166705, + "grad_norm": 1.1713081386962017, + "learning_rate": 1.8573854060691994e-06, + "loss": 0.48981544375419617, + "step": 1853 + }, + { + "epoch": 0.42748443624625315, + "grad_norm": 1.0875128431195988, + "learning_rate": 1.8571891181615755e-06, + "loss": 0.44190293550491333, + "step": 1854 + }, + { + "epoch": 0.4277150103758358, 
+ "grad_norm": 1.2645757986317834, + "learning_rate": 1.8569927056528264e-06, + "loss": 0.42867448925971985, + "step": 1855 + }, + { + "epoch": 0.4279455845054185, + "grad_norm": 1.849182592399251, + "learning_rate": 1.8567961685715016e-06, + "loss": 0.4873782694339752, + "step": 1856 + }, + { + "epoch": 0.42817615863500114, + "grad_norm": 1.2007241803680166, + "learning_rate": 1.8565995069461706e-06, + "loss": 0.4985312819480896, + "step": 1857 + }, + { + "epoch": 0.4284067327645838, + "grad_norm": 1.2242163730204847, + "learning_rate": 1.85640272080542e-06, + "loss": 0.5525496006011963, + "step": 1858 + }, + { + "epoch": 0.42863730689416646, + "grad_norm": 1.293851624108558, + "learning_rate": 1.8562058101778547e-06, + "loss": 0.5645877122879028, + "step": 1859 + }, + { + "epoch": 0.4288678810237491, + "grad_norm": 1.0805291431045556, + "learning_rate": 1.856008775092097e-06, + "loss": 0.4304332137107849, + "step": 1860 + }, + { + "epoch": 0.4290984551533318, + "grad_norm": 1.14759009112306, + "learning_rate": 1.8558116155767888e-06, + "loss": 0.4970170259475708, + "step": 1861 + }, + { + "epoch": 0.42932902928291444, + "grad_norm": 1.344010966492771, + "learning_rate": 1.8556143316605888e-06, + "loss": 0.5718003511428833, + "step": 1862 + }, + { + "epoch": 0.4295596034124971, + "grad_norm": 1.3157067542574963, + "learning_rate": 1.8554169233721741e-06, + "loss": 0.4445415139198303, + "step": 1863 + }, + { + "epoch": 0.42979017754207977, + "grad_norm": 1.1001033203387223, + "learning_rate": 1.8552193907402404e-06, + "loss": 0.5297178626060486, + "step": 1864 + }, + { + "epoch": 0.4300207516716624, + "grad_norm": 0.9618626645905404, + "learning_rate": 1.8550217337935013e-06, + "loss": 0.4564483165740967, + "step": 1865 + }, + { + "epoch": 0.4302513258012451, + "grad_norm": 1.2509575429906847, + "learning_rate": 1.8548239525606872e-06, + "loss": 0.4789202809333801, + "step": 1866 + }, + { + "epoch": 0.43048189993082775, + "grad_norm": 1.0950598228304256, + 
"learning_rate": 1.8546260470705485e-06, + "loss": 0.5240263938903809, + "step": 1867 + }, + { + "epoch": 0.4307124740604104, + "grad_norm": 1.0326884664902543, + "learning_rate": 1.8544280173518523e-06, + "loss": 0.4190866947174072, + "step": 1868 + }, + { + "epoch": 0.43094304818999307, + "grad_norm": 1.098749197470929, + "learning_rate": 1.8542298634333844e-06, + "loss": 0.502301812171936, + "step": 1869 + }, + { + "epoch": 0.43117362231957573, + "grad_norm": 1.3711612309046508, + "learning_rate": 1.8540315853439488e-06, + "loss": 0.5752545595169067, + "step": 1870 + }, + { + "epoch": 0.4314041964491584, + "grad_norm": 0.9641480143185914, + "learning_rate": 1.8538331831123667e-06, + "loss": 0.44959962368011475, + "step": 1871 + }, + { + "epoch": 0.43163477057874106, + "grad_norm": 1.2299121621798328, + "learning_rate": 1.8536346567674782e-06, + "loss": 0.5320106148719788, + "step": 1872 + }, + { + "epoch": 0.4318653447083237, + "grad_norm": 1.393182956860924, + "learning_rate": 1.8534360063381407e-06, + "loss": 0.5981979966163635, + "step": 1873 + }, + { + "epoch": 0.4320959188379064, + "grad_norm": 1.350381662747622, + "learning_rate": 1.8532372318532306e-06, + "loss": 0.5567579865455627, + "step": 1874 + }, + { + "epoch": 0.43232649296748904, + "grad_norm": 1.4350681093951811, + "learning_rate": 1.8530383333416415e-06, + "loss": 0.5604764223098755, + "step": 1875 + }, + { + "epoch": 0.4325570670970717, + "grad_norm": 1.4048444099270982, + "learning_rate": 1.8528393108322852e-06, + "loss": 0.5410721302032471, + "step": 1876 + }, + { + "epoch": 0.43278764122665436, + "grad_norm": 1.1191045271107989, + "learning_rate": 1.852640164354092e-06, + "loss": 0.5417271852493286, + "step": 1877 + }, + { + "epoch": 0.433018215356237, + "grad_norm": 1.1925092385457925, + "learning_rate": 1.8524408939360096e-06, + "loss": 0.5831471681594849, + "step": 1878 + }, + { + "epoch": 0.4332487894858197, + "grad_norm": 1.0939224950949575, + "learning_rate": 1.8522414996070045e-06, + 
"loss": 0.45030760765075684, + "step": 1879 + }, + { + "epoch": 0.43347936361540235, + "grad_norm": 1.1520994484307991, + "learning_rate": 1.8520419813960596e-06, + "loss": 0.44657936692237854, + "step": 1880 + }, + { + "epoch": 0.433709937744985, + "grad_norm": 1.1691007631884454, + "learning_rate": 1.851842339332178e-06, + "loss": 0.5472795963287354, + "step": 1881 + }, + { + "epoch": 0.43394051187456767, + "grad_norm": 1.1388268257083902, + "learning_rate": 1.8516425734443786e-06, + "loss": 0.4883359968662262, + "step": 1882 + }, + { + "epoch": 0.43417108600415033, + "grad_norm": 1.0473976151781044, + "learning_rate": 1.8514426837617006e-06, + "loss": 0.5172675848007202, + "step": 1883 + }, + { + "epoch": 0.434401660133733, + "grad_norm": 1.2812470936666533, + "learning_rate": 1.851242670313199e-06, + "loss": 0.5253418684005737, + "step": 1884 + }, + { + "epoch": 0.43463223426331565, + "grad_norm": 1.2940121862284113, + "learning_rate": 1.8510425331279485e-06, + "loss": 0.4684918522834778, + "step": 1885 + }, + { + "epoch": 0.4348628083928983, + "grad_norm": 1.7313907662218715, + "learning_rate": 1.8508422722350404e-06, + "loss": 0.522485077381134, + "step": 1886 + }, + { + "epoch": 0.435093382522481, + "grad_norm": 1.0862530759153244, + "learning_rate": 1.8506418876635852e-06, + "loss": 0.5123787522315979, + "step": 1887 + }, + { + "epoch": 0.43532395665206364, + "grad_norm": 1.2812741997977775, + "learning_rate": 1.8504413794427106e-06, + "loss": 0.5195976495742798, + "step": 1888 + }, + { + "epoch": 0.4355545307816463, + "grad_norm": 1.081503403719265, + "learning_rate": 1.8502407476015626e-06, + "loss": 0.48394906520843506, + "step": 1889 + }, + { + "epoch": 0.43578510491122896, + "grad_norm": 1.2031421687566246, + "learning_rate": 1.850039992169305e-06, + "loss": 0.5083323121070862, + "step": 1890 + }, + { + "epoch": 0.4360156790408116, + "grad_norm": 1.2379097603599272, + "learning_rate": 1.8498391131751196e-06, + "loss": 0.5303651094436646, + "step": 1891 
+ }, + { + "epoch": 0.4362462531703943, + "grad_norm": 1.010820397187413, + "learning_rate": 1.8496381106482062e-06, + "loss": 0.49429047107696533, + "step": 1892 + }, + { + "epoch": 0.43647682729997694, + "grad_norm": 1.2506572926955764, + "learning_rate": 1.8494369846177826e-06, + "loss": 0.5263347625732422, + "step": 1893 + }, + { + "epoch": 0.4367074014295596, + "grad_norm": 1.3195849148516783, + "learning_rate": 1.8492357351130848e-06, + "loss": 0.5332654714584351, + "step": 1894 + }, + { + "epoch": 0.43693797555914227, + "grad_norm": 1.1692381501686961, + "learning_rate": 1.8490343621633657e-06, + "loss": 0.5598278045654297, + "step": 1895 + }, + { + "epoch": 0.43716854968872493, + "grad_norm": 1.0323293964159153, + "learning_rate": 1.8488328657978975e-06, + "loss": 0.4026976227760315, + "step": 1896 + }, + { + "epoch": 0.4373991238183076, + "grad_norm": 1.3568102099956687, + "learning_rate": 1.8486312460459698e-06, + "loss": 0.4277791380882263, + "step": 1897 + }, + { + "epoch": 0.43762969794789025, + "grad_norm": 1.2550644818276735, + "learning_rate": 1.8484295029368896e-06, + "loss": 0.49567973613739014, + "step": 1898 + }, + { + "epoch": 0.4378602720774729, + "grad_norm": 1.3750960531365106, + "learning_rate": 1.8482276364999828e-06, + "loss": 0.4659258723258972, + "step": 1899 + }, + { + "epoch": 0.4380908462070556, + "grad_norm": 1.4921650354400726, + "learning_rate": 1.8480256467645923e-06, + "loss": 0.4950314164161682, + "step": 1900 + }, + { + "epoch": 0.43832142033663823, + "grad_norm": 1.2407118809889077, + "learning_rate": 1.8478235337600796e-06, + "loss": 0.5584981441497803, + "step": 1901 + }, + { + "epoch": 0.4385519944662209, + "grad_norm": 1.4539173472262998, + "learning_rate": 1.847621297515824e-06, + "loss": 0.6322404146194458, + "step": 1902 + }, + { + "epoch": 0.43878256859580356, + "grad_norm": 1.6859923054790666, + "learning_rate": 1.8474189380612225e-06, + "loss": 0.49535471200942993, + "step": 1903 + }, + { + "epoch": 
0.4390131427253862, + "grad_norm": 1.0079272515569784, + "learning_rate": 1.8472164554256897e-06, + "loss": 0.40703707933425903, + "step": 1904 + }, + { + "epoch": 0.4392437168549689, + "grad_norm": 1.1125525506446694, + "learning_rate": 1.8470138496386588e-06, + "loss": 0.4540821313858032, + "step": 1905 + }, + { + "epoch": 0.43947429098455154, + "grad_norm": 1.1572392182622382, + "learning_rate": 1.846811120729581e-06, + "loss": 0.45964252948760986, + "step": 1906 + }, + { + "epoch": 0.4397048651141342, + "grad_norm": 1.018497744556974, + "learning_rate": 1.8466082687279244e-06, + "loss": 0.4604472517967224, + "step": 1907 + }, + { + "epoch": 0.43993543924371686, + "grad_norm": 1.114828518838774, + "learning_rate": 1.8464052936631758e-06, + "loss": 0.44585052132606506, + "step": 1908 + }, + { + "epoch": 0.4401660133732995, + "grad_norm": 1.2189161284011176, + "learning_rate": 1.8462021955648397e-06, + "loss": 0.43862414360046387, + "step": 1909 + }, + { + "epoch": 0.4403965875028822, + "grad_norm": 1.0484346475063675, + "learning_rate": 1.8459989744624386e-06, + "loss": 0.5148224234580994, + "step": 1910 + }, + { + "epoch": 0.44062716163246485, + "grad_norm": 1.3041727396087255, + "learning_rate": 1.8457956303855124e-06, + "loss": 0.6201390027999878, + "step": 1911 + }, + { + "epoch": 0.4408577357620475, + "grad_norm": 1.322348681007624, + "learning_rate": 1.8455921633636196e-06, + "loss": 0.5828813314437866, + "step": 1912 + }, + { + "epoch": 0.44108830989163017, + "grad_norm": 1.2413839772395276, + "learning_rate": 1.845388573426336e-06, + "loss": 0.5491579174995422, + "step": 1913 + }, + { + "epoch": 0.44131888402121283, + "grad_norm": 1.135006469141378, + "learning_rate": 1.8451848606032554e-06, + "loss": 0.4204079508781433, + "step": 1914 + }, + { + "epoch": 0.4415494581507955, + "grad_norm": 1.3248528862326203, + "learning_rate": 1.8449810249239898e-06, + "loss": 0.5734649300575256, + "step": 1915 + }, + { + "epoch": 0.44178003228037815, + "grad_norm": 
1.1101812599659409, + "learning_rate": 1.8447770664181684e-06, + "loss": 0.48931679129600525, + "step": 1916 + }, + { + "epoch": 0.4420106064099608, + "grad_norm": 1.292831898773596, + "learning_rate": 1.8445729851154392e-06, + "loss": 0.5206375122070312, + "step": 1917 + }, + { + "epoch": 0.4422411805395435, + "grad_norm": 1.3590503413541226, + "learning_rate": 1.8443687810454666e-06, + "loss": 0.4916420578956604, + "step": 1918 + }, + { + "epoch": 0.44247175466912614, + "grad_norm": 1.0963843972341092, + "learning_rate": 1.8441644542379348e-06, + "loss": 0.5021753311157227, + "step": 1919 + }, + { + "epoch": 0.4427023287987088, + "grad_norm": 1.2556127492378621, + "learning_rate": 1.8439600047225441e-06, + "loss": 0.4615249037742615, + "step": 1920 + }, + { + "epoch": 0.44293290292829146, + "grad_norm": 1.3251855444784397, + "learning_rate": 1.8437554325290133e-06, + "loss": 0.4849514365196228, + "step": 1921 + }, + { + "epoch": 0.4431634770578741, + "grad_norm": 1.3926092312086646, + "learning_rate": 1.843550737687079e-06, + "loss": 0.5872727632522583, + "step": 1922 + }, + { + "epoch": 0.4433940511874568, + "grad_norm": 1.1422193923698303, + "learning_rate": 1.843345920226496e-06, + "loss": 0.48469966650009155, + "step": 1923 + }, + { + "epoch": 0.44362462531703944, + "grad_norm": 1.1078885152995024, + "learning_rate": 1.8431409801770364e-06, + "loss": 0.45931774377822876, + "step": 1924 + }, + { + "epoch": 0.4438551994466221, + "grad_norm": 1.0630184817249293, + "learning_rate": 1.8429359175684907e-06, + "loss": 0.5138596296310425, + "step": 1925 + }, + { + "epoch": 0.44408577357620477, + "grad_norm": 1.1576378783801253, + "learning_rate": 1.8427307324306661e-06, + "loss": 0.5586874485015869, + "step": 1926 + }, + { + "epoch": 0.44431634770578743, + "grad_norm": 0.9982496919132913, + "learning_rate": 1.8425254247933887e-06, + "loss": 0.5373901724815369, + "step": 1927 + }, + { + "epoch": 0.4445469218353701, + "grad_norm": 1.3044317948619655, + "learning_rate": 
1.8423199946865022e-06, + "loss": 0.46104729175567627, + "step": 1928 + }, + { + "epoch": 0.44477749596495275, + "grad_norm": 1.2637964058278408, + "learning_rate": 1.8421144421398678e-06, + "loss": 0.4837646782398224, + "step": 1929 + }, + { + "epoch": 0.4450080700945354, + "grad_norm": 1.0579849017335872, + "learning_rate": 1.8419087671833647e-06, + "loss": 0.47685718536376953, + "step": 1930 + }, + { + "epoch": 0.4452386442241181, + "grad_norm": 1.3061309074235694, + "learning_rate": 1.8417029698468897e-06, + "loss": 0.5904572606086731, + "step": 1931 + }, + { + "epoch": 0.44546921835370074, + "grad_norm": 1.0698778232309683, + "learning_rate": 1.8414970501603577e-06, + "loss": 0.5434018969535828, + "step": 1932 + }, + { + "epoch": 0.4456997924832834, + "grad_norm": 1.0813116335575876, + "learning_rate": 1.8412910081537012e-06, + "loss": 0.5532705783843994, + "step": 1933 + }, + { + "epoch": 0.44593036661286606, + "grad_norm": 1.2746241772853588, + "learning_rate": 1.8410848438568704e-06, + "loss": 0.4900597929954529, + "step": 1934 + }, + { + "epoch": 0.4461609407424487, + "grad_norm": 1.1321871851277807, + "learning_rate": 1.8408785572998334e-06, + "loss": 0.40426892042160034, + "step": 1935 + }, + { + "epoch": 0.4463915148720314, + "grad_norm": 1.2056959007702837, + "learning_rate": 1.840672148512576e-06, + "loss": 0.48805081844329834, + "step": 1936 + }, + { + "epoch": 0.44662208900161404, + "grad_norm": 1.247599925173634, + "learning_rate": 1.8404656175251019e-06, + "loss": 0.4997096657752991, + "step": 1937 + }, + { + "epoch": 0.4468526631311967, + "grad_norm": 1.1300078883402307, + "learning_rate": 1.8402589643674325e-06, + "loss": 0.5113422274589539, + "step": 1938 + }, + { + "epoch": 0.44708323726077936, + "grad_norm": 1.2034211237767165, + "learning_rate": 1.8400521890696065e-06, + "loss": 0.44080060720443726, + "step": 1939 + }, + { + "epoch": 0.447313811390362, + "grad_norm": 1.1365386964776252, + "learning_rate": 1.8398452916616816e-06, + "loss": 
0.4477943778038025, + "step": 1940 + }, + { + "epoch": 0.4475443855199447, + "grad_norm": 1.2171142668463, + "learning_rate": 1.8396382721737318e-06, + "loss": 0.4597470760345459, + "step": 1941 + }, + { + "epoch": 0.44777495964952735, + "grad_norm": 1.1079547319265362, + "learning_rate": 1.8394311306358494e-06, + "loss": 0.4758293628692627, + "step": 1942 + }, + { + "epoch": 0.44800553377911, + "grad_norm": 1.1579717682654027, + "learning_rate": 1.8392238670781453e-06, + "loss": 0.4573550224304199, + "step": 1943 + }, + { + "epoch": 0.44823610790869267, + "grad_norm": 1.318176172591765, + "learning_rate": 1.8390164815307465e-06, + "loss": 0.504696786403656, + "step": 1944 + }, + { + "epoch": 0.44846668203827533, + "grad_norm": 1.176904108457006, + "learning_rate": 1.8388089740237991e-06, + "loss": 0.4936453700065613, + "step": 1945 + }, + { + "epoch": 0.448697256167858, + "grad_norm": 1.0847569291854338, + "learning_rate": 1.8386013445874661e-06, + "loss": 0.4851078987121582, + "step": 1946 + }, + { + "epoch": 0.4489278302974406, + "grad_norm": 1.184810595622898, + "learning_rate": 1.8383935932519288e-06, + "loss": 0.4881519377231598, + "step": 1947 + }, + { + "epoch": 0.44915840442702326, + "grad_norm": 1.2389121525709461, + "learning_rate": 1.8381857200473859e-06, + "loss": 0.5604408979415894, + "step": 1948 + }, + { + "epoch": 0.4493889785566059, + "grad_norm": 1.2909928460674411, + "learning_rate": 1.8379777250040535e-06, + "loss": 0.5022269487380981, + "step": 1949 + }, + { + "epoch": 0.4496195526861886, + "grad_norm": 1.5074815200191058, + "learning_rate": 1.8377696081521666e-06, + "loss": 0.6519315242767334, + "step": 1950 + }, + { + "epoch": 0.44985012681577125, + "grad_norm": 1.0636886048128833, + "learning_rate": 1.8375613695219766e-06, + "loss": 0.3820997476577759, + "step": 1951 + }, + { + "epoch": 0.4500807009453539, + "grad_norm": 1.2705283632306288, + "learning_rate": 1.8373530091437526e-06, + "loss": 0.5473283529281616, + "step": 1952 + }, + { + 
"epoch": 0.45031127507493657, + "grad_norm": 1.3245130391551474, + "learning_rate": 1.8371445270477828e-06, + "loss": 0.5835955142974854, + "step": 1953 + }, + { + "epoch": 0.45054184920451923, + "grad_norm": 0.9645583101230016, + "learning_rate": 1.8369359232643716e-06, + "loss": 0.5398194789886475, + "step": 1954 + }, + { + "epoch": 0.4507724233341019, + "grad_norm": 1.363319289299188, + "learning_rate": 1.8367271978238418e-06, + "loss": 0.36561834812164307, + "step": 1955 + }, + { + "epoch": 0.45100299746368455, + "grad_norm": 1.212738724980002, + "learning_rate": 1.8365183507565342e-06, + "loss": 0.319802463054657, + "step": 1956 + }, + { + "epoch": 0.4512335715932672, + "grad_norm": 1.2303957915062576, + "learning_rate": 1.8363093820928063e-06, + "loss": 0.46466606855392456, + "step": 1957 + }, + { + "epoch": 0.4514641457228499, + "grad_norm": 1.0793723825771542, + "learning_rate": 1.8361002918630338e-06, + "loss": 0.5839806199073792, + "step": 1958 + }, + { + "epoch": 0.45169471985243254, + "grad_norm": 1.1018651408043991, + "learning_rate": 1.8358910800976105e-06, + "loss": 0.4472346603870392, + "step": 1959 + }, + { + "epoch": 0.4519252939820152, + "grad_norm": 1.2384424942976882, + "learning_rate": 1.835681746826947e-06, + "loss": 0.5191199779510498, + "step": 1960 + }, + { + "epoch": 0.45215586811159786, + "grad_norm": 1.199344967008703, + "learning_rate": 1.8354722920814722e-06, + "loss": 0.5832456350326538, + "step": 1961 + }, + { + "epoch": 0.4523864422411805, + "grad_norm": 1.17539846221013, + "learning_rate": 1.8352627158916326e-06, + "loss": 0.604708194732666, + "step": 1962 + }, + { + "epoch": 0.4526170163707632, + "grad_norm": 1.0362921929144542, + "learning_rate": 1.8350530182878924e-06, + "loss": 0.5640981793403625, + "step": 1963 + }, + { + "epoch": 0.45284759050034584, + "grad_norm": 1.6578766467164143, + "learning_rate": 1.8348431993007326e-06, + "loss": 0.4816977381706238, + "step": 1964 + }, + { + "epoch": 0.4530781646299285, + "grad_norm": 
1.1374005988930347, + "learning_rate": 1.8346332589606526e-06, + "loss": 0.4226726293563843, + "step": 1965 + }, + { + "epoch": 0.45330873875951117, + "grad_norm": 1.1547528745449813, + "learning_rate": 1.8344231972981701e-06, + "loss": 0.49635130167007446, + "step": 1966 + }, + { + "epoch": 0.4535393128890938, + "grad_norm": 1.1372879426647424, + "learning_rate": 1.8342130143438193e-06, + "loss": 0.5275523662567139, + "step": 1967 + }, + { + "epoch": 0.4537698870186765, + "grad_norm": 1.202496816282669, + "learning_rate": 1.834002710128152e-06, + "loss": 0.48517313599586487, + "step": 1968 + }, + { + "epoch": 0.45400046114825915, + "grad_norm": 1.1968500607132941, + "learning_rate": 1.8337922846817388e-06, + "loss": 0.4352126717567444, + "step": 1969 + }, + { + "epoch": 0.4542310352778418, + "grad_norm": 1.116289808278095, + "learning_rate": 1.8335817380351668e-06, + "loss": 0.48131102323532104, + "step": 1970 + }, + { + "epoch": 0.45446160940742447, + "grad_norm": 1.1124663257243492, + "learning_rate": 1.8333710702190408e-06, + "loss": 0.48989611864089966, + "step": 1971 + }, + { + "epoch": 0.45469218353700713, + "grad_norm": 1.4370850989895667, + "learning_rate": 1.8331602812639839e-06, + "loss": 0.4841296076774597, + "step": 1972 + }, + { + "epoch": 0.4549227576665898, + "grad_norm": 1.1830445801916494, + "learning_rate": 1.8329493712006364e-06, + "loss": 0.5479841232299805, + "step": 1973 + }, + { + "epoch": 0.45515333179617246, + "grad_norm": 1.1923903658380426, + "learning_rate": 1.8327383400596559e-06, + "loss": 0.4732212424278259, + "step": 1974 + }, + { + "epoch": 0.4553839059257551, + "grad_norm": 1.0628413230145501, + "learning_rate": 1.8325271878717183e-06, + "loss": 0.46675610542297363, + "step": 1975 + }, + { + "epoch": 0.4556144800553378, + "grad_norm": 1.0416293786228703, + "learning_rate": 1.8323159146675163e-06, + "loss": 0.5464143753051758, + "step": 1976 + }, + { + "epoch": 0.45584505418492044, + "grad_norm": 1.0345078154587666, + 
"learning_rate": 1.832104520477761e-06, + "loss": 0.3888660669326782, + "step": 1977 + }, + { + "epoch": 0.4560756283145031, + "grad_norm": 1.4241654424068988, + "learning_rate": 1.8318930053331805e-06, + "loss": 0.5163271427154541, + "step": 1978 + }, + { + "epoch": 0.45630620244408576, + "grad_norm": 1.2347472844947731, + "learning_rate": 1.8316813692645208e-06, + "loss": 0.5471124649047852, + "step": 1979 + }, + { + "epoch": 0.4565367765736684, + "grad_norm": 1.1473833654009267, + "learning_rate": 1.8314696123025452e-06, + "loss": 0.5907406210899353, + "step": 1980 + }, + { + "epoch": 0.4567673507032511, + "grad_norm": 1.298768820373183, + "learning_rate": 1.8312577344780346e-06, + "loss": 0.5249447226524353, + "step": 1981 + }, + { + "epoch": 0.45699792483283375, + "grad_norm": 1.2135802460189444, + "learning_rate": 1.8310457358217879e-06, + "loss": 0.5063247084617615, + "step": 1982 + }, + { + "epoch": 0.4572284989624164, + "grad_norm": 1.361065103282706, + "learning_rate": 1.830833616364621e-06, + "loss": 0.4448107182979584, + "step": 1983 + }, + { + "epoch": 0.45745907309199907, + "grad_norm": 1.1036363497718666, + "learning_rate": 1.830621376137368e-06, + "loss": 0.5699697732925415, + "step": 1984 + }, + { + "epoch": 0.45768964722158173, + "grad_norm": 1.246349122018957, + "learning_rate": 1.8304090151708794e-06, + "loss": 0.5701720118522644, + "step": 1985 + }, + { + "epoch": 0.4579202213511644, + "grad_norm": 1.2319947144837158, + "learning_rate": 1.830196533496025e-06, + "loss": 0.4754391014575958, + "step": 1986 + }, + { + "epoch": 0.45815079548074705, + "grad_norm": 1.3528306833221286, + "learning_rate": 1.8299839311436903e-06, + "loss": 0.47649019956588745, + "step": 1987 + }, + { + "epoch": 0.4583813696103297, + "grad_norm": 1.3311097062461437, + "learning_rate": 1.8297712081447797e-06, + "loss": 0.5524393320083618, + "step": 1988 + }, + { + "epoch": 0.4586119437399124, + "grad_norm": 1.0762480086961639, + "learning_rate": 1.8295583645302144e-06, + 
"loss": 0.45731648802757263, + "step": 1989 + }, + { + "epoch": 0.45884251786949504, + "grad_norm": 1.130533269973984, + "learning_rate": 1.8293454003309336e-06, + "loss": 0.4999742805957794, + "step": 1990 + }, + { + "epoch": 0.4590730919990777, + "grad_norm": 1.1313506863251181, + "learning_rate": 1.829132315577894e-06, + "loss": 0.49084147810935974, + "step": 1991 + }, + { + "epoch": 0.45930366612866036, + "grad_norm": 1.2521400943324308, + "learning_rate": 1.828919110302069e-06, + "loss": 0.45332348346710205, + "step": 1992 + }, + { + "epoch": 0.459534240258243, + "grad_norm": 1.0776738520694769, + "learning_rate": 1.8287057845344504e-06, + "loss": 0.5029363632202148, + "step": 1993 + }, + { + "epoch": 0.4597648143878257, + "grad_norm": 1.1554006749910666, + "learning_rate": 1.8284923383060475e-06, + "loss": 0.5373274087905884, + "step": 1994 + }, + { + "epoch": 0.45999538851740834, + "grad_norm": 1.372219905846735, + "learning_rate": 1.8282787716478867e-06, + "loss": 0.5022158622741699, + "step": 1995 + }, + { + "epoch": 0.460225962646991, + "grad_norm": 1.5170390306548123, + "learning_rate": 1.828065084591012e-06, + "loss": 0.5093190670013428, + "step": 1996 + }, + { + "epoch": 0.46045653677657367, + "grad_norm": 1.1628780385550688, + "learning_rate": 1.827851277166485e-06, + "loss": 0.5406581163406372, + "step": 1997 + }, + { + "epoch": 0.4606871109061563, + "grad_norm": 1.0838824930169186, + "learning_rate": 1.8276373494053852e-06, + "loss": 0.4403364062309265, + "step": 1998 + }, + { + "epoch": 0.460917685035739, + "grad_norm": 1.0663930849179153, + "learning_rate": 1.8274233013388085e-06, + "loss": 0.48383134603500366, + "step": 1999 + }, + { + "epoch": 0.46114825916532165, + "grad_norm": 1.278024022767056, + "learning_rate": 1.8272091329978693e-06, + "loss": 0.5177836418151855, + "step": 2000 + }, + { + "epoch": 0.4613788332949043, + "grad_norm": 1.3026255484345248, + "learning_rate": 1.8269948444136991e-06, + "loss": 0.5699004530906677, + "step": 2001 + 
}, + { + "epoch": 0.461609407424487, + "grad_norm": 1.0712598167444656, + "learning_rate": 1.826780435617447e-06, + "loss": 0.5415153503417969, + "step": 2002 + }, + { + "epoch": 0.46183998155406963, + "grad_norm": 1.3243429308154806, + "learning_rate": 1.8265659066402792e-06, + "loss": 0.5521166920661926, + "step": 2003 + }, + { + "epoch": 0.4620705556836523, + "grad_norm": 1.0401918069659792, + "learning_rate": 1.8263512575133802e-06, + "loss": 0.4518507122993469, + "step": 2004 + }, + { + "epoch": 0.46230112981323496, + "grad_norm": 1.4036586027704223, + "learning_rate": 1.8261364882679508e-06, + "loss": 0.5997140407562256, + "step": 2005 + }, + { + "epoch": 0.4625317039428176, + "grad_norm": 1.2297832096563293, + "learning_rate": 1.8259215989352103e-06, + "loss": 0.5105265974998474, + "step": 2006 + }, + { + "epoch": 0.4627622780724003, + "grad_norm": 1.3620575066378895, + "learning_rate": 1.825706589546395e-06, + "loss": 0.5229371190071106, + "step": 2007 + }, + { + "epoch": 0.46299285220198294, + "grad_norm": 1.323713226525437, + "learning_rate": 1.825491460132759e-06, + "loss": 0.4833800792694092, + "step": 2008 + }, + { + "epoch": 0.4632234263315656, + "grad_norm": 1.443684310899243, + "learning_rate": 1.8252762107255727e-06, + "loss": 0.4323253035545349, + "step": 2009 + }, + { + "epoch": 0.46345400046114826, + "grad_norm": 1.0890999093716327, + "learning_rate": 1.8250608413561253e-06, + "loss": 0.4563494026660919, + "step": 2010 + }, + { + "epoch": 0.4636845745907309, + "grad_norm": 1.5474519259744821, + "learning_rate": 1.8248453520557228e-06, + "loss": 0.5656196475028992, + "step": 2011 + }, + { + "epoch": 0.4639151487203136, + "grad_norm": 1.4798653425077055, + "learning_rate": 1.8246297428556887e-06, + "loss": 0.5448226928710938, + "step": 2012 + }, + { + "epoch": 0.46414572284989625, + "grad_norm": 1.1620535147248132, + "learning_rate": 1.8244140137873645e-06, + "loss": 0.4692860543727875, + "step": 2013 + }, + { + "epoch": 0.4643762969794789, + 
"grad_norm": 1.1643805671555858, + "learning_rate": 1.8241981648821079e-06, + "loss": 0.5948643088340759, + "step": 2014 + }, + { + "epoch": 0.46460687110906157, + "grad_norm": 1.1853722372788744, + "learning_rate": 1.823982196171295e-06, + "loss": 0.54410719871521, + "step": 2015 + }, + { + "epoch": 0.46483744523864423, + "grad_norm": 1.1149495485691443, + "learning_rate": 1.8237661076863192e-06, + "loss": 0.430447518825531, + "step": 2016 + }, + { + "epoch": 0.4650680193682269, + "grad_norm": 1.2520273819748522, + "learning_rate": 1.8235498994585913e-06, + "loss": 0.5420910716056824, + "step": 2017 + }, + { + "epoch": 0.46529859349780955, + "grad_norm": 1.119152189162338, + "learning_rate": 1.823333571519539e-06, + "loss": 0.5140334963798523, + "step": 2018 + }, + { + "epoch": 0.4655291676273922, + "grad_norm": 1.1399919106847334, + "learning_rate": 1.8231171239006075e-06, + "loss": 0.5901660323143005, + "step": 2019 + }, + { + "epoch": 0.4657597417569749, + "grad_norm": 1.174060044130563, + "learning_rate": 1.8229005566332603e-06, + "loss": 0.5025908350944519, + "step": 2020 + }, + { + "epoch": 0.46599031588655754, + "grad_norm": 1.3363070549997977, + "learning_rate": 1.8226838697489772e-06, + "loss": 0.4884544909000397, + "step": 2021 + }, + { + "epoch": 0.4662208900161402, + "grad_norm": 1.1349219249551332, + "learning_rate": 1.822467063279256e-06, + "loss": 0.46449869871139526, + "step": 2022 + }, + { + "epoch": 0.46645146414572286, + "grad_norm": 1.2563720378844234, + "learning_rate": 1.8222501372556116e-06, + "loss": 0.49463552236557007, + "step": 2023 + }, + { + "epoch": 0.4666820382753055, + "grad_norm": 1.285405581097111, + "learning_rate": 1.8220330917095768e-06, + "loss": 0.5027149319648743, + "step": 2024 + }, + { + "epoch": 0.4669126124048882, + "grad_norm": 1.3048909901236199, + "learning_rate": 1.8218159266727007e-06, + "loss": 0.564018726348877, + "step": 2025 + }, + { + "epoch": 0.46714318653447084, + "grad_norm": 1.1965631228875364, + 
"learning_rate": 1.821598642176551e-06, + "loss": 0.4235766530036926, + "step": 2026 + }, + { + "epoch": 0.4673737606640535, + "grad_norm": 1.3354885477125742, + "learning_rate": 1.8213812382527118e-06, + "loss": 0.5696560144424438, + "step": 2027 + }, + { + "epoch": 0.46760433479363617, + "grad_norm": 1.2879943344932543, + "learning_rate": 1.8211637149327856e-06, + "loss": 0.6101738214492798, + "step": 2028 + }, + { + "epoch": 0.46783490892321883, + "grad_norm": 1.2787382273760666, + "learning_rate": 1.820946072248391e-06, + "loss": 0.46749603748321533, + "step": 2029 + }, + { + "epoch": 0.4680654830528015, + "grad_norm": 1.0137433334051962, + "learning_rate": 1.8207283102311646e-06, + "loss": 0.4713476300239563, + "step": 2030 + }, + { + "epoch": 0.46829605718238415, + "grad_norm": 1.1924917748606811, + "learning_rate": 1.8205104289127607e-06, + "loss": 0.5381859540939331, + "step": 2031 + }, + { + "epoch": 0.4685266313119668, + "grad_norm": 1.1753816722161505, + "learning_rate": 1.82029242832485e-06, + "loss": 0.4871833324432373, + "step": 2032 + }, + { + "epoch": 0.4687572054415495, + "grad_norm": 1.2889177236993268, + "learning_rate": 1.8200743084991217e-06, + "loss": 0.520627498626709, + "step": 2033 + }, + { + "epoch": 0.46898777957113214, + "grad_norm": 1.1168475824168262, + "learning_rate": 1.8198560694672813e-06, + "loss": 0.5382364392280579, + "step": 2034 + }, + { + "epoch": 0.4692183537007148, + "grad_norm": 1.0953401197844614, + "learning_rate": 1.8196377112610524e-06, + "loss": 0.384588360786438, + "step": 2035 + }, + { + "epoch": 0.46944892783029746, + "grad_norm": 1.3337847292368636, + "learning_rate": 1.8194192339121752e-06, + "loss": 0.5515186786651611, + "step": 2036 + }, + { + "epoch": 0.4696795019598801, + "grad_norm": 1.2634192136555153, + "learning_rate": 1.819200637452408e-06, + "loss": 0.5405331254005432, + "step": 2037 + }, + { + "epoch": 0.4699100760894628, + "grad_norm": 1.3408838607377604, + "learning_rate": 1.818981921913526e-06, + 
"loss": 0.5565645694732666, + "step": 2038 + }, + { + "epoch": 0.47014065021904544, + "grad_norm": 1.1845986031026676, + "learning_rate": 1.818763087327321e-06, + "loss": 0.4856358468532562, + "step": 2039 + }, + { + "epoch": 0.4703712243486281, + "grad_norm": 1.1018414398540533, + "learning_rate": 1.8185441337256035e-06, + "loss": 0.5495761632919312, + "step": 2040 + }, + { + "epoch": 0.47060179847821076, + "grad_norm": 1.1792744067343253, + "learning_rate": 1.8183250611402007e-06, + "loss": 0.509435772895813, + "step": 2041 + }, + { + "epoch": 0.4708323726077934, + "grad_norm": 1.0107628293119386, + "learning_rate": 1.8181058696029564e-06, + "loss": 0.4663920998573303, + "step": 2042 + }, + { + "epoch": 0.4710629467373761, + "grad_norm": 1.5093599722992523, + "learning_rate": 1.817886559145733e-06, + "loss": 0.5976128578186035, + "step": 2043 + }, + { + "epoch": 0.47129352086695875, + "grad_norm": 1.2084791393616294, + "learning_rate": 1.817667129800409e-06, + "loss": 0.49167966842651367, + "step": 2044 + }, + { + "epoch": 0.4715240949965414, + "grad_norm": 1.1457657477052965, + "learning_rate": 1.817447581598881e-06, + "loss": 0.5889153480529785, + "step": 2045 + }, + { + "epoch": 0.47175466912612407, + "grad_norm": 1.206584712735091, + "learning_rate": 1.8172279145730622e-06, + "loss": 0.4970330595970154, + "step": 2046 + }, + { + "epoch": 0.47198524325570673, + "grad_norm": 1.1497751548880843, + "learning_rate": 1.817008128754884e-06, + "loss": 0.4840531051158905, + "step": 2047 + }, + { + "epoch": 0.4722158173852894, + "grad_norm": 1.0450687693806986, + "learning_rate": 1.816788224176294e-06, + "loss": 0.48297861218452454, + "step": 2048 + }, + { + "epoch": 0.47244639151487205, + "grad_norm": 1.184218710920589, + "learning_rate": 1.8165682008692578e-06, + "loss": 0.540350079536438, + "step": 2049 + }, + { + "epoch": 0.4726769656444547, + "grad_norm": 1.0359041945652345, + "learning_rate": 1.8163480588657578e-06, + "loss": 0.46405351161956787, + "step": 2050 + 
}, + { + "epoch": 0.4729075397740374, + "grad_norm": 1.1107404730922064, + "learning_rate": 1.816127798197794e-06, + "loss": 0.5175468921661377, + "step": 2051 + }, + { + "epoch": 0.47313811390362004, + "grad_norm": 1.3876726162535544, + "learning_rate": 1.8159074188973836e-06, + "loss": 0.5923771858215332, + "step": 2052 + }, + { + "epoch": 0.4733686880332027, + "grad_norm": 1.135618311389398, + "learning_rate": 1.815686920996561e-06, + "loss": 0.4999024569988251, + "step": 2053 + }, + { + "epoch": 0.47359926216278536, + "grad_norm": 1.260203747569289, + "learning_rate": 1.8154663045273775e-06, + "loss": 0.5630939602851868, + "step": 2054 + }, + { + "epoch": 0.473829836292368, + "grad_norm": 1.0446947469213006, + "learning_rate": 1.8152455695219021e-06, + "loss": 0.5505836009979248, + "step": 2055 + }, + { + "epoch": 0.4740604104219507, + "grad_norm": 1.0593378648910954, + "learning_rate": 1.8150247160122213e-06, + "loss": 0.44550588726997375, + "step": 2056 + }, + { + "epoch": 0.47429098455153335, + "grad_norm": 1.3784716647825315, + "learning_rate": 1.8148037440304375e-06, + "loss": 0.5387516021728516, + "step": 2057 + }, + { + "epoch": 0.47452155868111595, + "grad_norm": 1.2100168024707112, + "learning_rate": 1.814582653608672e-06, + "loss": 0.5941788554191589, + "step": 2058 + }, + { + "epoch": 0.4747521328106986, + "grad_norm": 1.3537451578676338, + "learning_rate": 1.8143614447790622e-06, + "loss": 0.552179217338562, + "step": 2059 + }, + { + "epoch": 0.4749827069402813, + "grad_norm": 1.4352695047482156, + "learning_rate": 1.8141401175737632e-06, + "loss": 0.4475885033607483, + "step": 2060 + }, + { + "epoch": 0.47521328106986394, + "grad_norm": 1.560782042661122, + "learning_rate": 1.813918672024947e-06, + "loss": 0.5821356773376465, + "step": 2061 + }, + { + "epoch": 0.4754438551994466, + "grad_norm": 1.0378834941031638, + "learning_rate": 1.8136971081648027e-06, + "loss": 0.4673501253128052, + "step": 2062 + }, + { + "epoch": 0.47567442932902926, + 
"grad_norm": 1.278556049660224, + "learning_rate": 1.8134754260255373e-06, + "loss": 0.582427978515625, + "step": 2063 + }, + { + "epoch": 0.4759050034586119, + "grad_norm": 1.050202225169388, + "learning_rate": 1.8132536256393744e-06, + "loss": 0.4494328498840332, + "step": 2064 + }, + { + "epoch": 0.4761355775881946, + "grad_norm": 1.2125688329070163, + "learning_rate": 1.8130317070385552e-06, + "loss": 0.44775205850601196, + "step": 2065 + }, + { + "epoch": 0.47636615171777724, + "grad_norm": 1.6939798990457848, + "learning_rate": 1.8128096702553372e-06, + "loss": 0.5456822514533997, + "step": 2066 + }, + { + "epoch": 0.4765967258473599, + "grad_norm": 1.3273956589633653, + "learning_rate": 1.8125875153219963e-06, + "loss": 0.46396178007125854, + "step": 2067 + }, + { + "epoch": 0.47682729997694256, + "grad_norm": 1.1515186039412058, + "learning_rate": 1.8123652422708247e-06, + "loss": 0.4479365944862366, + "step": 2068 + }, + { + "epoch": 0.4770578741065252, + "grad_norm": 1.2802069282774096, + "learning_rate": 1.8121428511341322e-06, + "loss": 0.4633978605270386, + "step": 2069 + }, + { + "epoch": 0.4772884482361079, + "grad_norm": 1.0517363876370052, + "learning_rate": 1.811920341944245e-06, + "loss": 0.5190213918685913, + "step": 2070 + }, + { + "epoch": 0.47751902236569055, + "grad_norm": 1.1502023331468956, + "learning_rate": 1.811697714733508e-06, + "loss": 0.3900855779647827, + "step": 2071 + }, + { + "epoch": 0.4777495964952732, + "grad_norm": 1.1255517906685018, + "learning_rate": 1.8114749695342816e-06, + "loss": 0.5130020380020142, + "step": 2072 + }, + { + "epoch": 0.47798017062485587, + "grad_norm": 1.181934216759251, + "learning_rate": 1.8112521063789444e-06, + "loss": 0.5279096364974976, + "step": 2073 + }, + { + "epoch": 0.47821074475443853, + "grad_norm": 1.1536132669518966, + "learning_rate": 1.8110291252998918e-06, + "loss": 0.5048732161521912, + "step": 2074 + }, + { + "epoch": 0.4784413188840212, + "grad_norm": 1.3979756779725594, + 
"learning_rate": 1.8108060263295362e-06, + "loss": 0.5410048365592957, + "step": 2075 + }, + { + "epoch": 0.47867189301360386, + "grad_norm": 1.2583345285712537, + "learning_rate": 1.8105828095003073e-06, + "loss": 0.5144593715667725, + "step": 2076 + }, + { + "epoch": 0.4789024671431865, + "grad_norm": 1.427505910251362, + "learning_rate": 1.810359474844652e-06, + "loss": 0.543846845626831, + "step": 2077 + }, + { + "epoch": 0.4791330412727692, + "grad_norm": 1.3389957969723305, + "learning_rate": 1.8101360223950346e-06, + "loss": 0.5628032684326172, + "step": 2078 + }, + { + "epoch": 0.47936361540235184, + "grad_norm": 1.2233623869672197, + "learning_rate": 1.8099124521839358e-06, + "loss": 0.5248516201972961, + "step": 2079 + }, + { + "epoch": 0.4795941895319345, + "grad_norm": 1.1882395736191633, + "learning_rate": 1.8096887642438537e-06, + "loss": 0.44171589612960815, + "step": 2080 + }, + { + "epoch": 0.47982476366151716, + "grad_norm": 1.1226478747483744, + "learning_rate": 1.809464958607304e-06, + "loss": 0.5003859996795654, + "step": 2081 + }, + { + "epoch": 0.4800553377910998, + "grad_norm": 1.2241972764897475, + "learning_rate": 1.8092410353068183e-06, + "loss": 0.5271269679069519, + "step": 2082 + }, + { + "epoch": 0.4802859119206825, + "grad_norm": 1.390627459359596, + "learning_rate": 1.8090169943749474e-06, + "loss": 0.5191465616226196, + "step": 2083 + }, + { + "epoch": 0.48051648605026515, + "grad_norm": 1.229186901325219, + "learning_rate": 1.8087928358442567e-06, + "loss": 0.4569256007671356, + "step": 2084 + }, + { + "epoch": 0.4807470601798478, + "grad_norm": 1.2586566204343959, + "learning_rate": 1.8085685597473307e-06, + "loss": 0.521030068397522, + "step": 2085 + }, + { + "epoch": 0.48097763430943047, + "grad_norm": 1.8616539280014968, + "learning_rate": 1.80834416611677e-06, + "loss": 0.48959439992904663, + "step": 2086 + }, + { + "epoch": 0.48120820843901313, + "grad_norm": 1.37464754051939, + "learning_rate": 1.8081196549851925e-06, + 
"loss": 0.6536514163017273, + "step": 2087 + }, + { + "epoch": 0.4814387825685958, + "grad_norm": 1.2292193685806807, + "learning_rate": 1.8078950263852327e-06, + "loss": 0.5746080875396729, + "step": 2088 + }, + { + "epoch": 0.48166935669817845, + "grad_norm": 1.244000490897379, + "learning_rate": 1.8076702803495437e-06, + "loss": 0.5518802404403687, + "step": 2089 + }, + { + "epoch": 0.4818999308277611, + "grad_norm": 1.0641823457217219, + "learning_rate": 1.8074454169107934e-06, + "loss": 0.49385470151901245, + "step": 2090 + }, + { + "epoch": 0.4821305049573438, + "grad_norm": 1.0197781900207734, + "learning_rate": 1.8072204361016688e-06, + "loss": 0.4488806426525116, + "step": 2091 + }, + { + "epoch": 0.48236107908692644, + "grad_norm": 1.1424753749617582, + "learning_rate": 1.8069953379548727e-06, + "loss": 0.4167511761188507, + "step": 2092 + }, + { + "epoch": 0.4825916532165091, + "grad_norm": 1.0650805504939584, + "learning_rate": 1.8067701225031258e-06, + "loss": 0.4181321859359741, + "step": 2093 + }, + { + "epoch": 0.48282222734609176, + "grad_norm": 1.4930083094447149, + "learning_rate": 1.806544789779165e-06, + "loss": 0.5257805585861206, + "step": 2094 + }, + { + "epoch": 0.4830528014756744, + "grad_norm": 1.2055270290247748, + "learning_rate": 1.806319339815745e-06, + "loss": 0.4687056541442871, + "step": 2095 + }, + { + "epoch": 0.4832833756052571, + "grad_norm": 1.4682007990950796, + "learning_rate": 1.8060937726456373e-06, + "loss": 0.48070380091667175, + "step": 2096 + }, + { + "epoch": 0.48351394973483974, + "grad_norm": 1.1555932423285984, + "learning_rate": 1.80586808830163e-06, + "loss": 0.516263484954834, + "step": 2097 + }, + { + "epoch": 0.4837445238644224, + "grad_norm": 1.1676344701764343, + "learning_rate": 1.805642286816529e-06, + "loss": 0.44018858671188354, + "step": 2098 + }, + { + "epoch": 0.48397509799400507, + "grad_norm": 1.1426045047454896, + "learning_rate": 1.8054163682231565e-06, + "loss": 0.469373881816864, + "step": 2099 
+ }, + { + "epoch": 0.4842056721235877, + "grad_norm": 1.2080131082183756, + "learning_rate": 1.8051903325543525e-06, + "loss": 0.4759753346443176, + "step": 2100 + }, + { + "epoch": 0.4844362462531704, + "grad_norm": 1.210070128706108, + "learning_rate": 1.804964179842973e-06, + "loss": 0.5002714395523071, + "step": 2101 + }, + { + "epoch": 0.48466682038275305, + "grad_norm": 1.5442585246670464, + "learning_rate": 1.804737910121892e-06, + "loss": 0.4869537353515625, + "step": 2102 + }, + { + "epoch": 0.4848973945123357, + "grad_norm": 1.0025531891942765, + "learning_rate": 1.804511523424e-06, + "loss": 0.4840247929096222, + "step": 2103 + }, + { + "epoch": 0.4851279686419184, + "grad_norm": 1.2125955941110753, + "learning_rate": 1.8042850197822049e-06, + "loss": 0.48390740156173706, + "step": 2104 + }, + { + "epoch": 0.48535854277150103, + "grad_norm": 1.2581816256760507, + "learning_rate": 1.8040583992294305e-06, + "loss": 0.5875431895256042, + "step": 2105 + }, + { + "epoch": 0.4855891169010837, + "grad_norm": 1.1530238586197006, + "learning_rate": 1.803831661798619e-06, + "loss": 0.4599287211894989, + "step": 2106 + }, + { + "epoch": 0.48581969103066636, + "grad_norm": 1.120967919274212, + "learning_rate": 1.803604807522729e-06, + "loss": 0.5266382694244385, + "step": 2107 + }, + { + "epoch": 0.486050265160249, + "grad_norm": 1.6402953005136756, + "learning_rate": 1.8033778364347359e-06, + "loss": 0.5592058897018433, + "step": 2108 + }, + { + "epoch": 0.4862808392898317, + "grad_norm": 1.278433491122833, + "learning_rate": 1.8031507485676324e-06, + "loss": 0.4385683834552765, + "step": 2109 + }, + { + "epoch": 0.48651141341941434, + "grad_norm": 0.9409152493815139, + "learning_rate": 1.8029235439544277e-06, + "loss": 0.4205859303474426, + "step": 2110 + }, + { + "epoch": 0.486741987548997, + "grad_norm": 1.2334271425613326, + "learning_rate": 1.8026962226281484e-06, + "loss": 0.4179378151893616, + "step": 2111 + }, + { + "epoch": 0.48697256167857966, + 
"grad_norm": 1.3018247329424364, + "learning_rate": 1.8024687846218382e-06, + "loss": 0.5022565126419067, + "step": 2112 + }, + { + "epoch": 0.4872031358081623, + "grad_norm": 1.092822670373115, + "learning_rate": 1.8022412299685574e-06, + "loss": 0.4591484069824219, + "step": 2113 + }, + { + "epoch": 0.487433709937745, + "grad_norm": 1.135644170855214, + "learning_rate": 1.8020135587013836e-06, + "loss": 0.44381004571914673, + "step": 2114 + }, + { + "epoch": 0.48766428406732765, + "grad_norm": 1.4882998519827229, + "learning_rate": 1.8017857708534106e-06, + "loss": 0.5418124198913574, + "step": 2115 + }, + { + "epoch": 0.4878948581969103, + "grad_norm": 1.1899076485341344, + "learning_rate": 1.80155786645775e-06, + "loss": 0.45836228132247925, + "step": 2116 + }, + { + "epoch": 0.48812543232649297, + "grad_norm": 1.0900529156655503, + "learning_rate": 1.80132984554753e-06, + "loss": 0.6028016805648804, + "step": 2117 + }, + { + "epoch": 0.48835600645607563, + "grad_norm": 1.2082046720219188, + "learning_rate": 1.8011017081558956e-06, + "loss": 0.461037814617157, + "step": 2118 + }, + { + "epoch": 0.4885865805856583, + "grad_norm": 1.2201342507223627, + "learning_rate": 1.8008734543160092e-06, + "loss": 0.45145073533058167, + "step": 2119 + }, + { + "epoch": 0.48881715471524095, + "grad_norm": 1.0786402560770025, + "learning_rate": 1.8006450840610495e-06, + "loss": 0.5074604153633118, + "step": 2120 + }, + { + "epoch": 0.4890477288448236, + "grad_norm": 1.047533414614444, + "learning_rate": 1.8004165974242124e-06, + "loss": 0.48518210649490356, + "step": 2121 + }, + { + "epoch": 0.4892783029744063, + "grad_norm": 1.3858118136014763, + "learning_rate": 1.800187994438711e-06, + "loss": 0.5427801609039307, + "step": 2122 + }, + { + "epoch": 0.48950887710398894, + "grad_norm": 1.1550068575676335, + "learning_rate": 1.799959275137775e-06, + "loss": 0.5002918839454651, + "step": 2123 + }, + { + "epoch": 0.4897394512335716, + "grad_norm": 1.1639768741422865, + 
"learning_rate": 1.799730439554651e-06, + "loss": 0.4417838454246521, + "step": 2124 + }, + { + "epoch": 0.48997002536315426, + "grad_norm": 1.1441558832004912, + "learning_rate": 1.7995014877226024e-06, + "loss": 0.4260700047016144, + "step": 2125 + }, + { + "epoch": 0.4902005994927369, + "grad_norm": 1.2965264900873492, + "learning_rate": 1.79927241967491e-06, + "loss": 0.5480694770812988, + "step": 2126 + }, + { + "epoch": 0.4904311736223196, + "grad_norm": 1.1303746553940783, + "learning_rate": 1.7990432354448713e-06, + "loss": 0.3911926746368408, + "step": 2127 + }, + { + "epoch": 0.49066174775190224, + "grad_norm": 1.6919718962195622, + "learning_rate": 1.7988139350657997e-06, + "loss": 0.5269262194633484, + "step": 2128 + }, + { + "epoch": 0.4908923218814849, + "grad_norm": 1.1850805062858767, + "learning_rate": 1.7985845185710272e-06, + "loss": 0.47482216358184814, + "step": 2129 + }, + { + "epoch": 0.49112289601106757, + "grad_norm": 1.1047509042558772, + "learning_rate": 1.7983549859939018e-06, + "loss": 0.5663374662399292, + "step": 2130 + }, + { + "epoch": 0.49135347014065023, + "grad_norm": 1.3067402879954033, + "learning_rate": 1.7981253373677875e-06, + "loss": 0.5322546362876892, + "step": 2131 + }, + { + "epoch": 0.4915840442702329, + "grad_norm": 1.3127111295082199, + "learning_rate": 1.797895572726067e-06, + "loss": 0.4238794445991516, + "step": 2132 + }, + { + "epoch": 0.49181461839981555, + "grad_norm": 1.3803934905983801, + "learning_rate": 1.7976656921021384e-06, + "loss": 0.49363791942596436, + "step": 2133 + }, + { + "epoch": 0.4920451925293982, + "grad_norm": 1.2075981604593182, + "learning_rate": 1.7974356955294178e-06, + "loss": 0.5079565048217773, + "step": 2134 + }, + { + "epoch": 0.4922757666589809, + "grad_norm": 1.2533809097279895, + "learning_rate": 1.7972055830413369e-06, + "loss": 0.5259063243865967, + "step": 2135 + }, + { + "epoch": 0.49250634078856353, + "grad_norm": 1.1936271771370206, + "learning_rate": 
1.7969753546713448e-06, + "loss": 0.49021831154823303, + "step": 2136 + }, + { + "epoch": 0.4927369149181462, + "grad_norm": 1.1560183810694227, + "learning_rate": 1.7967450104529078e-06, + "loss": 0.49721387028694153, + "step": 2137 + }, + { + "epoch": 0.49296748904772886, + "grad_norm": 1.523657234221405, + "learning_rate": 1.796514550419509e-06, + "loss": 0.6129348278045654, + "step": 2138 + }, + { + "epoch": 0.4931980631773115, + "grad_norm": 1.245217894172975, + "learning_rate": 1.7962839746046479e-06, + "loss": 0.5034269094467163, + "step": 2139 + }, + { + "epoch": 0.4934286373068942, + "grad_norm": 1.2009412202372387, + "learning_rate": 1.7960532830418408e-06, + "loss": 0.490216463804245, + "step": 2140 + }, + { + "epoch": 0.49365921143647684, + "grad_norm": 1.3063386967377661, + "learning_rate": 1.7958224757646212e-06, + "loss": 0.5609744787216187, + "step": 2141 + }, + { + "epoch": 0.4938897855660595, + "grad_norm": 1.2989425251267097, + "learning_rate": 1.7955915528065395e-06, + "loss": 0.4438238739967346, + "step": 2142 + }, + { + "epoch": 0.49412035969564216, + "grad_norm": 1.1724755739495214, + "learning_rate": 1.7953605142011626e-06, + "loss": 0.4704767167568207, + "step": 2143 + }, + { + "epoch": 0.4943509338252248, + "grad_norm": 1.0972580275821462, + "learning_rate": 1.795129359982074e-06, + "loss": 0.44819536805152893, + "step": 2144 + }, + { + "epoch": 0.4945815079548075, + "grad_norm": 1.4390962273022694, + "learning_rate": 1.7948980901828746e-06, + "loss": 0.5311752557754517, + "step": 2145 + }, + { + "epoch": 0.49481208208439015, + "grad_norm": 1.524280309497039, + "learning_rate": 1.7946667048371818e-06, + "loss": 0.46144258975982666, + "step": 2146 + }, + { + "epoch": 0.4950426562139728, + "grad_norm": 1.719231407355215, + "learning_rate": 1.7944352039786297e-06, + "loss": 0.5973725914955139, + "step": 2147 + }, + { + "epoch": 0.49527323034355547, + "grad_norm": 1.4078850153564488, + "learning_rate": 1.7942035876408693e-06, + "loss": 
0.4930835962295532, + "step": 2148 + }, + { + "epoch": 0.49550380447313813, + "grad_norm": 1.3404357985733748, + "learning_rate": 1.7939718558575685e-06, + "loss": 0.39137697219848633, + "step": 2149 + }, + { + "epoch": 0.4957343786027208, + "grad_norm": 1.364926902591579, + "learning_rate": 1.7937400086624117e-06, + "loss": 0.47618329524993896, + "step": 2150 + }, + { + "epoch": 0.49596495273230345, + "grad_norm": 1.1307446090872737, + "learning_rate": 1.7935080460891005e-06, + "loss": 0.4751483201980591, + "step": 2151 + }, + { + "epoch": 0.4961955268618861, + "grad_norm": 1.05862482163457, + "learning_rate": 1.7932759681713528e-06, + "loss": 0.4654052257537842, + "step": 2152 + }, + { + "epoch": 0.4964261009914688, + "grad_norm": 1.5078817597304273, + "learning_rate": 1.7930437749429035e-06, + "loss": 0.551579475402832, + "step": 2153 + }, + { + "epoch": 0.49665667512105144, + "grad_norm": 1.1496698915645684, + "learning_rate": 1.792811466437504e-06, + "loss": 0.4967789053916931, + "step": 2154 + }, + { + "epoch": 0.4968872492506341, + "grad_norm": 1.2983844202508301, + "learning_rate": 1.7925790426889234e-06, + "loss": 0.5826432108879089, + "step": 2155 + }, + { + "epoch": 0.49711782338021676, + "grad_norm": 1.1680445889037752, + "learning_rate": 1.792346503730946e-06, + "loss": 0.4260643720626831, + "step": 2156 + }, + { + "epoch": 0.4973483975097994, + "grad_norm": 1.287300561489553, + "learning_rate": 1.7921138495973741e-06, + "loss": 0.48679620027542114, + "step": 2157 + }, + { + "epoch": 0.4975789716393821, + "grad_norm": 1.219223301068072, + "learning_rate": 1.7918810803220266e-06, + "loss": 0.5048027634620667, + "step": 2158 + }, + { + "epoch": 0.49780954576896475, + "grad_norm": 1.3507694371861767, + "learning_rate": 1.7916481959387384e-06, + "loss": 0.5073787569999695, + "step": 2159 + }, + { + "epoch": 0.4980401198985474, + "grad_norm": 1.1692017846177098, + "learning_rate": 1.791415196481362e-06, + "loss": 0.47361671924591064, + "step": 2160 + }, + { 
+ "epoch": 0.49827069402813007, + "grad_norm": 1.2422906508724816, + "learning_rate": 1.7911820819837659e-06, + "loss": 0.46382519602775574, + "step": 2161 + }, + { + "epoch": 0.49850126815771273, + "grad_norm": 1.2239936361904968, + "learning_rate": 1.7909488524798357e-06, + "loss": 0.5167688727378845, + "step": 2162 + }, + { + "epoch": 0.4987318422872954, + "grad_norm": 1.125831583037744, + "learning_rate": 1.7907155080034739e-06, + "loss": 0.4486730992794037, + "step": 2163 + }, + { + "epoch": 0.49896241641687805, + "grad_norm": 1.1343310195374692, + "learning_rate": 1.7904820485885991e-06, + "loss": 0.508470356464386, + "step": 2164 + }, + { + "epoch": 0.4991929905464607, + "grad_norm": 1.2928862741310794, + "learning_rate": 1.790248474269148e-06, + "loss": 0.4752856492996216, + "step": 2165 + }, + { + "epoch": 0.4994235646760434, + "grad_norm": 1.4158256008874892, + "learning_rate": 1.7900147850790713e-06, + "loss": 0.47191953659057617, + "step": 2166 + }, + { + "epoch": 0.49965413880562604, + "grad_norm": 1.2139421208311327, + "learning_rate": 1.7897809810523396e-06, + "loss": 0.48935621976852417, + "step": 2167 + }, + { + "epoch": 0.4998847129352087, + "grad_norm": 1.0547512942585364, + "learning_rate": 1.789547062222938e-06, + "loss": 0.5455219149589539, + "step": 2168 + }, + { + "epoch": 0.5001152870647914, + "grad_norm": 1.3471138253822197, + "learning_rate": 1.789313028624869e-06, + "loss": 0.5068193078041077, + "step": 2169 + }, + { + "epoch": 0.500345861194374, + "grad_norm": 1.354177516749214, + "learning_rate": 1.789078880292152e-06, + "loss": 0.5868322253227234, + "step": 2170 + }, + { + "epoch": 0.5005764353239567, + "grad_norm": 1.2474005261331733, + "learning_rate": 1.7888446172588222e-06, + "loss": 0.5132089853286743, + "step": 2171 + }, + { + "epoch": 0.5008070094535393, + "grad_norm": 1.6917901077948925, + "learning_rate": 1.788610239558933e-06, + "loss": 0.5673823356628418, + "step": 2172 + }, + { + "epoch": 0.501037583583122, + "grad_norm": 
1.1902561905753382, + "learning_rate": 1.7883757472265533e-06, + "loss": 0.47085779905319214, + "step": 2173 + }, + { + "epoch": 0.5012681577127046, + "grad_norm": 1.38526914772559, + "learning_rate": 1.7881411402957685e-06, + "loss": 0.5286725163459778, + "step": 2174 + }, + { + "epoch": 0.5014987318422873, + "grad_norm": 1.1910792946448119, + "learning_rate": 1.7879064188006817e-06, + "loss": 0.5044010877609253, + "step": 2175 + }, + { + "epoch": 0.5017293059718699, + "grad_norm": 1.8451305262061892, + "learning_rate": 1.7876715827754113e-06, + "loss": 0.5329761505126953, + "step": 2176 + }, + { + "epoch": 0.5019598801014526, + "grad_norm": 1.1057498562542696, + "learning_rate": 1.7874366322540937e-06, + "loss": 0.5025275349617004, + "step": 2177 + }, + { + "epoch": 0.5021904542310353, + "grad_norm": 1.1913338911250846, + "learning_rate": 1.7872015672708814e-06, + "loss": 0.48466378450393677, + "step": 2178 + }, + { + "epoch": 0.502421028360618, + "grad_norm": 1.298497377256874, + "learning_rate": 1.7869663878599427e-06, + "loss": 0.505358099937439, + "step": 2179 + }, + { + "epoch": 0.5026516024902006, + "grad_norm": 1.3974305011742736, + "learning_rate": 1.7867310940554643e-06, + "loss": 0.4934875965118408, + "step": 2180 + }, + { + "epoch": 0.5028821766197833, + "grad_norm": 0.9670109365307766, + "learning_rate": 1.7864956858916482e-06, + "loss": 0.4726678133010864, + "step": 2181 + }, + { + "epoch": 0.5031127507493659, + "grad_norm": 1.3043022336942207, + "learning_rate": 1.786260163402713e-06, + "loss": 0.4619986414909363, + "step": 2182 + }, + { + "epoch": 0.5033433248789486, + "grad_norm": 1.17201330946801, + "learning_rate": 1.7860245266228946e-06, + "loss": 0.4483926594257355, + "step": 2183 + }, + { + "epoch": 0.5035738990085312, + "grad_norm": 1.0474549975114675, + "learning_rate": 1.7857887755864451e-06, + "loss": 0.4756368100643158, + "step": 2184 + }, + { + "epoch": 0.5038044731381139, + "grad_norm": 1.248404397964203, + "learning_rate": 
1.7855529103276334e-06, + "loss": 0.5610564351081848, + "step": 2185 + }, + { + "epoch": 0.5040350472676965, + "grad_norm": 1.178944045969772, + "learning_rate": 1.7853169308807447e-06, + "loss": 0.49948322772979736, + "step": 2186 + }, + { + "epoch": 0.5042656213972793, + "grad_norm": 1.203613939490818, + "learning_rate": 1.7850808372800813e-06, + "loss": 0.5023819208145142, + "step": 2187 + }, + { + "epoch": 0.5044961955268619, + "grad_norm": 1.1738403952666703, + "learning_rate": 1.7848446295599617e-06, + "loss": 0.45893096923828125, + "step": 2188 + }, + { + "epoch": 0.5047267696564446, + "grad_norm": 1.2621327179460875, + "learning_rate": 1.7846083077547212e-06, + "loss": 0.39129459857940674, + "step": 2189 + }, + { + "epoch": 0.5049573437860272, + "grad_norm": 0.9495823494613052, + "learning_rate": 1.784371871898711e-06, + "loss": 0.42348673939704895, + "step": 2190 + }, + { + "epoch": 0.5051879179156099, + "grad_norm": 1.4438634303858584, + "learning_rate": 1.7841353220263e-06, + "loss": 0.5760704278945923, + "step": 2191 + }, + { + "epoch": 0.5054184920451925, + "grad_norm": 1.1475240268019702, + "learning_rate": 1.7838986581718731e-06, + "loss": 0.5281997323036194, + "step": 2192 + }, + { + "epoch": 0.5056490661747752, + "grad_norm": 1.3139768062702608, + "learning_rate": 1.7836618803698315e-06, + "loss": 0.543775200843811, + "step": 2193 + }, + { + "epoch": 0.5058796403043578, + "grad_norm": 1.2497491249667418, + "learning_rate": 1.7834249886545934e-06, + "loss": 0.4148549437522888, + "step": 2194 + }, + { + "epoch": 0.5061102144339406, + "grad_norm": 1.183178207015322, + "learning_rate": 1.7831879830605936e-06, + "loss": 0.5165001153945923, + "step": 2195 + }, + { + "epoch": 0.5063407885635232, + "grad_norm": 1.0854657175123028, + "learning_rate": 1.782950863622283e-06, + "loss": 0.4183283746242523, + "step": 2196 + }, + { + "epoch": 0.5065713626931059, + "grad_norm": 1.2476527930959387, + "learning_rate": 1.7827136303741292e-06, + "loss": 
0.46558016538619995, + "step": 2197 + }, + { + "epoch": 0.5068019368226885, + "grad_norm": 1.2829595269176914, + "learning_rate": 1.782476283350617e-06, + "loss": 0.5491806268692017, + "step": 2198 + }, + { + "epoch": 0.5070325109522712, + "grad_norm": 1.3547672961051511, + "learning_rate": 1.7822388225862466e-06, + "loss": 0.42999008297920227, + "step": 2199 + }, + { + "epoch": 0.5072630850818538, + "grad_norm": 1.2776437457035281, + "learning_rate": 1.7820012481155358e-06, + "loss": 0.42478299140930176, + "step": 2200 + }, + { + "epoch": 0.5074936592114365, + "grad_norm": 4.51069636831696, + "learning_rate": 1.781763559973018e-06, + "loss": 0.4175076186656952, + "step": 2201 + }, + { + "epoch": 0.5077242333410191, + "grad_norm": 1.1985836355289028, + "learning_rate": 1.7815257581932439e-06, + "loss": 0.42197084426879883, + "step": 2202 + }, + { + "epoch": 0.5079548074706018, + "grad_norm": 1.2175005553032592, + "learning_rate": 1.7812878428107803e-06, + "loss": 0.39872926473617554, + "step": 2203 + }, + { + "epoch": 0.5081853816001844, + "grad_norm": 1.2908474732070376, + "learning_rate": 1.7810498138602106e-06, + "loss": 0.4572516977787018, + "step": 2204 + }, + { + "epoch": 0.5084159557297672, + "grad_norm": 1.1254873587347531, + "learning_rate": 1.780811671376135e-06, + "loss": 0.5261520147323608, + "step": 2205 + }, + { + "epoch": 0.5086465298593498, + "grad_norm": 1.8336847349223555, + "learning_rate": 1.7805734153931696e-06, + "loss": 0.4714658260345459, + "step": 2206 + }, + { + "epoch": 0.5088771039889325, + "grad_norm": 1.0757806041139168, + "learning_rate": 1.7803350459459472e-06, + "loss": 0.46184858679771423, + "step": 2207 + }, + { + "epoch": 0.5091076781185151, + "grad_norm": 1.2531712345918984, + "learning_rate": 1.7800965630691173e-06, + "loss": 0.48189157247543335, + "step": 2208 + }, + { + "epoch": 0.5093382522480978, + "grad_norm": 1.5363179586848308, + "learning_rate": 1.7798579667973463e-06, + "loss": 0.47865352034568787, + "step": 2209 + }, 
+ { + "epoch": 0.5095688263776804, + "grad_norm": 1.1589101806191746, + "learning_rate": 1.7796192571653162e-06, + "loss": 0.46073317527770996, + "step": 2210 + }, + { + "epoch": 0.5097994005072631, + "grad_norm": 1.1781605500578527, + "learning_rate": 1.7793804342077253e-06, + "loss": 0.5099648237228394, + "step": 2211 + }, + { + "epoch": 0.5100299746368457, + "grad_norm": 1.2319682423717142, + "learning_rate": 1.7791414979592903e-06, + "loss": 0.5436147451400757, + "step": 2212 + }, + { + "epoch": 0.5102605487664285, + "grad_norm": 1.2305699349330186, + "learning_rate": 1.7789024484547417e-06, + "loss": 0.5455893278121948, + "step": 2213 + }, + { + "epoch": 0.5104911228960111, + "grad_norm": 1.2918560641722026, + "learning_rate": 1.7786632857288284e-06, + "loss": 0.4886546730995178, + "step": 2214 + }, + { + "epoch": 0.5107216970255938, + "grad_norm": 1.1611199451436964, + "learning_rate": 1.778424009816315e-06, + "loss": 0.4793723225593567, + "step": 2215 + }, + { + "epoch": 0.5109522711551764, + "grad_norm": 1.3312189289078886, + "learning_rate": 1.7781846207519826e-06, + "loss": 0.5814248323440552, + "step": 2216 + }, + { + "epoch": 0.5111828452847591, + "grad_norm": 1.1560984097631717, + "learning_rate": 1.777945118570629e-06, + "loss": 0.5057421326637268, + "step": 2217 + }, + { + "epoch": 0.5114134194143417, + "grad_norm": 1.3009634347843195, + "learning_rate": 1.7777055033070682e-06, + "loss": 0.3913435935974121, + "step": 2218 + }, + { + "epoch": 0.5116439935439244, + "grad_norm": 0.9761581598604525, + "learning_rate": 1.7774657749961305e-06, + "loss": 0.4450770616531372, + "step": 2219 + }, + { + "epoch": 0.511874567673507, + "grad_norm": 1.731999332658399, + "learning_rate": 1.7772259336726636e-06, + "loss": 0.5164940357208252, + "step": 2220 + }, + { + "epoch": 0.5121051418030897, + "grad_norm": 1.257043827333845, + "learning_rate": 1.7769859793715298e-06, + "loss": 0.44231802225112915, + "step": 2221 + }, + { + "epoch": 0.5123357159326724, + 
"grad_norm": 1.2521439253976214, + "learning_rate": 1.7767459121276093e-06, + "loss": 0.516791820526123, + "step": 2222 + }, + { + "epoch": 0.5125662900622551, + "grad_norm": 1.2456616904380073, + "learning_rate": 1.7765057319757989e-06, + "loss": 0.4180450737476349, + "step": 2223 + }, + { + "epoch": 0.5127968641918377, + "grad_norm": 1.1350275613249636, + "learning_rate": 1.77626543895101e-06, + "loss": 0.49246734380722046, + "step": 2224 + }, + { + "epoch": 0.5130274383214203, + "grad_norm": 1.1582721424765736, + "learning_rate": 1.7760250330881728e-06, + "loss": 0.5058225393295288, + "step": 2225 + }, + { + "epoch": 0.513258012451003, + "grad_norm": 1.4118813849041838, + "learning_rate": 1.7757845144222321e-06, + "loss": 0.4752033054828644, + "step": 2226 + }, + { + "epoch": 0.5134885865805856, + "grad_norm": 1.2950831387397626, + "learning_rate": 1.77554388298815e-06, + "loss": 0.45163947343826294, + "step": 2227 + }, + { + "epoch": 0.5137191607101683, + "grad_norm": 1.387042973653302, + "learning_rate": 1.7753031388209044e-06, + "loss": 0.46295779943466187, + "step": 2228 + }, + { + "epoch": 0.5139497348397509, + "grad_norm": 1.2958875463664286, + "learning_rate": 1.7750622819554903e-06, + "loss": 0.5682947635650635, + "step": 2229 + }, + { + "epoch": 0.5141803089693336, + "grad_norm": 1.353052791820573, + "learning_rate": 1.7748213124269187e-06, + "loss": 0.4890878200531006, + "step": 2230 + }, + { + "epoch": 0.5144108830989162, + "grad_norm": 1.4612536503294715, + "learning_rate": 1.7745802302702164e-06, + "loss": 0.5952332615852356, + "step": 2231 + }, + { + "epoch": 0.514641457228499, + "grad_norm": 1.1928368431775584, + "learning_rate": 1.7743390355204278e-06, + "loss": 0.43224406242370605, + "step": 2232 + }, + { + "epoch": 0.5148720313580816, + "grad_norm": 1.1851533508030387, + "learning_rate": 1.7740977282126122e-06, + "loss": 0.5010303258895874, + "step": 2233 + }, + { + "epoch": 0.5151026054876643, + "grad_norm": 1.105983766082305, + 
"learning_rate": 1.7738563083818469e-06, + "loss": 0.5166633725166321, + "step": 2234 + }, + { + "epoch": 0.5153331796172469, + "grad_norm": 1.0533784617555741, + "learning_rate": 1.7736147760632245e-06, + "loss": 0.4748263359069824, + "step": 2235 + }, + { + "epoch": 0.5155637537468296, + "grad_norm": 0.9010011595528595, + "learning_rate": 1.773373131291854e-06, + "loss": 0.46462053060531616, + "step": 2236 + }, + { + "epoch": 0.5157943278764122, + "grad_norm": 1.1288843437350349, + "learning_rate": 1.7731313741028608e-06, + "loss": 0.47799748182296753, + "step": 2237 + }, + { + "epoch": 0.5160249020059949, + "grad_norm": 1.2958124494051022, + "learning_rate": 1.772889504531387e-06, + "loss": 0.43448662757873535, + "step": 2238 + }, + { + "epoch": 0.5162554761355775, + "grad_norm": 1.2781442130344307, + "learning_rate": 1.7726475226125905e-06, + "loss": 0.4609360098838806, + "step": 2239 + }, + { + "epoch": 0.5164860502651603, + "grad_norm": 1.123946418980165, + "learning_rate": 1.7724054283816463e-06, + "loss": 0.505261242389679, + "step": 2240 + }, + { + "epoch": 0.5167166243947429, + "grad_norm": 1.1143888709548355, + "learning_rate": 1.772163221873745e-06, + "loss": 0.3812851905822754, + "step": 2241 + }, + { + "epoch": 0.5169471985243256, + "grad_norm": 1.1698544335678498, + "learning_rate": 1.7719209031240938e-06, + "loss": 0.42545294761657715, + "step": 2242 + }, + { + "epoch": 0.5171777726539082, + "grad_norm": 1.3964979839005025, + "learning_rate": 1.771678472167916e-06, + "loss": 0.45135340094566345, + "step": 2243 + }, + { + "epoch": 0.5174083467834909, + "grad_norm": 1.1118819857040387, + "learning_rate": 1.7714359290404514e-06, + "loss": 0.4499250650405884, + "step": 2244 + }, + { + "epoch": 0.5176389209130735, + "grad_norm": 1.2793420965554383, + "learning_rate": 1.7711932737769564e-06, + "loss": 0.4355557858943939, + "step": 2245 + }, + { + "epoch": 0.5178694950426562, + "grad_norm": 1.3068878220482505, + "learning_rate": 1.7709505064127036e-06, + 
"loss": 0.4140744209289551, + "step": 2246 + }, + { + "epoch": 0.5181000691722388, + "grad_norm": 1.2538619837975196, + "learning_rate": 1.7707076269829809e-06, + "loss": 0.5108504891395569, + "step": 2247 + }, + { + "epoch": 0.5183306433018215, + "grad_norm": 1.0866593797381727, + "learning_rate": 1.7704646355230936e-06, + "loss": 0.5064615607261658, + "step": 2248 + }, + { + "epoch": 0.5185612174314042, + "grad_norm": 1.4034267264652582, + "learning_rate": 1.7702215320683636e-06, + "loss": 0.5922794342041016, + "step": 2249 + }, + { + "epoch": 0.5187917915609869, + "grad_norm": 1.236045367714828, + "learning_rate": 1.7699783166541279e-06, + "loss": 0.3890082836151123, + "step": 2250 + }, + { + "epoch": 0.5190223656905695, + "grad_norm": 1.1663861833023768, + "learning_rate": 1.7697349893157402e-06, + "loss": 0.5585668087005615, + "step": 2251 + }, + { + "epoch": 0.5192529398201522, + "grad_norm": 1.2125542528327162, + "learning_rate": 1.7694915500885706e-06, + "loss": 0.3904608488082886, + "step": 2252 + }, + { + "epoch": 0.5194835139497348, + "grad_norm": 1.3213509465151734, + "learning_rate": 1.7692479990080056e-06, + "loss": 0.4764491617679596, + "step": 2253 + }, + { + "epoch": 0.5197140880793175, + "grad_norm": 1.3113796870909902, + "learning_rate": 1.769004336109448e-06, + "loss": 0.49443554878234863, + "step": 2254 + }, + { + "epoch": 0.5199446622089001, + "grad_norm": 1.2196571448758133, + "learning_rate": 1.7687605614283165e-06, + "loss": 0.4679003357887268, + "step": 2255 + }, + { + "epoch": 0.5201752363384828, + "grad_norm": 1.6767016497784393, + "learning_rate": 1.7685166750000465e-06, + "loss": 0.6968683004379272, + "step": 2256 + }, + { + "epoch": 0.5204058104680654, + "grad_norm": 1.406455012631932, + "learning_rate": 1.7682726768600888e-06, + "loss": 0.5688217878341675, + "step": 2257 + }, + { + "epoch": 0.5206363845976482, + "grad_norm": 1.176050025614157, + "learning_rate": 1.7680285670439115e-06, + "loss": 0.4688011705875397, + "step": 2258 + 
}, + { + "epoch": 0.5208669587272308, + "grad_norm": 1.1772680288415673, + "learning_rate": 1.7677843455869984e-06, + "loss": 0.6447713971138, + "step": 2259 + }, + { + "epoch": 0.5210975328568135, + "grad_norm": 1.3187686937196665, + "learning_rate": 1.767540012524849e-06, + "loss": 0.578650951385498, + "step": 2260 + }, + { + "epoch": 0.5213281069863961, + "grad_norm": 1.4425748519700892, + "learning_rate": 1.76729556789298e-06, + "loss": 0.5001357197761536, + "step": 2261 + }, + { + "epoch": 0.5215586811159788, + "grad_norm": 1.2145912604177214, + "learning_rate": 1.7670510117269242e-06, + "loss": 0.5336331129074097, + "step": 2262 + }, + { + "epoch": 0.5217892552455614, + "grad_norm": 1.2105621787494676, + "learning_rate": 1.76680634406223e-06, + "loss": 0.5628900527954102, + "step": 2263 + }, + { + "epoch": 0.5220198293751441, + "grad_norm": 1.2476030455409495, + "learning_rate": 1.766561564934462e-06, + "loss": 0.46497443318367004, + "step": 2264 + }, + { + "epoch": 0.5222504035047267, + "grad_norm": 1.4921989012106511, + "learning_rate": 1.7663166743792019e-06, + "loss": 0.617607831954956, + "step": 2265 + }, + { + "epoch": 0.5224809776343095, + "grad_norm": 1.1582259137476871, + "learning_rate": 1.7660716724320468e-06, + "loss": 0.5236914157867432, + "step": 2266 + }, + { + "epoch": 0.5227115517638921, + "grad_norm": 1.2919028654437321, + "learning_rate": 1.76582655912861e-06, + "loss": 0.5527941584587097, + "step": 2267 + }, + { + "epoch": 0.5229421258934748, + "grad_norm": 1.208274388494889, + "learning_rate": 1.7655813345045218e-06, + "loss": 0.5394654273986816, + "step": 2268 + }, + { + "epoch": 0.5231727000230574, + "grad_norm": 1.1822216818330542, + "learning_rate": 1.7653359985954275e-06, + "loss": 0.47050246596336365, + "step": 2269 + }, + { + "epoch": 0.5234032741526401, + "grad_norm": 1.2893306401147882, + "learning_rate": 1.7650905514369894e-06, + "loss": 0.49413689970970154, + "step": 2270 + }, + { + "epoch": 0.5236338482822227, + "grad_norm": 
1.3086960549802995, + "learning_rate": 1.7648449930648856e-06, + "loss": 0.5568829774856567, + "step": 2271 + }, + { + "epoch": 0.5238644224118054, + "grad_norm": 1.2475799557753502, + "learning_rate": 1.7645993235148107e-06, + "loss": 0.49238815903663635, + "step": 2272 + }, + { + "epoch": 0.524094996541388, + "grad_norm": 1.16612817534413, + "learning_rate": 1.7643535428224752e-06, + "loss": 0.5580959320068359, + "step": 2273 + }, + { + "epoch": 0.5243255706709707, + "grad_norm": 1.4921637909191205, + "learning_rate": 1.7641076510236052e-06, + "loss": 0.5853499174118042, + "step": 2274 + }, + { + "epoch": 0.5245561448005533, + "grad_norm": 1.3988944269011947, + "learning_rate": 1.7638616481539448e-06, + "loss": 0.5638653635978699, + "step": 2275 + }, + { + "epoch": 0.5247867189301361, + "grad_norm": 1.2859178438597552, + "learning_rate": 1.7636155342492521e-06, + "loss": 0.5197241306304932, + "step": 2276 + }, + { + "epoch": 0.5250172930597187, + "grad_norm": 1.1094174928372944, + "learning_rate": 1.7633693093453026e-06, + "loss": 0.4137725234031677, + "step": 2277 + }, + { + "epoch": 0.5252478671893014, + "grad_norm": 1.2940062745509122, + "learning_rate": 1.7631229734778872e-06, + "loss": 0.54244065284729, + "step": 2278 + }, + { + "epoch": 0.525478441318884, + "grad_norm": 1.1871875469955007, + "learning_rate": 1.7628765266828137e-06, + "loss": 0.5215432047843933, + "step": 2279 + }, + { + "epoch": 0.5257090154484667, + "grad_norm": 1.1984410258580116, + "learning_rate": 1.7626299689959057e-06, + "loss": 0.5559565424919128, + "step": 2280 + }, + { + "epoch": 0.5259395895780493, + "grad_norm": 1.1663711332671047, + "learning_rate": 1.7623833004530026e-06, + "loss": 0.5251328945159912, + "step": 2281 + }, + { + "epoch": 0.526170163707632, + "grad_norm": 1.241523894329925, + "learning_rate": 1.7621365210899598e-06, + "loss": 0.5351072549819946, + "step": 2282 + }, + { + "epoch": 0.5264007378372146, + "grad_norm": 1.1901641374825476, + "learning_rate": 
1.7618896309426504e-06, + "loss": 0.46850037574768066, + "step": 2283 + }, + { + "epoch": 0.5266313119667974, + "grad_norm": 1.1697893294442419, + "learning_rate": 1.761642630046961e-06, + "loss": 0.5001033544540405, + "step": 2284 + }, + { + "epoch": 0.52686188609638, + "grad_norm": 0.9279299862604019, + "learning_rate": 1.7613955184387968e-06, + "loss": 0.47946250438690186, + "step": 2285 + }, + { + "epoch": 0.5270924602259627, + "grad_norm": 1.0539631796672029, + "learning_rate": 1.761148296154077e-06, + "loss": 0.4743049144744873, + "step": 2286 + }, + { + "epoch": 0.5273230343555453, + "grad_norm": 1.154224335020326, + "learning_rate": 1.7609009632287389e-06, + "loss": 0.4518652558326721, + "step": 2287 + }, + { + "epoch": 0.527553608485128, + "grad_norm": 1.0859896497705106, + "learning_rate": 1.7606535196987338e-06, + "loss": 0.5021224617958069, + "step": 2288 + }, + { + "epoch": 0.5277841826147106, + "grad_norm": 1.4832483769951506, + "learning_rate": 1.760405965600031e-06, + "loss": 0.4848078489303589, + "step": 2289 + }, + { + "epoch": 0.5280147567442933, + "grad_norm": 1.22421773905119, + "learning_rate": 1.7601583009686142e-06, + "loss": 0.49077051877975464, + "step": 2290 + }, + { + "epoch": 0.5282453308738759, + "grad_norm": 1.2916718452438969, + "learning_rate": 1.7599105258404848e-06, + "loss": 0.4802943468093872, + "step": 2291 + }, + { + "epoch": 0.5284759050034586, + "grad_norm": 1.4055248895326071, + "learning_rate": 1.7596626402516589e-06, + "loss": 0.5397455096244812, + "step": 2292 + }, + { + "epoch": 0.5287064791330413, + "grad_norm": 1.0497017336135974, + "learning_rate": 1.759414644238169e-06, + "loss": 0.478559672832489, + "step": 2293 + }, + { + "epoch": 0.528937053262624, + "grad_norm": 1.112359888255478, + "learning_rate": 1.7591665378360644e-06, + "loss": 0.5080797672271729, + "step": 2294 + }, + { + "epoch": 0.5291676273922066, + "grad_norm": 1.0468621326779766, + "learning_rate": 1.7589183210814093e-06, + "loss": 0.4959479868412018, 
+ "step": 2295 + }, + { + "epoch": 0.5293982015217893, + "grad_norm": 1.1985868339045591, + "learning_rate": 1.7586699940102853e-06, + "loss": 0.512288510799408, + "step": 2296 + }, + { + "epoch": 0.5296287756513719, + "grad_norm": 1.1129893572343195, + "learning_rate": 1.7584215566587886e-06, + "loss": 0.525113046169281, + "step": 2297 + }, + { + "epoch": 0.5298593497809546, + "grad_norm": 1.2088844531850982, + "learning_rate": 1.7581730090630322e-06, + "loss": 0.3715069890022278, + "step": 2298 + }, + { + "epoch": 0.5300899239105372, + "grad_norm": 1.3852845244524983, + "learning_rate": 1.757924351259145e-06, + "loss": 0.5833072662353516, + "step": 2299 + }, + { + "epoch": 0.5303204980401199, + "grad_norm": 1.638098016270419, + "learning_rate": 1.7576755832832721e-06, + "loss": 0.5942450761795044, + "step": 2300 + }, + { + "epoch": 0.5305510721697025, + "grad_norm": 1.1523961468173722, + "learning_rate": 1.7574267051715745e-06, + "loss": 0.4754432737827301, + "step": 2301 + }, + { + "epoch": 0.5307816462992853, + "grad_norm": 1.3593694553922624, + "learning_rate": 1.7571777169602287e-06, + "loss": 0.5272700190544128, + "step": 2302 + }, + { + "epoch": 0.5310122204288679, + "grad_norm": 1.137089307163323, + "learning_rate": 1.7569286186854283e-06, + "loss": 0.48376554250717163, + "step": 2303 + }, + { + "epoch": 0.5312427945584506, + "grad_norm": 1.324023805933818, + "learning_rate": 1.7566794103833816e-06, + "loss": 0.4324077367782593, + "step": 2304 + }, + { + "epoch": 0.5314733686880332, + "grad_norm": 1.2843168925212602, + "learning_rate": 1.7564300920903142e-06, + "loss": 0.44939202070236206, + "step": 2305 + }, + { + "epoch": 0.5317039428176159, + "grad_norm": 1.2413807013846574, + "learning_rate": 1.7561806638424662e-06, + "loss": 0.5256277322769165, + "step": 2306 + }, + { + "epoch": 0.5319345169471985, + "grad_norm": 1.0855894350628046, + "learning_rate": 1.7559311256760955e-06, + "loss": 0.43901991844177246, + "step": 2307 + }, + { + "epoch": 
0.5321650910767812, + "grad_norm": 1.3134089338347328, + "learning_rate": 1.7556814776274746e-06, + "loss": 0.5256138443946838, + "step": 2308 + }, + { + "epoch": 0.5323956652063638, + "grad_norm": 1.3769537654510517, + "learning_rate": 1.7554317197328922e-06, + "loss": 0.4664478600025177, + "step": 2309 + }, + { + "epoch": 0.5326262393359465, + "grad_norm": 1.1227476903728313, + "learning_rate": 1.7551818520286532e-06, + "loss": 0.5042726397514343, + "step": 2310 + }, + { + "epoch": 0.5328568134655292, + "grad_norm": 1.3417267355052607, + "learning_rate": 1.754931874551079e-06, + "loss": 0.5682350397109985, + "step": 2311 + }, + { + "epoch": 0.5330873875951119, + "grad_norm": 1.2416043105842551, + "learning_rate": 1.754681787336505e-06, + "loss": 0.5082807540893555, + "step": 2312 + }, + { + "epoch": 0.5333179617246945, + "grad_norm": 1.4255568276367208, + "learning_rate": 1.754431590421285e-06, + "loss": 0.6020215749740601, + "step": 2313 + }, + { + "epoch": 0.5335485358542772, + "grad_norm": 1.4104154799235167, + "learning_rate": 1.7541812838417877e-06, + "loss": 0.5004276633262634, + "step": 2314 + }, + { + "epoch": 0.5337791099838598, + "grad_norm": 1.060415170291065, + "learning_rate": 1.753930867634397e-06, + "loss": 0.4889993667602539, + "step": 2315 + }, + { + "epoch": 0.5340096841134425, + "grad_norm": 1.0849217066026469, + "learning_rate": 1.7536803418355141e-06, + "loss": 0.4179444909095764, + "step": 2316 + }, + { + "epoch": 0.5342402582430251, + "grad_norm": 1.2618059778728548, + "learning_rate": 1.7534297064815554e-06, + "loss": 0.46807605028152466, + "step": 2317 + }, + { + "epoch": 0.5344708323726078, + "grad_norm": 1.2827117317411258, + "learning_rate": 1.7531789616089528e-06, + "loss": 0.39173221588134766, + "step": 2318 + }, + { + "epoch": 0.5347014065021904, + "grad_norm": 1.2820357654319097, + "learning_rate": 1.7529281072541548e-06, + "loss": 0.4290514886379242, + "step": 2319 + }, + { + "epoch": 0.5349319806317732, + "grad_norm": 
1.3778694052072273, + "learning_rate": 1.752677143453626e-06, + "loss": 0.6052347421646118, + "step": 2320 + }, + { + "epoch": 0.5351625547613558, + "grad_norm": 1.054542888313722, + "learning_rate": 1.752426070243846e-06, + "loss": 0.47622209787368774, + "step": 2321 + }, + { + "epoch": 0.5353931288909385, + "grad_norm": 1.128157779747108, + "learning_rate": 1.7521748876613112e-06, + "loss": 0.4216923415660858, + "step": 2322 + }, + { + "epoch": 0.5356237030205211, + "grad_norm": 2.0737049391078384, + "learning_rate": 1.751923595742533e-06, + "loss": 0.5527430772781372, + "step": 2323 + }, + { + "epoch": 0.5358542771501038, + "grad_norm": 1.1406433043117166, + "learning_rate": 1.75167219452404e-06, + "loss": 0.5562101602554321, + "step": 2324 + }, + { + "epoch": 0.5360848512796864, + "grad_norm": 1.2183539446117024, + "learning_rate": 1.7514206840423757e-06, + "loss": 0.546181321144104, + "step": 2325 + }, + { + "epoch": 0.5363154254092691, + "grad_norm": 1.5216852196360238, + "learning_rate": 1.7511690643340995e-06, + "loss": 0.5883532762527466, + "step": 2326 + }, + { + "epoch": 0.5365459995388517, + "grad_norm": 1.2667138111118152, + "learning_rate": 1.750917335435787e-06, + "loss": 0.5231350660324097, + "step": 2327 + }, + { + "epoch": 0.5367765736684345, + "grad_norm": 1.200525241411545, + "learning_rate": 1.7506654973840292e-06, + "loss": 0.4846429228782654, + "step": 2328 + }, + { + "epoch": 0.5370071477980171, + "grad_norm": 1.0815584734915895, + "learning_rate": 1.7504135502154335e-06, + "loss": 0.43692171573638916, + "step": 2329 + }, + { + "epoch": 0.5372377219275998, + "grad_norm": 1.0658062374834336, + "learning_rate": 1.7501614939666234e-06, + "loss": 0.5076167583465576, + "step": 2330 + }, + { + "epoch": 0.5374682960571824, + "grad_norm": 1.2658937157989252, + "learning_rate": 1.7499093286742373e-06, + "loss": 0.5302891135215759, + "step": 2331 + }, + { + "epoch": 0.5376988701867651, + "grad_norm": 1.3200406937261826, + "learning_rate": 
1.7496570543749303e-06, + "loss": 0.5827817916870117, + "step": 2332 + }, + { + "epoch": 0.5379294443163477, + "grad_norm": 1.3684047155196064, + "learning_rate": 1.7494046711053726e-06, + "loss": 0.6765470504760742, + "step": 2333 + }, + { + "epoch": 0.5381600184459304, + "grad_norm": 1.3001315312834418, + "learning_rate": 1.7491521789022513e-06, + "loss": 0.48666322231292725, + "step": 2334 + }, + { + "epoch": 0.538390592575513, + "grad_norm": 1.0490910849362622, + "learning_rate": 1.7488995778022685e-06, + "loss": 0.5163695812225342, + "step": 2335 + }, + { + "epoch": 0.5386211667050956, + "grad_norm": 1.1765286879203154, + "learning_rate": 1.748646867842142e-06, + "loss": 0.44487982988357544, + "step": 2336 + }, + { + "epoch": 0.5388517408346783, + "grad_norm": 1.2992285046307706, + "learning_rate": 1.7483940490586058e-06, + "loss": 0.5512663722038269, + "step": 2337 + }, + { + "epoch": 0.539082314964261, + "grad_norm": 1.1533551829707172, + "learning_rate": 1.7481411214884098e-06, + "loss": 0.461128294467926, + "step": 2338 + }, + { + "epoch": 0.5393128890938437, + "grad_norm": 1.2239639921661383, + "learning_rate": 1.7478880851683197e-06, + "loss": 0.47291088104248047, + "step": 2339 + }, + { + "epoch": 0.5395434632234263, + "grad_norm": 1.1568837363453548, + "learning_rate": 1.747634940135117e-06, + "loss": 0.5900166034698486, + "step": 2340 + }, + { + "epoch": 0.539774037353009, + "grad_norm": 1.0385421801821113, + "learning_rate": 1.7473816864255983e-06, + "loss": 0.3878340721130371, + "step": 2341 + }, + { + "epoch": 0.5400046114825916, + "grad_norm": 1.442772155197814, + "learning_rate": 1.7471283240765775e-06, + "loss": 0.5671564340591431, + "step": 2342 + }, + { + "epoch": 0.5402351856121743, + "grad_norm": 1.1602673867587185, + "learning_rate": 1.7468748531248824e-06, + "loss": 0.5153918266296387, + "step": 2343 + }, + { + "epoch": 0.5404657597417569, + "grad_norm": 1.2187996046056446, + "learning_rate": 1.7466212736073585e-06, + "loss": 
0.49520084261894226, + "step": 2344 + }, + { + "epoch": 0.5406963338713396, + "grad_norm": 1.0955374839449357, + "learning_rate": 1.7463675855608654e-06, + "loss": 0.4884970784187317, + "step": 2345 + }, + { + "epoch": 0.5409269080009222, + "grad_norm": 1.401002336922335, + "learning_rate": 1.7461137890222798e-06, + "loss": 0.5233277678489685, + "step": 2346 + }, + { + "epoch": 0.541157482130505, + "grad_norm": 1.272363275240415, + "learning_rate": 1.7458598840284928e-06, + "loss": 0.44011372327804565, + "step": 2347 + }, + { + "epoch": 0.5413880562600876, + "grad_norm": 1.1593134205382656, + "learning_rate": 1.745605870616413e-06, + "loss": 0.4833263158798218, + "step": 2348 + }, + { + "epoch": 0.5416186303896703, + "grad_norm": 1.186578949511732, + "learning_rate": 1.7453517488229634e-06, + "loss": 0.4852379262447357, + "step": 2349 + }, + { + "epoch": 0.5418492045192529, + "grad_norm": 1.527590855990685, + "learning_rate": 1.7450975186850831e-06, + "loss": 0.4710320830345154, + "step": 2350 + }, + { + "epoch": 0.5420797786488356, + "grad_norm": 1.4382691899722804, + "learning_rate": 1.744843180239727e-06, + "loss": 0.5144790410995483, + "step": 2351 + }, + { + "epoch": 0.5423103527784182, + "grad_norm": 1.3784898997392558, + "learning_rate": 1.7445887335238663e-06, + "loss": 0.5815445184707642, + "step": 2352 + }, + { + "epoch": 0.5425409269080009, + "grad_norm": 1.1629274836022288, + "learning_rate": 1.7443341785744864e-06, + "loss": 0.5101407170295715, + "step": 2353 + }, + { + "epoch": 0.5427715010375835, + "grad_norm": 1.1760272227987194, + "learning_rate": 1.7440795154285905e-06, + "loss": 0.4584839940071106, + "step": 2354 + }, + { + "epoch": 0.5430020751671663, + "grad_norm": 1.323122873632264, + "learning_rate": 1.743824744123196e-06, + "loss": 0.482247531414032, + "step": 2355 + }, + { + "epoch": 0.5432326492967489, + "grad_norm": 1.1361176263052393, + "learning_rate": 1.7435698646953364e-06, + "loss": 0.5503325462341309, + "step": 2356 + }, + { + 
"epoch": 0.5434632234263316, + "grad_norm": 1.2952580221197654, + "learning_rate": 1.7433148771820612e-06, + "loss": 0.4803489148616791, + "step": 2357 + }, + { + "epoch": 0.5436937975559142, + "grad_norm": 1.303291620807208, + "learning_rate": 1.7430597816204351e-06, + "loss": 0.5388872027397156, + "step": 2358 + }, + { + "epoch": 0.5439243716854969, + "grad_norm": 1.6209081192397237, + "learning_rate": 1.742804578047539e-06, + "loss": 0.512636125087738, + "step": 2359 + }, + { + "epoch": 0.5441549458150795, + "grad_norm": 1.5943501598581358, + "learning_rate": 1.7425492665004699e-06, + "loss": 0.49154865741729736, + "step": 2360 + }, + { + "epoch": 0.5443855199446622, + "grad_norm": 1.1498651594774036, + "learning_rate": 1.7422938470163389e-06, + "loss": 0.5185250639915466, + "step": 2361 + }, + { + "epoch": 0.5446160940742448, + "grad_norm": 1.5663688017502957, + "learning_rate": 1.7420383196322747e-06, + "loss": 0.5474511384963989, + "step": 2362 + }, + { + "epoch": 0.5448466682038275, + "grad_norm": 1.3465441719791955, + "learning_rate": 1.7417826843854202e-06, + "loss": 0.48212137818336487, + "step": 2363 + }, + { + "epoch": 0.5450772423334102, + "grad_norm": 1.1320785808666363, + "learning_rate": 1.7415269413129348e-06, + "loss": 0.47983086109161377, + "step": 2364 + }, + { + "epoch": 0.5453078164629929, + "grad_norm": 1.1314426678618292, + "learning_rate": 1.7412710904519932e-06, + "loss": 0.4935225546360016, + "step": 2365 + }, + { + "epoch": 0.5455383905925755, + "grad_norm": 1.2528535153373956, + "learning_rate": 1.7410151318397862e-06, + "loss": 0.5167664289474487, + "step": 2366 + }, + { + "epoch": 0.5457689647221582, + "grad_norm": 1.1782327982922274, + "learning_rate": 1.74075906551352e-06, + "loss": 0.5116056799888611, + "step": 2367 + }, + { + "epoch": 0.5459995388517408, + "grad_norm": 1.1184728717072068, + "learning_rate": 1.7405028915104158e-06, + "loss": 0.4709595739841461, + "step": 2368 + }, + { + "epoch": 0.5462301129813235, + "grad_norm": 
1.560534410686712, + "learning_rate": 1.7402466098677118e-06, + "loss": 0.3989061117172241, + "step": 2369 + }, + { + "epoch": 0.5464606871109061, + "grad_norm": 1.1397817693321244, + "learning_rate": 1.739990220622661e-06, + "loss": 0.45720764994621277, + "step": 2370 + }, + { + "epoch": 0.5466912612404888, + "grad_norm": 1.6154705847610804, + "learning_rate": 1.739733723812532e-06, + "loss": 0.5865384936332703, + "step": 2371 + }, + { + "epoch": 0.5469218353700714, + "grad_norm": 1.3129437136284077, + "learning_rate": 1.7394771194746092e-06, + "loss": 0.4451501965522766, + "step": 2372 + }, + { + "epoch": 0.5471524094996542, + "grad_norm": 1.2213938230584949, + "learning_rate": 1.7392204076461928e-06, + "loss": 0.4628486633300781, + "step": 2373 + }, + { + "epoch": 0.5473829836292368, + "grad_norm": 1.2854198948482758, + "learning_rate": 1.7389635883645984e-06, + "loss": 0.4797760248184204, + "step": 2374 + }, + { + "epoch": 0.5476135577588195, + "grad_norm": 1.2890601616689177, + "learning_rate": 1.7387066616671571e-06, + "loss": 0.4716770648956299, + "step": 2375 + }, + { + "epoch": 0.5478441318884021, + "grad_norm": 1.071991179643841, + "learning_rate": 1.738449627591216e-06, + "loss": 0.504901647567749, + "step": 2376 + }, + { + "epoch": 0.5480747060179848, + "grad_norm": 1.259141194312177, + "learning_rate": 1.7381924861741375e-06, + "loss": 0.5248615145683289, + "step": 2377 + }, + { + "epoch": 0.5483052801475674, + "grad_norm": 1.1551298194401718, + "learning_rate": 1.7379352374532998e-06, + "loss": 0.41704076528549194, + "step": 2378 + }, + { + "epoch": 0.5485358542771501, + "grad_norm": 1.1093382819710802, + "learning_rate": 1.7376778814660966e-06, + "loss": 0.42278197407722473, + "step": 2379 + }, + { + "epoch": 0.5487664284067327, + "grad_norm": 1.3240414194175114, + "learning_rate": 1.7374204182499372e-06, + "loss": 0.4104729890823364, + "step": 2380 + }, + { + "epoch": 0.5489970025363154, + "grad_norm": 1.237574436817826, + "learning_rate": 
1.7371628478422467e-06, + "loss": 0.5205684304237366, + "step": 2381 + }, + { + "epoch": 0.549227576665898, + "grad_norm": 1.2914374831424469, + "learning_rate": 1.7369051702804648e-06, + "loss": 0.4743306040763855, + "step": 2382 + }, + { + "epoch": 0.5494581507954808, + "grad_norm": 1.4263628155545096, + "learning_rate": 1.7366473856020486e-06, + "loss": 0.6324253678321838, + "step": 2383 + }, + { + "epoch": 0.5496887249250634, + "grad_norm": 1.2093119037905458, + "learning_rate": 1.736389493844469e-06, + "loss": 0.46466588973999023, + "step": 2384 + }, + { + "epoch": 0.5499192990546461, + "grad_norm": 1.257464863029373, + "learning_rate": 1.7361314950452136e-06, + "loss": 0.4117918014526367, + "step": 2385 + }, + { + "epoch": 0.5501498731842287, + "grad_norm": 1.0582357147304537, + "learning_rate": 1.7358733892417848e-06, + "loss": 0.40341615676879883, + "step": 2386 + }, + { + "epoch": 0.5503804473138114, + "grad_norm": 1.2083128590610215, + "learning_rate": 1.735615176471701e-06, + "loss": 0.642855167388916, + "step": 2387 + }, + { + "epoch": 0.550611021443394, + "grad_norm": 1.3821025749968947, + "learning_rate": 1.7353568567724959e-06, + "loss": 0.5490958094596863, + "step": 2388 + }, + { + "epoch": 0.5508415955729767, + "grad_norm": 1.0972882559163057, + "learning_rate": 1.7350984301817192e-06, + "loss": 0.5154834985733032, + "step": 2389 + }, + { + "epoch": 0.5510721697025593, + "grad_norm": 1.5156914347306212, + "learning_rate": 1.7348398967369358e-06, + "loss": 0.49488651752471924, + "step": 2390 + }, + { + "epoch": 0.5513027438321421, + "grad_norm": 1.097164324799634, + "learning_rate": 1.7345812564757257e-06, + "loss": 0.4211215674877167, + "step": 2391 + }, + { + "epoch": 0.5515333179617247, + "grad_norm": 1.1060429845011046, + "learning_rate": 1.7343225094356855e-06, + "loss": 0.41840964555740356, + "step": 2392 + }, + { + "epoch": 0.5517638920913074, + "grad_norm": 1.1213399734290006, + "learning_rate": 1.7340636556544264e-06, + "loss": 
0.540780782699585, + "step": 2393 + }, + { + "epoch": 0.55199446622089, + "grad_norm": 1.328334535307567, + "learning_rate": 1.7338046951695754e-06, + "loss": 0.4967775046825409, + "step": 2394 + }, + { + "epoch": 0.5522250403504727, + "grad_norm": 1.337457775660936, + "learning_rate": 1.733545628018775e-06, + "loss": 0.5155577659606934, + "step": 2395 + }, + { + "epoch": 0.5524556144800553, + "grad_norm": 1.3409169497631646, + "learning_rate": 1.7332864542396832e-06, + "loss": 0.5106005072593689, + "step": 2396 + }, + { + "epoch": 0.552686188609638, + "grad_norm": 1.106469342539302, + "learning_rate": 1.7330271738699737e-06, + "loss": 0.3459712862968445, + "step": 2397 + }, + { + "epoch": 0.5529167627392206, + "grad_norm": 1.238811250755909, + "learning_rate": 1.7327677869473356e-06, + "loss": 0.4877927303314209, + "step": 2398 + }, + { + "epoch": 0.5531473368688034, + "grad_norm": 1.298959309949219, + "learning_rate": 1.7325082935094732e-06, + "loss": 0.5183857679367065, + "step": 2399 + }, + { + "epoch": 0.553377910998386, + "grad_norm": 1.1165163437308863, + "learning_rate": 1.7322486935941068e-06, + "loss": 0.4326491057872772, + "step": 2400 + }, + { + "epoch": 0.5536084851279687, + "grad_norm": 1.2472729786065346, + "learning_rate": 1.7319889872389716e-06, + "loss": 0.4688712954521179, + "step": 2401 + }, + { + "epoch": 0.5538390592575513, + "grad_norm": 1.2787851295656323, + "learning_rate": 1.7317291744818184e-06, + "loss": 0.4997788071632385, + "step": 2402 + }, + { + "epoch": 0.554069633387134, + "grad_norm": 1.3085189564145994, + "learning_rate": 1.731469255360414e-06, + "loss": 0.5271172523498535, + "step": 2403 + }, + { + "epoch": 0.5543002075167166, + "grad_norm": 1.3689434717845856, + "learning_rate": 1.73120922991254e-06, + "loss": 0.5339269042015076, + "step": 2404 + }, + { + "epoch": 0.5545307816462993, + "grad_norm": 1.2181123008680574, + "learning_rate": 1.7309490981759938e-06, + "loss": 0.47052568197250366, + "step": 2405 + }, + { + "epoch": 
0.5547613557758819, + "grad_norm": 1.2508289898124627, + "learning_rate": 1.7306888601885885e-06, + "loss": 0.4112280309200287, + "step": 2406 + }, + { + "epoch": 0.5549919299054646, + "grad_norm": 1.1812487853939355, + "learning_rate": 1.730428515988152e-06, + "loss": 0.5473710298538208, + "step": 2407 + }, + { + "epoch": 0.5552225040350472, + "grad_norm": 1.6509587018432181, + "learning_rate": 1.7301680656125277e-06, + "loss": 0.5079115629196167, + "step": 2408 + }, + { + "epoch": 0.55545307816463, + "grad_norm": 1.193259996108104, + "learning_rate": 1.7299075090995755e-06, + "loss": 0.4805012345314026, + "step": 2409 + }, + { + "epoch": 0.5556836522942126, + "grad_norm": 1.1958830357632493, + "learning_rate": 1.729646846487169e-06, + "loss": 0.4657474756240845, + "step": 2410 + }, + { + "epoch": 0.5559142264237953, + "grad_norm": 1.2442110767414496, + "learning_rate": 1.729386077813199e-06, + "loss": 0.5887978076934814, + "step": 2411 + }, + { + "epoch": 0.5561448005533779, + "grad_norm": 1.0093517139206267, + "learning_rate": 1.7291252031155704e-06, + "loss": 0.43841421604156494, + "step": 2412 + }, + { + "epoch": 0.5563753746829606, + "grad_norm": 1.304380451031228, + "learning_rate": 1.728864222432204e-06, + "loss": 0.5026551485061646, + "step": 2413 + }, + { + "epoch": 0.5566059488125432, + "grad_norm": 1.2344100865196312, + "learning_rate": 1.728603135801036e-06, + "loss": 0.4525277614593506, + "step": 2414 + }, + { + "epoch": 0.5568365229421259, + "grad_norm": 1.3128956010351178, + "learning_rate": 1.7283419432600182e-06, + "loss": 0.4095644950866699, + "step": 2415 + }, + { + "epoch": 0.5570670970717085, + "grad_norm": 1.2351186073808627, + "learning_rate": 1.7280806448471173e-06, + "loss": 0.5098834037780762, + "step": 2416 + }, + { + "epoch": 0.5572976712012913, + "grad_norm": 0.9689174321932323, + "learning_rate": 1.7278192406003159e-06, + "loss": 0.42802777886390686, + "step": 2417 + }, + { + "epoch": 0.5575282453308739, + "grad_norm": 
1.283644069549869, + "learning_rate": 1.7275577305576113e-06, + "loss": 0.5036378502845764, + "step": 2418 + }, + { + "epoch": 0.5577588194604566, + "grad_norm": 1.2960652355454445, + "learning_rate": 1.7272961147570175e-06, + "loss": 0.5324885249137878, + "step": 2419 + }, + { + "epoch": 0.5579893935900392, + "grad_norm": 1.6334614504341187, + "learning_rate": 1.727034393236562e-06, + "loss": 0.5763842463493347, + "step": 2420 + }, + { + "epoch": 0.5582199677196219, + "grad_norm": 1.343133312027108, + "learning_rate": 1.7267725660342895e-06, + "loss": 0.49291908740997314, + "step": 2421 + }, + { + "epoch": 0.5584505418492045, + "grad_norm": 1.651006143174213, + "learning_rate": 1.7265106331882588e-06, + "loss": 0.5114868879318237, + "step": 2422 + }, + { + "epoch": 0.5586811159787872, + "grad_norm": 1.1152807378164393, + "learning_rate": 1.7262485947365449e-06, + "loss": 0.42442530393600464, + "step": 2423 + }, + { + "epoch": 0.5589116901083698, + "grad_norm": 1.1309517905090323, + "learning_rate": 1.725986450717237e-06, + "loss": 0.3680551052093506, + "step": 2424 + }, + { + "epoch": 0.5591422642379525, + "grad_norm": 1.2183025106634426, + "learning_rate": 1.725724201168441e-06, + "loss": 0.5849576592445374, + "step": 2425 + }, + { + "epoch": 0.5593728383675352, + "grad_norm": 1.3597945996239442, + "learning_rate": 1.7254618461282773e-06, + "loss": 0.48919233679771423, + "step": 2426 + }, + { + "epoch": 0.5596034124971179, + "grad_norm": 1.1753552641156777, + "learning_rate": 1.7251993856348821e-06, + "loss": 0.4857720732688904, + "step": 2427 + }, + { + "epoch": 0.5598339866267005, + "grad_norm": 1.3324934167522995, + "learning_rate": 1.7249368197264062e-06, + "loss": 0.5106808543205261, + "step": 2428 + }, + { + "epoch": 0.5600645607562832, + "grad_norm": 1.305986731975411, + "learning_rate": 1.724674148441017e-06, + "loss": 0.500100314617157, + "step": 2429 + }, + { + "epoch": 0.5602951348858658, + "grad_norm": 1.226560051936561, + "learning_rate": 
1.7244113718168957e-06, + "loss": 0.5389110445976257, + "step": 2430 + }, + { + "epoch": 0.5605257090154485, + "grad_norm": 1.2848731557614161, + "learning_rate": 1.72414848989224e-06, + "loss": 0.42860496044158936, + "step": 2431 + }, + { + "epoch": 0.5607562831450311, + "grad_norm": 1.2392935426075953, + "learning_rate": 1.723885502705262e-06, + "loss": 0.4867728352546692, + "step": 2432 + }, + { + "epoch": 0.5609868572746138, + "grad_norm": 1.215687300161219, + "learning_rate": 1.7236224102941899e-06, + "loss": 0.49194633960723877, + "step": 2433 + }, + { + "epoch": 0.5612174314041964, + "grad_norm": 1.278802988367442, + "learning_rate": 1.7233592126972667e-06, + "loss": 0.5194358229637146, + "step": 2434 + }, + { + "epoch": 0.5614480055337792, + "grad_norm": 1.518126298536734, + "learning_rate": 1.723095909952751e-06, + "loss": 0.4738645553588867, + "step": 2435 + }, + { + "epoch": 0.5616785796633618, + "grad_norm": 1.1842233457279843, + "learning_rate": 1.7228325020989165e-06, + "loss": 0.48232927918434143, + "step": 2436 + }, + { + "epoch": 0.5619091537929445, + "grad_norm": 1.0590325088103263, + "learning_rate": 1.7225689891740522e-06, + "loss": 0.5192145109176636, + "step": 2437 + }, + { + "epoch": 0.5621397279225271, + "grad_norm": 1.2756639382228332, + "learning_rate": 1.7223053712164621e-06, + "loss": 0.4934930205345154, + "step": 2438 + }, + { + "epoch": 0.5623703020521098, + "grad_norm": 1.294610704846241, + "learning_rate": 1.722041648264466e-06, + "loss": 0.5022200345993042, + "step": 2439 + }, + { + "epoch": 0.5626008761816924, + "grad_norm": 1.15319893327068, + "learning_rate": 1.7217778203563986e-06, + "loss": 0.45300528407096863, + "step": 2440 + }, + { + "epoch": 0.5628314503112751, + "grad_norm": 1.1335234735988557, + "learning_rate": 1.7215138875306103e-06, + "loss": 0.4965200126171112, + "step": 2441 + }, + { + "epoch": 0.5630620244408577, + "grad_norm": 1.3081789750993726, + "learning_rate": 1.721249849825466e-06, + "loss": 
0.4618280231952667, + "step": 2442 + }, + { + "epoch": 0.5632925985704405, + "grad_norm": 1.255070715358214, + "learning_rate": 1.7209857072793464e-06, + "loss": 0.42270147800445557, + "step": 2443 + }, + { + "epoch": 0.5635231727000231, + "grad_norm": 1.0830436199918496, + "learning_rate": 1.720721459930647e-06, + "loss": 0.5200725793838501, + "step": 2444 + }, + { + "epoch": 0.5637537468296058, + "grad_norm": 1.1368018551382484, + "learning_rate": 1.7204571078177792e-06, + "loss": 0.47475337982177734, + "step": 2445 + }, + { + "epoch": 0.5639843209591884, + "grad_norm": 1.5482537414338693, + "learning_rate": 1.7201926509791693e-06, + "loss": 0.5493113994598389, + "step": 2446 + }, + { + "epoch": 0.564214895088771, + "grad_norm": 1.2861044506324582, + "learning_rate": 1.719928089453259e-06, + "loss": 0.4743562340736389, + "step": 2447 + }, + { + "epoch": 0.5644454692183537, + "grad_norm": 1.2343956116266135, + "learning_rate": 1.7196634232785038e-06, + "loss": 0.5145455598831177, + "step": 2448 + }, + { + "epoch": 0.5646760433479363, + "grad_norm": 1.5340568803714763, + "learning_rate": 1.719398652493377e-06, + "loss": 0.45072540640830994, + "step": 2449 + }, + { + "epoch": 0.564906617477519, + "grad_norm": 1.2363775684809537, + "learning_rate": 1.7191337771363651e-06, + "loss": 0.5150895714759827, + "step": 2450 + }, + { + "epoch": 0.5651371916071016, + "grad_norm": 1.4238500687035243, + "learning_rate": 1.7188687972459705e-06, + "loss": 0.5025302171707153, + "step": 2451 + }, + { + "epoch": 0.5653677657366843, + "grad_norm": 1.2149895801108108, + "learning_rate": 1.7186037128607107e-06, + "loss": 0.618930459022522, + "step": 2452 + }, + { + "epoch": 0.565598339866267, + "grad_norm": 1.1681250836374313, + "learning_rate": 1.7183385240191183e-06, + "loss": 0.5841591358184814, + "step": 2453 + }, + { + "epoch": 0.5658289139958497, + "grad_norm": 1.2481599814364495, + "learning_rate": 1.7180732307597413e-06, + "loss": 0.4915233850479126, + "step": 2454 + }, + { + 
"epoch": 0.5660594881254323, + "grad_norm": 1.127625184290067, + "learning_rate": 1.7178078331211429e-06, + "loss": 0.46732476353645325, + "step": 2455 + }, + { + "epoch": 0.566290062255015, + "grad_norm": 1.1121526599443385, + "learning_rate": 1.7175423311419013e-06, + "loss": 0.4640737771987915, + "step": 2456 + }, + { + "epoch": 0.5665206363845976, + "grad_norm": 1.2800685498732043, + "learning_rate": 1.7172767248606095e-06, + "loss": 0.39535683393478394, + "step": 2457 + }, + { + "epoch": 0.5667512105141803, + "grad_norm": 1.196636942462094, + "learning_rate": 1.7170110143158766e-06, + "loss": 0.4782179594039917, + "step": 2458 + }, + { + "epoch": 0.5669817846437629, + "grad_norm": 1.5731644028680265, + "learning_rate": 1.7167451995463258e-06, + "loss": 0.6186003684997559, + "step": 2459 + }, + { + "epoch": 0.5672123587733456, + "grad_norm": 1.3163111292704002, + "learning_rate": 1.7164792805905965e-06, + "loss": 0.4915347099304199, + "step": 2460 + }, + { + "epoch": 0.5674429329029282, + "grad_norm": 1.2683630708246802, + "learning_rate": 1.7162132574873422e-06, + "loss": 0.4789005517959595, + "step": 2461 + }, + { + "epoch": 0.567673507032511, + "grad_norm": 1.6928847577315913, + "learning_rate": 1.7159471302752326e-06, + "loss": 0.6307233572006226, + "step": 2462 + }, + { + "epoch": 0.5679040811620936, + "grad_norm": 1.240574680316347, + "learning_rate": 1.7156808989929514e-06, + "loss": 0.5278424024581909, + "step": 2463 + }, + { + "epoch": 0.5681346552916763, + "grad_norm": 1.4388020329709479, + "learning_rate": 1.7154145636791988e-06, + "loss": 0.48552995920181274, + "step": 2464 + }, + { + "epoch": 0.5683652294212589, + "grad_norm": 1.3679954470869684, + "learning_rate": 1.7151481243726885e-06, + "loss": 0.5125370621681213, + "step": 2465 + }, + { + "epoch": 0.5685958035508416, + "grad_norm": 1.3448408660581435, + "learning_rate": 1.7148815811121506e-06, + "loss": 0.44231730699539185, + "step": 2466 + }, + { + "epoch": 0.5688263776804242, + "grad_norm": 
1.367567415522102, + "learning_rate": 1.7146149339363296e-06, + "loss": 0.5593529939651489, + "step": 2467 + }, + { + "epoch": 0.5690569518100069, + "grad_norm": 1.347377301704866, + "learning_rate": 1.714348182883986e-06, + "loss": 0.4830925464630127, + "step": 2468 + }, + { + "epoch": 0.5692875259395895, + "grad_norm": 1.4913136319748062, + "learning_rate": 1.714081327993894e-06, + "loss": 0.5538743734359741, + "step": 2469 + }, + { + "epoch": 0.5695181000691723, + "grad_norm": 1.4135532975212044, + "learning_rate": 1.7138143693048441e-06, + "loss": 0.5145905613899231, + "step": 2470 + }, + { + "epoch": 0.5697486741987549, + "grad_norm": 1.301183082915478, + "learning_rate": 1.713547306855641e-06, + "loss": 0.47706612944602966, + "step": 2471 + }, + { + "epoch": 0.5699792483283376, + "grad_norm": 1.2528774428968483, + "learning_rate": 1.7132801406851056e-06, + "loss": 0.45162689685821533, + "step": 2472 + }, + { + "epoch": 0.5702098224579202, + "grad_norm": 1.5721475156494655, + "learning_rate": 1.7130128708320727e-06, + "loss": 0.5141111612319946, + "step": 2473 + }, + { + "epoch": 0.5704403965875029, + "grad_norm": 1.0845779630695374, + "learning_rate": 1.7127454973353932e-06, + "loss": 0.4443173408508301, + "step": 2474 + }, + { + "epoch": 0.5706709707170855, + "grad_norm": 1.2704796440823871, + "learning_rate": 1.7124780202339317e-06, + "loss": 0.4162046015262604, + "step": 2475 + }, + { + "epoch": 0.5709015448466682, + "grad_norm": 1.100254820278883, + "learning_rate": 1.7122104395665695e-06, + "loss": 0.44526439905166626, + "step": 2476 + }, + { + "epoch": 0.5711321189762508, + "grad_norm": 1.3237501807128542, + "learning_rate": 1.7119427553722016e-06, + "loss": 0.5069452524185181, + "step": 2477 + }, + { + "epoch": 0.5713626931058335, + "grad_norm": 1.2833720010816703, + "learning_rate": 1.7116749676897393e-06, + "loss": 0.46709829568862915, + "step": 2478 + }, + { + "epoch": 0.5715932672354161, + "grad_norm": 1.2011083992406753, + "learning_rate": 
1.7114070765581078e-06, + "loss": 0.5443992614746094, + "step": 2479 + }, + { + "epoch": 0.5718238413649989, + "grad_norm": 1.5805836267397864, + "learning_rate": 1.7111390820162477e-06, + "loss": 0.4307284653186798, + "step": 2480 + }, + { + "epoch": 0.5720544154945815, + "grad_norm": 1.272693158326629, + "learning_rate": 1.7108709841031148e-06, + "loss": 0.4753509759902954, + "step": 2481 + }, + { + "epoch": 0.5722849896241642, + "grad_norm": 1.3966851487133662, + "learning_rate": 1.7106027828576798e-06, + "loss": 0.5689436197280884, + "step": 2482 + }, + { + "epoch": 0.5725155637537468, + "grad_norm": 1.3535603859222731, + "learning_rate": 1.710334478318929e-06, + "loss": 0.47182410955429077, + "step": 2483 + }, + { + "epoch": 0.5727461378833295, + "grad_norm": 1.4415402220476166, + "learning_rate": 1.7100660705258623e-06, + "loss": 0.4418888986110687, + "step": 2484 + }, + { + "epoch": 0.5729767120129121, + "grad_norm": 1.0842485548099412, + "learning_rate": 1.709797559517496e-06, + "loss": 0.4315544366836548, + "step": 2485 + }, + { + "epoch": 0.5732072861424948, + "grad_norm": 1.136143164844157, + "learning_rate": 1.709528945332861e-06, + "loss": 0.34541741013526917, + "step": 2486 + }, + { + "epoch": 0.5734378602720774, + "grad_norm": 1.444798755487831, + "learning_rate": 1.709260228011003e-06, + "loss": 0.5380317568778992, + "step": 2487 + }, + { + "epoch": 0.5736684344016602, + "grad_norm": 1.1490218932398577, + "learning_rate": 1.7089914075909824e-06, + "loss": 0.5017478466033936, + "step": 2488 + }, + { + "epoch": 0.5738990085312428, + "grad_norm": 1.317791376396268, + "learning_rate": 1.7087224841118756e-06, + "loss": 0.5608090162277222, + "step": 2489 + }, + { + "epoch": 0.5741295826608255, + "grad_norm": 1.3491498137629283, + "learning_rate": 1.708453457612773e-06, + "loss": 0.5360782146453857, + "step": 2490 + }, + { + "epoch": 0.5743601567904081, + "grad_norm": 1.3100243824681166, + "learning_rate": 1.7081843281327802e-06, + "loss": 
0.5638090372085571, + "step": 2491 + }, + { + "epoch": 0.5745907309199908, + "grad_norm": 1.2532603581217905, + "learning_rate": 1.707915095711018e-06, + "loss": 0.45777082443237305, + "step": 2492 + }, + { + "epoch": 0.5748213050495734, + "grad_norm": 1.2028357712850113, + "learning_rate": 1.7076457603866224e-06, + "loss": 0.5423707962036133, + "step": 2493 + }, + { + "epoch": 0.5750518791791561, + "grad_norm": 1.3752974790416335, + "learning_rate": 1.7073763221987436e-06, + "loss": 0.4286508560180664, + "step": 2494 + }, + { + "epoch": 0.5752824533087387, + "grad_norm": 1.1304014566480758, + "learning_rate": 1.7071067811865474e-06, + "loss": 0.4197548031806946, + "step": 2495 + }, + { + "epoch": 0.5755130274383214, + "grad_norm": 1.1820720623961845, + "learning_rate": 1.7068371373892142e-06, + "loss": 0.47944843769073486, + "step": 2496 + }, + { + "epoch": 0.575743601567904, + "grad_norm": 1.5454364363464301, + "learning_rate": 1.7065673908459396e-06, + "loss": 0.49708908796310425, + "step": 2497 + }, + { + "epoch": 0.5759741756974868, + "grad_norm": 1.2002677488287707, + "learning_rate": 1.706297541595934e-06, + "loss": 0.46402662992477417, + "step": 2498 + }, + { + "epoch": 0.5762047498270694, + "grad_norm": 1.2375577528106843, + "learning_rate": 1.7060275896784222e-06, + "loss": 0.4665846824645996, + "step": 2499 + }, + { + "epoch": 0.5764353239566521, + "grad_norm": 1.333335025499966, + "learning_rate": 1.7057575351326452e-06, + "loss": 0.511766791343689, + "step": 2500 + }, + { + "epoch": 0.5766658980862347, + "grad_norm": 1.3129729051878996, + "learning_rate": 1.7054873779978578e-06, + "loss": 0.5731323957443237, + "step": 2501 + }, + { + "epoch": 0.5768964722158174, + "grad_norm": 1.208575824869893, + "learning_rate": 1.70521711831333e-06, + "loss": 0.43246185779571533, + "step": 2502 + }, + { + "epoch": 0.5771270463454, + "grad_norm": 1.3743994267646191, + "learning_rate": 1.704946756118347e-06, + "loss": 0.5062395334243774, + "step": 2503 + }, + { + 
"epoch": 0.5773576204749827, + "grad_norm": 1.2169597850499592, + "learning_rate": 1.7046762914522087e-06, + "loss": 0.5010061264038086, + "step": 2504 + }, + { + "epoch": 0.5775881946045653, + "grad_norm": 1.1915100175955862, + "learning_rate": 1.7044057243542293e-06, + "loss": 0.5118759870529175, + "step": 2505 + }, + { + "epoch": 0.5778187687341481, + "grad_norm": 1.2406153903833703, + "learning_rate": 1.7041350548637392e-06, + "loss": 0.5796714425086975, + "step": 2506 + }, + { + "epoch": 0.5780493428637307, + "grad_norm": 1.198072830487735, + "learning_rate": 1.7038642830200828e-06, + "loss": 0.43587976694107056, + "step": 2507 + }, + { + "epoch": 0.5782799169933134, + "grad_norm": 1.0836383921827997, + "learning_rate": 1.7035934088626193e-06, + "loss": 0.4780135154724121, + "step": 2508 + }, + { + "epoch": 0.578510491122896, + "grad_norm": 1.2949967246283594, + "learning_rate": 1.7033224324307232e-06, + "loss": 0.48039600253105164, + "step": 2509 + }, + { + "epoch": 0.5787410652524787, + "grad_norm": 1.4288262034065056, + "learning_rate": 1.7030513537637835e-06, + "loss": 0.48075419664382935, + "step": 2510 + }, + { + "epoch": 0.5789716393820613, + "grad_norm": 1.294455603546607, + "learning_rate": 1.7027801729012044e-06, + "loss": 0.5006246566772461, + "step": 2511 + }, + { + "epoch": 0.579202213511644, + "grad_norm": 1.3239915881424993, + "learning_rate": 1.7025088898824046e-06, + "loss": 0.550139307975769, + "step": 2512 + }, + { + "epoch": 0.5794327876412266, + "grad_norm": 1.273345251271078, + "learning_rate": 1.7022375047468178e-06, + "loss": 0.5228495001792908, + "step": 2513 + }, + { + "epoch": 0.5796633617708093, + "grad_norm": 1.223108155250479, + "learning_rate": 1.701966017533893e-06, + "loss": 0.4783739149570465, + "step": 2514 + }, + { + "epoch": 0.579893935900392, + "grad_norm": 1.3364695116135945, + "learning_rate": 1.701694428283093e-06, + "loss": 0.47218769788742065, + "step": 2515 + }, + { + "epoch": 0.5801245100299747, + "grad_norm": 
1.271458214482931, + "learning_rate": 1.7014227370338967e-06, + "loss": 0.5340671539306641, + "step": 2516 + }, + { + "epoch": 0.5803550841595573, + "grad_norm": 1.1389068048001012, + "learning_rate": 1.7011509438257967e-06, + "loss": 0.4629259407520294, + "step": 2517 + }, + { + "epoch": 0.58058565828914, + "grad_norm": 1.6036419177897663, + "learning_rate": 1.7008790486983013e-06, + "loss": 0.6334242820739746, + "step": 2518 + }, + { + "epoch": 0.5808162324187226, + "grad_norm": 1.3328081079482175, + "learning_rate": 1.7006070516909327e-06, + "loss": 0.544147789478302, + "step": 2519 + }, + { + "epoch": 0.5810468065483053, + "grad_norm": 1.2269860514972317, + "learning_rate": 1.700334952843229e-06, + "loss": 0.47045618295669556, + "step": 2520 + }, + { + "epoch": 0.5812773806778879, + "grad_norm": 1.4613594501045561, + "learning_rate": 1.700062752194742e-06, + "loss": 0.4582393169403076, + "step": 2521 + }, + { + "epoch": 0.5815079548074706, + "grad_norm": 1.335231293513905, + "learning_rate": 1.699790449785039e-06, + "loss": 0.507327139377594, + "step": 2522 + }, + { + "epoch": 0.5817385289370532, + "grad_norm": 1.3812182502399277, + "learning_rate": 1.6995180456537022e-06, + "loss": 0.5345891714096069, + "step": 2523 + }, + { + "epoch": 0.581969103066636, + "grad_norm": 1.3766088909590293, + "learning_rate": 1.6992455398403277e-06, + "loss": 0.4847550094127655, + "step": 2524 + }, + { + "epoch": 0.5821996771962186, + "grad_norm": 1.2694420906725428, + "learning_rate": 1.6989729323845276e-06, + "loss": 0.4472479820251465, + "step": 2525 + }, + { + "epoch": 0.5824302513258013, + "grad_norm": 1.1676894033843348, + "learning_rate": 1.698700223325928e-06, + "loss": 0.4426107108592987, + "step": 2526 + }, + { + "epoch": 0.5826608254553839, + "grad_norm": 1.3669509353012406, + "learning_rate": 1.6984274127041696e-06, + "loss": 0.4814276099205017, + "step": 2527 + }, + { + "epoch": 0.5828913995849666, + "grad_norm": 1.3849093780882, + "learning_rate": 
1.6981545005589084e-06, + "loss": 0.5286451578140259, + "step": 2528 + }, + { + "epoch": 0.5831219737145492, + "grad_norm": 1.3586645163698117, + "learning_rate": 1.6978814869298152e-06, + "loss": 0.5291767120361328, + "step": 2529 + }, + { + "epoch": 0.5833525478441319, + "grad_norm": 1.4376369092272532, + "learning_rate": 1.6976083718565748e-06, + "loss": 0.5807399749755859, + "step": 2530 + }, + { + "epoch": 0.5835831219737145, + "grad_norm": 1.5620885730430554, + "learning_rate": 1.6973351553788878e-06, + "loss": 0.5489222407341003, + "step": 2531 + }, + { + "epoch": 0.5838136961032973, + "grad_norm": 1.5080367455114985, + "learning_rate": 1.6970618375364683e-06, + "loss": 0.5295521020889282, + "step": 2532 + }, + { + "epoch": 0.5840442702328799, + "grad_norm": 1.281498688581256, + "learning_rate": 1.6967884183690467e-06, + "loss": 0.4979495406150818, + "step": 2533 + }, + { + "epoch": 0.5842748443624626, + "grad_norm": 1.0681769287073983, + "learning_rate": 1.6965148979163661e-06, + "loss": 0.45667344331741333, + "step": 2534 + }, + { + "epoch": 0.5845054184920452, + "grad_norm": 1.1552847245372566, + "learning_rate": 1.6962412762181866e-06, + "loss": 0.42687737941741943, + "step": 2535 + }, + { + "epoch": 0.5847359926216279, + "grad_norm": 1.2720388462434997, + "learning_rate": 1.6959675533142815e-06, + "loss": 0.5616278648376465, + "step": 2536 + }, + { + "epoch": 0.5849665667512105, + "grad_norm": 1.245024966542371, + "learning_rate": 1.6956937292444386e-06, + "loss": 0.4961121678352356, + "step": 2537 + }, + { + "epoch": 0.5851971408807932, + "grad_norm": 1.1864554840937962, + "learning_rate": 1.6954198040484617e-06, + "loss": 0.5115770101547241, + "step": 2538 + }, + { + "epoch": 0.5854277150103758, + "grad_norm": 1.41778667190123, + "learning_rate": 1.6951457777661686e-06, + "loss": 0.540202260017395, + "step": 2539 + }, + { + "epoch": 0.5856582891399585, + "grad_norm": 1.3238570605319384, + "learning_rate": 1.6948716504373914e-06, + "loss": 
0.5312114357948303, + "step": 2540 + }, + { + "epoch": 0.5858888632695411, + "grad_norm": 1.1842147435507233, + "learning_rate": 1.694597422101978e-06, + "loss": 0.49323517084121704, + "step": 2541 + }, + { + "epoch": 0.5861194373991239, + "grad_norm": 1.3138451660312804, + "learning_rate": 1.6943230927997894e-06, + "loss": 0.42929738759994507, + "step": 2542 + }, + { + "epoch": 0.5863500115287065, + "grad_norm": 1.2474057622168624, + "learning_rate": 1.6940486625707021e-06, + "loss": 0.45236462354660034, + "step": 2543 + }, + { + "epoch": 0.5865805856582892, + "grad_norm": 1.1944700996273265, + "learning_rate": 1.6937741314546084e-06, + "loss": 0.5129071474075317, + "step": 2544 + }, + { + "epoch": 0.5868111597878718, + "grad_norm": 1.303867373152147, + "learning_rate": 1.693499499491413e-06, + "loss": 0.5562577247619629, + "step": 2545 + }, + { + "epoch": 0.5870417339174545, + "grad_norm": 1.472236761409707, + "learning_rate": 1.6932247667210372e-06, + "loss": 0.5593177080154419, + "step": 2546 + }, + { + "epoch": 0.5872723080470371, + "grad_norm": 1.666463518969871, + "learning_rate": 1.692949933183416e-06, + "loss": 0.5536680221557617, + "step": 2547 + }, + { + "epoch": 0.5875028821766198, + "grad_norm": 1.552275933236934, + "learning_rate": 1.6926749989184993e-06, + "loss": 0.5523338317871094, + "step": 2548 + }, + { + "epoch": 0.5877334563062024, + "grad_norm": 1.3066438958077835, + "learning_rate": 1.692399963966251e-06, + "loss": 0.41815924644470215, + "step": 2549 + }, + { + "epoch": 0.5879640304357852, + "grad_norm": 1.1800035534558937, + "learning_rate": 1.6921248283666508e-06, + "loss": 0.46959248185157776, + "step": 2550 + }, + { + "epoch": 0.5881946045653678, + "grad_norm": 1.2343992191174948, + "learning_rate": 1.6918495921596928e-06, + "loss": 0.4748489260673523, + "step": 2551 + }, + { + "epoch": 0.5884251786949505, + "grad_norm": 1.853505775613954, + "learning_rate": 1.6915742553853845e-06, + "loss": 0.4541524052619934, + "step": 2552 + }, + { + 
"epoch": 0.5886557528245331, + "grad_norm": 1.2688298570187295, + "learning_rate": 1.691298818083749e-06, + "loss": 0.47106000781059265, + "step": 2553 + }, + { + "epoch": 0.5888863269541158, + "grad_norm": 1.6112122400264717, + "learning_rate": 1.6910232802948246e-06, + "loss": 0.5364842414855957, + "step": 2554 + }, + { + "epoch": 0.5891169010836984, + "grad_norm": 1.402469759006704, + "learning_rate": 1.690747642058663e-06, + "loss": 0.48388350009918213, + "step": 2555 + }, + { + "epoch": 0.5893474752132811, + "grad_norm": 1.1992143425994695, + "learning_rate": 1.690471903415331e-06, + "loss": 0.5075609683990479, + "step": 2556 + }, + { + "epoch": 0.5895780493428637, + "grad_norm": 1.2039147901396619, + "learning_rate": 1.6901960644049102e-06, + "loss": 0.45098066329956055, + "step": 2557 + }, + { + "epoch": 0.5898086234724463, + "grad_norm": 1.1869247135212617, + "learning_rate": 1.6899201250674966e-06, + "loss": 0.5329077243804932, + "step": 2558 + }, + { + "epoch": 0.590039197602029, + "grad_norm": 1.2771607201573625, + "learning_rate": 1.6896440854432005e-06, + "loss": 0.4632904529571533, + "step": 2559 + }, + { + "epoch": 0.5902697717316117, + "grad_norm": 1.3016593794447966, + "learning_rate": 1.6893679455721474e-06, + "loss": 0.5302451848983765, + "step": 2560 + }, + { + "epoch": 0.5905003458611944, + "grad_norm": 1.1349040723062418, + "learning_rate": 1.6890917054944768e-06, + "loss": 0.45363447070121765, + "step": 2561 + }, + { + "epoch": 0.590730919990777, + "grad_norm": 1.3869965053274627, + "learning_rate": 1.688815365250343e-06, + "loss": 0.5103914737701416, + "step": 2562 + }, + { + "epoch": 0.5909614941203597, + "grad_norm": 1.2859854063949494, + "learning_rate": 1.6885389248799152e-06, + "loss": 0.45474469661712646, + "step": 2563 + }, + { + "epoch": 0.5911920682499423, + "grad_norm": 1.3905925832105772, + "learning_rate": 1.6882623844233766e-06, + "loss": 0.517952024936676, + "step": 2564 + }, + { + "epoch": 0.591422642379525, + "grad_norm": 
1.456181517852448, + "learning_rate": 1.6879857439209245e-06, + "loss": 0.4872232973575592, + "step": 2565 + }, + { + "epoch": 0.5916532165091076, + "grad_norm": 1.146992588808451, + "learning_rate": 1.6877090034127726e-06, + "loss": 0.4938408136367798, + "step": 2566 + }, + { + "epoch": 0.5918837906386903, + "grad_norm": 0.9819996395503116, + "learning_rate": 1.6874321629391469e-06, + "loss": 0.42687565088272095, + "step": 2567 + }, + { + "epoch": 0.592114364768273, + "grad_norm": 1.8882181325825955, + "learning_rate": 1.6871552225402896e-06, + "loss": 0.5272493362426758, + "step": 2568 + }, + { + "epoch": 0.5923449388978557, + "grad_norm": 1.265485903227574, + "learning_rate": 1.6868781822564565e-06, + "loss": 0.4643193185329437, + "step": 2569 + }, + { + "epoch": 0.5925755130274383, + "grad_norm": 1.5054555077342378, + "learning_rate": 1.6866010421279183e-06, + "loss": 0.4957782030105591, + "step": 2570 + }, + { + "epoch": 0.592806087157021, + "grad_norm": 1.2319191303045371, + "learning_rate": 1.6863238021949605e-06, + "loss": 0.442360520362854, + "step": 2571 + }, + { + "epoch": 0.5930366612866036, + "grad_norm": 1.365610357460579, + "learning_rate": 1.6860464624978824e-06, + "loss": 0.5108935832977295, + "step": 2572 + }, + { + "epoch": 0.5932672354161863, + "grad_norm": 1.1047616502548026, + "learning_rate": 1.6857690230769976e-06, + "loss": 0.46559715270996094, + "step": 2573 + }, + { + "epoch": 0.5934978095457689, + "grad_norm": 1.2296310276846145, + "learning_rate": 1.6854914839726356e-06, + "loss": 0.44752076268196106, + "step": 2574 + }, + { + "epoch": 0.5937283836753516, + "grad_norm": 1.6735698653712807, + "learning_rate": 1.6852138452251387e-06, + "loss": 0.4018149971961975, + "step": 2575 + }, + { + "epoch": 0.5939589578049342, + "grad_norm": 1.407358523561205, + "learning_rate": 1.6849361068748652e-06, + "loss": 0.47711417078971863, + "step": 2576 + }, + { + "epoch": 0.594189531934517, + "grad_norm": 1.3386417354625197, + "learning_rate": 
1.684658268962187e-06, + "loss": 0.4671875834465027, + "step": 2577 + }, + { + "epoch": 0.5944201060640996, + "grad_norm": 1.2780841808458634, + "learning_rate": 1.6843803315274906e-06, + "loss": 0.48041921854019165, + "step": 2578 + }, + { + "epoch": 0.5946506801936823, + "grad_norm": 1.105183308056311, + "learning_rate": 1.6841022946111772e-06, + "loss": 0.3444385528564453, + "step": 2579 + }, + { + "epoch": 0.5948812543232649, + "grad_norm": 1.3054472047651338, + "learning_rate": 1.6838241582536619e-06, + "loss": 0.46800029277801514, + "step": 2580 + }, + { + "epoch": 0.5951118284528476, + "grad_norm": 1.7022638621771704, + "learning_rate": 1.683545922495375e-06, + "loss": 0.4362339377403259, + "step": 2581 + }, + { + "epoch": 0.5953424025824302, + "grad_norm": 1.5138702229312708, + "learning_rate": 1.6832675873767606e-06, + "loss": 0.4818536043167114, + "step": 2582 + }, + { + "epoch": 0.5955729767120129, + "grad_norm": 1.1464685816902647, + "learning_rate": 1.6829891529382775e-06, + "loss": 0.47899681329727173, + "step": 2583 + }, + { + "epoch": 0.5958035508415955, + "grad_norm": 1.028545290493661, + "learning_rate": 1.6827106192203995e-06, + "loss": 0.4239576458930969, + "step": 2584 + }, + { + "epoch": 0.5960341249711782, + "grad_norm": 1.299757224081726, + "learning_rate": 1.6824319862636136e-06, + "loss": 0.545168399810791, + "step": 2585 + }, + { + "epoch": 0.5962646991007609, + "grad_norm": 1.1433294908143323, + "learning_rate": 1.6821532541084228e-06, + "loss": 0.4238642156124115, + "step": 2586 + }, + { + "epoch": 0.5964952732303436, + "grad_norm": 1.1214453575304018, + "learning_rate": 1.6818744227953422e-06, + "loss": 0.39589810371398926, + "step": 2587 + }, + { + "epoch": 0.5967258473599262, + "grad_norm": 1.1696584305728281, + "learning_rate": 1.6815954923649044e-06, + "loss": 0.4358367919921875, + "step": 2588 + }, + { + "epoch": 0.5969564214895089, + "grad_norm": 1.232714944175718, + "learning_rate": 1.6813164628576538e-06, + "loss": 
0.5012080073356628, + "step": 2589 + }, + { + "epoch": 0.5971869956190915, + "grad_norm": 1.0762630624781258, + "learning_rate": 1.6810373343141503e-06, + "loss": 0.4637286365032196, + "step": 2590 + }, + { + "epoch": 0.5974175697486742, + "grad_norm": 1.4947457348694884, + "learning_rate": 1.6807581067749684e-06, + "loss": 0.6130828261375427, + "step": 2591 + }, + { + "epoch": 0.5976481438782568, + "grad_norm": 1.538167494741888, + "learning_rate": 1.680478780280696e-06, + "loss": 0.5430021286010742, + "step": 2592 + }, + { + "epoch": 0.5978787180078395, + "grad_norm": 1.4318445545867842, + "learning_rate": 1.6801993548719368e-06, + "loss": 0.5195741653442383, + "step": 2593 + }, + { + "epoch": 0.5981092921374221, + "grad_norm": 1.4741188457279395, + "learning_rate": 1.6799198305893077e-06, + "loss": 0.5452337265014648, + "step": 2594 + }, + { + "epoch": 0.5983398662670049, + "grad_norm": 1.1858829095847359, + "learning_rate": 1.6796402074734402e-06, + "loss": 0.4802110493183136, + "step": 2595 + }, + { + "epoch": 0.5985704403965875, + "grad_norm": 1.114234548006963, + "learning_rate": 1.679360485564981e-06, + "loss": 0.48554790019989014, + "step": 2596 + }, + { + "epoch": 0.5988010145261702, + "grad_norm": 1.3519600489481014, + "learning_rate": 1.6790806649045896e-06, + "loss": 0.5151324272155762, + "step": 2597 + }, + { + "epoch": 0.5990315886557528, + "grad_norm": 1.4134149785589025, + "learning_rate": 1.6788007455329419e-06, + "loss": 0.5122699737548828, + "step": 2598 + }, + { + "epoch": 0.5992621627853355, + "grad_norm": 1.0762809832802989, + "learning_rate": 1.6785207274907258e-06, + "loss": 0.47776496410369873, + "step": 2599 + }, + { + "epoch": 0.5994927369149181, + "grad_norm": 1.3625217888513212, + "learning_rate": 1.6782406108186455e-06, + "loss": 0.5653492212295532, + "step": 2600 + }, + { + "epoch": 0.5997233110445008, + "grad_norm": 1.2197147141619178, + "learning_rate": 1.677960395557419e-06, + "loss": 0.44313424825668335, + "step": 2601 + }, + { + 
"epoch": 0.5999538851740834, + "grad_norm": 1.137470066753919, + "learning_rate": 1.677680081747778e-06, + "loss": 0.40465259552001953, + "step": 2602 + }, + { + "epoch": 0.6001844593036662, + "grad_norm": 1.4481779333184874, + "learning_rate": 1.6773996694304687e-06, + "loss": 0.5488068461418152, + "step": 2603 + }, + { + "epoch": 0.6004150334332488, + "grad_norm": 1.2545703783665254, + "learning_rate": 1.6771191586462523e-06, + "loss": 0.5122859477996826, + "step": 2604 + }, + { + "epoch": 0.6006456075628315, + "grad_norm": 1.2685821503383574, + "learning_rate": 1.6768385494359039e-06, + "loss": 0.47173869609832764, + "step": 2605 + }, + { + "epoch": 0.6008761816924141, + "grad_norm": 1.342808103655164, + "learning_rate": 1.6765578418402129e-06, + "loss": 0.527764081954956, + "step": 2606 + }, + { + "epoch": 0.6011067558219968, + "grad_norm": 1.7106657610470863, + "learning_rate": 1.6762770358999826e-06, + "loss": 0.5399610996246338, + "step": 2607 + }, + { + "epoch": 0.6013373299515794, + "grad_norm": 1.1677908773060481, + "learning_rate": 1.6759961316560314e-06, + "loss": 0.3441581428050995, + "step": 2608 + }, + { + "epoch": 0.6015679040811621, + "grad_norm": 1.2546350672529525, + "learning_rate": 1.6757151291491916e-06, + "loss": 0.5027580857276917, + "step": 2609 + }, + { + "epoch": 0.6017984782107447, + "grad_norm": 1.6099655975362483, + "learning_rate": 1.6754340284203095e-06, + "loss": 0.3898310363292694, + "step": 2610 + }, + { + "epoch": 0.6020290523403274, + "grad_norm": 1.5075448921993653, + "learning_rate": 1.675152829510246e-06, + "loss": 0.5577199459075928, + "step": 2611 + }, + { + "epoch": 0.60225962646991, + "grad_norm": 1.178797634573082, + "learning_rate": 1.6748715324598763e-06, + "loss": 0.47849035263061523, + "step": 2612 + }, + { + "epoch": 0.6024902005994928, + "grad_norm": 1.2674537093214957, + "learning_rate": 1.6745901373100896e-06, + "loss": 0.46845290064811707, + "step": 2613 + }, + { + "epoch": 0.6027207747290754, + "grad_norm": 
1.4078882858329094, + "learning_rate": 1.6743086441017899e-06, + "loss": 0.46008870005607605, + "step": 2614 + }, + { + "epoch": 0.6029513488586581, + "grad_norm": 1.3347721564783812, + "learning_rate": 1.6740270528758948e-06, + "loss": 0.44386154413223267, + "step": 2615 + }, + { + "epoch": 0.6031819229882407, + "grad_norm": 1.2103476019651458, + "learning_rate": 1.6737453636733364e-06, + "loss": 0.495368629693985, + "step": 2616 + }, + { + "epoch": 0.6034124971178234, + "grad_norm": 1.257056760083973, + "learning_rate": 1.6734635765350613e-06, + "loss": 0.519428551197052, + "step": 2617 + }, + { + "epoch": 0.603643071247406, + "grad_norm": 1.5181965589957365, + "learning_rate": 1.6731816915020302e-06, + "loss": 0.49346470832824707, + "step": 2618 + }, + { + "epoch": 0.6038736453769887, + "grad_norm": 1.3323089431428572, + "learning_rate": 1.6728997086152173e-06, + "loss": 0.554854691028595, + "step": 2619 + }, + { + "epoch": 0.6041042195065713, + "grad_norm": 1.503361315997137, + "learning_rate": 1.6726176279156125e-06, + "loss": 0.4930881857872009, + "step": 2620 + }, + { + "epoch": 0.604334793636154, + "grad_norm": 1.1576996092953873, + "learning_rate": 1.6723354494442186e-06, + "loss": 0.4082447588443756, + "step": 2621 + }, + { + "epoch": 0.6045653677657367, + "grad_norm": 1.2572245396068074, + "learning_rate": 1.6720531732420531e-06, + "loss": 0.5151821374893188, + "step": 2622 + }, + { + "epoch": 0.6047959418953194, + "grad_norm": 1.6316483356509275, + "learning_rate": 1.671770799350148e-06, + "loss": 0.44579264521598816, + "step": 2623 + }, + { + "epoch": 0.605026516024902, + "grad_norm": 1.5349454914737826, + "learning_rate": 1.6714883278095489e-06, + "loss": 0.4937717020511627, + "step": 2624 + }, + { + "epoch": 0.6052570901544847, + "grad_norm": 1.4939841287703146, + "learning_rate": 1.671205758661316e-06, + "loss": 0.46298685669898987, + "step": 2625 + }, + { + "epoch": 0.6054876642840673, + "grad_norm": 1.3089529059854432, + "learning_rate": 
1.6709230919465233e-06, + "loss": 0.5535221695899963, + "step": 2626 + }, + { + "epoch": 0.60571823841365, + "grad_norm": 1.2781536932155106, + "learning_rate": 1.6706403277062599e-06, + "loss": 0.5289112329483032, + "step": 2627 + }, + { + "epoch": 0.6059488125432326, + "grad_norm": 1.2619858231183905, + "learning_rate": 1.6703574659816285e-06, + "loss": 0.506280779838562, + "step": 2628 + }, + { + "epoch": 0.6061793866728153, + "grad_norm": 1.366142383501645, + "learning_rate": 1.6700745068137451e-06, + "loss": 0.504257082939148, + "step": 2629 + }, + { + "epoch": 0.606409960802398, + "grad_norm": 1.2835196483556859, + "learning_rate": 1.6697914502437411e-06, + "loss": 0.624682605266571, + "step": 2630 + }, + { + "epoch": 0.6066405349319807, + "grad_norm": 1.1715096985967743, + "learning_rate": 1.6695082963127617e-06, + "loss": 0.4539645314216614, + "step": 2631 + }, + { + "epoch": 0.6068711090615633, + "grad_norm": 1.2852717924915888, + "learning_rate": 1.6692250450619665e-06, + "loss": 0.5461890697479248, + "step": 2632 + }, + { + "epoch": 0.607101683191146, + "grad_norm": 1.2251930368732282, + "learning_rate": 1.6689416965325282e-06, + "loss": 0.615606427192688, + "step": 2633 + }, + { + "epoch": 0.6073322573207286, + "grad_norm": 1.3904526684847855, + "learning_rate": 1.668658250765635e-06, + "loss": 0.5355387926101685, + "step": 2634 + }, + { + "epoch": 0.6075628314503113, + "grad_norm": 1.1464900003631002, + "learning_rate": 1.6683747078024886e-06, + "loss": 0.5804985165596008, + "step": 2635 + }, + { + "epoch": 0.6077934055798939, + "grad_norm": 1.1983123193544134, + "learning_rate": 1.6680910676843042e-06, + "loss": 0.4514031410217285, + "step": 2636 + }, + { + "epoch": 0.6080239797094766, + "grad_norm": 1.3446092692413514, + "learning_rate": 1.6678073304523123e-06, + "loss": 0.5621001720428467, + "step": 2637 + }, + { + "epoch": 0.6082545538390592, + "grad_norm": 1.3749875179413227, + "learning_rate": 1.667523496147757e-06, + "loss": 0.49387669563293457, 
+ "step": 2638 + }, + { + "epoch": 0.608485127968642, + "grad_norm": 1.0479438264918854, + "learning_rate": 1.6672395648118966e-06, + "loss": 0.5857938528060913, + "step": 2639 + }, + { + "epoch": 0.6087157020982246, + "grad_norm": 1.149056345239141, + "learning_rate": 1.6669555364860029e-06, + "loss": 0.46403199434280396, + "step": 2640 + }, + { + "epoch": 0.6089462762278073, + "grad_norm": 1.2068025098167319, + "learning_rate": 1.6666714112113627e-06, + "loss": 0.4998488128185272, + "step": 2641 + }, + { + "epoch": 0.6091768503573899, + "grad_norm": 1.3686546841392573, + "learning_rate": 1.6663871890292765e-06, + "loss": 0.6291745901107788, + "step": 2642 + }, + { + "epoch": 0.6094074244869726, + "grad_norm": 1.7034971765108011, + "learning_rate": 1.6661028699810587e-06, + "loss": 0.6326058506965637, + "step": 2643 + }, + { + "epoch": 0.6096379986165552, + "grad_norm": 1.2748339439376004, + "learning_rate": 1.6658184541080378e-06, + "loss": 0.5737805366516113, + "step": 2644 + }, + { + "epoch": 0.6098685727461379, + "grad_norm": 1.435593858390691, + "learning_rate": 1.6655339414515568e-06, + "loss": 0.565047025680542, + "step": 2645 + }, + { + "epoch": 0.6100991468757205, + "grad_norm": 1.154269897254632, + "learning_rate": 1.6652493320529724e-06, + "loss": 0.5157296061515808, + "step": 2646 + }, + { + "epoch": 0.6103297210053032, + "grad_norm": 1.2671967095996914, + "learning_rate": 1.6649646259536554e-06, + "loss": 0.4475112855434418, + "step": 2647 + }, + { + "epoch": 0.6105602951348859, + "grad_norm": 1.4397592539357233, + "learning_rate": 1.6646798231949911e-06, + "loss": 0.5072107315063477, + "step": 2648 + }, + { + "epoch": 0.6107908692644686, + "grad_norm": 1.3901386223871963, + "learning_rate": 1.6643949238183778e-06, + "loss": 0.44673952460289, + "step": 2649 + }, + { + "epoch": 0.6110214433940512, + "grad_norm": 1.4046630639478026, + "learning_rate": 1.6641099278652293e-06, + "loss": 0.47460734844207764, + "step": 2650 + }, + { + "epoch": 
0.6112520175236339, + "grad_norm": 1.251836663583678, + "learning_rate": 1.6638248353769718e-06, + "loss": 0.4529770612716675, + "step": 2651 + }, + { + "epoch": 0.6114825916532165, + "grad_norm": 1.4298404685971746, + "learning_rate": 1.6635396463950473e-06, + "loss": 0.5200958251953125, + "step": 2652 + }, + { + "epoch": 0.6117131657827992, + "grad_norm": 1.4871792439140996, + "learning_rate": 1.66325436096091e-06, + "loss": 0.465969979763031, + "step": 2653 + }, + { + "epoch": 0.6119437399123818, + "grad_norm": 1.1085493213804483, + "learning_rate": 1.6629689791160298e-06, + "loss": 0.5173276662826538, + "step": 2654 + }, + { + "epoch": 0.6121743140419645, + "grad_norm": 1.246647464420017, + "learning_rate": 1.6626835009018892e-06, + "loss": 0.5539907217025757, + "step": 2655 + }, + { + "epoch": 0.6124048881715471, + "grad_norm": 1.1686862955670068, + "learning_rate": 1.6623979263599857e-06, + "loss": 0.5617278814315796, + "step": 2656 + }, + { + "epoch": 0.6126354623011299, + "grad_norm": 1.3640942620216159, + "learning_rate": 1.6621122555318304e-06, + "loss": 0.46238285303115845, + "step": 2657 + }, + { + "epoch": 0.6128660364307125, + "grad_norm": 1.4695540598112733, + "learning_rate": 1.6618264884589484e-06, + "loss": 0.49247878789901733, + "step": 2658 + }, + { + "epoch": 0.6130966105602952, + "grad_norm": 1.0811892876151687, + "learning_rate": 1.6615406251828793e-06, + "loss": 0.4844072163105011, + "step": 2659 + }, + { + "epoch": 0.6133271846898778, + "grad_norm": 1.2024921886284354, + "learning_rate": 1.6612546657451754e-06, + "loss": 0.47372323274612427, + "step": 2660 + }, + { + "epoch": 0.6135577588194605, + "grad_norm": 1.299485129998275, + "learning_rate": 1.660968610187404e-06, + "loss": 0.5287426114082336, + "step": 2661 + }, + { + "epoch": 0.6137883329490431, + "grad_norm": 1.4640884136716181, + "learning_rate": 1.6606824585511471e-06, + "loss": 0.5862994194030762, + "step": 2662 + }, + { + "epoch": 0.6140189070786258, + "grad_norm": 
1.0158009777389652, + "learning_rate": 1.6603962108779986e-06, + "loss": 0.4866197109222412, + "step": 2663 + }, + { + "epoch": 0.6142494812082084, + "grad_norm": 1.408246184243547, + "learning_rate": 1.660109867209568e-06, + "loss": 0.5561861991882324, + "step": 2664 + }, + { + "epoch": 0.6144800553377912, + "grad_norm": 1.214620364544681, + "learning_rate": 1.659823427587478e-06, + "loss": 0.4878644645214081, + "step": 2665 + }, + { + "epoch": 0.6147106294673738, + "grad_norm": 1.3262957238727335, + "learning_rate": 1.659536892053366e-06, + "loss": 0.5371976494789124, + "step": 2666 + }, + { + "epoch": 0.6149412035969565, + "grad_norm": 1.2817478175527077, + "learning_rate": 1.6592502606488824e-06, + "loss": 0.4816581606864929, + "step": 2667 + }, + { + "epoch": 0.6151717777265391, + "grad_norm": 1.1536826566839264, + "learning_rate": 1.6589635334156919e-06, + "loss": 0.5105183124542236, + "step": 2668 + }, + { + "epoch": 0.6154023518561217, + "grad_norm": 1.4584261311401567, + "learning_rate": 1.6586767103954737e-06, + "loss": 0.5524129271507263, + "step": 2669 + }, + { + "epoch": 0.6156329259857044, + "grad_norm": 1.3107384301518328, + "learning_rate": 1.6583897916299204e-06, + "loss": 0.42373913526535034, + "step": 2670 + }, + { + "epoch": 0.615863500115287, + "grad_norm": 1.3724263799580212, + "learning_rate": 1.658102777160738e-06, + "loss": 0.5620803833007812, + "step": 2671 + }, + { + "epoch": 0.6160940742448697, + "grad_norm": 1.3004346965884186, + "learning_rate": 1.6578156670296472e-06, + "loss": 0.38180166482925415, + "step": 2672 + }, + { + "epoch": 0.6163246483744523, + "grad_norm": 1.2109058692777805, + "learning_rate": 1.6575284612783825e-06, + "loss": 0.48596519231796265, + "step": 2673 + }, + { + "epoch": 0.616555222504035, + "grad_norm": 1.1846928230852602, + "learning_rate": 1.657241159948692e-06, + "loss": 0.5098127126693726, + "step": 2674 + }, + { + "epoch": 0.6167857966336177, + "grad_norm": 1.5943292852368571, + "learning_rate": 
1.6569537630823382e-06, + "loss": 0.5650018453598022, + "step": 2675 + }, + { + "epoch": 0.6170163707632004, + "grad_norm": 1.1501551859696775, + "learning_rate": 1.6566662707210967e-06, + "loss": 0.45061948895454407, + "step": 2676 + }, + { + "epoch": 0.617246944892783, + "grad_norm": 1.3028951742766879, + "learning_rate": 1.6563786829067576e-06, + "loss": 0.4292137622833252, + "step": 2677 + }, + { + "epoch": 0.6174775190223657, + "grad_norm": 1.269567036808456, + "learning_rate": 1.656090999681125e-06, + "loss": 0.4837046265602112, + "step": 2678 + }, + { + "epoch": 0.6177080931519483, + "grad_norm": 1.9486185906204885, + "learning_rate": 1.6558032210860162e-06, + "loss": 0.43580353260040283, + "step": 2679 + }, + { + "epoch": 0.617938667281531, + "grad_norm": 1.2529677917985589, + "learning_rate": 1.6555153471632628e-06, + "loss": 0.47321656346321106, + "step": 2680 + }, + { + "epoch": 0.6181692414111136, + "grad_norm": 1.1423229113084605, + "learning_rate": 1.65522737795471e-06, + "loss": 0.47431111335754395, + "step": 2681 + }, + { + "epoch": 0.6183998155406963, + "grad_norm": 0.9698177160310311, + "learning_rate": 1.6549393135022181e-06, + "loss": 0.38062599301338196, + "step": 2682 + }, + { + "epoch": 0.618630389670279, + "grad_norm": 1.2758905094442272, + "learning_rate": 1.6546511538476584e-06, + "loss": 0.5941839218139648, + "step": 2683 + }, + { + "epoch": 0.6188609637998617, + "grad_norm": 1.453087551621585, + "learning_rate": 1.6543628990329195e-06, + "loss": 0.5323158502578735, + "step": 2684 + }, + { + "epoch": 0.6190915379294443, + "grad_norm": 1.100143863509344, + "learning_rate": 1.654074549099901e-06, + "loss": 0.3814772367477417, + "step": 2685 + }, + { + "epoch": 0.619322112059027, + "grad_norm": 1.5499952709692644, + "learning_rate": 1.6537861040905181e-06, + "loss": 0.5520694255828857, + "step": 2686 + }, + { + "epoch": 0.6195526861886096, + "grad_norm": 1.297782443862308, + "learning_rate": 1.653497564046699e-06, + "loss": 
0.5514999628067017, + "step": 2687 + }, + { + "epoch": 0.6197832603181923, + "grad_norm": 1.2170603559624027, + "learning_rate": 1.653208929010386e-06, + "loss": 0.39057493209838867, + "step": 2688 + }, + { + "epoch": 0.6200138344477749, + "grad_norm": 1.0224470752428403, + "learning_rate": 1.6529201990235352e-06, + "loss": 0.4941304922103882, + "step": 2689 + }, + { + "epoch": 0.6202444085773576, + "grad_norm": 1.2590211215766611, + "learning_rate": 1.6526313741281164e-06, + "loss": 0.539762020111084, + "step": 2690 + }, + { + "epoch": 0.6204749827069402, + "grad_norm": 1.3801421787603734, + "learning_rate": 1.6523424543661127e-06, + "loss": 0.49524787068367004, + "step": 2691 + }, + { + "epoch": 0.620705556836523, + "grad_norm": 1.2158625492501351, + "learning_rate": 1.6520534397795225e-06, + "loss": 0.4261528253555298, + "step": 2692 + }, + { + "epoch": 0.6209361309661056, + "grad_norm": 1.3188986304771895, + "learning_rate": 1.6517643304103563e-06, + "loss": 0.578548789024353, + "step": 2693 + }, + { + "epoch": 0.6211667050956883, + "grad_norm": 1.24168526725964, + "learning_rate": 1.6514751263006393e-06, + "loss": 0.4766680598258972, + "step": 2694 + }, + { + "epoch": 0.6213972792252709, + "grad_norm": 1.135518406763033, + "learning_rate": 1.6511858274924098e-06, + "loss": 0.4146459996700287, + "step": 2695 + }, + { + "epoch": 0.6216278533548536, + "grad_norm": 1.4632792907408574, + "learning_rate": 1.650896434027721e-06, + "loss": 0.5148390531539917, + "step": 2696 + }, + { + "epoch": 0.6218584274844362, + "grad_norm": 1.1678475162221296, + "learning_rate": 1.6506069459486388e-06, + "loss": 0.4830890893936157, + "step": 2697 + }, + { + "epoch": 0.6220890016140189, + "grad_norm": 1.2027318756470287, + "learning_rate": 1.6503173632972434e-06, + "loss": 0.4550463557243347, + "step": 2698 + }, + { + "epoch": 0.6223195757436015, + "grad_norm": 1.3023820822101895, + "learning_rate": 1.6500276861156284e-06, + "loss": 0.5811448097229004, + "step": 2699 + }, + { + 
"epoch": 0.6225501498731842, + "grad_norm": 1.3807858518585416, + "learning_rate": 1.6497379144459014e-06, + "loss": 0.44733545184135437, + "step": 2700 + }, + { + "epoch": 0.6227807240027669, + "grad_norm": 1.103384717152327, + "learning_rate": 1.6494480483301835e-06, + "loss": 0.4379687011241913, + "step": 2701 + }, + { + "epoch": 0.6230112981323496, + "grad_norm": 1.326644045971959, + "learning_rate": 1.6491580878106102e-06, + "loss": 0.5163959860801697, + "step": 2702 + }, + { + "epoch": 0.6232418722619322, + "grad_norm": 1.2037310331107272, + "learning_rate": 1.6488680329293297e-06, + "loss": 0.5636980533599854, + "step": 2703 + }, + { + "epoch": 0.6234724463915149, + "grad_norm": 1.1847301227909297, + "learning_rate": 1.6485778837285044e-06, + "loss": 0.46942776441574097, + "step": 2704 + }, + { + "epoch": 0.6237030205210975, + "grad_norm": 1.3867166397057658, + "learning_rate": 1.6482876402503103e-06, + "loss": 0.5104436278343201, + "step": 2705 + }, + { + "epoch": 0.6239335946506802, + "grad_norm": 1.2701601489299654, + "learning_rate": 1.6479973025369379e-06, + "loss": 0.4689507484436035, + "step": 2706 + }, + { + "epoch": 0.6241641687802628, + "grad_norm": 1.2388644364900292, + "learning_rate": 1.64770687063059e-06, + "loss": 0.4009973406791687, + "step": 2707 + }, + { + "epoch": 0.6243947429098455, + "grad_norm": 1.4958191711517836, + "learning_rate": 1.6474163445734846e-06, + "loss": 0.4938286542892456, + "step": 2708 + }, + { + "epoch": 0.6246253170394281, + "grad_norm": 1.2939637643231117, + "learning_rate": 1.6471257244078519e-06, + "loss": 0.4756525754928589, + "step": 2709 + }, + { + "epoch": 0.6248558911690109, + "grad_norm": 1.0308841763344028, + "learning_rate": 1.6468350101759366e-06, + "loss": 0.4322332739830017, + "step": 2710 + }, + { + "epoch": 0.6250864652985935, + "grad_norm": 1.381148895283306, + "learning_rate": 1.6465442019199972e-06, + "loss": 0.4605666995048523, + "step": 2711 + }, + { + "epoch": 0.6253170394281762, + "grad_norm": 
1.3288993921232848, + "learning_rate": 1.6462532996823053e-06, + "loss": 0.4576036334037781, + "step": 2712 + }, + { + "epoch": 0.6255476135577588, + "grad_norm": 1.1587792990864858, + "learning_rate": 1.645962303505147e-06, + "loss": 0.4860233664512634, + "step": 2713 + }, + { + "epoch": 0.6257781876873415, + "grad_norm": 1.2195714743605923, + "learning_rate": 1.6456712134308213e-06, + "loss": 0.4717915654182434, + "step": 2714 + }, + { + "epoch": 0.6260087618169241, + "grad_norm": 1.1008237671202603, + "learning_rate": 1.645380029501641e-06, + "loss": 0.49637067317962646, + "step": 2715 + }, + { + "epoch": 0.6262393359465068, + "grad_norm": 1.2218828759453872, + "learning_rate": 1.6450887517599326e-06, + "loss": 0.45388346910476685, + "step": 2716 + }, + { + "epoch": 0.6264699100760894, + "grad_norm": 1.6333623536070287, + "learning_rate": 1.6447973802480362e-06, + "loss": 0.5549031496047974, + "step": 2717 + }, + { + "epoch": 0.6267004842056721, + "grad_norm": 1.333805192555573, + "learning_rate": 1.644505915008306e-06, + "loss": 0.39759719371795654, + "step": 2718 + }, + { + "epoch": 0.6269310583352548, + "grad_norm": 1.2648542744381963, + "learning_rate": 1.644214356083109e-06, + "loss": 0.5126739740371704, + "step": 2719 + }, + { + "epoch": 0.6271616324648375, + "grad_norm": 1.1846129595938097, + "learning_rate": 1.6439227035148265e-06, + "loss": 0.41424083709716797, + "step": 2720 + }, + { + "epoch": 0.6273922065944201, + "grad_norm": 1.2295786085250646, + "learning_rate": 1.643630957345853e-06, + "loss": 0.5829803943634033, + "step": 2721 + }, + { + "epoch": 0.6276227807240028, + "grad_norm": 1.2114307243350246, + "learning_rate": 1.6433391176185972e-06, + "loss": 0.4736567437648773, + "step": 2722 + }, + { + "epoch": 0.6278533548535854, + "grad_norm": 1.4670818430092263, + "learning_rate": 1.6430471843754804e-06, + "loss": 0.41305306553840637, + "step": 2723 + }, + { + "epoch": 0.6280839289831681, + "grad_norm": 1.5480231340195962, + "learning_rate": 
1.6427551576589383e-06, + "loss": 0.38422563672065735, + "step": 2724 + }, + { + "epoch": 0.6283145031127507, + "grad_norm": 1.3725795006115715, + "learning_rate": 1.6424630375114199e-06, + "loss": 0.48302626609802246, + "step": 2725 + }, + { + "epoch": 0.6285450772423334, + "grad_norm": 1.2880102228926575, + "learning_rate": 1.6421708239753875e-06, + "loss": 0.4657328128814697, + "step": 2726 + }, + { + "epoch": 0.628775651371916, + "grad_norm": 1.4057295929235551, + "learning_rate": 1.641878517093318e-06, + "loss": 0.46126431226730347, + "step": 2727 + }, + { + "epoch": 0.6290062255014988, + "grad_norm": 1.3246078376538457, + "learning_rate": 1.6415861169077007e-06, + "loss": 0.5196214914321899, + "step": 2728 + }, + { + "epoch": 0.6292367996310814, + "grad_norm": 1.4794856753558834, + "learning_rate": 1.641293623461039e-06, + "loss": 0.5007073879241943, + "step": 2729 + }, + { + "epoch": 0.6294673737606641, + "grad_norm": 1.1543847272279724, + "learning_rate": 1.64100103679585e-06, + "loss": 0.4699769616127014, + "step": 2730 + }, + { + "epoch": 0.6296979478902467, + "grad_norm": 1.3221766888407216, + "learning_rate": 1.6407083569546636e-06, + "loss": 0.5487842559814453, + "step": 2731 + }, + { + "epoch": 0.6299285220198294, + "grad_norm": 1.0556125358940756, + "learning_rate": 1.6404155839800244e-06, + "loss": 0.42733538150787354, + "step": 2732 + }, + { + "epoch": 0.630159096149412, + "grad_norm": 1.1933689155818472, + "learning_rate": 1.64012271791449e-06, + "loss": 0.5105363726615906, + "step": 2733 + }, + { + "epoch": 0.6303896702789947, + "grad_norm": 1.3185367260440977, + "learning_rate": 1.6398297588006305e-06, + "loss": 0.5836968421936035, + "step": 2734 + }, + { + "epoch": 0.6306202444085773, + "grad_norm": 1.3830049962050668, + "learning_rate": 1.639536706681031e-06, + "loss": 0.4350558817386627, + "step": 2735 + }, + { + "epoch": 0.63085081853816, + "grad_norm": 1.4225393539645832, + "learning_rate": 1.63924356159829e-06, + "loss": 
0.5388341546058655, + "step": 2736 + }, + { + "epoch": 0.6310813926677427, + "grad_norm": 1.1218759160612528, + "learning_rate": 1.6389503235950186e-06, + "loss": 0.4576529860496521, + "step": 2737 + }, + { + "epoch": 0.6313119667973254, + "grad_norm": 1.524583554785293, + "learning_rate": 1.6386569927138422e-06, + "loss": 0.4525975286960602, + "step": 2738 + }, + { + "epoch": 0.631542540926908, + "grad_norm": 1.56840988374272, + "learning_rate": 1.6383635689973993e-06, + "loss": 0.42143142223358154, + "step": 2739 + }, + { + "epoch": 0.6317731150564907, + "grad_norm": 1.0672209595897675, + "learning_rate": 1.6380700524883423e-06, + "loss": 0.4440336227416992, + "step": 2740 + }, + { + "epoch": 0.6320036891860733, + "grad_norm": 1.2412570194863743, + "learning_rate": 1.637776443229336e-06, + "loss": 0.5009843707084656, + "step": 2741 + }, + { + "epoch": 0.632234263315656, + "grad_norm": 1.6736573631214935, + "learning_rate": 1.6374827412630604e-06, + "loss": 0.538151741027832, + "step": 2742 + }, + { + "epoch": 0.6324648374452386, + "grad_norm": 1.1895254537976463, + "learning_rate": 1.6371889466322077e-06, + "loss": 0.550201416015625, + "step": 2743 + }, + { + "epoch": 0.6326954115748213, + "grad_norm": 1.3861259597044466, + "learning_rate": 1.6368950593794836e-06, + "loss": 0.5707399845123291, + "step": 2744 + }, + { + "epoch": 0.632925985704404, + "grad_norm": 1.393827128295071, + "learning_rate": 1.6366010795476082e-06, + "loss": 0.5196787714958191, + "step": 2745 + }, + { + "epoch": 0.6331565598339867, + "grad_norm": 1.171378891149435, + "learning_rate": 1.636307007179314e-06, + "loss": 0.5243285894393921, + "step": 2746 + }, + { + "epoch": 0.6333871339635693, + "grad_norm": 1.249132441469792, + "learning_rate": 1.6360128423173473e-06, + "loss": 0.4202825427055359, + "step": 2747 + }, + { + "epoch": 0.633617708093152, + "grad_norm": 1.2547380834154716, + "learning_rate": 1.6357185850044681e-06, + "loss": 0.49080896377563477, + "step": 2748 + }, + { + "epoch": 
0.6338482822227346, + "grad_norm": 1.2234752623414968, + "learning_rate": 1.6354242352834502e-06, + "loss": 0.5537371635437012, + "step": 2749 + }, + { + "epoch": 0.6340788563523173, + "grad_norm": 1.1077493127634728, + "learning_rate": 1.6351297931970796e-06, + "loss": 0.3744293451309204, + "step": 2750 + }, + { + "epoch": 0.6343094304818999, + "grad_norm": 1.237975564408939, + "learning_rate": 1.634835258788157e-06, + "loss": 0.5176748037338257, + "step": 2751 + }, + { + "epoch": 0.6345400046114826, + "grad_norm": 1.321137847220575, + "learning_rate": 1.6345406320994952e-06, + "loss": 0.5179395079612732, + "step": 2752 + }, + { + "epoch": 0.6347705787410652, + "grad_norm": 1.3158476651008661, + "learning_rate": 1.634245913173922e-06, + "loss": 0.4810818135738373, + "step": 2753 + }, + { + "epoch": 0.635001152870648, + "grad_norm": 1.2760288557710286, + "learning_rate": 1.6339511020542775e-06, + "loss": 0.5188307762145996, + "step": 2754 + }, + { + "epoch": 0.6352317270002306, + "grad_norm": 1.662662743900965, + "learning_rate": 1.6336561987834151e-06, + "loss": 0.41170865297317505, + "step": 2755 + }, + { + "epoch": 0.6354623011298133, + "grad_norm": 1.1982414473393, + "learning_rate": 1.6333612034042025e-06, + "loss": 0.48726415634155273, + "step": 2756 + }, + { + "epoch": 0.6356928752593959, + "grad_norm": 1.1790415390507374, + "learning_rate": 1.63306611595952e-06, + "loss": 0.4483524560928345, + "step": 2757 + }, + { + "epoch": 0.6359234493889786, + "grad_norm": 1.2150870765180466, + "learning_rate": 1.6327709364922618e-06, + "loss": 0.3979623019695282, + "step": 2758 + }, + { + "epoch": 0.6361540235185612, + "grad_norm": 1.2093786796022739, + "learning_rate": 1.6324756650453346e-06, + "loss": 0.461483895778656, + "step": 2759 + }, + { + "epoch": 0.6363845976481439, + "grad_norm": 1.2350751043575534, + "learning_rate": 1.6321803016616598e-06, + "loss": 0.40054333209991455, + "step": 2760 + }, + { + "epoch": 0.6366151717777265, + "grad_norm": 
1.1196609017801307, + "learning_rate": 1.6318848463841712e-06, + "loss": 0.534996747970581, + "step": 2761 + }, + { + "epoch": 0.6368457459073092, + "grad_norm": 1.260260551672407, + "learning_rate": 1.631589299255816e-06, + "loss": 0.49408137798309326, + "step": 2762 + }, + { + "epoch": 0.6370763200368919, + "grad_norm": 1.305230846296416, + "learning_rate": 1.6312936603195557e-06, + "loss": 0.49098217487335205, + "step": 2763 + }, + { + "epoch": 0.6373068941664746, + "grad_norm": 1.1344163970655265, + "learning_rate": 1.6309979296183636e-06, + "loss": 0.4990113377571106, + "step": 2764 + }, + { + "epoch": 0.6375374682960572, + "grad_norm": 1.2952446438426217, + "learning_rate": 1.6307021071952276e-06, + "loss": 0.49399930238723755, + "step": 2765 + }, + { + "epoch": 0.6377680424256399, + "grad_norm": 1.320323762194689, + "learning_rate": 1.6304061930931478e-06, + "loss": 0.5029928684234619, + "step": 2766 + }, + { + "epoch": 0.6379986165552225, + "grad_norm": 1.2455728900211775, + "learning_rate": 1.6301101873551396e-06, + "loss": 0.5732289552688599, + "step": 2767 + }, + { + "epoch": 0.6382291906848052, + "grad_norm": 1.2965522975146178, + "learning_rate": 1.6298140900242293e-06, + "loss": 0.47334790229797363, + "step": 2768 + }, + { + "epoch": 0.6384597648143878, + "grad_norm": 1.2464510374223752, + "learning_rate": 1.6295179011434578e-06, + "loss": 0.44271016120910645, + "step": 2769 + }, + { + "epoch": 0.6386903389439705, + "grad_norm": 1.8250225519339747, + "learning_rate": 1.6292216207558798e-06, + "loss": 0.5768353939056396, + "step": 2770 + }, + { + "epoch": 0.6389209130735531, + "grad_norm": 1.074704735340539, + "learning_rate": 1.6289252489045625e-06, + "loss": 0.48315417766571045, + "step": 2771 + }, + { + "epoch": 0.6391514872031359, + "grad_norm": 1.338382007112913, + "learning_rate": 1.6286287856325855e-06, + "loss": 0.5745590925216675, + "step": 2772 + }, + { + "epoch": 0.6393820613327185, + "grad_norm": 1.473033213400145, + "learning_rate": 
1.6283322309830444e-06, + "loss": 0.6084291934967041, + "step": 2773 + }, + { + "epoch": 0.6396126354623012, + "grad_norm": 1.083816855400547, + "learning_rate": 1.6280355849990451e-06, + "loss": 0.4995007812976837, + "step": 2774 + }, + { + "epoch": 0.6398432095918838, + "grad_norm": 1.1962451309299882, + "learning_rate": 1.6277388477237084e-06, + "loss": 0.45811381936073303, + "step": 2775 + }, + { + "epoch": 0.6400737837214665, + "grad_norm": 1.448203316971052, + "learning_rate": 1.6274420192001689e-06, + "loss": 0.5666211247444153, + "step": 2776 + }, + { + "epoch": 0.6403043578510491, + "grad_norm": 1.3871415999727634, + "learning_rate": 1.6271450994715723e-06, + "loss": 0.5059396028518677, + "step": 2777 + }, + { + "epoch": 0.6405349319806318, + "grad_norm": 1.4444216130733851, + "learning_rate": 1.6268480885810798e-06, + "loss": 0.5418530702590942, + "step": 2778 + }, + { + "epoch": 0.6407655061102144, + "grad_norm": 1.4034133564890543, + "learning_rate": 1.6265509865718647e-06, + "loss": 0.5047061443328857, + "step": 2779 + }, + { + "epoch": 0.6409960802397972, + "grad_norm": 1.6003350461542336, + "learning_rate": 1.6262537934871138e-06, + "loss": 0.5104432702064514, + "step": 2780 + }, + { + "epoch": 0.6412266543693798, + "grad_norm": 1.3065683677222188, + "learning_rate": 1.625956509370027e-06, + "loss": 0.44423484802246094, + "step": 2781 + }, + { + "epoch": 0.6414572284989624, + "grad_norm": 1.1820302321160245, + "learning_rate": 1.6256591342638179e-06, + "loss": 0.47618383169174194, + "step": 2782 + }, + { + "epoch": 0.6416878026285451, + "grad_norm": 1.3796601981562324, + "learning_rate": 1.625361668211713e-06, + "loss": 0.5423145890235901, + "step": 2783 + }, + { + "epoch": 0.6419183767581277, + "grad_norm": 1.380895745392916, + "learning_rate": 1.6250641112569515e-06, + "loss": 0.517102837562561, + "step": 2784 + }, + { + "epoch": 0.6421489508877104, + "grad_norm": 1.2388489917279923, + "learning_rate": 1.6247664634427864e-06, + "loss": 
0.39601820707321167, + "step": 2785 + }, + { + "epoch": 0.642379525017293, + "grad_norm": 1.296572577942614, + "learning_rate": 1.6244687248124843e-06, + "loss": 0.5480250120162964, + "step": 2786 + }, + { + "epoch": 0.6426100991468757, + "grad_norm": 1.1105051491643492, + "learning_rate": 1.624170895409324e-06, + "loss": 0.4743092656135559, + "step": 2787 + }, + { + "epoch": 0.6428406732764583, + "grad_norm": 1.463202362201621, + "learning_rate": 1.6238729752765985e-06, + "loss": 0.4595726728439331, + "step": 2788 + }, + { + "epoch": 0.643071247406041, + "grad_norm": 1.2909676791556273, + "learning_rate": 1.6235749644576132e-06, + "loss": 0.5058779716491699, + "step": 2789 + }, + { + "epoch": 0.6433018215356237, + "grad_norm": 1.3145538108383794, + "learning_rate": 1.623276862995687e-06, + "loss": 0.5075543522834778, + "step": 2790 + }, + { + "epoch": 0.6435323956652064, + "grad_norm": 1.3185436913231439, + "learning_rate": 1.622978670934152e-06, + "loss": 0.5623351335525513, + "step": 2791 + }, + { + "epoch": 0.643762969794789, + "grad_norm": 1.1682118545924238, + "learning_rate": 1.6226803883163536e-06, + "loss": 0.3645760118961334, + "step": 2792 + }, + { + "epoch": 0.6439935439243717, + "grad_norm": 1.4617740663680228, + "learning_rate": 1.6223820151856501e-06, + "loss": 0.5666004419326782, + "step": 2793 + }, + { + "epoch": 0.6442241180539543, + "grad_norm": 1.3342697895697784, + "learning_rate": 1.6220835515854133e-06, + "loss": 0.6571217775344849, + "step": 2794 + }, + { + "epoch": 0.644454692183537, + "grad_norm": 1.4229199895470708, + "learning_rate": 1.6217849975590271e-06, + "loss": 0.5684333443641663, + "step": 2795 + }, + { + "epoch": 0.6446852663131196, + "grad_norm": 1.5289890556459427, + "learning_rate": 1.62148635314989e-06, + "loss": 0.43374937772750854, + "step": 2796 + }, + { + "epoch": 0.6449158404427023, + "grad_norm": 1.1182458179152783, + "learning_rate": 1.6211876184014134e-06, + "loss": 0.5102420449256897, + "step": 2797 + }, + { + 
"epoch": 0.6451464145722849, + "grad_norm": 1.0775475511417847, + "learning_rate": 1.6208887933570203e-06, + "loss": 0.39345985651016235, + "step": 2798 + }, + { + "epoch": 0.6453769887018677, + "grad_norm": 1.4503631372644623, + "learning_rate": 1.620589878060149e-06, + "loss": 0.47554945945739746, + "step": 2799 + }, + { + "epoch": 0.6456075628314503, + "grad_norm": 1.601431882721041, + "learning_rate": 1.6202908725542495e-06, + "loss": 0.4385503828525543, + "step": 2800 + }, + { + "epoch": 0.645838136961033, + "grad_norm": 1.1168858860640334, + "learning_rate": 1.619991776882785e-06, + "loss": 0.5589696168899536, + "step": 2801 + }, + { + "epoch": 0.6460687110906156, + "grad_norm": 1.265570460008291, + "learning_rate": 1.619692591089232e-06, + "loss": 0.4827546179294586, + "step": 2802 + }, + { + "epoch": 0.6462992852201983, + "grad_norm": 1.3309974001593363, + "learning_rate": 1.6193933152170809e-06, + "loss": 0.491131067276001, + "step": 2803 + }, + { + "epoch": 0.6465298593497809, + "grad_norm": 1.2647545815457555, + "learning_rate": 1.6190939493098341e-06, + "loss": 0.47185173630714417, + "step": 2804 + }, + { + "epoch": 0.6467604334793636, + "grad_norm": 1.235826049412326, + "learning_rate": 1.6187944934110072e-06, + "loss": 0.4411182701587677, + "step": 2805 + }, + { + "epoch": 0.6469910076089462, + "grad_norm": 1.2245067812038697, + "learning_rate": 1.6184949475641295e-06, + "loss": 0.47243285179138184, + "step": 2806 + }, + { + "epoch": 0.647221581738529, + "grad_norm": 1.3311536114931484, + "learning_rate": 1.6181953118127428e-06, + "loss": 0.4449295401573181, + "step": 2807 + }, + { + "epoch": 0.6474521558681116, + "grad_norm": 1.2292361204281614, + "learning_rate": 1.6178955862004024e-06, + "loss": 0.5148872137069702, + "step": 2808 + }, + { + "epoch": 0.6476827299976943, + "grad_norm": 1.2738055603189895, + "learning_rate": 1.6175957707706762e-06, + "loss": 0.5017277598381042, + "step": 2809 + }, + { + "epoch": 0.6479133041272769, + "grad_norm": 
1.1324070696899262, + "learning_rate": 1.6172958655671458e-06, + "loss": 0.44220247864723206, + "step": 2810 + }, + { + "epoch": 0.6481438782568596, + "grad_norm": 1.215492495713019, + "learning_rate": 1.6169958706334053e-06, + "loss": 0.45421087741851807, + "step": 2811 + }, + { + "epoch": 0.6483744523864422, + "grad_norm": 1.5167053281985836, + "learning_rate": 1.6166957860130618e-06, + "loss": 0.4772147536277771, + "step": 2812 + }, + { + "epoch": 0.6486050265160249, + "grad_norm": 1.1252103890770975, + "learning_rate": 1.6163956117497357e-06, + "loss": 0.5319628715515137, + "step": 2813 + }, + { + "epoch": 0.6488356006456075, + "grad_norm": 1.2663721872672429, + "learning_rate": 1.6160953478870608e-06, + "loss": 0.5109438896179199, + "step": 2814 + }, + { + "epoch": 0.6490661747751902, + "grad_norm": 1.33543378668276, + "learning_rate": 1.6157949944686827e-06, + "loss": 0.4417513608932495, + "step": 2815 + }, + { + "epoch": 0.6492967489047728, + "grad_norm": 1.2535935822359765, + "learning_rate": 1.6154945515382616e-06, + "loss": 0.5013085007667542, + "step": 2816 + }, + { + "epoch": 0.6495273230343556, + "grad_norm": 1.1191581438601172, + "learning_rate": 1.6151940191394693e-06, + "loss": 0.5197368860244751, + "step": 2817 + }, + { + "epoch": 0.6497578971639382, + "grad_norm": 1.4218758858652996, + "learning_rate": 1.6148933973159914e-06, + "loss": 0.46540898084640503, + "step": 2818 + }, + { + "epoch": 0.6499884712935209, + "grad_norm": 1.2080431861739462, + "learning_rate": 1.6145926861115268e-06, + "loss": 0.4867633581161499, + "step": 2819 + }, + { + "epoch": 0.6502190454231035, + "grad_norm": 1.1380395234486869, + "learning_rate": 1.6142918855697864e-06, + "loss": 0.426607221364975, + "step": 2820 + }, + { + "epoch": 0.6504496195526862, + "grad_norm": 1.2737116095131904, + "learning_rate": 1.613990995734495e-06, + "loss": 0.5183024406433105, + "step": 2821 + }, + { + "epoch": 0.6506801936822688, + "grad_norm": 1.3839354752611597, + "learning_rate": 
1.6136900166493893e-06, + "loss": 0.48635101318359375, + "step": 2822 + }, + { + "epoch": 0.6509107678118515, + "grad_norm": 1.5911912747422927, + "learning_rate": 1.6133889483582204e-06, + "loss": 0.47468632459640503, + "step": 2823 + }, + { + "epoch": 0.6511413419414341, + "grad_norm": 1.1598857858501956, + "learning_rate": 1.6130877909047515e-06, + "loss": 0.4665389358997345, + "step": 2824 + }, + { + "epoch": 0.6513719160710169, + "grad_norm": 1.1793258331020087, + "learning_rate": 1.6127865443327585e-06, + "loss": 0.5069966316223145, + "step": 2825 + }, + { + "epoch": 0.6516024902005995, + "grad_norm": 1.4107626754859688, + "learning_rate": 1.612485208686031e-06, + "loss": 0.47820740938186646, + "step": 2826 + }, + { + "epoch": 0.6518330643301822, + "grad_norm": 1.2189859420338702, + "learning_rate": 1.612183784008371e-06, + "loss": 0.43017104268074036, + "step": 2827 + }, + { + "epoch": 0.6520636384597648, + "grad_norm": 1.158515500774614, + "learning_rate": 1.6118822703435937e-06, + "loss": 0.45495298504829407, + "step": 2828 + }, + { + "epoch": 0.6522942125893475, + "grad_norm": 1.7108375139007879, + "learning_rate": 1.6115806677355272e-06, + "loss": 0.4624331593513489, + "step": 2829 + }, + { + "epoch": 0.6525247867189301, + "grad_norm": 1.0788742222165304, + "learning_rate": 1.6112789762280125e-06, + "loss": 0.39458876848220825, + "step": 2830 + }, + { + "epoch": 0.6527553608485128, + "grad_norm": 1.4194134450814206, + "learning_rate": 1.6109771958649035e-06, + "loss": 0.45552846789360046, + "step": 2831 + }, + { + "epoch": 0.6529859349780954, + "grad_norm": 1.4199555723058743, + "learning_rate": 1.6106753266900671e-06, + "loss": 0.4579755663871765, + "step": 2832 + }, + { + "epoch": 0.6532165091076781, + "grad_norm": 1.2589449636358518, + "learning_rate": 1.6103733687473823e-06, + "loss": 0.5164625644683838, + "step": 2833 + }, + { + "epoch": 0.6534470832372608, + "grad_norm": 1.3635551079325425, + "learning_rate": 1.6100713220807432e-06, + "loss": 
0.43071237206459045, + "step": 2834 + }, + { + "epoch": 0.6536776573668435, + "grad_norm": 1.2757429725484968, + "learning_rate": 1.6097691867340543e-06, + "loss": 0.5174099802970886, + "step": 2835 + }, + { + "epoch": 0.6539082314964261, + "grad_norm": 1.31351831375575, + "learning_rate": 1.609466962751234e-06, + "loss": 0.5944932699203491, + "step": 2836 + }, + { + "epoch": 0.6541388056260088, + "grad_norm": 1.312815606757786, + "learning_rate": 1.6091646501762145e-06, + "loss": 0.45203912258148193, + "step": 2837 + }, + { + "epoch": 0.6543693797555914, + "grad_norm": 1.292859531347235, + "learning_rate": 1.6088622490529386e-06, + "loss": 0.4197826683521271, + "step": 2838 + }, + { + "epoch": 0.6545999538851741, + "grad_norm": 1.3008648230701247, + "learning_rate": 1.6085597594253649e-06, + "loss": 0.4806807339191437, + "step": 2839 + }, + { + "epoch": 0.6548305280147567, + "grad_norm": 1.233893928808971, + "learning_rate": 1.608257181337462e-06, + "loss": 0.4618797302246094, + "step": 2840 + }, + { + "epoch": 0.6550611021443394, + "grad_norm": 1.1215282144992917, + "learning_rate": 1.6079545148332137e-06, + "loss": 0.4901892840862274, + "step": 2841 + }, + { + "epoch": 0.655291676273922, + "grad_norm": 1.250624448026336, + "learning_rate": 1.607651759956615e-06, + "loss": 0.44869139790534973, + "step": 2842 + }, + { + "epoch": 0.6555222504035048, + "grad_norm": 1.1064395173732657, + "learning_rate": 1.6073489167516747e-06, + "loss": 0.41470903158187866, + "step": 2843 + }, + { + "epoch": 0.6557528245330874, + "grad_norm": 1.2796938856852533, + "learning_rate": 1.6070459852624143e-06, + "loss": 0.5498615503311157, + "step": 2844 + }, + { + "epoch": 0.6559833986626701, + "grad_norm": 1.4741717641783516, + "learning_rate": 1.6067429655328675e-06, + "loss": 0.5462392568588257, + "step": 2845 + }, + { + "epoch": 0.6562139727922527, + "grad_norm": 1.5147243124828937, + "learning_rate": 1.6064398576070815e-06, + "loss": 0.3775100111961365, + "step": 2846 + }, + { + 
"epoch": 0.6564445469218354, + "grad_norm": 1.3806942156086204, + "learning_rate": 1.6061366615291161e-06, + "loss": 0.4712100028991699, + "step": 2847 + }, + { + "epoch": 0.656675121051418, + "grad_norm": 1.1320542857842297, + "learning_rate": 1.6058333773430439e-06, + "loss": 0.5152161121368408, + "step": 2848 + }, + { + "epoch": 0.6569056951810007, + "grad_norm": 1.2222287817453417, + "learning_rate": 1.6055300050929502e-06, + "loss": 0.46678972244262695, + "step": 2849 + }, + { + "epoch": 0.6571362693105833, + "grad_norm": 1.1948519980696821, + "learning_rate": 1.6052265448229338e-06, + "loss": 0.4622490108013153, + "step": 2850 + }, + { + "epoch": 0.657366843440166, + "grad_norm": 1.2601521252962713, + "learning_rate": 1.6049229965771052e-06, + "loss": 0.49909311532974243, + "step": 2851 + }, + { + "epoch": 0.6575974175697487, + "grad_norm": 1.1801405687475501, + "learning_rate": 1.6046193603995884e-06, + "loss": 0.4428306221961975, + "step": 2852 + }, + { + "epoch": 0.6578279916993314, + "grad_norm": 1.5295557154716768, + "learning_rate": 1.6043156363345196e-06, + "loss": 0.5842458009719849, + "step": 2853 + }, + { + "epoch": 0.658058565828914, + "grad_norm": 1.4945011678677886, + "learning_rate": 1.604011824426049e-06, + "loss": 0.47183722257614136, + "step": 2854 + }, + { + "epoch": 0.6582891399584967, + "grad_norm": 1.2843309395390234, + "learning_rate": 1.6037079247183379e-06, + "loss": 0.44225364923477173, + "step": 2855 + }, + { + "epoch": 0.6585197140880793, + "grad_norm": 1.3795669225253144, + "learning_rate": 1.6034039372555617e-06, + "loss": 0.4820272922515869, + "step": 2856 + }, + { + "epoch": 0.658750288217662, + "grad_norm": 1.6263387244434722, + "learning_rate": 1.6030998620819075e-06, + "loss": 0.48118168115615845, + "step": 2857 + }, + { + "epoch": 0.6589808623472446, + "grad_norm": 1.4704169894155685, + "learning_rate": 1.6027956992415764e-06, + "loss": 0.4386011064052582, + "step": 2858 + }, + { + "epoch": 0.6592114364768273, + "grad_norm": 
1.4148356020107666, + "learning_rate": 1.6024914487787814e-06, + "loss": 0.48740649223327637, + "step": 2859 + }, + { + "epoch": 0.65944201060641, + "grad_norm": 1.436235867684013, + "learning_rate": 1.602187110737748e-06, + "loss": 0.46782761812210083, + "step": 2860 + }, + { + "epoch": 0.6596725847359927, + "grad_norm": 1.2796166668007127, + "learning_rate": 1.6018826851627155e-06, + "loss": 0.5086358189582825, + "step": 2861 + }, + { + "epoch": 0.6599031588655753, + "grad_norm": 1.1582673721463366, + "learning_rate": 1.6015781720979344e-06, + "loss": 0.5631915330886841, + "step": 2862 + }, + { + "epoch": 0.660133732995158, + "grad_norm": 1.462417648098582, + "learning_rate": 1.6012735715876693e-06, + "loss": 0.5134458541870117, + "step": 2863 + }, + { + "epoch": 0.6603643071247406, + "grad_norm": 1.1268653967137703, + "learning_rate": 1.6009688836761969e-06, + "loss": 0.4308784008026123, + "step": 2864 + }, + { + "epoch": 0.6605948812543233, + "grad_norm": 1.3112517816231024, + "learning_rate": 1.6006641084078068e-06, + "loss": 0.5149765610694885, + "step": 2865 + }, + { + "epoch": 0.6608254553839059, + "grad_norm": 1.6101510783439525, + "learning_rate": 1.6003592458268005e-06, + "loss": 0.521892786026001, + "step": 2866 + }, + { + "epoch": 0.6610560295134886, + "grad_norm": 1.247084334907296, + "learning_rate": 1.6000542959774937e-06, + "loss": 0.46611008048057556, + "step": 2867 + }, + { + "epoch": 0.6612866036430712, + "grad_norm": 1.2517698630875118, + "learning_rate": 1.5997492589042135e-06, + "loss": 0.43080392479896545, + "step": 2868 + }, + { + "epoch": 0.661517177772654, + "grad_norm": 1.2239680444750303, + "learning_rate": 1.5994441346513003e-06, + "loss": 0.48026901483535767, + "step": 2869 + }, + { + "epoch": 0.6617477519022366, + "grad_norm": 1.1948228818170457, + "learning_rate": 1.5991389232631068e-06, + "loss": 0.48706555366516113, + "step": 2870 + }, + { + "epoch": 0.6619783260318193, + "grad_norm": 1.205848115890533, + "learning_rate": 
1.598833624783999e-06, + "loss": 0.5093512535095215, + "step": 2871 + }, + { + "epoch": 0.6622089001614019, + "grad_norm": 1.37517746631934, + "learning_rate": 1.5985282392583542e-06, + "loss": 0.5197086930274963, + "step": 2872 + }, + { + "epoch": 0.6624394742909846, + "grad_norm": 1.3389415544634544, + "learning_rate": 1.5982227667305646e-06, + "loss": 0.497372031211853, + "step": 2873 + }, + { + "epoch": 0.6626700484205672, + "grad_norm": 1.6851191621911175, + "learning_rate": 1.597917207245033e-06, + "loss": 0.4746604561805725, + "step": 2874 + }, + { + "epoch": 0.6629006225501499, + "grad_norm": 1.2864362072574318, + "learning_rate": 1.5976115608461755e-06, + "loss": 0.5531996488571167, + "step": 2875 + }, + { + "epoch": 0.6631311966797325, + "grad_norm": 1.2032344825838508, + "learning_rate": 1.5973058275784208e-06, + "loss": 0.44950544834136963, + "step": 2876 + }, + { + "epoch": 0.6633617708093152, + "grad_norm": 1.231321509427461, + "learning_rate": 1.597000007486211e-06, + "loss": 0.45596158504486084, + "step": 2877 + }, + { + "epoch": 0.6635923449388978, + "grad_norm": 1.1813154846400662, + "learning_rate": 1.596694100613999e-06, + "loss": 0.5243046879768372, + "step": 2878 + }, + { + "epoch": 0.6638229190684806, + "grad_norm": 1.2111771126184059, + "learning_rate": 1.5963881070062528e-06, + "loss": 0.46450644731521606, + "step": 2879 + }, + { + "epoch": 0.6640534931980632, + "grad_norm": 1.286085494147619, + "learning_rate": 1.5960820267074509e-06, + "loss": 0.5565767288208008, + "step": 2880 + }, + { + "epoch": 0.6642840673276459, + "grad_norm": 1.574495375498682, + "learning_rate": 1.595775859762085e-06, + "loss": 0.4351605176925659, + "step": 2881 + }, + { + "epoch": 0.6645146414572285, + "grad_norm": 1.3382136213218339, + "learning_rate": 1.5954696062146603e-06, + "loss": 0.5113346576690674, + "step": 2882 + }, + { + "epoch": 0.6647452155868112, + "grad_norm": 1.203285083111209, + "learning_rate": 1.5951632661096932e-06, + "loss": 
0.5005035996437073, + "step": 2883 + }, + { + "epoch": 0.6649757897163938, + "grad_norm": 1.1502074786882042, + "learning_rate": 1.5948568394917138e-06, + "loss": 0.4539811611175537, + "step": 2884 + }, + { + "epoch": 0.6652063638459765, + "grad_norm": 1.234546797786613, + "learning_rate": 1.5945503264052637e-06, + "loss": 0.4519865810871124, + "step": 2885 + }, + { + "epoch": 0.6654369379755591, + "grad_norm": 1.1932724883335695, + "learning_rate": 1.5942437268948985e-06, + "loss": 0.5688626766204834, + "step": 2886 + }, + { + "epoch": 0.6656675121051419, + "grad_norm": 1.1582733834983177, + "learning_rate": 1.5939370410051846e-06, + "loss": 0.5038400888442993, + "step": 2887 + }, + { + "epoch": 0.6658980862347245, + "grad_norm": 1.4308591259843988, + "learning_rate": 1.5936302687807028e-06, + "loss": 0.6332568526268005, + "step": 2888 + }, + { + "epoch": 0.6661286603643072, + "grad_norm": 1.2020172387992982, + "learning_rate": 1.593323410266045e-06, + "loss": 0.4994644820690155, + "step": 2889 + }, + { + "epoch": 0.6663592344938898, + "grad_norm": 1.3423031921779223, + "learning_rate": 1.5930164655058165e-06, + "loss": 0.4952617883682251, + "step": 2890 + }, + { + "epoch": 0.6665898086234725, + "grad_norm": 1.1769489968231674, + "learning_rate": 1.5927094345446345e-06, + "loss": 0.4188910722732544, + "step": 2891 + }, + { + "epoch": 0.6668203827530551, + "grad_norm": 1.319346697910086, + "learning_rate": 1.5924023174271295e-06, + "loss": 0.47160637378692627, + "step": 2892 + }, + { + "epoch": 0.6670509568826377, + "grad_norm": 1.0773369781050426, + "learning_rate": 1.592095114197944e-06, + "loss": 0.44884049892425537, + "step": 2893 + }, + { + "epoch": 0.6672815310122204, + "grad_norm": 1.3166895153069564, + "learning_rate": 1.5917878249017327e-06, + "loss": 0.4105216860771179, + "step": 2894 + }, + { + "epoch": 0.667512105141803, + "grad_norm": 1.3288589826448391, + "learning_rate": 1.5914804495831634e-06, + "loss": 0.5000967383384705, + "step": 2895 + }, + { + 
"epoch": 0.6677426792713858, + "grad_norm": 1.4772652615504442, + "learning_rate": 1.5911729882869163e-06, + "loss": 0.45515477657318115, + "step": 2896 + }, + { + "epoch": 0.6679732534009684, + "grad_norm": 1.2034912342077588, + "learning_rate": 1.590865441057684e-06, + "loss": 0.4492835998535156, + "step": 2897 + }, + { + "epoch": 0.6682038275305511, + "grad_norm": 1.5637287950189662, + "learning_rate": 1.5905578079401716e-06, + "loss": 0.553781270980835, + "step": 2898 + }, + { + "epoch": 0.6684344016601337, + "grad_norm": 1.235173143749482, + "learning_rate": 1.5902500889790967e-06, + "loss": 0.5085616111755371, + "step": 2899 + }, + { + "epoch": 0.6686649757897164, + "grad_norm": 1.2766607551584273, + "learning_rate": 1.5899422842191891e-06, + "loss": 0.4651145935058594, + "step": 2900 + }, + { + "epoch": 0.668895549919299, + "grad_norm": 1.3114841240621398, + "learning_rate": 1.5896343937051921e-06, + "loss": 0.5503841638565063, + "step": 2901 + }, + { + "epoch": 0.6691261240488817, + "grad_norm": 1.1881721760666544, + "learning_rate": 1.5893264174818599e-06, + "loss": 0.48213839530944824, + "step": 2902 + }, + { + "epoch": 0.6693566981784643, + "grad_norm": 1.2726619976847688, + "learning_rate": 1.5890183555939604e-06, + "loss": 0.4602949023246765, + "step": 2903 + }, + { + "epoch": 0.669587272308047, + "grad_norm": 1.213092004639277, + "learning_rate": 1.5887102080862736e-06, + "loss": 0.43991196155548096, + "step": 2904 + }, + { + "epoch": 0.6698178464376296, + "grad_norm": 1.2472416336517922, + "learning_rate": 1.5884019750035914e-06, + "loss": 0.48186323046684265, + "step": 2905 + }, + { + "epoch": 0.6700484205672124, + "grad_norm": 1.3445409358829308, + "learning_rate": 1.5880936563907189e-06, + "loss": 0.44907671213150024, + "step": 2906 + }, + { + "epoch": 0.670278994696795, + "grad_norm": 1.874421138474627, + "learning_rate": 1.587785252292473e-06, + "loss": 0.4475386142730713, + "step": 2907 + }, + { + "epoch": 0.6705095688263777, + "grad_norm": 
1.2649536391923781, + "learning_rate": 1.587476762753684e-06, + "loss": 0.4504704475402832, + "step": 2908 + }, + { + "epoch": 0.6707401429559603, + "grad_norm": 2.0624210450483376, + "learning_rate": 1.5871681878191937e-06, + "loss": 0.5090106129646301, + "step": 2909 + }, + { + "epoch": 0.670970717085543, + "grad_norm": 1.3010076823717651, + "learning_rate": 1.5868595275338561e-06, + "loss": 0.46150895953178406, + "step": 2910 + }, + { + "epoch": 0.6712012912151256, + "grad_norm": 1.2556909013752833, + "learning_rate": 1.586550781942539e-06, + "loss": 0.5499979257583618, + "step": 2911 + }, + { + "epoch": 0.6714318653447083, + "grad_norm": 1.2089730243488483, + "learning_rate": 1.5862419510901211e-06, + "loss": 0.46628689765930176, + "step": 2912 + }, + { + "epoch": 0.6716624394742909, + "grad_norm": 1.2998808024776154, + "learning_rate": 1.5859330350214941e-06, + "loss": 0.4517399072647095, + "step": 2913 + }, + { + "epoch": 0.6718930136038737, + "grad_norm": 1.0879313971673985, + "learning_rate": 1.5856240337815621e-06, + "loss": 0.4696923792362213, + "step": 2914 + }, + { + "epoch": 0.6721235877334563, + "grad_norm": 1.5676723620382764, + "learning_rate": 1.585314947415242e-06, + "loss": 0.41357535123825073, + "step": 2915 + }, + { + "epoch": 0.672354161863039, + "grad_norm": 1.2988881169526059, + "learning_rate": 1.5850057759674621e-06, + "loss": 0.5223745107650757, + "step": 2916 + }, + { + "epoch": 0.6725847359926216, + "grad_norm": 1.5751566352241433, + "learning_rate": 1.584696519483164e-06, + "loss": 0.48562729358673096, + "step": 2917 + }, + { + "epoch": 0.6728153101222043, + "grad_norm": 1.147456021361514, + "learning_rate": 1.5843871780073009e-06, + "loss": 0.3675496280193329, + "step": 2918 + }, + { + "epoch": 0.6730458842517869, + "grad_norm": 1.4691177353786786, + "learning_rate": 1.5840777515848389e-06, + "loss": 0.5782667994499207, + "step": 2919 + }, + { + "epoch": 0.6732764583813696, + "grad_norm": 1.110911745804502, + "learning_rate": 
1.583768240260756e-06, + "loss": 0.419716477394104, + "step": 2920 + }, + { + "epoch": 0.6735070325109522, + "grad_norm": 1.2625181785612978, + "learning_rate": 1.5834586440800434e-06, + "loss": 0.4004133939743042, + "step": 2921 + }, + { + "epoch": 0.673737606640535, + "grad_norm": 1.3860644175168617, + "learning_rate": 1.5831489630877037e-06, + "loss": 0.4917314350605011, + "step": 2922 + }, + { + "epoch": 0.6739681807701176, + "grad_norm": 1.3350109690747092, + "learning_rate": 1.5828391973287522e-06, + "loss": 0.5488141179084778, + "step": 2923 + }, + { + "epoch": 0.6741987548997003, + "grad_norm": 1.2547850876004316, + "learning_rate": 1.5825293468482163e-06, + "loss": 0.5047071576118469, + "step": 2924 + }, + { + "epoch": 0.6744293290292829, + "grad_norm": 1.3178326140677985, + "learning_rate": 1.5822194116911364e-06, + "loss": 0.4830411672592163, + "step": 2925 + }, + { + "epoch": 0.6746599031588656, + "grad_norm": 1.2591886503495524, + "learning_rate": 1.5819093919025641e-06, + "loss": 0.47517114877700806, + "step": 2926 + }, + { + "epoch": 0.6748904772884482, + "grad_norm": 1.3603729738722081, + "learning_rate": 1.5815992875275642e-06, + "loss": 0.5617963075637817, + "step": 2927 + }, + { + "epoch": 0.6751210514180309, + "grad_norm": 1.1752484838801127, + "learning_rate": 1.5812890986112137e-06, + "loss": 0.4360186457633972, + "step": 2928 + }, + { + "epoch": 0.6753516255476135, + "grad_norm": 1.5551926866200483, + "learning_rate": 1.5809788251986014e-06, + "loss": 0.49538636207580566, + "step": 2929 + }, + { + "epoch": 0.6755821996771962, + "grad_norm": 1.1285780293266063, + "learning_rate": 1.5806684673348288e-06, + "loss": 0.538766622543335, + "step": 2930 + }, + { + "epoch": 0.6758127738067788, + "grad_norm": 1.5395880930573347, + "learning_rate": 1.5803580250650094e-06, + "loss": 0.4113287329673767, + "step": 2931 + }, + { + "epoch": 0.6760433479363616, + "grad_norm": 1.4441179706006158, + "learning_rate": 1.5800474984342698e-06, + "loss": 
0.5298923254013062, + "step": 2932 + }, + { + "epoch": 0.6762739220659442, + "grad_norm": 1.2285488161220737, + "learning_rate": 1.5797368874877472e-06, + "loss": 0.4891100227832794, + "step": 2933 + }, + { + "epoch": 0.6765044961955269, + "grad_norm": 1.3809520207822814, + "learning_rate": 1.579426192270593e-06, + "loss": 0.4412326216697693, + "step": 2934 + }, + { + "epoch": 0.6767350703251095, + "grad_norm": 1.3386538114869513, + "learning_rate": 1.5791154128279693e-06, + "loss": 0.5514793395996094, + "step": 2935 + }, + { + "epoch": 0.6769656444546922, + "grad_norm": 1.2065068425398038, + "learning_rate": 1.578804549205051e-06, + "loss": 0.44050243496894836, + "step": 2936 + }, + { + "epoch": 0.6771962185842748, + "grad_norm": 1.3084516018872256, + "learning_rate": 1.5784936014470256e-06, + "loss": 0.47503453493118286, + "step": 2937 + }, + { + "epoch": 0.6774267927138575, + "grad_norm": 1.445992727647949, + "learning_rate": 1.5781825695990922e-06, + "loss": 0.524544894695282, + "step": 2938 + }, + { + "epoch": 0.6776573668434401, + "grad_norm": 1.2672201923678605, + "learning_rate": 1.5778714537064628e-06, + "loss": 0.4203689694404602, + "step": 2939 + }, + { + "epoch": 0.6778879409730229, + "grad_norm": 1.255678429788082, + "learning_rate": 1.577560253814361e-06, + "loss": 0.4305247664451599, + "step": 2940 + }, + { + "epoch": 0.6781185151026055, + "grad_norm": 1.2383698343036857, + "learning_rate": 1.577248969968023e-06, + "loss": 0.6129249930381775, + "step": 2941 + }, + { + "epoch": 0.6783490892321882, + "grad_norm": 1.4217586280781416, + "learning_rate": 1.5769376022126969e-06, + "loss": 0.44431981444358826, + "step": 2942 + }, + { + "epoch": 0.6785796633617708, + "grad_norm": 1.2327303005745092, + "learning_rate": 1.576626150593643e-06, + "loss": 0.4394958019256592, + "step": 2943 + }, + { + "epoch": 0.6788102374913535, + "grad_norm": 1.2593798978560244, + "learning_rate": 1.5763146151561345e-06, + "loss": 0.44481268525123596, + "step": 2944 + }, + { + 
"epoch": 0.6790408116209361, + "grad_norm": 1.4440486279504336, + "learning_rate": 1.5760029959454556e-06, + "loss": 0.4251822829246521, + "step": 2945 + }, + { + "epoch": 0.6792713857505188, + "grad_norm": 1.338830252556874, + "learning_rate": 1.575691293006904e-06, + "loss": 0.41041696071624756, + "step": 2946 + }, + { + "epoch": 0.6795019598801014, + "grad_norm": 1.357017341106407, + "learning_rate": 1.5753795063857883e-06, + "loss": 0.5710239410400391, + "step": 2947 + }, + { + "epoch": 0.6797325340096841, + "grad_norm": 1.2834985119403657, + "learning_rate": 1.57506763612743e-06, + "loss": 0.48825210332870483, + "step": 2948 + }, + { + "epoch": 0.6799631081392667, + "grad_norm": 1.263284608882453, + "learning_rate": 1.5747556822771628e-06, + "loss": 0.37077784538269043, + "step": 2949 + }, + { + "epoch": 0.6801936822688495, + "grad_norm": 1.2458271352531185, + "learning_rate": 1.5744436448803322e-06, + "loss": 0.4618649482727051, + "step": 2950 + }, + { + "epoch": 0.6804242563984321, + "grad_norm": 1.0624348057433408, + "learning_rate": 1.574131523982296e-06, + "loss": 0.4415496289730072, + "step": 2951 + }, + { + "epoch": 0.6806548305280148, + "grad_norm": 1.4732593030941656, + "learning_rate": 1.5738193196284239e-06, + "loss": 0.440029501914978, + "step": 2952 + }, + { + "epoch": 0.6808854046575974, + "grad_norm": 1.3992294210480754, + "learning_rate": 1.5735070318640986e-06, + "loss": 0.5149378776550293, + "step": 2953 + }, + { + "epoch": 0.6811159787871801, + "grad_norm": 1.3173119180782331, + "learning_rate": 1.5731946607347136e-06, + "loss": 0.4838085174560547, + "step": 2954 + }, + { + "epoch": 0.6813465529167627, + "grad_norm": 1.3500402916158631, + "learning_rate": 1.5728822062856757e-06, + "loss": 0.48472005128860474, + "step": 2955 + }, + { + "epoch": 0.6815771270463454, + "grad_norm": 1.163167888868214, + "learning_rate": 1.572569668562403e-06, + "loss": 0.5154656767845154, + "step": 2956 + }, + { + "epoch": 0.681807701175928, + "grad_norm": 
1.1906599654401737, + "learning_rate": 1.5722570476103263e-06, + "loss": 0.4094988703727722, + "step": 2957 + }, + { + "epoch": 0.6820382753055108, + "grad_norm": 1.2324943837281264, + "learning_rate": 1.5719443434748877e-06, + "loss": 0.5125937461853027, + "step": 2958 + }, + { + "epoch": 0.6822688494350934, + "grad_norm": 1.2538269370063608, + "learning_rate": 1.5716315562015428e-06, + "loss": 0.4807034730911255, + "step": 2959 + }, + { + "epoch": 0.6824994235646761, + "grad_norm": 1.3513545314522855, + "learning_rate": 1.5713186858357577e-06, + "loss": 0.6126741170883179, + "step": 2960 + }, + { + "epoch": 0.6827299976942587, + "grad_norm": 2.1674593801056887, + "learning_rate": 1.5710057324230113e-06, + "loss": 0.5450708866119385, + "step": 2961 + }, + { + "epoch": 0.6829605718238414, + "grad_norm": 1.8355809144200355, + "learning_rate": 1.5706926960087948e-06, + "loss": 0.47740328311920166, + "step": 2962 + }, + { + "epoch": 0.683191145953424, + "grad_norm": 1.311529987995532, + "learning_rate": 1.5703795766386112e-06, + "loss": 0.4731057584285736, + "step": 2963 + }, + { + "epoch": 0.6834217200830067, + "grad_norm": 1.3162153678952433, + "learning_rate": 1.5700663743579754e-06, + "loss": 0.49735045433044434, + "step": 2964 + }, + { + "epoch": 0.6836522942125893, + "grad_norm": 1.2346637447285915, + "learning_rate": 1.569753089212415e-06, + "loss": 0.5257318019866943, + "step": 2965 + }, + { + "epoch": 0.683882868342172, + "grad_norm": 1.1458467925306592, + "learning_rate": 1.5694397212474685e-06, + "loss": 0.3947733938694, + "step": 2966 + }, + { + "epoch": 0.6841134424717547, + "grad_norm": 1.424176183527685, + "learning_rate": 1.5691262705086875e-06, + "loss": 0.5078107714653015, + "step": 2967 + }, + { + "epoch": 0.6843440166013374, + "grad_norm": 1.7316538509871626, + "learning_rate": 1.5688127370416351e-06, + "loss": 0.5921520590782166, + "step": 2968 + }, + { + "epoch": 0.68457459073092, + "grad_norm": 1.2277129646213039, + "learning_rate": 
1.5684991208918866e-06, + "loss": 0.45995181798934937, + "step": 2969 + }, + { + "epoch": 0.6848051648605027, + "grad_norm": 1.1894548452861071, + "learning_rate": 1.5681854221050293e-06, + "loss": 0.4874386787414551, + "step": 2970 + }, + { + "epoch": 0.6850357389900853, + "grad_norm": 1.3695475422493124, + "learning_rate": 1.5678716407266625e-06, + "loss": 0.4522739052772522, + "step": 2971 + }, + { + "epoch": 0.685266313119668, + "grad_norm": 1.3244142914830208, + "learning_rate": 1.5675577768023977e-06, + "loss": 0.4596391022205353, + "step": 2972 + }, + { + "epoch": 0.6854968872492506, + "grad_norm": 1.6847382830263626, + "learning_rate": 1.567243830377858e-06, + "loss": 0.5391427278518677, + "step": 2973 + }, + { + "epoch": 0.6857274613788333, + "grad_norm": 1.2164543996098884, + "learning_rate": 1.5669298014986786e-06, + "loss": 0.5583066940307617, + "step": 2974 + }, + { + "epoch": 0.6859580355084159, + "grad_norm": 1.3656527800334406, + "learning_rate": 1.566615690210507e-06, + "loss": 0.5410330295562744, + "step": 2975 + }, + { + "epoch": 0.6861886096379987, + "grad_norm": 1.2007908045124778, + "learning_rate": 1.566301496559002e-06, + "loss": 0.5145233273506165, + "step": 2976 + }, + { + "epoch": 0.6864191837675813, + "grad_norm": 1.4168885241389684, + "learning_rate": 1.5659872205898356e-06, + "loss": 0.5021970272064209, + "step": 2977 + }, + { + "epoch": 0.686649757897164, + "grad_norm": 1.0896663307775538, + "learning_rate": 1.5656728623486903e-06, + "loss": 0.48251593112945557, + "step": 2978 + }, + { + "epoch": 0.6868803320267466, + "grad_norm": 1.2502610536872558, + "learning_rate": 1.5653584218812617e-06, + "loss": 0.4228450655937195, + "step": 2979 + }, + { + "epoch": 0.6871109061563293, + "grad_norm": 1.4048596098114436, + "learning_rate": 1.5650438992332567e-06, + "loss": 0.3975197374820709, + "step": 2980 + }, + { + "epoch": 0.6873414802859119, + "grad_norm": 1.386478606714872, + "learning_rate": 1.5647292944503945e-06, + "loss": 
0.5441234707832336, + "step": 2981 + }, + { + "epoch": 0.6875720544154946, + "grad_norm": 1.3552115877356068, + "learning_rate": 1.5644146075784057e-06, + "loss": 0.5357148051261902, + "step": 2982 + }, + { + "epoch": 0.6878026285450772, + "grad_norm": 1.2605289404512496, + "learning_rate": 1.5640998386630337e-06, + "loss": 0.530154824256897, + "step": 2983 + }, + { + "epoch": 0.68803320267466, + "grad_norm": 1.3830405468746736, + "learning_rate": 1.563784987750033e-06, + "loss": 0.480657696723938, + "step": 2984 + }, + { + "epoch": 0.6882637768042426, + "grad_norm": 1.2595390052779563, + "learning_rate": 1.5634700548851712e-06, + "loss": 0.4822859764099121, + "step": 2985 + }, + { + "epoch": 0.6884943509338253, + "grad_norm": 1.4511024891592457, + "learning_rate": 1.5631550401142257e-06, + "loss": 0.48551490902900696, + "step": 2986 + }, + { + "epoch": 0.6887249250634079, + "grad_norm": 1.252088599015217, + "learning_rate": 1.562839943482988e-06, + "loss": 0.43080294132232666, + "step": 2987 + }, + { + "epoch": 0.6889554991929906, + "grad_norm": 1.1661214157780933, + "learning_rate": 1.56252476503726e-06, + "loss": 0.42780637741088867, + "step": 2988 + }, + { + "epoch": 0.6891860733225732, + "grad_norm": 1.3057809079761946, + "learning_rate": 1.5622095048228565e-06, + "loss": 0.539027214050293, + "step": 2989 + }, + { + "epoch": 0.6894166474521559, + "grad_norm": 1.2289425463506802, + "learning_rate": 1.5618941628856037e-06, + "loss": 0.4529460668563843, + "step": 2990 + }, + { + "epoch": 0.6896472215817385, + "grad_norm": 1.4016140654354556, + "learning_rate": 1.5615787392713395e-06, + "loss": 0.49724727869033813, + "step": 2991 + }, + { + "epoch": 0.6898777957113212, + "grad_norm": 1.25157972103927, + "learning_rate": 1.5612632340259144e-06, + "loss": 0.4711928963661194, + "step": 2992 + }, + { + "epoch": 0.6901083698409038, + "grad_norm": 1.3707143585352468, + "learning_rate": 1.56094764719519e-06, + "loss": 0.42258220911026, + "step": 2993 + }, + { + "epoch": 
0.6903389439704866, + "grad_norm": 1.371187363460567, + "learning_rate": 1.5606319788250398e-06, + "loss": 0.47754064202308655, + "step": 2994 + }, + { + "epoch": 0.6905695181000692, + "grad_norm": 1.307708883093593, + "learning_rate": 1.5603162289613501e-06, + "loss": 0.47200560569763184, + "step": 2995 + }, + { + "epoch": 0.6908000922296519, + "grad_norm": 1.359798809074, + "learning_rate": 1.5600003976500173e-06, + "loss": 0.5194537043571472, + "step": 2996 + }, + { + "epoch": 0.6910306663592345, + "grad_norm": 1.707437655194179, + "learning_rate": 1.5596844849369518e-06, + "loss": 0.4874703586101532, + "step": 2997 + }, + { + "epoch": 0.6912612404888172, + "grad_norm": 1.262990523197611, + "learning_rate": 1.5593684908680738e-06, + "loss": 0.5028672218322754, + "step": 2998 + }, + { + "epoch": 0.6914918146183998, + "grad_norm": 1.2420345591817543, + "learning_rate": 1.5590524154893169e-06, + "loss": 0.44250521063804626, + "step": 2999 + }, + { + "epoch": 0.6917223887479825, + "grad_norm": 1.6089998258276121, + "learning_rate": 1.5587362588466253e-06, + "loss": 0.536510705947876, + "step": 3000 + }, + { + "epoch": 0.6919529628775651, + "grad_norm": 1.3333649931769909, + "learning_rate": 1.5584200209859558e-06, + "loss": 0.4514959752559662, + "step": 3001 + }, + { + "epoch": 0.6921835370071479, + "grad_norm": 1.1923376457733827, + "learning_rate": 1.5581037019532773e-06, + "loss": 0.4402197301387787, + "step": 3002 + }, + { + "epoch": 0.6924141111367305, + "grad_norm": 1.1940429657833775, + "learning_rate": 1.5577873017945691e-06, + "loss": 0.508256196975708, + "step": 3003 + }, + { + "epoch": 0.6926446852663131, + "grad_norm": 1.2600794916577294, + "learning_rate": 1.5574708205558236e-06, + "loss": 0.5123175978660583, + "step": 3004 + }, + { + "epoch": 0.6928752593958958, + "grad_norm": 1.4303227599201425, + "learning_rate": 1.5571542582830447e-06, + "loss": 0.4874982237815857, + "step": 3005 + }, + { + "epoch": 0.6931058335254784, + "grad_norm": 
1.314228379499143, + "learning_rate": 1.556837615022248e-06, + "loss": 0.44554391503334045, + "step": 3006 + }, + { + "epoch": 0.6933364076550611, + "grad_norm": 1.5428941228634732, + "learning_rate": 1.5565208908194603e-06, + "loss": 0.5899895429611206, + "step": 3007 + }, + { + "epoch": 0.6935669817846437, + "grad_norm": 1.2685614762262514, + "learning_rate": 1.5562040857207208e-06, + "loss": 0.5137951374053955, + "step": 3008 + }, + { + "epoch": 0.6937975559142264, + "grad_norm": 1.2863812659603593, + "learning_rate": 1.5558871997720805e-06, + "loss": 0.5435892343521118, + "step": 3009 + }, + { + "epoch": 0.694028130043809, + "grad_norm": 1.4463505314835092, + "learning_rate": 1.5555702330196021e-06, + "loss": 0.45998525619506836, + "step": 3010 + }, + { + "epoch": 0.6942587041733917, + "grad_norm": 1.324515476398786, + "learning_rate": 1.5552531855093597e-06, + "loss": 0.4676332473754883, + "step": 3011 + }, + { + "epoch": 0.6944892783029744, + "grad_norm": 1.2595225568514163, + "learning_rate": 1.5549360572874397e-06, + "loss": 0.48250633478164673, + "step": 3012 + }, + { + "epoch": 0.6947198524325571, + "grad_norm": 1.4537609539003187, + "learning_rate": 1.5546188483999396e-06, + "loss": 0.4841402769088745, + "step": 3013 + }, + { + "epoch": 0.6949504265621397, + "grad_norm": 1.401637069375295, + "learning_rate": 1.5543015588929688e-06, + "loss": 0.4717336893081665, + "step": 3014 + }, + { + "epoch": 0.6951810006917224, + "grad_norm": 1.3276052543558161, + "learning_rate": 1.5539841888126488e-06, + "loss": 0.48844897747039795, + "step": 3015 + }, + { + "epoch": 0.695411574821305, + "grad_norm": 1.539947517538627, + "learning_rate": 1.5536667382051127e-06, + "loss": 0.5244781970977783, + "step": 3016 + }, + { + "epoch": 0.6956421489508877, + "grad_norm": 1.2794123200247822, + "learning_rate": 1.5533492071165046e-06, + "loss": 0.4612278938293457, + "step": 3017 + }, + { + "epoch": 0.6958727230804703, + "grad_norm": 1.1978546028008836, + "learning_rate": 
1.5530315955929817e-06, + "loss": 0.40461257100105286, + "step": 3018 + }, + { + "epoch": 0.696103297210053, + "grad_norm": 1.387518032200497, + "learning_rate": 1.5527139036807112e-06, + "loss": 0.5191174745559692, + "step": 3019 + }, + { + "epoch": 0.6963338713396356, + "grad_norm": 1.510370534054042, + "learning_rate": 1.5523961314258731e-06, + "loss": 0.45882558822631836, + "step": 3020 + }, + { + "epoch": 0.6965644454692184, + "grad_norm": 1.230362803290169, + "learning_rate": 1.552078278874659e-06, + "loss": 0.4766819477081299, + "step": 3021 + }, + { + "epoch": 0.696795019598801, + "grad_norm": 1.2822436220739486, + "learning_rate": 1.5517603460732724e-06, + "loss": 0.4572867751121521, + "step": 3022 + }, + { + "epoch": 0.6970255937283837, + "grad_norm": 1.5677891937472022, + "learning_rate": 1.5514423330679272e-06, + "loss": 0.4689183235168457, + "step": 3023 + }, + { + "epoch": 0.6972561678579663, + "grad_norm": 1.18549719550499, + "learning_rate": 1.5511242399048504e-06, + "loss": 0.45769914984703064, + "step": 3024 + }, + { + "epoch": 0.697486741987549, + "grad_norm": 1.3095011770493485, + "learning_rate": 1.5508060666302796e-06, + "loss": 0.47367236018180847, + "step": 3025 + }, + { + "epoch": 0.6977173161171316, + "grad_norm": 1.5441644429162589, + "learning_rate": 1.550487813290465e-06, + "loss": 0.40873080492019653, + "step": 3026 + }, + { + "epoch": 0.6979478902467143, + "grad_norm": 1.2349195465907241, + "learning_rate": 1.5501694799316671e-06, + "loss": 0.42366844415664673, + "step": 3027 + }, + { + "epoch": 0.6981784643762969, + "grad_norm": 1.2587292360565243, + "learning_rate": 1.5498510666001602e-06, + "loss": 0.3133828043937683, + "step": 3028 + }, + { + "epoch": 0.6984090385058797, + "grad_norm": 1.5168032500602213, + "learning_rate": 1.549532573342228e-06, + "loss": 0.5188712477684021, + "step": 3029 + }, + { + "epoch": 0.6986396126354623, + "grad_norm": 1.2707264640547211, + "learning_rate": 1.5492140002041668e-06, + "loss": 
0.4374960660934448, + "step": 3030 + }, + { + "epoch": 0.698870186765045, + "grad_norm": 1.6828882278794643, + "learning_rate": 1.5488953472322845e-06, + "loss": 0.5285592079162598, + "step": 3031 + }, + { + "epoch": 0.6991007608946276, + "grad_norm": 1.5111090584536853, + "learning_rate": 1.5485766144729006e-06, + "loss": 0.5331767797470093, + "step": 3032 + }, + { + "epoch": 0.6993313350242103, + "grad_norm": 1.3626863062762309, + "learning_rate": 1.5482578019723462e-06, + "loss": 0.4546147584915161, + "step": 3033 + }, + { + "epoch": 0.6995619091537929, + "grad_norm": 1.2127032724557087, + "learning_rate": 1.5479389097769639e-06, + "loss": 0.47674182057380676, + "step": 3034 + }, + { + "epoch": 0.6997924832833756, + "grad_norm": 1.2042624102453106, + "learning_rate": 1.5476199379331078e-06, + "loss": 0.496138334274292, + "step": 3035 + }, + { + "epoch": 0.7000230574129582, + "grad_norm": 1.367736432364491, + "learning_rate": 1.547300886487144e-06, + "loss": 0.4843756854534149, + "step": 3036 + }, + { + "epoch": 0.7002536315425409, + "grad_norm": 1.5043582093976149, + "learning_rate": 1.5469817554854494e-06, + "loss": 0.6028264760971069, + "step": 3037 + }, + { + "epoch": 0.7004842056721235, + "grad_norm": 1.4959257460685322, + "learning_rate": 1.5466625449744134e-06, + "loss": 0.49528858065605164, + "step": 3038 + }, + { + "epoch": 0.7007147798017063, + "grad_norm": 1.1403876193260207, + "learning_rate": 1.5463432550004358e-06, + "loss": 0.466439425945282, + "step": 3039 + }, + { + "epoch": 0.7009453539312889, + "grad_norm": 1.1012676712945453, + "learning_rate": 1.5460238856099292e-06, + "loss": 0.4196532368659973, + "step": 3040 + }, + { + "epoch": 0.7011759280608716, + "grad_norm": 1.40353983379054, + "learning_rate": 1.5457044368493173e-06, + "loss": 0.47679999470710754, + "step": 3041 + }, + { + "epoch": 0.7014065021904542, + "grad_norm": 1.2594197008827683, + "learning_rate": 1.5453849087650346e-06, + "loss": 0.4368046522140503, + "step": 3042 + }, + { + 
"epoch": 0.7016370763200369, + "grad_norm": 1.2211703865137815, + "learning_rate": 1.5450653014035285e-06, + "loss": 0.45165273547172546, + "step": 3043 + }, + { + "epoch": 0.7018676504496195, + "grad_norm": 1.1456058151260982, + "learning_rate": 1.5447456148112563e-06, + "loss": 0.44813454151153564, + "step": 3044 + }, + { + "epoch": 0.7020982245792022, + "grad_norm": 1.269275990698592, + "learning_rate": 1.5444258490346882e-06, + "loss": 0.44681504368782043, + "step": 3045 + }, + { + "epoch": 0.7023287987087848, + "grad_norm": 1.3036360811480283, + "learning_rate": 1.5441060041203057e-06, + "loss": 0.44788169860839844, + "step": 3046 + }, + { + "epoch": 0.7025593728383676, + "grad_norm": 1.3232925218771132, + "learning_rate": 1.5437860801146013e-06, + "loss": 0.3754178285598755, + "step": 3047 + }, + { + "epoch": 0.7027899469679502, + "grad_norm": 1.001044690167693, + "learning_rate": 1.5434660770640787e-06, + "loss": 0.3582305908203125, + "step": 3048 + }, + { + "epoch": 0.7030205210975329, + "grad_norm": 1.3449464333610996, + "learning_rate": 1.543145995015254e-06, + "loss": 0.42649000883102417, + "step": 3049 + }, + { + "epoch": 0.7032510952271155, + "grad_norm": 1.2880551855073363, + "learning_rate": 1.5428258340146543e-06, + "loss": 0.5164098143577576, + "step": 3050 + }, + { + "epoch": 0.7034816693566982, + "grad_norm": 1.2456398303270981, + "learning_rate": 1.5425055941088181e-06, + "loss": 0.4193584620952606, + "step": 3051 + }, + { + "epoch": 0.7037122434862808, + "grad_norm": 1.3825374305431077, + "learning_rate": 1.5421852753442957e-06, + "loss": 0.5230807662010193, + "step": 3052 + }, + { + "epoch": 0.7039428176158635, + "grad_norm": 1.466681367301644, + "learning_rate": 1.5418648777676488e-06, + "loss": 0.4573478102684021, + "step": 3053 + }, + { + "epoch": 0.7041733917454461, + "grad_norm": 1.1343088214156583, + "learning_rate": 1.5415444014254503e-06, + "loss": 0.47031426429748535, + "step": 3054 + }, + { + "epoch": 0.7044039658750288, + 
"grad_norm": 1.3599997528041683, + "learning_rate": 1.5412238463642844e-06, + "loss": 0.4499198794364929, + "step": 3055 + }, + { + "epoch": 0.7046345400046115, + "grad_norm": 1.4014132343100743, + "learning_rate": 1.5409032126307477e-06, + "loss": 0.4775800406932831, + "step": 3056 + }, + { + "epoch": 0.7048651141341942, + "grad_norm": 1.4264420683743835, + "learning_rate": 1.540582500271447e-06, + "loss": 0.535969614982605, + "step": 3057 + }, + { + "epoch": 0.7050956882637768, + "grad_norm": 1.3808494199198469, + "learning_rate": 1.5402617093330013e-06, + "loss": 0.5358741283416748, + "step": 3058 + }, + { + "epoch": 0.7053262623933595, + "grad_norm": 1.2492824573732915, + "learning_rate": 1.5399408398620406e-06, + "loss": 0.5392765998840332, + "step": 3059 + }, + { + "epoch": 0.7055568365229421, + "grad_norm": 1.275809486426879, + "learning_rate": 1.5396198919052066e-06, + "loss": 0.47976016998291016, + "step": 3060 + }, + { + "epoch": 0.7057874106525248, + "grad_norm": 1.2226120465526635, + "learning_rate": 1.5392988655091526e-06, + "loss": 0.39919328689575195, + "step": 3061 + }, + { + "epoch": 0.7060179847821074, + "grad_norm": 1.6011371731611943, + "learning_rate": 1.538977760720543e-06, + "loss": 0.4503553509712219, + "step": 3062 + }, + { + "epoch": 0.7062485589116901, + "grad_norm": 1.2363983734925073, + "learning_rate": 1.5386565775860531e-06, + "loss": 0.4570388197898865, + "step": 3063 + }, + { + "epoch": 0.7064791330412727, + "grad_norm": 1.2640125065615475, + "learning_rate": 1.5383353161523706e-06, + "loss": 0.54588782787323, + "step": 3064 + }, + { + "epoch": 0.7067097071708555, + "grad_norm": 1.3495245665399438, + "learning_rate": 1.5380139764661945e-06, + "loss": 0.40369170904159546, + "step": 3065 + }, + { + "epoch": 0.7069402813004381, + "grad_norm": 1.40505470554238, + "learning_rate": 1.5376925585742341e-06, + "loss": 0.5079206228256226, + "step": 3066 + }, + { + "epoch": 0.7071708554300208, + "grad_norm": 1.2407138703812135, + 
"learning_rate": 1.5373710625232107e-06, + "loss": 0.41418159008026123, + "step": 3067 + }, + { + "epoch": 0.7074014295596034, + "grad_norm": 1.2523103492462024, + "learning_rate": 1.5370494883598575e-06, + "loss": 0.4546199142932892, + "step": 3068 + }, + { + "epoch": 0.7076320036891861, + "grad_norm": 1.1794904786936184, + "learning_rate": 1.5367278361309183e-06, + "loss": 0.48041367530822754, + "step": 3069 + }, + { + "epoch": 0.7078625778187687, + "grad_norm": 1.3468711432386478, + "learning_rate": 1.5364061058831486e-06, + "loss": 0.47676384449005127, + "step": 3070 + }, + { + "epoch": 0.7080931519483514, + "grad_norm": 1.1888236379295274, + "learning_rate": 1.5360842976633148e-06, + "loss": 0.47341692447662354, + "step": 3071 + }, + { + "epoch": 0.708323726077934, + "grad_norm": 1.3227579498868685, + "learning_rate": 1.5357624115181956e-06, + "loss": 0.38436269760131836, + "step": 3072 + }, + { + "epoch": 0.7085543002075168, + "grad_norm": 1.4827200040386144, + "learning_rate": 1.5354404474945798e-06, + "loss": 0.5369806289672852, + "step": 3073 + }, + { + "epoch": 0.7087848743370994, + "grad_norm": 1.404704151375413, + "learning_rate": 1.535118405639269e-06, + "loss": 0.5314677953720093, + "step": 3074 + }, + { + "epoch": 0.7090154484666821, + "grad_norm": 1.1927563297298747, + "learning_rate": 1.5347962859990742e-06, + "loss": 0.49233007431030273, + "step": 3075 + }, + { + "epoch": 0.7092460225962647, + "grad_norm": 1.3477590726762334, + "learning_rate": 1.5344740886208194e-06, + "loss": 0.4834766983985901, + "step": 3076 + }, + { + "epoch": 0.7094765967258474, + "grad_norm": 1.432138793969477, + "learning_rate": 1.534151813551339e-06, + "loss": 0.505670428276062, + "step": 3077 + }, + { + "epoch": 0.70970717085543, + "grad_norm": 1.3290190812046396, + "learning_rate": 1.533829460837479e-06, + "loss": 0.5256010293960571, + "step": 3078 + }, + { + "epoch": 0.7099377449850127, + "grad_norm": 1.463108893430833, + "learning_rate": 1.5335070305260967e-06, + 
"loss": 0.4186098873615265, + "step": 3079 + }, + { + "epoch": 0.7101683191145953, + "grad_norm": 1.2048981968166306, + "learning_rate": 1.5331845226640607e-06, + "loss": 0.4034464359283447, + "step": 3080 + }, + { + "epoch": 0.710398893244178, + "grad_norm": 1.346673761335588, + "learning_rate": 1.5328619372982505e-06, + "loss": 0.4521537721157074, + "step": 3081 + }, + { + "epoch": 0.7106294673737606, + "grad_norm": 1.5250190734837208, + "learning_rate": 1.5325392744755574e-06, + "loss": 0.4919602572917938, + "step": 3082 + }, + { + "epoch": 0.7108600415033434, + "grad_norm": 1.1734195700346683, + "learning_rate": 1.5322165342428835e-06, + "loss": 0.4464415907859802, + "step": 3083 + }, + { + "epoch": 0.711090615632926, + "grad_norm": 1.2610549525832775, + "learning_rate": 1.5318937166471427e-06, + "loss": 0.47444385290145874, + "step": 3084 + }, + { + "epoch": 0.7113211897625087, + "grad_norm": 1.1782687896584645, + "learning_rate": 1.5315708217352595e-06, + "loss": 0.4014730453491211, + "step": 3085 + }, + { + "epoch": 0.7115517638920913, + "grad_norm": 1.1806273152667501, + "learning_rate": 1.5312478495541703e-06, + "loss": 0.4528852701187134, + "step": 3086 + }, + { + "epoch": 0.711782338021674, + "grad_norm": 1.4716504682159035, + "learning_rate": 1.5309248001508216e-06, + "loss": 0.4919637441635132, + "step": 3087 + }, + { + "epoch": 0.7120129121512566, + "grad_norm": 1.3824738486934829, + "learning_rate": 1.530601673572173e-06, + "loss": 0.5630985498428345, + "step": 3088 + }, + { + "epoch": 0.7122434862808393, + "grad_norm": 1.4462966182250279, + "learning_rate": 1.5302784698651935e-06, + "loss": 0.3920522630214691, + "step": 3089 + }, + { + "epoch": 0.7124740604104219, + "grad_norm": 1.3282823423467587, + "learning_rate": 1.5299551890768642e-06, + "loss": 0.5502145290374756, + "step": 3090 + }, + { + "epoch": 0.7127046345400047, + "grad_norm": 1.2547204060730106, + "learning_rate": 1.5296318312541767e-06, + "loss": 0.4839448928833008, + "step": 3091 + }, 
+ { + "epoch": 0.7129352086695873, + "grad_norm": 1.3486430423834108, + "learning_rate": 1.5293083964441355e-06, + "loss": 0.5029735565185547, + "step": 3092 + }, + { + "epoch": 0.71316578279917, + "grad_norm": 1.2299483009823662, + "learning_rate": 1.5289848846937544e-06, + "loss": 0.4724803566932678, + "step": 3093 + }, + { + "epoch": 0.7133963569287526, + "grad_norm": 1.1015042263762262, + "learning_rate": 1.528661296050059e-06, + "loss": 0.4609840512275696, + "step": 3094 + }, + { + "epoch": 0.7136269310583353, + "grad_norm": 1.4829248198628113, + "learning_rate": 1.5283376305600863e-06, + "loss": 0.49763959646224976, + "step": 3095 + }, + { + "epoch": 0.7138575051879179, + "grad_norm": 1.2090810088725865, + "learning_rate": 1.5280138882708847e-06, + "loss": 0.42384523153305054, + "step": 3096 + }, + { + "epoch": 0.7140880793175006, + "grad_norm": 1.3550047979469209, + "learning_rate": 1.5276900692295134e-06, + "loss": 0.5034611225128174, + "step": 3097 + }, + { + "epoch": 0.7143186534470832, + "grad_norm": 1.3321189275554508, + "learning_rate": 1.5273661734830423e-06, + "loss": 0.5617417097091675, + "step": 3098 + }, + { + "epoch": 0.714549227576666, + "grad_norm": 1.320340684589947, + "learning_rate": 1.527042201078553e-06, + "loss": 0.4562014937400818, + "step": 3099 + }, + { + "epoch": 0.7147798017062486, + "grad_norm": 1.6932438225785027, + "learning_rate": 1.5267181520631386e-06, + "loss": 0.5626288056373596, + "step": 3100 + }, + { + "epoch": 0.7150103758358313, + "grad_norm": 1.4526784651389733, + "learning_rate": 1.5263940264839028e-06, + "loss": 0.4882054924964905, + "step": 3101 + }, + { + "epoch": 0.7152409499654139, + "grad_norm": 1.523666745804484, + "learning_rate": 1.5260698243879603e-06, + "loss": 0.5371058583259583, + "step": 3102 + }, + { + "epoch": 0.7154715240949966, + "grad_norm": 1.1599798656247362, + "learning_rate": 1.5257455458224368e-06, + "loss": 0.4683259129524231, + "step": 3103 + }, + { + "epoch": 0.7157020982245792, + 
"grad_norm": 1.223986374608111, + "learning_rate": 1.5254211908344704e-06, + "loss": 0.4894726872444153, + "step": 3104 + }, + { + "epoch": 0.7159326723541619, + "grad_norm": 1.3226351110788483, + "learning_rate": 1.5250967594712089e-06, + "loss": 0.4517880082130432, + "step": 3105 + }, + { + "epoch": 0.7161632464837445, + "grad_norm": 1.162528176566508, + "learning_rate": 1.5247722517798118e-06, + "loss": 0.5062767267227173, + "step": 3106 + }, + { + "epoch": 0.7163938206133272, + "grad_norm": 1.6349408984878264, + "learning_rate": 1.5244476678074494e-06, + "loss": 0.5029302835464478, + "step": 3107 + }, + { + "epoch": 0.7166243947429098, + "grad_norm": 1.3765367207185526, + "learning_rate": 1.5241230076013035e-06, + "loss": 0.44112175703048706, + "step": 3108 + }, + { + "epoch": 0.7168549688724926, + "grad_norm": 1.3847966627377115, + "learning_rate": 1.5237982712085665e-06, + "loss": 0.43693509697914124, + "step": 3109 + }, + { + "epoch": 0.7170855430020752, + "grad_norm": 1.3509946026255297, + "learning_rate": 1.5234734586764422e-06, + "loss": 0.4544166922569275, + "step": 3110 + }, + { + "epoch": 0.7173161171316579, + "grad_norm": 1.1949924477500942, + "learning_rate": 1.5231485700521451e-06, + "loss": 0.5470178127288818, + "step": 3111 + }, + { + "epoch": 0.7175466912612405, + "grad_norm": 1.5007057362656466, + "learning_rate": 1.5228236053829017e-06, + "loss": 0.5215972065925598, + "step": 3112 + }, + { + "epoch": 0.7177772653908232, + "grad_norm": 1.1400006826022246, + "learning_rate": 1.5224985647159488e-06, + "loss": 0.3922381103038788, + "step": 3113 + }, + { + "epoch": 0.7180078395204058, + "grad_norm": 1.3432802481675237, + "learning_rate": 1.5221734480985341e-06, + "loss": 0.47455158829689026, + "step": 3114 + }, + { + "epoch": 0.7182384136499884, + "grad_norm": 1.517078162476979, + "learning_rate": 1.5218482555779164e-06, + "loss": 0.5776175260543823, + "step": 3115 + }, + { + "epoch": 0.7184689877795711, + "grad_norm": 1.4757174936390305, + 
"learning_rate": 1.521522987201366e-06, + "loss": 0.40414175391197205, + "step": 3116 + }, + { + "epoch": 0.7186995619091537, + "grad_norm": 1.5441693701407133, + "learning_rate": 1.5211976430161643e-06, + "loss": 0.44597384333610535, + "step": 3117 + }, + { + "epoch": 0.7189301360387365, + "grad_norm": 1.6495022083145716, + "learning_rate": 1.5208722230696024e-06, + "loss": 0.50276118516922, + "step": 3118 + }, + { + "epoch": 0.7191607101683191, + "grad_norm": 1.255966386168249, + "learning_rate": 1.5205467274089844e-06, + "loss": 0.43281811475753784, + "step": 3119 + }, + { + "epoch": 0.7193912842979018, + "grad_norm": 1.196003407991791, + "learning_rate": 1.5202211560816243e-06, + "loss": 0.3796764016151428, + "step": 3120 + }, + { + "epoch": 0.7196218584274844, + "grad_norm": 1.1855608567240021, + "learning_rate": 1.5198955091348463e-06, + "loss": 0.47820231318473816, + "step": 3121 + }, + { + "epoch": 0.7198524325570671, + "grad_norm": 1.3809241508956476, + "learning_rate": 1.5195697866159875e-06, + "loss": 0.4737284779548645, + "step": 3122 + }, + { + "epoch": 0.7200830066866497, + "grad_norm": 1.3019928778593748, + "learning_rate": 1.519243988572394e-06, + "loss": 0.44652169942855835, + "step": 3123 + }, + { + "epoch": 0.7203135808162324, + "grad_norm": 1.0393403987452434, + "learning_rate": 1.518918115051425e-06, + "loss": 0.42702072858810425, + "step": 3124 + }, + { + "epoch": 0.720544154945815, + "grad_norm": 1.3835329760109338, + "learning_rate": 1.5185921661004483e-06, + "loss": 0.5003541707992554, + "step": 3125 + }, + { + "epoch": 0.7207747290753977, + "grad_norm": 1.3444035589789487, + "learning_rate": 1.518266141766845e-06, + "loss": 0.5045102834701538, + "step": 3126 + }, + { + "epoch": 0.7210053032049804, + "grad_norm": 1.3069630488439725, + "learning_rate": 1.5179400420980052e-06, + "loss": 0.46619412302970886, + "step": 3127 + }, + { + "epoch": 0.7212358773345631, + "grad_norm": 1.7755918931491346, + "learning_rate": 1.5176138671413314e-06, + 
"loss": 0.5006855726242065, + "step": 3128 + }, + { + "epoch": 0.7214664514641457, + "grad_norm": 1.4202077937995432, + "learning_rate": 1.5172876169442362e-06, + "loss": 0.4394634962081909, + "step": 3129 + }, + { + "epoch": 0.7216970255937284, + "grad_norm": 1.203576429459206, + "learning_rate": 1.5169612915541428e-06, + "loss": 0.49311593174934387, + "step": 3130 + }, + { + "epoch": 0.721927599723311, + "grad_norm": 1.2610358507024448, + "learning_rate": 1.5166348910184868e-06, + "loss": 0.38406768441200256, + "step": 3131 + }, + { + "epoch": 0.7221581738528937, + "grad_norm": 1.52088025341024, + "learning_rate": 1.5163084153847132e-06, + "loss": 0.547613799571991, + "step": 3132 + }, + { + "epoch": 0.7223887479824763, + "grad_norm": 1.4599825671580298, + "learning_rate": 1.515981864700279e-06, + "loss": 0.43875589966773987, + "step": 3133 + }, + { + "epoch": 0.722619322112059, + "grad_norm": 1.3276172293945816, + "learning_rate": 1.5156552390126516e-06, + "loss": 0.41515982151031494, + "step": 3134 + }, + { + "epoch": 0.7228498962416416, + "grad_norm": 1.400170522869638, + "learning_rate": 1.5153285383693088e-06, + "loss": 0.43297481536865234, + "step": 3135 + }, + { + "epoch": 0.7230804703712244, + "grad_norm": 1.3346402467183769, + "learning_rate": 1.5150017628177408e-06, + "loss": 0.5059916377067566, + "step": 3136 + }, + { + "epoch": 0.723311044500807, + "grad_norm": 1.4474439218451525, + "learning_rate": 1.514674912405447e-06, + "loss": 0.4776325225830078, + "step": 3137 + }, + { + "epoch": 0.7235416186303897, + "grad_norm": 1.4332410620248028, + "learning_rate": 1.5143479871799381e-06, + "loss": 0.4925272464752197, + "step": 3138 + }, + { + "epoch": 0.7237721927599723, + "grad_norm": 0.9806444224416654, + "learning_rate": 1.5140209871887368e-06, + "loss": 0.3825960159301758, + "step": 3139 + }, + { + "epoch": 0.724002766889555, + "grad_norm": 1.811554812935443, + "learning_rate": 1.513693912479376e-06, + "loss": 0.5582098960876465, + "step": 3140 + }, + { 
+ "epoch": 0.7242333410191376, + "grad_norm": 1.4229587145535472, + "learning_rate": 1.5133667630993983e-06, + "loss": 0.4079757630825043, + "step": 3141 + }, + { + "epoch": 0.7244639151487203, + "grad_norm": 1.3307764336864334, + "learning_rate": 1.513039539096359e-06, + "loss": 0.4996449947357178, + "step": 3142 + }, + { + "epoch": 0.7246944892783029, + "grad_norm": 1.2360600034220603, + "learning_rate": 1.5127122405178233e-06, + "loss": 0.4822157323360443, + "step": 3143 + }, + { + "epoch": 0.7249250634078857, + "grad_norm": 1.2687974509229507, + "learning_rate": 1.512384867411367e-06, + "loss": 0.43123728036880493, + "step": 3144 + }, + { + "epoch": 0.7251556375374683, + "grad_norm": 1.2723246094506335, + "learning_rate": 1.5120574198245776e-06, + "loss": 0.4942808151245117, + "step": 3145 + }, + { + "epoch": 0.725386211667051, + "grad_norm": 1.1117112525626116, + "learning_rate": 1.5117298978050525e-06, + "loss": 0.49165093898773193, + "step": 3146 + }, + { + "epoch": 0.7256167857966336, + "grad_norm": 1.2668452294382095, + "learning_rate": 1.5114023014004008e-06, + "loss": 0.4700804352760315, + "step": 3147 + }, + { + "epoch": 0.7258473599262163, + "grad_norm": 1.9638712043686382, + "learning_rate": 1.5110746306582413e-06, + "loss": 0.4703143835067749, + "step": 3148 + }, + { + "epoch": 0.7260779340557989, + "grad_norm": 1.2418379131661055, + "learning_rate": 1.5107468856262048e-06, + "loss": 0.47312211990356445, + "step": 3149 + }, + { + "epoch": 0.7263085081853816, + "grad_norm": 1.3558937860977873, + "learning_rate": 1.5104190663519323e-06, + "loss": 0.49607813358306885, + "step": 3150 + }, + { + "epoch": 0.7265390823149642, + "grad_norm": 1.2747447528869889, + "learning_rate": 1.5100911728830754e-06, + "loss": 0.4401499629020691, + "step": 3151 + }, + { + "epoch": 0.7267696564445469, + "grad_norm": 1.3050498169083122, + "learning_rate": 1.5097632052672973e-06, + "loss": 0.4979579448699951, + "step": 3152 + }, + { + "epoch": 0.7270002305741295, + 
"grad_norm": 1.1477032098667286, + "learning_rate": 1.5094351635522706e-06, + "loss": 0.42917048931121826, + "step": 3153 + }, + { + "epoch": 0.7272308047037123, + "grad_norm": 1.2688450847611672, + "learning_rate": 1.50910704778568e-06, + "loss": 0.41664260625839233, + "step": 3154 + }, + { + "epoch": 0.7274613788332949, + "grad_norm": 1.4083630490412662, + "learning_rate": 1.5087788580152206e-06, + "loss": 0.5000253915786743, + "step": 3155 + }, + { + "epoch": 0.7276919529628776, + "grad_norm": 1.2424572303309531, + "learning_rate": 1.5084505942885976e-06, + "loss": 0.5075093507766724, + "step": 3156 + }, + { + "epoch": 0.7279225270924602, + "grad_norm": 1.319578470826436, + "learning_rate": 1.508122256653528e-06, + "loss": 0.44975680112838745, + "step": 3157 + }, + { + "epoch": 0.7281531012220429, + "grad_norm": 1.1450711263341298, + "learning_rate": 1.5077938451577383e-06, + "loss": 0.44494926929473877, + "step": 3158 + }, + { + "epoch": 0.7283836753516255, + "grad_norm": 1.3333716905743178, + "learning_rate": 1.5074653598489673e-06, + "loss": 0.5664352178573608, + "step": 3159 + }, + { + "epoch": 0.7286142494812082, + "grad_norm": 1.1840094617058035, + "learning_rate": 1.507136800774963e-06, + "loss": 0.5694705247879028, + "step": 3160 + }, + { + "epoch": 0.7288448236107908, + "grad_norm": 1.5658434570152957, + "learning_rate": 1.506808167983485e-06, + "loss": 0.5121151804924011, + "step": 3161 + }, + { + "epoch": 0.7290753977403736, + "grad_norm": 1.3559529766390859, + "learning_rate": 1.5064794615223034e-06, + "loss": 0.45935380458831787, + "step": 3162 + }, + { + "epoch": 0.7293059718699562, + "grad_norm": 1.2036749528520703, + "learning_rate": 1.506150681439199e-06, + "loss": 0.517521858215332, + "step": 3163 + }, + { + "epoch": 0.7295365459995389, + "grad_norm": 1.271352713883254, + "learning_rate": 1.5058218277819638e-06, + "loss": 0.5078546404838562, + "step": 3164 + }, + { + "epoch": 0.7297671201291215, + "grad_norm": 1.4877111530715366, + 
"learning_rate": 1.5054929005983992e-06, + "loss": 0.47892552614212036, + "step": 3165 + }, + { + "epoch": 0.7299976942587042, + "grad_norm": 1.5569470487033794, + "learning_rate": 1.5051638999363185e-06, + "loss": 0.48825597763061523, + "step": 3166 + }, + { + "epoch": 0.7302282683882868, + "grad_norm": 1.2181600327145499, + "learning_rate": 1.5048348258435457e-06, + "loss": 0.488031804561615, + "step": 3167 + }, + { + "epoch": 0.7304588425178695, + "grad_norm": 1.178638754387744, + "learning_rate": 1.5045056783679143e-06, + "loss": 0.4669504761695862, + "step": 3168 + }, + { + "epoch": 0.7306894166474521, + "grad_norm": 1.364305786110939, + "learning_rate": 1.5041764575572695e-06, + "loss": 0.45620614290237427, + "step": 3169 + }, + { + "epoch": 0.7309199907770348, + "grad_norm": 1.4607481202185084, + "learning_rate": 1.5038471634594667e-06, + "loss": 0.4271177649497986, + "step": 3170 + }, + { + "epoch": 0.7311505649066175, + "grad_norm": 1.4441980354968733, + "learning_rate": 1.5035177961223726e-06, + "loss": 0.5170531272888184, + "step": 3171 + }, + { + "epoch": 0.7313811390362002, + "grad_norm": 1.046719642579895, + "learning_rate": 1.5031883555938638e-06, + "loss": 0.4261493682861328, + "step": 3172 + }, + { + "epoch": 0.7316117131657828, + "grad_norm": 1.4357281868096983, + "learning_rate": 1.502858841921828e-06, + "loss": 0.4958994686603546, + "step": 3173 + }, + { + "epoch": 0.7318422872953655, + "grad_norm": 1.631538220078115, + "learning_rate": 1.502529255154163e-06, + "loss": 0.49798572063446045, + "step": 3174 + }, + { + "epoch": 0.7320728614249481, + "grad_norm": 1.3524076496726538, + "learning_rate": 1.502199595338778e-06, + "loss": 0.4067850708961487, + "step": 3175 + }, + { + "epoch": 0.7323034355545308, + "grad_norm": 1.2000506588677564, + "learning_rate": 1.5018698625235916e-06, + "loss": 0.4680994153022766, + "step": 3176 + }, + { + "epoch": 0.7325340096841134, + "grad_norm": 1.3054261583860276, + "learning_rate": 1.501540056756535e-06, + 
"loss": 0.49181580543518066, + "step": 3177 + }, + { + "epoch": 0.7327645838136961, + "grad_norm": 1.485479754545564, + "learning_rate": 1.501210178085548e-06, + "loss": 0.5425546169281006, + "step": 3178 + }, + { + "epoch": 0.7329951579432787, + "grad_norm": 1.1514309763496005, + "learning_rate": 1.500880226558582e-06, + "loss": 0.4869355261325836, + "step": 3179 + }, + { + "epoch": 0.7332257320728615, + "grad_norm": 1.5737536993523387, + "learning_rate": 1.500550202223599e-06, + "loss": 0.5157885551452637, + "step": 3180 + }, + { + "epoch": 0.7334563062024441, + "grad_norm": 1.4471157017235972, + "learning_rate": 1.5002201051285707e-06, + "loss": 0.528350293636322, + "step": 3181 + }, + { + "epoch": 0.7336868803320268, + "grad_norm": 1.0924579051997452, + "learning_rate": 1.499889935321481e-06, + "loss": 0.3963279128074646, + "step": 3182 + }, + { + "epoch": 0.7339174544616094, + "grad_norm": 1.0536411378011648, + "learning_rate": 1.499559692850323e-06, + "loss": 0.36777108907699585, + "step": 3183 + }, + { + "epoch": 0.7341480285911921, + "grad_norm": 1.3572066258310391, + "learning_rate": 1.4992293777631004e-06, + "loss": 0.4592905044555664, + "step": 3184 + }, + { + "epoch": 0.7343786027207747, + "grad_norm": 1.3801194879873266, + "learning_rate": 1.4988989901078285e-06, + "loss": 0.458257257938385, + "step": 3185 + }, + { + "epoch": 0.7346091768503574, + "grad_norm": 1.2823442631336313, + "learning_rate": 1.4985685299325316e-06, + "loss": 0.4844989478588104, + "step": 3186 + }, + { + "epoch": 0.73483975097994, + "grad_norm": 1.3019212093413413, + "learning_rate": 1.498237997285247e-06, + "loss": 0.381417453289032, + "step": 3187 + }, + { + "epoch": 0.7350703251095227, + "grad_norm": 1.267517645310936, + "learning_rate": 1.4979073922140196e-06, + "loss": 0.42452555894851685, + "step": 3188 + }, + { + "epoch": 0.7353008992391054, + "grad_norm": 1.2143530957836637, + "learning_rate": 1.4975767147669063e-06, + "loss": 0.4660685956478119, + "step": 3189 + }, + { + 
"epoch": 0.7355314733686881, + "grad_norm": 1.243568614271109, + "learning_rate": 1.4972459649919748e-06, + "loss": 0.4332653880119324, + "step": 3190 + }, + { + "epoch": 0.7357620474982707, + "grad_norm": 1.4818958085574696, + "learning_rate": 1.496915142937303e-06, + "loss": 0.5580132007598877, + "step": 3191 + }, + { + "epoch": 0.7359926216278534, + "grad_norm": 1.102415574688255, + "learning_rate": 1.4965842486509792e-06, + "loss": 0.43711793422698975, + "step": 3192 + }, + { + "epoch": 0.736223195757436, + "grad_norm": 1.1786805187530485, + "learning_rate": 1.496253282181102e-06, + "loss": 0.44969767332077026, + "step": 3193 + }, + { + "epoch": 0.7364537698870187, + "grad_norm": 1.5017804708887366, + "learning_rate": 1.4959222435757809e-06, + "loss": 0.5288668870925903, + "step": 3194 + }, + { + "epoch": 0.7366843440166013, + "grad_norm": 1.2442315862489326, + "learning_rate": 1.4955911328831353e-06, + "loss": 0.45993220806121826, + "step": 3195 + }, + { + "epoch": 0.736914918146184, + "grad_norm": 1.6618645292728147, + "learning_rate": 1.4952599501512963e-06, + "loss": 0.5360512733459473, + "step": 3196 + }, + { + "epoch": 0.7371454922757666, + "grad_norm": 1.2833906478614454, + "learning_rate": 1.4949286954284044e-06, + "loss": 0.3923282325267792, + "step": 3197 + }, + { + "epoch": 0.7373760664053494, + "grad_norm": 1.2830570803742403, + "learning_rate": 1.4945973687626103e-06, + "loss": 0.5051449537277222, + "step": 3198 + }, + { + "epoch": 0.737606640534932, + "grad_norm": 1.288727241344276, + "learning_rate": 1.4942659702020763e-06, + "loss": 0.5035187602043152, + "step": 3199 + }, + { + "epoch": 0.7378372146645147, + "grad_norm": 1.1929311231536464, + "learning_rate": 1.4939344997949742e-06, + "loss": 0.4922195076942444, + "step": 3200 + }, + { + "epoch": 0.7380677887940973, + "grad_norm": 1.1654414900260779, + "learning_rate": 1.4936029575894865e-06, + "loss": 0.49664247035980225, + "step": 3201 + }, + { + "epoch": 0.73829836292368, + "grad_norm": 
1.2090144084254086, + "learning_rate": 1.4932713436338065e-06, + "loss": 0.4240155816078186, + "step": 3202 + }, + { + "epoch": 0.7385289370532626, + "grad_norm": 1.150655085488804, + "learning_rate": 1.4929396579761376e-06, + "loss": 0.3830781579017639, + "step": 3203 + }, + { + "epoch": 0.7387595111828453, + "grad_norm": 1.2626520886498587, + "learning_rate": 1.4926079006646936e-06, + "loss": 0.37983447313308716, + "step": 3204 + }, + { + "epoch": 0.7389900853124279, + "grad_norm": 1.37294258180721, + "learning_rate": 1.4922760717476989e-06, + "loss": 0.4680769443511963, + "step": 3205 + }, + { + "epoch": 0.7392206594420107, + "grad_norm": 1.0992782157194299, + "learning_rate": 1.4919441712733878e-06, + "loss": 0.3801664710044861, + "step": 3206 + }, + { + "epoch": 0.7394512335715933, + "grad_norm": 1.2101909370157682, + "learning_rate": 1.4916121992900062e-06, + "loss": 0.5506627559661865, + "step": 3207 + }, + { + "epoch": 0.739681807701176, + "grad_norm": 1.4326210599966231, + "learning_rate": 1.4912801558458087e-06, + "loss": 0.4976215660572052, + "step": 3208 + }, + { + "epoch": 0.7399123818307586, + "grad_norm": 1.269851030633043, + "learning_rate": 1.4909480409890615e-06, + "loss": 0.42806485295295715, + "step": 3209 + }, + { + "epoch": 0.7401429559603413, + "grad_norm": 1.5738327378318604, + "learning_rate": 1.4906158547680413e-06, + "loss": 0.3850712180137634, + "step": 3210 + }, + { + "epoch": 0.7403735300899239, + "grad_norm": 1.1706966056418486, + "learning_rate": 1.4902835972310342e-06, + "loss": 0.4356945753097534, + "step": 3211 + }, + { + "epoch": 0.7406041042195066, + "grad_norm": 1.3196733008465567, + "learning_rate": 1.4899512684263373e-06, + "loss": 0.4806904196739197, + "step": 3212 + }, + { + "epoch": 0.7408346783490892, + "grad_norm": 1.6634902313002624, + "learning_rate": 1.489618868402258e-06, + "loss": 0.544597327709198, + "step": 3213 + }, + { + "epoch": 0.7410652524786719, + "grad_norm": 1.2400106880376924, + "learning_rate": 
1.4892863972071141e-06, + "loss": 0.39847469329833984, + "step": 3214 + }, + { + "epoch": 0.7412958266082545, + "grad_norm": 1.165782132875825, + "learning_rate": 1.4889538548892336e-06, + "loss": 0.4959847331047058, + "step": 3215 + }, + { + "epoch": 0.7415264007378373, + "grad_norm": 1.1727701470106202, + "learning_rate": 1.488621241496955e-06, + "loss": 0.3839089870452881, + "step": 3216 + }, + { + "epoch": 0.7417569748674199, + "grad_norm": 1.4119004491894294, + "learning_rate": 1.4882885570786266e-06, + "loss": 0.5187599658966064, + "step": 3217 + }, + { + "epoch": 0.7419875489970026, + "grad_norm": 1.1715648701346035, + "learning_rate": 1.4879558016826082e-06, + "loss": 0.45735663175582886, + "step": 3218 + }, + { + "epoch": 0.7422181231265852, + "grad_norm": 1.2093385209256575, + "learning_rate": 1.4876229753572687e-06, + "loss": 0.5635267496109009, + "step": 3219 + }, + { + "epoch": 0.7424486972561679, + "grad_norm": 1.5737635031230153, + "learning_rate": 1.4872900781509876e-06, + "loss": 0.5255833268165588, + "step": 3220 + }, + { + "epoch": 0.7426792713857505, + "grad_norm": 1.3608013352784492, + "learning_rate": 1.486957110112155e-06, + "loss": 0.4563497304916382, + "step": 3221 + }, + { + "epoch": 0.7429098455153332, + "grad_norm": 1.2494840959741684, + "learning_rate": 1.4866240712891714e-06, + "loss": 0.3737669885158539, + "step": 3222 + }, + { + "epoch": 0.7431404196449158, + "grad_norm": 1.3341042787752078, + "learning_rate": 1.4862909617304473e-06, + "loss": 0.48965659737586975, + "step": 3223 + }, + { + "epoch": 0.7433709937744986, + "grad_norm": 1.138792861067833, + "learning_rate": 1.4859577814844036e-06, + "loss": 0.40867483615875244, + "step": 3224 + }, + { + "epoch": 0.7436015679040812, + "grad_norm": 1.6873709244395776, + "learning_rate": 1.4856245305994711e-06, + "loss": 0.5870566368103027, + "step": 3225 + }, + { + "epoch": 0.7438321420336638, + "grad_norm": 1.9479920905112817, + "learning_rate": 1.4852912091240914e-06, + "loss": 
0.5424025654792786, + "step": 3226 + }, + { + "epoch": 0.7440627161632465, + "grad_norm": 1.3117337551828157, + "learning_rate": 1.4849578171067166e-06, + "loss": 0.5305285453796387, + "step": 3227 + }, + { + "epoch": 0.7442932902928291, + "grad_norm": 1.6524409541791285, + "learning_rate": 1.4846243545958078e-06, + "loss": 0.4189227819442749, + "step": 3228 + }, + { + "epoch": 0.7445238644224118, + "grad_norm": 1.3163917938675591, + "learning_rate": 1.4842908216398379e-06, + "loss": 0.44568121433258057, + "step": 3229 + }, + { + "epoch": 0.7447544385519944, + "grad_norm": 1.57546318763007, + "learning_rate": 1.4839572182872883e-06, + "loss": 0.5177523493766785, + "step": 3230 + }, + { + "epoch": 0.7449850126815771, + "grad_norm": 2.0231485633083213, + "learning_rate": 1.4836235445866528e-06, + "loss": 0.5100630521774292, + "step": 3231 + }, + { + "epoch": 0.7452155868111597, + "grad_norm": 1.2988766977840327, + "learning_rate": 1.4832898005864336e-06, + "loss": 0.45731791853904724, + "step": 3232 + }, + { + "epoch": 0.7454461609407425, + "grad_norm": 1.4418312758556044, + "learning_rate": 1.4829559863351437e-06, + "loss": 0.5161736011505127, + "step": 3233 + }, + { + "epoch": 0.7456767350703251, + "grad_norm": 1.2131599613200943, + "learning_rate": 1.4826221018813067e-06, + "loss": 0.4778611660003662, + "step": 3234 + }, + { + "epoch": 0.7459073091999078, + "grad_norm": 1.208766404583587, + "learning_rate": 1.482288147273456e-06, + "loss": 0.467506468296051, + "step": 3235 + }, + { + "epoch": 0.7461378833294904, + "grad_norm": 1.3564852786094337, + "learning_rate": 1.4819541225601352e-06, + "loss": 0.5061084032058716, + "step": 3236 + }, + { + "epoch": 0.7463684574590731, + "grad_norm": 1.3693293129226278, + "learning_rate": 1.4816200277898983e-06, + "loss": 0.5066365599632263, + "step": 3237 + }, + { + "epoch": 0.7465990315886557, + "grad_norm": 1.2091939411250054, + "learning_rate": 1.4812858630113093e-06, + "loss": 0.44285398721694946, + "step": 3238 + }, + { + 
"epoch": 0.7468296057182384, + "grad_norm": 1.3395886619598594, + "learning_rate": 1.4809516282729426e-06, + "loss": 0.5325936079025269, + "step": 3239 + }, + { + "epoch": 0.747060179847821, + "grad_norm": 1.2575363206535257, + "learning_rate": 1.4806173236233818e-06, + "loss": 0.37296950817108154, + "step": 3240 + }, + { + "epoch": 0.7472907539774037, + "grad_norm": 1.3466058050144787, + "learning_rate": 1.4802829491112228e-06, + "loss": 0.4596887230873108, + "step": 3241 + }, + { + "epoch": 0.7475213281069863, + "grad_norm": 1.4791727382559166, + "learning_rate": 1.4799485047850693e-06, + "loss": 0.4344385266304016, + "step": 3242 + }, + { + "epoch": 0.7477519022365691, + "grad_norm": 1.235031250671636, + "learning_rate": 1.4796139906935365e-06, + "loss": 0.458631306886673, + "step": 3243 + }, + { + "epoch": 0.7479824763661517, + "grad_norm": 1.3676048590005543, + "learning_rate": 1.4792794068852494e-06, + "loss": 0.5425032377243042, + "step": 3244 + }, + { + "epoch": 0.7482130504957344, + "grad_norm": 1.1764717045773245, + "learning_rate": 1.478944753408843e-06, + "loss": 0.4240065813064575, + "step": 3245 + }, + { + "epoch": 0.748443624625317, + "grad_norm": 1.3527342191314002, + "learning_rate": 1.478610030312963e-06, + "loss": 0.5533365607261658, + "step": 3246 + }, + { + "epoch": 0.7486741987548997, + "grad_norm": 1.4574041701217884, + "learning_rate": 1.4782752376462647e-06, + "loss": 0.4089345335960388, + "step": 3247 + }, + { + "epoch": 0.7489047728844823, + "grad_norm": 1.3793731191813918, + "learning_rate": 1.4779403754574131e-06, + "loss": 0.5098259449005127, + "step": 3248 + }, + { + "epoch": 0.749135347014065, + "grad_norm": 1.3041128935188901, + "learning_rate": 1.4776054437950842e-06, + "loss": 0.4615677297115326, + "step": 3249 + }, + { + "epoch": 0.7493659211436476, + "grad_norm": 1.3216071057711354, + "learning_rate": 1.4772704427079639e-06, + "loss": 0.460266649723053, + "step": 3250 + }, + { + "epoch": 0.7495964952732304, + "grad_norm": 
1.4054347579351087, + "learning_rate": 1.4769353722447476e-06, + "loss": 0.4727064371109009, + "step": 3251 + }, + { + "epoch": 0.749827069402813, + "grad_norm": 1.3954753679563598, + "learning_rate": 1.4766002324541411e-06, + "loss": 0.4733152985572815, + "step": 3252 + }, + { + "epoch": 0.7500576435323957, + "grad_norm": 1.408517900798552, + "learning_rate": 1.4762650233848609e-06, + "loss": 0.5055218935012817, + "step": 3253 + }, + { + "epoch": 0.7502882176619783, + "grad_norm": 1.3285058616446128, + "learning_rate": 1.4759297450856324e-06, + "loss": 0.6129124164581299, + "step": 3254 + }, + { + "epoch": 0.750518791791561, + "grad_norm": 1.6354094862337523, + "learning_rate": 1.4755943976051926e-06, + "loss": 0.46197545528411865, + "step": 3255 + }, + { + "epoch": 0.7507493659211436, + "grad_norm": 1.3239897164772563, + "learning_rate": 1.4752589809922868e-06, + "loss": 0.5227653980255127, + "step": 3256 + }, + { + "epoch": 0.7509799400507263, + "grad_norm": 1.4638577740242362, + "learning_rate": 1.4749234952956715e-06, + "loss": 0.5189518928527832, + "step": 3257 + }, + { + "epoch": 0.7512105141803089, + "grad_norm": 1.2059107130307087, + "learning_rate": 1.474587940564113e-06, + "loss": 0.4850584864616394, + "step": 3258 + }, + { + "epoch": 0.7514410883098916, + "grad_norm": 1.4809027704015267, + "learning_rate": 1.4742523168463876e-06, + "loss": 0.5218943357467651, + "step": 3259 + }, + { + "epoch": 0.7516716624394743, + "grad_norm": 1.130064311367936, + "learning_rate": 1.4739166241912814e-06, + "loss": 0.4311223030090332, + "step": 3260 + }, + { + "epoch": 0.751902236569057, + "grad_norm": 1.372801682112421, + "learning_rate": 1.473580862647591e-06, + "loss": 0.525306224822998, + "step": 3261 + }, + { + "epoch": 0.7521328106986396, + "grad_norm": 1.291063350632538, + "learning_rate": 1.4732450322641225e-06, + "loss": 0.506609320640564, + "step": 3262 + }, + { + "epoch": 0.7523633848282223, + "grad_norm": 1.4043846834415283, + "learning_rate": 
1.4729091330896926e-06, + "loss": 0.5477846264839172, + "step": 3263 + }, + { + "epoch": 0.7525939589578049, + "grad_norm": 1.1342853276703964, + "learning_rate": 1.4725731651731268e-06, + "loss": 0.48802629113197327, + "step": 3264 + }, + { + "epoch": 0.7528245330873876, + "grad_norm": 1.5090127096652195, + "learning_rate": 1.4722371285632626e-06, + "loss": 0.4774906635284424, + "step": 3265 + }, + { + "epoch": 0.7530551072169702, + "grad_norm": 1.4537920297241385, + "learning_rate": 1.4719010233089458e-06, + "loss": 0.4220488667488098, + "step": 3266 + }, + { + "epoch": 0.7532856813465529, + "grad_norm": 1.441465153643324, + "learning_rate": 1.4715648494590324e-06, + "loss": 0.43912187218666077, + "step": 3267 + }, + { + "epoch": 0.7535162554761355, + "grad_norm": 1.3653901674246531, + "learning_rate": 1.4712286070623892e-06, + "loss": 0.5302494764328003, + "step": 3268 + }, + { + "epoch": 0.7537468296057183, + "grad_norm": 1.3282339539348487, + "learning_rate": 1.4708922961678923e-06, + "loss": 0.4800306260585785, + "step": 3269 + }, + { + "epoch": 0.7539774037353009, + "grad_norm": 1.2634165352126685, + "learning_rate": 1.4705559168244275e-06, + "loss": 0.3993161618709564, + "step": 3270 + }, + { + "epoch": 0.7542079778648836, + "grad_norm": 1.446141365903489, + "learning_rate": 1.4702194690808916e-06, + "loss": 0.37037837505340576, + "step": 3271 + }, + { + "epoch": 0.7544385519944662, + "grad_norm": 1.3105522613811469, + "learning_rate": 1.4698829529861898e-06, + "loss": 0.44288602471351624, + "step": 3272 + }, + { + "epoch": 0.7546691261240489, + "grad_norm": 1.542566998549956, + "learning_rate": 1.469546368589239e-06, + "loss": 0.5480727553367615, + "step": 3273 + }, + { + "epoch": 0.7548997002536315, + "grad_norm": 1.5093924463506492, + "learning_rate": 1.4692097159389649e-06, + "loss": 0.4964104890823364, + "step": 3274 + }, + { + "epoch": 0.7551302743832142, + "grad_norm": 1.5912503319666471, + "learning_rate": 1.4688729950843033e-06, + "loss": 
0.4744144082069397, + "step": 3275 + }, + { + "epoch": 0.7553608485127968, + "grad_norm": 1.1258853516330976, + "learning_rate": 1.4685362060741997e-06, + "loss": 0.44675350189208984, + "step": 3276 + }, + { + "epoch": 0.7555914226423796, + "grad_norm": 1.4768191837188436, + "learning_rate": 1.46819934895761e-06, + "loss": 0.45261216163635254, + "step": 3277 + }, + { + "epoch": 0.7558219967719622, + "grad_norm": 1.3183121513891758, + "learning_rate": 1.4678624237835005e-06, + "loss": 0.4180977940559387, + "step": 3278 + }, + { + "epoch": 0.7560525709015449, + "grad_norm": 1.34629761070606, + "learning_rate": 1.4675254306008456e-06, + "loss": 0.39477843046188354, + "step": 3279 + }, + { + "epoch": 0.7562831450311275, + "grad_norm": 1.439585323315283, + "learning_rate": 1.467188369458631e-06, + "loss": 0.5033801198005676, + "step": 3280 + }, + { + "epoch": 0.7565137191607102, + "grad_norm": 1.3522884656136929, + "learning_rate": 1.4668512404058527e-06, + "loss": 0.5719846487045288, + "step": 3281 + }, + { + "epoch": 0.7567442932902928, + "grad_norm": 1.6993262990855147, + "learning_rate": 1.4665140434915147e-06, + "loss": 0.5198945999145508, + "step": 3282 + }, + { + "epoch": 0.7569748674198755, + "grad_norm": 1.6486008286234453, + "learning_rate": 1.4661767787646326e-06, + "loss": 0.4641912579536438, + "step": 3283 + }, + { + "epoch": 0.7572054415494581, + "grad_norm": 1.542363438136225, + "learning_rate": 1.4658394462742309e-06, + "loss": 0.44070225954055786, + "step": 3284 + }, + { + "epoch": 0.7574360156790408, + "grad_norm": 1.1923089532877131, + "learning_rate": 1.465502046069345e-06, + "loss": 0.4324581027030945, + "step": 3285 + }, + { + "epoch": 0.7576665898086234, + "grad_norm": 1.5168087965785, + "learning_rate": 1.4651645781990187e-06, + "loss": 0.5789060592651367, + "step": 3286 + }, + { + "epoch": 0.7578971639382062, + "grad_norm": 1.7886030443223944, + "learning_rate": 1.4648270427123068e-06, + "loss": 0.45642149448394775, + "step": 3287 + }, + { + 
"epoch": 0.7581277380677888, + "grad_norm": 1.222780244920245, + "learning_rate": 1.4644894396582732e-06, + "loss": 0.4587763547897339, + "step": 3288 + }, + { + "epoch": 0.7583583121973715, + "grad_norm": 1.570757900264253, + "learning_rate": 1.4641517690859924e-06, + "loss": 0.5472866892814636, + "step": 3289 + }, + { + "epoch": 0.7585888863269541, + "grad_norm": 1.4662287757114318, + "learning_rate": 1.4638140310445476e-06, + "loss": 0.5274207592010498, + "step": 3290 + }, + { + "epoch": 0.7588194604565368, + "grad_norm": 1.5317060576828687, + "learning_rate": 1.4634762255830326e-06, + "loss": 0.46280741691589355, + "step": 3291 + }, + { + "epoch": 0.7590500345861194, + "grad_norm": 1.357303550008307, + "learning_rate": 1.4631383527505515e-06, + "loss": 0.5395090579986572, + "step": 3292 + }, + { + "epoch": 0.7592806087157021, + "grad_norm": 1.3556569618907826, + "learning_rate": 1.4628004125962168e-06, + "loss": 0.49923229217529297, + "step": 3293 + }, + { + "epoch": 0.7595111828452847, + "grad_norm": 1.437270857620585, + "learning_rate": 1.462462405169152e-06, + "loss": 0.5414037108421326, + "step": 3294 + }, + { + "epoch": 0.7597417569748675, + "grad_norm": 1.2450139122326453, + "learning_rate": 1.4621243305184895e-06, + "loss": 0.4246688485145569, + "step": 3295 + }, + { + "epoch": 0.7599723311044501, + "grad_norm": 1.2346000309431113, + "learning_rate": 1.461786188693372e-06, + "loss": 0.4997994005680084, + "step": 3296 + }, + { + "epoch": 0.7602029052340328, + "grad_norm": 1.2539682682883548, + "learning_rate": 1.4614479797429523e-06, + "loss": 0.4571123719215393, + "step": 3297 + }, + { + "epoch": 0.7604334793636154, + "grad_norm": 1.3546747118119653, + "learning_rate": 1.4611097037163917e-06, + "loss": 0.5178083181381226, + "step": 3298 + }, + { + "epoch": 0.7606640534931981, + "grad_norm": 1.438807896221459, + "learning_rate": 1.4607713606628625e-06, + "loss": 0.538001298904419, + "step": 3299 + }, + { + "epoch": 0.7608946276227807, + "grad_norm": 
1.6495208547410056, + "learning_rate": 1.4604329506315464e-06, + "loss": 0.45941218733787537, + "step": 3300 + }, + { + "epoch": 0.7611252017523634, + "grad_norm": 1.469904127152949, + "learning_rate": 1.4600944736716344e-06, + "loss": 0.619648277759552, + "step": 3301 + }, + { + "epoch": 0.761355775881946, + "grad_norm": 1.3648924598961014, + "learning_rate": 1.4597559298323281e-06, + "loss": 0.4035170376300812, + "step": 3302 + }, + { + "epoch": 0.7615863500115287, + "grad_norm": 1.4623041349874883, + "learning_rate": 1.4594173191628374e-06, + "loss": 0.48657041788101196, + "step": 3303 + }, + { + "epoch": 0.7618169241411114, + "grad_norm": 1.3486514765257445, + "learning_rate": 1.4590786417123838e-06, + "loss": 0.43324801325798035, + "step": 3304 + }, + { + "epoch": 0.7620474982706941, + "grad_norm": 1.3543990457839288, + "learning_rate": 1.4587398975301968e-06, + "loss": 0.5020644664764404, + "step": 3305 + }, + { + "epoch": 0.7622780724002767, + "grad_norm": 1.4758408294809282, + "learning_rate": 1.4584010866655163e-06, + "loss": 0.4123230576515198, + "step": 3306 + }, + { + "epoch": 0.7625086465298594, + "grad_norm": 1.4629462638568174, + "learning_rate": 1.4580622091675925e-06, + "loss": 0.5110459327697754, + "step": 3307 + }, + { + "epoch": 0.762739220659442, + "grad_norm": 1.3128675599733384, + "learning_rate": 1.4577232650856842e-06, + "loss": 0.3956744074821472, + "step": 3308 + }, + { + "epoch": 0.7629697947890247, + "grad_norm": 1.028092913473986, + "learning_rate": 1.4573842544690602e-06, + "loss": 0.44418880343437195, + "step": 3309 + }, + { + "epoch": 0.7632003689186073, + "grad_norm": 1.2935675774179733, + "learning_rate": 1.4570451773669993e-06, + "loss": 0.46690821647644043, + "step": 3310 + }, + { + "epoch": 0.76343094304819, + "grad_norm": 1.7250402170715877, + "learning_rate": 1.45670603382879e-06, + "loss": 0.5631324052810669, + "step": 3311 + }, + { + "epoch": 0.7636615171777726, + "grad_norm": 1.3197309301962783, + "learning_rate": 
1.4563668239037301e-06, + "loss": 0.42355209589004517, + "step": 3312 + }, + { + "epoch": 0.7638920913073554, + "grad_norm": 1.1819135136971526, + "learning_rate": 1.4560275476411273e-06, + "loss": 0.4509078860282898, + "step": 3313 + }, + { + "epoch": 0.764122665436938, + "grad_norm": 1.2704317123198696, + "learning_rate": 1.4556882050902986e-06, + "loss": 0.48707491159439087, + "step": 3314 + }, + { + "epoch": 0.7643532395665207, + "grad_norm": 1.2817274130067733, + "learning_rate": 1.455348796300571e-06, + "loss": 0.4768955707550049, + "step": 3315 + }, + { + "epoch": 0.7645838136961033, + "grad_norm": 1.1995539933150834, + "learning_rate": 1.4550093213212812e-06, + "loss": 0.44231370091438293, + "step": 3316 + }, + { + "epoch": 0.764814387825686, + "grad_norm": 1.283098801050818, + "learning_rate": 1.4546697802017752e-06, + "loss": 0.41919445991516113, + "step": 3317 + }, + { + "epoch": 0.7650449619552686, + "grad_norm": 1.3370966440445557, + "learning_rate": 1.4543301729914086e-06, + "loss": 0.5004634857177734, + "step": 3318 + }, + { + "epoch": 0.7652755360848513, + "grad_norm": 1.3058062554730827, + "learning_rate": 1.4539904997395467e-06, + "loss": 0.5327651500701904, + "step": 3319 + }, + { + "epoch": 0.7655061102144339, + "grad_norm": 1.2690140519120048, + "learning_rate": 1.4536507604955647e-06, + "loss": 0.4571789801120758, + "step": 3320 + }, + { + "epoch": 0.7657366843440166, + "grad_norm": 1.4712336124149359, + "learning_rate": 1.4533109553088474e-06, + "loss": 0.3989352583885193, + "step": 3321 + }, + { + "epoch": 0.7659672584735993, + "grad_norm": 1.390525487190819, + "learning_rate": 1.452971084228788e-06, + "loss": 0.4661702513694763, + "step": 3322 + }, + { + "epoch": 0.766197832603182, + "grad_norm": 1.4525582608827485, + "learning_rate": 1.4526311473047911e-06, + "loss": 0.5007051825523376, + "step": 3323 + }, + { + "epoch": 0.7664284067327646, + "grad_norm": 1.4087277102322913, + "learning_rate": 1.4522911445862697e-06, + "loss": 
0.44391199946403503, + "step": 3324 + }, + { + "epoch": 0.7666589808623473, + "grad_norm": 1.5508781982933997, + "learning_rate": 1.4519510761226466e-06, + "loss": 0.48606377840042114, + "step": 3325 + }, + { + "epoch": 0.7668895549919299, + "grad_norm": 1.4942248011879364, + "learning_rate": 1.4516109419633543e-06, + "loss": 0.4831564426422119, + "step": 3326 + }, + { + "epoch": 0.7671201291215126, + "grad_norm": 1.2492238673667777, + "learning_rate": 1.4512707421578344e-06, + "loss": 0.5033055543899536, + "step": 3327 + }, + { + "epoch": 0.7673507032510952, + "grad_norm": 1.268639260981401, + "learning_rate": 1.4509304767555385e-06, + "loss": 0.40440869331359863, + "step": 3328 + }, + { + "epoch": 0.7675812773806779, + "grad_norm": 1.154540060885232, + "learning_rate": 1.4505901458059282e-06, + "loss": 0.4281578063964844, + "step": 3329 + }, + { + "epoch": 0.7678118515102605, + "grad_norm": 1.2646658661078, + "learning_rate": 1.4502497493584735e-06, + "loss": 0.45301395654678345, + "step": 3330 + }, + { + "epoch": 0.7680424256398433, + "grad_norm": 1.2708958618179473, + "learning_rate": 1.4499092874626545e-06, + "loss": 0.3971232771873474, + "step": 3331 + }, + { + "epoch": 0.7682729997694259, + "grad_norm": 1.470304815457328, + "learning_rate": 1.4495687601679607e-06, + "loss": 0.45382559299468994, + "step": 3332 + }, + { + "epoch": 0.7685035738990086, + "grad_norm": 1.5230375908041864, + "learning_rate": 1.4492281675238916e-06, + "loss": 0.4101349711418152, + "step": 3333 + }, + { + "epoch": 0.7687341480285912, + "grad_norm": 1.7708001369907398, + "learning_rate": 1.4488875095799555e-06, + "loss": 0.5322436690330505, + "step": 3334 + }, + { + "epoch": 0.7689647221581739, + "grad_norm": 1.4488936734065874, + "learning_rate": 1.4485467863856703e-06, + "loss": 0.5497866272926331, + "step": 3335 + }, + { + "epoch": 0.7691952962877565, + "grad_norm": 1.5286830910755105, + "learning_rate": 1.4482059979905642e-06, + "loss": 0.5088074207305908, + "step": 3336 + }, + { 
+ "epoch": 0.7694258704173391, + "grad_norm": 1.2530470288119384, + "learning_rate": 1.4478651444441736e-06, + "loss": 0.4444946050643921, + "step": 3337 + }, + { + "epoch": 0.7696564445469218, + "grad_norm": 1.1602955966590311, + "learning_rate": 1.4475242257960454e-06, + "loss": 0.41257357597351074, + "step": 3338 + }, + { + "epoch": 0.7698870186765044, + "grad_norm": 1.3512416855290101, + "learning_rate": 1.4471832420957356e-06, + "loss": 0.47933512926101685, + "step": 3339 + }, + { + "epoch": 0.7701175928060872, + "grad_norm": 1.204411185284335, + "learning_rate": 1.4468421933928093e-06, + "loss": 0.41331803798675537, + "step": 3340 + }, + { + "epoch": 0.7703481669356698, + "grad_norm": 1.3617384100749454, + "learning_rate": 1.4465010797368416e-06, + "loss": 0.5047392845153809, + "step": 3341 + }, + { + "epoch": 0.7705787410652525, + "grad_norm": 1.2651645489335748, + "learning_rate": 1.446159901177417e-06, + "loss": 0.5265953540802002, + "step": 3342 + }, + { + "epoch": 0.7708093151948351, + "grad_norm": 1.5538943468041178, + "learning_rate": 1.4458186577641285e-06, + "loss": 0.48366689682006836, + "step": 3343 + }, + { + "epoch": 0.7710398893244178, + "grad_norm": 1.3170443751716914, + "learning_rate": 1.4454773495465805e-06, + "loss": 0.4303058087825775, + "step": 3344 + }, + { + "epoch": 0.7712704634540004, + "grad_norm": 1.2782967712931992, + "learning_rate": 1.4451359765743845e-06, + "loss": 0.44936758279800415, + "step": 3345 + }, + { + "epoch": 0.7715010375835831, + "grad_norm": 1.1273529926323729, + "learning_rate": 1.4447945388971631e-06, + "loss": 0.37891095876693726, + "step": 3346 + }, + { + "epoch": 0.7717316117131657, + "grad_norm": 1.3818395750162065, + "learning_rate": 1.4444530365645477e-06, + "loss": 0.4958759546279907, + "step": 3347 + }, + { + "epoch": 0.7719621858427484, + "grad_norm": 1.2809802910956953, + "learning_rate": 1.4441114696261791e-06, + "loss": 0.5180525183677673, + "step": 3348 + }, + { + "epoch": 0.772192759972331, + 
"grad_norm": 1.3137706702012002, + "learning_rate": 1.4437698381317076e-06, + "loss": 0.4760133624076843, + "step": 3349 + }, + { + "epoch": 0.7724233341019138, + "grad_norm": 1.6019634089420207, + "learning_rate": 1.4434281421307923e-06, + "loss": 0.5095269680023193, + "step": 3350 + }, + { + "epoch": 0.7726539082314964, + "grad_norm": 1.3897770832286553, + "learning_rate": 1.443086381673103e-06, + "loss": 0.41132962703704834, + "step": 3351 + }, + { + "epoch": 0.7728844823610791, + "grad_norm": 2.1191686086439687, + "learning_rate": 1.442744556808317e-06, + "loss": 0.5617398023605347, + "step": 3352 + }, + { + "epoch": 0.7731150564906617, + "grad_norm": 1.3926070515875653, + "learning_rate": 1.4424026675861229e-06, + "loss": 0.4421590566635132, + "step": 3353 + }, + { + "epoch": 0.7733456306202444, + "grad_norm": 1.3079796762796725, + "learning_rate": 1.4420607140562175e-06, + "loss": 0.5533363223075867, + "step": 3354 + }, + { + "epoch": 0.773576204749827, + "grad_norm": 1.2259362177236217, + "learning_rate": 1.441718696268307e-06, + "loss": 0.3703731298446655, + "step": 3355 + }, + { + "epoch": 0.7738067788794097, + "grad_norm": 1.3132566837825874, + "learning_rate": 1.4413766142721074e-06, + "loss": 0.4078833758831024, + "step": 3356 + }, + { + "epoch": 0.7740373530089923, + "grad_norm": 1.3669338987803128, + "learning_rate": 1.4410344681173436e-06, + "loss": 0.47297823429107666, + "step": 3357 + }, + { + "epoch": 0.7742679271385751, + "grad_norm": 1.44476399239333, + "learning_rate": 1.4406922578537501e-06, + "loss": 0.4586789309978485, + "step": 3358 + }, + { + "epoch": 0.7744985012681577, + "grad_norm": 2.005996053014414, + "learning_rate": 1.440349983531071e-06, + "loss": 0.5284359455108643, + "step": 3359 + }, + { + "epoch": 0.7747290753977404, + "grad_norm": 1.453810263762319, + "learning_rate": 1.4400076451990585e-06, + "loss": 0.47153323888778687, + "step": 3360 + }, + { + "epoch": 0.774959649527323, + "grad_norm": 1.277395230723769, + "learning_rate": 
1.4396652429074758e-06, + "loss": 0.3862396478652954, + "step": 3361 + }, + { + "epoch": 0.7751902236569057, + "grad_norm": 1.4585054412515979, + "learning_rate": 1.4393227767060938e-06, + "loss": 0.48918354511260986, + "step": 3362 + }, + { + "epoch": 0.7754207977864883, + "grad_norm": 1.2680408475983538, + "learning_rate": 1.4389802466446942e-06, + "loss": 0.5541480779647827, + "step": 3363 + }, + { + "epoch": 0.775651371916071, + "grad_norm": 1.3507983643401953, + "learning_rate": 1.4386376527730665e-06, + "loss": 0.48972445726394653, + "step": 3364 + }, + { + "epoch": 0.7758819460456536, + "grad_norm": 1.7557497204808084, + "learning_rate": 1.4382949951410109e-06, + "loss": 0.5016083717346191, + "step": 3365 + }, + { + "epoch": 0.7761125201752364, + "grad_norm": 1.3196221720148595, + "learning_rate": 1.4379522737983351e-06, + "loss": 0.40227651596069336, + "step": 3366 + }, + { + "epoch": 0.776343094304819, + "grad_norm": 1.596207218013102, + "learning_rate": 1.4376094887948584e-06, + "loss": 0.42994722723960876, + "step": 3367 + }, + { + "epoch": 0.7765736684344017, + "grad_norm": 1.516975070106083, + "learning_rate": 1.4372666401804073e-06, + "loss": 0.5087350010871887, + "step": 3368 + }, + { + "epoch": 0.7768042425639843, + "grad_norm": 1.2618017709219296, + "learning_rate": 1.4369237280048186e-06, + "loss": 0.39419132471084595, + "step": 3369 + }, + { + "epoch": 0.777034816693567, + "grad_norm": 1.3456260179482487, + "learning_rate": 1.4365807523179376e-06, + "loss": 0.500682532787323, + "step": 3370 + }, + { + "epoch": 0.7772653908231496, + "grad_norm": 1.4316905894274476, + "learning_rate": 1.4362377131696198e-06, + "loss": 0.49243754148483276, + "step": 3371 + }, + { + "epoch": 0.7774959649527323, + "grad_norm": 1.4395314935622772, + "learning_rate": 1.4358946106097295e-06, + "loss": 0.5479283332824707, + "step": 3372 + }, + { + "epoch": 0.7777265390823149, + "grad_norm": 1.08521870178353, + "learning_rate": 1.4355514446881396e-06, + "loss": 
0.43217700719833374, + "step": 3373 + }, + { + "epoch": 0.7779571132118976, + "grad_norm": 1.292406809665349, + "learning_rate": 1.435208215454733e-06, + "loss": 0.5351289510726929, + "step": 3374 + }, + { + "epoch": 0.7781876873414802, + "grad_norm": 1.2023765125576906, + "learning_rate": 1.4348649229594016e-06, + "loss": 0.45523375272750854, + "step": 3375 + }, + { + "epoch": 0.778418261471063, + "grad_norm": 1.1345172738470508, + "learning_rate": 1.4345215672520465e-06, + "loss": 0.49811118841171265, + "step": 3376 + }, + { + "epoch": 0.7786488356006456, + "grad_norm": 1.3017016981868919, + "learning_rate": 1.434178148382578e-06, + "loss": 0.40621131658554077, + "step": 3377 + }, + { + "epoch": 0.7788794097302283, + "grad_norm": 1.322929743849566, + "learning_rate": 1.4338346664009152e-06, + "loss": 0.43339842557907104, + "step": 3378 + }, + { + "epoch": 0.7791099838598109, + "grad_norm": 1.4276417953872829, + "learning_rate": 1.433491121356987e-06, + "loss": 0.4397253096103668, + "step": 3379 + }, + { + "epoch": 0.7793405579893936, + "grad_norm": 1.3957946390360352, + "learning_rate": 1.433147513300731e-06, + "loss": 0.5146217942237854, + "step": 3380 + }, + { + "epoch": 0.7795711321189762, + "grad_norm": 1.3181842447854462, + "learning_rate": 1.432803842282094e-06, + "loss": 0.46328768134117126, + "step": 3381 + }, + { + "epoch": 0.7798017062485589, + "grad_norm": 1.4008272791948313, + "learning_rate": 1.432460108351032e-06, + "loss": 0.47743386030197144, + "step": 3382 + }, + { + "epoch": 0.7800322803781415, + "grad_norm": 1.4765555896470939, + "learning_rate": 1.4321163115575105e-06, + "loss": 0.467747300863266, + "step": 3383 + }, + { + "epoch": 0.7802628545077243, + "grad_norm": 1.2334202034705792, + "learning_rate": 1.431772451951504e-06, + "loss": 0.4269976019859314, + "step": 3384 + }, + { + "epoch": 0.7804934286373069, + "grad_norm": 1.4332482963337814, + "learning_rate": 1.4314285295829956e-06, + "loss": 0.5440881252288818, + "step": 3385 + }, + { + 
"epoch": 0.7807240027668896, + "grad_norm": 1.5634188347498899, + "learning_rate": 1.431084544501978e-06, + "loss": 0.42413994669914246, + "step": 3386 + }, + { + "epoch": 0.7809545768964722, + "grad_norm": 1.250472551312306, + "learning_rate": 1.4307404967584528e-06, + "loss": 0.5563687086105347, + "step": 3387 + }, + { + "epoch": 0.7811851510260549, + "grad_norm": 1.2530390736213655, + "learning_rate": 1.4303963864024314e-06, + "loss": 0.4822027087211609, + "step": 3388 + }, + { + "epoch": 0.7814157251556375, + "grad_norm": 1.265644144731409, + "learning_rate": 1.430052213483933e-06, + "loss": 0.5267205834388733, + "step": 3389 + }, + { + "epoch": 0.7816462992852202, + "grad_norm": 1.464631682134491, + "learning_rate": 1.4297079780529868e-06, + "loss": 0.49257054924964905, + "step": 3390 + }, + { + "epoch": 0.7818768734148028, + "grad_norm": 1.4967498256417051, + "learning_rate": 1.4293636801596314e-06, + "loss": 0.45225608348846436, + "step": 3391 + }, + { + "epoch": 0.7821074475443855, + "grad_norm": 1.3090966398510886, + "learning_rate": 1.4290193198539133e-06, + "loss": 0.4891412854194641, + "step": 3392 + }, + { + "epoch": 0.7823380216739682, + "grad_norm": 1.2913501590758174, + "learning_rate": 1.4286748971858893e-06, + "loss": 0.4411062002182007, + "step": 3393 + }, + { + "epoch": 0.7825685958035509, + "grad_norm": 1.3634871078304074, + "learning_rate": 1.4283304122056242e-06, + "loss": 0.4584164619445801, + "step": 3394 + }, + { + "epoch": 0.7827991699331335, + "grad_norm": 1.2884433704058607, + "learning_rate": 1.4279858649631928e-06, + "loss": 0.46913737058639526, + "step": 3395 + }, + { + "epoch": 0.7830297440627162, + "grad_norm": 1.320207574562506, + "learning_rate": 1.4276412555086786e-06, + "loss": 0.40582767128944397, + "step": 3396 + }, + { + "epoch": 0.7832603181922988, + "grad_norm": 1.4930886994867976, + "learning_rate": 1.4272965838921737e-06, + "loss": 0.5089453458786011, + "step": 3397 + }, + { + "epoch": 0.7834908923218815, + "grad_norm": 
1.3151641529095257, + "learning_rate": 1.4269518501637798e-06, + "loss": 0.4744444489479065, + "step": 3398 + }, + { + "epoch": 0.7837214664514641, + "grad_norm": 1.3271165993445435, + "learning_rate": 1.426607054373608e-06, + "loss": 0.49168163537979126, + "step": 3399 + }, + { + "epoch": 0.7839520405810468, + "grad_norm": 1.4774301348156431, + "learning_rate": 1.4262621965717768e-06, + "loss": 0.4423940181732178, + "step": 3400 + }, + { + "epoch": 0.7841826147106294, + "grad_norm": 1.541226385884193, + "learning_rate": 1.4259172768084152e-06, + "loss": 0.5138403177261353, + "step": 3401 + }, + { + "epoch": 0.7844131888402122, + "grad_norm": 1.5691210214340656, + "learning_rate": 1.425572295133661e-06, + "loss": 0.5248140096664429, + "step": 3402 + }, + { + "epoch": 0.7846437629697948, + "grad_norm": 1.4659537352972094, + "learning_rate": 1.4252272515976607e-06, + "loss": 0.39161059260368347, + "step": 3403 + }, + { + "epoch": 0.7848743370993775, + "grad_norm": 1.307338649596764, + "learning_rate": 1.4248821462505699e-06, + "loss": 0.46826744079589844, + "step": 3404 + }, + { + "epoch": 0.7851049112289601, + "grad_norm": 1.3428424961182877, + "learning_rate": 1.424536979142553e-06, + "loss": 0.4329161047935486, + "step": 3405 + }, + { + "epoch": 0.7853354853585428, + "grad_norm": 1.3831028347986385, + "learning_rate": 1.4241917503237834e-06, + "loss": 0.4691393971443176, + "step": 3406 + }, + { + "epoch": 0.7855660594881254, + "grad_norm": 1.819344171969547, + "learning_rate": 1.423846459844444e-06, + "loss": 0.5130072236061096, + "step": 3407 + }, + { + "epoch": 0.7857966336177081, + "grad_norm": 1.4381134289937085, + "learning_rate": 1.4235011077547264e-06, + "loss": 0.37478166818618774, + "step": 3408 + }, + { + "epoch": 0.7860272077472907, + "grad_norm": 1.1654669583674488, + "learning_rate": 1.4231556941048307e-06, + "loss": 0.46112769842147827, + "step": 3409 + }, + { + "epoch": 0.7862577818768735, + "grad_norm": 1.3711520199030207, + "learning_rate": 
1.422810218944966e-06, + "loss": 0.5095282793045044, + "step": 3410 + }, + { + "epoch": 0.7864883560064561, + "grad_norm": 1.4830709787042864, + "learning_rate": 1.422464682325351e-06, + "loss": 0.4182342290878296, + "step": 3411 + }, + { + "epoch": 0.7867189301360388, + "grad_norm": 1.4898619625675633, + "learning_rate": 1.422119084296213e-06, + "loss": 0.3892830014228821, + "step": 3412 + }, + { + "epoch": 0.7869495042656214, + "grad_norm": 1.655445800570714, + "learning_rate": 1.4217734249077877e-06, + "loss": 0.5294528603553772, + "step": 3413 + }, + { + "epoch": 0.7871800783952041, + "grad_norm": 1.501568458574139, + "learning_rate": 1.4214277042103208e-06, + "loss": 0.471803218126297, + "step": 3414 + }, + { + "epoch": 0.7874106525247867, + "grad_norm": 1.2078819401351728, + "learning_rate": 1.4210819222540662e-06, + "loss": 0.4363842010498047, + "step": 3415 + }, + { + "epoch": 0.7876412266543694, + "grad_norm": 1.191025232167839, + "learning_rate": 1.4207360790892867e-06, + "loss": 0.3834928870201111, + "step": 3416 + }, + { + "epoch": 0.787871800783952, + "grad_norm": 1.342904245190706, + "learning_rate": 1.4203901747662539e-06, + "loss": 0.4639194905757904, + "step": 3417 + }, + { + "epoch": 0.7881023749135347, + "grad_norm": 1.4526860275619324, + "learning_rate": 1.4200442093352486e-06, + "loss": 0.47130632400512695, + "step": 3418 + }, + { + "epoch": 0.7883329490431173, + "grad_norm": 1.2585342771790389, + "learning_rate": 1.4196981828465606e-06, + "loss": 0.4848192632198334, + "step": 3419 + }, + { + "epoch": 0.7885635231727001, + "grad_norm": 1.2424140051596944, + "learning_rate": 1.4193520953504884e-06, + "loss": 0.5137286186218262, + "step": 3420 + }, + { + "epoch": 0.7887940973022827, + "grad_norm": 1.4833943072924853, + "learning_rate": 1.4190059468973385e-06, + "loss": 0.47639960050582886, + "step": 3421 + }, + { + "epoch": 0.7890246714318654, + "grad_norm": 1.3974399628621321, + "learning_rate": 1.418659737537428e-06, + "loss": 
0.4300975799560547, + "step": 3422 + }, + { + "epoch": 0.789255245561448, + "grad_norm": 1.6248920549834995, + "learning_rate": 1.4183134673210817e-06, + "loss": 0.5669160485267639, + "step": 3423 + }, + { + "epoch": 0.7894858196910307, + "grad_norm": 1.3431432318053507, + "learning_rate": 1.4179671362986336e-06, + "loss": 0.4113837480545044, + "step": 3424 + }, + { + "epoch": 0.7897163938206133, + "grad_norm": 1.3611327690280945, + "learning_rate": 1.417620744520426e-06, + "loss": 0.4992315173149109, + "step": 3425 + }, + { + "epoch": 0.789946967950196, + "grad_norm": 1.6418572453635272, + "learning_rate": 1.417274292036811e-06, + "loss": 0.5556696653366089, + "step": 3426 + }, + { + "epoch": 0.7901775420797786, + "grad_norm": 1.367999541896107, + "learning_rate": 1.4169277788981485e-06, + "loss": 0.47911009192466736, + "step": 3427 + }, + { + "epoch": 0.7904081162093614, + "grad_norm": 1.2100320134669527, + "learning_rate": 1.416581205154808e-06, + "loss": 0.45395466685295105, + "step": 3428 + }, + { + "epoch": 0.790638690338944, + "grad_norm": 1.5386887400015699, + "learning_rate": 1.4162345708571674e-06, + "loss": 0.4404561519622803, + "step": 3429 + }, + { + "epoch": 0.7908692644685267, + "grad_norm": 1.3845404606780534, + "learning_rate": 1.4158878760556136e-06, + "loss": 0.5541578531265259, + "step": 3430 + }, + { + "epoch": 0.7910998385981093, + "grad_norm": 1.4234082473199938, + "learning_rate": 1.4155411208005422e-06, + "loss": 0.5517834424972534, + "step": 3431 + }, + { + "epoch": 0.791330412727692, + "grad_norm": 1.2851916229874634, + "learning_rate": 1.4151943051423574e-06, + "loss": 0.42650169134140015, + "step": 3432 + }, + { + "epoch": 0.7915609868572746, + "grad_norm": 1.7886227172970943, + "learning_rate": 1.414847429131472e-06, + "loss": 0.42724043130874634, + "step": 3433 + }, + { + "epoch": 0.7917915609868573, + "grad_norm": 1.3978336018588784, + "learning_rate": 1.414500492818309e-06, + "loss": 0.41757941246032715, + "step": 3434 + }, + { + 
"epoch": 0.7920221351164399, + "grad_norm": 1.4250040620354028, + "learning_rate": 1.4141534962532984e-06, + "loss": 0.47318267822265625, + "step": 3435 + }, + { + "epoch": 0.7922527092460226, + "grad_norm": 1.5092267765141392, + "learning_rate": 1.41380643948688e-06, + "loss": 0.5540967583656311, + "step": 3436 + }, + { + "epoch": 0.7924832833756053, + "grad_norm": 1.2943595959957308, + "learning_rate": 1.4134593225695013e-06, + "loss": 0.4459697902202606, + "step": 3437 + }, + { + "epoch": 0.792713857505188, + "grad_norm": 1.2950911274447663, + "learning_rate": 1.41311214555162e-06, + "loss": 0.5263698101043701, + "step": 3438 + }, + { + "epoch": 0.7929444316347706, + "grad_norm": 1.321260987570187, + "learning_rate": 1.4127649084837016e-06, + "loss": 0.40453940629959106, + "step": 3439 + }, + { + "epoch": 0.7931750057643533, + "grad_norm": 1.4138023773004598, + "learning_rate": 1.412417611416221e-06, + "loss": 0.3859207034111023, + "step": 3440 + }, + { + "epoch": 0.7934055798939359, + "grad_norm": 1.3373104076984894, + "learning_rate": 1.4120702543996603e-06, + "loss": 0.4604511260986328, + "step": 3441 + }, + { + "epoch": 0.7936361540235186, + "grad_norm": 1.2912472996688542, + "learning_rate": 1.411722837484512e-06, + "loss": 0.40292084217071533, + "step": 3442 + }, + { + "epoch": 0.7938667281531012, + "grad_norm": 1.3099743009304052, + "learning_rate": 1.4113753607212766e-06, + "loss": 0.40447625517845154, + "step": 3443 + }, + { + "epoch": 0.7940973022826839, + "grad_norm": 1.1711578682822494, + "learning_rate": 1.4110278241604635e-06, + "loss": 0.48472997546195984, + "step": 3444 + }, + { + "epoch": 0.7943278764122665, + "grad_norm": 1.304688924593958, + "learning_rate": 1.4106802278525902e-06, + "loss": 0.5404670238494873, + "step": 3445 + }, + { + "epoch": 0.7945584505418493, + "grad_norm": 1.2201185877258616, + "learning_rate": 1.4103325718481838e-06, + "loss": 0.5885064005851746, + "step": 3446 + }, + { + "epoch": 0.7947890246714319, + "grad_norm": 
1.2045708529585497, + "learning_rate": 1.4099848561977794e-06, + "loss": 0.47806939482688904, + "step": 3447 + }, + { + "epoch": 0.7950195988010145, + "grad_norm": 1.2183758256079422, + "learning_rate": 1.4096370809519213e-06, + "loss": 0.4247834086418152, + "step": 3448 + }, + { + "epoch": 0.7952501729305972, + "grad_norm": 1.4701805176850054, + "learning_rate": 1.409289246161162e-06, + "loss": 0.508902370929718, + "step": 3449 + }, + { + "epoch": 0.7954807470601798, + "grad_norm": 1.3709386014599791, + "learning_rate": 1.4089413518760626e-06, + "loss": 0.4866124987602234, + "step": 3450 + }, + { + "epoch": 0.7957113211897625, + "grad_norm": 1.4351510328158692, + "learning_rate": 1.408593398147193e-06, + "loss": 0.5168731212615967, + "step": 3451 + }, + { + "epoch": 0.7959418953193451, + "grad_norm": 1.257672253058261, + "learning_rate": 1.4082453850251326e-06, + "loss": 0.5039271712303162, + "step": 3452 + }, + { + "epoch": 0.7961724694489278, + "grad_norm": 1.3767040030777011, + "learning_rate": 1.4078973125604674e-06, + "loss": 0.3660929799079895, + "step": 3453 + }, + { + "epoch": 0.7964030435785104, + "grad_norm": 1.5330992916300397, + "learning_rate": 1.407549180803794e-06, + "loss": 0.514503538608551, + "step": 3454 + }, + { + "epoch": 0.7966336177080932, + "grad_norm": 1.5704286671243526, + "learning_rate": 1.4072009898057172e-06, + "loss": 0.4803028702735901, + "step": 3455 + }, + { + "epoch": 0.7968641918376758, + "grad_norm": 1.2332119133725918, + "learning_rate": 1.4068527396168492e-06, + "loss": 0.43116262555122375, + "step": 3456 + }, + { + "epoch": 0.7970947659672585, + "grad_norm": 1.522287028583898, + "learning_rate": 1.4065044302878125e-06, + "loss": 0.5009680986404419, + "step": 3457 + }, + { + "epoch": 0.7973253400968411, + "grad_norm": 1.1307500814268987, + "learning_rate": 1.406156061869237e-06, + "loss": 0.4047713875770569, + "step": 3458 + }, + { + "epoch": 0.7975559142264238, + "grad_norm": 1.348066090689188, + "learning_rate": 
1.4058076344117615e-06, + "loss": 0.5287230014801025, + "step": 3459 + }, + { + "epoch": 0.7977864883560064, + "grad_norm": 1.7810979263679612, + "learning_rate": 1.4054591479660335e-06, + "loss": 0.5602750778198242, + "step": 3460 + }, + { + "epoch": 0.7980170624855891, + "grad_norm": 1.0587308388288128, + "learning_rate": 1.4051106025827096e-06, + "loss": 0.4178144335746765, + "step": 3461 + }, + { + "epoch": 0.7982476366151717, + "grad_norm": 1.408691487644406, + "learning_rate": 1.4047619983124536e-06, + "loss": 0.5061960220336914, + "step": 3462 + }, + { + "epoch": 0.7984782107447544, + "grad_norm": 1.5043212480263244, + "learning_rate": 1.4044133352059392e-06, + "loss": 0.5091691017150879, + "step": 3463 + }, + { + "epoch": 0.798708784874337, + "grad_norm": 1.3793897642043385, + "learning_rate": 1.4040646133138478e-06, + "loss": 0.5100894570350647, + "step": 3464 + }, + { + "epoch": 0.7989393590039198, + "grad_norm": 1.2188849241203001, + "learning_rate": 1.4037158326868697e-06, + "loss": 0.47493505477905273, + "step": 3465 + }, + { + "epoch": 0.7991699331335024, + "grad_norm": 1.637846674977116, + "learning_rate": 1.4033669933757038e-06, + "loss": 0.5561350584030151, + "step": 3466 + }, + { + "epoch": 0.7994005072630851, + "grad_norm": 1.4971197328143675, + "learning_rate": 1.4030180954310574e-06, + "loss": 0.44552814960479736, + "step": 3467 + }, + { + "epoch": 0.7996310813926677, + "grad_norm": 1.219192969590734, + "learning_rate": 1.4026691389036465e-06, + "loss": 0.4624238908290863, + "step": 3468 + }, + { + "epoch": 0.7998616555222504, + "grad_norm": 1.348458578104898, + "learning_rate": 1.4023201238441951e-06, + "loss": 0.5424448251724243, + "step": 3469 + }, + { + "epoch": 0.800092229651833, + "grad_norm": 1.2410568882309463, + "learning_rate": 1.4019710503034367e-06, + "loss": 0.4629395008087158, + "step": 3470 + }, + { + "epoch": 0.8003228037814157, + "grad_norm": 1.3564725845833965, + "learning_rate": 1.401621918332112e-06, + "loss": 
0.4375717043876648, + "step": 3471 + }, + { + "epoch": 0.8005533779109983, + "grad_norm": 1.5212509367699154, + "learning_rate": 1.401272727980971e-06, + "loss": 0.4419640302658081, + "step": 3472 + }, + { + "epoch": 0.8007839520405811, + "grad_norm": 1.3621301015547722, + "learning_rate": 1.4009234793007724e-06, + "loss": 0.42077577114105225, + "step": 3473 + }, + { + "epoch": 0.8010145261701637, + "grad_norm": 1.394506766094276, + "learning_rate": 1.400574172342283e-06, + "loss": 0.3735182583332062, + "step": 3474 + }, + { + "epoch": 0.8012451002997464, + "grad_norm": 1.3325918102604086, + "learning_rate": 1.4002248071562778e-06, + "loss": 0.4263458251953125, + "step": 3475 + }, + { + "epoch": 0.801475674429329, + "grad_norm": 1.3278985843191269, + "learning_rate": 1.3998753837935406e-06, + "loss": 0.42377904057502747, + "step": 3476 + }, + { + "epoch": 0.8017062485589117, + "grad_norm": 1.4415172635554745, + "learning_rate": 1.399525902304864e-06, + "loss": 0.5017589330673218, + "step": 3477 + }, + { + "epoch": 0.8019368226884943, + "grad_norm": 1.2695777372701094, + "learning_rate": 1.3991763627410485e-06, + "loss": 0.41022592782974243, + "step": 3478 + }, + { + "epoch": 0.802167396818077, + "grad_norm": 1.6097549722001219, + "learning_rate": 1.3988267651529028e-06, + "loss": 0.49957793951034546, + "step": 3479 + }, + { + "epoch": 0.8023979709476596, + "grad_norm": 1.4695518489034636, + "learning_rate": 1.398477109591245e-06, + "loss": 0.5065722465515137, + "step": 3480 + }, + { + "epoch": 0.8026285450772424, + "grad_norm": 1.264735145451503, + "learning_rate": 1.398127396106901e-06, + "loss": 0.4353798031806946, + "step": 3481 + }, + { + "epoch": 0.802859119206825, + "grad_norm": 1.5800938751579423, + "learning_rate": 1.3977776247507049e-06, + "loss": 0.41438236832618713, + "step": 3482 + }, + { + "epoch": 0.8030896933364077, + "grad_norm": 1.2712154799989346, + "learning_rate": 1.3974277955734996e-06, + "loss": 0.4348248839378357, + "step": 3483 + }, + { + 
"epoch": 0.8033202674659903, + "grad_norm": 1.3020033760882643, + "learning_rate": 1.3970779086261363e-06, + "loss": 0.49369150400161743, + "step": 3484 + }, + { + "epoch": 0.803550841595573, + "grad_norm": 1.445427514378273, + "learning_rate": 1.396727963959475e-06, + "loss": 0.5694580078125, + "step": 3485 + }, + { + "epoch": 0.8037814157251556, + "grad_norm": 1.3859575121879733, + "learning_rate": 1.3963779616243834e-06, + "loss": 0.5357070565223694, + "step": 3486 + }, + { + "epoch": 0.8040119898547383, + "grad_norm": 1.3071217267808923, + "learning_rate": 1.3960279016717377e-06, + "loss": 0.41300907731056213, + "step": 3487 + }, + { + "epoch": 0.8042425639843209, + "grad_norm": 1.4713226080636248, + "learning_rate": 1.395677784152423e-06, + "loss": 0.5058030486106873, + "step": 3488 + }, + { + "epoch": 0.8044731381139036, + "grad_norm": 1.394990226330868, + "learning_rate": 1.3953276091173326e-06, + "loss": 0.5225522518157959, + "step": 3489 + }, + { + "epoch": 0.8047037122434862, + "grad_norm": 1.3669211701935395, + "learning_rate": 1.3949773766173675e-06, + "loss": 0.43893736600875854, + "step": 3490 + }, + { + "epoch": 0.804934286373069, + "grad_norm": 1.575168458794386, + "learning_rate": 1.3946270867034375e-06, + "loss": 0.4583659768104553, + "step": 3491 + }, + { + "epoch": 0.8051648605026516, + "grad_norm": 1.2728568882138123, + "learning_rate": 1.394276739426461e-06, + "loss": 0.49550747871398926, + "step": 3492 + }, + { + "epoch": 0.8053954346322343, + "grad_norm": 1.9438900883437185, + "learning_rate": 1.3939263348373648e-06, + "loss": 0.5637674331665039, + "step": 3493 + }, + { + "epoch": 0.8056260087618169, + "grad_norm": 1.3206034443977903, + "learning_rate": 1.3935758729870835e-06, + "loss": 0.4853670299053192, + "step": 3494 + }, + { + "epoch": 0.8058565828913996, + "grad_norm": 1.479029501570459, + "learning_rate": 1.3932253539265603e-06, + "loss": 0.4535500407218933, + "step": 3495 + }, + { + "epoch": 0.8060871570209822, + "grad_norm": 
1.4461411101486477, + "learning_rate": 1.3928747777067464e-06, + "loss": 0.4198870062828064, + "step": 3496 + }, + { + "epoch": 0.8063177311505649, + "grad_norm": 1.3336585529006162, + "learning_rate": 1.392524144378602e-06, + "loss": 0.45773670077323914, + "step": 3497 + }, + { + "epoch": 0.8065483052801475, + "grad_norm": 1.718264798623436, + "learning_rate": 1.3921734539930952e-06, + "loss": 0.45263248682022095, + "step": 3498 + }, + { + "epoch": 0.8067788794097303, + "grad_norm": 1.300886470112164, + "learning_rate": 1.3918227066012025e-06, + "loss": 0.473066508769989, + "step": 3499 + }, + { + "epoch": 0.8070094535393129, + "grad_norm": 1.1261914460441818, + "learning_rate": 1.3914719022539082e-06, + "loss": 0.35737159848213196, + "step": 3500 + }, + { + "epoch": 0.8072400276688956, + "grad_norm": 1.4095537979750905, + "learning_rate": 1.3911210410022054e-06, + "loss": 0.5162703394889832, + "step": 3501 + }, + { + "epoch": 0.8074706017984782, + "grad_norm": 1.494617165800155, + "learning_rate": 1.3907701228970955e-06, + "loss": 0.5347551703453064, + "step": 3502 + }, + { + "epoch": 0.8077011759280609, + "grad_norm": 1.7642790890319513, + "learning_rate": 1.390419147989588e-06, + "loss": 0.4889448881149292, + "step": 3503 + }, + { + "epoch": 0.8079317500576435, + "grad_norm": 1.380092267420659, + "learning_rate": 1.3900681163306999e-06, + "loss": 0.47468650341033936, + "step": 3504 + }, + { + "epoch": 0.8081623241872262, + "grad_norm": 1.4749480234582377, + "learning_rate": 1.3897170279714585e-06, + "loss": 0.43236857652664185, + "step": 3505 + }, + { + "epoch": 0.8083928983168088, + "grad_norm": 1.4419786763918543, + "learning_rate": 1.3893658829628974e-06, + "loss": 0.46778976917266846, + "step": 3506 + }, + { + "epoch": 0.8086234724463915, + "grad_norm": 1.353368455676612, + "learning_rate": 1.389014681356059e-06, + "loss": 0.49447667598724365, + "step": 3507 + }, + { + "epoch": 0.8088540465759742, + "grad_norm": 1.3574196281726325, + "learning_rate": 
1.388663423201994e-06, + "loss": 0.5221220254898071, + "step": 3508 + }, + { + "epoch": 0.8090846207055569, + "grad_norm": 1.8319434066548141, + "learning_rate": 1.3883121085517615e-06, + "loss": 0.5037325620651245, + "step": 3509 + }, + { + "epoch": 0.8093151948351395, + "grad_norm": 1.1547190760847952, + "learning_rate": 1.387960737456429e-06, + "loss": 0.46879589557647705, + "step": 3510 + }, + { + "epoch": 0.8095457689647222, + "grad_norm": 1.3552976314399992, + "learning_rate": 1.387609309967071e-06, + "loss": 0.44216716289520264, + "step": 3511 + }, + { + "epoch": 0.8097763430943048, + "grad_norm": 1.2016377736710804, + "learning_rate": 1.3872578261347716e-06, + "loss": 0.4525749981403351, + "step": 3512 + }, + { + "epoch": 0.8100069172238875, + "grad_norm": 1.3138421579944453, + "learning_rate": 1.3869062860106224e-06, + "loss": 0.44681644439697266, + "step": 3513 + }, + { + "epoch": 0.8102374913534701, + "grad_norm": 1.5030736189155554, + "learning_rate": 1.3865546896457233e-06, + "loss": 0.4162617325782776, + "step": 3514 + }, + { + "epoch": 0.8104680654830528, + "grad_norm": 1.4360914568156404, + "learning_rate": 1.3862030370911827e-06, + "loss": 0.5262776613235474, + "step": 3515 + }, + { + "epoch": 0.8106986396126354, + "grad_norm": 1.3010389916824352, + "learning_rate": 1.3858513283981163e-06, + "loss": 0.48102372884750366, + "step": 3516 + }, + { + "epoch": 0.8109292137422182, + "grad_norm": 1.41037363508679, + "learning_rate": 1.385499563617649e-06, + "loss": 0.46166497468948364, + "step": 3517 + }, + { + "epoch": 0.8111597878718008, + "grad_norm": 1.4145741054815544, + "learning_rate": 1.3851477428009133e-06, + "loss": 0.43523284792900085, + "step": 3518 + }, + { + "epoch": 0.8113903620013835, + "grad_norm": 1.3662294611202825, + "learning_rate": 1.3847958659990497e-06, + "loss": 0.5413048267364502, + "step": 3519 + }, + { + "epoch": 0.8116209361309661, + "grad_norm": 1.1462124150969017, + "learning_rate": 1.3844439332632073e-06, + "loss": 
0.4257383346557617, + "step": 3520 + }, + { + "epoch": 0.8118515102605488, + "grad_norm": 1.5928313905350753, + "learning_rate": 1.3840919446445427e-06, + "loss": 0.4812018871307373, + "step": 3521 + }, + { + "epoch": 0.8120820843901314, + "grad_norm": 1.5231442697754751, + "learning_rate": 1.3837399001942216e-06, + "loss": 0.4890254735946655, + "step": 3522 + }, + { + "epoch": 0.8123126585197141, + "grad_norm": 1.7091323269762855, + "learning_rate": 1.3833877999634166e-06, + "loss": 0.5079991817474365, + "step": 3523 + }, + { + "epoch": 0.8125432326492967, + "grad_norm": 1.6148941470526432, + "learning_rate": 1.3830356440033096e-06, + "loss": 0.44703438878059387, + "step": 3524 + }, + { + "epoch": 0.8127738067788794, + "grad_norm": 1.4685605039032132, + "learning_rate": 1.3826834323650898e-06, + "loss": 0.4218645989894867, + "step": 3525 + }, + { + "epoch": 0.813004380908462, + "grad_norm": 1.585977018929449, + "learning_rate": 1.3823311650999547e-06, + "loss": 0.4544546902179718, + "step": 3526 + }, + { + "epoch": 0.8132349550380448, + "grad_norm": 1.2954656146833265, + "learning_rate": 1.3819788422591099e-06, + "loss": 0.4978422224521637, + "step": 3527 + }, + { + "epoch": 0.8134655291676274, + "grad_norm": 1.3262250095489831, + "learning_rate": 1.3816264638937688e-06, + "loss": 0.42122140526771545, + "step": 3528 + }, + { + "epoch": 0.8136961032972101, + "grad_norm": 1.0995613789441223, + "learning_rate": 1.381274030055154e-06, + "loss": 0.45674729347229004, + "step": 3529 + }, + { + "epoch": 0.8139266774267927, + "grad_norm": 1.5614041042611542, + "learning_rate": 1.3809215407944947e-06, + "loss": 0.5075385570526123, + "step": 3530 + }, + { + "epoch": 0.8141572515563754, + "grad_norm": 1.4231357002591019, + "learning_rate": 1.380568996163029e-06, + "loss": 0.45952552556991577, + "step": 3531 + }, + { + "epoch": 0.814387825685958, + "grad_norm": 1.239122573849665, + "learning_rate": 1.3802163962120025e-06, + "loss": 0.5062624216079712, + "step": 3532 + }, + { + 
"epoch": 0.8146183998155407, + "grad_norm": 1.4910945652834293, + "learning_rate": 1.3798637409926698e-06, + "loss": 0.49294552206993103, + "step": 3533 + }, + { + "epoch": 0.8148489739451233, + "grad_norm": 1.347255149566569, + "learning_rate": 1.3795110305562926e-06, + "loss": 0.4389861822128296, + "step": 3534 + }, + { + "epoch": 0.8150795480747061, + "grad_norm": 1.5704776908584448, + "learning_rate": 1.3791582649541401e-06, + "loss": 0.47733181715011597, + "step": 3535 + }, + { + "epoch": 0.8153101222042887, + "grad_norm": 1.3661823105841888, + "learning_rate": 1.3788054442374918e-06, + "loss": 0.5007725358009338, + "step": 3536 + }, + { + "epoch": 0.8155406963338714, + "grad_norm": 1.617600694156108, + "learning_rate": 1.378452568457633e-06, + "loss": 0.4857913553714752, + "step": 3537 + }, + { + "epoch": 0.815771270463454, + "grad_norm": 1.4509204702050165, + "learning_rate": 1.3780996376658577e-06, + "loss": 0.5330549478530884, + "step": 3538 + }, + { + "epoch": 0.8160018445930367, + "grad_norm": 1.283827597345967, + "learning_rate": 1.3777466519134684e-06, + "loss": 0.45034217834472656, + "step": 3539 + }, + { + "epoch": 0.8162324187226193, + "grad_norm": 1.313177908039173, + "learning_rate": 1.3773936112517746e-06, + "loss": 0.4442213773727417, + "step": 3540 + }, + { + "epoch": 0.816462992852202, + "grad_norm": 1.479375223581317, + "learning_rate": 1.377040515732095e-06, + "loss": 0.5000369548797607, + "step": 3541 + }, + { + "epoch": 0.8166935669817846, + "grad_norm": 1.3177535399447533, + "learning_rate": 1.3766873654057551e-06, + "loss": 0.5117775797843933, + "step": 3542 + }, + { + "epoch": 0.8169241411113674, + "grad_norm": 1.4163300067502158, + "learning_rate": 1.3763341603240889e-06, + "loss": 0.431648850440979, + "step": 3543 + }, + { + "epoch": 0.81715471524095, + "grad_norm": 1.230235072546183, + "learning_rate": 1.3759809005384387e-06, + "loss": 0.39463019371032715, + "step": 3544 + }, + { + "epoch": 0.8173852893705327, + "grad_norm": 
1.4412595458793114, + "learning_rate": 1.375627586100154e-06, + "loss": 0.38739651441574097, + "step": 3545 + }, + { + "epoch": 0.8176158635001153, + "grad_norm": 1.1409525851258608, + "learning_rate": 1.3752742170605927e-06, + "loss": 0.3973360061645508, + "step": 3546 + }, + { + "epoch": 0.817846437629698, + "grad_norm": 1.3276328290635366, + "learning_rate": 1.3749207934711207e-06, + "loss": 0.4791724383831024, + "step": 3547 + }, + { + "epoch": 0.8180770117592806, + "grad_norm": 1.2963607541712077, + "learning_rate": 1.3745673153831114e-06, + "loss": 0.5245905518531799, + "step": 3548 + }, + { + "epoch": 0.8183075858888633, + "grad_norm": 1.4724838776986868, + "learning_rate": 1.3742137828479472e-06, + "loss": 0.5507007241249084, + "step": 3549 + }, + { + "epoch": 0.8185381600184459, + "grad_norm": 1.6416778504866436, + "learning_rate": 1.373860195917017e-06, + "loss": 0.4555748701095581, + "step": 3550 + }, + { + "epoch": 0.8187687341480286, + "grad_norm": 1.2633428656921684, + "learning_rate": 1.3735065546417182e-06, + "loss": 0.39309239387512207, + "step": 3551 + }, + { + "epoch": 0.8189993082776112, + "grad_norm": 1.205265119124541, + "learning_rate": 1.3731528590734564e-06, + "loss": 0.4984157681465149, + "step": 3552 + }, + { + "epoch": 0.819229882407194, + "grad_norm": 1.4373490041823445, + "learning_rate": 1.3727991092636448e-06, + "loss": 0.45853057503700256, + "step": 3553 + }, + { + "epoch": 0.8194604565367766, + "grad_norm": 1.427750473352885, + "learning_rate": 1.3724453052637043e-06, + "loss": 0.47412237524986267, + "step": 3554 + }, + { + "epoch": 0.8196910306663593, + "grad_norm": 1.5140095273509309, + "learning_rate": 1.3720914471250642e-06, + "loss": 0.46433544158935547, + "step": 3555 + }, + { + "epoch": 0.8199216047959419, + "grad_norm": 1.3530305082066354, + "learning_rate": 1.3717375348991612e-06, + "loss": 0.5773437023162842, + "step": 3556 + }, + { + "epoch": 0.8201521789255246, + "grad_norm": 1.519657617219548, + "learning_rate": 
1.37138356863744e-06, + "loss": 0.5943500995635986, + "step": 3557 + }, + { + "epoch": 0.8203827530551072, + "grad_norm": 1.1903323655602067, + "learning_rate": 1.3710295483913533e-06, + "loss": 0.4970731735229492, + "step": 3558 + }, + { + "epoch": 0.8206133271846898, + "grad_norm": 1.3936455952745408, + "learning_rate": 1.3706754742123611e-06, + "loss": 0.44726189970970154, + "step": 3559 + }, + { + "epoch": 0.8208439013142725, + "grad_norm": 1.257368755928624, + "learning_rate": 1.3703213461519325e-06, + "loss": 0.3980759382247925, + "step": 3560 + }, + { + "epoch": 0.8210744754438551, + "grad_norm": 1.510740752003684, + "learning_rate": 1.3699671642615434e-06, + "loss": 0.5521829724311829, + "step": 3561 + }, + { + "epoch": 0.8213050495734379, + "grad_norm": 1.4257916187791417, + "learning_rate": 1.3696129285926769e-06, + "loss": 0.42630624771118164, + "step": 3562 + }, + { + "epoch": 0.8215356237030205, + "grad_norm": 1.3813571407602123, + "learning_rate": 1.3692586391968254e-06, + "loss": 0.5060243606567383, + "step": 3563 + }, + { + "epoch": 0.8217661978326032, + "grad_norm": 1.553405319049413, + "learning_rate": 1.3689042961254884e-06, + "loss": 0.5803407430648804, + "step": 3564 + }, + { + "epoch": 0.8219967719621858, + "grad_norm": 1.1610478816524794, + "learning_rate": 1.3685498994301735e-06, + "loss": 0.4510403871536255, + "step": 3565 + }, + { + "epoch": 0.8222273460917685, + "grad_norm": 1.668001711945016, + "learning_rate": 1.3681954491623953e-06, + "loss": 0.5350467562675476, + "step": 3566 + }, + { + "epoch": 0.8224579202213511, + "grad_norm": 1.4589682016059282, + "learning_rate": 1.367840945373677e-06, + "loss": 0.5194679498672485, + "step": 3567 + }, + { + "epoch": 0.8226884943509338, + "grad_norm": 1.5164701950999842, + "learning_rate": 1.3674863881155495e-06, + "loss": 0.43574345111846924, + "step": 3568 + }, + { + "epoch": 0.8229190684805164, + "grad_norm": 1.2235692010100727, + "learning_rate": 1.367131777439551e-06, + "loss": 
0.43051451444625854, + "step": 3569 + }, + { + "epoch": 0.8231496426100992, + "grad_norm": 1.4294583851960962, + "learning_rate": 1.3667771133972278e-06, + "loss": 0.44449925422668457, + "step": 3570 + }, + { + "epoch": 0.8233802167396818, + "grad_norm": 1.4281775124274958, + "learning_rate": 1.3664223960401342e-06, + "loss": 0.4466608464717865, + "step": 3571 + }, + { + "epoch": 0.8236107908692645, + "grad_norm": 1.506734312309144, + "learning_rate": 1.3660676254198318e-06, + "loss": 0.6172389984130859, + "step": 3572 + }, + { + "epoch": 0.8238413649988471, + "grad_norm": 1.3071294444794341, + "learning_rate": 1.36571280158789e-06, + "loss": 0.3789742588996887, + "step": 3573 + }, + { + "epoch": 0.8240719391284298, + "grad_norm": 1.2713531694738989, + "learning_rate": 1.365357924595886e-06, + "loss": 0.3871726095676422, + "step": 3574 + }, + { + "epoch": 0.8243025132580124, + "grad_norm": 1.3659394637334186, + "learning_rate": 1.3650029944954047e-06, + "loss": 0.5464534759521484, + "step": 3575 + }, + { + "epoch": 0.8245330873875951, + "grad_norm": 1.4254183485118588, + "learning_rate": 1.3646480113380392e-06, + "loss": 0.4924513101577759, + "step": 3576 + }, + { + "epoch": 0.8247636615171777, + "grad_norm": 1.3350624286567714, + "learning_rate": 1.3642929751753896e-06, + "loss": 0.39648669958114624, + "step": 3577 + }, + { + "epoch": 0.8249942356467604, + "grad_norm": 1.155634552535419, + "learning_rate": 1.3639378860590642e-06, + "loss": 0.44139498472213745, + "step": 3578 + }, + { + "epoch": 0.825224809776343, + "grad_norm": 1.4016430263315434, + "learning_rate": 1.3635827440406784e-06, + "loss": 0.4477856159210205, + "step": 3579 + }, + { + "epoch": 0.8254553839059258, + "grad_norm": 1.2543072909410065, + "learning_rate": 1.363227549171856e-06, + "loss": 0.48722583055496216, + "step": 3580 + }, + { + "epoch": 0.8256859580355084, + "grad_norm": 1.5407337854642607, + "learning_rate": 1.3628723015042285e-06, + "loss": 0.44485795497894287, + "step": 3581 + }, + { 
+ "epoch": 0.8259165321650911, + "grad_norm": 1.481687909768813, + "learning_rate": 1.362517001089434e-06, + "loss": 0.510918140411377, + "step": 3582 + }, + { + "epoch": 0.8261471062946737, + "grad_norm": 1.4714123899535927, + "learning_rate": 1.3621616479791196e-06, + "loss": 0.5157535076141357, + "step": 3583 + }, + { + "epoch": 0.8263776804242564, + "grad_norm": 1.601097277197277, + "learning_rate": 1.361806242224939e-06, + "loss": 0.6120826005935669, + "step": 3584 + }, + { + "epoch": 0.826608254553839, + "grad_norm": 1.379062804125132, + "learning_rate": 1.3614507838785545e-06, + "loss": 0.47521674633026123, + "step": 3585 + }, + { + "epoch": 0.8268388286834217, + "grad_norm": 1.2544051986437676, + "learning_rate": 1.3610952729916352e-06, + "loss": 0.431441068649292, + "step": 3586 + }, + { + "epoch": 0.8270694028130043, + "grad_norm": 1.4333858511847595, + "learning_rate": 1.3607397096158587e-06, + "loss": 0.5168293118476868, + "step": 3587 + }, + { + "epoch": 0.8272999769425871, + "grad_norm": 1.4075386997192105, + "learning_rate": 1.3603840938029092e-06, + "loss": 0.47669821977615356, + "step": 3588 + }, + { + "epoch": 0.8275305510721697, + "grad_norm": 1.6345113020695277, + "learning_rate": 1.3600284256044791e-06, + "loss": 0.5170806050300598, + "step": 3589 + }, + { + "epoch": 0.8277611252017524, + "grad_norm": 1.3443972777893194, + "learning_rate": 1.359672705072269e-06, + "loss": 0.5578932762145996, + "step": 3590 + }, + { + "epoch": 0.827991699331335, + "grad_norm": 1.2931790064355784, + "learning_rate": 1.3593169322579855e-06, + "loss": 0.45000678300857544, + "step": 3591 + }, + { + "epoch": 0.8282222734609177, + "grad_norm": 1.7408157234389992, + "learning_rate": 1.3589611072133448e-06, + "loss": 0.47859635949134827, + "step": 3592 + }, + { + "epoch": 0.8284528475905003, + "grad_norm": 1.629320946493551, + "learning_rate": 1.3586052299900693e-06, + "loss": 0.5373919606208801, + "step": 3593 + }, + { + "epoch": 0.828683421720083, + "grad_norm": 
1.4093194136520946, + "learning_rate": 1.3582493006398888e-06, + "loss": 0.5461571216583252, + "step": 3594 + }, + { + "epoch": 0.8289139958496656, + "grad_norm": 1.4221547222488737, + "learning_rate": 1.357893319214542e-06, + "loss": 0.522891640663147, + "step": 3595 + }, + { + "epoch": 0.8291445699792483, + "grad_norm": 1.3931497044748549, + "learning_rate": 1.3575372857657739e-06, + "loss": 0.503441572189331, + "step": 3596 + }, + { + "epoch": 0.829375144108831, + "grad_norm": 1.4755218467347275, + "learning_rate": 1.357181200345338e-06, + "loss": 0.45475268363952637, + "step": 3597 + }, + { + "epoch": 0.8296057182384137, + "grad_norm": 1.3529340787561033, + "learning_rate": 1.3568250630049944e-06, + "loss": 0.4626728296279907, + "step": 3598 + }, + { + "epoch": 0.8298362923679963, + "grad_norm": 1.5106243497530205, + "learning_rate": 1.3564688737965118e-06, + "loss": 0.590618371963501, + "step": 3599 + }, + { + "epoch": 0.830066866497579, + "grad_norm": 1.1729232075760356, + "learning_rate": 1.3561126327716658e-06, + "loss": 0.4252029061317444, + "step": 3600 + }, + { + "epoch": 0.8302974406271616, + "grad_norm": 1.5093126003070163, + "learning_rate": 1.3557563399822396e-06, + "loss": 0.5741503238677979, + "step": 3601 + }, + { + "epoch": 0.8305280147567443, + "grad_norm": 1.346541706093541, + "learning_rate": 1.3553999954800236e-06, + "loss": 0.4591038227081299, + "step": 3602 + }, + { + "epoch": 0.8307585888863269, + "grad_norm": 1.5342817778823432, + "learning_rate": 1.3550435993168164e-06, + "loss": 0.5761657953262329, + "step": 3603 + }, + { + "epoch": 0.8309891630159096, + "grad_norm": 1.4873747737215213, + "learning_rate": 1.3546871515444239e-06, + "loss": 0.4835323691368103, + "step": 3604 + }, + { + "epoch": 0.8312197371454922, + "grad_norm": 1.3474153162620106, + "learning_rate": 1.3543306522146594e-06, + "loss": 0.6152533292770386, + "step": 3605 + }, + { + "epoch": 0.831450311275075, + "grad_norm": 1.7615931586989606, + "learning_rate": 
1.3539741013793431e-06, + "loss": 0.48106616735458374, + "step": 3606 + }, + { + "epoch": 0.8316808854046576, + "grad_norm": 1.3977429311647935, + "learning_rate": 1.3536174990903042e-06, + "loss": 0.48128771781921387, + "step": 3607 + }, + { + "epoch": 0.8319114595342403, + "grad_norm": 1.5624866131401935, + "learning_rate": 1.353260845399378e-06, + "loss": 0.4395609498023987, + "step": 3608 + }, + { + "epoch": 0.8321420336638229, + "grad_norm": 1.6243424583265862, + "learning_rate": 1.3529041403584076e-06, + "loss": 0.5298231840133667, + "step": 3609 + }, + { + "epoch": 0.8323726077934056, + "grad_norm": 1.610376085646533, + "learning_rate": 1.3525473840192436e-06, + "loss": 0.4694434404373169, + "step": 3610 + }, + { + "epoch": 0.8326031819229882, + "grad_norm": 1.3870293085196028, + "learning_rate": 1.3521905764337449e-06, + "loss": 0.4264890253543854, + "step": 3611 + }, + { + "epoch": 0.8328337560525709, + "grad_norm": 1.3900907609641087, + "learning_rate": 1.3518337176537762e-06, + "loss": 0.3266828656196594, + "step": 3612 + }, + { + "epoch": 0.8330643301821535, + "grad_norm": 1.548598004244933, + "learning_rate": 1.351476807731211e-06, + "loss": 0.5554935336112976, + "step": 3613 + }, + { + "epoch": 0.8332949043117363, + "grad_norm": 1.3139574983210685, + "learning_rate": 1.3511198467179295e-06, + "loss": 0.4375999867916107, + "step": 3614 + }, + { + "epoch": 0.8335254784413189, + "grad_norm": 1.3568296792682797, + "learning_rate": 1.35076283466582e-06, + "loss": 0.564457893371582, + "step": 3615 + }, + { + "epoch": 0.8337560525709016, + "grad_norm": 1.5648573569840147, + "learning_rate": 1.3504057716267776e-06, + "loss": 0.5141148567199707, + "step": 3616 + }, + { + "epoch": 0.8339866267004842, + "grad_norm": 1.2607282701974722, + "learning_rate": 1.350048657652705e-06, + "loss": 0.45514535903930664, + "step": 3617 + }, + { + "epoch": 0.8342172008300669, + "grad_norm": 1.298858308641179, + "learning_rate": 1.3496914927955122e-06, + "loss": 
0.5224772691726685, + "step": 3618 + }, + { + "epoch": 0.8344477749596495, + "grad_norm": 1.3773935543957632, + "learning_rate": 1.349334277107117e-06, + "loss": 0.45185205340385437, + "step": 3619 + }, + { + "epoch": 0.8346783490892322, + "grad_norm": 1.3400411570126707, + "learning_rate": 1.3489770106394444e-06, + "loss": 0.47232794761657715, + "step": 3620 + }, + { + "epoch": 0.8349089232188148, + "grad_norm": 1.3564585933268873, + "learning_rate": 1.3486196934444264e-06, + "loss": 0.44031190872192383, + "step": 3621 + }, + { + "epoch": 0.8351394973483975, + "grad_norm": 1.2921832515242213, + "learning_rate": 1.3482623255740028e-06, + "loss": 0.4594510793685913, + "step": 3622 + }, + { + "epoch": 0.8353700714779801, + "grad_norm": 1.3491628541071723, + "learning_rate": 1.347904907080121e-06, + "loss": 0.38726723194122314, + "step": 3623 + }, + { + "epoch": 0.8356006456075629, + "grad_norm": 1.4086239991990677, + "learning_rate": 1.3475474380147347e-06, + "loss": 0.544617772102356, + "step": 3624 + }, + { + "epoch": 0.8358312197371455, + "grad_norm": 1.5645995914963535, + "learning_rate": 1.347189918429806e-06, + "loss": 0.503423810005188, + "step": 3625 + }, + { + "epoch": 0.8360617938667282, + "grad_norm": 1.3950432339665733, + "learning_rate": 1.3468323483773038e-06, + "loss": 0.4395143985748291, + "step": 3626 + }, + { + "epoch": 0.8362923679963108, + "grad_norm": 1.6308000434387062, + "learning_rate": 1.346474727909205e-06, + "loss": 0.41464856266975403, + "step": 3627 + }, + { + "epoch": 0.8365229421258935, + "grad_norm": 1.4008674771220466, + "learning_rate": 1.346117057077493e-06, + "loss": 0.4782845079898834, + "step": 3628 + }, + { + "epoch": 0.8367535162554761, + "grad_norm": 1.2484540580184977, + "learning_rate": 1.345759335934159e-06, + "loss": 0.48308104276657104, + "step": 3629 + }, + { + "epoch": 0.8369840903850588, + "grad_norm": 1.3935764281095124, + "learning_rate": 1.345401564531201e-06, + "loss": 0.5759967565536499, + "step": 3630 + }, + { + 
"epoch": 0.8372146645146414, + "grad_norm": 1.421077506310717, + "learning_rate": 1.3450437429206256e-06, + "loss": 0.5900512337684631, + "step": 3631 + }, + { + "epoch": 0.8374452386442242, + "grad_norm": 1.3643346247687353, + "learning_rate": 1.3446858711544451e-06, + "loss": 0.4776286482810974, + "step": 3632 + }, + { + "epoch": 0.8376758127738068, + "grad_norm": 1.5796891796446009, + "learning_rate": 1.34432794928468e-06, + "loss": 0.5123563408851624, + "step": 3633 + }, + { + "epoch": 0.8379063869033895, + "grad_norm": 1.6272139775850447, + "learning_rate": 1.3439699773633574e-06, + "loss": 0.5505821108818054, + "step": 3634 + }, + { + "epoch": 0.8381369610329721, + "grad_norm": 1.4456391396483874, + "learning_rate": 1.343611955442513e-06, + "loss": 0.5525364875793457, + "step": 3635 + }, + { + "epoch": 0.8383675351625548, + "grad_norm": 1.1644228181066894, + "learning_rate": 1.3432538835741884e-06, + "loss": 0.44074952602386475, + "step": 3636 + }, + { + "epoch": 0.8385981092921374, + "grad_norm": 1.3792820862390651, + "learning_rate": 1.3428957618104331e-06, + "loss": 0.5488649606704712, + "step": 3637 + }, + { + "epoch": 0.8388286834217201, + "grad_norm": 1.159150884236996, + "learning_rate": 1.3425375902033034e-06, + "loss": 0.4427725672721863, + "step": 3638 + }, + { + "epoch": 0.8390592575513027, + "grad_norm": 1.5753495335559473, + "learning_rate": 1.3421793688048636e-06, + "loss": 0.5244250297546387, + "step": 3639 + }, + { + "epoch": 0.8392898316808854, + "grad_norm": 1.2853956216426152, + "learning_rate": 1.3418210976671845e-06, + "loss": 0.4684640169143677, + "step": 3640 + }, + { + "epoch": 0.839520405810468, + "grad_norm": 1.4767228704961965, + "learning_rate": 1.3414627768423449e-06, + "loss": 0.4518035054206848, + "step": 3641 + }, + { + "epoch": 0.8397509799400508, + "grad_norm": 1.5338085000094812, + "learning_rate": 1.34110440638243e-06, + "loss": 0.47504323720932007, + "step": 3642 + }, + { + "epoch": 0.8399815540696334, + "grad_norm": 
1.7182899921711987, + "learning_rate": 1.3407459863395326e-06, + "loss": 0.3835057020187378, + "step": 3643 + }, + { + "epoch": 0.8402121281992161, + "grad_norm": 1.4517538314936977, + "learning_rate": 1.3403875167657529e-06, + "loss": 0.4103546738624573, + "step": 3644 + }, + { + "epoch": 0.8404427023287987, + "grad_norm": 1.3338056576205999, + "learning_rate": 1.3400289977131974e-06, + "loss": 0.48064136505126953, + "step": 3645 + }, + { + "epoch": 0.8406732764583814, + "grad_norm": 1.5606949897639386, + "learning_rate": 1.3396704292339813e-06, + "loss": 0.49655234813690186, + "step": 3646 + }, + { + "epoch": 0.840903850587964, + "grad_norm": 1.3180737586627664, + "learning_rate": 1.3393118113802259e-06, + "loss": 0.5559303760528564, + "step": 3647 + }, + { + "epoch": 0.8411344247175467, + "grad_norm": 1.3902505896601203, + "learning_rate": 1.3389531442040599e-06, + "loss": 0.5173505544662476, + "step": 3648 + }, + { + "epoch": 0.8413649988471293, + "grad_norm": 1.4997400095057662, + "learning_rate": 1.338594427757619e-06, + "loss": 0.500524640083313, + "step": 3649 + }, + { + "epoch": 0.8415955729767121, + "grad_norm": 1.3017945585861477, + "learning_rate": 1.3382356620930467e-06, + "loss": 0.5167285203933716, + "step": 3650 + }, + { + "epoch": 0.8418261471062947, + "grad_norm": 1.4661199659605932, + "learning_rate": 1.3378768472624929e-06, + "loss": 0.5006825923919678, + "step": 3651 + }, + { + "epoch": 0.8420567212358774, + "grad_norm": 1.5253217794534257, + "learning_rate": 1.3375179833181153e-06, + "loss": 0.5421864986419678, + "step": 3652 + }, + { + "epoch": 0.84228729536546, + "grad_norm": 1.5304567180850979, + "learning_rate": 1.337159070312078e-06, + "loss": 0.4964475929737091, + "step": 3653 + }, + { + "epoch": 0.8425178694950427, + "grad_norm": 1.2795061721511742, + "learning_rate": 1.3368001082965528e-06, + "loss": 0.4020928144454956, + "step": 3654 + }, + { + "epoch": 0.8427484436246253, + "grad_norm": 1.3457912405228358, + "learning_rate": 
1.3364410973237183e-06, + "loss": 0.43009278178215027, + "step": 3655 + }, + { + "epoch": 0.842979017754208, + "grad_norm": 1.3663101783603413, + "learning_rate": 1.3360820374457608e-06, + "loss": 0.5939761400222778, + "step": 3656 + }, + { + "epoch": 0.8432095918837906, + "grad_norm": 1.3723718945789372, + "learning_rate": 1.335722928714873e-06, + "loss": 0.43889346718788147, + "step": 3657 + }, + { + "epoch": 0.8434401660133733, + "grad_norm": 1.510811137049935, + "learning_rate": 1.335363771183255e-06, + "loss": 0.5125945806503296, + "step": 3658 + }, + { + "epoch": 0.843670740142956, + "grad_norm": 1.2988273180041983, + "learning_rate": 1.3350045649031143e-06, + "loss": 0.516818642616272, + "step": 3659 + }, + { + "epoch": 0.8439013142725387, + "grad_norm": 1.2172726171902464, + "learning_rate": 1.3346453099266649e-06, + "loss": 0.5098299980163574, + "step": 3660 + }, + { + "epoch": 0.8441318884021213, + "grad_norm": 1.4809835823543989, + "learning_rate": 1.334286006306128e-06, + "loss": 0.46228134632110596, + "step": 3661 + }, + { + "epoch": 0.844362462531704, + "grad_norm": 1.518730905252404, + "learning_rate": 1.3339266540937324e-06, + "loss": 0.38364481925964355, + "step": 3662 + }, + { + "epoch": 0.8445930366612866, + "grad_norm": 1.2447229933483466, + "learning_rate": 1.3335672533417134e-06, + "loss": 0.4363073706626892, + "step": 3663 + }, + { + "epoch": 0.8448236107908693, + "grad_norm": 1.5445839123019949, + "learning_rate": 1.3332078041023133e-06, + "loss": 0.463603675365448, + "step": 3664 + }, + { + "epoch": 0.8450541849204519, + "grad_norm": 1.118250112497339, + "learning_rate": 1.3328483064277816e-06, + "loss": 0.4173084795475006, + "step": 3665 + }, + { + "epoch": 0.8452847590500346, + "grad_norm": 1.2905398126594152, + "learning_rate": 1.3324887603703756e-06, + "loss": 0.41451913118362427, + "step": 3666 + }, + { + "epoch": 0.8455153331796172, + "grad_norm": 1.3301474043831027, + "learning_rate": 1.3321291659823587e-06, + "loss": 
0.49418264627456665, + "step": 3667 + }, + { + "epoch": 0.8457459073092, + "grad_norm": 1.323747824550861, + "learning_rate": 1.3317695233160015e-06, + "loss": 0.48787444829940796, + "step": 3668 + }, + { + "epoch": 0.8459764814387826, + "grad_norm": 1.419516654753041, + "learning_rate": 1.3314098324235814e-06, + "loss": 0.484865665435791, + "step": 3669 + }, + { + "epoch": 0.8462070555683652, + "grad_norm": 1.4996660725713626, + "learning_rate": 1.3310500933573837e-06, + "loss": 0.44162076711654663, + "step": 3670 + }, + { + "epoch": 0.8464376296979479, + "grad_norm": 1.4496595059902684, + "learning_rate": 1.3306903061696999e-06, + "loss": 0.39880990982055664, + "step": 3671 + }, + { + "epoch": 0.8466682038275305, + "grad_norm": 1.596735486600776, + "learning_rate": 1.3303304709128288e-06, + "loss": 0.4405972957611084, + "step": 3672 + }, + { + "epoch": 0.8468987779571132, + "grad_norm": 1.8476371944591239, + "learning_rate": 1.3299705876390755e-06, + "loss": 0.4228917956352234, + "step": 3673 + }, + { + "epoch": 0.8471293520866958, + "grad_norm": 1.3245854918753257, + "learning_rate": 1.3296106564007532e-06, + "loss": 0.44533059000968933, + "step": 3674 + }, + { + "epoch": 0.8473599262162785, + "grad_norm": 1.324480419314636, + "learning_rate": 1.3292506772501816e-06, + "loss": 0.4672505855560303, + "step": 3675 + }, + { + "epoch": 0.8475905003458611, + "grad_norm": 1.5345690520656405, + "learning_rate": 1.3288906502396873e-06, + "loss": 0.5651025772094727, + "step": 3676 + }, + { + "epoch": 0.8478210744754439, + "grad_norm": 1.4113200785742674, + "learning_rate": 1.3285305754216034e-06, + "loss": 0.4877372086048126, + "step": 3677 + }, + { + "epoch": 0.8480516486050265, + "grad_norm": 1.6156626909271148, + "learning_rate": 1.3281704528482713e-06, + "loss": 0.43767499923706055, + "step": 3678 + }, + { + "epoch": 0.8482822227346092, + "grad_norm": 1.6309175000442955, + "learning_rate": 1.3278102825720376e-06, + "loss": 0.5077182650566101, + "step": 3679 + }, + { + 
"epoch": 0.8485127968641918, + "grad_norm": 1.5150502093819094, + "learning_rate": 1.3274500646452573e-06, + "loss": 0.4814456105232239, + "step": 3680 + }, + { + "epoch": 0.8487433709937745, + "grad_norm": 1.3626740483959299, + "learning_rate": 1.3270897991202913e-06, + "loss": 0.4454193115234375, + "step": 3681 + }, + { + "epoch": 0.8489739451233571, + "grad_norm": 1.1173863119708762, + "learning_rate": 1.3267294860495084e-06, + "loss": 0.3973482549190521, + "step": 3682 + }, + { + "epoch": 0.8492045192529398, + "grad_norm": 1.5337644837004238, + "learning_rate": 1.3263691254852834e-06, + "loss": 0.5115909576416016, + "step": 3683 + }, + { + "epoch": 0.8494350933825224, + "grad_norm": 1.2962888350788886, + "learning_rate": 1.3260087174799982e-06, + "loss": 0.4217768907546997, + "step": 3684 + }, + { + "epoch": 0.8496656675121051, + "grad_norm": 1.5676465439666392, + "learning_rate": 1.3256482620860414e-06, + "loss": 0.4462714195251465, + "step": 3685 + }, + { + "epoch": 0.8498962416416878, + "grad_norm": 1.278085511550712, + "learning_rate": 1.32528775935581e-06, + "loss": 0.4617312550544739, + "step": 3686 + }, + { + "epoch": 0.8501268157712705, + "grad_norm": 1.2760475898780375, + "learning_rate": 1.324927209341706e-06, + "loss": 0.4774616062641144, + "step": 3687 + }, + { + "epoch": 0.8503573899008531, + "grad_norm": 1.389927333157612, + "learning_rate": 1.3245666120961389e-06, + "loss": 0.38730189204216003, + "step": 3688 + }, + { + "epoch": 0.8505879640304358, + "grad_norm": 1.5164687032364252, + "learning_rate": 1.324205967671525e-06, + "loss": 0.45189517736434937, + "step": 3689 + }, + { + "epoch": 0.8508185381600184, + "grad_norm": 1.489462413187487, + "learning_rate": 1.3238452761202887e-06, + "loss": 0.4965584874153137, + "step": 3690 + }, + { + "epoch": 0.8510491122896011, + "grad_norm": 1.2283217886481297, + "learning_rate": 1.3234845374948591e-06, + "loss": 0.4409075975418091, + "step": 3691 + }, + { + "epoch": 0.8512796864191837, + "grad_norm": 
1.3545920303070538, + "learning_rate": 1.3231237518476737e-06, + "loss": 0.4457218647003174, + "step": 3692 + }, + { + "epoch": 0.8515102605487664, + "grad_norm": 1.2432481704868787, + "learning_rate": 1.3227629192311762e-06, + "loss": 0.42810603976249695, + "step": 3693 + }, + { + "epoch": 0.851740834678349, + "grad_norm": 1.3504737245283156, + "learning_rate": 1.3224020396978172e-06, + "loss": 0.40753173828125, + "step": 3694 + }, + { + "epoch": 0.8519714088079318, + "grad_norm": 1.5063309076640758, + "learning_rate": 1.3220411133000542e-06, + "loss": 0.5057830810546875, + "step": 3695 + }, + { + "epoch": 0.8522019829375144, + "grad_norm": 1.4625648008354504, + "learning_rate": 1.3216801400903515e-06, + "loss": 0.42498981952667236, + "step": 3696 + }, + { + "epoch": 0.8524325570670971, + "grad_norm": 1.736302707969947, + "learning_rate": 1.3213191201211806e-06, + "loss": 0.44985881447792053, + "step": 3697 + }, + { + "epoch": 0.8526631311966797, + "grad_norm": 1.5257289791960187, + "learning_rate": 1.3209580534450192e-06, + "loss": 0.39984816312789917, + "step": 3698 + }, + { + "epoch": 0.8528937053262624, + "grad_norm": 1.4859934204912078, + "learning_rate": 1.3205969401143516e-06, + "loss": 0.4773896038532257, + "step": 3699 + }, + { + "epoch": 0.853124279455845, + "grad_norm": 1.5299580963987478, + "learning_rate": 1.3202357801816698e-06, + "loss": 0.5699855089187622, + "step": 3700 + }, + { + "epoch": 0.8533548535854277, + "grad_norm": 1.5124437197630332, + "learning_rate": 1.3198745736994714e-06, + "loss": 0.4486675262451172, + "step": 3701 + }, + { + "epoch": 0.8535854277150103, + "grad_norm": 1.3641053506348044, + "learning_rate": 1.3195133207202625e-06, + "loss": 0.47909995913505554, + "step": 3702 + }, + { + "epoch": 0.853816001844593, + "grad_norm": 1.3267279385735278, + "learning_rate": 1.3191520212965542e-06, + "loss": 0.4356222450733185, + "step": 3703 + }, + { + "epoch": 0.8540465759741757, + "grad_norm": 1.5161594053893233, + "learning_rate": 
1.3187906754808646e-06, + "loss": 0.4734821319580078, + "step": 3704 + }, + { + "epoch": 0.8542771501037584, + "grad_norm": 1.1414361983546972, + "learning_rate": 1.3184292833257197e-06, + "loss": 0.4164031744003296, + "step": 3705 + }, + { + "epoch": 0.854507724233341, + "grad_norm": 1.5194682024268111, + "learning_rate": 1.3180678448836516e-06, + "loss": 0.505548357963562, + "step": 3706 + }, + { + "epoch": 0.8547382983629237, + "grad_norm": 1.4180879233512311, + "learning_rate": 1.3177063602071985e-06, + "loss": 0.4443202316761017, + "step": 3707 + }, + { + "epoch": 0.8549688724925063, + "grad_norm": 1.4808642334806548, + "learning_rate": 1.317344829348906e-06, + "loss": 0.4594070017337799, + "step": 3708 + }, + { + "epoch": 0.855199446622089, + "grad_norm": 1.595149298191138, + "learning_rate": 1.3169832523613265e-06, + "loss": 0.5346768498420715, + "step": 3709 + }, + { + "epoch": 0.8554300207516716, + "grad_norm": 1.4211934536480004, + "learning_rate": 1.3166216292970185e-06, + "loss": 0.44471168518066406, + "step": 3710 + }, + { + "epoch": 0.8556605948812543, + "grad_norm": 1.3967510109946715, + "learning_rate": 1.3162599602085482e-06, + "loss": 0.4414154589176178, + "step": 3711 + }, + { + "epoch": 0.855891169010837, + "grad_norm": 1.2591243363727789, + "learning_rate": 1.3158982451484873e-06, + "loss": 0.4267842769622803, + "step": 3712 + }, + { + "epoch": 0.8561217431404197, + "grad_norm": 1.5517519524370356, + "learning_rate": 1.315536484169415e-06, + "loss": 0.5282812118530273, + "step": 3713 + }, + { + "epoch": 0.8563523172700023, + "grad_norm": 1.3747848129200213, + "learning_rate": 1.3151746773239167e-06, + "loss": 0.3831692934036255, + "step": 3714 + }, + { + "epoch": 0.856582891399585, + "grad_norm": 1.3399055617764033, + "learning_rate": 1.3148128246645848e-06, + "loss": 0.4714779853820801, + "step": 3715 + }, + { + "epoch": 0.8568134655291676, + "grad_norm": 1.5957966977407376, + "learning_rate": 1.3144509262440185e-06, + "loss": 
0.515029788017273, + "step": 3716 + }, + { + "epoch": 0.8570440396587503, + "grad_norm": 1.6565005005078866, + "learning_rate": 1.314088982114823e-06, + "loss": 0.48407065868377686, + "step": 3717 + }, + { + "epoch": 0.8572746137883329, + "grad_norm": 1.2250893853794216, + "learning_rate": 1.3137269923296111e-06, + "loss": 0.4756847620010376, + "step": 3718 + }, + { + "epoch": 0.8575051879179156, + "grad_norm": 1.4417516161095163, + "learning_rate": 1.313364956941001e-06, + "loss": 0.47744277119636536, + "step": 3719 + }, + { + "epoch": 0.8577357620474982, + "grad_norm": 1.4540506451139732, + "learning_rate": 1.3130028760016187e-06, + "loss": 0.4967440366744995, + "step": 3720 + }, + { + "epoch": 0.857966336177081, + "grad_norm": 1.5755023694033539, + "learning_rate": 1.312640749564096e-06, + "loss": 0.44999921321868896, + "step": 3721 + }, + { + "epoch": 0.8581969103066636, + "grad_norm": 1.1829331105101752, + "learning_rate": 1.3122785776810723e-06, + "loss": 0.4454652667045593, + "step": 3722 + }, + { + "epoch": 0.8584274844362463, + "grad_norm": 1.220523426514953, + "learning_rate": 1.3119163604051923e-06, + "loss": 0.37483078241348267, + "step": 3723 + }, + { + "epoch": 0.8586580585658289, + "grad_norm": 1.45963624909142, + "learning_rate": 1.3115540977891076e-06, + "loss": 0.3732140064239502, + "step": 3724 + }, + { + "epoch": 0.8588886326954116, + "grad_norm": 1.5667872254799649, + "learning_rate": 1.3111917898854779e-06, + "loss": 0.5709421634674072, + "step": 3725 + }, + { + "epoch": 0.8591192068249942, + "grad_norm": 2.0482790256244514, + "learning_rate": 1.3108294367469677e-06, + "loss": 0.5301297307014465, + "step": 3726 + }, + { + "epoch": 0.8593497809545769, + "grad_norm": 1.2253994153188903, + "learning_rate": 1.3104670384262484e-06, + "loss": 0.45979735255241394, + "step": 3727 + }, + { + "epoch": 0.8595803550841595, + "grad_norm": 1.5172885339612137, + "learning_rate": 1.3101045949759985e-06, + "loss": 0.5051921606063843, + "step": 3728 + }, + { + 
"epoch": 0.8598109292137422, + "grad_norm": 1.5432212262669465, + "learning_rate": 1.309742106448903e-06, + "loss": 0.5057204365730286, + "step": 3729 + }, + { + "epoch": 0.8600415033433249, + "grad_norm": 1.3029916397805466, + "learning_rate": 1.3093795728976535e-06, + "loss": 0.4265059530735016, + "step": 3730 + }, + { + "epoch": 0.8602720774729076, + "grad_norm": 1.2392416355330595, + "learning_rate": 1.3090169943749473e-06, + "loss": 0.39166492223739624, + "step": 3731 + }, + { + "epoch": 0.8605026516024902, + "grad_norm": 1.4335892651385718, + "learning_rate": 1.308654370933489e-06, + "loss": 0.4321832060813904, + "step": 3732 + }, + { + "epoch": 0.8607332257320729, + "grad_norm": 1.4026009292758175, + "learning_rate": 1.3082917026259906e-06, + "loss": 0.5028939247131348, + "step": 3733 + }, + { + "epoch": 0.8609637998616555, + "grad_norm": 1.461263824354524, + "learning_rate": 1.3079289895051681e-06, + "loss": 0.4642373323440552, + "step": 3734 + }, + { + "epoch": 0.8611943739912382, + "grad_norm": 1.2616373488525174, + "learning_rate": 1.3075662316237464e-06, + "loss": 0.416348397731781, + "step": 3735 + }, + { + "epoch": 0.8614249481208208, + "grad_norm": 1.9156143459520234, + "learning_rate": 1.3072034290344556e-06, + "loss": 0.48442524671554565, + "step": 3736 + }, + { + "epoch": 0.8616555222504035, + "grad_norm": 1.4675369296005183, + "learning_rate": 1.3068405817900332e-06, + "loss": 0.46903935074806213, + "step": 3737 + }, + { + "epoch": 0.8618860963799861, + "grad_norm": 1.433982633948309, + "learning_rate": 1.3064776899432224e-06, + "loss": 0.48172008991241455, + "step": 3738 + }, + { + "epoch": 0.8621166705095689, + "grad_norm": 1.4697783322173945, + "learning_rate": 1.3061147535467734e-06, + "loss": 0.44460922479629517, + "step": 3739 + }, + { + "epoch": 0.8623472446391515, + "grad_norm": 1.4552688390934359, + "learning_rate": 1.3057517726534423e-06, + "loss": 0.4728608727455139, + "step": 3740 + }, + { + "epoch": 0.8625778187687342, + "grad_norm": 
1.2981084774118934, + "learning_rate": 1.3053887473159928e-06, + "loss": 0.36457544565200806, + "step": 3741 + }, + { + "epoch": 0.8628083928983168, + "grad_norm": 1.3219603285138386, + "learning_rate": 1.3050256775871936e-06, + "loss": 0.3753359317779541, + "step": 3742 + }, + { + "epoch": 0.8630389670278995, + "grad_norm": 1.71764180047156, + "learning_rate": 1.304662563519821e-06, + "loss": 0.38679057359695435, + "step": 3743 + }, + { + "epoch": 0.8632695411574821, + "grad_norm": 1.2517686459377946, + "learning_rate": 1.304299405166657e-06, + "loss": 0.5008635520935059, + "step": 3744 + }, + { + "epoch": 0.8635001152870648, + "grad_norm": 1.6524585351681906, + "learning_rate": 1.3039362025804903e-06, + "loss": 0.3723052740097046, + "step": 3745 + }, + { + "epoch": 0.8637306894166474, + "grad_norm": 1.4101013037777343, + "learning_rate": 1.3035729558141166e-06, + "loss": 0.4227592945098877, + "step": 3746 + }, + { + "epoch": 0.8639612635462302, + "grad_norm": 1.2385954175555658, + "learning_rate": 1.3032096649203369e-06, + "loss": 0.44072139263153076, + "step": 3747 + }, + { + "epoch": 0.8641918376758128, + "grad_norm": 1.330285491132409, + "learning_rate": 1.3028463299519594e-06, + "loss": 0.49321871995925903, + "step": 3748 + }, + { + "epoch": 0.8644224118053955, + "grad_norm": 1.1777120494442346, + "learning_rate": 1.3024829509617987e-06, + "loss": 0.3751382827758789, + "step": 3749 + }, + { + "epoch": 0.8646529859349781, + "grad_norm": 1.2092220891938048, + "learning_rate": 1.3021195280026755e-06, + "loss": 0.43967729806900024, + "step": 3750 + }, + { + "epoch": 0.8648835600645608, + "grad_norm": 1.2227774970491123, + "learning_rate": 1.3017560611274172e-06, + "loss": 0.4102880358695984, + "step": 3751 + }, + { + "epoch": 0.8651141341941434, + "grad_norm": 1.4524327131347594, + "learning_rate": 1.301392550388857e-06, + "loss": 0.5225233435630798, + "step": 3752 + }, + { + "epoch": 0.8653447083237261, + "grad_norm": 1.7121734467218848, + "learning_rate": 
1.3010289958398352e-06, + "loss": 0.6021677255630493, + "step": 3753 + }, + { + "epoch": 0.8655752824533087, + "grad_norm": 1.294116122042798, + "learning_rate": 1.300665397533198e-06, + "loss": 0.5031560063362122, + "step": 3754 + }, + { + "epoch": 0.8658058565828914, + "grad_norm": 1.2573123861588813, + "learning_rate": 1.300301755521798e-06, + "loss": 0.5406110286712646, + "step": 3755 + }, + { + "epoch": 0.866036430712474, + "grad_norm": 1.3123644187859618, + "learning_rate": 1.2999380698584945e-06, + "loss": 0.5359587669372559, + "step": 3756 + }, + { + "epoch": 0.8662670048420568, + "grad_norm": 1.4006997771166723, + "learning_rate": 1.2995743405961525e-06, + "loss": 0.46089720726013184, + "step": 3757 + }, + { + "epoch": 0.8664975789716394, + "grad_norm": 1.3064464980724229, + "learning_rate": 1.2992105677876444e-06, + "loss": 0.4611746668815613, + "step": 3758 + }, + { + "epoch": 0.8667281531012221, + "grad_norm": 1.3860871410802968, + "learning_rate": 1.2988467514858478e-06, + "loss": 0.47040778398513794, + "step": 3759 + }, + { + "epoch": 0.8669587272308047, + "grad_norm": 1.4624604845389892, + "learning_rate": 1.2984828917436469e-06, + "loss": 0.5118452310562134, + "step": 3760 + }, + { + "epoch": 0.8671893013603874, + "grad_norm": 1.3248325273306294, + "learning_rate": 1.2981189886139326e-06, + "loss": 0.42349302768707275, + "step": 3761 + }, + { + "epoch": 0.86741987548997, + "grad_norm": 1.4983666129317725, + "learning_rate": 1.2977550421496022e-06, + "loss": 0.4888027310371399, + "step": 3762 + }, + { + "epoch": 0.8676504496195527, + "grad_norm": 1.5557430857836938, + "learning_rate": 1.2973910524035587e-06, + "loss": 0.5637897849082947, + "step": 3763 + }, + { + "epoch": 0.8678810237491353, + "grad_norm": 1.2906063231523421, + "learning_rate": 1.2970270194287119e-06, + "loss": 0.4159572124481201, + "step": 3764 + }, + { + "epoch": 0.868111597878718, + "grad_norm": 1.613449710248156, + "learning_rate": 1.2966629432779775e-06, + "loss": 
0.4558612108230591, + "step": 3765 + }, + { + "epoch": 0.8683421720083007, + "grad_norm": 1.229959300374187, + "learning_rate": 1.2962988240042775e-06, + "loss": 0.4235115647315979, + "step": 3766 + }, + { + "epoch": 0.8685727461378834, + "grad_norm": 1.5042750051225975, + "learning_rate": 1.2959346616605404e-06, + "loss": 0.5096476078033447, + "step": 3767 + }, + { + "epoch": 0.868803320267466, + "grad_norm": 1.3849812365321899, + "learning_rate": 1.2955704562997013e-06, + "loss": 0.47097906470298767, + "step": 3768 + }, + { + "epoch": 0.8690338943970487, + "grad_norm": 1.2057643302548011, + "learning_rate": 1.2952062079747008e-06, + "loss": 0.4508157968521118, + "step": 3769 + }, + { + "epoch": 0.8692644685266313, + "grad_norm": 1.3904260388472953, + "learning_rate": 1.2948419167384864e-06, + "loss": 0.43800675868988037, + "step": 3770 + }, + { + "epoch": 0.869495042656214, + "grad_norm": 1.3552023829739699, + "learning_rate": 1.2944775826440108e-06, + "loss": 0.5512480735778809, + "step": 3771 + }, + { + "epoch": 0.8697256167857966, + "grad_norm": 1.4428129453899297, + "learning_rate": 1.2941132057442342e-06, + "loss": 0.4654430150985718, + "step": 3772 + }, + { + "epoch": 0.8699561909153793, + "grad_norm": 1.3297596373891312, + "learning_rate": 1.293748786092123e-06, + "loss": 0.5429458618164062, + "step": 3773 + }, + { + "epoch": 0.870186765044962, + "grad_norm": 1.7953090529311853, + "learning_rate": 1.2933843237406481e-06, + "loss": 0.415671169757843, + "step": 3774 + }, + { + "epoch": 0.8704173391745447, + "grad_norm": 1.3784118855195835, + "learning_rate": 1.2930198187427884e-06, + "loss": 0.4347325563430786, + "step": 3775 + }, + { + "epoch": 0.8706479133041273, + "grad_norm": 1.3858530201589612, + "learning_rate": 1.2926552711515287e-06, + "loss": 0.41997528076171875, + "step": 3776 + }, + { + "epoch": 0.87087848743371, + "grad_norm": 1.4475652450278216, + "learning_rate": 1.292290681019859e-06, + "loss": 0.45956090092658997, + "step": 3777 + }, + { + 
"epoch": 0.8711090615632926, + "grad_norm": 1.3318373392521217, + "learning_rate": 1.2919260484007767e-06, + "loss": 0.4615165889263153, + "step": 3778 + }, + { + "epoch": 0.8713396356928753, + "grad_norm": 1.5526291007190895, + "learning_rate": 1.2915613733472848e-06, + "loss": 0.3919866681098938, + "step": 3779 + }, + { + "epoch": 0.8715702098224579, + "grad_norm": 1.5182901628405527, + "learning_rate": 1.2911966559123922e-06, + "loss": 0.5324772000312805, + "step": 3780 + }, + { + "epoch": 0.8718007839520405, + "grad_norm": 1.4899431097732017, + "learning_rate": 1.2908318961491147e-06, + "loss": 0.4813354015350342, + "step": 3781 + }, + { + "epoch": 0.8720313580816232, + "grad_norm": 1.6904916219237236, + "learning_rate": 1.2904670941104735e-06, + "loss": 0.5617851614952087, + "step": 3782 + }, + { + "epoch": 0.8722619322112058, + "grad_norm": 1.5869523154671146, + "learning_rate": 1.2901022498494963e-06, + "loss": 0.5369905233383179, + "step": 3783 + }, + { + "epoch": 0.8724925063407886, + "grad_norm": 1.4103839502113327, + "learning_rate": 1.289737363419217e-06, + "loss": 0.469723641872406, + "step": 3784 + }, + { + "epoch": 0.8727230804703712, + "grad_norm": 1.5392452648373567, + "learning_rate": 1.2893724348726757e-06, + "loss": 0.5100580453872681, + "step": 3785 + }, + { + "epoch": 0.8729536545999539, + "grad_norm": 1.4522390007049084, + "learning_rate": 1.289007464262918e-06, + "loss": 0.3959219455718994, + "step": 3786 + }, + { + "epoch": 0.8731842287295365, + "grad_norm": 1.3370969443139462, + "learning_rate": 1.2886424516429967e-06, + "loss": 0.4237936735153198, + "step": 3787 + }, + { + "epoch": 0.8734148028591192, + "grad_norm": 1.6505369649722645, + "learning_rate": 1.2882773970659693e-06, + "loss": 0.4604552984237671, + "step": 3788 + }, + { + "epoch": 0.8736453769887018, + "grad_norm": 1.4408188813706955, + "learning_rate": 1.287912300584901e-06, + "loss": 0.4265769124031067, + "step": 3789 + }, + { + "epoch": 0.8738759511182845, + "grad_norm": 
1.185765484689313, + "learning_rate": 1.2875471622528617e-06, + "loss": 0.4644312262535095, + "step": 3790 + }, + { + "epoch": 0.8741065252478671, + "grad_norm": 1.5605966972230738, + "learning_rate": 1.2871819821229282e-06, + "loss": 0.5520300269126892, + "step": 3791 + }, + { + "epoch": 0.8743370993774499, + "grad_norm": 1.2172431342127952, + "learning_rate": 1.2868167602481831e-06, + "loss": 0.42350637912750244, + "step": 3792 + }, + { + "epoch": 0.8745676735070325, + "grad_norm": 1.3605025828289865, + "learning_rate": 1.2864514966817155e-06, + "loss": 0.5148683786392212, + "step": 3793 + }, + { + "epoch": 0.8747982476366152, + "grad_norm": 1.2825363473778824, + "learning_rate": 1.2860861914766191e-06, + "loss": 0.4506865441799164, + "step": 3794 + }, + { + "epoch": 0.8750288217661978, + "grad_norm": 1.240014068038836, + "learning_rate": 1.2857208446859957e-06, + "loss": 0.4042026996612549, + "step": 3795 + }, + { + "epoch": 0.8752593958957805, + "grad_norm": 1.749789157467437, + "learning_rate": 1.2853554563629521e-06, + "loss": 0.4601382613182068, + "step": 3796 + }, + { + "epoch": 0.8754899700253631, + "grad_norm": 1.1956968937229655, + "learning_rate": 1.2849900265606007e-06, + "loss": 0.3387809097766876, + "step": 3797 + }, + { + "epoch": 0.8757205441549458, + "grad_norm": 1.3296970918872935, + "learning_rate": 1.2846245553320604e-06, + "loss": 0.5295180082321167, + "step": 3798 + }, + { + "epoch": 0.8759511182845284, + "grad_norm": 1.518762035085977, + "learning_rate": 1.2842590427304564e-06, + "loss": 0.47733891010284424, + "step": 3799 + }, + { + "epoch": 0.8761816924141111, + "grad_norm": 1.3675518552119075, + "learning_rate": 1.2838934888089198e-06, + "loss": 0.46294957399368286, + "step": 3800 + }, + { + "epoch": 0.8764122665436938, + "grad_norm": 1.3892016156570253, + "learning_rate": 1.2835278936205877e-06, + "loss": 0.4638972580432892, + "step": 3801 + }, + { + "epoch": 0.8766428406732765, + "grad_norm": 1.2670627732920314, + "learning_rate": 
1.2831622572186027e-06, + "loss": 0.5078087449073792, + "step": 3802 + }, + { + "epoch": 0.8768734148028591, + "grad_norm": 1.2490466990727205, + "learning_rate": 1.2827965796561138e-06, + "loss": 0.49626827239990234, + "step": 3803 + }, + { + "epoch": 0.8771039889324418, + "grad_norm": 1.3784871825818807, + "learning_rate": 1.2824308609862758e-06, + "loss": 0.4857192635536194, + "step": 3804 + }, + { + "epoch": 0.8773345630620244, + "grad_norm": 1.5003545684747548, + "learning_rate": 1.2820651012622498e-06, + "loss": 0.5403131246566772, + "step": 3805 + }, + { + "epoch": 0.8775651371916071, + "grad_norm": 1.532730699853752, + "learning_rate": 1.2816993005372029e-06, + "loss": 0.519463837146759, + "step": 3806 + }, + { + "epoch": 0.8777957113211897, + "grad_norm": 1.648937105926222, + "learning_rate": 1.2813334588643077e-06, + "loss": 0.6038607954978943, + "step": 3807 + }, + { + "epoch": 0.8780262854507724, + "grad_norm": 1.5251750284604964, + "learning_rate": 1.280967576296743e-06, + "loss": 0.4892663359642029, + "step": 3808 + }, + { + "epoch": 0.878256859580355, + "grad_norm": 1.4437992115831912, + "learning_rate": 1.2806016528876934e-06, + "loss": 0.47872501611709595, + "step": 3809 + }, + { + "epoch": 0.8784874337099378, + "grad_norm": 1.401497704596745, + "learning_rate": 1.28023568869035e-06, + "loss": 0.4863993227481842, + "step": 3810 + }, + { + "epoch": 0.8787180078395204, + "grad_norm": 1.2319881889422357, + "learning_rate": 1.2798696837579088e-06, + "loss": 0.45241546630859375, + "step": 3811 + }, + { + "epoch": 0.8789485819691031, + "grad_norm": 1.26957816055566, + "learning_rate": 1.2795036381435728e-06, + "loss": 0.48720863461494446, + "step": 3812 + }, + { + "epoch": 0.8791791560986857, + "grad_norm": 1.4244000796725484, + "learning_rate": 1.2791375519005507e-06, + "loss": 0.49139827489852905, + "step": 3813 + }, + { + "epoch": 0.8794097302282684, + "grad_norm": 1.1021730064681352, + "learning_rate": 1.278771425082056e-06, + "loss": 
0.41915225982666016, + "step": 3814 + }, + { + "epoch": 0.879640304357851, + "grad_norm": 1.164668093587021, + "learning_rate": 1.2784052577413095e-06, + "loss": 0.41831016540527344, + "step": 3815 + }, + { + "epoch": 0.8798708784874337, + "grad_norm": 1.392466935090571, + "learning_rate": 1.2780390499315374e-06, + "loss": 0.49456197023391724, + "step": 3816 + }, + { + "epoch": 0.8801014526170163, + "grad_norm": 1.4645341817096265, + "learning_rate": 1.2776728017059714e-06, + "loss": 0.4656866192817688, + "step": 3817 + }, + { + "epoch": 0.880332026746599, + "grad_norm": 1.375452516729426, + "learning_rate": 1.2773065131178494e-06, + "loss": 0.449514776468277, + "step": 3818 + }, + { + "epoch": 0.8805626008761817, + "grad_norm": 1.320026502962018, + "learning_rate": 1.2769401842204156e-06, + "loss": 0.3762073516845703, + "step": 3819 + }, + { + "epoch": 0.8807931750057644, + "grad_norm": 1.6471923718834367, + "learning_rate": 1.2765738150669192e-06, + "loss": 0.5680521130561829, + "step": 3820 + }, + { + "epoch": 0.881023749135347, + "grad_norm": 1.227867578043664, + "learning_rate": 1.276207405710616e-06, + "loss": 0.35371482372283936, + "step": 3821 + }, + { + "epoch": 0.8812543232649297, + "grad_norm": 1.6584454245429339, + "learning_rate": 1.2758409562047669e-06, + "loss": 0.5145018100738525, + "step": 3822 + }, + { + "epoch": 0.8814848973945123, + "grad_norm": 1.4264603788288566, + "learning_rate": 1.2754744666026392e-06, + "loss": 0.5425234436988831, + "step": 3823 + }, + { + "epoch": 0.881715471524095, + "grad_norm": 1.605664005655016, + "learning_rate": 1.275107936957506e-06, + "loss": 0.48439931869506836, + "step": 3824 + }, + { + "epoch": 0.8819460456536776, + "grad_norm": 1.4836193722422002, + "learning_rate": 1.2747413673226462e-06, + "loss": 0.5177323818206787, + "step": 3825 + }, + { + "epoch": 0.8821766197832603, + "grad_norm": 1.4672524591279896, + "learning_rate": 1.2743747577513437e-06, + "loss": 0.4718499779701233, + "step": 3826 + }, + { + 
"epoch": 0.882407193912843, + "grad_norm": 1.3580668132517044, + "learning_rate": 1.27400810829689e-06, + "loss": 0.5140804648399353, + "step": 3827 + }, + { + "epoch": 0.8826377680424257, + "grad_norm": 1.2476007061260952, + "learning_rate": 1.2736414190125805e-06, + "loss": 0.4611731767654419, + "step": 3828 + }, + { + "epoch": 0.8828683421720083, + "grad_norm": 1.3574827964922753, + "learning_rate": 1.2732746899517175e-06, + "loss": 0.526127815246582, + "step": 3829 + }, + { + "epoch": 0.883098916301591, + "grad_norm": 1.3368001624765957, + "learning_rate": 1.2729079211676085e-06, + "loss": 0.4039766192436218, + "step": 3830 + }, + { + "epoch": 0.8833294904311736, + "grad_norm": 1.5033466347185125, + "learning_rate": 1.2725411127135676e-06, + "loss": 0.4232807159423828, + "step": 3831 + }, + { + "epoch": 0.8835600645607563, + "grad_norm": 1.2556638937655993, + "learning_rate": 1.2721742646429142e-06, + "loss": 0.48490262031555176, + "step": 3832 + }, + { + "epoch": 0.8837906386903389, + "grad_norm": 1.278298782194165, + "learning_rate": 1.2718073770089729e-06, + "loss": 0.4664677083492279, + "step": 3833 + }, + { + "epoch": 0.8840212128199216, + "grad_norm": 1.3387833207328181, + "learning_rate": 1.2714404498650742e-06, + "loss": 0.4402846097946167, + "step": 3834 + }, + { + "epoch": 0.8842517869495042, + "grad_norm": 1.195436797590032, + "learning_rate": 1.2710734832645555e-06, + "loss": 0.45942988991737366, + "step": 3835 + }, + { + "epoch": 0.884482361079087, + "grad_norm": 1.3235253441897963, + "learning_rate": 1.2707064772607587e-06, + "loss": 0.45924365520477295, + "step": 3836 + }, + { + "epoch": 0.8847129352086696, + "grad_norm": 1.2350134713864223, + "learning_rate": 1.270339431907032e-06, + "loss": 0.3877851963043213, + "step": 3837 + }, + { + "epoch": 0.8849435093382523, + "grad_norm": 1.381311043724791, + "learning_rate": 1.2699723472567288e-06, + "loss": 0.45364105701446533, + "step": 3838 + }, + { + "epoch": 0.8851740834678349, + "grad_norm": 
1.2798000201692457, + "learning_rate": 1.2696052233632089e-06, + "loss": 0.3527877926826477, + "step": 3839 + }, + { + "epoch": 0.8854046575974176, + "grad_norm": 1.7105597319107566, + "learning_rate": 1.2692380602798375e-06, + "loss": 0.499268501996994, + "step": 3840 + }, + { + "epoch": 0.8856352317270002, + "grad_norm": 1.2823188650483364, + "learning_rate": 1.2688708580599854e-06, + "loss": 0.39443689584732056, + "step": 3841 + }, + { + "epoch": 0.8858658058565829, + "grad_norm": 1.442355552170661, + "learning_rate": 1.268503616757029e-06, + "loss": 0.5262328386306763, + "step": 3842 + }, + { + "epoch": 0.8860963799861655, + "grad_norm": 1.4602798515117177, + "learning_rate": 1.2681363364243509e-06, + "loss": 0.4761236608028412, + "step": 3843 + }, + { + "epoch": 0.8863269541157482, + "grad_norm": 1.3806283660695482, + "learning_rate": 1.2677690171153391e-06, + "loss": 0.5173169374465942, + "step": 3844 + }, + { + "epoch": 0.8865575282453309, + "grad_norm": 1.4796905287439253, + "learning_rate": 1.2674016588833866e-06, + "loss": 0.5304574966430664, + "step": 3845 + }, + { + "epoch": 0.8867881023749136, + "grad_norm": 1.2451043989470143, + "learning_rate": 1.2670342617818925e-06, + "loss": 0.44707632064819336, + "step": 3846 + }, + { + "epoch": 0.8870186765044962, + "grad_norm": 1.4327430501013436, + "learning_rate": 1.2666668258642628e-06, + "loss": 0.44395360350608826, + "step": 3847 + }, + { + "epoch": 0.8872492506340789, + "grad_norm": 1.5382701800989709, + "learning_rate": 1.266299351183907e-06, + "loss": 0.4993078112602234, + "step": 3848 + }, + { + "epoch": 0.8874798247636615, + "grad_norm": 1.447761685140105, + "learning_rate": 1.2659318377942418e-06, + "loss": 0.4836229681968689, + "step": 3849 + }, + { + "epoch": 0.8877103988932442, + "grad_norm": 1.1586406035440977, + "learning_rate": 1.2655642857486885e-06, + "loss": 0.4898098111152649, + "step": 3850 + }, + { + "epoch": 0.8879409730228268, + "grad_norm": 1.4550595650341691, + "learning_rate": 
1.2651966951006753e-06, + "loss": 0.5117218494415283, + "step": 3851 + }, + { + "epoch": 0.8881715471524095, + "grad_norm": 1.1751749847019868, + "learning_rate": 1.2648290659036347e-06, + "loss": 0.3920857906341553, + "step": 3852 + }, + { + "epoch": 0.8884021212819921, + "grad_norm": 1.2103531492140316, + "learning_rate": 1.2644613982110055e-06, + "loss": 0.42527467012405396, + "step": 3853 + }, + { + "epoch": 0.8886326954115749, + "grad_norm": 1.4673474591941762, + "learning_rate": 1.2640936920762318e-06, + "loss": 0.5283650159835815, + "step": 3854 + }, + { + "epoch": 0.8888632695411575, + "grad_norm": 1.1384795561192926, + "learning_rate": 1.2637259475527634e-06, + "loss": 0.3976718783378601, + "step": 3855 + }, + { + "epoch": 0.8890938436707402, + "grad_norm": 1.3777221980377923, + "learning_rate": 1.2633581646940555e-06, + "loss": 0.3767106533050537, + "step": 3856 + }, + { + "epoch": 0.8893244178003228, + "grad_norm": 1.2421308508382682, + "learning_rate": 1.2629903435535695e-06, + "loss": 0.4002486765384674, + "step": 3857 + }, + { + "epoch": 0.8895549919299055, + "grad_norm": 1.7761729251417224, + "learning_rate": 1.2626224841847718e-06, + "loss": 0.3829443156719208, + "step": 3858 + }, + { + "epoch": 0.8897855660594881, + "grad_norm": 1.6906089339859913, + "learning_rate": 1.2622545866411342e-06, + "loss": 0.5338312983512878, + "step": 3859 + }, + { + "epoch": 0.8900161401890708, + "grad_norm": 1.3435755743208722, + "learning_rate": 1.2618866509761347e-06, + "loss": 0.49615299701690674, + "step": 3860 + }, + { + "epoch": 0.8902467143186534, + "grad_norm": 1.3772165276715471, + "learning_rate": 1.2615186772432562e-06, + "loss": 0.5080281496047974, + "step": 3861 + }, + { + "epoch": 0.8904772884482361, + "grad_norm": 1.3191602759544514, + "learning_rate": 1.2611506654959877e-06, + "loss": 0.4631335139274597, + "step": 3862 + }, + { + "epoch": 0.8907078625778188, + "grad_norm": 1.6754337710064344, + "learning_rate": 1.2607826157878232e-06, + "loss": 
0.5179207921028137, + "step": 3863 + }, + { + "epoch": 0.8909384367074015, + "grad_norm": 1.8689690583071528, + "learning_rate": 1.260414528172263e-06, + "loss": 0.5107406973838806, + "step": 3864 + }, + { + "epoch": 0.8911690108369841, + "grad_norm": 1.4263135964434357, + "learning_rate": 1.2600464027028112e-06, + "loss": 0.3719855844974518, + "step": 3865 + }, + { + "epoch": 0.8913995849665668, + "grad_norm": 1.2717821474296322, + "learning_rate": 1.2596782394329797e-06, + "loss": 0.4703129231929779, + "step": 3866 + }, + { + "epoch": 0.8916301590961494, + "grad_norm": 1.4971801597034615, + "learning_rate": 1.2593100384162842e-06, + "loss": 0.49239644408226013, + "step": 3867 + }, + { + "epoch": 0.8918607332257321, + "grad_norm": 1.505796830220407, + "learning_rate": 1.2589417997062468e-06, + "loss": 0.5194324851036072, + "step": 3868 + }, + { + "epoch": 0.8920913073553147, + "grad_norm": 1.2722329079463401, + "learning_rate": 1.2585735233563943e-06, + "loss": 0.4224633574485779, + "step": 3869 + }, + { + "epoch": 0.8923218814848974, + "grad_norm": 1.7020995758876771, + "learning_rate": 1.2582052094202594e-06, + "loss": 0.4377749562263489, + "step": 3870 + }, + { + "epoch": 0.89255245561448, + "grad_norm": 1.2037908365106704, + "learning_rate": 1.2578368579513809e-06, + "loss": 0.42847269773483276, + "step": 3871 + }, + { + "epoch": 0.8927830297440628, + "grad_norm": 1.4087908465200083, + "learning_rate": 1.2574684690033018e-06, + "loss": 0.5194802284240723, + "step": 3872 + }, + { + "epoch": 0.8930136038736454, + "grad_norm": 1.3553883811442613, + "learning_rate": 1.2571000426295716e-06, + "loss": 0.4401082396507263, + "step": 3873 + }, + { + "epoch": 0.8932441780032281, + "grad_norm": 1.5117708123403886, + "learning_rate": 1.2567315788837442e-06, + "loss": 0.38890570402145386, + "step": 3874 + }, + { + "epoch": 0.8934747521328107, + "grad_norm": 1.4931972330534145, + "learning_rate": 1.2563630778193802e-06, + "loss": 0.522612452507019, + "step": 3875 + }, + { + 
"epoch": 0.8937053262623934, + "grad_norm": 1.757870637645656, + "learning_rate": 1.2559945394900447e-06, + "loss": 0.516444981098175, + "step": 3876 + }, + { + "epoch": 0.893935900391976, + "grad_norm": 1.193092685346779, + "learning_rate": 1.255625963949308e-06, + "loss": 0.4084436297416687, + "step": 3877 + }, + { + "epoch": 0.8941664745215587, + "grad_norm": 1.4364911954858623, + "learning_rate": 1.2552573512507474e-06, + "loss": 0.4561755657196045, + "step": 3878 + }, + { + "epoch": 0.8943970486511413, + "grad_norm": 1.3498949478529019, + "learning_rate": 1.2548887014479435e-06, + "loss": 0.44372665882110596, + "step": 3879 + }, + { + "epoch": 0.894627622780724, + "grad_norm": 1.4181034577590674, + "learning_rate": 1.2545200145944837e-06, + "loss": 0.4714791774749756, + "step": 3880 + }, + { + "epoch": 0.8948581969103067, + "grad_norm": 1.506508633299638, + "learning_rate": 1.25415129074396e-06, + "loss": 0.48050814867019653, + "step": 3881 + }, + { + "epoch": 0.8950887710398894, + "grad_norm": 1.7788226663138391, + "learning_rate": 1.2537825299499708e-06, + "loss": 0.4078127145767212, + "step": 3882 + }, + { + "epoch": 0.895319345169472, + "grad_norm": 1.1273639481853348, + "learning_rate": 1.2534137322661187e-06, + "loss": 0.41556763648986816, + "step": 3883 + }, + { + "epoch": 0.8955499192990547, + "grad_norm": 1.2916565664076916, + "learning_rate": 1.2530448977460127e-06, + "loss": 0.3862306475639343, + "step": 3884 + }, + { + "epoch": 0.8957804934286373, + "grad_norm": 1.2417402269481763, + "learning_rate": 1.2526760264432656e-06, + "loss": 0.4071112871170044, + "step": 3885 + }, + { + "epoch": 0.89601106755822, + "grad_norm": 1.2074121865816745, + "learning_rate": 1.2523071184114978e-06, + "loss": 0.36956706643104553, + "step": 3886 + }, + { + "epoch": 0.8962416416878026, + "grad_norm": 1.5187969981751328, + "learning_rate": 1.251938173704333e-06, + "loss": 0.5087941884994507, + "step": 3887 + }, + { + "epoch": 0.8964722158173853, + "grad_norm": 
1.5300476571906632, + "learning_rate": 1.2515691923754017e-06, + "loss": 0.5636804103851318, + "step": 3888 + }, + { + "epoch": 0.896702789946968, + "grad_norm": 1.2028947296679213, + "learning_rate": 1.2512001744783383e-06, + "loss": 0.40899237990379333, + "step": 3889 + }, + { + "epoch": 0.8969333640765507, + "grad_norm": 1.2319974158201112, + "learning_rate": 1.2508311200667839e-06, + "loss": 0.3964187800884247, + "step": 3890 + }, + { + "epoch": 0.8971639382061333, + "grad_norm": 1.1881521968898023, + "learning_rate": 1.2504620291943838e-06, + "loss": 0.43190568685531616, + "step": 3891 + }, + { + "epoch": 0.897394512335716, + "grad_norm": 1.5323277954151004, + "learning_rate": 1.25009290191479e-06, + "loss": 0.5640079379081726, + "step": 3892 + }, + { + "epoch": 0.8976250864652986, + "grad_norm": 1.5228387521540339, + "learning_rate": 1.2497237382816577e-06, + "loss": 0.4969727396965027, + "step": 3893 + }, + { + "epoch": 0.8978556605948812, + "grad_norm": 1.438395912517929, + "learning_rate": 1.2493545383486497e-06, + "loss": 0.43710076808929443, + "step": 3894 + }, + { + "epoch": 0.8980862347244639, + "grad_norm": 1.217545409086522, + "learning_rate": 1.248985302169432e-06, + "loss": 0.4246212840080261, + "step": 3895 + }, + { + "epoch": 0.8983168088540465, + "grad_norm": 1.1837244532547113, + "learning_rate": 1.2486160297976776e-06, + "loss": 0.3812369108200073, + "step": 3896 + }, + { + "epoch": 0.8985473829836292, + "grad_norm": 2.1554879190255685, + "learning_rate": 1.248246721287063e-06, + "loss": 0.6407653093338013, + "step": 3897 + }, + { + "epoch": 0.8987779571132118, + "grad_norm": 1.6947319293322312, + "learning_rate": 1.247877376691272e-06, + "loss": 0.47748661041259766, + "step": 3898 + }, + { + "epoch": 0.8990085312427946, + "grad_norm": 1.5504399903750061, + "learning_rate": 1.2475079960639922e-06, + "loss": 0.5047964453697205, + "step": 3899 + }, + { + "epoch": 0.8992391053723772, + "grad_norm": 1.1781117181895115, + "learning_rate": 
1.2471385794589167e-06, + "loss": 0.37989485263824463, + "step": 3900 + }, + { + "epoch": 0.8994696795019599, + "grad_norm": 1.2955755733611327, + "learning_rate": 1.2467691269297437e-06, + "loss": 0.38857924938201904, + "step": 3901 + }, + { + "epoch": 0.8997002536315425, + "grad_norm": 1.2312069291338004, + "learning_rate": 1.2463996385301776e-06, + "loss": 0.45452386140823364, + "step": 3902 + }, + { + "epoch": 0.8999308277611252, + "grad_norm": 1.5565774035889273, + "learning_rate": 1.2460301143139267e-06, + "loss": 0.41920900344848633, + "step": 3903 + }, + { + "epoch": 0.9001614018907078, + "grad_norm": 1.542875547138451, + "learning_rate": 1.2456605543347051e-06, + "loss": 0.5979125499725342, + "step": 3904 + }, + { + "epoch": 0.9003919760202905, + "grad_norm": 1.5505304900467811, + "learning_rate": 1.2452909586462323e-06, + "loss": 0.5517082214355469, + "step": 3905 + }, + { + "epoch": 0.9006225501498731, + "grad_norm": 1.2381443535248697, + "learning_rate": 1.244921327302233e-06, + "loss": 0.4558248519897461, + "step": 3906 + }, + { + "epoch": 0.9008531242794559, + "grad_norm": 1.5503878716470787, + "learning_rate": 1.2445516603564362e-06, + "loss": 0.5637399554252625, + "step": 3907 + }, + { + "epoch": 0.9010836984090385, + "grad_norm": 1.2396897738245216, + "learning_rate": 1.2441819578625775e-06, + "loss": 0.5208043456077576, + "step": 3908 + }, + { + "epoch": 0.9013142725386212, + "grad_norm": 1.400218770913741, + "learning_rate": 1.243812219874396e-06, + "loss": 0.3901744484901428, + "step": 3909 + }, + { + "epoch": 0.9015448466682038, + "grad_norm": 1.4025338042989108, + "learning_rate": 1.2434424464456376e-06, + "loss": 0.5770972967147827, + "step": 3910 + }, + { + "epoch": 0.9017754207977865, + "grad_norm": 1.375223010916462, + "learning_rate": 1.2430726376300525e-06, + "loss": 0.3457295894622803, + "step": 3911 + }, + { + "epoch": 0.9020059949273691, + "grad_norm": 1.3118554362154196, + "learning_rate": 1.242702793481396e-06, + "loss": 
0.4487595558166504, + "step": 3912 + }, + { + "epoch": 0.9022365690569518, + "grad_norm": 1.2548104794507453, + "learning_rate": 1.2423329140534286e-06, + "loss": 0.4369876980781555, + "step": 3913 + }, + { + "epoch": 0.9024671431865344, + "grad_norm": 1.5693012853497335, + "learning_rate": 1.2419629993999165e-06, + "loss": 0.43154388666152954, + "step": 3914 + }, + { + "epoch": 0.9026977173161171, + "grad_norm": 1.313977531855456, + "learning_rate": 1.24159304957463e-06, + "loss": 0.4528294801712036, + "step": 3915 + }, + { + "epoch": 0.9029282914456997, + "grad_norm": 1.4152554930408472, + "learning_rate": 1.2412230646313452e-06, + "loss": 0.4204830527305603, + "step": 3916 + }, + { + "epoch": 0.9031588655752825, + "grad_norm": 1.3117655747531898, + "learning_rate": 1.2408530446238433e-06, + "loss": 0.46544623374938965, + "step": 3917 + }, + { + "epoch": 0.9033894397048651, + "grad_norm": 1.19103055945586, + "learning_rate": 1.2404829896059107e-06, + "loss": 0.39419203996658325, + "step": 3918 + }, + { + "epoch": 0.9036200138344478, + "grad_norm": 1.3085505059347724, + "learning_rate": 1.240112899631338e-06, + "loss": 0.4214451014995575, + "step": 3919 + }, + { + "epoch": 0.9038505879640304, + "grad_norm": 1.310156094815825, + "learning_rate": 1.239742774753922e-06, + "loss": 0.42385220527648926, + "step": 3920 + }, + { + "epoch": 0.9040811620936131, + "grad_norm": 1.4457769612459037, + "learning_rate": 1.2393726150274636e-06, + "loss": 0.5206592082977295, + "step": 3921 + }, + { + "epoch": 0.9043117362231957, + "grad_norm": 1.4602545667694231, + "learning_rate": 1.23900242050577e-06, + "loss": 0.4358803629875183, + "step": 3922 + }, + { + "epoch": 0.9045423103527784, + "grad_norm": 1.3596132034754325, + "learning_rate": 1.2386321912426524e-06, + "loss": 0.4525173306465149, + "step": 3923 + }, + { + "epoch": 0.904772884482361, + "grad_norm": 1.4736466426478543, + "learning_rate": 1.2382619272919273e-06, + "loss": 0.48877185583114624, + "step": 3924 + }, + { + 
"epoch": 0.9050034586119438, + "grad_norm": 1.152358955118646, + "learning_rate": 1.2378916287074162e-06, + "loss": 0.4401814341545105, + "step": 3925 + }, + { + "epoch": 0.9052340327415264, + "grad_norm": 1.337265572878916, + "learning_rate": 1.2375212955429459e-06, + "loss": 0.37818846106529236, + "step": 3926 + }, + { + "epoch": 0.9054646068711091, + "grad_norm": 1.285760527835995, + "learning_rate": 1.2371509278523482e-06, + "loss": 0.36472904682159424, + "step": 3927 + }, + { + "epoch": 0.9056951810006917, + "grad_norm": 1.2999097028645303, + "learning_rate": 1.2367805256894596e-06, + "loss": 0.5113309025764465, + "step": 3928 + }, + { + "epoch": 0.9059257551302744, + "grad_norm": 1.2052405163032573, + "learning_rate": 1.2364100891081218e-06, + "loss": 0.36074432730674744, + "step": 3929 + }, + { + "epoch": 0.906156329259857, + "grad_norm": 1.3493065976556424, + "learning_rate": 1.2360396181621819e-06, + "loss": 0.39177048206329346, + "step": 3930 + }, + { + "epoch": 0.9063869033894397, + "grad_norm": 1.3736058093352046, + "learning_rate": 1.2356691129054912e-06, + "loss": 0.4758113622665405, + "step": 3931 + }, + { + "epoch": 0.9066174775190223, + "grad_norm": 1.3614234520329223, + "learning_rate": 1.2352985733919065e-06, + "loss": 0.3840598464012146, + "step": 3932 + }, + { + "epoch": 0.906848051648605, + "grad_norm": 1.510763334369694, + "learning_rate": 1.2349279996752892e-06, + "loss": 0.5103816986083984, + "step": 3933 + }, + { + "epoch": 0.9070786257781877, + "grad_norm": 1.466046011323441, + "learning_rate": 1.234557391809507e-06, + "loss": 0.4175255298614502, + "step": 3934 + }, + { + "epoch": 0.9073091999077704, + "grad_norm": 2.627411026682294, + "learning_rate": 1.2341867498484302e-06, + "loss": 0.4504377245903015, + "step": 3935 + }, + { + "epoch": 0.907539774037353, + "grad_norm": 1.2868923632717955, + "learning_rate": 1.2338160738459355e-06, + "loss": 0.45868122577667236, + "step": 3936 + }, + { + "epoch": 0.9077703481669357, + "grad_norm": 
1.3231771761325972, + "learning_rate": 1.2334453638559054e-06, + "loss": 0.5161639451980591, + "step": 3937 + }, + { + "epoch": 0.9080009222965183, + "grad_norm": 1.5486748129834036, + "learning_rate": 1.2330746199322257e-06, + "loss": 0.44561630487442017, + "step": 3938 + }, + { + "epoch": 0.908231496426101, + "grad_norm": 1.595486700598371, + "learning_rate": 1.2327038421287876e-06, + "loss": 0.4780126214027405, + "step": 3939 + }, + { + "epoch": 0.9084620705556836, + "grad_norm": 1.2226582649026916, + "learning_rate": 1.2323330304994877e-06, + "loss": 0.505066990852356, + "step": 3940 + }, + { + "epoch": 0.9086926446852663, + "grad_norm": 1.3041405659013958, + "learning_rate": 1.2319621850982274e-06, + "loss": 0.5053813457489014, + "step": 3941 + }, + { + "epoch": 0.9089232188148489, + "grad_norm": 1.178162092657054, + "learning_rate": 1.2315913059789125e-06, + "loss": 0.3579134941101074, + "step": 3942 + }, + { + "epoch": 0.9091537929444317, + "grad_norm": 1.4949007072050957, + "learning_rate": 1.2312203931954543e-06, + "loss": 0.5703507661819458, + "step": 3943 + }, + { + "epoch": 0.9093843670740143, + "grad_norm": 1.4141867956521472, + "learning_rate": 1.2308494468017685e-06, + "loss": 0.4972035884857178, + "step": 3944 + }, + { + "epoch": 0.909614941203597, + "grad_norm": 1.8338477540837272, + "learning_rate": 1.230478466851776e-06, + "loss": 0.5528955459594727, + "step": 3945 + }, + { + "epoch": 0.9098455153331796, + "grad_norm": 1.4009292239467905, + "learning_rate": 1.2301074533994024e-06, + "loss": 0.4099786877632141, + "step": 3946 + }, + { + "epoch": 0.9100760894627623, + "grad_norm": 1.3414325662099453, + "learning_rate": 1.2297364064985786e-06, + "loss": 0.41020166873931885, + "step": 3947 + }, + { + "epoch": 0.9103066635923449, + "grad_norm": 1.4112377219226224, + "learning_rate": 1.2293653262032395e-06, + "loss": 0.4340355694293976, + "step": 3948 + }, + { + "epoch": 0.9105372377219276, + "grad_norm": 1.376446280407005, + "learning_rate": 
1.2289942125673261e-06, + "loss": 0.4369847774505615, + "step": 3949 + }, + { + "epoch": 0.9107678118515102, + "grad_norm": 1.4688076477466663, + "learning_rate": 1.228623065644783e-06, + "loss": 0.406423956155777, + "step": 3950 + }, + { + "epoch": 0.910998385981093, + "grad_norm": 1.4230223897567287, + "learning_rate": 1.22825188548956e-06, + "loss": 0.5081946849822998, + "step": 3951 + }, + { + "epoch": 0.9112289601106756, + "grad_norm": 1.7017899930713631, + "learning_rate": 1.2278806721556124e-06, + "loss": 0.43494492769241333, + "step": 3952 + }, + { + "epoch": 0.9114595342402583, + "grad_norm": 1.348884752431283, + "learning_rate": 1.2275094256968996e-06, + "loss": 0.35356831550598145, + "step": 3953 + }, + { + "epoch": 0.9116901083698409, + "grad_norm": 1.2260567341450548, + "learning_rate": 1.227138146167386e-06, + "loss": 0.36741551756858826, + "step": 3954 + }, + { + "epoch": 0.9119206824994236, + "grad_norm": 1.4686302016765889, + "learning_rate": 1.226766833621041e-06, + "loss": 0.491504430770874, + "step": 3955 + }, + { + "epoch": 0.9121512566290062, + "grad_norm": 1.266294151631501, + "learning_rate": 1.2263954881118384e-06, + "loss": 0.4558037519454956, + "step": 3956 + }, + { + "epoch": 0.9123818307585889, + "grad_norm": 1.398276341256052, + "learning_rate": 1.2260241096937571e-06, + "loss": 0.3941671848297119, + "step": 3957 + }, + { + "epoch": 0.9126124048881715, + "grad_norm": 1.7133993603535684, + "learning_rate": 1.2256526984207809e-06, + "loss": 0.40505191683769226, + "step": 3958 + }, + { + "epoch": 0.9128429790177542, + "grad_norm": 1.3369540241008888, + "learning_rate": 1.2252812543468982e-06, + "loss": 0.4669588804244995, + "step": 3959 + }, + { + "epoch": 0.9130735531473368, + "grad_norm": 1.6346862522902008, + "learning_rate": 1.2249097775261014e-06, + "loss": 0.535057544708252, + "step": 3960 + }, + { + "epoch": 0.9133041272769196, + "grad_norm": 1.465530924269544, + "learning_rate": 1.2245382680123898e-06, + "loss": 
0.5127478837966919, + "step": 3961 + }, + { + "epoch": 0.9135347014065022, + "grad_norm": 1.239878706419753, + "learning_rate": 1.224166725859765e-06, + "loss": 0.5004767179489136, + "step": 3962 + }, + { + "epoch": 0.9137652755360849, + "grad_norm": 1.3382850542269662, + "learning_rate": 1.2237951511222346e-06, + "loss": 0.47929924726486206, + "step": 3963 + }, + { + "epoch": 0.9139958496656675, + "grad_norm": 1.3650943807220162, + "learning_rate": 1.2234235438538109e-06, + "loss": 0.5619359016418457, + "step": 3964 + }, + { + "epoch": 0.9142264237952502, + "grad_norm": 2.173999313160228, + "learning_rate": 1.223051904108511e-06, + "loss": 0.44648507237434387, + "step": 3965 + }, + { + "epoch": 0.9144569979248328, + "grad_norm": 1.5081082363333118, + "learning_rate": 1.2226802319403562e-06, + "loss": 0.4451872706413269, + "step": 3966 + }, + { + "epoch": 0.9146875720544155, + "grad_norm": 1.1999813764066747, + "learning_rate": 1.222308527403373e-06, + "loss": 0.44295474886894226, + "step": 3967 + }, + { + "epoch": 0.9149181461839981, + "grad_norm": 1.4510785821223537, + "learning_rate": 1.221936790551592e-06, + "loss": 0.517430305480957, + "step": 3968 + }, + { + "epoch": 0.9151487203135809, + "grad_norm": 1.2648448897941866, + "learning_rate": 1.2215650214390493e-06, + "loss": 0.4819454252719879, + "step": 3969 + }, + { + "epoch": 0.9153792944431635, + "grad_norm": 1.40726836834287, + "learning_rate": 1.2211932201197855e-06, + "loss": 0.41739264130592346, + "step": 3970 + }, + { + "epoch": 0.9156098685727462, + "grad_norm": 1.214750449543567, + "learning_rate": 1.2208213866478452e-06, + "loss": 0.38833269476890564, + "step": 3971 + }, + { + "epoch": 0.9158404427023288, + "grad_norm": 1.4780394203565799, + "learning_rate": 1.2204495210772784e-06, + "loss": 0.48899054527282715, + "step": 3972 + }, + { + "epoch": 0.9160710168319115, + "grad_norm": 1.4236888721907983, + "learning_rate": 1.2200776234621395e-06, + "loss": 0.5201622247695923, + "step": 3973 + }, + { + 
"epoch": 0.9163015909614941, + "grad_norm": 1.4696703280770271, + "learning_rate": 1.219705693856488e-06, + "loss": 0.4105098843574524, + "step": 3974 + }, + { + "epoch": 0.9165321650910768, + "grad_norm": 1.2658629585457457, + "learning_rate": 1.2193337323143865e-06, + "loss": 0.45458245277404785, + "step": 3975 + }, + { + "epoch": 0.9167627392206594, + "grad_norm": 1.4906657502786624, + "learning_rate": 1.2189617388899049e-06, + "loss": 0.5013390779495239, + "step": 3976 + }, + { + "epoch": 0.9169933133502421, + "grad_norm": 1.3837275498584536, + "learning_rate": 1.218589713637115e-06, + "loss": 0.37065303325653076, + "step": 3977 + }, + { + "epoch": 0.9172238874798248, + "grad_norm": 1.4237915808433583, + "learning_rate": 1.218217656610095e-06, + "loss": 0.45158177614212036, + "step": 3978 + }, + { + "epoch": 0.9174544616094075, + "grad_norm": 1.3261399530988285, + "learning_rate": 1.2178455678629271e-06, + "loss": 0.4439426064491272, + "step": 3979 + }, + { + "epoch": 0.9176850357389901, + "grad_norm": 1.4056969202356144, + "learning_rate": 1.217473447449698e-06, + "loss": 0.42215704917907715, + "step": 3980 + }, + { + "epoch": 0.9179156098685728, + "grad_norm": 1.6572776500354818, + "learning_rate": 1.2171012954244991e-06, + "loss": 0.42273545265197754, + "step": 3981 + }, + { + "epoch": 0.9181461839981554, + "grad_norm": 1.5659197643503024, + "learning_rate": 1.216729111841427e-06, + "loss": 0.6045219898223877, + "step": 3982 + }, + { + "epoch": 0.9183767581277381, + "grad_norm": 1.318642532575583, + "learning_rate": 1.216356896754582e-06, + "loss": 0.49316874146461487, + "step": 3983 + }, + { + "epoch": 0.9186073322573207, + "grad_norm": 1.2984174252340932, + "learning_rate": 1.2159846502180692e-06, + "loss": 0.5222599506378174, + "step": 3984 + }, + { + "epoch": 0.9188379063869034, + "grad_norm": 1.21924477747188, + "learning_rate": 1.2156123722859988e-06, + "loss": 0.4513903856277466, + "step": 3985 + }, + { + "epoch": 0.919068480516486, + "grad_norm": 
1.5286242494549134, + "learning_rate": 1.2152400630124846e-06, + "loss": 0.4946150779724121, + "step": 3986 + }, + { + "epoch": 0.9192990546460688, + "grad_norm": 1.6287340554518628, + "learning_rate": 1.2148677224516458e-06, + "loss": 0.5482569336891174, + "step": 3987 + }, + { + "epoch": 0.9195296287756514, + "grad_norm": 1.4490082622042646, + "learning_rate": 1.2144953506576061e-06, + "loss": 0.457091361284256, + "step": 3988 + }, + { + "epoch": 0.9197602029052341, + "grad_norm": 1.378032718586854, + "learning_rate": 1.2141229476844933e-06, + "loss": 0.4262084364891052, + "step": 3989 + }, + { + "epoch": 0.9199907770348167, + "grad_norm": 1.2394422456854066, + "learning_rate": 1.2137505135864402e-06, + "loss": 0.4905529022216797, + "step": 3990 + }, + { + "epoch": 0.9202213511643994, + "grad_norm": 1.3246738813802295, + "learning_rate": 1.2133780484175833e-06, + "loss": 0.5001873970031738, + "step": 3991 + }, + { + "epoch": 0.920451925293982, + "grad_norm": 1.4663495799657225, + "learning_rate": 1.2130055522320647e-06, + "loss": 0.396418035030365, + "step": 3992 + }, + { + "epoch": 0.9206824994235647, + "grad_norm": 1.5742445852004807, + "learning_rate": 1.2126330250840302e-06, + "loss": 0.5743722915649414, + "step": 3993 + }, + { + "epoch": 0.9209130735531473, + "grad_norm": 1.720134285882963, + "learning_rate": 1.212260467027631e-06, + "loss": 0.5134707689285278, + "step": 3994 + }, + { + "epoch": 0.92114364768273, + "grad_norm": 1.2913764867867046, + "learning_rate": 1.2118878781170213e-06, + "loss": 0.4191853404045105, + "step": 3995 + }, + { + "epoch": 0.9213742218123127, + "grad_norm": 1.8061166260156263, + "learning_rate": 1.2115152584063613e-06, + "loss": 0.3430103063583374, + "step": 3996 + }, + { + "epoch": 0.9216047959418954, + "grad_norm": 1.491788048135039, + "learning_rate": 1.2111426079498147e-06, + "loss": 0.5229896903038025, + "step": 3997 + }, + { + "epoch": 0.921835370071478, + "grad_norm": 1.9288487767080142, + "learning_rate": 
1.2107699268015501e-06, + "loss": 0.5028181076049805, + "step": 3998 + }, + { + "epoch": 0.9220659442010607, + "grad_norm": 1.8323250729268132, + "learning_rate": 1.2103972150157407e-06, + "loss": 0.4662501811981201, + "step": 3999 + }, + { + "epoch": 0.9222965183306433, + "grad_norm": 1.7877363086665337, + "learning_rate": 1.2100244726465636e-06, + "loss": 0.5581385493278503, + "step": 4000 + }, + { + "epoch": 0.922527092460226, + "grad_norm": 1.5059656153682595, + "learning_rate": 1.2096516997482012e-06, + "loss": 0.3925841450691223, + "step": 4001 + }, + { + "epoch": 0.9227576665898086, + "grad_norm": 1.4478402824011334, + "learning_rate": 1.2092788963748393e-06, + "loss": 0.4021197557449341, + "step": 4002 + }, + { + "epoch": 0.9229882407193913, + "grad_norm": 1.5875480480080288, + "learning_rate": 1.2089060625806683e-06, + "loss": 0.5519800186157227, + "step": 4003 + }, + { + "epoch": 0.923218814848974, + "grad_norm": 1.4740215502095901, + "learning_rate": 1.2085331984198847e-06, + "loss": 0.4426038861274719, + "step": 4004 + }, + { + "epoch": 0.9234493889785566, + "grad_norm": 1.3127950735735558, + "learning_rate": 1.2081603039466872e-06, + "loss": 0.4370608925819397, + "step": 4005 + }, + { + "epoch": 0.9236799631081393, + "grad_norm": 1.6270244555647773, + "learning_rate": 1.2077873792152797e-06, + "loss": 0.5535042881965637, + "step": 4006 + }, + { + "epoch": 0.9239105372377219, + "grad_norm": 1.4254025319676356, + "learning_rate": 1.2074144242798708e-06, + "loss": 0.45786774158477783, + "step": 4007 + }, + { + "epoch": 0.9241411113673046, + "grad_norm": 1.305332226115227, + "learning_rate": 1.207041439194673e-06, + "loss": 0.38189244270324707, + "step": 4008 + }, + { + "epoch": 0.9243716854968872, + "grad_norm": 1.4825176983109143, + "learning_rate": 1.206668424013904e-06, + "loss": 0.48782190680503845, + "step": 4009 + }, + { + "epoch": 0.9246022596264699, + "grad_norm": 1.4182276344304934, + "learning_rate": 1.2062953787917852e-06, + "loss": 
0.46295344829559326, + "step": 4010 + }, + { + "epoch": 0.9248328337560525, + "grad_norm": 1.370453601452758, + "learning_rate": 1.205922303582542e-06, + "loss": 0.5205795764923096, + "step": 4011 + }, + { + "epoch": 0.9250634078856352, + "grad_norm": 1.431830816120071, + "learning_rate": 1.205549198440405e-06, + "loss": 0.47622987627983093, + "step": 4012 + }, + { + "epoch": 0.9252939820152178, + "grad_norm": 1.3190370245605134, + "learning_rate": 1.2051760634196091e-06, + "loss": 0.4826146960258484, + "step": 4013 + }, + { + "epoch": 0.9255245561448006, + "grad_norm": 1.608771307027525, + "learning_rate": 1.2048028985743928e-06, + "loss": 0.46193474531173706, + "step": 4014 + }, + { + "epoch": 0.9257551302743832, + "grad_norm": 1.4926107871852312, + "learning_rate": 1.2044297039589996e-06, + "loss": 0.523394763469696, + "step": 4015 + }, + { + "epoch": 0.9259857044039659, + "grad_norm": 1.3096026982819484, + "learning_rate": 1.2040564796276773e-06, + "loss": 0.3963446617126465, + "step": 4016 + }, + { + "epoch": 0.9262162785335485, + "grad_norm": 1.3803899653039033, + "learning_rate": 1.2036832256346774e-06, + "loss": 0.5016456842422485, + "step": 4017 + }, + { + "epoch": 0.9264468526631312, + "grad_norm": 1.2198633348825472, + "learning_rate": 1.2033099420342566e-06, + "loss": 0.47298160195350647, + "step": 4018 + }, + { + "epoch": 0.9266774267927138, + "grad_norm": 1.5448162104307424, + "learning_rate": 1.2029366288806748e-06, + "loss": 0.387129545211792, + "step": 4019 + }, + { + "epoch": 0.9269080009222965, + "grad_norm": 1.4210281769521962, + "learning_rate": 1.2025632862281976e-06, + "loss": 0.46101367473602295, + "step": 4020 + }, + { + "epoch": 0.9271385750518791, + "grad_norm": 1.364554371793265, + "learning_rate": 1.2021899141310938e-06, + "loss": 0.4242950677871704, + "step": 4021 + }, + { + "epoch": 0.9273691491814618, + "grad_norm": 1.5524341283687932, + "learning_rate": 1.201816512643637e-06, + "loss": 0.45983830094337463, + "step": 4022 + }, + { + 
"epoch": 0.9275997233110445, + "grad_norm": 1.3760025635830133, + "learning_rate": 1.2014430818201044e-06, + "loss": 0.39785802364349365, + "step": 4023 + }, + { + "epoch": 0.9278302974406272, + "grad_norm": 1.254017871701417, + "learning_rate": 1.2010696217147783e-06, + "loss": 0.39265739917755127, + "step": 4024 + }, + { + "epoch": 0.9280608715702098, + "grad_norm": 1.4761130221315304, + "learning_rate": 1.2006961323819455e-06, + "loss": 0.49783703684806824, + "step": 4025 + }, + { + "epoch": 0.9282914456997925, + "grad_norm": 1.3764899481486361, + "learning_rate": 1.2003226138758953e-06, + "loss": 0.4479181170463562, + "step": 4026 + }, + { + "epoch": 0.9285220198293751, + "grad_norm": 1.4404345233811269, + "learning_rate": 1.199949066250923e-06, + "loss": 0.5205901265144348, + "step": 4027 + }, + { + "epoch": 0.9287525939589578, + "grad_norm": 1.3718010528366764, + "learning_rate": 1.1995754895613277e-06, + "loss": 0.5163009762763977, + "step": 4028 + }, + { + "epoch": 0.9289831680885404, + "grad_norm": 1.6219891318512447, + "learning_rate": 1.1992018838614124e-06, + "loss": 0.5746268033981323, + "step": 4029 + }, + { + "epoch": 0.9292137422181231, + "grad_norm": 1.2896226756922917, + "learning_rate": 1.1988282492054844e-06, + "loss": 0.5306442975997925, + "step": 4030 + }, + { + "epoch": 0.9294443163477057, + "grad_norm": 1.1978686339854372, + "learning_rate": 1.198454585647855e-06, + "loss": 0.4219534993171692, + "step": 4031 + }, + { + "epoch": 0.9296748904772885, + "grad_norm": 1.3997557750947305, + "learning_rate": 1.1980808932428406e-06, + "loss": 0.4167936444282532, + "step": 4032 + }, + { + "epoch": 0.9299054646068711, + "grad_norm": 1.2271684703243566, + "learning_rate": 1.197707172044761e-06, + "loss": 0.42376089096069336, + "step": 4033 + }, + { + "epoch": 0.9301360387364538, + "grad_norm": 1.5370602561856461, + "learning_rate": 1.1973334221079398e-06, + "loss": 0.48729848861694336, + "step": 4034 + }, + { + "epoch": 0.9303666128660364, + 
"grad_norm": 1.2353226603771892, + "learning_rate": 1.1969596434867062e-06, + "loss": 0.45877987146377563, + "step": 4035 + }, + { + "epoch": 0.9305971869956191, + "grad_norm": 1.2531522631367908, + "learning_rate": 1.196585836235392e-06, + "loss": 0.504621684551239, + "step": 4036 + }, + { + "epoch": 0.9308277611252017, + "grad_norm": 1.202880043912139, + "learning_rate": 1.1962120004083342e-06, + "loss": 0.45170748233795166, + "step": 4037 + }, + { + "epoch": 0.9310583352547844, + "grad_norm": 1.3604906368473153, + "learning_rate": 1.1958381360598737e-06, + "loss": 0.3969152569770813, + "step": 4038 + }, + { + "epoch": 0.931288909384367, + "grad_norm": 1.2718279913855612, + "learning_rate": 1.1954642432443553e-06, + "loss": 0.4286048412322998, + "step": 4039 + }, + { + "epoch": 0.9315194835139498, + "grad_norm": 1.4261317138789782, + "learning_rate": 1.1950903220161284e-06, + "loss": 0.3755400776863098, + "step": 4040 + }, + { + "epoch": 0.9317500576435324, + "grad_norm": 1.7559058405972485, + "learning_rate": 1.1947163724295457e-06, + "loss": 0.553135871887207, + "step": 4041 + }, + { + "epoch": 0.9319806317731151, + "grad_norm": 1.3529681190465184, + "learning_rate": 1.194342394538965e-06, + "loss": 0.53995281457901, + "step": 4042 + }, + { + "epoch": 0.9322112059026977, + "grad_norm": 1.3239114086556873, + "learning_rate": 1.1939683883987476e-06, + "loss": 0.4405739903450012, + "step": 4043 + }, + { + "epoch": 0.9324417800322804, + "grad_norm": 1.4320084668753248, + "learning_rate": 1.1935943540632591e-06, + "loss": 0.5046489238739014, + "step": 4044 + }, + { + "epoch": 0.932672354161863, + "grad_norm": 1.63220562819442, + "learning_rate": 1.1932202915868694e-06, + "loss": 0.4699453115463257, + "step": 4045 + }, + { + "epoch": 0.9329029282914457, + "grad_norm": 1.791152379500816, + "learning_rate": 1.192846201023952e-06, + "loss": 0.5643539428710938, + "step": 4046 + }, + { + "epoch": 0.9331335024210283, + "grad_norm": 1.3213038373558907, + "learning_rate": 
1.192472082428885e-06, + "loss": 0.4423527121543884, + "step": 4047 + }, + { + "epoch": 0.933364076550611, + "grad_norm": 1.488626793530787, + "learning_rate": 1.1920979358560498e-06, + "loss": 0.4446362257003784, + "step": 4048 + }, + { + "epoch": 0.9335946506801936, + "grad_norm": 1.6284188135746005, + "learning_rate": 1.1917237613598332e-06, + "loss": 0.48347601294517517, + "step": 4049 + }, + { + "epoch": 0.9338252248097764, + "grad_norm": 1.339621886087554, + "learning_rate": 1.1913495589946243e-06, + "loss": 0.4736206531524658, + "step": 4050 + }, + { + "epoch": 0.934055798939359, + "grad_norm": 1.5821523477294297, + "learning_rate": 1.1909753288148181e-06, + "loss": 0.4896177053451538, + "step": 4051 + }, + { + "epoch": 0.9342863730689417, + "grad_norm": 1.3503870180183308, + "learning_rate": 1.1906010708748124e-06, + "loss": 0.3953405022621155, + "step": 4052 + }, + { + "epoch": 0.9345169471985243, + "grad_norm": 1.75805064255455, + "learning_rate": 1.1902267852290092e-06, + "loss": 0.30871689319610596, + "step": 4053 + }, + { + "epoch": 0.934747521328107, + "grad_norm": 1.4966149449301516, + "learning_rate": 1.1898524719318151e-06, + "loss": 0.44187474250793457, + "step": 4054 + }, + { + "epoch": 0.9349780954576896, + "grad_norm": 1.3440011557143472, + "learning_rate": 1.1894781310376396e-06, + "loss": 0.4069768488407135, + "step": 4055 + }, + { + "epoch": 0.9352086695872723, + "grad_norm": 1.2938244564986259, + "learning_rate": 1.1891037626008982e-06, + "loss": 0.36307692527770996, + "step": 4056 + }, + { + "epoch": 0.9354392437168549, + "grad_norm": 1.2107088826138788, + "learning_rate": 1.188729366676008e-06, + "loss": 0.38535594940185547, + "step": 4057 + }, + { + "epoch": 0.9356698178464377, + "grad_norm": 1.416105966319888, + "learning_rate": 1.1883549433173916e-06, + "loss": 0.46454256772994995, + "step": 4058 + }, + { + "epoch": 0.9359003919760203, + "grad_norm": 1.5618282514551205, + "learning_rate": 1.1879804925794752e-06, + "loss": 
0.48537465929985046, + "step": 4059 + }, + { + "epoch": 0.936130966105603, + "grad_norm": 1.4027831120439134, + "learning_rate": 1.1876060145166893e-06, + "loss": 0.4355062246322632, + "step": 4060 + }, + { + "epoch": 0.9363615402351856, + "grad_norm": 1.4619447190479122, + "learning_rate": 1.1872315091834676e-06, + "loss": 0.47248804569244385, + "step": 4061 + }, + { + "epoch": 0.9365921143647683, + "grad_norm": 1.4336627602293526, + "learning_rate": 1.1868569766342488e-06, + "loss": 0.4896939992904663, + "step": 4062 + }, + { + "epoch": 0.9368226884943509, + "grad_norm": 1.7008224797561309, + "learning_rate": 1.1864824169234744e-06, + "loss": 0.4259600043296814, + "step": 4063 + }, + { + "epoch": 0.9370532626239336, + "grad_norm": 1.4119659383453314, + "learning_rate": 1.186107830105591e-06, + "loss": 0.4228817820549011, + "step": 4064 + }, + { + "epoch": 0.9372838367535162, + "grad_norm": 1.4911543620584802, + "learning_rate": 1.1857332162350484e-06, + "loss": 0.44750750064849854, + "step": 4065 + }, + { + "epoch": 0.937514410883099, + "grad_norm": 1.4424129451647476, + "learning_rate": 1.1853585753663003e-06, + "loss": 0.49125558137893677, + "step": 4066 + }, + { + "epoch": 0.9377449850126816, + "grad_norm": 1.2540485430842725, + "learning_rate": 1.1849839075538048e-06, + "loss": 0.446805477142334, + "step": 4067 + }, + { + "epoch": 0.9379755591422643, + "grad_norm": 1.6527694351266196, + "learning_rate": 1.1846092128520235e-06, + "loss": 0.4516616463661194, + "step": 4068 + }, + { + "epoch": 0.9382061332718469, + "grad_norm": 1.2461495462560317, + "learning_rate": 1.1842344913154223e-06, + "loss": 0.5271207690238953, + "step": 4069 + }, + { + "epoch": 0.9384367074014296, + "grad_norm": 1.3340471888093621, + "learning_rate": 1.1838597429984702e-06, + "loss": 0.46718811988830566, + "step": 4070 + }, + { + "epoch": 0.9386672815310122, + "grad_norm": 1.6970586095771742, + "learning_rate": 1.1834849679556416e-06, + "loss": 0.4948880672454834, + "step": 4071 + }, + 
{ + "epoch": 0.9388978556605949, + "grad_norm": 1.570925891079885, + "learning_rate": 1.183110166241413e-06, + "loss": 0.5141744613647461, + "step": 4072 + }, + { + "epoch": 0.9391284297901775, + "grad_norm": 1.683475962747206, + "learning_rate": 1.1827353379102662e-06, + "loss": 0.43921130895614624, + "step": 4073 + }, + { + "epoch": 0.9393590039197602, + "grad_norm": 1.458461387708897, + "learning_rate": 1.182360483016686e-06, + "loss": 0.35931193828582764, + "step": 4074 + }, + { + "epoch": 0.9395895780493428, + "grad_norm": 1.4562814179425503, + "learning_rate": 1.1819856016151615e-06, + "loss": 0.4376310408115387, + "step": 4075 + }, + { + "epoch": 0.9398201521789256, + "grad_norm": 1.1615675527476144, + "learning_rate": 1.1816106937601856e-06, + "loss": 0.45419907569885254, + "step": 4076 + }, + { + "epoch": 0.9400507263085082, + "grad_norm": 1.447994335613413, + "learning_rate": 1.1812357595062545e-06, + "loss": 0.4077754616737366, + "step": 4077 + }, + { + "epoch": 0.9402813004380909, + "grad_norm": 1.4463033622550583, + "learning_rate": 1.1808607989078686e-06, + "loss": 0.5555585622787476, + "step": 4078 + }, + { + "epoch": 0.9405118745676735, + "grad_norm": 1.4616481074430372, + "learning_rate": 1.1804858120195334e-06, + "loss": 0.4566183090209961, + "step": 4079 + }, + { + "epoch": 0.9407424486972562, + "grad_norm": 1.3314435652232666, + "learning_rate": 1.180110798895756e-06, + "loss": 0.39149847626686096, + "step": 4080 + }, + { + "epoch": 0.9409730228268388, + "grad_norm": 1.3122400287018474, + "learning_rate": 1.1797357595910485e-06, + "loss": 0.42695966362953186, + "step": 4081 + }, + { + "epoch": 0.9412035969564215, + "grad_norm": 1.4264504061469645, + "learning_rate": 1.1793606941599266e-06, + "loss": 0.49673956632614136, + "step": 4082 + }, + { + "epoch": 0.9414341710860041, + "grad_norm": 1.3703442162376693, + "learning_rate": 1.17898560265691e-06, + "loss": 0.44765836000442505, + "step": 4083 + }, + { + "epoch": 0.9416647452155869, + 
"grad_norm": 1.2694691955405566, + "learning_rate": 1.1786104851365227e-06, + "loss": 0.40580642223358154, + "step": 4084 + }, + { + "epoch": 0.9418953193451695, + "grad_norm": 1.6554640938571203, + "learning_rate": 1.1782353416532907e-06, + "loss": 0.5389235019683838, + "step": 4085 + }, + { + "epoch": 0.9421258934747522, + "grad_norm": 1.4858385739097846, + "learning_rate": 1.1778601722617456e-06, + "loss": 0.5130764245986938, + "step": 4086 + }, + { + "epoch": 0.9423564676043348, + "grad_norm": 1.4406092108567712, + "learning_rate": 1.1774849770164218e-06, + "loss": 0.5031291842460632, + "step": 4087 + }, + { + "epoch": 0.9425870417339175, + "grad_norm": 1.474863885181778, + "learning_rate": 1.1771097559718581e-06, + "loss": 0.463434636592865, + "step": 4088 + }, + { + "epoch": 0.9428176158635001, + "grad_norm": 1.3059771334220434, + "learning_rate": 1.1767345091825962e-06, + "loss": 0.4249681234359741, + "step": 4089 + }, + { + "epoch": 0.9430481899930828, + "grad_norm": 1.322875104249168, + "learning_rate": 1.176359236703182e-06, + "loss": 0.39353805780410767, + "step": 4090 + }, + { + "epoch": 0.9432787641226654, + "grad_norm": 1.1645299347166784, + "learning_rate": 1.1759839385881657e-06, + "loss": 0.4554273188114166, + "step": 4091 + }, + { + "epoch": 0.9435093382522481, + "grad_norm": 1.5935626726835685, + "learning_rate": 1.1756086148921005e-06, + "loss": 0.6275606155395508, + "step": 4092 + }, + { + "epoch": 0.9437399123818307, + "grad_norm": 1.40548177481024, + "learning_rate": 1.1752332656695432e-06, + "loss": 0.5058892965316772, + "step": 4093 + }, + { + "epoch": 0.9439704865114135, + "grad_norm": 1.4618963991295721, + "learning_rate": 1.1748578909750547e-06, + "loss": 0.4318118095397949, + "step": 4094 + }, + { + "epoch": 0.9442010606409961, + "grad_norm": 1.5133013388223657, + "learning_rate": 1.1744824908631996e-06, + "loss": 0.4873964190483093, + "step": 4095 + }, + { + "epoch": 0.9444316347705788, + "grad_norm": 1.7199346017960337, + 
"learning_rate": 1.1741070653885467e-06, + "loss": 0.5026696920394897, + "step": 4096 + }, + { + "epoch": 0.9446622089001614, + "grad_norm": 1.1838920009196625, + "learning_rate": 1.1737316146056667e-06, + "loss": 0.4337490200996399, + "step": 4097 + }, + { + "epoch": 0.9448927830297441, + "grad_norm": 1.4841621540296046, + "learning_rate": 1.173356138569136e-06, + "loss": 0.4552634358406067, + "step": 4098 + }, + { + "epoch": 0.9451233571593267, + "grad_norm": 1.50340660176824, + "learning_rate": 1.1729806373335336e-06, + "loss": 0.4631303548812866, + "step": 4099 + }, + { + "epoch": 0.9453539312889094, + "grad_norm": 1.2840677998534646, + "learning_rate": 1.1726051109534424e-06, + "loss": 0.5004513263702393, + "step": 4100 + }, + { + "epoch": 0.945584505418492, + "grad_norm": 1.4218926297879624, + "learning_rate": 1.172229559483449e-06, + "loss": 0.4634668827056885, + "step": 4101 + }, + { + "epoch": 0.9458150795480748, + "grad_norm": 1.3580815662313042, + "learning_rate": 1.171853982978144e-06, + "loss": 0.4034295678138733, + "step": 4102 + }, + { + "epoch": 0.9460456536776574, + "grad_norm": 1.4066326558267837, + "learning_rate": 1.1714783814921206e-06, + "loss": 0.4981224536895752, + "step": 4103 + }, + { + "epoch": 0.9462762278072401, + "grad_norm": 1.637441573047362, + "learning_rate": 1.1711027550799767e-06, + "loss": 0.460249125957489, + "step": 4104 + }, + { + "epoch": 0.9465068019368227, + "grad_norm": 1.7282687422797383, + "learning_rate": 1.170727103796313e-06, + "loss": 0.4794936180114746, + "step": 4105 + }, + { + "epoch": 0.9467373760664054, + "grad_norm": 1.679442128589896, + "learning_rate": 1.170351427695735e-06, + "loss": 0.42724454402923584, + "step": 4106 + }, + { + "epoch": 0.946967950195988, + "grad_norm": 1.5092304593591768, + "learning_rate": 1.16997572683285e-06, + "loss": 0.4612593948841095, + "step": 4107 + }, + { + "epoch": 0.9471985243255707, + "grad_norm": 1.4462371891962704, + "learning_rate": 1.169600001262271e-06, + "loss": 
0.49512046575546265, + "step": 4108 + }, + { + "epoch": 0.9474290984551533, + "grad_norm": 1.382963972341291, + "learning_rate": 1.1692242510386124e-06, + "loss": 0.49438196420669556, + "step": 4109 + }, + { + "epoch": 0.947659672584736, + "grad_norm": 1.246967438511099, + "learning_rate": 1.1688484762164938e-06, + "loss": 0.4833865165710449, + "step": 4110 + }, + { + "epoch": 0.9478902467143187, + "grad_norm": 1.6394354229670154, + "learning_rate": 1.1684726768505385e-06, + "loss": 0.49647942185401917, + "step": 4111 + }, + { + "epoch": 0.9481208208439014, + "grad_norm": 1.3141370309593936, + "learning_rate": 1.1680968529953718e-06, + "loss": 0.4299147129058838, + "step": 4112 + }, + { + "epoch": 0.948351394973484, + "grad_norm": 1.2751791494481195, + "learning_rate": 1.167721004705624e-06, + "loss": 0.42613041400909424, + "step": 4113 + }, + { + "epoch": 0.9485819691030667, + "grad_norm": 1.5850112492057793, + "learning_rate": 1.1673451320359284e-06, + "loss": 0.3989883065223694, + "step": 4114 + }, + { + "epoch": 0.9488125432326493, + "grad_norm": 1.6195345588406382, + "learning_rate": 1.1669692350409222e-06, + "loss": 0.41362684965133667, + "step": 4115 + }, + { + "epoch": 0.9490431173622319, + "grad_norm": 1.3043186455514282, + "learning_rate": 1.1665933137752452e-06, + "loss": 0.3807048201560974, + "step": 4116 + }, + { + "epoch": 0.9492736914918146, + "grad_norm": 1.452270133487064, + "learning_rate": 1.1662173682935414e-06, + "loss": 0.3440876007080078, + "step": 4117 + }, + { + "epoch": 0.9495042656213972, + "grad_norm": 1.5051121617765968, + "learning_rate": 1.165841398650459e-06, + "loss": 0.43534499406814575, + "step": 4118 + }, + { + "epoch": 0.9497348397509799, + "grad_norm": 1.2124174426672352, + "learning_rate": 1.1654654049006484e-06, + "loss": 0.4900544285774231, + "step": 4119 + }, + { + "epoch": 0.9499654138805625, + "grad_norm": 1.4219346573372744, + "learning_rate": 1.1650893870987643e-06, + "loss": 0.5189288854598999, + "step": 4120 + }, + { 
+ "epoch": 0.9501959880101453, + "grad_norm": 1.5561303354373495, + "learning_rate": 1.1647133452994643e-06, + "loss": 0.587873101234436, + "step": 4121 + }, + { + "epoch": 0.9504265621397279, + "grad_norm": 1.2947612520331362, + "learning_rate": 1.1643372795574106e-06, + "loss": 0.4367108941078186, + "step": 4122 + }, + { + "epoch": 0.9506571362693106, + "grad_norm": 1.3855876287330298, + "learning_rate": 1.1639611899272679e-06, + "loss": 0.4121246635913849, + "step": 4123 + }, + { + "epoch": 0.9508877103988932, + "grad_norm": 1.371083137252789, + "learning_rate": 1.1635850764637042e-06, + "loss": 0.4993973672389984, + "step": 4124 + }, + { + "epoch": 0.9511182845284759, + "grad_norm": 1.3729377845652901, + "learning_rate": 1.163208939221392e-06, + "loss": 0.39145413041114807, + "step": 4125 + }, + { + "epoch": 0.9513488586580585, + "grad_norm": 1.5515816392895183, + "learning_rate": 1.1628327782550065e-06, + "loss": 0.45954760909080505, + "step": 4126 + }, + { + "epoch": 0.9515794327876412, + "grad_norm": 1.5137997254417062, + "learning_rate": 1.1624565936192263e-06, + "loss": 0.5159680843353271, + "step": 4127 + }, + { + "epoch": 0.9518100069172238, + "grad_norm": 1.5429829982679306, + "learning_rate": 1.1620803853687337e-06, + "loss": 0.4441346228122711, + "step": 4128 + }, + { + "epoch": 0.9520405810468066, + "grad_norm": 1.1994992888255296, + "learning_rate": 1.1617041535582144e-06, + "loss": 0.3842248320579529, + "step": 4129 + }, + { + "epoch": 0.9522711551763892, + "grad_norm": 1.5742838715827387, + "learning_rate": 1.1613278982423577e-06, + "loss": 0.5332437753677368, + "step": 4130 + }, + { + "epoch": 0.9525017293059719, + "grad_norm": 1.416443461852387, + "learning_rate": 1.160951619475856e-06, + "loss": 0.4265931248664856, + "step": 4131 + }, + { + "epoch": 0.9527323034355545, + "grad_norm": 1.344407559333665, + "learning_rate": 1.1605753173134052e-06, + "loss": 0.47442418336868286, + "step": 4132 + }, + { + "epoch": 0.9529628775651372, + "grad_norm": 
1.4385000789860496, + "learning_rate": 1.1601989918097044e-06, + "loss": 0.6128898859024048, + "step": 4133 + }, + { + "epoch": 0.9531934516947198, + "grad_norm": 1.3167710707989233, + "learning_rate": 1.159822643019457e-06, + "loss": 0.5347775220870972, + "step": 4134 + }, + { + "epoch": 0.9534240258243025, + "grad_norm": 1.1478699481046142, + "learning_rate": 1.1594462709973682e-06, + "loss": 0.39984625577926636, + "step": 4135 + }, + { + "epoch": 0.9536545999538851, + "grad_norm": 1.411910940206958, + "learning_rate": 1.1590698757981483e-06, + "loss": 0.5146951675415039, + "step": 4136 + }, + { + "epoch": 0.9538851740834678, + "grad_norm": 1.4057451726772026, + "learning_rate": 1.1586934574765097e-06, + "loss": 0.3589641749858856, + "step": 4137 + }, + { + "epoch": 0.9541157482130505, + "grad_norm": 1.4047870659239305, + "learning_rate": 1.1583170160871689e-06, + "loss": 0.428930401802063, + "step": 4138 + }, + { + "epoch": 0.9543463223426332, + "grad_norm": 1.3760779428564116, + "learning_rate": 1.1579405516848452e-06, + "loss": 0.46921080350875854, + "step": 4139 + }, + { + "epoch": 0.9545768964722158, + "grad_norm": 1.462957669946579, + "learning_rate": 1.1575640643242616e-06, + "loss": 0.39079514145851135, + "step": 4140 + }, + { + "epoch": 0.9548074706017985, + "grad_norm": 1.5322762323160557, + "learning_rate": 1.1571875540601443e-06, + "loss": 0.4475102424621582, + "step": 4141 + }, + { + "epoch": 0.9550380447313811, + "grad_norm": 1.3964952325110702, + "learning_rate": 1.1568110209472232e-06, + "loss": 0.43881016969680786, + "step": 4142 + }, + { + "epoch": 0.9552686188609638, + "grad_norm": 1.2846843095885363, + "learning_rate": 1.156434465040231e-06, + "loss": 0.4382214844226837, + "step": 4143 + }, + { + "epoch": 0.9554991929905464, + "grad_norm": 1.6590322564778253, + "learning_rate": 1.1560578863939037e-06, + "loss": 0.5390958786010742, + "step": 4144 + }, + { + "epoch": 0.9557297671201291, + "grad_norm": 1.2966408722030756, + "learning_rate": 
1.155681285062981e-06, + "loss": 0.4276137948036194, + "step": 4145 + }, + { + "epoch": 0.9559603412497117, + "grad_norm": 1.3756682316204962, + "learning_rate": 1.1553046611022058e-06, + "loss": 0.4541968107223511, + "step": 4146 + }, + { + "epoch": 0.9561909153792945, + "grad_norm": 1.4806679512404375, + "learning_rate": 1.1549280145663242e-06, + "loss": 0.43287473917007446, + "step": 4147 + }, + { + "epoch": 0.9564214895088771, + "grad_norm": 1.5507500145218385, + "learning_rate": 1.1545513455100855e-06, + "loss": 0.432822585105896, + "step": 4148 + }, + { + "epoch": 0.9566520636384598, + "grad_norm": 1.4662390355071035, + "learning_rate": 1.1541746539882424e-06, + "loss": 0.519271969795227, + "step": 4149 + }, + { + "epoch": 0.9568826377680424, + "grad_norm": 1.4521470663351335, + "learning_rate": 1.1537979400555506e-06, + "loss": 0.4158627390861511, + "step": 4150 + }, + { + "epoch": 0.9571132118976251, + "grad_norm": 1.4834584070713739, + "learning_rate": 1.1534212037667698e-06, + "loss": 0.42122989892959595, + "step": 4151 + }, + { + "epoch": 0.9573437860272077, + "grad_norm": 1.696588703842723, + "learning_rate": 1.1530444451766623e-06, + "loss": 0.4141794443130493, + "step": 4152 + }, + { + "epoch": 0.9575743601567904, + "grad_norm": 1.3149219500885996, + "learning_rate": 1.1526676643399933e-06, + "loss": 0.4935780167579651, + "step": 4153 + }, + { + "epoch": 0.957804934286373, + "grad_norm": 1.3661965645097156, + "learning_rate": 1.152290861311532e-06, + "loss": 0.5075733661651611, + "step": 4154 + }, + { + "epoch": 0.9580355084159557, + "grad_norm": 1.37824406851626, + "learning_rate": 1.151914036146051e-06, + "loss": 0.4852841794490814, + "step": 4155 + }, + { + "epoch": 0.9582660825455384, + "grad_norm": 1.2576277022731817, + "learning_rate": 1.151537188898325e-06, + "loss": 0.46114620566368103, + "step": 4156 + }, + { + "epoch": 0.9584966566751211, + "grad_norm": 1.6662322349225411, + "learning_rate": 1.1511603196231327e-06, + "loss": 
0.519254207611084, + "step": 4157 + }, + { + "epoch": 0.9587272308047037, + "grad_norm": 1.3283960828325414, + "learning_rate": 1.1507834283752562e-06, + "loss": 0.43635690212249756, + "step": 4158 + }, + { + "epoch": 0.9589578049342864, + "grad_norm": 1.3730336798021219, + "learning_rate": 1.1504065152094802e-06, + "loss": 0.48448023200035095, + "step": 4159 + }, + { + "epoch": 0.959188379063869, + "grad_norm": 1.320755520801986, + "learning_rate": 1.1500295801805927e-06, + "loss": 0.4461054801940918, + "step": 4160 + }, + { + "epoch": 0.9594189531934517, + "grad_norm": 1.3183810948385437, + "learning_rate": 1.1496526233433852e-06, + "loss": 0.44869595766067505, + "step": 4161 + }, + { + "epoch": 0.9596495273230343, + "grad_norm": 1.5137169599039804, + "learning_rate": 1.1492756447526524e-06, + "loss": 0.4592103660106659, + "step": 4162 + }, + { + "epoch": 0.959880101452617, + "grad_norm": 1.3625000210250673, + "learning_rate": 1.1488986444631918e-06, + "loss": 0.48352301120758057, + "step": 4163 + }, + { + "epoch": 0.9601106755821996, + "grad_norm": 1.2039059688900335, + "learning_rate": 1.1485216225298043e-06, + "loss": 0.44718503952026367, + "step": 4164 + }, + { + "epoch": 0.9603412497117824, + "grad_norm": 1.7796976813489804, + "learning_rate": 1.1481445790072933e-06, + "loss": 0.44659486413002014, + "step": 4165 + }, + { + "epoch": 0.960571823841365, + "grad_norm": 1.464260426957605, + "learning_rate": 1.1477675139504665e-06, + "loss": 0.5143063068389893, + "step": 4166 + }, + { + "epoch": 0.9608023979709477, + "grad_norm": 1.825014649582591, + "learning_rate": 1.1473904274141344e-06, + "loss": 0.6708887815475464, + "step": 4167 + }, + { + "epoch": 0.9610329721005303, + "grad_norm": 1.4397638416262573, + "learning_rate": 1.1470133194531094e-06, + "loss": 0.3889666199684143, + "step": 4168 + }, + { + "epoch": 0.961263546230113, + "grad_norm": 1.2805774485856607, + "learning_rate": 1.1466361901222086e-06, + "loss": 0.4610622227191925, + "step": 4169 + }, + { + 
"epoch": 0.9614941203596956, + "grad_norm": 1.4320030308850267, + "learning_rate": 1.1462590394762514e-06, + "loss": 0.46372538805007935, + "step": 4170 + }, + { + "epoch": 0.9617246944892783, + "grad_norm": 1.5638922992309852, + "learning_rate": 1.1458818675700607e-06, + "loss": 0.5197097063064575, + "step": 4171 + }, + { + "epoch": 0.9619552686188609, + "grad_norm": 1.2417860513603916, + "learning_rate": 1.145504674458462e-06, + "loss": 0.3849745988845825, + "step": 4172 + }, + { + "epoch": 0.9621858427484437, + "grad_norm": 1.5196854039542969, + "learning_rate": 1.1451274601962841e-06, + "loss": 0.4572817385196686, + "step": 4173 + }, + { + "epoch": 0.9624164168780263, + "grad_norm": 1.4154832612934123, + "learning_rate": 1.1447502248383594e-06, + "loss": 0.4383746385574341, + "step": 4174 + }, + { + "epoch": 0.962646991007609, + "grad_norm": 1.473681287130909, + "learning_rate": 1.1443729684395222e-06, + "loss": 0.5319672226905823, + "step": 4175 + }, + { + "epoch": 0.9628775651371916, + "grad_norm": 1.2307542062760268, + "learning_rate": 1.143995691054611e-06, + "loss": 0.4351249933242798, + "step": 4176 + }, + { + "epoch": 0.9631081392667743, + "grad_norm": 1.42416527435209, + "learning_rate": 1.1436183927384668e-06, + "loss": 0.5453774929046631, + "step": 4177 + }, + { + "epoch": 0.9633387133963569, + "grad_norm": 1.569291329857932, + "learning_rate": 1.1432410735459336e-06, + "loss": 0.5605905055999756, + "step": 4178 + }, + { + "epoch": 0.9635692875259396, + "grad_norm": 1.3825364023898294, + "learning_rate": 1.1428637335318587e-06, + "loss": 0.4556693434715271, + "step": 4179 + }, + { + "epoch": 0.9637998616555222, + "grad_norm": 1.316766347101971, + "learning_rate": 1.142486372751092e-06, + "loss": 0.45428892970085144, + "step": 4180 + }, + { + "epoch": 0.9640304357851049, + "grad_norm": 1.4252168865652697, + "learning_rate": 1.142108991258487e-06, + "loss": 0.4897412657737732, + "step": 4181 + }, + { + "epoch": 0.9642610099146876, + "grad_norm": 
1.984637391356181, + "learning_rate": 1.1417315891089004e-06, + "loss": 0.5478836894035339, + "step": 4182 + }, + { + "epoch": 0.9644915840442703, + "grad_norm": 1.4620834191298895, + "learning_rate": 1.1413541663571904e-06, + "loss": 0.42394131422042847, + "step": 4183 + }, + { + "epoch": 0.9647221581738529, + "grad_norm": 1.585175673978148, + "learning_rate": 1.1409767230582199e-06, + "loss": 0.5047104954719543, + "step": 4184 + }, + { + "epoch": 0.9649527323034356, + "grad_norm": 1.4749915601759833, + "learning_rate": 1.1405992592668538e-06, + "loss": 0.43985825777053833, + "step": 4185 + }, + { + "epoch": 0.9651833064330182, + "grad_norm": 1.3061643078097422, + "learning_rate": 1.1402217750379608e-06, + "loss": 0.4338407516479492, + "step": 4186 + }, + { + "epoch": 0.9654138805626009, + "grad_norm": 1.5404850502320075, + "learning_rate": 1.1398442704264118e-06, + "loss": 0.4532614052295685, + "step": 4187 + }, + { + "epoch": 0.9656444546921835, + "grad_norm": 1.2345047018331374, + "learning_rate": 1.1394667454870802e-06, + "loss": 0.4546123445034027, + "step": 4188 + }, + { + "epoch": 0.9658750288217662, + "grad_norm": 1.5321856096614175, + "learning_rate": 1.139089200274844e-06, + "loss": 0.44743451476097107, + "step": 4189 + }, + { + "epoch": 0.9661056029513488, + "grad_norm": 1.3411063865526411, + "learning_rate": 1.138711634844583e-06, + "loss": 0.4566968083381653, + "step": 4190 + }, + { + "epoch": 0.9663361770809316, + "grad_norm": 1.481468600614622, + "learning_rate": 1.13833404925118e-06, + "loss": 0.46385467052459717, + "step": 4191 + }, + { + "epoch": 0.9665667512105142, + "grad_norm": 1.2411450691863102, + "learning_rate": 1.137956443549521e-06, + "loss": 0.4614461660385132, + "step": 4192 + }, + { + "epoch": 0.9667973253400969, + "grad_norm": 1.3326432316915904, + "learning_rate": 1.1375788177944945e-06, + "loss": 0.4351955056190491, + "step": 4193 + }, + { + "epoch": 0.9670278994696795, + "grad_norm": 1.368161025215393, + "learning_rate": 
1.1372011720409927e-06, + "loss": 0.4172135591506958, + "step": 4194 + }, + { + "epoch": 0.9672584735992622, + "grad_norm": 1.6941620223477674, + "learning_rate": 1.1368235063439102e-06, + "loss": 0.5482916831970215, + "step": 4195 + }, + { + "epoch": 0.9674890477288448, + "grad_norm": 1.3508434751874687, + "learning_rate": 1.136445820758144e-06, + "loss": 0.4336891770362854, + "step": 4196 + }, + { + "epoch": 0.9677196218584275, + "grad_norm": 1.5072664158429512, + "learning_rate": 1.1360681153385956e-06, + "loss": 0.42612385749816895, + "step": 4197 + }, + { + "epoch": 0.9679501959880101, + "grad_norm": 1.5000454097568379, + "learning_rate": 1.135690390140167e-06, + "loss": 0.513736367225647, + "step": 4198 + }, + { + "epoch": 0.9681807701175928, + "grad_norm": 1.8279069537189752, + "learning_rate": 1.1353126452177656e-06, + "loss": 0.45551058650016785, + "step": 4199 + }, + { + "epoch": 0.9684113442471755, + "grad_norm": 1.3479770342549766, + "learning_rate": 1.1349348806262994e-06, + "loss": 0.45450061559677124, + "step": 4200 + }, + { + "epoch": 0.9686419183767582, + "grad_norm": 1.5942392384347237, + "learning_rate": 1.1345570964206807e-06, + "loss": 0.43962353467941284, + "step": 4201 + }, + { + "epoch": 0.9688724925063408, + "grad_norm": 1.4695533515040724, + "learning_rate": 1.1341792926558245e-06, + "loss": 0.5304821729660034, + "step": 4202 + }, + { + "epoch": 0.9691030666359235, + "grad_norm": 1.57215629996827, + "learning_rate": 1.1338014693866483e-06, + "loss": 0.6079045534133911, + "step": 4203 + }, + { + "epoch": 0.9693336407655061, + "grad_norm": 1.3451772860900804, + "learning_rate": 1.1334236266680724e-06, + "loss": 0.39895182847976685, + "step": 4204 + }, + { + "epoch": 0.9695642148950888, + "grad_norm": 1.4224201035305835, + "learning_rate": 1.1330457645550202e-06, + "loss": 0.5264945030212402, + "step": 4205 + }, + { + "epoch": 0.9697947890246714, + "grad_norm": 1.3209691457440123, + "learning_rate": 1.1326678831024178e-06, + "loss": 
0.4794533848762512, + "step": 4206 + }, + { + "epoch": 0.9700253631542541, + "grad_norm": 1.472204632290126, + "learning_rate": 1.1322899823651938e-06, + "loss": 0.42917680740356445, + "step": 4207 + }, + { + "epoch": 0.9702559372838367, + "grad_norm": 1.4163025348687577, + "learning_rate": 1.1319120623982804e-06, + "loss": 0.42155951261520386, + "step": 4208 + }, + { + "epoch": 0.9704865114134195, + "grad_norm": 1.455345134423215, + "learning_rate": 1.1315341232566121e-06, + "loss": 0.5119719505310059, + "step": 4209 + }, + { + "epoch": 0.9707170855430021, + "grad_norm": 1.4441630965274395, + "learning_rate": 1.1311561649951255e-06, + "loss": 0.5261529684066772, + "step": 4210 + }, + { + "epoch": 0.9709476596725848, + "grad_norm": 1.3046857195112773, + "learning_rate": 1.1307781876687609e-06, + "loss": 0.5133010149002075, + "step": 4211 + }, + { + "epoch": 0.9711782338021674, + "grad_norm": 1.4061037707348525, + "learning_rate": 1.1304001913324617e-06, + "loss": 0.5214196443557739, + "step": 4212 + }, + { + "epoch": 0.9714088079317501, + "grad_norm": 1.4191122003483587, + "learning_rate": 1.1300221760411732e-06, + "loss": 0.4665095806121826, + "step": 4213 + }, + { + "epoch": 0.9716393820613327, + "grad_norm": 1.2917310787961995, + "learning_rate": 1.1296441418498435e-06, + "loss": 0.44912537932395935, + "step": 4214 + }, + { + "epoch": 0.9718699561909154, + "grad_norm": 1.384060094796334, + "learning_rate": 1.1292660888134241e-06, + "loss": 0.48622840642929077, + "step": 4215 + }, + { + "epoch": 0.972100530320498, + "grad_norm": 1.3952506250953003, + "learning_rate": 1.128888016986868e-06, + "loss": 0.40099745988845825, + "step": 4216 + }, + { + "epoch": 0.9723311044500808, + "grad_norm": 1.6661609433762745, + "learning_rate": 1.1285099264251331e-06, + "loss": 0.4981631934642792, + "step": 4217 + }, + { + "epoch": 0.9725616785796634, + "grad_norm": 1.3061541456837051, + "learning_rate": 1.1281318171831778e-06, + "loss": 0.3902980387210846, + "step": 4218 + }, + { 
+ "epoch": 0.9727922527092461, + "grad_norm": 1.646940009523485, + "learning_rate": 1.1277536893159641e-06, + "loss": 0.5120723843574524, + "step": 4219 + }, + { + "epoch": 0.9730228268388287, + "grad_norm": 1.4050676349560098, + "learning_rate": 1.1273755428784568e-06, + "loss": 0.47908157110214233, + "step": 4220 + }, + { + "epoch": 0.9732534009684114, + "grad_norm": 1.3980215754858654, + "learning_rate": 1.126997377925624e-06, + "loss": 0.44935697317123413, + "step": 4221 + }, + { + "epoch": 0.973483975097994, + "grad_norm": 1.7936737063106103, + "learning_rate": 1.1266191945124345e-06, + "loss": 0.46883124113082886, + "step": 4222 + }, + { + "epoch": 0.9737145492275767, + "grad_norm": 1.3605023071963889, + "learning_rate": 1.1262409926938622e-06, + "loss": 0.41385799646377563, + "step": 4223 + }, + { + "epoch": 0.9739451233571593, + "grad_norm": 1.352097187992639, + "learning_rate": 1.1258627725248821e-06, + "loss": 0.5450118780136108, + "step": 4224 + }, + { + "epoch": 0.974175697486742, + "grad_norm": 1.3149598759310381, + "learning_rate": 1.1254845340604725e-06, + "loss": 0.4728820323944092, + "step": 4225 + }, + { + "epoch": 0.9744062716163246, + "grad_norm": 1.490906480143449, + "learning_rate": 1.1251062773556143e-06, + "loss": 0.5111296772956848, + "step": 4226 + }, + { + "epoch": 0.9746368457459073, + "grad_norm": 1.6529549144482583, + "learning_rate": 1.1247280024652908e-06, + "loss": 0.4538743793964386, + "step": 4227 + }, + { + "epoch": 0.97486741987549, + "grad_norm": 1.4130886870951611, + "learning_rate": 1.1243497094444877e-06, + "loss": 0.4917091131210327, + "step": 4228 + }, + { + "epoch": 0.9750979940050726, + "grad_norm": 1.387244231549714, + "learning_rate": 1.1239713983481945e-06, + "loss": 0.40376198291778564, + "step": 4229 + }, + { + "epoch": 0.9753285681346553, + "grad_norm": 1.4554658551428983, + "learning_rate": 1.1235930692314019e-06, + "loss": 0.5356566905975342, + "step": 4230 + }, + { + "epoch": 0.9755591422642379, + "grad_norm": 
1.4359135131794967, + "learning_rate": 1.123214722149104e-06, + "loss": 0.4374624490737915, + "step": 4231 + }, + { + "epoch": 0.9757897163938206, + "grad_norm": 1.4746549529981767, + "learning_rate": 1.1228363571562976e-06, + "loss": 0.4225429594516754, + "step": 4232 + }, + { + "epoch": 0.9760202905234032, + "grad_norm": 1.4500544144002923, + "learning_rate": 1.1224579743079819e-06, + "loss": 0.5389699935913086, + "step": 4233 + }, + { + "epoch": 0.9762508646529859, + "grad_norm": 1.39848035447059, + "learning_rate": 1.1220795736591584e-06, + "loss": 0.4925463795661926, + "step": 4234 + }, + { + "epoch": 0.9764814387825685, + "grad_norm": 1.2916834361485914, + "learning_rate": 1.1217011552648315e-06, + "loss": 0.4694328308105469, + "step": 4235 + }, + { + "epoch": 0.9767120129121513, + "grad_norm": 1.377557176325016, + "learning_rate": 1.1213227191800086e-06, + "loss": 0.39887624979019165, + "step": 4236 + }, + { + "epoch": 0.9769425870417339, + "grad_norm": 1.5555659299458584, + "learning_rate": 1.120944265459699e-06, + "loss": 0.4930388927459717, + "step": 4237 + }, + { + "epoch": 0.9771731611713166, + "grad_norm": 1.2486101676760866, + "learning_rate": 1.1205657941589143e-06, + "loss": 0.4595404863357544, + "step": 4238 + }, + { + "epoch": 0.9774037353008992, + "grad_norm": 1.4574273243269236, + "learning_rate": 1.1201873053326695e-06, + "loss": 0.44177496433258057, + "step": 4239 + }, + { + "epoch": 0.9776343094304819, + "grad_norm": 1.4308970126871865, + "learning_rate": 1.119808799035982e-06, + "loss": 0.47095373272895813, + "step": 4240 + }, + { + "epoch": 0.9778648835600645, + "grad_norm": 1.4049777741841016, + "learning_rate": 1.1194302753238716e-06, + "loss": 0.4649583697319031, + "step": 4241 + }, + { + "epoch": 0.9780954576896472, + "grad_norm": 1.5269711326381101, + "learning_rate": 1.1190517342513598e-06, + "loss": 0.44815266132354736, + "step": 4242 + }, + { + "epoch": 0.9783260318192298, + "grad_norm": 1.462868793648971, + "learning_rate": 
1.118673175873472e-06, + "loss": 0.4861665368080139, + "step": 4243 + }, + { + "epoch": 0.9785566059488126, + "grad_norm": 1.3395897424173215, + "learning_rate": 1.1182946002452354e-06, + "loss": 0.5196468830108643, + "step": 4244 + }, + { + "epoch": 0.9787871800783952, + "grad_norm": 1.5910002582718288, + "learning_rate": 1.11791600742168e-06, + "loss": 0.49746841192245483, + "step": 4245 + }, + { + "epoch": 0.9790177542079779, + "grad_norm": 1.2919062217717159, + "learning_rate": 1.1175373974578377e-06, + "loss": 0.4637739956378937, + "step": 4246 + }, + { + "epoch": 0.9792483283375605, + "grad_norm": 1.228394275609753, + "learning_rate": 1.1171587704087434e-06, + "loss": 0.46009692549705505, + "step": 4247 + }, + { + "epoch": 0.9794789024671432, + "grad_norm": 2.1569798034684706, + "learning_rate": 1.1167801263294346e-06, + "loss": 0.49036258459091187, + "step": 4248 + }, + { + "epoch": 0.9797094765967258, + "grad_norm": 1.395933426650918, + "learning_rate": 1.1164014652749509e-06, + "loss": 0.4730580449104309, + "step": 4249 + }, + { + "epoch": 0.9799400507263085, + "grad_norm": 1.618438538763921, + "learning_rate": 1.1160227873003345e-06, + "loss": 0.5029968023300171, + "step": 4250 + }, + { + "epoch": 0.9801706248558911, + "grad_norm": 1.4870951402562973, + "learning_rate": 1.1156440924606299e-06, + "loss": 0.5149805545806885, + "step": 4251 + }, + { + "epoch": 0.9804011989854738, + "grad_norm": 1.6248587467562292, + "learning_rate": 1.1152653808108845e-06, + "loss": 0.5017384886741638, + "step": 4252 + }, + { + "epoch": 0.9806317731150564, + "grad_norm": 1.486462967422998, + "learning_rate": 1.114886652406148e-06, + "loss": 0.47569048404693604, + "step": 4253 + }, + { + "epoch": 0.9808623472446392, + "grad_norm": 1.4476623501612873, + "learning_rate": 1.1145079073014722e-06, + "loss": 0.5127655863761902, + "step": 4254 + }, + { + "epoch": 0.9810929213742218, + "grad_norm": 1.4943063660203757, + "learning_rate": 1.1141291455519114e-06, + "loss": 
0.4014360308647156, + "step": 4255 + }, + { + "epoch": 0.9813234955038045, + "grad_norm": 1.4814879590427052, + "learning_rate": 1.1137503672125228e-06, + "loss": 0.43737465143203735, + "step": 4256 + }, + { + "epoch": 0.9815540696333871, + "grad_norm": 1.413525212350489, + "learning_rate": 1.1133715723383655e-06, + "loss": 0.4389764070510864, + "step": 4257 + }, + { + "epoch": 0.9817846437629698, + "grad_norm": 1.3532173754404184, + "learning_rate": 1.112992760984501e-06, + "loss": 0.5105381608009338, + "step": 4258 + }, + { + "epoch": 0.9820152178925524, + "grad_norm": 1.4052776017835835, + "learning_rate": 1.1126139332059937e-06, + "loss": 0.4393002688884735, + "step": 4259 + }, + { + "epoch": 0.9822457920221351, + "grad_norm": 1.3179147448132482, + "learning_rate": 1.1122350890579102e-06, + "loss": 0.541419267654419, + "step": 4260 + }, + { + "epoch": 0.9824763661517177, + "grad_norm": 1.5177150542407778, + "learning_rate": 1.1118562285953186e-06, + "loss": 0.4153546094894409, + "step": 4261 + }, + { + "epoch": 0.9827069402813005, + "grad_norm": 1.4649176443917427, + "learning_rate": 1.1114773518732907e-06, + "loss": 0.5060696601867676, + "step": 4262 + }, + { + "epoch": 0.9829375144108831, + "grad_norm": 1.6266321171712574, + "learning_rate": 1.1110984589468998e-06, + "loss": 0.5975456237792969, + "step": 4263 + }, + { + "epoch": 0.9831680885404658, + "grad_norm": 1.4920078622156363, + "learning_rate": 1.110719549871222e-06, + "loss": 0.5729621648788452, + "step": 4264 + }, + { + "epoch": 0.9833986626700484, + "grad_norm": 1.3838030985279757, + "learning_rate": 1.1103406247013356e-06, + "loss": 0.3948165476322174, + "step": 4265 + }, + { + "epoch": 0.9836292367996311, + "grad_norm": 1.3893062538653607, + "learning_rate": 1.1099616834923212e-06, + "loss": 0.41744932532310486, + "step": 4266 + }, + { + "epoch": 0.9838598109292137, + "grad_norm": 1.3638196246051946, + "learning_rate": 1.1095827262992611e-06, + "loss": 0.4701330065727234, + "step": 4267 + }, + { + 
"epoch": 0.9840903850587964, + "grad_norm": 1.4764746527882953, + "learning_rate": 1.109203753177242e-06, + "loss": 0.4841681718826294, + "step": 4268 + }, + { + "epoch": 0.984320959188379, + "grad_norm": 1.3604414964396274, + "learning_rate": 1.10882476418135e-06, + "loss": 0.4180435538291931, + "step": 4269 + }, + { + "epoch": 0.9845515333179617, + "grad_norm": 1.4211218067668543, + "learning_rate": 1.1084457593666758e-06, + "loss": 0.39362633228302, + "step": 4270 + }, + { + "epoch": 0.9847821074475444, + "grad_norm": 1.4239354595534417, + "learning_rate": 1.1080667387883116e-06, + "loss": 0.5192993879318237, + "step": 4271 + }, + { + "epoch": 0.9850126815771271, + "grad_norm": 1.5201720088447181, + "learning_rate": 1.1076877025013517e-06, + "loss": 0.48835504055023193, + "step": 4272 + }, + { + "epoch": 0.9852432557067097, + "grad_norm": 1.5142338003412266, + "learning_rate": 1.1073086505608925e-06, + "loss": 0.44442474842071533, + "step": 4273 + }, + { + "epoch": 0.9854738298362924, + "grad_norm": 1.3436041344969518, + "learning_rate": 1.1069295830220339e-06, + "loss": 0.4544455409049988, + "step": 4274 + }, + { + "epoch": 0.985704403965875, + "grad_norm": 1.5833831369807498, + "learning_rate": 1.106550499939876e-06, + "loss": 0.482341468334198, + "step": 4275 + }, + { + "epoch": 0.9859349780954577, + "grad_norm": 1.421534858967002, + "learning_rate": 1.1061714013695236e-06, + "loss": 0.5251357555389404, + "step": 4276 + }, + { + "epoch": 0.9861655522250403, + "grad_norm": 1.2537356796939523, + "learning_rate": 1.1057922873660819e-06, + "loss": 0.4538683295249939, + "step": 4277 + }, + { + "epoch": 0.986396126354623, + "grad_norm": 2.0128553783671728, + "learning_rate": 1.105413157984659e-06, + "loss": 0.5112448930740356, + "step": 4278 + }, + { + "epoch": 0.9866267004842056, + "grad_norm": 1.4914994042257563, + "learning_rate": 1.1050340132803654e-06, + "loss": 0.48863890767097473, + "step": 4279 + }, + { + "epoch": 0.9868572746137884, + "grad_norm": 
1.494741313695512, + "learning_rate": 1.1046548533083134e-06, + "loss": 0.43637439608573914, + "step": 4280 + }, + { + "epoch": 0.987087848743371, + "grad_norm": 1.5727176113962202, + "learning_rate": 1.104275678123618e-06, + "loss": 0.5231983065605164, + "step": 4281 + }, + { + "epoch": 0.9873184228729537, + "grad_norm": 1.7169447967595874, + "learning_rate": 1.1038964877813955e-06, + "loss": 0.46838122606277466, + "step": 4282 + }, + { + "epoch": 0.9875489970025363, + "grad_norm": 1.3537630033218837, + "learning_rate": 1.1035172823367658e-06, + "loss": 0.4330589473247528, + "step": 4283 + }, + { + "epoch": 0.987779571132119, + "grad_norm": 1.4178119046272273, + "learning_rate": 1.1031380618448501e-06, + "loss": 0.44962531328201294, + "step": 4284 + }, + { + "epoch": 0.9880101452617016, + "grad_norm": 1.3547255909489988, + "learning_rate": 1.1027588263607719e-06, + "loss": 0.44549795985221863, + "step": 4285 + }, + { + "epoch": 0.9882407193912843, + "grad_norm": 1.7082954293487662, + "learning_rate": 1.1023795759396568e-06, + "loss": 0.43510758876800537, + "step": 4286 + }, + { + "epoch": 0.9884712935208669, + "grad_norm": 1.3135837847563279, + "learning_rate": 1.1020003106366324e-06, + "loss": 0.4369906187057495, + "step": 4287 + }, + { + "epoch": 0.9887018676504497, + "grad_norm": 1.416650593568537, + "learning_rate": 1.1016210305068296e-06, + "loss": 0.42049574851989746, + "step": 4288 + }, + { + "epoch": 0.9889324417800323, + "grad_norm": 1.6285692706476314, + "learning_rate": 1.10124173560538e-06, + "loss": 0.449156790971756, + "step": 4289 + }, + { + "epoch": 0.989163015909615, + "grad_norm": 1.5784410678150576, + "learning_rate": 1.1008624259874177e-06, + "loss": 0.4736451506614685, + "step": 4290 + }, + { + "epoch": 0.9893935900391976, + "grad_norm": 1.3029401584123959, + "learning_rate": 1.10048310170808e-06, + "loss": 0.3988722860813141, + "step": 4291 + }, + { + "epoch": 0.9896241641687803, + "grad_norm": 1.4221756045070393, + "learning_rate": 
1.100103762822505e-06, + "loss": 0.44330862164497375, + "step": 4292 + }, + { + "epoch": 0.9898547382983629, + "grad_norm": 1.5471015099626197, + "learning_rate": 1.0997244093858336e-06, + "loss": 0.5294286608695984, + "step": 4293 + }, + { + "epoch": 0.9900853124279456, + "grad_norm": 1.3808712553027187, + "learning_rate": 1.0993450414532082e-06, + "loss": 0.463120698928833, + "step": 4294 + }, + { + "epoch": 0.9903158865575282, + "grad_norm": 1.294463919332552, + "learning_rate": 1.0989656590797747e-06, + "loss": 0.4481865167617798, + "step": 4295 + }, + { + "epoch": 0.9905464606871109, + "grad_norm": 1.4153337646078945, + "learning_rate": 1.0985862623206794e-06, + "loss": 0.4467630386352539, + "step": 4296 + }, + { + "epoch": 0.9907770348166935, + "grad_norm": 1.8865527079498654, + "learning_rate": 1.0982068512310717e-06, + "loss": 0.43485027551651, + "step": 4297 + }, + { + "epoch": 0.9910076089462763, + "grad_norm": 1.5277390713389145, + "learning_rate": 1.0978274258661032e-06, + "loss": 0.4556450843811035, + "step": 4298 + }, + { + "epoch": 0.9912381830758589, + "grad_norm": 1.4768070925377026, + "learning_rate": 1.0974479862809268e-06, + "loss": 0.48326122760772705, + "step": 4299 + }, + { + "epoch": 0.9914687572054416, + "grad_norm": 1.1782147993424035, + "learning_rate": 1.097068532530698e-06, + "loss": 0.42254534363746643, + "step": 4300 + }, + { + "epoch": 0.9916993313350242, + "grad_norm": 1.3623288149981243, + "learning_rate": 1.096689064670574e-06, + "loss": 0.4076887369155884, + "step": 4301 + }, + { + "epoch": 0.9919299054646069, + "grad_norm": 1.4246737986617306, + "learning_rate": 1.0963095827557146e-06, + "loss": 0.40615612268447876, + "step": 4302 + }, + { + "epoch": 0.9921604795941895, + "grad_norm": 1.391998245639926, + "learning_rate": 1.095930086841281e-06, + "loss": 0.47794467210769653, + "step": 4303 + }, + { + "epoch": 0.9923910537237722, + "grad_norm": 1.479591301344316, + "learning_rate": 1.0955505769824375e-06, + "loss": 
0.4927758574485779, + "step": 4304 + }, + { + "epoch": 0.9926216278533548, + "grad_norm": 1.1962407216416377, + "learning_rate": 1.0951710532343493e-06, + "loss": 0.40777790546417236, + "step": 4305 + }, + { + "epoch": 0.9928522019829376, + "grad_norm": 1.2781565166204398, + "learning_rate": 1.0947915156521837e-06, + "loss": 0.41996532678604126, + "step": 4306 + }, + { + "epoch": 0.9930827761125202, + "grad_norm": 1.3495931588969972, + "learning_rate": 1.0944119642911107e-06, + "loss": 0.4366680383682251, + "step": 4307 + }, + { + "epoch": 0.9933133502421029, + "grad_norm": 1.4609250216040512, + "learning_rate": 1.094032399206302e-06, + "loss": 0.5350530743598938, + "step": 4308 + }, + { + "epoch": 0.9935439243716855, + "grad_norm": 1.5545326791900604, + "learning_rate": 1.093652820452931e-06, + "loss": 0.5166209936141968, + "step": 4309 + }, + { + "epoch": 0.9937744985012682, + "grad_norm": 1.3624754056256652, + "learning_rate": 1.0932732280861734e-06, + "loss": 0.5104992389678955, + "step": 4310 + }, + { + "epoch": 0.9940050726308508, + "grad_norm": 1.293281056582964, + "learning_rate": 1.0928936221612068e-06, + "loss": 0.38249820470809937, + "step": 4311 + }, + { + "epoch": 0.9942356467604335, + "grad_norm": 1.5718744647134053, + "learning_rate": 1.0925140027332107e-06, + "loss": 0.4930746555328369, + "step": 4312 + }, + { + "epoch": 0.9944662208900161, + "grad_norm": 1.5006868919231642, + "learning_rate": 1.092134369857367e-06, + "loss": 0.46536654233932495, + "step": 4313 + }, + { + "epoch": 0.9946967950195988, + "grad_norm": 1.5384946564391833, + "learning_rate": 1.0917547235888582e-06, + "loss": 0.4591559171676636, + "step": 4314 + }, + { + "epoch": 0.9949273691491815, + "grad_norm": 1.609102883203802, + "learning_rate": 1.0913750639828709e-06, + "loss": 0.5034719705581665, + "step": 4315 + }, + { + "epoch": 0.9951579432787642, + "grad_norm": 1.3461654572756176, + "learning_rate": 1.0909953910945921e-06, + "loss": 0.5289135575294495, + "step": 4316 + }, + { 
+ "epoch": 0.9953885174083468, + "grad_norm": 1.5181970245510374, + "learning_rate": 1.090615704979211e-06, + "loss": 0.48736900091171265, + "step": 4317 + }, + { + "epoch": 0.9956190915379295, + "grad_norm": 1.347314123709775, + "learning_rate": 1.0902360056919186e-06, + "loss": 0.44812899827957153, + "step": 4318 + }, + { + "epoch": 0.9958496656675121, + "grad_norm": 1.717313100956624, + "learning_rate": 1.0898562932879083e-06, + "loss": 0.42837953567504883, + "step": 4319 + }, + { + "epoch": 0.9960802397970948, + "grad_norm": 1.3616068420969312, + "learning_rate": 1.089476567822375e-06, + "loss": 0.4946538805961609, + "step": 4320 + }, + { + "epoch": 0.9963108139266774, + "grad_norm": 1.3738772638549184, + "learning_rate": 1.089096829350516e-06, + "loss": 0.472694993019104, + "step": 4321 + }, + { + "epoch": 0.9965413880562601, + "grad_norm": 1.51102718471871, + "learning_rate": 1.0887170779275297e-06, + "loss": 0.546560525894165, + "step": 4322 + }, + { + "epoch": 0.9967719621858427, + "grad_norm": 1.7144585803126207, + "learning_rate": 1.088337313608617e-06, + "loss": 0.5098580718040466, + "step": 4323 + }, + { + "epoch": 0.9970025363154255, + "grad_norm": 1.4511718916783138, + "learning_rate": 1.0879575364489807e-06, + "loss": 0.4127371907234192, + "step": 4324 + }, + { + "epoch": 0.9972331104450081, + "grad_norm": 1.361622993253284, + "learning_rate": 1.0875777465038249e-06, + "loss": 0.41234201192855835, + "step": 4325 + }, + { + "epoch": 0.9974636845745908, + "grad_norm": 1.334187068919988, + "learning_rate": 1.087197943828356e-06, + "loss": 0.42657697200775146, + "step": 4326 + }, + { + "epoch": 0.9976942587041734, + "grad_norm": 1.5731685077464828, + "learning_rate": 1.0868181284777825e-06, + "loss": 0.5168975591659546, + "step": 4327 + }, + { + "epoch": 0.9979248328337561, + "grad_norm": 1.3417267376651396, + "learning_rate": 1.0864383005073142e-06, + "loss": 0.4712294340133667, + "step": 4328 + }, + { + "epoch": 0.9981554069633387, + "grad_norm": 
1.514146578387226, + "learning_rate": 1.0860584599721624e-06, + "loss": 0.4685649871826172, + "step": 4329 + }, + { + "epoch": 0.9983859810929214, + "grad_norm": 1.4104009699586146, + "learning_rate": 1.0856786069275417e-06, + "loss": 0.4699268937110901, + "step": 4330 + }, + { + "epoch": 0.998616555222504, + "grad_norm": 1.5072273981885642, + "learning_rate": 1.0852987414286669e-06, + "loss": 0.44216299057006836, + "step": 4331 + }, + { + "epoch": 0.9988471293520867, + "grad_norm": 1.489870947647978, + "learning_rate": 1.0849188635307558e-06, + "loss": 0.4374035894870758, + "step": 4332 + }, + { + "epoch": 0.9990777034816694, + "grad_norm": 1.396380314188184, + "learning_rate": 1.0845389732890269e-06, + "loss": 0.4538502097129822, + "step": 4333 + }, + { + "epoch": 0.9993082776112521, + "grad_norm": 1.5201233043344708, + "learning_rate": 1.0841590707587017e-06, + "loss": 0.4432523250579834, + "step": 4334 + }, + { + "epoch": 0.9995388517408347, + "grad_norm": 1.3401246835224159, + "learning_rate": 1.0837791559950026e-06, + "loss": 0.3614054322242737, + "step": 4335 + }, + { + "epoch": 0.9997694258704174, + "grad_norm": 1.5241184734301618, + "learning_rate": 1.0833992290531542e-06, + "loss": 0.5412651300430298, + "step": 4336 + }, + { + "epoch": 1.0, + "grad_norm": 1.3961487739465548, + "learning_rate": 1.0830192899883825e-06, + "loss": 0.43333327770233154, + "step": 4337 + }, + { + "epoch": 1.0002305741295827, + "grad_norm": 1.3739097269887006, + "learning_rate": 1.0826393388559156e-06, + "loss": 0.40433377027511597, + "step": 4338 + }, + { + "epoch": 1.0004611482591652, + "grad_norm": 1.5246903566917884, + "learning_rate": 1.0822593757109835e-06, + "loss": 0.49699902534484863, + "step": 4339 + }, + { + "epoch": 1.000691722388748, + "grad_norm": 1.4093275236950669, + "learning_rate": 1.0818794006088174e-06, + "loss": 0.4992629289627075, + "step": 4340 + }, + { + "epoch": 1.0009222965183306, + "grad_norm": 1.546985643456235, + "learning_rate": 
1.0814994136046503e-06, + "loss": 0.39532744884490967, + "step": 4341 + }, + { + "epoch": 1.0011528706479134, + "grad_norm": 1.4715614082094945, + "learning_rate": 1.0811194147537177e-06, + "loss": 0.48260024189949036, + "step": 4342 + }, + { + "epoch": 1.0013834447774959, + "grad_norm": 1.1813818983438111, + "learning_rate": 1.0807394041112562e-06, + "loss": 0.40896737575531006, + "step": 4343 + }, + { + "epoch": 1.0016140189070786, + "grad_norm": 1.373003199387245, + "learning_rate": 1.0803593817325037e-06, + "loss": 0.361757755279541, + "step": 4344 + }, + { + "epoch": 1.0018445930366613, + "grad_norm": 1.3113582417275997, + "learning_rate": 1.0799793476727006e-06, + "loss": 0.5524640083312988, + "step": 4345 + }, + { + "epoch": 1.002075167166244, + "grad_norm": 1.4504745740569693, + "learning_rate": 1.0795993019870891e-06, + "loss": 0.4798622727394104, + "step": 4346 + }, + { + "epoch": 1.0023057412958265, + "grad_norm": 1.1125620580650875, + "learning_rate": 1.079219244730912e-06, + "loss": 0.3408532440662384, + "step": 4347 + }, + { + "epoch": 1.0025363154254092, + "grad_norm": 1.6198320758392701, + "learning_rate": 1.0788391759594152e-06, + "loss": 0.4185452461242676, + "step": 4348 + }, + { + "epoch": 1.002766889554992, + "grad_norm": 1.4569047754589481, + "learning_rate": 1.078459095727845e-06, + "loss": 0.4656596779823303, + "step": 4349 + }, + { + "epoch": 1.0029974636845747, + "grad_norm": 1.2861299587948707, + "learning_rate": 1.07807900409145e-06, + "loss": 0.45649081468582153, + "step": 4350 + }, + { + "epoch": 1.0032280378141571, + "grad_norm": 1.4368410869138808, + "learning_rate": 1.0776989011054806e-06, + "loss": 0.4732903242111206, + "step": 4351 + }, + { + "epoch": 1.0034586119437399, + "grad_norm": 1.4875640347613817, + "learning_rate": 1.0773187868251882e-06, + "loss": 0.5313757658004761, + "step": 4352 + }, + { + "epoch": 1.0036891860733226, + "grad_norm": 1.7663418153227872, + "learning_rate": 1.0769386613058267e-06, + "loss": 
0.5373719334602356, + "step": 4353 + }, + { + "epoch": 1.0039197602029053, + "grad_norm": 1.4108655227977445, + "learning_rate": 1.076558524602651e-06, + "loss": 0.4530528783798218, + "step": 4354 + }, + { + "epoch": 1.0041503343324878, + "grad_norm": 2.0172927781638816, + "learning_rate": 1.076178376770918e-06, + "loss": 0.361511766910553, + "step": 4355 + }, + { + "epoch": 1.0043809084620705, + "grad_norm": 1.5430566364369291, + "learning_rate": 1.0757982178658857e-06, + "loss": 0.4260486364364624, + "step": 4356 + }, + { + "epoch": 1.0046114825916532, + "grad_norm": 1.4352564218347874, + "learning_rate": 1.0754180479428142e-06, + "loss": 0.4765712320804596, + "step": 4357 + }, + { + "epoch": 1.004842056721236, + "grad_norm": 1.408849526827852, + "learning_rate": 1.0750378670569652e-06, + "loss": 0.485443115234375, + "step": 4358 + }, + { + "epoch": 1.0050726308508184, + "grad_norm": 1.3833154190721015, + "learning_rate": 1.074657675263602e-06, + "loss": 0.5010418891906738, + "step": 4359 + }, + { + "epoch": 1.0053032049804012, + "grad_norm": 1.2138138176978153, + "learning_rate": 1.074277472617989e-06, + "loss": 0.42195719480514526, + "step": 4360 + }, + { + "epoch": 1.0055337791099839, + "grad_norm": 1.4341592826356415, + "learning_rate": 1.073897259175392e-06, + "loss": 0.48555606603622437, + "step": 4361 + }, + { + "epoch": 1.0057643532395666, + "grad_norm": 1.4030257216310642, + "learning_rate": 1.07351703499108e-06, + "loss": 0.4991112947463989, + "step": 4362 + }, + { + "epoch": 1.005994927369149, + "grad_norm": 1.365972754336138, + "learning_rate": 1.0731368001203217e-06, + "loss": 0.43016430735588074, + "step": 4363 + }, + { + "epoch": 1.0062255014987318, + "grad_norm": 1.635861674358112, + "learning_rate": 1.0727565546183883e-06, + "loss": 0.47147876024246216, + "step": 4364 + }, + { + "epoch": 1.0064560756283145, + "grad_norm": 1.4724107461573035, + "learning_rate": 1.0723762985405522e-06, + "loss": 0.4695407748222351, + "step": 4365 + }, + { + 
"epoch": 1.0066866497578972, + "grad_norm": 1.4167512288976294, + "learning_rate": 1.0719960319420878e-06, + "loss": 0.42666512727737427, + "step": 4366 + }, + { + "epoch": 1.0069172238874797, + "grad_norm": 1.4965231034133355, + "learning_rate": 1.0716157548782705e-06, + "loss": 0.5685237050056458, + "step": 4367 + }, + { + "epoch": 1.0071477980170624, + "grad_norm": 1.2856237164503312, + "learning_rate": 1.0712354674043774e-06, + "loss": 0.45181894302368164, + "step": 4368 + }, + { + "epoch": 1.0073783721466452, + "grad_norm": 1.479568259964695, + "learning_rate": 1.070855169575687e-06, + "loss": 0.4079795479774475, + "step": 4369 + }, + { + "epoch": 1.0076089462762279, + "grad_norm": 1.196685278300245, + "learning_rate": 1.0704748614474798e-06, + "loss": 0.4011094570159912, + "step": 4370 + }, + { + "epoch": 1.0078395204058104, + "grad_norm": 1.5280378960817975, + "learning_rate": 1.0700945430750373e-06, + "loss": 0.48842671513557434, + "step": 4371 + }, + { + "epoch": 1.008070094535393, + "grad_norm": 1.237232307792151, + "learning_rate": 1.0697142145136425e-06, + "loss": 0.5183907151222229, + "step": 4372 + }, + { + "epoch": 1.0083006686649758, + "grad_norm": 1.4080736997180416, + "learning_rate": 1.0693338758185797e-06, + "loss": 0.5022784471511841, + "step": 4373 + }, + { + "epoch": 1.0085312427945585, + "grad_norm": 1.5160750764739457, + "learning_rate": 1.0689535270451358e-06, + "loss": 0.500054121017456, + "step": 4374 + }, + { + "epoch": 1.008761816924141, + "grad_norm": 1.331407944528498, + "learning_rate": 1.068573168248598e-06, + "loss": 0.43674880266189575, + "step": 4375 + }, + { + "epoch": 1.0089923910537237, + "grad_norm": 1.3441260000045296, + "learning_rate": 1.068192799484255e-06, + "loss": 0.4272059202194214, + "step": 4376 + }, + { + "epoch": 1.0092229651833065, + "grad_norm": 1.3188087584834265, + "learning_rate": 1.0678124208073972e-06, + "loss": 0.41053932905197144, + "step": 4377 + }, + { + "epoch": 1.0094535393128892, + "grad_norm": 
1.3285405544041065, + "learning_rate": 1.0674320322733173e-06, + "loss": 0.4571593701839447, + "step": 4378 + }, + { + "epoch": 1.0096841134424717, + "grad_norm": 1.2947195973212757, + "learning_rate": 1.0670516339373081e-06, + "loss": 0.464965283870697, + "step": 4379 + }, + { + "epoch": 1.0099146875720544, + "grad_norm": 1.2757697611295247, + "learning_rate": 1.0666712258546639e-06, + "loss": 0.4086726903915405, + "step": 4380 + }, + { + "epoch": 1.010145261701637, + "grad_norm": 1.3664230084580502, + "learning_rate": 1.0662908080806815e-06, + "loss": 0.49988412857055664, + "step": 4381 + }, + { + "epoch": 1.0103758358312198, + "grad_norm": 1.33263070405775, + "learning_rate": 1.0659103806706587e-06, + "loss": 0.3976360559463501, + "step": 4382 + }, + { + "epoch": 1.0106064099608023, + "grad_norm": 1.3554444243435904, + "learning_rate": 1.065529943679894e-06, + "loss": 0.4500683546066284, + "step": 4383 + }, + { + "epoch": 1.010836984090385, + "grad_norm": 1.4532099828866123, + "learning_rate": 1.0651494971636875e-06, + "loss": 0.5617754459381104, + "step": 4384 + }, + { + "epoch": 1.0110675582199677, + "grad_norm": 1.2285766706051995, + "learning_rate": 1.0647690411773414e-06, + "loss": 0.4180886745452881, + "step": 4385 + }, + { + "epoch": 1.0112981323495505, + "grad_norm": 1.3797895213155087, + "learning_rate": 1.0643885757761588e-06, + "loss": 0.406663179397583, + "step": 4386 + }, + { + "epoch": 1.011528706479133, + "grad_norm": 1.2899676326462104, + "learning_rate": 1.0640081010154443e-06, + "loss": 0.4698946475982666, + "step": 4387 + }, + { + "epoch": 1.0117592806087157, + "grad_norm": 1.2421672055806043, + "learning_rate": 1.0636276169505034e-06, + "loss": 0.4845995306968689, + "step": 4388 + }, + { + "epoch": 1.0119898547382984, + "grad_norm": 1.7127723444190444, + "learning_rate": 1.0632471236366435e-06, + "loss": 0.5065066814422607, + "step": 4389 + }, + { + "epoch": 1.012220428867881, + "grad_norm": 1.5183614166838566, + "learning_rate": 
1.0628666211291735e-06, + "loss": 0.4302946925163269, + "step": 4390 + }, + { + "epoch": 1.0124510029974636, + "grad_norm": 1.682116735922279, + "learning_rate": 1.0624861094834029e-06, + "loss": 0.5772345066070557, + "step": 4391 + }, + { + "epoch": 1.0126815771270463, + "grad_norm": 1.3399536785573158, + "learning_rate": 1.0621055887546425e-06, + "loss": 0.5294336080551147, + "step": 4392 + }, + { + "epoch": 1.012912151256629, + "grad_norm": 1.1967430772955985, + "learning_rate": 1.0617250589982059e-06, + "loss": 0.5028249621391296, + "step": 4393 + }, + { + "epoch": 1.0131427253862118, + "grad_norm": 1.3120231857267954, + "learning_rate": 1.0613445202694065e-06, + "loss": 0.5072348713874817, + "step": 4394 + }, + { + "epoch": 1.0133732995157942, + "grad_norm": 1.3107230472369709, + "learning_rate": 1.060963972623559e-06, + "loss": 0.3632262945175171, + "step": 4395 + }, + { + "epoch": 1.013603873645377, + "grad_norm": 1.4739700660925632, + "learning_rate": 1.06058341611598e-06, + "loss": 0.419277161359787, + "step": 4396 + }, + { + "epoch": 1.0138344477749597, + "grad_norm": 1.4201089967708693, + "learning_rate": 1.060202850801988e-06, + "loss": 0.4056069850921631, + "step": 4397 + }, + { + "epoch": 1.0140650219045424, + "grad_norm": 1.4908298419223913, + "learning_rate": 1.0598222767369014e-06, + "loss": 0.5591505765914917, + "step": 4398 + }, + { + "epoch": 1.014295596034125, + "grad_norm": 1.2646885984398546, + "learning_rate": 1.0594416939760408e-06, + "loss": 0.38529443740844727, + "step": 4399 + }, + { + "epoch": 1.0145261701637076, + "grad_norm": 1.3255980825912217, + "learning_rate": 1.0590611025747272e-06, + "loss": 0.3609437644481659, + "step": 4400 + }, + { + "epoch": 1.0147567442932903, + "grad_norm": 1.3538282738769345, + "learning_rate": 1.058680502588284e-06, + "loss": 0.4849050045013428, + "step": 4401 + }, + { + "epoch": 1.014987318422873, + "grad_norm": 1.4516377120705455, + "learning_rate": 1.058299894072035e-06, + "loss": 0.39454251527786255, 
+ "step": 4402 + }, + { + "epoch": 1.0152178925524555, + "grad_norm": 1.5578248119945644, + "learning_rate": 1.0579192770813052e-06, + "loss": 0.39726459980010986, + "step": 4403 + }, + { + "epoch": 1.0154484666820383, + "grad_norm": 1.4398814364290877, + "learning_rate": 1.0575386516714218e-06, + "loss": 0.4730626940727234, + "step": 4404 + }, + { + "epoch": 1.015679040811621, + "grad_norm": 1.5842749126492264, + "learning_rate": 1.0571580178977123e-06, + "loss": 0.5436214804649353, + "step": 4405 + }, + { + "epoch": 1.0159096149412037, + "grad_norm": 1.4188700773135285, + "learning_rate": 1.0567773758155055e-06, + "loss": 0.4197273850440979, + "step": 4406 + }, + { + "epoch": 1.0161401890707862, + "grad_norm": 1.2873423308659837, + "learning_rate": 1.0563967254801316e-06, + "loss": 0.46460944414138794, + "step": 4407 + }, + { + "epoch": 1.016370763200369, + "grad_norm": 1.3771325056314752, + "learning_rate": 1.056016066946922e-06, + "loss": 0.3504630923271179, + "step": 4408 + }, + { + "epoch": 1.0166013373299516, + "grad_norm": 1.3484234762530152, + "learning_rate": 1.0556354002712098e-06, + "loss": 0.4620180130004883, + "step": 4409 + }, + { + "epoch": 1.0168319114595343, + "grad_norm": 1.414975730602458, + "learning_rate": 1.0552547255083283e-06, + "loss": 0.5642764568328857, + "step": 4410 + }, + { + "epoch": 1.0170624855891168, + "grad_norm": 1.3858649703726607, + "learning_rate": 1.054874042713612e-06, + "loss": 0.48283201456069946, + "step": 4411 + }, + { + "epoch": 1.0172930597186995, + "grad_norm": 1.3477248933257546, + "learning_rate": 1.0544933519423976e-06, + "loss": 0.5346091985702515, + "step": 4412 + }, + { + "epoch": 1.0175236338482823, + "grad_norm": 1.216774984460132, + "learning_rate": 1.0541126532500224e-06, + "loss": 0.4710259437561035, + "step": 4413 + }, + { + "epoch": 1.017754207977865, + "grad_norm": 1.6611025915045114, + "learning_rate": 1.0537319466918243e-06, + "loss": 0.535955548286438, + "step": 4414 + }, + { + "epoch": 
1.0179847821074475, + "grad_norm": 1.298601209078171, + "learning_rate": 1.0533512323231438e-06, + "loss": 0.4127902388572693, + "step": 4415 + }, + { + "epoch": 1.0182153562370302, + "grad_norm": 1.6222892430544704, + "learning_rate": 1.0529705101993203e-06, + "loss": 0.5209894180297852, + "step": 4416 + }, + { + "epoch": 1.018445930366613, + "grad_norm": 1.5702821211846574, + "learning_rate": 1.0525897803756967e-06, + "loss": 0.45600390434265137, + "step": 4417 + }, + { + "epoch": 1.0186765044961956, + "grad_norm": 1.6858904509627837, + "learning_rate": 1.0522090429076155e-06, + "loss": 0.5043426156044006, + "step": 4418 + }, + { + "epoch": 1.0189070786257781, + "grad_norm": 1.8442717417612486, + "learning_rate": 1.0518282978504207e-06, + "loss": 0.43386173248291016, + "step": 4419 + }, + { + "epoch": 1.0191376527553608, + "grad_norm": 1.4810433748538916, + "learning_rate": 1.0514475452594578e-06, + "loss": 0.44956767559051514, + "step": 4420 + }, + { + "epoch": 1.0193682268849436, + "grad_norm": 1.4162663845873593, + "learning_rate": 1.0510667851900726e-06, + "loss": 0.47164878249168396, + "step": 4421 + }, + { + "epoch": 1.0195988010145263, + "grad_norm": 1.3111398742961289, + "learning_rate": 1.0506860176976127e-06, + "loss": 0.4977136552333832, + "step": 4422 + }, + { + "epoch": 1.0198293751441088, + "grad_norm": 1.2272027402421368, + "learning_rate": 1.0503052428374264e-06, + "loss": 0.4344305396080017, + "step": 4423 + }, + { + "epoch": 1.0200599492736915, + "grad_norm": 1.4594484344103595, + "learning_rate": 1.049924460664863e-06, + "loss": 0.46536487340927124, + "step": 4424 + }, + { + "epoch": 1.0202905234032742, + "grad_norm": 1.5676489928965973, + "learning_rate": 1.0495436712352733e-06, + "loss": 0.4583844840526581, + "step": 4425 + }, + { + "epoch": 1.020521097532857, + "grad_norm": 1.3353943490467204, + "learning_rate": 1.049162874604009e-06, + "loss": 0.4098002314567566, + "step": 4426 + }, + { + "epoch": 1.0207516716624394, + "grad_norm": 
1.5212892459953231, + "learning_rate": 1.0487820708264227e-06, + "loss": 0.48168665170669556, + "step": 4427 + }, + { + "epoch": 1.0209822457920221, + "grad_norm": 1.575752706874104, + "learning_rate": 1.048401259957868e-06, + "loss": 0.5517562627792358, + "step": 4428 + }, + { + "epoch": 1.0212128199216048, + "grad_norm": 1.4762864972879257, + "learning_rate": 1.0480204420536998e-06, + "loss": 0.5131476521492004, + "step": 4429 + }, + { + "epoch": 1.0214433940511876, + "grad_norm": 1.3669237261259728, + "learning_rate": 1.0476396171692734e-06, + "loss": 0.4590519666671753, + "step": 4430 + }, + { + "epoch": 1.02167396818077, + "grad_norm": 1.6209541549743127, + "learning_rate": 1.0472587853599458e-06, + "loss": 0.5581461191177368, + "step": 4431 + }, + { + "epoch": 1.0219045423103528, + "grad_norm": 1.9464318549736228, + "learning_rate": 1.046877946681075e-06, + "loss": 0.4169657826423645, + "step": 4432 + }, + { + "epoch": 1.0221351164399355, + "grad_norm": 1.6990409231148407, + "learning_rate": 1.0464971011880195e-06, + "loss": 0.48135459423065186, + "step": 4433 + }, + { + "epoch": 1.0223656905695182, + "grad_norm": 1.5888684830629844, + "learning_rate": 1.046116248936139e-06, + "loss": 0.5116040706634521, + "step": 4434 + }, + { + "epoch": 1.0225962646991007, + "grad_norm": 1.2239425777755701, + "learning_rate": 1.0457353899807946e-06, + "loss": 0.4369809329509735, + "step": 4435 + }, + { + "epoch": 1.0228268388286834, + "grad_norm": 1.3094581394180187, + "learning_rate": 1.0453545243773474e-06, + "loss": 0.42936772108078003, + "step": 4436 + }, + { + "epoch": 1.0230574129582661, + "grad_norm": 1.4191745941139933, + "learning_rate": 1.0449736521811605e-06, + "loss": 0.3614712357521057, + "step": 4437 + }, + { + "epoch": 1.0232879870878488, + "grad_norm": 1.4958077731615864, + "learning_rate": 1.0445927734475977e-06, + "loss": 0.40728119015693665, + "step": 4438 + }, + { + "epoch": 1.0235185612174313, + "grad_norm": 1.6199665099354292, + "learning_rate": 
1.0442118882320233e-06, + "loss": 0.4940561056137085, + "step": 4439 + }, + { + "epoch": 1.023749135347014, + "grad_norm": 1.5292135898443935, + "learning_rate": 1.0438309965898027e-06, + "loss": 0.49529674649238586, + "step": 4440 + }, + { + "epoch": 1.0239797094765968, + "grad_norm": 1.3839632419664316, + "learning_rate": 1.0434500985763027e-06, + "loss": 0.4849408268928528, + "step": 4441 + }, + { + "epoch": 1.0242102836061795, + "grad_norm": 1.2306090654878221, + "learning_rate": 1.0430691942468903e-06, + "loss": 0.4121132791042328, + "step": 4442 + }, + { + "epoch": 1.024440857735762, + "grad_norm": 1.3788405992777184, + "learning_rate": 1.042688283656934e-06, + "loss": 0.4348478317260742, + "step": 4443 + }, + { + "epoch": 1.0246714318653447, + "grad_norm": 1.4946594419770094, + "learning_rate": 1.0423073668618033e-06, + "loss": 0.46817919611930847, + "step": 4444 + }, + { + "epoch": 1.0249020059949274, + "grad_norm": 1.4309128927667782, + "learning_rate": 1.041926443916868e-06, + "loss": 0.4422008991241455, + "step": 4445 + }, + { + "epoch": 1.02513258012451, + "grad_norm": 1.4766353003575698, + "learning_rate": 1.041545514877499e-06, + "loss": 0.5108183026313782, + "step": 4446 + }, + { + "epoch": 1.0253631542540926, + "grad_norm": 1.4287581583003561, + "learning_rate": 1.0411645797990685e-06, + "loss": 0.4759529232978821, + "step": 4447 + }, + { + "epoch": 1.0255937283836754, + "grad_norm": 1.4822019265627726, + "learning_rate": 1.040783638736949e-06, + "loss": 0.44447648525238037, + "step": 4448 + }, + { + "epoch": 1.025824302513258, + "grad_norm": 1.9820121270715096, + "learning_rate": 1.0404026917465144e-06, + "loss": 0.4558752477169037, + "step": 4449 + }, + { + "epoch": 1.0260548766428408, + "grad_norm": 1.5117188074263472, + "learning_rate": 1.0400217388831393e-06, + "loss": 0.4728459417819977, + "step": 4450 + }, + { + "epoch": 1.0262854507724233, + "grad_norm": 1.2832295949174854, + "learning_rate": 1.0396407802021985e-06, + "loss": 
0.4815519452095032, + "step": 4451 + }, + { + "epoch": 1.026516024902006, + "grad_norm": 1.493224641636315, + "learning_rate": 1.0392598157590685e-06, + "loss": 0.5173656344413757, + "step": 4452 + }, + { + "epoch": 1.0267465990315887, + "grad_norm": 1.389267472286255, + "learning_rate": 1.0388788456091267e-06, + "loss": 0.5280762910842896, + "step": 4453 + }, + { + "epoch": 1.0269771731611712, + "grad_norm": 1.3239342530675255, + "learning_rate": 1.0384978698077506e-06, + "loss": 0.4524118900299072, + "step": 4454 + }, + { + "epoch": 1.027207747290754, + "grad_norm": 1.3855017021962426, + "learning_rate": 1.0381168884103186e-06, + "loss": 0.4011715054512024, + "step": 4455 + }, + { + "epoch": 1.0274383214203366, + "grad_norm": 1.6664926632341406, + "learning_rate": 1.0377359014722108e-06, + "loss": 0.518020749092102, + "step": 4456 + }, + { + "epoch": 1.0276688955499194, + "grad_norm": 1.3443799803410221, + "learning_rate": 1.0373549090488073e-06, + "loss": 0.44726112484931946, + "step": 4457 + }, + { + "epoch": 1.0278994696795019, + "grad_norm": 1.5697915792497608, + "learning_rate": 1.0369739111954894e-06, + "loss": 0.5344264507293701, + "step": 4458 + }, + { + "epoch": 1.0281300438090846, + "grad_norm": 1.3300732692572412, + "learning_rate": 1.0365929079676387e-06, + "loss": 0.4902813732624054, + "step": 4459 + }, + { + "epoch": 1.0283606179386673, + "grad_norm": 1.6676294678142136, + "learning_rate": 1.0362118994206378e-06, + "loss": 0.38346555829048157, + "step": 4460 + }, + { + "epoch": 1.02859119206825, + "grad_norm": 1.4992112279059755, + "learning_rate": 1.0358308856098705e-06, + "loss": 0.4232872724533081, + "step": 4461 + }, + { + "epoch": 1.0288217661978325, + "grad_norm": 1.4973168899301483, + "learning_rate": 1.0354498665907207e-06, + "loss": 0.5184470415115356, + "step": 4462 + }, + { + "epoch": 1.0290523403274152, + "grad_norm": 1.3344202325848402, + "learning_rate": 1.0350688424185733e-06, + "loss": 0.4989054203033447, + "step": 4463 + }, + { + 
"epoch": 1.029282914456998, + "grad_norm": 1.4348006325476266, + "learning_rate": 1.0346878131488145e-06, + "loss": 0.5204064249992371, + "step": 4464 + }, + { + "epoch": 1.0295134885865806, + "grad_norm": 1.5066284997527284, + "learning_rate": 1.0343067788368307e-06, + "loss": 0.47872811555862427, + "step": 4465 + }, + { + "epoch": 1.0297440627161631, + "grad_norm": 1.4195028916227292, + "learning_rate": 1.0339257395380087e-06, + "loss": 0.4104915261268616, + "step": 4466 + }, + { + "epoch": 1.0299746368457459, + "grad_norm": 1.3696214178005537, + "learning_rate": 1.0335446953077366e-06, + "loss": 0.39327263832092285, + "step": 4467 + }, + { + "epoch": 1.0302052109753286, + "grad_norm": 1.4702497550106948, + "learning_rate": 1.033163646201403e-06, + "loss": 0.4395657777786255, + "step": 4468 + }, + { + "epoch": 1.0304357851049113, + "grad_norm": 1.419425725268843, + "learning_rate": 1.0327825922743976e-06, + "loss": 0.462537944316864, + "step": 4469 + }, + { + "epoch": 1.0306663592344938, + "grad_norm": 1.3686105119540095, + "learning_rate": 1.03240153358211e-06, + "loss": 0.4399976134300232, + "step": 4470 + }, + { + "epoch": 1.0308969333640765, + "grad_norm": 1.2004518913155955, + "learning_rate": 1.0320204701799311e-06, + "loss": 0.4289684593677521, + "step": 4471 + }, + { + "epoch": 1.0311275074936592, + "grad_norm": 1.700414177665105, + "learning_rate": 1.0316394021232524e-06, + "loss": 0.4771305322647095, + "step": 4472 + }, + { + "epoch": 1.031358081623242, + "grad_norm": 1.3381367861828992, + "learning_rate": 1.031258329467466e-06, + "loss": 0.4544849395751953, + "step": 4473 + }, + { + "epoch": 1.0315886557528244, + "grad_norm": 1.7319531178301495, + "learning_rate": 1.0308772522679646e-06, + "loss": 0.5362099409103394, + "step": 4474 + }, + { + "epoch": 1.0318192298824072, + "grad_norm": 1.564907240947497, + "learning_rate": 1.0304961705801413e-06, + "loss": 0.48966753482818604, + "step": 4475 + }, + { + "epoch": 1.0320498040119899, + "grad_norm": 
1.379783010020372, + "learning_rate": 1.0301150844593908e-06, + "loss": 0.3750344216823578, + "step": 4476 + }, + { + "epoch": 1.0322803781415726, + "grad_norm": 1.3651499470494945, + "learning_rate": 1.0297339939611076e-06, + "loss": 0.453983873128891, + "step": 4477 + }, + { + "epoch": 1.032510952271155, + "grad_norm": 1.837467998410361, + "learning_rate": 1.029352899140687e-06, + "loss": 0.5096027255058289, + "step": 4478 + }, + { + "epoch": 1.0327415264007378, + "grad_norm": 1.395622916901131, + "learning_rate": 1.028971800053525e-06, + "loss": 0.4387558698654175, + "step": 4479 + }, + { + "epoch": 1.0329721005303205, + "grad_norm": 1.324708629656248, + "learning_rate": 1.0285906967550184e-06, + "loss": 0.45710843801498413, + "step": 4480 + }, + { + "epoch": 1.0332026746599032, + "grad_norm": 1.631576144246761, + "learning_rate": 1.0282095893005643e-06, + "loss": 0.5258994102478027, + "step": 4481 + }, + { + "epoch": 1.0334332487894857, + "grad_norm": 1.320456527047697, + "learning_rate": 1.0278284777455603e-06, + "loss": 0.5037236213684082, + "step": 4482 + }, + { + "epoch": 1.0336638229190684, + "grad_norm": 1.3671446032683054, + "learning_rate": 1.027447362145405e-06, + "loss": 0.4730300307273865, + "step": 4483 + }, + { + "epoch": 1.0338943970486512, + "grad_norm": 1.5284074958618745, + "learning_rate": 1.0270662425554974e-06, + "loss": 0.4373326301574707, + "step": 4484 + }, + { + "epoch": 1.0341249711782339, + "grad_norm": 1.379045843622324, + "learning_rate": 1.0266851190312373e-06, + "loss": 0.3915579319000244, + "step": 4485 + }, + { + "epoch": 1.0343555453078164, + "grad_norm": 1.3482794503547837, + "learning_rate": 1.0263039916280247e-06, + "loss": 0.36588191986083984, + "step": 4486 + }, + { + "epoch": 1.034586119437399, + "grad_norm": 1.2333606023937755, + "learning_rate": 1.0259228604012602e-06, + "loss": 0.4287286400794983, + "step": 4487 + }, + { + "epoch": 1.0348166935669818, + "grad_norm": 1.3775270616642934, + "learning_rate": 
1.0255417254063454e-06, + "loss": 0.4405861496925354, + "step": 4488 + }, + { + "epoch": 1.0350472676965645, + "grad_norm": 1.443831892269548, + "learning_rate": 1.0251605866986818e-06, + "loss": 0.4859738349914551, + "step": 4489 + }, + { + "epoch": 1.035277841826147, + "grad_norm": 1.4103288990509777, + "learning_rate": 1.0247794443336722e-06, + "loss": 0.40879446268081665, + "step": 4490 + }, + { + "epoch": 1.0355084159557297, + "grad_norm": 1.4900612923986292, + "learning_rate": 1.024398298366719e-06, + "loss": 0.44872337579727173, + "step": 4491 + }, + { + "epoch": 1.0357389900853124, + "grad_norm": 1.3707597883324278, + "learning_rate": 1.0240171488532258e-06, + "loss": 0.41155117750167847, + "step": 4492 + }, + { + "epoch": 1.0359695642148952, + "grad_norm": 1.4935319402234073, + "learning_rate": 1.0236359958485966e-06, + "loss": 0.48941487073898315, + "step": 4493 + }, + { + "epoch": 1.0362001383444777, + "grad_norm": 1.3889526979110256, + "learning_rate": 1.0232548394082362e-06, + "loss": 0.4462544322013855, + "step": 4494 + }, + { + "epoch": 1.0364307124740604, + "grad_norm": 1.7635931454030804, + "learning_rate": 1.0228736795875487e-06, + "loss": 0.3791837692260742, + "step": 4495 + }, + { + "epoch": 1.036661286603643, + "grad_norm": 1.7988283203699307, + "learning_rate": 1.0224925164419404e-06, + "loss": 0.5037285685539246, + "step": 4496 + }, + { + "epoch": 1.0368918607332258, + "grad_norm": 1.5033654685782605, + "learning_rate": 1.0221113500268169e-06, + "loss": 0.4762890636920929, + "step": 4497 + }, + { + "epoch": 1.0371224348628083, + "grad_norm": 1.2678994584792878, + "learning_rate": 1.0217301803975844e-06, + "loss": 0.4673793315887451, + "step": 4498 + }, + { + "epoch": 1.037353008992391, + "grad_norm": 1.4491139066226089, + "learning_rate": 1.0213490076096501e-06, + "loss": 0.37522250413894653, + "step": 4499 + }, + { + "epoch": 1.0375835831219737, + "grad_norm": 1.4197729369573655, + "learning_rate": 1.020967831718421e-06, + "loss": 
0.4986375570297241, + "step": 4500 + }, + { + "epoch": 1.0378141572515565, + "grad_norm": 1.3424622189818292, + "learning_rate": 1.0205866527793053e-06, + "loss": 0.488337904214859, + "step": 4501 + }, + { + "epoch": 1.038044731381139, + "grad_norm": 1.2513264252251595, + "learning_rate": 1.0202054708477107e-06, + "loss": 0.37420767545700073, + "step": 4502 + }, + { + "epoch": 1.0382753055107217, + "grad_norm": 1.1901249454864467, + "learning_rate": 1.0198242859790465e-06, + "loss": 0.42453843355178833, + "step": 4503 + }, + { + "epoch": 1.0385058796403044, + "grad_norm": 1.5998980096348292, + "learning_rate": 1.0194430982287211e-06, + "loss": 0.4431978166103363, + "step": 4504 + }, + { + "epoch": 1.038736453769887, + "grad_norm": 1.2584649975167521, + "learning_rate": 1.0190619076521445e-06, + "loss": 0.5079195499420166, + "step": 4505 + }, + { + "epoch": 1.0389670278994696, + "grad_norm": 1.3630757915855334, + "learning_rate": 1.0186807143047263e-06, + "loss": 0.442915678024292, + "step": 4506 + }, + { + "epoch": 1.0391976020290523, + "grad_norm": 1.4946032354137926, + "learning_rate": 1.018299518241877e-06, + "loss": 0.4720972180366516, + "step": 4507 + }, + { + "epoch": 1.039428176158635, + "grad_norm": 1.407838633939113, + "learning_rate": 1.0179183195190073e-06, + "loss": 0.4637352526187897, + "step": 4508 + }, + { + "epoch": 1.0396587502882177, + "grad_norm": 1.3457342565284411, + "learning_rate": 1.0175371181915283e-06, + "loss": 0.4207759499549866, + "step": 4509 + }, + { + "epoch": 1.0398893244178002, + "grad_norm": 1.5872196626053143, + "learning_rate": 1.0171559143148514e-06, + "loss": 0.49227845668792725, + "step": 4510 + }, + { + "epoch": 1.040119898547383, + "grad_norm": 1.4565076836431372, + "learning_rate": 1.0167747079443884e-06, + "loss": 0.5006893873214722, + "step": 4511 + }, + { + "epoch": 1.0403504726769657, + "grad_norm": 1.4618469895611303, + "learning_rate": 1.016393499135552e-06, + "loss": 0.42048192024230957, + "step": 4512 + }, + { + 
"epoch": 1.0405810468065484, + "grad_norm": 1.5634742093932859, + "learning_rate": 1.0160122879437538e-06, + "loss": 0.5275895595550537, + "step": 4513 + }, + { + "epoch": 1.0408116209361309, + "grad_norm": 1.1544305266604897, + "learning_rate": 1.0156310744244073e-06, + "loss": 0.4677985906600952, + "step": 4514 + }, + { + "epoch": 1.0410421950657136, + "grad_norm": 1.422644417212902, + "learning_rate": 1.015249858632926e-06, + "loss": 0.5214150547981262, + "step": 4515 + }, + { + "epoch": 1.0412727691952963, + "grad_norm": 1.2418435857264525, + "learning_rate": 1.0148686406247232e-06, + "loss": 0.40790024399757385, + "step": 4516 + }, + { + "epoch": 1.041503343324879, + "grad_norm": 1.6199751141856524, + "learning_rate": 1.0144874204552125e-06, + "loss": 0.5943785309791565, + "step": 4517 + }, + { + "epoch": 1.0417339174544615, + "grad_norm": 1.531988684910503, + "learning_rate": 1.0141061981798086e-06, + "loss": 0.4590263366699219, + "step": 4518 + }, + { + "epoch": 1.0419644915840443, + "grad_norm": 1.3212940799821826, + "learning_rate": 1.0137249738539257e-06, + "loss": 0.4106098413467407, + "step": 4519 + }, + { + "epoch": 1.042195065713627, + "grad_norm": 1.4102973636174063, + "learning_rate": 1.013343747532979e-06, + "loss": 0.4730203151702881, + "step": 4520 + }, + { + "epoch": 1.0424256398432097, + "grad_norm": 1.2769276209650842, + "learning_rate": 1.0129625192723833e-06, + "loss": 0.43245944380760193, + "step": 4521 + }, + { + "epoch": 1.0426562139727922, + "grad_norm": 1.3088740452256564, + "learning_rate": 1.012581289127554e-06, + "loss": 0.40828272700309753, + "step": 4522 + }, + { + "epoch": 1.042886788102375, + "grad_norm": 1.5940499075267438, + "learning_rate": 1.0122000571539069e-06, + "loss": 0.4232874810695648, + "step": 4523 + }, + { + "epoch": 1.0431173622319576, + "grad_norm": 1.45477003479617, + "learning_rate": 1.0118188234068579e-06, + "loss": 0.43044984340667725, + "step": 4524 + }, + { + "epoch": 1.0433479363615403, + "grad_norm": 
1.6545172631907663, + "learning_rate": 1.011437587941823e-06, + "loss": 0.4502897262573242, + "step": 4525 + }, + { + "epoch": 1.0435785104911228, + "grad_norm": 2.0995258586192467, + "learning_rate": 1.0110563508142185e-06, + "loss": 0.5505340099334717, + "step": 4526 + }, + { + "epoch": 1.0438090846207055, + "grad_norm": 1.5629586322344833, + "learning_rate": 1.0106751120794617e-06, + "loss": 0.4026086628437042, + "step": 4527 + }, + { + "epoch": 1.0440396587502883, + "grad_norm": 1.5105039899180257, + "learning_rate": 1.0102938717929692e-06, + "loss": 0.3910222053527832, + "step": 4528 + }, + { + "epoch": 1.044270232879871, + "grad_norm": 1.6830902678008934, + "learning_rate": 1.009912630010158e-06, + "loss": 0.4134068191051483, + "step": 4529 + }, + { + "epoch": 1.0445008070094535, + "grad_norm": 1.4825250898714368, + "learning_rate": 1.0095313867864457e-06, + "loss": 0.4801563024520874, + "step": 4530 + }, + { + "epoch": 1.0447313811390362, + "grad_norm": 1.2424640239796358, + "learning_rate": 1.0091501421772495e-06, + "loss": 0.4269358515739441, + "step": 4531 + }, + { + "epoch": 1.044961955268619, + "grad_norm": 1.3485994976026512, + "learning_rate": 1.0087688962379877e-06, + "loss": 0.5300281047821045, + "step": 4532 + }, + { + "epoch": 1.0451925293982016, + "grad_norm": 1.6865287595757648, + "learning_rate": 1.0083876490240777e-06, + "loss": 0.4634189009666443, + "step": 4533 + }, + { + "epoch": 1.0454231035277841, + "grad_norm": 1.5187760856795984, + "learning_rate": 1.0080064005909379e-06, + "loss": 0.37037551403045654, + "step": 4534 + }, + { + "epoch": 1.0456536776573668, + "grad_norm": 1.2977267015714409, + "learning_rate": 1.0076251509939867e-06, + "loss": 0.4740016460418701, + "step": 4535 + }, + { + "epoch": 1.0458842517869495, + "grad_norm": 1.4686161726335998, + "learning_rate": 1.0072439002886426e-06, + "loss": 0.4824775159358978, + "step": 4536 + }, + { + "epoch": 1.0461148259165323, + "grad_norm": 1.4032368341998698, + "learning_rate": 
1.0068626485303242e-06, + "loss": 0.4891430735588074, + "step": 4537 + }, + { + "epoch": 1.0463454000461148, + "grad_norm": 1.440410031419601, + "learning_rate": 1.00648139577445e-06, + "loss": 0.48089975118637085, + "step": 4538 + }, + { + "epoch": 1.0465759741756975, + "grad_norm": 1.3280505427696812, + "learning_rate": 1.0061001420764395e-06, + "loss": 0.4353799521923065, + "step": 4539 + }, + { + "epoch": 1.0468065483052802, + "grad_norm": 1.5425308952951848, + "learning_rate": 1.0057188874917117e-06, + "loss": 0.4259982705116272, + "step": 4540 + }, + { + "epoch": 1.047037122434863, + "grad_norm": 1.502788920344227, + "learning_rate": 1.0053376320756852e-06, + "loss": 0.4400532841682434, + "step": 4541 + }, + { + "epoch": 1.0472676965644454, + "grad_norm": 1.398609267878258, + "learning_rate": 1.00495637588378e-06, + "loss": 0.48598533868789673, + "step": 4542 + }, + { + "epoch": 1.0474982706940281, + "grad_norm": 1.7261761893493324, + "learning_rate": 1.0045751189714153e-06, + "loss": 0.6310586929321289, + "step": 4543 + }, + { + "epoch": 1.0477288448236108, + "grad_norm": 1.4822203646620422, + "learning_rate": 1.0041938613940108e-06, + "loss": 0.49084293842315674, + "step": 4544 + }, + { + "epoch": 1.0479594189531936, + "grad_norm": 1.6167393331453148, + "learning_rate": 1.003812603206986e-06, + "loss": 0.5144428014755249, + "step": 4545 + }, + { + "epoch": 1.048189993082776, + "grad_norm": 1.4962485615696877, + "learning_rate": 1.0034313444657605e-06, + "loss": 0.4480917155742645, + "step": 4546 + }, + { + "epoch": 1.0484205672123588, + "grad_norm": 1.4833727438286728, + "learning_rate": 1.0030500852257545e-06, + "loss": 0.4505491852760315, + "step": 4547 + }, + { + "epoch": 1.0486511413419415, + "grad_norm": 1.3728340651335322, + "learning_rate": 1.0026688255423876e-06, + "loss": 0.3344930410385132, + "step": 4548 + }, + { + "epoch": 1.0488817154715242, + "grad_norm": 1.3493238342876126, + "learning_rate": 1.0022875654710801e-06, + "loss": 
0.4006739854812622, + "step": 4549 + }, + { + "epoch": 1.0491122896011067, + "grad_norm": 1.4777604777161095, + "learning_rate": 1.0019063050672517e-06, + "loss": 0.4815717935562134, + "step": 4550 + }, + { + "epoch": 1.0493428637306894, + "grad_norm": 1.4182246513528267, + "learning_rate": 1.0015250443863223e-06, + "loss": 0.4660469889640808, + "step": 4551 + }, + { + "epoch": 1.0495734378602721, + "grad_norm": 1.4298035442899577, + "learning_rate": 1.0011437834837125e-06, + "loss": 0.5233521461486816, + "step": 4552 + }, + { + "epoch": 1.0498040119898548, + "grad_norm": 1.7530768174577198, + "learning_rate": 1.0007625224148418e-06, + "loss": 0.6037864685058594, + "step": 4553 + }, + { + "epoch": 1.0500345861194373, + "grad_norm": 1.726860458569315, + "learning_rate": 1.000381261235131e-06, + "loss": 0.469952255487442, + "step": 4554 + }, + { + "epoch": 1.05026516024902, + "grad_norm": 1.302712404041117, + "learning_rate": 1e-06, + "loss": 0.4577752649784088, + "step": 4555 + }, + { + "epoch": 1.0504957343786028, + "grad_norm": 1.537724574807554, + "learning_rate": 9.996187387648692e-07, + "loss": 0.46796074509620667, + "step": 4556 + }, + { + "epoch": 1.0507263085081853, + "grad_norm": 1.3633141581703183, + "learning_rate": 9.992374775851583e-07, + "loss": 0.40709036588668823, + "step": 4557 + }, + { + "epoch": 1.050956882637768, + "grad_norm": 1.2121351653860253, + "learning_rate": 9.988562165162878e-07, + "loss": 0.3997795879840851, + "step": 4558 + }, + { + "epoch": 1.0511874567673507, + "grad_norm": 1.6938685288563167, + "learning_rate": 9.984749556136779e-07, + "loss": 0.4677845239639282, + "step": 4559 + }, + { + "epoch": 1.0514180308969334, + "grad_norm": 1.315537055431831, + "learning_rate": 9.980936949327487e-07, + "loss": 0.40411800146102905, + "step": 4560 + }, + { + "epoch": 1.0516486050265161, + "grad_norm": 1.3999939149032237, + "learning_rate": 9.9771243452892e-07, + "loss": 0.50546795129776, + "step": 4561 + }, + { + "epoch": 1.0518791791560986, + 
"grad_norm": 1.5468163611837324, + "learning_rate": 9.973311744576125e-07, + "loss": 0.4116637110710144, + "step": 4562 + }, + { + "epoch": 1.0521097532856813, + "grad_norm": 1.2997915019544943, + "learning_rate": 9.969499147742454e-07, + "loss": 0.4271109700202942, + "step": 4563 + }, + { + "epoch": 1.052340327415264, + "grad_norm": 1.1760164248835672, + "learning_rate": 9.965686555342396e-07, + "loss": 0.37195074558258057, + "step": 4564 + }, + { + "epoch": 1.0525709015448466, + "grad_norm": 1.6759945376385115, + "learning_rate": 9.96187396793014e-07, + "loss": 0.4020707607269287, + "step": 4565 + }, + { + "epoch": 1.0528014756744293, + "grad_norm": 1.5880882887273124, + "learning_rate": 9.95806138605989e-07, + "loss": 0.4980151951313019, + "step": 4566 + }, + { + "epoch": 1.053032049804012, + "grad_norm": 1.419377079967674, + "learning_rate": 9.95424881028585e-07, + "loss": 0.39553767442703247, + "step": 4567 + }, + { + "epoch": 1.0532626239335947, + "grad_norm": 1.3361167736969362, + "learning_rate": 9.9504362411622e-07, + "loss": 0.47618645429611206, + "step": 4568 + }, + { + "epoch": 1.0534931980631772, + "grad_norm": 1.6469408967264108, + "learning_rate": 9.94662367924315e-07, + "loss": 0.4613817036151886, + "step": 4569 + }, + { + "epoch": 1.05372377219276, + "grad_norm": 1.4563205269464143, + "learning_rate": 9.942811125082884e-07, + "loss": 0.35888034105300903, + "step": 4570 + }, + { + "epoch": 1.0539543463223426, + "grad_norm": 1.896669698951033, + "learning_rate": 9.938998579235606e-07, + "loss": 0.45810097455978394, + "step": 4571 + }, + { + "epoch": 1.0541849204519254, + "grad_norm": 1.4115626759758866, + "learning_rate": 9.935186042255499e-07, + "loss": 0.5351384878158569, + "step": 4572 + }, + { + "epoch": 1.0544154945815079, + "grad_norm": 1.4888165757644622, + "learning_rate": 9.931373514696759e-07, + "loss": 0.5261274576187134, + "step": 4573 + }, + { + "epoch": 1.0546460687110906, + "grad_norm": 1.368295507669899, + "learning_rate": 
9.927560997113573e-07, + "loss": 0.483295202255249, + "step": 4574 + }, + { + "epoch": 1.0548766428406733, + "grad_norm": 1.5639325535974613, + "learning_rate": 9.923748490060132e-07, + "loss": 0.5371580719947815, + "step": 4575 + }, + { + "epoch": 1.055107216970256, + "grad_norm": 1.8721225876517977, + "learning_rate": 9.919935994090622e-07, + "loss": 0.4863673746585846, + "step": 4576 + }, + { + "epoch": 1.0553377910998385, + "grad_norm": 1.5391981555318386, + "learning_rate": 9.916123509759224e-07, + "loss": 0.47929099202156067, + "step": 4577 + }, + { + "epoch": 1.0555683652294212, + "grad_norm": 1.3884034720788059, + "learning_rate": 9.912311037620126e-07, + "loss": 0.4687851667404175, + "step": 4578 + }, + { + "epoch": 1.055798939359004, + "grad_norm": 1.5841867302150618, + "learning_rate": 9.908498578227504e-07, + "loss": 0.5308720469474792, + "step": 4579 + }, + { + "epoch": 1.0560295134885866, + "grad_norm": 1.8691314272616926, + "learning_rate": 9.904686132135546e-07, + "loss": 0.45900580286979675, + "step": 4580 + }, + { + "epoch": 1.0562600876181691, + "grad_norm": 1.4586686619480431, + "learning_rate": 9.900873699898422e-07, + "loss": 0.49392157793045044, + "step": 4581 + }, + { + "epoch": 1.0564906617477519, + "grad_norm": 1.6139111586944341, + "learning_rate": 9.89706128207031e-07, + "loss": 0.47190070152282715, + "step": 4582 + }, + { + "epoch": 1.0567212358773346, + "grad_norm": 1.7781894650458763, + "learning_rate": 9.893248879205382e-07, + "loss": 0.4431575834751129, + "step": 4583 + }, + { + "epoch": 1.0569518100069173, + "grad_norm": 1.293421470994464, + "learning_rate": 9.889436491857814e-07, + "loss": 0.49873441457748413, + "step": 4584 + }, + { + "epoch": 1.0571823841364998, + "grad_norm": 1.4263954197349762, + "learning_rate": 9.885624120581772e-07, + "loss": 0.41190844774246216, + "step": 4585 + }, + { + "epoch": 1.0574129582660825, + "grad_norm": 1.5698735406284627, + "learning_rate": 9.881811765931423e-07, + "loss": 0.5164123773574829, + 
"step": 4586 + }, + { + "epoch": 1.0576435323956652, + "grad_norm": 1.5034141006108586, + "learning_rate": 9.877999428460933e-07, + "loss": 0.4141567349433899, + "step": 4587 + }, + { + "epoch": 1.057874106525248, + "grad_norm": 1.557658840701198, + "learning_rate": 9.87418710872446e-07, + "loss": 0.457628458738327, + "step": 4588 + }, + { + "epoch": 1.0581046806548304, + "grad_norm": 1.4732865673601758, + "learning_rate": 9.870374807276168e-07, + "loss": 0.41788995265960693, + "step": 4589 + }, + { + "epoch": 1.0583352547844131, + "grad_norm": 1.6240063497851516, + "learning_rate": 9.866562524670209e-07, + "loss": 0.5124667882919312, + "step": 4590 + }, + { + "epoch": 1.0585658289139959, + "grad_norm": 1.1619873853554898, + "learning_rate": 9.862750261460742e-07, + "loss": 0.4192196726799011, + "step": 4591 + }, + { + "epoch": 1.0587964030435786, + "grad_norm": 1.3804521479784477, + "learning_rate": 9.858938018201913e-07, + "loss": 0.4345153868198395, + "step": 4592 + }, + { + "epoch": 1.059026977173161, + "grad_norm": 1.3186049119261667, + "learning_rate": 9.855125795447874e-07, + "loss": 0.391804963350296, + "step": 4593 + }, + { + "epoch": 1.0592575513027438, + "grad_norm": 1.3394610780120433, + "learning_rate": 9.851313593752767e-07, + "loss": 0.3904710114002228, + "step": 4594 + }, + { + "epoch": 1.0594881254323265, + "grad_norm": 1.4234043935357816, + "learning_rate": 9.847501413670742e-07, + "loss": 0.37314411997795105, + "step": 4595 + }, + { + "epoch": 1.0597186995619092, + "grad_norm": 1.7572920451540888, + "learning_rate": 9.843689255755926e-07, + "loss": 0.5402779579162598, + "step": 4596 + }, + { + "epoch": 1.0599492736914917, + "grad_norm": 1.4688689617213957, + "learning_rate": 9.839877120562463e-07, + "loss": 0.4243565797805786, + "step": 4597 + }, + { + "epoch": 1.0601798478210744, + "grad_norm": 1.6330717694890693, + "learning_rate": 9.836065008644484e-07, + "loss": 0.4504585564136505, + "step": 4598 + }, + { + "epoch": 1.0604104219506572, + 
"grad_norm": 1.3073319656874434, + "learning_rate": 9.832252920556115e-07, + "loss": 0.46487870812416077, + "step": 4599 + }, + { + "epoch": 1.0606409960802399, + "grad_norm": 1.452752590173503, + "learning_rate": 9.828440856851487e-07, + "loss": 0.470059871673584, + "step": 4600 + }, + { + "epoch": 1.0608715702098224, + "grad_norm": 1.4580866952416336, + "learning_rate": 9.824628818084716e-07, + "loss": 0.4307391047477722, + "step": 4601 + }, + { + "epoch": 1.061102144339405, + "grad_norm": 1.545423985207434, + "learning_rate": 9.820816804809927e-07, + "loss": 0.49449142813682556, + "step": 4602 + }, + { + "epoch": 1.0613327184689878, + "grad_norm": 1.4803985945664777, + "learning_rate": 9.817004817581229e-07, + "loss": 0.4932701885700226, + "step": 4603 + }, + { + "epoch": 1.0615632925985705, + "grad_norm": 1.4502372729626234, + "learning_rate": 9.813192856952739e-07, + "loss": 0.49543553590774536, + "step": 4604 + }, + { + "epoch": 1.061793866728153, + "grad_norm": 1.1578379554584357, + "learning_rate": 9.809380923478554e-07, + "loss": 0.3906818926334381, + "step": 4605 + }, + { + "epoch": 1.0620244408577357, + "grad_norm": 1.4436425775524195, + "learning_rate": 9.80556901771279e-07, + "loss": 0.41667112708091736, + "step": 4606 + }, + { + "epoch": 1.0622550149873184, + "grad_norm": 1.475010908303335, + "learning_rate": 9.801757140209538e-07, + "loss": 0.36195361614227295, + "step": 4607 + }, + { + "epoch": 1.0624855891169012, + "grad_norm": 1.4053500417900708, + "learning_rate": 9.797945291522892e-07, + "loss": 0.4056081175804138, + "step": 4608 + }, + { + "epoch": 1.0627161632464837, + "grad_norm": 1.4310559040175581, + "learning_rate": 9.794133472206948e-07, + "loss": 0.5048736929893494, + "step": 4609 + }, + { + "epoch": 1.0629467373760664, + "grad_norm": 1.3896886111265523, + "learning_rate": 9.790321682815788e-07, + "loss": 0.4846169352531433, + "step": 4610 + }, + { + "epoch": 1.063177311505649, + "grad_norm": 1.3569892439901554, + "learning_rate": 
9.7865099239035e-07, + "loss": 0.5149316787719727, + "step": 4611 + }, + { + "epoch": 1.0634078856352318, + "grad_norm": 1.5344870466099163, + "learning_rate": 9.782698196024155e-07, + "loss": 0.3816874623298645, + "step": 4612 + }, + { + "epoch": 1.0636384597648143, + "grad_norm": 1.39688044025804, + "learning_rate": 9.77888649973183e-07, + "loss": 0.5469645261764526, + "step": 4613 + }, + { + "epoch": 1.063869033894397, + "grad_norm": 1.2954034757094786, + "learning_rate": 9.775074835580593e-07, + "loss": 0.42796647548675537, + "step": 4614 + }, + { + "epoch": 1.0640996080239797, + "grad_norm": 1.4924945772778404, + "learning_rate": 9.771263204124512e-07, + "loss": 0.4931715726852417, + "step": 4615 + }, + { + "epoch": 1.0643301821535625, + "grad_norm": 1.367565961969811, + "learning_rate": 9.767451605917641e-07, + "loss": 0.5435268878936768, + "step": 4616 + }, + { + "epoch": 1.064560756283145, + "grad_norm": 1.6066093331363582, + "learning_rate": 9.763640041514033e-07, + "loss": 0.46361953020095825, + "step": 4617 + }, + { + "epoch": 1.0647913304127277, + "grad_norm": 1.240667858579194, + "learning_rate": 9.759828511467743e-07, + "loss": 0.3742775619029999, + "step": 4618 + }, + { + "epoch": 1.0650219045423104, + "grad_norm": 1.5520509510364326, + "learning_rate": 9.75601701633281e-07, + "loss": 0.4060659408569336, + "step": 4619 + }, + { + "epoch": 1.065252478671893, + "grad_norm": 1.2052909018096978, + "learning_rate": 9.75220555666328e-07, + "loss": 0.45316505432128906, + "step": 4620 + }, + { + "epoch": 1.0654830528014756, + "grad_norm": 1.4180749825165042, + "learning_rate": 9.748394133013179e-07, + "loss": 0.4548850655555725, + "step": 4621 + }, + { + "epoch": 1.0657136269310583, + "grad_norm": 1.2793215690458788, + "learning_rate": 9.744582745936547e-07, + "loss": 0.5065705180168152, + "step": 4622 + }, + { + "epoch": 1.065944201060641, + "grad_norm": 1.4912306578981507, + "learning_rate": 9.740771395987395e-07, + "loss": 0.4114503860473633, + "step": 
4623 + }, + { + "epoch": 1.0661747751902237, + "grad_norm": 1.4280192292492455, + "learning_rate": 9.736960083719752e-07, + "loss": 0.4568501114845276, + "step": 4624 + }, + { + "epoch": 1.0664053493198062, + "grad_norm": 1.2972553921673455, + "learning_rate": 9.733148809687624e-07, + "loss": 0.49967026710510254, + "step": 4625 + }, + { + "epoch": 1.066635923449389, + "grad_norm": 1.4642812597554793, + "learning_rate": 9.729337574445025e-07, + "loss": 0.529681384563446, + "step": 4626 + }, + { + "epoch": 1.0668664975789717, + "grad_norm": 1.4791668180519966, + "learning_rate": 9.72552637854595e-07, + "loss": 0.4819791316986084, + "step": 4627 + }, + { + "epoch": 1.0670970717085544, + "grad_norm": 1.3549019355661691, + "learning_rate": 9.721715222544396e-07, + "loss": 0.4186001718044281, + "step": 4628 + }, + { + "epoch": 1.0673276458381369, + "grad_norm": 1.221767945169434, + "learning_rate": 9.717904106994359e-07, + "loss": 0.4442529082298279, + "step": 4629 + }, + { + "epoch": 1.0675582199677196, + "grad_norm": 1.886711265076429, + "learning_rate": 9.714093032449815e-07, + "loss": 0.4655953049659729, + "step": 4630 + }, + { + "epoch": 1.0677887940973023, + "grad_norm": 1.2641786187672595, + "learning_rate": 9.71028199946475e-07, + "loss": 0.45248714089393616, + "step": 4631 + }, + { + "epoch": 1.068019368226885, + "grad_norm": 1.547270813258376, + "learning_rate": 9.706471008593128e-07, + "loss": 0.4244336485862732, + "step": 4632 + }, + { + "epoch": 1.0682499423564675, + "grad_norm": 1.441914160495435, + "learning_rate": 9.702660060388923e-07, + "loss": 0.4396495819091797, + "step": 4633 + }, + { + "epoch": 1.0684805164860502, + "grad_norm": 1.3832490714301353, + "learning_rate": 9.698849155406089e-07, + "loss": 0.4504232406616211, + "step": 4634 + }, + { + "epoch": 1.068711090615633, + "grad_norm": 1.5660708185651993, + "learning_rate": 9.695038294198588e-07, + "loss": 0.40112000703811646, + "step": 4635 + }, + { + "epoch": 1.0689416647452157, + "grad_norm": 
1.5797332497697052, + "learning_rate": 9.691227477320357e-07, + "loss": 0.4511067271232605, + "step": 4636 + }, + { + "epoch": 1.0691722388747982, + "grad_norm": 1.4624732720511697, + "learning_rate": 9.687416705325342e-07, + "loss": 0.44541406631469727, + "step": 4637 + }, + { + "epoch": 1.069402813004381, + "grad_norm": 1.3872197811900322, + "learning_rate": 9.68360597876748e-07, + "loss": 0.5038847327232361, + "step": 4638 + }, + { + "epoch": 1.0696333871339636, + "grad_norm": 1.2356986255488158, + "learning_rate": 9.67979529820069e-07, + "loss": 0.41960060596466064, + "step": 4639 + }, + { + "epoch": 1.0698639612635463, + "grad_norm": 1.6121133741192841, + "learning_rate": 9.6759846641789e-07, + "loss": 0.49760064482688904, + "step": 4640 + }, + { + "epoch": 1.0700945353931288, + "grad_norm": 1.7920934015909264, + "learning_rate": 9.672174077256023e-07, + "loss": 0.46513333916664124, + "step": 4641 + }, + { + "epoch": 1.0703251095227115, + "grad_norm": 1.5128396951273724, + "learning_rate": 9.66836353798597e-07, + "loss": 0.41129356622695923, + "step": 4642 + }, + { + "epoch": 1.0705556836522943, + "grad_norm": 1.1803503202020598, + "learning_rate": 9.664553046922634e-07, + "loss": 0.5021853446960449, + "step": 4643 + }, + { + "epoch": 1.070786257781877, + "grad_norm": 1.7444146178498035, + "learning_rate": 9.660742604619912e-07, + "loss": 0.5184302926063538, + "step": 4644 + }, + { + "epoch": 1.0710168319114595, + "grad_norm": 1.8278981381437267, + "learning_rate": 9.65693221163169e-07, + "loss": 0.4793940484523773, + "step": 4645 + }, + { + "epoch": 1.0712474060410422, + "grad_norm": 1.6157027564363053, + "learning_rate": 9.653121868511854e-07, + "loss": 0.43454456329345703, + "step": 4646 + }, + { + "epoch": 1.071477980170625, + "grad_norm": 1.3605748894383922, + "learning_rate": 9.649311575814266e-07, + "loss": 0.49123185873031616, + "step": 4647 + }, + { + "epoch": 1.0717085543002076, + "grad_norm": 1.2316654311751212, + "learning_rate": 
9.645501334092792e-07, + "loss": 0.37020617723464966, + "step": 4648 + }, + { + "epoch": 1.0719391284297901, + "grad_norm": 1.3370776970957903, + "learning_rate": 9.641691143901296e-07, + "loss": 0.461778849363327, + "step": 4649 + }, + { + "epoch": 1.0721697025593728, + "grad_norm": 1.7402606402657241, + "learning_rate": 9.63788100579362e-07, + "loss": 0.46640273928642273, + "step": 4650 + }, + { + "epoch": 1.0724002766889555, + "grad_norm": 1.543123481033078, + "learning_rate": 9.634070920323614e-07, + "loss": 0.44978517293930054, + "step": 4651 + }, + { + "epoch": 1.0726308508185383, + "grad_norm": 1.5280216878422028, + "learning_rate": 9.630260888045103e-07, + "loss": 0.5070945024490356, + "step": 4652 + }, + { + "epoch": 1.0728614249481208, + "grad_norm": 1.3361545028178132, + "learning_rate": 9.626450909511926e-07, + "loss": 0.4513545334339142, + "step": 4653 + }, + { + "epoch": 1.0730919990777035, + "grad_norm": 1.2352969540055843, + "learning_rate": 9.622640985277889e-07, + "loss": 0.4430030584335327, + "step": 4654 + }, + { + "epoch": 1.0733225732072862, + "grad_norm": 1.7185507494111099, + "learning_rate": 9.618831115896814e-07, + "loss": 0.45619165897369385, + "step": 4655 + }, + { + "epoch": 1.073553147336869, + "grad_norm": 1.3452693944435885, + "learning_rate": 9.615021301922497e-07, + "loss": 0.411594033241272, + "step": 4656 + }, + { + "epoch": 1.0737837214664514, + "grad_norm": 1.696260647190632, + "learning_rate": 9.611211543908732e-07, + "loss": 0.5230164527893066, + "step": 4657 + }, + { + "epoch": 1.0740142955960341, + "grad_norm": 1.2546383850728546, + "learning_rate": 9.607401842409316e-07, + "loss": 0.45379406213760376, + "step": 4658 + }, + { + "epoch": 1.0742448697256168, + "grad_norm": 1.4465974878955368, + "learning_rate": 9.603592197978016e-07, + "loss": 0.47254839539527893, + "step": 4659 + }, + { + "epoch": 1.0744754438551993, + "grad_norm": 1.4899733507525732, + "learning_rate": 9.59978261116861e-07, + "loss": 0.3990492820739746, + 
"step": 4660 + }, + { + "epoch": 1.074706017984782, + "grad_norm": 1.2629235312972213, + "learning_rate": 9.595973082534855e-07, + "loss": 0.41671720147132874, + "step": 4661 + }, + { + "epoch": 1.0749365921143648, + "grad_norm": 1.3769486256402874, + "learning_rate": 9.59216361263051e-07, + "loss": 0.4269324839115143, + "step": 4662 + }, + { + "epoch": 1.0751671662439475, + "grad_norm": 1.7548425902665015, + "learning_rate": 9.588354202009314e-07, + "loss": 0.42989516258239746, + "step": 4663 + }, + { + "epoch": 1.0753977403735302, + "grad_norm": 1.5474664125691167, + "learning_rate": 9.584544851225008e-07, + "loss": 0.5224605798721313, + "step": 4664 + }, + { + "epoch": 1.0756283145031127, + "grad_norm": 1.393419713492626, + "learning_rate": 9.580735560831318e-07, + "loss": 0.3853871524333954, + "step": 4665 + }, + { + "epoch": 1.0758588886326954, + "grad_norm": 1.360242198109215, + "learning_rate": 9.576926331381968e-07, + "loss": 0.4460698366165161, + "step": 4666 + }, + { + "epoch": 1.0760894627622781, + "grad_norm": 1.524802030014046, + "learning_rate": 9.57311716343066e-07, + "loss": 0.45617812871932983, + "step": 4667 + }, + { + "epoch": 1.0763200368918606, + "grad_norm": 1.7079854681006486, + "learning_rate": 9.569308057531096e-07, + "loss": 0.5631355047225952, + "step": 4668 + }, + { + "epoch": 1.0765506110214433, + "grad_norm": 1.3155596598859882, + "learning_rate": 9.565499014236977e-07, + "loss": 0.4197179973125458, + "step": 4669 + }, + { + "epoch": 1.076781185151026, + "grad_norm": 1.5894301477582775, + "learning_rate": 9.561690034101973e-07, + "loss": 0.4262646436691284, + "step": 4670 + }, + { + "epoch": 1.0770117592806088, + "grad_norm": 1.4805271814916348, + "learning_rate": 9.557881117679768e-07, + "loss": 0.42719966173171997, + "step": 4671 + }, + { + "epoch": 1.0772423334101915, + "grad_norm": 1.3479731294807211, + "learning_rate": 9.554072265524022e-07, + "loss": 0.4278491735458374, + "step": 4672 + }, + { + "epoch": 1.077472907539774, + 
"grad_norm": 1.4324931591130032, + "learning_rate": 9.550263478188396e-07, + "loss": 0.3915478587150574, + "step": 4673 + }, + { + "epoch": 1.0777034816693567, + "grad_norm": 1.4807606218185139, + "learning_rate": 9.546454756226525e-07, + "loss": 0.4391477704048157, + "step": 4674 + }, + { + "epoch": 1.0779340557989394, + "grad_norm": 1.6230153652074522, + "learning_rate": 9.542646100192055e-07, + "loss": 0.47325795888900757, + "step": 4675 + }, + { + "epoch": 1.078164629928522, + "grad_norm": 1.3326185339285364, + "learning_rate": 9.538837510638607e-07, + "loss": 0.4698373079299927, + "step": 4676 + }, + { + "epoch": 1.0783952040581046, + "grad_norm": 1.5843176103578385, + "learning_rate": 9.535028988119805e-07, + "loss": 0.4252272844314575, + "step": 4677 + }, + { + "epoch": 1.0786257781876873, + "grad_norm": 1.4642476960881914, + "learning_rate": 9.531220533189253e-07, + "loss": 0.46726179122924805, + "step": 4678 + }, + { + "epoch": 1.07885635231727, + "grad_norm": 1.3792408296611596, + "learning_rate": 9.527412146400542e-07, + "loss": 0.46616411209106445, + "step": 4679 + }, + { + "epoch": 1.0790869264468528, + "grad_norm": 1.3938952826758202, + "learning_rate": 9.523603828307268e-07, + "loss": 0.5607181787490845, + "step": 4680 + }, + { + "epoch": 1.0793175005764353, + "grad_norm": 1.6234566687004295, + "learning_rate": 9.519795579463002e-07, + "loss": 0.5039520859718323, + "step": 4681 + }, + { + "epoch": 1.079548074706018, + "grad_norm": 1.6358698645091259, + "learning_rate": 9.515987400421322e-07, + "loss": 0.45532113313674927, + "step": 4682 + }, + { + "epoch": 1.0797786488356007, + "grad_norm": 1.3987490622653254, + "learning_rate": 9.512179291735772e-07, + "loss": 0.4198398292064667, + "step": 4683 + }, + { + "epoch": 1.0800092229651832, + "grad_norm": 2.0745649369110577, + "learning_rate": 9.508371253959909e-07, + "loss": 0.371380090713501, + "step": 4684 + }, + { + "epoch": 1.080239797094766, + "grad_norm": 1.6602368865180097, + "learning_rate": 
9.504563287647265e-07, + "loss": 0.44341978430747986, + "step": 4685 + }, + { + "epoch": 1.0804703712243486, + "grad_norm": 1.3233390600316475, + "learning_rate": 9.500755393351372e-07, + "loss": 0.4184574484825134, + "step": 4686 + }, + { + "epoch": 1.0807009453539314, + "grad_norm": 1.554478033670439, + "learning_rate": 9.496947571625739e-07, + "loss": 0.5584033727645874, + "step": 4687 + }, + { + "epoch": 1.0809315194835138, + "grad_norm": 1.4303675439776025, + "learning_rate": 9.493139823023874e-07, + "loss": 0.44405317306518555, + "step": 4688 + }, + { + "epoch": 1.0811620936130966, + "grad_norm": 1.5109921870756446, + "learning_rate": 9.489332148099277e-07, + "loss": 0.41137009859085083, + "step": 4689 + }, + { + "epoch": 1.0813926677426793, + "grad_norm": 1.5933695881826222, + "learning_rate": 9.485524547405424e-07, + "loss": 0.4831092357635498, + "step": 4690 + }, + { + "epoch": 1.081623241872262, + "grad_norm": 1.3224307777817799, + "learning_rate": 9.481717021495793e-07, + "loss": 0.41243845224380493, + "step": 4691 + }, + { + "epoch": 1.0818538160018445, + "grad_norm": 1.506253034871724, + "learning_rate": 9.477909570923844e-07, + "loss": 0.33649003505706787, + "step": 4692 + }, + { + "epoch": 1.0820843901314272, + "grad_norm": 1.3759728989311568, + "learning_rate": 9.474102196243033e-07, + "loss": 0.4959014654159546, + "step": 4693 + }, + { + "epoch": 1.08231496426101, + "grad_norm": 1.4717496348190642, + "learning_rate": 9.470294898006795e-07, + "loss": 0.43924248218536377, + "step": 4694 + }, + { + "epoch": 1.0825455383905926, + "grad_norm": 1.5425758669304555, + "learning_rate": 9.466487676768563e-07, + "loss": 0.4777243137359619, + "step": 4695 + }, + { + "epoch": 1.0827761125201751, + "grad_norm": 1.7258911046059784, + "learning_rate": 9.462680533081752e-07, + "loss": 0.4488077759742737, + "step": 4696 + }, + { + "epoch": 1.0830066866497579, + "grad_norm": 1.5375128445555653, + "learning_rate": 9.458873467499778e-07, + "loss": 0.5058270692825317, + 
"step": 4697 + }, + { + "epoch": 1.0832372607793406, + "grad_norm": 1.5052517610014813, + "learning_rate": 9.455066480576025e-07, + "loss": 0.4537619650363922, + "step": 4698 + }, + { + "epoch": 1.0834678349089233, + "grad_norm": 1.5194044905455244, + "learning_rate": 9.45125957286388e-07, + "loss": 0.4725874960422516, + "step": 4699 + }, + { + "epoch": 1.0836984090385058, + "grad_norm": 1.61840988882087, + "learning_rate": 9.447452744916722e-07, + "loss": 0.4967196583747864, + "step": 4700 + }, + { + "epoch": 1.0839289831680885, + "grad_norm": 1.3272496966479597, + "learning_rate": 9.443645997287902e-07, + "loss": 0.43682345747947693, + "step": 4701 + }, + { + "epoch": 1.0841595572976712, + "grad_norm": 1.4038050893134464, + "learning_rate": 9.439839330530781e-07, + "loss": 0.48844271898269653, + "step": 4702 + }, + { + "epoch": 1.084390131427254, + "grad_norm": 1.3581740542884078, + "learning_rate": 9.436032745198682e-07, + "loss": 0.43654918670654297, + "step": 4703 + }, + { + "epoch": 1.0846207055568364, + "grad_norm": 1.6070546851567389, + "learning_rate": 9.432226241844947e-07, + "loss": 0.5034382939338684, + "step": 4704 + }, + { + "epoch": 1.0848512796864191, + "grad_norm": 1.9516449815592325, + "learning_rate": 9.428419821022877e-07, + "loss": 0.5407527089118958, + "step": 4705 + }, + { + "epoch": 1.0850818538160019, + "grad_norm": 1.3188521673213394, + "learning_rate": 9.424613483285783e-07, + "loss": 0.4372078478336334, + "step": 4706 + }, + { + "epoch": 1.0853124279455846, + "grad_norm": 1.3673238165045705, + "learning_rate": 9.420807229186949e-07, + "loss": 0.5264855623245239, + "step": 4707 + }, + { + "epoch": 1.085543002075167, + "grad_norm": 1.2884056915833075, + "learning_rate": 9.417001059279652e-07, + "loss": 0.3810223937034607, + "step": 4708 + }, + { + "epoch": 1.0857735762047498, + "grad_norm": 1.318670262430079, + "learning_rate": 9.413194974117163e-07, + "loss": 0.368865430355072, + "step": 4709 + }, + { + "epoch": 1.0860041503343325, + 
"grad_norm": 1.3202107346651724, + "learning_rate": 9.409388974252729e-07, + "loss": 0.41845810413360596, + "step": 4710 + }, + { + "epoch": 1.0862347244639152, + "grad_norm": 1.4709870024189373, + "learning_rate": 9.405583060239594e-07, + "loss": 0.5185590982437134, + "step": 4711 + }, + { + "epoch": 1.0864652985934977, + "grad_norm": 1.7793671382372165, + "learning_rate": 9.401777232630983e-07, + "loss": 0.4848501682281494, + "step": 4712 + }, + { + "epoch": 1.0866958727230804, + "grad_norm": 1.5218788678149173, + "learning_rate": 9.397971491980119e-07, + "loss": 0.5581566691398621, + "step": 4713 + }, + { + "epoch": 1.0869264468526632, + "grad_norm": 1.475012350727374, + "learning_rate": 9.394165838840196e-07, + "loss": 0.42043447494506836, + "step": 4714 + }, + { + "epoch": 1.0871570209822459, + "grad_norm": 1.3731967040929853, + "learning_rate": 9.39036027376441e-07, + "loss": 0.45076289772987366, + "step": 4715 + }, + { + "epoch": 1.0873875951118284, + "grad_norm": 1.353578451117457, + "learning_rate": 9.386554797305934e-07, + "loss": 0.3650796413421631, + "step": 4716 + }, + { + "epoch": 1.087618169241411, + "grad_norm": 1.436571768450736, + "learning_rate": 9.38274941001794e-07, + "loss": 0.4837912321090698, + "step": 4717 + }, + { + "epoch": 1.0878487433709938, + "grad_norm": 1.5272898845570653, + "learning_rate": 9.378944112453574e-07, + "loss": 0.41277679800987244, + "step": 4718 + }, + { + "epoch": 1.0880793175005765, + "grad_norm": 1.7344713328668464, + "learning_rate": 9.375138905165973e-07, + "loss": 0.48409390449523926, + "step": 4719 + }, + { + "epoch": 1.088309891630159, + "grad_norm": 1.360949967282617, + "learning_rate": 9.371333788708268e-07, + "loss": 0.3952450752258301, + "step": 4720 + }, + { + "epoch": 1.0885404657597417, + "grad_norm": 1.6450358552008089, + "learning_rate": 9.367528763633563e-07, + "loss": 0.42314866185188293, + "step": 4721 + }, + { + "epoch": 1.0887710398893244, + "grad_norm": 1.492846868063658, + "learning_rate": 
9.363723830494966e-07, + "loss": 0.5322449207305908, + "step": 4722 + }, + { + "epoch": 1.0890016140189072, + "grad_norm": 1.3552869600155872, + "learning_rate": 9.359918989845557e-07, + "loss": 0.42307883501052856, + "step": 4723 + }, + { + "epoch": 1.0892321881484897, + "grad_norm": 1.3481901437941268, + "learning_rate": 9.356114242238413e-07, + "loss": 0.39321061968803406, + "step": 4724 + }, + { + "epoch": 1.0894627622780724, + "grad_norm": 1.6333273110158268, + "learning_rate": 9.352309588226585e-07, + "loss": 0.5064421892166138, + "step": 4725 + }, + { + "epoch": 1.089693336407655, + "grad_norm": 1.4475724274606394, + "learning_rate": 9.348505028363125e-07, + "loss": 0.44825220108032227, + "step": 4726 + }, + { + "epoch": 1.0899239105372378, + "grad_norm": 1.384316241889946, + "learning_rate": 9.344700563201065e-07, + "loss": 0.4323306679725647, + "step": 4727 + }, + { + "epoch": 1.0901544846668203, + "grad_norm": 1.3254947105842285, + "learning_rate": 9.340896193293414e-07, + "loss": 0.44907987117767334, + "step": 4728 + }, + { + "epoch": 1.090385058796403, + "grad_norm": 1.3161326376052391, + "learning_rate": 9.337091919193185e-07, + "loss": 0.416559636592865, + "step": 4729 + }, + { + "epoch": 1.0906156329259857, + "grad_norm": 1.6044534711260028, + "learning_rate": 9.33328774145336e-07, + "loss": 0.5361836552619934, + "step": 4730 + }, + { + "epoch": 1.0908462070555685, + "grad_norm": 1.3742080048163032, + "learning_rate": 9.329483660626922e-07, + "loss": 0.4815465211868286, + "step": 4731 + }, + { + "epoch": 1.091076781185151, + "grad_norm": 1.4553535934080677, + "learning_rate": 9.325679677266826e-07, + "loss": 0.5205050110816956, + "step": 4732 + }, + { + "epoch": 1.0913073553147337, + "grad_norm": 1.9887709257052897, + "learning_rate": 9.321875791926028e-07, + "loss": 0.4830896258354187, + "step": 4733 + }, + { + "epoch": 1.0915379294443164, + "grad_norm": 1.3739860439026885, + "learning_rate": 9.318072005157451e-07, + "loss": 0.4394579827785492, + 
"step": 4734 + }, + { + "epoch": 1.091768503573899, + "grad_norm": 1.6664317769247758, + "learning_rate": 9.314268317514022e-07, + "loss": 0.4614049792289734, + "step": 4735 + }, + { + "epoch": 1.0919990777034816, + "grad_norm": 1.5989711566807139, + "learning_rate": 9.31046472954864e-07, + "loss": 0.5123867988586426, + "step": 4736 + }, + { + "epoch": 1.0922296518330643, + "grad_norm": 1.879970895540274, + "learning_rate": 9.306661241814204e-07, + "loss": 0.43548035621643066, + "step": 4737 + }, + { + "epoch": 1.092460225962647, + "grad_norm": 1.4190205685105515, + "learning_rate": 9.302857854863579e-07, + "loss": 0.4102709889411926, + "step": 4738 + }, + { + "epoch": 1.0926908000922297, + "grad_norm": 1.7007344632271022, + "learning_rate": 9.299054569249628e-07, + "loss": 0.46276605129241943, + "step": 4739 + }, + { + "epoch": 1.0929213742218122, + "grad_norm": 1.5950261365712695, + "learning_rate": 9.295251385525204e-07, + "loss": 0.47700244188308716, + "step": 4740 + }, + { + "epoch": 1.093151948351395, + "grad_norm": 1.5081940540312389, + "learning_rate": 9.29144830424313e-07, + "loss": 0.5492758750915527, + "step": 4741 + }, + { + "epoch": 1.0933825224809777, + "grad_norm": 1.6521559747103167, + "learning_rate": 9.287645325956228e-07, + "loss": 0.3846803307533264, + "step": 4742 + }, + { + "epoch": 1.0936130966105604, + "grad_norm": 1.4300122822608972, + "learning_rate": 9.283842451217294e-07, + "loss": 0.47237372398376465, + "step": 4743 + }, + { + "epoch": 1.0938436707401429, + "grad_norm": 1.6996074936661776, + "learning_rate": 9.280039680579122e-07, + "loss": 0.4651675820350647, + "step": 4744 + }, + { + "epoch": 1.0940742448697256, + "grad_norm": 1.6397662048344088, + "learning_rate": 9.276237014594476e-07, + "loss": 0.5472640991210938, + "step": 4745 + }, + { + "epoch": 1.0943048189993083, + "grad_norm": 1.3158004626748314, + "learning_rate": 9.272434453816117e-07, + "loss": 0.45672351121902466, + "step": 4746 + }, + { + "epoch": 1.094535393128891, + 
"grad_norm": 1.4246135812847533, + "learning_rate": 9.268631998796785e-07, + "loss": 0.4589729905128479, + "step": 4747 + }, + { + "epoch": 1.0947659672584735, + "grad_norm": 1.4398967186683822, + "learning_rate": 9.264829650089201e-07, + "loss": 0.45882588624954224, + "step": 4748 + }, + { + "epoch": 1.0949965413880562, + "grad_norm": 1.8586265213095916, + "learning_rate": 9.26102740824608e-07, + "loss": 0.6183863282203674, + "step": 4749 + }, + { + "epoch": 1.095227115517639, + "grad_norm": 1.4631882562588927, + "learning_rate": 9.257225273820112e-07, + "loss": 0.4512014389038086, + "step": 4750 + }, + { + "epoch": 1.0954576896472217, + "grad_norm": 1.5706161838979387, + "learning_rate": 9.253423247363983e-07, + "loss": 0.5006139874458313, + "step": 4751 + }, + { + "epoch": 1.0956882637768042, + "grad_norm": 1.4110458948787974, + "learning_rate": 9.249621329430346e-07, + "loss": 0.5394018888473511, + "step": 4752 + }, + { + "epoch": 1.095918837906387, + "grad_norm": 1.5150959480945791, + "learning_rate": 9.245819520571858e-07, + "loss": 0.35523056983947754, + "step": 4753 + }, + { + "epoch": 1.0961494120359696, + "grad_norm": 1.3819812548856059, + "learning_rate": 9.242017821341143e-07, + "loss": 0.44379743933677673, + "step": 4754 + }, + { + "epoch": 1.0963799861655523, + "grad_norm": 1.6129174796361336, + "learning_rate": 9.238216232290821e-07, + "loss": 0.4190908968448639, + "step": 4755 + }, + { + "epoch": 1.0966105602951348, + "grad_norm": 1.6222067534589701, + "learning_rate": 9.234414753973488e-07, + "loss": 0.44818970561027527, + "step": 4756 + }, + { + "epoch": 1.0968411344247175, + "grad_norm": 1.4925644141379035, + "learning_rate": 9.230613386941734e-07, + "loss": 0.4134204685688019, + "step": 4757 + }, + { + "epoch": 1.0970717085543003, + "grad_norm": 1.2148478016107016, + "learning_rate": 9.226812131748118e-07, + "loss": 0.3554952144622803, + "step": 4758 + }, + { + "epoch": 1.097302282683883, + "grad_norm": 1.674922299722459, + "learning_rate": 
9.223010988945194e-07, + "loss": 0.522594690322876, + "step": 4759 + }, + { + "epoch": 1.0975328568134655, + "grad_norm": 1.4320622438584156, + "learning_rate": 9.219209959085502e-07, + "loss": 0.44814133644104004, + "step": 4760 + }, + { + "epoch": 1.0977634309430482, + "grad_norm": 1.4723286174250931, + "learning_rate": 9.215409042721551e-07, + "loss": 0.42479634284973145, + "step": 4761 + }, + { + "epoch": 1.097994005072631, + "grad_norm": 1.5414891522514993, + "learning_rate": 9.211608240405849e-07, + "loss": 0.4384934902191162, + "step": 4762 + }, + { + "epoch": 1.0982245792022136, + "grad_norm": 1.4811013868533904, + "learning_rate": 9.207807552690878e-07, + "loss": 0.5378658771514893, + "step": 4763 + }, + { + "epoch": 1.098455153331796, + "grad_norm": 1.4445039209024981, + "learning_rate": 9.204006980129111e-07, + "loss": 0.5071386694908142, + "step": 4764 + }, + { + "epoch": 1.0986857274613788, + "grad_norm": 1.5460474623164162, + "learning_rate": 9.200206523272992e-07, + "loss": 0.46085822582244873, + "step": 4765 + }, + { + "epoch": 1.0989163015909615, + "grad_norm": 1.544747382675103, + "learning_rate": 9.196406182674964e-07, + "loss": 0.5083057880401611, + "step": 4766 + }, + { + "epoch": 1.0991468757205443, + "grad_norm": 1.2845065354356755, + "learning_rate": 9.192605958887438e-07, + "loss": 0.48307740688323975, + "step": 4767 + }, + { + "epoch": 1.0993774498501268, + "grad_norm": 1.8405581264672015, + "learning_rate": 9.188805852462824e-07, + "loss": 0.5195509791374207, + "step": 4768 + }, + { + "epoch": 1.0996080239797095, + "grad_norm": 1.5537273798526559, + "learning_rate": 9.185005863953498e-07, + "loss": 0.5161266326904297, + "step": 4769 + }, + { + "epoch": 1.0998385981092922, + "grad_norm": 1.5985708455901557, + "learning_rate": 9.181205993911827e-07, + "loss": 0.4757764935493469, + "step": 4770 + }, + { + "epoch": 1.1000691722388747, + "grad_norm": 1.5307887938016926, + "learning_rate": 9.177406242890167e-07, + "loss": 0.4071381688117981, + 
"step": 4771 + }, + { + "epoch": 1.1002997463684574, + "grad_norm": 1.3525378547606768, + "learning_rate": 9.173606611440842e-07, + "loss": 0.4794449210166931, + "step": 4772 + }, + { + "epoch": 1.1005303204980401, + "grad_norm": 1.3205547171467464, + "learning_rate": 9.169807100116175e-07, + "loss": 0.4678712487220764, + "step": 4773 + }, + { + "epoch": 1.1007608946276228, + "grad_norm": 1.2863487713029464, + "learning_rate": 9.166007709468456e-07, + "loss": 0.43200960755348206, + "step": 4774 + }, + { + "epoch": 1.1009914687572055, + "grad_norm": 1.8114336882311408, + "learning_rate": 9.162208440049974e-07, + "loss": 0.49283260107040405, + "step": 4775 + }, + { + "epoch": 1.101222042886788, + "grad_norm": 1.2265456496064566, + "learning_rate": 9.158409292412982e-07, + "loss": 0.4430215358734131, + "step": 4776 + }, + { + "epoch": 1.1014526170163708, + "grad_norm": 1.282698473472426, + "learning_rate": 9.154610267109731e-07, + "loss": 0.4529581069946289, + "step": 4777 + }, + { + "epoch": 1.1016831911459535, + "grad_norm": 1.3698366211761768, + "learning_rate": 9.150811364692446e-07, + "loss": 0.3872554302215576, + "step": 4778 + }, + { + "epoch": 1.101913765275536, + "grad_norm": 1.4034579683870105, + "learning_rate": 9.147012585713331e-07, + "loss": 0.466983437538147, + "step": 4779 + }, + { + "epoch": 1.1021443394051187, + "grad_norm": 1.3799350437064777, + "learning_rate": 9.143213930724587e-07, + "loss": 0.4841456115245819, + "step": 4780 + }, + { + "epoch": 1.1023749135347014, + "grad_norm": 2.083063073101601, + "learning_rate": 9.139415400278376e-07, + "loss": 0.4506613612174988, + "step": 4781 + }, + { + "epoch": 1.1026054876642841, + "grad_norm": 1.5047320834529434, + "learning_rate": 9.135616994926861e-07, + "loss": 0.428241491317749, + "step": 4782 + }, + { + "epoch": 1.1028360617938668, + "grad_norm": 1.3329992006000018, + "learning_rate": 9.131818715222175e-07, + "loss": 0.46940821409225464, + "step": 4783 + }, + { + "epoch": 1.1030666359234493, + 
"grad_norm": 1.5416614978551508, + "learning_rate": 9.12802056171644e-07, + "loss": 0.4527658224105835, + "step": 4784 + }, + { + "epoch": 1.103297210053032, + "grad_norm": 1.3412511641642377, + "learning_rate": 9.124222534961749e-07, + "loss": 0.3284989893436432, + "step": 4785 + }, + { + "epoch": 1.1035277841826148, + "grad_norm": 1.497248247266052, + "learning_rate": 9.120424635510193e-07, + "loss": 0.448346883058548, + "step": 4786 + }, + { + "epoch": 1.1037583583121973, + "grad_norm": 1.5413647461227613, + "learning_rate": 9.116626863913826e-07, + "loss": 0.4625587463378906, + "step": 4787 + }, + { + "epoch": 1.10398893244178, + "grad_norm": 1.398727589269655, + "learning_rate": 9.112829220724703e-07, + "loss": 0.37891942262649536, + "step": 4788 + }, + { + "epoch": 1.1042195065713627, + "grad_norm": 1.510309439727558, + "learning_rate": 9.109031706494841e-07, + "loss": 0.48719239234924316, + "step": 4789 + }, + { + "epoch": 1.1044500807009454, + "grad_norm": 1.695631911449914, + "learning_rate": 9.105234321776247e-07, + "loss": 0.5341615676879883, + "step": 4790 + }, + { + "epoch": 1.1046806548305281, + "grad_norm": 1.30752453253924, + "learning_rate": 9.101437067120918e-07, + "loss": 0.36677777767181396, + "step": 4791 + }, + { + "epoch": 1.1049112289601106, + "grad_norm": 1.3000512165603213, + "learning_rate": 9.097639943080813e-07, + "loss": 0.4348159432411194, + "step": 4792 + }, + { + "epoch": 1.1051418030896933, + "grad_norm": 1.3763164723830184, + "learning_rate": 9.093842950207891e-07, + "loss": 0.44912683963775635, + "step": 4793 + }, + { + "epoch": 1.105372377219276, + "grad_norm": 1.655048045877048, + "learning_rate": 9.090046089054077e-07, + "loss": 0.5576057434082031, + "step": 4794 + }, + { + "epoch": 1.1056029513488586, + "grad_norm": 1.4655907130631036, + "learning_rate": 9.08624936017129e-07, + "loss": 0.43964770436286926, + "step": 4795 + }, + { + "epoch": 1.1058335254784413, + "grad_norm": 1.3648059541391266, + "learning_rate": 
9.082452764111415e-07, + "loss": 0.4285386800765991, + "step": 4796 + }, + { + "epoch": 1.106064099608024, + "grad_norm": 1.6322901017927212, + "learning_rate": 9.078656301426332e-07, + "loss": 0.4257868230342865, + "step": 4797 + }, + { + "epoch": 1.1062946737376067, + "grad_norm": 1.9314022304382554, + "learning_rate": 9.074859972667895e-07, + "loss": 0.4540346562862396, + "step": 4798 + }, + { + "epoch": 1.1065252478671892, + "grad_norm": 1.6801359554397164, + "learning_rate": 9.071063778387933e-07, + "loss": 0.5273457765579224, + "step": 4799 + }, + { + "epoch": 1.106755821996772, + "grad_norm": 1.4107980839711056, + "learning_rate": 9.067267719138268e-07, + "loss": 0.391310453414917, + "step": 4800 + }, + { + "epoch": 1.1069863961263546, + "grad_norm": 1.4182050274963418, + "learning_rate": 9.063471795470691e-07, + "loss": 0.47945383191108704, + "step": 4801 + }, + { + "epoch": 1.1072169702559373, + "grad_norm": 1.7087277476088294, + "learning_rate": 9.05967600793698e-07, + "loss": 0.49561476707458496, + "step": 4802 + }, + { + "epoch": 1.1074475443855198, + "grad_norm": 1.3070252929290396, + "learning_rate": 9.05588035708889e-07, + "loss": 0.4505256414413452, + "step": 4803 + }, + { + "epoch": 1.1076781185151026, + "grad_norm": 1.6864844579974707, + "learning_rate": 9.052084843478164e-07, + "loss": 0.37591490149497986, + "step": 4804 + }, + { + "epoch": 1.1079086926446853, + "grad_norm": 1.486226704077577, + "learning_rate": 9.048289467656508e-07, + "loss": 0.478586345911026, + "step": 4805 + }, + { + "epoch": 1.108139266774268, + "grad_norm": 1.3819959446941394, + "learning_rate": 9.044494230175625e-07, + "loss": 0.4373725354671478, + "step": 4806 + }, + { + "epoch": 1.1083698409038505, + "grad_norm": 1.4091791216138099, + "learning_rate": 9.040699131587186e-07, + "loss": 0.3976345360279083, + "step": 4807 + }, + { + "epoch": 1.1086004150334332, + "grad_norm": 1.3848852740812903, + "learning_rate": 9.036904172442857e-07, + "loss": 0.44611310958862305, + 
"step": 4808 + }, + { + "epoch": 1.108830989163016, + "grad_norm": 1.3117584806534919, + "learning_rate": 9.033109353294262e-07, + "loss": 0.40816667675971985, + "step": 4809 + }, + { + "epoch": 1.1090615632925986, + "grad_norm": 1.359605756890841, + "learning_rate": 9.029314674693023e-07, + "loss": 0.37462317943573, + "step": 4810 + }, + { + "epoch": 1.1092921374221811, + "grad_norm": 1.3641846963299056, + "learning_rate": 9.025520137190735e-07, + "loss": 0.3856509327888489, + "step": 4811 + }, + { + "epoch": 1.1095227115517639, + "grad_norm": 1.5740711616700624, + "learning_rate": 9.021725741338969e-07, + "loss": 0.4728443920612335, + "step": 4812 + }, + { + "epoch": 1.1097532856813466, + "grad_norm": 2.0717537833557773, + "learning_rate": 9.017931487689282e-07, + "loss": 0.4614938795566559, + "step": 4813 + }, + { + "epoch": 1.1099838598109293, + "grad_norm": 1.4925546437709947, + "learning_rate": 9.014137376793203e-07, + "loss": 0.4137331247329712, + "step": 4814 + }, + { + "epoch": 1.1102144339405118, + "grad_norm": 1.2481779358565226, + "learning_rate": 9.010343409202255e-07, + "loss": 0.42436620593070984, + "step": 4815 + }, + { + "epoch": 1.1104450080700945, + "grad_norm": 1.3339513565407848, + "learning_rate": 9.006549585467916e-07, + "loss": 0.43592822551727295, + "step": 4816 + }, + { + "epoch": 1.1106755821996772, + "grad_norm": 1.3742872645989155, + "learning_rate": 9.002755906141666e-07, + "loss": 0.45627349615097046, + "step": 4817 + }, + { + "epoch": 1.11090615632926, + "grad_norm": 1.819907938722267, + "learning_rate": 8.998962371774953e-07, + "loss": 0.5103771686553955, + "step": 4818 + }, + { + "epoch": 1.1111367304588424, + "grad_norm": 1.4418115437773273, + "learning_rate": 8.995168982919201e-07, + "loss": 0.470276802778244, + "step": 4819 + }, + { + "epoch": 1.1113673045884251, + "grad_norm": 1.3186176277536419, + "learning_rate": 8.991375740125823e-07, + "loss": 0.49486416578292847, + "step": 4820 + }, + { + "epoch": 1.1115978787180079, + 
"grad_norm": 1.143316450397621, + "learning_rate": 8.987582643946201e-07, + "loss": 0.338329017162323, + "step": 4821 + }, + { + "epoch": 1.1118284528475906, + "grad_norm": 1.4885392176771477, + "learning_rate": 8.983789694931706e-07, + "loss": 0.38252198696136475, + "step": 4822 + }, + { + "epoch": 1.112059026977173, + "grad_norm": 1.4537319037859584, + "learning_rate": 8.979996893633675e-07, + "loss": 0.47691571712493896, + "step": 4823 + }, + { + "epoch": 1.1122896011067558, + "grad_norm": 1.41954873904419, + "learning_rate": 8.976204240603433e-07, + "loss": 0.40156808495521545, + "step": 4824 + }, + { + "epoch": 1.1125201752363385, + "grad_norm": 1.312743475511893, + "learning_rate": 8.97241173639228e-07, + "loss": 0.3837090730667114, + "step": 4825 + }, + { + "epoch": 1.1127507493659212, + "grad_norm": 1.6300077035939553, + "learning_rate": 8.968619381551499e-07, + "loss": 0.5094380378723145, + "step": 4826 + }, + { + "epoch": 1.1129813234955037, + "grad_norm": 1.4389159508234053, + "learning_rate": 8.964827176632339e-07, + "loss": 0.48674100637435913, + "step": 4827 + }, + { + "epoch": 1.1132118976250864, + "grad_norm": 1.7742534070601, + "learning_rate": 8.961035122186045e-07, + "loss": 0.49288761615753174, + "step": 4828 + }, + { + "epoch": 1.1134424717546691, + "grad_norm": 1.4156686622304593, + "learning_rate": 8.957243218763824e-07, + "loss": 0.42933952808380127, + "step": 4829 + }, + { + "epoch": 1.1136730458842519, + "grad_norm": 1.838762036908513, + "learning_rate": 8.953451466916866e-07, + "loss": 0.39244914054870605, + "step": 4830 + }, + { + "epoch": 1.1139036200138344, + "grad_norm": 1.3776049792093739, + "learning_rate": 8.949659867196348e-07, + "loss": 0.44688090682029724, + "step": 4831 + }, + { + "epoch": 1.114134194143417, + "grad_norm": 1.6923430022628052, + "learning_rate": 8.945868420153409e-07, + "loss": 0.5388743877410889, + "step": 4832 + }, + { + "epoch": 1.1143647682729998, + "grad_norm": 1.6108426528928312, + "learning_rate": 
8.942077126339182e-07, + "loss": 0.4320666193962097, + "step": 4833 + }, + { + "epoch": 1.1145953424025825, + "grad_norm": 1.3700008221476991, + "learning_rate": 8.938285986304762e-07, + "loss": 0.37623411417007446, + "step": 4834 + }, + { + "epoch": 1.114825916532165, + "grad_norm": 1.4274453986312428, + "learning_rate": 8.93449500060124e-07, + "loss": 0.4743962287902832, + "step": 4835 + }, + { + "epoch": 1.1150564906617477, + "grad_norm": 1.4687481503878526, + "learning_rate": 8.930704169779663e-07, + "loss": 0.4833221435546875, + "step": 4836 + }, + { + "epoch": 1.1152870647913304, + "grad_norm": 1.580828459296504, + "learning_rate": 8.926913494391074e-07, + "loss": 0.48811084032058716, + "step": 4837 + }, + { + "epoch": 1.1155176389209132, + "grad_norm": 1.4663777441823886, + "learning_rate": 8.923122974986487e-07, + "loss": 0.42525774240493774, + "step": 4838 + }, + { + "epoch": 1.1157482130504957, + "grad_norm": 1.4773669175093567, + "learning_rate": 8.919332612116884e-07, + "loss": 0.4347909688949585, + "step": 4839 + }, + { + "epoch": 1.1159787871800784, + "grad_norm": 1.9619203877260345, + "learning_rate": 8.915542406333241e-07, + "loss": 0.5085601806640625, + "step": 4840 + }, + { + "epoch": 1.116209361309661, + "grad_norm": 1.4214902735687815, + "learning_rate": 8.911752358186497e-07, + "loss": 0.4620482325553894, + "step": 4841 + }, + { + "epoch": 1.1164399354392438, + "grad_norm": 1.3147570239530335, + "learning_rate": 8.907962468227582e-07, + "loss": 0.44923216104507446, + "step": 4842 + }, + { + "epoch": 1.1166705095688263, + "grad_norm": 1.6422580107908513, + "learning_rate": 8.904172737007386e-07, + "loss": 0.547439694404602, + "step": 4843 + }, + { + "epoch": 1.116901083698409, + "grad_norm": 1.7769022711207687, + "learning_rate": 8.900383165076789e-07, + "loss": 0.4609268307685852, + "step": 4844 + }, + { + "epoch": 1.1171316578279917, + "grad_norm": 1.4046866803141593, + "learning_rate": 8.896593752986642e-07, + "loss": 0.41780030727386475, + 
"step": 4845 + }, + { + "epoch": 1.1173622319575744, + "grad_norm": 1.3641825367692086, + "learning_rate": 8.89280450128778e-07, + "loss": 0.506212592124939, + "step": 4846 + }, + { + "epoch": 1.117592806087157, + "grad_norm": 1.4049897839890735, + "learning_rate": 8.889015410531001e-07, + "loss": 0.4436545968055725, + "step": 4847 + }, + { + "epoch": 1.1178233802167397, + "grad_norm": 1.3856199735325436, + "learning_rate": 8.885226481267093e-07, + "loss": 0.4473826289176941, + "step": 4848 + }, + { + "epoch": 1.1180539543463224, + "grad_norm": 1.42622736433257, + "learning_rate": 8.881437714046815e-07, + "loss": 0.43499836325645447, + "step": 4849 + }, + { + "epoch": 1.118284528475905, + "grad_norm": 1.5927469786677344, + "learning_rate": 8.877649109420899e-07, + "loss": 0.522705078125, + "step": 4850 + }, + { + "epoch": 1.1185151026054876, + "grad_norm": 1.5596781330511842, + "learning_rate": 8.873860667940064e-07, + "loss": 0.42146036028862, + "step": 4851 + }, + { + "epoch": 1.1187456767350703, + "grad_norm": 1.649425162171124, + "learning_rate": 8.870072390154989e-07, + "loss": 0.5875130891799927, + "step": 4852 + }, + { + "epoch": 1.118976250864653, + "grad_norm": 1.6372722830693418, + "learning_rate": 8.866284276616345e-07, + "loss": 0.5187985301017761, + "step": 4853 + }, + { + "epoch": 1.1192068249942357, + "grad_norm": 2.6266893474509474, + "learning_rate": 8.86249632787477e-07, + "loss": 0.46115952730178833, + "step": 4854 + }, + { + "epoch": 1.1194373991238182, + "grad_norm": 1.4714921061709185, + "learning_rate": 8.858708544480886e-07, + "loss": 0.4926493167877197, + "step": 4855 + }, + { + "epoch": 1.119667973253401, + "grad_norm": 1.5525331026142626, + "learning_rate": 8.854920926985278e-07, + "loss": 0.44512006640434265, + "step": 4856 + }, + { + "epoch": 1.1198985473829837, + "grad_norm": 1.5145408688074757, + "learning_rate": 8.85113347593852e-07, + "loss": 0.45973241329193115, + "step": 4857 + }, + { + "epoch": 1.1201291215125664, + "grad_norm": 
1.5400172209521554, + "learning_rate": 8.847346191891157e-07, + "loss": 0.4915385842323303, + "step": 4858 + }, + { + "epoch": 1.1203596956421489, + "grad_norm": 1.4900152202768027, + "learning_rate": 8.843559075393701e-07, + "loss": 0.4457864463329315, + "step": 4859 + }, + { + "epoch": 1.1205902697717316, + "grad_norm": 1.3414730221020197, + "learning_rate": 8.839772126996658e-07, + "loss": 0.4782453775405884, + "step": 4860 + }, + { + "epoch": 1.1208208439013143, + "grad_norm": 1.3591384899787133, + "learning_rate": 8.835985347250492e-07, + "loss": 0.42789584398269653, + "step": 4861 + }, + { + "epoch": 1.121051418030897, + "grad_norm": 1.8532602863182117, + "learning_rate": 8.832198736705657e-07, + "loss": 0.49990910291671753, + "step": 4862 + }, + { + "epoch": 1.1212819921604795, + "grad_norm": 1.4158258863269764, + "learning_rate": 8.828412295912566e-07, + "loss": 0.3735005855560303, + "step": 4863 + }, + { + "epoch": 1.1215125662900622, + "grad_norm": 1.3744374187815367, + "learning_rate": 8.824626025421624e-07, + "loss": 0.402673602104187, + "step": 4864 + }, + { + "epoch": 1.121743140419645, + "grad_norm": 1.57241412674585, + "learning_rate": 8.820839925783198e-07, + "loss": 0.4675491452217102, + "step": 4865 + }, + { + "epoch": 1.1219737145492277, + "grad_norm": 2.0200104658377254, + "learning_rate": 8.817053997547645e-07, + "loss": 0.5098662376403809, + "step": 4866 + }, + { + "epoch": 1.1222042886788102, + "grad_norm": 1.3880207155981488, + "learning_rate": 8.813268241265278e-07, + "loss": 0.44478029012680054, + "step": 4867 + }, + { + "epoch": 1.1224348628083929, + "grad_norm": 1.4983402004688406, + "learning_rate": 8.809482657486401e-07, + "loss": 0.410754919052124, + "step": 4868 + }, + { + "epoch": 1.1226654369379756, + "grad_norm": 1.193726420763111, + "learning_rate": 8.805697246761288e-07, + "loss": 0.4198191165924072, + "step": 4869 + }, + { + "epoch": 1.1228960110675583, + "grad_norm": 1.6015778378598091, + "learning_rate": 
8.801912009640178e-07, + "loss": 0.5399911403656006, + "step": 4870 + }, + { + "epoch": 1.1231265851971408, + "grad_norm": 1.3209581029003303, + "learning_rate": 8.798126946673305e-07, + "loss": 0.3879680633544922, + "step": 4871 + }, + { + "epoch": 1.1233571593267235, + "grad_norm": 1.7893299917127135, + "learning_rate": 8.794342058410856e-07, + "loss": 0.4629073739051819, + "step": 4872 + }, + { + "epoch": 1.1235877334563062, + "grad_norm": 1.25180398717926, + "learning_rate": 8.790557345403013e-07, + "loss": 0.42299884557724, + "step": 4873 + }, + { + "epoch": 1.123818307585889, + "grad_norm": 1.5467146262725529, + "learning_rate": 8.786772808199912e-07, + "loss": 0.509437620639801, + "step": 4874 + }, + { + "epoch": 1.1240488817154715, + "grad_norm": 1.3436359029840506, + "learning_rate": 8.782988447351684e-07, + "loss": 0.4682687222957611, + "step": 4875 + }, + { + "epoch": 1.1242794558450542, + "grad_norm": 1.2884743737928093, + "learning_rate": 8.779204263408416e-07, + "loss": 0.41155606508255005, + "step": 4876 + }, + { + "epoch": 1.124510029974637, + "grad_norm": 1.6449136860944156, + "learning_rate": 8.775420256920182e-07, + "loss": 0.4705810844898224, + "step": 4877 + }, + { + "epoch": 1.1247406041042196, + "grad_norm": 1.4648471947605348, + "learning_rate": 8.771636428437022e-07, + "loss": 0.36571264266967773, + "step": 4878 + }, + { + "epoch": 1.124971178233802, + "grad_norm": 1.1768139651906544, + "learning_rate": 8.76785277850896e-07, + "loss": 0.36618396639823914, + "step": 4879 + }, + { + "epoch": 1.1252017523633848, + "grad_norm": 1.5334328638730685, + "learning_rate": 8.764069307685983e-07, + "loss": 0.4861210584640503, + "step": 4880 + }, + { + "epoch": 1.1254323264929675, + "grad_norm": 1.457839206264918, + "learning_rate": 8.760286016518056e-07, + "loss": 0.43346846103668213, + "step": 4881 + }, + { + "epoch": 1.12566290062255, + "grad_norm": 1.28421921022301, + "learning_rate": 8.756502905555123e-07, + "loss": 0.40088707208633423, + "step": 
4882 + }, + { + "epoch": 1.1258934747521328, + "grad_norm": 1.4643062187844458, + "learning_rate": 8.752719975347092e-07, + "loss": 0.4088619649410248, + "step": 4883 + }, + { + "epoch": 1.1261240488817155, + "grad_norm": 1.5527291710325282, + "learning_rate": 8.748937226443857e-07, + "loss": 0.4988909661769867, + "step": 4884 + }, + { + "epoch": 1.1263546230112982, + "grad_norm": 1.5377239167998313, + "learning_rate": 8.745154659395271e-07, + "loss": 0.47022196650505066, + "step": 4885 + }, + { + "epoch": 1.126585197140881, + "grad_norm": 1.3259626220698026, + "learning_rate": 8.741372274751178e-07, + "loss": 0.45005398988723755, + "step": 4886 + }, + { + "epoch": 1.1268157712704634, + "grad_norm": 1.5001674672720546, + "learning_rate": 8.737590073061376e-07, + "loss": 0.4632537364959717, + "step": 4887 + }, + { + "epoch": 1.1270463454000461, + "grad_norm": 1.2983235840008036, + "learning_rate": 8.733808054875653e-07, + "loss": 0.41034963726997375, + "step": 4888 + }, + { + "epoch": 1.1272769195296288, + "grad_norm": 1.423352740140202, + "learning_rate": 8.730026220743765e-07, + "loss": 0.5169668793678284, + "step": 4889 + }, + { + "epoch": 1.1275074936592113, + "grad_norm": 1.46630659535839, + "learning_rate": 8.726244571215431e-07, + "loss": 0.44972485303878784, + "step": 4890 + }, + { + "epoch": 1.127738067788794, + "grad_norm": 1.5712937661942725, + "learning_rate": 8.722463106840361e-07, + "loss": 0.4854368567466736, + "step": 4891 + }, + { + "epoch": 1.1279686419183768, + "grad_norm": 1.0525840961962005, + "learning_rate": 8.718681828168223e-07, + "loss": 0.3029147982597351, + "step": 4892 + }, + { + "epoch": 1.1281992160479595, + "grad_norm": 1.5856241308624208, + "learning_rate": 8.714900735748671e-07, + "loss": 0.4770504832267761, + "step": 4893 + }, + { + "epoch": 1.1284297901775422, + "grad_norm": 1.3799690323722245, + "learning_rate": 8.711119830131317e-07, + "loss": 0.48508110642433167, + "step": 4894 + }, + { + "epoch": 1.1286603643071247, + 
"grad_norm": 1.4227656672873528, + "learning_rate": 8.707339111865761e-07, + "loss": 0.43302488327026367, + "step": 4895 + }, + { + "epoch": 1.1288909384367074, + "grad_norm": 1.3481652076868464, + "learning_rate": 8.703558581501563e-07, + "loss": 0.5720575451850891, + "step": 4896 + }, + { + "epoch": 1.1291215125662901, + "grad_norm": 1.1736572520471924, + "learning_rate": 8.69977823958827e-07, + "loss": 0.48236098885536194, + "step": 4897 + }, + { + "epoch": 1.1293520866958726, + "grad_norm": 1.6539784416028527, + "learning_rate": 8.69599808667538e-07, + "loss": 0.48531901836395264, + "step": 4898 + }, + { + "epoch": 1.1295826608254553, + "grad_norm": 1.390226643422974, + "learning_rate": 8.69221812331239e-07, + "loss": 0.4150174856185913, + "step": 4899 + }, + { + "epoch": 1.129813234955038, + "grad_norm": 1.4594360531114157, + "learning_rate": 8.688438350048748e-07, + "loss": 0.4729560911655426, + "step": 4900 + }, + { + "epoch": 1.1300438090846208, + "grad_norm": 1.5805161631694824, + "learning_rate": 8.684658767433881e-07, + "loss": 0.5081748962402344, + "step": 4901 + }, + { + "epoch": 1.1302743832142035, + "grad_norm": 1.3577399194161552, + "learning_rate": 8.680879376017197e-07, + "loss": 0.4552333354949951, + "step": 4902 + }, + { + "epoch": 1.130504957343786, + "grad_norm": 1.666206186626053, + "learning_rate": 8.67710017634806e-07, + "loss": 0.4784387946128845, + "step": 4903 + }, + { + "epoch": 1.1307355314733687, + "grad_norm": 1.7781011363806714, + "learning_rate": 8.673321168975823e-07, + "loss": 0.46922338008880615, + "step": 4904 + }, + { + "epoch": 1.1309661056029514, + "grad_norm": 1.414520843561148, + "learning_rate": 8.669542354449797e-07, + "loss": 0.38181525468826294, + "step": 4905 + }, + { + "epoch": 1.131196679732534, + "grad_norm": 1.409807627526861, + "learning_rate": 8.665763733319278e-07, + "loss": 0.4729689359664917, + "step": 4906 + }, + { + "epoch": 1.1314272538621166, + "grad_norm": 1.3128859029806206, + "learning_rate": 
8.661985306133517e-07, + "loss": 0.3934294581413269, + "step": 4907 + }, + { + "epoch": 1.1316578279916993, + "grad_norm": 1.1525332387894895, + "learning_rate": 8.658207073441754e-07, + "loss": 0.40270352363586426, + "step": 4908 + }, + { + "epoch": 1.131888402121282, + "grad_norm": 1.245477282269021, + "learning_rate": 8.654429035793196e-07, + "loss": 0.43291163444519043, + "step": 4909 + }, + { + "epoch": 1.1321189762508648, + "grad_norm": 1.8011937733870678, + "learning_rate": 8.650651193737009e-07, + "loss": 0.5054877996444702, + "step": 4910 + }, + { + "epoch": 1.1323495503804473, + "grad_norm": 1.4188548576207016, + "learning_rate": 8.646873547822347e-07, + "loss": 0.5043776035308838, + "step": 4911 + }, + { + "epoch": 1.13258012451003, + "grad_norm": 1.511127988179462, + "learning_rate": 8.643096098598328e-07, + "loss": 0.4246225953102112, + "step": 4912 + }, + { + "epoch": 1.1328106986396127, + "grad_norm": 1.3198976342579845, + "learning_rate": 8.639318846614048e-07, + "loss": 0.4514849781990051, + "step": 4913 + }, + { + "epoch": 1.1330412727691952, + "grad_norm": 1.5409054507370947, + "learning_rate": 8.635541792418557e-07, + "loss": 0.4780477285385132, + "step": 4914 + }, + { + "epoch": 1.133271846898778, + "grad_norm": 1.4447509965410514, + "learning_rate": 8.631764936560899e-07, + "loss": 0.47164270281791687, + "step": 4915 + }, + { + "epoch": 1.1335024210283606, + "grad_norm": 1.4642572467177732, + "learning_rate": 8.62798827959007e-07, + "loss": 0.5462276339530945, + "step": 4916 + }, + { + "epoch": 1.1337329951579433, + "grad_norm": 1.3611348332418316, + "learning_rate": 8.624211822055055e-07, + "loss": 0.37229591608047485, + "step": 4917 + }, + { + "epoch": 1.133963569287526, + "grad_norm": 1.6004056206114348, + "learning_rate": 8.620435564504791e-07, + "loss": 0.46595901250839233, + "step": 4918 + }, + { + "epoch": 1.1341941434171086, + "grad_norm": 1.899603419019246, + "learning_rate": 8.616659507488201e-07, + "loss": 0.4645708203315735, + 
"step": 4919 + }, + { + "epoch": 1.1344247175466913, + "grad_norm": 1.3014565799840314, + "learning_rate": 8.612883651554173e-07, + "loss": 0.4309888482093811, + "step": 4920 + }, + { + "epoch": 1.134655291676274, + "grad_norm": 1.2254662174184374, + "learning_rate": 8.60910799725156e-07, + "loss": 0.4000548720359802, + "step": 4921 + }, + { + "epoch": 1.1348858658058565, + "grad_norm": 1.2990272231335294, + "learning_rate": 8.6053325451292e-07, + "loss": 0.41321274638175964, + "step": 4922 + }, + { + "epoch": 1.1351164399354392, + "grad_norm": 1.7479036509525407, + "learning_rate": 8.601557295735884e-07, + "loss": 0.38982951641082764, + "step": 4923 + }, + { + "epoch": 1.135347014065022, + "grad_norm": 1.3265126570648142, + "learning_rate": 8.597782249620394e-07, + "loss": 0.44623300433158875, + "step": 4924 + }, + { + "epoch": 1.1355775881946046, + "grad_norm": 1.6004563551212632, + "learning_rate": 8.594007407331458e-07, + "loss": 0.46876993775367737, + "step": 4925 + }, + { + "epoch": 1.1358081623241871, + "grad_norm": 1.4785026933128127, + "learning_rate": 8.590232769417803e-07, + "loss": 0.41345149278640747, + "step": 4926 + }, + { + "epoch": 1.1360387364537698, + "grad_norm": 1.6712340860086734, + "learning_rate": 8.586458336428095e-07, + "loss": 0.4199402332305908, + "step": 4927 + }, + { + "epoch": 1.1362693105833526, + "grad_norm": 1.5807454346525946, + "learning_rate": 8.582684108910998e-07, + "loss": 0.4424753785133362, + "step": 4928 + }, + { + "epoch": 1.1364998847129353, + "grad_norm": 1.5318763722061228, + "learning_rate": 8.57891008741513e-07, + "loss": 0.5066598057746887, + "step": 4929 + }, + { + "epoch": 1.1367304588425178, + "grad_norm": 1.409045447069904, + "learning_rate": 8.575136272489081e-07, + "loss": 0.45959407091140747, + "step": 4930 + }, + { + "epoch": 1.1369610329721005, + "grad_norm": 1.191773933725539, + "learning_rate": 8.571362664681415e-07, + "loss": 0.4579051733016968, + "step": 4931 + }, + { + "epoch": 1.1371916071016832, + 
"grad_norm": 1.4061203144708347, + "learning_rate": 8.567589264540665e-07, + "loss": 0.5125559568405151, + "step": 4932 + }, + { + "epoch": 1.137422181231266, + "grad_norm": 1.484125992313306, + "learning_rate": 8.563816072615335e-07, + "loss": 0.4236595630645752, + "step": 4933 + }, + { + "epoch": 1.1376527553608484, + "grad_norm": 1.3909472723060943, + "learning_rate": 8.56004308945389e-07, + "loss": 0.40187013149261475, + "step": 4934 + }, + { + "epoch": 1.1378833294904311, + "grad_norm": 1.7306785223672838, + "learning_rate": 8.556270315604778e-07, + "loss": 0.5069487690925598, + "step": 4935 + }, + { + "epoch": 1.1381139036200139, + "grad_norm": 1.2666499948179348, + "learning_rate": 8.552497751616406e-07, + "loss": 0.4032680094242096, + "step": 4936 + }, + { + "epoch": 1.1383444777495966, + "grad_norm": 1.5147949059405765, + "learning_rate": 8.548725398037158e-07, + "loss": 0.4745323061943054, + "step": 4937 + }, + { + "epoch": 1.138575051879179, + "grad_norm": 1.6025857024716508, + "learning_rate": 8.544953255415379e-07, + "loss": 0.5203470587730408, + "step": 4938 + }, + { + "epoch": 1.1388056260087618, + "grad_norm": 1.3018365690111693, + "learning_rate": 8.541181324299392e-07, + "loss": 0.3780457079410553, + "step": 4939 + }, + { + "epoch": 1.1390362001383445, + "grad_norm": 1.4908739703097478, + "learning_rate": 8.537409605237486e-07, + "loss": 0.4544069766998291, + "step": 4940 + }, + { + "epoch": 1.1392667742679272, + "grad_norm": 1.3726631913286653, + "learning_rate": 8.533638098777914e-07, + "loss": 0.3692469000816345, + "step": 4941 + }, + { + "epoch": 1.1394973483975097, + "grad_norm": 1.7461198015621147, + "learning_rate": 8.529866805468907e-07, + "loss": 0.4733508825302124, + "step": 4942 + }, + { + "epoch": 1.1397279225270924, + "grad_norm": 1.7055847796006547, + "learning_rate": 8.526095725858658e-07, + "loss": 0.5165152549743652, + "step": 4943 + }, + { + "epoch": 1.1399584966566751, + "grad_norm": 1.5781652989183093, + "learning_rate": 
8.522324860495336e-07, + "loss": 0.40220290422439575, + "step": 4944 + }, + { + "epoch": 1.1401890707862579, + "grad_norm": 1.676524129553008, + "learning_rate": 8.518554209927066e-07, + "loss": 0.511976957321167, + "step": 4945 + }, + { + "epoch": 1.1404196449158404, + "grad_norm": 1.4578766238891505, + "learning_rate": 8.514783774701959e-07, + "loss": 0.4472247362136841, + "step": 4946 + }, + { + "epoch": 1.140650219045423, + "grad_norm": 1.3731717985494665, + "learning_rate": 8.51101355536808e-07, + "loss": 0.4368797242641449, + "step": 4947 + }, + { + "epoch": 1.1408807931750058, + "grad_norm": 1.3383514367818596, + "learning_rate": 8.507243552473476e-07, + "loss": 0.3794320225715637, + "step": 4948 + }, + { + "epoch": 1.1411113673045885, + "grad_norm": 1.7604514892248042, + "learning_rate": 8.50347376656615e-07, + "loss": 0.5229817628860474, + "step": 4949 + }, + { + "epoch": 1.141341941434171, + "grad_norm": 1.4803188344976619, + "learning_rate": 8.499704198194075e-07, + "loss": 0.4771326780319214, + "step": 4950 + }, + { + "epoch": 1.1415725155637537, + "grad_norm": 1.406078110966921, + "learning_rate": 8.495934847905201e-07, + "loss": 0.45151978731155396, + "step": 4951 + }, + { + "epoch": 1.1418030896933364, + "grad_norm": 1.3579359781108167, + "learning_rate": 8.492165716247439e-07, + "loss": 0.3963208496570587, + "step": 4952 + }, + { + "epoch": 1.1420336638229192, + "grad_norm": 1.2797227148111936, + "learning_rate": 8.488396803768675e-07, + "loss": 0.37465882301330566, + "step": 4953 + }, + { + "epoch": 1.1422642379525016, + "grad_norm": 1.7257432451816517, + "learning_rate": 8.484628111016752e-07, + "loss": 0.437372088432312, + "step": 4954 + }, + { + "epoch": 1.1424948120820844, + "grad_norm": 1.3198726990576308, + "learning_rate": 8.480859638539492e-07, + "loss": 0.40495651960372925, + "step": 4955 + }, + { + "epoch": 1.142725386211667, + "grad_norm": 1.5937176142563847, + "learning_rate": 8.477091386884677e-07, + "loss": 0.5346927642822266, + 
"step": 4956 + }, + { + "epoch": 1.1429559603412498, + "grad_norm": 1.7035083737998966, + "learning_rate": 8.473323356600068e-07, + "loss": 0.42448925971984863, + "step": 4957 + }, + { + "epoch": 1.1431865344708323, + "grad_norm": 1.4329878189218077, + "learning_rate": 8.469555548233378e-07, + "loss": 0.4715193808078766, + "step": 4958 + }, + { + "epoch": 1.143417108600415, + "grad_norm": 1.5249370547485697, + "learning_rate": 8.465787962332301e-07, + "loss": 0.4721440076828003, + "step": 4959 + }, + { + "epoch": 1.1436476827299977, + "grad_norm": 1.4963659204960478, + "learning_rate": 8.462020599444495e-07, + "loss": 0.5478333234786987, + "step": 4960 + }, + { + "epoch": 1.1438782568595804, + "grad_norm": 1.5534391969085817, + "learning_rate": 8.458253460117577e-07, + "loss": 0.4005582928657532, + "step": 4961 + }, + { + "epoch": 1.144108830989163, + "grad_norm": 1.4816205297794078, + "learning_rate": 8.454486544899146e-07, + "loss": 0.43886178731918335, + "step": 4962 + }, + { + "epoch": 1.1443394051187457, + "grad_norm": 1.2296294541393762, + "learning_rate": 8.450719854336758e-07, + "loss": 0.4404095709323883, + "step": 4963 + }, + { + "epoch": 1.1445699792483284, + "grad_norm": 1.5412493838775327, + "learning_rate": 8.446953388977943e-07, + "loss": 0.5386335849761963, + "step": 4964 + }, + { + "epoch": 1.144800553377911, + "grad_norm": 1.5969922474986569, + "learning_rate": 8.44318714937019e-07, + "loss": 0.4576258659362793, + "step": 4965 + }, + { + "epoch": 1.1450311275074936, + "grad_norm": 1.2968718824878773, + "learning_rate": 8.439421136060964e-07, + "loss": 0.4619024991989136, + "step": 4966 + }, + { + "epoch": 1.1452617016370763, + "grad_norm": 1.4106895392209726, + "learning_rate": 8.435655349597689e-07, + "loss": 0.4071081876754761, + "step": 4967 + }, + { + "epoch": 1.145492275766659, + "grad_norm": 1.3534750631649812, + "learning_rate": 8.431889790527769e-07, + "loss": 0.4605948328971863, + "step": 4968 + }, + { + "epoch": 1.1457228498962417, + 
"grad_norm": 1.4715761177473734, + "learning_rate": 8.428124459398554e-07, + "loss": 0.46706438064575195, + "step": 4969 + }, + { + "epoch": 1.1459534240258242, + "grad_norm": 1.480784825415981, + "learning_rate": 8.424359356757383e-07, + "loss": 0.39674657583236694, + "step": 4970 + }, + { + "epoch": 1.146183998155407, + "grad_norm": 1.4606371633345823, + "learning_rate": 8.42059448315155e-07, + "loss": 0.4421246647834778, + "step": 4971 + }, + { + "epoch": 1.1464145722849897, + "grad_norm": 1.6921922922853865, + "learning_rate": 8.416829839128312e-07, + "loss": 0.5220682621002197, + "step": 4972 + }, + { + "epoch": 1.1466451464145724, + "grad_norm": 1.338254387958773, + "learning_rate": 8.413065425234904e-07, + "loss": 0.40189129114151, + "step": 4973 + }, + { + "epoch": 1.1468757205441549, + "grad_norm": 1.3011913252808138, + "learning_rate": 8.409301242018517e-07, + "loss": 0.448421835899353, + "step": 4974 + }, + { + "epoch": 1.1471062946737376, + "grad_norm": 1.5996651322296722, + "learning_rate": 8.405537290026318e-07, + "loss": 0.49476757645606995, + "step": 4975 + }, + { + "epoch": 1.1473368688033203, + "grad_norm": 1.4573872381246367, + "learning_rate": 8.401773569805431e-07, + "loss": 0.3888528347015381, + "step": 4976 + }, + { + "epoch": 1.1475674429329028, + "grad_norm": 1.4760563096119323, + "learning_rate": 8.398010081902956e-07, + "loss": 0.49057653546333313, + "step": 4977 + }, + { + "epoch": 1.1477980170624855, + "grad_norm": 1.3851559333900214, + "learning_rate": 8.39424682686595e-07, + "loss": 0.41700610518455505, + "step": 4978 + }, + { + "epoch": 1.1480285911920682, + "grad_norm": 1.5382531029836037, + "learning_rate": 8.390483805241441e-07, + "loss": 0.4801902770996094, + "step": 4979 + }, + { + "epoch": 1.148259165321651, + "grad_norm": 1.5691797878096674, + "learning_rate": 8.386721017576426e-07, + "loss": 0.5438926219940186, + "step": 4980 + }, + { + "epoch": 1.1484897394512337, + "grad_norm": 1.3886510011393631, + "learning_rate": 
8.382958464417857e-07, + "loss": 0.3991735577583313, + "step": 4981 + }, + { + "epoch": 1.1487203135808162, + "grad_norm": 1.5064271527131172, + "learning_rate": 8.379196146312664e-07, + "loss": 0.4918370246887207, + "step": 4982 + }, + { + "epoch": 1.1489508877103989, + "grad_norm": 1.713149481922198, + "learning_rate": 8.375434063807737e-07, + "loss": 0.5280467867851257, + "step": 4983 + }, + { + "epoch": 1.1491814618399816, + "grad_norm": 1.2990876069782782, + "learning_rate": 8.371672217449936e-07, + "loss": 0.4186179041862488, + "step": 4984 + }, + { + "epoch": 1.149412035969564, + "grad_norm": 1.3742464834005608, + "learning_rate": 8.367910607786079e-07, + "loss": 0.3698224723339081, + "step": 4985 + }, + { + "epoch": 1.1496426100991468, + "grad_norm": 1.4766762383505605, + "learning_rate": 8.364149235362956e-07, + "loss": 0.45402267575263977, + "step": 4986 + }, + { + "epoch": 1.1498731842287295, + "grad_norm": 1.530758978566143, + "learning_rate": 8.36038810072732e-07, + "loss": 0.5145484209060669, + "step": 4987 + }, + { + "epoch": 1.1501037583583122, + "grad_norm": 1.2257671687651395, + "learning_rate": 8.356627204425893e-07, + "loss": 0.4293951392173767, + "step": 4988 + }, + { + "epoch": 1.150334332487895, + "grad_norm": 1.5415847348488914, + "learning_rate": 8.352866547005354e-07, + "loss": 0.3916272521018982, + "step": 4989 + }, + { + "epoch": 1.1505649066174775, + "grad_norm": 1.6777087516004896, + "learning_rate": 8.349106129012357e-07, + "loss": 0.40171611309051514, + "step": 4990 + }, + { + "epoch": 1.1507954807470602, + "grad_norm": 1.5767244212385862, + "learning_rate": 8.345345950993518e-07, + "loss": 0.49580252170562744, + "step": 4991 + }, + { + "epoch": 1.151026054876643, + "grad_norm": 1.491822308561489, + "learning_rate": 8.34158601349541e-07, + "loss": 0.4521256685256958, + "step": 4992 + }, + { + "epoch": 1.1512566290062254, + "grad_norm": 1.5317445246777317, + "learning_rate": 8.337826317064585e-07, + "loss": 0.3920813798904419, + 
"step": 4993 + }, + { + "epoch": 1.151487203135808, + "grad_norm": 1.4336055128806646, + "learning_rate": 8.334066862247547e-07, + "loss": 0.4263145923614502, + "step": 4994 + }, + { + "epoch": 1.1517177772653908, + "grad_norm": 1.513949850078891, + "learning_rate": 8.330307649590779e-07, + "loss": 0.4746140241622925, + "step": 4995 + }, + { + "epoch": 1.1519483513949735, + "grad_norm": 1.6708741885004843, + "learning_rate": 8.326548679640713e-07, + "loss": 0.37520158290863037, + "step": 4996 + }, + { + "epoch": 1.1521789255245563, + "grad_norm": 1.4060610690176367, + "learning_rate": 8.322789952943759e-07, + "loss": 0.4481951892375946, + "step": 4997 + }, + { + "epoch": 1.1524094996541387, + "grad_norm": 1.4336851088246751, + "learning_rate": 8.319031470046281e-07, + "loss": 0.40319859981536865, + "step": 4998 + }, + { + "epoch": 1.1526400737837215, + "grad_norm": 1.805948160607668, + "learning_rate": 8.315273231494615e-07, + "loss": 0.47720152139663696, + "step": 4999 + }, + { + "epoch": 1.1528706479133042, + "grad_norm": 1.2994404231083814, + "learning_rate": 8.311515237835063e-07, + "loss": 0.4027557969093323, + "step": 5000 + }, + { + "epoch": 1.1531012220428867, + "grad_norm": 1.5346692874582604, + "learning_rate": 8.307757489613878e-07, + "loss": 0.3939552307128906, + "step": 5001 + }, + { + "epoch": 1.1533317961724694, + "grad_norm": 1.541801101637957, + "learning_rate": 8.303999987377295e-07, + "loss": 0.379425585269928, + "step": 5002 + }, + { + "epoch": 1.153562370302052, + "grad_norm": 1.3222707927494204, + "learning_rate": 8.300242731671499e-07, + "loss": 0.46231499314308167, + "step": 5003 + }, + { + "epoch": 1.1537929444316348, + "grad_norm": 1.5623820882279815, + "learning_rate": 8.296485723042654e-07, + "loss": 0.4639621675014496, + "step": 5004 + }, + { + "epoch": 1.1540235185612175, + "grad_norm": 1.4577901713449948, + "learning_rate": 8.29272896203687e-07, + "loss": 0.49264025688171387, + "step": 5005 + }, + { + "epoch": 1.1542540926908, + 
"grad_norm": 1.2796677798690286, + "learning_rate": 8.288972449200233e-07, + "loss": 0.4145156145095825, + "step": 5006 + }, + { + "epoch": 1.1544846668203828, + "grad_norm": 1.3338594060824709, + "learning_rate": 8.285216185078792e-07, + "loss": 0.39693811535835266, + "step": 5007 + }, + { + "epoch": 1.1547152409499655, + "grad_norm": 1.356694069152444, + "learning_rate": 8.281460170218561e-07, + "loss": 0.46224820613861084, + "step": 5008 + }, + { + "epoch": 1.154945815079548, + "grad_norm": 1.5380330607680774, + "learning_rate": 8.277704405165506e-07, + "loss": 0.48868128657341003, + "step": 5009 + }, + { + "epoch": 1.1551763892091307, + "grad_norm": 1.4024811483349113, + "learning_rate": 8.273948890465574e-07, + "loss": 0.5127776265144348, + "step": 5010 + }, + { + "epoch": 1.1554069633387134, + "grad_norm": 1.4092381840768406, + "learning_rate": 8.270193626664665e-07, + "loss": 0.4039389491081238, + "step": 5011 + }, + { + "epoch": 1.1556375374682961, + "grad_norm": 1.5807780806971976, + "learning_rate": 8.266438614308641e-07, + "loss": 0.4224502444267273, + "step": 5012 + }, + { + "epoch": 1.1558681115978788, + "grad_norm": 1.42726619115002, + "learning_rate": 8.262683853943335e-07, + "loss": 0.4392918050289154, + "step": 5013 + }, + { + "epoch": 1.1560986857274613, + "grad_norm": 1.5001771531608157, + "learning_rate": 8.258929346114534e-07, + "loss": 0.5055289268493652, + "step": 5014 + }, + { + "epoch": 1.156329259857044, + "grad_norm": 1.3839083181087675, + "learning_rate": 8.255175091368003e-07, + "loss": 0.43851351737976074, + "step": 5015 + }, + { + "epoch": 1.1565598339866268, + "grad_norm": 1.576893376736649, + "learning_rate": 8.251421090249451e-07, + "loss": 0.4557814598083496, + "step": 5016 + }, + { + "epoch": 1.1567904081162093, + "grad_norm": 1.2994912796642604, + "learning_rate": 8.247667343304568e-07, + "loss": 0.4288882613182068, + "step": 5017 + }, + { + "epoch": 1.157020982245792, + "grad_norm": 1.4237104241903844, + "learning_rate": 
8.243913851078994e-07, + "loss": 0.42711886763572693, + "step": 5018 + }, + { + "epoch": 1.1572515563753747, + "grad_norm": 1.8597293679946851, + "learning_rate": 8.240160614118342e-07, + "loss": 0.515809953212738, + "step": 5019 + }, + { + "epoch": 1.1574821305049574, + "grad_norm": 1.828777504717114, + "learning_rate": 8.236407632968182e-07, + "loss": 0.5754632949829102, + "step": 5020 + }, + { + "epoch": 1.1577127046345401, + "grad_norm": 1.553176542229762, + "learning_rate": 8.232654908174038e-07, + "loss": 0.4601830244064331, + "step": 5021 + }, + { + "epoch": 1.1579432787641226, + "grad_norm": 1.500802040492981, + "learning_rate": 8.228902440281422e-07, + "loss": 0.4740797281265259, + "step": 5022 + }, + { + "epoch": 1.1581738528937053, + "grad_norm": 1.688304974088827, + "learning_rate": 8.225150229835781e-07, + "loss": 0.4066367745399475, + "step": 5023 + }, + { + "epoch": 1.158404427023288, + "grad_norm": 1.357187761009418, + "learning_rate": 8.221398277382546e-07, + "loss": 0.4664362668991089, + "step": 5024 + }, + { + "epoch": 1.1586350011528705, + "grad_norm": 1.3912425171719864, + "learning_rate": 8.217646583467093e-07, + "loss": 0.5204637050628662, + "step": 5025 + }, + { + "epoch": 1.1588655752824533, + "grad_norm": 1.4227227145637968, + "learning_rate": 8.213895148634775e-07, + "loss": 0.4991419017314911, + "step": 5026 + }, + { + "epoch": 1.159096149412036, + "grad_norm": 1.2844880437163813, + "learning_rate": 8.210143973430896e-07, + "loss": 0.40420424938201904, + "step": 5027 + }, + { + "epoch": 1.1593267235416187, + "grad_norm": 1.4946107412544847, + "learning_rate": 8.206393058400736e-07, + "loss": 0.523331880569458, + "step": 5028 + }, + { + "epoch": 1.1595572976712014, + "grad_norm": 1.4908780499938201, + "learning_rate": 8.202642404089516e-07, + "loss": 0.5019216537475586, + "step": 5029 + }, + { + "epoch": 1.159787871800784, + "grad_norm": 1.6451488656605473, + "learning_rate": 8.198892011042442e-07, + "loss": 0.522672712802887, + "step": 
5030 + }, + { + "epoch": 1.1600184459303666, + "grad_norm": 1.505727418733034, + "learning_rate": 8.195141879804668e-07, + "loss": 0.418377548456192, + "step": 5031 + }, + { + "epoch": 1.1602490200599493, + "grad_norm": 1.5635210393713965, + "learning_rate": 8.191392010921312e-07, + "loss": 0.4914432764053345, + "step": 5032 + }, + { + "epoch": 1.1604795941895318, + "grad_norm": 1.3929576184838368, + "learning_rate": 8.187642404937459e-07, + "loss": 0.42149683833122253, + "step": 5033 + }, + { + "epoch": 1.1607101683191146, + "grad_norm": 1.6811040317548793, + "learning_rate": 8.183893062398145e-07, + "loss": 0.5637058019638062, + "step": 5034 + }, + { + "epoch": 1.1609407424486973, + "grad_norm": 1.2252559322458123, + "learning_rate": 8.180143983848387e-07, + "loss": 0.49930211901664734, + "step": 5035 + }, + { + "epoch": 1.16117131657828, + "grad_norm": 1.626369547940987, + "learning_rate": 8.176395169833139e-07, + "loss": 0.4217071235179901, + "step": 5036 + }, + { + "epoch": 1.1614018907078625, + "grad_norm": 1.9654976691842632, + "learning_rate": 8.172646620897336e-07, + "loss": 0.4208733141422272, + "step": 5037 + }, + { + "epoch": 1.1616324648374452, + "grad_norm": 1.434216808832, + "learning_rate": 8.168898337585866e-07, + "loss": 0.42970529198646545, + "step": 5038 + }, + { + "epoch": 1.161863038967028, + "grad_norm": 1.429859410744686, + "learning_rate": 8.165150320443584e-07, + "loss": 0.49482622742652893, + "step": 5039 + }, + { + "epoch": 1.1620936130966106, + "grad_norm": 1.2888747437309156, + "learning_rate": 8.161402570015297e-07, + "loss": 0.4106384217739105, + "step": 5040 + }, + { + "epoch": 1.1623241872261931, + "grad_norm": 1.8632515092828725, + "learning_rate": 8.157655086845778e-07, + "loss": 0.4550397992134094, + "step": 5041 + }, + { + "epoch": 1.1625547613557758, + "grad_norm": 1.4636128502892785, + "learning_rate": 8.153907871479768e-07, + "loss": 0.5144504308700562, + "step": 5042 + }, + { + "epoch": 1.1627853354853586, + "grad_norm": 
1.4308354935014596, + "learning_rate": 8.150160924461953e-07, + "loss": 0.3970009684562683, + "step": 5043 + }, + { + "epoch": 1.1630159096149413, + "grad_norm": 1.4674063038688332, + "learning_rate": 8.146414246336998e-07, + "loss": 0.45825856924057007, + "step": 5044 + }, + { + "epoch": 1.1632464837445238, + "grad_norm": 1.6850972190756333, + "learning_rate": 8.142667837649515e-07, + "loss": 0.4515247344970703, + "step": 5045 + }, + { + "epoch": 1.1634770578741065, + "grad_norm": 1.347770803032681, + "learning_rate": 8.13892169894409e-07, + "loss": 0.41265833377838135, + "step": 5046 + }, + { + "epoch": 1.1637076320036892, + "grad_norm": 1.4117996459358377, + "learning_rate": 8.135175830765254e-07, + "loss": 0.39820557832717896, + "step": 5047 + }, + { + "epoch": 1.163938206133272, + "grad_norm": 1.4272016239744356, + "learning_rate": 8.131430233657514e-07, + "loss": 0.41528987884521484, + "step": 5048 + }, + { + "epoch": 1.1641687802628544, + "grad_norm": 1.3404996701874776, + "learning_rate": 8.127684908165323e-07, + "loss": 0.4453636407852173, + "step": 5049 + }, + { + "epoch": 1.1643993543924371, + "grad_norm": 1.846029547761043, + "learning_rate": 8.123939854833107e-07, + "loss": 0.45008519291877747, + "step": 5050 + }, + { + "epoch": 1.1646299285220199, + "grad_norm": 1.7254544812081525, + "learning_rate": 8.120195074205249e-07, + "loss": 0.456550657749176, + "step": 5051 + }, + { + "epoch": 1.1648605026516026, + "grad_norm": 1.4455041595835194, + "learning_rate": 8.116450566826086e-07, + "loss": 0.44465887546539307, + "step": 5052 + }, + { + "epoch": 1.165091076781185, + "grad_norm": 1.4606872040412728, + "learning_rate": 8.112706333239923e-07, + "loss": 0.4769172668457031, + "step": 5053 + }, + { + "epoch": 1.1653216509107678, + "grad_norm": 1.5800176181940382, + "learning_rate": 8.108962373991019e-07, + "loss": 0.42662739753723145, + "step": 5054 + }, + { + "epoch": 1.1655522250403505, + "grad_norm": 1.533727299161298, + "learning_rate": 
8.105218689623603e-07, + "loss": 0.4923250079154968, + "step": 5055 + }, + { + "epoch": 1.1657827991699332, + "grad_norm": 1.5783599756682145, + "learning_rate": 8.10147528068185e-07, + "loss": 0.42462587356567383, + "step": 5056 + }, + { + "epoch": 1.1660133732995157, + "grad_norm": 1.3458818448335859, + "learning_rate": 8.097732147709908e-07, + "loss": 0.47610223293304443, + "step": 5057 + }, + { + "epoch": 1.1662439474290984, + "grad_norm": 1.6207397386125497, + "learning_rate": 8.093989291251875e-07, + "loss": 0.47519630193710327, + "step": 5058 + }, + { + "epoch": 1.1664745215586811, + "grad_norm": 1.3901575117179885, + "learning_rate": 8.090246711851819e-07, + "loss": 0.38865840435028076, + "step": 5059 + }, + { + "epoch": 1.1667050956882639, + "grad_norm": 1.271312682478528, + "learning_rate": 8.086504410053757e-07, + "loss": 0.39990776777267456, + "step": 5060 + }, + { + "epoch": 1.1669356698178464, + "grad_norm": 1.4665951386644982, + "learning_rate": 8.082762386401669e-07, + "loss": 0.4330836534500122, + "step": 5061 + }, + { + "epoch": 1.167166243947429, + "grad_norm": 1.286707043518209, + "learning_rate": 8.079020641439504e-07, + "loss": 0.4285934865474701, + "step": 5062 + }, + { + "epoch": 1.1673968180770118, + "grad_norm": 1.7499199825760443, + "learning_rate": 8.075279175711152e-07, + "loss": 0.3900645077228546, + "step": 5063 + }, + { + "epoch": 1.1676273922065945, + "grad_norm": 1.3606445329404238, + "learning_rate": 8.07153798976048e-07, + "loss": 0.48145759105682373, + "step": 5064 + }, + { + "epoch": 1.167857966336177, + "grad_norm": 1.7592322949259351, + "learning_rate": 8.067797084131305e-07, + "loss": 0.4239045977592468, + "step": 5065 + }, + { + "epoch": 1.1680885404657597, + "grad_norm": 1.7501505795878665, + "learning_rate": 8.064056459367409e-07, + "loss": 0.55517578125, + "step": 5066 + }, + { + "epoch": 1.1683191145953424, + "grad_norm": 1.588400616006081, + "learning_rate": 8.060316116012524e-07, + "loss": 0.4956046938896179, + 
"step": 5067 + }, + { + "epoch": 1.1685496887249252, + "grad_norm": 1.3607022789051413, + "learning_rate": 8.05657605461035e-07, + "loss": 0.4051878750324249, + "step": 5068 + }, + { + "epoch": 1.1687802628545076, + "grad_norm": 1.6471264462607456, + "learning_rate": 8.052836275704541e-07, + "loss": 0.47389912605285645, + "step": 5069 + }, + { + "epoch": 1.1690108369840904, + "grad_norm": 1.3462872241997197, + "learning_rate": 8.049096779838717e-07, + "loss": 0.5023842453956604, + "step": 5070 + }, + { + "epoch": 1.169241411113673, + "grad_norm": 1.3943514778037218, + "learning_rate": 8.045357567556449e-07, + "loss": 0.4895137548446655, + "step": 5071 + }, + { + "epoch": 1.1694719852432558, + "grad_norm": 1.5328176046123796, + "learning_rate": 8.041618639401264e-07, + "loss": 0.47874224185943604, + "step": 5072 + }, + { + "epoch": 1.1697025593728383, + "grad_norm": 1.4666773972258982, + "learning_rate": 8.037879995916659e-07, + "loss": 0.4784395694732666, + "step": 5073 + }, + { + "epoch": 1.169933133502421, + "grad_norm": 1.4433652991816976, + "learning_rate": 8.034141637646079e-07, + "loss": 0.45289772748947144, + "step": 5074 + }, + { + "epoch": 1.1701637076320037, + "grad_norm": 1.931933746015264, + "learning_rate": 8.030403565132942e-07, + "loss": 0.5375204682350159, + "step": 5075 + }, + { + "epoch": 1.1703942817615864, + "grad_norm": 1.4956339972756536, + "learning_rate": 8.026665778920602e-07, + "loss": 0.45003899931907654, + "step": 5076 + }, + { + "epoch": 1.170624855891169, + "grad_norm": 1.348037979358877, + "learning_rate": 8.022928279552392e-07, + "loss": 0.4236389994621277, + "step": 5077 + }, + { + "epoch": 1.1708554300207517, + "grad_norm": 1.3333943245649609, + "learning_rate": 8.019191067571592e-07, + "loss": 0.43182557821273804, + "step": 5078 + }, + { + "epoch": 1.1710860041503344, + "grad_norm": 1.7521692166476222, + "learning_rate": 8.01545414352145e-07, + "loss": 0.5171953439712524, + "step": 5079 + }, + { + "epoch": 1.171316578279917, + 
"grad_norm": 1.5319548219026522, + "learning_rate": 8.011717507945157e-07, + "loss": 0.5084770321846008, + "step": 5080 + }, + { + "epoch": 1.1715471524094996, + "grad_norm": 1.6342595542262888, + "learning_rate": 8.007981161385876e-07, + "loss": 0.4685532748699188, + "step": 5081 + }, + { + "epoch": 1.1717777265390823, + "grad_norm": 1.5086552244362486, + "learning_rate": 8.004245104386724e-07, + "loss": 0.4647448658943176, + "step": 5082 + }, + { + "epoch": 1.172008300668665, + "grad_norm": 1.4914913702780284, + "learning_rate": 8.000509337490768e-07, + "loss": 0.4038098454475403, + "step": 5083 + }, + { + "epoch": 1.1722388747982477, + "grad_norm": 1.435384500623052, + "learning_rate": 7.996773861241047e-07, + "loss": 0.4153759479522705, + "step": 5084 + }, + { + "epoch": 1.1724694489278302, + "grad_norm": 1.5573715225755111, + "learning_rate": 7.993038676180545e-07, + "loss": 0.4569447636604309, + "step": 5085 + }, + { + "epoch": 1.172700023057413, + "grad_norm": 1.4307958679817, + "learning_rate": 7.989303782852215e-07, + "loss": 0.4419426918029785, + "step": 5086 + }, + { + "epoch": 1.1729305971869957, + "grad_norm": 1.4177391878017933, + "learning_rate": 7.985569181798955e-07, + "loss": 0.3902894854545593, + "step": 5087 + }, + { + "epoch": 1.1731611713165782, + "grad_norm": 1.3935681641299988, + "learning_rate": 7.981834873563631e-07, + "loss": 0.4066358208656311, + "step": 5088 + }, + { + "epoch": 1.1733917454461609, + "grad_norm": 1.579270038843054, + "learning_rate": 7.978100858689059e-07, + "loss": 0.4589639902114868, + "step": 5089 + }, + { + "epoch": 1.1736223195757436, + "grad_norm": 1.5868805646941586, + "learning_rate": 7.974367137718024e-07, + "loss": 0.4431188106536865, + "step": 5090 + }, + { + "epoch": 1.1738528937053263, + "grad_norm": 1.3420666663317198, + "learning_rate": 7.970633711193252e-07, + "loss": 0.43412742018699646, + "step": 5091 + }, + { + "epoch": 1.174083467834909, + "grad_norm": 1.360898150528172, + "learning_rate": 
7.966900579657435e-07, + "loss": 0.40296387672424316, + "step": 5092 + }, + { + "epoch": 1.1743140419644915, + "grad_norm": 1.4702894316239854, + "learning_rate": 7.963167743653228e-07, + "loss": 0.4814741611480713, + "step": 5093 + }, + { + "epoch": 1.1745446160940742, + "grad_norm": 1.7678935112109417, + "learning_rate": 7.959435203723228e-07, + "loss": 0.4412423372268677, + "step": 5094 + }, + { + "epoch": 1.174775190223657, + "grad_norm": 1.698823813376211, + "learning_rate": 7.955702960410006e-07, + "loss": 0.49773266911506653, + "step": 5095 + }, + { + "epoch": 1.1750057643532394, + "grad_norm": 1.445996901779518, + "learning_rate": 7.951971014256073e-07, + "loss": 0.4657529592514038, + "step": 5096 + }, + { + "epoch": 1.1752363384828222, + "grad_norm": 1.4844953949134, + "learning_rate": 7.94823936580391e-07, + "loss": 0.4062782824039459, + "step": 5097 + }, + { + "epoch": 1.1754669126124049, + "grad_norm": 1.3280643963390701, + "learning_rate": 7.944508015595948e-07, + "loss": 0.4154980182647705, + "step": 5098 + }, + { + "epoch": 1.1756974867419876, + "grad_norm": 1.3235405382692107, + "learning_rate": 7.940776964174582e-07, + "loss": 0.4724680185317993, + "step": 5099 + }, + { + "epoch": 1.1759280608715703, + "grad_norm": 1.4212228031547876, + "learning_rate": 7.937046212082149e-07, + "loss": 0.48808538913726807, + "step": 5100 + }, + { + "epoch": 1.1761586350011528, + "grad_norm": 1.3949555418133748, + "learning_rate": 7.933315759860959e-07, + "loss": 0.4985845983028412, + "step": 5101 + }, + { + "epoch": 1.1763892091307355, + "grad_norm": 1.2192149824969183, + "learning_rate": 7.92958560805327e-07, + "loss": 0.3735587000846863, + "step": 5102 + }, + { + "epoch": 1.1766197832603182, + "grad_norm": 1.3793872147262238, + "learning_rate": 7.925855757201294e-07, + "loss": 0.4198414385318756, + "step": 5103 + }, + { + "epoch": 1.1768503573899007, + "grad_norm": 1.7231390796467927, + "learning_rate": 7.922126207847204e-07, + "loss": 0.41973787546157837, + 
"step": 5104 + }, + { + "epoch": 1.1770809315194835, + "grad_norm": 1.8258365265115961, + "learning_rate": 7.918396960533128e-07, + "loss": 0.5179545283317566, + "step": 5105 + }, + { + "epoch": 1.1773115056490662, + "grad_norm": 1.5757377934881964, + "learning_rate": 7.914668015801153e-07, + "loss": 0.4917227625846863, + "step": 5106 + }, + { + "epoch": 1.1775420797786489, + "grad_norm": 1.5132865673859617, + "learning_rate": 7.910939374193312e-07, + "loss": 0.41775548458099365, + "step": 5107 + }, + { + "epoch": 1.1777726539082316, + "grad_norm": 1.484971286444874, + "learning_rate": 7.907211036251608e-07, + "loss": 0.45468997955322266, + "step": 5108 + }, + { + "epoch": 1.178003228037814, + "grad_norm": 1.292166499414124, + "learning_rate": 7.903483002517988e-07, + "loss": 0.3749620318412781, + "step": 5109 + }, + { + "epoch": 1.1782338021673968, + "grad_norm": 1.3945828421286317, + "learning_rate": 7.899755273534365e-07, + "loss": 0.48940956592559814, + "step": 5110 + }, + { + "epoch": 1.1784643762969795, + "grad_norm": 1.3575927994558319, + "learning_rate": 7.896027849842594e-07, + "loss": 0.4561386704444885, + "step": 5111 + }, + { + "epoch": 1.178694950426562, + "grad_norm": 1.4968176209501343, + "learning_rate": 7.892300731984498e-07, + "loss": 0.441898375749588, + "step": 5112 + }, + { + "epoch": 1.1789255245561447, + "grad_norm": 1.7617220832230103, + "learning_rate": 7.888573920501856e-07, + "loss": 0.43445056676864624, + "step": 5113 + }, + { + "epoch": 1.1791560986857275, + "grad_norm": 1.4680500200302005, + "learning_rate": 7.884847415936389e-07, + "loss": 0.42653167247772217, + "step": 5114 + }, + { + "epoch": 1.1793866728153102, + "grad_norm": 1.3867120793190437, + "learning_rate": 7.881121218829787e-07, + "loss": 0.42003321647644043, + "step": 5115 + }, + { + "epoch": 1.179617246944893, + "grad_norm": 1.613544333660259, + "learning_rate": 7.87739532972369e-07, + "loss": 0.4920128881931305, + "step": 5116 + }, + { + "epoch": 1.1798478210744754, + 
"grad_norm": 1.430783098871577, + "learning_rate": 7.873669749159697e-07, + "loss": 0.49529707431793213, + "step": 5117 + }, + { + "epoch": 1.180078395204058, + "grad_norm": 1.4915607575501106, + "learning_rate": 7.869944477679351e-07, + "loss": 0.4813005328178406, + "step": 5118 + }, + { + "epoch": 1.1803089693336408, + "grad_norm": 1.4923304237688, + "learning_rate": 7.866219515824168e-07, + "loss": 0.47239556908607483, + "step": 5119 + }, + { + "epoch": 1.1805395434632233, + "grad_norm": 1.7203098580351979, + "learning_rate": 7.862494864135596e-07, + "loss": 0.4808405935764313, + "step": 5120 + }, + { + "epoch": 1.180770117592806, + "grad_norm": 1.5206410201181635, + "learning_rate": 7.858770523155066e-07, + "loss": 0.44946521520614624, + "step": 5121 + }, + { + "epoch": 1.1810006917223888, + "grad_norm": 1.8958199353441048, + "learning_rate": 7.85504649342394e-07, + "loss": 0.5344874858856201, + "step": 5122 + }, + { + "epoch": 1.1812312658519715, + "grad_norm": 1.729692211161555, + "learning_rate": 7.851322775483542e-07, + "loss": 0.49354079365730286, + "step": 5123 + }, + { + "epoch": 1.1814618399815542, + "grad_norm": 1.6407900723292905, + "learning_rate": 7.847599369875155e-07, + "loss": 0.414085328578949, + "step": 5124 + }, + { + "epoch": 1.1816924141111367, + "grad_norm": 1.51838750003237, + "learning_rate": 7.843876277140013e-07, + "loss": 0.4638150632381439, + "step": 5125 + }, + { + "epoch": 1.1819229882407194, + "grad_norm": 1.5309477954820934, + "learning_rate": 7.84015349781931e-07, + "loss": 0.39239877462387085, + "step": 5126 + }, + { + "epoch": 1.1821535623703021, + "grad_norm": 1.456140160914471, + "learning_rate": 7.83643103245418e-07, + "loss": 0.46846455335617065, + "step": 5127 + }, + { + "epoch": 1.1823841364998846, + "grad_norm": 1.7368044200229882, + "learning_rate": 7.832708881585729e-07, + "loss": 0.5257229804992676, + "step": 5128 + }, + { + "epoch": 1.1826147106294673, + "grad_norm": 1.246852967804398, + "learning_rate": 
7.828987045755006e-07, + "loss": 0.3858698904514313, + "step": 5129 + }, + { + "epoch": 1.18284528475905, + "grad_norm": 1.526790126487461, + "learning_rate": 7.82526552550302e-07, + "loss": 0.48664575815200806, + "step": 5130 + }, + { + "epoch": 1.1830758588886328, + "grad_norm": 1.4370667079865387, + "learning_rate": 7.821544321370731e-07, + "loss": 0.5246836543083191, + "step": 5131 + }, + { + "epoch": 1.1833064330182155, + "grad_norm": 1.6695741670894575, + "learning_rate": 7.817823433899049e-07, + "loss": 0.5538516640663147, + "step": 5132 + }, + { + "epoch": 1.183537007147798, + "grad_norm": 1.5154692060299837, + "learning_rate": 7.814102863628852e-07, + "loss": 0.4563618302345276, + "step": 5133 + }, + { + "epoch": 1.1837675812773807, + "grad_norm": 1.6013623117191365, + "learning_rate": 7.810382611100952e-07, + "loss": 0.48093757033348083, + "step": 5134 + }, + { + "epoch": 1.1839981554069634, + "grad_norm": 1.4079128694512013, + "learning_rate": 7.806662676856133e-07, + "loss": 0.41152772307395935, + "step": 5135 + }, + { + "epoch": 1.184228729536546, + "grad_norm": 1.470828934761741, + "learning_rate": 7.802943061435121e-07, + "loss": 0.4429926574230194, + "step": 5136 + }, + { + "epoch": 1.1844593036661286, + "grad_norm": 1.6844871985058756, + "learning_rate": 7.799223765378604e-07, + "loss": 0.5795058012008667, + "step": 5137 + }, + { + "epoch": 1.1846898777957113, + "grad_norm": 1.3964078038325152, + "learning_rate": 7.795504789227214e-07, + "loss": 0.43219637870788574, + "step": 5138 + }, + { + "epoch": 1.184920451925294, + "grad_norm": 1.3120429368988666, + "learning_rate": 7.791786133521547e-07, + "loss": 0.472915917634964, + "step": 5139 + }, + { + "epoch": 1.1851510260548768, + "grad_norm": 1.8547533260703066, + "learning_rate": 7.788067798802144e-07, + "loss": 0.609251081943512, + "step": 5140 + }, + { + "epoch": 1.1853816001844593, + "grad_norm": 1.5647854614729606, + "learning_rate": 7.784349785609506e-07, + "loss": 0.5051882266998291, + 
"step": 5141 + }, + { + "epoch": 1.185612174314042, + "grad_norm": 1.8256847598733492, + "learning_rate": 7.780632094484081e-07, + "loss": 0.5062044858932495, + "step": 5142 + }, + { + "epoch": 1.1858427484436247, + "grad_norm": 1.6792228276022907, + "learning_rate": 7.77691472596627e-07, + "loss": 0.48717936873435974, + "step": 5143 + }, + { + "epoch": 1.1860733225732072, + "grad_norm": 1.4962691739334948, + "learning_rate": 7.773197680596439e-07, + "loss": 0.4755759537220001, + "step": 5144 + }, + { + "epoch": 1.18630389670279, + "grad_norm": 1.5701944534084074, + "learning_rate": 7.769480958914889e-07, + "loss": 0.4549487829208374, + "step": 5145 + }, + { + "epoch": 1.1865344708323726, + "grad_norm": 1.3416043214582947, + "learning_rate": 7.765764561461891e-07, + "loss": 0.39759546518325806, + "step": 5146 + }, + { + "epoch": 1.1867650449619553, + "grad_norm": 1.7321999626139561, + "learning_rate": 7.762048488777654e-07, + "loss": 0.5151915550231934, + "step": 5147 + }, + { + "epoch": 1.1869956190915378, + "grad_norm": 1.739537041268416, + "learning_rate": 7.758332741402351e-07, + "loss": 0.4555166959762573, + "step": 5148 + }, + { + "epoch": 1.1872261932211206, + "grad_norm": 1.246823148309275, + "learning_rate": 7.754617319876102e-07, + "loss": 0.3639993667602539, + "step": 5149 + }, + { + "epoch": 1.1874567673507033, + "grad_norm": 1.4228626603425891, + "learning_rate": 7.750902224738984e-07, + "loss": 0.4158916473388672, + "step": 5150 + }, + { + "epoch": 1.187687341480286, + "grad_norm": 1.5159845507016538, + "learning_rate": 7.747187456531021e-07, + "loss": 0.44933754205703735, + "step": 5151 + }, + { + "epoch": 1.1879179156098685, + "grad_norm": 1.1574431418082898, + "learning_rate": 7.74347301579219e-07, + "loss": 0.35436397790908813, + "step": 5152 + }, + { + "epoch": 1.1881484897394512, + "grad_norm": 1.7559371420298944, + "learning_rate": 7.73975890306243e-07, + "loss": 0.40650928020477295, + "step": 5153 + }, + { + "epoch": 1.188379063869034, + 
"grad_norm": 1.655955114095899, + "learning_rate": 7.736045118881615e-07, + "loss": 0.424211710691452, + "step": 5154 + }, + { + "epoch": 1.1886096379986166, + "grad_norm": 1.386370427214692, + "learning_rate": 7.73233166378959e-07, + "loss": 0.38909512758255005, + "step": 5155 + }, + { + "epoch": 1.1888402121281991, + "grad_norm": 1.6273556393891413, + "learning_rate": 7.728618538326139e-07, + "loss": 0.4452083110809326, + "step": 5156 + }, + { + "epoch": 1.1890707862577818, + "grad_norm": 1.7325341862894768, + "learning_rate": 7.724905743031005e-07, + "loss": 0.45061540603637695, + "step": 5157 + }, + { + "epoch": 1.1893013603873646, + "grad_norm": 1.875195364158454, + "learning_rate": 7.721193278443875e-07, + "loss": 0.5301374197006226, + "step": 5158 + }, + { + "epoch": 1.1895319345169473, + "grad_norm": 1.32653936253781, + "learning_rate": 7.717481145104398e-07, + "loss": 0.4386521577835083, + "step": 5159 + }, + { + "epoch": 1.1897625086465298, + "grad_norm": 1.5893013583646332, + "learning_rate": 7.713769343552169e-07, + "loss": 0.447623074054718, + "step": 5160 + }, + { + "epoch": 1.1899930827761125, + "grad_norm": 1.4757184491338362, + "learning_rate": 7.71005787432674e-07, + "loss": 0.44326454401016235, + "step": 5161 + }, + { + "epoch": 1.1902236569056952, + "grad_norm": 1.4868394681814385, + "learning_rate": 7.706346737967603e-07, + "loss": 0.564007043838501, + "step": 5162 + }, + { + "epoch": 1.190454231035278, + "grad_norm": 1.4497565739191507, + "learning_rate": 7.702635935014213e-07, + "loss": 0.5338540077209473, + "step": 5163 + }, + { + "epoch": 1.1906848051648604, + "grad_norm": 1.5430964424900424, + "learning_rate": 7.698925466005977e-07, + "loss": 0.45307862758636475, + "step": 5164 + }, + { + "epoch": 1.1909153792944431, + "grad_norm": 1.4703583168080245, + "learning_rate": 7.69521533148224e-07, + "loss": 0.5383142232894897, + "step": 5165 + }, + { + "epoch": 1.1911459534240258, + "grad_norm": 1.46357622305891, + "learning_rate": 
7.691505531982316e-07, + "loss": 0.3794770836830139, + "step": 5166 + }, + { + "epoch": 1.1913765275536086, + "grad_norm": 1.73725405615964, + "learning_rate": 7.687796068045455e-07, + "loss": 0.4633198082447052, + "step": 5167 + }, + { + "epoch": 1.191607101683191, + "grad_norm": 1.4824242158713679, + "learning_rate": 7.684086940210875e-07, + "loss": 0.5080294609069824, + "step": 5168 + }, + { + "epoch": 1.1918376758127738, + "grad_norm": 1.4742940614632714, + "learning_rate": 7.680378149017724e-07, + "loss": 0.3952289819717407, + "step": 5169 + }, + { + "epoch": 1.1920682499423565, + "grad_norm": 1.6284523488523228, + "learning_rate": 7.676669695005122e-07, + "loss": 0.4518551528453827, + "step": 5170 + }, + { + "epoch": 1.1922988240719392, + "grad_norm": 1.3915500318606786, + "learning_rate": 7.672961578712125e-07, + "loss": 0.4752943515777588, + "step": 5171 + }, + { + "epoch": 1.1925293982015217, + "grad_norm": 1.4424968675316805, + "learning_rate": 7.669253800677744e-07, + "loss": 0.5059680342674255, + "step": 5172 + }, + { + "epoch": 1.1927599723311044, + "grad_norm": 1.4513506332822887, + "learning_rate": 7.665546361440949e-07, + "loss": 0.47073960304260254, + "step": 5173 + }, + { + "epoch": 1.1929905464606871, + "grad_norm": 1.6974826094600077, + "learning_rate": 7.661839261540644e-07, + "loss": 0.5851496458053589, + "step": 5174 + }, + { + "epoch": 1.1932211205902699, + "grad_norm": 1.4255244135326766, + "learning_rate": 7.658132501515701e-07, + "loss": 0.44255387783050537, + "step": 5175 + }, + { + "epoch": 1.1934516947198524, + "grad_norm": 1.7360033352973823, + "learning_rate": 7.654426081904931e-07, + "loss": 0.543785810470581, + "step": 5176 + }, + { + "epoch": 1.193682268849435, + "grad_norm": 1.697289945139709, + "learning_rate": 7.650720003247107e-07, + "loss": 0.503501296043396, + "step": 5177 + }, + { + "epoch": 1.1939128429790178, + "grad_norm": 1.6448034142146566, + "learning_rate": 7.647014266080935e-07, + "loss": 0.43894368410110474, + 
"step": 5178 + }, + { + "epoch": 1.1941434171086005, + "grad_norm": 1.9780925681836061, + "learning_rate": 7.643308870945088e-07, + "loss": 0.5014036297798157, + "step": 5179 + }, + { + "epoch": 1.194373991238183, + "grad_norm": 1.3813934145743847, + "learning_rate": 7.639603818378178e-07, + "loss": 0.4859309196472168, + "step": 5180 + }, + { + "epoch": 1.1946045653677657, + "grad_norm": 1.611175852060371, + "learning_rate": 7.635899108918781e-07, + "loss": 0.40631920099258423, + "step": 5181 + }, + { + "epoch": 1.1948351394973484, + "grad_norm": 1.923584573200039, + "learning_rate": 7.632194743105405e-07, + "loss": 0.5206565856933594, + "step": 5182 + }, + { + "epoch": 1.1950657136269311, + "grad_norm": 1.659582338573284, + "learning_rate": 7.628490721476517e-07, + "loss": 0.5052351355552673, + "step": 5183 + }, + { + "epoch": 1.1952962877565136, + "grad_norm": 1.3967739180573415, + "learning_rate": 7.624787044570543e-07, + "loss": 0.4921465516090393, + "step": 5184 + }, + { + "epoch": 1.1955268618860964, + "grad_norm": 1.2706689377506823, + "learning_rate": 7.621083712925839e-07, + "loss": 0.3307859003543854, + "step": 5185 + }, + { + "epoch": 1.195757436015679, + "grad_norm": 1.5942715812711645, + "learning_rate": 7.617380727080728e-07, + "loss": 0.4276743531227112, + "step": 5186 + }, + { + "epoch": 1.1959880101452618, + "grad_norm": 1.434739100338101, + "learning_rate": 7.613678087573475e-07, + "loss": 0.5065702795982361, + "step": 5187 + }, + { + "epoch": 1.1962185842748443, + "grad_norm": 1.2918886211693255, + "learning_rate": 7.609975794942301e-07, + "loss": 0.3588709533214569, + "step": 5188 + }, + { + "epoch": 1.196449158404427, + "grad_norm": 1.4907134183008088, + "learning_rate": 7.606273849725362e-07, + "loss": 0.4296506941318512, + "step": 5189 + }, + { + "epoch": 1.1966797325340097, + "grad_norm": 1.5501182036176049, + "learning_rate": 7.602572252460782e-07, + "loss": 0.517792820930481, + "step": 5190 + }, + { + "epoch": 1.1969103066635924, + 
"grad_norm": 1.6883448687359832, + "learning_rate": 7.598871003686619e-07, + "loss": 0.38939881324768066, + "step": 5191 + }, + { + "epoch": 1.197140880793175, + "grad_norm": 1.5288548185908284, + "learning_rate": 7.595170103940896e-07, + "loss": 0.5759290456771851, + "step": 5192 + }, + { + "epoch": 1.1973714549227576, + "grad_norm": 1.975229876516129, + "learning_rate": 7.591469553761569e-07, + "loss": 0.4705851078033447, + "step": 5193 + }, + { + "epoch": 1.1976020290523404, + "grad_norm": 1.4820736709912923, + "learning_rate": 7.587769353686548e-07, + "loss": 0.5137619972229004, + "step": 5194 + }, + { + "epoch": 1.197832603181923, + "grad_norm": 1.426346211238444, + "learning_rate": 7.584069504253701e-07, + "loss": 0.43207496404647827, + "step": 5195 + }, + { + "epoch": 1.1980631773115056, + "grad_norm": 1.7446559629267169, + "learning_rate": 7.580370006000835e-07, + "loss": 0.3976139426231384, + "step": 5196 + }, + { + "epoch": 1.1982937514410883, + "grad_norm": 1.3117053560833851, + "learning_rate": 7.576670859465715e-07, + "loss": 0.41323673725128174, + "step": 5197 + }, + { + "epoch": 1.198524325570671, + "grad_norm": 1.5110343718270132, + "learning_rate": 7.57297206518604e-07, + "loss": 0.404024600982666, + "step": 5198 + }, + { + "epoch": 1.1987548997002535, + "grad_norm": 1.3684281900258655, + "learning_rate": 7.569273623699475e-07, + "loss": 0.4010540843009949, + "step": 5199 + }, + { + "epoch": 1.1989854738298362, + "grad_norm": 1.5739020793077496, + "learning_rate": 7.565575535543623e-07, + "loss": 0.44299256801605225, + "step": 5200 + }, + { + "epoch": 1.199216047959419, + "grad_norm": 1.5204166282494558, + "learning_rate": 7.561877801256041e-07, + "loss": 0.5217546820640564, + "step": 5201 + }, + { + "epoch": 1.1994466220890017, + "grad_norm": 1.868873770331591, + "learning_rate": 7.558180421374229e-07, + "loss": 0.5192688703536987, + "step": 5202 + }, + { + "epoch": 1.1996771962185844, + "grad_norm": 1.5743910950617057, + "learning_rate": 
7.554483396435637e-07, + "loss": 0.38272884488105774, + "step": 5203 + }, + { + "epoch": 1.1999077703481669, + "grad_norm": 1.4246723536184043, + "learning_rate": 7.550786726977673e-07, + "loss": 0.474464476108551, + "step": 5204 + }, + { + "epoch": 1.2001383444777496, + "grad_norm": 1.6360159300410695, + "learning_rate": 7.547090413537676e-07, + "loss": 0.540134072303772, + "step": 5205 + }, + { + "epoch": 1.2003689186073323, + "grad_norm": 1.4752644193711169, + "learning_rate": 7.543394456652948e-07, + "loss": 0.4662882089614868, + "step": 5206 + }, + { + "epoch": 1.2005994927369148, + "grad_norm": 1.6858064119472538, + "learning_rate": 7.539698856860732e-07, + "loss": 0.440970778465271, + "step": 5207 + }, + { + "epoch": 1.2008300668664975, + "grad_norm": 1.3786365004169476, + "learning_rate": 7.536003614698225e-07, + "loss": 0.41787397861480713, + "step": 5208 + }, + { + "epoch": 1.2010606409960802, + "grad_norm": 1.4726677497641942, + "learning_rate": 7.532308730702561e-07, + "loss": 0.5503408908843994, + "step": 5209 + }, + { + "epoch": 1.201291215125663, + "grad_norm": 1.4739960164302617, + "learning_rate": 7.528614205410833e-07, + "loss": 0.43713903427124023, + "step": 5210 + }, + { + "epoch": 1.2015217892552457, + "grad_norm": 1.5362481289460599, + "learning_rate": 7.524920039360076e-07, + "loss": 0.4145667552947998, + "step": 5211 + }, + { + "epoch": 1.2017523633848282, + "grad_norm": 1.4800845890771783, + "learning_rate": 7.521226233087279e-07, + "loss": 0.4307587146759033, + "step": 5212 + }, + { + "epoch": 1.2019829375144109, + "grad_norm": 1.436182742461266, + "learning_rate": 7.517532787129369e-07, + "loss": 0.43784570693969727, + "step": 5213 + }, + { + "epoch": 1.2022135116439936, + "grad_norm": 1.3395031095564736, + "learning_rate": 7.513839702023226e-07, + "loss": 0.40003830194473267, + "step": 5214 + }, + { + "epoch": 1.202444085773576, + "grad_norm": 1.4786298792735793, + "learning_rate": 7.510146978305682e-07, + "loss": 0.4880738854408264, + 
"step": 5215 + }, + { + "epoch": 1.2026746599031588, + "grad_norm": 1.31895753202322, + "learning_rate": 7.506454616513505e-07, + "loss": 0.39548349380493164, + "step": 5216 + }, + { + "epoch": 1.2029052340327415, + "grad_norm": 1.5189592384869435, + "learning_rate": 7.502762617183425e-07, + "loss": 0.4060090184211731, + "step": 5217 + }, + { + "epoch": 1.2031358081623242, + "grad_norm": 1.6902238907281657, + "learning_rate": 7.499070980852101e-07, + "loss": 0.44657808542251587, + "step": 5218 + }, + { + "epoch": 1.203366382291907, + "grad_norm": 1.553015362629627, + "learning_rate": 7.495379708056161e-07, + "loss": 0.5283595323562622, + "step": 5219 + }, + { + "epoch": 1.2035969564214895, + "grad_norm": 1.5940858647104894, + "learning_rate": 7.49168879933216e-07, + "loss": 0.4424205422401428, + "step": 5220 + }, + { + "epoch": 1.2038275305510722, + "grad_norm": 1.4929497446465205, + "learning_rate": 7.487998255216619e-07, + "loss": 0.4998319745063782, + "step": 5221 + }, + { + "epoch": 1.2040581046806549, + "grad_norm": 1.3437939609448373, + "learning_rate": 7.484308076245987e-07, + "loss": 0.3821876645088196, + "step": 5222 + }, + { + "epoch": 1.2042886788102374, + "grad_norm": 1.4227177114495277, + "learning_rate": 7.480618262956669e-07, + "loss": 0.4567919373512268, + "step": 5223 + }, + { + "epoch": 1.20451925293982, + "grad_norm": 1.4207326358395804, + "learning_rate": 7.476928815885026e-07, + "loss": 0.4561428427696228, + "step": 5224 + }, + { + "epoch": 1.2047498270694028, + "grad_norm": 1.5720016799439587, + "learning_rate": 7.473239735567344e-07, + "loss": 0.4384823739528656, + "step": 5225 + }, + { + "epoch": 1.2049804011989855, + "grad_norm": 1.518914607229236, + "learning_rate": 7.469551022539877e-07, + "loss": 0.42840123176574707, + "step": 5226 + }, + { + "epoch": 1.2052109753285682, + "grad_norm": 1.4031825092609558, + "learning_rate": 7.465862677338812e-07, + "loss": 0.39553213119506836, + "step": 5227 + }, + { + "epoch": 1.2054415494581507, + 
"grad_norm": 1.521464998921144, + "learning_rate": 7.462174700500295e-07, + "loss": 0.4325043559074402, + "step": 5228 + }, + { + "epoch": 1.2056721235877335, + "grad_norm": 1.7451009485961195, + "learning_rate": 7.4584870925604e-07, + "loss": 0.5004623532295227, + "step": 5229 + }, + { + "epoch": 1.2059026977173162, + "grad_norm": 1.6975060246760258, + "learning_rate": 7.454799854055165e-07, + "loss": 0.42296791076660156, + "step": 5230 + }, + { + "epoch": 1.2061332718468987, + "grad_norm": 1.7859122255595659, + "learning_rate": 7.451112985520565e-07, + "loss": 0.45638370513916016, + "step": 5231 + }, + { + "epoch": 1.2063638459764814, + "grad_norm": 1.9018837416313183, + "learning_rate": 7.447426487492528e-07, + "loss": 0.5134493112564087, + "step": 5232 + }, + { + "epoch": 1.206594420106064, + "grad_norm": 1.382989024686568, + "learning_rate": 7.443740360506918e-07, + "loss": 0.4132578372955322, + "step": 5233 + }, + { + "epoch": 1.2068249942356468, + "grad_norm": 1.321784021070878, + "learning_rate": 7.440054605099552e-07, + "loss": 0.4363224506378174, + "step": 5234 + }, + { + "epoch": 1.2070555683652295, + "grad_norm": 1.4395608486144074, + "learning_rate": 7.4363692218062e-07, + "loss": 0.44970041513442993, + "step": 5235 + }, + { + "epoch": 1.207286142494812, + "grad_norm": 1.3219627332758312, + "learning_rate": 7.432684211162556e-07, + "loss": 0.39787235856056213, + "step": 5236 + }, + { + "epoch": 1.2075167166243947, + "grad_norm": 1.694639970069785, + "learning_rate": 7.428999573704284e-07, + "loss": 0.46057572960853577, + "step": 5237 + }, + { + "epoch": 1.2077472907539775, + "grad_norm": 1.3954230269661139, + "learning_rate": 7.42531530996698e-07, + "loss": 0.46754559874534607, + "step": 5238 + }, + { + "epoch": 1.20797786488356, + "grad_norm": 1.4060087118514482, + "learning_rate": 7.42163142048619e-07, + "loss": 0.5072697401046753, + "step": 5239 + }, + { + "epoch": 1.2082084390131427, + "grad_norm": 1.5355585762921151, + "learning_rate": 
7.417947905797403e-07, + "loss": 0.4691959023475647, + "step": 5240 + }, + { + "epoch": 1.2084390131427254, + "grad_norm": 1.4596733170422231, + "learning_rate": 7.414264766436056e-07, + "loss": 0.43248072266578674, + "step": 5241 + }, + { + "epoch": 1.208669587272308, + "grad_norm": 1.8386458599943265, + "learning_rate": 7.410582002937534e-07, + "loss": 0.4748457968235016, + "step": 5242 + }, + { + "epoch": 1.2089001614018908, + "grad_norm": 1.413498638420547, + "learning_rate": 7.406899615837157e-07, + "loss": 0.4682820439338684, + "step": 5243 + }, + { + "epoch": 1.2091307355314733, + "grad_norm": 1.3788557575990639, + "learning_rate": 7.403217605670205e-07, + "loss": 0.41747021675109863, + "step": 5244 + }, + { + "epoch": 1.209361309661056, + "grad_norm": 1.5523861247321795, + "learning_rate": 7.399535972971886e-07, + "loss": 0.4968727231025696, + "step": 5245 + }, + { + "epoch": 1.2095918837906388, + "grad_norm": 1.6255626899279143, + "learning_rate": 7.395854718277372e-07, + "loss": 0.486778199672699, + "step": 5246 + }, + { + "epoch": 1.2098224579202213, + "grad_norm": 1.938770231002498, + "learning_rate": 7.392173842121765e-07, + "loss": 0.5153725147247314, + "step": 5247 + }, + { + "epoch": 1.210053032049804, + "grad_norm": 1.6258479412197122, + "learning_rate": 7.388493345040123e-07, + "loss": 0.42352354526519775, + "step": 5248 + }, + { + "epoch": 1.2102836061793867, + "grad_norm": 1.477454043811349, + "learning_rate": 7.384813227567437e-07, + "loss": 0.363994300365448, + "step": 5249 + }, + { + "epoch": 1.2105141803089694, + "grad_norm": 1.3450193947115454, + "learning_rate": 7.381133490238654e-07, + "loss": 0.44195863604545593, + "step": 5250 + }, + { + "epoch": 1.2107447544385521, + "grad_norm": 1.6510262733932026, + "learning_rate": 7.377454133588657e-07, + "loss": 0.5031026601791382, + "step": 5251 + }, + { + "epoch": 1.2109753285681346, + "grad_norm": 1.1126223170422647, + "learning_rate": 7.373775158152284e-07, + "loss": 0.3900304436683655, + 
"step": 5252 + }, + { + "epoch": 1.2112059026977173, + "grad_norm": 1.4718461813811798, + "learning_rate": 7.370096564464308e-07, + "loss": 0.406912624835968, + "step": 5253 + }, + { + "epoch": 1.2114364768273, + "grad_norm": 1.2742945351379469, + "learning_rate": 7.366418353059445e-07, + "loss": 0.407238632440567, + "step": 5254 + }, + { + "epoch": 1.2116670509568825, + "grad_norm": 2.3145771276343625, + "learning_rate": 7.36274052447237e-07, + "loss": 0.5605549216270447, + "step": 5255 + }, + { + "epoch": 1.2118976250864653, + "grad_norm": 1.7547311772877803, + "learning_rate": 7.359063079237684e-07, + "loss": 0.5016111731529236, + "step": 5256 + }, + { + "epoch": 1.212128199216048, + "grad_norm": 1.31999939383151, + "learning_rate": 7.355386017889946e-07, + "loss": 0.38812315464019775, + "step": 5257 + }, + { + "epoch": 1.2123587733456307, + "grad_norm": 1.5177330463551633, + "learning_rate": 7.35170934096365e-07, + "loss": 0.46022963523864746, + "step": 5258 + }, + { + "epoch": 1.2125893474752132, + "grad_norm": 1.4118628857930515, + "learning_rate": 7.348033048993246e-07, + "loss": 0.40029624104499817, + "step": 5259 + }, + { + "epoch": 1.212819921604796, + "grad_norm": 1.4051430521275825, + "learning_rate": 7.344357142513111e-07, + "loss": 0.4331943392753601, + "step": 5260 + }, + { + "epoch": 1.2130504957343786, + "grad_norm": 1.565074125850335, + "learning_rate": 7.340681622057582e-07, + "loss": 0.43757596611976624, + "step": 5261 + }, + { + "epoch": 1.2132810698639613, + "grad_norm": 1.7743971563599887, + "learning_rate": 7.337006488160931e-07, + "loss": 0.49733203649520874, + "step": 5262 + }, + { + "epoch": 1.2135116439935438, + "grad_norm": 1.341577967095045, + "learning_rate": 7.333331741357373e-07, + "loss": 0.35552018880844116, + "step": 5263 + }, + { + "epoch": 1.2137422181231265, + "grad_norm": 1.6321675762702066, + "learning_rate": 7.329657382181074e-07, + "loss": 0.4102798104286194, + "step": 5264 + }, + { + "epoch": 1.2139727922527093, + 
"grad_norm": 1.4184297160567871, + "learning_rate": 7.325983411166136e-07, + "loss": 0.4517349600791931, + "step": 5265 + }, + { + "epoch": 1.214203366382292, + "grad_norm": 1.6427775893660324, + "learning_rate": 7.322309828846613e-07, + "loss": 0.48924458026885986, + "step": 5266 + }, + { + "epoch": 1.2144339405118745, + "grad_norm": 1.4030974508932201, + "learning_rate": 7.31863663575649e-07, + "loss": 0.38971561193466187, + "step": 5267 + }, + { + "epoch": 1.2146645146414572, + "grad_norm": 1.6155044970268224, + "learning_rate": 7.31496383242971e-07, + "loss": 0.6503559350967407, + "step": 5268 + }, + { + "epoch": 1.21489508877104, + "grad_norm": 1.6905359606856467, + "learning_rate": 7.311291419400146e-07, + "loss": 0.4615272879600525, + "step": 5269 + }, + { + "epoch": 1.2151256629006226, + "grad_norm": 1.6629441467357413, + "learning_rate": 7.307619397201625e-07, + "loss": 0.3793429732322693, + "step": 5270 + }, + { + "epoch": 1.2153562370302051, + "grad_norm": 1.3076578533376795, + "learning_rate": 7.303947766367909e-07, + "loss": 0.48186585307121277, + "step": 5271 + }, + { + "epoch": 1.2155868111597878, + "grad_norm": 1.4243590091370186, + "learning_rate": 7.300276527432713e-07, + "loss": 0.4051778018474579, + "step": 5272 + }, + { + "epoch": 1.2158173852893706, + "grad_norm": 1.6820510248806995, + "learning_rate": 7.296605680929684e-07, + "loss": 0.43364250659942627, + "step": 5273 + }, + { + "epoch": 1.2160479594189533, + "grad_norm": 1.6130796939421093, + "learning_rate": 7.292935227392414e-07, + "loss": 0.4893898367881775, + "step": 5274 + }, + { + "epoch": 1.2162785335485358, + "grad_norm": 1.240780138685616, + "learning_rate": 7.289265167354448e-07, + "loss": 0.43125462532043457, + "step": 5275 + }, + { + "epoch": 1.2165091076781185, + "grad_norm": 1.6108443522760163, + "learning_rate": 7.285595501349258e-07, + "loss": 0.4086509943008423, + "step": 5276 + }, + { + "epoch": 1.2167396818077012, + "grad_norm": 1.838256686394942, + "learning_rate": 
7.281926229910274e-07, + "loss": 0.5176471471786499, + "step": 5277 + }, + { + "epoch": 1.216970255937284, + "grad_norm": 1.8145364687667531, + "learning_rate": 7.278257353570857e-07, + "loss": 0.4783210754394531, + "step": 5278 + }, + { + "epoch": 1.2172008300668664, + "grad_norm": 1.5012148176529632, + "learning_rate": 7.274588872864322e-07, + "loss": 0.4847145080566406, + "step": 5279 + }, + { + "epoch": 1.2174314041964491, + "grad_norm": 1.4076947828029491, + "learning_rate": 7.270920788323911e-07, + "loss": 0.4691849946975708, + "step": 5280 + }, + { + "epoch": 1.2176619783260318, + "grad_norm": 1.8729494542899485, + "learning_rate": 7.267253100482824e-07, + "loss": 0.5755687952041626, + "step": 5281 + }, + { + "epoch": 1.2178925524556146, + "grad_norm": 1.3639853941099451, + "learning_rate": 7.263585809874193e-07, + "loss": 0.42995721101760864, + "step": 5282 + }, + { + "epoch": 1.218123126585197, + "grad_norm": 1.4560966669318844, + "learning_rate": 7.259918917031101e-07, + "loss": 0.501590371131897, + "step": 5283 + }, + { + "epoch": 1.2183537007147798, + "grad_norm": 1.5326641731074693, + "learning_rate": 7.256252422486563e-07, + "loss": 0.5499469041824341, + "step": 5284 + }, + { + "epoch": 1.2185842748443625, + "grad_norm": 1.7075536366613502, + "learning_rate": 7.25258632677354e-07, + "loss": 0.4567297399044037, + "step": 5285 + }, + { + "epoch": 1.2188148489739452, + "grad_norm": 1.3251311548344207, + "learning_rate": 7.248920630424942e-07, + "loss": 0.4046020805835724, + "step": 5286 + }, + { + "epoch": 1.2190454231035277, + "grad_norm": 1.4721989927884918, + "learning_rate": 7.245255333973608e-07, + "loss": 0.3534840941429138, + "step": 5287 + }, + { + "epoch": 1.2192759972331104, + "grad_norm": 1.4151850401024268, + "learning_rate": 7.241590437952331e-07, + "loss": 0.45795637369155884, + "step": 5288 + }, + { + "epoch": 1.2195065713626931, + "grad_norm": 1.4921564176260302, + "learning_rate": 7.237925942893839e-07, + "loss": 0.3984150290489197, + 
"step": 5289 + }, + { + "epoch": 1.2197371454922759, + "grad_norm": 1.5617581917582364, + "learning_rate": 7.234261849330807e-07, + "loss": 0.46833336353302, + "step": 5290 + }, + { + "epoch": 1.2199677196218583, + "grad_norm": 1.6200691445613622, + "learning_rate": 7.230598157795842e-07, + "loss": 0.5395709276199341, + "step": 5291 + }, + { + "epoch": 1.220198293751441, + "grad_norm": 1.300141768975315, + "learning_rate": 7.226934868821505e-07, + "loss": 0.4556152820587158, + "step": 5292 + }, + { + "epoch": 1.2204288678810238, + "grad_norm": 1.5916352600329198, + "learning_rate": 7.223271982940287e-07, + "loss": 0.49564266204833984, + "step": 5293 + }, + { + "epoch": 1.2206594420106065, + "grad_norm": 1.5492667362910795, + "learning_rate": 7.219609500684625e-07, + "loss": 0.5389127731323242, + "step": 5294 + }, + { + "epoch": 1.220890016140189, + "grad_norm": 1.3125997254034645, + "learning_rate": 7.215947422586905e-07, + "loss": 0.48815661668777466, + "step": 5295 + }, + { + "epoch": 1.2211205902697717, + "grad_norm": 1.6576709424363434, + "learning_rate": 7.21228574917944e-07, + "loss": 0.4204339385032654, + "step": 5296 + }, + { + "epoch": 1.2213511643993544, + "grad_norm": 1.2807688149232648, + "learning_rate": 7.208624480994494e-07, + "loss": 0.39993199706077576, + "step": 5297 + }, + { + "epoch": 1.2215817385289371, + "grad_norm": 1.7420778835945019, + "learning_rate": 7.204963618564268e-07, + "loss": 0.5679433941841125, + "step": 5298 + }, + { + "epoch": 1.2218123126585196, + "grad_norm": 1.819503614929131, + "learning_rate": 7.201303162420913e-07, + "loss": 0.46620815992355347, + "step": 5299 + }, + { + "epoch": 1.2220428867881024, + "grad_norm": 1.4667553556365653, + "learning_rate": 7.1976431130965e-07, + "loss": 0.44684547185897827, + "step": 5300 + }, + { + "epoch": 1.222273460917685, + "grad_norm": 1.6182813529173974, + "learning_rate": 7.193983471123066e-07, + "loss": 0.4518858790397644, + "step": 5301 + }, + { + "epoch": 1.2225040350472678, + 
"grad_norm": 1.497058969625444, + "learning_rate": 7.190324237032569e-07, + "loss": 0.3966304659843445, + "step": 5302 + }, + { + "epoch": 1.2227346091768503, + "grad_norm": 1.7688402904846452, + "learning_rate": 7.186665411356925e-07, + "loss": 0.5541782379150391, + "step": 5303 + }, + { + "epoch": 1.222965183306433, + "grad_norm": 1.5748150394963076, + "learning_rate": 7.183006994627972e-07, + "loss": 0.3986799120903015, + "step": 5304 + }, + { + "epoch": 1.2231957574360157, + "grad_norm": 1.3179167901427211, + "learning_rate": 7.1793489873775e-07, + "loss": 0.485867977142334, + "step": 5305 + }, + { + "epoch": 1.2234263315655984, + "grad_norm": 1.6264368495030206, + "learning_rate": 7.175691390137244e-07, + "loss": 0.40187692642211914, + "step": 5306 + }, + { + "epoch": 1.223656905695181, + "grad_norm": 1.5085798270078894, + "learning_rate": 7.172034203438864e-07, + "loss": 0.4679393172264099, + "step": 5307 + }, + { + "epoch": 1.2238874798247636, + "grad_norm": 1.3178949369734356, + "learning_rate": 7.168377427813974e-07, + "loss": 0.512301504611969, + "step": 5308 + }, + { + "epoch": 1.2241180539543464, + "grad_norm": 1.4684075358167812, + "learning_rate": 7.164721063794122e-07, + "loss": 0.5340646505355835, + "step": 5309 + }, + { + "epoch": 1.224348628083929, + "grad_norm": 1.6528941936609833, + "learning_rate": 7.1610651119108e-07, + "loss": 0.4757506847381592, + "step": 5310 + }, + { + "epoch": 1.2245792022135116, + "grad_norm": 1.5982652868975813, + "learning_rate": 7.157409572695434e-07, + "loss": 0.5697519779205322, + "step": 5311 + }, + { + "epoch": 1.2248097763430943, + "grad_norm": 1.4427165421847559, + "learning_rate": 7.153754446679395e-07, + "loss": 0.47521811723709106, + "step": 5312 + }, + { + "epoch": 1.225040350472677, + "grad_norm": 1.4092560589123113, + "learning_rate": 7.150099734393997e-07, + "loss": 0.40484973788261414, + "step": 5313 + }, + { + "epoch": 1.2252709246022597, + "grad_norm": 1.4095470452598946, + "learning_rate": 
7.146445436370481e-07, + "loss": 0.4465969204902649, + "step": 5314 + }, + { + "epoch": 1.2255014987318422, + "grad_norm": 1.5543895211488108, + "learning_rate": 7.142791553140044e-07, + "loss": 0.44878089427948, + "step": 5315 + }, + { + "epoch": 1.225732072861425, + "grad_norm": 1.657847170962442, + "learning_rate": 7.139138085233809e-07, + "loss": 0.5049536228179932, + "step": 5316 + }, + { + "epoch": 1.2259626469910077, + "grad_norm": 1.377588971885486, + "learning_rate": 7.135485033182847e-07, + "loss": 0.42945951223373413, + "step": 5317 + }, + { + "epoch": 1.2261932211205901, + "grad_norm": 1.607627236207016, + "learning_rate": 7.131832397518167e-07, + "loss": 0.4668564200401306, + "step": 5318 + }, + { + "epoch": 1.2264237952501729, + "grad_norm": 1.640684584420395, + "learning_rate": 7.128180178770718e-07, + "loss": 0.4691551625728607, + "step": 5319 + }, + { + "epoch": 1.2266543693797556, + "grad_norm": 1.4653351758865718, + "learning_rate": 7.124528377471382e-07, + "loss": 0.4306211769580841, + "step": 5320 + }, + { + "epoch": 1.2268849435093383, + "grad_norm": 1.7130888177954928, + "learning_rate": 7.120876994150991e-07, + "loss": 0.4986322522163391, + "step": 5321 + }, + { + "epoch": 1.227115517638921, + "grad_norm": 1.4775997138779564, + "learning_rate": 7.117226029340304e-07, + "loss": 0.4058566093444824, + "step": 5322 + }, + { + "epoch": 1.2273460917685035, + "grad_norm": 1.3729187298835452, + "learning_rate": 7.113575483570036e-07, + "loss": 0.390174925327301, + "step": 5323 + }, + { + "epoch": 1.2275766658980862, + "grad_norm": 1.3070483816242904, + "learning_rate": 7.109925357370821e-07, + "loss": 0.38822996616363525, + "step": 5324 + }, + { + "epoch": 1.227807240027669, + "grad_norm": 1.3599088173875424, + "learning_rate": 7.106275651273244e-07, + "loss": 0.47792741656303406, + "step": 5325 + }, + { + "epoch": 1.2280378141572514, + "grad_norm": 1.52666177684785, + "learning_rate": 7.102626365807833e-07, + "loss": 0.5332789421081543, + "step": 
5326 + }, + { + "epoch": 1.2282683882868342, + "grad_norm": 1.4337525635961101, + "learning_rate": 7.098977501505036e-07, + "loss": 0.5325096845626831, + "step": 5327 + }, + { + "epoch": 1.2284989624164169, + "grad_norm": 1.6185088994304762, + "learning_rate": 7.095329058895267e-07, + "loss": 0.4184231162071228, + "step": 5328 + }, + { + "epoch": 1.2287295365459996, + "grad_norm": 1.7570013482364435, + "learning_rate": 7.091681038508852e-07, + "loss": 0.43037641048431396, + "step": 5329 + }, + { + "epoch": 1.2289601106755823, + "grad_norm": 1.5067774692843796, + "learning_rate": 7.088033440876078e-07, + "loss": 0.4466821551322937, + "step": 5330 + }, + { + "epoch": 1.2291906848051648, + "grad_norm": 1.5083021571464743, + "learning_rate": 7.084386266527151e-07, + "loss": 0.35853004455566406, + "step": 5331 + }, + { + "epoch": 1.2294212589347475, + "grad_norm": 1.542402337323393, + "learning_rate": 7.080739515992231e-07, + "loss": 0.44986268877983093, + "step": 5332 + }, + { + "epoch": 1.2296518330643302, + "grad_norm": 1.7104999289185845, + "learning_rate": 7.07709318980141e-07, + "loss": 0.3563602566719055, + "step": 5333 + }, + { + "epoch": 1.2298824071939127, + "grad_norm": 1.5401970805558025, + "learning_rate": 7.073447288484715e-07, + "loss": 0.4505435824394226, + "step": 5334 + }, + { + "epoch": 1.2301129813234954, + "grad_norm": 1.3508208021904817, + "learning_rate": 7.069801812572116e-07, + "loss": 0.4477807283401489, + "step": 5335 + }, + { + "epoch": 1.2303435554530782, + "grad_norm": 1.5084663891676386, + "learning_rate": 7.066156762593518e-07, + "loss": 0.4470565915107727, + "step": 5336 + }, + { + "epoch": 1.2305741295826609, + "grad_norm": 1.4627780913359043, + "learning_rate": 7.062512139078773e-07, + "loss": 0.4236464500427246, + "step": 5337 + }, + { + "epoch": 1.2308047037122436, + "grad_norm": 1.3002436810863733, + "learning_rate": 7.058867942557655e-07, + "loss": 0.3221476376056671, + "step": 5338 + }, + { + "epoch": 1.231035277841826, + 
"grad_norm": 1.818660153327524, + "learning_rate": 7.055224173559891e-07, + "loss": 0.502305269241333, + "step": 5339 + }, + { + "epoch": 1.2312658519714088, + "grad_norm": 1.655814956644188, + "learning_rate": 7.051580832615136e-07, + "loss": 0.5121853351593018, + "step": 5340 + }, + { + "epoch": 1.2314964261009915, + "grad_norm": 1.713071870874518, + "learning_rate": 7.047937920252991e-07, + "loss": 0.5468438863754272, + "step": 5341 + }, + { + "epoch": 1.231727000230574, + "grad_norm": 1.2030374980808431, + "learning_rate": 7.044295437002985e-07, + "loss": 0.5026402473449707, + "step": 5342 + }, + { + "epoch": 1.2319575743601567, + "grad_norm": 1.9445671085046203, + "learning_rate": 7.040653383394596e-07, + "loss": 0.5205342173576355, + "step": 5343 + }, + { + "epoch": 1.2321881484897395, + "grad_norm": 1.5970504229179872, + "learning_rate": 7.037011759957228e-07, + "loss": 0.5184727311134338, + "step": 5344 + }, + { + "epoch": 1.2324187226193222, + "grad_norm": 1.3779493729990695, + "learning_rate": 7.033370567220227e-07, + "loss": 0.414316862821579, + "step": 5345 + }, + { + "epoch": 1.2326492967489049, + "grad_norm": 1.4260441300832385, + "learning_rate": 7.029729805712885e-07, + "loss": 0.42133980989456177, + "step": 5346 + }, + { + "epoch": 1.2328798708784874, + "grad_norm": 1.8139584962445312, + "learning_rate": 7.026089475964414e-07, + "loss": 0.4888553321361542, + "step": 5347 + }, + { + "epoch": 1.23311044500807, + "grad_norm": 1.3419182130591616, + "learning_rate": 7.022449578503979e-07, + "loss": 0.4702431857585907, + "step": 5348 + }, + { + "epoch": 1.2333410191376528, + "grad_norm": 1.7237576970327266, + "learning_rate": 7.018810113860672e-07, + "loss": 0.5312628746032715, + "step": 5349 + }, + { + "epoch": 1.2335715932672353, + "grad_norm": 1.3183810824607851, + "learning_rate": 7.015171082563533e-07, + "loss": 0.5297777056694031, + "step": 5350 + }, + { + "epoch": 1.233802167396818, + "grad_norm": 1.4423147751678271, + "learning_rate": 
7.011532485141524e-07, + "loss": 0.5172504782676697, + "step": 5351 + }, + { + "epoch": 1.2340327415264007, + "grad_norm": 1.4663357988839691, + "learning_rate": 7.007894322123556e-07, + "loss": 0.4288995862007141, + "step": 5352 + }, + { + "epoch": 1.2342633156559835, + "grad_norm": 1.373863251988179, + "learning_rate": 7.004256594038475e-07, + "loss": 0.4194108247756958, + "step": 5353 + }, + { + "epoch": 1.2344938897855662, + "grad_norm": 1.6567765897983155, + "learning_rate": 7.000619301415056e-07, + "loss": 0.48825979232788086, + "step": 5354 + }, + { + "epoch": 1.2347244639151487, + "grad_norm": 1.5674749005570563, + "learning_rate": 6.99698244478202e-07, + "loss": 0.4721163213253021, + "step": 5355 + }, + { + "epoch": 1.2349550380447314, + "grad_norm": 1.4292932334311201, + "learning_rate": 6.993346024668019e-07, + "loss": 0.5104520916938782, + "step": 5356 + }, + { + "epoch": 1.235185612174314, + "grad_norm": 1.757397862406759, + "learning_rate": 6.98971004160165e-07, + "loss": 0.5257378816604614, + "step": 5357 + }, + { + "epoch": 1.2354161863038966, + "grad_norm": 1.5756368498047397, + "learning_rate": 6.986074496111429e-07, + "loss": 0.5624911785125732, + "step": 5358 + }, + { + "epoch": 1.2356467604334793, + "grad_norm": 1.4832170020848512, + "learning_rate": 6.982439388725828e-07, + "loss": 0.5186502933502197, + "step": 5359 + }, + { + "epoch": 1.235877334563062, + "grad_norm": 1.4333093290057806, + "learning_rate": 6.978804719973241e-07, + "loss": 0.42711856961250305, + "step": 5360 + }, + { + "epoch": 1.2361079086926448, + "grad_norm": 1.5710112274218073, + "learning_rate": 6.975170490382013e-07, + "loss": 0.525848388671875, + "step": 5361 + }, + { + "epoch": 1.2363384828222275, + "grad_norm": 1.475742371846223, + "learning_rate": 6.971536700480405e-07, + "loss": 0.41279107332229614, + "step": 5362 + }, + { + "epoch": 1.23656905695181, + "grad_norm": 1.381610773190275, + "learning_rate": 6.967903350796632e-07, + "loss": 0.38868075609207153, + "step": 
5363 + }, + { + "epoch": 1.2367996310813927, + "grad_norm": 1.2852056850014901, + "learning_rate": 6.964270441858837e-07, + "loss": 0.41875284910202026, + "step": 5364 + }, + { + "epoch": 1.2370302052109754, + "grad_norm": 1.6506819982730945, + "learning_rate": 6.960637974195096e-07, + "loss": 0.4754808843135834, + "step": 5365 + }, + { + "epoch": 1.237260779340558, + "grad_norm": 1.367170455716087, + "learning_rate": 6.957005948333434e-07, + "loss": 0.5073249340057373, + "step": 5366 + }, + { + "epoch": 1.2374913534701406, + "grad_norm": 1.4682970250918908, + "learning_rate": 6.953374364801792e-07, + "loss": 0.4545915126800537, + "step": 5367 + }, + { + "epoch": 1.2377219275997233, + "grad_norm": 1.4664699450973697, + "learning_rate": 6.949743224128064e-07, + "loss": 0.42797422409057617, + "step": 5368 + }, + { + "epoch": 1.237952501729306, + "grad_norm": 1.7409270878989862, + "learning_rate": 6.946112526840071e-07, + "loss": 0.570556104183197, + "step": 5369 + }, + { + "epoch": 1.2381830758588885, + "grad_norm": 1.21807525986395, + "learning_rate": 6.942482273465577e-07, + "loss": 0.3866136074066162, + "step": 5370 + }, + { + "epoch": 1.2384136499884713, + "grad_norm": 1.385922338157159, + "learning_rate": 6.938852464532267e-07, + "loss": 0.3716529309749603, + "step": 5371 + }, + { + "epoch": 1.238644224118054, + "grad_norm": 1.5756601150848535, + "learning_rate": 6.935223100567776e-07, + "loss": 0.4781096577644348, + "step": 5372 + }, + { + "epoch": 1.2388747982476367, + "grad_norm": 1.5023911555765588, + "learning_rate": 6.931594182099671e-07, + "loss": 0.4262877106666565, + "step": 5373 + }, + { + "epoch": 1.2391053723772192, + "grad_norm": 1.6023295142223875, + "learning_rate": 6.927965709655444e-07, + "loss": 0.49859267473220825, + "step": 5374 + }, + { + "epoch": 1.239335946506802, + "grad_norm": 1.8550612096678925, + "learning_rate": 6.924337683762539e-07, + "loss": 0.4710119664669037, + "step": 5375 + }, + { + "epoch": 1.2395665206363846, + "grad_norm": 
1.518585467890365, + "learning_rate": 6.92071010494832e-07, + "loss": 0.4974974989891052, + "step": 5376 + }, + { + "epoch": 1.2397970947659673, + "grad_norm": 2.029509938602293, + "learning_rate": 6.917082973740098e-07, + "loss": 0.4118514657020569, + "step": 5377 + }, + { + "epoch": 1.2400276688955498, + "grad_norm": 1.391922482329176, + "learning_rate": 6.913456290665106e-07, + "loss": 0.4223165214061737, + "step": 5378 + }, + { + "epoch": 1.2402582430251325, + "grad_norm": 1.5760276199817416, + "learning_rate": 6.909830056250526e-07, + "loss": 0.4896865487098694, + "step": 5379 + }, + { + "epoch": 1.2404888171547153, + "grad_norm": 1.35318854532684, + "learning_rate": 6.906204271023463e-07, + "loss": 0.36112266778945923, + "step": 5380 + }, + { + "epoch": 1.240719391284298, + "grad_norm": 1.4255868593911465, + "learning_rate": 6.902578935510969e-07, + "loss": 0.4665502905845642, + "step": 5381 + }, + { + "epoch": 1.2409499654138805, + "grad_norm": 1.6036447338223971, + "learning_rate": 6.898954050240013e-07, + "loss": 0.46059858798980713, + "step": 5382 + }, + { + "epoch": 1.2411805395434632, + "grad_norm": 1.4844055015741944, + "learning_rate": 6.895329615737515e-07, + "loss": 0.46149420738220215, + "step": 5383 + }, + { + "epoch": 1.241411113673046, + "grad_norm": 1.5602784439666317, + "learning_rate": 6.891705632530327e-07, + "loss": 0.42226743698120117, + "step": 5384 + }, + { + "epoch": 1.2416416878026286, + "grad_norm": 1.4308699177023212, + "learning_rate": 6.88808210114522e-07, + "loss": 0.45789939165115356, + "step": 5385 + }, + { + "epoch": 1.2418722619322111, + "grad_norm": 1.5754200685163184, + "learning_rate": 6.884459022108922e-07, + "loss": 0.44569891691207886, + "step": 5386 + }, + { + "epoch": 1.2421028360617938, + "grad_norm": 1.4099412845136035, + "learning_rate": 6.880836395948078e-07, + "loss": 0.3971112370491028, + "step": 5387 + }, + { + "epoch": 1.2423334101913766, + "grad_norm": 1.6636550459216706, + "learning_rate": 
6.877214223189278e-07, + "loss": 0.46052566170692444, + "step": 5388 + }, + { + "epoch": 1.2425639843209593, + "grad_norm": 1.2735689149473257, + "learning_rate": 6.873592504359037e-07, + "loss": 0.42730599641799927, + "step": 5389 + }, + { + "epoch": 1.2427945584505418, + "grad_norm": 1.5806143555224212, + "learning_rate": 6.869971239983814e-07, + "loss": 0.4391734004020691, + "step": 5390 + }, + { + "epoch": 1.2430251325801245, + "grad_norm": 1.5314248582389964, + "learning_rate": 6.866350430589989e-07, + "loss": 0.4523593485355377, + "step": 5391 + }, + { + "epoch": 1.2432557067097072, + "grad_norm": 1.587550694342246, + "learning_rate": 6.86273007670389e-07, + "loss": 0.5398315787315369, + "step": 5392 + }, + { + "epoch": 1.24348628083929, + "grad_norm": 1.2298139407771986, + "learning_rate": 6.859110178851767e-07, + "loss": 0.40480807423591614, + "step": 5393 + }, + { + "epoch": 1.2437168549688724, + "grad_norm": 1.4233815325100456, + "learning_rate": 6.855490737559816e-07, + "loss": 0.42483675479888916, + "step": 5394 + }, + { + "epoch": 1.2439474290984551, + "grad_norm": 1.611497963721617, + "learning_rate": 6.851871753354153e-07, + "loss": 0.39951619505882263, + "step": 5395 + }, + { + "epoch": 1.2441780032280378, + "grad_norm": 1.5084898015563448, + "learning_rate": 6.848253226760833e-07, + "loss": 0.48650771379470825, + "step": 5396 + }, + { + "epoch": 1.2444085773576206, + "grad_norm": 1.5899141960647352, + "learning_rate": 6.844635158305853e-07, + "loss": 0.5377830266952515, + "step": 5397 + }, + { + "epoch": 1.244639151487203, + "grad_norm": 1.667763606347776, + "learning_rate": 6.841017548515127e-07, + "loss": 0.4365614950656891, + "step": 5398 + }, + { + "epoch": 1.2448697256167858, + "grad_norm": 1.2560105349082187, + "learning_rate": 6.837400397914519e-07, + "loss": 0.39739400148391724, + "step": 5399 + }, + { + "epoch": 1.2451002997463685, + "grad_norm": 1.3287360038901976, + "learning_rate": 6.833783707029812e-07, + "loss": 0.4005683660507202, + 
"step": 5400 + }, + { + "epoch": 1.2453308738759512, + "grad_norm": 1.6646043641444999, + "learning_rate": 6.830167476386737e-07, + "loss": 0.5635108351707458, + "step": 5401 + }, + { + "epoch": 1.2455614480055337, + "grad_norm": 1.6642180514990483, + "learning_rate": 6.82655170651094e-07, + "loss": 0.4332388639450073, + "step": 5402 + }, + { + "epoch": 1.2457920221351164, + "grad_norm": 1.525164084943155, + "learning_rate": 6.822936397928015e-07, + "loss": 0.47506433725357056, + "step": 5403 + }, + { + "epoch": 1.2460225962646991, + "grad_norm": 1.600563207739989, + "learning_rate": 6.819321551163486e-07, + "loss": 0.5081777572631836, + "step": 5404 + }, + { + "epoch": 1.2462531703942819, + "grad_norm": 1.6650056699718765, + "learning_rate": 6.815707166742801e-07, + "loss": 0.4038957953453064, + "step": 5405 + }, + { + "epoch": 1.2464837445238643, + "grad_norm": 1.759676797230376, + "learning_rate": 6.812093245191354e-07, + "loss": 0.4665706753730774, + "step": 5406 + }, + { + "epoch": 1.246714318653447, + "grad_norm": 1.8957165771048585, + "learning_rate": 6.808479787034459e-07, + "loss": 0.45610785484313965, + "step": 5407 + }, + { + "epoch": 1.2469448927830298, + "grad_norm": 1.443572019443965, + "learning_rate": 6.804866792797377e-07, + "loss": 0.4334958493709564, + "step": 5408 + }, + { + "epoch": 1.2471754669126125, + "grad_norm": 1.4719822396111175, + "learning_rate": 6.801254263005283e-07, + "loss": 0.5505996942520142, + "step": 5409 + }, + { + "epoch": 1.247406041042195, + "grad_norm": 1.5261896109132582, + "learning_rate": 6.797642198183303e-07, + "loss": 0.5589424967765808, + "step": 5410 + }, + { + "epoch": 1.2476366151717777, + "grad_norm": 1.892082521677576, + "learning_rate": 6.794030598856483e-07, + "loss": 0.48142847418785095, + "step": 5411 + }, + { + "epoch": 1.2478671893013604, + "grad_norm": 1.6606812394072976, + "learning_rate": 6.790419465549811e-07, + "loss": 0.5549830198287964, + "step": 5412 + }, + { + "epoch": 1.2480977634309431, + 
"grad_norm": 1.6097248774465256, + "learning_rate": 6.786808798788193e-07, + "loss": 0.5974072217941284, + "step": 5413 + }, + { + "epoch": 1.2483283375605256, + "grad_norm": 1.3333137403479542, + "learning_rate": 6.783198599096484e-07, + "loss": 0.38189029693603516, + "step": 5414 + }, + { + "epoch": 1.2485589116901084, + "grad_norm": 1.4543286006354934, + "learning_rate": 6.779588866999459e-07, + "loss": 0.41150039434432983, + "step": 5415 + }, + { + "epoch": 1.248789485819691, + "grad_norm": 1.451215833026304, + "learning_rate": 6.775979603021828e-07, + "loss": 0.4291636645793915, + "step": 5416 + }, + { + "epoch": 1.2490200599492738, + "grad_norm": 1.2798211834451962, + "learning_rate": 6.772370807688242e-07, + "loss": 0.45324140787124634, + "step": 5417 + }, + { + "epoch": 1.2492506340788563, + "grad_norm": 1.3895968147090427, + "learning_rate": 6.768762481523262e-07, + "loss": 0.4748731851577759, + "step": 5418 + }, + { + "epoch": 1.249481208208439, + "grad_norm": 1.618628812481624, + "learning_rate": 6.765154625051408e-07, + "loss": 0.43602505326271057, + "step": 5419 + }, + { + "epoch": 1.2497117823380217, + "grad_norm": 1.4027608933739075, + "learning_rate": 6.761547238797112e-07, + "loss": 0.49135684967041016, + "step": 5420 + }, + { + "epoch": 1.2499423564676044, + "grad_norm": 1.6315360373382408, + "learning_rate": 6.757940323284747e-07, + "loss": 0.47508272528648376, + "step": 5421 + }, + { + "epoch": 1.250172930597187, + "grad_norm": 1.612865868213556, + "learning_rate": 6.754333879038611e-07, + "loss": 0.399259090423584, + "step": 5422 + }, + { + "epoch": 1.2504035047267696, + "grad_norm": 1.6878741312884291, + "learning_rate": 6.750727906582941e-07, + "loss": 0.426364004611969, + "step": 5423 + }, + { + "epoch": 1.2506340788563524, + "grad_norm": 1.4584807010931917, + "learning_rate": 6.747122406441903e-07, + "loss": 0.4641951322555542, + "step": 5424 + }, + { + "epoch": 1.250864652985935, + "grad_norm": 1.3880451781756755, + "learning_rate": 
6.743517379139585e-07, + "loss": 0.35008323192596436, + "step": 5425 + }, + { + "epoch": 1.2510952271155176, + "grad_norm": 1.4485633708895984, + "learning_rate": 6.739912825200022e-07, + "loss": 0.49627771973609924, + "step": 5426 + }, + { + "epoch": 1.2513258012451003, + "grad_norm": 1.628398042874366, + "learning_rate": 6.736308745147168e-07, + "loss": 0.4926851987838745, + "step": 5427 + }, + { + "epoch": 1.251556375374683, + "grad_norm": 1.622960147434406, + "learning_rate": 6.732705139504917e-07, + "loss": 0.44777536392211914, + "step": 5428 + }, + { + "epoch": 1.2517869495042655, + "grad_norm": 1.6523545202218224, + "learning_rate": 6.729102008797085e-07, + "loss": 0.39160430431365967, + "step": 5429 + }, + { + "epoch": 1.2520175236338482, + "grad_norm": 1.5184849781676724, + "learning_rate": 6.725499353547426e-07, + "loss": 0.4585273861885071, + "step": 5430 + }, + { + "epoch": 1.252248097763431, + "grad_norm": 1.5327675196324342, + "learning_rate": 6.721897174279621e-07, + "loss": 0.5245224237442017, + "step": 5431 + }, + { + "epoch": 1.2524786718930137, + "grad_norm": 1.5257069000403813, + "learning_rate": 6.718295471517288e-07, + "loss": 0.4217349886894226, + "step": 5432 + }, + { + "epoch": 1.2527092460225964, + "grad_norm": 1.4826939266004133, + "learning_rate": 6.714694245783963e-07, + "loss": 0.4944193661212921, + "step": 5433 + }, + { + "epoch": 1.2529398201521789, + "grad_norm": 1.387839760206308, + "learning_rate": 6.711093497603127e-07, + "loss": 0.5058057904243469, + "step": 5434 + }, + { + "epoch": 1.2531703942817616, + "grad_norm": 1.381621888753065, + "learning_rate": 6.707493227498186e-07, + "loss": 0.45669037103652954, + "step": 5435 + }, + { + "epoch": 1.2534009684113443, + "grad_norm": 1.5997486257834712, + "learning_rate": 6.703893435992469e-07, + "loss": 0.4248945116996765, + "step": 5436 + }, + { + "epoch": 1.2536315425409268, + "grad_norm": 1.6056111266165571, + "learning_rate": 6.700294123609249e-07, + "loss": 0.3984343707561493, + 
"step": 5437 + }, + { + "epoch": 1.2538621166705095, + "grad_norm": 1.5349078061254786, + "learning_rate": 6.696695290871715e-07, + "loss": 0.435299813747406, + "step": 5438 + }, + { + "epoch": 1.2540926908000922, + "grad_norm": 1.6277363060500583, + "learning_rate": 6.693096938303002e-07, + "loss": 0.4225304126739502, + "step": 5439 + }, + { + "epoch": 1.254323264929675, + "grad_norm": 1.6495416759002697, + "learning_rate": 6.689499066426161e-07, + "loss": 0.4686669111251831, + "step": 5440 + }, + { + "epoch": 1.2545538390592577, + "grad_norm": 1.5168957851404996, + "learning_rate": 6.685901675764186e-07, + "loss": 0.45163553953170776, + "step": 5441 + }, + { + "epoch": 1.2547844131888402, + "grad_norm": 1.3593822737620262, + "learning_rate": 6.682304766839986e-07, + "loss": 0.44223567843437195, + "step": 5442 + }, + { + "epoch": 1.2550149873184229, + "grad_norm": 1.5363469724843986, + "learning_rate": 6.678708340176413e-07, + "loss": 0.4008648991584778, + "step": 5443 + }, + { + "epoch": 1.2552455614480056, + "grad_norm": 1.4199248627467993, + "learning_rate": 6.675112396296245e-07, + "loss": 0.4500792324542999, + "step": 5444 + }, + { + "epoch": 1.255476135577588, + "grad_norm": 1.490145734356762, + "learning_rate": 6.671516935722183e-07, + "loss": 0.42558690905570984, + "step": 5445 + }, + { + "epoch": 1.2557067097071708, + "grad_norm": 1.7098682543926618, + "learning_rate": 6.667921958976871e-07, + "loss": 0.4676043391227722, + "step": 5446 + }, + { + "epoch": 1.2559372838367535, + "grad_norm": 1.8041492407407758, + "learning_rate": 6.664327466582869e-07, + "loss": 0.44114184379577637, + "step": 5447 + }, + { + "epoch": 1.2561678579663362, + "grad_norm": 1.6102069805165957, + "learning_rate": 6.660733459062679e-07, + "loss": 0.33865463733673096, + "step": 5448 + }, + { + "epoch": 1.256398432095919, + "grad_norm": 1.8619975614063338, + "learning_rate": 6.65713993693872e-07, + "loss": 0.5397414565086365, + "step": 5449 + }, + { + "epoch": 1.2566290062255014, + 
"grad_norm": 1.4730562973077854, + "learning_rate": 6.653546900733352e-07, + "loss": 0.49249517917633057, + "step": 5450 + }, + { + "epoch": 1.2568595803550842, + "grad_norm": 1.5757041605280757, + "learning_rate": 6.649954350968855e-07, + "loss": 0.5438433885574341, + "step": 5451 + }, + { + "epoch": 1.2570901544846669, + "grad_norm": 1.4727448576353426, + "learning_rate": 6.646362288167448e-07, + "loss": 0.43725037574768066, + "step": 5452 + }, + { + "epoch": 1.2573207286142494, + "grad_norm": 1.5159104216766552, + "learning_rate": 6.642770712851269e-07, + "loss": 0.5369226336479187, + "step": 5453 + }, + { + "epoch": 1.257551302743832, + "grad_norm": 1.4915531986930697, + "learning_rate": 6.63917962554239e-07, + "loss": 0.45022842288017273, + "step": 5454 + }, + { + "epoch": 1.2577818768734148, + "grad_norm": 1.6219974371712227, + "learning_rate": 6.635589026762818e-07, + "loss": 0.42483362555503845, + "step": 5455 + }, + { + "epoch": 1.2580124510029975, + "grad_norm": 1.4115832140490556, + "learning_rate": 6.631998917034474e-07, + "loss": 0.4909497797489166, + "step": 5456 + }, + { + "epoch": 1.2582430251325802, + "grad_norm": 1.3159817254483799, + "learning_rate": 6.628409296879223e-07, + "loss": 0.4927433431148529, + "step": 5457 + }, + { + "epoch": 1.2584735992621627, + "grad_norm": 1.550356576361105, + "learning_rate": 6.624820166818847e-07, + "loss": 0.4452761113643646, + "step": 5458 + }, + { + "epoch": 1.2587041733917455, + "grad_norm": 1.5683413746620685, + "learning_rate": 6.62123152737507e-07, + "loss": 0.4637982249259949, + "step": 5459 + }, + { + "epoch": 1.2589347475213282, + "grad_norm": 1.3293268937895057, + "learning_rate": 6.617643379069532e-07, + "loss": 0.3189438581466675, + "step": 5460 + }, + { + "epoch": 1.2591653216509107, + "grad_norm": 1.3296675722252447, + "learning_rate": 6.614055722423808e-07, + "loss": 0.420698881149292, + "step": 5461 + }, + { + "epoch": 1.2593958957804934, + "grad_norm": 1.5202476608747133, + "learning_rate": 
6.610468557959398e-07, + "loss": 0.5187642574310303, + "step": 5462 + }, + { + "epoch": 1.259626469910076, + "grad_norm": 1.4954844764147424, + "learning_rate": 6.606881886197741e-07, + "loss": 0.48519381880760193, + "step": 5463 + }, + { + "epoch": 1.2598570440396588, + "grad_norm": 1.4755140585184632, + "learning_rate": 6.60329570766019e-07, + "loss": 0.3930806815624237, + "step": 5464 + }, + { + "epoch": 1.2600876181692415, + "grad_norm": 1.8617928902566707, + "learning_rate": 6.599710022868027e-07, + "loss": 0.4890612065792084, + "step": 5465 + }, + { + "epoch": 1.260318192298824, + "grad_norm": 1.2781262224531547, + "learning_rate": 6.596124832342476e-07, + "loss": 0.4202774465084076, + "step": 5466 + }, + { + "epoch": 1.2605487664284067, + "grad_norm": 1.5196012608537903, + "learning_rate": 6.592540136604674e-07, + "loss": 0.5053761005401611, + "step": 5467 + }, + { + "epoch": 1.2607793405579895, + "grad_norm": 1.4874107682553572, + "learning_rate": 6.588955936175702e-07, + "loss": 0.4827175736427307, + "step": 5468 + }, + { + "epoch": 1.261009914687572, + "grad_norm": 1.4659080652243894, + "learning_rate": 6.585372231576551e-07, + "loss": 0.45179229974746704, + "step": 5469 + }, + { + "epoch": 1.2612404888171547, + "grad_norm": 1.3781712796058982, + "learning_rate": 6.581789023328155e-07, + "loss": 0.4024949073791504, + "step": 5470 + }, + { + "epoch": 1.2614710629467374, + "grad_norm": 1.7288759385339574, + "learning_rate": 6.578206311951363e-07, + "loss": 0.48839491605758667, + "step": 5471 + }, + { + "epoch": 1.26170163707632, + "grad_norm": 1.4778086795689929, + "learning_rate": 6.574624097966968e-07, + "loss": 0.45897620916366577, + "step": 5472 + }, + { + "epoch": 1.2619322112059028, + "grad_norm": 1.5548512112712307, + "learning_rate": 6.571042381895671e-07, + "loss": 0.48471882939338684, + "step": 5473 + }, + { + "epoch": 1.2621627853354853, + "grad_norm": 2.0045804163216414, + "learning_rate": 6.567461164258117e-07, + "loss": 0.44159913063049316, + 
"step": 5474 + }, + { + "epoch": 1.262393359465068, + "grad_norm": 1.5752243442253915, + "learning_rate": 6.563880445574872e-07, + "loss": 0.39186012744903564, + "step": 5475 + }, + { + "epoch": 1.2626239335946507, + "grad_norm": 1.818057995697113, + "learning_rate": 6.560300226366425e-07, + "loss": 0.5332233905792236, + "step": 5476 + }, + { + "epoch": 1.2628545077242332, + "grad_norm": 1.350222227503923, + "learning_rate": 6.556720507153201e-07, + "loss": 0.4252084195613861, + "step": 5477 + }, + { + "epoch": 1.263085081853816, + "grad_norm": 1.4204993118440263, + "learning_rate": 6.553141288455548e-07, + "loss": 0.36927711963653564, + "step": 5478 + }, + { + "epoch": 1.2633156559833987, + "grad_norm": 1.5676826878414558, + "learning_rate": 6.549562570793745e-07, + "loss": 0.4405602216720581, + "step": 5479 + }, + { + "epoch": 1.2635462301129814, + "grad_norm": 1.5245742985153417, + "learning_rate": 6.545984354687986e-07, + "loss": 0.5691590309143066, + "step": 5480 + }, + { + "epoch": 1.2637768042425641, + "grad_norm": 1.468644623890153, + "learning_rate": 6.542406640658411e-07, + "loss": 0.3750354051589966, + "step": 5481 + }, + { + "epoch": 1.2640073783721466, + "grad_norm": 1.5266320276968284, + "learning_rate": 6.538829429225068e-07, + "loss": 0.47816041111946106, + "step": 5482 + }, + { + "epoch": 1.2642379525017293, + "grad_norm": 1.4911563737024116, + "learning_rate": 6.535252720907951e-07, + "loss": 0.42470186948776245, + "step": 5483 + }, + { + "epoch": 1.264468526631312, + "grad_norm": 1.4256480441382235, + "learning_rate": 6.531676516226961e-07, + "loss": 0.37356555461883545, + "step": 5484 + }, + { + "epoch": 1.2646991007608945, + "grad_norm": 1.4604810104028516, + "learning_rate": 6.528100815701942e-07, + "loss": 0.4895293116569519, + "step": 5485 + }, + { + "epoch": 1.2649296748904773, + "grad_norm": 1.9575945537740915, + "learning_rate": 6.524525619852656e-07, + "loss": 0.4963725805282593, + "step": 5486 + }, + { + "epoch": 1.26516024902006, + 
"grad_norm": 1.7629474018170985, + "learning_rate": 6.520950929198792e-07, + "loss": 0.5443764925003052, + "step": 5487 + }, + { + "epoch": 1.2653908231496427, + "grad_norm": 1.2536482779264142, + "learning_rate": 6.517376744259972e-07, + "loss": 0.400549054145813, + "step": 5488 + }, + { + "epoch": 1.2656213972792254, + "grad_norm": 1.8850482793273033, + "learning_rate": 6.513803065555736e-07, + "loss": 0.46384042501449585, + "step": 5489 + }, + { + "epoch": 1.265851971408808, + "grad_norm": 1.4893040501119004, + "learning_rate": 6.510229893605556e-07, + "loss": 0.5044240951538086, + "step": 5490 + }, + { + "epoch": 1.2660825455383906, + "grad_norm": 1.477450831039122, + "learning_rate": 6.506657228928827e-07, + "loss": 0.4544214904308319, + "step": 5491 + }, + { + "epoch": 1.2663131196679733, + "grad_norm": 1.441487086349296, + "learning_rate": 6.503085072044878e-07, + "loss": 0.36688071489334106, + "step": 5492 + }, + { + "epoch": 1.2665436937975558, + "grad_norm": 1.4594163949727883, + "learning_rate": 6.499513423472951e-07, + "loss": 0.4058225154876709, + "step": 5493 + }, + { + "epoch": 1.2667742679271385, + "grad_norm": 1.4647938941101153, + "learning_rate": 6.495942283732225e-07, + "loss": 0.36429229378700256, + "step": 5494 + }, + { + "epoch": 1.2670048420567213, + "grad_norm": 1.7674965095028434, + "learning_rate": 6.492371653341802e-07, + "loss": 0.47116899490356445, + "step": 5495 + }, + { + "epoch": 1.267235416186304, + "grad_norm": 1.4923904627456126, + "learning_rate": 6.488801532820706e-07, + "loss": 0.4437965750694275, + "step": 5496 + }, + { + "epoch": 1.2674659903158867, + "grad_norm": 1.5533994295939695, + "learning_rate": 6.485231922687893e-07, + "loss": 0.4810328483581543, + "step": 5497 + }, + { + "epoch": 1.2676965644454692, + "grad_norm": 1.4632129166419525, + "learning_rate": 6.481662823462238e-07, + "loss": 0.362907350063324, + "step": 5498 + }, + { + "epoch": 1.267927138575052, + "grad_norm": 1.375729756251652, + "learning_rate": 
6.478094235662554e-07, + "loss": 0.43647170066833496, + "step": 5499 + }, + { + "epoch": 1.2681577127046346, + "grad_norm": 1.422215620145209, + "learning_rate": 6.474526159807563e-07, + "loss": 0.4566631317138672, + "step": 5500 + }, + { + "epoch": 1.2683882868342171, + "grad_norm": 1.5097982290449063, + "learning_rate": 6.470958596415925e-07, + "loss": 0.3940081298351288, + "step": 5501 + }, + { + "epoch": 1.2686188609637998, + "grad_norm": 1.617526881385646, + "learning_rate": 6.46739154600622e-07, + "loss": 0.5275603532791138, + "step": 5502 + }, + { + "epoch": 1.2688494350933825, + "grad_norm": 1.846449658895825, + "learning_rate": 6.463825009096959e-07, + "loss": 0.42546436190605164, + "step": 5503 + }, + { + "epoch": 1.2690800092229653, + "grad_norm": 1.6068032996774941, + "learning_rate": 6.460258986206566e-07, + "loss": 0.3833821713924408, + "step": 5504 + }, + { + "epoch": 1.2693105833525478, + "grad_norm": 1.4806797403979666, + "learning_rate": 6.456693477853408e-07, + "loss": 0.5056046843528748, + "step": 5505 + }, + { + "epoch": 1.2695411574821305, + "grad_norm": 1.6345259734279236, + "learning_rate": 6.453128484555764e-07, + "loss": 0.3544192910194397, + "step": 5506 + }, + { + "epoch": 1.2697717316117132, + "grad_norm": 1.684231386275673, + "learning_rate": 6.449564006831836e-07, + "loss": 0.47164130210876465, + "step": 5507 + }, + { + "epoch": 1.2700023057412957, + "grad_norm": 1.3334241214641123, + "learning_rate": 6.446000045199765e-07, + "loss": 0.4580638110637665, + "step": 5508 + }, + { + "epoch": 1.2702328798708784, + "grad_norm": 1.2809631136030655, + "learning_rate": 6.442436600177606e-07, + "loss": 0.45945844054222107, + "step": 5509 + }, + { + "epoch": 1.2704634540004611, + "grad_norm": 1.447660138842985, + "learning_rate": 6.438873672283343e-07, + "loss": 0.5539910793304443, + "step": 5510 + }, + { + "epoch": 1.2706940281300438, + "grad_norm": 1.6550705344684873, + "learning_rate": 6.43531126203488e-07, + "loss": 0.4661790132522583, + 
"step": 5511 + }, + { + "epoch": 1.2709246022596266, + "grad_norm": 1.7015547164246037, + "learning_rate": 6.431749369950057e-07, + "loss": 0.3781178891658783, + "step": 5512 + }, + { + "epoch": 1.271155176389209, + "grad_norm": 1.571227420481097, + "learning_rate": 6.428187996546621e-07, + "loss": 0.4858461618423462, + "step": 5513 + }, + { + "epoch": 1.2713857505187918, + "grad_norm": 1.5308384830726272, + "learning_rate": 6.424627142342262e-07, + "loss": 0.5003963708877563, + "step": 5514 + }, + { + "epoch": 1.2716163246483745, + "grad_norm": 1.3605664168425382, + "learning_rate": 6.421066807854584e-07, + "loss": 0.4620795249938965, + "step": 5515 + }, + { + "epoch": 1.271846898777957, + "grad_norm": 1.385915858471925, + "learning_rate": 6.417506993601114e-07, + "loss": 0.43998581171035767, + "step": 5516 + }, + { + "epoch": 1.2720774729075397, + "grad_norm": 1.6777446711260993, + "learning_rate": 6.413947700099311e-07, + "loss": 0.5204107165336609, + "step": 5517 + }, + { + "epoch": 1.2723080470371224, + "grad_norm": 1.5515853600398104, + "learning_rate": 6.410388927866551e-07, + "loss": 0.46675950288772583, + "step": 5518 + }, + { + "epoch": 1.2725386211667051, + "grad_norm": 1.4020610518461032, + "learning_rate": 6.406830677420146e-07, + "loss": 0.4002436101436615, + "step": 5519 + }, + { + "epoch": 1.2727691952962878, + "grad_norm": 1.6847281008342299, + "learning_rate": 6.403272949277312e-07, + "loss": 0.4051012396812439, + "step": 5520 + }, + { + "epoch": 1.2729997694258703, + "grad_norm": 1.4780078562694616, + "learning_rate": 6.399715743955209e-07, + "loss": 0.4847797751426697, + "step": 5521 + }, + { + "epoch": 1.273230343555453, + "grad_norm": 1.6389704995828815, + "learning_rate": 6.396159061970907e-07, + "loss": 0.4742053151130676, + "step": 5522 + }, + { + "epoch": 1.2734609176850358, + "grad_norm": 1.4123933831310747, + "learning_rate": 6.392602903841415e-07, + "loss": 0.44291001558303833, + "step": 5523 + }, + { + "epoch": 1.2736914918146183, + 
"grad_norm": 1.438016627678946, + "learning_rate": 6.389047270083646e-07, + "loss": 0.38993996381759644, + "step": 5524 + }, + { + "epoch": 1.273922065944201, + "grad_norm": 1.5621491080936318, + "learning_rate": 6.385492161214454e-07, + "loss": 0.5045995116233826, + "step": 5525 + }, + { + "epoch": 1.2741526400737837, + "grad_norm": 1.4769511790871679, + "learning_rate": 6.381937577750611e-07, + "loss": 0.4377788305282593, + "step": 5526 + }, + { + "epoch": 1.2743832142033664, + "grad_norm": 1.470801087764595, + "learning_rate": 6.378383520208806e-07, + "loss": 0.5363353490829468, + "step": 5527 + }, + { + "epoch": 1.2746137883329491, + "grad_norm": 1.340047582641372, + "learning_rate": 6.374829989105661e-07, + "loss": 0.42230546474456787, + "step": 5528 + }, + { + "epoch": 1.2748443624625316, + "grad_norm": 1.2882420810653734, + "learning_rate": 6.371276984957715e-07, + "loss": 0.39565908908843994, + "step": 5529 + }, + { + "epoch": 1.2750749365921143, + "grad_norm": 1.3633189139651096, + "learning_rate": 6.36772450828144e-07, + "loss": 0.4375323951244354, + "step": 5530 + }, + { + "epoch": 1.275305510721697, + "grad_norm": 1.5028848525750826, + "learning_rate": 6.364172559593215e-07, + "loss": 0.4901241660118103, + "step": 5531 + }, + { + "epoch": 1.2755360848512796, + "grad_norm": 1.3653729298225772, + "learning_rate": 6.360621139409359e-07, + "loss": 0.4108780026435852, + "step": 5532 + }, + { + "epoch": 1.2757666589808623, + "grad_norm": 1.4800363393725149, + "learning_rate": 6.357070248246102e-07, + "loss": 0.43631279468536377, + "step": 5533 + }, + { + "epoch": 1.275997233110445, + "grad_norm": 1.5982504223136969, + "learning_rate": 6.353519886619607e-07, + "loss": 0.4623757004737854, + "step": 5534 + }, + { + "epoch": 1.2762278072400277, + "grad_norm": 1.5284512936045929, + "learning_rate": 6.349970055045954e-07, + "loss": 0.41303062438964844, + "step": 5535 + }, + { + "epoch": 1.2764583813696104, + "grad_norm": 1.7689201212047627, + "learning_rate": 
6.34642075404114e-07, + "loss": 0.5157878994941711, + "step": 5536 + }, + { + "epoch": 1.276688955499193, + "grad_norm": 1.6093049161057067, + "learning_rate": 6.342871984121103e-07, + "loss": 0.41295093297958374, + "step": 5537 + }, + { + "epoch": 1.2769195296287756, + "grad_norm": 1.4185213028911483, + "learning_rate": 6.339323745801682e-07, + "loss": 0.4636460542678833, + "step": 5538 + }, + { + "epoch": 1.2771501037583584, + "grad_norm": 1.44057433861511, + "learning_rate": 6.335776039598659e-07, + "loss": 0.45273804664611816, + "step": 5539 + }, + { + "epoch": 1.2773806778879409, + "grad_norm": 1.7212686324453035, + "learning_rate": 6.332228866027721e-07, + "loss": 0.4562758803367615, + "step": 5540 + }, + { + "epoch": 1.2776112520175236, + "grad_norm": 1.5821328258880776, + "learning_rate": 6.328682225604491e-07, + "loss": 0.3162837326526642, + "step": 5541 + }, + { + "epoch": 1.2778418261471063, + "grad_norm": 1.4226618207277133, + "learning_rate": 6.325136118844504e-07, + "loss": 0.48594871163368225, + "step": 5542 + }, + { + "epoch": 1.278072400276689, + "grad_norm": 1.398820126458318, + "learning_rate": 6.321590546263231e-07, + "loss": 0.4346798360347748, + "step": 5543 + }, + { + "epoch": 1.2783029744062717, + "grad_norm": 1.7945463027279862, + "learning_rate": 6.318045508376046e-07, + "loss": 0.5133204460144043, + "step": 5544 + }, + { + "epoch": 1.2785335485358542, + "grad_norm": 1.6462955147402891, + "learning_rate": 6.314501005698266e-07, + "loss": 0.40679338574409485, + "step": 5545 + }, + { + "epoch": 1.278764122665437, + "grad_norm": 1.341754342655084, + "learning_rate": 6.310957038745117e-07, + "loss": 0.363874614238739, + "step": 5546 + }, + { + "epoch": 1.2789946967950196, + "grad_norm": 1.3013776361069782, + "learning_rate": 6.307413608031746e-07, + "loss": 0.43020665645599365, + "step": 5547 + }, + { + "epoch": 1.2792252709246021, + "grad_norm": 1.301444097702827, + "learning_rate": 6.303870714073233e-07, + "loss": 0.5280083417892456, + 
"step": 5548 + }, + { + "epoch": 1.2794558450541849, + "grad_norm": 1.803757705570539, + "learning_rate": 6.300328357384568e-07, + "loss": 0.4584185481071472, + "step": 5549 + }, + { + "epoch": 1.2796864191837676, + "grad_norm": 1.4682285924702114, + "learning_rate": 6.296786538480675e-07, + "loss": 0.4068162441253662, + "step": 5550 + }, + { + "epoch": 1.2799169933133503, + "grad_norm": 1.361515758715701, + "learning_rate": 6.293245257876387e-07, + "loss": 0.4336085915565491, + "step": 5551 + }, + { + "epoch": 1.280147567442933, + "grad_norm": 1.4906971509519245, + "learning_rate": 6.289704516086468e-07, + "loss": 0.4932886064052582, + "step": 5552 + }, + { + "epoch": 1.2803781415725155, + "grad_norm": 1.3660207414526373, + "learning_rate": 6.2861643136256e-07, + "loss": 0.437292218208313, + "step": 5553 + }, + { + "epoch": 1.2806087157020982, + "grad_norm": 1.5017461161180483, + "learning_rate": 6.28262465100839e-07, + "loss": 0.4131085276603699, + "step": 5554 + }, + { + "epoch": 1.280839289831681, + "grad_norm": 1.441603184912447, + "learning_rate": 6.27908552874936e-07, + "loss": 0.4146266579627991, + "step": 5555 + }, + { + "epoch": 1.2810698639612634, + "grad_norm": 1.6115588407174422, + "learning_rate": 6.275546947362957e-07, + "loss": 0.4778539538383484, + "step": 5556 + }, + { + "epoch": 1.2813004380908461, + "grad_norm": 1.4722189673341872, + "learning_rate": 6.272008907363555e-07, + "loss": 0.3989019989967346, + "step": 5557 + }, + { + "epoch": 1.2815310122204289, + "grad_norm": 1.5188067628601776, + "learning_rate": 6.268471409265436e-07, + "loss": 0.4433528184890747, + "step": 5558 + }, + { + "epoch": 1.2817615863500116, + "grad_norm": 1.4551631195697798, + "learning_rate": 6.264934453582817e-07, + "loss": 0.46929931640625, + "step": 5559 + }, + { + "epoch": 1.2819921604795943, + "grad_norm": 1.749202490253535, + "learning_rate": 6.261398040829829e-07, + "loss": 0.4908202886581421, + "step": 5560 + }, + { + "epoch": 1.2822227346091768, + "grad_norm": 
1.766310768413501, + "learning_rate": 6.257862171520528e-07, + "loss": 0.44195377826690674, + "step": 5561 + }, + { + "epoch": 1.2824533087387595, + "grad_norm": 1.8716445464357578, + "learning_rate": 6.254326846168882e-07, + "loss": 0.548696756362915, + "step": 5562 + }, + { + "epoch": 1.2826838828683422, + "grad_norm": 1.6355324229757326, + "learning_rate": 6.250792065288794e-07, + "loss": 0.4015994668006897, + "step": 5563 + }, + { + "epoch": 1.2829144569979247, + "grad_norm": 1.5798153885574688, + "learning_rate": 6.247257829394074e-07, + "loss": 0.4281688928604126, + "step": 5564 + }, + { + "epoch": 1.2831450311275074, + "grad_norm": 1.2159971773233473, + "learning_rate": 6.243724138998462e-07, + "loss": 0.37623634934425354, + "step": 5565 + }, + { + "epoch": 1.2833756052570902, + "grad_norm": 1.7282596196498647, + "learning_rate": 6.240190994615617e-07, + "loss": 0.4753819704055786, + "step": 5566 + }, + { + "epoch": 1.2836061793866729, + "grad_norm": 1.8092084567061366, + "learning_rate": 6.236658396759111e-07, + "loss": 0.4584893584251404, + "step": 5567 + }, + { + "epoch": 1.2838367535162556, + "grad_norm": 1.598249680169706, + "learning_rate": 6.23312634594245e-07, + "loss": 0.445067435503006, + "step": 5568 + }, + { + "epoch": 1.284067327645838, + "grad_norm": 1.402901275205923, + "learning_rate": 6.229594842679049e-07, + "loss": 0.4209640920162201, + "step": 5569 + }, + { + "epoch": 1.2842979017754208, + "grad_norm": 1.3481434606649714, + "learning_rate": 6.226063887482254e-07, + "loss": 0.34620141983032227, + "step": 5570 + }, + { + "epoch": 1.2845284759050035, + "grad_norm": 1.2702834444597235, + "learning_rate": 6.222533480865315e-07, + "loss": 0.43683767318725586, + "step": 5571 + }, + { + "epoch": 1.284759050034586, + "grad_norm": 1.5394879174992184, + "learning_rate": 6.219003623341421e-07, + "loss": 0.45881450176239014, + "step": 5572 + }, + { + "epoch": 1.2849896241641687, + "grad_norm": 1.2015099259152706, + "learning_rate": 
6.215474315423667e-07, + "loss": 0.40115928649902344, + "step": 5573 + }, + { + "epoch": 1.2852201982937514, + "grad_norm": 1.5480428253925462, + "learning_rate": 6.211945557625082e-07, + "loss": 0.4181373119354248, + "step": 5574 + }, + { + "epoch": 1.2854507724233342, + "grad_norm": 1.6874872010842208, + "learning_rate": 6.208417350458598e-07, + "loss": 0.4743300676345825, + "step": 5575 + }, + { + "epoch": 1.2856813465529169, + "grad_norm": 1.6331906817141153, + "learning_rate": 6.204889694437077e-07, + "loss": 0.4236707091331482, + "step": 5576 + }, + { + "epoch": 1.2859119206824994, + "grad_norm": 1.1887995996963334, + "learning_rate": 6.201362590073305e-07, + "loss": 0.4105497896671295, + "step": 5577 + }, + { + "epoch": 1.286142494812082, + "grad_norm": 1.3982883240902815, + "learning_rate": 6.197836037879973e-07, + "loss": 0.4164474606513977, + "step": 5578 + }, + { + "epoch": 1.2863730689416648, + "grad_norm": 1.648111600369129, + "learning_rate": 6.19431003836971e-07, + "loss": 0.49809616804122925, + "step": 5579 + }, + { + "epoch": 1.2866036430712473, + "grad_norm": 1.608787056057215, + "learning_rate": 6.19078459205505e-07, + "loss": 0.4902994632720947, + "step": 5580 + }, + { + "epoch": 1.28683421720083, + "grad_norm": 1.336430500063446, + "learning_rate": 6.18725969944846e-07, + "loss": 0.3697085380554199, + "step": 5581 + }, + { + "epoch": 1.2870647913304127, + "grad_norm": 1.353359914681952, + "learning_rate": 6.183735361062309e-07, + "loss": 0.446627140045166, + "step": 5582 + }, + { + "epoch": 1.2872953654599955, + "grad_norm": 1.590519620379444, + "learning_rate": 6.180211577408901e-07, + "loss": 0.39521220326423645, + "step": 5583 + }, + { + "epoch": 1.2875259395895782, + "grad_norm": 1.7929636253307002, + "learning_rate": 6.176688349000452e-07, + "loss": 0.6308573484420776, + "step": 5584 + }, + { + "epoch": 1.2877565137191607, + "grad_norm": 1.5017758457543093, + "learning_rate": 6.173165676349102e-07, + "loss": 0.4558343291282654, + "step": 
5585 + }, + { + "epoch": 1.2879870878487434, + "grad_norm": 1.4546689222111522, + "learning_rate": 6.169643559966906e-07, + "loss": 0.5487015247344971, + "step": 5586 + }, + { + "epoch": 1.288217661978326, + "grad_norm": 1.3949279502201517, + "learning_rate": 6.166122000365834e-07, + "loss": 0.39074039459228516, + "step": 5587 + }, + { + "epoch": 1.2884482361079086, + "grad_norm": 1.4687466147876906, + "learning_rate": 6.162600998057787e-07, + "loss": 0.5136120915412903, + "step": 5588 + }, + { + "epoch": 1.2886788102374913, + "grad_norm": 1.5457442901158343, + "learning_rate": 6.159080553554572e-07, + "loss": 0.5344336628913879, + "step": 5589 + }, + { + "epoch": 1.288909384367074, + "grad_norm": 1.5840783894802135, + "learning_rate": 6.15556066736793e-07, + "loss": 0.5204205513000488, + "step": 5590 + }, + { + "epoch": 1.2891399584966567, + "grad_norm": 1.588345092971114, + "learning_rate": 6.152041340009504e-07, + "loss": 0.4768211245536804, + "step": 5591 + }, + { + "epoch": 1.2893705326262395, + "grad_norm": 2.0914169507965936, + "learning_rate": 6.148522571990868e-07, + "loss": 0.44098299741744995, + "step": 5592 + }, + { + "epoch": 1.289601106755822, + "grad_norm": 1.6411833405865308, + "learning_rate": 6.145004363823509e-07, + "loss": 0.5038055181503296, + "step": 5593 + }, + { + "epoch": 1.2898316808854047, + "grad_norm": 1.6256634474518743, + "learning_rate": 6.141486716018837e-07, + "loss": 0.417998343706131, + "step": 5594 + }, + { + "epoch": 1.2900622550149874, + "grad_norm": 1.755327490864145, + "learning_rate": 6.137969629088174e-07, + "loss": 0.48858124017715454, + "step": 5595 + }, + { + "epoch": 1.2902928291445699, + "grad_norm": 1.6236287189755654, + "learning_rate": 6.134453103542765e-07, + "loss": 0.46988582611083984, + "step": 5596 + }, + { + "epoch": 1.2905234032741526, + "grad_norm": 1.4715150644247719, + "learning_rate": 6.130937139893779e-07, + "loss": 0.5100589394569397, + "step": 5597 + }, + { + "epoch": 1.2907539774037353, + 
"grad_norm": 1.861124742863941, + "learning_rate": 6.127421738652286e-07, + "loss": 0.490558922290802, + "step": 5598 + }, + { + "epoch": 1.290984551533318, + "grad_norm": 1.624496792014592, + "learning_rate": 6.123906900329291e-07, + "loss": 0.4749597907066345, + "step": 5599 + }, + { + "epoch": 1.2912151256629008, + "grad_norm": 1.4155787175262067, + "learning_rate": 6.12039262543571e-07, + "loss": 0.5006792545318604, + "step": 5600 + }, + { + "epoch": 1.2914456997924832, + "grad_norm": 1.6772265070157861, + "learning_rate": 6.116878914482384e-07, + "loss": 0.46902909874916077, + "step": 5601 + }, + { + "epoch": 1.291676273922066, + "grad_norm": 1.4563548131763813, + "learning_rate": 6.113365767980059e-07, + "loss": 0.46765559911727905, + "step": 5602 + }, + { + "epoch": 1.2919068480516487, + "grad_norm": 1.4143636586875892, + "learning_rate": 6.10985318643941e-07, + "loss": 0.45960646867752075, + "step": 5603 + }, + { + "epoch": 1.2921374221812312, + "grad_norm": 1.578129032516793, + "learning_rate": 6.106341170371024e-07, + "loss": 0.4067912697792053, + "step": 5604 + }, + { + "epoch": 1.292367996310814, + "grad_norm": 1.653263856685772, + "learning_rate": 6.102829720285414e-07, + "loss": 0.45004114508628845, + "step": 5605 + }, + { + "epoch": 1.2925985704403966, + "grad_norm": 1.698803058368325, + "learning_rate": 6.099318836692999e-07, + "loss": 0.5086014270782471, + "step": 5606 + }, + { + "epoch": 1.2928291445699793, + "grad_norm": 1.5400277013654406, + "learning_rate": 6.095808520104122e-07, + "loss": 0.49985191226005554, + "step": 5607 + }, + { + "epoch": 1.293059718699562, + "grad_norm": 1.5622376081366391, + "learning_rate": 6.092298771029047e-07, + "loss": 0.5066381096839905, + "step": 5608 + }, + { + "epoch": 1.2932902928291445, + "grad_norm": 1.5786958248418999, + "learning_rate": 6.088789589977947e-07, + "loss": 0.49626559019088745, + "step": 5609 + }, + { + "epoch": 1.2935208669587273, + "grad_norm": 1.6542820345168319, + "learning_rate": 
6.085280977460921e-07, + "loss": 0.4837498962879181, + "step": 5610 + }, + { + "epoch": 1.29375144108831, + "grad_norm": 1.3607897650960659, + "learning_rate": 6.081772933987977e-07, + "loss": 0.41308102011680603, + "step": 5611 + }, + { + "epoch": 1.2939820152178925, + "grad_norm": 1.4026215025684987, + "learning_rate": 6.078265460069048e-07, + "loss": 0.4453086853027344, + "step": 5612 + }, + { + "epoch": 1.2942125893474752, + "grad_norm": 1.5506248233039113, + "learning_rate": 6.074758556213976e-07, + "loss": 0.4700174927711487, + "step": 5613 + }, + { + "epoch": 1.294443163477058, + "grad_norm": 1.6021152444285431, + "learning_rate": 6.071252222932537e-07, + "loss": 0.578227162361145, + "step": 5614 + }, + { + "epoch": 1.2946737376066406, + "grad_norm": 1.3711009132002785, + "learning_rate": 6.067746460734398e-07, + "loss": 0.36468571424484253, + "step": 5615 + }, + { + "epoch": 1.2949043117362231, + "grad_norm": 1.7197393040240752, + "learning_rate": 6.064241270129166e-07, + "loss": 0.4793199896812439, + "step": 5616 + }, + { + "epoch": 1.2951348858658058, + "grad_norm": 1.4731744493442007, + "learning_rate": 6.060736651626355e-07, + "loss": 0.40342214703559875, + "step": 5617 + }, + { + "epoch": 1.2953654599953885, + "grad_norm": 1.2868571274228024, + "learning_rate": 6.05723260573539e-07, + "loss": 0.4212435185909271, + "step": 5618 + }, + { + "epoch": 1.295596034124971, + "grad_norm": 1.592545901664945, + "learning_rate": 6.053729132965626e-07, + "loss": 0.44668713212013245, + "step": 5619 + }, + { + "epoch": 1.2958266082545538, + "grad_norm": 1.3590289444558108, + "learning_rate": 6.050226233826326e-07, + "loss": 0.5159831643104553, + "step": 5620 + }, + { + "epoch": 1.2960571823841365, + "grad_norm": 1.792827614220507, + "learning_rate": 6.046723908826676e-07, + "loss": 0.5091866850852966, + "step": 5621 + }, + { + "epoch": 1.2962877565137192, + "grad_norm": 1.3636713576072057, + "learning_rate": 6.043222158475767e-07, + "loss": 0.34838563203811646, + 
"step": 5622 + }, + { + "epoch": 1.296518330643302, + "grad_norm": 1.679394698956229, + "learning_rate": 6.039720983282621e-07, + "loss": 0.46576952934265137, + "step": 5623 + }, + { + "epoch": 1.2967489047728844, + "grad_norm": 1.5739745386461328, + "learning_rate": 6.036220383756163e-07, + "loss": 0.4971234202384949, + "step": 5624 + }, + { + "epoch": 1.2969794789024671, + "grad_norm": 1.3832811037885837, + "learning_rate": 6.03272036040525e-07, + "loss": 0.4792482256889343, + "step": 5625 + }, + { + "epoch": 1.2972100530320498, + "grad_norm": 1.5438407741127544, + "learning_rate": 6.029220913738636e-07, + "loss": 0.45584213733673096, + "step": 5626 + }, + { + "epoch": 1.2974406271616323, + "grad_norm": 2.1628056802136686, + "learning_rate": 6.025722044265004e-07, + "loss": 0.5094096064567566, + "step": 5627 + }, + { + "epoch": 1.297671201291215, + "grad_norm": 1.2707985126710273, + "learning_rate": 6.022223752492954e-07, + "loss": 0.33178865909576416, + "step": 5628 + }, + { + "epoch": 1.2979017754207978, + "grad_norm": 1.4977758648466553, + "learning_rate": 6.018726038930991e-07, + "loss": 0.4955121874809265, + "step": 5629 + }, + { + "epoch": 1.2981323495503805, + "grad_norm": 1.9087861970540962, + "learning_rate": 6.01522890408755e-07, + "loss": 0.46253639459609985, + "step": 5630 + }, + { + "epoch": 1.2983629236799632, + "grad_norm": 1.725580686624441, + "learning_rate": 6.011732348470971e-07, + "loss": 0.4760236442089081, + "step": 5631 + }, + { + "epoch": 1.2985934978095457, + "grad_norm": 1.487451213133888, + "learning_rate": 6.008236372589516e-07, + "loss": 0.44413092732429504, + "step": 5632 + }, + { + "epoch": 1.2988240719391284, + "grad_norm": 1.5710401716420814, + "learning_rate": 6.004740976951358e-07, + "loss": 0.5431559681892395, + "step": 5633 + }, + { + "epoch": 1.2990546460687111, + "grad_norm": 1.448678008923642, + "learning_rate": 6.001246162064592e-07, + "loss": 0.41276806592941284, + "step": 5634 + }, + { + "epoch": 1.2992852201982936, + 
"grad_norm": 1.8698453553316883, + "learning_rate": 5.997751928437219e-07, + "loss": 0.3998986482620239, + "step": 5635 + }, + { + "epoch": 1.2995157943278763, + "grad_norm": 1.7019145009400753, + "learning_rate": 5.994258276577169e-07, + "loss": 0.47741782665252686, + "step": 5636 + }, + { + "epoch": 1.299746368457459, + "grad_norm": 1.8471752326794122, + "learning_rate": 5.990765206992277e-07, + "loss": 0.4294115900993347, + "step": 5637 + }, + { + "epoch": 1.2999769425870418, + "grad_norm": 1.2676173155963009, + "learning_rate": 5.987272720190288e-07, + "loss": 0.4717773199081421, + "step": 5638 + }, + { + "epoch": 1.3002075167166245, + "grad_norm": 1.4764264012124577, + "learning_rate": 5.983780816678881e-07, + "loss": 0.5169499516487122, + "step": 5639 + }, + { + "epoch": 1.300438090846207, + "grad_norm": 1.3402196455719508, + "learning_rate": 5.980289496965634e-07, + "loss": 0.3796359598636627, + "step": 5640 + }, + { + "epoch": 1.3006686649757897, + "grad_norm": 1.439771899645747, + "learning_rate": 5.976798761558048e-07, + "loss": 0.44377613067626953, + "step": 5641 + }, + { + "epoch": 1.3008992391053724, + "grad_norm": 1.4787491173073983, + "learning_rate": 5.973308610963534e-07, + "loss": 0.46863383054733276, + "step": 5642 + }, + { + "epoch": 1.301129813234955, + "grad_norm": 1.6231703309548882, + "learning_rate": 5.969819045689426e-07, + "loss": 0.5437184572219849, + "step": 5643 + }, + { + "epoch": 1.3013603873645376, + "grad_norm": 1.3526724102376106, + "learning_rate": 5.96633006624296e-07, + "loss": 0.4487720727920532, + "step": 5644 + }, + { + "epoch": 1.3015909614941203, + "grad_norm": 1.4099594164441491, + "learning_rate": 5.962841673131305e-07, + "loss": 0.42834270000457764, + "step": 5645 + }, + { + "epoch": 1.301821535623703, + "grad_norm": 1.6303538612123332, + "learning_rate": 5.959353866861525e-07, + "loss": 0.5242533087730408, + "step": 5646 + }, + { + "epoch": 1.3020521097532858, + "grad_norm": 1.467793467454458, + "learning_rate": 
5.955866647940609e-07, + "loss": 0.4529950022697449, + "step": 5647 + }, + { + "epoch": 1.3022826838828683, + "grad_norm": 1.704233159172443, + "learning_rate": 5.952380016875465e-07, + "loss": 0.41109561920166016, + "step": 5648 + }, + { + "epoch": 1.302513258012451, + "grad_norm": 2.1978948521850237, + "learning_rate": 5.948893974172904e-07, + "loss": 0.5468418598175049, + "step": 5649 + }, + { + "epoch": 1.3027438321420337, + "grad_norm": 1.6524182777322811, + "learning_rate": 5.945408520339663e-07, + "loss": 0.4594927430152893, + "step": 5650 + }, + { + "epoch": 1.3029744062716162, + "grad_norm": 1.8822005278969978, + "learning_rate": 5.941923655882383e-07, + "loss": 0.5011999011039734, + "step": 5651 + }, + { + "epoch": 1.303204980401199, + "grad_norm": 1.3940543055361847, + "learning_rate": 5.938439381307632e-07, + "loss": 0.519101083278656, + "step": 5652 + }, + { + "epoch": 1.3034355545307816, + "grad_norm": 1.3048743953658823, + "learning_rate": 5.934955697121875e-07, + "loss": 0.521979570388794, + "step": 5653 + }, + { + "epoch": 1.3036661286603644, + "grad_norm": 1.5140544105240696, + "learning_rate": 5.931472603831507e-07, + "loss": 0.5969122648239136, + "step": 5654 + }, + { + "epoch": 1.303896702789947, + "grad_norm": 1.6283257057537612, + "learning_rate": 5.927990101942826e-07, + "loss": 0.47013232111930847, + "step": 5655 + }, + { + "epoch": 1.3041272769195296, + "grad_norm": 1.485470149052559, + "learning_rate": 5.924508191962059e-07, + "loss": 0.4135271906852722, + "step": 5656 + }, + { + "epoch": 1.3043578510491123, + "grad_norm": 1.6826248484124529, + "learning_rate": 5.921026874395327e-07, + "loss": 0.45639151334762573, + "step": 5657 + }, + { + "epoch": 1.304588425178695, + "grad_norm": 1.4851105420204929, + "learning_rate": 5.917546149748676e-07, + "loss": 0.4047633409500122, + "step": 5658 + }, + { + "epoch": 1.3048189993082775, + "grad_norm": 1.470073094956581, + "learning_rate": 5.91406601852807e-07, + "loss": 0.4352290630340576, + "step": 
5659 + }, + { + "epoch": 1.3050495734378602, + "grad_norm": 1.569723084578139, + "learning_rate": 5.910586481239375e-07, + "loss": 0.4912130534648895, + "step": 5660 + }, + { + "epoch": 1.305280147567443, + "grad_norm": 1.4302762159123064, + "learning_rate": 5.907107538388383e-07, + "loss": 0.4114433526992798, + "step": 5661 + }, + { + "epoch": 1.3055107216970256, + "grad_norm": 1.6307461117750972, + "learning_rate": 5.903629190480786e-07, + "loss": 0.4230955243110657, + "step": 5662 + }, + { + "epoch": 1.3057412958266084, + "grad_norm": 1.525164874833489, + "learning_rate": 5.900151438022205e-07, + "loss": 0.5020648241043091, + "step": 5663 + }, + { + "epoch": 1.3059718699561909, + "grad_norm": 1.6834639607808413, + "learning_rate": 5.89667428151816e-07, + "loss": 0.48636388778686523, + "step": 5664 + }, + { + "epoch": 1.3062024440857736, + "grad_norm": 1.376635193773143, + "learning_rate": 5.893197721474099e-07, + "loss": 0.412000447511673, + "step": 5665 + }, + { + "epoch": 1.3064330182153563, + "grad_norm": 1.8328035722486296, + "learning_rate": 5.889721758395369e-07, + "loss": 0.3584952652454376, + "step": 5666 + }, + { + "epoch": 1.3066635923449388, + "grad_norm": 1.599166825150926, + "learning_rate": 5.886246392787234e-07, + "loss": 0.4538918733596802, + "step": 5667 + }, + { + "epoch": 1.3068941664745215, + "grad_norm": 1.3551701558323133, + "learning_rate": 5.882771625154883e-07, + "loss": 0.478498637676239, + "step": 5668 + }, + { + "epoch": 1.3071247406041042, + "grad_norm": 1.5353917292288828, + "learning_rate": 5.879297456003398e-07, + "loss": 0.49535906314849854, + "step": 5669 + }, + { + "epoch": 1.307355314733687, + "grad_norm": 1.4516733372645705, + "learning_rate": 5.875823885837793e-07, + "loss": 0.48975661396980286, + "step": 5670 + }, + { + "epoch": 1.3075858888632697, + "grad_norm": 1.675865776424194, + "learning_rate": 5.87235091516298e-07, + "loss": 0.4870087802410126, + "step": 5671 + }, + { + "epoch": 1.3078164629928521, + "grad_norm": 
1.5358758810801338, + "learning_rate": 5.8688785444838e-07, + "loss": 0.43411481380462646, + "step": 5672 + }, + { + "epoch": 1.3080470371224349, + "grad_norm": 1.5956307221574964, + "learning_rate": 5.865406774304986e-07, + "loss": 0.5108835697174072, + "step": 5673 + }, + { + "epoch": 1.3082776112520176, + "grad_norm": 1.6165992027891032, + "learning_rate": 5.861935605131202e-07, + "loss": 0.47449198365211487, + "step": 5674 + }, + { + "epoch": 1.3085081853816, + "grad_norm": 1.8165499378032328, + "learning_rate": 5.858465037467014e-07, + "loss": 0.5550234913825989, + "step": 5675 + }, + { + "epoch": 1.3087387595111828, + "grad_norm": 1.5758581559369806, + "learning_rate": 5.854995071816911e-07, + "loss": 0.4548208713531494, + "step": 5676 + }, + { + "epoch": 1.3089693336407655, + "grad_norm": 1.4849539841305146, + "learning_rate": 5.851525708685279e-07, + "loss": 0.5176935195922852, + "step": 5677 + }, + { + "epoch": 1.3091999077703482, + "grad_norm": 1.5664760566663032, + "learning_rate": 5.848056948576428e-07, + "loss": 0.4460016191005707, + "step": 5678 + }, + { + "epoch": 1.309430481899931, + "grad_norm": 1.808203061607658, + "learning_rate": 5.84458879199458e-07, + "loss": 0.5344464182853699, + "step": 5679 + }, + { + "epoch": 1.3096610560295134, + "grad_norm": 1.3109840468073877, + "learning_rate": 5.841121239443863e-07, + "loss": 0.48601672053337097, + "step": 5680 + }, + { + "epoch": 1.3098916301590962, + "grad_norm": 1.3467689115963568, + "learning_rate": 5.837654291428327e-07, + "loss": 0.46849286556243896, + "step": 5681 + }, + { + "epoch": 1.3101222042886789, + "grad_norm": 1.2665516862618484, + "learning_rate": 5.834187948451918e-07, + "loss": 0.4353019893169403, + "step": 5682 + }, + { + "epoch": 1.3103527784182614, + "grad_norm": 1.7099740749541261, + "learning_rate": 5.830722211018516e-07, + "loss": 0.5345665812492371, + "step": 5683 + }, + { + "epoch": 1.310583352547844, + "grad_norm": 1.4659221660940824, + "learning_rate": 
5.827257079631886e-07, + "loss": 0.4060036540031433, + "step": 5684 + }, + { + "epoch": 1.3108139266774268, + "grad_norm": 1.3640742579072, + "learning_rate": 5.823792554795738e-07, + "loss": 0.43724536895751953, + "step": 5685 + }, + { + "epoch": 1.3110445008070095, + "grad_norm": 1.550163679413481, + "learning_rate": 5.820328637013665e-07, + "loss": 0.4600690007209778, + "step": 5686 + }, + { + "epoch": 1.3112750749365922, + "grad_norm": 1.5199243554334652, + "learning_rate": 5.816865326789182e-07, + "loss": 0.4352531433105469, + "step": 5687 + }, + { + "epoch": 1.3115056490661747, + "grad_norm": 1.4575114943022274, + "learning_rate": 5.813402624625722e-07, + "loss": 0.39384984970092773, + "step": 5688 + }, + { + "epoch": 1.3117362231957574, + "grad_norm": 1.329194110980277, + "learning_rate": 5.809940531026616e-07, + "loss": 0.44367098808288574, + "step": 5689 + }, + { + "epoch": 1.3119667973253402, + "grad_norm": 1.4497223943190725, + "learning_rate": 5.806479046495123e-07, + "loss": 0.4757416546344757, + "step": 5690 + }, + { + "epoch": 1.3121973714549227, + "grad_norm": 1.5821654764353048, + "learning_rate": 5.803018171534396e-07, + "loss": 0.521708607673645, + "step": 5691 + }, + { + "epoch": 1.3124279455845054, + "grad_norm": 1.3510537988002305, + "learning_rate": 5.799557906647514e-07, + "loss": 0.4127439260482788, + "step": 5692 + }, + { + "epoch": 1.312658519714088, + "grad_norm": 1.4570205213875538, + "learning_rate": 5.79609825233746e-07, + "loss": 0.4809693396091461, + "step": 5693 + }, + { + "epoch": 1.3128890938436708, + "grad_norm": 1.2590938015478794, + "learning_rate": 5.792639209107134e-07, + "loss": 0.5075684189796448, + "step": 5694 + }, + { + "epoch": 1.3131196679732535, + "grad_norm": 1.3738792104421846, + "learning_rate": 5.789180777459336e-07, + "loss": 0.416393518447876, + "step": 5695 + }, + { + "epoch": 1.313350242102836, + "grad_norm": 1.4282126857493198, + "learning_rate": 5.78572295789679e-07, + "loss": 0.4456642270088196, + "step": 
5696 + }, + { + "epoch": 1.3135808162324187, + "grad_norm": 1.327521871832615, + "learning_rate": 5.782265750922124e-07, + "loss": 0.4757812023162842, + "step": 5697 + }, + { + "epoch": 1.3138113903620015, + "grad_norm": 1.6103197546493997, + "learning_rate": 5.778809157037872e-07, + "loss": 0.5081768035888672, + "step": 5698 + }, + { + "epoch": 1.314041964491584, + "grad_norm": 1.6849043068796357, + "learning_rate": 5.775353176746489e-07, + "loss": 0.4604584872722626, + "step": 5699 + }, + { + "epoch": 1.3142725386211667, + "grad_norm": 1.3964100189157245, + "learning_rate": 5.771897810550339e-07, + "loss": 0.4153773784637451, + "step": 5700 + }, + { + "epoch": 1.3145031127507494, + "grad_norm": 1.5346514188080242, + "learning_rate": 5.768443058951695e-07, + "loss": 0.5194085836410522, + "step": 5701 + }, + { + "epoch": 1.314733686880332, + "grad_norm": 1.6610989574168062, + "learning_rate": 5.764988922452733e-07, + "loss": 0.4398482143878937, + "step": 5702 + }, + { + "epoch": 1.3149642610099148, + "grad_norm": 1.747178590910114, + "learning_rate": 5.761535401555558e-07, + "loss": 0.5148836374282837, + "step": 5703 + }, + { + "epoch": 1.3151948351394973, + "grad_norm": 1.8977812861580863, + "learning_rate": 5.758082496762163e-07, + "loss": 0.533142626285553, + "step": 5704 + }, + { + "epoch": 1.31542540926908, + "grad_norm": 1.3488739739710767, + "learning_rate": 5.754630208574473e-07, + "loss": 0.4059423804283142, + "step": 5705 + }, + { + "epoch": 1.3156559833986627, + "grad_norm": 1.3213051571946475, + "learning_rate": 5.751178537494302e-07, + "loss": 0.4685533940792084, + "step": 5706 + }, + { + "epoch": 1.3158865575282452, + "grad_norm": 1.5403217644159128, + "learning_rate": 5.747727484023392e-07, + "loss": 0.4454694986343384, + "step": 5707 + }, + { + "epoch": 1.316117131657828, + "grad_norm": 1.481350859430692, + "learning_rate": 5.74427704866339e-07, + "loss": 0.4058796167373657, + "step": 5708 + }, + { + "epoch": 1.3163477057874107, + "grad_norm": 
1.3294270142641733, + "learning_rate": 5.740827231915847e-07, + "loss": 0.3891766369342804, + "step": 5709 + }, + { + "epoch": 1.3165782799169934, + "grad_norm": 1.5072356875610937, + "learning_rate": 5.737378034282235e-07, + "loss": 0.47912657260894775, + "step": 5710 + }, + { + "epoch": 1.316808854046576, + "grad_norm": 1.5228549079910219, + "learning_rate": 5.733929456263922e-07, + "loss": 0.4221952557563782, + "step": 5711 + }, + { + "epoch": 1.3170394281761586, + "grad_norm": 1.5405159904484362, + "learning_rate": 5.730481498362202e-07, + "loss": 0.39018404483795166, + "step": 5712 + }, + { + "epoch": 1.3172700023057413, + "grad_norm": 1.6184406292698126, + "learning_rate": 5.727034161078262e-07, + "loss": 0.5388307571411133, + "step": 5713 + }, + { + "epoch": 1.317500576435324, + "grad_norm": 1.5278965195377916, + "learning_rate": 5.723587444913216e-07, + "loss": 0.3243408501148224, + "step": 5714 + }, + { + "epoch": 1.3177311505649065, + "grad_norm": 1.6496814482710773, + "learning_rate": 5.720141350368072e-07, + "loss": 0.46480363607406616, + "step": 5715 + }, + { + "epoch": 1.3179617246944892, + "grad_norm": 1.6265951465013608, + "learning_rate": 5.716695877943757e-07, + "loss": 0.5286417603492737, + "step": 5716 + }, + { + "epoch": 1.318192298824072, + "grad_norm": 1.455901542591345, + "learning_rate": 5.71325102814111e-07, + "loss": 0.4170069694519043, + "step": 5717 + }, + { + "epoch": 1.3184228729536547, + "grad_norm": 1.5051159019770526, + "learning_rate": 5.709806801460867e-07, + "loss": 0.5738973617553711, + "step": 5718 + }, + { + "epoch": 1.3186534470832374, + "grad_norm": 1.4473352410585376, + "learning_rate": 5.706363198403689e-07, + "loss": 0.5309658050537109, + "step": 5719 + }, + { + "epoch": 1.31888402121282, + "grad_norm": 1.588487236125564, + "learning_rate": 5.70292021947013e-07, + "loss": 0.4569379389286041, + "step": 5720 + }, + { + "epoch": 1.3191145953424026, + "grad_norm": 1.5641598702256398, + "learning_rate": 5.699477865160674e-07, 
+ "loss": 0.46686258912086487, + "step": 5721 + }, + { + "epoch": 1.3193451694719853, + "grad_norm": 1.551220703032623, + "learning_rate": 5.696036135975688e-07, + "loss": 0.5333213806152344, + "step": 5722 + }, + { + "epoch": 1.3195757436015678, + "grad_norm": 1.6027893782611593, + "learning_rate": 5.69259503241547e-07, + "loss": 0.3519536256790161, + "step": 5723 + }, + { + "epoch": 1.3198063177311505, + "grad_norm": 1.5104260104986362, + "learning_rate": 5.689154554980218e-07, + "loss": 0.4763161242008209, + "step": 5724 + }, + { + "epoch": 1.3200368918607333, + "grad_norm": 1.5061315373489772, + "learning_rate": 5.685714704170044e-07, + "loss": 0.43600207567214966, + "step": 5725 + }, + { + "epoch": 1.320267465990316, + "grad_norm": 1.4992417251350876, + "learning_rate": 5.682275480484958e-07, + "loss": 0.41991305351257324, + "step": 5726 + }, + { + "epoch": 1.3204980401198987, + "grad_norm": 1.663551629444692, + "learning_rate": 5.678836884424894e-07, + "loss": 0.44275131821632385, + "step": 5727 + }, + { + "epoch": 1.3207286142494812, + "grad_norm": 1.65999947024113, + "learning_rate": 5.675398916489682e-07, + "loss": 0.4339372515678406, + "step": 5728 + }, + { + "epoch": 1.320959188379064, + "grad_norm": 1.484455134036602, + "learning_rate": 5.671961577179062e-07, + "loss": 0.4462248384952545, + "step": 5729 + }, + { + "epoch": 1.3211897625086464, + "grad_norm": 1.4704913213821902, + "learning_rate": 5.668524866992693e-07, + "loss": 0.36548441648483276, + "step": 5730 + }, + { + "epoch": 1.321420336638229, + "grad_norm": 1.5370532211440713, + "learning_rate": 5.665088786430129e-07, + "loss": 0.4709678888320923, + "step": 5731 + }, + { + "epoch": 1.3216509107678118, + "grad_norm": 1.4993066403144744, + "learning_rate": 5.661653335990848e-07, + "loss": 0.40125030279159546, + "step": 5732 + }, + { + "epoch": 1.3218814848973945, + "grad_norm": 1.8517319571144346, + "learning_rate": 5.658218516174218e-07, + "loss": 0.5288605690002441, + "step": 5733 + }, + { + 
"epoch": 1.3221120590269773, + "grad_norm": 1.2954018601150643, + "learning_rate": 5.654784327479534e-07, + "loss": 0.41306072473526, + "step": 5734 + }, + { + "epoch": 1.3223426331565598, + "grad_norm": 1.3199807449430407, + "learning_rate": 5.651350770405983e-07, + "loss": 0.34327009320259094, + "step": 5735 + }, + { + "epoch": 1.3225732072861425, + "grad_norm": 1.4524630442098247, + "learning_rate": 5.647917845452671e-07, + "loss": 0.5055800080299377, + "step": 5736 + }, + { + "epoch": 1.3228037814157252, + "grad_norm": 1.7153085926535214, + "learning_rate": 5.644485553118609e-07, + "loss": 0.45496249198913574, + "step": 5737 + }, + { + "epoch": 1.3230343555453077, + "grad_norm": 1.6142993934275558, + "learning_rate": 5.641053893902708e-07, + "loss": 0.4626169502735138, + "step": 5738 + }, + { + "epoch": 1.3232649296748904, + "grad_norm": 1.3569624734396053, + "learning_rate": 5.637622868303802e-07, + "loss": 0.46621328592300415, + "step": 5739 + }, + { + "epoch": 1.3234955038044731, + "grad_norm": 1.5833136701466524, + "learning_rate": 5.634192476820623e-07, + "loss": 0.47793662548065186, + "step": 5740 + }, + { + "epoch": 1.3237260779340558, + "grad_norm": 1.5367680790773321, + "learning_rate": 5.630762719951816e-07, + "loss": 0.42578715085983276, + "step": 5741 + }, + { + "epoch": 1.3239566520636386, + "grad_norm": 1.7421270871218182, + "learning_rate": 5.627333598195927e-07, + "loss": 0.3146113157272339, + "step": 5742 + }, + { + "epoch": 1.324187226193221, + "grad_norm": 1.376620002714832, + "learning_rate": 5.623905112051417e-07, + "loss": 0.39731544256210327, + "step": 5743 + }, + { + "epoch": 1.3244178003228038, + "grad_norm": 1.6655684412604148, + "learning_rate": 5.620477262016647e-07, + "loss": 0.3755846619606018, + "step": 5744 + }, + { + "epoch": 1.3246483744523865, + "grad_norm": 1.5953907301532468, + "learning_rate": 5.617050048589896e-07, + "loss": 0.43060415983200073, + "step": 5745 + }, + { + "epoch": 1.324878948581969, + "grad_norm": 
1.54564820857706, + "learning_rate": 5.613623472269334e-07, + "loss": 0.4213481545448303, + "step": 5746 + }, + { + "epoch": 1.3251095227115517, + "grad_norm": 1.2422408749001486, + "learning_rate": 5.610197533553057e-07, + "loss": 0.3923456072807312, + "step": 5747 + }, + { + "epoch": 1.3253400968411344, + "grad_norm": 1.6088447345623693, + "learning_rate": 5.606772232939061e-07, + "loss": 0.42293328046798706, + "step": 5748 + }, + { + "epoch": 1.3255706709707171, + "grad_norm": 1.596682526932072, + "learning_rate": 5.603347570925242e-07, + "loss": 0.4545479118824005, + "step": 5749 + }, + { + "epoch": 1.3258012451002998, + "grad_norm": 1.4262513090332916, + "learning_rate": 5.599923548009416e-07, + "loss": 0.3969312310218811, + "step": 5750 + }, + { + "epoch": 1.3260318192298823, + "grad_norm": 1.687653911460881, + "learning_rate": 5.59650016468929e-07, + "loss": 0.4296644330024719, + "step": 5751 + }, + { + "epoch": 1.326262393359465, + "grad_norm": 1.4928189267328964, + "learning_rate": 5.5930774214625e-07, + "loss": 0.43291348218917847, + "step": 5752 + }, + { + "epoch": 1.3264929674890478, + "grad_norm": 1.4463941028108167, + "learning_rate": 5.589655318826564e-07, + "loss": 0.47684454917907715, + "step": 5753 + }, + { + "epoch": 1.3267235416186303, + "grad_norm": 1.3515496302725483, + "learning_rate": 5.586233857278924e-07, + "loss": 0.48520004749298096, + "step": 5754 + }, + { + "epoch": 1.326954115748213, + "grad_norm": 1.6127441732883512, + "learning_rate": 5.582813037316926e-07, + "loss": 0.4434587359428406, + "step": 5755 + }, + { + "epoch": 1.3271846898777957, + "grad_norm": 1.7808352880972456, + "learning_rate": 5.579392859437825e-07, + "loss": 0.47306808829307556, + "step": 5756 + }, + { + "epoch": 1.3274152640073784, + "grad_norm": 1.5663021335869645, + "learning_rate": 5.575973324138772e-07, + "loss": 0.4349653720855713, + "step": 5757 + }, + { + "epoch": 1.3276458381369611, + "grad_norm": 1.2914359149982935, + "learning_rate": 
5.572554431916829e-07, + "loss": 0.31277602910995483, + "step": 5758 + }, + { + "epoch": 1.3278764122665436, + "grad_norm": 1.5658319454866303, + "learning_rate": 5.569136183268974e-07, + "loss": 0.4281114637851715, + "step": 5759 + }, + { + "epoch": 1.3281069863961263, + "grad_norm": 1.2867721627127386, + "learning_rate": 5.565718578692076e-07, + "loss": 0.45071113109588623, + "step": 5760 + }, + { + "epoch": 1.328337560525709, + "grad_norm": 1.4460147363867, + "learning_rate": 5.562301618682927e-07, + "loss": 0.426133394241333, + "step": 5761 + }, + { + "epoch": 1.3285681346552916, + "grad_norm": 1.3630920926710801, + "learning_rate": 5.558885303738209e-07, + "loss": 0.3882424235343933, + "step": 5762 + }, + { + "epoch": 1.3287987087848743, + "grad_norm": 1.3878174095068123, + "learning_rate": 5.55546963435452e-07, + "loss": 0.4706958532333374, + "step": 5763 + }, + { + "epoch": 1.329029282914457, + "grad_norm": 1.9122348340273743, + "learning_rate": 5.552054611028365e-07, + "loss": 0.4868433475494385, + "step": 5764 + }, + { + "epoch": 1.3292598570440397, + "grad_norm": 1.4411048310630292, + "learning_rate": 5.548640234256154e-07, + "loss": 0.41839566826820374, + "step": 5765 + }, + { + "epoch": 1.3294904311736224, + "grad_norm": 1.9627530346102546, + "learning_rate": 5.545226504534195e-07, + "loss": 0.4088629484176636, + "step": 5766 + }, + { + "epoch": 1.329721005303205, + "grad_norm": 1.3819218540316194, + "learning_rate": 5.541813422358715e-07, + "loss": 0.34617769718170166, + "step": 5767 + }, + { + "epoch": 1.3299515794327876, + "grad_norm": 1.5711021474470717, + "learning_rate": 5.538400988225835e-07, + "loss": 0.5098900198936462, + "step": 5768 + }, + { + "epoch": 1.3301821535623704, + "grad_norm": 1.5683015797269382, + "learning_rate": 5.534989202631586e-07, + "loss": 0.4294108748435974, + "step": 5769 + }, + { + "epoch": 1.3304127276919528, + "grad_norm": 1.3488716534216894, + "learning_rate": 5.531578066071907e-07, + "loss": 0.42205139994621277, + 
"step": 5770 + }, + { + "epoch": 1.3306433018215356, + "grad_norm": 1.8657910300729754, + "learning_rate": 5.528167579042645e-07, + "loss": 0.5009530186653137, + "step": 5771 + }, + { + "epoch": 1.3308738759511183, + "grad_norm": 1.468249228101101, + "learning_rate": 5.524757742039545e-07, + "loss": 0.554497241973877, + "step": 5772 + }, + { + "epoch": 1.331104450080701, + "grad_norm": 1.711116822757576, + "learning_rate": 5.521348555558263e-07, + "loss": 0.3514432907104492, + "step": 5773 + }, + { + "epoch": 1.3313350242102837, + "grad_norm": 1.4224522574801144, + "learning_rate": 5.51794002009436e-07, + "loss": 0.4712038040161133, + "step": 5774 + }, + { + "epoch": 1.3315655983398662, + "grad_norm": 1.6288850118765847, + "learning_rate": 5.514532136143295e-07, + "loss": 0.48556071519851685, + "step": 5775 + }, + { + "epoch": 1.331796172469449, + "grad_norm": 1.42798680480441, + "learning_rate": 5.511124904200448e-07, + "loss": 0.43158456683158875, + "step": 5776 + }, + { + "epoch": 1.3320267465990316, + "grad_norm": 1.8128360066016722, + "learning_rate": 5.507718324761085e-07, + "loss": 0.5376255512237549, + "step": 5777 + }, + { + "epoch": 1.3322573207286141, + "grad_norm": 1.446480187929883, + "learning_rate": 5.504312398320392e-07, + "loss": 0.3800685405731201, + "step": 5778 + }, + { + "epoch": 1.3324878948581969, + "grad_norm": 1.3675185316121448, + "learning_rate": 5.500907125373458e-07, + "loss": 0.4015260338783264, + "step": 5779 + }, + { + "epoch": 1.3327184689877796, + "grad_norm": 1.7400186621828952, + "learning_rate": 5.497502506415266e-07, + "loss": 0.42762285470962524, + "step": 5780 + }, + { + "epoch": 1.3329490431173623, + "grad_norm": 1.4501572722598215, + "learning_rate": 5.494098541940719e-07, + "loss": 0.4467644691467285, + "step": 5781 + }, + { + "epoch": 1.333179617246945, + "grad_norm": 1.9298171674754279, + "learning_rate": 5.490695232444613e-07, + "loss": 0.42699599266052246, + "step": 5782 + }, + { + "epoch": 1.3334101913765275, + 
"grad_norm": 1.6654850032985582, + "learning_rate": 5.487292578421659e-07, + "loss": 0.586537778377533, + "step": 5783 + }, + { + "epoch": 1.3336407655061102, + "grad_norm": 1.761605169999467, + "learning_rate": 5.48389058036646e-07, + "loss": 0.4525066018104553, + "step": 5784 + }, + { + "epoch": 1.333871339635693, + "grad_norm": 1.4697934550209713, + "learning_rate": 5.480489238773535e-07, + "loss": 0.40520548820495605, + "step": 5785 + }, + { + "epoch": 1.3341019137652754, + "grad_norm": 1.7127717596843188, + "learning_rate": 5.477088554137304e-07, + "loss": 0.3910450339317322, + "step": 5786 + }, + { + "epoch": 1.3343324878948581, + "grad_norm": 1.781985995356997, + "learning_rate": 5.473688526952087e-07, + "loss": 0.45285511016845703, + "step": 5787 + }, + { + "epoch": 1.3345630620244409, + "grad_norm": 1.3079701521023397, + "learning_rate": 5.47028915771212e-07, + "loss": 0.39207279682159424, + "step": 5788 + }, + { + "epoch": 1.3347936361540236, + "grad_norm": 1.3401224496215014, + "learning_rate": 5.466890446911527e-07, + "loss": 0.40281063318252563, + "step": 5789 + }, + { + "epoch": 1.3350242102836063, + "grad_norm": 1.5855589292084546, + "learning_rate": 5.463492395044354e-07, + "loss": 0.5087814927101135, + "step": 5790 + }, + { + "epoch": 1.3352547844131888, + "grad_norm": 1.6443172906836578, + "learning_rate": 5.460095002604532e-07, + "loss": 0.47597891092300415, + "step": 5791 + }, + { + "epoch": 1.3354853585427715, + "grad_norm": 1.656230003127049, + "learning_rate": 5.456698270085917e-07, + "loss": 0.5722953677177429, + "step": 5792 + }, + { + "epoch": 1.3357159326723542, + "grad_norm": 1.6424947586218923, + "learning_rate": 5.45330219798225e-07, + "loss": 0.5133349299430847, + "step": 5793 + }, + { + "epoch": 1.3359465068019367, + "grad_norm": 1.5413030595202453, + "learning_rate": 5.449906786787187e-07, + "loss": 0.46230804920196533, + "step": 5794 + }, + { + "epoch": 1.3361770809315194, + "grad_norm": 1.6839619437291453, + "learning_rate": 
5.446512036994286e-07, + "loss": 0.42002394795417786, + "step": 5795 + }, + { + "epoch": 1.3364076550611022, + "grad_norm": 1.46623243210155, + "learning_rate": 5.443117949097013e-07, + "loss": 0.42281097173690796, + "step": 5796 + }, + { + "epoch": 1.3366382291906849, + "grad_norm": 1.4476698476010996, + "learning_rate": 5.439724523588726e-07, + "loss": 0.511898398399353, + "step": 5797 + }, + { + "epoch": 1.3368688033202676, + "grad_norm": 1.4307520026731049, + "learning_rate": 5.4363317609627e-07, + "loss": 0.4475559592247009, + "step": 5798 + }, + { + "epoch": 1.33709937744985, + "grad_norm": 1.509864957359139, + "learning_rate": 5.432939661712103e-07, + "loss": 0.4872414469718933, + "step": 5799 + }, + { + "epoch": 1.3373299515794328, + "grad_norm": 1.3480605234272842, + "learning_rate": 5.429548226330009e-07, + "loss": 0.40401679277420044, + "step": 5800 + }, + { + "epoch": 1.3375605257090155, + "grad_norm": 2.083088707198395, + "learning_rate": 5.426157455309399e-07, + "loss": 0.43559926748275757, + "step": 5801 + }, + { + "epoch": 1.337791099838598, + "grad_norm": 1.6000855398004097, + "learning_rate": 5.422767349143158e-07, + "loss": 0.44283759593963623, + "step": 5802 + }, + { + "epoch": 1.3380216739681807, + "grad_norm": 1.310277684226626, + "learning_rate": 5.419377908324077e-07, + "loss": 0.3770032525062561, + "step": 5803 + }, + { + "epoch": 1.3382522480977634, + "grad_norm": 1.3856773934136148, + "learning_rate": 5.415989133344834e-07, + "loss": 0.4497501850128174, + "step": 5804 + }, + { + "epoch": 1.3384828222273462, + "grad_norm": 1.49195449044666, + "learning_rate": 5.412601024698033e-07, + "loss": 0.5008253455162048, + "step": 5805 + }, + { + "epoch": 1.3387133963569289, + "grad_norm": 1.3694796854029274, + "learning_rate": 5.409213582876162e-07, + "loss": 0.46178537607192993, + "step": 5806 + }, + { + "epoch": 1.3389439704865114, + "grad_norm": 1.1951838089282807, + "learning_rate": 5.405826808371625e-07, + "loss": 0.39843931794166565, + 
"step": 5807 + }, + { + "epoch": 1.339174544616094, + "grad_norm": 1.4243934050525646, + "learning_rate": 5.402440701676724e-07, + "loss": 0.4829174280166626, + "step": 5808 + }, + { + "epoch": 1.3394051187456768, + "grad_norm": 1.0859530853021675, + "learning_rate": 5.399055263283656e-07, + "loss": 0.36173316836357117, + "step": 5809 + }, + { + "epoch": 1.3396356928752593, + "grad_norm": 1.5741135880130834, + "learning_rate": 5.395670493684536e-07, + "loss": 0.400304913520813, + "step": 5810 + }, + { + "epoch": 1.339866267004842, + "grad_norm": 1.507879612413509, + "learning_rate": 5.392286393371372e-07, + "loss": 0.4536975622177124, + "step": 5811 + }, + { + "epoch": 1.3400968411344247, + "grad_norm": 1.7310508291395992, + "learning_rate": 5.388902962836084e-07, + "loss": 0.6474577188491821, + "step": 5812 + }, + { + "epoch": 1.3403274152640074, + "grad_norm": 1.6348182443046517, + "learning_rate": 5.385520202570477e-07, + "loss": 0.48008009791374207, + "step": 5813 + }, + { + "epoch": 1.3405579893935902, + "grad_norm": 1.6214175923335088, + "learning_rate": 5.38213811306628e-07, + "loss": 0.4518657326698303, + "step": 5814 + }, + { + "epoch": 1.3407885635231727, + "grad_norm": 1.280530895656809, + "learning_rate": 5.378756694815105e-07, + "loss": 0.449008584022522, + "step": 5815 + }, + { + "epoch": 1.3410191376527554, + "grad_norm": 1.689898643370083, + "learning_rate": 5.375375948308483e-07, + "loss": 0.5448319315910339, + "step": 5816 + }, + { + "epoch": 1.341249711782338, + "grad_norm": 1.5166178678578832, + "learning_rate": 5.371995874037832e-07, + "loss": 0.5078369379043579, + "step": 5817 + }, + { + "epoch": 1.3414802859119206, + "grad_norm": 1.611364899344997, + "learning_rate": 5.368616472494482e-07, + "loss": 0.508685290813446, + "step": 5818 + }, + { + "epoch": 1.3417108600415033, + "grad_norm": 1.3809568946566115, + "learning_rate": 5.365237744169672e-07, + "loss": 0.4166705012321472, + "step": 5819 + }, + { + "epoch": 1.341941434171086, + 
"grad_norm": 1.432431964622234, + "learning_rate": 5.361859689554524e-07, + "loss": 0.4741361737251282, + "step": 5820 + }, + { + "epoch": 1.3421720083006687, + "grad_norm": 1.5546451283342237, + "learning_rate": 5.358482309140079e-07, + "loss": 0.36658185720443726, + "step": 5821 + }, + { + "epoch": 1.3424025824302515, + "grad_norm": 1.9632157270552801, + "learning_rate": 5.355105603417267e-07, + "loss": 0.38921263813972473, + "step": 5822 + }, + { + "epoch": 1.342633156559834, + "grad_norm": 1.9732368197118861, + "learning_rate": 5.351729572876935e-07, + "loss": 0.5553977489471436, + "step": 5823 + }, + { + "epoch": 1.3428637306894167, + "grad_norm": 1.4618484003422054, + "learning_rate": 5.348354218009813e-07, + "loss": 0.3968391418457031, + "step": 5824 + }, + { + "epoch": 1.3430943048189994, + "grad_norm": 1.4937275325292458, + "learning_rate": 5.344979539306549e-07, + "loss": 0.4289783239364624, + "step": 5825 + }, + { + "epoch": 1.3433248789485819, + "grad_norm": 1.313862309148984, + "learning_rate": 5.341605537257686e-07, + "loss": 0.45359861850738525, + "step": 5826 + }, + { + "epoch": 1.3435554530781646, + "grad_norm": 1.366684570776694, + "learning_rate": 5.338232212353675e-07, + "loss": 0.3571642339229584, + "step": 5827 + }, + { + "epoch": 1.3437860272077473, + "grad_norm": 1.1954938252676188, + "learning_rate": 5.334859565084855e-07, + "loss": 0.3784096837043762, + "step": 5828 + }, + { + "epoch": 1.34401660133733, + "grad_norm": 1.5372749019268697, + "learning_rate": 5.331487595941475e-07, + "loss": 0.44996407628059387, + "step": 5829 + }, + { + "epoch": 1.3442471754669127, + "grad_norm": 1.4793854978740197, + "learning_rate": 5.32811630541369e-07, + "loss": 0.4466405510902405, + "step": 5830 + }, + { + "epoch": 1.3444777495964952, + "grad_norm": 1.3432081322840168, + "learning_rate": 5.324745693991545e-07, + "loss": 0.34488850831985474, + "step": 5831 + }, + { + "epoch": 1.344708323726078, + "grad_norm": 1.589654871057016, + "learning_rate": 
5.321375762164999e-07, + "loss": 0.5530165433883667, + "step": 5832 + }, + { + "epoch": 1.3449388978556607, + "grad_norm": 1.6555576202053326, + "learning_rate": 5.318006510423898e-07, + "loss": 0.40732342004776, + "step": 5833 + }, + { + "epoch": 1.3451694719852432, + "grad_norm": 1.5528027430812303, + "learning_rate": 5.314637939258002e-07, + "loss": 0.3364611566066742, + "step": 5834 + }, + { + "epoch": 1.3454000461148259, + "grad_norm": 1.4557702222082582, + "learning_rate": 5.311270049156966e-07, + "loss": 0.43964290618896484, + "step": 5835 + }, + { + "epoch": 1.3456306202444086, + "grad_norm": 1.5963363545263636, + "learning_rate": 5.30790284061035e-07, + "loss": 0.5203431844711304, + "step": 5836 + }, + { + "epoch": 1.3458611943739913, + "grad_norm": 1.356219303149177, + "learning_rate": 5.304536314107607e-07, + "loss": 0.4779793620109558, + "step": 5837 + }, + { + "epoch": 1.346091768503574, + "grad_norm": 1.4030454651132978, + "learning_rate": 5.301170470138102e-07, + "loss": 0.4769410490989685, + "step": 5838 + }, + { + "epoch": 1.3463223426331565, + "grad_norm": 1.5437367488200047, + "learning_rate": 5.297805309191089e-07, + "loss": 0.42390304803848267, + "step": 5839 + }, + { + "epoch": 1.3465529167627392, + "grad_norm": 1.6498587295444291, + "learning_rate": 5.294440831755727e-07, + "loss": 0.5550302863121033, + "step": 5840 + }, + { + "epoch": 1.3467834908923217, + "grad_norm": 1.5927381474044073, + "learning_rate": 5.291077038321078e-07, + "loss": 0.4897978901863098, + "step": 5841 + }, + { + "epoch": 1.3470140650219045, + "grad_norm": 1.5707311912828865, + "learning_rate": 5.287713929376105e-07, + "loss": 0.4014284610748291, + "step": 5842 + }, + { + "epoch": 1.3472446391514872, + "grad_norm": 1.61036503253005, + "learning_rate": 5.284351505409675e-07, + "loss": 0.4299513101577759, + "step": 5843 + }, + { + "epoch": 1.34747521328107, + "grad_norm": 1.382725158348277, + "learning_rate": 5.280989766910541e-07, + "loss": 0.44863104820251465, + "step": 
5844 + }, + { + "epoch": 1.3477057874106526, + "grad_norm": 1.4391517424186664, + "learning_rate": 5.277628714367374e-07, + "loss": 0.41933274269104004, + "step": 5845 + }, + { + "epoch": 1.347936361540235, + "grad_norm": 1.5110585127257306, + "learning_rate": 5.274268348268729e-07, + "loss": 0.48257556557655334, + "step": 5846 + }, + { + "epoch": 1.3481669356698178, + "grad_norm": 1.6840388322451993, + "learning_rate": 5.270908669103078e-07, + "loss": 0.435384064912796, + "step": 5847 + }, + { + "epoch": 1.3483975097994005, + "grad_norm": 1.502056490079635, + "learning_rate": 5.267549677358775e-07, + "loss": 0.43291670083999634, + "step": 5848 + }, + { + "epoch": 1.348628083928983, + "grad_norm": 2.07427587572329, + "learning_rate": 5.264191373524089e-07, + "loss": 0.4584086537361145, + "step": 5849 + }, + { + "epoch": 1.3488586580585658, + "grad_norm": 1.4212548389061759, + "learning_rate": 5.260833758087187e-07, + "loss": 0.44879037141799927, + "step": 5850 + }, + { + "epoch": 1.3490892321881485, + "grad_norm": 1.4876230861981237, + "learning_rate": 5.257476831536124e-07, + "loss": 0.48467326164245605, + "step": 5851 + }, + { + "epoch": 1.3493198063177312, + "grad_norm": 1.4803329007154076, + "learning_rate": 5.254120594358871e-07, + "loss": 0.4126189947128296, + "step": 5852 + }, + { + "epoch": 1.349550380447314, + "grad_norm": 1.494164620045959, + "learning_rate": 5.250765047043284e-07, + "loss": 0.5592546463012695, + "step": 5853 + }, + { + "epoch": 1.3497809545768964, + "grad_norm": 1.2572079660485564, + "learning_rate": 5.247410190077134e-07, + "loss": 0.3269529342651367, + "step": 5854 + }, + { + "epoch": 1.3500115287064791, + "grad_norm": 1.4784058003593112, + "learning_rate": 5.244056023948075e-07, + "loss": 0.42812949419021606, + "step": 5855 + }, + { + "epoch": 1.3502421028360618, + "grad_norm": 1.643847647603701, + "learning_rate": 5.240702549143676e-07, + "loss": 0.4266297221183777, + "step": 5856 + }, + { + "epoch": 1.3504726769656443, + 
"grad_norm": 1.6490610440384348, + "learning_rate": 5.237349766151392e-07, + "loss": 0.43848085403442383, + "step": 5857 + }, + { + "epoch": 1.350703251095227, + "grad_norm": 1.5778355488021025, + "learning_rate": 5.233997675458588e-07, + "loss": 0.47512906789779663, + "step": 5858 + }, + { + "epoch": 1.3509338252248098, + "grad_norm": 1.4893970639177625, + "learning_rate": 5.230646277552527e-07, + "loss": 0.3484492897987366, + "step": 5859 + }, + { + "epoch": 1.3511643993543925, + "grad_norm": 1.5529244445697006, + "learning_rate": 5.227295572920363e-07, + "loss": 0.48915669322013855, + "step": 5860 + }, + { + "epoch": 1.3513949734839752, + "grad_norm": 1.687195391171769, + "learning_rate": 5.223945562049159e-07, + "loss": 0.415932834148407, + "step": 5861 + }, + { + "epoch": 1.3516255476135577, + "grad_norm": 1.8036222540660396, + "learning_rate": 5.220596245425869e-07, + "loss": 0.47945982217788696, + "step": 5862 + }, + { + "epoch": 1.3518561217431404, + "grad_norm": 1.7032993247582504, + "learning_rate": 5.217247623537356e-07, + "loss": 0.4322330951690674, + "step": 5863 + }, + { + "epoch": 1.3520866958727231, + "grad_norm": 1.7271334098970212, + "learning_rate": 5.213899696870369e-07, + "loss": 0.4608469605445862, + "step": 5864 + }, + { + "epoch": 1.3523172700023056, + "grad_norm": 1.4726583260713841, + "learning_rate": 5.210552465911566e-07, + "loss": 0.5108528137207031, + "step": 5865 + }, + { + "epoch": 1.3525478441318883, + "grad_norm": 1.3172906919344538, + "learning_rate": 5.207205931147502e-07, + "loss": 0.37947285175323486, + "step": 5866 + }, + { + "epoch": 1.352778418261471, + "grad_norm": 1.5825329658520386, + "learning_rate": 5.203860093064635e-07, + "loss": 0.49094486236572266, + "step": 5867 + }, + { + "epoch": 1.3530089923910538, + "grad_norm": 1.7057097538270483, + "learning_rate": 5.200514952149308e-07, + "loss": 0.34238702058792114, + "step": 5868 + }, + { + "epoch": 1.3532395665206365, + "grad_norm": 1.4815052827701158, + "learning_rate": 
5.197170508887774e-07, + "loss": 0.46390393376350403, + "step": 5869 + }, + { + "epoch": 1.353470140650219, + "grad_norm": 1.517083535949924, + "learning_rate": 5.193826763766183e-07, + "loss": 0.44219160079956055, + "step": 5870 + }, + { + "epoch": 1.3537007147798017, + "grad_norm": 1.2444078580604416, + "learning_rate": 5.190483717270578e-07, + "loss": 0.42801350355148315, + "step": 5871 + }, + { + "epoch": 1.3539312889093844, + "grad_norm": 1.5276855271974423, + "learning_rate": 5.187141369886906e-07, + "loss": 0.43861454725265503, + "step": 5872 + }, + { + "epoch": 1.354161863038967, + "grad_norm": 1.3684710867849712, + "learning_rate": 5.183799722101014e-07, + "loss": 0.4381449222564697, + "step": 5873 + }, + { + "epoch": 1.3543924371685496, + "grad_norm": 1.6990772878337996, + "learning_rate": 5.180458774398646e-07, + "loss": 0.4341619610786438, + "step": 5874 + }, + { + "epoch": 1.3546230112981323, + "grad_norm": 1.5170997767832792, + "learning_rate": 5.177118527265437e-07, + "loss": 0.4376588463783264, + "step": 5875 + }, + { + "epoch": 1.354853585427715, + "grad_norm": 1.4712846387139202, + "learning_rate": 5.173778981186932e-07, + "loss": 0.38568538427352905, + "step": 5876 + }, + { + "epoch": 1.3550841595572978, + "grad_norm": 1.4162179235966152, + "learning_rate": 5.170440136648561e-07, + "loss": 0.44178056716918945, + "step": 5877 + }, + { + "epoch": 1.3553147336868803, + "grad_norm": 1.434763306400174, + "learning_rate": 5.167101994135665e-07, + "loss": 0.49847882986068726, + "step": 5878 + }, + { + "epoch": 1.355545307816463, + "grad_norm": 1.3114035605969607, + "learning_rate": 5.163764554133476e-07, + "loss": 0.33697545528411865, + "step": 5879 + }, + { + "epoch": 1.3557758819460457, + "grad_norm": 1.9314852987462174, + "learning_rate": 5.160427817127117e-07, + "loss": 0.5216578841209412, + "step": 5880 + }, + { + "epoch": 1.3560064560756282, + "grad_norm": 1.5367735086016923, + "learning_rate": 5.157091783601624e-07, + "loss": 0.5101301670074463, 
+ "step": 5881 + }, + { + "epoch": 1.356237030205211, + "grad_norm": 1.4437708354871932, + "learning_rate": 5.15375645404192e-07, + "loss": 0.47876495122909546, + "step": 5882 + }, + { + "epoch": 1.3564676043347936, + "grad_norm": 1.413429948502146, + "learning_rate": 5.150421828932837e-07, + "loss": 0.4656233787536621, + "step": 5883 + }, + { + "epoch": 1.3566981784643763, + "grad_norm": 1.4503708847221477, + "learning_rate": 5.147087908759082e-07, + "loss": 0.4392930269241333, + "step": 5884 + }, + { + "epoch": 1.356928752593959, + "grad_norm": 1.6187538312851866, + "learning_rate": 5.143754694005289e-07, + "loss": 0.5044047832489014, + "step": 5885 + }, + { + "epoch": 1.3571593267235416, + "grad_norm": 1.3914560087628793, + "learning_rate": 5.140422185155964e-07, + "loss": 0.4345476031303406, + "step": 5886 + }, + { + "epoch": 1.3573899008531243, + "grad_norm": 1.768236932460398, + "learning_rate": 5.137090382695528e-07, + "loss": 0.49207669496536255, + "step": 5887 + }, + { + "epoch": 1.357620474982707, + "grad_norm": 1.531417533887488, + "learning_rate": 5.133759287108286e-07, + "loss": 0.4054356813430786, + "step": 5888 + }, + { + "epoch": 1.3578510491122895, + "grad_norm": 1.9704323937726442, + "learning_rate": 5.130428898878449e-07, + "loss": 0.5436004400253296, + "step": 5889 + }, + { + "epoch": 1.3580816232418722, + "grad_norm": 1.521959500035041, + "learning_rate": 5.127099218490127e-07, + "loss": 0.4832550287246704, + "step": 5890 + }, + { + "epoch": 1.358312197371455, + "grad_norm": 1.4438750839498624, + "learning_rate": 5.123770246427315e-07, + "loss": 0.38890475034713745, + "step": 5891 + }, + { + "epoch": 1.3585427715010376, + "grad_norm": 1.3028583829520697, + "learning_rate": 5.12044198317392e-07, + "loss": 0.49784210324287415, + "step": 5892 + }, + { + "epoch": 1.3587733456306204, + "grad_norm": 1.5058620289816076, + "learning_rate": 5.117114429213732e-07, + "loss": 0.5033924579620361, + "step": 5893 + }, + { + "epoch": 1.3590039197602028, + 
"grad_norm": 1.5069016697055244, + "learning_rate": 5.113787585030454e-07, + "loss": 0.4857698678970337, + "step": 5894 + }, + { + "epoch": 1.3592344938897856, + "grad_norm": 1.6430229342698937, + "learning_rate": 5.110461451107663e-07, + "loss": 0.4269944429397583, + "step": 5895 + }, + { + "epoch": 1.3594650680193683, + "grad_norm": 1.5554523008644683, + "learning_rate": 5.107136027928858e-07, + "loss": 0.44045162200927734, + "step": 5896 + }, + { + "epoch": 1.3596956421489508, + "grad_norm": 1.6719472262672752, + "learning_rate": 5.103811315977418e-07, + "loss": 0.5223391056060791, + "step": 5897 + }, + { + "epoch": 1.3599262162785335, + "grad_norm": 1.6234993813736853, + "learning_rate": 5.100487315736627e-07, + "loss": 0.45988473296165466, + "step": 5898 + }, + { + "epoch": 1.3601567904081162, + "grad_norm": 1.3494964030299075, + "learning_rate": 5.097164027689661e-07, + "loss": 0.46342164278030396, + "step": 5899 + }, + { + "epoch": 1.360387364537699, + "grad_norm": 1.6151646749241875, + "learning_rate": 5.093841452319588e-07, + "loss": 0.48150479793548584, + "step": 5900 + }, + { + "epoch": 1.3606179386672816, + "grad_norm": 1.3258214555354595, + "learning_rate": 5.090519590109386e-07, + "loss": 0.3971351981163025, + "step": 5901 + }, + { + "epoch": 1.3608485127968641, + "grad_norm": 1.755266254483419, + "learning_rate": 5.087198441541914e-07, + "loss": 0.44869956374168396, + "step": 5902 + }, + { + "epoch": 1.3610790869264469, + "grad_norm": 1.4425507935259798, + "learning_rate": 5.083878007099943e-07, + "loss": 0.3402775526046753, + "step": 5903 + }, + { + "epoch": 1.3613096610560296, + "grad_norm": 1.3415772700158808, + "learning_rate": 5.080558287266119e-07, + "loss": 0.4031033515930176, + "step": 5904 + }, + { + "epoch": 1.361540235185612, + "grad_norm": 1.6435607583739225, + "learning_rate": 5.077239282523012e-07, + "loss": 0.493259459733963, + "step": 5905 + }, + { + "epoch": 1.3617708093151948, + "grad_norm": 1.4120722192098578, + "learning_rate": 
5.073920993353063e-07, + "loss": 0.39178919792175293, + "step": 5906 + }, + { + "epoch": 1.3620013834447775, + "grad_norm": 1.6684880889475469, + "learning_rate": 5.070603420238624e-07, + "loss": 0.5091253519058228, + "step": 5907 + }, + { + "epoch": 1.3622319575743602, + "grad_norm": 1.3497137288112562, + "learning_rate": 5.067286563661934e-07, + "loss": 0.416462779045105, + "step": 5908 + }, + { + "epoch": 1.362462531703943, + "grad_norm": 1.7821137618482668, + "learning_rate": 5.063970424105137e-07, + "loss": 0.5018768310546875, + "step": 5909 + }, + { + "epoch": 1.3626931058335254, + "grad_norm": 1.4656990143163084, + "learning_rate": 5.060655002050262e-07, + "loss": 0.5512624979019165, + "step": 5910 + }, + { + "epoch": 1.3629236799631081, + "grad_norm": 1.3507263825947706, + "learning_rate": 5.057340297979241e-07, + "loss": 0.3953768014907837, + "step": 5911 + }, + { + "epoch": 1.3631542540926909, + "grad_norm": 1.2807145092132266, + "learning_rate": 5.054026312373896e-07, + "loss": 0.4355456233024597, + "step": 5912 + }, + { + "epoch": 1.3633848282222734, + "grad_norm": 1.7515987196576535, + "learning_rate": 5.050713045715955e-07, + "loss": 0.4826827645301819, + "step": 5913 + }, + { + "epoch": 1.363615402351856, + "grad_norm": 1.5075633708078446, + "learning_rate": 5.047400498487035e-07, + "loss": 0.47084230184555054, + "step": 5914 + }, + { + "epoch": 1.3638459764814388, + "grad_norm": 1.750968751768445, + "learning_rate": 5.044088671168644e-07, + "loss": 0.5273452997207642, + "step": 5915 + }, + { + "epoch": 1.3640765506110215, + "grad_norm": 1.484245498844297, + "learning_rate": 5.040777564242194e-07, + "loss": 0.44878947734832764, + "step": 5916 + }, + { + "epoch": 1.3643071247406042, + "grad_norm": 1.5815904358854045, + "learning_rate": 5.03746717818898e-07, + "loss": 0.47986388206481934, + "step": 5917 + }, + { + "epoch": 1.3645376988701867, + "grad_norm": 1.4148899602283196, + "learning_rate": 5.034157513490211e-07, + "loss": 0.4807628393173218, + 
"step": 5918 + }, + { + "epoch": 1.3647682729997694, + "grad_norm": 1.3747301384734179, + "learning_rate": 5.030848570626969e-07, + "loss": 0.46027708053588867, + "step": 5919 + }, + { + "epoch": 1.3649988471293522, + "grad_norm": 1.517934310152821, + "learning_rate": 5.027540350080249e-07, + "loss": 0.3803088963031769, + "step": 5920 + }, + { + "epoch": 1.3652294212589347, + "grad_norm": 1.7239494972976075, + "learning_rate": 5.024232852330939e-07, + "loss": 0.5530920028686523, + "step": 5921 + }, + { + "epoch": 1.3654599953885174, + "grad_norm": 1.7183928961648565, + "learning_rate": 5.020926077859805e-07, + "loss": 0.45984846353530884, + "step": 5922 + }, + { + "epoch": 1.3656905695181, + "grad_norm": 1.5752429840016822, + "learning_rate": 5.017620027147533e-07, + "loss": 0.4448089301586151, + "step": 5923 + }, + { + "epoch": 1.3659211436476828, + "grad_norm": 1.713335636587649, + "learning_rate": 5.01431470067468e-07, + "loss": 0.4226706326007843, + "step": 5924 + }, + { + "epoch": 1.3661517177772655, + "grad_norm": 1.9953320185051966, + "learning_rate": 5.011010098921718e-07, + "loss": 0.5243814587593079, + "step": 5925 + }, + { + "epoch": 1.366382291906848, + "grad_norm": 1.6278540239253128, + "learning_rate": 5.007706222368995e-07, + "loss": 0.5733383893966675, + "step": 5926 + }, + { + "epoch": 1.3666128660364307, + "grad_norm": 1.373199955472141, + "learning_rate": 5.00440307149677e-07, + "loss": 0.4583539366722107, + "step": 5927 + }, + { + "epoch": 1.3668434401660134, + "grad_norm": 1.5871148090703988, + "learning_rate": 5.001100646785186e-07, + "loss": 0.474712610244751, + "step": 5928 + }, + { + "epoch": 1.367074014295596, + "grad_norm": 1.6888872351824356, + "learning_rate": 4.997798948714291e-07, + "loss": 0.3995950222015381, + "step": 5929 + }, + { + "epoch": 1.3673045884251787, + "grad_norm": 1.7317310910620232, + "learning_rate": 4.994497977764011e-07, + "loss": 0.4236767888069153, + "step": 5930 + }, + { + "epoch": 1.3675351625547614, + 
"grad_norm": 1.6853541022393534, + "learning_rate": 4.991197734414178e-07, + "loss": 0.4972396492958069, + "step": 5931 + }, + { + "epoch": 1.367765736684344, + "grad_norm": 1.503037819471691, + "learning_rate": 4.98789821914452e-07, + "loss": 0.444613516330719, + "step": 5932 + }, + { + "epoch": 1.3679963108139268, + "grad_norm": 1.6912958330957677, + "learning_rate": 4.984599432434649e-07, + "loss": 0.4955690801143646, + "step": 5933 + }, + { + "epoch": 1.3682268849435093, + "grad_norm": 1.559115794882019, + "learning_rate": 4.981301374764084e-07, + "loss": 0.4983398914337158, + "step": 5934 + }, + { + "epoch": 1.368457459073092, + "grad_norm": 1.5588186216828477, + "learning_rate": 4.978004046612223e-07, + "loss": 0.45190921425819397, + "step": 5935 + }, + { + "epoch": 1.3686880332026747, + "grad_norm": 1.757499738470118, + "learning_rate": 4.974707448458369e-07, + "loss": 0.5014151334762573, + "step": 5936 + }, + { + "epoch": 1.3689186073322572, + "grad_norm": 1.5399509659752455, + "learning_rate": 4.971411580781719e-07, + "loss": 0.3868405818939209, + "step": 5937 + }, + { + "epoch": 1.36914918146184, + "grad_norm": 1.42775142494789, + "learning_rate": 4.968116444061363e-07, + "loss": 0.4093654155731201, + "step": 5938 + }, + { + "epoch": 1.3693797555914227, + "grad_norm": 1.318689202230345, + "learning_rate": 4.964822038776276e-07, + "loss": 0.3945506513118744, + "step": 5939 + }, + { + "epoch": 1.3696103297210054, + "grad_norm": 1.5874458283663229, + "learning_rate": 4.961528365405333e-07, + "loss": 0.3645547330379486, + "step": 5940 + }, + { + "epoch": 1.369840903850588, + "grad_norm": 1.760752800086673, + "learning_rate": 4.958235424427309e-07, + "loss": 0.36679786443710327, + "step": 5941 + }, + { + "epoch": 1.3700714779801706, + "grad_norm": 1.5458160371079348, + "learning_rate": 4.954943216320861e-07, + "loss": 0.4892774820327759, + "step": 5942 + }, + { + "epoch": 1.3703020521097533, + "grad_norm": 1.4817693224477149, + "learning_rate": 
4.951651741564544e-07, + "loss": 0.40406349301338196, + "step": 5943 + }, + { + "epoch": 1.370532626239336, + "grad_norm": 1.277384097830529, + "learning_rate": 4.948361000636812e-07, + "loss": 0.4219849407672882, + "step": 5944 + }, + { + "epoch": 1.3707632003689185, + "grad_norm": 1.7190062313169097, + "learning_rate": 4.945070994016008e-07, + "loss": 0.5329363346099854, + "step": 5945 + }, + { + "epoch": 1.3709937744985012, + "grad_norm": 1.5495655705207303, + "learning_rate": 4.941781722180361e-07, + "loss": 0.42577850818634033, + "step": 5946 + }, + { + "epoch": 1.371224348628084, + "grad_norm": 1.3916296167797302, + "learning_rate": 4.938493185608008e-07, + "loss": 0.4157155156135559, + "step": 5947 + }, + { + "epoch": 1.3714549227576667, + "grad_norm": 1.5016286739703502, + "learning_rate": 4.935205384776965e-07, + "loss": 0.46491485834121704, + "step": 5948 + }, + { + "epoch": 1.3716854968872494, + "grad_norm": 1.6766694792768029, + "learning_rate": 4.931918320165151e-07, + "loss": 0.39582759141921997, + "step": 5949 + }, + { + "epoch": 1.3719160710168319, + "grad_norm": 1.3277840228822322, + "learning_rate": 4.928631992250371e-07, + "loss": 0.4380473792552948, + "step": 5950 + }, + { + "epoch": 1.3721466451464146, + "grad_norm": 1.5358043238579873, + "learning_rate": 4.925346401510327e-07, + "loss": 0.5044572949409485, + "step": 5951 + }, + { + "epoch": 1.372377219275997, + "grad_norm": 1.6172521688559274, + "learning_rate": 4.922061548422617e-07, + "loss": 0.4808889627456665, + "step": 5952 + }, + { + "epoch": 1.3726077934055798, + "grad_norm": 1.370713689883329, + "learning_rate": 4.91877743346472e-07, + "loss": 0.4215632677078247, + "step": 5953 + }, + { + "epoch": 1.3728383675351625, + "grad_norm": 1.4640509349497177, + "learning_rate": 4.915494057114025e-07, + "loss": 0.4999268651008606, + "step": 5954 + }, + { + "epoch": 1.3730689416647452, + "grad_norm": 1.593000178254792, + "learning_rate": 4.912211419847793e-07, + "loss": 0.476152241230011, + 
"step": 5955 + }, + { + "epoch": 1.373299515794328, + "grad_norm": 1.5436036358421792, + "learning_rate": 4.908929522143201e-07, + "loss": 0.4253045320510864, + "step": 5956 + }, + { + "epoch": 1.3735300899239105, + "grad_norm": 1.6726587032262756, + "learning_rate": 4.905648364477293e-07, + "loss": 0.4251098036766052, + "step": 5957 + }, + { + "epoch": 1.3737606640534932, + "grad_norm": 1.5635582188699524, + "learning_rate": 4.902367947327029e-07, + "loss": 0.3820844888687134, + "step": 5958 + }, + { + "epoch": 1.373991238183076, + "grad_norm": 1.5563353591748068, + "learning_rate": 4.899088271169245e-07, + "loss": 0.4725508689880371, + "step": 5959 + }, + { + "epoch": 1.3742218123126584, + "grad_norm": 1.4545077693536257, + "learning_rate": 4.895809336480675e-07, + "loss": 0.48313626646995544, + "step": 5960 + }, + { + "epoch": 1.374452386442241, + "grad_norm": 1.6596316713803083, + "learning_rate": 4.892531143737952e-07, + "loss": 0.5344939231872559, + "step": 5961 + }, + { + "epoch": 1.3746829605718238, + "grad_norm": 1.7551620350578117, + "learning_rate": 4.889253693417585e-07, + "loss": 0.4305552840232849, + "step": 5962 + }, + { + "epoch": 1.3749135347014065, + "grad_norm": 1.4302106398553562, + "learning_rate": 4.885976985995996e-07, + "loss": 0.3564034700393677, + "step": 5963 + }, + { + "epoch": 1.3751441088309893, + "grad_norm": 1.4796542999179279, + "learning_rate": 4.882701021949475e-07, + "loss": 0.5498751997947693, + "step": 5964 + }, + { + "epoch": 1.3753746829605717, + "grad_norm": 1.5956710623028654, + "learning_rate": 4.879425801754226e-07, + "loss": 0.4489964246749878, + "step": 5965 + }, + { + "epoch": 1.3756052570901545, + "grad_norm": 1.7595842751992934, + "learning_rate": 4.87615132588633e-07, + "loss": 0.4142688810825348, + "step": 5966 + }, + { + "epoch": 1.3758358312197372, + "grad_norm": 1.483255834477138, + "learning_rate": 4.872877594821767e-07, + "loss": 0.3823632597923279, + "step": 5967 + }, + { + "epoch": 1.3760664053493197, + 
"grad_norm": 1.603982795420405, + "learning_rate": 4.869604609036408e-07, + "loss": 0.39014697074890137, + "step": 5968 + }, + { + "epoch": 1.3762969794789024, + "grad_norm": 1.5363032345717058, + "learning_rate": 4.866332369006016e-07, + "loss": 0.3907933235168457, + "step": 5969 + }, + { + "epoch": 1.376527553608485, + "grad_norm": 1.5125931439342233, + "learning_rate": 4.863060875206244e-07, + "loss": 0.3872087001800537, + "step": 5970 + }, + { + "epoch": 1.3767581277380678, + "grad_norm": 1.5847290584713085, + "learning_rate": 4.85979012811263e-07, + "loss": 0.40380537509918213, + "step": 5971 + }, + { + "epoch": 1.3769887018676505, + "grad_norm": 1.3127541034285726, + "learning_rate": 4.856520128200621e-07, + "loss": 0.39867663383483887, + "step": 5972 + }, + { + "epoch": 1.377219275997233, + "grad_norm": 1.7829413941875683, + "learning_rate": 4.853250875945534e-07, + "loss": 0.5337423086166382, + "step": 5973 + }, + { + "epoch": 1.3774498501268158, + "grad_norm": 1.4903518724810052, + "learning_rate": 4.849982371822593e-07, + "loss": 0.3824300765991211, + "step": 5974 + }, + { + "epoch": 1.3776804242563985, + "grad_norm": 1.4611697760932394, + "learning_rate": 4.846714616306907e-07, + "loss": 0.3613823652267456, + "step": 5975 + }, + { + "epoch": 1.377910998385981, + "grad_norm": 1.5701851835478555, + "learning_rate": 4.843447609873484e-07, + "loss": 0.5040241479873657, + "step": 5976 + }, + { + "epoch": 1.3781415725155637, + "grad_norm": 1.5801365248176698, + "learning_rate": 4.840181352997207e-07, + "loss": 0.4639400243759155, + "step": 5977 + }, + { + "epoch": 1.3783721466451464, + "grad_norm": 1.730401874176074, + "learning_rate": 4.836915846152867e-07, + "loss": 0.503246009349823, + "step": 5978 + }, + { + "epoch": 1.3786027207747291, + "grad_norm": 1.6695377873006745, + "learning_rate": 4.833651089815135e-07, + "loss": 0.3974607586860657, + "step": 5979 + }, + { + "epoch": 1.3788332949043118, + "grad_norm": 1.556324884896908, + "learning_rate": 
4.830387084458573e-07, + "loss": 0.43200844526290894, + "step": 5980 + }, + { + "epoch": 1.3790638690338943, + "grad_norm": 1.8355646307086506, + "learning_rate": 4.827123830557644e-07, + "loss": 0.547272801399231, + "step": 5981 + }, + { + "epoch": 1.379294443163477, + "grad_norm": 1.5723785141918243, + "learning_rate": 4.823861328586688e-07, + "loss": 0.4509696960449219, + "step": 5982 + }, + { + "epoch": 1.3795250172930598, + "grad_norm": 1.53889123165165, + "learning_rate": 4.820599579019946e-07, + "loss": 0.46022483706474304, + "step": 5983 + }, + { + "epoch": 1.3797555914226423, + "grad_norm": 1.5251655198087088, + "learning_rate": 4.817338582331548e-07, + "loss": 0.40973198413848877, + "step": 5984 + }, + { + "epoch": 1.379986165552225, + "grad_norm": 1.6235538954137896, + "learning_rate": 4.814078338995515e-07, + "loss": 0.39012736082077026, + "step": 5985 + }, + { + "epoch": 1.3802167396818077, + "grad_norm": 1.6954879615528178, + "learning_rate": 4.810818849485749e-07, + "loss": 0.40657323598861694, + "step": 5986 + }, + { + "epoch": 1.3804473138113904, + "grad_norm": 1.4158383607530642, + "learning_rate": 4.80756011427606e-07, + "loss": 0.38662189245224, + "step": 5987 + }, + { + "epoch": 1.3806778879409731, + "grad_norm": 1.629559894183336, + "learning_rate": 4.804302133840126e-07, + "loss": 0.4888705015182495, + "step": 5988 + }, + { + "epoch": 1.3809084620705556, + "grad_norm": 1.4732586688358036, + "learning_rate": 4.801044908651537e-07, + "loss": 0.4559556245803833, + "step": 5989 + }, + { + "epoch": 1.3811390362001383, + "grad_norm": 1.773370569584542, + "learning_rate": 4.797788439183757e-07, + "loss": 0.40912386775016785, + "step": 5990 + }, + { + "epoch": 1.381369610329721, + "grad_norm": 1.3364334005028415, + "learning_rate": 4.794532725910152e-07, + "loss": 0.3848627209663391, + "step": 5991 + }, + { + "epoch": 1.3816001844593035, + "grad_norm": 1.3860556916017956, + "learning_rate": 4.791277769303975e-07, + "loss": 0.4995359778404236, + 
"step": 5992 + }, + { + "epoch": 1.3818307585888863, + "grad_norm": 1.3898521995378452, + "learning_rate": 4.788023569838356e-07, + "loss": 0.38717859983444214, + "step": 5993 + }, + { + "epoch": 1.382061332718469, + "grad_norm": 1.7766923949498086, + "learning_rate": 4.784770127986339e-07, + "loss": 0.39855217933654785, + "step": 5994 + }, + { + "epoch": 1.3822919068480517, + "grad_norm": 1.337680228597258, + "learning_rate": 4.781517444220835e-07, + "loss": 0.38494858145713806, + "step": 5995 + }, + { + "epoch": 1.3825224809776344, + "grad_norm": 1.4735802599680248, + "learning_rate": 4.778265519014661e-07, + "loss": 0.44064784049987793, + "step": 5996 + }, + { + "epoch": 1.382753055107217, + "grad_norm": 1.8926413264660993, + "learning_rate": 4.775014352840512e-07, + "loss": 0.39377373456954956, + "step": 5997 + }, + { + "epoch": 1.3829836292367996, + "grad_norm": 1.5108151654480286, + "learning_rate": 4.771763946170979e-07, + "loss": 0.45127296447753906, + "step": 5998 + }, + { + "epoch": 1.3832142033663823, + "grad_norm": 1.4916107560429466, + "learning_rate": 4.768514299478545e-07, + "loss": 0.4999358654022217, + "step": 5999 + }, + { + "epoch": 1.3834447774959648, + "grad_norm": 1.7185286370183794, + "learning_rate": 4.7652654132355784e-07, + "loss": 0.49552851915359497, + "step": 6000 + }, + { + "epoch": 1.3836753516255476, + "grad_norm": 1.7765151369959267, + "learning_rate": 4.762017287914338e-07, + "loss": 0.49196135997772217, + "step": 6001 + }, + { + "epoch": 1.3839059257551303, + "grad_norm": 1.6417248034868954, + "learning_rate": 4.758769923986966e-07, + "loss": 0.3870600461959839, + "step": 6002 + }, + { + "epoch": 1.384136499884713, + "grad_norm": 1.6104154654929026, + "learning_rate": 4.7555233219255074e-07, + "loss": 0.4585425853729248, + "step": 6003 + }, + { + "epoch": 1.3843670740142957, + "grad_norm": 1.3699827425500786, + "learning_rate": 4.752277482201882e-07, + "loss": 0.4332588315010071, + "step": 6004 + }, + { + "epoch": 
1.3845976481438782, + "grad_norm": 1.6005942921335146, + "learning_rate": 4.749032405287913e-07, + "loss": 0.4386274814605713, + "step": 6005 + }, + { + "epoch": 1.384828222273461, + "grad_norm": 1.430715117905666, + "learning_rate": 4.745788091655295e-07, + "loss": 0.5064895749092102, + "step": 6006 + }, + { + "epoch": 1.3850587964030436, + "grad_norm": 1.470846994377081, + "learning_rate": 4.7425445417756295e-07, + "loss": 0.4441327452659607, + "step": 6007 + }, + { + "epoch": 1.3852893705326261, + "grad_norm": 1.6191746478584856, + "learning_rate": 4.7393017561203965e-07, + "loss": 0.4415687918663025, + "step": 6008 + }, + { + "epoch": 1.3855199446622088, + "grad_norm": 1.4021203224812295, + "learning_rate": 4.736059735160973e-07, + "loss": 0.4668382704257965, + "step": 6009 + }, + { + "epoch": 1.3857505187917916, + "grad_norm": 1.6079029250549948, + "learning_rate": 4.732818479368615e-07, + "loss": 0.3981805443763733, + "step": 6010 + }, + { + "epoch": 1.3859810929213743, + "grad_norm": 1.4448652226463723, + "learning_rate": 4.7295779892144694e-07, + "loss": 0.4465348720550537, + "step": 6011 + }, + { + "epoch": 1.386211667050957, + "grad_norm": 1.7530840597871544, + "learning_rate": 4.7263382651695805e-07, + "loss": 0.4844682812690735, + "step": 6012 + }, + { + "epoch": 1.3864422411805395, + "grad_norm": 1.417618664232542, + "learning_rate": 4.723099307704868e-07, + "loss": 0.4261378347873688, + "step": 6013 + }, + { + "epoch": 1.3866728153101222, + "grad_norm": 1.4997543603341101, + "learning_rate": 4.7198611172911506e-07, + "loss": 0.457815945148468, + "step": 6014 + }, + { + "epoch": 1.386903389439705, + "grad_norm": 1.570655771567204, + "learning_rate": 4.7166236943991333e-07, + "loss": 0.46352216601371765, + "step": 6015 + }, + { + "epoch": 1.3871339635692874, + "grad_norm": 1.486567492766103, + "learning_rate": 4.7133870394994104e-07, + "loss": 0.4166485667228699, + "step": 6016 + }, + { + "epoch": 1.3873645376988701, + "grad_norm": 1.6982826579565595, + 
"learning_rate": 4.710151153062456e-07, + "loss": 0.405789852142334, + "step": 6017 + }, + { + "epoch": 1.3875951118284529, + "grad_norm": 1.7459761562612983, + "learning_rate": 4.7069160355586456e-07, + "loss": 0.47718119621276855, + "step": 6018 + }, + { + "epoch": 1.3878256859580356, + "grad_norm": 1.5824023496617, + "learning_rate": 4.7036816874582307e-07, + "loss": 0.5040356516838074, + "step": 6019 + }, + { + "epoch": 1.3880562600876183, + "grad_norm": 1.5657039890557007, + "learning_rate": 4.700448109231362e-07, + "loss": 0.45093637704849243, + "step": 6020 + }, + { + "epoch": 1.3882868342172008, + "grad_norm": 1.4929438188817195, + "learning_rate": 4.6972153013480666e-07, + "loss": 0.5363638997077942, + "step": 6021 + }, + { + "epoch": 1.3885174083467835, + "grad_norm": 1.6076509313088967, + "learning_rate": 4.6939832642782684e-07, + "loss": 0.4917050004005432, + "step": 6022 + }, + { + "epoch": 1.3887479824763662, + "grad_norm": 1.692377103708349, + "learning_rate": 4.690751998491782e-07, + "loss": 0.43033331632614136, + "step": 6023 + }, + { + "epoch": 1.3889785566059487, + "grad_norm": 1.5272594017885164, + "learning_rate": 4.6875215044582973e-07, + "loss": 0.36168330907821655, + "step": 6024 + }, + { + "epoch": 1.3892091307355314, + "grad_norm": 1.693805471797637, + "learning_rate": 4.6842917826474047e-07, + "loss": 0.48347967863082886, + "step": 6025 + }, + { + "epoch": 1.3894397048651141, + "grad_norm": 1.332022962916858, + "learning_rate": 4.681062833528572e-07, + "loss": 0.4493439495563507, + "step": 6026 + }, + { + "epoch": 1.3896702789946969, + "grad_norm": 1.4842335012941816, + "learning_rate": 4.677834657571165e-07, + "loss": 0.385773628950119, + "step": 6027 + }, + { + "epoch": 1.3899008531242796, + "grad_norm": 1.396017775513053, + "learning_rate": 4.674607255244426e-07, + "loss": 0.4254469573497772, + "step": 6028 + }, + { + "epoch": 1.390131427253862, + "grad_norm": 1.6964811881797437, + "learning_rate": 4.671380627017497e-07, + "loss": 
0.5070454478263855, + "step": 6029 + }, + { + "epoch": 1.3903620013834448, + "grad_norm": 1.4647574188657595, + "learning_rate": 4.668154773359394e-07, + "loss": 0.44099801778793335, + "step": 6030 + }, + { + "epoch": 1.3905925755130275, + "grad_norm": 1.6731498815474952, + "learning_rate": 4.6649296947390314e-07, + "loss": 0.4965481162071228, + "step": 6031 + }, + { + "epoch": 1.39082314964261, + "grad_norm": 1.6621123973009748, + "learning_rate": 4.6617053916252116e-07, + "loss": 0.4085753262042999, + "step": 6032 + }, + { + "epoch": 1.3910537237721927, + "grad_norm": 1.473260966023028, + "learning_rate": 4.6584818644866106e-07, + "loss": 0.3768424391746521, + "step": 6033 + }, + { + "epoch": 1.3912842979017754, + "grad_norm": 1.7152094772871185, + "learning_rate": 4.6552591137918087e-07, + "loss": 0.4330044388771057, + "step": 6034 + }, + { + "epoch": 1.3915148720313582, + "grad_norm": 1.5907700374750249, + "learning_rate": 4.6520371400092584e-07, + "loss": 0.4669216275215149, + "step": 6035 + }, + { + "epoch": 1.3917454461609409, + "grad_norm": 1.8634085835731031, + "learning_rate": 4.648815943607314e-07, + "loss": 0.5491182208061218, + "step": 6036 + }, + { + "epoch": 1.3919760202905234, + "grad_norm": 1.439715262819595, + "learning_rate": 4.6455955250542e-07, + "loss": 0.4842255413532257, + "step": 6037 + }, + { + "epoch": 1.392206594420106, + "grad_norm": 1.598726710739168, + "learning_rate": 4.6423758848180427e-07, + "loss": 0.45479631423950195, + "step": 6038 + }, + { + "epoch": 1.3924371685496888, + "grad_norm": 1.5770365297702393, + "learning_rate": 4.6391570233668486e-07, + "loss": 0.4209587574005127, + "step": 6039 + }, + { + "epoch": 1.3926677426792713, + "grad_norm": 1.4722680740741498, + "learning_rate": 4.6359389411685145e-07, + "loss": 0.5061464905738831, + "step": 6040 + }, + { + "epoch": 1.392898316808854, + "grad_norm": 1.5166334201375402, + "learning_rate": 4.6327216386908196e-07, + "loss": 0.39443570375442505, + "step": 6041 + }, + { + 
"epoch": 1.3931288909384367, + "grad_norm": 1.6936024892202146, + "learning_rate": 4.6295051164014256e-07, + "loss": 0.4784463942050934, + "step": 6042 + }, + { + "epoch": 1.3933594650680194, + "grad_norm": 1.623401531095956, + "learning_rate": 4.6262893747678957e-07, + "loss": 0.41256606578826904, + "step": 6043 + }, + { + "epoch": 1.3935900391976022, + "grad_norm": 1.430742297932055, + "learning_rate": 4.623074414257662e-07, + "loss": 0.4507666230201721, + "step": 6044 + }, + { + "epoch": 1.3938206133271847, + "grad_norm": 1.4646678303979026, + "learning_rate": 4.6198602353380545e-07, + "loss": 0.3783376216888428, + "step": 6045 + }, + { + "epoch": 1.3940511874567674, + "grad_norm": 1.5485119918407955, + "learning_rate": 4.616646838476289e-07, + "loss": 0.47854840755462646, + "step": 6046 + }, + { + "epoch": 1.39428176158635, + "grad_norm": 1.506150277535636, + "learning_rate": 4.6134342241394685e-07, + "loss": 0.47121208906173706, + "step": 6047 + }, + { + "epoch": 1.3945123357159326, + "grad_norm": 1.4779397331062858, + "learning_rate": 4.610222392794569e-07, + "loss": 0.5211559534072876, + "step": 6048 + }, + { + "epoch": 1.3947429098455153, + "grad_norm": 2.0522570691736606, + "learning_rate": 4.6070113449084747e-07, + "loss": 0.5846370458602905, + "step": 6049 + }, + { + "epoch": 1.394973483975098, + "grad_norm": 1.6651959806589232, + "learning_rate": 4.6038010809479365e-07, + "loss": 0.4787401854991913, + "step": 6050 + }, + { + "epoch": 1.3952040581046807, + "grad_norm": 1.336725780471279, + "learning_rate": 4.600591601379596e-07, + "loss": 0.36429738998413086, + "step": 6051 + }, + { + "epoch": 1.3954346322342635, + "grad_norm": 1.606284081701607, + "learning_rate": 4.597382906669992e-07, + "loss": 0.49923771619796753, + "step": 6052 + }, + { + "epoch": 1.395665206363846, + "grad_norm": 1.5476584348847333, + "learning_rate": 4.5941749972855326e-07, + "loss": 0.408005028963089, + "step": 6053 + }, + { + "epoch": 1.3958957804934287, + "grad_norm": 
1.72927604568786, + "learning_rate": 4.590967873692523e-07, + "loss": 0.4524402618408203, + "step": 6054 + }, + { + "epoch": 1.3961263546230114, + "grad_norm": 1.5041096845532136, + "learning_rate": 4.587761536357152e-07, + "loss": 0.5264980792999268, + "step": 6055 + }, + { + "epoch": 1.3963569287525939, + "grad_norm": 1.6066275699787076, + "learning_rate": 4.5845559857454976e-07, + "loss": 0.5324279069900513, + "step": 6056 + }, + { + "epoch": 1.3965875028821766, + "grad_norm": 1.4996065290876746, + "learning_rate": 4.581351222323511e-07, + "loss": 0.5197574496269226, + "step": 6057 + }, + { + "epoch": 1.3968180770117593, + "grad_norm": 1.6418756331716369, + "learning_rate": 4.578147246557043e-07, + "loss": 0.4549001157283783, + "step": 6058 + }, + { + "epoch": 1.397048651141342, + "grad_norm": 1.374490396915421, + "learning_rate": 4.5749440589118183e-07, + "loss": 0.38597673177719116, + "step": 6059 + }, + { + "epoch": 1.3972792252709247, + "grad_norm": 1.3707652210777583, + "learning_rate": 4.57174165985346e-07, + "loss": 0.4104316532611847, + "step": 6060 + }, + { + "epoch": 1.3975097994005072, + "grad_norm": 1.7242255092716443, + "learning_rate": 4.5685400498474614e-07, + "loss": 0.5241787433624268, + "step": 6061 + }, + { + "epoch": 1.39774037353009, + "grad_norm": 1.668574015144598, + "learning_rate": 4.565339229359213e-07, + "loss": 0.5033289790153503, + "step": 6062 + }, + { + "epoch": 1.3979709476596724, + "grad_norm": 1.3309384356199967, + "learning_rate": 4.5621391988539894e-07, + "loss": 0.436188280582428, + "step": 6063 + }, + { + "epoch": 1.3982015217892552, + "grad_norm": 1.4783680897212301, + "learning_rate": 4.5589399587969414e-07, + "loss": 0.3885838985443115, + "step": 6064 + }, + { + "epoch": 1.3984320959188379, + "grad_norm": 1.6395174483956128, + "learning_rate": 4.555741509653116e-07, + "loss": 0.5140193104743958, + "step": 6065 + }, + { + "epoch": 1.3986626700484206, + "grad_norm": 1.360236032045127, + "learning_rate": 
4.552543851887436e-07, + "loss": 0.41084468364715576, + "step": 6066 + }, + { + "epoch": 1.3988932441780033, + "grad_norm": 1.417896120601143, + "learning_rate": 4.549346985964718e-07, + "loss": 0.3606417179107666, + "step": 6067 + }, + { + "epoch": 1.3991238183075858, + "grad_norm": 1.5212574193639694, + "learning_rate": 4.546150912349653e-07, + "loss": 0.48518556356430054, + "step": 6068 + }, + { + "epoch": 1.3993543924371685, + "grad_norm": 1.6821671640024862, + "learning_rate": 4.5429556315068264e-07, + "loss": 0.5394424200057983, + "step": 6069 + }, + { + "epoch": 1.3995849665667512, + "grad_norm": 1.3734997636022714, + "learning_rate": 4.539761143900708e-07, + "loss": 0.40272367000579834, + "step": 6070 + }, + { + "epoch": 1.3998155406963337, + "grad_norm": 1.6175896107942709, + "learning_rate": 4.536567449995641e-07, + "loss": 0.4279879331588745, + "step": 6071 + }, + { + "epoch": 1.4000461148259165, + "grad_norm": 1.4620694447822713, + "learning_rate": 4.5333745502558695e-07, + "loss": 0.48560982942581177, + "step": 6072 + }, + { + "epoch": 1.4002766889554992, + "grad_norm": 1.7184355426607418, + "learning_rate": 4.530182445145506e-07, + "loss": 0.49256429076194763, + "step": 6073 + }, + { + "epoch": 1.4005072630850819, + "grad_norm": 1.4236944961072253, + "learning_rate": 4.5269911351285614e-07, + "loss": 0.5015553832054138, + "step": 6074 + }, + { + "epoch": 1.4007378372146646, + "grad_norm": 1.4505255602543088, + "learning_rate": 4.5238006206689204e-07, + "loss": 0.4313800632953644, + "step": 6075 + }, + { + "epoch": 1.400968411344247, + "grad_norm": 1.311079736416616, + "learning_rate": 4.520610902230363e-07, + "loss": 0.3440586030483246, + "step": 6076 + }, + { + "epoch": 1.4011989854738298, + "grad_norm": 1.4064686390113332, + "learning_rate": 4.517421980276538e-07, + "loss": 0.43868017196655273, + "step": 6077 + }, + { + "epoch": 1.4014295596034125, + "grad_norm": 1.6307364330463041, + "learning_rate": 4.5142338552709923e-07, + "loss": 
0.5581029057502747, + "step": 6078 + }, + { + "epoch": 1.401660133732995, + "grad_norm": 1.6962393590938891, + "learning_rate": 4.5110465276771524e-07, + "loss": 0.4543154835700989, + "step": 6079 + }, + { + "epoch": 1.4018907078625777, + "grad_norm": 1.5554679193557313, + "learning_rate": 4.507859997958333e-07, + "loss": 0.5229466557502747, + "step": 6080 + }, + { + "epoch": 1.4021212819921605, + "grad_norm": 1.5285075075955497, + "learning_rate": 4.504674266577724e-07, + "loss": 0.46781739592552185, + "step": 6081 + }, + { + "epoch": 1.4023518561217432, + "grad_norm": 1.6198419428344395, + "learning_rate": 4.5014893339983993e-07, + "loss": 0.48040711879730225, + "step": 6082 + }, + { + "epoch": 1.402582430251326, + "grad_norm": 1.5279313939865138, + "learning_rate": 4.49830520068333e-07, + "loss": 0.5039708018302917, + "step": 6083 + }, + { + "epoch": 1.4028130043809084, + "grad_norm": 1.4998739241266676, + "learning_rate": 4.495121867095354e-07, + "loss": 0.43496155738830566, + "step": 6084 + }, + { + "epoch": 1.403043578510491, + "grad_norm": 1.3838778339679694, + "learning_rate": 4.4919393336972045e-07, + "loss": 0.4603109061717987, + "step": 6085 + }, + { + "epoch": 1.4032741526400738, + "grad_norm": 1.476085268646584, + "learning_rate": 4.488757600951496e-07, + "loss": 0.4571962356567383, + "step": 6086 + }, + { + "epoch": 1.4035047267696563, + "grad_norm": 1.4791952167701867, + "learning_rate": 4.485576669320729e-07, + "loss": 0.46302443742752075, + "step": 6087 + }, + { + "epoch": 1.403735300899239, + "grad_norm": 1.675302072516594, + "learning_rate": 4.482396539267275e-07, + "loss": 0.39066869020462036, + "step": 6088 + }, + { + "epoch": 1.4039658750288218, + "grad_norm": 1.704176039322231, + "learning_rate": 4.4792172112534076e-07, + "loss": 0.4797130823135376, + "step": 6089 + }, + { + "epoch": 1.4041964491584045, + "grad_norm": 1.5835144658620484, + "learning_rate": 4.4760386857412704e-07, + "loss": 0.4578198492527008, + "step": 6090 + }, + { + 
"epoch": 1.4044270232879872, + "grad_norm": 1.3987211085891795, + "learning_rate": 4.472860963192889e-07, + "loss": 0.40768736600875854, + "step": 6091 + }, + { + "epoch": 1.4046575974175697, + "grad_norm": 1.4530633567004236, + "learning_rate": 4.4696840440701846e-07, + "loss": 0.4201413094997406, + "step": 6092 + }, + { + "epoch": 1.4048881715471524, + "grad_norm": 1.3648395822246437, + "learning_rate": 4.466507928834951e-07, + "loss": 0.45901796221733093, + "step": 6093 + }, + { + "epoch": 1.4051187456767351, + "grad_norm": 1.6465847208416895, + "learning_rate": 4.463332617948874e-07, + "loss": 0.4699435830116272, + "step": 6094 + }, + { + "epoch": 1.4053493198063176, + "grad_norm": 1.4755445259366653, + "learning_rate": 4.46015811187351e-07, + "loss": 0.4526669383049011, + "step": 6095 + }, + { + "epoch": 1.4055798939359003, + "grad_norm": 1.5721685230021194, + "learning_rate": 4.456984411070313e-07, + "loss": 0.46754884719848633, + "step": 6096 + }, + { + "epoch": 1.405810468065483, + "grad_norm": 2.1874728205075495, + "learning_rate": 4.453811516000604e-07, + "loss": 0.5119268894195557, + "step": 6097 + }, + { + "epoch": 1.4060410421950658, + "grad_norm": 2.056110026644097, + "learning_rate": 4.4506394271256043e-07, + "loss": 0.42980802059173584, + "step": 6098 + }, + { + "epoch": 1.4062716163246485, + "grad_norm": 1.5339161636381375, + "learning_rate": 4.447468144906401e-07, + "loss": 0.5895063281059265, + "step": 6099 + }, + { + "epoch": 1.406502190454231, + "grad_norm": 1.3796241305160553, + "learning_rate": 4.4442976698039803e-07, + "loss": 0.42768803238868713, + "step": 6100 + }, + { + "epoch": 1.4067327645838137, + "grad_norm": 1.608854909074267, + "learning_rate": 4.4411280022791943e-07, + "loss": 0.44234544038772583, + "step": 6101 + }, + { + "epoch": 1.4069633387133964, + "grad_norm": 1.3028889839673445, + "learning_rate": 4.437959142792791e-07, + "loss": 0.4382736086845398, + "step": 6102 + }, + { + "epoch": 1.407193912842979, + "grad_norm": 
1.6088674485493302, + "learning_rate": 4.4347910918054e-07, + "loss": 0.47603681683540344, + "step": 6103 + }, + { + "epoch": 1.4074244869725616, + "grad_norm": 1.8816511615485159, + "learning_rate": 4.431623849777522e-07, + "loss": 0.5562035441398621, + "step": 6104 + }, + { + "epoch": 1.4076550611021443, + "grad_norm": 2.2517510056002763, + "learning_rate": 4.4284574171695535e-07, + "loss": 0.4153141677379608, + "step": 6105 + }, + { + "epoch": 1.407885635231727, + "grad_norm": 1.2534764690727898, + "learning_rate": 4.425291794441762e-07, + "loss": 0.4825887680053711, + "step": 6106 + }, + { + "epoch": 1.4081162093613098, + "grad_norm": 1.4829126230878127, + "learning_rate": 4.4221269820543104e-07, + "loss": 0.4853668808937073, + "step": 6107 + }, + { + "epoch": 1.4083467834908923, + "grad_norm": 1.6140810272295893, + "learning_rate": 4.418962980467229e-07, + "loss": 0.5615251064300537, + "step": 6108 + }, + { + "epoch": 1.408577357620475, + "grad_norm": 1.8397680714752904, + "learning_rate": 4.4157997901404396e-07, + "loss": 0.38605546951293945, + "step": 6109 + }, + { + "epoch": 1.4088079317500577, + "grad_norm": 1.412066772348378, + "learning_rate": 4.412637411533745e-07, + "loss": 0.41582173109054565, + "step": 6110 + }, + { + "epoch": 1.4090385058796402, + "grad_norm": 1.4963267141581975, + "learning_rate": 4.4094758451068327e-07, + "loss": 0.38091376423835754, + "step": 6111 + }, + { + "epoch": 1.409269080009223, + "grad_norm": 1.5465721612260863, + "learning_rate": 4.4063150913192635e-07, + "loss": 0.43319058418273926, + "step": 6112 + }, + { + "epoch": 1.4094996541388056, + "grad_norm": 1.2123497825560654, + "learning_rate": 4.403155150630484e-07, + "loss": 0.43207013607025146, + "step": 6113 + }, + { + "epoch": 1.4097302282683883, + "grad_norm": 1.7217391258871346, + "learning_rate": 4.399996023499829e-07, + "loss": 0.43750250339508057, + "step": 6114 + }, + { + "epoch": 1.409960802397971, + "grad_norm": 1.5123653802002535, + "learning_rate": 
4.3968377103865016e-07, + "loss": 0.44084444642066956, + "step": 6115 + }, + { + "epoch": 1.4101913765275536, + "grad_norm": 1.4135580211481893, + "learning_rate": 4.3936802117495997e-07, + "loss": 0.4752010405063629, + "step": 6116 + }, + { + "epoch": 1.4104219506571363, + "grad_norm": 1.384945744446678, + "learning_rate": 4.390523528048098e-07, + "loss": 0.39239025115966797, + "step": 6117 + }, + { + "epoch": 1.410652524786719, + "grad_norm": 1.7179287290824201, + "learning_rate": 4.387367659740856e-07, + "loss": 0.46021080017089844, + "step": 6118 + }, + { + "epoch": 1.4108830989163015, + "grad_norm": 1.3751290560349647, + "learning_rate": 4.3842126072866014e-07, + "loss": 0.4079766571521759, + "step": 6119 + }, + { + "epoch": 1.4111136730458842, + "grad_norm": 1.5182170234243058, + "learning_rate": 4.381058371143964e-07, + "loss": 0.4922672510147095, + "step": 6120 + }, + { + "epoch": 1.411344247175467, + "grad_norm": 1.5200373777326295, + "learning_rate": 4.377904951771438e-07, + "loss": 0.3950929045677185, + "step": 6121 + }, + { + "epoch": 1.4115748213050496, + "grad_norm": 1.6189013836504815, + "learning_rate": 4.374752349627402e-07, + "loss": 0.503406286239624, + "step": 6122 + }, + { + "epoch": 1.4118053954346323, + "grad_norm": 1.724327270706253, + "learning_rate": 4.3716005651701215e-07, + "loss": 0.49198317527770996, + "step": 6123 + }, + { + "epoch": 1.4120359695642148, + "grad_norm": 1.424527206510087, + "learning_rate": 4.368449598857742e-07, + "loss": 0.47396305203437805, + "step": 6124 + }, + { + "epoch": 1.4122665436937976, + "grad_norm": 1.7537535213801698, + "learning_rate": 4.365299451148291e-07, + "loss": 0.5248152017593384, + "step": 6125 + }, + { + "epoch": 1.4124971178233803, + "grad_norm": 1.310814657820865, + "learning_rate": 4.362150122499666e-07, + "loss": 0.44327419996261597, + "step": 6126 + }, + { + "epoch": 1.4127276919529628, + "grad_norm": 1.5885906377106098, + "learning_rate": 4.3590016133696626e-07, + "loss": 
0.4628877639770508, + "step": 6127 + }, + { + "epoch": 1.4129582660825455, + "grad_norm": 1.5166490469327556, + "learning_rate": 4.355853924215942e-07, + "loss": 0.5277193188667297, + "step": 6128 + }, + { + "epoch": 1.4131888402121282, + "grad_norm": 1.6202759290555122, + "learning_rate": 4.3527070554960577e-07, + "loss": 0.4675426781177521, + "step": 6129 + }, + { + "epoch": 1.413419414341711, + "grad_norm": 1.668904355836008, + "learning_rate": 4.349561007667433e-07, + "loss": 0.3762160539627075, + "step": 6130 + }, + { + "epoch": 1.4136499884712936, + "grad_norm": 1.5686457690092273, + "learning_rate": 4.346415781187385e-07, + "loss": 0.4797256588935852, + "step": 6131 + }, + { + "epoch": 1.4138805626008761, + "grad_norm": 1.283129438483415, + "learning_rate": 4.3432713765130967e-07, + "loss": 0.4348931312561035, + "step": 6132 + }, + { + "epoch": 1.4141111367304589, + "grad_norm": 1.72495987311985, + "learning_rate": 4.3401277941016435e-07, + "loss": 0.5080585479736328, + "step": 6133 + }, + { + "epoch": 1.4143417108600416, + "grad_norm": 1.5083246190317607, + "learning_rate": 4.33698503440998e-07, + "loss": 0.40223604440689087, + "step": 6134 + }, + { + "epoch": 1.414572284989624, + "grad_norm": 1.5888336584861464, + "learning_rate": 4.3338430978949315e-07, + "loss": 0.4460202753543854, + "step": 6135 + }, + { + "epoch": 1.4148028591192068, + "grad_norm": 1.6992292342961226, + "learning_rate": 4.3307019850132167e-07, + "loss": 0.5814889669418335, + "step": 6136 + }, + { + "epoch": 1.4150334332487895, + "grad_norm": 1.366462724450419, + "learning_rate": 4.3275616962214214e-07, + "loss": 0.39237886667251587, + "step": 6137 + }, + { + "epoch": 1.4152640073783722, + "grad_norm": 1.8844588932900945, + "learning_rate": 4.324422231976025e-07, + "loss": 0.4621772766113281, + "step": 6138 + }, + { + "epoch": 1.415494581507955, + "grad_norm": 1.2090393738968102, + "learning_rate": 4.3212835927333745e-07, + "loss": 0.3722139596939087, + "step": 6139 + }, + { + "epoch": 
1.4157251556375374, + "grad_norm": 1.4849768206374545, + "learning_rate": 4.3181457789497055e-07, + "loss": 0.5007534623146057, + "step": 6140 + }, + { + "epoch": 1.4159557297671201, + "grad_norm": 1.603501037396303, + "learning_rate": 4.315008791081135e-07, + "loss": 0.470672607421875, + "step": 6141 + }, + { + "epoch": 1.4161863038967029, + "grad_norm": 1.6882048347200689, + "learning_rate": 4.3118726295836495e-07, + "loss": 0.5196114778518677, + "step": 6142 + }, + { + "epoch": 1.4164168780262854, + "grad_norm": 1.686399785386393, + "learning_rate": 4.3087372949131275e-07, + "loss": 0.4606804847717285, + "step": 6143 + }, + { + "epoch": 1.416647452155868, + "grad_norm": 1.2427386262927842, + "learning_rate": 4.3056027875253156e-07, + "loss": 0.3926661014556885, + "step": 6144 + }, + { + "epoch": 1.4168780262854508, + "grad_norm": 1.5075319697699416, + "learning_rate": 4.3024691078758536e-07, + "loss": 0.4570828080177307, + "step": 6145 + }, + { + "epoch": 1.4171086004150335, + "grad_norm": 1.4876286685500335, + "learning_rate": 4.299336256420245e-07, + "loss": 0.398615300655365, + "step": 6146 + }, + { + "epoch": 1.4173391745446162, + "grad_norm": 1.5413174329970663, + "learning_rate": 4.2962042336138873e-07, + "loss": 0.47571802139282227, + "step": 6147 + }, + { + "epoch": 1.4175697486741987, + "grad_norm": 1.5960399575320494, + "learning_rate": 4.2930730399120487e-07, + "loss": 0.4266431927680969, + "step": 6148 + }, + { + "epoch": 1.4178003228037814, + "grad_norm": 1.5511638894349447, + "learning_rate": 4.289942675769886e-07, + "loss": 0.47870057821273804, + "step": 6149 + }, + { + "epoch": 1.4180308969333641, + "grad_norm": 1.3514029969532406, + "learning_rate": 4.2868131416424223e-07, + "loss": 0.3947669267654419, + "step": 6150 + }, + { + "epoch": 1.4182614710629466, + "grad_norm": 1.6045441623823578, + "learning_rate": 4.283684437984573e-07, + "loss": 0.49074164032936096, + "step": 6151 + }, + { + "epoch": 1.4184920451925294, + "grad_norm": 
1.5267380397937564, + "learning_rate": 4.280556565251123e-07, + "loss": 0.5540445446968079, + "step": 6152 + }, + { + "epoch": 1.418722619322112, + "grad_norm": 1.4292058799019856, + "learning_rate": 4.2774295238967386e-07, + "loss": 0.4898286461830139, + "step": 6153 + }, + { + "epoch": 1.4189531934516948, + "grad_norm": 1.5872207462828773, + "learning_rate": 4.2743033143759733e-07, + "loss": 0.5432708859443665, + "step": 6154 + }, + { + "epoch": 1.4191837675812775, + "grad_norm": 1.811563729099354, + "learning_rate": 4.2711779371432445e-07, + "loss": 0.4438853859901428, + "step": 6155 + }, + { + "epoch": 1.41941434171086, + "grad_norm": 1.4197202159023756, + "learning_rate": 4.268053392652863e-07, + "loss": 0.4885905385017395, + "step": 6156 + }, + { + "epoch": 1.4196449158404427, + "grad_norm": 2.10234923243058, + "learning_rate": 4.264929681359013e-07, + "loss": 0.4465547204017639, + "step": 6157 + }, + { + "epoch": 1.4198754899700254, + "grad_norm": 1.5987256760741122, + "learning_rate": 4.2618068037157594e-07, + "loss": 0.4392780661582947, + "step": 6158 + }, + { + "epoch": 1.420106064099608, + "grad_norm": 1.7421664904589054, + "learning_rate": 4.258684760177039e-07, + "loss": 0.4501269459724426, + "step": 6159 + }, + { + "epoch": 1.4203366382291907, + "grad_norm": 1.399976858224263, + "learning_rate": 4.2555635511966783e-07, + "loss": 0.38439738750457764, + "step": 6160 + }, + { + "epoch": 1.4205672123587734, + "grad_norm": 1.4211214514262747, + "learning_rate": 4.2524431772283743e-07, + "loss": 0.4679202437400818, + "step": 6161 + }, + { + "epoch": 1.420797786488356, + "grad_norm": 1.3094843029172225, + "learning_rate": 4.2493236387257e-07, + "loss": 0.33505773544311523, + "step": 6162 + }, + { + "epoch": 1.4210283606179388, + "grad_norm": 1.7083049967506945, + "learning_rate": 4.246204936142116e-07, + "loss": 0.39141514897346497, + "step": 6163 + }, + { + "epoch": 1.4212589347475213, + "grad_norm": 1.5786326298364493, + "learning_rate": 
4.243087069930958e-07, + "loss": 0.49278295040130615, + "step": 6164 + }, + { + "epoch": 1.421489508877104, + "grad_norm": 2.2314439595882214, + "learning_rate": 4.239970040545442e-07, + "loss": 0.44093143939971924, + "step": 6165 + }, + { + "epoch": 1.4217200830066867, + "grad_norm": 1.5138193694081605, + "learning_rate": 4.236853848438654e-07, + "loss": 0.3840683102607727, + "step": 6166 + }, + { + "epoch": 1.4219506571362692, + "grad_norm": 1.7654139979291832, + "learning_rate": 4.23373849406357e-07, + "loss": 0.49814748764038086, + "step": 6167 + }, + { + "epoch": 1.422181231265852, + "grad_norm": 1.672205831624779, + "learning_rate": 4.2306239778730314e-07, + "loss": 0.37481504678726196, + "step": 6168 + }, + { + "epoch": 1.4224118053954347, + "grad_norm": 1.6089555356775624, + "learning_rate": 4.227510300319772e-07, + "loss": 0.3936859965324402, + "step": 6169 + }, + { + "epoch": 1.4226423795250174, + "grad_norm": 1.6958111197730896, + "learning_rate": 4.224397461856389e-07, + "loss": 0.4448816478252411, + "step": 6170 + }, + { + "epoch": 1.4228729536546, + "grad_norm": 1.7506080980818486, + "learning_rate": 4.22128546293537e-07, + "loss": 0.5494886040687561, + "step": 6171 + }, + { + "epoch": 1.4231035277841826, + "grad_norm": 1.6093955633210433, + "learning_rate": 4.218174304009078e-07, + "loss": 0.4532161355018616, + "step": 6172 + }, + { + "epoch": 1.4233341019137653, + "grad_norm": 1.5423276922709723, + "learning_rate": 4.215063985529743e-07, + "loss": 0.4771450161933899, + "step": 6173 + }, + { + "epoch": 1.4235646760433478, + "grad_norm": 1.4359456178719159, + "learning_rate": 4.211954507949491e-07, + "loss": 0.40784329175949097, + "step": 6174 + }, + { + "epoch": 1.4237952501729305, + "grad_norm": 1.6548161498628766, + "learning_rate": 4.208845871720308e-07, + "loss": 0.5336268544197083, + "step": 6175 + }, + { + "epoch": 1.4240258243025132, + "grad_norm": 1.495644640745375, + "learning_rate": 4.205738077294072e-07, + "loss": 0.44641751050949097, + 
"step": 6176 + }, + { + "epoch": 1.424256398432096, + "grad_norm": 1.650188328042211, + "learning_rate": 4.2026311251225264e-07, + "loss": 0.4370793104171753, + "step": 6177 + }, + { + "epoch": 1.4244869725616787, + "grad_norm": 1.5423618719597711, + "learning_rate": 4.1995250156573046e-07, + "loss": 0.4290730953216553, + "step": 6178 + }, + { + "epoch": 1.4247175466912612, + "grad_norm": 1.8757556733756044, + "learning_rate": 4.196419749349904e-07, + "loss": 0.5021491646766663, + "step": 6179 + }, + { + "epoch": 1.4249481208208439, + "grad_norm": 1.4243786827618563, + "learning_rate": 4.193315326651711e-07, + "loss": 0.3880186080932617, + "step": 6180 + }, + { + "epoch": 1.4251786949504266, + "grad_norm": 1.6032235222838507, + "learning_rate": 4.1902117480139876e-07, + "loss": 0.46498721837997437, + "step": 6181 + }, + { + "epoch": 1.425409269080009, + "grad_norm": 1.6074916356613946, + "learning_rate": 4.187109013887863e-07, + "loss": 0.45799821615219116, + "step": 6182 + }, + { + "epoch": 1.4256398432095918, + "grad_norm": 1.7936327965955485, + "learning_rate": 4.1840071247243594e-07, + "loss": 0.47459733486175537, + "step": 6183 + }, + { + "epoch": 1.4258704173391745, + "grad_norm": 1.7628830057109544, + "learning_rate": 4.18090608097436e-07, + "loss": 0.47636276483535767, + "step": 6184 + }, + { + "epoch": 1.4261009914687572, + "grad_norm": 1.4575388433663756, + "learning_rate": 4.17780588308864e-07, + "loss": 0.4710165858268738, + "step": 6185 + }, + { + "epoch": 1.42633156559834, + "grad_norm": 1.6068491390352067, + "learning_rate": 4.174706531517836e-07, + "loss": 0.4222904443740845, + "step": 6186 + }, + { + "epoch": 1.4265621397279225, + "grad_norm": 1.6136307494472921, + "learning_rate": 4.171608026712476e-07, + "loss": 0.43496620655059814, + "step": 6187 + }, + { + "epoch": 1.4267927138575052, + "grad_norm": 1.6637888441260775, + "learning_rate": 4.1685103691229597e-07, + "loss": 0.5178344249725342, + "step": 6188 + }, + { + "epoch": 1.4270232879870879, 
+ "grad_norm": 1.2438461713878222, + "learning_rate": 4.1654135591995644e-07, + "loss": 0.4033231735229492, + "step": 6189 + }, + { + "epoch": 1.4272538621166704, + "grad_norm": 1.6711330724791171, + "learning_rate": 4.162317597392436e-07, + "loss": 0.3368793725967407, + "step": 6190 + }, + { + "epoch": 1.427484436246253, + "grad_norm": 1.6185157962363963, + "learning_rate": 4.159222484151612e-07, + "loss": 0.44133609533309937, + "step": 6191 + }, + { + "epoch": 1.4277150103758358, + "grad_norm": 1.4778493402771002, + "learning_rate": 4.1561282199269944e-07, + "loss": 0.431888222694397, + "step": 6192 + }, + { + "epoch": 1.4279455845054185, + "grad_norm": 1.6042487363335018, + "learning_rate": 4.1530348051683615e-07, + "loss": 0.4319697618484497, + "step": 6193 + }, + { + "epoch": 1.4281761586350012, + "grad_norm": 2.1012743912812986, + "learning_rate": 4.1499422403253783e-07, + "loss": 0.5468018054962158, + "step": 6194 + }, + { + "epoch": 1.4284067327645837, + "grad_norm": 1.5851271799276925, + "learning_rate": 4.1468505258475784e-07, + "loss": 0.5083246231079102, + "step": 6195 + }, + { + "epoch": 1.4286373068941665, + "grad_norm": 1.5639019523203612, + "learning_rate": 4.1437596621843774e-07, + "loss": 0.3767821788787842, + "step": 6196 + }, + { + "epoch": 1.4288678810237492, + "grad_norm": 1.7459586887034657, + "learning_rate": 4.140669649785058e-07, + "loss": 0.5210238099098206, + "step": 6197 + }, + { + "epoch": 1.4290984551533317, + "grad_norm": 1.7429606479800976, + "learning_rate": 4.1375804890987907e-07, + "loss": 0.4498119354248047, + "step": 6198 + }, + { + "epoch": 1.4293290292829144, + "grad_norm": 1.8267093368864302, + "learning_rate": 4.134492180574609e-07, + "loss": 0.5093557238578796, + "step": 6199 + }, + { + "epoch": 1.429559603412497, + "grad_norm": 1.422406352052411, + "learning_rate": 4.131404724661438e-07, + "loss": 0.4745742082595825, + "step": 6200 + }, + { + "epoch": 1.4297901775420798, + "grad_norm": 1.506088588333767, + 
"learning_rate": 4.128318121808068e-07, + "loss": 0.45697301626205444, + "step": 6201 + }, + { + "epoch": 1.4300207516716625, + "grad_norm": 1.7309660786915744, + "learning_rate": 4.125232372463161e-07, + "loss": 0.4690994918346405, + "step": 6202 + }, + { + "epoch": 1.430251325801245, + "grad_norm": 1.6241026421208185, + "learning_rate": 4.1221474770752696e-07, + "loss": 0.49369046092033386, + "step": 6203 + }, + { + "epoch": 1.4304818999308277, + "grad_norm": 1.573925179309737, + "learning_rate": 4.1190634360928113e-07, + "loss": 0.5137126445770264, + "step": 6204 + }, + { + "epoch": 1.4307124740604105, + "grad_norm": 1.492371449937338, + "learning_rate": 4.1159802499640883e-07, + "loss": 0.43663549423217773, + "step": 6205 + }, + { + "epoch": 1.430943048189993, + "grad_norm": 1.373244593865611, + "learning_rate": 4.112897919137265e-07, + "loss": 0.40197718143463135, + "step": 6206 + }, + { + "epoch": 1.4311736223195757, + "grad_norm": 1.782636444844866, + "learning_rate": 4.1098164440603967e-07, + "loss": 0.5537480115890503, + "step": 6207 + }, + { + "epoch": 1.4314041964491584, + "grad_norm": 1.415124349915093, + "learning_rate": 4.1067358251814e-07, + "loss": 0.36077365279197693, + "step": 6208 + }, + { + "epoch": 1.4316347705787411, + "grad_norm": 1.8848844116732066, + "learning_rate": 4.103656062948081e-07, + "loss": 0.5421038866043091, + "step": 6209 + }, + { + "epoch": 1.4318653447083238, + "grad_norm": 1.5989095555214856, + "learning_rate": 4.100577157808107e-07, + "loss": 0.4330317974090576, + "step": 6210 + }, + { + "epoch": 1.4320959188379063, + "grad_norm": 1.5778977933757077, + "learning_rate": 4.0974991102090315e-07, + "loss": 0.4734618067741394, + "step": 6211 + }, + { + "epoch": 1.432326492967489, + "grad_norm": 1.7307541730622933, + "learning_rate": 4.0944219205982853e-07, + "loss": 0.4664125442504883, + "step": 6212 + }, + { + "epoch": 1.4325570670970718, + "grad_norm": 1.5163510968488794, + "learning_rate": 4.09134558942316e-07, + "loss": 
0.5214053988456726, + "step": 6213 + }, + { + "epoch": 1.4327876412266543, + "grad_norm": 1.4446024999002893, + "learning_rate": 4.08827011713084e-07, + "loss": 0.4694370627403259, + "step": 6214 + }, + { + "epoch": 1.433018215356237, + "grad_norm": 1.4399092047479434, + "learning_rate": 4.0851955041683674e-07, + "loss": 0.46517378091812134, + "step": 6215 + }, + { + "epoch": 1.4332487894858197, + "grad_norm": 1.589744461016997, + "learning_rate": 4.0821217509826766e-07, + "loss": 0.49152523279190063, + "step": 6216 + }, + { + "epoch": 1.4334793636154024, + "grad_norm": 1.3335404796705832, + "learning_rate": 4.0790488580205616e-07, + "loss": 0.4272884726524353, + "step": 6217 + }, + { + "epoch": 1.4337099377449851, + "grad_norm": 1.7167989658225775, + "learning_rate": 4.075976825728703e-07, + "loss": 0.4585829973220825, + "step": 6218 + }, + { + "epoch": 1.4339405118745676, + "grad_norm": 1.4284884424474726, + "learning_rate": 4.07290565455365e-07, + "loss": 0.33463186025619507, + "step": 6219 + }, + { + "epoch": 1.4341710860041503, + "grad_norm": 1.618873724040505, + "learning_rate": 4.0698353449418344e-07, + "loss": 0.4228953719139099, + "step": 6220 + }, + { + "epoch": 1.434401660133733, + "grad_norm": 1.688194150248175, + "learning_rate": 4.066765897339547e-07, + "loss": 0.5336583256721497, + "step": 6221 + }, + { + "epoch": 1.4346322342633155, + "grad_norm": 1.590308662997971, + "learning_rate": 4.063697312192972e-07, + "loss": 0.4779771864414215, + "step": 6222 + }, + { + "epoch": 1.4348628083928983, + "grad_norm": 1.4786534556099964, + "learning_rate": 4.060629589948155e-07, + "loss": 0.35226666927337646, + "step": 6223 + }, + { + "epoch": 1.435093382522481, + "grad_norm": 1.7110004239307235, + "learning_rate": 4.0575627310510174e-07, + "loss": 0.5006309747695923, + "step": 6224 + }, + { + "epoch": 1.4353239566520637, + "grad_norm": 1.5102552970375984, + "learning_rate": 4.0544967359473645e-07, + "loss": 0.3925382196903229, + "step": 6225 + }, + { + "epoch": 
1.4355545307816464, + "grad_norm": 1.4323897305301354, + "learning_rate": 4.0514316050828643e-07, + "loss": 0.3443659543991089, + "step": 6226 + }, + { + "epoch": 1.435785104911229, + "grad_norm": 1.3832333833383677, + "learning_rate": 4.048367338903067e-07, + "loss": 0.35585030913352966, + "step": 6227 + }, + { + "epoch": 1.4360156790408116, + "grad_norm": 1.551815991519559, + "learning_rate": 4.045303937853395e-07, + "loss": 0.4147206246852875, + "step": 6228 + }, + { + "epoch": 1.4362462531703943, + "grad_norm": 1.2817256800052734, + "learning_rate": 4.0422414023791486e-07, + "loss": 0.4475427567958832, + "step": 6229 + }, + { + "epoch": 1.4364768272999768, + "grad_norm": 1.3842198366935599, + "learning_rate": 4.0391797329254897e-07, + "loss": 0.5235386490821838, + "step": 6230 + }, + { + "epoch": 1.4367074014295595, + "grad_norm": 1.4929978689012695, + "learning_rate": 4.036118929937472e-07, + "loss": 0.3543087840080261, + "step": 6231 + }, + { + "epoch": 1.4369379755591423, + "grad_norm": 1.793735853632873, + "learning_rate": 4.03305899386001e-07, + "loss": 0.4718255400657654, + "step": 6232 + }, + { + "epoch": 1.437168549688725, + "grad_norm": 1.338180352532036, + "learning_rate": 4.0299999251378924e-07, + "loss": 0.41239792108535767, + "step": 6233 + }, + { + "epoch": 1.4373991238183077, + "grad_norm": 1.5900128771725797, + "learning_rate": 4.026941724215791e-07, + "loss": 0.4241238236427307, + "step": 6234 + }, + { + "epoch": 1.4376296979478902, + "grad_norm": 1.4625134538700348, + "learning_rate": 4.0238843915382435e-07, + "loss": 0.43678992986679077, + "step": 6235 + }, + { + "epoch": 1.437860272077473, + "grad_norm": 1.3845075397304552, + "learning_rate": 4.0208279275496706e-07, + "loss": 0.4304202198982239, + "step": 6236 + }, + { + "epoch": 1.4380908462070556, + "grad_norm": 1.4379971371115365, + "learning_rate": 4.0177723326943516e-07, + "loss": 0.4297143816947937, + "step": 6237 + }, + { + "epoch": 1.4383214203366381, + "grad_norm": 
1.4713452003345164, + "learning_rate": 4.0147176074164557e-07, + "loss": 0.4823951721191406, + "step": 6238 + }, + { + "epoch": 1.4385519944662208, + "grad_norm": 1.4766475893290447, + "learning_rate": 4.0116637521600104e-07, + "loss": 0.41384291648864746, + "step": 6239 + }, + { + "epoch": 1.4387825685958036, + "grad_norm": 1.4772189735738515, + "learning_rate": 4.008610767368933e-07, + "loss": 0.5725995898246765, + "step": 6240 + }, + { + "epoch": 1.4390131427253863, + "grad_norm": 1.580155865045121, + "learning_rate": 4.0055586534869976e-07, + "loss": 0.5222553014755249, + "step": 6241 + }, + { + "epoch": 1.439243716854969, + "grad_norm": 1.3886146191032183, + "learning_rate": 4.002507410957864e-07, + "loss": 0.33871912956237793, + "step": 6242 + }, + { + "epoch": 1.4394742909845515, + "grad_norm": 1.6215524550661136, + "learning_rate": 3.9994570402250647e-07, + "loss": 0.423028826713562, + "step": 6243 + }, + { + "epoch": 1.4397048651141342, + "grad_norm": 1.5682836985778081, + "learning_rate": 3.996407541731994e-07, + "loss": 0.4235682785511017, + "step": 6244 + }, + { + "epoch": 1.439935439243717, + "grad_norm": 1.231022526448631, + "learning_rate": 3.993358915921936e-07, + "loss": 0.43758147954940796, + "step": 6245 + }, + { + "epoch": 1.4401660133732994, + "grad_norm": 1.4111669631590298, + "learning_rate": 3.9903111632380314e-07, + "loss": 0.4462485611438751, + "step": 6246 + }, + { + "epoch": 1.4403965875028821, + "grad_norm": 1.4290246546090093, + "learning_rate": 3.9872642841233086e-07, + "loss": 0.4650310277938843, + "step": 6247 + }, + { + "epoch": 1.4406271616324648, + "grad_norm": 1.4998946903017614, + "learning_rate": 3.984218279020656e-07, + "loss": 0.36653342843055725, + "step": 6248 + }, + { + "epoch": 1.4408577357620476, + "grad_norm": 1.4936296304301175, + "learning_rate": 3.9811731483728483e-07, + "loss": 0.4102433919906616, + "step": 6249 + }, + { + "epoch": 1.4410883098916303, + "grad_norm": 1.6065631349936378, + "learning_rate": 
3.9781288926225187e-07, + "loss": 0.46611371636390686, + "step": 6250 + }, + { + "epoch": 1.4413188840212128, + "grad_norm": 1.4339333577964222, + "learning_rate": 3.9750855122121854e-07, + "loss": 0.39757978916168213, + "step": 6251 + }, + { + "epoch": 1.4415494581507955, + "grad_norm": 1.762654016187883, + "learning_rate": 3.972043007584236e-07, + "loss": 0.3736093044281006, + "step": 6252 + }, + { + "epoch": 1.4417800322803782, + "grad_norm": 1.463877920104907, + "learning_rate": 3.9690013791809243e-07, + "loss": 0.4907599091529846, + "step": 6253 + }, + { + "epoch": 1.4420106064099607, + "grad_norm": 1.8306810417206691, + "learning_rate": 3.965960627444387e-07, + "loss": 0.4852679967880249, + "step": 6254 + }, + { + "epoch": 1.4422411805395434, + "grad_norm": 1.379992571943406, + "learning_rate": 3.962920752816622e-07, + "loss": 0.3681846261024475, + "step": 6255 + }, + { + "epoch": 1.4424717546691261, + "grad_norm": 1.3930271555712797, + "learning_rate": 3.9598817557395136e-07, + "loss": 0.36029407382011414, + "step": 6256 + }, + { + "epoch": 1.4427023287987089, + "grad_norm": 1.5468752557100751, + "learning_rate": 3.9568436366548044e-07, + "loss": 0.4156547486782074, + "step": 6257 + }, + { + "epoch": 1.4429329029282916, + "grad_norm": 1.2893479866141693, + "learning_rate": 3.9538063960041155e-07, + "loss": 0.417999804019928, + "step": 6258 + }, + { + "epoch": 1.443163477057874, + "grad_norm": 1.5873772931626444, + "learning_rate": 3.9507700342289454e-07, + "loss": 0.34347790479660034, + "step": 6259 + }, + { + "epoch": 1.4433940511874568, + "grad_norm": 1.6747174695424258, + "learning_rate": 3.9477345517706606e-07, + "loss": 0.5093958973884583, + "step": 6260 + }, + { + "epoch": 1.4436246253170395, + "grad_norm": 1.3786087360846342, + "learning_rate": 3.9446999490704935e-07, + "loss": 0.45406264066696167, + "step": 6261 + }, + { + "epoch": 1.443855199446622, + "grad_norm": 1.4643807349818905, + "learning_rate": 3.941666226569561e-07, + "loss": 
0.35074740648269653, + "step": 6262 + }, + { + "epoch": 1.4440857735762047, + "grad_norm": 1.9209061652207753, + "learning_rate": 3.9386333847088414e-07, + "loss": 0.4588093161582947, + "step": 6263 + }, + { + "epoch": 1.4443163477057874, + "grad_norm": 1.706957598822881, + "learning_rate": 3.935601423929187e-07, + "loss": 0.5431508421897888, + "step": 6264 + }, + { + "epoch": 1.4445469218353701, + "grad_norm": 2.1293944579193744, + "learning_rate": 3.9325703446713253e-07, + "loss": 0.5942284464836121, + "step": 6265 + }, + { + "epoch": 1.4447774959649529, + "grad_norm": 1.563688512589723, + "learning_rate": 3.929540147375856e-07, + "loss": 0.45533287525177, + "step": 6266 + }, + { + "epoch": 1.4450080700945354, + "grad_norm": 1.4069649860322977, + "learning_rate": 3.926510832483252e-07, + "loss": 0.41154634952545166, + "step": 6267 + }, + { + "epoch": 1.445238644224118, + "grad_norm": 1.7442081379649044, + "learning_rate": 3.923482400433847e-07, + "loss": 0.548882246017456, + "step": 6268 + }, + { + "epoch": 1.4454692183537008, + "grad_norm": 1.6064445647457797, + "learning_rate": 3.9204548516678635e-07, + "loss": 0.4062466621398926, + "step": 6269 + }, + { + "epoch": 1.4456997924832833, + "grad_norm": 1.4970160030578672, + "learning_rate": 3.917428186625378e-07, + "loss": 0.39035165309906006, + "step": 6270 + }, + { + "epoch": 1.445930366612866, + "grad_norm": 1.647666751716306, + "learning_rate": 3.9144024057463545e-07, + "loss": 0.44899889826774597, + "step": 6271 + }, + { + "epoch": 1.4461609407424487, + "grad_norm": 1.6865824844286113, + "learning_rate": 3.911377509470616e-07, + "loss": 0.5676968097686768, + "step": 6272 + }, + { + "epoch": 1.4463915148720314, + "grad_norm": 1.5001442753287921, + "learning_rate": 3.9083534982378596e-07, + "loss": 0.5157150626182556, + "step": 6273 + }, + { + "epoch": 1.4466220890016142, + "grad_norm": 1.3999116109701921, + "learning_rate": 3.9053303724876595e-07, + "loss": 0.4405839443206787, + "step": 6274 + }, + { + 
"epoch": 1.4468526631311966, + "grad_norm": 1.4027072316284976, + "learning_rate": 3.9023081326594564e-07, + "loss": 0.4184240400791168, + "step": 6275 + }, + { + "epoch": 1.4470832372607794, + "grad_norm": 1.4676581347164595, + "learning_rate": 3.8992867791925687e-07, + "loss": 0.46825113892555237, + "step": 6276 + }, + { + "epoch": 1.447313811390362, + "grad_norm": 1.5974669468558875, + "learning_rate": 3.896266312526174e-07, + "loss": 0.39870697259902954, + "step": 6277 + }, + { + "epoch": 1.4475443855199446, + "grad_norm": 1.5056097224989398, + "learning_rate": 3.893246733099332e-07, + "loss": 0.5021681785583496, + "step": 6278 + }, + { + "epoch": 1.4477749596495273, + "grad_norm": 1.6448123845050522, + "learning_rate": 3.890228041350966e-07, + "loss": 0.5453378558158875, + "step": 6279 + }, + { + "epoch": 1.44800553377911, + "grad_norm": 1.6411917622938994, + "learning_rate": 3.887210237719877e-07, + "loss": 0.4488704800605774, + "step": 6280 + }, + { + "epoch": 1.4482361079086927, + "grad_norm": 1.5018657352386517, + "learning_rate": 3.8841933226447274e-07, + "loss": 0.45669007301330566, + "step": 6281 + }, + { + "epoch": 1.4484666820382754, + "grad_norm": 1.704954137797073, + "learning_rate": 3.881177296564061e-07, + "loss": 0.43954944610595703, + "step": 6282 + }, + { + "epoch": 1.448697256167858, + "grad_norm": 1.3077525799414271, + "learning_rate": 3.8781621599162896e-07, + "loss": 0.39490729570388794, + "step": 6283 + }, + { + "epoch": 1.4489278302974407, + "grad_norm": 1.8875404119821422, + "learning_rate": 3.875147913139688e-07, + "loss": 0.44206392765045166, + "step": 6284 + }, + { + "epoch": 1.4491584044270232, + "grad_norm": 1.5003627073617865, + "learning_rate": 3.872134556672415e-07, + "loss": 0.3874932527542114, + "step": 6285 + }, + { + "epoch": 1.4493889785566059, + "grad_norm": 1.616983828039009, + "learning_rate": 3.8691220909524847e-07, + "loss": 0.4762042760848999, + "step": 6286 + }, + { + "epoch": 1.4496195526861886, + "grad_norm": 
1.4983771405139852, + "learning_rate": 3.8661105164177955e-07, + "loss": 0.45220378041267395, + "step": 6287 + }, + { + "epoch": 1.4498501268157713, + "grad_norm": 1.5182044259213916, + "learning_rate": 3.863099833506105e-07, + "loss": 0.48711973428726196, + "step": 6288 + }, + { + "epoch": 1.450080700945354, + "grad_norm": 1.795485740865634, + "learning_rate": 3.8600900426550495e-07, + "loss": 0.3985457420349121, + "step": 6289 + }, + { + "epoch": 1.4503112750749365, + "grad_norm": 1.8111920220274738, + "learning_rate": 3.8570811443021324e-07, + "loss": 0.4626576006412506, + "step": 6290 + }, + { + "epoch": 1.4505418492045192, + "grad_norm": 1.3056530217454654, + "learning_rate": 3.8540731388847303e-07, + "loss": 0.49909156560897827, + "step": 6291 + }, + { + "epoch": 1.450772423334102, + "grad_norm": 1.6088418800938844, + "learning_rate": 3.8510660268400853e-07, + "loss": 0.47779160737991333, + "step": 6292 + }, + { + "epoch": 1.4510029974636844, + "grad_norm": 1.7546373602134575, + "learning_rate": 3.8480598086053073e-07, + "loss": 0.41273951530456543, + "step": 6293 + }, + { + "epoch": 1.4512335715932672, + "grad_norm": 1.372334717947673, + "learning_rate": 3.8450544846173873e-07, + "loss": 0.49659836292266846, + "step": 6294 + }, + { + "epoch": 1.4514641457228499, + "grad_norm": 1.5745738888755318, + "learning_rate": 3.842050055313174e-07, + "loss": 0.48864418268203735, + "step": 6295 + }, + { + "epoch": 1.4516947198524326, + "grad_norm": 1.5511685453466029, + "learning_rate": 3.8390465211293964e-07, + "loss": 0.4437263011932373, + "step": 6296 + }, + { + "epoch": 1.4519252939820153, + "grad_norm": 1.425822828962689, + "learning_rate": 3.83604388250264e-07, + "loss": 0.4785847067832947, + "step": 6297 + }, + { + "epoch": 1.4521558681115978, + "grad_norm": 1.4667204310824673, + "learning_rate": 3.8330421398693815e-07, + "loss": 0.4376726746559143, + "step": 6298 + }, + { + "epoch": 1.4523864422411805, + "grad_norm": 1.3570227959381094, + "learning_rate": 
3.8300412936659456e-07, + "loss": 0.39121049642562866, + "step": 6299 + }, + { + "epoch": 1.4526170163707632, + "grad_norm": 1.3658035995507571, + "learning_rate": 3.827041344328541e-07, + "loss": 0.4635738730430603, + "step": 6300 + }, + { + "epoch": 1.4528475905003457, + "grad_norm": 2.0304852722065068, + "learning_rate": 3.8240422922932345e-07, + "loss": 0.502306342124939, + "step": 6301 + }, + { + "epoch": 1.4530781646299284, + "grad_norm": 1.4029845821737765, + "learning_rate": 3.8210441379959765e-07, + "loss": 0.4401247799396515, + "step": 6302 + }, + { + "epoch": 1.4533087387595112, + "grad_norm": 1.3861824238158087, + "learning_rate": 3.8180468818725744e-07, + "loss": 0.5291532874107361, + "step": 6303 + }, + { + "epoch": 1.4535393128890939, + "grad_norm": 1.6276608547131342, + "learning_rate": 3.8150505243587074e-07, + "loss": 0.44658181071281433, + "step": 6304 + }, + { + "epoch": 1.4537698870186766, + "grad_norm": 1.6458326531407963, + "learning_rate": 3.8120550658899284e-07, + "loss": 0.45127803087234497, + "step": 6305 + }, + { + "epoch": 1.454000461148259, + "grad_norm": 1.492007208083286, + "learning_rate": 3.809060506901659e-07, + "loss": 0.42187097668647766, + "step": 6306 + }, + { + "epoch": 1.4542310352778418, + "grad_norm": 1.5038936507089915, + "learning_rate": 3.806066847829191e-07, + "loss": 0.3573130667209625, + "step": 6307 + }, + { + "epoch": 1.4544616094074245, + "grad_norm": 1.9148379623538745, + "learning_rate": 3.8030740891076775e-07, + "loss": 0.4350733757019043, + "step": 6308 + }, + { + "epoch": 1.454692183537007, + "grad_norm": 1.541900067739278, + "learning_rate": 3.8000822311721526e-07, + "loss": 0.48514148592948914, + "step": 6309 + }, + { + "epoch": 1.4549227576665897, + "grad_norm": 1.4827947959124368, + "learning_rate": 3.797091274457507e-07, + "loss": 0.41036373376846313, + "step": 6310 + }, + { + "epoch": 1.4551533317961725, + "grad_norm": 1.494922453363639, + "learning_rate": 3.7941012193985113e-07, + "loss": 
0.4141424298286438, + "step": 6311 + }, + { + "epoch": 1.4553839059257552, + "grad_norm": 1.273366480801725, + "learning_rate": 3.7911120664297947e-07, + "loss": 0.4465962052345276, + "step": 6312 + }, + { + "epoch": 1.455614480055338, + "grad_norm": 1.5781844793110138, + "learning_rate": 3.7881238159858653e-07, + "loss": 0.42370718717575073, + "step": 6313 + }, + { + "epoch": 1.4558450541849204, + "grad_norm": 1.5971127849956464, + "learning_rate": 3.785136468501098e-07, + "loss": 0.5199419260025024, + "step": 6314 + }, + { + "epoch": 1.456075628314503, + "grad_norm": 1.617344004292436, + "learning_rate": 3.782150024409727e-07, + "loss": 0.4802842140197754, + "step": 6315 + }, + { + "epoch": 1.4563062024440858, + "grad_norm": 1.24431475405318, + "learning_rate": 3.77916448414587e-07, + "loss": 0.4640405476093292, + "step": 6316 + }, + { + "epoch": 1.4565367765736683, + "grad_norm": 1.4636172678889559, + "learning_rate": 3.776179848143497e-07, + "loss": 0.4338728189468384, + "step": 6317 + }, + { + "epoch": 1.456767350703251, + "grad_norm": 2.139264242241595, + "learning_rate": 3.7731961168364644e-07, + "loss": 0.42709267139434814, + "step": 6318 + }, + { + "epoch": 1.4569979248328337, + "grad_norm": 1.6617712318798017, + "learning_rate": 3.7702132906584784e-07, + "loss": 0.4985729455947876, + "step": 6319 + }, + { + "epoch": 1.4572284989624165, + "grad_norm": 1.441274937368423, + "learning_rate": 3.7672313700431277e-07, + "loss": 0.46335911750793457, + "step": 6320 + }, + { + "epoch": 1.4574590730919992, + "grad_norm": 1.416712646344965, + "learning_rate": 3.7642503554238657e-07, + "loss": 0.39897364377975464, + "step": 6321 + }, + { + "epoch": 1.4576896472215817, + "grad_norm": 1.7524170106258121, + "learning_rate": 3.761270247234014e-07, + "loss": 0.4338347017765045, + "step": 6322 + }, + { + "epoch": 1.4579202213511644, + "grad_norm": 1.5421394568485456, + "learning_rate": 3.7582910459067607e-07, + "loss": 0.4619752764701843, + "step": 6323 + }, + { + "epoch": 
1.458150795480747, + "grad_norm": 1.6592584693059589, + "learning_rate": 3.7553127518751583e-07, + "loss": 0.4676104784011841, + "step": 6324 + }, + { + "epoch": 1.4583813696103296, + "grad_norm": 1.495504668484879, + "learning_rate": 3.752335365572138e-07, + "loss": 0.37536361813545227, + "step": 6325 + }, + { + "epoch": 1.4586119437399123, + "grad_norm": 1.5747560176376743, + "learning_rate": 3.749358887430487e-07, + "loss": 0.4389209449291229, + "step": 6326 + }, + { + "epoch": 1.458842517869495, + "grad_norm": 1.561809426616513, + "learning_rate": 3.746383317882874e-07, + "loss": 0.44722115993499756, + "step": 6327 + }, + { + "epoch": 1.4590730919990778, + "grad_norm": 1.8177515516918266, + "learning_rate": 3.743408657361821e-07, + "loss": 0.39179277420043945, + "step": 6328 + }, + { + "epoch": 1.4593036661286605, + "grad_norm": 1.5511886302037754, + "learning_rate": 3.7404349062997275e-07, + "loss": 0.4704967737197876, + "step": 6329 + }, + { + "epoch": 1.459534240258243, + "grad_norm": 1.4679557991806869, + "learning_rate": 3.737462065128859e-07, + "loss": 0.4294360876083374, + "step": 6330 + }, + { + "epoch": 1.4597648143878257, + "grad_norm": 1.5082268745032619, + "learning_rate": 3.734490134281353e-07, + "loss": 0.5070170760154724, + "step": 6331 + }, + { + "epoch": 1.4599953885174084, + "grad_norm": 1.4285887900302483, + "learning_rate": 3.7315191141892013e-07, + "loss": 0.3670409023761749, + "step": 6332 + }, + { + "epoch": 1.460225962646991, + "grad_norm": 1.4866250279072872, + "learning_rate": 3.7285490052842785e-07, + "loss": 0.5043025016784668, + "step": 6333 + }, + { + "epoch": 1.4604565367765736, + "grad_norm": 1.5557807366245089, + "learning_rate": 3.725579807998316e-07, + "loss": 0.43942689895629883, + "step": 6334 + }, + { + "epoch": 1.4606871109061563, + "grad_norm": 1.61242194971354, + "learning_rate": 3.7226115227629164e-07, + "loss": 0.3444882035255432, + "step": 6335 + }, + { + "epoch": 1.460917685035739, + "grad_norm": 1.4093154726677697, 
+ "learning_rate": 3.71964415000955e-07, + "loss": 0.3994483947753906, + "step": 6336 + }, + { + "epoch": 1.4611482591653218, + "grad_norm": 1.799524270186483, + "learning_rate": 3.7166776901695564e-07, + "loss": 0.3581928014755249, + "step": 6337 + }, + { + "epoch": 1.4613788332949043, + "grad_norm": 1.4094806965107296, + "learning_rate": 3.7137121436741423e-07, + "loss": 0.4068276286125183, + "step": 6338 + }, + { + "epoch": 1.461609407424487, + "grad_norm": 1.5430920931361498, + "learning_rate": 3.710747510954376e-07, + "loss": 0.4140080213546753, + "step": 6339 + }, + { + "epoch": 1.4618399815540697, + "grad_norm": 1.5667918006300834, + "learning_rate": 3.707783792441201e-07, + "loss": 0.4328460097312927, + "step": 6340 + }, + { + "epoch": 1.4620705556836522, + "grad_norm": 1.7344820768552758, + "learning_rate": 3.704820988565419e-07, + "loss": 0.49252209067344666, + "step": 6341 + }, + { + "epoch": 1.462301129813235, + "grad_norm": 1.4564646974830249, + "learning_rate": 3.7018590997577093e-07, + "loss": 0.43051671981811523, + "step": 6342 + }, + { + "epoch": 1.4625317039428176, + "grad_norm": 1.5901870751351228, + "learning_rate": 3.698898126448605e-07, + "loss": 0.5131059288978577, + "step": 6343 + }, + { + "epoch": 1.4627622780724003, + "grad_norm": 2.025312431684147, + "learning_rate": 3.6959380690685185e-07, + "loss": 0.4633597731590271, + "step": 6344 + }, + { + "epoch": 1.462992852201983, + "grad_norm": 1.5138095102076332, + "learning_rate": 3.6929789280477265e-07, + "loss": 0.3603428602218628, + "step": 6345 + }, + { + "epoch": 1.4632234263315655, + "grad_norm": 1.4981993836978438, + "learning_rate": 3.6900207038163633e-07, + "loss": 0.5337490439414978, + "step": 6346 + }, + { + "epoch": 1.4634540004611483, + "grad_norm": 1.8305905685338713, + "learning_rate": 3.687063396804444e-07, + "loss": 0.4940665066242218, + "step": 6347 + }, + { + "epoch": 1.463684574590731, + "grad_norm": 2.012256207996667, + "learning_rate": 3.6841070074418367e-07, + "loss": 
0.45664387941360474, + "step": 6348 + }, + { + "epoch": 1.4639151487203135, + "grad_norm": 1.6965611532451377, + "learning_rate": 3.681151536158289e-07, + "loss": 0.4546254277229309, + "step": 6349 + }, + { + "epoch": 1.4641457228498962, + "grad_norm": 1.4760234786987596, + "learning_rate": 3.6781969833834015e-07, + "loss": 0.37474149465560913, + "step": 6350 + }, + { + "epoch": 1.464376296979479, + "grad_norm": 1.473821341410815, + "learning_rate": 3.675243349546655e-07, + "loss": 0.38016337156295776, + "step": 6351 + }, + { + "epoch": 1.4646068711090616, + "grad_norm": 1.3725937182091388, + "learning_rate": 3.672290635077384e-07, + "loss": 0.46079233288764954, + "step": 6352 + }, + { + "epoch": 1.4648374452386443, + "grad_norm": 1.754716547965532, + "learning_rate": 3.669338840404799e-07, + "loss": 0.39382117986679077, + "step": 6353 + }, + { + "epoch": 1.4650680193682268, + "grad_norm": 1.5018040161914972, + "learning_rate": 3.6663879659579766e-07, + "loss": 0.4502074718475342, + "step": 6354 + }, + { + "epoch": 1.4652985934978096, + "grad_norm": 1.4446726503170868, + "learning_rate": 3.663438012165848e-07, + "loss": 0.38199833035469055, + "step": 6355 + }, + { + "epoch": 1.4655291676273923, + "grad_norm": 1.4760781012903512, + "learning_rate": 3.660488979457228e-07, + "loss": 0.4340086579322815, + "step": 6356 + }, + { + "epoch": 1.4657597417569748, + "grad_norm": 1.7005769563076596, + "learning_rate": 3.65754086826078e-07, + "loss": 0.5425105094909668, + "step": 6357 + }, + { + "epoch": 1.4659903158865575, + "grad_norm": 1.4480393161895644, + "learning_rate": 3.654593679005048e-07, + "loss": 0.4671604633331299, + "step": 6358 + }, + { + "epoch": 1.4662208900161402, + "grad_norm": 1.6404775976624013, + "learning_rate": 3.6516474121184317e-07, + "loss": 0.4608290195465088, + "step": 6359 + }, + { + "epoch": 1.466451464145723, + "grad_norm": 1.9415349791307541, + "learning_rate": 3.6487020680292023e-07, + "loss": 0.5272650122642517, + "step": 6360 + }, + { + 
"epoch": 1.4666820382753056, + "grad_norm": 1.4115666654764834, + "learning_rate": 3.645757647165495e-07, + "loss": 0.40990152955055237, + "step": 6361 + }, + { + "epoch": 1.4669126124048881, + "grad_norm": 1.405277693008717, + "learning_rate": 3.6428141499553166e-07, + "loss": 0.4723639488220215, + "step": 6362 + }, + { + "epoch": 1.4671431865344708, + "grad_norm": 1.7789473556982454, + "learning_rate": 3.639871576826529e-07, + "loss": 0.5115963220596313, + "step": 6363 + }, + { + "epoch": 1.4673737606640536, + "grad_norm": 1.669989973617769, + "learning_rate": 3.636929928206862e-07, + "loss": 0.44548431038856506, + "step": 6364 + }, + { + "epoch": 1.467604334793636, + "grad_norm": 1.5904330694852653, + "learning_rate": 3.633989204523922e-07, + "loss": 0.48599356412887573, + "step": 6365 + }, + { + "epoch": 1.4678349089232188, + "grad_norm": 1.4664661517676485, + "learning_rate": 3.631049406205164e-07, + "loss": 0.463236004114151, + "step": 6366 + }, + { + "epoch": 1.4680654830528015, + "grad_norm": 1.7238002544119735, + "learning_rate": 3.6281105336779225e-07, + "loss": 0.4840255379676819, + "step": 6367 + }, + { + "epoch": 1.4682960571823842, + "grad_norm": 1.5727046676978498, + "learning_rate": 3.6251725873693926e-07, + "loss": 0.39191675186157227, + "step": 6368 + }, + { + "epoch": 1.468526631311967, + "grad_norm": 1.4333992251496341, + "learning_rate": 3.622235567706637e-07, + "loss": 0.5161769986152649, + "step": 6369 + }, + { + "epoch": 1.4687572054415494, + "grad_norm": 1.811820117175508, + "learning_rate": 3.6192994751165764e-07, + "loss": 0.4579160213470459, + "step": 6370 + }, + { + "epoch": 1.4689877795711321, + "grad_norm": 1.5348364339019953, + "learning_rate": 3.616364310026006e-07, + "loss": 0.4254727363586426, + "step": 6371 + }, + { + "epoch": 1.4692183537007149, + "grad_norm": 1.60846510703603, + "learning_rate": 3.613430072861575e-07, + "loss": 0.3614911139011383, + "step": 6372 + }, + { + "epoch": 1.4694489278302973, + "grad_norm": 
1.332197813540827, + "learning_rate": 3.610496764049814e-07, + "loss": 0.4501386284828186, + "step": 6373 + }, + { + "epoch": 1.46967950195988, + "grad_norm": 1.4207205401720155, + "learning_rate": 3.607564384017102e-07, + "loss": 0.4988802671432495, + "step": 6374 + }, + { + "epoch": 1.4699100760894628, + "grad_norm": 1.5751788296655767, + "learning_rate": 3.6046329331896907e-07, + "loss": 0.4277713894844055, + "step": 6375 + }, + { + "epoch": 1.4701406502190455, + "grad_norm": 1.5414838298104503, + "learning_rate": 3.601702411993697e-07, + "loss": 0.5007919073104858, + "step": 6376 + }, + { + "epoch": 1.4703712243486282, + "grad_norm": 1.5705777345927519, + "learning_rate": 3.5987728208551015e-07, + "loss": 0.4857282042503357, + "step": 6377 + }, + { + "epoch": 1.4706017984782107, + "grad_norm": 1.3913774043642957, + "learning_rate": 3.595844160199756e-07, + "loss": 0.45752188563346863, + "step": 6378 + }, + { + "epoch": 1.4708323726077934, + "grad_norm": 1.3374827793978188, + "learning_rate": 3.592916430453361e-07, + "loss": 0.4364059269428253, + "step": 6379 + }, + { + "epoch": 1.4710629467373761, + "grad_norm": 1.4896729369612345, + "learning_rate": 3.589989632041501e-07, + "loss": 0.48765695095062256, + "step": 6380 + }, + { + "epoch": 1.4712935208669586, + "grad_norm": 1.8321401665511103, + "learning_rate": 3.5870637653896087e-07, + "loss": 0.5505347847938538, + "step": 6381 + }, + { + "epoch": 1.4715240949965414, + "grad_norm": 1.5940287914496154, + "learning_rate": 3.584138830922994e-07, + "loss": 0.4468069076538086, + "step": 6382 + }, + { + "epoch": 1.471754669126124, + "grad_norm": 1.2639532856264213, + "learning_rate": 3.5812148290668186e-07, + "loss": 0.4050968289375305, + "step": 6383 + }, + { + "epoch": 1.4719852432557068, + "grad_norm": 1.6709771008348266, + "learning_rate": 3.578291760246122e-07, + "loss": 0.47324883937835693, + "step": 6384 + }, + { + "epoch": 1.4722158173852895, + "grad_norm": 1.646291535207369, + "learning_rate": 
3.5753696248858025e-07, + "loss": 0.4431450366973877, + "step": 6385 + }, + { + "epoch": 1.472446391514872, + "grad_norm": 1.3398593447687968, + "learning_rate": 3.5724484234106166e-07, + "loss": 0.4599822163581848, + "step": 6386 + }, + { + "epoch": 1.4726769656444547, + "grad_norm": 1.6764694987177748, + "learning_rate": 3.5695281562451964e-07, + "loss": 0.3655046224594116, + "step": 6387 + }, + { + "epoch": 1.4729075397740374, + "grad_norm": 1.925765064850511, + "learning_rate": 3.5666088238140267e-07, + "loss": 0.4543811082839966, + "step": 6388 + }, + { + "epoch": 1.47313811390362, + "grad_norm": 1.7682119668466059, + "learning_rate": 3.563690426541469e-07, + "loss": 0.45380568504333496, + "step": 6389 + }, + { + "epoch": 1.4733686880332026, + "grad_norm": 1.3928278789748259, + "learning_rate": 3.5607729648517336e-07, + "loss": 0.3640294373035431, + "step": 6390 + }, + { + "epoch": 1.4735992621627854, + "grad_norm": 1.4826659174775283, + "learning_rate": 3.557856439168907e-07, + "loss": 0.39890235662460327, + "step": 6391 + }, + { + "epoch": 1.473829836292368, + "grad_norm": 1.7657939773449876, + "learning_rate": 3.5549408499169374e-07, + "loss": 0.47551727294921875, + "step": 6392 + }, + { + "epoch": 1.4740604104219508, + "grad_norm": 1.5946717850777934, + "learning_rate": 3.5520261975196364e-07, + "loss": 0.43851834535598755, + "step": 6393 + }, + { + "epoch": 1.4742909845515333, + "grad_norm": 1.7160257871535318, + "learning_rate": 3.549112482400676e-07, + "loss": 0.45289307832717896, + "step": 6394 + }, + { + "epoch": 1.474521558681116, + "grad_norm": 1.660677297447299, + "learning_rate": 3.546199704983591e-07, + "loss": 0.5229180455207825, + "step": 6395 + }, + { + "epoch": 1.4747521328106985, + "grad_norm": 1.5089259577077747, + "learning_rate": 3.5432878656917884e-07, + "loss": 0.47332310676574707, + "step": 6396 + }, + { + "epoch": 1.4749827069402812, + "grad_norm": 1.402371205517633, + "learning_rate": 3.540376964948529e-07, + "loss": 
0.4079092741012573, + "step": 6397 + }, + { + "epoch": 1.475213281069864, + "grad_norm": 1.607654850710184, + "learning_rate": 3.5374670031769484e-07, + "loss": 0.43366020917892456, + "step": 6398 + }, + { + "epoch": 1.4754438551994467, + "grad_norm": 1.6067458113996615, + "learning_rate": 3.5345579808000294e-07, + "loss": 0.45040106773376465, + "step": 6399 + }, + { + "epoch": 1.4756744293290294, + "grad_norm": 1.584960802510298, + "learning_rate": 3.531649898240634e-07, + "loss": 0.4409756064414978, + "step": 6400 + }, + { + "epoch": 1.4759050034586119, + "grad_norm": 1.5204759785794038, + "learning_rate": 3.528742755921481e-07, + "loss": 0.4141521751880646, + "step": 6401 + }, + { + "epoch": 1.4761355775881946, + "grad_norm": 1.6363482264143396, + "learning_rate": 3.525836554265156e-07, + "loss": 0.4697296619415283, + "step": 6402 + }, + { + "epoch": 1.4763661517177773, + "grad_norm": 1.3771953803345143, + "learning_rate": 3.5229312936941013e-07, + "loss": 0.4369434714317322, + "step": 6403 + }, + { + "epoch": 1.4765967258473598, + "grad_norm": 1.3415133870830294, + "learning_rate": 3.5200269746306224e-07, + "loss": 0.4197359085083008, + "step": 6404 + }, + { + "epoch": 1.4768272999769425, + "grad_norm": 1.8249279231813902, + "learning_rate": 3.5171235974968996e-07, + "loss": 0.495933473110199, + "step": 6405 + }, + { + "epoch": 1.4770578741065252, + "grad_norm": 1.3638396377453934, + "learning_rate": 3.51422116271496e-07, + "loss": 0.4177231192588806, + "step": 6406 + }, + { + "epoch": 1.477288448236108, + "grad_norm": 1.5336568107147823, + "learning_rate": 3.511319670706705e-07, + "loss": 0.5366500020027161, + "step": 6407 + }, + { + "epoch": 1.4775190223656907, + "grad_norm": 1.5479295323166011, + "learning_rate": 3.508419121893897e-07, + "loss": 0.3900446891784668, + "step": 6408 + }, + { + "epoch": 1.4777495964952732, + "grad_norm": 1.8223854522009124, + "learning_rate": 3.5055195166981646e-07, + "loss": 0.40877431631088257, + "step": 6409 + }, + { + 
"epoch": 1.4779801706248559, + "grad_norm": 1.3594177124317366, + "learning_rate": 3.502620855540985e-07, + "loss": 0.4381163716316223, + "step": 6410 + }, + { + "epoch": 1.4782107447544386, + "grad_norm": 1.2256800281998605, + "learning_rate": 3.4997231388437167e-07, + "loss": 0.3449817895889282, + "step": 6411 + }, + { + "epoch": 1.478441318884021, + "grad_norm": 1.4879818959728963, + "learning_rate": 3.4968263670275653e-07, + "loss": 0.4879523515701294, + "step": 6412 + }, + { + "epoch": 1.4786718930136038, + "grad_norm": 1.5651020351069762, + "learning_rate": 3.493930540513613e-07, + "loss": 0.3781365156173706, + "step": 6413 + }, + { + "epoch": 1.4789024671431865, + "grad_norm": 1.6645622352676888, + "learning_rate": 3.49103565972279e-07, + "loss": 0.4505656361579895, + "step": 6414 + }, + { + "epoch": 1.4791330412727692, + "grad_norm": 1.4565716791756764, + "learning_rate": 3.4881417250759006e-07, + "loss": 0.4285612106323242, + "step": 6415 + }, + { + "epoch": 1.479363615402352, + "grad_norm": 1.5357416036601346, + "learning_rate": 3.48524873699361e-07, + "loss": 0.5285177826881409, + "step": 6416 + }, + { + "epoch": 1.4795941895319344, + "grad_norm": 1.6484784065232339, + "learning_rate": 3.482356695896437e-07, + "loss": 0.4504782259464264, + "step": 6417 + }, + { + "epoch": 1.4798247636615172, + "grad_norm": 1.5658620514352724, + "learning_rate": 3.4794656022047765e-07, + "loss": 0.45295125246047974, + "step": 6418 + }, + { + "epoch": 1.4800553377910999, + "grad_norm": 1.3627022105594853, + "learning_rate": 3.47657545633887e-07, + "loss": 0.35889285802841187, + "step": 6419 + }, + { + "epoch": 1.4802859119206824, + "grad_norm": 1.5560865897069756, + "learning_rate": 3.4736862587188384e-07, + "loss": 0.49129703640937805, + "step": 6420 + }, + { + "epoch": 1.480516486050265, + "grad_norm": 1.6626930717329957, + "learning_rate": 3.4707980097646474e-07, + "loss": 0.5018036365509033, + "step": 6421 + }, + { + "epoch": 1.4807470601798478, + "grad_norm": 
1.6557207215915222, + "learning_rate": 3.46791070989614e-07, + "loss": 0.48743095993995667, + "step": 6422 + }, + { + "epoch": 1.4809776343094305, + "grad_norm": 1.5043027194300391, + "learning_rate": 3.46502435953301e-07, + "loss": 0.4876127243041992, + "step": 6423 + }, + { + "epoch": 1.4812082084390132, + "grad_norm": 1.971149486413709, + "learning_rate": 3.462138959094818e-07, + "loss": 0.517420768737793, + "step": 6424 + }, + { + "epoch": 1.4814387825685957, + "grad_norm": 1.8274785313456325, + "learning_rate": 3.4592545090009907e-07, + "loss": 0.49587076902389526, + "step": 6425 + }, + { + "epoch": 1.4816693566981785, + "grad_norm": 1.5362037346917286, + "learning_rate": 3.4563710096708063e-07, + "loss": 0.43007123470306396, + "step": 6426 + }, + { + "epoch": 1.4818999308277612, + "grad_norm": 1.358212427456112, + "learning_rate": 3.4534884615234163e-07, + "loss": 0.41231095790863037, + "step": 6427 + }, + { + "epoch": 1.4821305049573437, + "grad_norm": 1.6451517308598724, + "learning_rate": 3.450606864977822e-07, + "loss": 0.4454977512359619, + "step": 6428 + }, + { + "epoch": 1.4823610790869264, + "grad_norm": 1.3739971676037328, + "learning_rate": 3.447726220452899e-07, + "loss": 0.4432292878627777, + "step": 6429 + }, + { + "epoch": 1.482591653216509, + "grad_norm": 1.6222705799101154, + "learning_rate": 3.444846528367372e-07, + "loss": 0.47547852993011475, + "step": 6430 + }, + { + "epoch": 1.4828222273460918, + "grad_norm": 1.522255385470065, + "learning_rate": 3.441967789139837e-07, + "loss": 0.45712774991989136, + "step": 6431 + }, + { + "epoch": 1.4830528014756745, + "grad_norm": 2.2700209255759107, + "learning_rate": 3.439090003188748e-07, + "loss": 0.4485551714897156, + "step": 6432 + }, + { + "epoch": 1.483283375605257, + "grad_norm": 1.4019614855782472, + "learning_rate": 3.4362131709324225e-07, + "loss": 0.5157139301300049, + "step": 6433 + }, + { + "epoch": 1.4835139497348397, + "grad_norm": 1.6970431173839349, + "learning_rate": 
3.4333372927890346e-07, + "loss": 0.3786337375640869, + "step": 6434 + }, + { + "epoch": 1.4837445238644225, + "grad_norm": 1.430215191007922, + "learning_rate": 3.430462369176619e-07, + "loss": 0.444644033908844, + "step": 6435 + }, + { + "epoch": 1.483975097994005, + "grad_norm": 1.5213084700296855, + "learning_rate": 3.427588400513082e-07, + "loss": 0.450777530670166, + "step": 6436 + }, + { + "epoch": 1.4842056721235877, + "grad_norm": 1.6553650689166306, + "learning_rate": 3.424715387216176e-07, + "loss": 0.4547499418258667, + "step": 6437 + }, + { + "epoch": 1.4844362462531704, + "grad_norm": 1.3603667716838959, + "learning_rate": 3.4218433297035274e-07, + "loss": 0.41394394636154175, + "step": 6438 + }, + { + "epoch": 1.484666820382753, + "grad_norm": 1.3921623882761025, + "learning_rate": 3.4189722283926194e-07, + "loss": 0.46392822265625, + "step": 6439 + }, + { + "epoch": 1.4848973945123358, + "grad_norm": 1.3499969732544597, + "learning_rate": 3.416102083700797e-07, + "loss": 0.443311870098114, + "step": 6440 + }, + { + "epoch": 1.4851279686419183, + "grad_norm": 1.3830140570978715, + "learning_rate": 3.4132328960452594e-07, + "loss": 0.49744826555252075, + "step": 6441 + }, + { + "epoch": 1.485358542771501, + "grad_norm": 1.5191431970911358, + "learning_rate": 3.4103646658430787e-07, + "loss": 0.3906005620956421, + "step": 6442 + }, + { + "epoch": 1.4855891169010838, + "grad_norm": 1.3526583076340324, + "learning_rate": 3.407497393511175e-07, + "loss": 0.4236280918121338, + "step": 6443 + }, + { + "epoch": 1.4858196910306662, + "grad_norm": 1.6787824686307624, + "learning_rate": 3.4046310794663403e-07, + "loss": 0.5457645654678345, + "step": 6444 + }, + { + "epoch": 1.486050265160249, + "grad_norm": 1.7325001007084588, + "learning_rate": 3.4017657241252217e-07, + "loss": 0.541573703289032, + "step": 6445 + }, + { + "epoch": 1.4862808392898317, + "grad_norm": 1.9081537369674455, + "learning_rate": 3.398901327904322e-07, + "loss": 0.496945858001709, + 
"step": 6446 + }, + { + "epoch": 1.4865114134194144, + "grad_norm": 1.5413856714091914, + "learning_rate": 3.3960378912200136e-07, + "loss": 0.46119701862335205, + "step": 6447 + }, + { + "epoch": 1.4867419875489971, + "grad_norm": 1.8976464043536114, + "learning_rate": 3.3931754144885284e-07, + "loss": 0.5169441103935242, + "step": 6448 + }, + { + "epoch": 1.4869725616785796, + "grad_norm": 1.7130869588848308, + "learning_rate": 3.390313898125957e-07, + "loss": 0.525173544883728, + "step": 6449 + }, + { + "epoch": 1.4872031358081623, + "grad_norm": 1.6684348208587065, + "learning_rate": 3.3874533425482457e-07, + "loss": 0.46877139806747437, + "step": 6450 + }, + { + "epoch": 1.487433709937745, + "grad_norm": 1.6810644095850389, + "learning_rate": 3.3845937481712096e-07, + "loss": 0.49436479806900024, + "step": 6451 + }, + { + "epoch": 1.4876642840673275, + "grad_norm": 1.2950679928032611, + "learning_rate": 3.3817351154105145e-07, + "loss": 0.40879231691360474, + "step": 6452 + }, + { + "epoch": 1.4878948581969103, + "grad_norm": 1.5253823933458253, + "learning_rate": 3.378877444681697e-07, + "loss": 0.5060825347900391, + "step": 6453 + }, + { + "epoch": 1.488125432326493, + "grad_norm": 1.4561081118713566, + "learning_rate": 3.3760207364001434e-07, + "loss": 0.4875546097755432, + "step": 6454 + }, + { + "epoch": 1.4883560064560757, + "grad_norm": 1.5036556031092911, + "learning_rate": 3.373164990981108e-07, + "loss": 0.3791916072368622, + "step": 6455 + }, + { + "epoch": 1.4885865805856584, + "grad_norm": 1.4585716739422292, + "learning_rate": 3.370310208839704e-07, + "loss": 0.46757322549819946, + "step": 6456 + }, + { + "epoch": 1.488817154715241, + "grad_norm": 1.4061567541704671, + "learning_rate": 3.3674563903908994e-07, + "loss": 0.4334050416946411, + "step": 6457 + }, + { + "epoch": 1.4890477288448236, + "grad_norm": 1.4217577265821555, + "learning_rate": 3.3646035360495294e-07, + "loss": 0.4408720135688782, + "step": 6458 + }, + { + "epoch": 
1.4892783029744063, + "grad_norm": 1.637938092148249, + "learning_rate": 3.3617516462302795e-07, + "loss": 0.46556228399276733, + "step": 6459 + }, + { + "epoch": 1.4895088771039888, + "grad_norm": 1.3694379850190115, + "learning_rate": 3.3589007213477096e-07, + "loss": 0.5212184190750122, + "step": 6460 + }, + { + "epoch": 1.4897394512335715, + "grad_norm": 1.6425370019041445, + "learning_rate": 3.35605076181622e-07, + "loss": 0.5340084433555603, + "step": 6461 + }, + { + "epoch": 1.4899700253631543, + "grad_norm": 1.4674031830711234, + "learning_rate": 3.353201768050088e-07, + "loss": 0.38049495220184326, + "step": 6462 + }, + { + "epoch": 1.490200599492737, + "grad_norm": 1.5849611777401629, + "learning_rate": 3.350353740463442e-07, + "loss": 0.5480734705924988, + "step": 6463 + }, + { + "epoch": 1.4904311736223197, + "grad_norm": 1.4050939080217109, + "learning_rate": 3.3475066794702756e-07, + "loss": 0.4179231524467468, + "step": 6464 + }, + { + "epoch": 1.4906617477519022, + "grad_norm": 1.8331951463468434, + "learning_rate": 3.3446605854844335e-07, + "loss": 0.5380987524986267, + "step": 6465 + }, + { + "epoch": 1.490892321881485, + "grad_norm": 1.4221970681414315, + "learning_rate": 3.3418154589196226e-07, + "loss": 0.41146454215049744, + "step": 6466 + }, + { + "epoch": 1.4911228960110676, + "grad_norm": 1.5814296524447065, + "learning_rate": 3.3389713001894157e-07, + "loss": 0.4586387276649475, + "step": 6467 + }, + { + "epoch": 1.4913534701406501, + "grad_norm": 1.1757977126470995, + "learning_rate": 3.336128109707236e-07, + "loss": 0.4023931920528412, + "step": 6468 + }, + { + "epoch": 1.4915840442702328, + "grad_norm": 1.6673237012516164, + "learning_rate": 3.333285887886373e-07, + "loss": 0.5373448133468628, + "step": 6469 + }, + { + "epoch": 1.4918146183998156, + "grad_norm": 1.4523946751037105, + "learning_rate": 3.330444635139971e-07, + "loss": 0.4413643479347229, + "step": 6470 + }, + { + "epoch": 1.4920451925293983, + "grad_norm": 
1.3734904271626787, + "learning_rate": 3.3276043518810327e-07, + "loss": 0.399494469165802, + "step": 6471 + }, + { + "epoch": 1.492275766658981, + "grad_norm": 1.4170973987364872, + "learning_rate": 3.3247650385224256e-07, + "loss": 0.4353644847869873, + "step": 6472 + }, + { + "epoch": 1.4925063407885635, + "grad_norm": 1.7462483377307876, + "learning_rate": 3.3219266954768743e-07, + "loss": 0.5231607556343079, + "step": 6473 + }, + { + "epoch": 1.4927369149181462, + "grad_norm": 1.55800999194994, + "learning_rate": 3.3190893231569596e-07, + "loss": 0.414408802986145, + "step": 6474 + }, + { + "epoch": 1.492967489047729, + "grad_norm": 1.6408204727748315, + "learning_rate": 3.3162529219751155e-07, + "loss": 0.3921009302139282, + "step": 6475 + }, + { + "epoch": 1.4931980631773114, + "grad_norm": 1.6197044883986413, + "learning_rate": 3.3134174923436506e-07, + "loss": 0.4317164421081543, + "step": 6476 + }, + { + "epoch": 1.4934286373068941, + "grad_norm": 1.5697343564549593, + "learning_rate": 3.3105830346747175e-07, + "loss": 0.46302181482315063, + "step": 6477 + }, + { + "epoch": 1.4936592114364768, + "grad_norm": 1.464087037907405, + "learning_rate": 3.307749549380335e-07, + "loss": 0.45704615116119385, + "step": 6478 + }, + { + "epoch": 1.4938897855660596, + "grad_norm": 1.5032451370482525, + "learning_rate": 3.304917036872379e-07, + "loss": 0.45455485582351685, + "step": 6479 + }, + { + "epoch": 1.4941203596956423, + "grad_norm": 1.5465084069557762, + "learning_rate": 3.302085497562588e-07, + "loss": 0.41939157247543335, + "step": 6480 + }, + { + "epoch": 1.4943509338252248, + "grad_norm": 1.3682263746176198, + "learning_rate": 3.2992549318625487e-07, + "loss": 0.4109286367893219, + "step": 6481 + }, + { + "epoch": 1.4945815079548075, + "grad_norm": 2.0164734849697, + "learning_rate": 3.2964253401837173e-07, + "loss": 0.44710463285446167, + "step": 6482 + }, + { + "epoch": 1.4948120820843902, + "grad_norm": 1.6884711291100036, + "learning_rate": 
3.2935967229373986e-07, + "loss": 0.4330691695213318, + "step": 6483 + }, + { + "epoch": 1.4950426562139727, + "grad_norm": 1.4066891595951536, + "learning_rate": 3.2907690805347667e-07, + "loss": 0.41174834966659546, + "step": 6484 + }, + { + "epoch": 1.4952732303435554, + "grad_norm": 1.5235589172624593, + "learning_rate": 3.2879424133868406e-07, + "loss": 0.4368870258331299, + "step": 6485 + }, + { + "epoch": 1.4955038044731381, + "grad_norm": 1.581699276196859, + "learning_rate": 3.2851167219045107e-07, + "loss": 0.5155518651008606, + "step": 6486 + }, + { + "epoch": 1.4957343786027208, + "grad_norm": 1.4965040692694338, + "learning_rate": 3.282292006498522e-07, + "loss": 0.47015419602394104, + "step": 6487 + }, + { + "epoch": 1.4959649527323036, + "grad_norm": 1.4271101962383341, + "learning_rate": 3.2794682675794684e-07, + "loss": 0.41059884428977966, + "step": 6488 + }, + { + "epoch": 1.496195526861886, + "grad_norm": 1.7728377181019612, + "learning_rate": 3.2766455055578157e-07, + "loss": 0.4864136278629303, + "step": 6489 + }, + { + "epoch": 1.4964261009914688, + "grad_norm": 1.1780419841322618, + "learning_rate": 3.2738237208438744e-07, + "loss": 0.3599165976047516, + "step": 6490 + }, + { + "epoch": 1.4966566751210515, + "grad_norm": 1.4373611771192503, + "learning_rate": 3.2710029138478267e-07, + "loss": 0.4734029769897461, + "step": 6491 + }, + { + "epoch": 1.496887249250634, + "grad_norm": 1.5053587105753783, + "learning_rate": 3.268183084979699e-07, + "loss": 0.46739861369132996, + "step": 6492 + }, + { + "epoch": 1.4971178233802167, + "grad_norm": 1.745789102022849, + "learning_rate": 3.265364234649387e-07, + "loss": 0.46794670820236206, + "step": 6493 + }, + { + "epoch": 1.4973483975097994, + "grad_norm": 1.6683012395243093, + "learning_rate": 3.262546363266635e-07, + "loss": 0.463203489780426, + "step": 6494 + }, + { + "epoch": 1.4975789716393821, + "grad_norm": 1.4489172807794646, + "learning_rate": 3.2597294712410504e-07, + "loss": 
0.4495059847831726, + "step": 6495 + }, + { + "epoch": 1.4978095457689649, + "grad_norm": 1.464704014292867, + "learning_rate": 3.256913558982101e-07, + "loss": 0.43549245595932007, + "step": 6496 + }, + { + "epoch": 1.4980401198985474, + "grad_norm": 1.552183908593376, + "learning_rate": 3.254098626899102e-07, + "loss": 0.40582704544067383, + "step": 6497 + }, + { + "epoch": 1.49827069402813, + "grad_norm": 1.527774566610999, + "learning_rate": 3.251284675401238e-07, + "loss": 0.3720378279685974, + "step": 6498 + }, + { + "epoch": 1.4985012681577128, + "grad_norm": 1.4814613073983138, + "learning_rate": 3.24847170489754e-07, + "loss": 0.42694520950317383, + "step": 6499 + }, + { + "epoch": 1.4987318422872953, + "grad_norm": 1.4768231117771715, + "learning_rate": 3.2456597157969066e-07, + "loss": 0.442158043384552, + "step": 6500 + }, + { + "epoch": 1.498962416416878, + "grad_norm": 1.4765054194953837, + "learning_rate": 3.2428487085080846e-07, + "loss": 0.44245558977127075, + "step": 6501 + }, + { + "epoch": 1.4991929905464607, + "grad_norm": 1.3559485373971267, + "learning_rate": 3.240038683439684e-07, + "loss": 0.4127236008644104, + "step": 6502 + }, + { + "epoch": 1.4994235646760434, + "grad_norm": 1.4985576311709152, + "learning_rate": 3.237229641000171e-07, + "loss": 0.4262787103652954, + "step": 6503 + }, + { + "epoch": 1.4996541388056261, + "grad_norm": 1.6706445028718073, + "learning_rate": 3.2344215815978714e-07, + "loss": 0.4181264042854309, + "step": 6504 + }, + { + "epoch": 1.4998847129352086, + "grad_norm": 1.6044294628436637, + "learning_rate": 3.2316145056409616e-07, + "loss": 0.4416937530040741, + "step": 6505 + }, + { + "epoch": 1.5001152870647914, + "grad_norm": 1.8850023720212492, + "learning_rate": 3.228808413537476e-07, + "loss": 0.4901489019393921, + "step": 6506 + }, + { + "epoch": 1.5003458611943739, + "grad_norm": 1.3996173090866784, + "learning_rate": 3.2260033056953153e-07, + "loss": 0.37932026386260986, + "step": 6507 + }, + { + 
"epoch": 1.5005764353239566, + "grad_norm": 1.649923361135509, + "learning_rate": 3.223199182522223e-07, + "loss": 0.4680899381637573, + "step": 6508 + }, + { + "epoch": 1.5008070094535393, + "grad_norm": 1.6955418693371036, + "learning_rate": 3.2203960444258105e-07, + "loss": 0.508334219455719, + "step": 6509 + }, + { + "epoch": 1.501037583583122, + "grad_norm": 2.0480591557575685, + "learning_rate": 3.2175938918135415e-07, + "loss": 0.3386784791946411, + "step": 6510 + }, + { + "epoch": 1.5012681577127047, + "grad_norm": 1.860117074212897, + "learning_rate": 3.214792725092741e-07, + "loss": 0.4315892457962036, + "step": 6511 + }, + { + "epoch": 1.5014987318422874, + "grad_norm": 1.4533616152071933, + "learning_rate": 3.211992544670582e-07, + "loss": 0.3709627389907837, + "step": 6512 + }, + { + "epoch": 1.50172930597187, + "grad_norm": 1.6433224440752017, + "learning_rate": 3.2091933509541023e-07, + "loss": 0.5260987877845764, + "step": 6513 + }, + { + "epoch": 1.5019598801014526, + "grad_norm": 1.5201640514539732, + "learning_rate": 3.20639514435019e-07, + "loss": 0.5379073619842529, + "step": 6514 + }, + { + "epoch": 1.5021904542310351, + "grad_norm": 1.2867052063244526, + "learning_rate": 3.2035979252655976e-07, + "loss": 0.47530391812324524, + "step": 6515 + }, + { + "epoch": 1.5024210283606179, + "grad_norm": 1.5201328820105404, + "learning_rate": 3.200801694106926e-07, + "loss": 0.459227979183197, + "step": 6516 + }, + { + "epoch": 1.5026516024902006, + "grad_norm": 1.5330729417783509, + "learning_rate": 3.19800645128063e-07, + "loss": 0.4867238998413086, + "step": 6517 + }, + { + "epoch": 1.5028821766197833, + "grad_norm": 1.4246709864782185, + "learning_rate": 3.195212197193039e-07, + "loss": 0.38478928804397583, + "step": 6518 + }, + { + "epoch": 1.503112750749366, + "grad_norm": 1.625989812299007, + "learning_rate": 3.192418932250316e-07, + "loss": 0.3938423991203308, + "step": 6519 + }, + { + "epoch": 1.5033433248789487, + "grad_norm": 
1.8227844221564524, + "learning_rate": 3.1896266568584975e-07, + "loss": 0.457303911447525, + "step": 6520 + }, + { + "epoch": 1.5035738990085312, + "grad_norm": 1.5422494994233005, + "learning_rate": 3.1868353714234607e-07, + "loss": 0.5007269382476807, + "step": 6521 + }, + { + "epoch": 1.503804473138114, + "grad_norm": 1.4891205198132078, + "learning_rate": 3.1840450763509576e-07, + "loss": 0.3878381848335266, + "step": 6522 + }, + { + "epoch": 1.5040350472676964, + "grad_norm": 1.798955261342233, + "learning_rate": 3.181255772046575e-07, + "loss": 0.488269567489624, + "step": 6523 + }, + { + "epoch": 1.5042656213972792, + "grad_norm": 1.4981578078592954, + "learning_rate": 3.1784674589157767e-07, + "loss": 0.41664889454841614, + "step": 6524 + }, + { + "epoch": 1.5044961955268619, + "grad_norm": 1.6014375227212925, + "learning_rate": 3.175680137363863e-07, + "loss": 0.4862533509731293, + "step": 6525 + }, + { + "epoch": 1.5047267696564446, + "grad_norm": 1.599713126186934, + "learning_rate": 3.172893807796004e-07, + "loss": 0.4629037380218506, + "step": 6526 + }, + { + "epoch": 1.5049573437860273, + "grad_norm": 1.6094632634811818, + "learning_rate": 3.1701084706172245e-07, + "loss": 0.46300196647644043, + "step": 6527 + }, + { + "epoch": 1.50518791791561, + "grad_norm": 1.4186362500626026, + "learning_rate": 3.1673241262323934e-07, + "loss": 0.40698888897895813, + "step": 6528 + }, + { + "epoch": 1.5054184920451925, + "grad_norm": 1.484473947418196, + "learning_rate": 3.1645407750462514e-07, + "loss": 0.4344380497932434, + "step": 6529 + }, + { + "epoch": 1.5056490661747752, + "grad_norm": 1.6200348544461498, + "learning_rate": 3.1617584174633806e-07, + "loss": 0.49757128953933716, + "step": 6530 + }, + { + "epoch": 1.5058796403043577, + "grad_norm": 1.6256839483530447, + "learning_rate": 3.15897705388823e-07, + "loss": 0.4506916105747223, + "step": 6531 + }, + { + "epoch": 1.5061102144339404, + "grad_norm": 1.5009759227514647, + "learning_rate": 
3.156196684725093e-07, + "loss": 0.3941146731376648, + "step": 6532 + }, + { + "epoch": 1.5063407885635232, + "grad_norm": 1.9065405733956409, + "learning_rate": 3.153417310378127e-07, + "loss": 0.5400820374488831, + "step": 6533 + }, + { + "epoch": 1.5065713626931059, + "grad_norm": 1.774411964329925, + "learning_rate": 3.1506389312513435e-07, + "loss": 0.4418470859527588, + "step": 6534 + }, + { + "epoch": 1.5068019368226886, + "grad_norm": 1.3196915654196755, + "learning_rate": 3.1478615477486113e-07, + "loss": 0.3897334933280945, + "step": 6535 + }, + { + "epoch": 1.5070325109522713, + "grad_norm": 1.5772083777596413, + "learning_rate": 3.145085160273647e-07, + "loss": 0.4923437833786011, + "step": 6536 + }, + { + "epoch": 1.5072630850818538, + "grad_norm": 1.575539005736493, + "learning_rate": 3.142309769230025e-07, + "loss": 0.41996920108795166, + "step": 6537 + }, + { + "epoch": 1.5074936592114365, + "grad_norm": 1.5634954618427415, + "learning_rate": 3.1395353750211806e-07, + "loss": 0.38584667444229126, + "step": 6538 + }, + { + "epoch": 1.507724233341019, + "grad_norm": 1.5469052539454182, + "learning_rate": 3.136761978050395e-07, + "loss": 0.5093455910682678, + "step": 6539 + }, + { + "epoch": 1.5079548074706017, + "grad_norm": 1.8844111555093896, + "learning_rate": 3.1339895787208126e-07, + "loss": 0.5592935681343079, + "step": 6540 + }, + { + "epoch": 1.5081853816001844, + "grad_norm": 1.7670191671756568, + "learning_rate": 3.1312181774354306e-07, + "loss": 0.38311779499053955, + "step": 6541 + }, + { + "epoch": 1.5084159557297672, + "grad_norm": 1.6894588927823573, + "learning_rate": 3.1284477745971025e-07, + "loss": 0.4422299265861511, + "step": 6542 + }, + { + "epoch": 1.5086465298593499, + "grad_norm": 1.5653024747826005, + "learning_rate": 3.125678370608528e-07, + "loss": 0.5097527503967285, + "step": 6543 + }, + { + "epoch": 1.5088771039889326, + "grad_norm": 1.4635088499535702, + "learning_rate": 3.1229099658722747e-07, + "loss": 
0.42586642503738403, + "step": 6544 + }, + { + "epoch": 1.509107678118515, + "grad_norm": 1.7853929312810684, + "learning_rate": 3.120142560790755e-07, + "loss": 0.5006861686706543, + "step": 6545 + }, + { + "epoch": 1.5093382522480978, + "grad_norm": 1.292111562170076, + "learning_rate": 3.117376155766237e-07, + "loss": 0.4361686706542969, + "step": 6546 + }, + { + "epoch": 1.5095688263776803, + "grad_norm": 1.4890005224956508, + "learning_rate": 3.11461075120085e-07, + "loss": 0.45466339588165283, + "step": 6547 + }, + { + "epoch": 1.509799400507263, + "grad_norm": 1.4657261766322067, + "learning_rate": 3.1118463474965697e-07, + "loss": 0.39591068029403687, + "step": 6548 + }, + { + "epoch": 1.5100299746368457, + "grad_norm": 1.669083463008409, + "learning_rate": 3.1090829450552316e-07, + "loss": 0.4672427475452423, + "step": 6549 + }, + { + "epoch": 1.5102605487664285, + "grad_norm": 1.6273442700037082, + "learning_rate": 3.1063205442785234e-07, + "loss": 0.4785880148410797, + "step": 6550 + }, + { + "epoch": 1.5104911228960112, + "grad_norm": 1.3915985235576667, + "learning_rate": 3.103559145567994e-07, + "loss": 0.441936731338501, + "step": 6551 + }, + { + "epoch": 1.510721697025594, + "grad_norm": 1.5501390159164539, + "learning_rate": 3.1007987493250334e-07, + "loss": 0.49719512462615967, + "step": 6552 + }, + { + "epoch": 1.5109522711551764, + "grad_norm": 1.7806538694012621, + "learning_rate": 3.098039355950899e-07, + "loss": 0.40702491998672485, + "step": 6553 + }, + { + "epoch": 1.511182845284759, + "grad_norm": 1.4605232780084745, + "learning_rate": 3.0952809658466896e-07, + "loss": 0.44754648208618164, + "step": 6554 + }, + { + "epoch": 1.5114134194143416, + "grad_norm": 1.7119927234849008, + "learning_rate": 3.0925235794133717e-07, + "loss": 0.5370102524757385, + "step": 6555 + }, + { + "epoch": 1.5116439935439243, + "grad_norm": 1.4781444883115034, + "learning_rate": 3.089767197051755e-07, + "loss": 0.46693646907806396, + "step": 6556 + }, + { + 
"epoch": 1.511874567673507, + "grad_norm": 1.3940905139236526, + "learning_rate": 3.0870118191625084e-07, + "loss": 0.3887597322463989, + "step": 6557 + }, + { + "epoch": 1.5121051418030897, + "grad_norm": 1.509297997221229, + "learning_rate": 3.0842574461461577e-07, + "loss": 0.4783397912979126, + "step": 6558 + }, + { + "epoch": 1.5123357159326725, + "grad_norm": 2.254982960205746, + "learning_rate": 3.081504078403073e-07, + "loss": 0.5305588245391846, + "step": 6559 + }, + { + "epoch": 1.5125662900622552, + "grad_norm": 1.867807225680096, + "learning_rate": 3.078751716333492e-07, + "loss": 0.45315784215927124, + "step": 6560 + }, + { + "epoch": 1.5127968641918377, + "grad_norm": 1.6356411182801975, + "learning_rate": 3.0760003603374897e-07, + "loss": 0.4805132746696472, + "step": 6561 + }, + { + "epoch": 1.5130274383214202, + "grad_norm": 1.5579254915377003, + "learning_rate": 3.0732500108150104e-07, + "loss": 0.4956076145172119, + "step": 6562 + }, + { + "epoch": 1.5132580124510029, + "grad_norm": 1.6872988549232402, + "learning_rate": 3.07050066816584e-07, + "loss": 0.3629196882247925, + "step": 6563 + }, + { + "epoch": 1.5134885865805856, + "grad_norm": 1.4271734684348691, + "learning_rate": 3.067752332789626e-07, + "loss": 0.43240371346473694, + "step": 6564 + }, + { + "epoch": 1.5137191607101683, + "grad_norm": 1.4730845718882644, + "learning_rate": 3.065005005085869e-07, + "loss": 0.4933302402496338, + "step": 6565 + }, + { + "epoch": 1.513949734839751, + "grad_norm": 1.5594123406832316, + "learning_rate": 3.0622586854539155e-07, + "loss": 0.47905197739601135, + "step": 6566 + }, + { + "epoch": 1.5141803089693338, + "grad_norm": 1.3120965583955209, + "learning_rate": 3.059513374292978e-07, + "loss": 0.4245232343673706, + "step": 6567 + }, + { + "epoch": 1.5144108830989162, + "grad_norm": 1.6401225191596096, + "learning_rate": 3.0567690720021077e-07, + "loss": 0.40526312589645386, + "step": 6568 + }, + { + "epoch": 1.514641457228499, + "grad_norm": 
1.7208705138340397, + "learning_rate": 3.0540257789802227e-07, + "loss": 0.5808804631233215, + "step": 6569 + }, + { + "epoch": 1.5148720313580815, + "grad_norm": 1.791338069752229, + "learning_rate": 3.0512834956260836e-07, + "loss": 0.44997286796569824, + "step": 6570 + }, + { + "epoch": 1.5151026054876642, + "grad_norm": 1.6800897456169108, + "learning_rate": 3.048542222338315e-07, + "loss": 0.44051581621170044, + "step": 6571 + }, + { + "epoch": 1.515333179617247, + "grad_norm": 1.525217042834723, + "learning_rate": 3.045801959515382e-07, + "loss": 0.5113236308097839, + "step": 6572 + }, + { + "epoch": 1.5155637537468296, + "grad_norm": 1.5439102757372205, + "learning_rate": 3.0430627075556125e-07, + "loss": 0.554703950881958, + "step": 6573 + }, + { + "epoch": 1.5157943278764123, + "grad_norm": 1.600156572288611, + "learning_rate": 3.0403244668571847e-07, + "loss": 0.3819808065891266, + "step": 6574 + }, + { + "epoch": 1.516024902005995, + "grad_norm": 1.4872928405937125, + "learning_rate": 3.037587237818133e-07, + "loss": 0.47970864176750183, + "step": 6575 + }, + { + "epoch": 1.5162554761355775, + "grad_norm": 1.4776778157711579, + "learning_rate": 3.0348510208363386e-07, + "loss": 0.4296469986438751, + "step": 6576 + }, + { + "epoch": 1.5164860502651603, + "grad_norm": 1.462836798021035, + "learning_rate": 3.032115816309535e-07, + "loss": 0.4372752904891968, + "step": 6577 + }, + { + "epoch": 1.5167166243947428, + "grad_norm": 1.673613757204577, + "learning_rate": 3.029381624635318e-07, + "loss": 0.4711950719356537, + "step": 6578 + }, + { + "epoch": 1.5169471985243255, + "grad_norm": 1.3932522433513406, + "learning_rate": 3.026648446211124e-07, + "loss": 0.4448170065879822, + "step": 6579 + }, + { + "epoch": 1.5171777726539082, + "grad_norm": 1.6184181695445041, + "learning_rate": 3.02391628143425e-07, + "loss": 0.4527873992919922, + "step": 6580 + }, + { + "epoch": 1.517408346783491, + "grad_norm": 1.6799725255249693, + "learning_rate": 
3.0211851307018463e-07, + "loss": 0.453765332698822, + "step": 6581 + }, + { + "epoch": 1.5176389209130736, + "grad_norm": 1.686193810125547, + "learning_rate": 3.018454994410915e-07, + "loss": 0.46818265318870544, + "step": 6582 + }, + { + "epoch": 1.5178694950426563, + "grad_norm": 1.6601834563107158, + "learning_rate": 3.0157258729583026e-07, + "loss": 0.38551369309425354, + "step": 6583 + }, + { + "epoch": 1.5181000691722388, + "grad_norm": 1.2759146716130436, + "learning_rate": 3.012997766740721e-07, + "loss": 0.3651260733604431, + "step": 6584 + }, + { + "epoch": 1.5183306433018215, + "grad_norm": 1.4942378521466573, + "learning_rate": 3.010270676154726e-07, + "loss": 0.36894726753234863, + "step": 6585 + }, + { + "epoch": 1.518561217431404, + "grad_norm": 1.5163949110289714, + "learning_rate": 3.007544601596722e-07, + "loss": 0.42595791816711426, + "step": 6586 + }, + { + "epoch": 1.5187917915609868, + "grad_norm": 1.9011368495730705, + "learning_rate": 3.004819543462979e-07, + "loss": 0.4916795492172241, + "step": 6587 + }, + { + "epoch": 1.5190223656905695, + "grad_norm": 3.958756092482824, + "learning_rate": 3.0020955021496073e-07, + "loss": 0.5098932385444641, + "step": 6588 + }, + { + "epoch": 1.5192529398201522, + "grad_norm": 1.7429564765653418, + "learning_rate": 2.9993724780525796e-07, + "loss": 0.6336305737495422, + "step": 6589 + }, + { + "epoch": 1.519483513949735, + "grad_norm": 1.6454779446539551, + "learning_rate": 2.996650471567709e-07, + "loss": 0.4911893606185913, + "step": 6590 + }, + { + "epoch": 1.5197140880793176, + "grad_norm": 1.6053455149976412, + "learning_rate": 2.9939294830906727e-07, + "loss": 0.4388008117675781, + "step": 6591 + }, + { + "epoch": 1.5199446622089001, + "grad_norm": 1.4960203678707569, + "learning_rate": 2.991209513016986e-07, + "loss": 0.392263799905777, + "step": 6592 + }, + { + "epoch": 1.5201752363384828, + "grad_norm": 1.4101720949081316, + "learning_rate": 2.988490561742032e-07, + "loss": 
0.36495402455329895, + "step": 6593 + }, + { + "epoch": 1.5204058104680653, + "grad_norm": 1.6817212910549741, + "learning_rate": 2.985772629661032e-07, + "loss": 0.5280855298042297, + "step": 6594 + }, + { + "epoch": 1.520636384597648, + "grad_norm": 1.4575719708434207, + "learning_rate": 2.9830557171690693e-07, + "loss": 0.43953752517700195, + "step": 6595 + }, + { + "epoch": 1.5208669587272308, + "grad_norm": 1.261754251016282, + "learning_rate": 2.980339824661071e-07, + "loss": 0.41361862421035767, + "step": 6596 + }, + { + "epoch": 1.5210975328568135, + "grad_norm": 1.4525947923531464, + "learning_rate": 2.977624952531821e-07, + "loss": 0.39955854415893555, + "step": 6597 + }, + { + "epoch": 1.5213281069863962, + "grad_norm": 1.664684863463753, + "learning_rate": 2.9749111011759565e-07, + "loss": 0.505165696144104, + "step": 6598 + }, + { + "epoch": 1.521558681115979, + "grad_norm": 1.5619432117854901, + "learning_rate": 2.9721982709879566e-07, + "loss": 0.4388153851032257, + "step": 6599 + }, + { + "epoch": 1.5217892552455614, + "grad_norm": 1.454152411615684, + "learning_rate": 2.969486462362167e-07, + "loss": 0.4479100704193115, + "step": 6600 + }, + { + "epoch": 1.5220198293751441, + "grad_norm": 1.4345831092951191, + "learning_rate": 2.9667756756927686e-07, + "loss": 0.4005380868911743, + "step": 6601 + }, + { + "epoch": 1.5222504035047266, + "grad_norm": 1.707280681236192, + "learning_rate": 2.9640659113738087e-07, + "loss": 0.43774881958961487, + "step": 6602 + }, + { + "epoch": 1.5224809776343093, + "grad_norm": 1.5608510724785551, + "learning_rate": 2.9613571697991725e-07, + "loss": 0.4449707865715027, + "step": 6603 + }, + { + "epoch": 1.522711551763892, + "grad_norm": 1.6567386639534631, + "learning_rate": 2.958649451362606e-07, + "loss": 0.454499751329422, + "step": 6604 + }, + { + "epoch": 1.5229421258934748, + "grad_norm": 1.2977143159727098, + "learning_rate": 2.955942756457707e-07, + "loss": 0.35601305961608887, + "step": 6605 + }, + { + 
"epoch": 1.5231727000230575, + "grad_norm": 1.6684183476509384, + "learning_rate": 2.9532370854779143e-07, + "loss": 0.5252523422241211, + "step": 6606 + }, + { + "epoch": 1.5234032741526402, + "grad_norm": 1.3731317276647081, + "learning_rate": 2.950532438816531e-07, + "loss": 0.4311884939670563, + "step": 6607 + }, + { + "epoch": 1.5236338482822227, + "grad_norm": 1.5784692430456444, + "learning_rate": 2.9478288168667e-07, + "loss": 0.43956485390663147, + "step": 6608 + }, + { + "epoch": 1.5238644224118054, + "grad_norm": 1.4213527447836085, + "learning_rate": 2.9451262200214235e-07, + "loss": 0.400115430355072, + "step": 6609 + }, + { + "epoch": 1.524094996541388, + "grad_norm": 1.6612091081011793, + "learning_rate": 2.942424648673548e-07, + "loss": 0.41738802194595337, + "step": 6610 + }, + { + "epoch": 1.5243255706709706, + "grad_norm": 1.5951584459105572, + "learning_rate": 2.939724103215776e-07, + "loss": 0.412765771150589, + "step": 6611 + }, + { + "epoch": 1.5245561448005533, + "grad_norm": 1.6739308031441762, + "learning_rate": 2.937024584040659e-07, + "loss": 0.44869422912597656, + "step": 6612 + }, + { + "epoch": 1.524786718930136, + "grad_norm": 1.5443554211834334, + "learning_rate": 2.934326091540603e-07, + "loss": 0.39191997051239014, + "step": 6613 + }, + { + "epoch": 1.5250172930597188, + "grad_norm": 1.307209963924962, + "learning_rate": 2.9316286261078547e-07, + "loss": 0.36575692892074585, + "step": 6614 + }, + { + "epoch": 1.5252478671893015, + "grad_norm": 1.5775953874602453, + "learning_rate": 2.9289321881345254e-07, + "loss": 0.49928778409957886, + "step": 6615 + }, + { + "epoch": 1.525478441318884, + "grad_norm": 1.5029437064522762, + "learning_rate": 2.926236778012565e-07, + "loss": 0.49619296193122864, + "step": 6616 + }, + { + "epoch": 1.5257090154484667, + "grad_norm": 1.5175956935877304, + "learning_rate": 2.923542396133777e-07, + "loss": 0.4614447355270386, + "step": 6617 + }, + { + "epoch": 1.5259395895780492, + "grad_norm": 
1.5326379965687464, + "learning_rate": 2.9208490428898213e-07, + "loss": 0.43820804357528687, + "step": 6618 + }, + { + "epoch": 1.526170163707632, + "grad_norm": 1.7297859153701105, + "learning_rate": 2.9181567186722e-07, + "loss": 0.46856528520584106, + "step": 6619 + }, + { + "epoch": 1.5264007378372146, + "grad_norm": 1.5560178508678546, + "learning_rate": 2.915465423872272e-07, + "loss": 0.45428818464279175, + "step": 6620 + }, + { + "epoch": 1.5266313119667974, + "grad_norm": 1.765757281110695, + "learning_rate": 2.912775158881243e-07, + "loss": 0.44715386629104614, + "step": 6621 + }, + { + "epoch": 1.52686188609638, + "grad_norm": 1.845941311143575, + "learning_rate": 2.9100859240901764e-07, + "loss": 0.537441611289978, + "step": 6622 + }, + { + "epoch": 1.5270924602259628, + "grad_norm": 2.100811269468338, + "learning_rate": 2.9073977198899714e-07, + "loss": 0.4430112838745117, + "step": 6623 + }, + { + "epoch": 1.5273230343555453, + "grad_norm": 1.625928583733216, + "learning_rate": 2.904710546671392e-07, + "loss": 0.41713255643844604, + "step": 6624 + }, + { + "epoch": 1.527553608485128, + "grad_norm": 1.639578198355071, + "learning_rate": 2.9020244048250396e-07, + "loss": 0.4313931465148926, + "step": 6625 + }, + { + "epoch": 1.5277841826147105, + "grad_norm": 1.617455818460061, + "learning_rate": 2.899339294741379e-07, + "loss": 0.5038034319877625, + "step": 6626 + }, + { + "epoch": 1.5280147567442932, + "grad_norm": 1.6017224429954546, + "learning_rate": 2.8966552168107127e-07, + "loss": 0.45088762044906616, + "step": 6627 + }, + { + "epoch": 1.528245330873876, + "grad_norm": 1.6027378992570083, + "learning_rate": 2.8939721714232e-07, + "loss": 0.40857064723968506, + "step": 6628 + }, + { + "epoch": 1.5284759050034586, + "grad_norm": 1.5432592985198028, + "learning_rate": 2.891290158968853e-07, + "loss": 0.43766242265701294, + "step": 6629 + }, + { + "epoch": 1.5287064791330414, + "grad_norm": 1.6663524119863393, + "learning_rate": 
2.888609179837523e-07, + "loss": 0.45986247062683105, + "step": 6630 + }, + { + "epoch": 1.528937053262624, + "grad_norm": 1.5102818288035118, + "learning_rate": 2.8859292344189236e-07, + "loss": 0.4681728482246399, + "step": 6631 + }, + { + "epoch": 1.5291676273922066, + "grad_norm": 1.4009274503220306, + "learning_rate": 2.883250323102605e-07, + "loss": 0.36730295419692993, + "step": 6632 + }, + { + "epoch": 1.5293982015217893, + "grad_norm": 1.6785355662696937, + "learning_rate": 2.880572446277982e-07, + "loss": 0.43494418263435364, + "step": 6633 + }, + { + "epoch": 1.5296287756513718, + "grad_norm": 1.6257441783659756, + "learning_rate": 2.877895604334305e-07, + "loss": 0.49145790934562683, + "step": 6634 + }, + { + "epoch": 1.5298593497809545, + "grad_norm": 1.4638603112091872, + "learning_rate": 2.875219797660681e-07, + "loss": 0.4166264832019806, + "step": 6635 + }, + { + "epoch": 1.5300899239105372, + "grad_norm": 1.3504636181719787, + "learning_rate": 2.8725450266460704e-07, + "loss": 0.4336514472961426, + "step": 6636 + }, + { + "epoch": 1.53032049804012, + "grad_norm": 1.6796430942391267, + "learning_rate": 2.869871291679271e-07, + "loss": 0.44186240434646606, + "step": 6637 + }, + { + "epoch": 1.5305510721697027, + "grad_norm": 1.4751166079505253, + "learning_rate": 2.867198593148945e-07, + "loss": 0.40619733929634094, + "step": 6638 + }, + { + "epoch": 1.5307816462992854, + "grad_norm": 1.4034694689938345, + "learning_rate": 2.864526931443588e-07, + "loss": 0.45552101731300354, + "step": 6639 + }, + { + "epoch": 1.5310122204288679, + "grad_norm": 1.3563039501008287, + "learning_rate": 2.861856306951562e-07, + "loss": 0.45153865218162537, + "step": 6640 + }, + { + "epoch": 1.5312427945584506, + "grad_norm": 1.5793746333655185, + "learning_rate": 2.859186720061061e-07, + "loss": 0.5146148204803467, + "step": 6641 + }, + { + "epoch": 1.531473368688033, + "grad_norm": 1.5627792728055054, + "learning_rate": 2.856518171160143e-07, + "loss": 
0.4566080868244171, + "step": 6642 + }, + { + "epoch": 1.5317039428176158, + "grad_norm": 1.93802928616596, + "learning_rate": 2.853850660636703e-07, + "loss": 0.4390585124492645, + "step": 6643 + }, + { + "epoch": 1.5319345169471985, + "grad_norm": 1.7734959004013588, + "learning_rate": 2.851184188878493e-07, + "loss": 0.5508195757865906, + "step": 6644 + }, + { + "epoch": 1.5321650910767812, + "grad_norm": 1.6721581584041076, + "learning_rate": 2.8485187562731126e-07, + "loss": 0.47640183568000793, + "step": 6645 + }, + { + "epoch": 1.532395665206364, + "grad_norm": 1.421769874384772, + "learning_rate": 2.8458543632080123e-07, + "loss": 0.4511566758155823, + "step": 6646 + }, + { + "epoch": 1.5326262393359467, + "grad_norm": 1.5003089507123706, + "learning_rate": 2.843191010070486e-07, + "loss": 0.414367139339447, + "step": 6647 + }, + { + "epoch": 1.5328568134655292, + "grad_norm": 1.5192326893049226, + "learning_rate": 2.840528697247674e-07, + "loss": 0.4611589312553406, + "step": 6648 + }, + { + "epoch": 1.5330873875951119, + "grad_norm": 1.6397285440449882, + "learning_rate": 2.8378674251265787e-07, + "loss": 0.4675883948802948, + "step": 6649 + }, + { + "epoch": 1.5333179617246944, + "grad_norm": 1.6281144487220143, + "learning_rate": 2.835207194094036e-07, + "loss": 0.49039095640182495, + "step": 6650 + }, + { + "epoch": 1.533548535854277, + "grad_norm": 1.6636356702139277, + "learning_rate": 2.832548004536741e-07, + "loss": 0.45641693472862244, + "step": 6651 + }, + { + "epoch": 1.5337791099838598, + "grad_norm": 1.7323507398911224, + "learning_rate": 2.829889856841233e-07, + "loss": 0.4858587682247162, + "step": 6652 + }, + { + "epoch": 1.5340096841134425, + "grad_norm": 1.3640056940377991, + "learning_rate": 2.8272327513939055e-07, + "loss": 0.3640017807483673, + "step": 6653 + }, + { + "epoch": 1.5342402582430252, + "grad_norm": 1.5342226074105705, + "learning_rate": 2.8245766885809865e-07, + "loss": 0.42915207147598267, + "step": 6654 + }, + { + 
"epoch": 1.534470832372608, + "grad_norm": 1.5250515427099394, + "learning_rate": 2.8219216687885707e-07, + "loss": 0.5041407346725464, + "step": 6655 + }, + { + "epoch": 1.5347014065021904, + "grad_norm": 1.479165849869464, + "learning_rate": 2.8192676924025885e-07, + "loss": 0.4748334288597107, + "step": 6656 + }, + { + "epoch": 1.5349319806317732, + "grad_norm": 1.5854109757101433, + "learning_rate": 2.8166147598088173e-07, + "loss": 0.4745975136756897, + "step": 6657 + }, + { + "epoch": 1.5351625547613557, + "grad_norm": 1.6430139570672564, + "learning_rate": 2.813962871392893e-07, + "loss": 0.49246084690093994, + "step": 6658 + }, + { + "epoch": 1.5353931288909384, + "grad_norm": 1.3796442061928538, + "learning_rate": 2.8113120275402936e-07, + "loss": 0.47876033186912537, + "step": 6659 + }, + { + "epoch": 1.535623703020521, + "grad_norm": 1.6460545742229191, + "learning_rate": 2.808662228636348e-07, + "loss": 0.5244987607002258, + "step": 6660 + }, + { + "epoch": 1.5358542771501038, + "grad_norm": 1.6433381019004774, + "learning_rate": 2.8060134750662277e-07, + "loss": 0.44661569595336914, + "step": 6661 + }, + { + "epoch": 1.5360848512796865, + "grad_norm": 1.4583799872096337, + "learning_rate": 2.8033657672149615e-07, + "loss": 0.4508060812950134, + "step": 6662 + }, + { + "epoch": 1.5363154254092692, + "grad_norm": 1.3497148067649773, + "learning_rate": 2.8007191054674117e-07, + "loss": 0.4657326340675354, + "step": 6663 + }, + { + "epoch": 1.5365459995388517, + "grad_norm": 1.4227603766742651, + "learning_rate": 2.798073490208307e-07, + "loss": 0.495077520608902, + "step": 6664 + }, + { + "epoch": 1.5367765736684345, + "grad_norm": 1.4557135691757939, + "learning_rate": 2.795428921822206e-07, + "loss": 0.40721309185028076, + "step": 6665 + }, + { + "epoch": 1.537007147798017, + "grad_norm": 1.4109014285343175, + "learning_rate": 2.7927854006935315e-07, + "loss": 0.3279367685317993, + "step": 6666 + }, + { + "epoch": 1.5372377219275997, + "grad_norm": 
1.6893419118169095, + "learning_rate": 2.790142927206538e-07, + "loss": 0.4849242866039276, + "step": 6667 + }, + { + "epoch": 1.5374682960571824, + "grad_norm": 1.7502055418971636, + "learning_rate": 2.7875015017453394e-07, + "loss": 0.45151397585868835, + "step": 6668 + }, + { + "epoch": 1.537698870186765, + "grad_norm": 1.7275509884274352, + "learning_rate": 2.784861124693898e-07, + "loss": 0.43480992317199707, + "step": 6669 + }, + { + "epoch": 1.5379294443163478, + "grad_norm": 1.606181868361543, + "learning_rate": 2.782221796436012e-07, + "loss": 0.48764440417289734, + "step": 6670 + }, + { + "epoch": 1.5381600184459305, + "grad_norm": 1.5345831310523104, + "learning_rate": 2.7795835173553407e-07, + "loss": 0.4164161682128906, + "step": 6671 + }, + { + "epoch": 1.538390592575513, + "grad_norm": 1.8060994369656536, + "learning_rate": 2.7769462878353777e-07, + "loss": 0.49934858083724976, + "step": 6672 + }, + { + "epoch": 1.5386211667050955, + "grad_norm": 1.4004311994850918, + "learning_rate": 2.77431010825948e-07, + "loss": 0.4877321124076843, + "step": 6673 + }, + { + "epoch": 1.5388517408346782, + "grad_norm": 1.7442704894714258, + "learning_rate": 2.771674979010834e-07, + "loss": 0.44518858194351196, + "step": 6674 + }, + { + "epoch": 1.539082314964261, + "grad_norm": 1.4902795732558884, + "learning_rate": 2.769040900472488e-07, + "loss": 0.4237474203109741, + "step": 6675 + }, + { + "epoch": 1.5393128890938437, + "grad_norm": 1.8818051716593445, + "learning_rate": 2.7664078730273335e-07, + "loss": 0.45270341634750366, + "step": 6676 + }, + { + "epoch": 1.5395434632234264, + "grad_norm": 1.9777420597791724, + "learning_rate": 2.7637758970581004e-07, + "loss": 0.3866819739341736, + "step": 6677 + }, + { + "epoch": 1.539774037353009, + "grad_norm": 1.709571144624541, + "learning_rate": 2.7611449729473825e-07, + "loss": 0.4384220838546753, + "step": 6678 + }, + { + "epoch": 1.5400046114825916, + "grad_norm": 1.523752237168306, + "learning_rate": 
2.758515101077602e-07, + "loss": 0.4462182819843292, + "step": 6679 + }, + { + "epoch": 1.5402351856121743, + "grad_norm": 1.6129576485586044, + "learning_rate": 2.755886281831046e-07, + "loss": 0.3927033245563507, + "step": 6680 + }, + { + "epoch": 1.5404657597417568, + "grad_norm": 1.7095013933604486, + "learning_rate": 2.7532585155898314e-07, + "loss": 0.4678634703159332, + "step": 6681 + }, + { + "epoch": 1.5406963338713395, + "grad_norm": 1.4524055684149206, + "learning_rate": 2.750631802735935e-07, + "loss": 0.4165131151676178, + "step": 6682 + }, + { + "epoch": 1.5409269080009222, + "grad_norm": 1.1494402193253566, + "learning_rate": 2.748006143651178e-07, + "loss": 0.3705793023109436, + "step": 6683 + }, + { + "epoch": 1.541157482130505, + "grad_norm": 1.5819526439113667, + "learning_rate": 2.745381538717226e-07, + "loss": 0.5428882837295532, + "step": 6684 + }, + { + "epoch": 1.5413880562600877, + "grad_norm": 1.6426127293668795, + "learning_rate": 2.742757988315589e-07, + "loss": 0.4116673171520233, + "step": 6685 + }, + { + "epoch": 1.5416186303896704, + "grad_norm": 1.4540567592422353, + "learning_rate": 2.740135492827631e-07, + "loss": 0.4617515802383423, + "step": 6686 + }, + { + "epoch": 1.541849204519253, + "grad_norm": 1.6140828940427878, + "learning_rate": 2.737514052634555e-07, + "loss": 0.5002453923225403, + "step": 6687 + }, + { + "epoch": 1.5420797786488356, + "grad_norm": 1.4130856063185002, + "learning_rate": 2.734893668117412e-07, + "loss": 0.46029362082481384, + "step": 6688 + }, + { + "epoch": 1.542310352778418, + "grad_norm": 1.4809565956171882, + "learning_rate": 2.732274339657107e-07, + "loss": 0.40502026677131653, + "step": 6689 + }, + { + "epoch": 1.5425409269080008, + "grad_norm": 1.6538580711421296, + "learning_rate": 2.7296560676343803e-07, + "loss": 0.5267831087112427, + "step": 6690 + }, + { + "epoch": 1.5427715010375835, + "grad_norm": 1.3087993674480496, + "learning_rate": 2.727038852429826e-07, + "loss": 0.3464335799217224, + 
"step": 6691 + }, + { + "epoch": 1.5430020751671663, + "grad_norm": 1.5384863769893498, + "learning_rate": 2.7244226944238847e-07, + "loss": 0.36635881662368774, + "step": 6692 + }, + { + "epoch": 1.543232649296749, + "grad_norm": 1.7314925345176482, + "learning_rate": 2.7218075939968435e-07, + "loss": 0.4567757844924927, + "step": 6693 + }, + { + "epoch": 1.5434632234263317, + "grad_norm": 1.9452957704897642, + "learning_rate": 2.719193551528827e-07, + "loss": 0.539220929145813, + "step": 6694 + }, + { + "epoch": 1.5436937975559142, + "grad_norm": 1.653206530012829, + "learning_rate": 2.71658056739982e-07, + "loss": 0.48553818464279175, + "step": 6695 + }, + { + "epoch": 1.543924371685497, + "grad_norm": 1.5040526715775615, + "learning_rate": 2.7139686419896424e-07, + "loss": 0.48564499616622925, + "step": 6696 + }, + { + "epoch": 1.5441549458150794, + "grad_norm": 1.3502417010865393, + "learning_rate": 2.7113577756779616e-07, + "loss": 0.4163014590740204, + "step": 6697 + }, + { + "epoch": 1.544385519944662, + "grad_norm": 1.864828438533457, + "learning_rate": 2.708747968844296e-07, + "loss": 0.5686431527137756, + "step": 6698 + }, + { + "epoch": 1.5446160940742448, + "grad_norm": 1.8608147536494253, + "learning_rate": 2.706139221868008e-07, + "loss": 0.5365211963653564, + "step": 6699 + }, + { + "epoch": 1.5448466682038275, + "grad_norm": 1.5480523179756653, + "learning_rate": 2.7035315351283084e-07, + "loss": 0.4147397577762604, + "step": 6700 + }, + { + "epoch": 1.5450772423334103, + "grad_norm": 1.5279455451058772, + "learning_rate": 2.7009249090042454e-07, + "loss": 0.3938590884208679, + "step": 6701 + }, + { + "epoch": 1.545307816462993, + "grad_norm": 1.726862148896079, + "learning_rate": 2.698319343874722e-07, + "loss": 0.3521370589733124, + "step": 6702 + }, + { + "epoch": 1.5455383905925755, + "grad_norm": 1.6305887024948476, + "learning_rate": 2.69571484011848e-07, + "loss": 0.430014967918396, + "step": 6703 + }, + { + "epoch": 1.5457689647221582, + 
"grad_norm": 1.636933956561892, + "learning_rate": 2.6931113981141164e-07, + "loss": 0.4697108864784241, + "step": 6704 + }, + { + "epoch": 1.5459995388517407, + "grad_norm": 1.5552943329509785, + "learning_rate": 2.69050901824006e-07, + "loss": 0.46567851305007935, + "step": 6705 + }, + { + "epoch": 1.5462301129813234, + "grad_norm": 1.620367133120872, + "learning_rate": 2.6879077008745986e-07, + "loss": 0.46061819791793823, + "step": 6706 + }, + { + "epoch": 1.5464606871109061, + "grad_norm": 1.5411435279833592, + "learning_rate": 2.6853074463958614e-07, + "loss": 0.568658709526062, + "step": 6707 + }, + { + "epoch": 1.5466912612404888, + "grad_norm": 1.3834999667432357, + "learning_rate": 2.682708255181815e-07, + "loss": 0.42816412448883057, + "step": 6708 + }, + { + "epoch": 1.5469218353700716, + "grad_norm": 1.576410551372393, + "learning_rate": 2.6801101276102866e-07, + "loss": 0.42515552043914795, + "step": 6709 + }, + { + "epoch": 1.5471524094996543, + "grad_norm": 1.5447523266389376, + "learning_rate": 2.677513064058932e-07, + "loss": 0.46513399481773376, + "step": 6710 + }, + { + "epoch": 1.5473829836292368, + "grad_norm": 1.3853944144224488, + "learning_rate": 2.6749170649052675e-07, + "loss": 0.4194756746292114, + "step": 6711 + }, + { + "epoch": 1.5476135577588195, + "grad_norm": 1.4035563039276318, + "learning_rate": 2.672322130526643e-07, + "loss": 0.4456541836261749, + "step": 6712 + }, + { + "epoch": 1.547844131888402, + "grad_norm": 1.5113453932130136, + "learning_rate": 2.669728261300264e-07, + "loss": 0.493444561958313, + "step": 6713 + }, + { + "epoch": 1.5480747060179847, + "grad_norm": 1.582884732282312, + "learning_rate": 2.6671354576031645e-07, + "loss": 0.47202616930007935, + "step": 6714 + }, + { + "epoch": 1.5483052801475674, + "grad_norm": 1.824788636144565, + "learning_rate": 2.66454371981225e-07, + "loss": 0.4584811329841614, + "step": 6715 + }, + { + "epoch": 1.5485358542771501, + "grad_norm": 1.3167028831683925, + "learning_rate": 
2.6619530483042485e-07, + "loss": 0.4072091579437256, + "step": 6716 + }, + { + "epoch": 1.5487664284067328, + "grad_norm": 1.5656021898929726, + "learning_rate": 2.6593634434557365e-07, + "loss": 0.49742361903190613, + "step": 6717 + }, + { + "epoch": 1.5489970025363156, + "grad_norm": 1.6686846450785309, + "learning_rate": 2.6567749056431467e-07, + "loss": 0.49291643500328064, + "step": 6718 + }, + { + "epoch": 1.549227576665898, + "grad_norm": 1.5234565390584587, + "learning_rate": 2.6541874352427427e-07, + "loss": 0.5210362076759338, + "step": 6719 + }, + { + "epoch": 1.5494581507954808, + "grad_norm": 1.523136615036839, + "learning_rate": 2.651601032630645e-07, + "loss": 0.4489557147026062, + "step": 6720 + }, + { + "epoch": 1.5496887249250633, + "grad_norm": 1.515706035484409, + "learning_rate": 2.649015698182808e-07, + "loss": 0.4417908191680908, + "step": 6721 + }, + { + "epoch": 1.549919299054646, + "grad_norm": 1.5123745571810647, + "learning_rate": 2.6464314322750404e-07, + "loss": 0.45177266001701355, + "step": 6722 + }, + { + "epoch": 1.5501498731842287, + "grad_norm": 1.5422888438788165, + "learning_rate": 2.6438482352829896e-07, + "loss": 0.37720638513565063, + "step": 6723 + }, + { + "epoch": 1.5503804473138114, + "grad_norm": 1.5572735157633186, + "learning_rate": 2.641266107582153e-07, + "loss": 0.5108897089958191, + "step": 6724 + }, + { + "epoch": 1.5506110214433941, + "grad_norm": 1.5098940840101445, + "learning_rate": 2.638685049547863e-07, + "loss": 0.449248731136322, + "step": 6725 + }, + { + "epoch": 1.5508415955729768, + "grad_norm": 1.4667668469814954, + "learning_rate": 2.636105061555309e-07, + "loss": 0.4692652225494385, + "step": 6726 + }, + { + "epoch": 1.5510721697025593, + "grad_norm": 1.5150559633489926, + "learning_rate": 2.6335261439795153e-07, + "loss": 0.49128347635269165, + "step": 6727 + }, + { + "epoch": 1.551302743832142, + "grad_norm": 1.5725646817979666, + "learning_rate": 2.630948297195351e-07, + "loss": 
0.4618053436279297, + "step": 6728 + }, + { + "epoch": 1.5515333179617246, + "grad_norm": 1.5786249232029208, + "learning_rate": 2.6283715215775336e-07, + "loss": 0.4342828094959259, + "step": 6729 + }, + { + "epoch": 1.5517638920913073, + "grad_norm": 1.5592983853420144, + "learning_rate": 2.625795817500626e-07, + "loss": 0.5214434862136841, + "step": 6730 + }, + { + "epoch": 1.55199446622089, + "grad_norm": 1.521395946192631, + "learning_rate": 2.623221185339034e-07, + "loss": 0.4873029589653015, + "step": 6731 + }, + { + "epoch": 1.5522250403504727, + "grad_norm": 1.5014817933254478, + "learning_rate": 2.6206476254670007e-07, + "loss": 0.4510548412799835, + "step": 6732 + }, + { + "epoch": 1.5524556144800554, + "grad_norm": 1.5931454307395074, + "learning_rate": 2.6180751382586265e-07, + "loss": 0.4832548499107361, + "step": 6733 + }, + { + "epoch": 1.5526861886096381, + "grad_norm": 1.8273040799326088, + "learning_rate": 2.6155037240878406e-07, + "loss": 0.5438823699951172, + "step": 6734 + }, + { + "epoch": 1.5529167627392206, + "grad_norm": 1.488758610712305, + "learning_rate": 2.6129333833284315e-07, + "loss": 0.4967566728591919, + "step": 6735 + }, + { + "epoch": 1.5531473368688034, + "grad_norm": 1.419700158234616, + "learning_rate": 2.610364116354018e-07, + "loss": 0.5187437534332275, + "step": 6736 + }, + { + "epoch": 1.5533779109983858, + "grad_norm": 1.3624978155475462, + "learning_rate": 2.607795923538072e-07, + "loss": 0.4199862480163574, + "step": 6737 + }, + { + "epoch": 1.5536084851279686, + "grad_norm": 1.463828508781327, + "learning_rate": 2.6052288052539084e-07, + "loss": 0.5009325742721558, + "step": 6738 + }, + { + "epoch": 1.5538390592575513, + "grad_norm": 1.5361155892650822, + "learning_rate": 2.602662761874679e-07, + "loss": 0.48698678612709045, + "step": 6739 + }, + { + "epoch": 1.554069633387134, + "grad_norm": 1.4600353762817446, + "learning_rate": 2.6000977937733905e-07, + "loss": 0.4845883846282959, + "step": 6740 + }, + { + "epoch": 
1.5543002075167167, + "grad_norm": 1.6153802807658302, + "learning_rate": 2.59753390132288e-07, + "loss": 0.512161135673523, + "step": 6741 + }, + { + "epoch": 1.5545307816462994, + "grad_norm": 1.756231295082545, + "learning_rate": 2.5949710848958415e-07, + "loss": 0.42334964871406555, + "step": 6742 + }, + { + "epoch": 1.554761355775882, + "grad_norm": 1.2927501946290025, + "learning_rate": 2.592409344864801e-07, + "loss": 0.3781980276107788, + "step": 6743 + }, + { + "epoch": 1.5549919299054646, + "grad_norm": 1.5363470406300028, + "learning_rate": 2.5898486816021394e-07, + "loss": 0.4989853501319885, + "step": 6744 + }, + { + "epoch": 1.5552225040350471, + "grad_norm": 1.5873964925893267, + "learning_rate": 2.5872890954800676e-07, + "loss": 0.45715585350990295, + "step": 6745 + }, + { + "epoch": 1.5554530781646299, + "grad_norm": 1.3499060893753405, + "learning_rate": 2.5847305868706515e-07, + "loss": 0.5025684833526611, + "step": 6746 + }, + { + "epoch": 1.5556836522942126, + "grad_norm": 1.5290460697986008, + "learning_rate": 2.5821731561457994e-07, + "loss": 0.47298115491867065, + "step": 6747 + }, + { + "epoch": 1.5559142264237953, + "grad_norm": 1.4250590830459762, + "learning_rate": 2.5796168036772524e-07, + "loss": 0.45412957668304443, + "step": 6748 + }, + { + "epoch": 1.556144800553378, + "grad_norm": 1.6230149340497857, + "learning_rate": 2.5770615298366107e-07, + "loss": 0.3958669602870941, + "step": 6749 + }, + { + "epoch": 1.5563753746829607, + "grad_norm": 1.4992477100706287, + "learning_rate": 2.574507334995302e-07, + "loss": 0.4748396873474121, + "step": 6750 + }, + { + "epoch": 1.5566059488125432, + "grad_norm": 2.1473408883216534, + "learning_rate": 2.5719542195246093e-07, + "loss": 0.4741169810295105, + "step": 6751 + }, + { + "epoch": 1.556836522942126, + "grad_norm": 1.5072269547692108, + "learning_rate": 2.569402183795648e-07, + "loss": 0.4362972378730774, + "step": 6752 + }, + { + "epoch": 1.5570670970717084, + "grad_norm": 
1.5695384848079892, + "learning_rate": 2.5668512281793873e-07, + "loss": 0.48013412952423096, + "step": 6753 + }, + { + "epoch": 1.5572976712012911, + "grad_norm": 1.4514603270444408, + "learning_rate": 2.564301353046634e-07, + "loss": 0.4728567600250244, + "step": 6754 + }, + { + "epoch": 1.5575282453308739, + "grad_norm": 1.7592773476195727, + "learning_rate": 2.56175255876804e-07, + "loss": 0.4304337501525879, + "step": 6755 + }, + { + "epoch": 1.5577588194604566, + "grad_norm": 1.5275686028016913, + "learning_rate": 2.5592048457140926e-07, + "loss": 0.43467870354652405, + "step": 6756 + }, + { + "epoch": 1.5579893935900393, + "grad_norm": 1.9596482130933712, + "learning_rate": 2.556658214255134e-07, + "loss": 0.3912844657897949, + "step": 6757 + }, + { + "epoch": 1.558219967719622, + "grad_norm": 1.5284327791141838, + "learning_rate": 2.5541126647613397e-07, + "loss": 0.4462862014770508, + "step": 6758 + }, + { + "epoch": 1.5584505418492045, + "grad_norm": 1.5847675751494867, + "learning_rate": 2.551568197602729e-07, + "loss": 0.43929487466812134, + "step": 6759 + }, + { + "epoch": 1.5586811159787872, + "grad_norm": 1.5077581986013873, + "learning_rate": 2.549024813149169e-07, + "loss": 0.44473958015441895, + "step": 6760 + }, + { + "epoch": 1.5589116901083697, + "grad_norm": 1.5536876763085832, + "learning_rate": 2.546482511770365e-07, + "loss": 0.5159727931022644, + "step": 6761 + }, + { + "epoch": 1.5591422642379524, + "grad_norm": 1.7371461951042986, + "learning_rate": 2.5439412938358696e-07, + "loss": 0.3975204825401306, + "step": 6762 + }, + { + "epoch": 1.5593728383675352, + "grad_norm": 1.493493619365051, + "learning_rate": 2.54140115971507e-07, + "loss": 0.5198286175727844, + "step": 6763 + }, + { + "epoch": 1.5596034124971179, + "grad_norm": 1.4309109790386, + "learning_rate": 2.5388621097772046e-07, + "loss": 0.4815763831138611, + "step": 6764 + }, + { + "epoch": 1.5598339866267006, + "grad_norm": 1.3803469238514527, + "learning_rate": 
2.5363241443913454e-07, + "loss": 0.365215539932251, + "step": 6765 + }, + { + "epoch": 1.5600645607562833, + "grad_norm": 1.6088793691676593, + "learning_rate": 2.533787263926417e-07, + "loss": 0.486020028591156, + "step": 6766 + }, + { + "epoch": 1.5602951348858658, + "grad_norm": 1.5355383857513338, + "learning_rate": 2.5312514687511766e-07, + "loss": 0.38536715507507324, + "step": 6767 + }, + { + "epoch": 1.5605257090154485, + "grad_norm": 1.649862765507334, + "learning_rate": 2.528716759234227e-07, + "loss": 0.44713371992111206, + "step": 6768 + }, + { + "epoch": 1.560756283145031, + "grad_norm": 1.868794454538197, + "learning_rate": 2.5261831357440154e-07, + "loss": 0.4122806489467621, + "step": 6769 + }, + { + "epoch": 1.5609868572746137, + "grad_norm": 1.6234940940069353, + "learning_rate": 2.523650598648829e-07, + "loss": 0.40514320135116577, + "step": 6770 + }, + { + "epoch": 1.5612174314041964, + "grad_norm": 1.4417973525561176, + "learning_rate": 2.5211191483168027e-07, + "loss": 0.4273102283477783, + "step": 6771 + }, + { + "epoch": 1.5614480055337792, + "grad_norm": 1.4229504510118502, + "learning_rate": 2.5185887851159005e-07, + "loss": 0.4774209260940552, + "step": 6772 + }, + { + "epoch": 1.5616785796633619, + "grad_norm": 1.583645566960067, + "learning_rate": 2.5160595094139436e-07, + "loss": 0.3928600549697876, + "step": 6773 + }, + { + "epoch": 1.5619091537929446, + "grad_norm": 1.6757793450729852, + "learning_rate": 2.5135313215785816e-07, + "loss": 0.4414944052696228, + "step": 6774 + }, + { + "epoch": 1.562139727922527, + "grad_norm": 1.733143939427008, + "learning_rate": 2.5110042219773176e-07, + "loss": 0.36133646965026855, + "step": 6775 + }, + { + "epoch": 1.5623703020521098, + "grad_norm": 1.8443586806925936, + "learning_rate": 2.508478210977486e-07, + "loss": 0.44824904203414917, + "step": 6776 + }, + { + "epoch": 1.5626008761816923, + "grad_norm": 1.1693439456079453, + "learning_rate": 2.5059532889462707e-07, + "loss": 
0.3699820637702942, + "step": 6777 + }, + { + "epoch": 1.562831450311275, + "grad_norm": 1.9309547773144982, + "learning_rate": 2.5034294562506976e-07, + "loss": 0.4809808135032654, + "step": 6778 + }, + { + "epoch": 1.5630620244408577, + "grad_norm": 1.7665230327633363, + "learning_rate": 2.5009067132576256e-07, + "loss": 0.487751841545105, + "step": 6779 + }, + { + "epoch": 1.5632925985704405, + "grad_norm": 1.5839144124062823, + "learning_rate": 2.4983850603337675e-07, + "loss": 0.47932374477386475, + "step": 6780 + }, + { + "epoch": 1.5635231727000232, + "grad_norm": 1.4782012523005248, + "learning_rate": 2.495864497845663e-07, + "loss": 0.42852234840393066, + "step": 6781 + }, + { + "epoch": 1.5637537468296059, + "grad_norm": 1.4802387383863571, + "learning_rate": 2.49334502615971e-07, + "loss": 0.4392131567001343, + "step": 6782 + }, + { + "epoch": 1.5639843209591884, + "grad_norm": 1.5042475261036963, + "learning_rate": 2.4908266456421323e-07, + "loss": 0.45050233602523804, + "step": 6783 + }, + { + "epoch": 1.5642148950887709, + "grad_norm": 1.4962883173938244, + "learning_rate": 2.488309356659004e-07, + "loss": 0.45328110456466675, + "step": 6784 + }, + { + "epoch": 1.5644454692183536, + "grad_norm": 1.451199382042834, + "learning_rate": 2.4857931595762403e-07, + "loss": 0.3851325511932373, + "step": 6785 + }, + { + "epoch": 1.5646760433479363, + "grad_norm": 1.5269726027188475, + "learning_rate": 2.4832780547595976e-07, + "loss": 0.4096960127353668, + "step": 6786 + }, + { + "epoch": 1.564906617477519, + "grad_norm": 1.4158017969205454, + "learning_rate": 2.480764042574669e-07, + "loss": 0.4439825117588043, + "step": 6787 + }, + { + "epoch": 1.5651371916071017, + "grad_norm": 1.5084778231824414, + "learning_rate": 2.4782511233868895e-07, + "loss": 0.4259459972381592, + "step": 6788 + }, + { + "epoch": 1.5653677657366845, + "grad_norm": 1.6383230301383533, + "learning_rate": 2.475739297561542e-07, + "loss": 0.4701216220855713, + "step": 6789 + }, + { + 
"epoch": 1.565598339866267, + "grad_norm": 1.4707071600317903, + "learning_rate": 2.473228565463742e-07, + "loss": 0.4435737133026123, + "step": 6790 + }, + { + "epoch": 1.5658289139958497, + "grad_norm": 1.4361527011832544, + "learning_rate": 2.4707189274584537e-07, + "loss": 0.4476662278175354, + "step": 6791 + }, + { + "epoch": 1.5660594881254322, + "grad_norm": 1.8319243980176085, + "learning_rate": 2.468210383910474e-07, + "loss": 0.4399911165237427, + "step": 6792 + }, + { + "epoch": 1.5662900622550149, + "grad_norm": 1.5617800363149925, + "learning_rate": 2.465702935184446e-07, + "loss": 0.4206039309501648, + "step": 6793 + }, + { + "epoch": 1.5665206363845976, + "grad_norm": 1.5998109403316092, + "learning_rate": 2.463196581644855e-07, + "loss": 0.44936686754226685, + "step": 6794 + }, + { + "epoch": 1.5667512105141803, + "grad_norm": 1.4750351364947134, + "learning_rate": 2.4606913236560277e-07, + "loss": 0.39926016330718994, + "step": 6795 + }, + { + "epoch": 1.566981784643763, + "grad_norm": 1.607414705164721, + "learning_rate": 2.4581871615821216e-07, + "loss": 0.4338487982749939, + "step": 6796 + }, + { + "epoch": 1.5672123587733457, + "grad_norm": 1.6693881073802184, + "learning_rate": 2.455684095787148e-07, + "loss": 0.5047430992126465, + "step": 6797 + }, + { + "epoch": 1.5674429329029282, + "grad_norm": 1.623571142038879, + "learning_rate": 2.4531821266349504e-07, + "loss": 0.46082550287246704, + "step": 6798 + }, + { + "epoch": 1.567673507032511, + "grad_norm": 1.5687485332342288, + "learning_rate": 2.450681254489214e-07, + "loss": 0.44586509466171265, + "step": 6799 + }, + { + "epoch": 1.5679040811620935, + "grad_norm": 1.6011741376497353, + "learning_rate": 2.4481814797134657e-07, + "loss": 0.5167746543884277, + "step": 6800 + }, + { + "epoch": 1.5681346552916762, + "grad_norm": 1.4074512111564024, + "learning_rate": 2.4456828026710753e-07, + "loss": 0.44062116742134094, + "step": 6801 + }, + { + "epoch": 1.5683652294212589, + "grad_norm": 
1.718295945554571, + "learning_rate": 2.4431852237252524e-07, + "loss": 0.5096040368080139, + "step": 6802 + }, + { + "epoch": 1.5685958035508416, + "grad_norm": 1.3369851313651875, + "learning_rate": 2.440688743239042e-07, + "loss": 0.44234153628349304, + "step": 6803 + }, + { + "epoch": 1.5688263776804243, + "grad_norm": 1.7878168925295264, + "learning_rate": 2.4381933615753357e-07, + "loss": 0.431011825799942, + "step": 6804 + }, + { + "epoch": 1.569056951810007, + "grad_norm": 1.5221569168970472, + "learning_rate": 2.435699079096858e-07, + "loss": 0.4903266131877899, + "step": 6805 + }, + { + "epoch": 1.5692875259395895, + "grad_norm": 1.4830626229942445, + "learning_rate": 2.433205896166185e-07, + "loss": 0.4698626399040222, + "step": 6806 + }, + { + "epoch": 1.5695181000691723, + "grad_norm": 1.7678576287420633, + "learning_rate": 2.4307138131457184e-07, + "loss": 0.37576574087142944, + "step": 6807 + }, + { + "epoch": 1.5697486741987547, + "grad_norm": 1.442601981615427, + "learning_rate": 2.4282228303977113e-07, + "loss": 0.47068172693252563, + "step": 6808 + }, + { + "epoch": 1.5699792483283375, + "grad_norm": 1.5121414961596256, + "learning_rate": 2.425732948284257e-07, + "loss": 0.45246315002441406, + "step": 6809 + }, + { + "epoch": 1.5702098224579202, + "grad_norm": 1.670746435704044, + "learning_rate": 2.423244167167278e-07, + "loss": 0.4746376574039459, + "step": 6810 + }, + { + "epoch": 1.570440396587503, + "grad_norm": 1.6491072802367082, + "learning_rate": 2.420756487408551e-07, + "loss": 0.413469135761261, + "step": 6811 + }, + { + "epoch": 1.5706709707170856, + "grad_norm": 1.4392614299059656, + "learning_rate": 2.418269909369678e-07, + "loss": 0.3567890226840973, + "step": 6812 + }, + { + "epoch": 1.5709015448466683, + "grad_norm": 1.9034789277869502, + "learning_rate": 2.415784433412116e-07, + "loss": 0.4676034450531006, + "step": 6813 + }, + { + "epoch": 1.5711321189762508, + "grad_norm": 1.5100461636177536, + "learning_rate": 
2.4133000598971477e-07, + "loss": 0.429337739944458, + "step": 6814 + }, + { + "epoch": 1.5713626931058335, + "grad_norm": 1.657098818036463, + "learning_rate": 2.4108167891859065e-07, + "loss": 0.35861289501190186, + "step": 6815 + }, + { + "epoch": 1.571593267235416, + "grad_norm": 1.7985300174152374, + "learning_rate": 2.4083346216393564e-07, + "loss": 0.43728363513946533, + "step": 6816 + }, + { + "epoch": 1.5718238413649988, + "grad_norm": 1.6655671112295587, + "learning_rate": 2.405853557618308e-07, + "loss": 0.44594380259513855, + "step": 6817 + }, + { + "epoch": 1.5720544154945815, + "grad_norm": 1.430621764890317, + "learning_rate": 2.403373597483414e-07, + "loss": 0.36871337890625, + "step": 6818 + }, + { + "epoch": 1.5722849896241642, + "grad_norm": 1.4284927159530842, + "learning_rate": 2.400894741595152e-07, + "loss": 0.3769477307796478, + "step": 6819 + }, + { + "epoch": 1.572515563753747, + "grad_norm": 1.6803573488891066, + "learning_rate": 2.3984169903138583e-07, + "loss": 0.503145694732666, + "step": 6820 + }, + { + "epoch": 1.5727461378833296, + "grad_norm": 1.552866324250783, + "learning_rate": 2.395940343999691e-07, + "loss": 0.4082655906677246, + "step": 6821 + }, + { + "epoch": 1.5729767120129121, + "grad_norm": 1.4215190376699491, + "learning_rate": 2.3934648030126625e-07, + "loss": 0.4106418192386627, + "step": 6822 + }, + { + "epoch": 1.5732072861424948, + "grad_norm": 1.663561714777188, + "learning_rate": 2.390990367712613e-07, + "loss": 0.45363783836364746, + "step": 6823 + }, + { + "epoch": 1.5734378602720773, + "grad_norm": 1.4253235303875884, + "learning_rate": 2.388517038459227e-07, + "loss": 0.4416825473308563, + "step": 6824 + }, + { + "epoch": 1.57366843440166, + "grad_norm": 1.5727508875619094, + "learning_rate": 2.3860448156120304e-07, + "loss": 0.5106863379478455, + "step": 6825 + }, + { + "epoch": 1.5738990085312428, + "grad_norm": 1.431151413456896, + "learning_rate": 2.3835736995303879e-07, + "loss": 0.4618466794490814, + 
"step": 6826 + }, + { + "epoch": 1.5741295826608255, + "grad_norm": 1.6611294255159201, + "learning_rate": 2.381103690573495e-07, + "loss": 0.414678692817688, + "step": 6827 + }, + { + "epoch": 1.5743601567904082, + "grad_norm": 1.3583782134926532, + "learning_rate": 2.3786347891004e-07, + "loss": 0.39774662256240845, + "step": 6828 + }, + { + "epoch": 1.574590730919991, + "grad_norm": 1.3689702631653482, + "learning_rate": 2.376166995469977e-07, + "loss": 0.4513537287712097, + "step": 6829 + }, + { + "epoch": 1.5748213050495734, + "grad_norm": 1.5433747348092586, + "learning_rate": 2.3737003100409447e-07, + "loss": 0.44062697887420654, + "step": 6830 + }, + { + "epoch": 1.5750518791791561, + "grad_norm": 1.6549219639884087, + "learning_rate": 2.3712347331718617e-07, + "loss": 0.42305582761764526, + "step": 6831 + }, + { + "epoch": 1.5752824533087386, + "grad_norm": 1.628456252942963, + "learning_rate": 2.3687702652211262e-07, + "loss": 0.46731626987457275, + "step": 6832 + }, + { + "epoch": 1.5755130274383213, + "grad_norm": 1.569042371408869, + "learning_rate": 2.3663069065469753e-07, + "loss": 0.4926149845123291, + "step": 6833 + }, + { + "epoch": 1.575743601567904, + "grad_norm": 1.8433451746214373, + "learning_rate": 2.3638446575074777e-07, + "loss": 0.49002933502197266, + "step": 6834 + }, + { + "epoch": 1.5759741756974868, + "grad_norm": 1.9286763636552064, + "learning_rate": 2.3613835184605523e-07, + "loss": 0.47110694646835327, + "step": 6835 + }, + { + "epoch": 1.5762047498270695, + "grad_norm": 1.7003781450027053, + "learning_rate": 2.3589234897639444e-07, + "loss": 0.4257816672325134, + "step": 6836 + }, + { + "epoch": 1.5764353239566522, + "grad_norm": 1.4515610553726317, + "learning_rate": 2.3564645717752506e-07, + "loss": 0.4031051695346832, + "step": 6837 + }, + { + "epoch": 1.5766658980862347, + "grad_norm": 1.7208107126331553, + "learning_rate": 2.3540067648518957e-07, + "loss": 0.5077808499336243, + "step": 6838 + }, + { + "epoch": 
1.5768964722158174, + "grad_norm": 1.4184547433402042, + "learning_rate": 2.3515500693511449e-07, + "loss": 0.3877585232257843, + "step": 6839 + }, + { + "epoch": 1.5771270463454, + "grad_norm": 1.6806127701824354, + "learning_rate": 2.3490944856301064e-07, + "loss": 0.4356805682182312, + "step": 6840 + }, + { + "epoch": 1.5773576204749826, + "grad_norm": 1.5102184976880006, + "learning_rate": 2.346640014045723e-07, + "loss": 0.46679362654685974, + "step": 6841 + }, + { + "epoch": 1.5775881946045653, + "grad_norm": 1.4361079018846885, + "learning_rate": 2.3441866549547817e-07, + "loss": 0.4837648272514343, + "step": 6842 + }, + { + "epoch": 1.577818768734148, + "grad_norm": 1.5395603940472438, + "learning_rate": 2.341734408713897e-07, + "loss": 0.42723533511161804, + "step": 6843 + }, + { + "epoch": 1.5780493428637308, + "grad_norm": 1.7296429757269751, + "learning_rate": 2.3392832756795322e-07, + "loss": 0.3680928647518158, + "step": 6844 + }, + { + "epoch": 1.5782799169933135, + "grad_norm": 1.3398871717628533, + "learning_rate": 2.3368332562079797e-07, + "loss": 0.434980571269989, + "step": 6845 + }, + { + "epoch": 1.578510491122896, + "grad_norm": 1.5976407072584213, + "learning_rate": 2.3343843506553805e-07, + "loss": 0.45552271604537964, + "step": 6846 + }, + { + "epoch": 1.5787410652524787, + "grad_norm": 1.5496903398620734, + "learning_rate": 2.331936559377702e-07, + "loss": 0.4292616844177246, + "step": 6847 + }, + { + "epoch": 1.5789716393820612, + "grad_norm": 1.6907239258434268, + "learning_rate": 2.3294898827307573e-07, + "loss": 0.5025339126586914, + "step": 6848 + }, + { + "epoch": 1.579202213511644, + "grad_norm": 1.434142265629081, + "learning_rate": 2.3270443210701996e-07, + "loss": 0.47567370533943176, + "step": 6849 + }, + { + "epoch": 1.5794327876412266, + "grad_norm": 1.9792768486961878, + "learning_rate": 2.3245998747515095e-07, + "loss": 0.5435467958450317, + "step": 6850 + }, + { + "epoch": 1.5796633617708093, + "grad_norm": 
1.2141081677893035, + "learning_rate": 2.3221565441300194e-07, + "loss": 0.4409145712852478, + "step": 6851 + }, + { + "epoch": 1.579893935900392, + "grad_norm": 1.3643265195449554, + "learning_rate": 2.3197143295608845e-07, + "loss": 0.40482181310653687, + "step": 6852 + }, + { + "epoch": 1.5801245100299748, + "grad_norm": 1.8983898955785605, + "learning_rate": 2.317273231399113e-07, + "loss": 0.40231794118881226, + "step": 6853 + }, + { + "epoch": 1.5803550841595573, + "grad_norm": 1.3860542767537625, + "learning_rate": 2.314833249999535e-07, + "loss": 0.43245166540145874, + "step": 6854 + }, + { + "epoch": 1.58058565828914, + "grad_norm": 1.5386782332278715, + "learning_rate": 2.3123943857168315e-07, + "loss": 0.40237659215927124, + "step": 6855 + }, + { + "epoch": 1.5808162324187225, + "grad_norm": 1.7869361833965254, + "learning_rate": 2.309956638905517e-07, + "loss": 0.48900318145751953, + "step": 6856 + }, + { + "epoch": 1.5810468065483052, + "grad_norm": 1.482622476685355, + "learning_rate": 2.3075200099199422e-07, + "loss": 0.42364567518234253, + "step": 6857 + }, + { + "epoch": 1.581277380677888, + "grad_norm": 1.6159587255295897, + "learning_rate": 2.3050844991142958e-07, + "loss": 0.4658735990524292, + "step": 6858 + }, + { + "epoch": 1.5815079548074706, + "grad_norm": 1.4775627716781476, + "learning_rate": 2.3026501068426007e-07, + "loss": 0.42268991470336914, + "step": 6859 + }, + { + "epoch": 1.5817385289370534, + "grad_norm": 1.4348002511722773, + "learning_rate": 2.3002168334587247e-07, + "loss": 0.44876742362976074, + "step": 6860 + }, + { + "epoch": 1.581969103066636, + "grad_norm": 1.5171591869453156, + "learning_rate": 2.2977846793163646e-07, + "loss": 0.42540132999420166, + "step": 6861 + }, + { + "epoch": 1.5821996771962186, + "grad_norm": 1.4296859038074168, + "learning_rate": 2.2953536447690636e-07, + "loss": 0.48768138885498047, + "step": 6862 + }, + { + "epoch": 1.5824302513258013, + "grad_norm": 1.5445046236967466, + "learning_rate": 
2.292923730170192e-07, + "loss": 0.42905953526496887, + "step": 6863 + }, + { + "epoch": 1.5826608254553838, + "grad_norm": 1.4472242985886439, + "learning_rate": 2.2904949358729653e-07, + "loss": 0.4103778004646301, + "step": 6864 + }, + { + "epoch": 1.5828913995849665, + "grad_norm": 1.5180272333652802, + "learning_rate": 2.2880672622304331e-07, + "loss": 0.39303290843963623, + "step": 6865 + }, + { + "epoch": 1.5831219737145492, + "grad_norm": 1.4702183686842207, + "learning_rate": 2.2856407095954843e-07, + "loss": 0.5087130069732666, + "step": 6866 + }, + { + "epoch": 1.583352547844132, + "grad_norm": 1.5644640444387603, + "learning_rate": 2.283215278320839e-07, + "loss": 0.33117055892944336, + "step": 6867 + }, + { + "epoch": 1.5835831219737146, + "grad_norm": 1.7090383225203818, + "learning_rate": 2.280790968759063e-07, + "loss": 0.41781488060951233, + "step": 6868 + }, + { + "epoch": 1.5838136961032974, + "grad_norm": 1.4121975925065597, + "learning_rate": 2.2783677812625523e-07, + "loss": 0.5104382634162903, + "step": 6869 + }, + { + "epoch": 1.5840442702328799, + "grad_norm": 1.5723614045021508, + "learning_rate": 2.2759457161835372e-07, + "loss": 0.3987969160079956, + "step": 6870 + }, + { + "epoch": 1.5842748443624626, + "grad_norm": 1.705658009146651, + "learning_rate": 2.2735247738740936e-07, + "loss": 0.4723064601421356, + "step": 6871 + }, + { + "epoch": 1.584505418492045, + "grad_norm": 1.707721278006975, + "learning_rate": 2.2711049546861293e-07, + "loss": 0.3942141830921173, + "step": 6872 + }, + { + "epoch": 1.5847359926216278, + "grad_norm": 1.5657011191058785, + "learning_rate": 2.268686258971393e-07, + "loss": 0.38271787762641907, + "step": 6873 + }, + { + "epoch": 1.5849665667512105, + "grad_norm": 1.3977071321322045, + "learning_rate": 2.2662686870814607e-07, + "loss": 0.4944665729999542, + "step": 6874 + }, + { + "epoch": 1.5851971408807932, + "grad_norm": 1.7910306093530013, + "learning_rate": 2.2638522393677562e-07, + "loss": 
0.46695005893707275, + "step": 6875 + }, + { + "epoch": 1.585427715010376, + "grad_norm": 1.7074115790208728, + "learning_rate": 2.2614369161815295e-07, + "loss": 0.4620080888271332, + "step": 6876 + }, + { + "epoch": 1.5856582891399587, + "grad_norm": 1.6877087434684872, + "learning_rate": 2.2590227178738776e-07, + "loss": 0.5650279521942139, + "step": 6877 + }, + { + "epoch": 1.5858888632695411, + "grad_norm": 1.3471081039016284, + "learning_rate": 2.2566096447957227e-07, + "loss": 0.3556622564792633, + "step": 6878 + }, + { + "epoch": 1.5861194373991239, + "grad_norm": 1.3889188451731431, + "learning_rate": 2.254197697297834e-07, + "loss": 0.4978718161582947, + "step": 6879 + }, + { + "epoch": 1.5863500115287064, + "grad_norm": 1.375490517958548, + "learning_rate": 2.2517868757308146e-07, + "loss": 0.4759003520011902, + "step": 6880 + }, + { + "epoch": 1.586580585658289, + "grad_norm": 1.579013983466932, + "learning_rate": 2.2493771804450945e-07, + "loss": 0.5078370571136475, + "step": 6881 + }, + { + "epoch": 1.5868111597878718, + "grad_norm": 1.3607586792133322, + "learning_rate": 2.2469686117909547e-07, + "loss": 0.4188239276409149, + "step": 6882 + }, + { + "epoch": 1.5870417339174545, + "grad_norm": 1.3488510335317552, + "learning_rate": 2.2445611701184997e-07, + "loss": 0.4075232744216919, + "step": 6883 + }, + { + "epoch": 1.5872723080470372, + "grad_norm": 1.5004910712339554, + "learning_rate": 2.2421548557776794e-07, + "loss": 0.3643442988395691, + "step": 6884 + }, + { + "epoch": 1.58750288217662, + "grad_norm": 1.4193604715362476, + "learning_rate": 2.2397496691182716e-07, + "loss": 0.38767147064208984, + "step": 6885 + }, + { + "epoch": 1.5877334563062024, + "grad_norm": 1.6373352976605955, + "learning_rate": 2.2373456104899e-07, + "loss": 0.4874354600906372, + "step": 6886 + }, + { + "epoch": 1.5879640304357852, + "grad_norm": 1.5573200679287742, + "learning_rate": 2.2349426802420134e-07, + "loss": 0.46412762999534607, + "step": 6887 + }, + { + 
"epoch": 1.5881946045653677, + "grad_norm": 1.3720639419051985, + "learning_rate": 2.2325408787239054e-07, + "loss": 0.4299372434616089, + "step": 6888 + }, + { + "epoch": 1.5884251786949504, + "grad_norm": 1.6309152140238423, + "learning_rate": 2.230140206284703e-07, + "loss": 0.3962220549583435, + "step": 6889 + }, + { + "epoch": 1.588655752824533, + "grad_norm": 1.617512400235996, + "learning_rate": 2.2277406632733653e-07, + "loss": 0.5048998594284058, + "step": 6890 + }, + { + "epoch": 1.5888863269541158, + "grad_norm": 2.0443646004817024, + "learning_rate": 2.2253422500386932e-07, + "loss": 0.35463857650756836, + "step": 6891 + }, + { + "epoch": 1.5891169010836985, + "grad_norm": 1.5696832175175914, + "learning_rate": 2.2229449669293165e-07, + "loss": 0.3969672620296478, + "step": 6892 + }, + { + "epoch": 1.5893474752132812, + "grad_norm": 1.5166803382402412, + "learning_rate": 2.22054881429371e-07, + "loss": 0.36300575733184814, + "step": 6893 + }, + { + "epoch": 1.5895780493428637, + "grad_norm": 1.41057555150973, + "learning_rate": 2.2181537924801729e-07, + "loss": 0.45796507596969604, + "step": 6894 + }, + { + "epoch": 1.5898086234724462, + "grad_norm": 1.556089643432737, + "learning_rate": 2.2157599018368488e-07, + "loss": 0.42725688219070435, + "step": 6895 + }, + { + "epoch": 1.590039197602029, + "grad_norm": 1.8436048050065164, + "learning_rate": 2.213367142711714e-07, + "loss": 0.4959419369697571, + "step": 6896 + }, + { + "epoch": 1.5902697717316117, + "grad_norm": 1.6607109480306586, + "learning_rate": 2.2109755154525821e-07, + "loss": 0.3707115948200226, + "step": 6897 + }, + { + "epoch": 1.5905003458611944, + "grad_norm": 1.4025605906760028, + "learning_rate": 2.2085850204070989e-07, + "loss": 0.3647577166557312, + "step": 6898 + }, + { + "epoch": 1.590730919990777, + "grad_norm": 1.505368584241417, + "learning_rate": 2.2061956579227447e-07, + "loss": 0.42227697372436523, + "step": 6899 + }, + { + "epoch": 1.5909614941203598, + "grad_norm": 
1.508703122498175, + "learning_rate": 2.2038074283468412e-07, + "loss": 0.41736292839050293, + "step": 6900 + }, + { + "epoch": 1.5911920682499423, + "grad_norm": 1.6418039973045746, + "learning_rate": 2.201420332026538e-07, + "loss": 0.46005967259407043, + "step": 6901 + }, + { + "epoch": 1.591422642379525, + "grad_norm": 1.4328523009517202, + "learning_rate": 2.1990343693088243e-07, + "loss": 0.3572643995285034, + "step": 6902 + }, + { + "epoch": 1.5916532165091075, + "grad_norm": 1.744760153255399, + "learning_rate": 2.196649540540527e-07, + "loss": 0.5321012735366821, + "step": 6903 + }, + { + "epoch": 1.5918837906386902, + "grad_norm": 1.5415731453823578, + "learning_rate": 2.194265846068305e-07, + "loss": 0.4913836419582367, + "step": 6904 + }, + { + "epoch": 1.592114364768273, + "grad_norm": 1.7016363411577065, + "learning_rate": 2.1918832862386493e-07, + "loss": 0.37674903869628906, + "step": 6905 + }, + { + "epoch": 1.5923449388978557, + "grad_norm": 1.5772289300833298, + "learning_rate": 2.1895018613978934e-07, + "loss": 0.4385930001735687, + "step": 6906 + }, + { + "epoch": 1.5925755130274384, + "grad_norm": 2.224743671968565, + "learning_rate": 2.1871215718921964e-07, + "loss": 0.5219674706459045, + "step": 6907 + }, + { + "epoch": 1.592806087157021, + "grad_norm": 1.5215408344839954, + "learning_rate": 2.1847424180675622e-07, + "loss": 0.4241113066673279, + "step": 6908 + }, + { + "epoch": 1.5930366612866036, + "grad_norm": 1.4296843598144484, + "learning_rate": 2.1823644002698237e-07, + "loss": 0.4008786082267761, + "step": 6909 + }, + { + "epoch": 1.5932672354161863, + "grad_norm": 1.5021365471039205, + "learning_rate": 2.179987518844645e-07, + "loss": 0.3333933651447296, + "step": 6910 + }, + { + "epoch": 1.5934978095457688, + "grad_norm": 1.652596855301234, + "learning_rate": 2.1776117741375343e-07, + "loss": 0.48857730627059937, + "step": 6911 + }, + { + "epoch": 1.5937283836753515, + "grad_norm": 1.4724322236306013, + "learning_rate": 
2.1752371664938306e-07, + "loss": 0.37393617630004883, + "step": 6912 + }, + { + "epoch": 1.5939589578049342, + "grad_norm": 1.4102085657254086, + "learning_rate": 2.172863696258709e-07, + "loss": 0.5365080833435059, + "step": 6913 + }, + { + "epoch": 1.594189531934517, + "grad_norm": 1.7683912421422305, + "learning_rate": 2.1704913637771705e-07, + "loss": 0.49318936467170715, + "step": 6914 + }, + { + "epoch": 1.5944201060640997, + "grad_norm": 1.8200372673393599, + "learning_rate": 2.1681201693940666e-07, + "loss": 0.37682920694351196, + "step": 6915 + }, + { + "epoch": 1.5946506801936824, + "grad_norm": 1.4120260343966702, + "learning_rate": 2.1657501134540657e-07, + "loss": 0.4894877076148987, + "step": 6916 + }, + { + "epoch": 1.5948812543232649, + "grad_norm": 1.5895963005275906, + "learning_rate": 2.1633811963016869e-07, + "loss": 0.4200783967971802, + "step": 6917 + }, + { + "epoch": 1.5951118284528476, + "grad_norm": 1.7361608161591027, + "learning_rate": 2.1610134182812702e-07, + "loss": 0.3953052759170532, + "step": 6918 + }, + { + "epoch": 1.59534240258243, + "grad_norm": 1.4727518091374385, + "learning_rate": 2.158646779736999e-07, + "loss": 0.4006558656692505, + "step": 6919 + }, + { + "epoch": 1.5955729767120128, + "grad_norm": 1.7355475804217702, + "learning_rate": 2.1562812810128906e-07, + "loss": 0.3749210238456726, + "step": 6920 + }, + { + "epoch": 1.5958035508415955, + "grad_norm": 1.5378158592599445, + "learning_rate": 2.1539169224527887e-07, + "loss": 0.4688538610935211, + "step": 6921 + }, + { + "epoch": 1.5960341249711782, + "grad_norm": 1.590308500795848, + "learning_rate": 2.151553704400383e-07, + "loss": 0.4483727216720581, + "step": 6922 + }, + { + "epoch": 1.596264699100761, + "grad_norm": 1.589431373760787, + "learning_rate": 2.149191627199185e-07, + "loss": 0.5118253827095032, + "step": 6923 + }, + { + "epoch": 1.5964952732303437, + "grad_norm": 1.644731800905039, + "learning_rate": 2.1468306911925525e-07, + "loss": 
0.43641170859336853, + "step": 6924 + }, + { + "epoch": 1.5967258473599262, + "grad_norm": 1.4755114053374785, + "learning_rate": 2.1444708967236657e-07, + "loss": 0.38253384828567505, + "step": 6925 + }, + { + "epoch": 1.596956421489509, + "grad_norm": 1.5638213373412855, + "learning_rate": 2.1421122441355476e-07, + "loss": 0.43674635887145996, + "step": 6926 + }, + { + "epoch": 1.5971869956190914, + "grad_norm": 1.3940207891491625, + "learning_rate": 2.1397547337710519e-07, + "loss": 0.37392908334732056, + "step": 6927 + }, + { + "epoch": 1.597417569748674, + "grad_norm": 1.5097907813025324, + "learning_rate": 2.13739836597287e-07, + "loss": 0.4531250298023224, + "step": 6928 + }, + { + "epoch": 1.5976481438782568, + "grad_norm": 1.3308296891253455, + "learning_rate": 2.13504314108352e-07, + "loss": 0.38579899072647095, + "step": 6929 + }, + { + "epoch": 1.5978787180078395, + "grad_norm": 1.8618083111554995, + "learning_rate": 2.1326890594453563e-07, + "loss": 0.5215288400650024, + "step": 6930 + }, + { + "epoch": 1.5981092921374223, + "grad_norm": 1.6019249166669218, + "learning_rate": 2.130336121400572e-07, + "loss": 0.4396743178367615, + "step": 6931 + }, + { + "epoch": 1.598339866267005, + "grad_norm": 1.5371889029106374, + "learning_rate": 2.127984327291188e-07, + "loss": 0.5068432688713074, + "step": 6932 + }, + { + "epoch": 1.5985704403965875, + "grad_norm": 1.7855756215277538, + "learning_rate": 2.1256336774590643e-07, + "loss": 0.48809194564819336, + "step": 6933 + }, + { + "epoch": 1.5988010145261702, + "grad_norm": 1.4166815561679078, + "learning_rate": 2.123284172245885e-07, + "loss": 0.4191613793373108, + "step": 6934 + }, + { + "epoch": 1.5990315886557527, + "grad_norm": 1.5763678308245206, + "learning_rate": 2.1209358119931843e-07, + "loss": 0.41901010274887085, + "step": 6935 + }, + { + "epoch": 1.5992621627853354, + "grad_norm": 1.8296822391624505, + "learning_rate": 2.1185885970423133e-07, + "loss": 0.5046913623809814, + "step": 6936 + }, + { + 
"epoch": 1.5994927369149181, + "grad_norm": 2.1559492699976492, + "learning_rate": 2.1162425277344675e-07, + "loss": 0.5113730430603027, + "step": 6937 + }, + { + "epoch": 1.5997233110445008, + "grad_norm": 1.520077424866564, + "learning_rate": 2.1138976044106672e-07, + "loss": 0.34129637479782104, + "step": 6938 + }, + { + "epoch": 1.5999538851740835, + "grad_norm": 1.5890047902961466, + "learning_rate": 2.1115538274117762e-07, + "loss": 0.4492289423942566, + "step": 6939 + }, + { + "epoch": 1.6001844593036663, + "grad_norm": 1.5532375131614289, + "learning_rate": 2.1092111970784833e-07, + "loss": 0.41002708673477173, + "step": 6940 + }, + { + "epoch": 1.6004150334332488, + "grad_norm": 1.887817008406582, + "learning_rate": 2.1068697137513113e-07, + "loss": 0.5444740056991577, + "step": 6941 + }, + { + "epoch": 1.6006456075628315, + "grad_norm": 1.518981510824895, + "learning_rate": 2.1045293777706196e-07, + "loss": 0.3489699959754944, + "step": 6942 + }, + { + "epoch": 1.600876181692414, + "grad_norm": 1.5115486172446684, + "learning_rate": 2.1021901894766025e-07, + "loss": 0.41807419061660767, + "step": 6943 + }, + { + "epoch": 1.6011067558219967, + "grad_norm": 1.7376028221450257, + "learning_rate": 2.0998521492092857e-07, + "loss": 0.41074657440185547, + "step": 6944 + }, + { + "epoch": 1.6013373299515794, + "grad_norm": 1.370751011576157, + "learning_rate": 2.097515257308521e-07, + "loss": 0.4085312485694885, + "step": 6945 + }, + { + "epoch": 1.6015679040811621, + "grad_norm": 1.6632563260665783, + "learning_rate": 2.095179514114006e-07, + "loss": 0.42699170112609863, + "step": 6946 + }, + { + "epoch": 1.6017984782107448, + "grad_norm": 1.6347540938108835, + "learning_rate": 2.0928449199652597e-07, + "loss": 0.40041583776474, + "step": 6947 + }, + { + "epoch": 1.6020290523403276, + "grad_norm": 1.385214375087801, + "learning_rate": 2.090511475201643e-07, + "loss": 0.47465208172798157, + "step": 6948 + }, + { + "epoch": 1.60225962646991, + "grad_norm": 
1.5233208405026366, + "learning_rate": 2.0881791801623405e-07, + "loss": 0.4338058829307556, + "step": 6949 + }, + { + "epoch": 1.6024902005994928, + "grad_norm": 1.857588116409586, + "learning_rate": 2.0858480351863794e-07, + "loss": 0.5398772954940796, + "step": 6950 + }, + { + "epoch": 1.6027207747290753, + "grad_norm": 1.41461865858101, + "learning_rate": 2.0835180406126151e-07, + "loss": 0.40750259160995483, + "step": 6951 + }, + { + "epoch": 1.602951348858658, + "grad_norm": 1.6330208123854022, + "learning_rate": 2.0811891967797336e-07, + "loss": 0.4365716278553009, + "step": 6952 + }, + { + "epoch": 1.6031819229882407, + "grad_norm": 1.395812913626374, + "learning_rate": 2.078861504026258e-07, + "loss": 0.41537174582481384, + "step": 6953 + }, + { + "epoch": 1.6034124971178234, + "grad_norm": 1.331855885968294, + "learning_rate": 2.0765349626905394e-07, + "loss": 0.3687853217124939, + "step": 6954 + }, + { + "epoch": 1.6036430712474061, + "grad_norm": 1.4291699726024594, + "learning_rate": 2.074209573110769e-07, + "loss": 0.48866790533065796, + "step": 6955 + }, + { + "epoch": 1.6038736453769888, + "grad_norm": 1.7541297686576787, + "learning_rate": 2.0718853356249588e-07, + "loss": 0.4618760347366333, + "step": 6956 + }, + { + "epoch": 1.6041042195065713, + "grad_norm": 1.820272898606224, + "learning_rate": 2.0695622505709654e-07, + "loss": 0.365873247385025, + "step": 6957 + }, + { + "epoch": 1.604334793636154, + "grad_norm": 1.7127779412462347, + "learning_rate": 2.0672403182864706e-07, + "loss": 0.4346495270729065, + "step": 6958 + }, + { + "epoch": 1.6045653677657365, + "grad_norm": 1.4385774019168192, + "learning_rate": 2.0649195391089935e-07, + "loss": 0.3995724618434906, + "step": 6959 + }, + { + "epoch": 1.6047959418953193, + "grad_norm": 1.890499669463449, + "learning_rate": 2.062599913375882e-07, + "loss": 0.4628515839576721, + "step": 6960 + }, + { + "epoch": 1.605026516024902, + "grad_norm": 1.8491035226730044, + "learning_rate": 
2.060281441424314e-07, + "loss": 0.39776262640953064, + "step": 6961 + }, + { + "epoch": 1.6052570901544847, + "grad_norm": 1.6838333142700899, + "learning_rate": 2.057964123591307e-07, + "loss": 0.4622994065284729, + "step": 6962 + }, + { + "epoch": 1.6054876642840674, + "grad_norm": 1.3806987670969462, + "learning_rate": 2.0556479602137033e-07, + "loss": 0.4028933048248291, + "step": 6963 + }, + { + "epoch": 1.6057182384136501, + "grad_norm": 1.592137730506949, + "learning_rate": 2.0533329516281838e-07, + "loss": 0.46639660000801086, + "step": 6964 + }, + { + "epoch": 1.6059488125432326, + "grad_norm": 1.3243378898371028, + "learning_rate": 2.0510190981712537e-07, + "loss": 0.4063863158226013, + "step": 6965 + }, + { + "epoch": 1.6061793866728153, + "grad_norm": 1.6927530193908227, + "learning_rate": 2.0487064001792586e-07, + "loss": 0.471376895904541, + "step": 6966 + }, + { + "epoch": 1.6064099608023978, + "grad_norm": 1.5262354616100662, + "learning_rate": 2.0463948579883727e-07, + "loss": 0.5094102025032043, + "step": 6967 + }, + { + "epoch": 1.6066405349319806, + "grad_norm": 1.613731344454896, + "learning_rate": 2.0440844719346039e-07, + "loss": 0.3922441005706787, + "step": 6968 + }, + { + "epoch": 1.6068711090615633, + "grad_norm": 1.7524315605420397, + "learning_rate": 2.0417752423537882e-07, + "loss": 0.47777149081230164, + "step": 6969 + }, + { + "epoch": 1.607101683191146, + "grad_norm": 2.2487851564601065, + "learning_rate": 2.0394671695815924e-07, + "loss": 0.5780138969421387, + "step": 6970 + }, + { + "epoch": 1.6073322573207287, + "grad_norm": 1.6028588432287403, + "learning_rate": 2.0371602539535237e-07, + "loss": 0.43968862295150757, + "step": 6971 + }, + { + "epoch": 1.6075628314503114, + "grad_norm": 1.877374036184133, + "learning_rate": 2.0348544958049096e-07, + "loss": 0.5204722881317139, + "step": 6972 + }, + { + "epoch": 1.607793405579894, + "grad_norm": 1.5207193577135807, + "learning_rate": 2.0325498954709198e-07, + "loss": 
0.3944805860519409, + "step": 6973 + }, + { + "epoch": 1.6080239797094766, + "grad_norm": 1.454235622222141, + "learning_rate": 2.0302464532865505e-07, + "loss": 0.42686349153518677, + "step": 6974 + }, + { + "epoch": 1.6082545538390591, + "grad_norm": 1.5958289830519565, + "learning_rate": 2.027944169586633e-07, + "loss": 0.3860762119293213, + "step": 6975 + }, + { + "epoch": 1.6084851279686418, + "grad_norm": 1.880005605643703, + "learning_rate": 2.0256430447058215e-07, + "loss": 0.5570458769798279, + "step": 6976 + }, + { + "epoch": 1.6087157020982246, + "grad_norm": 1.8351241687154358, + "learning_rate": 2.0233430789786132e-07, + "loss": 0.4556728005409241, + "step": 6977 + }, + { + "epoch": 1.6089462762278073, + "grad_norm": 1.4746534507162423, + "learning_rate": 2.0210442727393285e-07, + "loss": 0.48365700244903564, + "step": 6978 + }, + { + "epoch": 1.60917685035739, + "grad_norm": 1.7835628524046172, + "learning_rate": 2.018746626322124e-07, + "loss": 0.4456971287727356, + "step": 6979 + }, + { + "epoch": 1.6094074244869727, + "grad_norm": 1.6700237073697568, + "learning_rate": 2.0164501400609835e-07, + "loss": 0.41877123713493347, + "step": 6980 + }, + { + "epoch": 1.6096379986165552, + "grad_norm": 1.3803715462197303, + "learning_rate": 2.0141548142897246e-07, + "loss": 0.4073547124862671, + "step": 6981 + }, + { + "epoch": 1.609868572746138, + "grad_norm": 1.5181775501419725, + "learning_rate": 2.0118606493420021e-07, + "loss": 0.4987693727016449, + "step": 6982 + }, + { + "epoch": 1.6100991468757204, + "grad_norm": 1.603543806365415, + "learning_rate": 2.0095676455512878e-07, + "loss": 0.4391751289367676, + "step": 6983 + }, + { + "epoch": 1.6103297210053031, + "grad_norm": 1.4062982467603231, + "learning_rate": 2.0072758032508996e-07, + "loss": 0.409262478351593, + "step": 6984 + }, + { + "epoch": 1.6105602951348859, + "grad_norm": 1.353394057864669, + "learning_rate": 2.0049851227739744e-07, + "loss": 0.38653457164764404, + "step": 6985 + }, + { + 
"epoch": 1.6107908692644686, + "grad_norm": 1.9189325963312815, + "learning_rate": 2.0026956044534914e-07, + "loss": 0.4824348986148834, + "step": 6986 + }, + { + "epoch": 1.6110214433940513, + "grad_norm": 1.7037748706735498, + "learning_rate": 2.00040724862225e-07, + "loss": 0.45774850249290466, + "step": 6987 + }, + { + "epoch": 1.611252017523634, + "grad_norm": 1.5419477618151842, + "learning_rate": 1.9981200556128906e-07, + "loss": 0.45437830686569214, + "step": 6988 + }, + { + "epoch": 1.6114825916532165, + "grad_norm": 1.4581568342693196, + "learning_rate": 1.9958340257578753e-07, + "loss": 0.4563155770301819, + "step": 6989 + }, + { + "epoch": 1.6117131657827992, + "grad_norm": 1.7363246075229848, + "learning_rate": 1.9935491593895048e-07, + "loss": 0.5786794424057007, + "step": 6990 + }, + { + "epoch": 1.6119437399123817, + "grad_norm": 1.6120161181322603, + "learning_rate": 1.991265456839909e-07, + "loss": 0.5290218591690063, + "step": 6991 + }, + { + "epoch": 1.6121743140419644, + "grad_norm": 1.607774677113548, + "learning_rate": 1.9889829184410434e-07, + "loss": 0.3456650376319885, + "step": 6992 + }, + { + "epoch": 1.6124048881715471, + "grad_norm": 1.414142582496391, + "learning_rate": 1.9867015445247015e-07, + "loss": 0.40869832038879395, + "step": 6993 + }, + { + "epoch": 1.6126354623011299, + "grad_norm": 2.3563881452147992, + "learning_rate": 1.9844213354225004e-07, + "loss": 0.49926644563674927, + "step": 6994 + }, + { + "epoch": 1.6128660364307126, + "grad_norm": 1.904270429684393, + "learning_rate": 1.9821422914658957e-07, + "loss": 0.4874018132686615, + "step": 6995 + }, + { + "epoch": 1.6130966105602953, + "grad_norm": 1.872252891476363, + "learning_rate": 1.9798644129861654e-07, + "loss": 0.4228810667991638, + "step": 6996 + }, + { + "epoch": 1.6133271846898778, + "grad_norm": 1.4437194678200662, + "learning_rate": 1.9775877003144237e-07, + "loss": 0.4309043884277344, + "step": 6997 + }, + { + "epoch": 1.6135577588194605, + "grad_norm": 
1.6133739556944033, + "learning_rate": 1.9753121537816142e-07, + "loss": 0.3917756676673889, + "step": 6998 + }, + { + "epoch": 1.613788332949043, + "grad_norm": 1.492105866056543, + "learning_rate": 1.9730377737185145e-07, + "loss": 0.4074435830116272, + "step": 6999 + }, + { + "epoch": 1.6140189070786257, + "grad_norm": 1.7474889804918834, + "learning_rate": 1.9707645604557243e-07, + "loss": 0.4581322968006134, + "step": 7000 + }, + { + "epoch": 1.6142494812082084, + "grad_norm": 1.5240615238309698, + "learning_rate": 1.9684925143236776e-07, + "loss": 0.4479151666164398, + "step": 7001 + }, + { + "epoch": 1.6144800553377912, + "grad_norm": 1.4379805154063257, + "learning_rate": 1.966221635652643e-07, + "loss": 0.3378838300704956, + "step": 7002 + }, + { + "epoch": 1.6147106294673739, + "grad_norm": 1.6755517427089033, + "learning_rate": 1.96395192477271e-07, + "loss": 0.3383278250694275, + "step": 7003 + }, + { + "epoch": 1.6149412035969566, + "grad_norm": 1.5430108527415651, + "learning_rate": 1.9616833820138091e-07, + "loss": 0.5164717435836792, + "step": 7004 + }, + { + "epoch": 1.615171777726539, + "grad_norm": 1.6927378959186403, + "learning_rate": 1.9594160077056932e-07, + "loss": 0.4548792243003845, + "step": 7005 + }, + { + "epoch": 1.6154023518561216, + "grad_norm": 1.608730816141968, + "learning_rate": 1.9571498021779531e-07, + "loss": 0.41074928641319275, + "step": 7006 + }, + { + "epoch": 1.6156329259857043, + "grad_norm": 1.5384399915677613, + "learning_rate": 1.9548847657599976e-07, + "loss": 0.4156193137168884, + "step": 7007 + }, + { + "epoch": 1.615863500115287, + "grad_norm": 1.742725966102226, + "learning_rate": 1.95262089878108e-07, + "loss": 0.4602770209312439, + "step": 7008 + }, + { + "epoch": 1.6160940742448697, + "grad_norm": 1.5880816009582301, + "learning_rate": 1.9503582015702713e-07, + "loss": 0.4911346733570099, + "step": 7009 + }, + { + "epoch": 1.6163246483744524, + "grad_norm": 1.5007140709934312, + "learning_rate": 
1.9480966744564764e-07, + "loss": 0.394087553024292, + "step": 7010 + }, + { + "epoch": 1.6165552225040352, + "grad_norm": 1.5836059389854649, + "learning_rate": 1.9458363177684367e-07, + "loss": 0.4845706820487976, + "step": 7011 + }, + { + "epoch": 1.6167857966336177, + "grad_norm": 1.7088454795128305, + "learning_rate": 1.9435771318347116e-07, + "loss": 0.49142736196517944, + "step": 7012 + }, + { + "epoch": 1.6170163707632004, + "grad_norm": 1.3798831769041013, + "learning_rate": 1.9413191169836996e-07, + "loss": 0.4408283829689026, + "step": 7013 + }, + { + "epoch": 1.6172469448927829, + "grad_norm": 1.6476950016993046, + "learning_rate": 1.9390622735436268e-07, + "loss": 0.6088640689849854, + "step": 7014 + }, + { + "epoch": 1.6174775190223656, + "grad_norm": 1.912745817268737, + "learning_rate": 1.93680660184255e-07, + "loss": 0.5208842158317566, + "step": 7015 + }, + { + "epoch": 1.6177080931519483, + "grad_norm": 1.7742607180865566, + "learning_rate": 1.9345521022083488e-07, + "loss": 0.5652821660041809, + "step": 7016 + }, + { + "epoch": 1.617938667281531, + "grad_norm": 1.5895189074949856, + "learning_rate": 1.9322987749687437e-07, + "loss": 0.4861832857131958, + "step": 7017 + }, + { + "epoch": 1.6181692414111137, + "grad_norm": 1.5693969535816144, + "learning_rate": 1.930046620451272e-07, + "loss": 0.39583832025527954, + "step": 7018 + }, + { + "epoch": 1.6183998155406965, + "grad_norm": 1.6283824576887038, + "learning_rate": 1.927795638983313e-07, + "loss": 0.5638653039932251, + "step": 7019 + }, + { + "epoch": 1.618630389670279, + "grad_norm": 1.7595661530223012, + "learning_rate": 1.9255458308920648e-07, + "loss": 0.4737275242805481, + "step": 7020 + }, + { + "epoch": 1.6188609637998617, + "grad_norm": 1.3807112997659796, + "learning_rate": 1.923297196504563e-07, + "loss": 0.4526802897453308, + "step": 7021 + }, + { + "epoch": 1.6190915379294442, + "grad_norm": 1.5519742811018764, + "learning_rate": 1.9210497361476708e-07, + "loss": 
0.40800565481185913, + "step": 7022 + }, + { + "epoch": 1.6193221120590269, + "grad_norm": 1.3169867108502276, + "learning_rate": 1.9188034501480744e-07, + "loss": 0.39532414078712463, + "step": 7023 + }, + { + "epoch": 1.6195526861886096, + "grad_norm": 1.3982522966659368, + "learning_rate": 1.9165583388322993e-07, + "loss": 0.40236538648605347, + "step": 7024 + }, + { + "epoch": 1.6197832603181923, + "grad_norm": 1.4838960013292628, + "learning_rate": 1.91431440252669e-07, + "loss": 0.4421047866344452, + "step": 7025 + }, + { + "epoch": 1.620013834447775, + "grad_norm": 1.5688320926864374, + "learning_rate": 1.9120716415574322e-07, + "loss": 0.4149084687232971, + "step": 7026 + }, + { + "epoch": 1.6202444085773577, + "grad_norm": 1.8747733544619556, + "learning_rate": 1.9098300562505264e-07, + "loss": 0.4186127185821533, + "step": 7027 + }, + { + "epoch": 1.6204749827069402, + "grad_norm": 1.5276498671204974, + "learning_rate": 1.9075896469318132e-07, + "loss": 0.4649406671524048, + "step": 7028 + }, + { + "epoch": 1.620705556836523, + "grad_norm": 1.5217002126023946, + "learning_rate": 1.9053504139269593e-07, + "loss": 0.43240052461624146, + "step": 7029 + }, + { + "epoch": 1.6209361309661054, + "grad_norm": 1.7731525747902717, + "learning_rate": 1.9031123575614628e-07, + "loss": 0.4874862730503082, + "step": 7030 + }, + { + "epoch": 1.6211667050956882, + "grad_norm": 1.6133636879972175, + "learning_rate": 1.900875478160644e-07, + "loss": 0.3771815896034241, + "step": 7031 + }, + { + "epoch": 1.6213972792252709, + "grad_norm": 1.548316338784864, + "learning_rate": 1.898639776049653e-07, + "loss": 0.49882376194000244, + "step": 7032 + }, + { + "epoch": 1.6216278533548536, + "grad_norm": 1.5189621230999546, + "learning_rate": 1.896405251553479e-07, + "loss": 0.3813830614089966, + "step": 7033 + }, + { + "epoch": 1.6218584274844363, + "grad_norm": 1.588790821712345, + "learning_rate": 1.8941719049969272e-07, + "loss": 0.41883599758148193, + "step": 7034 + }, + { + 
"epoch": 1.622089001614019, + "grad_norm": 1.4271058877816405, + "learning_rate": 1.8919397367046409e-07, + "loss": 0.42194586992263794, + "step": 7035 + }, + { + "epoch": 1.6223195757436015, + "grad_norm": 1.5957469997065072, + "learning_rate": 1.889708747001084e-07, + "loss": 0.36967700719833374, + "step": 7036 + }, + { + "epoch": 1.6225501498731842, + "grad_norm": 1.4373460175753532, + "learning_rate": 1.887478936210556e-07, + "loss": 0.4493946433067322, + "step": 7037 + }, + { + "epoch": 1.6227807240027667, + "grad_norm": 1.6526676224310628, + "learning_rate": 1.8852503046571833e-07, + "loss": 0.42121458053588867, + "step": 7038 + }, + { + "epoch": 1.6230112981323495, + "grad_norm": 1.430632776113786, + "learning_rate": 1.8830228526649207e-07, + "loss": 0.4529588222503662, + "step": 7039 + }, + { + "epoch": 1.6232418722619322, + "grad_norm": 1.537552702708545, + "learning_rate": 1.88079658055755e-07, + "loss": 0.387844443321228, + "step": 7040 + }, + { + "epoch": 1.623472446391515, + "grad_norm": 1.4872655198554567, + "learning_rate": 1.8785714886586802e-07, + "loss": 0.49954158067703247, + "step": 7041 + }, + { + "epoch": 1.6237030205210976, + "grad_norm": 1.3845875929093436, + "learning_rate": 1.8763475772917548e-07, + "loss": 0.4016296863555908, + "step": 7042 + }, + { + "epoch": 1.6239335946506803, + "grad_norm": 1.5208389143205874, + "learning_rate": 1.8741248467800362e-07, + "loss": 0.358657568693161, + "step": 7043 + }, + { + "epoch": 1.6241641687802628, + "grad_norm": 1.471037478852436, + "learning_rate": 1.8719032974466264e-07, + "loss": 0.434385746717453, + "step": 7044 + }, + { + "epoch": 1.6243947429098455, + "grad_norm": 1.4705602216948914, + "learning_rate": 1.8696829296144466e-07, + "loss": 0.4658992886543274, + "step": 7045 + }, + { + "epoch": 1.624625317039428, + "grad_norm": 1.8724382429627917, + "learning_rate": 1.8674637436062545e-07, + "loss": 0.5438188910484314, + "step": 7046 + }, + { + "epoch": 1.6248558911690107, + "grad_norm": 
1.9024479318941907, + "learning_rate": 1.8652457397446254e-07, + "loss": 0.47364577651023865, + "step": 7047 + }, + { + "epoch": 1.6250864652985935, + "grad_norm": 1.386287471529149, + "learning_rate": 1.8630289183519733e-07, + "loss": 0.3664509654045105, + "step": 7048 + }, + { + "epoch": 1.6253170394281762, + "grad_norm": 1.5676786934992741, + "learning_rate": 1.8608132797505317e-07, + "loss": 0.4226282835006714, + "step": 7049 + }, + { + "epoch": 1.625547613557759, + "grad_norm": 1.4581751590991685, + "learning_rate": 1.8585988242623706e-07, + "loss": 0.47477972507476807, + "step": 7050 + }, + { + "epoch": 1.6257781876873416, + "grad_norm": 2.082606809210874, + "learning_rate": 1.8563855522093786e-07, + "loss": 0.5372269749641418, + "step": 7051 + }, + { + "epoch": 1.626008761816924, + "grad_norm": 1.3565872618977541, + "learning_rate": 1.8541734639132788e-07, + "loss": 0.37929385900497437, + "step": 7052 + }, + { + "epoch": 1.6262393359465068, + "grad_norm": 1.5119164625864447, + "learning_rate": 1.8519625596956244e-07, + "loss": 0.4029538631439209, + "step": 7053 + }, + { + "epoch": 1.6264699100760893, + "grad_norm": 1.5739338248608081, + "learning_rate": 1.8497528398777874e-07, + "loss": 0.3932439982891083, + "step": 7054 + }, + { + "epoch": 1.626700484205672, + "grad_norm": 1.5806776566898322, + "learning_rate": 1.847544304780978e-07, + "loss": 0.45190152525901794, + "step": 7055 + }, + { + "epoch": 1.6269310583352548, + "grad_norm": 1.8629994959724827, + "learning_rate": 1.8453369547262242e-07, + "loss": 0.4852195382118225, + "step": 7056 + }, + { + "epoch": 1.6271616324648375, + "grad_norm": 1.608209634523461, + "learning_rate": 1.8431307900343918e-07, + "loss": 0.41676801443099976, + "step": 7057 + }, + { + "epoch": 1.6273922065944202, + "grad_norm": 1.388166685170728, + "learning_rate": 1.8409258110261626e-07, + "loss": 0.44374561309814453, + "step": 7058 + }, + { + "epoch": 1.627622780724003, + "grad_norm": 1.5975340281654677, + "learning_rate": 
1.838722018022061e-07, + "loss": 0.4348192811012268, + "step": 7059 + }, + { + "epoch": 1.6278533548535854, + "grad_norm": 1.626194256762104, + "learning_rate": 1.836519411342422e-07, + "loss": 0.46572640538215637, + "step": 7060 + }, + { + "epoch": 1.6280839289831681, + "grad_norm": 1.4985871084379754, + "learning_rate": 1.8343179913074214e-07, + "loss": 0.4633631408214569, + "step": 7061 + }, + { + "epoch": 1.6283145031127506, + "grad_norm": 1.3260867645697678, + "learning_rate": 1.8321177582370605e-07, + "loss": 0.44420552253723145, + "step": 7062 + }, + { + "epoch": 1.6285450772423333, + "grad_norm": 1.8207040168707305, + "learning_rate": 1.8299187124511594e-07, + "loss": 0.5628370046615601, + "step": 7063 + }, + { + "epoch": 1.628775651371916, + "grad_norm": 1.7448936691285617, + "learning_rate": 1.8277208542693778e-07, + "loss": 0.5342314839363098, + "step": 7064 + }, + { + "epoch": 1.6290062255014988, + "grad_norm": 1.529076197622531, + "learning_rate": 1.82552418401119e-07, + "loss": 0.440934419631958, + "step": 7065 + }, + { + "epoch": 1.6292367996310815, + "grad_norm": 1.4532572456773438, + "learning_rate": 1.823328701995912e-07, + "loss": 0.45218637585639954, + "step": 7066 + }, + { + "epoch": 1.6294673737606642, + "grad_norm": 1.456173637640115, + "learning_rate": 1.8211344085426716e-07, + "loss": 0.4059211015701294, + "step": 7067 + }, + { + "epoch": 1.6296979478902467, + "grad_norm": 2.0474805024349876, + "learning_rate": 1.818941303970435e-07, + "loss": 0.5036444067955017, + "step": 7068 + }, + { + "epoch": 1.6299285220198294, + "grad_norm": 1.6421868165266436, + "learning_rate": 1.8167493885979935e-07, + "loss": 0.5034196972846985, + "step": 7069 + }, + { + "epoch": 1.630159096149412, + "grad_norm": 1.5247456374523982, + "learning_rate": 1.8145586627439645e-07, + "loss": 0.4199259281158447, + "step": 7070 + }, + { + "epoch": 1.6303896702789946, + "grad_norm": 1.5913722133067008, + "learning_rate": 1.8123691267267915e-07, + "loss": 
0.5439015626907349, + "step": 7071 + }, + { + "epoch": 1.6306202444085773, + "grad_norm": 1.6181852234306913, + "learning_rate": 1.810180780864743e-07, + "loss": 0.4349868893623352, + "step": 7072 + }, + { + "epoch": 1.63085081853816, + "grad_norm": 1.5299206997440553, + "learning_rate": 1.807993625475921e-07, + "loss": 0.39939552545547485, + "step": 7073 + }, + { + "epoch": 1.6310813926677428, + "grad_norm": 1.575600412629914, + "learning_rate": 1.8058076608782468e-07, + "loss": 0.43073540925979614, + "step": 7074 + }, + { + "epoch": 1.6313119667973255, + "grad_norm": 1.6461603718238804, + "learning_rate": 1.8036228873894744e-07, + "loss": 0.4735824465751648, + "step": 7075 + }, + { + "epoch": 1.631542540926908, + "grad_norm": 1.466337846989889, + "learning_rate": 1.8014393053271836e-07, + "loss": 0.42971551418304443, + "step": 7076 + }, + { + "epoch": 1.6317731150564907, + "grad_norm": 1.694502155411865, + "learning_rate": 1.7992569150087823e-07, + "loss": 0.48593759536743164, + "step": 7077 + }, + { + "epoch": 1.6320036891860732, + "grad_norm": 1.55292324755966, + "learning_rate": 1.7970757167514973e-07, + "loss": 0.530194878578186, + "step": 7078 + }, + { + "epoch": 1.632234263315656, + "grad_norm": 1.7324585048939796, + "learning_rate": 1.794895710872394e-07, + "loss": 0.43393629789352417, + "step": 7079 + }, + { + "epoch": 1.6324648374452386, + "grad_norm": 1.5827349286667418, + "learning_rate": 1.7927168976883556e-07, + "loss": 0.4211798906326294, + "step": 7080 + }, + { + "epoch": 1.6326954115748213, + "grad_norm": 1.5939322533043618, + "learning_rate": 1.790539277516091e-07, + "loss": 0.39001476764678955, + "step": 7081 + }, + { + "epoch": 1.632925985704404, + "grad_norm": 1.6028280785725797, + "learning_rate": 1.788362850672146e-07, + "loss": 0.4360283613204956, + "step": 7082 + }, + { + "epoch": 1.6331565598339868, + "grad_norm": 1.6516207153980025, + "learning_rate": 1.7861876174728807e-07, + "loss": 0.47754842042922974, + "step": 7083 + }, + { + 
"epoch": 1.6333871339635693, + "grad_norm": 1.634690883802538, + "learning_rate": 1.7840135782344888e-07, + "loss": 0.35193490982055664, + "step": 7084 + }, + { + "epoch": 1.633617708093152, + "grad_norm": 1.2825662437681398, + "learning_rate": 1.7818407332729912e-07, + "loss": 0.39997392892837524, + "step": 7085 + }, + { + "epoch": 1.6338482822227345, + "grad_norm": 1.324570823301632, + "learning_rate": 1.7796690829042328e-07, + "loss": 0.3255331218242645, + "step": 7086 + }, + { + "epoch": 1.6340788563523172, + "grad_norm": 1.424074701555127, + "learning_rate": 1.777498627443882e-07, + "loss": 0.47072282433509827, + "step": 7087 + }, + { + "epoch": 1.6343094304819, + "grad_norm": 1.5293726959445282, + "learning_rate": 1.775329367207441e-07, + "loss": 0.4231484830379486, + "step": 7088 + }, + { + "epoch": 1.6345400046114826, + "grad_norm": 1.4406985915809287, + "learning_rate": 1.7731613025102276e-07, + "loss": 0.37112197279930115, + "step": 7089 + }, + { + "epoch": 1.6347705787410653, + "grad_norm": 1.5117815815493545, + "learning_rate": 1.7709944336673986e-07, + "loss": 0.5772623419761658, + "step": 7090 + }, + { + "epoch": 1.635001152870648, + "grad_norm": 1.4205344879838042, + "learning_rate": 1.7688287609939244e-07, + "loss": 0.45922917127609253, + "step": 7091 + }, + { + "epoch": 1.6352317270002306, + "grad_norm": 1.6262912271430976, + "learning_rate": 1.7666642848046098e-07, + "loss": 0.42784950137138367, + "step": 7092 + }, + { + "epoch": 1.6354623011298133, + "grad_norm": 1.585709168390131, + "learning_rate": 1.7645010054140873e-07, + "loss": 0.4676967263221741, + "step": 7093 + }, + { + "epoch": 1.6356928752593958, + "grad_norm": 1.4782811209898545, + "learning_rate": 1.7623389231368046e-07, + "loss": 0.434337317943573, + "step": 7094 + }, + { + "epoch": 1.6359234493889785, + "grad_norm": 1.512954791126533, + "learning_rate": 1.760178038287048e-07, + "loss": 0.4667350947856903, + "step": 7095 + }, + { + "epoch": 1.6361540235185612, + "grad_norm": 
1.3397712801467159, + "learning_rate": 1.7580183511789204e-07, + "loss": 0.42233705520629883, + "step": 7096 + }, + { + "epoch": 1.636384597648144, + "grad_norm": 1.5093056460018237, + "learning_rate": 1.7558598621263565e-07, + "loss": 0.4488460421562195, + "step": 7097 + }, + { + "epoch": 1.6366151717777266, + "grad_norm": 1.6708888950919063, + "learning_rate": 1.753702571443112e-07, + "loss": 0.4264194667339325, + "step": 7098 + }, + { + "epoch": 1.6368457459073094, + "grad_norm": 1.414729354018089, + "learning_rate": 1.7515464794427715e-07, + "loss": 0.32695144414901733, + "step": 7099 + }, + { + "epoch": 1.6370763200368919, + "grad_norm": 2.0744464699438825, + "learning_rate": 1.7493915864387487e-07, + "loss": 0.3573018014431, + "step": 7100 + }, + { + "epoch": 1.6373068941664746, + "grad_norm": 1.4506197336511393, + "learning_rate": 1.7472378927442732e-07, + "loss": 0.4545198082923889, + "step": 7101 + }, + { + "epoch": 1.637537468296057, + "grad_norm": 1.59875503504847, + "learning_rate": 1.7450853986724123e-07, + "loss": 0.42589202523231506, + "step": 7102 + }, + { + "epoch": 1.6377680424256398, + "grad_norm": 1.5169081767342318, + "learning_rate": 1.742934104536048e-07, + "loss": 0.4403502345085144, + "step": 7103 + }, + { + "epoch": 1.6379986165552225, + "grad_norm": 1.7606747961526963, + "learning_rate": 1.7407840106478955e-07, + "loss": 0.4262208938598633, + "step": 7104 + }, + { + "epoch": 1.6382291906848052, + "grad_norm": 1.6000265796951778, + "learning_rate": 1.7386351173204905e-07, + "loss": 0.4706578254699707, + "step": 7105 + }, + { + "epoch": 1.638459764814388, + "grad_norm": 1.4657752166922586, + "learning_rate": 1.7364874248661986e-07, + "loss": 0.4526079297065735, + "step": 7106 + }, + { + "epoch": 1.6386903389439706, + "grad_norm": 1.7833403214487409, + "learning_rate": 1.734340933597207e-07, + "loss": 0.42836326360702515, + "step": 7107 + }, + { + "epoch": 1.6389209130735531, + "grad_norm": 1.4453465477500804, + "learning_rate": 
1.7321956438255292e-07, + "loss": 0.42680823802948, + "step": 7108 + }, + { + "epoch": 1.6391514872031359, + "grad_norm": 1.3964828689114657, + "learning_rate": 1.7300515558630068e-07, + "loss": 0.38365036249160767, + "step": 7109 + }, + { + "epoch": 1.6393820613327184, + "grad_norm": 1.4748773918598719, + "learning_rate": 1.7279086700213063e-07, + "loss": 0.4153991937637329, + "step": 7110 + }, + { + "epoch": 1.639612635462301, + "grad_norm": 1.5777502702437645, + "learning_rate": 1.7257669866119163e-07, + "loss": 0.42257291078567505, + "step": 7111 + }, + { + "epoch": 1.6398432095918838, + "grad_norm": 1.7309640190055833, + "learning_rate": 1.7236265059461498e-07, + "loss": 0.34990063309669495, + "step": 7112 + }, + { + "epoch": 1.6400737837214665, + "grad_norm": 1.3939407429934887, + "learning_rate": 1.72148722833515e-07, + "loss": 0.44848760962486267, + "step": 7113 + }, + { + "epoch": 1.6403043578510492, + "grad_norm": 1.4649667660689574, + "learning_rate": 1.7193491540898808e-07, + "loss": 0.4649186134338379, + "step": 7114 + }, + { + "epoch": 1.640534931980632, + "grad_norm": 1.5050161434573055, + "learning_rate": 1.7172122835211333e-07, + "loss": 0.480952650308609, + "step": 7115 + }, + { + "epoch": 1.6407655061102144, + "grad_norm": 1.6101365826637175, + "learning_rate": 1.7150766169395235e-07, + "loss": 0.4669501483440399, + "step": 7116 + }, + { + "epoch": 1.6409960802397972, + "grad_norm": 1.486994174732026, + "learning_rate": 1.7129421546554957e-07, + "loss": 0.4273250102996826, + "step": 7117 + }, + { + "epoch": 1.6412266543693796, + "grad_norm": 1.8106380448833757, + "learning_rate": 1.71080889697931e-07, + "loss": 0.47923076152801514, + "step": 7118 + }, + { + "epoch": 1.6414572284989624, + "grad_norm": 1.5033931180120297, + "learning_rate": 1.708676844221061e-07, + "loss": 0.42801159620285034, + "step": 7119 + }, + { + "epoch": 1.641687802628545, + "grad_norm": 1.4792875147029159, + "learning_rate": 1.7065459966906636e-07, + "loss": 
0.39929044246673584, + "step": 7120 + }, + { + "epoch": 1.6419183767581278, + "grad_norm": 1.4727601001923896, + "learning_rate": 1.7044163546978553e-07, + "loss": 0.4919764995574951, + "step": 7121 + }, + { + "epoch": 1.6421489508877105, + "grad_norm": 1.5018740505050776, + "learning_rate": 1.702287918552202e-07, + "loss": 0.45943617820739746, + "step": 7122 + }, + { + "epoch": 1.642379525017293, + "grad_norm": 1.5202994857697039, + "learning_rate": 1.7001606885630948e-07, + "loss": 0.48078954219818115, + "step": 7123 + }, + { + "epoch": 1.6426100991468757, + "grad_norm": 1.406204806461001, + "learning_rate": 1.6980346650397505e-07, + "loss": 0.4217113256454468, + "step": 7124 + }, + { + "epoch": 1.6428406732764582, + "grad_norm": 1.479814078881505, + "learning_rate": 1.6959098482912037e-07, + "loss": 0.4643937051296234, + "step": 7125 + }, + { + "epoch": 1.643071247406041, + "grad_norm": 1.6157838326637273, + "learning_rate": 1.6937862386263212e-07, + "loss": 0.43977001309394836, + "step": 7126 + }, + { + "epoch": 1.6433018215356237, + "grad_norm": 1.4653862858165947, + "learning_rate": 1.6916638363537882e-07, + "loss": 0.3872392177581787, + "step": 7127 + }, + { + "epoch": 1.6435323956652064, + "grad_norm": 1.4668608493131068, + "learning_rate": 1.6895426417821213e-07, + "loss": 0.44625502824783325, + "step": 7128 + }, + { + "epoch": 1.643762969794789, + "grad_norm": 1.6445652935798991, + "learning_rate": 1.6874226552196523e-07, + "loss": 0.36836186051368713, + "step": 7129 + }, + { + "epoch": 1.6439935439243718, + "grad_norm": 1.5181829131466213, + "learning_rate": 1.6853038769745465e-07, + "loss": 0.35491907596588135, + "step": 7130 + }, + { + "epoch": 1.6442241180539543, + "grad_norm": 1.5107933584098798, + "learning_rate": 1.6831863073547913e-07, + "loss": 0.5210527181625366, + "step": 7131 + }, + { + "epoch": 1.644454692183537, + "grad_norm": 1.5854667470103982, + "learning_rate": 1.6810699466681932e-07, + "loss": 0.3805693984031677, + "step": 7132 + }, + { 
+ "epoch": 1.6446852663131195, + "grad_norm": 1.8089883418272688, + "learning_rate": 1.6789547952223893e-07, + "loss": 0.5768346786499023, + "step": 7133 + }, + { + "epoch": 1.6449158404427022, + "grad_norm": 1.8423402992377882, + "learning_rate": 1.6768408533248356e-07, + "loss": 0.46465635299682617, + "step": 7134 + }, + { + "epoch": 1.645146414572285, + "grad_norm": 1.8710111931219464, + "learning_rate": 1.674728121282819e-07, + "loss": 0.43119215965270996, + "step": 7135 + }, + { + "epoch": 1.6453769887018677, + "grad_norm": 1.4436891948188744, + "learning_rate": 1.6726165994034402e-07, + "loss": 0.42814093828201294, + "step": 7136 + }, + { + "epoch": 1.6456075628314504, + "grad_norm": 1.5822684467576347, + "learning_rate": 1.6705062879936382e-07, + "loss": 0.41762328147888184, + "step": 7137 + }, + { + "epoch": 1.645838136961033, + "grad_norm": 2.059560914873905, + "learning_rate": 1.668397187360161e-07, + "loss": 0.42717012763023376, + "step": 7138 + }, + { + "epoch": 1.6460687110906156, + "grad_norm": 1.3692759576709286, + "learning_rate": 1.666289297809591e-07, + "loss": 0.37660926580429077, + "step": 7139 + }, + { + "epoch": 1.6462992852201983, + "grad_norm": 1.689926156627043, + "learning_rate": 1.664182619648331e-07, + "loss": 0.3905887007713318, + "step": 7140 + }, + { + "epoch": 1.6465298593497808, + "grad_norm": 1.5648955881343065, + "learning_rate": 1.6620771531826117e-07, + "loss": 0.4848547577857971, + "step": 7141 + }, + { + "epoch": 1.6467604334793635, + "grad_norm": 1.5642509939041707, + "learning_rate": 1.659972898718479e-07, + "loss": 0.37895438075065613, + "step": 7142 + }, + { + "epoch": 1.6469910076089462, + "grad_norm": 1.6050388867308452, + "learning_rate": 1.6578698565618075e-07, + "loss": 0.46770527958869934, + "step": 7143 + }, + { + "epoch": 1.647221581738529, + "grad_norm": 1.705579614415488, + "learning_rate": 1.6557680270182995e-07, + "loss": 0.44138044118881226, + "step": 7144 + }, + { + "epoch": 1.6474521558681117, + "grad_norm": 
1.7922951246817975, + "learning_rate": 1.6536674103934734e-07, + "loss": 0.3681126832962036, + "step": 7145 + }, + { + "epoch": 1.6476827299976944, + "grad_norm": 1.454313444949356, + "learning_rate": 1.651568006992675e-07, + "loss": 0.4410884380340576, + "step": 7146 + }, + { + "epoch": 1.6479133041272769, + "grad_norm": 1.444668904765709, + "learning_rate": 1.6494698171210776e-07, + "loss": 0.4161960482597351, + "step": 7147 + }, + { + "epoch": 1.6481438782568596, + "grad_norm": 1.6873012096950248, + "learning_rate": 1.647372841083674e-07, + "loss": 0.4912784695625305, + "step": 7148 + }, + { + "epoch": 1.648374452386442, + "grad_norm": 1.8457570973340096, + "learning_rate": 1.6452770791852766e-07, + "loss": 0.5137985944747925, + "step": 7149 + }, + { + "epoch": 1.6486050265160248, + "grad_norm": 1.845102008062213, + "learning_rate": 1.6431825317305303e-07, + "loss": 0.43644070625305176, + "step": 7150 + }, + { + "epoch": 1.6488356006456075, + "grad_norm": 1.508191131690363, + "learning_rate": 1.6410891990238973e-07, + "loss": 0.4319378733634949, + "step": 7151 + }, + { + "epoch": 1.6490661747751902, + "grad_norm": 1.6137067673031091, + "learning_rate": 1.6389970813696619e-07, + "loss": 0.474090039730072, + "step": 7152 + }, + { + "epoch": 1.649296748904773, + "grad_norm": 1.656766330100741, + "learning_rate": 1.6369061790719375e-07, + "loss": 0.40291503071784973, + "step": 7153 + }, + { + "epoch": 1.6495273230343557, + "grad_norm": 1.5434308580585603, + "learning_rate": 1.6348164924346562e-07, + "loss": 0.51482754945755, + "step": 7154 + }, + { + "epoch": 1.6497578971639382, + "grad_norm": 1.421069671161851, + "learning_rate": 1.632728021761579e-07, + "loss": 0.35308974981307983, + "step": 7155 + }, + { + "epoch": 1.6499884712935209, + "grad_norm": 1.7501565194944115, + "learning_rate": 1.6306407673562815e-07, + "loss": 0.5269055366516113, + "step": 7156 + }, + { + "epoch": 1.6502190454231034, + "grad_norm": 1.4775332310798848, + "learning_rate": 
1.6285547295221724e-07, + "loss": 0.41290512681007385, + "step": 7157 + }, + { + "epoch": 1.650449619552686, + "grad_norm": 1.4513808656924674, + "learning_rate": 1.6264699085624721e-07, + "loss": 0.39930522441864014, + "step": 7158 + }, + { + "epoch": 1.6506801936822688, + "grad_norm": 1.475028134913826, + "learning_rate": 1.6243863047802365e-07, + "loss": 0.4617648422718048, + "step": 7159 + }, + { + "epoch": 1.6509107678118515, + "grad_norm": 1.6583284073308129, + "learning_rate": 1.6223039184783337e-07, + "loss": 0.4618498980998993, + "step": 7160 + }, + { + "epoch": 1.6511413419414342, + "grad_norm": 1.5177380348824272, + "learning_rate": 1.6202227499594635e-07, + "loss": 0.43138834834098816, + "step": 7161 + }, + { + "epoch": 1.651371916071017, + "grad_norm": 1.9944130162827052, + "learning_rate": 1.618142799526141e-07, + "loss": 0.5330632925033569, + "step": 7162 + }, + { + "epoch": 1.6516024902005995, + "grad_norm": 1.4381555357456468, + "learning_rate": 1.6160640674807103e-07, + "loss": 0.45410698652267456, + "step": 7163 + }, + { + "epoch": 1.6518330643301822, + "grad_norm": 1.52256812211894, + "learning_rate": 1.6139865541253384e-07, + "loss": 0.4216715693473816, + "step": 7164 + }, + { + "epoch": 1.6520636384597647, + "grad_norm": 1.6818151368938485, + "learning_rate": 1.6119102597620083e-07, + "loss": 0.3738868832588196, + "step": 7165 + }, + { + "epoch": 1.6522942125893474, + "grad_norm": 1.587335339212439, + "learning_rate": 1.609835184692535e-07, + "loss": 0.44595998525619507, + "step": 7166 + }, + { + "epoch": 1.65252478671893, + "grad_norm": 1.8461813575956394, + "learning_rate": 1.6077613292185466e-07, + "loss": 0.5446096062660217, + "step": 7167 + }, + { + "epoch": 1.6527553608485128, + "grad_norm": 1.5661326715584178, + "learning_rate": 1.605688693641505e-07, + "loss": 0.47280746698379517, + "step": 7168 + }, + { + "epoch": 1.6529859349780955, + "grad_norm": 1.6260653553703972, + "learning_rate": 1.6036172782626823e-07, + "loss": 
0.5280133485794067, + "step": 7169 + }, + { + "epoch": 1.6532165091076783, + "grad_norm": 1.6507744528919734, + "learning_rate": 1.6015470833831835e-07, + "loss": 0.4659959375858307, + "step": 7170 + }, + { + "epoch": 1.6534470832372608, + "grad_norm": 1.5548632331284282, + "learning_rate": 1.5994781093039335e-07, + "loss": 0.5196797251701355, + "step": 7171 + }, + { + "epoch": 1.6536776573668435, + "grad_norm": 1.298650586457363, + "learning_rate": 1.597410356325676e-07, + "loss": 0.41855669021606445, + "step": 7172 + }, + { + "epoch": 1.653908231496426, + "grad_norm": 1.6301682003715197, + "learning_rate": 1.5953438247489814e-07, + "loss": 0.43063706159591675, + "step": 7173 + }, + { + "epoch": 1.6541388056260087, + "grad_norm": 1.556025937846025, + "learning_rate": 1.59327851487424e-07, + "loss": 0.3954850435256958, + "step": 7174 + }, + { + "epoch": 1.6543693797555914, + "grad_norm": 1.6096102290125367, + "learning_rate": 1.591214427001667e-07, + "loss": 0.4497464895248413, + "step": 7175 + }, + { + "epoch": 1.6545999538851741, + "grad_norm": 1.573427243133678, + "learning_rate": 1.5891515614312967e-07, + "loss": 0.47012704610824585, + "step": 7176 + }, + { + "epoch": 1.6548305280147568, + "grad_norm": 1.345166831078004, + "learning_rate": 1.5870899184629872e-07, + "loss": 0.399054616689682, + "step": 7177 + }, + { + "epoch": 1.6550611021443395, + "grad_norm": 1.68897296856965, + "learning_rate": 1.5850294983964208e-07, + "loss": 0.41277164220809937, + "step": 7178 + }, + { + "epoch": 1.655291676273922, + "grad_norm": 1.6410807386564468, + "learning_rate": 1.5829703015311013e-07, + "loss": 0.4735640287399292, + "step": 7179 + }, + { + "epoch": 1.6555222504035048, + "grad_norm": 1.5414168893805387, + "learning_rate": 1.5809123281663516e-07, + "loss": 0.4244140386581421, + "step": 7180 + }, + { + "epoch": 1.6557528245330873, + "grad_norm": 1.6196858148033184, + "learning_rate": 1.5788555786013212e-07, + "loss": 0.4291320741176605, + "step": 7181 + }, + { + 
"epoch": 1.65598339866267, + "grad_norm": 1.8656270771434302, + "learning_rate": 1.576800053134979e-07, + "loss": 0.3965643048286438, + "step": 7182 + }, + { + "epoch": 1.6562139727922527, + "grad_norm": 1.5939688831505687, + "learning_rate": 1.5747457520661123e-07, + "loss": 0.4087764620780945, + "step": 7183 + }, + { + "epoch": 1.6564445469218354, + "grad_norm": 1.523375144006796, + "learning_rate": 1.5726926756933411e-07, + "loss": 0.4207920432090759, + "step": 7184 + }, + { + "epoch": 1.6566751210514181, + "grad_norm": 1.757376584691626, + "learning_rate": 1.570640824315095e-07, + "loss": 0.34311753511428833, + "step": 7185 + }, + { + "epoch": 1.6569056951810008, + "grad_norm": 2.079059544313622, + "learning_rate": 1.5685901982296345e-07, + "loss": 0.44728145003318787, + "step": 7186 + }, + { + "epoch": 1.6571362693105833, + "grad_norm": 1.6933442739443483, + "learning_rate": 1.5665407977350386e-07, + "loss": 0.38300156593322754, + "step": 7187 + }, + { + "epoch": 1.657366843440166, + "grad_norm": 1.4613322908312483, + "learning_rate": 1.56449262312921e-07, + "loss": 0.32724204659461975, + "step": 7188 + }, + { + "epoch": 1.6575974175697485, + "grad_norm": 1.5277123552551555, + "learning_rate": 1.562445674709868e-07, + "loss": 0.4812743067741394, + "step": 7189 + }, + { + "epoch": 1.6578279916993313, + "grad_norm": 1.279031260784297, + "learning_rate": 1.5603999527745615e-07, + "loss": 0.3974485397338867, + "step": 7190 + }, + { + "epoch": 1.658058565828914, + "grad_norm": 1.729819799365075, + "learning_rate": 1.5583554576206536e-07, + "loss": 0.5058138370513916, + "step": 7191 + }, + { + "epoch": 1.6582891399584967, + "grad_norm": 1.451214505055382, + "learning_rate": 1.5563121895453323e-07, + "loss": 0.4442358613014221, + "step": 7192 + }, + { + "epoch": 1.6585197140880794, + "grad_norm": 1.6317499919466611, + "learning_rate": 1.5542701488456077e-07, + "loss": 0.35400623083114624, + "step": 7193 + }, + { + "epoch": 1.6587502882176621, + "grad_norm": 
1.8335890419904581, + "learning_rate": 1.5522293358183125e-07, + "loss": 0.5046352744102478, + "step": 7194 + }, + { + "epoch": 1.6589808623472446, + "grad_norm": 1.8150914477063191, + "learning_rate": 1.5501897507601015e-07, + "loss": 0.45344769954681396, + "step": 7195 + }, + { + "epoch": 1.6592114364768273, + "grad_norm": 1.7111771949579255, + "learning_rate": 1.548151393967444e-07, + "loss": 0.4251500368118286, + "step": 7196 + }, + { + "epoch": 1.6594420106064098, + "grad_norm": 1.4323459769713944, + "learning_rate": 1.5461142657366399e-07, + "loss": 0.3728788495063782, + "step": 7197 + }, + { + "epoch": 1.6596725847359926, + "grad_norm": 1.5246938682723656, + "learning_rate": 1.5440783663638036e-07, + "loss": 0.3143829107284546, + "step": 7198 + }, + { + "epoch": 1.6599031588655753, + "grad_norm": 1.3416076020806418, + "learning_rate": 1.5420436961448758e-07, + "loss": 0.5070813894271851, + "step": 7199 + }, + { + "epoch": 1.660133732995158, + "grad_norm": 1.2380684135092845, + "learning_rate": 1.5400102553756145e-07, + "loss": 0.3644014000892639, + "step": 7200 + }, + { + "epoch": 1.6603643071247407, + "grad_norm": 2.973338937285917, + "learning_rate": 1.5379780443516023e-07, + "loss": 0.4120270609855652, + "step": 7201 + }, + { + "epoch": 1.6605948812543234, + "grad_norm": 1.6150469405356445, + "learning_rate": 1.5359470633682425e-07, + "loss": 0.4327865242958069, + "step": 7202 + }, + { + "epoch": 1.660825455383906, + "grad_norm": 2.011470811225138, + "learning_rate": 1.5339173127207562e-07, + "loss": 0.626624584197998, + "step": 7203 + }, + { + "epoch": 1.6610560295134886, + "grad_norm": 1.6601868604564274, + "learning_rate": 1.5318887927041913e-07, + "loss": 0.45536088943481445, + "step": 7204 + }, + { + "epoch": 1.6612866036430711, + "grad_norm": 1.6789895391694964, + "learning_rate": 1.52986150361341e-07, + "loss": 0.5306276082992554, + "step": 7205 + }, + { + "epoch": 1.6615171777726538, + "grad_norm": 1.5374267124283623, + "learning_rate": 
1.5278354457431043e-07, + "loss": 0.4263244867324829, + "step": 7206 + }, + { + "epoch": 1.6617477519022366, + "grad_norm": 1.5390387444640852, + "learning_rate": 1.5258106193877762e-07, + "loss": 0.4578266143798828, + "step": 7207 + }, + { + "epoch": 1.6619783260318193, + "grad_norm": 1.4963429405053086, + "learning_rate": 1.5237870248417605e-07, + "loss": 0.5120365619659424, + "step": 7208 + }, + { + "epoch": 1.662208900161402, + "grad_norm": 1.7987725718508283, + "learning_rate": 1.521764662399202e-07, + "loss": 0.4491463005542755, + "step": 7209 + }, + { + "epoch": 1.6624394742909847, + "grad_norm": 1.588713571736857, + "learning_rate": 1.5197435323540752e-07, + "loss": 0.4810635447502136, + "step": 7210 + }, + { + "epoch": 1.6626700484205672, + "grad_norm": 1.549550087406024, + "learning_rate": 1.5177236350001722e-07, + "loss": 0.4250200390815735, + "step": 7211 + }, + { + "epoch": 1.66290062255015, + "grad_norm": 1.8619243359226805, + "learning_rate": 1.515704970631102e-07, + "loss": 0.49981385469436646, + "step": 7212 + }, + { + "epoch": 1.6631311966797324, + "grad_norm": 1.621928409701738, + "learning_rate": 1.5136875395403027e-07, + "loss": 0.40204358100891113, + "step": 7213 + }, + { + "epoch": 1.6633617708093151, + "grad_norm": 1.504987607563178, + "learning_rate": 1.5116713420210236e-07, + "loss": 0.514127254486084, + "step": 7214 + }, + { + "epoch": 1.6635923449388978, + "grad_norm": 1.8745773841611948, + "learning_rate": 1.509656378366343e-07, + "loss": 0.5119338631629944, + "step": 7215 + }, + { + "epoch": 1.6638229190684806, + "grad_norm": 1.6137446017437618, + "learning_rate": 1.507642648869153e-07, + "loss": 0.45031970739364624, + "step": 7216 + }, + { + "epoch": 1.6640534931980633, + "grad_norm": 1.427878863576358, + "learning_rate": 1.5056301538221716e-07, + "loss": 0.4503582715988159, + "step": 7217 + }, + { + "epoch": 1.664284067327646, + "grad_norm": 1.4651953746761925, + "learning_rate": 1.503618893517935e-07, + "loss": 0.38793227076530457, 
+ "step": 7218 + }, + { + "epoch": 1.6645146414572285, + "grad_norm": 1.4683280962315126, + "learning_rate": 1.5016088682488026e-07, + "loss": 0.4446987211704254, + "step": 7219 + }, + { + "epoch": 1.6647452155868112, + "grad_norm": 1.7835855909787117, + "learning_rate": 1.4996000783069485e-07, + "loss": 0.4687119722366333, + "step": 7220 + }, + { + "epoch": 1.6649757897163937, + "grad_norm": 1.6205230957470973, + "learning_rate": 1.4975925239843734e-07, + "loss": 0.48283010721206665, + "step": 7221 + }, + { + "epoch": 1.6652063638459764, + "grad_norm": 1.630894562773258, + "learning_rate": 1.4955862055728941e-07, + "loss": 0.510201632976532, + "step": 7222 + }, + { + "epoch": 1.6654369379755591, + "grad_norm": 1.4932233099831633, + "learning_rate": 1.4935811233641471e-07, + "loss": 0.4070482850074768, + "step": 7223 + }, + { + "epoch": 1.6656675121051419, + "grad_norm": 1.5683915035975688, + "learning_rate": 1.4915772776495948e-07, + "loss": 0.44347989559173584, + "step": 7224 + }, + { + "epoch": 1.6658980862347246, + "grad_norm": 1.6817444257008654, + "learning_rate": 1.4895746687205147e-07, + "loss": 0.4160166382789612, + "step": 7225 + }, + { + "epoch": 1.6661286603643073, + "grad_norm": 1.5428277862719844, + "learning_rate": 1.4875732968680098e-07, + "loss": 0.39939236640930176, + "step": 7226 + }, + { + "epoch": 1.6663592344938898, + "grad_norm": 1.8461591057744162, + "learning_rate": 1.4855731623829936e-07, + "loss": 0.4604174494743347, + "step": 7227 + }, + { + "epoch": 1.6665898086234725, + "grad_norm": 1.5963571116977615, + "learning_rate": 1.4835742655562134e-07, + "loss": 0.4691208004951477, + "step": 7228 + }, + { + "epoch": 1.666820382753055, + "grad_norm": 1.358957710417088, + "learning_rate": 1.481576606678222e-07, + "loss": 0.4146147668361664, + "step": 7229 + }, + { + "epoch": 1.6670509568826377, + "grad_norm": 1.4681059084163257, + "learning_rate": 1.4795801860394041e-07, + "loss": 0.4064391255378723, + "step": 7230 + }, + { + "epoch": 
1.6672815310122204, + "grad_norm": 1.233349352710464, + "learning_rate": 1.4775850039299587e-07, + "loss": 0.3696960210800171, + "step": 7231 + }, + { + "epoch": 1.6675121051418031, + "grad_norm": 1.763624641268307, + "learning_rate": 1.4755910606399023e-07, + "loss": 0.4356287121772766, + "step": 7232 + }, + { + "epoch": 1.6677426792713859, + "grad_norm": 1.6119962512147328, + "learning_rate": 1.473598356459078e-07, + "loss": 0.39327436685562134, + "step": 7233 + }, + { + "epoch": 1.6679732534009684, + "grad_norm": 1.4528281796334948, + "learning_rate": 1.4716068916771452e-07, + "loss": 0.4722225069999695, + "step": 7234 + }, + { + "epoch": 1.668203827530551, + "grad_norm": 1.3954919737652625, + "learning_rate": 1.4696166665835852e-07, + "loss": 0.3645583987236023, + "step": 7235 + }, + { + "epoch": 1.6684344016601336, + "grad_norm": 1.628738998914794, + "learning_rate": 1.4676276814676935e-07, + "loss": 0.4153117537498474, + "step": 7236 + }, + { + "epoch": 1.6686649757897163, + "grad_norm": 1.2987847859472657, + "learning_rate": 1.4656399366185933e-07, + "loss": 0.3470612168312073, + "step": 7237 + }, + { + "epoch": 1.668895549919299, + "grad_norm": 1.424067964832139, + "learning_rate": 1.4636534323252203e-07, + "loss": 0.3934207260608673, + "step": 7238 + }, + { + "epoch": 1.6691261240488817, + "grad_norm": 1.6191654953115664, + "learning_rate": 1.4616681688763355e-07, + "loss": 0.35530412197113037, + "step": 7239 + }, + { + "epoch": 1.6693566981784644, + "grad_norm": 1.5867473768730196, + "learning_rate": 1.4596841465605136e-07, + "loss": 0.5218726396560669, + "step": 7240 + }, + { + "epoch": 1.6695872723080472, + "grad_norm": 1.9070671037743527, + "learning_rate": 1.4577013656661542e-07, + "loss": 0.4287494421005249, + "step": 7241 + }, + { + "epoch": 1.6698178464376296, + "grad_norm": 2.099754040079973, + "learning_rate": 1.4557198264814775e-07, + "loss": 0.5161805152893066, + "step": 7242 + }, + { + "epoch": 1.6700484205672124, + "grad_norm": 
1.485709070131558, + "learning_rate": 1.4537395292945153e-07, + "loss": 0.4843006730079651, + "step": 7243 + }, + { + "epoch": 1.6702789946967949, + "grad_norm": 1.416657421952009, + "learning_rate": 1.4517604743931288e-07, + "loss": 0.526993989944458, + "step": 7244 + }, + { + "epoch": 1.6705095688263776, + "grad_norm": 1.318696888956493, + "learning_rate": 1.4497826620649888e-07, + "loss": 0.43705734610557556, + "step": 7245 + }, + { + "epoch": 1.6707401429559603, + "grad_norm": 1.626300355229789, + "learning_rate": 1.4478060925975942e-07, + "loss": 0.6001747846603394, + "step": 7246 + }, + { + "epoch": 1.670970717085543, + "grad_norm": 1.6701240840694564, + "learning_rate": 1.4458307662782564e-07, + "loss": 0.4041635990142822, + "step": 7247 + }, + { + "epoch": 1.6712012912151257, + "grad_norm": 1.6291301094782007, + "learning_rate": 1.4438566833941112e-07, + "loss": 0.4425908923149109, + "step": 7248 + }, + { + "epoch": 1.6714318653447084, + "grad_norm": 1.8234242321709921, + "learning_rate": 1.4418838442321102e-07, + "loss": 0.5202267169952393, + "step": 7249 + }, + { + "epoch": 1.671662439474291, + "grad_norm": 1.3646967283137599, + "learning_rate": 1.4399122490790293e-07, + "loss": 0.44352006912231445, + "step": 7250 + }, + { + "epoch": 1.6718930136038737, + "grad_norm": 1.5745296606833632, + "learning_rate": 1.4379418982214542e-07, + "loss": 0.4757179021835327, + "step": 7251 + }, + { + "epoch": 1.6721235877334562, + "grad_norm": 2.0125776677757825, + "learning_rate": 1.4359727919457998e-07, + "loss": 0.4748988747596741, + "step": 7252 + }, + { + "epoch": 1.6723541618630389, + "grad_norm": 1.4390886859105494, + "learning_rate": 1.434004930538294e-07, + "loss": 0.4280398190021515, + "step": 7253 + }, + { + "epoch": 1.6725847359926216, + "grad_norm": 1.5844583735943714, + "learning_rate": 1.4320383142849834e-07, + "loss": 0.4959871172904968, + "step": 7254 + }, + { + "epoch": 1.6728153101222043, + "grad_norm": 1.6551218088905322, + "learning_rate": 
1.4300729434717396e-07, + "loss": 0.506413996219635, + "step": 7255 + }, + { + "epoch": 1.673045884251787, + "grad_norm": 1.5894513628120581, + "learning_rate": 1.4281088183842448e-07, + "loss": 0.4723675847053528, + "step": 7256 + }, + { + "epoch": 1.6732764583813697, + "grad_norm": 1.5735532616627814, + "learning_rate": 1.4261459393080076e-07, + "loss": 0.41801339387893677, + "step": 7257 + }, + { + "epoch": 1.6735070325109522, + "grad_norm": 1.651784117733762, + "learning_rate": 1.424184306528351e-07, + "loss": 0.4463369846343994, + "step": 7258 + }, + { + "epoch": 1.673737606640535, + "grad_norm": 1.6205372576102755, + "learning_rate": 1.422223920330421e-07, + "loss": 0.4167429506778717, + "step": 7259 + }, + { + "epoch": 1.6739681807701174, + "grad_norm": 1.448285732733219, + "learning_rate": 1.420264780999174e-07, + "loss": 0.48808401823043823, + "step": 7260 + }, + { + "epoch": 1.6741987548997002, + "grad_norm": 1.7994342785579152, + "learning_rate": 1.4183068888193973e-07, + "loss": 0.515659749507904, + "step": 7261 + }, + { + "epoch": 1.6744293290292829, + "grad_norm": 1.6582236339460064, + "learning_rate": 1.416350244075688e-07, + "loss": 0.4393026530742645, + "step": 7262 + }, + { + "epoch": 1.6746599031588656, + "grad_norm": 1.6750398739214198, + "learning_rate": 1.4143948470524602e-07, + "loss": 0.35053056478500366, + "step": 7263 + }, + { + "epoch": 1.6748904772884483, + "grad_norm": 1.1872706234379884, + "learning_rate": 1.4124406980339532e-07, + "loss": 0.35598453879356384, + "step": 7264 + }, + { + "epoch": 1.675121051418031, + "grad_norm": 1.747342634360751, + "learning_rate": 1.410487797304224e-07, + "loss": 0.47989165782928467, + "step": 7265 + }, + { + "epoch": 1.6753516255476135, + "grad_norm": 1.4767801179152846, + "learning_rate": 1.408536145147148e-07, + "loss": 0.4621499180793762, + "step": 7266 + }, + { + "epoch": 1.6755821996771962, + "grad_norm": 1.4469255776490486, + "learning_rate": 1.4065857418464122e-07, + "loss": 
0.40567925572395325, + "step": 7267 + }, + { + "epoch": 1.6758127738067787, + "grad_norm": 2.121901896007684, + "learning_rate": 1.4046365876855326e-07, + "loss": 0.38889849185943604, + "step": 7268 + }, + { + "epoch": 1.6760433479363614, + "grad_norm": 1.8036845925466258, + "learning_rate": 1.4026886829478345e-07, + "loss": 0.516187846660614, + "step": 7269 + }, + { + "epoch": 1.6762739220659442, + "grad_norm": 1.3670995724086425, + "learning_rate": 1.4007420279164706e-07, + "loss": 0.4007910192012787, + "step": 7270 + }, + { + "epoch": 1.6765044961955269, + "grad_norm": 1.4513245632029468, + "learning_rate": 1.3987966228744007e-07, + "loss": 0.4426886737346649, + "step": 7271 + }, + { + "epoch": 1.6767350703251096, + "grad_norm": 1.7767592903800882, + "learning_rate": 1.3968524681044114e-07, + "loss": 0.46890369057655334, + "step": 7272 + }, + { + "epoch": 1.6769656444546923, + "grad_norm": 1.714201330640179, + "learning_rate": 1.3949095638891096e-07, + "loss": 0.510369598865509, + "step": 7273 + }, + { + "epoch": 1.6771962185842748, + "grad_norm": 1.697492362317676, + "learning_rate": 1.3929679105109106e-07, + "loss": 0.47810226678848267, + "step": 7274 + }, + { + "epoch": 1.6774267927138575, + "grad_norm": 1.6234301902278867, + "learning_rate": 1.3910275082520572e-07, + "loss": 0.48592591285705566, + "step": 7275 + }, + { + "epoch": 1.67765736684344, + "grad_norm": 1.5107060260742486, + "learning_rate": 1.3890883573946021e-07, + "loss": 0.4664943814277649, + "step": 7276 + }, + { + "epoch": 1.6778879409730227, + "grad_norm": 1.6514095493299281, + "learning_rate": 1.3871504582204263e-07, + "loss": 0.47146645188331604, + "step": 7277 + }, + { + "epoch": 1.6781185151026055, + "grad_norm": 1.615997642769361, + "learning_rate": 1.3852138110112166e-07, + "loss": 0.5171671509742737, + "step": 7278 + }, + { + "epoch": 1.6783490892321882, + "grad_norm": 1.8275491234958787, + "learning_rate": 1.3832784160484913e-07, + "loss": 0.45887336134910583, + "step": 7279 + }, + { 
+ "epoch": 1.678579663361771, + "grad_norm": 1.494861700798582, + "learning_rate": 1.3813442736135728e-07, + "loss": 0.4363539516925812, + "step": 7280 + }, + { + "epoch": 1.6788102374913536, + "grad_norm": 2.0171892009876147, + "learning_rate": 1.379411383987612e-07, + "loss": 0.4626097083091736, + "step": 7281 + }, + { + "epoch": 1.679040811620936, + "grad_norm": 1.8196525383976765, + "learning_rate": 1.3774797474515766e-07, + "loss": 0.5939204096794128, + "step": 7282 + }, + { + "epoch": 1.6792713857505188, + "grad_norm": 1.6878435890648014, + "learning_rate": 1.3755493642862437e-07, + "loss": 0.5463666915893555, + "step": 7283 + }, + { + "epoch": 1.6795019598801013, + "grad_norm": 1.622691460463702, + "learning_rate": 1.3736202347722182e-07, + "loss": 0.3634001910686493, + "step": 7284 + }, + { + "epoch": 1.679732534009684, + "grad_norm": 1.6327202188647956, + "learning_rate": 1.3716923591899166e-07, + "loss": 0.39512360095977783, + "step": 7285 + }, + { + "epoch": 1.6799631081392667, + "grad_norm": 1.3361978857608434, + "learning_rate": 1.3697657378195772e-07, + "loss": 0.3858473300933838, + "step": 7286 + }, + { + "epoch": 1.6801936822688495, + "grad_norm": 1.4527844976472322, + "learning_rate": 1.36784037094125e-07, + "loss": 0.473757266998291, + "step": 7287 + }, + { + "epoch": 1.6804242563984322, + "grad_norm": 1.410877918262981, + "learning_rate": 1.3659162588348107e-07, + "loss": 0.41679126024246216, + "step": 7288 + }, + { + "epoch": 1.680654830528015, + "grad_norm": 1.7135792249847552, + "learning_rate": 1.363993401779946e-07, + "loss": 0.4267998933792114, + "step": 7289 + }, + { + "epoch": 1.6808854046575974, + "grad_norm": 1.6476835268765473, + "learning_rate": 1.3620718000561648e-07, + "loss": 0.5453667044639587, + "step": 7290 + }, + { + "epoch": 1.68111597878718, + "grad_norm": 1.4347316593862658, + "learning_rate": 1.3601514539427895e-07, + "loss": 0.3882933259010315, + "step": 7291 + }, + { + "epoch": 1.6813465529167626, + "grad_norm": 
1.7177796725752086, + "learning_rate": 1.3582323637189653e-07, + "loss": 0.5565635561943054, + "step": 7292 + }, + { + "epoch": 1.6815771270463453, + "grad_norm": 1.448665873125515, + "learning_rate": 1.356314529663647e-07, + "loss": 0.49807024002075195, + "step": 7293 + }, + { + "epoch": 1.681807701175928, + "grad_norm": 1.5449122885779156, + "learning_rate": 1.3543979520556116e-07, + "loss": 0.40868130326271057, + "step": 7294 + }, + { + "epoch": 1.6820382753055108, + "grad_norm": 1.4045709349742252, + "learning_rate": 1.352482631173455e-07, + "loss": 0.46088406443595886, + "step": 7295 + }, + { + "epoch": 1.6822688494350935, + "grad_norm": 1.7658846162202777, + "learning_rate": 1.3505685672955869e-07, + "loss": 0.44346722960472107, + "step": 7296 + }, + { + "epoch": 1.6824994235646762, + "grad_norm": 1.3703801713050607, + "learning_rate": 1.348655760700239e-07, + "loss": 0.36585044860839844, + "step": 7297 + }, + { + "epoch": 1.6827299976942587, + "grad_norm": 1.8199719530329925, + "learning_rate": 1.3467442116654536e-07, + "loss": 0.46082472801208496, + "step": 7298 + }, + { + "epoch": 1.6829605718238414, + "grad_norm": 1.8043564550526412, + "learning_rate": 1.3448339204690974e-07, + "loss": 0.5011709928512573, + "step": 7299 + }, + { + "epoch": 1.683191145953424, + "grad_norm": 2.1355217293891378, + "learning_rate": 1.3429248873888454e-07, + "loss": 0.4382838010787964, + "step": 7300 + }, + { + "epoch": 1.6834217200830066, + "grad_norm": 1.4118543770807777, + "learning_rate": 1.3410171127022008e-07, + "loss": 0.35204610228538513, + "step": 7301 + }, + { + "epoch": 1.6836522942125893, + "grad_norm": 1.3718001359049319, + "learning_rate": 1.3391105966864745e-07, + "loss": 0.3915257453918457, + "step": 7302 + }, + { + "epoch": 1.683882868342172, + "grad_norm": 1.4102637825932318, + "learning_rate": 1.3372053396187967e-07, + "loss": 0.3945339322090149, + "step": 7303 + }, + { + "epoch": 1.6841134424717548, + "grad_norm": 1.7911618298179695, + "learning_rate": 
1.335301341776117e-07, + "loss": 0.48783642053604126, + "step": 7304 + }, + { + "epoch": 1.6843440166013375, + "grad_norm": 1.745012134293522, + "learning_rate": 1.333398603435203e-07, + "loss": 0.49026161432266235, + "step": 7305 + }, + { + "epoch": 1.68457459073092, + "grad_norm": 1.9699708710220791, + "learning_rate": 1.3314971248726358e-07, + "loss": 0.5035061836242676, + "step": 7306 + }, + { + "epoch": 1.6848051648605027, + "grad_norm": 1.7602149086036532, + "learning_rate": 1.3295969063648126e-07, + "loss": 0.5452826023101807, + "step": 7307 + }, + { + "epoch": 1.6850357389900852, + "grad_norm": 1.7088858518580703, + "learning_rate": 1.3276979481879524e-07, + "loss": 0.4609105885028839, + "step": 7308 + }, + { + "epoch": 1.685266313119668, + "grad_norm": 1.6869514802612067, + "learning_rate": 1.3258002506180855e-07, + "loss": 0.5799046754837036, + "step": 7309 + }, + { + "epoch": 1.6854968872492506, + "grad_norm": 1.6691103426337504, + "learning_rate": 1.3239038139310644e-07, + "loss": 0.42096465826034546, + "step": 7310 + }, + { + "epoch": 1.6857274613788333, + "grad_norm": 1.9781377178498367, + "learning_rate": 1.3220086384025508e-07, + "loss": 0.4741813540458679, + "step": 7311 + }, + { + "epoch": 1.685958035508416, + "grad_norm": 1.5972207301313162, + "learning_rate": 1.3201147243080302e-07, + "loss": 0.4872191250324249, + "step": 7312 + }, + { + "epoch": 1.6861886096379988, + "grad_norm": 1.7767879845396581, + "learning_rate": 1.3182220719228054e-07, + "loss": 0.5210198163986206, + "step": 7313 + }, + { + "epoch": 1.6864191837675813, + "grad_norm": 1.932834262840403, + "learning_rate": 1.3163306815219878e-07, + "loss": 0.4873948395252228, + "step": 7314 + }, + { + "epoch": 1.686649757897164, + "grad_norm": 1.723686253702064, + "learning_rate": 1.3144405533805136e-07, + "loss": 0.46856212615966797, + "step": 7315 + }, + { + "epoch": 1.6868803320267465, + "grad_norm": 1.549399332710726, + "learning_rate": 1.3125516877731279e-07, + "loss": 
0.3931645154953003, + "step": 7316 + }, + { + "epoch": 1.6871109061563292, + "grad_norm": 1.5988122745666866, + "learning_rate": 1.3106640849744023e-07, + "loss": 0.4473317861557007, + "step": 7317 + }, + { + "epoch": 1.687341480285912, + "grad_norm": 1.5841372684708825, + "learning_rate": 1.3087777452587124e-07, + "loss": 0.4499043822288513, + "step": 7318 + }, + { + "epoch": 1.6875720544154946, + "grad_norm": 1.6054649930580802, + "learning_rate": 1.30689266890026e-07, + "loss": 0.4992508292198181, + "step": 7319 + }, + { + "epoch": 1.6878026285450773, + "grad_norm": 1.426896936128743, + "learning_rate": 1.305008856173061e-07, + "loss": 0.4684743583202362, + "step": 7320 + }, + { + "epoch": 1.68803320267466, + "grad_norm": 1.7876602073965717, + "learning_rate": 1.303126307350948e-07, + "loss": 0.5543930530548096, + "step": 7321 + }, + { + "epoch": 1.6882637768042426, + "grad_norm": 1.3482084944505501, + "learning_rate": 1.3012450227075655e-07, + "loss": 0.3812211751937866, + "step": 7322 + }, + { + "epoch": 1.6884943509338253, + "grad_norm": 2.079165248146425, + "learning_rate": 1.299365002516377e-07, + "loss": 0.5455845594406128, + "step": 7323 + }, + { + "epoch": 1.6887249250634078, + "grad_norm": 1.3768890960712863, + "learning_rate": 1.2974862470506654e-07, + "loss": 0.4256778657436371, + "step": 7324 + }, + { + "epoch": 1.6889554991929905, + "grad_norm": 1.9468423520002898, + "learning_rate": 1.2956087565835228e-07, + "loss": 0.4973354637622833, + "step": 7325 + }, + { + "epoch": 1.6891860733225732, + "grad_norm": 1.5779840439512345, + "learning_rate": 1.2937325313878666e-07, + "loss": 0.5141343474388123, + "step": 7326 + }, + { + "epoch": 1.689416647452156, + "grad_norm": 1.5179632497576485, + "learning_rate": 1.2918575717364178e-07, + "loss": 0.3872978687286377, + "step": 7327 + }, + { + "epoch": 1.6896472215817386, + "grad_norm": 1.3857087225021212, + "learning_rate": 1.2899838779017292e-07, + "loss": 0.4333486557006836, + "step": 7328 + }, + { + "epoch": 
1.6898777957113214, + "grad_norm": 1.5624646221048997, + "learning_rate": 1.2881114501561553e-07, + "loss": 0.42979496717453003, + "step": 7329 + }, + { + "epoch": 1.6901083698409038, + "grad_norm": 1.6512939392276094, + "learning_rate": 1.2862402887718771e-07, + "loss": 0.43296414613723755, + "step": 7330 + }, + { + "epoch": 1.6903389439704866, + "grad_norm": 1.4822998528875215, + "learning_rate": 1.2843703940208816e-07, + "loss": 0.41763681173324585, + "step": 7331 + }, + { + "epoch": 1.690569518100069, + "grad_norm": 1.4433304691783968, + "learning_rate": 1.2825017661749814e-07, + "loss": 0.4531592130661011, + "step": 7332 + }, + { + "epoch": 1.6908000922296518, + "grad_norm": 1.5515786608723572, + "learning_rate": 1.2806344055057995e-07, + "loss": 0.4608149826526642, + "step": 7333 + }, + { + "epoch": 1.6910306663592345, + "grad_norm": 1.5678716271625897, + "learning_rate": 1.2787683122847726e-07, + "loss": 0.4298786520957947, + "step": 7334 + }, + { + "epoch": 1.6912612404888172, + "grad_norm": 1.5882305453896473, + "learning_rate": 1.2769034867831586e-07, + "loss": 0.4404297471046448, + "step": 7335 + }, + { + "epoch": 1.6914918146184, + "grad_norm": 1.590662947019878, + "learning_rate": 1.2750399292720281e-07, + "loss": 0.3857702910900116, + "step": 7336 + }, + { + "epoch": 1.6917223887479826, + "grad_norm": 1.5092920813034143, + "learning_rate": 1.2731776400222716e-07, + "loss": 0.351214200258255, + "step": 7337 + }, + { + "epoch": 1.6919529628775651, + "grad_norm": 1.6618460717985095, + "learning_rate": 1.2713166193045854e-07, + "loss": 0.4711484909057617, + "step": 7338 + }, + { + "epoch": 1.6921835370071479, + "grad_norm": 1.605912014604012, + "learning_rate": 1.2694568673894946e-07, + "loss": 0.4819946587085724, + "step": 7339 + }, + { + "epoch": 1.6924141111367303, + "grad_norm": 1.5366035327097678, + "learning_rate": 1.267598384547327e-07, + "loss": 0.39870262145996094, + "step": 7340 + }, + { + "epoch": 1.692644685266313, + "grad_norm": 
1.410709311062986, + "learning_rate": 1.265741171048237e-07, + "loss": 0.4775997996330261, + "step": 7341 + }, + { + "epoch": 1.6928752593958958, + "grad_norm": 1.5031428119722987, + "learning_rate": 1.2638852271621836e-07, + "loss": 0.4166836738586426, + "step": 7342 + }, + { + "epoch": 1.6931058335254785, + "grad_norm": 1.362546283009112, + "learning_rate": 1.2620305531589514e-07, + "loss": 0.396761953830719, + "step": 7343 + }, + { + "epoch": 1.6933364076550612, + "grad_norm": 1.5811036971551204, + "learning_rate": 1.260177149308136e-07, + "loss": 0.36929184198379517, + "step": 7344 + }, + { + "epoch": 1.6935669817846437, + "grad_norm": 1.6142308009439483, + "learning_rate": 1.2583250158791459e-07, + "loss": 0.4664369821548462, + "step": 7345 + }, + { + "epoch": 1.6937975559142264, + "grad_norm": 1.4490673957983151, + "learning_rate": 1.2564741531412115e-07, + "loss": 0.40877625346183777, + "step": 7346 + }, + { + "epoch": 1.694028130043809, + "grad_norm": 1.3363670323915413, + "learning_rate": 1.254624561363369e-07, + "loss": 0.4282684922218323, + "step": 7347 + }, + { + "epoch": 1.6942587041733916, + "grad_norm": 1.7781191335343183, + "learning_rate": 1.2527762408144805e-07, + "loss": 0.5430412292480469, + "step": 7348 + }, + { + "epoch": 1.6944892783029744, + "grad_norm": 1.7384245962384524, + "learning_rate": 1.2509291917632147e-07, + "loss": 0.45990923047065735, + "step": 7349 + }, + { + "epoch": 1.694719852432557, + "grad_norm": 1.5699544039589348, + "learning_rate": 1.2490834144780593e-07, + "loss": 0.38062262535095215, + "step": 7350 + }, + { + "epoch": 1.6949504265621398, + "grad_norm": 1.5427808320923257, + "learning_rate": 1.2472389092273172e-07, + "loss": 0.4704701900482178, + "step": 7351 + }, + { + "epoch": 1.6951810006917225, + "grad_norm": 1.3215044901700805, + "learning_rate": 1.2453956762791084e-07, + "loss": 0.4439951181411743, + "step": 7352 + }, + { + "epoch": 1.695411574821305, + "grad_norm": 1.6827848110964911, + "learning_rate": 
1.2435537159013632e-07, + "loss": 0.49405014514923096, + "step": 7353 + }, + { + "epoch": 1.6956421489508877, + "grad_norm": 1.4071924274505998, + "learning_rate": 1.2417130283618282e-07, + "loss": 0.4282076060771942, + "step": 7354 + }, + { + "epoch": 1.6958727230804702, + "grad_norm": 1.4129187553888694, + "learning_rate": 1.2398736139280687e-07, + "loss": 0.43492811918258667, + "step": 7355 + }, + { + "epoch": 1.696103297210053, + "grad_norm": 1.550272919478409, + "learning_rate": 1.238035472867458e-07, + "loss": 0.37239378690719604, + "step": 7356 + }, + { + "epoch": 1.6963338713396356, + "grad_norm": 1.2721176079849843, + "learning_rate": 1.236198605447194e-07, + "loss": 0.39911961555480957, + "step": 7357 + }, + { + "epoch": 1.6965644454692184, + "grad_norm": 1.911188398718987, + "learning_rate": 1.2343630119342786e-07, + "loss": 0.4962255656719208, + "step": 7358 + }, + { + "epoch": 1.696795019598801, + "grad_norm": 1.3131623819116638, + "learning_rate": 1.2325286925955358e-07, + "loss": 0.37414759397506714, + "step": 7359 + }, + { + "epoch": 1.6970255937283838, + "grad_norm": 1.5092759235813635, + "learning_rate": 1.230695647697604e-07, + "loss": 0.41224929690361023, + "step": 7360 + }, + { + "epoch": 1.6972561678579663, + "grad_norm": 1.3964295729715615, + "learning_rate": 1.228863877506936e-07, + "loss": 0.43184489011764526, + "step": 7361 + }, + { + "epoch": 1.697486741987549, + "grad_norm": 1.6991026917946972, + "learning_rate": 1.227033382289795e-07, + "loss": 0.4741829037666321, + "step": 7362 + }, + { + "epoch": 1.6977173161171315, + "grad_norm": 1.677947901828469, + "learning_rate": 1.2252041623122643e-07, + "loss": 0.43224620819091797, + "step": 7363 + }, + { + "epoch": 1.6979478902467142, + "grad_norm": 1.678576477296345, + "learning_rate": 1.2233762178402386e-07, + "loss": 0.46645525097846985, + "step": 7364 + }, + { + "epoch": 1.698178464376297, + "grad_norm": 1.4201537921120515, + "learning_rate": 1.2215495491394256e-07, + "loss": 
0.4237707555294037, + "step": 7365 + }, + { + "epoch": 1.6984090385058797, + "grad_norm": 1.3069690432597363, + "learning_rate": 1.2197241564753535e-07, + "loss": 0.36378395557403564, + "step": 7366 + }, + { + "epoch": 1.6986396126354624, + "grad_norm": 1.6387935949488672, + "learning_rate": 1.21790004011336e-07, + "loss": 0.4564269185066223, + "step": 7367 + }, + { + "epoch": 1.698870186765045, + "grad_norm": 1.3009015849639454, + "learning_rate": 1.2160772003186027e-07, + "loss": 0.4492420256137848, + "step": 7368 + }, + { + "epoch": 1.6991007608946276, + "grad_norm": 1.6097888974991954, + "learning_rate": 1.214255637356043e-07, + "loss": 0.515146017074585, + "step": 7369 + }, + { + "epoch": 1.6993313350242103, + "grad_norm": 1.5565943453492384, + "learning_rate": 1.2124353514904707e-07, + "loss": 0.41473329067230225, + "step": 7370 + }, + { + "epoch": 1.6995619091537928, + "grad_norm": 1.6571527829218886, + "learning_rate": 1.210616342986477e-07, + "loss": 0.4408412575721741, + "step": 7371 + }, + { + "epoch": 1.6997924832833755, + "grad_norm": 1.6546450900594125, + "learning_rate": 1.208798612108477e-07, + "loss": 0.5370820760726929, + "step": 7372 + }, + { + "epoch": 1.7000230574129582, + "grad_norm": 1.502975927661507, + "learning_rate": 1.206982159120693e-07, + "loss": 0.46518170833587646, + "step": 7373 + }, + { + "epoch": 1.700253631542541, + "grad_norm": 1.5801444025292624, + "learning_rate": 1.205166984287167e-07, + "loss": 0.45063477754592896, + "step": 7374 + }, + { + "epoch": 1.7004842056721237, + "grad_norm": 1.4109266758667123, + "learning_rate": 1.2033530878717546e-07, + "loss": 0.47391965985298157, + "step": 7375 + }, + { + "epoch": 1.7007147798017064, + "grad_norm": 1.680591382104731, + "learning_rate": 1.2015404701381205e-07, + "loss": 0.45812156796455383, + "step": 7376 + }, + { + "epoch": 1.7009453539312889, + "grad_norm": 1.7661450796417113, + "learning_rate": 1.1997291313497503e-07, + "loss": 0.5174708366394043, + "step": 7377 + }, + { + 
"epoch": 1.7011759280608716, + "grad_norm": 1.2379321910437706, + "learning_rate": 1.1979190717699373e-07, + "loss": 0.3412814736366272, + "step": 7378 + }, + { + "epoch": 1.701406502190454, + "grad_norm": 1.6619687091053885, + "learning_rate": 1.196110291661796e-07, + "loss": 0.41912171244621277, + "step": 7379 + }, + { + "epoch": 1.7016370763200368, + "grad_norm": 1.7384039938738447, + "learning_rate": 1.1943027912882464e-07, + "loss": 0.5569772720336914, + "step": 7380 + }, + { + "epoch": 1.7018676504496195, + "grad_norm": 1.309448309717786, + "learning_rate": 1.1924965709120304e-07, + "loss": 0.40875375270843506, + "step": 7381 + }, + { + "epoch": 1.7020982245792022, + "grad_norm": 1.5803953469974217, + "learning_rate": 1.1906916307956983e-07, + "loss": 0.46906760334968567, + "step": 7382 + }, + { + "epoch": 1.702328798708785, + "grad_norm": 1.2850228520937832, + "learning_rate": 1.1888879712016165e-07, + "loss": 0.40830397605895996, + "step": 7383 + }, + { + "epoch": 1.7025593728383677, + "grad_norm": 1.4770811279187035, + "learning_rate": 1.1870855923919687e-07, + "loss": 0.4051646590232849, + "step": 7384 + }, + { + "epoch": 1.7027899469679502, + "grad_norm": 1.696009847928002, + "learning_rate": 1.1852844946287432e-07, + "loss": 0.5042610764503479, + "step": 7385 + }, + { + "epoch": 1.7030205210975329, + "grad_norm": 1.6262740295484197, + "learning_rate": 1.183484678173754e-07, + "loss": 0.5304923057556152, + "step": 7386 + }, + { + "epoch": 1.7032510952271154, + "grad_norm": 1.2604579461831944, + "learning_rate": 1.1816861432886171e-07, + "loss": 0.443366676568985, + "step": 7387 + }, + { + "epoch": 1.703481669356698, + "grad_norm": 1.3836719865657088, + "learning_rate": 1.1798888902347714e-07, + "loss": 0.4527779817581177, + "step": 7388 + }, + { + "epoch": 1.7037122434862808, + "grad_norm": 1.3616715508883823, + "learning_rate": 1.1780929192734634e-07, + "loss": 0.4277183413505554, + "step": 7389 + }, + { + "epoch": 1.7039428176158635, + "grad_norm": 
1.3714415020573154, + "learning_rate": 1.1762982306657577e-07, + "loss": 0.4908677637577057, + "step": 7390 + }, + { + "epoch": 1.7041733917454462, + "grad_norm": 1.4373179697113392, + "learning_rate": 1.1745048246725286e-07, + "loss": 0.398892879486084, + "step": 7391 + }, + { + "epoch": 1.704403965875029, + "grad_norm": 1.801155926723525, + "learning_rate": 1.1727127015544691e-07, + "loss": 0.4654615521430969, + "step": 7392 + }, + { + "epoch": 1.7046345400046115, + "grad_norm": 1.6258673974312492, + "learning_rate": 1.1709218615720806e-07, + "loss": 0.4850313663482666, + "step": 7393 + }, + { + "epoch": 1.7048651141341942, + "grad_norm": 1.3854283292952871, + "learning_rate": 1.1691323049856772e-07, + "loss": 0.4036976099014282, + "step": 7394 + }, + { + "epoch": 1.7050956882637767, + "grad_norm": 1.6824325261066553, + "learning_rate": 1.167344032055394e-07, + "loss": 0.39174383878707886, + "step": 7395 + }, + { + "epoch": 1.7053262623933594, + "grad_norm": 1.49190685623753, + "learning_rate": 1.1655570430411699e-07, + "loss": 0.44915109872817993, + "step": 7396 + }, + { + "epoch": 1.705556836522942, + "grad_norm": 1.4487302731781821, + "learning_rate": 1.1637713382027636e-07, + "loss": 0.4720522165298462, + "step": 7397 + }, + { + "epoch": 1.7057874106525248, + "grad_norm": 1.5236154065511855, + "learning_rate": 1.1619869177997455e-07, + "loss": 0.4452325105667114, + "step": 7398 + }, + { + "epoch": 1.7060179847821075, + "grad_norm": 1.489108876491428, + "learning_rate": 1.1602037820915023e-07, + "loss": 0.4009271562099457, + "step": 7399 + }, + { + "epoch": 1.7062485589116902, + "grad_norm": 1.3320502296097492, + "learning_rate": 1.1584219313372257e-07, + "loss": 0.37518051266670227, + "step": 7400 + }, + { + "epoch": 1.7064791330412727, + "grad_norm": 1.5361245639590775, + "learning_rate": 1.1566413657959295e-07, + "loss": 0.42883241176605225, + "step": 7401 + }, + { + "epoch": 1.7067097071708555, + "grad_norm": 1.5311391941499002, + "learning_rate": 
1.1548620857264346e-07, + "loss": 0.4597551226615906, + "step": 7402 + }, + { + "epoch": 1.706940281300438, + "grad_norm": 1.4815045613998048, + "learning_rate": 1.1530840913873797e-07, + "loss": 0.5491876006126404, + "step": 7403 + }, + { + "epoch": 1.7071708554300207, + "grad_norm": 1.8810828492754625, + "learning_rate": 1.1513073830372122e-07, + "loss": 0.5632074475288391, + "step": 7404 + }, + { + "epoch": 1.7074014295596034, + "grad_norm": 1.557196455612015, + "learning_rate": 1.1495319609341947e-07, + "loss": 0.5251858234405518, + "step": 7405 + }, + { + "epoch": 1.707632003689186, + "grad_norm": 1.7979639485315768, + "learning_rate": 1.1477578253364028e-07, + "loss": 0.5388965606689453, + "step": 7406 + }, + { + "epoch": 1.7078625778187688, + "grad_norm": 1.7322317596816112, + "learning_rate": 1.145984976501726e-07, + "loss": 0.4429551959037781, + "step": 7407 + }, + { + "epoch": 1.7080931519483515, + "grad_norm": 1.5048923212213088, + "learning_rate": 1.144213414687868e-07, + "loss": 0.4702358841896057, + "step": 7408 + }, + { + "epoch": 1.708323726077934, + "grad_norm": 1.616629635802576, + "learning_rate": 1.1424431401523382e-07, + "loss": 0.4506569504737854, + "step": 7409 + }, + { + "epoch": 1.7085543002075168, + "grad_norm": 1.5722880063833475, + "learning_rate": 1.1406741531524689e-07, + "loss": 0.384244441986084, + "step": 7410 + }, + { + "epoch": 1.7087848743370992, + "grad_norm": 1.6254816299222574, + "learning_rate": 1.1389064539453952e-07, + "loss": 0.4642629027366638, + "step": 7411 + }, + { + "epoch": 1.709015448466682, + "grad_norm": 1.5180284715923413, + "learning_rate": 1.1371400427880761e-07, + "loss": 0.4568482041358948, + "step": 7412 + }, + { + "epoch": 1.7092460225962647, + "grad_norm": 1.6058744016500281, + "learning_rate": 1.135374919937272e-07, + "loss": 0.536895215511322, + "step": 7413 + }, + { + "epoch": 1.7094765967258474, + "grad_norm": 1.6944575711634469, + "learning_rate": 1.1336110856495628e-07, + "loss": 0.49696239829063416, 
+ "step": 7414 + }, + { + "epoch": 1.7097071708554301, + "grad_norm": 1.802031783829704, + "learning_rate": 1.1318485401813438e-07, + "loss": 0.3857358694076538, + "step": 7415 + }, + { + "epoch": 1.7099377449850128, + "grad_norm": 1.5410848248596472, + "learning_rate": 1.1300872837888121e-07, + "loss": 0.38111335039138794, + "step": 7416 + }, + { + "epoch": 1.7101683191145953, + "grad_norm": 1.6014644101172142, + "learning_rate": 1.1283273167279906e-07, + "loss": 0.4255755543708801, + "step": 7417 + }, + { + "epoch": 1.710398893244178, + "grad_norm": 1.6646696692039435, + "learning_rate": 1.1265686392547024e-07, + "loss": 0.5048757791519165, + "step": 7418 + }, + { + "epoch": 1.7106294673737605, + "grad_norm": 1.6262992093918878, + "learning_rate": 1.1248112516245944e-07, + "loss": 0.5402916073799133, + "step": 7419 + }, + { + "epoch": 1.7108600415033433, + "grad_norm": 1.6105931834922984, + "learning_rate": 1.1230551540931165e-07, + "loss": 0.3617591857910156, + "step": 7420 + }, + { + "epoch": 1.711090615632926, + "grad_norm": 1.584818843359006, + "learning_rate": 1.1213003469155369e-07, + "loss": 0.4636116921901703, + "step": 7421 + }, + { + "epoch": 1.7113211897625087, + "grad_norm": 1.7626797404606351, + "learning_rate": 1.1195468303469346e-07, + "loss": 0.4675198495388031, + "step": 7422 + }, + { + "epoch": 1.7115517638920914, + "grad_norm": 1.6024517382949015, + "learning_rate": 1.1177946046422038e-07, + "loss": 0.48491787910461426, + "step": 7423 + }, + { + "epoch": 1.7117823380216741, + "grad_norm": 1.5413352133121294, + "learning_rate": 1.1160436700560449e-07, + "loss": 0.3898283839225769, + "step": 7424 + }, + { + "epoch": 1.7120129121512566, + "grad_norm": 1.5514584947710022, + "learning_rate": 1.1142940268429735e-07, + "loss": 0.41522908210754395, + "step": 7425 + }, + { + "epoch": 1.7122434862808393, + "grad_norm": 1.430903522239028, + "learning_rate": 1.1125456752573215e-07, + "loss": 0.4681985378265381, + "step": 7426 + }, + { + "epoch": 
1.7124740604104218, + "grad_norm": 1.8962296460852388, + "learning_rate": 1.1107986155532245e-07, + "loss": 0.4788553714752197, + "step": 7427 + }, + { + "epoch": 1.7127046345400045, + "grad_norm": 1.5072364623848036, + "learning_rate": 1.1090528479846406e-07, + "loss": 0.43853843212127686, + "step": 7428 + }, + { + "epoch": 1.7129352086695873, + "grad_norm": 1.542463594674994, + "learning_rate": 1.107308372805329e-07, + "loss": 0.3736591637134552, + "step": 7429 + }, + { + "epoch": 1.71316578279917, + "grad_norm": 1.8237435289536401, + "learning_rate": 1.1055651902688712e-07, + "loss": 0.5770819783210754, + "step": 7430 + }, + { + "epoch": 1.7133963569287527, + "grad_norm": 1.7972828104133267, + "learning_rate": 1.1038233006286558e-07, + "loss": 0.5906555652618408, + "step": 7431 + }, + { + "epoch": 1.7136269310583354, + "grad_norm": 1.396062928601261, + "learning_rate": 1.1020827041378844e-07, + "loss": 0.4621407389640808, + "step": 7432 + }, + { + "epoch": 1.713857505187918, + "grad_norm": 1.6487194571266346, + "learning_rate": 1.1003434010495705e-07, + "loss": 0.4203164279460907, + "step": 7433 + }, + { + "epoch": 1.7140880793175006, + "grad_norm": 1.59720117870823, + "learning_rate": 1.0986053916165373e-07, + "loss": 0.4607565104961395, + "step": 7434 + }, + { + "epoch": 1.7143186534470831, + "grad_norm": 1.4411738322949479, + "learning_rate": 1.0968686760914248e-07, + "loss": 0.47256794571876526, + "step": 7435 + }, + { + "epoch": 1.7145492275766658, + "grad_norm": 2.1203032230505414, + "learning_rate": 1.0951332547266778e-07, + "loss": 0.479513943195343, + "step": 7436 + }, + { + "epoch": 1.7147798017062486, + "grad_norm": 1.7633354860000339, + "learning_rate": 1.0933991277745614e-07, + "loss": 0.47687965631484985, + "step": 7437 + }, + { + "epoch": 1.7150103758358313, + "grad_norm": 1.6696730348311766, + "learning_rate": 1.091666295487147e-07, + "loss": 0.45799845457077026, + "step": 7438 + }, + { + "epoch": 1.715240949965414, + "grad_norm": 
1.4765505689651048, + "learning_rate": 1.089934758116322e-07, + "loss": 0.43398863077163696, + "step": 7439 + }, + { + "epoch": 1.7154715240949967, + "grad_norm": 1.627580558092534, + "learning_rate": 1.0882045159137788e-07, + "loss": 0.4098217189311981, + "step": 7440 + }, + { + "epoch": 1.7157020982245792, + "grad_norm": 1.8062601643320504, + "learning_rate": 1.086475569131029e-07, + "loss": 0.49889707565307617, + "step": 7441 + }, + { + "epoch": 1.715932672354162, + "grad_norm": 1.4613353368332702, + "learning_rate": 1.0847479180193897e-07, + "loss": 0.4187192916870117, + "step": 7442 + }, + { + "epoch": 1.7161632464837444, + "grad_norm": 2.068945016126778, + "learning_rate": 1.0830215628299954e-07, + "loss": 0.44331133365631104, + "step": 7443 + }, + { + "epoch": 1.7163938206133271, + "grad_norm": 1.6773749938074582, + "learning_rate": 1.0812965038137856e-07, + "loss": 0.4888196587562561, + "step": 7444 + }, + { + "epoch": 1.7166243947429098, + "grad_norm": 1.6578617629701122, + "learning_rate": 1.0795727412215183e-07, + "loss": 0.4884798228740692, + "step": 7445 + }, + { + "epoch": 1.7168549688724926, + "grad_norm": 1.5723023883356735, + "learning_rate": 1.07785027530376e-07, + "loss": 0.45655232667922974, + "step": 7446 + }, + { + "epoch": 1.7170855430020753, + "grad_norm": 1.685893884498356, + "learning_rate": 1.0761291063108857e-07, + "loss": 0.3086237907409668, + "step": 7447 + }, + { + "epoch": 1.717316117131658, + "grad_norm": 1.5738053973393145, + "learning_rate": 1.0744092344930888e-07, + "loss": 0.4279823899269104, + "step": 7448 + }, + { + "epoch": 1.7175466912612405, + "grad_norm": 1.7221029802689058, + "learning_rate": 1.072690660100366e-07, + "loss": 0.4241681396961212, + "step": 7449 + }, + { + "epoch": 1.7177772653908232, + "grad_norm": 1.7874830878272077, + "learning_rate": 1.070973383382533e-07, + "loss": 0.47086501121520996, + "step": 7450 + }, + { + "epoch": 1.7180078395204057, + "grad_norm": 1.3780373187479635, + "learning_rate": 
1.0692574045892099e-07, + "loss": 0.43798619508743286, + "step": 7451 + }, + { + "epoch": 1.7182384136499884, + "grad_norm": 1.7289936352675708, + "learning_rate": 1.0675427239698354e-07, + "loss": 0.5781964659690857, + "step": 7452 + }, + { + "epoch": 1.7184689877795711, + "grad_norm": 1.4621228929512655, + "learning_rate": 1.0658293417736508e-07, + "loss": 0.4850879907608032, + "step": 7453 + }, + { + "epoch": 1.7186995619091539, + "grad_norm": 1.3236244677460836, + "learning_rate": 1.064117258249717e-07, + "loss": 0.40468811988830566, + "step": 7454 + }, + { + "epoch": 1.7189301360387366, + "grad_norm": 1.7069112900372936, + "learning_rate": 1.0624064736469052e-07, + "loss": 0.4054880142211914, + "step": 7455 + }, + { + "epoch": 1.719160710168319, + "grad_norm": 1.7589002706519377, + "learning_rate": 1.0606969882138894e-07, + "loss": 0.38633522391319275, + "step": 7456 + }, + { + "epoch": 1.7193912842979018, + "grad_norm": 1.6917357500409704, + "learning_rate": 1.0589888021991644e-07, + "loss": 0.4287499785423279, + "step": 7457 + }, + { + "epoch": 1.7196218584274843, + "grad_norm": 1.613018561241669, + "learning_rate": 1.0572819158510316e-07, + "loss": 0.49269533157348633, + "step": 7458 + }, + { + "epoch": 1.719852432557067, + "grad_norm": 1.4600608769783265, + "learning_rate": 1.0555763294176045e-07, + "loss": 0.38874679803848267, + "step": 7459 + }, + { + "epoch": 1.7200830066866497, + "grad_norm": 1.5663184097893508, + "learning_rate": 1.0538720431468051e-07, + "loss": 0.4381089508533478, + "step": 7460 + }, + { + "epoch": 1.7203135808162324, + "grad_norm": 1.6242553694361792, + "learning_rate": 1.0521690572863706e-07, + "loss": 0.4550422430038452, + "step": 7461 + }, + { + "epoch": 1.7205441549458151, + "grad_norm": 1.5017985009159773, + "learning_rate": 1.0504673720838476e-07, + "loss": 0.5173785090446472, + "step": 7462 + }, + { + "epoch": 1.7207747290753979, + "grad_norm": 1.4906138636113029, + "learning_rate": 1.0487669877865945e-07, + "loss": 
0.5082184076309204, + "step": 7463 + }, + { + "epoch": 1.7210053032049804, + "grad_norm": 1.7383580581523643, + "learning_rate": 1.0470679046417786e-07, + "loss": 0.49810969829559326, + "step": 7464 + }, + { + "epoch": 1.721235877334563, + "grad_norm": 1.7302456540952424, + "learning_rate": 1.0453701228963751e-07, + "loss": 0.47808337211608887, + "step": 7465 + }, + { + "epoch": 1.7214664514641456, + "grad_norm": 1.6093569631380469, + "learning_rate": 1.0436736427971782e-07, + "loss": 0.5100537538528442, + "step": 7466 + }, + { + "epoch": 1.7216970255937283, + "grad_norm": 1.5019138408689112, + "learning_rate": 1.0419784645907858e-07, + "loss": 0.44948023557662964, + "step": 7467 + }, + { + "epoch": 1.721927599723311, + "grad_norm": 1.3792836042899619, + "learning_rate": 1.040284588523611e-07, + "loss": 0.4653180241584778, + "step": 7468 + }, + { + "epoch": 1.7221581738528937, + "grad_norm": 1.901421358760061, + "learning_rate": 1.0385920148418737e-07, + "loss": 0.4930723309516907, + "step": 7469 + }, + { + "epoch": 1.7223887479824764, + "grad_norm": 1.5964124799736943, + "learning_rate": 1.036900743791611e-07, + "loss": 0.48883867263793945, + "step": 7470 + }, + { + "epoch": 1.7226193221120591, + "grad_norm": 1.27924002772244, + "learning_rate": 1.0352107756186624e-07, + "loss": 0.4030319154262543, + "step": 7471 + }, + { + "epoch": 1.7228498962416416, + "grad_norm": 1.8060139526740588, + "learning_rate": 1.033522110568683e-07, + "loss": 0.4174875319004059, + "step": 7472 + }, + { + "epoch": 1.7230804703712244, + "grad_norm": 1.731157383735833, + "learning_rate": 1.0318347488871371e-07, + "loss": 0.5152361392974854, + "step": 7473 + }, + { + "epoch": 1.7233110445008069, + "grad_norm": 1.3983774946509473, + "learning_rate": 1.0301486908193014e-07, + "loss": 0.43221428990364075, + "step": 7474 + }, + { + "epoch": 1.7235416186303896, + "grad_norm": 1.6931290113673243, + "learning_rate": 1.0284639366102598e-07, + "loss": 0.4239969849586487, + "step": 7475 + }, + { + 
"epoch": 1.7237721927599723, + "grad_norm": 1.5094560861426634, + "learning_rate": 1.0267804865049068e-07, + "loss": 0.5171400904655457, + "step": 7476 + }, + { + "epoch": 1.724002766889555, + "grad_norm": 1.3913671775557208, + "learning_rate": 1.0250983407479518e-07, + "loss": 0.45670178532600403, + "step": 7477 + }, + { + "epoch": 1.7242333410191377, + "grad_norm": 1.3489970844922, + "learning_rate": 1.0234174995839107e-07, + "loss": 0.36458373069763184, + "step": 7478 + }, + { + "epoch": 1.7244639151487204, + "grad_norm": 1.6926167509742018, + "learning_rate": 1.0217379632571122e-07, + "loss": 0.4940750002861023, + "step": 7479 + }, + { + "epoch": 1.724694489278303, + "grad_norm": 1.3742895139526408, + "learning_rate": 1.0200597320116911e-07, + "loss": 0.43453872203826904, + "step": 7480 + }, + { + "epoch": 1.7249250634078857, + "grad_norm": 1.4325916198137496, + "learning_rate": 1.0183828060915989e-07, + "loss": 0.49255162477493286, + "step": 7481 + }, + { + "epoch": 1.7251556375374681, + "grad_norm": 1.5551839406586245, + "learning_rate": 1.0167071857405906e-07, + "loss": 0.46221014857292175, + "step": 7482 + }, + { + "epoch": 1.7253862116670509, + "grad_norm": 1.6044214909369097, + "learning_rate": 1.015032871202236e-07, + "loss": 0.43426087498664856, + "step": 7483 + }, + { + "epoch": 1.7256167857966336, + "grad_norm": 1.3471292376409894, + "learning_rate": 1.0133598627199136e-07, + "loss": 0.45327985286712646, + "step": 7484 + }, + { + "epoch": 1.7258473599262163, + "grad_norm": 1.7300792096053668, + "learning_rate": 1.011688160536811e-07, + "loss": 0.4691676199436188, + "step": 7485 + }, + { + "epoch": 1.726077934055799, + "grad_norm": 1.7168424748125397, + "learning_rate": 1.0100177648959296e-07, + "loss": 0.5080254077911377, + "step": 7486 + }, + { + "epoch": 1.7263085081853817, + "grad_norm": 1.3360541862160926, + "learning_rate": 1.008348676040075e-07, + "loss": 0.34122025966644287, + "step": 7487 + }, + { + "epoch": 1.7265390823149642, + "grad_norm": 
1.650892930499383, + "learning_rate": 1.0066808942118699e-07, + "loss": 0.44408074021339417, + "step": 7488 + }, + { + "epoch": 1.726769656444547, + "grad_norm": 1.4603224951411022, + "learning_rate": 1.0050144196537402e-07, + "loss": 0.3777790665626526, + "step": 7489 + }, + { + "epoch": 1.7270002305741294, + "grad_norm": 1.6365267437093343, + "learning_rate": 1.0033492526079279e-07, + "loss": 0.48730146884918213, + "step": 7490 + }, + { + "epoch": 1.7272308047037122, + "grad_norm": 1.5792338555913825, + "learning_rate": 1.001685393316477e-07, + "loss": 0.35903626680374146, + "step": 7491 + }, + { + "epoch": 1.7274613788332949, + "grad_norm": 1.3953813288199584, + "learning_rate": 1.0000228420212509e-07, + "loss": 0.37729373574256897, + "step": 7492 + }, + { + "epoch": 1.7276919529628776, + "grad_norm": 1.6314801226105193, + "learning_rate": 9.98361598963916e-08, + "loss": 0.4388326406478882, + "step": 7493 + }, + { + "epoch": 1.7279225270924603, + "grad_norm": 1.4829220781258674, + "learning_rate": 9.967016643859527e-08, + "loss": 0.45095232129096985, + "step": 7494 + }, + { + "epoch": 1.728153101222043, + "grad_norm": 1.5130736602015042, + "learning_rate": 9.95043038528649e-08, + "loss": 0.4736475944519043, + "step": 7495 + }, + { + "epoch": 1.7283836753516255, + "grad_norm": 1.6393405202034401, + "learning_rate": 9.933857216330999e-08, + "loss": 0.2984190285205841, + "step": 7496 + }, + { + "epoch": 1.7286142494812082, + "grad_norm": 1.5993261500159095, + "learning_rate": 9.91729713940218e-08, + "loss": 0.45391780138015747, + "step": 7497 + }, + { + "epoch": 1.7288448236107907, + "grad_norm": 1.732905558263472, + "learning_rate": 9.900750156907157e-08, + "loss": 0.5150727033615112, + "step": 7498 + }, + { + "epoch": 1.7290753977403734, + "grad_norm": 1.372519788443724, + "learning_rate": 9.884216271251256e-08, + "loss": 0.41298598051071167, + "step": 7499 + }, + { + "epoch": 1.7293059718699562, + "grad_norm": 1.5310483983437806, + "learning_rate": 
9.86769548483779e-08, + "loss": 0.4820541441440582, + "step": 7500 + }, + { + "epoch": 1.7295365459995389, + "grad_norm": 1.4103659952581913, + "learning_rate": 9.85118780006825e-08, + "loss": 0.4148511290550232, + "step": 7501 + }, + { + "epoch": 1.7297671201291216, + "grad_norm": 1.535383378975012, + "learning_rate": 9.834693219342183e-08, + "loss": 0.39676210284233093, + "step": 7502 + }, + { + "epoch": 1.7299976942587043, + "grad_norm": 1.3969764743432636, + "learning_rate": 9.818211745057292e-08, + "loss": 0.3665908873081207, + "step": 7503 + }, + { + "epoch": 1.7302282683882868, + "grad_norm": 1.5255452230855382, + "learning_rate": 9.801743379609274e-08, + "loss": 0.39340025186538696, + "step": 7504 + }, + { + "epoch": 1.7304588425178695, + "grad_norm": 1.4673439514671116, + "learning_rate": 9.785288125391977e-08, + "loss": 0.4677412807941437, + "step": 7505 + }, + { + "epoch": 1.730689416647452, + "grad_norm": 1.8421716352805986, + "learning_rate": 9.768845984797369e-08, + "loss": 0.49413764476776123, + "step": 7506 + }, + { + "epoch": 1.7309199907770347, + "grad_norm": 2.1097980684598223, + "learning_rate": 9.752416960215437e-08, + "loss": 0.5312438607215881, + "step": 7507 + }, + { + "epoch": 1.7311505649066175, + "grad_norm": 1.408973464564324, + "learning_rate": 9.736001054034338e-08, + "loss": 0.38522863388061523, + "step": 7508 + }, + { + "epoch": 1.7313811390362002, + "grad_norm": 1.4496862609377634, + "learning_rate": 9.719598268640283e-08, + "loss": 0.49167078733444214, + "step": 7509 + }, + { + "epoch": 1.7316117131657829, + "grad_norm": 1.7071655256469307, + "learning_rate": 9.7032086064176e-08, + "loss": 0.4465949535369873, + "step": 7510 + }, + { + "epoch": 1.7318422872953656, + "grad_norm": 1.580755639233498, + "learning_rate": 9.686832069748663e-08, + "loss": 0.4627634882926941, + "step": 7511 + }, + { + "epoch": 1.732072861424948, + "grad_norm": 1.5945960217093318, + "learning_rate": 9.670468661013998e-08, + "loss": 0.4188409447669983, + 
"step": 7512 + }, + { + "epoch": 1.7323034355545308, + "grad_norm": 1.6767285085334622, + "learning_rate": 9.654118382592146e-08, + "loss": 0.5775213241577148, + "step": 7513 + }, + { + "epoch": 1.7325340096841133, + "grad_norm": 1.4889326648746473, + "learning_rate": 9.637781236859843e-08, + "loss": 0.43912672996520996, + "step": 7514 + }, + { + "epoch": 1.732764583813696, + "grad_norm": 1.677177851910315, + "learning_rate": 9.62145722619182e-08, + "loss": 0.5364755392074585, + "step": 7515 + }, + { + "epoch": 1.7329951579432787, + "grad_norm": 1.5135890648676678, + "learning_rate": 9.605146352960935e-08, + "loss": 0.4832648038864136, + "step": 7516 + }, + { + "epoch": 1.7332257320728615, + "grad_norm": 1.640472153194824, + "learning_rate": 9.588848619538182e-08, + "loss": 0.36932459473609924, + "step": 7517 + }, + { + "epoch": 1.7334563062024442, + "grad_norm": 1.4731235594964114, + "learning_rate": 9.57256402829254e-08, + "loss": 0.43458276987075806, + "step": 7518 + }, + { + "epoch": 1.733686880332027, + "grad_norm": 1.457966513875051, + "learning_rate": 9.556292581591196e-08, + "loss": 0.41533568501472473, + "step": 7519 + }, + { + "epoch": 1.7339174544616094, + "grad_norm": 1.4363289807621746, + "learning_rate": 9.540034281799325e-08, + "loss": 0.45898690819740295, + "step": 7520 + }, + { + "epoch": 1.734148028591192, + "grad_norm": 1.610315429506808, + "learning_rate": 9.523789131280279e-08, + "loss": 0.3321181535720825, + "step": 7521 + }, + { + "epoch": 1.7343786027207746, + "grad_norm": 1.5824862936232118, + "learning_rate": 9.507557132395416e-08, + "loss": 0.3926161229610443, + "step": 7522 + }, + { + "epoch": 1.7346091768503573, + "grad_norm": 1.264710302836967, + "learning_rate": 9.491338287504247e-08, + "loss": 0.41051846742630005, + "step": 7523 + }, + { + "epoch": 1.73483975097994, + "grad_norm": 1.3604853902379428, + "learning_rate": 9.47513259896432e-08, + "loss": 0.4440652132034302, + "step": 7524 + }, + { + "epoch": 1.7350703251095227, + 
"grad_norm": 1.5933781203678954, + "learning_rate": 9.458940069131304e-08, + "loss": 0.5175125598907471, + "step": 7525 + }, + { + "epoch": 1.7353008992391055, + "grad_norm": 1.4535445480892137, + "learning_rate": 9.442760700358987e-08, + "loss": 0.45521751046180725, + "step": 7526 + }, + { + "epoch": 1.7355314733686882, + "grad_norm": 1.5707484811695662, + "learning_rate": 9.426594494999151e-08, + "loss": 0.5133911967277527, + "step": 7527 + }, + { + "epoch": 1.7357620474982707, + "grad_norm": 1.8770278394623805, + "learning_rate": 9.410441455401752e-08, + "loss": 0.4397609233856201, + "step": 7528 + }, + { + "epoch": 1.7359926216278534, + "grad_norm": 3.7292879258339693, + "learning_rate": 9.394301583914765e-08, + "loss": 0.4503510594367981, + "step": 7529 + }, + { + "epoch": 1.7362231957574359, + "grad_norm": 1.5909450336667472, + "learning_rate": 9.378174882884327e-08, + "loss": 0.44119834899902344, + "step": 7530 + }, + { + "epoch": 1.7364537698870186, + "grad_norm": 1.5959659498105105, + "learning_rate": 9.362061354654583e-08, + "loss": 0.46257996559143066, + "step": 7531 + }, + { + "epoch": 1.7366843440166013, + "grad_norm": 1.4727698319610416, + "learning_rate": 9.345961001567792e-08, + "loss": 0.4468308687210083, + "step": 7532 + }, + { + "epoch": 1.736914918146184, + "grad_norm": 1.329652616869682, + "learning_rate": 9.32987382596433e-08, + "loss": 0.3837989568710327, + "step": 7533 + }, + { + "epoch": 1.7371454922757668, + "grad_norm": 1.7149798865191848, + "learning_rate": 9.313799830182644e-08, + "loss": 0.4224961996078491, + "step": 7534 + }, + { + "epoch": 1.7373760664053495, + "grad_norm": 1.3527154365554523, + "learning_rate": 9.297739016559225e-08, + "loss": 0.37379956245422363, + "step": 7535 + }, + { + "epoch": 1.737606640534932, + "grad_norm": 1.3983736958193809, + "learning_rate": 9.281691387428658e-08, + "loss": 0.4204242527484894, + "step": 7536 + }, + { + "epoch": 1.7378372146645147, + "grad_norm": 1.550547566194999, + "learning_rate": 
9.265656945123678e-08, + "loss": 0.5270572900772095, + "step": 7537 + }, + { + "epoch": 1.7380677887940972, + "grad_norm": 1.6826850331086136, + "learning_rate": 9.249635691975e-08, + "loss": 0.44208282232284546, + "step": 7538 + }, + { + "epoch": 1.73829836292368, + "grad_norm": 1.158547237110862, + "learning_rate": 9.233627630311502e-08, + "loss": 0.32514283061027527, + "step": 7539 + }, + { + "epoch": 1.7385289370532626, + "grad_norm": 1.42135951118167, + "learning_rate": 9.217632762460126e-08, + "loss": 0.35472434759140015, + "step": 7540 + }, + { + "epoch": 1.7387595111828453, + "grad_norm": 1.9134735814581072, + "learning_rate": 9.201651090745888e-08, + "loss": 0.5034215450286865, + "step": 7541 + }, + { + "epoch": 1.738990085312428, + "grad_norm": 1.4950522917395752, + "learning_rate": 9.185682617491863e-08, + "loss": 0.4779762029647827, + "step": 7542 + }, + { + "epoch": 1.7392206594420108, + "grad_norm": 1.7544463226218252, + "learning_rate": 9.169727345019263e-08, + "loss": 0.4964079260826111, + "step": 7543 + }, + { + "epoch": 1.7394512335715933, + "grad_norm": 1.8208500448761544, + "learning_rate": 9.153785275647319e-08, + "loss": 0.5125068426132202, + "step": 7544 + }, + { + "epoch": 1.739681807701176, + "grad_norm": 1.369096268264849, + "learning_rate": 9.13785641169339e-08, + "loss": 0.39051756262779236, + "step": 7545 + }, + { + "epoch": 1.7399123818307585, + "grad_norm": 1.6132499721446665, + "learning_rate": 9.121940755472901e-08, + "loss": 0.45951950550079346, + "step": 7546 + }, + { + "epoch": 1.7401429559603412, + "grad_norm": 1.402513218333582, + "learning_rate": 9.106038309299302e-08, + "loss": 0.42676979303359985, + "step": 7547 + }, + { + "epoch": 1.740373530089924, + "grad_norm": 1.6248647623340229, + "learning_rate": 9.090149075484255e-08, + "loss": 0.3585033416748047, + "step": 7548 + }, + { + "epoch": 1.7406041042195066, + "grad_norm": 1.5204418845888263, + "learning_rate": 9.074273056337366e-08, + "loss": 0.4613775312900543, + "step": 
7549 + }, + { + "epoch": 1.7408346783490893, + "grad_norm": 1.5756472296671777, + "learning_rate": 9.058410254166415e-08, + "loss": 0.48934412002563477, + "step": 7550 + }, + { + "epoch": 1.741065252478672, + "grad_norm": 2.3682357853653895, + "learning_rate": 9.042560671277177e-08, + "loss": 0.5749069452285767, + "step": 7551 + }, + { + "epoch": 1.7412958266082545, + "grad_norm": 1.4990310296288942, + "learning_rate": 9.026724309973588e-08, + "loss": 0.4760423004627228, + "step": 7552 + }, + { + "epoch": 1.7415264007378373, + "grad_norm": 1.38070744019409, + "learning_rate": 9.010901172557594e-08, + "loss": 0.43080049753189087, + "step": 7553 + }, + { + "epoch": 1.7417569748674198, + "grad_norm": 1.4636238536042068, + "learning_rate": 8.99509126132928e-08, + "loss": 0.44850271940231323, + "step": 7554 + }, + { + "epoch": 1.7419875489970025, + "grad_norm": 1.5357653243690434, + "learning_rate": 8.979294578586738e-08, + "loss": 0.34593498706817627, + "step": 7555 + }, + { + "epoch": 1.7422181231265852, + "grad_norm": 1.3635590695208566, + "learning_rate": 8.963511126626188e-08, + "loss": 0.3738324046134949, + "step": 7556 + }, + { + "epoch": 1.742448697256168, + "grad_norm": 1.6262402635208488, + "learning_rate": 8.947740907741952e-08, + "loss": 0.47988662123680115, + "step": 7557 + }, + { + "epoch": 1.7426792713857506, + "grad_norm": 1.904530616299084, + "learning_rate": 8.931983924226338e-08, + "loss": 0.5863034725189209, + "step": 7558 + }, + { + "epoch": 1.7429098455153333, + "grad_norm": 1.497315511162884, + "learning_rate": 8.916240178369827e-08, + "loss": 0.38455232977867126, + "step": 7559 + }, + { + "epoch": 1.7431404196449158, + "grad_norm": 1.711133818053075, + "learning_rate": 8.900509672460899e-08, + "loss": 0.3919760584831238, + "step": 7560 + }, + { + "epoch": 1.7433709937744986, + "grad_norm": 1.8876361089943499, + "learning_rate": 8.884792408786169e-08, + "loss": 0.4090653657913208, + "step": 7561 + }, + { + "epoch": 1.743601567904081, + 
"grad_norm": 1.458591423296693, + "learning_rate": 8.869088389630264e-08, + "loss": 0.42597073316574097, + "step": 7562 + }, + { + "epoch": 1.7438321420336638, + "grad_norm": 1.4410906971279085, + "learning_rate": 8.853397617275959e-08, + "loss": 0.38760805130004883, + "step": 7563 + }, + { + "epoch": 1.7440627161632465, + "grad_norm": 1.3930314463175644, + "learning_rate": 8.837720094004042e-08, + "loss": 0.3753165900707245, + "step": 7564 + }, + { + "epoch": 1.7442932902928292, + "grad_norm": 1.4708100181524995, + "learning_rate": 8.822055822093432e-08, + "loss": 0.5169536471366882, + "step": 7565 + }, + { + "epoch": 1.744523864422412, + "grad_norm": 1.436339252382814, + "learning_rate": 8.806404803821077e-08, + "loss": 0.3886902332305908, + "step": 7566 + }, + { + "epoch": 1.7447544385519944, + "grad_norm": 1.7378167101447366, + "learning_rate": 8.790767041461977e-08, + "loss": 0.48971402645111084, + "step": 7567 + }, + { + "epoch": 1.7449850126815771, + "grad_norm": 1.3555756556469605, + "learning_rate": 8.775142537289282e-08, + "loss": 0.4656449556350708, + "step": 7568 + }, + { + "epoch": 1.7452155868111596, + "grad_norm": 1.24689144854066, + "learning_rate": 8.75953129357414e-08, + "loss": 0.43197786808013916, + "step": 7569 + }, + { + "epoch": 1.7454461609407423, + "grad_norm": 1.6584429086506909, + "learning_rate": 8.743933312585816e-08, + "loss": 0.5062606930732727, + "step": 7570 + }, + { + "epoch": 1.745676735070325, + "grad_norm": 1.714345013647294, + "learning_rate": 8.728348596591639e-08, + "loss": 0.5489983558654785, + "step": 7571 + }, + { + "epoch": 1.7459073091999078, + "grad_norm": 1.4457283500823468, + "learning_rate": 8.712777147857031e-08, + "loss": 0.4351652264595032, + "step": 7572 + }, + { + "epoch": 1.7461378833294905, + "grad_norm": 2.160367880410759, + "learning_rate": 8.697218968645403e-08, + "loss": 0.5096884965896606, + "step": 7573 + }, + { + "epoch": 1.7463684574590732, + "grad_norm": 1.2837319415683648, + "learning_rate": 
8.681674061218347e-08, + "loss": 0.3127269744873047, + "step": 7574 + }, + { + "epoch": 1.7465990315886557, + "grad_norm": 1.8378362837335938, + "learning_rate": 8.666142427835443e-08, + "loss": 0.4738629460334778, + "step": 7575 + }, + { + "epoch": 1.7468296057182384, + "grad_norm": 1.5090024147723615, + "learning_rate": 8.650624070754375e-08, + "loss": 0.46921902894973755, + "step": 7576 + }, + { + "epoch": 1.747060179847821, + "grad_norm": 1.578667567709185, + "learning_rate": 8.635118992230906e-08, + "loss": 0.5296987891197205, + "step": 7577 + }, + { + "epoch": 1.7472907539774036, + "grad_norm": 1.1732895039201416, + "learning_rate": 8.619627194518819e-08, + "loss": 0.3522387742996216, + "step": 7578 + }, + { + "epoch": 1.7475213281069863, + "grad_norm": 1.550879536093582, + "learning_rate": 8.604148679870049e-08, + "loss": 0.42747724056243896, + "step": 7579 + }, + { + "epoch": 1.747751902236569, + "grad_norm": 1.535695568842986, + "learning_rate": 8.588683450534528e-08, + "loss": 0.399990439414978, + "step": 7580 + }, + { + "epoch": 1.7479824763661518, + "grad_norm": 1.688266581429453, + "learning_rate": 8.573231508760315e-08, + "loss": 0.48220518231391907, + "step": 7581 + }, + { + "epoch": 1.7482130504957345, + "grad_norm": 1.8452105924711204, + "learning_rate": 8.557792856793455e-08, + "loss": 0.5227106213569641, + "step": 7582 + }, + { + "epoch": 1.748443624625317, + "grad_norm": 1.596076015195143, + "learning_rate": 8.542367496878178e-08, + "loss": 0.5436732769012451, + "step": 7583 + }, + { + "epoch": 1.7486741987548997, + "grad_norm": 1.5781135040763308, + "learning_rate": 8.526955431256644e-08, + "loss": 0.48398053646087646, + "step": 7584 + }, + { + "epoch": 1.7489047728844822, + "grad_norm": 1.8109008330023073, + "learning_rate": 8.511556662169217e-08, + "loss": 0.5727924108505249, + "step": 7585 + }, + { + "epoch": 1.749135347014065, + "grad_norm": 1.7451913815699138, + "learning_rate": 8.496171191854229e-08, + "loss": 0.48077693581581116, + 
"step": 7586 + }, + { + "epoch": 1.7493659211436476, + "grad_norm": 1.4513314868999736, + "learning_rate": 8.480799022548113e-08, + "loss": 0.45447635650634766, + "step": 7587 + }, + { + "epoch": 1.7495964952732304, + "grad_norm": 1.7305734402801412, + "learning_rate": 8.465440156485392e-08, + "loss": 0.4605486989021301, + "step": 7588 + }, + { + "epoch": 1.749827069402813, + "grad_norm": 1.6087138586576477, + "learning_rate": 8.450094595898604e-08, + "loss": 0.4229927062988281, + "step": 7589 + }, + { + "epoch": 1.7500576435323958, + "grad_norm": 1.371495589643338, + "learning_rate": 8.434762343018408e-08, + "loss": 0.43005260825157166, + "step": 7590 + }, + { + "epoch": 1.7502882176619783, + "grad_norm": 1.739761797548497, + "learning_rate": 8.41944340007349e-08, + "loss": 0.47446098923683167, + "step": 7591 + }, + { + "epoch": 1.750518791791561, + "grad_norm": 1.6084919754115274, + "learning_rate": 8.40413776929062e-08, + "loss": 0.40554216504096985, + "step": 7592 + }, + { + "epoch": 1.7507493659211435, + "grad_norm": 1.2363538330087616, + "learning_rate": 8.38884545289461e-08, + "loss": 0.4144189953804016, + "step": 7593 + }, + { + "epoch": 1.7509799400507262, + "grad_norm": 1.6677815347140812, + "learning_rate": 8.373566453108361e-08, + "loss": 0.449351966381073, + "step": 7594 + }, + { + "epoch": 1.751210514180309, + "grad_norm": 1.8357616333643774, + "learning_rate": 8.358300772152849e-08, + "loss": 0.4584103226661682, + "step": 7595 + }, + { + "epoch": 1.7514410883098916, + "grad_norm": 1.6545876792386258, + "learning_rate": 8.343048412247066e-08, + "loss": 0.4739362895488739, + "step": 7596 + }, + { + "epoch": 1.7516716624394744, + "grad_norm": 1.3684829539670578, + "learning_rate": 8.327809375608131e-08, + "loss": 0.3970356583595276, + "step": 7597 + }, + { + "epoch": 1.751902236569057, + "grad_norm": 1.390074068538192, + "learning_rate": 8.312583664451157e-08, + "loss": 0.4298238754272461, + "step": 7598 + }, + { + "epoch": 1.7521328106986396, + 
"grad_norm": 1.5218432452457022, + "learning_rate": 8.297371280989385e-08, + "loss": 0.4920361340045929, + "step": 7599 + }, + { + "epoch": 1.7523633848282223, + "grad_norm": 1.6001856104794878, + "learning_rate": 8.282172227434059e-08, + "loss": 0.5035870671272278, + "step": 7600 + }, + { + "epoch": 1.7525939589578048, + "grad_norm": 1.8053658495544915, + "learning_rate": 8.266986505994555e-08, + "loss": 0.373248815536499, + "step": 7601 + }, + { + "epoch": 1.7528245330873875, + "grad_norm": 2.0338367024251345, + "learning_rate": 8.25181411887822e-08, + "loss": 0.48491543531417847, + "step": 7602 + }, + { + "epoch": 1.7530551072169702, + "grad_norm": 1.6403088167242337, + "learning_rate": 8.236655068290554e-08, + "loss": 0.4298476576805115, + "step": 7603 + }, + { + "epoch": 1.753285681346553, + "grad_norm": 1.5503246605292686, + "learning_rate": 8.221509356435064e-08, + "loss": 0.48804932832717896, + "step": 7604 + }, + { + "epoch": 1.7535162554761357, + "grad_norm": 1.595278442494436, + "learning_rate": 8.206376985513353e-08, + "loss": 0.467857301235199, + "step": 7605 + }, + { + "epoch": 1.7537468296057184, + "grad_norm": 1.8978537163965867, + "learning_rate": 8.19125795772504e-08, + "loss": 0.48995548486709595, + "step": 7606 + }, + { + "epoch": 1.7539774037353009, + "grad_norm": 1.488521983097995, + "learning_rate": 8.176152275267823e-08, + "loss": 0.4459487795829773, + "step": 7607 + }, + { + "epoch": 1.7542079778648836, + "grad_norm": 1.4326042778667836, + "learning_rate": 8.1610599403375e-08, + "loss": 0.5054866671562195, + "step": 7608 + }, + { + "epoch": 1.754438551994466, + "grad_norm": 1.4563884146816763, + "learning_rate": 8.145980955127862e-08, + "loss": 0.46223869919776917, + "step": 7609 + }, + { + "epoch": 1.7546691261240488, + "grad_norm": 1.696768225081691, + "learning_rate": 8.1309153218308e-08, + "loss": 0.4743426442146301, + "step": 7610 + }, + { + "epoch": 1.7548997002536315, + "grad_norm": 1.7623915082520603, + "learning_rate": 
8.115863042636262e-08, + "loss": 0.40808072686195374, + "step": 7611 + }, + { + "epoch": 1.7551302743832142, + "grad_norm": 1.3859431275297254, + "learning_rate": 8.100824119732263e-08, + "loss": 0.4452321231365204, + "step": 7612 + }, + { + "epoch": 1.755360848512797, + "grad_norm": 1.556764426976114, + "learning_rate": 8.085798555304824e-08, + "loss": 0.4211857318878174, + "step": 7613 + }, + { + "epoch": 1.7555914226423797, + "grad_norm": 1.5080375348033017, + "learning_rate": 8.070786351538117e-08, + "loss": 0.3356667757034302, + "step": 7614 + }, + { + "epoch": 1.7558219967719622, + "grad_norm": 1.7842469682737618, + "learning_rate": 8.055787510614287e-08, + "loss": 0.4636021852493286, + "step": 7615 + }, + { + "epoch": 1.7560525709015449, + "grad_norm": 1.624229543588168, + "learning_rate": 8.040802034713546e-08, + "loss": 0.4066168963909149, + "step": 7616 + }, + { + "epoch": 1.7562831450311274, + "grad_norm": 1.4896510438449921, + "learning_rate": 8.025829926014216e-08, + "loss": 0.426937460899353, + "step": 7617 + }, + { + "epoch": 1.75651371916071, + "grad_norm": 1.838065393231424, + "learning_rate": 8.010871186692625e-08, + "loss": 0.464493989944458, + "step": 7618 + }, + { + "epoch": 1.7567442932902928, + "grad_norm": 1.7522078931434732, + "learning_rate": 7.995925818923222e-08, + "loss": 0.44130605459213257, + "step": 7619 + }, + { + "epoch": 1.7569748674198755, + "grad_norm": 1.6877219329526134, + "learning_rate": 7.980993824878402e-08, + "loss": 0.5241909027099609, + "step": 7620 + }, + { + "epoch": 1.7572054415494582, + "grad_norm": 1.605603526262718, + "learning_rate": 7.96607520672874e-08, + "loss": 0.45450860261917114, + "step": 7621 + }, + { + "epoch": 1.757436015679041, + "grad_norm": 1.6393742771356723, + "learning_rate": 7.951169966642757e-08, + "loss": 0.443767786026001, + "step": 7622 + }, + { + "epoch": 1.7576665898086234, + "grad_norm": 1.5258486167332923, + "learning_rate": 7.936278106787131e-08, + "loss": 0.3951075077056885, + "step": 
7623 + }, + { + "epoch": 1.7578971639382062, + "grad_norm": 1.8216713225734935, + "learning_rate": 7.921399629326509e-08, + "loss": 0.44628477096557617, + "step": 7624 + }, + { + "epoch": 1.7581277380677887, + "grad_norm": 1.7421703870668572, + "learning_rate": 7.906534536423648e-08, + "loss": 0.38743889331817627, + "step": 7625 + }, + { + "epoch": 1.7583583121973714, + "grad_norm": 1.4726686928375068, + "learning_rate": 7.891682830239311e-08, + "loss": 0.4338032007217407, + "step": 7626 + }, + { + "epoch": 1.758588886326954, + "grad_norm": 1.7605246972541082, + "learning_rate": 7.876844512932367e-08, + "loss": 0.47387874126434326, + "step": 7627 + }, + { + "epoch": 1.7588194604565368, + "grad_norm": 1.6222674378421518, + "learning_rate": 7.86201958665973e-08, + "loss": 0.4082717299461365, + "step": 7628 + }, + { + "epoch": 1.7590500345861195, + "grad_norm": 1.462169761343313, + "learning_rate": 7.847208053576326e-08, + "loss": 0.4254682958126068, + "step": 7629 + }, + { + "epoch": 1.7592806087157022, + "grad_norm": 1.319688989297758, + "learning_rate": 7.832409915835181e-08, + "loss": 0.3572045564651489, + "step": 7630 + }, + { + "epoch": 1.7595111828452847, + "grad_norm": 1.398732808330898, + "learning_rate": 7.817625175587328e-08, + "loss": 0.39110279083251953, + "step": 7631 + }, + { + "epoch": 1.7597417569748675, + "grad_norm": 2.455493892116574, + "learning_rate": 7.802853834981926e-08, + "loss": 0.49292176961898804, + "step": 7632 + }, + { + "epoch": 1.75997233110445, + "grad_norm": 1.460109162216243, + "learning_rate": 7.78809589616608e-08, + "loss": 0.4271275997161865, + "step": 7633 + }, + { + "epoch": 1.7602029052340327, + "grad_norm": 1.5973984242111468, + "learning_rate": 7.77335136128503e-08, + "loss": 0.470772922039032, + "step": 7634 + }, + { + "epoch": 1.7604334793636154, + "grad_norm": 1.5415713448452681, + "learning_rate": 7.758620232482083e-08, + "loss": 0.4872988760471344, + "step": 7635 + }, + { + "epoch": 1.760664053493198, + "grad_norm": 
1.2959777480648245, + "learning_rate": 7.743902511898492e-08, + "loss": 0.4300990104675293, + "step": 7636 + }, + { + "epoch": 1.7608946276227808, + "grad_norm": 1.4331560277043864, + "learning_rate": 7.729198201673682e-08, + "loss": 0.4524795711040497, + "step": 7637 + }, + { + "epoch": 1.7611252017523635, + "grad_norm": 1.580884966063861, + "learning_rate": 7.714507303945028e-08, + "loss": 0.4673241376876831, + "step": 7638 + }, + { + "epoch": 1.761355775881946, + "grad_norm": 1.7656151539321776, + "learning_rate": 7.699829820848048e-08, + "loss": 0.5171443223953247, + "step": 7639 + }, + { + "epoch": 1.7615863500115287, + "grad_norm": 1.5721911288259287, + "learning_rate": 7.68516575451621e-08, + "loss": 0.44416171312332153, + "step": 7640 + }, + { + "epoch": 1.7618169241411112, + "grad_norm": 1.8596688405579505, + "learning_rate": 7.670515107081122e-08, + "loss": 0.4456225633621216, + "step": 7641 + }, + { + "epoch": 1.762047498270694, + "grad_norm": 1.427384194238264, + "learning_rate": 7.65587788067239e-08, + "loss": 0.5235984921455383, + "step": 7642 + }, + { + "epoch": 1.7622780724002767, + "grad_norm": 1.5098894741733768, + "learning_rate": 7.641254077417702e-08, + "loss": 0.4957311749458313, + "step": 7643 + }, + { + "epoch": 1.7625086465298594, + "grad_norm": 1.9524483698152115, + "learning_rate": 7.626643699442748e-08, + "loss": 0.48401015996932983, + "step": 7644 + }, + { + "epoch": 1.762739220659442, + "grad_norm": 1.5925905896008645, + "learning_rate": 7.612046748871326e-08, + "loss": 0.5440249443054199, + "step": 7645 + }, + { + "epoch": 1.7629697947890248, + "grad_norm": 1.5363697612706335, + "learning_rate": 7.597463227825229e-08, + "loss": 0.3922181725502014, + "step": 7646 + }, + { + "epoch": 1.7632003689186073, + "grad_norm": 1.7121602067196948, + "learning_rate": 7.582893138424318e-08, + "loss": 0.4679541289806366, + "step": 7647 + }, + { + "epoch": 1.76343094304819, + "grad_norm": 1.63738592997542, + "learning_rate": 7.568336482786508e-08, + 
"loss": 0.4461076557636261, + "step": 7648 + }, + { + "epoch": 1.7636615171777725, + "grad_norm": 1.769800706819883, + "learning_rate": 7.553793263027752e-08, + "loss": 0.4028201997280121, + "step": 7649 + }, + { + "epoch": 1.7638920913073552, + "grad_norm": 1.6924130336118084, + "learning_rate": 7.53926348126206e-08, + "loss": 0.47307640314102173, + "step": 7650 + }, + { + "epoch": 1.764122665436938, + "grad_norm": 1.7236868707009407, + "learning_rate": 7.524747139601473e-08, + "loss": 0.4763333201408386, + "step": 7651 + }, + { + "epoch": 1.7643532395665207, + "grad_norm": 1.5475351462285587, + "learning_rate": 7.510244240156127e-08, + "loss": 0.5062815546989441, + "step": 7652 + }, + { + "epoch": 1.7645838136961034, + "grad_norm": 1.4648234779945293, + "learning_rate": 7.495754785034114e-08, + "loss": 0.38344740867614746, + "step": 7653 + }, + { + "epoch": 1.7648143878256861, + "grad_norm": 1.5630602768230752, + "learning_rate": 7.48127877634166e-08, + "loss": 0.36255425214767456, + "step": 7654 + }, + { + "epoch": 1.7650449619552686, + "grad_norm": 1.4144647369682326, + "learning_rate": 7.466816216182969e-08, + "loss": 0.4136468172073364, + "step": 7655 + }, + { + "epoch": 1.7652755360848513, + "grad_norm": 1.5589028620208925, + "learning_rate": 7.452367106660351e-08, + "loss": 0.4294041395187378, + "step": 7656 + }, + { + "epoch": 1.7655061102144338, + "grad_norm": 1.5271012787948486, + "learning_rate": 7.437931449874101e-08, + "loss": 0.3865356147289276, + "step": 7657 + }, + { + "epoch": 1.7657366843440165, + "grad_norm": 1.5355711497321805, + "learning_rate": 7.42350924792261e-08, + "loss": 0.44538289308547974, + "step": 7658 + }, + { + "epoch": 1.7659672584735993, + "grad_norm": 1.6285566114230512, + "learning_rate": 7.409100502902299e-08, + "loss": 0.4943844676017761, + "step": 7659 + }, + { + "epoch": 1.766197832603182, + "grad_norm": 1.759721404059002, + "learning_rate": 7.394705216907582e-08, + "loss": 0.41705092787742615, + "step": 7660 + }, + { + 
"epoch": 1.7664284067327647, + "grad_norm": 1.4175389623557053, + "learning_rate": 7.380323392031018e-08, + "loss": 0.4304206967353821, + "step": 7661 + }, + { + "epoch": 1.7666589808623474, + "grad_norm": 1.3933381760031749, + "learning_rate": 7.365955030363102e-08, + "loss": 0.4830179214477539, + "step": 7662 + }, + { + "epoch": 1.76688955499193, + "grad_norm": 1.51616499834235, + "learning_rate": 7.351600133992452e-08, + "loss": 0.47749078273773193, + "step": 7663 + }, + { + "epoch": 1.7671201291215126, + "grad_norm": 1.4074934707168656, + "learning_rate": 7.337258705005667e-08, + "loss": 0.3899204730987549, + "step": 7664 + }, + { + "epoch": 1.7673507032510951, + "grad_norm": 1.4123867126002758, + "learning_rate": 7.322930745487443e-08, + "loss": 0.4621524214744568, + "step": 7665 + }, + { + "epoch": 1.7675812773806778, + "grad_norm": 1.725639837898645, + "learning_rate": 7.308616257520506e-08, + "loss": 0.5305047035217285, + "step": 7666 + }, + { + "epoch": 1.7678118515102605, + "grad_norm": 2.1356750734168646, + "learning_rate": 7.294315243185578e-08, + "loss": 0.5894631147384644, + "step": 7667 + }, + { + "epoch": 1.7680424256398433, + "grad_norm": 1.5389151696841823, + "learning_rate": 7.280027704561498e-08, + "loss": 0.38509970903396606, + "step": 7668 + }, + { + "epoch": 1.768272999769426, + "grad_norm": 1.7309245548099654, + "learning_rate": 7.265753643725048e-08, + "loss": 0.45494410395622253, + "step": 7669 + }, + { + "epoch": 1.7685035738990087, + "grad_norm": 1.7035489800713894, + "learning_rate": 7.251493062751169e-08, + "loss": 0.4819248914718628, + "step": 7670 + }, + { + "epoch": 1.7687341480285912, + "grad_norm": 1.4325571648838293, + "learning_rate": 7.237245963712724e-08, + "loss": 0.43286386132240295, + "step": 7671 + }, + { + "epoch": 1.768964722158174, + "grad_norm": 1.3036122364237743, + "learning_rate": 7.223012348680724e-08, + "loss": 0.4285479187965393, + "step": 7672 + }, + { + "epoch": 1.7691952962877564, + "grad_norm": 
1.6598071005655777, + "learning_rate": 7.208792219724124e-08, + "loss": 0.42678505182266235, + "step": 7673 + }, + { + "epoch": 1.7694258704173391, + "grad_norm": 1.647090361621967, + "learning_rate": 7.194585578909995e-08, + "loss": 0.47091686725616455, + "step": 7674 + }, + { + "epoch": 1.7696564445469218, + "grad_norm": 1.5115484466399114, + "learning_rate": 7.180392428303394e-08, + "loss": 0.41932445764541626, + "step": 7675 + }, + { + "epoch": 1.7698870186765046, + "grad_norm": 1.2463006271885857, + "learning_rate": 7.166212769967483e-08, + "loss": 0.4043616056442261, + "step": 7676 + }, + { + "epoch": 1.7701175928060873, + "grad_norm": 1.5310666660883137, + "learning_rate": 7.15204660596338e-08, + "loss": 0.395826518535614, + "step": 7677 + }, + { + "epoch": 1.7703481669356698, + "grad_norm": 1.4874807127430703, + "learning_rate": 7.13789393835027e-08, + "loss": 0.4684498906135559, + "step": 7678 + }, + { + "epoch": 1.7705787410652525, + "grad_norm": 1.8560085011670902, + "learning_rate": 7.12375476918542e-08, + "loss": 0.4713285565376282, + "step": 7679 + }, + { + "epoch": 1.770809315194835, + "grad_norm": 1.487262641155755, + "learning_rate": 7.109629100524073e-08, + "loss": 0.47559499740600586, + "step": 7680 + }, + { + "epoch": 1.7710398893244177, + "grad_norm": 1.5741914036439861, + "learning_rate": 7.095516934419554e-08, + "loss": 0.5364210605621338, + "step": 7681 + }, + { + "epoch": 1.7712704634540004, + "grad_norm": 1.942648846069337, + "learning_rate": 7.081418272923212e-08, + "loss": 0.5731894969940186, + "step": 7682 + }, + { + "epoch": 1.7715010375835831, + "grad_norm": 1.7006107903804015, + "learning_rate": 7.067333118084428e-08, + "loss": 0.4287458062171936, + "step": 7683 + }, + { + "epoch": 1.7717316117131658, + "grad_norm": 1.5575643616743255, + "learning_rate": 7.053261471950612e-08, + "loss": 0.3849913775920868, + "step": 7684 + }, + { + "epoch": 1.7719621858427486, + "grad_norm": 1.4243498094919005, + "learning_rate": 
7.039203336567245e-08, + "loss": 0.4933156371116638, + "step": 7685 + }, + { + "epoch": 1.772192759972331, + "grad_norm": 1.897795122632639, + "learning_rate": 7.025158713977808e-08, + "loss": 0.5185002088546753, + "step": 7686 + }, + { + "epoch": 1.7724233341019138, + "grad_norm": 1.634847266537775, + "learning_rate": 7.011127606223799e-08, + "loss": 0.514995276927948, + "step": 7687 + }, + { + "epoch": 1.7726539082314963, + "grad_norm": 1.5845868665458605, + "learning_rate": 6.99711001534481e-08, + "loss": 0.4362761676311493, + "step": 7688 + }, + { + "epoch": 1.772884482361079, + "grad_norm": 1.699858455397738, + "learning_rate": 6.983105943378431e-08, + "loss": 0.44117432832717896, + "step": 7689 + }, + { + "epoch": 1.7731150564906617, + "grad_norm": 1.5875521204144505, + "learning_rate": 6.969115392360325e-08, + "loss": 0.4940808415412903, + "step": 7690 + }, + { + "epoch": 1.7733456306202444, + "grad_norm": 1.9046624573594293, + "learning_rate": 6.955138364324109e-08, + "loss": 0.4322758913040161, + "step": 7691 + }, + { + "epoch": 1.7735762047498271, + "grad_norm": 1.467450936859881, + "learning_rate": 6.941174861301536e-08, + "loss": 0.3867933750152588, + "step": 7692 + }, + { + "epoch": 1.7738067788794099, + "grad_norm": 1.6321329987514115, + "learning_rate": 6.927224885322302e-08, + "loss": 0.4380000829696655, + "step": 7693 + }, + { + "epoch": 1.7740373530089923, + "grad_norm": 1.7183023620516549, + "learning_rate": 6.913288438414222e-08, + "loss": 0.46499723196029663, + "step": 7694 + }, + { + "epoch": 1.774267927138575, + "grad_norm": 1.6625572218896962, + "learning_rate": 6.89936552260304e-08, + "loss": 0.4845675230026245, + "step": 7695 + }, + { + "epoch": 1.7744985012681576, + "grad_norm": 1.3920222388819354, + "learning_rate": 6.88545613991266e-08, + "loss": 0.3755526542663574, + "step": 7696 + }, + { + "epoch": 1.7747290753977403, + "grad_norm": 1.358162383242242, + "learning_rate": 6.871560292364887e-08, + "loss": 0.4765484929084778, + "step": 
7697 + }, + { + "epoch": 1.774959649527323, + "grad_norm": 1.5701618596645643, + "learning_rate": 6.857677981979659e-08, + "loss": 0.4176154136657715, + "step": 7698 + }, + { + "epoch": 1.7751902236569057, + "grad_norm": 1.5881043143352427, + "learning_rate": 6.84380921077492e-08, + "loss": 0.410483717918396, + "step": 7699 + }, + { + "epoch": 1.7754207977864884, + "grad_norm": 1.876508092569716, + "learning_rate": 6.829953980766612e-08, + "loss": 0.5188060998916626, + "step": 7700 + }, + { + "epoch": 1.7756513719160711, + "grad_norm": 1.5514145308665186, + "learning_rate": 6.816112293968745e-08, + "loss": 0.47039783000946045, + "step": 7701 + }, + { + "epoch": 1.7758819460456536, + "grad_norm": 1.6296649452825585, + "learning_rate": 6.802284152393345e-08, + "loss": 0.5367648601531982, + "step": 7702 + }, + { + "epoch": 1.7761125201752364, + "grad_norm": 1.55513001656084, + "learning_rate": 6.78846955805048e-08, + "loss": 0.500449538230896, + "step": 7703 + }, + { + "epoch": 1.7763430943048188, + "grad_norm": 1.5060722099238588, + "learning_rate": 6.774668512948234e-08, + "loss": 0.4579819440841675, + "step": 7704 + }, + { + "epoch": 1.7765736684344016, + "grad_norm": 1.7824280377613644, + "learning_rate": 6.760881019092712e-08, + "loss": 0.41459107398986816, + "step": 7705 + }, + { + "epoch": 1.7768042425639843, + "grad_norm": 1.7900526752813857, + "learning_rate": 6.747107078488112e-08, + "loss": 0.46020573377609253, + "step": 7706 + }, + { + "epoch": 1.777034816693567, + "grad_norm": 1.7709884076088374, + "learning_rate": 6.733346693136566e-08, + "loss": 0.48069459199905396, + "step": 7707 + }, + { + "epoch": 1.7772653908231497, + "grad_norm": 1.4499402707441236, + "learning_rate": 6.719599865038328e-08, + "loss": 0.3514458239078522, + "step": 7708 + }, + { + "epoch": 1.7774959649527324, + "grad_norm": 1.7044500533180955, + "learning_rate": 6.705866596191601e-08, + "loss": 0.4696041941642761, + "step": 7709 + }, + { + "epoch": 1.777726539082315, + "grad_norm": 
1.6058185659780073, + "learning_rate": 6.692146888592675e-08, + "loss": 0.45286083221435547, + "step": 7710 + }, + { + "epoch": 1.7779571132118976, + "grad_norm": 1.8525271361461533, + "learning_rate": 6.678440744235848e-08, + "loss": 0.4659677743911743, + "step": 7711 + }, + { + "epoch": 1.7781876873414801, + "grad_norm": 1.5770202034991272, + "learning_rate": 6.664748165113432e-08, + "loss": 0.4030906558036804, + "step": 7712 + }, + { + "epoch": 1.7784182614710629, + "grad_norm": 1.4781448065809968, + "learning_rate": 6.651069153215804e-08, + "loss": 0.4878493547439575, + "step": 7713 + }, + { + "epoch": 1.7786488356006456, + "grad_norm": 2.5716911461046115, + "learning_rate": 6.637403710531352e-08, + "loss": 0.4651924669742584, + "step": 7714 + }, + { + "epoch": 1.7788794097302283, + "grad_norm": 1.5268258649377473, + "learning_rate": 6.623751839046455e-08, + "loss": 0.37795954942703247, + "step": 7715 + }, + { + "epoch": 1.779109983859811, + "grad_norm": 1.8617699048987524, + "learning_rate": 6.610113540745577e-08, + "loss": 0.5722923278808594, + "step": 7716 + }, + { + "epoch": 1.7793405579893937, + "grad_norm": 2.039919155814789, + "learning_rate": 6.59648881761118e-08, + "loss": 0.46933984756469727, + "step": 7717 + }, + { + "epoch": 1.7795711321189762, + "grad_norm": 1.7692714186594531, + "learning_rate": 6.582877671623732e-08, + "loss": 0.5066707134246826, + "step": 7718 + }, + { + "epoch": 1.779801706248559, + "grad_norm": 1.5518843020711044, + "learning_rate": 6.569280104761787e-08, + "loss": 0.5064150094985962, + "step": 7719 + }, + { + "epoch": 1.7800322803781414, + "grad_norm": 1.4858522723338492, + "learning_rate": 6.555696119001853e-08, + "loss": 0.408633828163147, + "step": 7720 + }, + { + "epoch": 1.7802628545077241, + "grad_norm": 1.9460802080180855, + "learning_rate": 6.542125716318514e-08, + "loss": 0.4960691034793854, + "step": 7721 + }, + { + "epoch": 1.7804934286373069, + "grad_norm": 1.609433139750494, + "learning_rate": 
6.528568898684373e-08, + "loss": 0.4275667071342468, + "step": 7722 + }, + { + "epoch": 1.7807240027668896, + "grad_norm": 1.5242191505097453, + "learning_rate": 6.515025668070062e-08, + "loss": 0.5309962630271912, + "step": 7723 + }, + { + "epoch": 1.7809545768964723, + "grad_norm": 1.3218748644597216, + "learning_rate": 6.501496026444197e-08, + "loss": 0.42067253589630127, + "step": 7724 + }, + { + "epoch": 1.781185151026055, + "grad_norm": 1.5205678956011466, + "learning_rate": 6.487979975773484e-08, + "loss": 0.43419337272644043, + "step": 7725 + }, + { + "epoch": 1.7814157251556375, + "grad_norm": 1.728456021255068, + "learning_rate": 6.474477518022592e-08, + "loss": 0.46563541889190674, + "step": 7726 + }, + { + "epoch": 1.7816462992852202, + "grad_norm": 1.2994636821353438, + "learning_rate": 6.460988655154232e-08, + "loss": 0.4233010411262512, + "step": 7727 + }, + { + "epoch": 1.7818768734148027, + "grad_norm": 1.5541073736247684, + "learning_rate": 6.447513389129155e-08, + "loss": 0.47119754552841187, + "step": 7728 + }, + { + "epoch": 1.7821074475443854, + "grad_norm": 1.7457851161988949, + "learning_rate": 6.434051721906142e-08, + "loss": 0.5227707624435425, + "step": 7729 + }, + { + "epoch": 1.7823380216739682, + "grad_norm": 1.6453844551794445, + "learning_rate": 6.42060365544198e-08, + "loss": 0.4521239399909973, + "step": 7730 + }, + { + "epoch": 1.7825685958035509, + "grad_norm": 1.5739071323130231, + "learning_rate": 6.407169191691464e-08, + "loss": 0.36693084239959717, + "step": 7731 + }, + { + "epoch": 1.7827991699331336, + "grad_norm": 1.9032214424835083, + "learning_rate": 6.393748332607463e-08, + "loss": 0.43610745668411255, + "step": 7732 + }, + { + "epoch": 1.7830297440627163, + "grad_norm": 1.4784257370105836, + "learning_rate": 6.380341080140794e-08, + "loss": 0.4471576511859894, + "step": 7733 + }, + { + "epoch": 1.7832603181922988, + "grad_norm": 1.61284007349941, + "learning_rate": 6.366947436240367e-08, + "loss": 0.48119011521339417, 
+ "step": 7734 + }, + { + "epoch": 1.7834908923218815, + "grad_norm": 1.4393647934894105, + "learning_rate": 6.353567402853055e-08, + "loss": 0.44503623247146606, + "step": 7735 + }, + { + "epoch": 1.783721466451464, + "grad_norm": 1.3430253886827939, + "learning_rate": 6.340200981923804e-08, + "loss": 0.3350965678691864, + "step": 7736 + }, + { + "epoch": 1.7839520405810467, + "grad_norm": 1.4031838686370632, + "learning_rate": 6.326848175395572e-08, + "loss": 0.4814649224281311, + "step": 7737 + }, + { + "epoch": 1.7841826147106294, + "grad_norm": 1.3042254858214102, + "learning_rate": 6.313508985209281e-08, + "loss": 0.42114442586898804, + "step": 7738 + }, + { + "epoch": 1.7844131888402122, + "grad_norm": 1.4924201661244643, + "learning_rate": 6.30018341330396e-08, + "loss": 0.5044004917144775, + "step": 7739 + }, + { + "epoch": 1.7846437629697949, + "grad_norm": 1.7211591431218773, + "learning_rate": 6.286871461616594e-08, + "loss": 0.46084678173065186, + "step": 7740 + }, + { + "epoch": 1.7848743370993776, + "grad_norm": 1.8074380950640034, + "learning_rate": 6.273573132082222e-08, + "loss": 0.5159536600112915, + "step": 7741 + }, + { + "epoch": 1.78510491122896, + "grad_norm": 2.6340339816007394, + "learning_rate": 6.260288426633875e-08, + "loss": 0.4394105076789856, + "step": 7742 + }, + { + "epoch": 1.7853354853585428, + "grad_norm": 1.415651636415873, + "learning_rate": 6.247017347202643e-08, + "loss": 0.39798909425735474, + "step": 7743 + }, + { + "epoch": 1.7855660594881253, + "grad_norm": 1.439083218855293, + "learning_rate": 6.23375989571756e-08, + "loss": 0.3865649104118347, + "step": 7744 + }, + { + "epoch": 1.785796633617708, + "grad_norm": 1.3172940172138528, + "learning_rate": 6.220516074105808e-08, + "loss": 0.3641304671764374, + "step": 7745 + }, + { + "epoch": 1.7860272077472907, + "grad_norm": 1.7148086023867872, + "learning_rate": 6.207285884292468e-08, + "loss": 0.5025773644447327, + "step": 7746 + }, + { + "epoch": 1.7862577818768735, + 
"grad_norm": 1.5237733931532715, + "learning_rate": 6.194069328200669e-08, + "loss": 0.4289078414440155, + "step": 7747 + }, + { + "epoch": 1.7864883560064562, + "grad_norm": 1.5368409458369108, + "learning_rate": 6.180866407751595e-08, + "loss": 0.37442147731781006, + "step": 7748 + }, + { + "epoch": 1.7867189301360389, + "grad_norm": 1.6962674881863276, + "learning_rate": 6.167677124864412e-08, + "loss": 0.4975471794605255, + "step": 7749 + }, + { + "epoch": 1.7869495042656214, + "grad_norm": 1.7290797112616507, + "learning_rate": 6.154501481456331e-08, + "loss": 0.42754751443862915, + "step": 7750 + }, + { + "epoch": 1.787180078395204, + "grad_norm": 1.508949301788889, + "learning_rate": 6.141339479442542e-08, + "loss": 0.40203964710235596, + "step": 7751 + }, + { + "epoch": 1.7874106525247866, + "grad_norm": 1.6453479393381845, + "learning_rate": 6.128191120736293e-08, + "loss": 0.46465349197387695, + "step": 7752 + }, + { + "epoch": 1.7876412266543693, + "grad_norm": 1.527112166022553, + "learning_rate": 6.11505640724882e-08, + "loss": 0.43915730714797974, + "step": 7753 + }, + { + "epoch": 1.787871800783952, + "grad_norm": 1.6855929805801586, + "learning_rate": 6.101935340889419e-08, + "loss": 0.5205652713775635, + "step": 7754 + }, + { + "epoch": 1.7881023749135347, + "grad_norm": 1.8024849017160496, + "learning_rate": 6.088827923565321e-08, + "loss": 0.39400190114974976, + "step": 7755 + }, + { + "epoch": 1.7883329490431175, + "grad_norm": 1.585632228373493, + "learning_rate": 6.075734157181855e-08, + "loss": 0.48021531105041504, + "step": 7756 + }, + { + "epoch": 1.7885635231727002, + "grad_norm": 1.313118747015303, + "learning_rate": 6.062654043642334e-08, + "loss": 0.42780327796936035, + "step": 7757 + }, + { + "epoch": 1.7887940973022827, + "grad_norm": 1.5444008946931698, + "learning_rate": 6.049587584848059e-08, + "loss": 0.4307866096496582, + "step": 7758 + }, + { + "epoch": 1.7890246714318654, + "grad_norm": 1.8803266889221286, + "learning_rate": 
6.036534782698377e-08, + "loss": 0.4258533716201782, + "step": 7759 + }, + { + "epoch": 1.7892552455614479, + "grad_norm": 1.7033971690196206, + "learning_rate": 6.02349563909067e-08, + "loss": 0.5159060955047607, + "step": 7760 + }, + { + "epoch": 1.7894858196910306, + "grad_norm": 1.4016246032179807, + "learning_rate": 6.0104701559203e-08, + "loss": 0.4407171308994293, + "step": 7761 + }, + { + "epoch": 1.7897163938206133, + "grad_norm": 1.4060175796774192, + "learning_rate": 5.99745833508063e-08, + "loss": 0.40273964405059814, + "step": 7762 + }, + { + "epoch": 1.789946967950196, + "grad_norm": 1.5929040194351833, + "learning_rate": 5.984460178463102e-08, + "loss": 0.42018163204193115, + "step": 7763 + }, + { + "epoch": 1.7901775420797787, + "grad_norm": 1.5421517490968868, + "learning_rate": 5.971475687957084e-08, + "loss": 0.519807755947113, + "step": 7764 + }, + { + "epoch": 1.7904081162093615, + "grad_norm": 1.4320196013314206, + "learning_rate": 5.9585048654500535e-08, + "loss": 0.42557477951049805, + "step": 7765 + }, + { + "epoch": 1.790638690338944, + "grad_norm": 1.520426042431449, + "learning_rate": 5.9455477128273924e-08, + "loss": 0.39568305015563965, + "step": 7766 + }, + { + "epoch": 1.7908692644685267, + "grad_norm": 1.566797519717712, + "learning_rate": 5.932604231972593e-08, + "loss": 0.43125781416893005, + "step": 7767 + }, + { + "epoch": 1.7910998385981092, + "grad_norm": 1.5764190405770546, + "learning_rate": 5.919674424767129e-08, + "loss": 0.46194958686828613, + "step": 7768 + }, + { + "epoch": 1.791330412727692, + "grad_norm": 1.3811294262508054, + "learning_rate": 5.906758293090441e-08, + "loss": 0.40115779638290405, + "step": 7769 + }, + { + "epoch": 1.7915609868572746, + "grad_norm": 1.4511176958262644, + "learning_rate": 5.893855838820061e-08, + "loss": 0.46589648723602295, + "step": 7770 + }, + { + "epoch": 1.7917915609868573, + "grad_norm": 1.4613820552852321, + "learning_rate": 5.880967063831455e-08, + "loss": 0.3540228605270386, + 
"step": 7771 + }, + { + "epoch": 1.79202213511644, + "grad_norm": 1.3900736631273891, + "learning_rate": 5.868091969998168e-08, + "loss": 0.4324638545513153, + "step": 7772 + }, + { + "epoch": 1.7922527092460228, + "grad_norm": 1.426811730253004, + "learning_rate": 5.855230559191693e-08, + "loss": 0.4301075339317322, + "step": 7773 + }, + { + "epoch": 1.7924832833756053, + "grad_norm": 1.4903234676277026, + "learning_rate": 5.842382833281612e-08, + "loss": 0.4496096670627594, + "step": 7774 + }, + { + "epoch": 1.792713857505188, + "grad_norm": 1.7119132871592322, + "learning_rate": 5.8295487941354195e-08, + "loss": 0.4554907977581024, + "step": 7775 + }, + { + "epoch": 1.7929444316347705, + "grad_norm": 1.6357284914311145, + "learning_rate": 5.816728443618701e-08, + "loss": 0.5020148158073425, + "step": 7776 + }, + { + "epoch": 1.7931750057643532, + "grad_norm": 1.5886767874513543, + "learning_rate": 5.803921783595045e-08, + "loss": 0.4073353409767151, + "step": 7777 + }, + { + "epoch": 1.793405579893936, + "grad_norm": 1.7806143022342438, + "learning_rate": 5.791128815925983e-08, + "loss": 0.4995894432067871, + "step": 7778 + }, + { + "epoch": 1.7936361540235186, + "grad_norm": 1.4290018525481676, + "learning_rate": 5.778349542471139e-08, + "loss": 0.5383706092834473, + "step": 7779 + }, + { + "epoch": 1.7938667281531013, + "grad_norm": 1.5928372327878688, + "learning_rate": 5.765583965088083e-08, + "loss": 0.4206235408782959, + "step": 7780 + }, + { + "epoch": 1.794097302282684, + "grad_norm": 1.516533597399375, + "learning_rate": 5.752832085632453e-08, + "loss": 0.49053555727005005, + "step": 7781 + }, + { + "epoch": 1.7943278764122665, + "grad_norm": 1.4761016261714877, + "learning_rate": 5.740093905957832e-08, + "loss": 0.4372660517692566, + "step": 7782 + }, + { + "epoch": 1.7945584505418493, + "grad_norm": 1.364372499711938, + "learning_rate": 5.727369427915851e-08, + "loss": 0.40125733613967896, + "step": 7783 + }, + { + "epoch": 1.7947890246714318, + 
"grad_norm": 1.5421908029736124, + "learning_rate": 5.714658653356153e-08, + "loss": 0.3595162034034729, + "step": 7784 + }, + { + "epoch": 1.7950195988010145, + "grad_norm": 1.4909078230640012, + "learning_rate": 5.7019615841263915e-08, + "loss": 0.42618101835250854, + "step": 7785 + }, + { + "epoch": 1.7952501729305972, + "grad_norm": 1.2890347032019704, + "learning_rate": 5.6892782220721694e-08, + "loss": 0.39135509729385376, + "step": 7786 + }, + { + "epoch": 1.79548074706018, + "grad_norm": 1.2930421412734876, + "learning_rate": 5.6766085690372004e-08, + "loss": 0.3792929947376251, + "step": 7787 + }, + { + "epoch": 1.7957113211897626, + "grad_norm": 2.137954515105217, + "learning_rate": 5.6639526268631e-08, + "loss": 0.5193231105804443, + "step": 7788 + }, + { + "epoch": 1.7959418953193451, + "grad_norm": 1.3992061535387368, + "learning_rate": 5.6513103973895415e-08, + "loss": 0.3896862268447876, + "step": 7789 + }, + { + "epoch": 1.7961724694489278, + "grad_norm": 1.6107653457361368, + "learning_rate": 5.638681882454211e-08, + "loss": 0.5345273017883301, + "step": 7790 + }, + { + "epoch": 1.7964030435785103, + "grad_norm": 1.597285051654587, + "learning_rate": 5.626067083892794e-08, + "loss": 0.4297627806663513, + "step": 7791 + }, + { + "epoch": 1.796633617708093, + "grad_norm": 1.8890048408663909, + "learning_rate": 5.6134660035389914e-08, + "loss": 0.3176969587802887, + "step": 7792 + }, + { + "epoch": 1.7968641918376758, + "grad_norm": 1.684652354437091, + "learning_rate": 5.600878643224471e-08, + "loss": 0.5449323654174805, + "step": 7793 + }, + { + "epoch": 1.7970947659672585, + "grad_norm": 1.3924882582172304, + "learning_rate": 5.588305004778959e-08, + "loss": 0.38096293807029724, + "step": 7794 + }, + { + "epoch": 1.7973253400968412, + "grad_norm": 1.6284420500901806, + "learning_rate": 5.575745090030137e-08, + "loss": 0.3917475938796997, + "step": 7795 + }, + { + "epoch": 1.797555914226424, + "grad_norm": 1.8012275849309003, + "learning_rate": 
5.563198900803734e-08, + "loss": 0.41522616147994995, + "step": 7796 + }, + { + "epoch": 1.7977864883560064, + "grad_norm": 1.4000666419018515, + "learning_rate": 5.550666438923468e-08, + "loss": 0.46558207273483276, + "step": 7797 + }, + { + "epoch": 1.7980170624855891, + "grad_norm": 1.4562091239424864, + "learning_rate": 5.538147706211038e-08, + "loss": 0.43256324529647827, + "step": 7798 + }, + { + "epoch": 1.7982476366151716, + "grad_norm": 1.5167378404298808, + "learning_rate": 5.5256427044861666e-08, + "loss": 0.37302178144454956, + "step": 7799 + }, + { + "epoch": 1.7984782107447543, + "grad_norm": 1.7103098772379584, + "learning_rate": 5.5131514355666095e-08, + "loss": 0.5247504711151123, + "step": 7800 + }, + { + "epoch": 1.798708784874337, + "grad_norm": 1.3345270008803303, + "learning_rate": 5.5006739012680934e-08, + "loss": 0.3906348943710327, + "step": 7801 + }, + { + "epoch": 1.7989393590039198, + "grad_norm": 1.863821074304618, + "learning_rate": 5.488210103404345e-08, + "loss": 0.5293325185775757, + "step": 7802 + }, + { + "epoch": 1.7991699331335025, + "grad_norm": 1.8021445170106478, + "learning_rate": 5.4757600437871146e-08, + "loss": 0.4189381003379822, + "step": 7803 + }, + { + "epoch": 1.7994005072630852, + "grad_norm": 1.4161978936431723, + "learning_rate": 5.4633237242261207e-08, + "loss": 0.40476128458976746, + "step": 7804 + }, + { + "epoch": 1.7996310813926677, + "grad_norm": 1.6288403815954717, + "learning_rate": 5.45090114652913e-08, + "loss": 0.3908376097679138, + "step": 7805 + }, + { + "epoch": 1.7998616555222504, + "grad_norm": 1.4731211435711635, + "learning_rate": 5.438492312501885e-08, + "loss": 0.42332786321640015, + "step": 7806 + }, + { + "epoch": 1.800092229651833, + "grad_norm": 1.2492034971721793, + "learning_rate": 5.426097223948123e-08, + "loss": 0.3398321866989136, + "step": 7807 + }, + { + "epoch": 1.8003228037814156, + "grad_norm": 1.410970674481118, + "learning_rate": 5.413715882669623e-08, + "loss": 
0.4610673189163208, + "step": 7808 + }, + { + "epoch": 1.8005533779109983, + "grad_norm": 1.4416956666235687, + "learning_rate": 5.401348290466112e-08, + "loss": 0.4149124026298523, + "step": 7809 + }, + { + "epoch": 1.800783952040581, + "grad_norm": 1.4475278396115219, + "learning_rate": 5.388994449135376e-08, + "loss": 0.47464168071746826, + "step": 7810 + }, + { + "epoch": 1.8010145261701638, + "grad_norm": 1.4581354291230397, + "learning_rate": 5.376654360473121e-08, + "loss": 0.4530913829803467, + "step": 7811 + }, + { + "epoch": 1.8012451002997465, + "grad_norm": 1.7198902838066041, + "learning_rate": 5.364328026273157e-08, + "loss": 0.5577078461647034, + "step": 7812 + }, + { + "epoch": 1.801475674429329, + "grad_norm": 1.828526033611825, + "learning_rate": 5.3520154483272075e-08, + "loss": 0.4772539436817169, + "step": 7813 + }, + { + "epoch": 1.8017062485589117, + "grad_norm": 1.690066578469317, + "learning_rate": 5.339716628425039e-08, + "loss": 0.5387610197067261, + "step": 7814 + }, + { + "epoch": 1.8019368226884942, + "grad_norm": 1.7130913599502742, + "learning_rate": 5.327431568354401e-08, + "loss": 0.4505125880241394, + "step": 7815 + }, + { + "epoch": 1.802167396818077, + "grad_norm": 1.5145450098970203, + "learning_rate": 5.3151602699010867e-08, + "loss": 0.43021589517593384, + "step": 7816 + }, + { + "epoch": 1.8023979709476596, + "grad_norm": 1.6184493194868252, + "learning_rate": 5.3029027348488244e-08, + "loss": 0.44107457995414734, + "step": 7817 + }, + { + "epoch": 1.8026285450772424, + "grad_norm": 1.6224833006548345, + "learning_rate": 5.2906589649793666e-08, + "loss": 0.42265504598617554, + "step": 7818 + }, + { + "epoch": 1.802859119206825, + "grad_norm": 1.3828256021454344, + "learning_rate": 5.2784289620724895e-08, + "loss": 0.4814263582229614, + "step": 7819 + }, + { + "epoch": 1.8030896933364078, + "grad_norm": 1.3840958899744187, + "learning_rate": 5.2662127279059275e-08, + "loss": 0.4255106747150421, + "step": 7820 + }, + { + 
"epoch": 1.8033202674659903, + "grad_norm": 1.3789211684549096, + "learning_rate": 5.2540102642554593e-08, + "loss": 0.43405312299728394, + "step": 7821 + }, + { + "epoch": 1.803550841595573, + "grad_norm": 1.5062041567676776, + "learning_rate": 5.2418215728948004e-08, + "loss": 0.3986097574234009, + "step": 7822 + }, + { + "epoch": 1.8037814157251555, + "grad_norm": 1.7653469724585684, + "learning_rate": 5.2296466555957205e-08, + "loss": 0.4988093972206116, + "step": 7823 + }, + { + "epoch": 1.8040119898547382, + "grad_norm": 1.6382094442265007, + "learning_rate": 5.217485514127973e-08, + "loss": 0.5290527939796448, + "step": 7824 + }, + { + "epoch": 1.804242563984321, + "grad_norm": 1.4794199807921353, + "learning_rate": 5.205338150259308e-08, + "loss": 0.3705815076828003, + "step": 7825 + }, + { + "epoch": 1.8044731381139036, + "grad_norm": 1.3872232407887637, + "learning_rate": 5.193204565755449e-08, + "loss": 0.37735384702682495, + "step": 7826 + }, + { + "epoch": 1.8047037122434864, + "grad_norm": 1.38875357732027, + "learning_rate": 5.1810847623801504e-08, + "loss": 0.39033758640289307, + "step": 7827 + }, + { + "epoch": 1.804934286373069, + "grad_norm": 1.5105458662939806, + "learning_rate": 5.168978741895147e-08, + "loss": 0.4669237732887268, + "step": 7828 + }, + { + "epoch": 1.8051648605026516, + "grad_norm": 1.6910832171163468, + "learning_rate": 5.156886506060154e-08, + "loss": 0.5178482532501221, + "step": 7829 + }, + { + "epoch": 1.8053954346322343, + "grad_norm": 1.4473544670706617, + "learning_rate": 5.14480805663291e-08, + "loss": 0.44134122133255005, + "step": 7830 + }, + { + "epoch": 1.8056260087618168, + "grad_norm": 1.5836257156251672, + "learning_rate": 5.132743395369144e-08, + "loss": 0.44371920824050903, + "step": 7831 + }, + { + "epoch": 1.8058565828913995, + "grad_norm": 1.513244295553376, + "learning_rate": 5.1206925240225964e-08, + "loss": 0.43268662691116333, + "step": 7832 + }, + { + "epoch": 1.8060871570209822, + "grad_norm": 
1.736730853895812, + "learning_rate": 5.1086554443449445e-08, + "loss": 0.5035665035247803, + "step": 7833 + }, + { + "epoch": 1.806317731150565, + "grad_norm": 1.3694047806165788, + "learning_rate": 5.0966321580859336e-08, + "loss": 0.4987141191959381, + "step": 7834 + }, + { + "epoch": 1.8065483052801476, + "grad_norm": 1.816085685560109, + "learning_rate": 5.0846226669932437e-08, + "loss": 0.5951617956161499, + "step": 7835 + }, + { + "epoch": 1.8067788794097304, + "grad_norm": 1.464038827862328, + "learning_rate": 5.072626972812599e-08, + "loss": 0.4710814654827118, + "step": 7836 + }, + { + "epoch": 1.8070094535393129, + "grad_norm": 1.6196482413694708, + "learning_rate": 5.060645077287662e-08, + "loss": 0.5173348188400269, + "step": 7837 + }, + { + "epoch": 1.8072400276688956, + "grad_norm": 1.4170272466334293, + "learning_rate": 5.048676982160161e-08, + "loss": 0.49508416652679443, + "step": 7838 + }, + { + "epoch": 1.807470601798478, + "grad_norm": 1.7639395740589152, + "learning_rate": 5.03672268916977e-08, + "loss": 0.4535290598869324, + "step": 7839 + }, + { + "epoch": 1.8077011759280608, + "grad_norm": 1.7696762607003815, + "learning_rate": 5.024782200054145e-08, + "loss": 0.5337553024291992, + "step": 7840 + }, + { + "epoch": 1.8079317500576435, + "grad_norm": 1.6346280356935987, + "learning_rate": 5.012855516548986e-08, + "loss": 0.47118210792541504, + "step": 7841 + }, + { + "epoch": 1.8081623241872262, + "grad_norm": 1.504680600844573, + "learning_rate": 5.0009426403879283e-08, + "loss": 0.4458848237991333, + "step": 7842 + }, + { + "epoch": 1.808392898316809, + "grad_norm": 1.5297682575974059, + "learning_rate": 4.9890435733026536e-08, + "loss": 0.5055558681488037, + "step": 7843 + }, + { + "epoch": 1.8086234724463917, + "grad_norm": 1.4365609441585347, + "learning_rate": 4.9771583170228006e-08, + "loss": 0.43715038895606995, + "step": 7844 + }, + { + "epoch": 1.8088540465759742, + "grad_norm": 1.545411862707653, + "learning_rate": 
4.96528687327602e-08, + "loss": 0.427906334400177, + "step": 7845 + }, + { + "epoch": 1.8090846207055569, + "grad_norm": 1.6703597275780244, + "learning_rate": 4.953429243787932e-08, + "loss": 0.48160994052886963, + "step": 7846 + }, + { + "epoch": 1.8093151948351394, + "grad_norm": 1.3261658854233023, + "learning_rate": 4.941585430282158e-08, + "loss": 0.40856754779815674, + "step": 7847 + }, + { + "epoch": 1.809545768964722, + "grad_norm": 1.3569384823756985, + "learning_rate": 4.929755434480354e-08, + "loss": 0.40482330322265625, + "step": 7848 + }, + { + "epoch": 1.8097763430943048, + "grad_norm": 1.530544362283251, + "learning_rate": 4.9179392581021e-08, + "loss": 0.4286755323410034, + "step": 7849 + }, + { + "epoch": 1.8100069172238875, + "grad_norm": 1.5805205551700128, + "learning_rate": 4.906136902864999e-08, + "loss": 0.4436051547527313, + "step": 7850 + }, + { + "epoch": 1.8102374913534702, + "grad_norm": 1.5320309451669083, + "learning_rate": 4.8943483704846465e-08, + "loss": 0.41794437170028687, + "step": 7851 + }, + { + "epoch": 1.810468065483053, + "grad_norm": 1.4506407579843814, + "learning_rate": 4.8825736626746384e-08, + "loss": 0.4308912754058838, + "step": 7852 + }, + { + "epoch": 1.8106986396126354, + "grad_norm": 1.5274898640972132, + "learning_rate": 4.870812781146516e-08, + "loss": 0.43090081214904785, + "step": 7853 + }, + { + "epoch": 1.8109292137422182, + "grad_norm": 1.3117483081436436, + "learning_rate": 4.859065727609857e-08, + "loss": 0.4329320192337036, + "step": 7854 + }, + { + "epoch": 1.8111597878718007, + "grad_norm": 1.266199300666261, + "learning_rate": 4.8473325037722276e-08, + "loss": 0.3162953853607178, + "step": 7855 + }, + { + "epoch": 1.8113903620013834, + "grad_norm": 1.4534333887380995, + "learning_rate": 4.835613111339165e-08, + "loss": 0.37513065338134766, + "step": 7856 + }, + { + "epoch": 1.811620936130966, + "grad_norm": 1.494207838495638, + "learning_rate": 4.823907552014195e-08, + "loss": 0.4120938181877136, + 
"step": 7857 + }, + { + "epoch": 1.8118515102605488, + "grad_norm": 1.555741011782435, + "learning_rate": 4.8122158274988555e-08, + "loss": 0.4295421242713928, + "step": 7858 + }, + { + "epoch": 1.8120820843901315, + "grad_norm": 1.4697042695976983, + "learning_rate": 4.8005379394926435e-08, + "loss": 0.44738203287124634, + "step": 7859 + }, + { + "epoch": 1.8123126585197142, + "grad_norm": 1.7388489283467792, + "learning_rate": 4.7888738896930456e-08, + "loss": 0.447609007358551, + "step": 7860 + }, + { + "epoch": 1.8125432326492967, + "grad_norm": 1.6367328188270214, + "learning_rate": 4.777223679795561e-08, + "loss": 0.38288167119026184, + "step": 7861 + }, + { + "epoch": 1.8127738067788794, + "grad_norm": 1.5566909994885838, + "learning_rate": 4.765587311493668e-08, + "loss": 0.5003981590270996, + "step": 7862 + }, + { + "epoch": 1.813004380908462, + "grad_norm": 1.5140425774804767, + "learning_rate": 4.7539647864788476e-08, + "loss": 0.5244492888450623, + "step": 7863 + }, + { + "epoch": 1.8132349550380447, + "grad_norm": 1.4098788698269693, + "learning_rate": 4.742356106440526e-08, + "loss": 0.505184531211853, + "step": 7864 + }, + { + "epoch": 1.8134655291676274, + "grad_norm": 2.493869291024891, + "learning_rate": 4.7307612730661636e-08, + "loss": 0.5364291071891785, + "step": 7865 + }, + { + "epoch": 1.81369610329721, + "grad_norm": 1.5655893218937025, + "learning_rate": 4.719180288041158e-08, + "loss": 0.4370742738246918, + "step": 7866 + }, + { + "epoch": 1.8139266774267928, + "grad_norm": 1.3233268572547954, + "learning_rate": 4.7076131530489505e-08, + "loss": 0.37784355878829956, + "step": 7867 + }, + { + "epoch": 1.8141572515563755, + "grad_norm": 1.6040150628213576, + "learning_rate": 4.6960598697709294e-08, + "loss": 0.5184513330459595, + "step": 7868 + }, + { + "epoch": 1.814387825685958, + "grad_norm": 1.6174173359265467, + "learning_rate": 4.6845204398864743e-08, + "loss": 0.41221511363983154, + "step": 7869 + }, + { + "epoch": 
1.8146183998155407, + "grad_norm": 1.960596641519608, + "learning_rate": 4.672994865072965e-08, + "loss": 0.43040651082992554, + "step": 7870 + }, + { + "epoch": 1.8148489739451232, + "grad_norm": 1.887961823292038, + "learning_rate": 4.6614831470057625e-08, + "loss": 0.4681999385356903, + "step": 7871 + }, + { + "epoch": 1.815079548074706, + "grad_norm": 1.5463001442495705, + "learning_rate": 4.649985287358227e-08, + "loss": 0.49752098321914673, + "step": 7872 + }, + { + "epoch": 1.8153101222042887, + "grad_norm": 1.4528059880154254, + "learning_rate": 4.6385012878016663e-08, + "loss": 0.4621706008911133, + "step": 7873 + }, + { + "epoch": 1.8155406963338714, + "grad_norm": 1.339046035541834, + "learning_rate": 4.627031150005401e-08, + "loss": 0.4359724521636963, + "step": 7874 + }, + { + "epoch": 1.815771270463454, + "grad_norm": 1.4288119410903932, + "learning_rate": 4.6155748756367294e-08, + "loss": 0.4901214838027954, + "step": 7875 + }, + { + "epoch": 1.8160018445930368, + "grad_norm": 1.7234395975437273, + "learning_rate": 4.604132466360955e-08, + "loss": 0.5012428760528564, + "step": 7876 + }, + { + "epoch": 1.8162324187226193, + "grad_norm": 1.6768636456338364, + "learning_rate": 4.592703923841323e-08, + "loss": 0.5048446655273438, + "step": 7877 + }, + { + "epoch": 1.816462992852202, + "grad_norm": 1.5761086054200695, + "learning_rate": 4.5812892497390955e-08, + "loss": 0.5025140047073364, + "step": 7878 + }, + { + "epoch": 1.8166935669817845, + "grad_norm": 1.5593886228823222, + "learning_rate": 4.5698884457135324e-08, + "loss": 0.4456709623336792, + "step": 7879 + }, + { + "epoch": 1.8169241411113672, + "grad_norm": 1.4583950124069596, + "learning_rate": 4.5585015134218196e-08, + "loss": 0.38283586502075195, + "step": 7880 + }, + { + "epoch": 1.81715471524095, + "grad_norm": 1.5479198908902716, + "learning_rate": 4.5471284545192004e-08, + "loss": 0.3458648920059204, + "step": 7881 + }, + { + "epoch": 1.8173852893705327, + "grad_norm": 
1.7126815699296334, + "learning_rate": 4.53576927065884e-08, + "loss": 0.4609532952308655, + "step": 7882 + }, + { + "epoch": 1.8176158635001154, + "grad_norm": 1.238404719965568, + "learning_rate": 4.524423963491919e-08, + "loss": 0.4250793159008026, + "step": 7883 + }, + { + "epoch": 1.817846437629698, + "grad_norm": 1.7276559977997992, + "learning_rate": 4.513092534667584e-08, + "loss": 0.41343796253204346, + "step": 7884 + }, + { + "epoch": 1.8180770117592806, + "grad_norm": 1.5863495927207087, + "learning_rate": 4.5017749858329736e-08, + "loss": 0.46575528383255005, + "step": 7885 + }, + { + "epoch": 1.8183075858888633, + "grad_norm": 1.7387493602059383, + "learning_rate": 4.4904713186332156e-08, + "loss": 0.47052180767059326, + "step": 7886 + }, + { + "epoch": 1.8185381600184458, + "grad_norm": 1.4938009961123744, + "learning_rate": 4.479181534711429e-08, + "loss": 0.42979568243026733, + "step": 7887 + }, + { + "epoch": 1.8187687341480285, + "grad_norm": 1.4298617258142596, + "learning_rate": 4.46790563570868e-08, + "loss": 0.4278537929058075, + "step": 7888 + }, + { + "epoch": 1.8189993082776112, + "grad_norm": 1.6571154898401685, + "learning_rate": 4.456643623264022e-08, + "loss": 0.45380616188049316, + "step": 7889 + }, + { + "epoch": 1.819229882407194, + "grad_norm": 1.6141969165708208, + "learning_rate": 4.445395499014526e-08, + "loss": 0.46085125207901, + "step": 7890 + }, + { + "epoch": 1.8194604565367767, + "grad_norm": 1.7363894486391924, + "learning_rate": 4.434161264595204e-08, + "loss": 0.47558531165122986, + "step": 7891 + }, + { + "epoch": 1.8196910306663594, + "grad_norm": 1.552212209885486, + "learning_rate": 4.4229409216390845e-08, + "loss": 0.42082321643829346, + "step": 7892 + }, + { + "epoch": 1.819921604795942, + "grad_norm": 1.6844917452185877, + "learning_rate": 4.411734471777129e-08, + "loss": 0.40222978591918945, + "step": 7893 + }, + { + "epoch": 1.8201521789255246, + "grad_norm": 1.7385505168528088, + "learning_rate": 
4.400541916638323e-08, + "loss": 0.39737701416015625, + "step": 7894 + }, + { + "epoch": 1.820382753055107, + "grad_norm": 1.6976347614290264, + "learning_rate": 4.389363257849632e-08, + "loss": 0.46538835763931274, + "step": 7895 + }, + { + "epoch": 1.8206133271846898, + "grad_norm": 2.034464057065236, + "learning_rate": 4.378198497035979e-08, + "loss": 0.4994567036628723, + "step": 7896 + }, + { + "epoch": 1.8208439013142725, + "grad_norm": 1.517699554285521, + "learning_rate": 4.367047635820264e-08, + "loss": 0.4574298858642578, + "step": 7897 + }, + { + "epoch": 1.8210744754438553, + "grad_norm": 1.7361916973448048, + "learning_rate": 4.3559106758234044e-08, + "loss": 0.4716116786003113, + "step": 7898 + }, + { + "epoch": 1.821305049573438, + "grad_norm": 1.7495776361282012, + "learning_rate": 4.344787618664247e-08, + "loss": 0.35549741983413696, + "step": 7899 + }, + { + "epoch": 1.8215356237030205, + "grad_norm": 1.673931935617008, + "learning_rate": 4.3336784659596226e-08, + "loss": 0.44955599308013916, + "step": 7900 + }, + { + "epoch": 1.8217661978326032, + "grad_norm": 1.2588104675314307, + "learning_rate": 4.322583219324394e-08, + "loss": 0.4047467112541199, + "step": 7901 + }, + { + "epoch": 1.8219967719621857, + "grad_norm": 1.3892625958432285, + "learning_rate": 4.3115018803713596e-08, + "loss": 0.40367889404296875, + "step": 7902 + }, + { + "epoch": 1.8222273460917684, + "grad_norm": 1.3189968956301878, + "learning_rate": 4.3004344507113096e-08, + "loss": 0.32705235481262207, + "step": 7903 + }, + { + "epoch": 1.8224579202213511, + "grad_norm": 1.3777118561947166, + "learning_rate": 4.2893809319529794e-08, + "loss": 0.3845488727092743, + "step": 7904 + }, + { + "epoch": 1.8226884943509338, + "grad_norm": 1.4977030222677208, + "learning_rate": 4.2783413257031495e-08, + "loss": 0.49070197343826294, + "step": 7905 + }, + { + "epoch": 1.8229190684805165, + "grad_norm": 1.729181630904155, + "learning_rate": 4.267315633566493e-08, + "loss": 
0.550437867641449, + "step": 7906 + }, + { + "epoch": 1.8231496426100993, + "grad_norm": 1.6119404797366197, + "learning_rate": 4.25630385714576e-08, + "loss": 0.5042926073074341, + "step": 7907 + }, + { + "epoch": 1.8233802167396818, + "grad_norm": 1.5956788246532367, + "learning_rate": 4.245305998041571e-08, + "loss": 0.48839205503463745, + "step": 7908 + }, + { + "epoch": 1.8236107908692645, + "grad_norm": 1.6028821186444346, + "learning_rate": 4.234322057852602e-08, + "loss": 0.4754030108451843, + "step": 7909 + }, + { + "epoch": 1.823841364998847, + "grad_norm": 1.5406282114264656, + "learning_rate": 4.223352038175487e-08, + "loss": 0.394174188375473, + "step": 7910 + }, + { + "epoch": 1.8240719391284297, + "grad_norm": 1.3144512253416945, + "learning_rate": 4.2123959406048183e-08, + "loss": 0.39882469177246094, + "step": 7911 + }, + { + "epoch": 1.8243025132580124, + "grad_norm": 1.3036980510979261, + "learning_rate": 4.201453766733176e-08, + "loss": 0.4611927270889282, + "step": 7912 + }, + { + "epoch": 1.8245330873875951, + "grad_norm": 1.3717750651706109, + "learning_rate": 4.190525518151122e-08, + "loss": 0.4164184331893921, + "step": 7913 + }, + { + "epoch": 1.8247636615171778, + "grad_norm": 1.7048234275294294, + "learning_rate": 4.179611196447186e-08, + "loss": 0.41586828231811523, + "step": 7914 + }, + { + "epoch": 1.8249942356467606, + "grad_norm": 1.486464242852147, + "learning_rate": 4.168710803207864e-08, + "loss": 0.4707748591899872, + "step": 7915 + }, + { + "epoch": 1.825224809776343, + "grad_norm": 1.6925426332325308, + "learning_rate": 4.157824340017657e-08, + "loss": 0.4235571622848511, + "step": 7916 + }, + { + "epoch": 1.8254553839059258, + "grad_norm": 1.5746767320284107, + "learning_rate": 4.146951808458998e-08, + "loss": 0.3761681914329529, + "step": 7917 + }, + { + "epoch": 1.8256859580355083, + "grad_norm": 1.9541083814793623, + "learning_rate": 4.136093210112346e-08, + "loss": 0.45545494556427, + "step": 7918 + }, + { + "epoch": 
1.825916532165091, + "grad_norm": 1.4946968371557119, + "learning_rate": 4.1252485465561035e-08, + "loss": 0.4154251515865326, + "step": 7919 + }, + { + "epoch": 1.8261471062946737, + "grad_norm": 1.4442817043721163, + "learning_rate": 4.114417819366633e-08, + "loss": 0.3664330244064331, + "step": 7920 + }, + { + "epoch": 1.8263776804242564, + "grad_norm": 1.4915985489350694, + "learning_rate": 4.10360103011832e-08, + "loss": 0.4527730643749237, + "step": 7921 + }, + { + "epoch": 1.8266082545538391, + "grad_norm": 1.6683615123339999, + "learning_rate": 4.092798180383461e-08, + "loss": 0.5245767831802368, + "step": 7922 + }, + { + "epoch": 1.8268388286834218, + "grad_norm": 1.6122193238326974, + "learning_rate": 4.0820092717323894e-08, + "loss": 0.39781343936920166, + "step": 7923 + }, + { + "epoch": 1.8270694028130043, + "grad_norm": 1.592304216861808, + "learning_rate": 4.071234305733362e-08, + "loss": 0.4173957109451294, + "step": 7924 + }, + { + "epoch": 1.827299976942587, + "grad_norm": 1.7592031102615102, + "learning_rate": 4.0604732839526256e-08, + "loss": 0.38840869069099426, + "step": 7925 + }, + { + "epoch": 1.8275305510721696, + "grad_norm": 1.777360398097105, + "learning_rate": 4.0497262079544294e-08, + "loss": 0.4107547998428345, + "step": 7926 + }, + { + "epoch": 1.8277611252017523, + "grad_norm": 1.5475583296259725, + "learning_rate": 4.038993079300956e-08, + "loss": 0.41102874279022217, + "step": 7927 + }, + { + "epoch": 1.827991699331335, + "grad_norm": 1.4229533643496446, + "learning_rate": 4.028273899552381e-08, + "loss": 0.3393939733505249, + "step": 7928 + }, + { + "epoch": 1.8282222734609177, + "grad_norm": 1.4844610719466356, + "learning_rate": 4.017568670266835e-08, + "loss": 0.42469024658203125, + "step": 7929 + }, + { + "epoch": 1.8284528475905004, + "grad_norm": 1.316542585504155, + "learning_rate": 4.006877393000441e-08, + "loss": 0.4869099259376526, + "step": 7930 + }, + { + "epoch": 1.8286834217200831, + "grad_norm": 1.3905230120628338, 
+ "learning_rate": 3.996200069307265e-08, + "loss": 0.4463779926300049, + "step": 7931 + }, + { + "epoch": 1.8289139958496656, + "grad_norm": 1.908726864953878, + "learning_rate": 3.985536700739378e-08, + "loss": 0.429579496383667, + "step": 7932 + }, + { + "epoch": 1.8291445699792483, + "grad_norm": 1.555687929117211, + "learning_rate": 3.9748872888468065e-08, + "loss": 0.38837558031082153, + "step": 7933 + }, + { + "epoch": 1.8293751441088308, + "grad_norm": 1.467502995951613, + "learning_rate": 3.964251835177568e-08, + "loss": 0.4444499909877777, + "step": 7934 + }, + { + "epoch": 1.8296057182384136, + "grad_norm": 1.5836026531003116, + "learning_rate": 3.953630341277603e-08, + "loss": 0.5216259360313416, + "step": 7935 + }, + { + "epoch": 1.8298362923679963, + "grad_norm": 1.316614330242316, + "learning_rate": 3.943022808690888e-08, + "loss": 0.46454817056655884, + "step": 7936 + }, + { + "epoch": 1.830066866497579, + "grad_norm": 1.5390661326727673, + "learning_rate": 3.9324292389593005e-08, + "loss": 0.38960570096969604, + "step": 7937 + }, + { + "epoch": 1.8302974406271617, + "grad_norm": 1.2960127878271992, + "learning_rate": 3.9218496336227426e-08, + "loss": 0.3318006992340088, + "step": 7938 + }, + { + "epoch": 1.8305280147567444, + "grad_norm": 1.501585055160058, + "learning_rate": 3.9112839942190725e-08, + "loss": 0.41555076837539673, + "step": 7939 + }, + { + "epoch": 1.830758588886327, + "grad_norm": 1.4035625255113318, + "learning_rate": 3.900732322284095e-08, + "loss": 0.4296320080757141, + "step": 7940 + }, + { + "epoch": 1.8309891630159096, + "grad_norm": 1.6738155247978692, + "learning_rate": 3.8901946193516055e-08, + "loss": 0.4416658282279968, + "step": 7941 + }, + { + "epoch": 1.8312197371454921, + "grad_norm": 1.885789179393057, + "learning_rate": 3.8796708869533676e-08, + "loss": 0.4539029598236084, + "step": 7942 + }, + { + "epoch": 1.8314503112750748, + "grad_norm": 1.4867619575158202, + "learning_rate": 3.869161126619136e-08, + "loss": 
0.4526992440223694, + "step": 7943 + }, + { + "epoch": 1.8316808854046576, + "grad_norm": 1.5927522884216676, + "learning_rate": 3.8586653398765766e-08, + "loss": 0.3991963863372803, + "step": 7944 + }, + { + "epoch": 1.8319114595342403, + "grad_norm": 1.4460483349984772, + "learning_rate": 3.848183528251381e-08, + "loss": 0.44474589824676514, + "step": 7945 + }, + { + "epoch": 1.832142033663823, + "grad_norm": 1.7969739964524274, + "learning_rate": 3.837715693267174e-08, + "loss": 0.5022028684616089, + "step": 7946 + }, + { + "epoch": 1.8323726077934057, + "grad_norm": 1.6274178723126447, + "learning_rate": 3.8272618364455836e-08, + "loss": 0.4839058518409729, + "step": 7947 + }, + { + "epoch": 1.8326031819229882, + "grad_norm": 1.7924980398771633, + "learning_rate": 3.8168219593061376e-08, + "loss": 0.3580874800682068, + "step": 7948 + }, + { + "epoch": 1.832833756052571, + "grad_norm": 1.6096517551702718, + "learning_rate": 3.806396063366424e-08, + "loss": 0.4350799024105072, + "step": 7949 + }, + { + "epoch": 1.8330643301821534, + "grad_norm": 1.3546161389632028, + "learning_rate": 3.79598415014194e-08, + "loss": 0.4386145770549774, + "step": 7950 + }, + { + "epoch": 1.8332949043117361, + "grad_norm": 1.4421267919386862, + "learning_rate": 3.785586221146142e-08, + "loss": 0.5122627019882202, + "step": 7951 + }, + { + "epoch": 1.8335254784413189, + "grad_norm": 1.3507016201924953, + "learning_rate": 3.77520227789051e-08, + "loss": 0.41197121143341064, + "step": 7952 + }, + { + "epoch": 1.8337560525709016, + "grad_norm": 1.7729553069577912, + "learning_rate": 3.764832321884426e-08, + "loss": 0.5508084297180176, + "step": 7953 + }, + { + "epoch": 1.8339866267004843, + "grad_norm": 1.3788371713361898, + "learning_rate": 3.754476354635283e-08, + "loss": 0.40791934728622437, + "step": 7954 + }, + { + "epoch": 1.834217200830067, + "grad_norm": 1.4693932480728087, + "learning_rate": 3.7441343776484113e-08, + "loss": 0.3880457878112793, + "step": 7955 + }, + { + 
"epoch": 1.8344477749596495, + "grad_norm": 1.4561569110121497, + "learning_rate": 3.7338063924271304e-08, + "loss": 0.40519118309020996, + "step": 7956 + }, + { + "epoch": 1.8346783490892322, + "grad_norm": 1.4799489730655653, + "learning_rate": 3.723492400472716e-08, + "loss": 0.46081095933914185, + "step": 7957 + }, + { + "epoch": 1.8349089232188147, + "grad_norm": 1.3167338346767847, + "learning_rate": 3.713192403284438e-08, + "loss": 0.3946321904659271, + "step": 7958 + }, + { + "epoch": 1.8351394973483974, + "grad_norm": 1.743632986191688, + "learning_rate": 3.702906402359474e-08, + "loss": 0.4699859023094177, + "step": 7959 + }, + { + "epoch": 1.8353700714779801, + "grad_norm": 1.4691817330554993, + "learning_rate": 3.692634399192995e-08, + "loss": 0.43031781911849976, + "step": 7960 + }, + { + "epoch": 1.8356006456075629, + "grad_norm": 1.5694622813964751, + "learning_rate": 3.6823763952781636e-08, + "loss": 0.4072418212890625, + "step": 7961 + }, + { + "epoch": 1.8358312197371456, + "grad_norm": 1.7009922761684866, + "learning_rate": 3.672132392106053e-08, + "loss": 0.40659528970718384, + "step": 7962 + }, + { + "epoch": 1.8360617938667283, + "grad_norm": 1.2845193385628964, + "learning_rate": 3.661902391165772e-08, + "loss": 0.41279205679893494, + "step": 7963 + }, + { + "epoch": 1.8362923679963108, + "grad_norm": 1.407521764327922, + "learning_rate": 3.65168639394432e-08, + "loss": 0.43887826800346375, + "step": 7964 + }, + { + "epoch": 1.8365229421258935, + "grad_norm": 1.585883988281566, + "learning_rate": 3.6414844019267196e-08, + "loss": 0.46111762523651123, + "step": 7965 + }, + { + "epoch": 1.836753516255476, + "grad_norm": 1.5089060420061358, + "learning_rate": 3.63129641659593e-08, + "loss": 0.42694801092147827, + "step": 7966 + }, + { + "epoch": 1.8369840903850587, + "grad_norm": 1.563222995065882, + "learning_rate": 3.6211224394328775e-08, + "loss": 0.4674855172634125, + "step": 7967 + }, + { + "epoch": 1.8372146645146414, + "grad_norm": 
1.6612957725595774, + "learning_rate": 3.610962471916435e-08, + "loss": 0.48998844623565674, + "step": 7968 + }, + { + "epoch": 1.8374452386442242, + "grad_norm": 1.517118505836267, + "learning_rate": 3.600816515523486e-08, + "loss": 0.4162273406982422, + "step": 7969 + }, + { + "epoch": 1.8376758127738069, + "grad_norm": 1.6498845355681542, + "learning_rate": 3.5906845717288304e-08, + "loss": 0.4446166753768921, + "step": 7970 + }, + { + "epoch": 1.8379063869033896, + "grad_norm": 1.6723175784368125, + "learning_rate": 3.580566642005245e-08, + "loss": 0.4782527983188629, + "step": 7971 + }, + { + "epoch": 1.838136961032972, + "grad_norm": 1.667138689471541, + "learning_rate": 3.570462727823476e-08, + "loss": 0.43014609813690186, + "step": 7972 + }, + { + "epoch": 1.8383675351625548, + "grad_norm": 1.5808858327085533, + "learning_rate": 3.560372830652225e-08, + "loss": 0.5155357122421265, + "step": 7973 + }, + { + "epoch": 1.8385981092921373, + "grad_norm": 1.4181681095350445, + "learning_rate": 3.5502969519581984e-08, + "loss": 0.4231104254722595, + "step": 7974 + }, + { + "epoch": 1.83882868342172, + "grad_norm": 1.8426199170185766, + "learning_rate": 3.540235093205979e-08, + "loss": 0.529877245426178, + "step": 7975 + }, + { + "epoch": 1.8390592575513027, + "grad_norm": 1.5632800597633676, + "learning_rate": 3.530187255858186e-08, + "loss": 0.4841991662979126, + "step": 7976 + }, + { + "epoch": 1.8392898316808854, + "grad_norm": 1.5770240615602402, + "learning_rate": 3.520153441375362e-08, + "loss": 0.40202534198760986, + "step": 7977 + }, + { + "epoch": 1.8395204058104682, + "grad_norm": 1.4104759549786023, + "learning_rate": 3.51013365121603e-08, + "loss": 0.398551344871521, + "step": 7978 + }, + { + "epoch": 1.8397509799400509, + "grad_norm": 1.5102819529399165, + "learning_rate": 3.500127886836668e-08, + "loss": 0.49139225482940674, + "step": 7979 + }, + { + "epoch": 1.8399815540696334, + "grad_norm": 1.7659081046335245, + "learning_rate": 
3.4901361496917135e-08, + "loss": 0.4708287715911865, + "step": 7980 + }, + { + "epoch": 1.840212128199216, + "grad_norm": 1.3491474153090526, + "learning_rate": 3.4801584412335714e-08, + "loss": 0.4174381494522095, + "step": 7981 + }, + { + "epoch": 1.8404427023287986, + "grad_norm": 1.6453019064878467, + "learning_rate": 3.470194762912593e-08, + "loss": 0.535778284072876, + "step": 7982 + }, + { + "epoch": 1.8406732764583813, + "grad_norm": 1.7228199406120377, + "learning_rate": 3.4602451161771186e-08, + "loss": 0.540034294128418, + "step": 7983 + }, + { + "epoch": 1.840903850587964, + "grad_norm": 1.794022377740068, + "learning_rate": 3.450309502473403e-08, + "loss": 0.4399121403694153, + "step": 7984 + }, + { + "epoch": 1.8411344247175467, + "grad_norm": 1.6932512977389786, + "learning_rate": 3.4403879232457134e-08, + "loss": 0.5011022686958313, + "step": 7985 + }, + { + "epoch": 1.8413649988471295, + "grad_norm": 1.580497796669037, + "learning_rate": 3.4304803799362405e-08, + "loss": 0.392477810382843, + "step": 7986 + }, + { + "epoch": 1.8415955729767122, + "grad_norm": 1.5439573803469637, + "learning_rate": 3.420586873985132e-08, + "loss": 0.4734686315059662, + "step": 7987 + }, + { + "epoch": 1.8418261471062947, + "grad_norm": 1.3285059669744466, + "learning_rate": 3.410707406830537e-08, + "loss": 0.37347573041915894, + "step": 7988 + }, + { + "epoch": 1.8420567212358774, + "grad_norm": 1.6328708193086845, + "learning_rate": 3.400841979908531e-08, + "loss": 0.38837599754333496, + "step": 7989 + }, + { + "epoch": 1.8422872953654599, + "grad_norm": 1.6277616294407593, + "learning_rate": 3.390990594653142e-08, + "loss": 0.38598424196243286, + "step": 7990 + }, + { + "epoch": 1.8425178694950426, + "grad_norm": 1.584379501910531, + "learning_rate": 3.381153252496371e-08, + "loss": 0.48508739471435547, + "step": 7991 + }, + { + "epoch": 1.8427484436246253, + "grad_norm": 1.609395355542375, + "learning_rate": 3.3713299548681736e-08, + "loss": 0.41946491599082947, 
+ "step": 7992 + }, + { + "epoch": 1.842979017754208, + "grad_norm": 1.4959274640542461, + "learning_rate": 3.3615207031964744e-08, + "loss": 0.4803915023803711, + "step": 7993 + }, + { + "epoch": 1.8432095918837907, + "grad_norm": 1.3835076847275678, + "learning_rate": 3.351725498907143e-08, + "loss": 0.39463797211647034, + "step": 7994 + }, + { + "epoch": 1.8434401660133735, + "grad_norm": 1.5742658557245284, + "learning_rate": 3.341944343424008e-08, + "loss": 0.43345123529434204, + "step": 7995 + }, + { + "epoch": 1.843670740142956, + "grad_norm": 1.7826616989180466, + "learning_rate": 3.332177238168854e-08, + "loss": 0.5164570212364197, + "step": 7996 + }, + { + "epoch": 1.8439013142725387, + "grad_norm": 1.71354580792071, + "learning_rate": 3.322424184561445e-08, + "loss": 0.5313355922698975, + "step": 7997 + }, + { + "epoch": 1.8441318884021212, + "grad_norm": 1.901316143248936, + "learning_rate": 3.3126851840194815e-08, + "loss": 0.4488258361816406, + "step": 7998 + }, + { + "epoch": 1.8443624625317039, + "grad_norm": 1.479116299891256, + "learning_rate": 3.30296023795863e-08, + "loss": 0.5122581720352173, + "step": 7999 + }, + { + "epoch": 1.8445930366612866, + "grad_norm": 1.4735639536720297, + "learning_rate": 3.293249347792493e-08, + "loss": 0.4619610905647278, + "step": 8000 + }, + { + "epoch": 1.8448236107908693, + "grad_norm": 1.3540260330438945, + "learning_rate": 3.2835525149326636e-08, + "loss": 0.4214603006839752, + "step": 8001 + }, + { + "epoch": 1.845054184920452, + "grad_norm": 1.4074387483331625, + "learning_rate": 3.2738697407886485e-08, + "loss": 0.40279510617256165, + "step": 8002 + }, + { + "epoch": 1.8452847590500348, + "grad_norm": 1.4474967943141424, + "learning_rate": 3.264201026767977e-08, + "loss": 0.4797242283821106, + "step": 8003 + }, + { + "epoch": 1.8455153331796172, + "grad_norm": 1.3554973222515974, + "learning_rate": 3.254546374276057e-08, + "loss": 0.3833237588405609, + "step": 8004 + }, + { + "epoch": 1.8457459073092, + 
"grad_norm": 1.4594426546625732, + "learning_rate": 3.244905784716323e-08, + "loss": 0.41461342573165894, + "step": 8005 + }, + { + "epoch": 1.8459764814387825, + "grad_norm": 1.5177617199741877, + "learning_rate": 3.235279259490109e-08, + "loss": 0.592107892036438, + "step": 8006 + }, + { + "epoch": 1.8462070555683652, + "grad_norm": 1.684042887917187, + "learning_rate": 3.2256667999967405e-08, + "loss": 0.39025670289993286, + "step": 8007 + }, + { + "epoch": 1.846437629697948, + "grad_norm": 1.286539298720562, + "learning_rate": 3.2160684076334766e-08, + "loss": 0.40197378396987915, + "step": 8008 + }, + { + "epoch": 1.8466682038275306, + "grad_norm": 1.8155125046022762, + "learning_rate": 3.206484083795558e-08, + "loss": 0.4013815224170685, + "step": 8009 + }, + { + "epoch": 1.8468987779571133, + "grad_norm": 1.5762142363003944, + "learning_rate": 3.1969138298761356e-08, + "loss": 0.45386412739753723, + "step": 8010 + }, + { + "epoch": 1.8471293520866958, + "grad_norm": 1.8756892627173425, + "learning_rate": 3.187357647266353e-08, + "loss": 0.43034985661506653, + "step": 8011 + }, + { + "epoch": 1.8473599262162785, + "grad_norm": 1.6730495727197179, + "learning_rate": 3.177815537355322e-08, + "loss": 0.4346637725830078, + "step": 8012 + }, + { + "epoch": 1.847590500345861, + "grad_norm": 1.8461631710642654, + "learning_rate": 3.1682875015300535e-08, + "loss": 0.5203511118888855, + "step": 8013 + }, + { + "epoch": 1.8478210744754437, + "grad_norm": 1.5817324628827356, + "learning_rate": 3.1587735411755636e-08, + "loss": 0.37658393383026123, + "step": 8014 + }, + { + "epoch": 1.8480516486050265, + "grad_norm": 1.6304961028131815, + "learning_rate": 3.149273657674789e-08, + "loss": 0.5473518371582031, + "step": 8015 + }, + { + "epoch": 1.8482822227346092, + "grad_norm": 1.800633884327913, + "learning_rate": 3.1397878524086484e-08, + "loss": 0.5171597599983215, + "step": 8016 + }, + { + "epoch": 1.848512796864192, + "grad_norm": 1.585245081928725, + "learning_rate": 
3.130316126755983e-08, + "loss": 0.46588706970214844, + "step": 8017 + }, + { + "epoch": 1.8487433709937746, + "grad_norm": 1.496582071882617, + "learning_rate": 3.1208584820936244e-08, + "loss": 0.5571366548538208, + "step": 8018 + }, + { + "epoch": 1.848973945123357, + "grad_norm": 1.5249372170069353, + "learning_rate": 3.111414919796318e-08, + "loss": 0.45803195238113403, + "step": 8019 + }, + { + "epoch": 1.8492045192529398, + "grad_norm": 1.4834943043987898, + "learning_rate": 3.1019854412367875e-08, + "loss": 0.4732629060745239, + "step": 8020 + }, + { + "epoch": 1.8494350933825223, + "grad_norm": 1.7625144420898597, + "learning_rate": 3.092570047785714e-08, + "loss": 0.5268767476081848, + "step": 8021 + }, + { + "epoch": 1.849665667512105, + "grad_norm": 1.5017810734056087, + "learning_rate": 3.0831687408117035e-08, + "loss": 0.5179537534713745, + "step": 8022 + }, + { + "epoch": 1.8498962416416878, + "grad_norm": 1.7406452748153565, + "learning_rate": 3.073781521681351e-08, + "loss": 0.5110389590263367, + "step": 8023 + }, + { + "epoch": 1.8501268157712705, + "grad_norm": 1.442631804804713, + "learning_rate": 3.064408391759154e-08, + "loss": 0.4078633189201355, + "step": 8024 + }, + { + "epoch": 1.8503573899008532, + "grad_norm": 1.6619024740283894, + "learning_rate": 3.055049352407624e-08, + "loss": 0.4632648229598999, + "step": 8025 + }, + { + "epoch": 1.850587964030436, + "grad_norm": 1.577432813868154, + "learning_rate": 3.0457044049871705e-08, + "loss": 0.41569265723228455, + "step": 8026 + }, + { + "epoch": 1.8508185381600184, + "grad_norm": 1.3795657287644, + "learning_rate": 3.036373550856186e-08, + "loss": 0.4105853736400604, + "step": 8027 + }, + { + "epoch": 1.8510491122896011, + "grad_norm": 1.6584799060214424, + "learning_rate": 3.027056791370996e-08, + "loss": 0.4415978789329529, + "step": 8028 + }, + { + "epoch": 1.8512796864191836, + "grad_norm": 1.571030596092026, + "learning_rate": 3.017754127885908e-08, + "loss": 0.3990614414215088, + 
"step": 8029 + }, + { + "epoch": 1.8515102605487663, + "grad_norm": 1.5323241652532567, + "learning_rate": 3.0084655617531376e-08, + "loss": 0.42349040508270264, + "step": 8030 + }, + { + "epoch": 1.851740834678349, + "grad_norm": 1.4436112405033301, + "learning_rate": 2.9991910943228725e-08, + "loss": 0.4687228798866272, + "step": 8031 + }, + { + "epoch": 1.8519714088079318, + "grad_norm": 1.91227305815919, + "learning_rate": 2.989930726943268e-08, + "loss": 0.6091229915618896, + "step": 8032 + }, + { + "epoch": 1.8522019829375145, + "grad_norm": 1.527659992048368, + "learning_rate": 2.980684460960381e-08, + "loss": 0.43401795625686646, + "step": 8033 + }, + { + "epoch": 1.8524325570670972, + "grad_norm": 1.521615388244922, + "learning_rate": 2.9714522977182688e-08, + "loss": 0.47280481457710266, + "step": 8034 + }, + { + "epoch": 1.8526631311966797, + "grad_norm": 1.6019291161476, + "learning_rate": 2.962234238558925e-08, + "loss": 0.5078729391098022, + "step": 8035 + }, + { + "epoch": 1.8528937053262624, + "grad_norm": 1.8353491661496104, + "learning_rate": 2.9530302848223e-08, + "loss": 0.4279085695743561, + "step": 8036 + }, + { + "epoch": 1.853124279455845, + "grad_norm": 1.4587208506754334, + "learning_rate": 2.9438404378462455e-08, + "loss": 0.3720093369483948, + "step": 8037 + }, + { + "epoch": 1.8533548535854276, + "grad_norm": 1.810026420285634, + "learning_rate": 2.934664698966627e-08, + "loss": 0.26778513193130493, + "step": 8038 + }, + { + "epoch": 1.8535854277150103, + "grad_norm": 1.569617242169025, + "learning_rate": 2.9255030695172324e-08, + "loss": 0.47606828808784485, + "step": 8039 + }, + { + "epoch": 1.853816001844593, + "grad_norm": 1.8330928647910023, + "learning_rate": 2.9163555508297632e-08, + "loss": 0.437153160572052, + "step": 8040 + }, + { + "epoch": 1.8540465759741758, + "grad_norm": 1.3219241142527494, + "learning_rate": 2.907222144233945e-08, + "loss": 0.408009797334671, + "step": 8041 + }, + { + "epoch": 1.8542771501037585, + 
"grad_norm": 1.3761080217774861, + "learning_rate": 2.8981028510573824e-08, + "loss": 0.3435688018798828, + "step": 8042 + }, + { + "epoch": 1.854507724233341, + "grad_norm": 1.881646492298394, + "learning_rate": 2.8889976726256705e-08, + "loss": 0.4829018712043762, + "step": 8043 + }, + { + "epoch": 1.8547382983629237, + "grad_norm": 1.5758694223281, + "learning_rate": 2.879906610262339e-08, + "loss": 0.44579288363456726, + "step": 8044 + }, + { + "epoch": 1.8549688724925062, + "grad_norm": 1.3922554430382053, + "learning_rate": 2.8708296652888764e-08, + "loss": 0.4952869415283203, + "step": 8045 + }, + { + "epoch": 1.855199446622089, + "grad_norm": 1.4450922871815606, + "learning_rate": 2.8617668390246818e-08, + "loss": 0.4870997965335846, + "step": 8046 + }, + { + "epoch": 1.8554300207516716, + "grad_norm": 1.5651252792966914, + "learning_rate": 2.8527181327871465e-08, + "loss": 0.5009135603904724, + "step": 8047 + }, + { + "epoch": 1.8556605948812543, + "grad_norm": 1.3977550991376733, + "learning_rate": 2.8436835478915954e-08, + "loss": 0.4837114214897156, + "step": 8048 + }, + { + "epoch": 1.855891169010837, + "grad_norm": 1.6474653449248091, + "learning_rate": 2.8346630856512897e-08, + "loss": 0.47955578565597534, + "step": 8049 + }, + { + "epoch": 1.8561217431404198, + "grad_norm": 1.705788106947518, + "learning_rate": 2.8256567473774363e-08, + "loss": 0.4882965385913849, + "step": 8050 + }, + { + "epoch": 1.8563523172700023, + "grad_norm": 1.5940097685845425, + "learning_rate": 2.8166645343792094e-08, + "loss": 0.4542367458343506, + "step": 8051 + }, + { + "epoch": 1.856582891399585, + "grad_norm": 1.5880265061576002, + "learning_rate": 2.8076864479637198e-08, + "loss": 0.4506416916847229, + "step": 8052 + }, + { + "epoch": 1.8568134655291675, + "grad_norm": 1.699970116686096, + "learning_rate": 2.798722489436012e-08, + "loss": 0.5043084025382996, + "step": 8053 + }, + { + "epoch": 1.8570440396587502, + "grad_norm": 1.397398070036947, + "learning_rate": 
2.78977266009911e-08, + "loss": 0.3711032271385193, + "step": 8054 + }, + { + "epoch": 1.857274613788333, + "grad_norm": 1.3008294527362816, + "learning_rate": 2.7808369612539405e-08, + "loss": 0.33371198177337646, + "step": 8055 + }, + { + "epoch": 1.8575051879179156, + "grad_norm": 1.7364482681056421, + "learning_rate": 2.771915394199409e-08, + "loss": 0.5328178405761719, + "step": 8056 + }, + { + "epoch": 1.8577357620474984, + "grad_norm": 1.925308909381556, + "learning_rate": 2.7630079602323443e-08, + "loss": 0.4615975618362427, + "step": 8057 + }, + { + "epoch": 1.857966336177081, + "grad_norm": 1.506605490676224, + "learning_rate": 2.754114660647533e-08, + "loss": 0.4667460024356842, + "step": 8058 + }, + { + "epoch": 1.8581969103066636, + "grad_norm": 1.7246190337812906, + "learning_rate": 2.745235496737719e-08, + "loss": 0.483825147151947, + "step": 8059 + }, + { + "epoch": 1.8584274844362463, + "grad_norm": 1.7802094460466942, + "learning_rate": 2.736370469793592e-08, + "loss": 0.4376814365386963, + "step": 8060 + }, + { + "epoch": 1.8586580585658288, + "grad_norm": 1.4605341926622646, + "learning_rate": 2.7275195811037432e-08, + "loss": 0.4862465262413025, + "step": 8061 + }, + { + "epoch": 1.8588886326954115, + "grad_norm": 1.6497121576486102, + "learning_rate": 2.718682831954744e-08, + "loss": 0.48104172945022583, + "step": 8062 + }, + { + "epoch": 1.8591192068249942, + "grad_norm": 1.3643295104524422, + "learning_rate": 2.709860223631122e-08, + "loss": 0.43358030915260315, + "step": 8063 + }, + { + "epoch": 1.859349780954577, + "grad_norm": 1.3052220670178016, + "learning_rate": 2.701051757415307e-08, + "loss": 0.44614607095718384, + "step": 8064 + }, + { + "epoch": 1.8595803550841596, + "grad_norm": 1.8220525339474862, + "learning_rate": 2.6922574345877303e-08, + "loss": 0.49824249744415283, + "step": 8065 + }, + { + "epoch": 1.8598109292137424, + "grad_norm": 1.3314333068504594, + "learning_rate": 2.683477256426714e-08, + "loss": 0.39621901512145996, 
+ "step": 8066 + }, + { + "epoch": 1.8600415033433249, + "grad_norm": 1.3391032368154236, + "learning_rate": 2.6747112242085478e-08, + "loss": 0.40166205167770386, + "step": 8067 + }, + { + "epoch": 1.8602720774729076, + "grad_norm": 1.720101921843303, + "learning_rate": 2.6659593392074575e-08, + "loss": 0.4249534606933594, + "step": 8068 + }, + { + "epoch": 1.86050265160249, + "grad_norm": 1.3203085704476971, + "learning_rate": 2.6572216026956473e-08, + "loss": 0.4015510678291321, + "step": 8069 + }, + { + "epoch": 1.8607332257320728, + "grad_norm": 1.8982655978960439, + "learning_rate": 2.6484980159432236e-08, + "loss": 0.4691264033317566, + "step": 8070 + }, + { + "epoch": 1.8609637998616555, + "grad_norm": 1.6363630573411998, + "learning_rate": 2.639788580218216e-08, + "loss": 0.5095053315162659, + "step": 8071 + }, + { + "epoch": 1.8611943739912382, + "grad_norm": 1.707433776183968, + "learning_rate": 2.6310932967866794e-08, + "loss": 0.4658794403076172, + "step": 8072 + }, + { + "epoch": 1.861424948120821, + "grad_norm": 1.7622547433521365, + "learning_rate": 2.622412166912513e-08, + "loss": 0.495827853679657, + "step": 8073 + }, + { + "epoch": 1.8616555222504036, + "grad_norm": 1.6584095706736666, + "learning_rate": 2.6137451918576413e-08, + "loss": 0.43652772903442383, + "step": 8074 + }, + { + "epoch": 1.8618860963799861, + "grad_norm": 1.410927084601702, + "learning_rate": 2.6050923728818784e-08, + "loss": 0.4636423587799072, + "step": 8075 + }, + { + "epoch": 1.8621166705095689, + "grad_norm": 1.6137478822178715, + "learning_rate": 2.5964537112430186e-08, + "loss": 0.4572441577911377, + "step": 8076 + }, + { + "epoch": 1.8623472446391514, + "grad_norm": 1.5268149737583054, + "learning_rate": 2.587829208196757e-08, + "loss": 0.4549320340156555, + "step": 8077 + }, + { + "epoch": 1.862577818768734, + "grad_norm": 1.4757300368438027, + "learning_rate": 2.5792188649967795e-08, + "loss": 0.46412795782089233, + "step": 8078 + }, + { + "epoch": 
1.8628083928983168, + "grad_norm": 1.566100546942984, + "learning_rate": 2.570622682894652e-08, + "loss": 0.40059781074523926, + "step": 8079 + }, + { + "epoch": 1.8630389670278995, + "grad_norm": 1.8382248312833556, + "learning_rate": 2.5620406631399416e-08, + "loss": 0.5396246910095215, + "step": 8080 + }, + { + "epoch": 1.8632695411574822, + "grad_norm": 1.630240250521673, + "learning_rate": 2.553472806980128e-08, + "loss": 0.4793856143951416, + "step": 8081 + }, + { + "epoch": 1.863500115287065, + "grad_norm": 1.7081981493499068, + "learning_rate": 2.5449191156606264e-08, + "loss": 0.4428815543651581, + "step": 8082 + }, + { + "epoch": 1.8637306894166474, + "grad_norm": 1.3161952024113066, + "learning_rate": 2.5363795904248086e-08, + "loss": 0.4024256467819214, + "step": 8083 + }, + { + "epoch": 1.8639612635462302, + "grad_norm": 1.7334425937535092, + "learning_rate": 2.5278542325139818e-08, + "loss": 0.4868123531341553, + "step": 8084 + }, + { + "epoch": 1.8641918376758126, + "grad_norm": 1.8199560965911645, + "learning_rate": 2.519343043167399e-08, + "loss": 0.602108359336853, + "step": 8085 + }, + { + "epoch": 1.8644224118053954, + "grad_norm": 1.8527423308196338, + "learning_rate": 2.510846023622237e-08, + "loss": 0.4500008225440979, + "step": 8086 + }, + { + "epoch": 1.864652985934978, + "grad_norm": 1.4521386296534855, + "learning_rate": 2.502363175113642e-08, + "loss": 0.3894640803337097, + "step": 8087 + }, + { + "epoch": 1.8648835600645608, + "grad_norm": 1.471988486213167, + "learning_rate": 2.493894498874649e-08, + "loss": 0.4525550305843353, + "step": 8088 + }, + { + "epoch": 1.8651141341941435, + "grad_norm": 1.362693221908779, + "learning_rate": 2.485439996136296e-08, + "loss": 0.3908608555793762, + "step": 8089 + }, + { + "epoch": 1.8653447083237262, + "grad_norm": 1.5537540661666722, + "learning_rate": 2.4769996681275106e-08, + "loss": 0.4551984667778015, + "step": 8090 + }, + { + "epoch": 1.8655752824533087, + "grad_norm": 1.3331466559033927, + 
"learning_rate": 2.468573516075201e-08, + "loss": 0.34474045038223267, + "step": 8091 + }, + { + "epoch": 1.8658058565828914, + "grad_norm": 1.675344505563735, + "learning_rate": 2.4601615412041755e-08, + "loss": 0.41480594873428345, + "step": 8092 + }, + { + "epoch": 1.866036430712474, + "grad_norm": 1.6368782805002868, + "learning_rate": 2.4517637447372007e-08, + "loss": 0.5043104887008667, + "step": 8093 + }, + { + "epoch": 1.8662670048420567, + "grad_norm": 1.7139805676568358, + "learning_rate": 2.4433801278950007e-08, + "loss": 0.4467152953147888, + "step": 8094 + }, + { + "epoch": 1.8664975789716394, + "grad_norm": 1.5274424401661542, + "learning_rate": 2.4350106918962e-08, + "loss": 0.454445481300354, + "step": 8095 + }, + { + "epoch": 1.866728153101222, + "grad_norm": 1.5661075903861215, + "learning_rate": 2.426655437957392e-08, + "loss": 0.4639291763305664, + "step": 8096 + }, + { + "epoch": 1.8669587272308048, + "grad_norm": 1.6251687636184629, + "learning_rate": 2.418314367293084e-08, + "loss": 0.46178731322288513, + "step": 8097 + }, + { + "epoch": 1.8671893013603875, + "grad_norm": 1.5047265923361783, + "learning_rate": 2.4099874811157383e-08, + "loss": 0.43832290172576904, + "step": 8098 + }, + { + "epoch": 1.86741987548997, + "grad_norm": 1.569040322283118, + "learning_rate": 2.4016747806357652e-08, + "loss": 0.4586114287376404, + "step": 8099 + }, + { + "epoch": 1.8676504496195527, + "grad_norm": 1.403368540081911, + "learning_rate": 2.3933762670614978e-08, + "loss": 0.37975889444351196, + "step": 8100 + }, + { + "epoch": 1.8678810237491352, + "grad_norm": 1.6666819300781532, + "learning_rate": 2.3850919415992042e-08, + "loss": 0.4579748511314392, + "step": 8101 + }, + { + "epoch": 1.868111597878718, + "grad_norm": 1.5976733248377182, + "learning_rate": 2.3768218054530775e-08, + "loss": 0.5120238661766052, + "step": 8102 + }, + { + "epoch": 1.8683421720083007, + "grad_norm": 1.47865092584181, + "learning_rate": 2.3685658598253e-08, + "loss": 
0.41514822840690613, + "step": 8103 + }, + { + "epoch": 1.8685727461378834, + "grad_norm": 1.6132937806442644, + "learning_rate": 2.360324105915934e-08, + "loss": 0.49480026960372925, + "step": 8104 + }, + { + "epoch": 1.868803320267466, + "grad_norm": 1.516759878457302, + "learning_rate": 2.352096544922999e-08, + "loss": 0.41115111112594604, + "step": 8105 + }, + { + "epoch": 1.8690338943970488, + "grad_norm": 1.8593225608723183, + "learning_rate": 2.3438831780424607e-08, + "loss": 0.44793501496315, + "step": 8106 + }, + { + "epoch": 1.8692644685266313, + "grad_norm": 2.087747863463927, + "learning_rate": 2.3356840064682305e-08, + "loss": 0.4197582006454468, + "step": 8107 + }, + { + "epoch": 1.869495042656214, + "grad_norm": 1.3708560469219937, + "learning_rate": 2.3274990313921218e-08, + "loss": 0.3654597997665405, + "step": 8108 + }, + { + "epoch": 1.8697256167857965, + "grad_norm": 1.6733057347639861, + "learning_rate": 2.319328254003927e-08, + "loss": 0.5105487704277039, + "step": 8109 + }, + { + "epoch": 1.8699561909153792, + "grad_norm": 1.6787548385436994, + "learning_rate": 2.3111716754913192e-08, + "loss": 0.5202287435531616, + "step": 8110 + }, + { + "epoch": 1.870186765044962, + "grad_norm": 1.5305524386936447, + "learning_rate": 2.303029297039949e-08, + "loss": 0.4475836753845215, + "step": 8111 + }, + { + "epoch": 1.8704173391745447, + "grad_norm": 1.579007380002247, + "learning_rate": 2.2949011198334144e-08, + "loss": 0.5010285973548889, + "step": 8112 + }, + { + "epoch": 1.8706479133041274, + "grad_norm": 1.4473541177707174, + "learning_rate": 2.286787145053204e-08, + "loss": 0.41949477791786194, + "step": 8113 + }, + { + "epoch": 1.87087848743371, + "grad_norm": 1.3276801089952157, + "learning_rate": 2.2786873738787738e-08, + "loss": 0.38505449891090393, + "step": 8114 + }, + { + "epoch": 1.8711090615632926, + "grad_norm": 1.8776948972547884, + "learning_rate": 2.2706018074875043e-08, + "loss": 0.4854990839958191, + "step": 8115 + }, + { + 
"epoch": 1.8713396356928753, + "grad_norm": 1.3982424394333428, + "learning_rate": 2.2625304470547336e-08, + "loss": 0.3846585154533386, + "step": 8116 + }, + { + "epoch": 1.8715702098224578, + "grad_norm": 1.7499321509858707, + "learning_rate": 2.2544732937537003e-08, + "loss": 0.48948657512664795, + "step": 8117 + }, + { + "epoch": 1.8718007839520405, + "grad_norm": 2.062408637955344, + "learning_rate": 2.2464303487555902e-08, + "loss": 0.5571197867393494, + "step": 8118 + }, + { + "epoch": 1.8720313580816232, + "grad_norm": 1.6301482456607912, + "learning_rate": 2.2384016132295345e-08, + "loss": 0.514819324016571, + "step": 8119 + }, + { + "epoch": 1.872261932211206, + "grad_norm": 1.5677432247071832, + "learning_rate": 2.230387088342589e-08, + "loss": 0.4411713182926178, + "step": 8120 + }, + { + "epoch": 1.8724925063407887, + "grad_norm": 1.4508146354194726, + "learning_rate": 2.2223867752597437e-08, + "loss": 0.4494340717792511, + "step": 8121 + }, + { + "epoch": 1.8727230804703712, + "grad_norm": 1.6205003929883524, + "learning_rate": 2.2144006751439236e-08, + "loss": 0.4186316132545471, + "step": 8122 + }, + { + "epoch": 1.8729536545999539, + "grad_norm": 1.5017815147990925, + "learning_rate": 2.2064287891560007e-08, + "loss": 0.45932692289352417, + "step": 8123 + }, + { + "epoch": 1.8731842287295364, + "grad_norm": 1.475598332139336, + "learning_rate": 2.1984711184547477e-08, + "loss": 0.4095005989074707, + "step": 8124 + }, + { + "epoch": 1.873414802859119, + "grad_norm": 1.4633944208901333, + "learning_rate": 2.1905276641969284e-08, + "loss": 0.3822292685508728, + "step": 8125 + }, + { + "epoch": 1.8736453769887018, + "grad_norm": 1.5993925787143786, + "learning_rate": 2.1825984275371633e-08, + "loss": 0.41837501525878906, + "step": 8126 + }, + { + "epoch": 1.8738759511182845, + "grad_norm": 1.6176173713553115, + "learning_rate": 2.1746834096280752e-08, + "loss": 0.3903341591358185, + "step": 8127 + }, + { + "epoch": 1.8741065252478672, + "grad_norm": 
1.4079834631265329, + "learning_rate": 2.166782611620177e-08, + "loss": 0.4760533571243286, + "step": 8128 + }, + { + "epoch": 1.87433709937745, + "grad_norm": 1.4208864897990974, + "learning_rate": 2.1588960346619388e-08, + "loss": 0.43960827589035034, + "step": 8129 + }, + { + "epoch": 1.8745676735070325, + "grad_norm": 1.7654096006141957, + "learning_rate": 2.151023679899755e-08, + "loss": 0.47941142320632935, + "step": 8130 + }, + { + "epoch": 1.8747982476366152, + "grad_norm": 1.41048993466122, + "learning_rate": 2.143165548477943e-08, + "loss": 0.4467000961303711, + "step": 8131 + }, + { + "epoch": 1.8750288217661977, + "grad_norm": 1.4796609851220597, + "learning_rate": 2.1353216415387788e-08, + "loss": 0.42472416162490845, + "step": 8132 + }, + { + "epoch": 1.8752593958957804, + "grad_norm": 1.9200971165248846, + "learning_rate": 2.1274919602224273e-08, + "loss": 0.5127208232879639, + "step": 8133 + }, + { + "epoch": 1.875489970025363, + "grad_norm": 1.8325759046238386, + "learning_rate": 2.119676505667045e-08, + "loss": 0.5362575650215149, + "step": 8134 + }, + { + "epoch": 1.8757205441549458, + "grad_norm": 1.2983178226172876, + "learning_rate": 2.111875279008657e-08, + "loss": 0.4025413990020752, + "step": 8135 + }, + { + "epoch": 1.8759511182845285, + "grad_norm": 1.5647543555868217, + "learning_rate": 2.1040882813812667e-08, + "loss": 0.49126237630844116, + "step": 8136 + }, + { + "epoch": 1.8761816924141113, + "grad_norm": 1.64373423682739, + "learning_rate": 2.096315513916791e-08, + "loss": 0.40609198808670044, + "step": 8137 + }, + { + "epoch": 1.8764122665436938, + "grad_norm": 1.4881317882345182, + "learning_rate": 2.0885569777450707e-08, + "loss": 0.47826945781707764, + "step": 8138 + }, + { + "epoch": 1.8766428406732765, + "grad_norm": 1.4578062807690564, + "learning_rate": 2.0808126739939035e-08, + "loss": 0.39987948536872864, + "step": 8139 + }, + { + "epoch": 1.876873414802859, + "grad_norm": 1.6010627164873539, + "learning_rate": 
2.0730826037890003e-08, + "loss": 0.5727471113204956, + "step": 8140 + }, + { + "epoch": 1.8771039889324417, + "grad_norm": 1.3737495035065335, + "learning_rate": 2.0653667682540066e-08, + "loss": 0.4772847294807434, + "step": 8141 + }, + { + "epoch": 1.8773345630620244, + "grad_norm": 1.54097710668183, + "learning_rate": 2.0576651685104697e-08, + "loss": 0.3258974552154541, + "step": 8142 + }, + { + "epoch": 1.8775651371916071, + "grad_norm": 1.4067173519179077, + "learning_rate": 2.049977805677938e-08, + "loss": 0.5220766067504883, + "step": 8143 + }, + { + "epoch": 1.8777957113211898, + "grad_norm": 1.2918102910413813, + "learning_rate": 2.0423046808738077e-08, + "loss": 0.39550334215164185, + "step": 8144 + }, + { + "epoch": 1.8780262854507725, + "grad_norm": 2.3983596335767334, + "learning_rate": 2.034645795213463e-08, + "loss": 0.4487137198448181, + "step": 8145 + }, + { + "epoch": 1.878256859580355, + "grad_norm": 1.3947776950768658, + "learning_rate": 2.0270011498102147e-08, + "loss": 0.3363339304924011, + "step": 8146 + }, + { + "epoch": 1.8784874337099378, + "grad_norm": 1.5333942075668883, + "learning_rate": 2.019370745775273e-08, + "loss": 0.5161975026130676, + "step": 8147 + }, + { + "epoch": 1.8787180078395203, + "grad_norm": 1.4587907721196531, + "learning_rate": 2.011754584217784e-08, + "loss": 0.359643816947937, + "step": 8148 + }, + { + "epoch": 1.878948581969103, + "grad_norm": 1.3696377552673178, + "learning_rate": 2.0041526662448625e-08, + "loss": 0.4472349286079407, + "step": 8149 + }, + { + "epoch": 1.8791791560986857, + "grad_norm": 1.6693442042315434, + "learning_rate": 1.9965649929615135e-08, + "loss": 0.40363550186157227, + "step": 8150 + }, + { + "epoch": 1.8794097302282684, + "grad_norm": 1.7598833036688746, + "learning_rate": 1.9889915654706656e-08, + "loss": 0.46063172817230225, + "step": 8151 + }, + { + "epoch": 1.8796403043578511, + "grad_norm": 1.6348416553504144, + "learning_rate": 1.981432384873205e-08, + "loss": 
0.4478832483291626, + "step": 8152 + }, + { + "epoch": 1.8798708784874338, + "grad_norm": 1.7016857171242656, + "learning_rate": 1.9738874522679304e-08, + "loss": 0.3438538908958435, + "step": 8153 + }, + { + "epoch": 1.8801014526170163, + "grad_norm": 2.2031337611169435, + "learning_rate": 1.966356768751598e-08, + "loss": 0.6035101413726807, + "step": 8154 + }, + { + "epoch": 1.880332026746599, + "grad_norm": 1.6642481554824737, + "learning_rate": 1.958840335418832e-08, + "loss": 0.42533814907073975, + "step": 8155 + }, + { + "epoch": 1.8805626008761815, + "grad_norm": 1.5825430260849223, + "learning_rate": 1.9513381533622587e-08, + "loss": 0.4117417633533478, + "step": 8156 + }, + { + "epoch": 1.8807931750057643, + "grad_norm": 1.6218701576707837, + "learning_rate": 1.943850223672361e-08, + "loss": 0.4353973865509033, + "step": 8157 + }, + { + "epoch": 1.881023749135347, + "grad_norm": 1.5613174256794196, + "learning_rate": 1.9363765474376125e-08, + "loss": 0.46115410327911377, + "step": 8158 + }, + { + "epoch": 1.8812543232649297, + "grad_norm": 1.4415196194001674, + "learning_rate": 1.9289171257443782e-08, + "loss": 0.3851476311683655, + "step": 8159 + }, + { + "epoch": 1.8814848973945124, + "grad_norm": 1.5586436794771006, + "learning_rate": 1.921471959676957e-08, + "loss": 0.4786919355392456, + "step": 8160 + }, + { + "epoch": 1.8817154715240951, + "grad_norm": 1.6398537249529117, + "learning_rate": 1.914041050317583e-08, + "loss": 0.4427906274795532, + "step": 8161 + }, + { + "epoch": 1.8819460456536776, + "grad_norm": 1.495606046913042, + "learning_rate": 1.906624398746415e-08, + "loss": 0.37774696946144104, + "step": 8162 + }, + { + "epoch": 1.8821766197832603, + "grad_norm": 1.5733237369323263, + "learning_rate": 1.8992220060415343e-08, + "loss": 0.43793195486068726, + "step": 8163 + }, + { + "epoch": 1.8824071939128428, + "grad_norm": 1.2904039749569203, + "learning_rate": 1.8918338732789587e-08, + "loss": 0.3869394063949585, + "step": 8164 + }, + { + 
"epoch": 1.8826377680424256, + "grad_norm": 1.9325019962539283, + "learning_rate": 1.8844600015326283e-08, + "loss": 0.4963928461074829, + "step": 8165 + }, + { + "epoch": 1.8828683421720083, + "grad_norm": 1.5945637624217548, + "learning_rate": 1.8771003918743978e-08, + "loss": 0.45727187395095825, + "step": 8166 + }, + { + "epoch": 1.883098916301591, + "grad_norm": 1.8455372682093192, + "learning_rate": 1.8697550453740884e-08, + "loss": 0.4878919720649719, + "step": 8167 + }, + { + "epoch": 1.8833294904311737, + "grad_norm": 1.7826396913976752, + "learning_rate": 1.862423963099391e-08, + "loss": 0.5376998782157898, + "step": 8168 + }, + { + "epoch": 1.8835600645607564, + "grad_norm": 1.4765870494853872, + "learning_rate": 1.8551071461159638e-08, + "loss": 0.4534180760383606, + "step": 8169 + }, + { + "epoch": 1.883790638690339, + "grad_norm": 1.561114582514347, + "learning_rate": 1.847804595487379e-08, + "loss": 0.43389183282852173, + "step": 8170 + }, + { + "epoch": 1.8840212128199216, + "grad_norm": 1.535519375075225, + "learning_rate": 1.8405163122751532e-08, + "loss": 0.4833742678165436, + "step": 8171 + }, + { + "epoch": 1.8842517869495041, + "grad_norm": 1.622186588307033, + "learning_rate": 1.833242297538695e-08, + "loss": 0.49344220757484436, + "step": 8172 + }, + { + "epoch": 1.8844823610790868, + "grad_norm": 1.4984978840285303, + "learning_rate": 1.8259825523353478e-08, + "loss": 0.49290287494659424, + "step": 8173 + }, + { + "epoch": 1.8847129352086696, + "grad_norm": 1.3380486770022888, + "learning_rate": 1.8187370777204115e-08, + "loss": 0.3971661627292633, + "step": 8174 + }, + { + "epoch": 1.8849435093382523, + "grad_norm": 1.5640300636460862, + "learning_rate": 1.811505874747066e-08, + "loss": 0.4984559416770935, + "step": 8175 + }, + { + "epoch": 1.885174083467835, + "grad_norm": 1.5865101985098036, + "learning_rate": 1.804288944466459e-08, + "loss": 0.38448822498321533, + "step": 8176 + }, + { + "epoch": 1.8854046575974177, + "grad_norm": 
1.9477188873182039, + "learning_rate": 1.7970862879276406e-08, + "loss": 0.5468838214874268, + "step": 8177 + }, + { + "epoch": 1.8856352317270002, + "grad_norm": 1.4768596083300787, + "learning_rate": 1.7898979061775844e-08, + "loss": 0.46132227778434753, + "step": 8178 + }, + { + "epoch": 1.885865805856583, + "grad_norm": 1.436520509516384, + "learning_rate": 1.782723800261199e-08, + "loss": 0.4636603593826294, + "step": 8179 + }, + { + "epoch": 1.8860963799861654, + "grad_norm": 1.5429934177783204, + "learning_rate": 1.7755639712213057e-08, + "loss": 0.5302075147628784, + "step": 8180 + }, + { + "epoch": 1.8863269541157481, + "grad_norm": 1.6563780466455296, + "learning_rate": 1.7684184200986718e-08, + "loss": 0.4817178249359131, + "step": 8181 + }, + { + "epoch": 1.8865575282453309, + "grad_norm": 1.4897334937072715, + "learning_rate": 1.7612871479319668e-08, + "loss": 0.4535263180732727, + "step": 8182 + }, + { + "epoch": 1.8867881023749136, + "grad_norm": 1.6029244875460678, + "learning_rate": 1.7541701557577837e-08, + "loss": 0.5260534286499023, + "step": 8183 + }, + { + "epoch": 1.8870186765044963, + "grad_norm": 1.4065276330082377, + "learning_rate": 1.7470674446106614e-08, + "loss": 0.4526366591453552, + "step": 8184 + }, + { + "epoch": 1.887249250634079, + "grad_norm": 1.663451618032215, + "learning_rate": 1.7399790155230632e-08, + "loss": 0.4721973240375519, + "step": 8185 + }, + { + "epoch": 1.8874798247636615, + "grad_norm": 1.6510288712519465, + "learning_rate": 1.7329048695253422e-08, + "loss": 0.4331268072128296, + "step": 8186 + }, + { + "epoch": 1.8877103988932442, + "grad_norm": 1.9623503418050199, + "learning_rate": 1.7258450076458097e-08, + "loss": 0.5175650119781494, + "step": 8187 + }, + { + "epoch": 1.8879409730228267, + "grad_norm": 1.3640756960267433, + "learning_rate": 1.718799430910678e-08, + "loss": 0.45537033677101135, + "step": 8188 + }, + { + "epoch": 1.8881715471524094, + "grad_norm": 1.540072753548263, + "learning_rate": 
1.7117681403441054e-08, + "loss": 0.5055547952651978, + "step": 8189 + }, + { + "epoch": 1.8884021212819921, + "grad_norm": 1.5849214553434074, + "learning_rate": 1.7047511369681522e-08, + "loss": 0.45514553785324097, + "step": 8190 + }, + { + "epoch": 1.8886326954115749, + "grad_norm": 1.4821599822935887, + "learning_rate": 1.6977484218028136e-08, + "loss": 0.44227129220962524, + "step": 8191 + }, + { + "epoch": 1.8888632695411576, + "grad_norm": 1.7163429603820965, + "learning_rate": 1.690759995866009e-08, + "loss": 0.4916682839393616, + "step": 8192 + }, + { + "epoch": 1.8890938436707403, + "grad_norm": 1.8219225402151713, + "learning_rate": 1.683785860173559e-08, + "loss": 0.48626652359962463, + "step": 8193 + }, + { + "epoch": 1.8893244178003228, + "grad_norm": 1.491517373721971, + "learning_rate": 1.676826015739252e-08, + "loss": 0.39982378482818604, + "step": 8194 + }, + { + "epoch": 1.8895549919299055, + "grad_norm": 1.8710391095575285, + "learning_rate": 1.6698804635747576e-08, + "loss": 0.49218645691871643, + "step": 8195 + }, + { + "epoch": 1.889785566059488, + "grad_norm": 1.5127362254029266, + "learning_rate": 1.6629492046896897e-08, + "loss": 0.38896578550338745, + "step": 8196 + }, + { + "epoch": 1.8900161401890707, + "grad_norm": 1.5870268370960243, + "learning_rate": 1.6560322400915538e-08, + "loss": 0.4217762053012848, + "step": 8197 + }, + { + "epoch": 1.8902467143186534, + "grad_norm": 1.5231528042475502, + "learning_rate": 1.6491295707858343e-08, + "loss": 0.4020112156867981, + "step": 8198 + }, + { + "epoch": 1.8904772884482361, + "grad_norm": 2.1189678944561954, + "learning_rate": 1.6422411977758843e-08, + "loss": 0.4630794823169708, + "step": 8199 + }, + { + "epoch": 1.8907078625778189, + "grad_norm": 1.526138087578761, + "learning_rate": 1.6353671220629917e-08, + "loss": 0.3673272132873535, + "step": 8200 + }, + { + "epoch": 1.8909384367074016, + "grad_norm": 1.4930616058109705, + "learning_rate": 1.6285073446463903e-08, + "loss": 
0.4677228331565857, + "step": 8201 + }, + { + "epoch": 1.891169010836984, + "grad_norm": 1.718939922651036, + "learning_rate": 1.621661866523216e-08, + "loss": 0.4532579183578491, + "step": 8202 + }, + { + "epoch": 1.8913995849665668, + "grad_norm": 1.4990742550855458, + "learning_rate": 1.6148306886885287e-08, + "loss": 0.3011256456375122, + "step": 8203 + }, + { + "epoch": 1.8916301590961493, + "grad_norm": 1.731114486954807, + "learning_rate": 1.6080138121352892e-08, + "loss": 0.43071651458740234, + "step": 8204 + }, + { + "epoch": 1.891860733225732, + "grad_norm": 1.4183554819693576, + "learning_rate": 1.6012112378544272e-08, + "loss": 0.3180675506591797, + "step": 8205 + }, + { + "epoch": 1.8920913073553147, + "grad_norm": 1.6038525214828652, + "learning_rate": 1.594422966834741e-08, + "loss": 0.35130774974823, + "step": 8206 + }, + { + "epoch": 1.8923218814848974, + "grad_norm": 1.388613528735296, + "learning_rate": 1.587649000062996e-08, + "loss": 0.4953269958496094, + "step": 8207 + }, + { + "epoch": 1.8925524556144802, + "grad_norm": 1.5668590048532676, + "learning_rate": 1.5808893385238388e-08, + "loss": 0.3713166415691376, + "step": 8208 + }, + { + "epoch": 1.8927830297440629, + "grad_norm": 1.4824855259294067, + "learning_rate": 1.5741439831998827e-08, + "loss": 0.4273546040058136, + "step": 8209 + }, + { + "epoch": 1.8930136038736454, + "grad_norm": 1.8212221910711959, + "learning_rate": 1.5674129350715994e-08, + "loss": 0.45312386751174927, + "step": 8210 + }, + { + "epoch": 1.893244178003228, + "grad_norm": 1.4687276423683582, + "learning_rate": 1.560696195117439e-08, + "loss": 0.40246695280075073, + "step": 8211 + }, + { + "epoch": 1.8934747521328106, + "grad_norm": 1.9323139227263069, + "learning_rate": 1.5539937643137325e-08, + "loss": 0.5229366421699524, + "step": 8212 + }, + { + "epoch": 1.8937053262623933, + "grad_norm": 1.4419033757005335, + "learning_rate": 1.5473056436347554e-08, + "loss": 0.43834251165390015, + "step": 8213 + }, + { + 
"epoch": 1.893935900391976, + "grad_norm": 1.5176292463299432, + "learning_rate": 1.540631834052697e-08, + "loss": 0.4423528015613556, + "step": 8214 + }, + { + "epoch": 1.8941664745215587, + "grad_norm": 1.6176606345399394, + "learning_rate": 1.5339723365376478e-08, + "loss": 0.49888452887535095, + "step": 8215 + }, + { + "epoch": 1.8943970486511414, + "grad_norm": 1.7422668701695732, + "learning_rate": 1.5273271520576448e-08, + "loss": 0.44023919105529785, + "step": 8216 + }, + { + "epoch": 1.8946276227807242, + "grad_norm": 1.5430241161700802, + "learning_rate": 1.5206962815786262e-08, + "loss": 0.4733201861381531, + "step": 8217 + }, + { + "epoch": 1.8948581969103067, + "grad_norm": 1.992567039765999, + "learning_rate": 1.5140797260644768e-08, + "loss": 0.5393285751342773, + "step": 8218 + }, + { + "epoch": 1.8950887710398894, + "grad_norm": 1.5439154792235448, + "learning_rate": 1.507477486476949e-08, + "loss": 0.4240071773529053, + "step": 8219 + }, + { + "epoch": 1.8953193451694719, + "grad_norm": 1.4272355688005478, + "learning_rate": 1.5008895637757647e-08, + "loss": 0.42983078956604004, + "step": 8220 + }, + { + "epoch": 1.8955499192990546, + "grad_norm": 1.470069283076572, + "learning_rate": 1.4943159589185462e-08, + "loss": 0.47513502836227417, + "step": 8221 + }, + { + "epoch": 1.8957804934286373, + "grad_norm": 1.49966428795426, + "learning_rate": 1.4877566728608293e-08, + "loss": 0.41938167810440063, + "step": 8222 + }, + { + "epoch": 1.89601106755822, + "grad_norm": 1.513306290399523, + "learning_rate": 1.4812117065560625e-08, + "loss": 0.44817137718200684, + "step": 8223 + }, + { + "epoch": 1.8962416416878027, + "grad_norm": 1.6563869108965783, + "learning_rate": 1.4746810609556292e-08, + "loss": 0.46840909123420715, + "step": 8224 + }, + { + "epoch": 1.8964722158173855, + "grad_norm": 1.4822882914533433, + "learning_rate": 1.4681647370088369e-08, + "loss": 0.377409964799881, + "step": 8225 + }, + { + "epoch": 1.896702789946968, + "grad_norm": 
1.595495246407856, + "learning_rate": 1.4616627356628831e-08, + "loss": 0.41149425506591797, + "step": 8226 + }, + { + "epoch": 1.8969333640765507, + "grad_norm": 1.548113444870098, + "learning_rate": 1.455175057862923e-08, + "loss": 0.39183878898620605, + "step": 8227 + }, + { + "epoch": 1.8971639382061332, + "grad_norm": 1.3643453838150799, + "learning_rate": 1.448701704551969e-08, + "loss": 0.3629387617111206, + "step": 8228 + }, + { + "epoch": 1.8973945123357159, + "grad_norm": 1.6546771139251113, + "learning_rate": 1.4422426766710239e-08, + "loss": 0.4007713794708252, + "step": 8229 + }, + { + "epoch": 1.8976250864652986, + "grad_norm": 1.648419698601457, + "learning_rate": 1.4357979751589477e-08, + "loss": 0.42354586720466614, + "step": 8230 + }, + { + "epoch": 1.8978556605948813, + "grad_norm": 1.9683167812350795, + "learning_rate": 1.429367600952558e-08, + "loss": 0.5321829319000244, + "step": 8231 + }, + { + "epoch": 1.898086234724464, + "grad_norm": 1.5240649560541817, + "learning_rate": 1.4229515549865845e-08, + "loss": 0.4840988218784332, + "step": 8232 + }, + { + "epoch": 1.8983168088540465, + "grad_norm": 1.6587626955063286, + "learning_rate": 1.4165498381936369e-08, + "loss": 0.5006803870201111, + "step": 8233 + }, + { + "epoch": 1.8985473829836292, + "grad_norm": 1.855334923621547, + "learning_rate": 1.4101624515042821e-08, + "loss": 0.40582865476608276, + "step": 8234 + }, + { + "epoch": 1.8987779571132117, + "grad_norm": 1.6458084674224973, + "learning_rate": 1.4037893958469993e-08, + "loss": 0.38199514150619507, + "step": 8235 + }, + { + "epoch": 1.8990085312427945, + "grad_norm": 1.4513711417071327, + "learning_rate": 1.3974306721481699e-08, + "loss": 0.39234936237335205, + "step": 8236 + }, + { + "epoch": 1.8992391053723772, + "grad_norm": 1.661857153956049, + "learning_rate": 1.391086281332099e-08, + "loss": 0.42211759090423584, + "step": 8237 + }, + { + "epoch": 1.8994696795019599, + "grad_norm": 1.5171507269414566, + "learning_rate": 
1.3847562243210043e-08, + "loss": 0.4519961476325989, + "step": 8238 + }, + { + "epoch": 1.8997002536315426, + "grad_norm": 1.618394005210342, + "learning_rate": 1.3784405020350276e-08, + "loss": 0.4795762896537781, + "step": 8239 + }, + { + "epoch": 1.8999308277611253, + "grad_norm": 1.5749927795923588, + "learning_rate": 1.3721391153922235e-08, + "loss": 0.4549542963504791, + "step": 8240 + }, + { + "epoch": 1.9001614018907078, + "grad_norm": 1.759482125374446, + "learning_rate": 1.3658520653085703e-08, + "loss": 0.5253233313560486, + "step": 8241 + }, + { + "epoch": 1.9003919760202905, + "grad_norm": 1.4274315163192688, + "learning_rate": 1.3595793526979371e-08, + "loss": 0.44850921630859375, + "step": 8242 + }, + { + "epoch": 1.900622550149873, + "grad_norm": 1.5448941620644567, + "learning_rate": 1.35332097847215e-08, + "loss": 0.4416281580924988, + "step": 8243 + }, + { + "epoch": 1.9008531242794557, + "grad_norm": 1.932595440608825, + "learning_rate": 1.3470769435409036e-08, + "loss": 0.5567417740821838, + "step": 8244 + }, + { + "epoch": 1.9010836984090385, + "grad_norm": 1.4810071060864598, + "learning_rate": 1.3408472488118383e-08, + "loss": 0.43554848432540894, + "step": 8245 + }, + { + "epoch": 1.9013142725386212, + "grad_norm": 1.6729713604736038, + "learning_rate": 1.3346318951905077e-08, + "loss": 0.4219995141029358, + "step": 8246 + }, + { + "epoch": 1.901544846668204, + "grad_norm": 1.5600368865419485, + "learning_rate": 1.328430883580367e-08, + "loss": 0.45862913131713867, + "step": 8247 + }, + { + "epoch": 1.9017754207977866, + "grad_norm": 1.5932092717655322, + "learning_rate": 1.3222442148828172e-08, + "loss": 0.5026064515113831, + "step": 8248 + }, + { + "epoch": 1.902005994927369, + "grad_norm": 1.6308659122795583, + "learning_rate": 1.316071889997139e-08, + "loss": 0.46948713064193726, + "step": 8249 + }, + { + "epoch": 1.9022365690569518, + "grad_norm": 1.5718314790268124, + "learning_rate": 1.3099139098205258e-08, + "loss": 
0.4263686537742615, + "step": 8250 + }, + { + "epoch": 1.9024671431865343, + "grad_norm": 1.516002170215572, + "learning_rate": 1.3037702752481394e-08, + "loss": 0.4652191400527954, + "step": 8251 + }, + { + "epoch": 1.902697717316117, + "grad_norm": 1.553138573631746, + "learning_rate": 1.2976409871729987e-08, + "loss": 0.4918743371963501, + "step": 8252 + }, + { + "epoch": 1.9029282914456997, + "grad_norm": 1.4916920711393407, + "learning_rate": 1.2915260464860466e-08, + "loss": 0.5297696590423584, + "step": 8253 + }, + { + "epoch": 1.9031588655752825, + "grad_norm": 1.7049232652010609, + "learning_rate": 1.2854254540761722e-08, + "loss": 0.5320281982421875, + "step": 8254 + }, + { + "epoch": 1.9033894397048652, + "grad_norm": 1.6403951625522013, + "learning_rate": 1.2793392108301437e-08, + "loss": 0.4424601197242737, + "step": 8255 + }, + { + "epoch": 1.903620013834448, + "grad_norm": 1.7301429652605729, + "learning_rate": 1.2732673176326758e-08, + "loss": 0.4811365008354187, + "step": 8256 + }, + { + "epoch": 1.9038505879640304, + "grad_norm": 1.4707627617860477, + "learning_rate": 1.2672097753663624e-08, + "loss": 0.3744504451751709, + "step": 8257 + }, + { + "epoch": 1.904081162093613, + "grad_norm": 1.4178929694153364, + "learning_rate": 1.2611665849117326e-08, + "loss": 0.4703986644744873, + "step": 8258 + }, + { + "epoch": 1.9043117362231956, + "grad_norm": 1.7267205141598052, + "learning_rate": 1.255137747147228e-08, + "loss": 0.5431181192398071, + "step": 8259 + }, + { + "epoch": 1.9045423103527783, + "grad_norm": 1.8088892551764337, + "learning_rate": 1.2491232629492143e-08, + "loss": 0.5066450238227844, + "step": 8260 + }, + { + "epoch": 1.904772884482361, + "grad_norm": 1.4945728049455276, + "learning_rate": 1.2431231331919368e-08, + "loss": 0.4374620020389557, + "step": 8261 + }, + { + "epoch": 1.9050034586119438, + "grad_norm": 1.5574450804582989, + "learning_rate": 1.2371373587475753e-08, + "loss": 0.3628976345062256, + "step": 8262 + }, + { + 
"epoch": 1.9052340327415265, + "grad_norm": 1.6159357629155715, + "learning_rate": 1.231165940486234e-08, + "loss": 0.43471890687942505, + "step": 8263 + }, + { + "epoch": 1.9054646068711092, + "grad_norm": 1.4892272896008858, + "learning_rate": 1.2252088792759074e-08, + "loss": 0.5038785934448242, + "step": 8264 + }, + { + "epoch": 1.9056951810006917, + "grad_norm": 1.388813738509663, + "learning_rate": 1.2192661759825363e-08, + "loss": 0.44022035598754883, + "step": 8265 + }, + { + "epoch": 1.9059257551302744, + "grad_norm": 1.8473214990080156, + "learning_rate": 1.2133378314699294e-08, + "loss": 0.4924722909927368, + "step": 8266 + }, + { + "epoch": 1.906156329259857, + "grad_norm": 1.525292247487046, + "learning_rate": 1.2074238465998532e-08, + "loss": 0.3824247121810913, + "step": 8267 + }, + { + "epoch": 1.9063869033894396, + "grad_norm": 1.821466956277618, + "learning_rate": 1.2015242222319422e-08, + "loss": 0.47094473242759705, + "step": 8268 + }, + { + "epoch": 1.9066174775190223, + "grad_norm": 1.7313158547849, + "learning_rate": 1.1956389592237881e-08, + "loss": 0.5653735399246216, + "step": 8269 + }, + { + "epoch": 1.906848051648605, + "grad_norm": 1.7620428814203788, + "learning_rate": 1.1897680584308512e-08, + "loss": 0.4763476848602295, + "step": 8270 + }, + { + "epoch": 1.9070786257781878, + "grad_norm": 1.5194232107831984, + "learning_rate": 1.1839115207065487e-08, + "loss": 0.3845449686050415, + "step": 8271 + }, + { + "epoch": 1.9073091999077705, + "grad_norm": 1.5881713237890829, + "learning_rate": 1.1780693469021775e-08, + "loss": 0.43071988224983215, + "step": 8272 + }, + { + "epoch": 1.907539774037353, + "grad_norm": 1.4466344827167648, + "learning_rate": 1.172241537866947e-08, + "loss": 0.43860751390457153, + "step": 8273 + }, + { + "epoch": 1.9077703481669357, + "grad_norm": 1.7623171007667486, + "learning_rate": 1.1664280944480132e-08, + "loss": 0.5077678561210632, + "step": 8274 + }, + { + "epoch": 1.9080009222965182, + "grad_norm": 
1.4297374268054954, + "learning_rate": 1.1606290174903888e-08, + "loss": 0.3832993805408478, + "step": 8275 + }, + { + "epoch": 1.908231496426101, + "grad_norm": 1.629527864713481, + "learning_rate": 1.1548443078370551e-08, + "loss": 0.48003530502319336, + "step": 8276 + }, + { + "epoch": 1.9084620705556836, + "grad_norm": 1.5503547776003848, + "learning_rate": 1.1490739663288618e-08, + "loss": 0.6109439134597778, + "step": 8277 + }, + { + "epoch": 1.9086926446852663, + "grad_norm": 1.9064677948637023, + "learning_rate": 1.1433179938045823e-08, + "loss": 0.4559859037399292, + "step": 8278 + }, + { + "epoch": 1.908923218814849, + "grad_norm": 1.4670877218502, + "learning_rate": 1.137576391100925e-08, + "loss": 0.3935600221157074, + "step": 8279 + }, + { + "epoch": 1.9091537929444318, + "grad_norm": 1.6460426557554972, + "learning_rate": 1.1318491590524782e-08, + "loss": 0.44477611780166626, + "step": 8280 + }, + { + "epoch": 1.9093843670740143, + "grad_norm": 1.652813391764361, + "learning_rate": 1.1261362984917533e-08, + "loss": 0.47065627574920654, + "step": 8281 + }, + { + "epoch": 1.909614941203597, + "grad_norm": 1.567401132156008, + "learning_rate": 1.1204378102491862e-08, + "loss": 0.44851434230804443, + "step": 8282 + }, + { + "epoch": 1.9098455153331795, + "grad_norm": 1.6119259284309502, + "learning_rate": 1.1147536951530923e-08, + "loss": 0.38606488704681396, + "step": 8283 + }, + { + "epoch": 1.9100760894627622, + "grad_norm": 1.7145601291142103, + "learning_rate": 1.1090839540297103e-08, + "loss": 0.5400182008743286, + "step": 8284 + }, + { + "epoch": 1.910306663592345, + "grad_norm": 1.5193110263706777, + "learning_rate": 1.1034285877032146e-08, + "loss": 0.4225059449672699, + "step": 8285 + }, + { + "epoch": 1.9105372377219276, + "grad_norm": 1.8787563951518915, + "learning_rate": 1.0977875969956584e-08, + "loss": 0.5111556649208069, + "step": 8286 + }, + { + "epoch": 1.9107678118515103, + "grad_norm": 1.583999151547768, + "learning_rate": 
1.0921609827270196e-08, + "loss": 0.40596213936805725, + "step": 8287 + }, + { + "epoch": 1.910998385981093, + "grad_norm": 1.619272502884341, + "learning_rate": 1.0865487457151768e-08, + "loss": 0.47917360067367554, + "step": 8288 + }, + { + "epoch": 1.9112289601106756, + "grad_norm": 1.8556422558472565, + "learning_rate": 1.0809508867759331e-08, + "loss": 0.45154574513435364, + "step": 8289 + }, + { + "epoch": 1.9114595342402583, + "grad_norm": 1.7391028962680364, + "learning_rate": 1.0753674067229935e-08, + "loss": 0.5024373531341553, + "step": 8290 + }, + { + "epoch": 1.9116901083698408, + "grad_norm": 1.6003253992080113, + "learning_rate": 1.069798306367975e-08, + "loss": 0.5084686875343323, + "step": 8291 + }, + { + "epoch": 1.9119206824994235, + "grad_norm": 1.5906220140950642, + "learning_rate": 1.064243586520408e-08, + "loss": 0.3947920501232147, + "step": 8292 + }, + { + "epoch": 1.9121512566290062, + "grad_norm": 1.5037329879323602, + "learning_rate": 1.0587032479877023e-08, + "loss": 0.5011960864067078, + "step": 8293 + }, + { + "epoch": 1.912381830758589, + "grad_norm": 1.6116996984750152, + "learning_rate": 1.0531772915752247e-08, + "loss": 0.43622612953186035, + "step": 8294 + }, + { + "epoch": 1.9126124048881716, + "grad_norm": 1.664400790122745, + "learning_rate": 1.0476657180862325e-08, + "loss": 0.380764365196228, + "step": 8295 + }, + { + "epoch": 1.9128429790177544, + "grad_norm": 1.59176785573853, + "learning_rate": 1.042168528321874e-08, + "loss": 0.4183109700679779, + "step": 8296 + }, + { + "epoch": 1.9130735531473368, + "grad_norm": 1.7993335153125511, + "learning_rate": 1.036685723081221e-08, + "loss": 0.4221222698688507, + "step": 8297 + }, + { + "epoch": 1.9133041272769196, + "grad_norm": 1.7816315005923467, + "learning_rate": 1.0312173031612804e-08, + "loss": 0.543656051158905, + "step": 8298 + }, + { + "epoch": 1.913534701406502, + "grad_norm": 1.5681621709441897, + "learning_rate": 1.0257632693569052e-08, + "loss": 
0.48872441053390503, + "step": 8299 + }, + { + "epoch": 1.9137652755360848, + "grad_norm": 1.5640812032082956, + "learning_rate": 1.0203236224609169e-08, + "loss": 0.5447995662689209, + "step": 8300 + }, + { + "epoch": 1.9139958496656675, + "grad_norm": 1.4954141524676323, + "learning_rate": 1.0148983632640162e-08, + "loss": 0.39448055624961853, + "step": 8301 + }, + { + "epoch": 1.9142264237952502, + "grad_norm": 1.755968676337724, + "learning_rate": 1.009487492554828e-08, + "loss": 0.44735193252563477, + "step": 8302 + }, + { + "epoch": 1.914456997924833, + "grad_norm": 1.6151813931913763, + "learning_rate": 1.0040910111198786e-08, + "loss": 0.4747859537601471, + "step": 8303 + }, + { + "epoch": 1.9146875720544156, + "grad_norm": 1.6130507888649155, + "learning_rate": 9.987089197435739e-09, + "loss": 0.5120220184326172, + "step": 8304 + }, + { + "epoch": 1.9149181461839981, + "grad_norm": 1.6267491510418168, + "learning_rate": 9.933412192082991e-09, + "loss": 0.3889455795288086, + "step": 8305 + }, + { + "epoch": 1.9151487203135809, + "grad_norm": 1.497355606160038, + "learning_rate": 9.879879102942635e-09, + "loss": 0.36584073305130005, + "step": 8306 + }, + { + "epoch": 1.9153792944431633, + "grad_norm": 2.0010610263228643, + "learning_rate": 9.826489937796556e-09, + "loss": 0.6259280443191528, + "step": 8307 + }, + { + "epoch": 1.915609868572746, + "grad_norm": 1.780257440356438, + "learning_rate": 9.773244704405104e-09, + "loss": 0.45160970091819763, + "step": 8308 + }, + { + "epoch": 1.9158404427023288, + "grad_norm": 1.559258218463348, + "learning_rate": 9.720143410508309e-09, + "loss": 0.47028589248657227, + "step": 8309 + }, + { + "epoch": 1.9160710168319115, + "grad_norm": 1.7146410364961069, + "learning_rate": 9.667186063824773e-09, + "loss": 0.3850802183151245, + "step": 8310 + }, + { + "epoch": 1.9163015909614942, + "grad_norm": 1.69252010891113, + "learning_rate": 9.614372672052451e-09, + "loss": 0.4134417772293091, + "step": 8311 + }, + { + "epoch": 
1.916532165091077, + "grad_norm": 1.4197660481073355, + "learning_rate": 9.561703242868425e-09, + "loss": 0.5340328216552734, + "step": 8312 + }, + { + "epoch": 1.9167627392206594, + "grad_norm": 1.5089395557239718, + "learning_rate": 9.509177783928569e-09, + "loss": 0.4580942392349243, + "step": 8313 + }, + { + "epoch": 1.9169933133502421, + "grad_norm": 1.559427035261756, + "learning_rate": 9.45679630286811e-09, + "loss": 0.4227365553379059, + "step": 8314 + }, + { + "epoch": 1.9172238874798246, + "grad_norm": 1.462151537342571, + "learning_rate": 9.404558807301065e-09, + "loss": 0.42711400985717773, + "step": 8315 + }, + { + "epoch": 1.9174544616094074, + "grad_norm": 1.6466969798320865, + "learning_rate": 9.352465304820811e-09, + "loss": 0.41088467836380005, + "step": 8316 + }, + { + "epoch": 1.91768503573899, + "grad_norm": 1.7161905508950221, + "learning_rate": 9.30051580299962e-09, + "loss": 0.4669058918952942, + "step": 8317 + }, + { + "epoch": 1.9179156098685728, + "grad_norm": 1.8956617878589224, + "learning_rate": 9.248710309388896e-09, + "loss": 0.34129124879837036, + "step": 8318 + }, + { + "epoch": 1.9181461839981555, + "grad_norm": 1.6346151888813216, + "learning_rate": 9.19704883151906e-09, + "loss": 0.5538367033004761, + "step": 8319 + }, + { + "epoch": 1.9183767581277382, + "grad_norm": 1.8993289351204807, + "learning_rate": 9.145531376899773e-09, + "loss": 0.4591939151287079, + "step": 8320 + }, + { + "epoch": 1.9186073322573207, + "grad_norm": 1.531598340011727, + "learning_rate": 9.094157953019376e-09, + "loss": 0.38709723949432373, + "step": 8321 + }, + { + "epoch": 1.9188379063869034, + "grad_norm": 1.7947823187484588, + "learning_rate": 9.042928567345787e-09, + "loss": 0.503919780254364, + "step": 8322 + }, + { + "epoch": 1.919068480516486, + "grad_norm": 1.6367087262197295, + "learning_rate": 8.991843227325491e-09, + "loss": 0.510110080242157, + "step": 8323 + }, + { + "epoch": 1.9192990546460686, + "grad_norm": 1.6066272425773898, + 
"learning_rate": 8.940901940384437e-09, + "loss": 0.5100687146186829, + "step": 8324 + }, + { + "epoch": 1.9195296287756514, + "grad_norm": 1.513750458500578, + "learning_rate": 8.89010471392726e-09, + "loss": 0.44701308012008667, + "step": 8325 + }, + { + "epoch": 1.919760202905234, + "grad_norm": 1.563320875474341, + "learning_rate": 8.83945155533794e-09, + "loss": 0.4657078981399536, + "step": 8326 + }, + { + "epoch": 1.9199907770348168, + "grad_norm": 1.9297827676028427, + "learning_rate": 8.788942471979588e-09, + "loss": 0.510329008102417, + "step": 8327 + }, + { + "epoch": 1.9202213511643995, + "grad_norm": 1.471307451139604, + "learning_rate": 8.738577471193997e-09, + "loss": 0.5373008847236633, + "step": 8328 + }, + { + "epoch": 1.920451925293982, + "grad_norm": 1.9012550118721963, + "learning_rate": 8.688356560302313e-09, + "loss": 0.46517014503479004, + "step": 8329 + }, + { + "epoch": 1.9206824994235647, + "grad_norm": 1.6705233787528915, + "learning_rate": 8.638279746604582e-09, + "loss": 0.3993692398071289, + "step": 8330 + }, + { + "epoch": 1.9209130735531472, + "grad_norm": 1.366585505535673, + "learning_rate": 8.588347037380095e-09, + "loss": 0.42480504512786865, + "step": 8331 + }, + { + "epoch": 1.92114364768273, + "grad_norm": 1.7413386006663227, + "learning_rate": 8.538558439887044e-09, + "loss": 0.44433218240737915, + "step": 8332 + }, + { + "epoch": 1.9213742218123127, + "grad_norm": 1.59463524320548, + "learning_rate": 8.488913961362643e-09, + "loss": 0.4645090103149414, + "step": 8333 + }, + { + "epoch": 1.9216047959418954, + "grad_norm": 1.7690127959905497, + "learning_rate": 8.439413609023227e-09, + "loss": 0.47265806794166565, + "step": 8334 + }, + { + "epoch": 1.921835370071478, + "grad_norm": 1.6930025984848287, + "learning_rate": 8.390057390064265e-09, + "loss": 0.46389561891555786, + "step": 8335 + }, + { + "epoch": 1.9220659442010608, + "grad_norm": 1.8286869444988214, + "learning_rate": 8.340845311660127e-09, + "loss": 
0.45355337858200073, + "step": 8336 + }, + { + "epoch": 1.9222965183306433, + "grad_norm": 1.6861508362464954, + "learning_rate": 8.291777380964315e-09, + "loss": 0.47136229276657104, + "step": 8337 + }, + { + "epoch": 1.922527092460226, + "grad_norm": 1.7162470073135112, + "learning_rate": 8.242853605109234e-09, + "loss": 0.4914461374282837, + "step": 8338 + }, + { + "epoch": 1.9227576665898085, + "grad_norm": 1.5896610300054894, + "learning_rate": 8.194073991206641e-09, + "loss": 0.48298412561416626, + "step": 8339 + }, + { + "epoch": 1.9229882407193912, + "grad_norm": 1.591559243664797, + "learning_rate": 8.145438546346971e-09, + "loss": 0.5316052436828613, + "step": 8340 + }, + { + "epoch": 1.923218814848974, + "grad_norm": 1.530763445371585, + "learning_rate": 8.09694727760002e-09, + "loss": 0.45742303133010864, + "step": 8341 + }, + { + "epoch": 1.9234493889785567, + "grad_norm": 1.800664891434664, + "learning_rate": 8.048600192014365e-09, + "loss": 0.41579365730285645, + "step": 8342 + }, + { + "epoch": 1.9236799631081394, + "grad_norm": 1.4284255731817002, + "learning_rate": 8.000397296617834e-09, + "loss": 0.37775835394859314, + "step": 8343 + }, + { + "epoch": 1.9239105372377219, + "grad_norm": 1.7051685129810905, + "learning_rate": 7.95233859841704e-09, + "loss": 0.4720783531665802, + "step": 8344 + }, + { + "epoch": 1.9241411113673046, + "grad_norm": 1.608380789109436, + "learning_rate": 7.904424104398067e-09, + "loss": 0.5015095472335815, + "step": 8345 + }, + { + "epoch": 1.924371685496887, + "grad_norm": 1.5886093342032406, + "learning_rate": 7.856653821525672e-09, + "loss": 0.6053783893585205, + "step": 8346 + }, + { + "epoch": 1.9246022596264698, + "grad_norm": 1.71106607476921, + "learning_rate": 7.809027756743635e-09, + "loss": 0.47775521874427795, + "step": 8347 + }, + { + "epoch": 1.9248328337560525, + "grad_norm": 1.559597916397487, + "learning_rate": 7.761545916974976e-09, + "loss": 0.36487245559692383, + "step": 8348 + }, + { + "epoch": 
1.9250634078856352, + "grad_norm": 1.6596969619350017, + "learning_rate": 7.714208309121617e-09, + "loss": 0.48085975646972656, + "step": 8349 + }, + { + "epoch": 1.925293982015218, + "grad_norm": 1.6156245324091865, + "learning_rate": 7.667014940064609e-09, + "loss": 0.48800790309906006, + "step": 8350 + }, + { + "epoch": 1.9255245561448007, + "grad_norm": 1.654653168113963, + "learning_rate": 7.61996581666402e-09, + "loss": 0.5294181704521179, + "step": 8351 + }, + { + "epoch": 1.9257551302743832, + "grad_norm": 1.4725020612800932, + "learning_rate": 7.573060945758936e-09, + "loss": 0.44024503231048584, + "step": 8352 + }, + { + "epoch": 1.9259857044039659, + "grad_norm": 1.8377372608503795, + "learning_rate": 7.526300334167235e-09, + "loss": 0.4359186887741089, + "step": 8353 + }, + { + "epoch": 1.9262162785335484, + "grad_norm": 1.6594669465231893, + "learning_rate": 7.479683988686259e-09, + "loss": 0.4803895652294159, + "step": 8354 + }, + { + "epoch": 1.926446852663131, + "grad_norm": 1.5824042504509404, + "learning_rate": 7.433211916092141e-09, + "loss": 0.43153274059295654, + "step": 8355 + }, + { + "epoch": 1.9266774267927138, + "grad_norm": 1.812737055881384, + "learning_rate": 7.386884123140036e-09, + "loss": 0.38263070583343506, + "step": 8356 + }, + { + "epoch": 1.9269080009222965, + "grad_norm": 1.42789662226475, + "learning_rate": 7.340700616564e-09, + "loss": 0.42121192812919617, + "step": 8357 + }, + { + "epoch": 1.9271385750518792, + "grad_norm": 1.6902764865159838, + "learning_rate": 7.294661403077662e-09, + "loss": 0.46008965373039246, + "step": 8358 + }, + { + "epoch": 1.927369149181462, + "grad_norm": 1.5923895901686829, + "learning_rate": 7.248766489372893e-09, + "loss": 0.48495203256607056, + "step": 8359 + }, + { + "epoch": 1.9275997233110445, + "grad_norm": 1.6833123633851883, + "learning_rate": 7.203015882121244e-09, + "loss": 0.5004169940948486, + "step": 8360 + }, + { + "epoch": 1.9278302974406272, + "grad_norm": 1.4732497687996942, + 
"learning_rate": 7.15740958797284e-09, + "loss": 0.5660319328308105, + "step": 8361 + }, + { + "epoch": 1.9280608715702097, + "grad_norm": 1.588922332622674, + "learning_rate": 7.111947613557268e-09, + "loss": 0.43854010105133057, + "step": 8362 + }, + { + "epoch": 1.9282914456997924, + "grad_norm": 2.093362311602714, + "learning_rate": 7.066629965482574e-09, + "loss": 0.44730937480926514, + "step": 8363 + }, + { + "epoch": 1.928522019829375, + "grad_norm": 1.6568658526601971, + "learning_rate": 7.021456650336377e-09, + "loss": 0.45642590522766113, + "step": 8364 + }, + { + "epoch": 1.9287525939589578, + "grad_norm": 1.9173353497487595, + "learning_rate": 6.976427674684871e-09, + "loss": 0.5613523721694946, + "step": 8365 + }, + { + "epoch": 1.9289831680885405, + "grad_norm": 1.7976713831697748, + "learning_rate": 6.931543045073706e-09, + "loss": 0.4231454133987427, + "step": 8366 + }, + { + "epoch": 1.9292137422181233, + "grad_norm": 1.9184335289270926, + "learning_rate": 6.886802768027223e-09, + "loss": 0.464144766330719, + "step": 8367 + }, + { + "epoch": 1.9294443163477057, + "grad_norm": 1.6282751196601715, + "learning_rate": 6.8422068500487705e-09, + "loss": 0.4303344488143921, + "step": 8368 + }, + { + "epoch": 1.9296748904772885, + "grad_norm": 1.5717538042291814, + "learning_rate": 6.797755297620944e-09, + "loss": 0.4333549737930298, + "step": 8369 + }, + { + "epoch": 1.929905464606871, + "grad_norm": 1.5673646456508366, + "learning_rate": 6.753448117205241e-09, + "loss": 0.4656146466732025, + "step": 8370 + }, + { + "epoch": 1.9301360387364537, + "grad_norm": 2.0556236314521077, + "learning_rate": 6.709285315242063e-09, + "loss": 0.3823866844177246, + "step": 8371 + }, + { + "epoch": 1.9303666128660364, + "grad_norm": 1.5412445917312292, + "learning_rate": 6.665266898150946e-09, + "loss": 0.4552363157272339, + "step": 8372 + }, + { + "epoch": 1.930597186995619, + "grad_norm": 1.5304233694461045, + "learning_rate": 6.6213928723304335e-09, + "loss": 
0.48757460713386536, + "step": 8373 + }, + { + "epoch": 1.9308277611252018, + "grad_norm": 1.0877844091844102, + "learning_rate": 6.577663244158094e-09, + "loss": 0.3263235092163086, + "step": 8374 + }, + { + "epoch": 1.9310583352547845, + "grad_norm": 1.6065207890727204, + "learning_rate": 6.534078019990397e-09, + "loss": 0.510450541973114, + "step": 8375 + }, + { + "epoch": 1.931288909384367, + "grad_norm": 1.4737968731950963, + "learning_rate": 6.490637206162941e-09, + "loss": 0.37407904863357544, + "step": 8376 + }, + { + "epoch": 1.9315194835139498, + "grad_norm": 1.5691906942234775, + "learning_rate": 6.4473408089902315e-09, + "loss": 0.4216376543045044, + "step": 8377 + }, + { + "epoch": 1.9317500576435322, + "grad_norm": 1.647678033925203, + "learning_rate": 6.404188834766011e-09, + "loss": 0.41611379384994507, + "step": 8378 + }, + { + "epoch": 1.931980631773115, + "grad_norm": 1.6406917387427478, + "learning_rate": 6.361181289762596e-09, + "loss": 0.5301774740219116, + "step": 8379 + }, + { + "epoch": 1.9322112059026977, + "grad_norm": 1.457780743812755, + "learning_rate": 6.3183181802317635e-09, + "loss": 0.43767407536506653, + "step": 8380 + }, + { + "epoch": 1.9324417800322804, + "grad_norm": 1.5497586314138279, + "learning_rate": 6.275599512404084e-09, + "loss": 0.417082279920578, + "step": 8381 + }, + { + "epoch": 1.9326723541618631, + "grad_norm": 1.646560289289956, + "learning_rate": 6.233025292489147e-09, + "loss": 0.41670864820480347, + "step": 8382 + }, + { + "epoch": 1.9329029282914458, + "grad_norm": 1.4085441335066406, + "learning_rate": 6.190595526675446e-09, + "loss": 0.48778587579727173, + "step": 8383 + }, + { + "epoch": 1.9331335024210283, + "grad_norm": 1.39299487584749, + "learning_rate": 6.148310221130604e-09, + "loss": 0.44433802366256714, + "step": 8384 + }, + { + "epoch": 1.933364076550611, + "grad_norm": 1.7057166388160585, + "learning_rate": 6.106169382001369e-09, + "loss": 0.46826764941215515, + "step": 8385 + }, + { + "epoch": 
1.9335946506801935, + "grad_norm": 1.6832081073908207, + "learning_rate": 6.064173015413177e-09, + "loss": 0.5509334802627563, + "step": 8386 + }, + { + "epoch": 1.9338252248097763, + "grad_norm": 1.4200036599053338, + "learning_rate": 6.022321127470698e-09, + "loss": 0.4436245560646057, + "step": 8387 + }, + { + "epoch": 1.934055798939359, + "grad_norm": 1.4658061886752614, + "learning_rate": 5.9806137242574e-09, + "loss": 0.3577145040035248, + "step": 8388 + }, + { + "epoch": 1.9342863730689417, + "grad_norm": 1.3485508447539643, + "learning_rate": 5.939050811835988e-09, + "loss": 0.39893999695777893, + "step": 8389 + }, + { + "epoch": 1.9345169471985244, + "grad_norm": 1.4373848732418595, + "learning_rate": 5.897632396248075e-09, + "loss": 0.4109868109226227, + "step": 8390 + }, + { + "epoch": 1.9347475213281071, + "grad_norm": 1.6148537069486861, + "learning_rate": 5.85635848351429e-09, + "loss": 0.4193134307861328, + "step": 8391 + }, + { + "epoch": 1.9349780954576896, + "grad_norm": 1.774944389887914, + "learning_rate": 5.8152290796340545e-09, + "loss": 0.44189178943634033, + "step": 8392 + }, + { + "epoch": 1.9352086695872723, + "grad_norm": 1.7653802191556502, + "learning_rate": 5.774244190586141e-09, + "loss": 0.5014302730560303, + "step": 8393 + }, + { + "epoch": 1.9354392437168548, + "grad_norm": 1.5565367331009852, + "learning_rate": 5.733403822328009e-09, + "loss": 0.4962024688720703, + "step": 8394 + }, + { + "epoch": 1.9356698178464375, + "grad_norm": 1.585877874844532, + "learning_rate": 5.69270798079613e-09, + "loss": 0.45495474338531494, + "step": 8395 + }, + { + "epoch": 1.9359003919760203, + "grad_norm": 1.4665884192601668, + "learning_rate": 5.652156671906105e-09, + "loss": 0.49062758684158325, + "step": 8396 + }, + { + "epoch": 1.936130966105603, + "grad_norm": 1.6573434385643893, + "learning_rate": 5.611749901552554e-09, + "loss": 0.45899879932403564, + "step": 8397 + }, + { + "epoch": 1.9363615402351857, + "grad_norm": 1.511951038657192, + 
"learning_rate": 5.57148767560911e-09, + "loss": 0.47287002205848694, + "step": 8398 + }, + { + "epoch": 1.9365921143647684, + "grad_norm": 1.5970704539129832, + "learning_rate": 5.531369999927982e-09, + "loss": 0.439136266708374, + "step": 8399 + }, + { + "epoch": 1.936822688494351, + "grad_norm": 1.2795152915391526, + "learning_rate": 5.4913968803410594e-09, + "loss": 0.3920954465866089, + "step": 8400 + }, + { + "epoch": 1.9370532626239336, + "grad_norm": 1.254790295470771, + "learning_rate": 5.451568322658473e-09, + "loss": 0.4608895480632782, + "step": 8401 + }, + { + "epoch": 1.9372838367535161, + "grad_norm": 1.4389672316514175, + "learning_rate": 5.4118843326699246e-09, + "loss": 0.4617875814437866, + "step": 8402 + }, + { + "epoch": 1.9375144108830988, + "grad_norm": 1.8398027260263112, + "learning_rate": 5.372344916143912e-09, + "loss": 0.5293254852294922, + "step": 8403 + }, + { + "epoch": 1.9377449850126816, + "grad_norm": 1.2603762011573385, + "learning_rate": 5.332950078827725e-09, + "loss": 0.3935343623161316, + "step": 8404 + }, + { + "epoch": 1.9379755591422643, + "grad_norm": 1.3159194137267558, + "learning_rate": 5.293699826447895e-09, + "loss": 0.4612414240837097, + "step": 8405 + }, + { + "epoch": 1.938206133271847, + "grad_norm": 1.5616222982589931, + "learning_rate": 5.254594164709858e-09, + "loss": 0.4779428243637085, + "step": 8406 + }, + { + "epoch": 1.9384367074014297, + "grad_norm": 1.3393838173044101, + "learning_rate": 5.215633099298067e-09, + "loss": 0.37436819076538086, + "step": 8407 + }, + { + "epoch": 1.9386672815310122, + "grad_norm": 1.5367283978531407, + "learning_rate": 5.1768166358757695e-09, + "loss": 0.458698570728302, + "step": 8408 + }, + { + "epoch": 1.938897855660595, + "grad_norm": 1.52395102556278, + "learning_rate": 5.1381447800854515e-09, + "loss": 0.39365172386169434, + "step": 8409 + }, + { + "epoch": 1.9391284297901774, + "grad_norm": 1.6915141620999796, + "learning_rate": 5.099617537548284e-09, + "loss": 
0.46358722448349, + "step": 8410 + }, + { + "epoch": 1.9393590039197601, + "grad_norm": 1.4920931037664487, + "learning_rate": 5.061234913864898e-09, + "loss": 0.4286697506904602, + "step": 8411 + }, + { + "epoch": 1.9395895780493428, + "grad_norm": 1.2865245997479036, + "learning_rate": 5.022996914614275e-09, + "loss": 0.4925898015499115, + "step": 8412 + }, + { + "epoch": 1.9398201521789256, + "grad_norm": 1.5226712255874009, + "learning_rate": 4.984903545354857e-09, + "loss": 0.46924275159835815, + "step": 8413 + }, + { + "epoch": 1.9400507263085083, + "grad_norm": 1.5857623247989538, + "learning_rate": 4.946954811623994e-09, + "loss": 0.5326268672943115, + "step": 8414 + }, + { + "epoch": 1.940281300438091, + "grad_norm": 1.5901041586459477, + "learning_rate": 4.909150718937716e-09, + "loss": 0.4367690682411194, + "step": 8415 + }, + { + "epoch": 1.9405118745676735, + "grad_norm": 1.5390541996103484, + "learning_rate": 4.8714912727914055e-09, + "loss": 0.45579224824905396, + "step": 8416 + }, + { + "epoch": 1.9407424486972562, + "grad_norm": 1.5246826105956603, + "learning_rate": 4.8339764786590186e-09, + "loss": 0.4420431852340698, + "step": 8417 + }, + { + "epoch": 1.9409730228268387, + "grad_norm": 1.7713819487127218, + "learning_rate": 4.79660634199397e-09, + "loss": 0.4175274670124054, + "step": 8418 + }, + { + "epoch": 1.9412035969564214, + "grad_norm": 1.4046803968065067, + "learning_rate": 4.759380868228246e-09, + "loss": 0.41451364755630493, + "step": 8419 + }, + { + "epoch": 1.9414341710860041, + "grad_norm": 1.5394804899846177, + "learning_rate": 4.722300062772966e-09, + "loss": 0.4211805462837219, + "step": 8420 + }, + { + "epoch": 1.9416647452155869, + "grad_norm": 1.5805052208208792, + "learning_rate": 4.68536393101826e-09, + "loss": 0.4458296000957489, + "step": 8421 + }, + { + "epoch": 1.9418953193451696, + "grad_norm": 1.8263114249420374, + "learning_rate": 4.648572478333057e-09, + "loss": 0.6226488351821899, + "step": 8422 + }, + { + "epoch": 
1.9421258934747523, + "grad_norm": 1.467298573422793, + "learning_rate": 4.611925710065523e-09, + "loss": 0.343037486076355, + "step": 8423 + }, + { + "epoch": 1.9423564676043348, + "grad_norm": 1.4279799784372957, + "learning_rate": 4.575423631542397e-09, + "loss": 0.42478299140930176, + "step": 8424 + }, + { + "epoch": 1.9425870417339175, + "grad_norm": 1.4809253602160373, + "learning_rate": 4.539066248069878e-09, + "loss": 0.4467424750328064, + "step": 8425 + }, + { + "epoch": 1.9428176158635, + "grad_norm": 1.5230213064501263, + "learning_rate": 4.50285356493274e-09, + "loss": 0.4598960876464844, + "step": 8426 + }, + { + "epoch": 1.9430481899930827, + "grad_norm": 1.767389183054306, + "learning_rate": 4.466785587394883e-09, + "loss": 0.43005913496017456, + "step": 8427 + }, + { + "epoch": 1.9432787641226654, + "grad_norm": 1.6819998310369073, + "learning_rate": 4.430862320699114e-09, + "loss": 0.4259253740310669, + "step": 8428 + }, + { + "epoch": 1.9435093382522481, + "grad_norm": 1.4809575809160866, + "learning_rate": 4.395083770067476e-09, + "loss": 0.4275285601615906, + "step": 8429 + }, + { + "epoch": 1.9437399123818309, + "grad_norm": 1.5009509074634573, + "learning_rate": 4.3594499407003656e-09, + "loss": 0.42151302099227905, + "step": 8430 + }, + { + "epoch": 1.9439704865114136, + "grad_norm": 1.2121055184272223, + "learning_rate": 4.3239608377778625e-09, + "loss": 0.41727957129478455, + "step": 8431 + }, + { + "epoch": 1.944201060640996, + "grad_norm": 1.6993320655678226, + "learning_rate": 4.288616466458395e-09, + "loss": 0.5026905536651611, + "step": 8432 + }, + { + "epoch": 1.9444316347705788, + "grad_norm": 1.7732059667125062, + "learning_rate": 4.2534168318798524e-09, + "loss": 0.5170408487319946, + "step": 8433 + }, + { + "epoch": 1.9446622089001613, + "grad_norm": 1.4027101607713113, + "learning_rate": 4.21836193915881e-09, + "loss": 0.3918447196483612, + "step": 8434 + }, + { + "epoch": 1.944892783029744, + "grad_norm": 1.6652823795220828, + 
"learning_rate": 4.183451793390747e-09, + "loss": 0.49871906638145447, + "step": 8435 + }, + { + "epoch": 1.9451233571593267, + "grad_norm": 1.4696705484226025, + "learning_rate": 4.1486863996502694e-09, + "loss": 0.43729400634765625, + "step": 8436 + }, + { + "epoch": 1.9453539312889094, + "grad_norm": 1.6971586346839116, + "learning_rate": 4.114065762990781e-09, + "loss": 0.49198442697525024, + "step": 8437 + }, + { + "epoch": 1.9455845054184921, + "grad_norm": 1.7555960999646751, + "learning_rate": 4.079589888444923e-09, + "loss": 0.48610788583755493, + "step": 8438 + }, + { + "epoch": 1.9458150795480749, + "grad_norm": 1.4385738810997333, + "learning_rate": 4.045258781024019e-09, + "loss": 0.43962734937667847, + "step": 8439 + }, + { + "epoch": 1.9460456536776574, + "grad_norm": 1.5800303425440292, + "learning_rate": 4.011072445718522e-09, + "loss": 0.3320704400539398, + "step": 8440 + }, + { + "epoch": 1.94627622780724, + "grad_norm": 1.6634559640737916, + "learning_rate": 3.977030887497568e-09, + "loss": 0.4773918092250824, + "step": 8441 + }, + { + "epoch": 1.9465068019368226, + "grad_norm": 1.6386159776295786, + "learning_rate": 3.9431341113096425e-09, + "loss": 0.424363911151886, + "step": 8442 + }, + { + "epoch": 1.9467373760664053, + "grad_norm": 1.9939094308024221, + "learning_rate": 3.9093821220818055e-09, + "loss": 0.5321601033210754, + "step": 8443 + }, + { + "epoch": 1.946967950195988, + "grad_norm": 1.7091737329216896, + "learning_rate": 3.875774924720465e-09, + "loss": 0.48579344153404236, + "step": 8444 + }, + { + "epoch": 1.9471985243255707, + "grad_norm": 1.4617398717494952, + "learning_rate": 3.842312524110603e-09, + "loss": 0.39313316345214844, + "step": 8445 + }, + { + "epoch": 1.9474290984551534, + "grad_norm": 1.6233833617742501, + "learning_rate": 3.8089949251163264e-09, + "loss": 0.522427499294281, + "step": 8446 + }, + { + "epoch": 1.9476596725847362, + "grad_norm": 1.601217744469266, + "learning_rate": 3.775822132580875e-09, + "loss": 
0.3822653889656067, + "step": 8447 + }, + { + "epoch": 1.9478902467143187, + "grad_norm": 1.5787465509087006, + "learning_rate": 3.7427941513259454e-09, + "loss": 0.4322483241558075, + "step": 8448 + }, + { + "epoch": 1.9481208208439014, + "grad_norm": 1.6934897718136162, + "learning_rate": 3.7099109861528087e-09, + "loss": 0.4862939715385437, + "step": 8449 + }, + { + "epoch": 1.9483513949734839, + "grad_norm": 1.5875963080752307, + "learning_rate": 3.6771726418410863e-09, + "loss": 0.45388323068618774, + "step": 8450 + }, + { + "epoch": 1.9485819691030666, + "grad_norm": 1.5187043160616758, + "learning_rate": 3.644579123149749e-09, + "loss": 0.3937215805053711, + "step": 8451 + }, + { + "epoch": 1.9488125432326493, + "grad_norm": 1.5446261991465484, + "learning_rate": 3.6121304348165628e-09, + "loss": 0.46887993812561035, + "step": 8452 + }, + { + "epoch": 1.949043117362232, + "grad_norm": 1.763834546986469, + "learning_rate": 3.5798265815584204e-09, + "loss": 0.4444226026535034, + "step": 8453 + }, + { + "epoch": 1.9492736914918147, + "grad_norm": 1.639572253352884, + "learning_rate": 3.5476675680709e-09, + "loss": 0.4938625991344452, + "step": 8454 + }, + { + "epoch": 1.9495042656213972, + "grad_norm": 1.456362188758518, + "learning_rate": 3.5156533990285953e-09, + "loss": 0.37632471323013306, + "step": 8455 + }, + { + "epoch": 1.94973483975098, + "grad_norm": 1.8608548289842328, + "learning_rate": 3.483784079085117e-09, + "loss": 0.4345025420188904, + "step": 8456 + }, + { + "epoch": 1.9499654138805624, + "grad_norm": 1.4598938490767328, + "learning_rate": 3.4520596128729818e-09, + "loss": 0.3721727132797241, + "step": 8457 + }, + { + "epoch": 1.9501959880101452, + "grad_norm": 1.6409042038383927, + "learning_rate": 3.4204800050037232e-09, + "loss": 0.4871670603752136, + "step": 8458 + }, + { + "epoch": 1.9504265621397279, + "grad_norm": 1.8307964169711943, + "learning_rate": 3.38904526006778e-09, + "loss": 0.578133225440979, + "step": 8459 + }, + { + "epoch": 
1.9506571362693106, + "grad_norm": 1.5202457315236042, + "learning_rate": 3.357755382634386e-09, + "loss": 0.4721870422363281, + "step": 8460 + }, + { + "epoch": 1.9508877103988933, + "grad_norm": 1.798795599183991, + "learning_rate": 3.3266103772519037e-09, + "loss": 0.4569184184074402, + "step": 8461 + }, + { + "epoch": 1.951118284528476, + "grad_norm": 1.7311036262190431, + "learning_rate": 3.2956102484477112e-09, + "loss": 0.48763811588287354, + "step": 8462 + }, + { + "epoch": 1.9513488586580585, + "grad_norm": 1.5898725581558353, + "learning_rate": 3.264755000727759e-09, + "loss": 0.45957818627357483, + "step": 8463 + }, + { + "epoch": 1.9515794327876412, + "grad_norm": 1.661536076059429, + "learning_rate": 3.234044638577238e-09, + "loss": 0.49398598074913025, + "step": 8464 + }, + { + "epoch": 1.9518100069172237, + "grad_norm": 1.8367269278410805, + "learning_rate": 3.2034791664603544e-09, + "loss": 0.48884931206703186, + "step": 8465 + }, + { + "epoch": 1.9520405810468064, + "grad_norm": 1.4322798652039197, + "learning_rate": 3.173058588819999e-09, + "loss": 0.45171886682510376, + "step": 8466 + }, + { + "epoch": 1.9522711551763892, + "grad_norm": 1.7896431151356735, + "learning_rate": 3.142782910077968e-09, + "loss": 0.45110028982162476, + "step": 8467 + }, + { + "epoch": 1.9525017293059719, + "grad_norm": 1.6339596386172939, + "learning_rate": 3.1126521346354074e-09, + "loss": 0.4602523446083069, + "step": 8468 + }, + { + "epoch": 1.9527323034355546, + "grad_norm": 1.4993439724695443, + "learning_rate": 3.082666266872036e-09, + "loss": 0.3908727169036865, + "step": 8469 + }, + { + "epoch": 1.9529628775651373, + "grad_norm": 1.6588394319404383, + "learning_rate": 3.0528253111464786e-09, + "loss": 0.4886831045150757, + "step": 8470 + }, + { + "epoch": 1.9531934516947198, + "grad_norm": 1.8142188930520524, + "learning_rate": 3.023129271796598e-09, + "loss": 0.4407721161842346, + "step": 8471 + }, + { + "epoch": 1.9534240258243025, + "grad_norm": 
1.545809203271424, + "learning_rate": 2.9935781531389425e-09, + "loss": 0.46958622336387634, + "step": 8472 + }, + { + "epoch": 1.953654599953885, + "grad_norm": 1.5632050072309709, + "learning_rate": 2.964171959469075e-09, + "loss": 0.4642796516418457, + "step": 8473 + }, + { + "epoch": 1.9538851740834677, + "grad_norm": 1.5522529280671595, + "learning_rate": 2.9349106950613545e-09, + "loss": 0.5124588012695312, + "step": 8474 + }, + { + "epoch": 1.9541157482130505, + "grad_norm": 1.7441462887025347, + "learning_rate": 2.9057943641693784e-09, + "loss": 0.516730546951294, + "step": 8475 + }, + { + "epoch": 1.9543463223426332, + "grad_norm": 1.6015713883307108, + "learning_rate": 2.876822971025428e-09, + "loss": 0.47847944498062134, + "step": 8476 + }, + { + "epoch": 1.9545768964722159, + "grad_norm": 1.9133896423438201, + "learning_rate": 2.8479965198408007e-09, + "loss": 0.5167095065116882, + "step": 8477 + }, + { + "epoch": 1.9548074706017986, + "grad_norm": 1.4489948600651796, + "learning_rate": 2.819315014805812e-09, + "loss": 0.40728163719177246, + "step": 8478 + }, + { + "epoch": 1.955038044731381, + "grad_norm": 1.4413821780207463, + "learning_rate": 2.790778460089349e-09, + "loss": 0.49741852283477783, + "step": 8479 + }, + { + "epoch": 1.9552686188609638, + "grad_norm": 1.3759130199865537, + "learning_rate": 2.7623868598397603e-09, + "loss": 0.33847475051879883, + "step": 8480 + }, + { + "epoch": 1.9554991929905463, + "grad_norm": 1.6995475203184411, + "learning_rate": 2.734140218183856e-09, + "loss": 0.39727652072906494, + "step": 8481 + }, + { + "epoch": 1.955729767120129, + "grad_norm": 1.7012108842781224, + "learning_rate": 2.706038539227795e-09, + "loss": 0.40332260727882385, + "step": 8482 + }, + { + "epoch": 1.9559603412497117, + "grad_norm": 1.3388931691886075, + "learning_rate": 2.6780818270562e-09, + "loss": 0.40296924114227295, + "step": 8483 + }, + { + "epoch": 1.9561909153792945, + "grad_norm": 1.4889010944404621, + "learning_rate": 
2.650270085732931e-09, + "loss": 0.4253476858139038, + "step": 8484 + }, + { + "epoch": 1.9564214895088772, + "grad_norm": 1.5794301308382195, + "learning_rate": 2.6226033193007535e-09, + "loss": 0.448941171169281, + "step": 8485 + }, + { + "epoch": 1.95665206363846, + "grad_norm": 1.9411463996799059, + "learning_rate": 2.59508153178134e-09, + "loss": 0.48213180899620056, + "step": 8486 + }, + { + "epoch": 1.9568826377680424, + "grad_norm": 1.6243019689896288, + "learning_rate": 2.5677047271752683e-09, + "loss": 0.48886558413505554, + "step": 8487 + }, + { + "epoch": 1.957113211897625, + "grad_norm": 1.4212209484619593, + "learning_rate": 2.5404729094619103e-09, + "loss": 0.49786341190338135, + "step": 8488 + }, + { + "epoch": 1.9573437860272076, + "grad_norm": 2.1312601270605365, + "learning_rate": 2.5133860825997667e-09, + "loss": 0.4487866163253784, + "step": 8489 + }, + { + "epoch": 1.9575743601567903, + "grad_norm": 1.7672945087914924, + "learning_rate": 2.486444250526243e-09, + "loss": 0.46193206310272217, + "step": 8490 + }, + { + "epoch": 1.957804934286373, + "grad_norm": 1.5923899778865398, + "learning_rate": 2.459647417157429e-09, + "loss": 0.44729042053222656, + "step": 8491 + }, + { + "epoch": 1.9580355084159557, + "grad_norm": 1.8298057614969963, + "learning_rate": 2.432995586388764e-09, + "loss": 0.4646851718425751, + "step": 8492 + }, + { + "epoch": 1.9582660825455385, + "grad_norm": 1.6514495959092017, + "learning_rate": 2.40648876209415e-09, + "loss": 0.49538400769233704, + "step": 8493 + }, + { + "epoch": 1.9584966566751212, + "grad_norm": 1.7330889796307278, + "learning_rate": 2.3801269481267262e-09, + "loss": 0.5548783540725708, + "step": 8494 + }, + { + "epoch": 1.9587272308047037, + "grad_norm": 1.65108674708811, + "learning_rate": 2.3539101483184277e-09, + "loss": 0.4390280544757843, + "step": 8495 + }, + { + "epoch": 1.9589578049342864, + "grad_norm": 1.323831070791993, + "learning_rate": 2.327838366480095e-09, + "loss": 0.3079942464828491, 
+ "step": 8496 + }, + { + "epoch": 1.959188379063869, + "grad_norm": 2.030408303723105, + "learning_rate": 2.301911606401585e-09, + "loss": 0.5199894309043884, + "step": 8497 + }, + { + "epoch": 1.9594189531934516, + "grad_norm": 1.6402740340647268, + "learning_rate": 2.276129871851662e-09, + "loss": 0.3403523564338684, + "step": 8498 + }, + { + "epoch": 1.9596495273230343, + "grad_norm": 1.785907762491574, + "learning_rate": 2.2504931665777714e-09, + "loss": 0.49699991941452026, + "step": 8499 + }, + { + "epoch": 1.959880101452617, + "grad_norm": 1.5969429106714301, + "learning_rate": 2.2250014943066e-09, + "loss": 0.4178547263145447, + "step": 8500 + }, + { + "epoch": 1.9601106755821998, + "grad_norm": 1.8924231136601524, + "learning_rate": 2.199654858743627e-09, + "loss": 0.5622760057449341, + "step": 8501 + }, + { + "epoch": 1.9603412497117825, + "grad_norm": 1.4610200259542554, + "learning_rate": 2.1744532635733505e-09, + "loss": 0.4072464406490326, + "step": 8502 + }, + { + "epoch": 1.960571823841365, + "grad_norm": 1.5401248564682235, + "learning_rate": 2.1493967124587287e-09, + "loss": 0.475033164024353, + "step": 8503 + }, + { + "epoch": 1.9608023979709477, + "grad_norm": 1.7291130993603476, + "learning_rate": 2.1244852090424035e-09, + "loss": 0.4734419584274292, + "step": 8504 + }, + { + "epoch": 1.9610329721005302, + "grad_norm": 1.7230208360471804, + "learning_rate": 2.099718756945257e-09, + "loss": 0.42523911595344543, + "step": 8505 + }, + { + "epoch": 1.961263546230113, + "grad_norm": 1.510126016418521, + "learning_rate": 2.075097359767297e-09, + "loss": 0.5085049867630005, + "step": 8506 + }, + { + "epoch": 1.9614941203596956, + "grad_norm": 1.6269226735706044, + "learning_rate": 2.0506210210877728e-09, + "loss": 0.5682120323181152, + "step": 8507 + }, + { + "epoch": 1.9617246944892783, + "grad_norm": 1.5852715445159862, + "learning_rate": 2.0262897444642823e-09, + "loss": 0.4550264775753021, + "step": 8508 + }, + { + "epoch": 1.961955268618861, + 
"grad_norm": 1.560540594785291, + "learning_rate": 2.0021035334337745e-09, + "loss": 0.43745940923690796, + "step": 8509 + }, + { + "epoch": 1.9621858427484438, + "grad_norm": 1.421824915655791, + "learning_rate": 1.9780623915118812e-09, + "loss": 0.4523237347602844, + "step": 8510 + }, + { + "epoch": 1.9624164168780263, + "grad_norm": 1.354930266701351, + "learning_rate": 1.9541663221933623e-09, + "loss": 0.43080687522888184, + "step": 8511 + }, + { + "epoch": 1.962646991007609, + "grad_norm": 1.6208010256189354, + "learning_rate": 1.930415328951551e-09, + "loss": 0.5265613794326782, + "step": 8512 + }, + { + "epoch": 1.9628775651371915, + "grad_norm": 1.6858160892782517, + "learning_rate": 1.906809415239019e-09, + "loss": 0.5482667684555054, + "step": 8513 + }, + { + "epoch": 1.9631081392667742, + "grad_norm": 1.8258400073226166, + "learning_rate": 1.8833485844871322e-09, + "loss": 0.43548330664634705, + "step": 8514 + }, + { + "epoch": 1.963338713396357, + "grad_norm": 1.4726232338870595, + "learning_rate": 1.8600328401061627e-09, + "loss": 0.45715010166168213, + "step": 8515 + }, + { + "epoch": 1.9635692875259396, + "grad_norm": 1.4143739917928304, + "learning_rate": 1.8368621854852884e-09, + "loss": 0.48137760162353516, + "step": 8516 + }, + { + "epoch": 1.9637998616555223, + "grad_norm": 1.5443669851131265, + "learning_rate": 1.8138366239924818e-09, + "loss": 0.4607926607131958, + "step": 8517 + }, + { + "epoch": 1.964030435785105, + "grad_norm": 1.2018843862548443, + "learning_rate": 1.7909561589749545e-09, + "loss": 0.3551321029663086, + "step": 8518 + }, + { + "epoch": 1.9642610099146876, + "grad_norm": 1.4318523604861806, + "learning_rate": 1.7682207937583792e-09, + "loss": 0.4075126647949219, + "step": 8519 + }, + { + "epoch": 1.9644915840442703, + "grad_norm": 1.5238435411050293, + "learning_rate": 1.7456305316477793e-09, + "loss": 0.4470815658569336, + "step": 8520 + }, + { + "epoch": 1.9647221581738528, + "grad_norm": 1.7248235582994178, + 
"learning_rate": 1.72318537592675e-09, + "loss": 0.5074938535690308, + "step": 8521 + }, + { + "epoch": 1.9649527323034355, + "grad_norm": 1.684987227657268, + "learning_rate": 1.700885329857904e-09, + "loss": 0.4799109697341919, + "step": 8522 + }, + { + "epoch": 1.9651833064330182, + "grad_norm": 1.6217891186344597, + "learning_rate": 1.6787303966828703e-09, + "loss": 0.5603263974189758, + "step": 8523 + }, + { + "epoch": 1.965413880562601, + "grad_norm": 1.386089333333111, + "learning_rate": 1.656720579622073e-09, + "loss": 0.45492851734161377, + "step": 8524 + }, + { + "epoch": 1.9656444546921836, + "grad_norm": 1.9563157820273458, + "learning_rate": 1.6348558818748414e-09, + "loss": 0.47700050473213196, + "step": 8525 + }, + { + "epoch": 1.9658750288217663, + "grad_norm": 1.7426284772598926, + "learning_rate": 1.6131363066194115e-09, + "loss": 0.5105462074279785, + "step": 8526 + }, + { + "epoch": 1.9661056029513488, + "grad_norm": 1.6514750536849407, + "learning_rate": 1.5915618570130351e-09, + "loss": 0.47818124294281006, + "step": 8527 + }, + { + "epoch": 1.9663361770809316, + "grad_norm": 1.7136861974622173, + "learning_rate": 1.5701325361916484e-09, + "loss": 0.4549172520637512, + "step": 8528 + }, + { + "epoch": 1.966566751210514, + "grad_norm": 1.7152545383952742, + "learning_rate": 1.5488483472703151e-09, + "loss": 0.406271755695343, + "step": 8529 + }, + { + "epoch": 1.9667973253400968, + "grad_norm": 1.772427841344589, + "learning_rate": 1.5277092933427827e-09, + "loss": 0.4452788829803467, + "step": 8530 + }, + { + "epoch": 1.9670278994696795, + "grad_norm": 1.7369674304649072, + "learning_rate": 1.5067153774820374e-09, + "loss": 0.46621495485305786, + "step": 8531 + }, + { + "epoch": 1.9672584735992622, + "grad_norm": 1.294422205793256, + "learning_rate": 1.4858666027395272e-09, + "loss": 0.47837382555007935, + "step": 8532 + }, + { + "epoch": 1.967489047728845, + "grad_norm": 1.754058349269308, + "learning_rate": 1.4651629721460501e-09, + "loss": 
0.5690933465957642, + "step": 8533 + }, + { + "epoch": 1.9677196218584276, + "grad_norm": 1.7627173783003411, + "learning_rate": 1.4446044887109764e-09, + "loss": 0.478906512260437, + "step": 8534 + }, + { + "epoch": 1.9679501959880101, + "grad_norm": 1.7296669537147416, + "learning_rate": 1.4241911554225827e-09, + "loss": 0.5024028420448303, + "step": 8535 + }, + { + "epoch": 1.9681807701175928, + "grad_norm": 1.6971062366905785, + "learning_rate": 1.4039229752483839e-09, + "loss": 0.4430769979953766, + "step": 8536 + }, + { + "epoch": 1.9684113442471753, + "grad_norm": 1.5177256060076265, + "learning_rate": 1.3837999511343567e-09, + "loss": 0.34506234526634216, + "step": 8537 + }, + { + "epoch": 1.968641918376758, + "grad_norm": 1.6051884301428612, + "learning_rate": 1.363822086005717e-09, + "loss": 0.47483426332473755, + "step": 8538 + }, + { + "epoch": 1.9688724925063408, + "grad_norm": 1.4685071017788778, + "learning_rate": 1.343989382766475e-09, + "loss": 0.3902367651462555, + "step": 8539 + }, + { + "epoch": 1.9691030666359235, + "grad_norm": 1.5919563191923878, + "learning_rate": 1.3243018442994358e-09, + "loss": 0.5114254951477051, + "step": 8540 + }, + { + "epoch": 1.9693336407655062, + "grad_norm": 1.6064476628756739, + "learning_rate": 1.3047594734663104e-09, + "loss": 0.4048948287963867, + "step": 8541 + }, + { + "epoch": 1.969564214895089, + "grad_norm": 1.3533697409791567, + "learning_rate": 1.2853622731079372e-09, + "loss": 0.4168536067008972, + "step": 8542 + }, + { + "epoch": 1.9697947890246714, + "grad_norm": 1.459175077584749, + "learning_rate": 1.2661102460437279e-09, + "loss": 0.38410186767578125, + "step": 8543 + }, + { + "epoch": 1.9700253631542541, + "grad_norm": 1.5096843994913236, + "learning_rate": 1.2470033950724435e-09, + "loss": 0.4931117296218872, + "step": 8544 + }, + { + "epoch": 1.9702559372838366, + "grad_norm": 1.863771997387379, + "learning_rate": 1.228041722971085e-09, + "loss": 0.41142135858535767, + "step": 8545 + }, + { + 
"epoch": 1.9704865114134194, + "grad_norm": 1.7868633908108185, + "learning_rate": 1.209225232496225e-09, + "loss": 0.5165313482284546, + "step": 8546 + }, + { + "epoch": 1.970717085543002, + "grad_norm": 1.284821780038077, + "learning_rate": 1.190553926382898e-09, + "loss": 0.3330427408218384, + "step": 8547 + }, + { + "epoch": 1.9709476596725848, + "grad_norm": 1.5242411906386457, + "learning_rate": 1.172027807345155e-09, + "loss": 0.43116509914398193, + "step": 8548 + }, + { + "epoch": 1.9711782338021675, + "grad_norm": 1.8011852071569119, + "learning_rate": 1.1536468780760643e-09, + "loss": 0.43564409017562866, + "step": 8549 + }, + { + "epoch": 1.9714088079317502, + "grad_norm": 1.7422483041269035, + "learning_rate": 1.1354111412472666e-09, + "loss": 0.5361013412475586, + "step": 8550 + }, + { + "epoch": 1.9716393820613327, + "grad_norm": 1.6110906687473352, + "learning_rate": 1.1173205995097524e-09, + "loss": 0.4049466550350189, + "step": 8551 + }, + { + "epoch": 1.9718699561909154, + "grad_norm": 1.6636539568656024, + "learning_rate": 1.0993752554930847e-09, + "loss": 0.45090144872665405, + "step": 8552 + }, + { + "epoch": 1.972100530320498, + "grad_norm": 1.5627616190247176, + "learning_rate": 1.0815751118057326e-09, + "loss": 0.43933606147766113, + "step": 8553 + }, + { + "epoch": 1.9723311044500806, + "grad_norm": 1.672183185343667, + "learning_rate": 1.063920171035182e-09, + "loss": 0.5254300832748413, + "step": 8554 + }, + { + "epoch": 1.9725616785796634, + "grad_norm": 1.4309558177904258, + "learning_rate": 1.0464104357477132e-09, + "loss": 0.45544567704200745, + "step": 8555 + }, + { + "epoch": 1.972792252709246, + "grad_norm": 1.9479324504983593, + "learning_rate": 1.0290459084886238e-09, + "loss": 0.5177001357078552, + "step": 8556 + }, + { + "epoch": 1.9730228268388288, + "grad_norm": 1.585288183336846, + "learning_rate": 1.0118265917818946e-09, + "loss": 0.4669674038887024, + "step": 8557 + }, + { + "epoch": 1.9732534009684115, + "grad_norm": 
1.5203759714638625, + "learning_rate": 9.947524881307456e-10, + "loss": 0.4244263172149658, + "step": 8558 + }, + { + "epoch": 1.973483975097994, + "grad_norm": 1.810087521792982, + "learning_rate": 9.778236000168583e-10, + "loss": 0.44121527671813965, + "step": 8559 + }, + { + "epoch": 1.9737145492275767, + "grad_norm": 1.59326202559186, + "learning_rate": 9.610399299010418e-10, + "loss": 0.44209837913513184, + "step": 8560 + }, + { + "epoch": 1.9739451233571592, + "grad_norm": 1.5399236076354037, + "learning_rate": 9.444014802231226e-10, + "loss": 0.4036273956298828, + "step": 8561 + }, + { + "epoch": 1.974175697486742, + "grad_norm": 1.5589230288439277, + "learning_rate": 9.279082534014992e-10, + "loss": 0.47106266021728516, + "step": 8562 + }, + { + "epoch": 1.9744062716163246, + "grad_norm": 1.6389105898260865, + "learning_rate": 9.115602518338095e-10, + "loss": 0.41080260276794434, + "step": 8563 + }, + { + "epoch": 1.9746368457459074, + "grad_norm": 2.0418613187292918, + "learning_rate": 8.953574778962635e-10, + "loss": 0.4333069920539856, + "step": 8564 + }, + { + "epoch": 1.97486741987549, + "grad_norm": 1.4286669807437469, + "learning_rate": 8.792999339440887e-10, + "loss": 0.3939141631126404, + "step": 8565 + }, + { + "epoch": 1.9750979940050726, + "grad_norm": 1.7648959719228037, + "learning_rate": 8.633876223114178e-10, + "loss": 0.4202404022216797, + "step": 8566 + }, + { + "epoch": 1.9753285681346553, + "grad_norm": 1.6239377555078118, + "learning_rate": 8.476205453114005e-10, + "loss": 0.44722893834114075, + "step": 8567 + }, + { + "epoch": 1.9755591422642378, + "grad_norm": 1.6159852265335335, + "learning_rate": 8.319987052357591e-10, + "loss": 0.4095258414745331, + "step": 8568 + }, + { + "epoch": 1.9757897163938205, + "grad_norm": 1.359270850467109, + "learning_rate": 8.165221043553439e-10, + "loss": 0.43372297286987305, + "step": 8569 + }, + { + "epoch": 1.9760202905234032, + "grad_norm": 1.7602005237852472, + "learning_rate": 
8.011907449199106e-10, + "loss": 0.4697731137275696, + "step": 8570 + }, + { + "epoch": 1.976250864652986, + "grad_norm": 1.759646277514859, + "learning_rate": 7.860046291580103e-10, + "loss": 0.49179136753082275, + "step": 8571 + }, + { + "epoch": 1.9764814387825687, + "grad_norm": 1.5966011788910657, + "learning_rate": 7.70963759277099e-10, + "loss": 0.35898157954216003, + "step": 8572 + }, + { + "epoch": 1.9767120129121514, + "grad_norm": 1.5427594087958296, + "learning_rate": 7.560681374634282e-10, + "loss": 0.48293429613113403, + "step": 8573 + }, + { + "epoch": 1.9769425870417339, + "grad_norm": 1.4911498565229593, + "learning_rate": 7.413177658822656e-10, + "loss": 0.39636045694351196, + "step": 8574 + }, + { + "epoch": 1.9771731611713166, + "grad_norm": 1.294544438076297, + "learning_rate": 7.267126466777851e-10, + "loss": 0.375876784324646, + "step": 8575 + }, + { + "epoch": 1.977403735300899, + "grad_norm": 1.438449662082489, + "learning_rate": 7.122527819729551e-10, + "loss": 0.4064311385154724, + "step": 8576 + }, + { + "epoch": 1.9776343094304818, + "grad_norm": 1.3024542737808098, + "learning_rate": 6.979381738696499e-10, + "loss": 0.4373857378959656, + "step": 8577 + }, + { + "epoch": 1.9778648835600645, + "grad_norm": 2.013857406007071, + "learning_rate": 6.837688244486494e-10, + "loss": 0.5008025765419006, + "step": 8578 + }, + { + "epoch": 1.9780954576896472, + "grad_norm": 1.5523385427514034, + "learning_rate": 6.697447357695285e-10, + "loss": 0.4286271035671234, + "step": 8579 + }, + { + "epoch": 1.97832603181923, + "grad_norm": 1.6941567857927917, + "learning_rate": 6.558659098711006e-10, + "loss": 0.4420759081840515, + "step": 8580 + }, + { + "epoch": 1.9785566059488127, + "grad_norm": 1.314306142904572, + "learning_rate": 6.421323487705299e-10, + "loss": 0.3946709632873535, + "step": 8581 + }, + { + "epoch": 1.9787871800783952, + "grad_norm": 1.6731376396011677, + "learning_rate": 6.285440544641085e-10, + "loss": 0.42874544858932495, + 
"step": 8582 + }, + { + "epoch": 1.9790177542079779, + "grad_norm": 1.5147129393930194, + "learning_rate": 6.151010289272563e-10, + "loss": 0.4728921055793762, + "step": 8583 + }, + { + "epoch": 1.9792483283375604, + "grad_norm": 1.4681942656331504, + "learning_rate": 6.018032741139656e-10, + "loss": 0.3756295442581177, + "step": 8584 + }, + { + "epoch": 1.979478902467143, + "grad_norm": 1.5314225760860438, + "learning_rate": 5.886507919570239e-10, + "loss": 0.48663657903671265, + "step": 8585 + }, + { + "epoch": 1.9797094765967258, + "grad_norm": 2.0571870297763377, + "learning_rate": 5.756435843685681e-10, + "loss": 0.46127766370773315, + "step": 8586 + }, + { + "epoch": 1.9799400507263085, + "grad_norm": 1.4783867212667936, + "learning_rate": 5.627816532390862e-10, + "loss": 0.493796169757843, + "step": 8587 + }, + { + "epoch": 1.9801706248558912, + "grad_norm": 1.2639174296233155, + "learning_rate": 5.500650004383045e-10, + "loss": 0.3703004717826843, + "step": 8588 + }, + { + "epoch": 1.980401198985474, + "grad_norm": 1.6202036973245495, + "learning_rate": 5.374936278146336e-10, + "loss": 0.5385284423828125, + "step": 8589 + }, + { + "epoch": 1.9806317731150564, + "grad_norm": 1.5325088206554112, + "learning_rate": 5.250675371956115e-10, + "loss": 0.3996584713459015, + "step": 8590 + }, + { + "epoch": 1.9808623472446392, + "grad_norm": 1.6001328200790206, + "learning_rate": 5.12786730387349e-10, + "loss": 0.4513227641582489, + "step": 8591 + }, + { + "epoch": 1.9810929213742217, + "grad_norm": 1.5317035339628575, + "learning_rate": 5.006512091750848e-10, + "loss": 0.46632474660873413, + "step": 8592 + }, + { + "epoch": 1.9813234955038044, + "grad_norm": 1.5599775050602098, + "learning_rate": 4.886609753227411e-10, + "loss": 0.5379712581634521, + "step": 8593 + }, + { + "epoch": 1.981554069633387, + "grad_norm": 1.6572300992446405, + "learning_rate": 4.768160305732572e-10, + "loss": 0.3606422543525696, + "step": 8594 + }, + { + "epoch": 1.9817846437629698, + 
"grad_norm": 1.927352159029303, + "learning_rate": 4.651163766484778e-10, + "loss": 0.39339596033096313, + "step": 8595 + }, + { + "epoch": 1.9820152178925525, + "grad_norm": 1.5930436461957604, + "learning_rate": 4.535620152489317e-10, + "loss": 0.4606707692146301, + "step": 8596 + }, + { + "epoch": 1.9822457920221352, + "grad_norm": 1.484957242621252, + "learning_rate": 4.421529480543862e-10, + "loss": 0.4234154522418976, + "step": 8597 + }, + { + "epoch": 1.9824763661517177, + "grad_norm": 1.3985130447330405, + "learning_rate": 4.308891767229594e-10, + "loss": 0.49317437410354614, + "step": 8598 + }, + { + "epoch": 1.9827069402813005, + "grad_norm": 1.5795407686648721, + "learning_rate": 4.197707028922304e-10, + "loss": 0.47756847739219666, + "step": 8599 + }, + { + "epoch": 1.982937514410883, + "grad_norm": 1.437347041692997, + "learning_rate": 4.0879752817823963e-10, + "loss": 0.37664321064949036, + "step": 8600 + } + ], + "logging_steps": 1, + "max_steps": 8674, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2968756727906304.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8600/training_args.bin b/checkpoint-8600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9d22a9f5260d66a35a24391e4e9c5ae1d42e2bf --- /dev/null +++ b/checkpoint-8600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b4d99570e121a32da71712aa554f3b32e79266529670ac42e5a5b8fc07e99d +size 6968 diff --git a/checkpoint-8600/zero_to_fp32.py b/checkpoint-8600/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ 
b/checkpoint-8600/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pseudo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/checkpoint-8674/README.md b/checkpoint-8674/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4005c4d8e7a819833408da4794e4e74d2ced6553 --- /dev/null +++ b/checkpoint-8674/README.md @@ -0,0 +1,208 @@ +--- +base_model: Qwen/Qwen2.5-VL-7B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:Qwen/Qwen2.5-VL-7B-Instruct +- llama-factory +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. 
More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/checkpoint-8674/adapter_config.json b/checkpoint-8674/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f36e32e61c434af152644134a13070b69334e6c --- /dev/null +++ b/checkpoint-8674/adapter_config.json @@ -0,0 +1,127 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "Qwen/Qwen2.5-VL-7B-Instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + 
"layers.0.mlp.up_proj", + "layers.16.mlp.up_proj", + "layers.7.mlp.down_proj", + "layers.4.mlp.up_proj", + "layers.26.mlp.down_proj", + "layers.13.mlp.gate_proj", + "layers.19.mlp.gate_proj", + "layers.18.mlp.up_proj", + "layers.18.mlp.down_proj", + "layers.22.mlp.down_proj", + "layers.18.mlp.gate_proj", + "layers.8.mlp.down_proj", + "layers.7.mlp.gate_proj", + "layers.7.mlp.up_proj", + "layers.6.mlp.down_proj", + "layers.27.mlp.down_proj", + "layers.23.mlp.down_proj", + "layers.22.mlp.up_proj", + "layers.1.mlp.gate_proj", + "layers.5.mlp.gate_proj", + "layers.11.mlp.gate_proj", + "layers.21.mlp.up_proj", + "layers.24.mlp.up_proj", + "layers.23.mlp.up_proj", + "layers.12.mlp.down_proj", + "layers.24.mlp.down_proj", + "layers.17.mlp.gate_proj", + "layers.21.mlp.down_proj", + "layers.15.mlp.gate_proj", + "layers.11.mlp.up_proj", + "layers.8.mlp.up_proj", + "layers.17.mlp.down_proj", + "layers.10.mlp.up_proj", + "layers.4.mlp.gate_proj", + "layers.6.mlp.up_proj", + "layers.10.mlp.gate_proj", + "layers.26.mlp.up_proj", + "q_proj", + "layers.20.mlp.gate_proj", + "layers.19.mlp.down_proj", + "layers.2.mlp.gate_proj", + "layers.20.mlp.down_proj", + "layers.14.mlp.down_proj", + "layers.27.mlp.gate_proj", + "layers.3.mlp.up_proj", + "layers.15.mlp.up_proj", + "layers.22.mlp.gate_proj", + "layers.13.mlp.up_proj", + "layers.8.mlp.gate_proj", + "layers.0.mlp.down_proj", + "layers.26.mlp.gate_proj", + "layers.13.mlp.down_proj", + "layers.1.mlp.down_proj", + "layers.11.mlp.down_proj", + "layers.0.mlp.gate_proj", + "layers.12.mlp.gate_proj", + "layers.2.mlp.down_proj", + "layers.17.mlp.up_proj", + "layers.25.mlp.up_proj", + "k_proj", + "layers.1.mlp.up_proj", + "layers.24.mlp.gate_proj", + "layers.9.mlp.gate_proj", + "layers.4.mlp.down_proj", + "layers.5.mlp.down_proj", + "layers.15.mlp.down_proj", + "layers.16.mlp.down_proj", + "layers.9.mlp.down_proj", + "layers.20.mlp.up_proj", + "layers.9.mlp.up_proj", + "o_proj", + "layers.19.mlp.up_proj", + "layers.14.mlp.gate_proj", + 
"layers.6.mlp.gate_proj", + "layers.12.mlp.up_proj", + "layers.5.mlp.up_proj", + "layers.2.mlp.up_proj", + "layers.16.mlp.gate_proj", + "layers.21.mlp.gate_proj", + "layers.25.mlp.gate_proj", + "layers.27.mlp.up_proj", + "layers.3.mlp.gate_proj", + "layers.14.mlp.up_proj", + "layers.25.mlp.down_proj", + "layers.3.mlp.down_proj", + "layers.23.mlp.gate_proj", + "layers.10.mlp.down_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-8674/adapter_model.safetensors b/checkpoint-8674/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ebb12ed330789be5f2e1499163e626ac73edf85 --- /dev/null +++ b/checkpoint-8674/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df8ac9086865872cf3b09183d585810d90e12118d7385364d0defec1f0d81db +size 40428088 diff --git a/checkpoint-8674/chat_template.jinja b/checkpoint-8674/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..6c226632394ae7474b0d4b13e15793eac2e21ee9 --- /dev/null +++ b/checkpoint-8674/chat_template.jinja @@ -0,0 +1,7 @@ +{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system +You are a helpful assistant.<|im_end|> +{% endif %}<|im_start|>{{ message['role'] }} +{% if message['content'] is string %}{{ message['content'] }}<|im_end|> +{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set 
video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|> +{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant +{% endif %} \ No newline at end of file diff --git a/checkpoint-8674/global_step8674/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/checkpoint-8674/global_step8674/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffe25c9fc3813e98c2c9cbea1ac995bb3301a0f6 --- /dev/null +++ b/checkpoint-8674/global_step8674/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d7fe2adbc41abc710f1c3a8ff594316138ffd136600282ad461bbe0b2a313b +size 242224880 diff --git a/checkpoint-8674/global_step8674/zero_pp_rank_0_mp_rank_00_model_states.pt b/checkpoint-8674/global_step8674/zero_pp_rank_0_mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ef0707a72eeaf3db2757439d9fa5f867498a957 --- /dev/null +++ b/checkpoint-8674/global_step8674/zero_pp_rank_0_mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1104b0124fee94b33793552f962ea2bb9948219a35a1c8ae0fddf37d5c84276f +size 460630 diff --git a/checkpoint-8674/latest b/checkpoint-8674/latest new file mode 100644 index 0000000000000000000000000000000000000000..06d4e9b5731d4e07e1dce6259be592d90b74213f --- /dev/null +++ b/checkpoint-8674/latest @@ -0,0 +1 @@ +global_step8674 \ No newline at end of file diff --git a/checkpoint-8674/processor_config.json b/checkpoint-8674/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/checkpoint-8674/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": 
"channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": "Qwen2VLVideoProcessor" + } +} diff --git a/checkpoint-8674/rng_state.pth b/checkpoint-8674/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fcf11c9b78de2c2c55fdfc44daef09cd9181c14 --- /dev/null +++ b/checkpoint-8674/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc398a73e46bca50defc25b4467441315246a33383a5d6c80985d238e57127f +size 14244 diff --git a/checkpoint-8674/scheduler.pt b/checkpoint-8674/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba60403d20dafbf147ea16c177cbeb0aadbca657 --- /dev/null +++ b/checkpoint-8674/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e790f8c5abb3b5234de84b9aef05ff1072d4fdbbefa94ef20cde987f6186b6b7 +size 
1000 diff --git a/checkpoint-8674/tokenizer.json b/checkpoint-8674/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/checkpoint-8674/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/checkpoint-8674/tokenizer_config.json b/checkpoint-8674/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/checkpoint-8674/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-8674/trainer_state.json b/checkpoint-8674/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6e8dc85a6130e3cd4dbb74eda3414793922b25f2 --- /dev/null +++ b/checkpoint-8674/trainer_state.json @@ -0,0 +1,60752 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 8674, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00023057412958266084, + "grad_norm": 0.5456158480642083, + 
"learning_rate": 0.0, + "loss": 1.2793785333633423, + "step": 1 + }, + { + "epoch": 0.0004611482591653217, + "grad_norm": 0.5348414425588685, + "learning_rate": 4.6082949308755755e-09, + "loss": 1.2810249328613281, + "step": 2 + }, + { + "epoch": 0.0006917223887479825, + "grad_norm": 0.5742665952103186, + "learning_rate": 9.216589861751151e-09, + "loss": 1.5180970430374146, + "step": 3 + }, + { + "epoch": 0.0009222965183306433, + "grad_norm": 0.47570843593061296, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.2771815061569214, + "step": 4 + }, + { + "epoch": 0.001152870647913304, + "grad_norm": 0.6179854753010914, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.6275714635849, + "step": 5 + }, + { + "epoch": 0.001383444777495965, + "grad_norm": 0.5728287935763549, + "learning_rate": 2.304147465437788e-08, + "loss": 1.4852838516235352, + "step": 6 + }, + { + "epoch": 0.0016140189070786258, + "grad_norm": 0.7402806033919309, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.3845010995864868, + "step": 7 + }, + { + "epoch": 0.0018445930366612867, + "grad_norm": 0.5357861516775319, + "learning_rate": 3.225806451612903e-08, + "loss": 1.2716574668884277, + "step": 8 + }, + { + "epoch": 0.0020751671662439476, + "grad_norm": 0.49378309074438254, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.4046194553375244, + "step": 9 + }, + { + "epoch": 0.002305741295826608, + "grad_norm": 0.5231726157264511, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.4988269805908203, + "step": 10 + }, + { + "epoch": 0.002536315425409269, + "grad_norm": 0.5469518790093721, + "learning_rate": 4.608294930875576e-08, + "loss": 1.3523340225219727, + "step": 11 + }, + { + "epoch": 0.00276688955499193, + "grad_norm": 0.5125117134786147, + "learning_rate": 5.069124423963134e-08, + "loss": 1.3664941787719727, + "step": 12 + }, + { + "epoch": 0.0029974636845745907, + "grad_norm": 0.5526794406387441, + "learning_rate": 5.529953917050691e-08, + "loss": 1.4892609119415283, + 
"step": 13 + }, + { + "epoch": 0.0032280378141572516, + "grad_norm": 0.5197262159341672, + "learning_rate": 5.990783410138249e-08, + "loss": 1.305836796760559, + "step": 14 + }, + { + "epoch": 0.0034586119437399125, + "grad_norm": 0.5214120337499729, + "learning_rate": 6.451612903225806e-08, + "loss": 1.3458774089813232, + "step": 15 + }, + { + "epoch": 0.0036891860733225734, + "grad_norm": 0.5249821302153419, + "learning_rate": 6.912442396313364e-08, + "loss": 1.4305222034454346, + "step": 16 + }, + { + "epoch": 0.003919760202905234, + "grad_norm": 0.48597332722440695, + "learning_rate": 7.373271889400921e-08, + "loss": 1.4247705936431885, + "step": 17 + }, + { + "epoch": 0.004150334332487895, + "grad_norm": 0.5492563451667527, + "learning_rate": 7.834101382488478e-08, + "loss": 1.4151098728179932, + "step": 18 + }, + { + "epoch": 0.004380908462070556, + "grad_norm": 0.4931832122178826, + "learning_rate": 8.294930875576037e-08, + "loss": 1.4633708000183105, + "step": 19 + }, + { + "epoch": 0.004611482591653216, + "grad_norm": 0.4601872454406169, + "learning_rate": 8.755760368663594e-08, + "loss": 1.2271082401275635, + "step": 20 + }, + { + "epoch": 0.004842056721235877, + "grad_norm": 0.5482366075993729, + "learning_rate": 9.216589861751152e-08, + "loss": 1.493757724761963, + "step": 21 + }, + { + "epoch": 0.005072630850818538, + "grad_norm": 0.5190439230451068, + "learning_rate": 9.677419354838709e-08, + "loss": 1.446916103363037, + "step": 22 + }, + { + "epoch": 0.005303204980401199, + "grad_norm": 0.5010656217784003, + "learning_rate": 1.0138248847926267e-07, + "loss": 1.4575269222259521, + "step": 23 + }, + { + "epoch": 0.00553377910998386, + "grad_norm": 0.5983934917725938, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.5000505447387695, + "step": 24 + }, + { + "epoch": 0.005764353239566521, + "grad_norm": 0.5264341016273323, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.32895827293396, + "step": 25 + }, + { + "epoch": 0.005994927369149181, + 
"grad_norm": 0.5507902323042685, + "learning_rate": 1.152073732718894e-07, + "loss": 1.479337215423584, + "step": 26 + }, + { + "epoch": 0.006225501498731842, + "grad_norm": 0.4597707182389027, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.3543293476104736, + "step": 27 + }, + { + "epoch": 0.006456075628314503, + "grad_norm": 0.4984681813259071, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.3075106143951416, + "step": 28 + }, + { + "epoch": 0.006686649757897164, + "grad_norm": 0.540668752320374, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.2077248096466064, + "step": 29 + }, + { + "epoch": 0.006917223887479825, + "grad_norm": 0.5053904313535789, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2841781377792358, + "step": 30 + }, + { + "epoch": 0.0071477980170624855, + "grad_norm": 0.5007265235886551, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.4022557735443115, + "step": 31 + }, + { + "epoch": 0.007378372146645147, + "grad_norm": 0.5376464155945276, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.4971141815185547, + "step": 32 + }, + { + "epoch": 0.007608946276227807, + "grad_norm": 0.49485432736210644, + "learning_rate": 1.4746543778801842e-07, + "loss": 1.3699426651000977, + "step": 33 + }, + { + "epoch": 0.007839520405810468, + "grad_norm": 0.602690054138726, + "learning_rate": 1.52073732718894e-07, + "loss": 1.466570258140564, + "step": 34 + }, + { + "epoch": 0.008070094535393129, + "grad_norm": 0.544784030735669, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.3031455278396606, + "step": 35 + }, + { + "epoch": 0.00830066866497579, + "grad_norm": 0.5516628365932859, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.3989369869232178, + "step": 36 + }, + { + "epoch": 0.00853124279455845, + "grad_norm": 0.5375908894429152, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.41139817237854, + "step": 37 + }, + { + "epoch": 0.008761816924141111, + "grad_norm": 0.4923010186613349, + "learning_rate": 
1.705069124423963e-07, + "loss": 1.305363655090332, + "step": 38 + }, + { + "epoch": 0.008992391053723773, + "grad_norm": 0.5782996548067549, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3931915760040283, + "step": 39 + }, + { + "epoch": 0.009222965183306432, + "grad_norm": 0.5425552369520273, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.4728009700775146, + "step": 40 + }, + { + "epoch": 0.009453539312889093, + "grad_norm": 0.5162050268750099, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.4165544509887695, + "step": 41 + }, + { + "epoch": 0.009684113442471755, + "grad_norm": 0.509079818266607, + "learning_rate": 1.889400921658986e-07, + "loss": 1.3693115711212158, + "step": 42 + }, + { + "epoch": 0.009914687572054416, + "grad_norm": 0.5804116282906935, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.468721866607666, + "step": 43 + }, + { + "epoch": 0.010145261701637076, + "grad_norm": 0.5466645633601509, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.4732704162597656, + "step": 44 + }, + { + "epoch": 0.010375835831219737, + "grad_norm": 0.4534942899185725, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.2579209804534912, + "step": 45 + }, + { + "epoch": 0.010606409960802398, + "grad_norm": 0.4766380716605293, + "learning_rate": 2.073732718894009e-07, + "loss": 1.3587429523468018, + "step": 46 + }, + { + "epoch": 0.010836984090385058, + "grad_norm": 0.5409254453286721, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.368800401687622, + "step": 47 + }, + { + "epoch": 0.01106755821996772, + "grad_norm": 0.5103994243466702, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.2960132360458374, + "step": 48 + }, + { + "epoch": 0.01129813234955038, + "grad_norm": 0.47493679434319974, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.3035235404968262, + "step": 49 + }, + { + "epoch": 0.011528706479133042, + "grad_norm": 0.5271868916321076, + "learning_rate": 2.2580645161290322e-07, + "loss": 
1.5074443817138672, + "step": 50 + }, + { + "epoch": 0.011759280608715702, + "grad_norm": 0.5381217045242119, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4689760208129883, + "step": 51 + }, + { + "epoch": 0.011989854738298363, + "grad_norm": 0.4629483381608022, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.3542251586914062, + "step": 52 + }, + { + "epoch": 0.012220428867881024, + "grad_norm": 0.4592532760230554, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.3521728515625, + "step": 53 + }, + { + "epoch": 0.012451002997463684, + "grad_norm": 0.5030837073491258, + "learning_rate": 2.442396313364055e-07, + "loss": 1.3577494621276855, + "step": 54 + }, + { + "epoch": 0.012681577127046345, + "grad_norm": 0.5438911836333451, + "learning_rate": 2.488479262672811e-07, + "loss": 1.459476351737976, + "step": 55 + }, + { + "epoch": 0.012912151256629006, + "grad_norm": 0.52516269169267, + "learning_rate": 2.534562211981567e-07, + "loss": 1.484410047531128, + "step": 56 + }, + { + "epoch": 0.013142725386211668, + "grad_norm": 0.5188914022486312, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3589065074920654, + "step": 57 + }, + { + "epoch": 0.013373299515794327, + "grad_norm": 0.5619229477118247, + "learning_rate": 2.6267281105990777e-07, + "loss": 1.3558262586593628, + "step": 58 + }, + { + "epoch": 0.013603873645376989, + "grad_norm": 0.5534574014271282, + "learning_rate": 2.672811059907834e-07, + "loss": 1.5165367126464844, + "step": 59 + }, + { + "epoch": 0.01383444777495965, + "grad_norm": 0.47598313164662104, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.3051776885986328, + "step": 60 + }, + { + "epoch": 0.01406502190454231, + "grad_norm": 0.45011107968146047, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.2916524410247803, + "step": 61 + }, + { + "epoch": 0.014295596034124971, + "grad_norm": 0.513792634149487, + "learning_rate": 2.8110599078341015e-07, + "loss": 1.440261721611023, + "step": 62 + }, + { + "epoch": 
0.014526170163707632, + "grad_norm": 0.5424492375693261, + "learning_rate": 2.857142857142857e-07, + "loss": 1.3422625064849854, + "step": 63 + }, + { + "epoch": 0.014756744293290294, + "grad_norm": 0.4598784526258713, + "learning_rate": 2.903225806451613e-07, + "loss": 1.374439001083374, + "step": 64 + }, + { + "epoch": 0.014987318422872953, + "grad_norm": 0.5339252174305668, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.4382294416427612, + "step": 65 + }, + { + "epoch": 0.015217892552455614, + "grad_norm": 0.5302645203365586, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.3971002101898193, + "step": 66 + }, + { + "epoch": 0.015448466682038276, + "grad_norm": 0.5711144083332746, + "learning_rate": 3.04147465437788e-07, + "loss": 1.376272439956665, + "step": 67 + }, + { + "epoch": 0.015679040811620935, + "grad_norm": 0.5016109357973636, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.3135097026824951, + "step": 68 + }, + { + "epoch": 0.015909614941203597, + "grad_norm": 0.5041882505031982, + "learning_rate": 3.133640552995391e-07, + "loss": 1.2688875198364258, + "step": 69 + }, + { + "epoch": 0.016140189070786258, + "grad_norm": 0.544108037399583, + "learning_rate": 3.1797235023041476e-07, + "loss": 1.4380691051483154, + "step": 70 + }, + { + "epoch": 0.01637076320036892, + "grad_norm": 0.5634345795303867, + "learning_rate": 3.225806451612903e-07, + "loss": 1.319260835647583, + "step": 71 + }, + { + "epoch": 0.01660133732995158, + "grad_norm": 0.5352869486400713, + "learning_rate": 3.271889400921659e-07, + "loss": 1.4083738327026367, + "step": 72 + }, + { + "epoch": 0.01683191145953424, + "grad_norm": 0.5524091199068598, + "learning_rate": 3.317972350230415e-07, + "loss": 1.4904775619506836, + "step": 73 + }, + { + "epoch": 0.0170624855891169, + "grad_norm": 0.5488563092854116, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.4534616470336914, + "step": 74 + }, + { + "epoch": 0.01729305971869956, + "grad_norm": 0.621117268365485, + 
"learning_rate": 3.410138248847926e-07, + "loss": 1.6545689105987549, + "step": 75 + }, + { + "epoch": 0.017523633848282223, + "grad_norm": 0.4834761822798673, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.2267192602157593, + "step": 76 + }, + { + "epoch": 0.017754207977864884, + "grad_norm": 0.5801091305703396, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.4207227230072021, + "step": 77 + }, + { + "epoch": 0.017984782107447545, + "grad_norm": 0.5253671028782199, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.4952092170715332, + "step": 78 + }, + { + "epoch": 0.018215356237030206, + "grad_norm": 0.4832223487637491, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2932121753692627, + "step": 79 + }, + { + "epoch": 0.018445930366612864, + "grad_norm": 0.5623376259320272, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.3855851888656616, + "step": 80 + }, + { + "epoch": 0.018676504496195526, + "grad_norm": 0.45682252121341854, + "learning_rate": 3.686635944700461e-07, + "loss": 1.3645650148391724, + "step": 81 + }, + { + "epoch": 0.018907078625778187, + "grad_norm": 0.49579660369860507, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.322283387184143, + "step": 82 + }, + { + "epoch": 0.01913765275536085, + "grad_norm": 0.5177315365924456, + "learning_rate": 3.778801843317972e-07, + "loss": 1.3363629579544067, + "step": 83 + }, + { + "epoch": 0.01936822688494351, + "grad_norm": 0.616201260540867, + "learning_rate": 3.824884792626728e-07, + "loss": 1.553279161453247, + "step": 84 + }, + { + "epoch": 0.01959880101452617, + "grad_norm": 0.5198473540371843, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.4434814453125, + "step": 85 + }, + { + "epoch": 0.019829375144108832, + "grad_norm": 0.5923570018189629, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.5134285688400269, + "step": 86 + }, + { + "epoch": 0.02005994927369149, + "grad_norm": 0.5850924486743854, + "learning_rate": 3.963133640552995e-07, + "loss": 
1.4244651794433594, + "step": 87 + }, + { + "epoch": 0.02029052340327415, + "grad_norm": 0.560105193358992, + "learning_rate": 4.009216589861751e-07, + "loss": 1.4571855068206787, + "step": 88 + }, + { + "epoch": 0.020521097532856813, + "grad_norm": 0.48108556089196525, + "learning_rate": 4.055299539170507e-07, + "loss": 1.2940685749053955, + "step": 89 + }, + { + "epoch": 0.020751671662439474, + "grad_norm": 0.5203979535892653, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3537572622299194, + "step": 90 + }, + { + "epoch": 0.020982245792022135, + "grad_norm": 0.5791117780548783, + "learning_rate": 4.147465437788018e-07, + "loss": 1.524500846862793, + "step": 91 + }, + { + "epoch": 0.021212819921604797, + "grad_norm": 0.4890632694429427, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.4414368867874146, + "step": 92 + }, + { + "epoch": 0.021443394051187458, + "grad_norm": 0.49954451696473423, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.284010887145996, + "step": 93 + }, + { + "epoch": 0.021673968180770116, + "grad_norm": 0.6088073736973271, + "learning_rate": 4.285714285714285e-07, + "loss": 1.5901892185211182, + "step": 94 + }, + { + "epoch": 0.021904542310352777, + "grad_norm": 0.5856129890195899, + "learning_rate": 4.3317972350230416e-07, + "loss": 1.4408211708068848, + "step": 95 + }, + { + "epoch": 0.02213511643993544, + "grad_norm": 0.49571353442310634, + "learning_rate": 4.377880184331797e-07, + "loss": 1.2293554544448853, + "step": 96 + }, + { + "epoch": 0.0223656905695181, + "grad_norm": 0.570508723127356, + "learning_rate": 4.423963133640553e-07, + "loss": 1.4144377708435059, + "step": 97 + }, + { + "epoch": 0.02259626469910076, + "grad_norm": 0.5952794755762669, + "learning_rate": 4.4700460829493084e-07, + "loss": 1.359034776687622, + "step": 98 + }, + { + "epoch": 0.022826838828683423, + "grad_norm": 0.5878914385748992, + "learning_rate": 4.5161290322580644e-07, + "loss": 1.3299517631530762, + "step": 99 + }, + { + "epoch": 
0.023057412958266084, + "grad_norm": 0.5039341997298462, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.3072423934936523, + "step": 100 + }, + { + "epoch": 0.023287987087848742, + "grad_norm": 0.6205508042108064, + "learning_rate": 4.608294930875576e-07, + "loss": 1.5683096647262573, + "step": 101 + }, + { + "epoch": 0.023518561217431403, + "grad_norm": 0.6300075069307655, + "learning_rate": 4.654377880184331e-07, + "loss": 1.6294015645980835, + "step": 102 + }, + { + "epoch": 0.023749135347014064, + "grad_norm": 0.5245849244619794, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.424511194229126, + "step": 103 + }, + { + "epoch": 0.023979709476596726, + "grad_norm": 0.5471205081131801, + "learning_rate": 4.746543778801843e-07, + "loss": 1.4169164896011353, + "step": 104 + }, + { + "epoch": 0.024210283606179387, + "grad_norm": 0.5854813174619509, + "learning_rate": 4.792626728110599e-07, + "loss": 1.3933480978012085, + "step": 105 + }, + { + "epoch": 0.02444085773576205, + "grad_norm": 0.6166413586526565, + "learning_rate": 4.838709677419355e-07, + "loss": 1.488750696182251, + "step": 106 + }, + { + "epoch": 0.02467143186534471, + "grad_norm": 0.6052025315612124, + "learning_rate": 4.88479262672811e-07, + "loss": 1.4852150678634644, + "step": 107 + }, + { + "epoch": 0.024902005994927368, + "grad_norm": 0.5750922845804657, + "learning_rate": 4.930875576036866e-07, + "loss": 1.4256765842437744, + "step": 108 + }, + { + "epoch": 0.02513258012451003, + "grad_norm": 0.5231547313189364, + "learning_rate": 4.976958525345622e-07, + "loss": 1.3063642978668213, + "step": 109 + }, + { + "epoch": 0.02536315425409269, + "grad_norm": 0.5734263022927267, + "learning_rate": 5.023041474654378e-07, + "loss": 1.549802303314209, + "step": 110 + }, + { + "epoch": 0.02559372838367535, + "grad_norm": 0.5041709928346361, + "learning_rate": 5.069124423963134e-07, + "loss": 1.301950454711914, + "step": 111 + }, + { + "epoch": 0.025824302513258013, + "grad_norm": 
0.5567596794280206, + "learning_rate": 5.11520737327189e-07, + "loss": 1.3025325536727905, + "step": 112 + }, + { + "epoch": 0.026054876642840674, + "grad_norm": 0.5369405016436734, + "learning_rate": 5.161290322580645e-07, + "loss": 1.40749192237854, + "step": 113 + }, + { + "epoch": 0.026285450772423335, + "grad_norm": 0.5208396194792263, + "learning_rate": 5.2073732718894e-07, + "loss": 1.3216793537139893, + "step": 114 + }, + { + "epoch": 0.026516024902005993, + "grad_norm": 0.5052494958784187, + "learning_rate": 5.253456221198155e-07, + "loss": 1.3189308643341064, + "step": 115 + }, + { + "epoch": 0.026746599031588655, + "grad_norm": 0.5632602249643789, + "learning_rate": 5.299539170506912e-07, + "loss": 1.430384635925293, + "step": 116 + }, + { + "epoch": 0.026977173161171316, + "grad_norm": 0.5516062364182813, + "learning_rate": 5.345622119815668e-07, + "loss": 1.4081478118896484, + "step": 117 + }, + { + "epoch": 0.027207747290753977, + "grad_norm": 0.6385508559977366, + "learning_rate": 5.391705069124423e-07, + "loss": 1.434388518333435, + "step": 118 + }, + { + "epoch": 0.02743832142033664, + "grad_norm": 0.6138756203209041, + "learning_rate": 5.437788018433179e-07, + "loss": 1.4139282703399658, + "step": 119 + }, + { + "epoch": 0.0276688955499193, + "grad_norm": 0.5683069275087388, + "learning_rate": 5.483870967741935e-07, + "loss": 1.4511487483978271, + "step": 120 + }, + { + "epoch": 0.02789946967950196, + "grad_norm": 0.6423215590072974, + "learning_rate": 5.529953917050691e-07, + "loss": 1.5713481903076172, + "step": 121 + }, + { + "epoch": 0.02813004380908462, + "grad_norm": 0.5705917499340588, + "learning_rate": 5.576036866359447e-07, + "loss": 1.4315730333328247, + "step": 122 + }, + { + "epoch": 0.02836061793866728, + "grad_norm": 0.5316898536625556, + "learning_rate": 5.622119815668203e-07, + "loss": 1.3283708095550537, + "step": 123 + }, + { + "epoch": 0.028591192068249942, + "grad_norm": 0.6184222176453401, + "learning_rate": 
5.668202764976958e-07, + "loss": 1.4329016208648682, + "step": 124 + }, + { + "epoch": 0.028821766197832603, + "grad_norm": 0.5872933055537319, + "learning_rate": 5.714285714285714e-07, + "loss": 1.444648265838623, + "step": 125 + }, + { + "epoch": 0.029052340327415264, + "grad_norm": 0.5205647887621043, + "learning_rate": 5.760368663594469e-07, + "loss": 1.3584785461425781, + "step": 126 + }, + { + "epoch": 0.029282914456997926, + "grad_norm": 0.5687232002808722, + "learning_rate": 5.806451612903226e-07, + "loss": 1.2815918922424316, + "step": 127 + }, + { + "epoch": 0.029513488586580587, + "grad_norm": 0.5252774303203537, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3332037925720215, + "step": 128 + }, + { + "epoch": 0.029744062716163245, + "grad_norm": 0.5694649769044726, + "learning_rate": 5.898617511520737e-07, + "loss": 1.4522390365600586, + "step": 129 + }, + { + "epoch": 0.029974636845745906, + "grad_norm": 0.5607244925516301, + "learning_rate": 5.944700460829493e-07, + "loss": 1.4362024068832397, + "step": 130 + }, + { + "epoch": 0.030205210975328568, + "grad_norm": 0.5432906779366606, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3271276950836182, + "step": 131 + }, + { + "epoch": 0.03043578510491123, + "grad_norm": 0.6175056690394787, + "learning_rate": 6.036866359447004e-07, + "loss": 1.5936369895935059, + "step": 132 + }, + { + "epoch": 0.03066635923449389, + "grad_norm": 0.5887629397700789, + "learning_rate": 6.08294930875576e-07, + "loss": 1.4786381721496582, + "step": 133 + }, + { + "epoch": 0.03089693336407655, + "grad_norm": 0.5490770556101789, + "learning_rate": 6.129032258064516e-07, + "loss": 1.3499064445495605, + "step": 134 + }, + { + "epoch": 0.031127507493659213, + "grad_norm": 0.583021664079577, + "learning_rate": 6.175115207373271e-07, + "loss": 1.4434795379638672, + "step": 135 + }, + { + "epoch": 0.03135808162324187, + "grad_norm": 0.6037371306112707, + "learning_rate": 6.221198156682027e-07, + "loss": 
1.4064602851867676, + "step": 136 + }, + { + "epoch": 0.03158865575282453, + "grad_norm": 0.5005511365111003, + "learning_rate": 6.267281105990782e-07, + "loss": 1.3325507640838623, + "step": 137 + }, + { + "epoch": 0.03181922988240719, + "grad_norm": 0.516984621863849, + "learning_rate": 6.313364055299539e-07, + "loss": 1.2584879398345947, + "step": 138 + }, + { + "epoch": 0.032049804011989855, + "grad_norm": 0.5401703370709408, + "learning_rate": 6.359447004608295e-07, + "loss": 1.3754582405090332, + "step": 139 + }, + { + "epoch": 0.032280378141572516, + "grad_norm": 0.5773695778497429, + "learning_rate": 6.40552995391705e-07, + "loss": 1.2700412273406982, + "step": 140 + }, + { + "epoch": 0.03251095227115518, + "grad_norm": 0.580045410672373, + "learning_rate": 6.451612903225806e-07, + "loss": 1.395858645439148, + "step": 141 + }, + { + "epoch": 0.03274152640073784, + "grad_norm": 0.6146943532430481, + "learning_rate": 6.497695852534562e-07, + "loss": 1.402890682220459, + "step": 142 + }, + { + "epoch": 0.0329721005303205, + "grad_norm": 0.5736524878471048, + "learning_rate": 6.543778801843318e-07, + "loss": 1.5405397415161133, + "step": 143 + }, + { + "epoch": 0.03320267465990316, + "grad_norm": 0.5418174501474893, + "learning_rate": 6.589861751152074e-07, + "loss": 1.2394921779632568, + "step": 144 + }, + { + "epoch": 0.03343324878948582, + "grad_norm": 0.6276742940359161, + "learning_rate": 6.63594470046083e-07, + "loss": 1.453255295753479, + "step": 145 + }, + { + "epoch": 0.03366382291906848, + "grad_norm": 0.6191808042065741, + "learning_rate": 6.682027649769585e-07, + "loss": 1.3661112785339355, + "step": 146 + }, + { + "epoch": 0.03389439704865114, + "grad_norm": 0.5260230971069313, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2952282428741455, + "step": 147 + }, + { + "epoch": 0.0341249711782338, + "grad_norm": 0.6693704726704671, + "learning_rate": 6.774193548387096e-07, + "loss": 1.396565318107605, + "step": 148 + }, + { + "epoch": 
0.03435554530781646, + "grad_norm": 0.5881355966882998, + "learning_rate": 6.820276497695853e-07, + "loss": 1.3207082748413086, + "step": 149 + }, + { + "epoch": 0.03458611943739912, + "grad_norm": 0.5727010424261832, + "learning_rate": 6.866359447004608e-07, + "loss": 1.4085125923156738, + "step": 150 + }, + { + "epoch": 0.034816693566981784, + "grad_norm": 0.6667208730018341, + "learning_rate": 6.912442396313363e-07, + "loss": 1.5698528289794922, + "step": 151 + }, + { + "epoch": 0.035047267696564445, + "grad_norm": 0.5847511619477141, + "learning_rate": 6.958525345622119e-07, + "loss": 1.4091004133224487, + "step": 152 + }, + { + "epoch": 0.035277841826147106, + "grad_norm": 0.5143540253572731, + "learning_rate": 7.004608294930875e-07, + "loss": 1.2392504215240479, + "step": 153 + }, + { + "epoch": 0.03550841595572977, + "grad_norm": 0.6061996419355483, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3355891704559326, + "step": 154 + }, + { + "epoch": 0.03573899008531243, + "grad_norm": 0.5654677060773288, + "learning_rate": 7.096774193548387e-07, + "loss": 1.330599308013916, + "step": 155 + }, + { + "epoch": 0.03596956421489509, + "grad_norm": 0.5625277163359125, + "learning_rate": 7.142857142857143e-07, + "loss": 1.344653844833374, + "step": 156 + }, + { + "epoch": 0.03620013834447775, + "grad_norm": 0.5693935421186345, + "learning_rate": 7.188940092165898e-07, + "loss": 1.341560959815979, + "step": 157 + }, + { + "epoch": 0.03643071247406041, + "grad_norm": 0.5761507210889462, + "learning_rate": 7.235023041474654e-07, + "loss": 1.2242077589035034, + "step": 158 + }, + { + "epoch": 0.036661286603643074, + "grad_norm": 0.61477283253827, + "learning_rate": 7.281105990783409e-07, + "loss": 1.2858202457427979, + "step": 159 + }, + { + "epoch": 0.03689186073322573, + "grad_norm": 0.6410836439864531, + "learning_rate": 7.327188940092166e-07, + "loss": 1.479524850845337, + "step": 160 + }, + { + "epoch": 0.03712243486280839, + "grad_norm": 0.5918139936623208, + 
"learning_rate": 7.373271889400922e-07, + "loss": 1.43915855884552, + "step": 161 + }, + { + "epoch": 0.03735300899239105, + "grad_norm": 0.6478814183526712, + "learning_rate": 7.419354838709677e-07, + "loss": 1.3939034938812256, + "step": 162 + }, + { + "epoch": 0.03758358312197371, + "grad_norm": 0.6065250961726126, + "learning_rate": 7.465437788018433e-07, + "loss": 1.2733443975448608, + "step": 163 + }, + { + "epoch": 0.037814157251556374, + "grad_norm": 0.5670760124517911, + "learning_rate": 7.511520737327189e-07, + "loss": 1.3436474800109863, + "step": 164 + }, + { + "epoch": 0.038044731381139035, + "grad_norm": 0.622037546591312, + "learning_rate": 7.557603686635944e-07, + "loss": 1.4250465631484985, + "step": 165 + }, + { + "epoch": 0.0382753055107217, + "grad_norm": 0.607298640184171, + "learning_rate": 7.603686635944701e-07, + "loss": 1.4244422912597656, + "step": 166 + }, + { + "epoch": 0.03850587964030436, + "grad_norm": 0.6986289389542176, + "learning_rate": 7.649769585253457e-07, + "loss": 1.5487544536590576, + "step": 167 + }, + { + "epoch": 0.03873645376988702, + "grad_norm": 0.5793907792629099, + "learning_rate": 7.695852534562211e-07, + "loss": 1.3282281160354614, + "step": 168 + }, + { + "epoch": 0.03896702789946968, + "grad_norm": 0.5428953608010194, + "learning_rate": 7.741935483870967e-07, + "loss": 1.2823774814605713, + "step": 169 + }, + { + "epoch": 0.03919760202905234, + "grad_norm": 0.5889853233557574, + "learning_rate": 7.788018433179722e-07, + "loss": 1.2402329444885254, + "step": 170 + }, + { + "epoch": 0.039428176158635, + "grad_norm": 0.6219537569729359, + "learning_rate": 7.834101382488479e-07, + "loss": 1.3755587339401245, + "step": 171 + }, + { + "epoch": 0.039658750288217665, + "grad_norm": 0.5509851701904478, + "learning_rate": 7.880184331797235e-07, + "loss": 1.3403921127319336, + "step": 172 + }, + { + "epoch": 0.039889324417800326, + "grad_norm": 0.5971512014225002, + "learning_rate": 7.92626728110599e-07, + "loss": 
1.3742129802703857, + "step": 173 + }, + { + "epoch": 0.04011989854738298, + "grad_norm": 0.7068161569826883, + "learning_rate": 7.972350230414746e-07, + "loss": 1.6444599628448486, + "step": 174 + }, + { + "epoch": 0.04035047267696564, + "grad_norm": 0.6019721571978455, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3891929388046265, + "step": 175 + }, + { + "epoch": 0.0405810468065483, + "grad_norm": 0.5520157347061957, + "learning_rate": 8.064516129032257e-07, + "loss": 1.2279409170150757, + "step": 176 + }, + { + "epoch": 0.040811620936130964, + "grad_norm": 0.6346481492269727, + "learning_rate": 8.110599078341014e-07, + "loss": 1.4576997756958008, + "step": 177 + }, + { + "epoch": 0.041042195065713626, + "grad_norm": 0.612489332435889, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3585199117660522, + "step": 178 + }, + { + "epoch": 0.04127276919529629, + "grad_norm": 0.5908354773562909, + "learning_rate": 8.202764976958525e-07, + "loss": 1.3056905269622803, + "step": 179 + }, + { + "epoch": 0.04150334332487895, + "grad_norm": 0.5749600887070265, + "learning_rate": 8.248847926267281e-07, + "loss": 1.3029698133468628, + "step": 180 + }, + { + "epoch": 0.04173391745446161, + "grad_norm": 0.6598409427706357, + "learning_rate": 8.294930875576036e-07, + "loss": 1.4368736743927002, + "step": 181 + }, + { + "epoch": 0.04196449158404427, + "grad_norm": 0.5781034108869284, + "learning_rate": 8.341013824884793e-07, + "loss": 1.3243422508239746, + "step": 182 + }, + { + "epoch": 0.04219506571362693, + "grad_norm": 0.5206395827762466, + "learning_rate": 8.387096774193549e-07, + "loss": 1.232081413269043, + "step": 183 + }, + { + "epoch": 0.042425639843209594, + "grad_norm": 0.656527379150416, + "learning_rate": 8.433179723502303e-07, + "loss": 1.4601390361785889, + "step": 184 + }, + { + "epoch": 0.042656213972792255, + "grad_norm": 0.7159376690159417, + "learning_rate": 8.479262672811059e-07, + "loss": 1.3778860569000244, + "step": 185 + }, + { + "epoch": 
0.042886788102374916, + "grad_norm": 0.590059263278645, + "learning_rate": 8.525345622119815e-07, + "loss": 1.3235092163085938, + "step": 186 + }, + { + "epoch": 0.04311736223195758, + "grad_norm": 0.6886704124574455, + "learning_rate": 8.57142857142857e-07, + "loss": 1.4480581283569336, + "step": 187 + }, + { + "epoch": 0.04334793636154023, + "grad_norm": 0.6346582437238362, + "learning_rate": 8.617511520737327e-07, + "loss": 1.4530816078186035, + "step": 188 + }, + { + "epoch": 0.04357851049112289, + "grad_norm": 0.6767670706852607, + "learning_rate": 8.663594470046083e-07, + "loss": 1.4447407722473145, + "step": 189 + }, + { + "epoch": 0.043809084620705555, + "grad_norm": 0.6049885392306779, + "learning_rate": 8.709677419354838e-07, + "loss": 1.3610244989395142, + "step": 190 + }, + { + "epoch": 0.044039658750288216, + "grad_norm": 0.6415008170468611, + "learning_rate": 8.755760368663594e-07, + "loss": 1.4084277153015137, + "step": 191 + }, + { + "epoch": 0.04427023287987088, + "grad_norm": 0.579530872526008, + "learning_rate": 8.801843317972349e-07, + "loss": 1.3652758598327637, + "step": 192 + }, + { + "epoch": 0.04450080700945354, + "grad_norm": 0.7106489880805067, + "learning_rate": 8.847926267281106e-07, + "loss": 1.4791496992111206, + "step": 193 + }, + { + "epoch": 0.0447313811390362, + "grad_norm": 0.6211187249917176, + "learning_rate": 8.894009216589862e-07, + "loss": 1.3958008289337158, + "step": 194 + }, + { + "epoch": 0.04496195526861886, + "grad_norm": 0.700016972508283, + "learning_rate": 8.940092165898617e-07, + "loss": 1.4134410619735718, + "step": 195 + }, + { + "epoch": 0.04519252939820152, + "grad_norm": 0.6911089974612981, + "learning_rate": 8.986175115207373e-07, + "loss": 1.4062776565551758, + "step": 196 + }, + { + "epoch": 0.045423103527784184, + "grad_norm": 0.6823334536756955, + "learning_rate": 9.032258064516129e-07, + "loss": 1.375224232673645, + "step": 197 + }, + { + "epoch": 0.045653677657366845, + "grad_norm": 0.6003343488972004, 
+ "learning_rate": 9.078341013824884e-07, + "loss": 1.2440606355667114, + "step": 198 + }, + { + "epoch": 0.045884251786949506, + "grad_norm": 0.6737684280449967, + "learning_rate": 9.124423963133641e-07, + "loss": 1.4068349599838257, + "step": 199 + }, + { + "epoch": 0.04611482591653217, + "grad_norm": 0.6181499859340271, + "learning_rate": 9.170506912442397e-07, + "loss": 1.3797581195831299, + "step": 200 + }, + { + "epoch": 0.04634540004611483, + "grad_norm": 0.6445170966825345, + "learning_rate": 9.216589861751152e-07, + "loss": 1.4441678524017334, + "step": 201 + }, + { + "epoch": 0.046575974175697483, + "grad_norm": 0.6677276378953197, + "learning_rate": 9.262672811059907e-07, + "loss": 1.4727370738983154, + "step": 202 + }, + { + "epoch": 0.046806548305280145, + "grad_norm": 0.7032332117559357, + "learning_rate": 9.308755760368662e-07, + "loss": 1.448495864868164, + "step": 203 + }, + { + "epoch": 0.047037122434862806, + "grad_norm": 0.674429398641426, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3727293014526367, + "step": 204 + }, + { + "epoch": 0.04726769656444547, + "grad_norm": 0.6701259318687961, + "learning_rate": 9.400921658986175e-07, + "loss": 1.4234352111816406, + "step": 205 + }, + { + "epoch": 0.04749827069402813, + "grad_norm": 0.5974678653003657, + "learning_rate": 9.44700460829493e-07, + "loss": 1.2407056093215942, + "step": 206 + }, + { + "epoch": 0.04772884482361079, + "grad_norm": 0.672276356974357, + "learning_rate": 9.493087557603686e-07, + "loss": 1.3502311706542969, + "step": 207 + }, + { + "epoch": 0.04795941895319345, + "grad_norm": 0.7465400676066979, + "learning_rate": 9.539170506912442e-07, + "loss": 1.4618254899978638, + "step": 208 + }, + { + "epoch": 0.04818999308277611, + "grad_norm": 0.681303163705478, + "learning_rate": 9.585253456221198e-07, + "loss": 1.3624317646026611, + "step": 209 + }, + { + "epoch": 0.048420567212358774, + "grad_norm": 0.7608712138693399, + "learning_rate": 9.631336405529954e-07, + "loss": 
1.512046456336975, + "step": 210 + }, + { + "epoch": 0.048651141341941435, + "grad_norm": 0.6018077766578277, + "learning_rate": 9.67741935483871e-07, + "loss": 1.2896164655685425, + "step": 211 + }, + { + "epoch": 0.0488817154715241, + "grad_norm": 0.7063578249182565, + "learning_rate": 9.723502304147466e-07, + "loss": 1.5507850646972656, + "step": 212 + }, + { + "epoch": 0.04911228960110676, + "grad_norm": 0.7081498572564182, + "learning_rate": 9.76958525345622e-07, + "loss": 1.425408124923706, + "step": 213 + }, + { + "epoch": 0.04934286373068942, + "grad_norm": 0.7025877080602252, + "learning_rate": 9.815668202764976e-07, + "loss": 1.347771406173706, + "step": 214 + }, + { + "epoch": 0.04957343786027208, + "grad_norm": 0.7201983919068122, + "learning_rate": 9.861751152073732e-07, + "loss": 1.4044904708862305, + "step": 215 + }, + { + "epoch": 0.049804011989854735, + "grad_norm": 0.7045020078596302, + "learning_rate": 9.907834101382488e-07, + "loss": 1.3507332801818848, + "step": 216 + }, + { + "epoch": 0.050034586119437396, + "grad_norm": 0.6820424993070572, + "learning_rate": 9.953917050691244e-07, + "loss": 1.3022946119308472, + "step": 217 + }, + { + "epoch": 0.05026516024902006, + "grad_norm": 0.6561516180690095, + "learning_rate": 1e-06, + "loss": 1.284754991531372, + "step": 218 + }, + { + "epoch": 0.05049573437860272, + "grad_norm": 0.6003085662526402, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.2985923290252686, + "step": 219 + }, + { + "epoch": 0.05072630850818538, + "grad_norm": 0.6214608767923379, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.3855717182159424, + "step": 220 + }, + { + "epoch": 0.05095688263776804, + "grad_norm": 0.675694738994849, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.357919692993164, + "step": 221 + }, + { + "epoch": 0.0511874567673507, + "grad_norm": 0.6736529895786637, + "learning_rate": 1.0184331797235021e-06, + "loss": 1.2818949222564697, + "step": 222 + }, + { + "epoch": 
0.051418030896933364, + "grad_norm": 0.6226203332882617, + "learning_rate": 1.023041474654378e-06, + "loss": 1.2488511800765991, + "step": 223 + }, + { + "epoch": 0.051648605026516026, + "grad_norm": 0.7420146271711324, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.3824148178100586, + "step": 224 + }, + { + "epoch": 0.05187917915609869, + "grad_norm": 0.6473939851836901, + "learning_rate": 1.032258064516129e-06, + "loss": 1.3114633560180664, + "step": 225 + }, + { + "epoch": 0.05210975328568135, + "grad_norm": 0.6372141360329365, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.272273063659668, + "step": 226 + }, + { + "epoch": 0.05234032741526401, + "grad_norm": 0.8216490037105428, + "learning_rate": 1.04147465437788e-06, + "loss": 1.5072649717330933, + "step": 227 + }, + { + "epoch": 0.05257090154484667, + "grad_norm": 0.7183581578734374, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.4087142944335938, + "step": 228 + }, + { + "epoch": 0.05280147567442933, + "grad_norm": 0.8332625481322393, + "learning_rate": 1.050691244239631e-06, + "loss": 1.4866605997085571, + "step": 229 + }, + { + "epoch": 0.05303204980401199, + "grad_norm": 0.6315632875144884, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.3377184867858887, + "step": 230 + }, + { + "epoch": 0.05326262393359465, + "grad_norm": 0.6695801561741619, + "learning_rate": 1.0599078341013825e-06, + "loss": 1.4009103775024414, + "step": 231 + }, + { + "epoch": 0.05349319806317731, + "grad_norm": 0.7832755910275336, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.4878556728363037, + "step": 232 + }, + { + "epoch": 0.05372377219275997, + "grad_norm": 0.7218421394327601, + "learning_rate": 1.0691244239631337e-06, + "loss": 1.4002021551132202, + "step": 233 + }, + { + "epoch": 0.05395434632234263, + "grad_norm": 0.6918832056192313, + "learning_rate": 1.073732718894009e-06, + "loss": 1.337146520614624, + "step": 234 + }, + { + "epoch": 0.05418492045192529, + "grad_norm": 
0.7101215642172168, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.4084792137145996, + "step": 235 + }, + { + "epoch": 0.054415494581507955, + "grad_norm": 0.8413614642264606, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.4131449460983276, + "step": 236 + }, + { + "epoch": 0.054646068711090616, + "grad_norm": 0.6587637953772119, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.1869292259216309, + "step": 237 + }, + { + "epoch": 0.05487664284067328, + "grad_norm": 0.7608337119634553, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.3970961570739746, + "step": 238 + }, + { + "epoch": 0.05510721697025594, + "grad_norm": 0.7677503323555195, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2682442665100098, + "step": 239 + }, + { + "epoch": 0.0553377910998386, + "grad_norm": 0.6546621813731868, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2983934879302979, + "step": 240 + }, + { + "epoch": 0.05556836522942126, + "grad_norm": 0.7451544478647047, + "learning_rate": 1.1059907834101382e-06, + "loss": 1.3980869054794312, + "step": 241 + }, + { + "epoch": 0.05579893935900392, + "grad_norm": 0.6116475273591584, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.3068631887435913, + "step": 242 + }, + { + "epoch": 0.056029513488586584, + "grad_norm": 0.7974654782353883, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.5353353023529053, + "step": 243 + }, + { + "epoch": 0.05626008761816924, + "grad_norm": 0.663054900024182, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.290163278579712, + "step": 244 + }, + { + "epoch": 0.0564906617477519, + "grad_norm": 0.6761997400626832, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.3671848773956299, + "step": 245 + }, + { + "epoch": 0.05672123587733456, + "grad_norm": 0.6294209937786865, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.3020408153533936, + "step": 246 + }, + { + "epoch": 0.05695181000691722, + "grad_norm": 0.7207247726421506, + "learning_rate": 
1.1336405529953916e-06, + "loss": 1.3159775733947754, + "step": 247 + }, + { + "epoch": 0.057182384136499884, + "grad_norm": 0.6708051542823367, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.3163995742797852, + "step": 248 + }, + { + "epoch": 0.057412958266082545, + "grad_norm": 0.8019994049858626, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.5215930938720703, + "step": 249 + }, + { + "epoch": 0.057643532395665206, + "grad_norm": 0.6559479072990889, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.2870161533355713, + "step": 250 + }, + { + "epoch": 0.05787410652524787, + "grad_norm": 0.7147869966218979, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.2624198198318481, + "step": 251 + }, + { + "epoch": 0.05810468065483053, + "grad_norm": 0.7319832858668294, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.2778981924057007, + "step": 252 + }, + { + "epoch": 0.05833525478441319, + "grad_norm": 0.6564800467165074, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.1934442520141602, + "step": 253 + }, + { + "epoch": 0.05856582891399585, + "grad_norm": 0.7291335446235057, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.3840088844299316, + "step": 254 + }, + { + "epoch": 0.05879640304357851, + "grad_norm": 0.7017610521536986, + "learning_rate": 1.1705069124423963e-06, + "loss": 1.373002290725708, + "step": 255 + }, + { + "epoch": 0.059026977173161174, + "grad_norm": 0.6853330554611681, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.3614685535430908, + "step": 256 + }, + { + "epoch": 0.059257551302743836, + "grad_norm": 0.7170055632885292, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.3525335788726807, + "step": 257 + }, + { + "epoch": 0.05948812543232649, + "grad_norm": 0.7471586447698318, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.3806469440460205, + "step": 258 + }, + { + "epoch": 0.05971869956190915, + "grad_norm": 0.7262354481718393, + "learning_rate": 1.1889400921658985e-06, + "loss": 
1.372736930847168, + "step": 259 + }, + { + "epoch": 0.05994927369149181, + "grad_norm": 0.7470794959515278, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.309061050415039, + "step": 260 + }, + { + "epoch": 0.060179847821074474, + "grad_norm": 0.7217295951903909, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.3500525951385498, + "step": 261 + }, + { + "epoch": 0.060410421950657135, + "grad_norm": 0.7498906773328822, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.4197357892990112, + "step": 262 + }, + { + "epoch": 0.0606409960802398, + "grad_norm": 0.9553336191863615, + "learning_rate": 1.207373271889401e-06, + "loss": 1.6454131603240967, + "step": 263 + }, + { + "epoch": 0.06087157020982246, + "grad_norm": 0.7361372249879211, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.269604206085205, + "step": 264 + }, + { + "epoch": 0.06110214433940512, + "grad_norm": 0.6596823046141973, + "learning_rate": 1.216589861751152e-06, + "loss": 1.2358057498931885, + "step": 265 + }, + { + "epoch": 0.06133271846898778, + "grad_norm": 0.7203751630823346, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2713422775268555, + "step": 266 + }, + { + "epoch": 0.06156329259857044, + "grad_norm": 0.7033446179657081, + "learning_rate": 1.2258064516129033e-06, + "loss": 1.225820779800415, + "step": 267 + }, + { + "epoch": 0.0617938667281531, + "grad_norm": 0.6900817599997362, + "learning_rate": 1.2304147465437787e-06, + "loss": 1.279617190361023, + "step": 268 + }, + { + "epoch": 0.062024440857735764, + "grad_norm": 0.6800159728233099, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.2081385850906372, + "step": 269 + }, + { + "epoch": 0.062255014987318426, + "grad_norm": 0.7378639399050563, + "learning_rate": 1.23963133640553e-06, + "loss": 1.3121249675750732, + "step": 270 + }, + { + "epoch": 0.06248558911690109, + "grad_norm": 0.7497904685097676, + "learning_rate": 1.2442396313364054e-06, + "loss": 1.28495454788208, + "step": 271 + }, + { + 
"epoch": 0.06271616324648374, + "grad_norm": 0.7749777957183016, + "learning_rate": 1.248847926267281e-06, + "loss": 1.3837053775787354, + "step": 272 + }, + { + "epoch": 0.0629467373760664, + "grad_norm": 0.7210838772374344, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.2119230031967163, + "step": 273 + }, + { + "epoch": 0.06317731150564906, + "grad_norm": 0.7143072591295863, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.323190450668335, + "step": 274 + }, + { + "epoch": 0.06340788563523173, + "grad_norm": 0.7546501032980093, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.4300715923309326, + "step": 275 + }, + { + "epoch": 0.06363845976481439, + "grad_norm": 0.7154461007442852, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1680996417999268, + "step": 276 + }, + { + "epoch": 0.06386903389439705, + "grad_norm": 0.8088364505140268, + "learning_rate": 1.271889400921659e-06, + "loss": 1.3980211019515991, + "step": 277 + }, + { + "epoch": 0.06409960802397971, + "grad_norm": 0.7801914373505492, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.40798020362854, + "step": 278 + }, + { + "epoch": 0.06433018215356237, + "grad_norm": 0.7237186405433459, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2535033226013184, + "step": 279 + }, + { + "epoch": 0.06456075628314503, + "grad_norm": 0.7779219570683336, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.3866907358169556, + "step": 280 + }, + { + "epoch": 0.0647913304127277, + "grad_norm": 0.7036374523288562, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.1985647678375244, + "step": 281 + }, + { + "epoch": 0.06502190454231035, + "grad_norm": 0.8186126171093759, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.3741936683654785, + "step": 282 + }, + { + "epoch": 0.06525247867189302, + "grad_norm": 0.7795060457073558, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.3684422969818115, + "step": 283 + }, + { + "epoch": 0.06548305280147568, + "grad_norm": 
0.7685811594695469, + "learning_rate": 1.304147465437788e-06, + "loss": 1.3792086839675903, + "step": 284 + }, + { + "epoch": 0.06571362693105834, + "grad_norm": 0.8541112738893439, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.3252873420715332, + "step": 285 + }, + { + "epoch": 0.065944201060641, + "grad_norm": 0.7272989570317888, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.1918525695800781, + "step": 286 + }, + { + "epoch": 0.06617477519022366, + "grad_norm": 0.8825171015262823, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3760654926300049, + "step": 287 + }, + { + "epoch": 0.06640534931980632, + "grad_norm": 0.8100539272477522, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.3452839851379395, + "step": 288 + }, + { + "epoch": 0.06663592344938898, + "grad_norm": 0.7635396360128843, + "learning_rate": 1.327188940092166e-06, + "loss": 1.321220874786377, + "step": 289 + }, + { + "epoch": 0.06686649757897165, + "grad_norm": 0.724002123288283, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.222012996673584, + "step": 290 + }, + { + "epoch": 0.0670970717085543, + "grad_norm": 0.7939713970528558, + "learning_rate": 1.336405529953917e-06, + "loss": 1.3209044933319092, + "step": 291 + }, + { + "epoch": 0.06732764583813695, + "grad_norm": 0.834643855588948, + "learning_rate": 1.3410138248847927e-06, + "loss": 1.3250432014465332, + "step": 292 + }, + { + "epoch": 0.06755821996771962, + "grad_norm": 0.6522445861220314, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.1738805770874023, + "step": 293 + }, + { + "epoch": 0.06778879409730228, + "grad_norm": 0.7430324759377445, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.238675832748413, + "step": 294 + }, + { + "epoch": 0.06801936822688494, + "grad_norm": 0.6872443402637277, + "learning_rate": 1.354838709677419e-06, + "loss": 1.2162814140319824, + "step": 295 + }, + { + "epoch": 0.0682499423564676, + "grad_norm": 0.7451321254668013, + "learning_rate": 
1.359447004608295e-06, + "loss": 1.2087210416793823, + "step": 296 + }, + { + "epoch": 0.06848051648605026, + "grad_norm": 0.7183129418570579, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.2657420635223389, + "step": 297 + }, + { + "epoch": 0.06871109061563292, + "grad_norm": 0.8828866176671843, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.496249794960022, + "step": 298 + }, + { + "epoch": 0.06894166474521558, + "grad_norm": 0.7852198432087445, + "learning_rate": 1.3732718894009217e-06, + "loss": 1.2698930501937866, + "step": 299 + }, + { + "epoch": 0.06917223887479824, + "grad_norm": 0.723866375282328, + "learning_rate": 1.377880184331797e-06, + "loss": 1.2088165283203125, + "step": 300 + }, + { + "epoch": 0.0694028130043809, + "grad_norm": 0.764377981893855, + "learning_rate": 1.3824884792626727e-06, + "loss": 1.392000436782837, + "step": 301 + }, + { + "epoch": 0.06963338713396357, + "grad_norm": 0.7252481501169622, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.366544485092163, + "step": 302 + }, + { + "epoch": 0.06986396126354623, + "grad_norm": 0.7900814443800929, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.3276031017303467, + "step": 303 + }, + { + "epoch": 0.07009453539312889, + "grad_norm": 0.7000339586583599, + "learning_rate": 1.3963133640552995e-06, + "loss": 1.1413768529891968, + "step": 304 + }, + { + "epoch": 0.07032510952271155, + "grad_norm": 0.7903483195817192, + "learning_rate": 1.400921658986175e-06, + "loss": 1.2958520650863647, + "step": 305 + }, + { + "epoch": 0.07055568365229421, + "grad_norm": 0.7651988170590107, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.3514549732208252, + "step": 306 + }, + { + "epoch": 0.07078625778187687, + "grad_norm": 0.767117117462576, + "learning_rate": 1.410138248847926e-06, + "loss": 1.332120418548584, + "step": 307 + }, + { + "epoch": 0.07101683191145954, + "grad_norm": 0.8380945550826328, + "learning_rate": 1.4147465437788018e-06, + "loss": 1.282820463180542, + 
"step": 308 + }, + { + "epoch": 0.0712474060410422, + "grad_norm": 0.7478573370757386, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.3927665948867798, + "step": 309 + }, + { + "epoch": 0.07147798017062486, + "grad_norm": 0.7471336867744233, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.2459386587142944, + "step": 310 + }, + { + "epoch": 0.07170855430020752, + "grad_norm": 0.715680538211599, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.1996700763702393, + "step": 311 + }, + { + "epoch": 0.07193912842979018, + "grad_norm": 0.7466366577926873, + "learning_rate": 1.433179723502304e-06, + "loss": 1.1007883548736572, + "step": 312 + }, + { + "epoch": 0.07216970255937284, + "grad_norm": 0.6505103448142013, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.211327314376831, + "step": 313 + }, + { + "epoch": 0.0724002766889555, + "grad_norm": 0.7475198907178121, + "learning_rate": 1.4423963133640554e-06, + "loss": 1.314349889755249, + "step": 314 + }, + { + "epoch": 0.07263085081853816, + "grad_norm": 0.7782372886671983, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.2270662784576416, + "step": 315 + }, + { + "epoch": 0.07286142494812083, + "grad_norm": 0.7521500862086049, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.1802537441253662, + "step": 316 + }, + { + "epoch": 0.07309199907770349, + "grad_norm": 0.7684137773026678, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.275806188583374, + "step": 317 + }, + { + "epoch": 0.07332257320728615, + "grad_norm": 0.789590997753613, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.2713148593902588, + "step": 318 + }, + { + "epoch": 0.07355314733686881, + "grad_norm": 0.8345280857312554, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.3091093301773071, + "step": 319 + }, + { + "epoch": 0.07378372146645146, + "grad_norm": 0.7108154017524825, + "learning_rate": 1.4700460829493086e-06, + "loss": 1.1274672746658325, + "step": 320 + }, + { + "epoch": 
0.07401429559603412, + "grad_norm": 0.7137227522476419, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.236955165863037, + "step": 321 + }, + { + "epoch": 0.07424486972561678, + "grad_norm": 0.7825967305477171, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.2561366558074951, + "step": 322 + }, + { + "epoch": 0.07447544385519944, + "grad_norm": 0.7250730413423113, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.1229519844055176, + "step": 323 + }, + { + "epoch": 0.0747060179847821, + "grad_norm": 0.7688658143017724, + "learning_rate": 1.4884792626728112e-06, + "loss": 1.200115442276001, + "step": 324 + }, + { + "epoch": 0.07493659211436476, + "grad_norm": 0.7499295220603182, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1930850744247437, + "step": 325 + }, + { + "epoch": 0.07516716624394743, + "grad_norm": 0.8209913282027874, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.3204331398010254, + "step": 326 + }, + { + "epoch": 0.07539774037353009, + "grad_norm": 0.7429612395335268, + "learning_rate": 1.5023041474654377e-06, + "loss": 1.109247088432312, + "step": 327 + }, + { + "epoch": 0.07562831450311275, + "grad_norm": 0.7097388789784923, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.1239254474639893, + "step": 328 + }, + { + "epoch": 0.07585888863269541, + "grad_norm": 0.7867677832004493, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.22686767578125, + "step": 329 + }, + { + "epoch": 0.07608946276227807, + "grad_norm": 0.8425243281826544, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2846856117248535, + "step": 330 + }, + { + "epoch": 0.07632003689186073, + "grad_norm": 0.7611030204070008, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.1720764636993408, + "step": 331 + }, + { + "epoch": 0.0765506110214434, + "grad_norm": 0.6783089545901869, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.05867338180542, + "step": 332 + }, + { + "epoch": 0.07678118515102605, + "grad_norm": 
0.781197296597327, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.2652220726013184, + "step": 333 + }, + { + "epoch": 0.07701175928060872, + "grad_norm": 0.7674267376615101, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.1367218494415283, + "step": 334 + }, + { + "epoch": 0.07724233341019138, + "grad_norm": 0.7149265599125916, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.169439673423767, + "step": 335 + }, + { + "epoch": 0.07747290753977404, + "grad_norm": 0.8284832797024527, + "learning_rate": 1.543778801843318e-06, + "loss": 1.265104055404663, + "step": 336 + }, + { + "epoch": 0.0777034816693567, + "grad_norm": 0.6605498491920537, + "learning_rate": 1.5483870967741935e-06, + "loss": 1.059098243713379, + "step": 337 + }, + { + "epoch": 0.07793405579893936, + "grad_norm": 0.8255024678570093, + "learning_rate": 1.552995391705069e-06, + "loss": 1.0998419523239136, + "step": 338 + }, + { + "epoch": 0.07816462992852202, + "grad_norm": 0.8285993940213782, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1361349821090698, + "step": 339 + }, + { + "epoch": 0.07839520405810468, + "grad_norm": 0.7677612111698353, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.1051890850067139, + "step": 340 + }, + { + "epoch": 0.07862577818768735, + "grad_norm": 0.8204078401725609, + "learning_rate": 1.5668202764976959e-06, + "loss": 1.1675043106079102, + "step": 341 + }, + { + "epoch": 0.07885635231727, + "grad_norm": 0.8428908363907526, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.180741786956787, + "step": 342 + }, + { + "epoch": 0.07908692644685267, + "grad_norm": 0.8559354133772745, + "learning_rate": 1.576036866359447e-06, + "loss": 1.241147518157959, + "step": 343 + }, + { + "epoch": 0.07931750057643533, + "grad_norm": 0.848204694935563, + "learning_rate": 1.5806451612903224e-06, + "loss": 1.2831401824951172, + "step": 344 + }, + { + "epoch": 0.07954807470601799, + "grad_norm": 0.7281233645086155, + "learning_rate": 
1.585253456221198e-06, + "loss": 1.2328094244003296, + "step": 345 + }, + { + "epoch": 0.07977864883560065, + "grad_norm": 0.7932743453051899, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.296494960784912, + "step": 346 + }, + { + "epoch": 0.08000922296518331, + "grad_norm": 0.7368517201206619, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.1802153587341309, + "step": 347 + }, + { + "epoch": 0.08023979709476596, + "grad_norm": 0.8829436639082808, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.2387690544128418, + "step": 348 + }, + { + "epoch": 0.08047037122434862, + "grad_norm": 0.8002618721063425, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.1307916641235352, + "step": 349 + }, + { + "epoch": 0.08070094535393128, + "grad_norm": 0.8185303488247757, + "learning_rate": 1.608294930875576e-06, + "loss": 1.117497444152832, + "step": 350 + }, + { + "epoch": 0.08093151948351394, + "grad_norm": 0.7524331692605707, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1360805034637451, + "step": 351 + }, + { + "epoch": 0.0811620936130966, + "grad_norm": 0.7626049955851422, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.1756231784820557, + "step": 352 + }, + { + "epoch": 0.08139266774267927, + "grad_norm": 0.7605864356179197, + "learning_rate": 1.6221198156682028e-06, + "loss": 1.0260417461395264, + "step": 353 + }, + { + "epoch": 0.08162324187226193, + "grad_norm": 0.6949706544727091, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0863536596298218, + "step": 354 + }, + { + "epoch": 0.08185381600184459, + "grad_norm": 0.7427032746567218, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0529779195785522, + "step": 355 + }, + { + "epoch": 0.08208439013142725, + "grad_norm": 0.7626426518406405, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.0374994277954102, + "step": 356 + }, + { + "epoch": 0.08231496426100991, + "grad_norm": 0.7762352327056515, + "learning_rate": 1.640552995391705e-06, + "loss": 
1.153419017791748, + "step": 357 + }, + { + "epoch": 0.08254553839059257, + "grad_norm": 0.7455681546697154, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.0155376195907593, + "step": 358 + }, + { + "epoch": 0.08277611252017524, + "grad_norm": 0.779838920397346, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1288530826568604, + "step": 359 + }, + { + "epoch": 0.0830066866497579, + "grad_norm": 0.8920666311969824, + "learning_rate": 1.6543778801843317e-06, + "loss": 1.1493456363677979, + "step": 360 + }, + { + "epoch": 0.08323726077934056, + "grad_norm": 0.8383114858680324, + "learning_rate": 1.6589861751152071e-06, + "loss": 1.1064895391464233, + "step": 361 + }, + { + "epoch": 0.08346783490892322, + "grad_norm": 0.752156167882629, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0102828741073608, + "step": 362 + }, + { + "epoch": 0.08369840903850588, + "grad_norm": 0.8341451005387022, + "learning_rate": 1.6682027649769585e-06, + "loss": 1.0750138759613037, + "step": 363 + }, + { + "epoch": 0.08392898316808854, + "grad_norm": 0.8504953523340792, + "learning_rate": 1.672811059907834e-06, + "loss": 1.1611195802688599, + "step": 364 + }, + { + "epoch": 0.0841595572976712, + "grad_norm": 0.8228646683486963, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.2799829244613647, + "step": 365 + }, + { + "epoch": 0.08439013142725386, + "grad_norm": 0.9626273899315478, + "learning_rate": 1.682027649769585e-06, + "loss": 1.2427947521209717, + "step": 366 + }, + { + "epoch": 0.08462070555683653, + "grad_norm": 0.724553415716276, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0379959344863892, + "step": 367 + }, + { + "epoch": 0.08485127968641919, + "grad_norm": 0.7173602639018404, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8439304828643799, + "step": 368 + }, + { + "epoch": 0.08508185381600185, + "grad_norm": 0.8477542480910312, + "learning_rate": 1.6958525345622119e-06, + "loss": 1.1249288320541382, + "step": 369 + }, + { + 
"epoch": 0.08531242794558451, + "grad_norm": 0.8715705993798011, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.186207890510559, + "step": 370 + }, + { + "epoch": 0.08554300207516717, + "grad_norm": 0.9990300341847143, + "learning_rate": 1.705069124423963e-06, + "loss": 1.1181306838989258, + "step": 371 + }, + { + "epoch": 0.08577357620474983, + "grad_norm": 0.8792678686182055, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9828017950057983, + "step": 372 + }, + { + "epoch": 0.0860041503343325, + "grad_norm": 0.7710250186072433, + "learning_rate": 1.714285714285714e-06, + "loss": 1.1158804893493652, + "step": 373 + }, + { + "epoch": 0.08623472446391516, + "grad_norm": 0.9602707019706166, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.1771481037139893, + "step": 374 + }, + { + "epoch": 0.08646529859349782, + "grad_norm": 0.8137176951163696, + "learning_rate": 1.7235023041474655e-06, + "loss": 1.1378540992736816, + "step": 375 + }, + { + "epoch": 0.08669587272308046, + "grad_norm": 0.819557644912057, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.2011152505874634, + "step": 376 + }, + { + "epoch": 0.08692644685266313, + "grad_norm": 0.8779923853134601, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0932848453521729, + "step": 377 + }, + { + "epoch": 0.08715702098224579, + "grad_norm": 0.7579888078286682, + "learning_rate": 1.737327188940092e-06, + "loss": 1.0530626773834229, + "step": 378 + }, + { + "epoch": 0.08738759511182845, + "grad_norm": 0.8123881302713649, + "learning_rate": 1.7419354838709676e-06, + "loss": 1.09238600730896, + "step": 379 + }, + { + "epoch": 0.08761816924141111, + "grad_norm": 0.8179032370650432, + "learning_rate": 1.7465437788018434e-06, + "loss": 1.10097336769104, + "step": 380 + }, + { + "epoch": 0.08784874337099377, + "grad_norm": 0.9066182701404021, + "learning_rate": 1.7511520737327188e-06, + "loss": 1.1483392715454102, + "step": 381 + }, + { + "epoch": 0.08807931750057643, + "grad_norm": 
0.7929757896387074, + "learning_rate": 1.7557603686635944e-06, + "loss": 0.9776606559753418, + "step": 382 + }, + { + "epoch": 0.08830989163015909, + "grad_norm": 0.7070713392242878, + "learning_rate": 1.7603686635944698e-06, + "loss": 0.9363219738006592, + "step": 383 + }, + { + "epoch": 0.08854046575974175, + "grad_norm": 0.8829017901239412, + "learning_rate": 1.7649769585253456e-06, + "loss": 1.1259841918945312, + "step": 384 + }, + { + "epoch": 0.08877103988932442, + "grad_norm": 0.8379913612296851, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.0652339458465576, + "step": 385 + }, + { + "epoch": 0.08900161401890708, + "grad_norm": 0.9016264696692738, + "learning_rate": 1.7741935483870966e-06, + "loss": 1.1088197231292725, + "step": 386 + }, + { + "epoch": 0.08923218814848974, + "grad_norm": 0.8434226175443441, + "learning_rate": 1.7788018433179724e-06, + "loss": 1.0171717405319214, + "step": 387 + }, + { + "epoch": 0.0894627622780724, + "grad_norm": 0.893116506697827, + "learning_rate": 1.7834101382488478e-06, + "loss": 1.0391405820846558, + "step": 388 + }, + { + "epoch": 0.08969333640765506, + "grad_norm": 0.9558704899064524, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9970325231552124, + "step": 389 + }, + { + "epoch": 0.08992391053723772, + "grad_norm": 0.8304308575964876, + "learning_rate": 1.792626728110599e-06, + "loss": 1.1427147388458252, + "step": 390 + }, + { + "epoch": 0.09015448466682038, + "grad_norm": 0.8319398781501527, + "learning_rate": 1.7972350230414746e-06, + "loss": 0.8830767273902893, + "step": 391 + }, + { + "epoch": 0.09038505879640304, + "grad_norm": 0.8983385232838542, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0469788312911987, + "step": 392 + }, + { + "epoch": 0.0906156329259857, + "grad_norm": 1.0033385350969977, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.022156834602356, + "step": 393 + }, + { + "epoch": 0.09084620705556837, + "grad_norm": 0.8626168210196775, + "learning_rate": 
1.8110599078341013e-06, + "loss": 1.0723674297332764, + "step": 394 + }, + { + "epoch": 0.09107678118515103, + "grad_norm": 0.8060308252194399, + "learning_rate": 1.8156682027649767e-06, + "loss": 0.9089772701263428, + "step": 395 + }, + { + "epoch": 0.09130735531473369, + "grad_norm": 0.8875270675183294, + "learning_rate": 1.8202764976958525e-06, + "loss": 1.1029877662658691, + "step": 396 + }, + { + "epoch": 0.09153792944431635, + "grad_norm": 0.94113090982248, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.998812198638916, + "step": 397 + }, + { + "epoch": 0.09176850357389901, + "grad_norm": 1.0016962443263888, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.116652250289917, + "step": 398 + }, + { + "epoch": 0.09199907770348167, + "grad_norm": 0.8575568562545252, + "learning_rate": 1.8341013824884793e-06, + "loss": 1.0071923732757568, + "step": 399 + }, + { + "epoch": 0.09222965183306434, + "grad_norm": 0.9758059413772218, + "learning_rate": 1.8387096774193547e-06, + "loss": 1.0713586807250977, + "step": 400 + }, + { + "epoch": 0.092460225962647, + "grad_norm": 0.8883854169226675, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0897400379180908, + "step": 401 + }, + { + "epoch": 0.09269080009222966, + "grad_norm": 0.9342253113098401, + "learning_rate": 1.8479262672811061e-06, + "loss": 0.9571444392204285, + "step": 402 + }, + { + "epoch": 0.09292137422181232, + "grad_norm": 0.9173411430110425, + "learning_rate": 1.8525345622119815e-06, + "loss": 0.9822309017181396, + "step": 403 + }, + { + "epoch": 0.09315194835139497, + "grad_norm": 0.8821702665182305, + "learning_rate": 1.857142857142857e-06, + "loss": 1.0010900497436523, + "step": 404 + }, + { + "epoch": 0.09338252248097763, + "grad_norm": 0.8417761058687274, + "learning_rate": 1.8617511520737325e-06, + "loss": 0.8548961877822876, + "step": 405 + }, + { + "epoch": 0.09361309661056029, + "grad_norm": 0.9390158571311362, + "learning_rate": 1.8663594470046083e-06, + "loss": 
1.0856781005859375, + "step": 406 + }, + { + "epoch": 0.09384367074014295, + "grad_norm": 0.9100547740927183, + "learning_rate": 1.8709677419354837e-06, + "loss": 1.0913856029510498, + "step": 407 + }, + { + "epoch": 0.09407424486972561, + "grad_norm": 1.0379606890495185, + "learning_rate": 1.8755760368663593e-06, + "loss": 0.9409916400909424, + "step": 408 + }, + { + "epoch": 0.09430481899930827, + "grad_norm": 0.9523962354053698, + "learning_rate": 1.880184331797235e-06, + "loss": 0.9950551390647888, + "step": 409 + }, + { + "epoch": 0.09453539312889093, + "grad_norm": 0.861704297563458, + "learning_rate": 1.8847926267281104e-06, + "loss": 0.9915211200714111, + "step": 410 + }, + { + "epoch": 0.0947659672584736, + "grad_norm": 0.9290893256356082, + "learning_rate": 1.889400921658986e-06, + "loss": 1.0381574630737305, + "step": 411 + }, + { + "epoch": 0.09499654138805626, + "grad_norm": 0.9228539253940193, + "learning_rate": 1.8940092165898616e-06, + "loss": 0.8911284804344177, + "step": 412 + }, + { + "epoch": 0.09522711551763892, + "grad_norm": 0.9426577567548815, + "learning_rate": 1.8986175115207372e-06, + "loss": 0.8757172226905823, + "step": 413 + }, + { + "epoch": 0.09545768964722158, + "grad_norm": 0.7971911677154941, + "learning_rate": 1.9032258064516128e-06, + "loss": 0.8362075090408325, + "step": 414 + }, + { + "epoch": 0.09568826377680424, + "grad_norm": 0.9051810749284879, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.906524658203125, + "step": 415 + }, + { + "epoch": 0.0959188379063869, + "grad_norm": 0.9304511138009018, + "learning_rate": 1.912442396313364e-06, + "loss": 1.100447654724121, + "step": 416 + }, + { + "epoch": 0.09614941203596956, + "grad_norm": 0.8321943001479206, + "learning_rate": 1.9170506912442396e-06, + "loss": 0.9658455848693848, + "step": 417 + }, + { + "epoch": 0.09637998616555223, + "grad_norm": 0.9393736008547379, + "learning_rate": 1.921658986175115e-06, + "loss": 0.971304714679718, + "step": 418 + }, + { + "epoch": 
0.09661056029513489, + "grad_norm": 0.8792304256570437, + "learning_rate": 1.926267281105991e-06, + "loss": 0.916153073310852, + "step": 419 + }, + { + "epoch": 0.09684113442471755, + "grad_norm": 0.960700719296913, + "learning_rate": 1.930875576036866e-06, + "loss": 0.9166572093963623, + "step": 420 + }, + { + "epoch": 0.09707170855430021, + "grad_norm": 0.8385154496673872, + "learning_rate": 1.935483870967742e-06, + "loss": 0.8754867315292358, + "step": 421 + }, + { + "epoch": 0.09730228268388287, + "grad_norm": 0.8951117289542856, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9507668018341064, + "step": 422 + }, + { + "epoch": 0.09753285681346553, + "grad_norm": 1.0251554467069826, + "learning_rate": 1.944700460829493e-06, + "loss": 0.8977904319763184, + "step": 423 + }, + { + "epoch": 0.0977634309430482, + "grad_norm": 0.8433365129133346, + "learning_rate": 1.9493087557603686e-06, + "loss": 0.8359580039978027, + "step": 424 + }, + { + "epoch": 0.09799400507263085, + "grad_norm": 0.8653781711190967, + "learning_rate": 1.953917050691244e-06, + "loss": 0.8928875923156738, + "step": 425 + }, + { + "epoch": 0.09822457920221352, + "grad_norm": 1.016156538051323, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9031360149383545, + "step": 426 + }, + { + "epoch": 0.09845515333179618, + "grad_norm": 0.9535004151409068, + "learning_rate": 1.963133640552995e-06, + "loss": 0.9135938286781311, + "step": 427 + }, + { + "epoch": 0.09868572746137884, + "grad_norm": 0.9913179989235431, + "learning_rate": 1.967741935483871e-06, + "loss": 0.8978056907653809, + "step": 428 + }, + { + "epoch": 0.0989163015909615, + "grad_norm": 0.7393338474601954, + "learning_rate": 1.9723502304147463e-06, + "loss": 0.8236517906188965, + "step": 429 + }, + { + "epoch": 0.09914687572054416, + "grad_norm": 0.9578937542491764, + "learning_rate": 1.976958525345622e-06, + "loss": 0.8279497027397156, + "step": 430 + }, + { + "epoch": 0.09937744985012681, + "grad_norm": 0.8687224271614162, 
+ "learning_rate": 1.9815668202764975e-06, + "loss": 0.9273175001144409, + "step": 431 + }, + { + "epoch": 0.09960802397970947, + "grad_norm": 0.9008857811722423, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8990100622177124, + "step": 432 + }, + { + "epoch": 0.09983859810929213, + "grad_norm": 0.9051637314581525, + "learning_rate": 1.9907834101382487e-06, + "loss": 0.9221487045288086, + "step": 433 + }, + { + "epoch": 0.10006917223887479, + "grad_norm": 0.8468556051112544, + "learning_rate": 1.995391705069124e-06, + "loss": 0.7376757264137268, + "step": 434 + }, + { + "epoch": 0.10029974636845745, + "grad_norm": 0.8651656722450953, + "learning_rate": 2e-06, + "loss": 0.8496265411376953, + "step": 435 + }, + { + "epoch": 0.10053032049804012, + "grad_norm": 0.8177327534577133, + "learning_rate": 1.9999999273199326e-06, + "loss": 0.73260897397995, + "step": 436 + }, + { + "epoch": 0.10076089462762278, + "grad_norm": 1.2545811776233549, + "learning_rate": 1.999999709279741e-06, + "loss": 0.9583776593208313, + "step": 437 + }, + { + "epoch": 0.10099146875720544, + "grad_norm": 0.7771019547302918, + "learning_rate": 1.9999993458794573e-06, + "loss": 0.810507595539093, + "step": 438 + }, + { + "epoch": 0.1012220428867881, + "grad_norm": 0.8756547566965167, + "learning_rate": 1.9999988371191337e-06, + "loss": 0.7957329750061035, + "step": 439 + }, + { + "epoch": 0.10145261701637076, + "grad_norm": 0.8325539024899065, + "learning_rate": 1.9999981829988444e-06, + "loss": 0.8141027688980103, + "step": 440 + }, + { + "epoch": 0.10168319114595342, + "grad_norm": 0.9256731752358246, + "learning_rate": 1.9999973835186847e-06, + "loss": 0.8454669117927551, + "step": 441 + }, + { + "epoch": 0.10191376527553608, + "grad_norm": 0.9086105801784582, + "learning_rate": 1.9999964386787706e-06, + "loss": 0.7966687679290771, + "step": 442 + }, + { + "epoch": 0.10214433940511874, + "grad_norm": 0.8420803725442093, + "learning_rate": 1.9999953484792394e-06, + "loss": 
0.8623852133750916, + "step": 443 + }, + { + "epoch": 0.1023749135347014, + "grad_norm": 0.976279238987049, + "learning_rate": 1.9999941129202494e-06, + "loss": 0.9604165554046631, + "step": 444 + }, + { + "epoch": 0.10260548766428407, + "grad_norm": 0.8427059790049124, + "learning_rate": 1.999992732001981e-06, + "loss": 0.7461415529251099, + "step": 445 + }, + { + "epoch": 0.10283606179386673, + "grad_norm": 0.8066869506045082, + "learning_rate": 1.9999912057246342e-06, + "loss": 0.7243722677230835, + "step": 446 + }, + { + "epoch": 0.10306663592344939, + "grad_norm": 0.8507773615519725, + "learning_rate": 1.999989534088431e-06, + "loss": 0.8466402292251587, + "step": 447 + }, + { + "epoch": 0.10329721005303205, + "grad_norm": 0.9504023717644374, + "learning_rate": 1.9999877170936142e-06, + "loss": 0.8062578439712524, + "step": 448 + }, + { + "epoch": 0.10352778418261471, + "grad_norm": 0.8134117517887439, + "learning_rate": 1.9999857547404484e-06, + "loss": 0.8979625701904297, + "step": 449 + }, + { + "epoch": 0.10375835831219737, + "grad_norm": 0.7889840834274454, + "learning_rate": 1.999983647029219e-06, + "loss": 0.7970046401023865, + "step": 450 + }, + { + "epoch": 0.10398893244178004, + "grad_norm": 0.8933195109789729, + "learning_rate": 1.999981393960231e-06, + "loss": 0.9027936458587646, + "step": 451 + }, + { + "epoch": 0.1042195065713627, + "grad_norm": 0.9428128689196352, + "learning_rate": 1.9999789955338133e-06, + "loss": 0.8347916007041931, + "step": 452 + }, + { + "epoch": 0.10445008070094536, + "grad_norm": 0.7636783217821816, + "learning_rate": 1.9999764517503146e-06, + "loss": 0.7856979370117188, + "step": 453 + }, + { + "epoch": 0.10468065483052802, + "grad_norm": 0.8588750023960529, + "learning_rate": 1.9999737626101037e-06, + "loss": 0.8370383381843567, + "step": 454 + }, + { + "epoch": 0.10491122896011068, + "grad_norm": 0.7607065236764231, + "learning_rate": 1.9999709281135718e-06, + "loss": 0.8629742860794067, + "step": 455 + }, + { + 
"epoch": 0.10514180308969334, + "grad_norm": 0.7031266959727278, + "learning_rate": 1.9999679482611315e-06, + "loss": 0.8187414407730103, + "step": 456 + }, + { + "epoch": 0.105372377219276, + "grad_norm": 0.7996485745988237, + "learning_rate": 1.9999648230532156e-06, + "loss": 0.8169279098510742, + "step": 457 + }, + { + "epoch": 0.10560295134885866, + "grad_norm": 0.7291726430068795, + "learning_rate": 1.999961552490278e-06, + "loss": 0.7186012268066406, + "step": 458 + }, + { + "epoch": 0.10583352547844131, + "grad_norm": 0.8814433348597316, + "learning_rate": 1.9999581365727947e-06, + "loss": 0.8088201284408569, + "step": 459 + }, + { + "epoch": 0.10606409960802397, + "grad_norm": 0.8945815471698739, + "learning_rate": 1.999954575301262e-06, + "loss": 0.7067796587944031, + "step": 460 + }, + { + "epoch": 0.10629467373760663, + "grad_norm": 0.8727386643724712, + "learning_rate": 1.9999508686761974e-06, + "loss": 0.8839461803436279, + "step": 461 + }, + { + "epoch": 0.1065252478671893, + "grad_norm": 0.7752145606049893, + "learning_rate": 1.99994701669814e-06, + "loss": 0.750046968460083, + "step": 462 + }, + { + "epoch": 0.10675582199677196, + "grad_norm": 0.8246620057663118, + "learning_rate": 1.999943019367649e-06, + "loss": 0.7954964637756348, + "step": 463 + }, + { + "epoch": 0.10698639612635462, + "grad_norm": 0.8139454190246876, + "learning_rate": 1.9999388766853065e-06, + "loss": 0.7178900241851807, + "step": 464 + }, + { + "epoch": 0.10721697025593728, + "grad_norm": 0.7775108685144316, + "learning_rate": 1.999934588651714e-06, + "loss": 0.7583869695663452, + "step": 465 + }, + { + "epoch": 0.10744754438551994, + "grad_norm": 0.7294165374555056, + "learning_rate": 1.999930155267495e-06, + "loss": 0.8068876266479492, + "step": 466 + }, + { + "epoch": 0.1076781185151026, + "grad_norm": 0.7396884936816651, + "learning_rate": 1.9999255765332946e-06, + "loss": 0.7507776021957397, + "step": 467 + }, + { + "epoch": 0.10790869264468526, + "grad_norm": 
0.7418847797451098, + "learning_rate": 1.999920852449777e-06, + "loss": 0.7719494104385376, + "step": 468 + }, + { + "epoch": 0.10813926677426793, + "grad_norm": 0.7666886626519035, + "learning_rate": 1.99991598301763e-06, + "loss": 0.7420990467071533, + "step": 469 + }, + { + "epoch": 0.10836984090385059, + "grad_norm": 0.7701810012003275, + "learning_rate": 1.9999109682375606e-06, + "loss": 0.7152374386787415, + "step": 470 + }, + { + "epoch": 0.10860041503343325, + "grad_norm": 0.6850973266115482, + "learning_rate": 1.9999058081102985e-06, + "loss": 0.7971220016479492, + "step": 471 + }, + { + "epoch": 0.10883098916301591, + "grad_norm": 0.7306176016482578, + "learning_rate": 1.9999005026365936e-06, + "loss": 0.774874746799469, + "step": 472 + }, + { + "epoch": 0.10906156329259857, + "grad_norm": 0.8957955356096076, + "learning_rate": 1.999895051817216e-06, + "loss": 0.7567731142044067, + "step": 473 + }, + { + "epoch": 0.10929213742218123, + "grad_norm": 0.9679087986333686, + "learning_rate": 1.99988945565296e-06, + "loss": 0.7221060991287231, + "step": 474 + }, + { + "epoch": 0.1095227115517639, + "grad_norm": 0.7758710632294333, + "learning_rate": 1.9998837141446378e-06, + "loss": 0.8064852952957153, + "step": 475 + }, + { + "epoch": 0.10975328568134655, + "grad_norm": 0.7342367942239104, + "learning_rate": 1.9998778272930842e-06, + "loss": 0.7329462766647339, + "step": 476 + }, + { + "epoch": 0.10998385981092922, + "grad_norm": 0.6944047501493505, + "learning_rate": 1.999871795099155e-06, + "loss": 0.715752363204956, + "step": 477 + }, + { + "epoch": 0.11021443394051188, + "grad_norm": 1.250464562888065, + "learning_rate": 1.9998656175637265e-06, + "loss": 0.8702882528305054, + "step": 478 + }, + { + "epoch": 0.11044500807009454, + "grad_norm": 0.9132853105204283, + "learning_rate": 1.9998592946876976e-06, + "loss": 0.8559622764587402, + "step": 479 + }, + { + "epoch": 0.1106755821996772, + "grad_norm": 1.0302853941011325, + "learning_rate": 
1.999852826471987e-06, + "loss": 0.910442590713501, + "step": 480 + }, + { + "epoch": 0.11090615632925986, + "grad_norm": 0.7658983046756905, + "learning_rate": 1.9998462129175347e-06, + "loss": 0.8159372806549072, + "step": 481 + }, + { + "epoch": 0.11113673045884252, + "grad_norm": 0.6814545269174561, + "learning_rate": 1.9998394540253022e-06, + "loss": 0.8120635747909546, + "step": 482 + }, + { + "epoch": 0.11136730458842518, + "grad_norm": 0.9382461503301303, + "learning_rate": 1.999832549796272e-06, + "loss": 0.7867682576179504, + "step": 483 + }, + { + "epoch": 0.11159787871800785, + "grad_norm": 0.7285854274509946, + "learning_rate": 1.999825500231448e-06, + "loss": 0.695517897605896, + "step": 484 + }, + { + "epoch": 0.1118284528475905, + "grad_norm": 0.7426222297635688, + "learning_rate": 1.999818305331854e-06, + "loss": 0.8402971029281616, + "step": 485 + }, + { + "epoch": 0.11205902697717317, + "grad_norm": 0.9496598665654408, + "learning_rate": 1.9998109650985372e-06, + "loss": 0.7987074851989746, + "step": 486 + }, + { + "epoch": 0.11228960110675582, + "grad_norm": 0.7601824170608918, + "learning_rate": 1.9998034795325634e-06, + "loss": 0.6525362133979797, + "step": 487 + }, + { + "epoch": 0.11252017523633848, + "grad_norm": 0.6649425764525309, + "learning_rate": 1.999795848635021e-06, + "loss": 0.6218863725662231, + "step": 488 + }, + { + "epoch": 0.11275074936592114, + "grad_norm": 0.6793237780262881, + "learning_rate": 1.99978807240702e-06, + "loss": 0.7225729823112488, + "step": 489 + }, + { + "epoch": 0.1129813234955038, + "grad_norm": 0.7289774462660574, + "learning_rate": 1.9997801508496893e-06, + "loss": 0.7553551197052002, + "step": 490 + }, + { + "epoch": 0.11321189762508646, + "grad_norm": 0.7070554840091658, + "learning_rate": 1.999772083964182e-06, + "loss": 0.6695772409439087, + "step": 491 + }, + { + "epoch": 0.11344247175466912, + "grad_norm": 0.7937000317220514, + "learning_rate": 1.999763871751669e-06, + "loss": 0.7683162689208984, + 
"step": 492 + }, + { + "epoch": 0.11367304588425178, + "grad_norm": 0.7958897510308529, + "learning_rate": 1.9997555142133457e-06, + "loss": 0.7761441469192505, + "step": 493 + }, + { + "epoch": 0.11390362001383444, + "grad_norm": 0.8391915745578431, + "learning_rate": 1.999747011350426e-06, + "loss": 0.7204692959785461, + "step": 494 + }, + { + "epoch": 0.1141341941434171, + "grad_norm": 0.6535908344557003, + "learning_rate": 1.999738363164146e-06, + "loss": 0.6960519552230835, + "step": 495 + }, + { + "epoch": 0.11436476827299977, + "grad_norm": 0.669834933810116, + "learning_rate": 1.999729569655763e-06, + "loss": 0.7502788305282593, + "step": 496 + }, + { + "epoch": 0.11459534240258243, + "grad_norm": 0.7119093873273127, + "learning_rate": 1.999720630826555e-06, + "loss": 0.7649067640304565, + "step": 497 + }, + { + "epoch": 0.11482591653216509, + "grad_norm": 0.865452520980124, + "learning_rate": 1.9997115466778214e-06, + "loss": 0.6867918968200684, + "step": 498 + }, + { + "epoch": 0.11505649066174775, + "grad_norm": 0.7725462530919065, + "learning_rate": 1.9997023172108828e-06, + "loss": 0.7324330806732178, + "step": 499 + }, + { + "epoch": 0.11528706479133041, + "grad_norm": 0.7493898462804314, + "learning_rate": 1.999692942427081e-06, + "loss": 0.7452527284622192, + "step": 500 + }, + { + "epoch": 0.11551763892091307, + "grad_norm": 0.8849003751162662, + "learning_rate": 1.9996834223277775e-06, + "loss": 0.8311381340026855, + "step": 501 + }, + { + "epoch": 0.11574821305049574, + "grad_norm": 0.7698737492516583, + "learning_rate": 1.999673756914358e-06, + "loss": 0.6955340504646301, + "step": 502 + }, + { + "epoch": 0.1159787871800784, + "grad_norm": 0.9035827861690212, + "learning_rate": 1.999663946188226e-06, + "loss": 0.802892804145813, + "step": 503 + }, + { + "epoch": 0.11620936130966106, + "grad_norm": 0.9827928009523055, + "learning_rate": 1.9996539901508086e-06, + "loss": 0.8307123184204102, + "step": 504 + }, + { + "epoch": 0.11643993543924372, + 
"grad_norm": 0.7167523084062808, + "learning_rate": 1.9996438888035525e-06, + "loss": 0.7604272365570068, + "step": 505 + }, + { + "epoch": 0.11667050956882638, + "grad_norm": 0.7887244154559485, + "learning_rate": 1.9996336421479256e-06, + "loss": 0.798006534576416, + "step": 506 + }, + { + "epoch": 0.11690108369840904, + "grad_norm": 0.9102232519285063, + "learning_rate": 1.999623250185418e-06, + "loss": 0.7342728972434998, + "step": 507 + }, + { + "epoch": 0.1171316578279917, + "grad_norm": 0.689331248687117, + "learning_rate": 1.9996127129175402e-06, + "loss": 0.7659468650817871, + "step": 508 + }, + { + "epoch": 0.11736223195757436, + "grad_norm": 0.9057052272338976, + "learning_rate": 1.999602030345824e-06, + "loss": 0.6467913389205933, + "step": 509 + }, + { + "epoch": 0.11759280608715703, + "grad_norm": 0.9026632882900626, + "learning_rate": 1.9995912024718214e-06, + "loss": 0.8207371234893799, + "step": 510 + }, + { + "epoch": 0.11782338021673969, + "grad_norm": 0.6427345565408408, + "learning_rate": 1.999580229297108e-06, + "loss": 0.6865919232368469, + "step": 511 + }, + { + "epoch": 0.11805395434632235, + "grad_norm": 0.9123825063372557, + "learning_rate": 1.999569110823277e-06, + "loss": 0.7367759346961975, + "step": 512 + }, + { + "epoch": 0.11828452847590501, + "grad_norm": 0.7732312467631449, + "learning_rate": 1.9995578470519455e-06, + "loss": 0.678460955619812, + "step": 513 + }, + { + "epoch": 0.11851510260548767, + "grad_norm": 0.9273893139854266, + "learning_rate": 1.999546437984751e-06, + "loss": 0.7442954182624817, + "step": 514 + }, + { + "epoch": 0.11874567673507032, + "grad_norm": 0.7064385006159516, + "learning_rate": 1.9995348836233515e-06, + "loss": 0.6881241798400879, + "step": 515 + }, + { + "epoch": 0.11897625086465298, + "grad_norm": 0.7494917485319132, + "learning_rate": 1.9995231839694267e-06, + "loss": 0.6957181692123413, + "step": 516 + }, + { + "epoch": 0.11920682499423564, + "grad_norm": 1.0228956088069594, + "learning_rate": 
1.9995113390246773e-06, + "loss": 0.655665934085846, + "step": 517 + }, + { + "epoch": 0.1194373991238183, + "grad_norm": 0.8789756041062182, + "learning_rate": 1.9994993487908245e-06, + "loss": 0.8156173229217529, + "step": 518 + }, + { + "epoch": 0.11966797325340096, + "grad_norm": 0.8973364358315123, + "learning_rate": 1.9994872132696125e-06, + "loss": 0.7063135504722595, + "step": 519 + }, + { + "epoch": 0.11989854738298363, + "grad_norm": 0.91785396837973, + "learning_rate": 1.9994749324628046e-06, + "loss": 0.694409966468811, + "step": 520 + }, + { + "epoch": 0.12012912151256629, + "grad_norm": 0.7331348179727938, + "learning_rate": 1.9994625063721852e-06, + "loss": 0.8167020082473755, + "step": 521 + }, + { + "epoch": 0.12035969564214895, + "grad_norm": 0.9326590546614593, + "learning_rate": 1.9994499349995615e-06, + "loss": 0.7214051485061646, + "step": 522 + }, + { + "epoch": 0.12059026977173161, + "grad_norm": 0.8993621490561152, + "learning_rate": 1.999437218346761e-06, + "loss": 0.8798317909240723, + "step": 523 + }, + { + "epoch": 0.12082084390131427, + "grad_norm": 0.6552492075288662, + "learning_rate": 1.9994243564156316e-06, + "loss": 0.684230387210846, + "step": 524 + }, + { + "epoch": 0.12105141803089693, + "grad_norm": 0.9112132053465716, + "learning_rate": 1.999411349208043e-06, + "loss": 0.7519755363464355, + "step": 525 + }, + { + "epoch": 0.1212819921604796, + "grad_norm": 0.8052315425352758, + "learning_rate": 1.9993981967258857e-06, + "loss": 0.8420398235321045, + "step": 526 + }, + { + "epoch": 0.12151256629006225, + "grad_norm": 0.7105743668928439, + "learning_rate": 1.999384898971073e-06, + "loss": 0.8349270820617676, + "step": 527 + }, + { + "epoch": 0.12174314041964492, + "grad_norm": 1.0983006521395142, + "learning_rate": 1.999371455945536e-06, + "loss": 0.794980525970459, + "step": 528 + }, + { + "epoch": 0.12197371454922758, + "grad_norm": 1.1816598770476783, + "learning_rate": 1.9993578676512294e-06, + "loss": 0.666529655456543, + 
"step": 529 + }, + { + "epoch": 0.12220428867881024, + "grad_norm": 0.7564948773505585, + "learning_rate": 1.999344134090129e-06, + "loss": 0.7356991767883301, + "step": 530 + }, + { + "epoch": 0.1224348628083929, + "grad_norm": 0.8210277180950322, + "learning_rate": 1.9993302552642305e-06, + "loss": 0.6289858818054199, + "step": 531 + }, + { + "epoch": 0.12266543693797556, + "grad_norm": 0.7570779839057131, + "learning_rate": 1.9993162311755516e-06, + "loss": 0.706937313079834, + "step": 532 + }, + { + "epoch": 0.12289601106755822, + "grad_norm": 0.8676215771749471, + "learning_rate": 1.99930206182613e-06, + "loss": 0.7265158891677856, + "step": 533 + }, + { + "epoch": 0.12312658519714088, + "grad_norm": 0.7802472371537522, + "learning_rate": 1.999287747218027e-06, + "loss": 0.6575910449028015, + "step": 534 + }, + { + "epoch": 0.12335715932672355, + "grad_norm": 0.6298254280489823, + "learning_rate": 1.999273287353322e-06, + "loss": 0.6696841716766357, + "step": 535 + }, + { + "epoch": 0.1235877334563062, + "grad_norm": 1.071079002554872, + "learning_rate": 1.9992586822341177e-06, + "loss": 0.7749101519584656, + "step": 536 + }, + { + "epoch": 0.12381830758588887, + "grad_norm": 0.9432884782892066, + "learning_rate": 1.9992439318625367e-06, + "loss": 0.6880518198013306, + "step": 537 + }, + { + "epoch": 0.12404888171547153, + "grad_norm": 0.7827285978985046, + "learning_rate": 1.999229036240723e-06, + "loss": 0.6871178150177002, + "step": 538 + }, + { + "epoch": 0.12427945584505419, + "grad_norm": 0.7976778538474537, + "learning_rate": 1.999213995370842e-06, + "loss": 0.5867285132408142, + "step": 539 + }, + { + "epoch": 0.12451002997463685, + "grad_norm": 0.9357527236724963, + "learning_rate": 1.99919880925508e-06, + "loss": 0.8276966214179993, + "step": 540 + }, + { + "epoch": 0.12474060410421951, + "grad_norm": 1.0175450529032033, + "learning_rate": 1.9991834778956445e-06, + "loss": 0.7710754871368408, + "step": 541 + }, + { + "epoch": 0.12497117823380217, + 
"grad_norm": 0.9390745817535735, + "learning_rate": 1.9991680012947642e-06, + "loss": 0.7753217816352844, + "step": 542 + }, + { + "epoch": 0.12520175236338482, + "grad_norm": 0.8094522929040034, + "learning_rate": 1.9991523794546886e-06, + "loss": 0.7906090617179871, + "step": 543 + }, + { + "epoch": 0.12543232649296748, + "grad_norm": 0.9340000664605023, + "learning_rate": 1.9991366123776885e-06, + "loss": 0.7199760675430298, + "step": 544 + }, + { + "epoch": 0.12566290062255014, + "grad_norm": 0.7023452308433018, + "learning_rate": 1.9991207000660556e-06, + "loss": 0.671667218208313, + "step": 545 + }, + { + "epoch": 0.1258934747521328, + "grad_norm": 0.8347026711317173, + "learning_rate": 1.9991046425221036e-06, + "loss": 0.7289182543754578, + "step": 546 + }, + { + "epoch": 0.12612404888171547, + "grad_norm": 0.7827652568460417, + "learning_rate": 1.999088439748166e-06, + "loss": 0.6894270181655884, + "step": 547 + }, + { + "epoch": 0.12635462301129813, + "grad_norm": 0.7280796152072353, + "learning_rate": 1.9990720917465983e-06, + "loss": 0.5861620306968689, + "step": 548 + }, + { + "epoch": 0.1265851971408808, + "grad_norm": 0.9057106564897087, + "learning_rate": 1.999055598519777e-06, + "loss": 0.7082245349884033, + "step": 549 + }, + { + "epoch": 0.12681577127046345, + "grad_norm": 0.9647506404446157, + "learning_rate": 1.999038960070099e-06, + "loss": 0.6746149659156799, + "step": 550 + }, + { + "epoch": 0.1270463454000461, + "grad_norm": 0.8620899067636014, + "learning_rate": 1.999022176399983e-06, + "loss": 0.7791188955307007, + "step": 551 + }, + { + "epoch": 0.12727691952962877, + "grad_norm": 0.7157725370776972, + "learning_rate": 1.999005247511869e-06, + "loss": 0.6371017694473267, + "step": 552 + }, + { + "epoch": 0.12750749365921143, + "grad_norm": 1.0373263968991309, + "learning_rate": 1.9989881734082182e-06, + "loss": 0.7006558179855347, + "step": 553 + }, + { + "epoch": 0.1277380677887941, + "grad_norm": 1.0670128946400503, + "learning_rate": 
1.9989709540915115e-06, + "loss": 0.7011476755142212, + "step": 554 + }, + { + "epoch": 0.12796864191837676, + "grad_norm": 0.7293348024241428, + "learning_rate": 1.998953589564252e-06, + "loss": 0.6518280506134033, + "step": 555 + }, + { + "epoch": 0.12819921604795942, + "grad_norm": 1.013490270581775, + "learning_rate": 1.9989360798289646e-06, + "loss": 0.703351616859436, + "step": 556 + }, + { + "epoch": 0.12842979017754208, + "grad_norm": 0.9007382613729068, + "learning_rate": 1.998918424888194e-06, + "loss": 0.7498817443847656, + "step": 557 + }, + { + "epoch": 0.12866036430712474, + "grad_norm": 0.7936147649672419, + "learning_rate": 1.998900624744507e-06, + "loss": 0.647042989730835, + "step": 558 + }, + { + "epoch": 0.1288909384367074, + "grad_norm": 1.058658035724676, + "learning_rate": 1.99888267940049e-06, + "loss": 0.7519131898880005, + "step": 559 + }, + { + "epoch": 0.12912151256629006, + "grad_norm": 0.9392201849899589, + "learning_rate": 1.9988645888587524e-06, + "loss": 0.8416757583618164, + "step": 560 + }, + { + "epoch": 0.12935208669587273, + "grad_norm": 0.7856467653874107, + "learning_rate": 1.9988463531219238e-06, + "loss": 0.7044156193733215, + "step": 561 + }, + { + "epoch": 0.1295826608254554, + "grad_norm": 0.7712707168267965, + "learning_rate": 1.9988279721926547e-06, + "loss": 0.5429179668426514, + "step": 562 + }, + { + "epoch": 0.12981323495503805, + "grad_norm": 0.8186921939471294, + "learning_rate": 1.9988094460736173e-06, + "loss": 0.6146735548973083, + "step": 563 + }, + { + "epoch": 0.1300438090846207, + "grad_norm": 0.8439852070799176, + "learning_rate": 1.9987907747675038e-06, + "loss": 0.7544587850570679, + "step": 564 + }, + { + "epoch": 0.13027438321420337, + "grad_norm": 0.9760725928946941, + "learning_rate": 1.998771958277029e-06, + "loss": 0.7344266772270203, + "step": 565 + }, + { + "epoch": 0.13050495734378603, + "grad_norm": 0.8485941936610121, + "learning_rate": 1.9987529966049276e-06, + "loss": 0.6952091455459595, + 
"step": 566 + }, + { + "epoch": 0.1307355314733687, + "grad_norm": 0.7996168239987546, + "learning_rate": 1.9987338897539563e-06, + "loss": 0.6164644956588745, + "step": 567 + }, + { + "epoch": 0.13096610560295135, + "grad_norm": 1.04815525718601, + "learning_rate": 1.998714637726892e-06, + "loss": 0.7554208636283875, + "step": 568 + }, + { + "epoch": 0.13119667973253402, + "grad_norm": 0.97358719596577, + "learning_rate": 1.9986952405265336e-06, + "loss": 0.6640980243682861, + "step": 569 + }, + { + "epoch": 0.13142725386211668, + "grad_norm": 0.8089360786109361, + "learning_rate": 1.9986756981557005e-06, + "loss": 0.6947968006134033, + "step": 570 + }, + { + "epoch": 0.13165782799169934, + "grad_norm": 0.8239726316605849, + "learning_rate": 1.9986560106172332e-06, + "loss": 0.5987592935562134, + "step": 571 + }, + { + "epoch": 0.131888402121282, + "grad_norm": 0.709030479654625, + "learning_rate": 1.9986361779139944e-06, + "loss": 0.5830701589584351, + "step": 572 + }, + { + "epoch": 0.13211897625086466, + "grad_norm": 1.1719328645727012, + "learning_rate": 1.9986162000488655e-06, + "loss": 0.6589827537536621, + "step": 573 + }, + { + "epoch": 0.13234955038044732, + "grad_norm": 0.795778409153881, + "learning_rate": 1.9985960770247514e-06, + "loss": 0.7761766910552979, + "step": 574 + }, + { + "epoch": 0.13258012451002998, + "grad_norm": 0.8403074018612, + "learning_rate": 1.998575808844577e-06, + "loss": 0.6817613244056702, + "step": 575 + }, + { + "epoch": 0.13281069863961265, + "grad_norm": 0.8817998372104671, + "learning_rate": 1.998555395511289e-06, + "loss": 0.553085207939148, + "step": 576 + }, + { + "epoch": 0.1330412727691953, + "grad_norm": 0.6885856342268037, + "learning_rate": 1.998534837027854e-06, + "loss": 0.6500711441040039, + "step": 577 + }, + { + "epoch": 0.13327184689877797, + "grad_norm": 1.046231764034874, + "learning_rate": 1.9985141333972605e-06, + "loss": 0.7818950414657593, + "step": 578 + }, + { + "epoch": 0.13350242102836063, + 
"grad_norm": 0.7987907466299384, + "learning_rate": 1.9984932846225178e-06, + "loss": 0.7030247449874878, + "step": 579 + }, + { + "epoch": 0.1337329951579433, + "grad_norm": 0.7031460051202854, + "learning_rate": 1.9984722907066572e-06, + "loss": 0.6336206197738647, + "step": 580 + }, + { + "epoch": 0.13396356928752595, + "grad_norm": 0.8178681347907562, + "learning_rate": 1.9984511516527295e-06, + "loss": 0.7483044862747192, + "step": 581 + }, + { + "epoch": 0.1341941434171086, + "grad_norm": 0.8070808524670383, + "learning_rate": 1.9984298674638084e-06, + "loss": 0.7124725580215454, + "step": 582 + }, + { + "epoch": 0.13442471754669127, + "grad_norm": 0.8209937510618921, + "learning_rate": 1.998408438142987e-06, + "loss": 0.623436450958252, + "step": 583 + }, + { + "epoch": 0.1346552916762739, + "grad_norm": 0.8592886051949084, + "learning_rate": 1.9983868636933804e-06, + "loss": 0.646303653717041, + "step": 584 + }, + { + "epoch": 0.13488586580585657, + "grad_norm": 0.715391883952278, + "learning_rate": 1.998365144118125e-06, + "loss": 0.6349619626998901, + "step": 585 + }, + { + "epoch": 0.13511643993543923, + "grad_norm": 0.842094849315078, + "learning_rate": 1.9983432794203778e-06, + "loss": 0.5222466588020325, + "step": 586 + }, + { + "epoch": 0.1353470140650219, + "grad_norm": 0.7893129778630776, + "learning_rate": 1.998321269603317e-06, + "loss": 0.7210453152656555, + "step": 587 + }, + { + "epoch": 0.13557758819460455, + "grad_norm": 0.8260995902689467, + "learning_rate": 1.998299114670142e-06, + "loss": 0.6829872131347656, + "step": 588 + }, + { + "epoch": 0.13580816232418721, + "grad_norm": 0.714861095640182, + "learning_rate": 1.998276814624073e-06, + "loss": 0.6493744254112244, + "step": 589 + }, + { + "epoch": 0.13603873645376988, + "grad_norm": 0.8350239344719634, + "learning_rate": 1.998254369468352e-06, + "loss": 0.6885819435119629, + "step": 590 + }, + { + "epoch": 0.13626931058335254, + "grad_norm": 0.7070632175859811, + "learning_rate": 
1.9982317792062415e-06, + "loss": 0.6393503546714783, + "step": 591 + }, + { + "epoch": 0.1364998847129352, + "grad_norm": 1.010551624947432, + "learning_rate": 1.998209043841025e-06, + "loss": 0.7243417501449585, + "step": 592 + }, + { + "epoch": 0.13673045884251786, + "grad_norm": 0.693273868923859, + "learning_rate": 1.9981861633760073e-06, + "loss": 0.5955190658569336, + "step": 593 + }, + { + "epoch": 0.13696103297210052, + "grad_norm": 0.89841301134605, + "learning_rate": 1.9981631378145147e-06, + "loss": 0.6907675862312317, + "step": 594 + }, + { + "epoch": 0.13719160710168318, + "grad_norm": 1.022542216960162, + "learning_rate": 1.9981399671598938e-06, + "loss": 0.8540418148040771, + "step": 595 + }, + { + "epoch": 0.13742218123126584, + "grad_norm": 0.850573072747265, + "learning_rate": 1.9981166514155128e-06, + "loss": 0.6558555364608765, + "step": 596 + }, + { + "epoch": 0.1376527553608485, + "grad_norm": 0.9448807343375427, + "learning_rate": 1.9980931905847607e-06, + "loss": 0.6902164220809937, + "step": 597 + }, + { + "epoch": 0.13788332949043117, + "grad_norm": 1.240663469028779, + "learning_rate": 1.9980695846710485e-06, + "loss": 0.7090387344360352, + "step": 598 + }, + { + "epoch": 0.13811390362001383, + "grad_norm": 0.8847772852436644, + "learning_rate": 1.9980458336778067e-06, + "loss": 0.5913621187210083, + "step": 599 + }, + { + "epoch": 0.1383444777495965, + "grad_norm": 0.864647475805302, + "learning_rate": 1.998021937608488e-06, + "loss": 0.6742709279060364, + "step": 600 + }, + { + "epoch": 0.13857505187917915, + "grad_norm": 0.9253166862332501, + "learning_rate": 1.997997896466566e-06, + "loss": 0.7156273126602173, + "step": 601 + }, + { + "epoch": 0.1388056260087618, + "grad_norm": 0.7104566809406643, + "learning_rate": 1.9979737102555358e-06, + "loss": 0.6039655208587646, + "step": 602 + }, + { + "epoch": 0.13903620013834447, + "grad_norm": 0.7521323143425293, + "learning_rate": 1.9979493789789123e-06, + "loss": 0.6437175273895264, + 
"step": 603 + }, + { + "epoch": 0.13926677426792713, + "grad_norm": 0.7922747435817725, + "learning_rate": 1.9979249026402327e-06, + "loss": 0.6037663221359253, + "step": 604 + }, + { + "epoch": 0.1394973483975098, + "grad_norm": 0.8526913554693543, + "learning_rate": 1.9979002812430544e-06, + "loss": 0.6014829874038696, + "step": 605 + }, + { + "epoch": 0.13972792252709246, + "grad_norm": 0.9960319429386536, + "learning_rate": 1.9978755147909575e-06, + "loss": 0.5644428133964539, + "step": 606 + }, + { + "epoch": 0.13995849665667512, + "grad_norm": 0.7146930597248379, + "learning_rate": 1.997850603287541e-06, + "loss": 0.5483256578445435, + "step": 607 + }, + { + "epoch": 0.14018907078625778, + "grad_norm": 0.941628560636658, + "learning_rate": 1.9978255467364264e-06, + "loss": 0.6323236227035522, + "step": 608 + }, + { + "epoch": 0.14041964491584044, + "grad_norm": 0.8661204864695959, + "learning_rate": 1.9978003451412563e-06, + "loss": 0.677186131477356, + "step": 609 + }, + { + "epoch": 0.1406502190454231, + "grad_norm": 0.7467694215725664, + "learning_rate": 1.9977749985056934e-06, + "loss": 0.6768285036087036, + "step": 610 + }, + { + "epoch": 0.14088079317500576, + "grad_norm": 0.6978429335446755, + "learning_rate": 1.997749506833422e-06, + "loss": 0.5347047448158264, + "step": 611 + }, + { + "epoch": 0.14111136730458843, + "grad_norm": 0.8856138167235749, + "learning_rate": 1.9977238701281484e-06, + "loss": 0.7459336519241333, + "step": 612 + }, + { + "epoch": 0.1413419414341711, + "grad_norm": 0.7081494897690513, + "learning_rate": 1.9976980883935982e-06, + "loss": 0.6617337465286255, + "step": 613 + }, + { + "epoch": 0.14157251556375375, + "grad_norm": 0.766248846701343, + "learning_rate": 1.9976721616335197e-06, + "loss": 0.6214765310287476, + "step": 614 + }, + { + "epoch": 0.1418030896933364, + "grad_norm": 0.9664061776833217, + "learning_rate": 1.9976460898516814e-06, + "loss": 0.7468793392181396, + "step": 615 + }, + { + "epoch": 0.14203366382291907, 
+ "grad_norm": 0.9401860990707812, + "learning_rate": 1.9976198730518733e-06, + "loss": 0.676013708114624, + "step": 616 + }, + { + "epoch": 0.14226423795250173, + "grad_norm": 0.7984359669803877, + "learning_rate": 1.9975935112379057e-06, + "loss": 0.6350057125091553, + "step": 617 + }, + { + "epoch": 0.1424948120820844, + "grad_norm": 0.7941645196610473, + "learning_rate": 1.997567004413611e-06, + "loss": 0.6743426322937012, + "step": 618 + }, + { + "epoch": 0.14272538621166705, + "grad_norm": 0.9456320720036326, + "learning_rate": 1.9975403525828423e-06, + "loss": 0.5894836187362671, + "step": 619 + }, + { + "epoch": 0.14295596034124972, + "grad_norm": 1.1964423414511856, + "learning_rate": 1.9975135557494735e-06, + "loss": 0.7142415046691895, + "step": 620 + }, + { + "epoch": 0.14318653447083238, + "grad_norm": 0.7973360588907056, + "learning_rate": 1.9974866139174e-06, + "loss": 0.6402454972267151, + "step": 621 + }, + { + "epoch": 0.14341710860041504, + "grad_norm": 0.8197617379148621, + "learning_rate": 1.997459527090538e-06, + "loss": 0.6870661973953247, + "step": 622 + }, + { + "epoch": 0.1436476827299977, + "grad_norm": 0.9660987988063562, + "learning_rate": 1.9974322952728247e-06, + "loss": 0.5526704788208008, + "step": 623 + }, + { + "epoch": 0.14387825685958036, + "grad_norm": 0.8373386744091922, + "learning_rate": 1.9974049184682186e-06, + "loss": 0.6712762117385864, + "step": 624 + }, + { + "epoch": 0.14410883098916302, + "grad_norm": 0.8330659804365839, + "learning_rate": 1.997377396680699e-06, + "loss": 0.6064080595970154, + "step": 625 + }, + { + "epoch": 0.14433940511874568, + "grad_norm": 0.7758896299152315, + "learning_rate": 1.997349729914267e-06, + "loss": 0.5540767908096313, + "step": 626 + }, + { + "epoch": 0.14456997924832835, + "grad_norm": 0.7444906414234538, + "learning_rate": 1.997321918172944e-06, + "loss": 0.52143394947052, + "step": 627 + }, + { + "epoch": 0.144800553377911, + "grad_norm": 0.8091707705607726, + "learning_rate": 
1.9972939614607723e-06, + "loss": 0.7708792686462402, + "step": 628 + }, + { + "epoch": 0.14503112750749367, + "grad_norm": 1.0019252225174067, + "learning_rate": 1.997265859781816e-06, + "loss": 0.706872284412384, + "step": 629 + }, + { + "epoch": 0.14526170163707633, + "grad_norm": 0.7978488701627702, + "learning_rate": 1.99723761314016e-06, + "loss": 0.6643307209014893, + "step": 630 + }, + { + "epoch": 0.145492275766659, + "grad_norm": 1.0319728160628425, + "learning_rate": 1.9972092215399107e-06, + "loss": 0.6582880020141602, + "step": 631 + }, + { + "epoch": 0.14572284989624165, + "grad_norm": 0.7041979367649327, + "learning_rate": 1.997180684985194e-06, + "loss": 0.5704749822616577, + "step": 632 + }, + { + "epoch": 0.1459534240258243, + "grad_norm": 0.9160954038448087, + "learning_rate": 1.997152003480159e-06, + "loss": 0.6021866798400879, + "step": 633 + }, + { + "epoch": 0.14618399815540697, + "grad_norm": 1.0186739140184302, + "learning_rate": 1.9971231770289745e-06, + "loss": 0.6980762481689453, + "step": 634 + }, + { + "epoch": 0.14641457228498964, + "grad_norm": 0.9102171344238382, + "learning_rate": 1.9970942056358307e-06, + "loss": 0.6252140998840332, + "step": 635 + }, + { + "epoch": 0.1466451464145723, + "grad_norm": 0.8257085970836279, + "learning_rate": 1.9970650893049384e-06, + "loss": 0.5938589572906494, + "step": 636 + }, + { + "epoch": 0.14687572054415496, + "grad_norm": 0.7561297866548697, + "learning_rate": 1.997035828040531e-06, + "loss": 0.48420464992523193, + "step": 637 + }, + { + "epoch": 0.14710629467373762, + "grad_norm": 1.1749911282917564, + "learning_rate": 1.997006421846861e-06, + "loss": 0.6917499303817749, + "step": 638 + }, + { + "epoch": 0.14733686880332028, + "grad_norm": 0.9636395596462505, + "learning_rate": 1.9969768707282034e-06, + "loss": 0.7040522694587708, + "step": 639 + }, + { + "epoch": 0.14756744293290291, + "grad_norm": 0.7956128694692409, + "learning_rate": 1.9969471746888535e-06, + "loss": 0.6131860017776489, 
+ "step": 640 + }, + { + "epoch": 0.14779801706248558, + "grad_norm": 0.8000550155014501, + "learning_rate": 1.996917333733128e-06, + "loss": 0.7042062282562256, + "step": 641 + }, + { + "epoch": 0.14802859119206824, + "grad_norm": 0.9440344299424565, + "learning_rate": 1.9968873478653647e-06, + "loss": 0.6729326844215393, + "step": 642 + }, + { + "epoch": 0.1482591653216509, + "grad_norm": 0.8065631083250541, + "learning_rate": 1.996857217089922e-06, + "loss": 0.5801228880882263, + "step": 643 + }, + { + "epoch": 0.14848973945123356, + "grad_norm": 0.9584481605552773, + "learning_rate": 1.99682694141118e-06, + "loss": 0.6657989025115967, + "step": 644 + }, + { + "epoch": 0.14872031358081622, + "grad_norm": 0.8276892521273487, + "learning_rate": 1.9967965208335395e-06, + "loss": 0.5915562510490417, + "step": 645 + }, + { + "epoch": 0.14895088771039888, + "grad_norm": 0.8005079741579677, + "learning_rate": 1.9967659553614225e-06, + "loss": 0.6651759147644043, + "step": 646 + }, + { + "epoch": 0.14918146183998154, + "grad_norm": 0.785500734493462, + "learning_rate": 1.996735244999272e-06, + "loss": 0.625860333442688, + "step": 647 + }, + { + "epoch": 0.1494120359695642, + "grad_norm": 0.891334856659417, + "learning_rate": 1.996704389751552e-06, + "loss": 0.5731238126754761, + "step": 648 + }, + { + "epoch": 0.14964261009914687, + "grad_norm": 0.8662032133236818, + "learning_rate": 1.996673389622748e-06, + "loss": 0.6233615875244141, + "step": 649 + }, + { + "epoch": 0.14987318422872953, + "grad_norm": 0.7037223780792468, + "learning_rate": 1.9966422446173655e-06, + "loss": 0.5294947028160095, + "step": 650 + }, + { + "epoch": 0.1501037583583122, + "grad_norm": 0.8024689158972043, + "learning_rate": 1.996610954739932e-06, + "loss": 0.6234334707260132, + "step": 651 + }, + { + "epoch": 0.15033433248789485, + "grad_norm": 0.9863259301950934, + "learning_rate": 1.996579519994996e-06, + "loss": 0.5800126194953918, + "step": 652 + }, + { + "epoch": 0.1505649066174775, + 
"grad_norm": 0.9145794705086053, + "learning_rate": 1.9965479403871268e-06, + "loss": 0.7072441577911377, + "step": 653 + }, + { + "epoch": 0.15079548074706017, + "grad_norm": 0.8604804316966843, + "learning_rate": 1.996516215920915e-06, + "loss": 0.6350210309028625, + "step": 654 + }, + { + "epoch": 0.15102605487664283, + "grad_norm": 0.8272551438363688, + "learning_rate": 1.996484346600971e-06, + "loss": 0.6098944544792175, + "step": 655 + }, + { + "epoch": 0.1512566290062255, + "grad_norm": 0.7942772112843086, + "learning_rate": 1.996452332431929e-06, + "loss": 0.6593213081359863, + "step": 656 + }, + { + "epoch": 0.15148720313580816, + "grad_norm": 1.0870788996229426, + "learning_rate": 1.9964201734184413e-06, + "loss": 0.6997909545898438, + "step": 657 + }, + { + "epoch": 0.15171777726539082, + "grad_norm": 0.8320533396880808, + "learning_rate": 1.996387869565183e-06, + "loss": 0.5672277212142944, + "step": 658 + }, + { + "epoch": 0.15194835139497348, + "grad_norm": 0.8777194103988153, + "learning_rate": 1.99635542087685e-06, + "loss": 0.5835613012313843, + "step": 659 + }, + { + "epoch": 0.15217892552455614, + "grad_norm": 1.0025309187744094, + "learning_rate": 1.9963228273581587e-06, + "loss": 0.6001917123794556, + "step": 660 + }, + { + "epoch": 0.1524094996541388, + "grad_norm": 0.9582174045063777, + "learning_rate": 1.996290089013847e-06, + "loss": 0.6421242356300354, + "step": 661 + }, + { + "epoch": 0.15264007378372146, + "grad_norm": 0.8996449559898986, + "learning_rate": 1.996257205848674e-06, + "loss": 0.6888365745544434, + "step": 662 + }, + { + "epoch": 0.15287064791330413, + "grad_norm": 0.8017642329752841, + "learning_rate": 1.9962241778674193e-06, + "loss": 0.6694042682647705, + "step": 663 + }, + { + "epoch": 0.1531012220428868, + "grad_norm": 0.8362235694997654, + "learning_rate": 1.9961910050748836e-06, + "loss": 0.6754042506217957, + "step": 664 + }, + { + "epoch": 0.15333179617246945, + "grad_norm": 0.9429947161447709, + "learning_rate": 
1.9961576874758893e-06, + "loss": 0.576134979724884, + "step": 665 + }, + { + "epoch": 0.1535623703020521, + "grad_norm": 0.8634505888713511, + "learning_rate": 1.9961242250752796e-06, + "loss": 0.6548957824707031, + "step": 666 + }, + { + "epoch": 0.15379294443163477, + "grad_norm": 0.8494612034918267, + "learning_rate": 1.9960906178779183e-06, + "loss": 0.553372859954834, + "step": 667 + }, + { + "epoch": 0.15402351856121743, + "grad_norm": 0.8776559544848238, + "learning_rate": 1.9960568658886904e-06, + "loss": 0.6749063730239868, + "step": 668 + }, + { + "epoch": 0.1542540926908001, + "grad_norm": 0.8490449157821316, + "learning_rate": 1.9960229691125023e-06, + "loss": 0.6083666086196899, + "step": 669 + }, + { + "epoch": 0.15448466682038275, + "grad_norm": 0.9102216407598661, + "learning_rate": 1.995988927554281e-06, + "loss": 0.6468017101287842, + "step": 670 + }, + { + "epoch": 0.15471524094996542, + "grad_norm": 0.9054463862187181, + "learning_rate": 1.995954741218976e-06, + "loss": 0.7095121145248413, + "step": 671 + }, + { + "epoch": 0.15494581507954808, + "grad_norm": 0.8984210973740085, + "learning_rate": 1.995920410111555e-06, + "loss": 0.7167302966117859, + "step": 672 + }, + { + "epoch": 0.15517638920913074, + "grad_norm": 0.9754903087688545, + "learning_rate": 1.995885934237009e-06, + "loss": 0.6563462018966675, + "step": 673 + }, + { + "epoch": 0.1554069633387134, + "grad_norm": 0.7833661271069817, + "learning_rate": 1.9958513136003495e-06, + "loss": 0.638554573059082, + "step": 674 + }, + { + "epoch": 0.15563753746829606, + "grad_norm": 1.1119382875058637, + "learning_rate": 1.995816548206609e-06, + "loss": 0.7051291465759277, + "step": 675 + }, + { + "epoch": 0.15586811159787872, + "grad_norm": 0.879000690907415, + "learning_rate": 1.995781638060841e-06, + "loss": 0.6292394399642944, + "step": 676 + }, + { + "epoch": 0.15609868572746138, + "grad_norm": 0.7328696227145686, + "learning_rate": 1.99574658316812e-06, + "loss": 0.5266016721725464, + 
"step": 677 + }, + { + "epoch": 0.15632925985704405, + "grad_norm": 0.8021809147598078, + "learning_rate": 1.9957113835335415e-06, + "loss": 0.6059033870697021, + "step": 678 + }, + { + "epoch": 0.1565598339866267, + "grad_norm": 1.0012445200078677, + "learning_rate": 1.995676039162222e-06, + "loss": 0.5252447128295898, + "step": 679 + }, + { + "epoch": 0.15679040811620937, + "grad_norm": 0.9661534967224599, + "learning_rate": 1.9956405500593e-06, + "loss": 0.5963196754455566, + "step": 680 + }, + { + "epoch": 0.15702098224579203, + "grad_norm": 1.1191160767100459, + "learning_rate": 1.9956049162299322e-06, + "loss": 0.7262317538261414, + "step": 681 + }, + { + "epoch": 0.1572515563753747, + "grad_norm": 0.6929567178003186, + "learning_rate": 1.995569137679301e-06, + "loss": 0.6701623201370239, + "step": 682 + }, + { + "epoch": 0.15748213050495735, + "grad_norm": 1.1067508842107727, + "learning_rate": 1.9955332144126048e-06, + "loss": 0.6201569437980652, + "step": 683 + }, + { + "epoch": 0.15771270463454, + "grad_norm": 0.8729576302308473, + "learning_rate": 1.9954971464350673e-06, + "loss": 0.5338399410247803, + "step": 684 + }, + { + "epoch": 0.15794327876412267, + "grad_norm": 1.0541267316046437, + "learning_rate": 1.99546093375193e-06, + "loss": 0.6784210205078125, + "step": 685 + }, + { + "epoch": 0.15817385289370534, + "grad_norm": 0.7386088048688241, + "learning_rate": 1.9954245763684574e-06, + "loss": 0.6752813458442688, + "step": 686 + }, + { + "epoch": 0.158404427023288, + "grad_norm": 0.92655840240498, + "learning_rate": 1.9953880742899344e-06, + "loss": 0.6734355688095093, + "step": 687 + }, + { + "epoch": 0.15863500115287066, + "grad_norm": 1.0183777461857344, + "learning_rate": 1.995351427521667e-06, + "loss": 0.4857062101364136, + "step": 688 + }, + { + "epoch": 0.15886557528245332, + "grad_norm": 1.0292686670210065, + "learning_rate": 1.995314636068982e-06, + "loss": 0.6014343500137329, + "step": 689 + }, + { + "epoch": 0.15909614941203598, + 
"grad_norm": 0.6804392354384567, + "learning_rate": 1.995277699937227e-06, + "loss": 0.571649432182312, + "step": 690 + }, + { + "epoch": 0.15932672354161864, + "grad_norm": 0.8504096595688001, + "learning_rate": 1.9952406191317717e-06, + "loss": 0.5195556879043579, + "step": 691 + }, + { + "epoch": 0.1595572976712013, + "grad_norm": 1.0458950135227758, + "learning_rate": 1.995203393658006e-06, + "loss": 0.6520895957946777, + "step": 692 + }, + { + "epoch": 0.15978787180078396, + "grad_norm": 0.8415432435774023, + "learning_rate": 1.995166023521341e-06, + "loss": 0.7223460674285889, + "step": 693 + }, + { + "epoch": 0.16001844593036663, + "grad_norm": 0.9976828679541363, + "learning_rate": 1.9951285087272085e-06, + "loss": 0.5540120005607605, + "step": 694 + }, + { + "epoch": 0.1602490200599493, + "grad_norm": 0.9583028785849829, + "learning_rate": 1.995090849281062e-06, + "loss": 0.6539945602416992, + "step": 695 + }, + { + "epoch": 0.16047959418953192, + "grad_norm": 0.6996553037894581, + "learning_rate": 1.995053045188376e-06, + "loss": 0.595169186592102, + "step": 696 + }, + { + "epoch": 0.16071016831911458, + "grad_norm": 0.7841493951031693, + "learning_rate": 1.995015096454645e-06, + "loss": 0.564440131187439, + "step": 697 + }, + { + "epoch": 0.16094074244869724, + "grad_norm": 0.8288568147288248, + "learning_rate": 1.9949770030853857e-06, + "loss": 0.5934277772903442, + "step": 698 + }, + { + "epoch": 0.1611713165782799, + "grad_norm": 0.8284586150514878, + "learning_rate": 1.9949387650861353e-06, + "loss": 0.5645352602005005, + "step": 699 + }, + { + "epoch": 0.16140189070786257, + "grad_norm": 0.7431587516594325, + "learning_rate": 1.9949003824624517e-06, + "loss": 0.6437552571296692, + "step": 700 + }, + { + "epoch": 0.16163246483744523, + "grad_norm": 0.9720884796741701, + "learning_rate": 1.9948618552199147e-06, + "loss": 0.7052004337310791, + "step": 701 + }, + { + "epoch": 0.1618630389670279, + "grad_norm": 0.869867046800395, + "learning_rate": 
1.994823183364124e-06, + "loss": 0.6547686457633972, + "step": 702 + }, + { + "epoch": 0.16209361309661055, + "grad_norm": 0.8852938288883528, + "learning_rate": 1.994784366900702e-06, + "loss": 0.582744836807251, + "step": 703 + }, + { + "epoch": 0.1623241872261932, + "grad_norm": 0.9493941174588165, + "learning_rate": 1.99474540583529e-06, + "loss": 0.6668936014175415, + "step": 704 + }, + { + "epoch": 0.16255476135577587, + "grad_norm": 0.8294615633120708, + "learning_rate": 1.994706300173552e-06, + "loss": 0.6076918840408325, + "step": 705 + }, + { + "epoch": 0.16278533548535853, + "grad_norm": 0.8313694025786441, + "learning_rate": 1.994667049921172e-06, + "loss": 0.5053621530532837, + "step": 706 + }, + { + "epoch": 0.1630159096149412, + "grad_norm": 0.7898437620774408, + "learning_rate": 1.994627655083856e-06, + "loss": 0.5480915904045105, + "step": 707 + }, + { + "epoch": 0.16324648374452386, + "grad_norm": 0.8758549357955973, + "learning_rate": 1.99458811566733e-06, + "loss": 0.5851327776908875, + "step": 708 + }, + { + "epoch": 0.16347705787410652, + "grad_norm": 0.8484239464634123, + "learning_rate": 1.9945484316773415e-06, + "loss": 0.7058213949203491, + "step": 709 + }, + { + "epoch": 0.16370763200368918, + "grad_norm": 1.019538936894149, + "learning_rate": 1.9945086031196588e-06, + "loss": 0.6900246739387512, + "step": 710 + }, + { + "epoch": 0.16393820613327184, + "grad_norm": 0.9247299002550031, + "learning_rate": 1.994468630000072e-06, + "loss": 0.6088757514953613, + "step": 711 + }, + { + "epoch": 0.1641687802628545, + "grad_norm": 0.82117755294185, + "learning_rate": 1.9944285123243908e-06, + "loss": 0.6167945861816406, + "step": 712 + }, + { + "epoch": 0.16439935439243716, + "grad_norm": 0.8171354955480022, + "learning_rate": 1.994388250098447e-06, + "loss": 0.5842427015304565, + "step": 713 + }, + { + "epoch": 0.16462992852201982, + "grad_norm": 1.0833616769520091, + "learning_rate": 1.9943478433280937e-06, + "loss": 0.6709132194519043, + 
"step": 714 + }, + { + "epoch": 0.1648605026516025, + "grad_norm": 0.9486447603343945, + "learning_rate": 1.994307292019204e-06, + "loss": 0.5600479245185852, + "step": 715 + }, + { + "epoch": 0.16509107678118515, + "grad_norm": 0.9425877157645439, + "learning_rate": 1.994266596177672e-06, + "loss": 0.59420245885849, + "step": 716 + }, + { + "epoch": 0.1653216509107678, + "grad_norm": 0.8878954538957776, + "learning_rate": 1.994225755809414e-06, + "loss": 0.6098697185516357, + "step": 717 + }, + { + "epoch": 0.16555222504035047, + "grad_norm": 0.9792435497913993, + "learning_rate": 1.994184770920366e-06, + "loss": 0.5626084804534912, + "step": 718 + }, + { + "epoch": 0.16578279916993313, + "grad_norm": 0.827415177568412, + "learning_rate": 1.9941436415164854e-06, + "loss": 0.633317232131958, + "step": 719 + }, + { + "epoch": 0.1660133732995158, + "grad_norm": 0.7458775266643737, + "learning_rate": 1.994102367603752e-06, + "loss": 0.6629287004470825, + "step": 720 + }, + { + "epoch": 0.16624394742909845, + "grad_norm": 0.8804838237561229, + "learning_rate": 1.994060949188164e-06, + "loss": 0.6281176805496216, + "step": 721 + }, + { + "epoch": 0.16647452155868112, + "grad_norm": 0.7448717784104247, + "learning_rate": 1.994019386275743e-06, + "loss": 0.49195849895477295, + "step": 722 + }, + { + "epoch": 0.16670509568826378, + "grad_norm": 0.8001133040698483, + "learning_rate": 1.9939776788725295e-06, + "loss": 0.5165697932243347, + "step": 723 + }, + { + "epoch": 0.16693566981784644, + "grad_norm": 0.7747636914973149, + "learning_rate": 1.9939358269845867e-06, + "loss": 0.6294844150543213, + "step": 724 + }, + { + "epoch": 0.1671662439474291, + "grad_norm": 0.944854174617811, + "learning_rate": 1.9938938306179986e-06, + "loss": 0.6117822527885437, + "step": 725 + }, + { + "epoch": 0.16739681807701176, + "grad_norm": 0.8223415721013929, + "learning_rate": 1.9938516897788693e-06, + "loss": 0.5904515981674194, + "step": 726 + }, + { + "epoch": 0.16762739220659442, + 
"grad_norm": 0.9451811550082199, + "learning_rate": 1.9938094044733247e-06, + "loss": 0.5453853011131287, + "step": 727 + }, + { + "epoch": 0.16785796633617708, + "grad_norm": 1.0093698810967915, + "learning_rate": 1.9937669747075107e-06, + "loss": 0.6724731922149658, + "step": 728 + }, + { + "epoch": 0.16808854046575974, + "grad_norm": 0.8787203913390783, + "learning_rate": 1.993724400487596e-06, + "loss": 0.4844778776168823, + "step": 729 + }, + { + "epoch": 0.1683191145953424, + "grad_norm": 1.0150110817624924, + "learning_rate": 1.9936816818197682e-06, + "loss": 0.6666063070297241, + "step": 730 + }, + { + "epoch": 0.16854968872492507, + "grad_norm": 0.8363215992575103, + "learning_rate": 1.9936388187102374e-06, + "loss": 0.49354803562164307, + "step": 731 + }, + { + "epoch": 0.16878026285450773, + "grad_norm": 1.011739420494133, + "learning_rate": 1.993595811165234e-06, + "loss": 0.6587027311325073, + "step": 732 + }, + { + "epoch": 0.1690108369840904, + "grad_norm": 0.8706809761457309, + "learning_rate": 1.9935526591910095e-06, + "loss": 0.5618065595626831, + "step": 733 + }, + { + "epoch": 0.16924141111367305, + "grad_norm": 1.0230867510580486, + "learning_rate": 1.993509362793837e-06, + "loss": 0.6332052946090698, + "step": 734 + }, + { + "epoch": 0.1694719852432557, + "grad_norm": 0.8938300688074264, + "learning_rate": 1.9934659219800095e-06, + "loss": 0.5888797044754028, + "step": 735 + }, + { + "epoch": 0.16970255937283837, + "grad_norm": 0.9600504381358347, + "learning_rate": 1.9934223367558418e-06, + "loss": 0.6995177865028381, + "step": 736 + }, + { + "epoch": 0.16993313350242104, + "grad_norm": 0.8183852978697493, + "learning_rate": 1.9933786071276693e-06, + "loss": 0.6117641925811768, + "step": 737 + }, + { + "epoch": 0.1701637076320037, + "grad_norm": 0.8824726889784998, + "learning_rate": 1.9933347331018487e-06, + "loss": 0.7138235569000244, + "step": 738 + }, + { + "epoch": 0.17039428176158636, + "grad_norm": 0.9234925675447027, + 
"learning_rate": 1.993290714684758e-06, + "loss": 0.6139661073684692, + "step": 739 + }, + { + "epoch": 0.17062485589116902, + "grad_norm": 0.9457487351494172, + "learning_rate": 1.9932465518827945e-06, + "loss": 0.6998997926712036, + "step": 740 + }, + { + "epoch": 0.17085543002075168, + "grad_norm": 0.8625145077640682, + "learning_rate": 1.9932022447023787e-06, + "loss": 0.5736757516860962, + "step": 741 + }, + { + "epoch": 0.17108600415033434, + "grad_norm": 0.7768775382949296, + "learning_rate": 1.993157793149951e-06, + "loss": 0.6069833040237427, + "step": 742 + }, + { + "epoch": 0.171316578279917, + "grad_norm": 0.9368489446003049, + "learning_rate": 1.9931131972319726e-06, + "loss": 0.618720531463623, + "step": 743 + }, + { + "epoch": 0.17154715240949966, + "grad_norm": 1.1182101771495103, + "learning_rate": 1.9930684569549263e-06, + "loss": 0.6918530464172363, + "step": 744 + }, + { + "epoch": 0.17177772653908233, + "grad_norm": 0.9107072762217621, + "learning_rate": 1.993023572325315e-06, + "loss": 0.5303134322166443, + "step": 745 + }, + { + "epoch": 0.172008300668665, + "grad_norm": 1.163525853024132, + "learning_rate": 1.9929785433496637e-06, + "loss": 0.5017606019973755, + "step": 746 + }, + { + "epoch": 0.17223887479824765, + "grad_norm": 0.8248835281602814, + "learning_rate": 1.9929333700345176e-06, + "loss": 0.5683910846710205, + "step": 747 + }, + { + "epoch": 0.1724694489278303, + "grad_norm": 1.024957040527593, + "learning_rate": 1.992888052386443e-06, + "loss": 0.7594112157821655, + "step": 748 + }, + { + "epoch": 0.17270002305741297, + "grad_norm": 0.8415419064063624, + "learning_rate": 1.9928425904120272e-06, + "loss": 0.5817109942436218, + "step": 749 + }, + { + "epoch": 0.17293059718699563, + "grad_norm": 0.9772344685918459, + "learning_rate": 1.9927969841178785e-06, + "loss": 0.74810391664505, + "step": 750 + }, + { + "epoch": 0.17316117131657827, + "grad_norm": 0.7709842631317299, + "learning_rate": 1.992751233510627e-06, + "loss": 
0.5620408654212952, + "step": 751 + }, + { + "epoch": 0.17339174544616093, + "grad_norm": 0.9147017514524429, + "learning_rate": 1.9927053385969224e-06, + "loss": 0.5661174654960632, + "step": 752 + }, + { + "epoch": 0.1736223195757436, + "grad_norm": 0.8721149149743948, + "learning_rate": 1.992659299383436e-06, + "loss": 0.6170656681060791, + "step": 753 + }, + { + "epoch": 0.17385289370532625, + "grad_norm": 0.8946316220934861, + "learning_rate": 1.99261311587686e-06, + "loss": 0.6399837136268616, + "step": 754 + }, + { + "epoch": 0.1740834678349089, + "grad_norm": 0.7741035474142021, + "learning_rate": 1.992566788083908e-06, + "loss": 0.646568775177002, + "step": 755 + }, + { + "epoch": 0.17431404196449157, + "grad_norm": 0.8936741351690501, + "learning_rate": 1.992520316011314e-06, + "loss": 0.6836358904838562, + "step": 756 + }, + { + "epoch": 0.17454461609407423, + "grad_norm": 0.8304614027509832, + "learning_rate": 1.9924736996658327e-06, + "loss": 0.7077229619026184, + "step": 757 + }, + { + "epoch": 0.1747751902236569, + "grad_norm": 0.87551528703017, + "learning_rate": 1.9924269390542408e-06, + "loss": 0.5127657651901245, + "step": 758 + }, + { + "epoch": 0.17500576435323956, + "grad_norm": 0.9006786249451013, + "learning_rate": 1.992380034183336e-06, + "loss": 0.49244552850723267, + "step": 759 + }, + { + "epoch": 0.17523633848282222, + "grad_norm": 0.8017561502743571, + "learning_rate": 1.9923329850599353e-06, + "loss": 0.6145986318588257, + "step": 760 + }, + { + "epoch": 0.17546691261240488, + "grad_norm": 1.0163805424999015, + "learning_rate": 1.9922857916908784e-06, + "loss": 0.5233397483825684, + "step": 761 + }, + { + "epoch": 0.17569748674198754, + "grad_norm": 0.9596772303146165, + "learning_rate": 1.992238454083025e-06, + "loss": 0.6296844482421875, + "step": 762 + }, + { + "epoch": 0.1759280608715702, + "grad_norm": 0.7860963753584104, + "learning_rate": 1.9921909722432565e-06, + "loss": 0.5274437665939331, + "step": 763 + }, + { + "epoch": 
0.17615863500115286, + "grad_norm": 0.8930810667791799, + "learning_rate": 1.9921433461784744e-06, + "loss": 0.6365554332733154, + "step": 764 + }, + { + "epoch": 0.17638920913073552, + "grad_norm": 0.9611521576454714, + "learning_rate": 1.992095575895602e-06, + "loss": 0.6256603002548218, + "step": 765 + }, + { + "epoch": 0.17661978326031819, + "grad_norm": 0.9488006285824869, + "learning_rate": 1.9920476614015827e-06, + "loss": 0.6914918422698975, + "step": 766 + }, + { + "epoch": 0.17685035738990085, + "grad_norm": 0.9925839476608436, + "learning_rate": 1.9919996027033823e-06, + "loss": 0.618436336517334, + "step": 767 + }, + { + "epoch": 0.1770809315194835, + "grad_norm": 1.0637307823847924, + "learning_rate": 1.9919513998079857e-06, + "loss": 0.7496027946472168, + "step": 768 + }, + { + "epoch": 0.17731150564906617, + "grad_norm": 0.873569070894671, + "learning_rate": 1.9919030527224e-06, + "loss": 0.6188616752624512, + "step": 769 + }, + { + "epoch": 0.17754207977864883, + "grad_norm": 0.9573370107752551, + "learning_rate": 1.991854561453653e-06, + "loss": 0.6525505185127258, + "step": 770 + }, + { + "epoch": 0.1777726539082315, + "grad_norm": 0.8791752874309303, + "learning_rate": 1.9918059260087933e-06, + "loss": 0.6302521228790283, + "step": 771 + }, + { + "epoch": 0.17800322803781415, + "grad_norm": 0.7767159097983319, + "learning_rate": 1.9917571463948905e-06, + "loss": 0.48817628622055054, + "step": 772 + }, + { + "epoch": 0.17823380216739682, + "grad_norm": 0.9997756560425097, + "learning_rate": 1.9917082226190357e-06, + "loss": 0.7571396231651306, + "step": 773 + }, + { + "epoch": 0.17846437629697948, + "grad_norm": 0.9019653117383005, + "learning_rate": 1.99165915468834e-06, + "loss": 0.6416890025138855, + "step": 774 + }, + { + "epoch": 0.17869495042656214, + "grad_norm": 0.9030141776784474, + "learning_rate": 1.9916099426099357e-06, + "loss": 0.5668659210205078, + "step": 775 + }, + { + "epoch": 0.1789255245561448, + "grad_norm": 
0.8616948701360102, + "learning_rate": 1.991560586390977e-06, + "loss": 0.5491495132446289, + "step": 776 + }, + { + "epoch": 0.17915609868572746, + "grad_norm": 0.8461739489170892, + "learning_rate": 1.991511086038637e-06, + "loss": 0.5596655607223511, + "step": 777 + }, + { + "epoch": 0.17938667281531012, + "grad_norm": 0.948797979696852, + "learning_rate": 1.991461441560113e-06, + "loss": 0.606618344783783, + "step": 778 + }, + { + "epoch": 0.17961724694489278, + "grad_norm": 0.8682290862864503, + "learning_rate": 1.9914116529626195e-06, + "loss": 0.6534444093704224, + "step": 779 + }, + { + "epoch": 0.17984782107447544, + "grad_norm": 0.7942772802909244, + "learning_rate": 1.9913617202533956e-06, + "loss": 0.6566994190216064, + "step": 780 + }, + { + "epoch": 0.1800783952040581, + "grad_norm": 0.8753236598884384, + "learning_rate": 1.9913116434396976e-06, + "loss": 0.6745898723602295, + "step": 781 + }, + { + "epoch": 0.18030896933364077, + "grad_norm": 0.8904483654623074, + "learning_rate": 1.991261422528806e-06, + "loss": 0.6260639429092407, + "step": 782 + }, + { + "epoch": 0.18053954346322343, + "grad_norm": 1.095081708934966, + "learning_rate": 1.9912110575280203e-06, + "loss": 0.6937930583953857, + "step": 783 + }, + { + "epoch": 0.1807701175928061, + "grad_norm": 0.7535766751550929, + "learning_rate": 1.991160548444662e-06, + "loss": 0.5220614671707153, + "step": 784 + }, + { + "epoch": 0.18100069172238875, + "grad_norm": 1.0171096783148863, + "learning_rate": 1.9911098952860725e-06, + "loss": 0.630463719367981, + "step": 785 + }, + { + "epoch": 0.1812312658519714, + "grad_norm": 0.9064677619585607, + "learning_rate": 1.9910590980596154e-06, + "loss": 0.5476818084716797, + "step": 786 + }, + { + "epoch": 0.18146183998155407, + "grad_norm": 0.8827497683061851, + "learning_rate": 1.9910081567726745e-06, + "loss": 0.619910478591919, + "step": 787 + }, + { + "epoch": 0.18169241411113674, + "grad_norm": 0.9583246792904453, + "learning_rate": 
1.990957071432654e-06, + "loss": 0.759405255317688, + "step": 788 + }, + { + "epoch": 0.1819229882407194, + "grad_norm": 0.9249642030902185, + "learning_rate": 1.9909058420469808e-06, + "loss": 0.6093606948852539, + "step": 789 + }, + { + "epoch": 0.18215356237030206, + "grad_norm": 1.0777393301256872, + "learning_rate": 1.9908544686231e-06, + "loss": 0.5358198285102844, + "step": 790 + }, + { + "epoch": 0.18238413649988472, + "grad_norm": 0.8619190562873736, + "learning_rate": 1.9908029511684806e-06, + "loss": 0.577926754951477, + "step": 791 + }, + { + "epoch": 0.18261471062946738, + "grad_norm": 1.0298704295501269, + "learning_rate": 1.990751289690611e-06, + "loss": 0.6232448816299438, + "step": 792 + }, + { + "epoch": 0.18284528475905004, + "grad_norm": 0.9837349749201401, + "learning_rate": 1.9906994841970005e-06, + "loss": 0.5461868047714233, + "step": 793 + }, + { + "epoch": 0.1830758588886327, + "grad_norm": 0.9430576362377001, + "learning_rate": 1.9906475346951793e-06, + "loss": 0.6074671745300293, + "step": 794 + }, + { + "epoch": 0.18330643301821536, + "grad_norm": 0.9936839742941572, + "learning_rate": 1.990595441192699e-06, + "loss": 0.7101696729660034, + "step": 795 + }, + { + "epoch": 0.18353700714779803, + "grad_norm": 0.950260898814123, + "learning_rate": 1.9905432036971318e-06, + "loss": 0.6507722735404968, + "step": 796 + }, + { + "epoch": 0.1837675812773807, + "grad_norm": 0.8942288113166778, + "learning_rate": 1.9904908222160715e-06, + "loss": 0.6497524380683899, + "step": 797 + }, + { + "epoch": 0.18399815540696335, + "grad_norm": 0.9396678930556792, + "learning_rate": 1.9904382967571315e-06, + "loss": 0.6359415054321289, + "step": 798 + }, + { + "epoch": 0.184228729536546, + "grad_norm": 0.8070326036364724, + "learning_rate": 1.9903856273279475e-06, + "loss": 0.6062989234924316, + "step": 799 + }, + { + "epoch": 0.18445930366612867, + "grad_norm": 0.9626677000162343, + "learning_rate": 1.9903328139361753e-06, + "loss": 0.5872690677642822, + 
"step": 800 + }, + { + "epoch": 0.18468987779571133, + "grad_norm": 0.7985705265040473, + "learning_rate": 1.9902798565894917e-06, + "loss": 0.541993260383606, + "step": 801 + }, + { + "epoch": 0.184920451925294, + "grad_norm": 0.9775943406877085, + "learning_rate": 1.9902267552955948e-06, + "loss": 0.6509004235267639, + "step": 802 + }, + { + "epoch": 0.18515102605487666, + "grad_norm": 1.032367389635004, + "learning_rate": 1.9901735100622034e-06, + "loss": 0.6994458436965942, + "step": 803 + }, + { + "epoch": 0.18538160018445932, + "grad_norm": 0.723727027388961, + "learning_rate": 1.9901201208970574e-06, + "loss": 0.5426214933395386, + "step": 804 + }, + { + "epoch": 0.18561217431404198, + "grad_norm": 0.9494744349432898, + "learning_rate": 1.9900665878079172e-06, + "loss": 0.5889894366264343, + "step": 805 + }, + { + "epoch": 0.18584274844362464, + "grad_norm": 0.8565255265724333, + "learning_rate": 1.990012910802564e-06, + "loss": 0.6455902457237244, + "step": 806 + }, + { + "epoch": 0.18607332257320727, + "grad_norm": 0.8487813974117321, + "learning_rate": 1.989959089888801e-06, + "loss": 0.6336048245429993, + "step": 807 + }, + { + "epoch": 0.18630389670278993, + "grad_norm": 0.8414189962242138, + "learning_rate": 1.9899051250744517e-06, + "loss": 0.6091762781143188, + "step": 808 + }, + { + "epoch": 0.1865344708323726, + "grad_norm": 0.9439572961008054, + "learning_rate": 1.9898510163673594e-06, + "loss": 0.5551953315734863, + "step": 809 + }, + { + "epoch": 0.18676504496195526, + "grad_norm": 1.0494491780231465, + "learning_rate": 1.9897967637753907e-06, + "loss": 0.6441607475280762, + "step": 810 + }, + { + "epoch": 0.18699561909153792, + "grad_norm": 0.886313339848662, + "learning_rate": 1.989742367306431e-06, + "loss": 0.5766205787658691, + "step": 811 + }, + { + "epoch": 0.18722619322112058, + "grad_norm": 0.8129745295139125, + "learning_rate": 1.9896878269683872e-06, + "loss": 0.624677836894989, + "step": 812 + }, + { + "epoch": 0.18745676735070324, + 
"grad_norm": 1.0883386432883795, + "learning_rate": 1.9896331427691878e-06, + "loss": 0.5942056775093079, + "step": 813 + }, + { + "epoch": 0.1876873414802859, + "grad_norm": 0.9421668652395382, + "learning_rate": 1.989578314716781e-06, + "loss": 0.5194109082221985, + "step": 814 + }, + { + "epoch": 0.18791791560986856, + "grad_norm": 0.9041080200693152, + "learning_rate": 1.9895233428191375e-06, + "loss": 0.5851193070411682, + "step": 815 + }, + { + "epoch": 0.18814848973945122, + "grad_norm": 0.7963655717285544, + "learning_rate": 1.989468227084248e-06, + "loss": 0.5596088171005249, + "step": 816 + }, + { + "epoch": 0.18837906386903389, + "grad_norm": 0.9364254304069746, + "learning_rate": 1.989412967520123e-06, + "loss": 0.608109712600708, + "step": 817 + }, + { + "epoch": 0.18860963799861655, + "grad_norm": 0.8927696059217924, + "learning_rate": 1.9893575641347957e-06, + "loss": 0.6488924026489258, + "step": 818 + }, + { + "epoch": 0.1888402121281992, + "grad_norm": 0.9447086482881396, + "learning_rate": 1.9893020169363202e-06, + "loss": 0.6668595671653748, + "step": 819 + }, + { + "epoch": 0.18907078625778187, + "grad_norm": 0.9937318511996248, + "learning_rate": 1.9892463259327702e-06, + "loss": 0.6516261696815491, + "step": 820 + }, + { + "epoch": 0.18930136038736453, + "grad_norm": 1.0796549259081865, + "learning_rate": 1.9891904911322408e-06, + "loss": 0.5960654020309448, + "step": 821 + }, + { + "epoch": 0.1895319345169472, + "grad_norm": 0.7909478658460368, + "learning_rate": 1.989134512542848e-06, + "loss": 0.5836078524589539, + "step": 822 + }, + { + "epoch": 0.18976250864652985, + "grad_norm": 0.8238472267757905, + "learning_rate": 1.98907839017273e-06, + "loss": 0.6233468651771545, + "step": 823 + }, + { + "epoch": 0.18999308277611252, + "grad_norm": 0.9807541829716023, + "learning_rate": 1.989022124030043e-06, + "loss": 0.6228024363517761, + "step": 824 + }, + { + "epoch": 0.19022365690569518, + "grad_norm": 0.8131035743107407, + "learning_rate": 
1.9889657141229674e-06, + "loss": 0.5549489259719849, + "step": 825 + }, + { + "epoch": 0.19045423103527784, + "grad_norm": 1.04900407843417, + "learning_rate": 1.988909160459703e-06, + "loss": 0.572743833065033, + "step": 826 + }, + { + "epoch": 0.1906848051648605, + "grad_norm": 0.9532449351501632, + "learning_rate": 1.988852463048469e-06, + "loss": 0.5483371019363403, + "step": 827 + }, + { + "epoch": 0.19091537929444316, + "grad_norm": 0.8589634934665029, + "learning_rate": 1.988795621897508e-06, + "loss": 0.6489086151123047, + "step": 828 + }, + { + "epoch": 0.19114595342402582, + "grad_norm": 0.8093738620503291, + "learning_rate": 1.9887386370150823e-06, + "loss": 0.5885359644889832, + "step": 829 + }, + { + "epoch": 0.19137652755360848, + "grad_norm": 1.1233507395706857, + "learning_rate": 1.988681508409475e-06, + "loss": 0.5725297927856445, + "step": 830 + }, + { + "epoch": 0.19160710168319114, + "grad_norm": 0.9186016287497916, + "learning_rate": 1.9886242360889907e-06, + "loss": 0.5165927410125732, + "step": 831 + }, + { + "epoch": 0.1918376758127738, + "grad_norm": 0.9873812028582082, + "learning_rate": 1.988566820061954e-06, + "loss": 0.4909062385559082, + "step": 832 + }, + { + "epoch": 0.19206824994235647, + "grad_norm": 0.8524339429885558, + "learning_rate": 1.988509260336711e-06, + "loss": 0.6611230373382568, + "step": 833 + }, + { + "epoch": 0.19229882407193913, + "grad_norm": 0.8054213393470881, + "learning_rate": 1.9884515569216296e-06, + "loss": 0.5702481269836426, + "step": 834 + }, + { + "epoch": 0.1925293982015218, + "grad_norm": 1.0204414620630202, + "learning_rate": 1.988393709825096e-06, + "loss": 0.5923126935958862, + "step": 835 + }, + { + "epoch": 0.19275997233110445, + "grad_norm": 0.9055032000924194, + "learning_rate": 1.98833571905552e-06, + "loss": 0.6054497957229614, + "step": 836 + }, + { + "epoch": 0.1929905464606871, + "grad_norm": 0.9248140875126212, + "learning_rate": 1.9882775846213305e-06, + "loss": 0.6688513159751892, + 
"step": 837 + }, + { + "epoch": 0.19322112059026977, + "grad_norm": 1.0273808455254545, + "learning_rate": 1.988219306530978e-06, + "loss": 0.5898394584655762, + "step": 838 + }, + { + "epoch": 0.19345169471985244, + "grad_norm": 0.9751112903331337, + "learning_rate": 1.9881608847929345e-06, + "loss": 0.575627326965332, + "step": 839 + }, + { + "epoch": 0.1936822688494351, + "grad_norm": 0.8673669914525766, + "learning_rate": 1.9881023194156913e-06, + "loss": 0.5392276048660278, + "step": 840 + }, + { + "epoch": 0.19391284297901776, + "grad_norm": 0.8706508008641746, + "learning_rate": 1.9880436104077624e-06, + "loss": 0.5464376211166382, + "step": 841 + }, + { + "epoch": 0.19414341710860042, + "grad_norm": 1.1088629334080236, + "learning_rate": 1.9879847577776804e-06, + "loss": 0.5483032464981079, + "step": 842 + }, + { + "epoch": 0.19437399123818308, + "grad_norm": 1.088158010228094, + "learning_rate": 1.9879257615340016e-06, + "loss": 0.583878219127655, + "step": 843 + }, + { + "epoch": 0.19460456536776574, + "grad_norm": 0.903659297701254, + "learning_rate": 1.9878666216853005e-06, + "loss": 0.5646623373031616, + "step": 844 + }, + { + "epoch": 0.1948351394973484, + "grad_norm": 0.8893037043091606, + "learning_rate": 1.9878073382401745e-06, + "loss": 0.4785343408584595, + "step": 845 + }, + { + "epoch": 0.19506571362693106, + "grad_norm": 0.8306997774077053, + "learning_rate": 1.987747911207241e-06, + "loss": 0.6247695684432983, + "step": 846 + }, + { + "epoch": 0.19529628775651373, + "grad_norm": 0.8871051444384922, + "learning_rate": 1.9876883405951377e-06, + "loss": 0.5686244368553162, + "step": 847 + }, + { + "epoch": 0.1955268618860964, + "grad_norm": 1.0693338597203925, + "learning_rate": 1.9876286264125242e-06, + "loss": 0.5887250900268555, + "step": 848 + }, + { + "epoch": 0.19575743601567905, + "grad_norm": 1.009687803574172, + "learning_rate": 1.9875687686680808e-06, + "loss": 0.6225967407226562, + "step": 849 + }, + { + "epoch": 0.1959880101452617, + 
"grad_norm": 0.8424215047754778, + "learning_rate": 1.987508767370508e-06, + "loss": 0.4695369601249695, + "step": 850 + }, + { + "epoch": 0.19621858427484437, + "grad_norm": 1.0270923710251258, + "learning_rate": 1.9874486225285276e-06, + "loss": 0.5248171091079712, + "step": 851 + }, + { + "epoch": 0.19644915840442703, + "grad_norm": 1.0947189066196994, + "learning_rate": 1.9873883341508825e-06, + "loss": 0.573886513710022, + "step": 852 + }, + { + "epoch": 0.1966797325340097, + "grad_norm": 0.980074050730982, + "learning_rate": 1.9873279022463365e-06, + "loss": 0.5309966802597046, + "step": 853 + }, + { + "epoch": 0.19691030666359235, + "grad_norm": 1.2273525906968545, + "learning_rate": 1.987267326823673e-06, + "loss": 0.7115850448608398, + "step": 854 + }, + { + "epoch": 0.19714088079317502, + "grad_norm": 1.65154587276706, + "learning_rate": 1.9872066078916984e-06, + "loss": 0.6970044374465942, + "step": 855 + }, + { + "epoch": 0.19737145492275768, + "grad_norm": 1.0520569639047552, + "learning_rate": 1.987145745459238e-06, + "loss": 0.5956458449363708, + "step": 856 + }, + { + "epoch": 0.19760202905234034, + "grad_norm": 0.8621512966256671, + "learning_rate": 1.9870847395351395e-06, + "loss": 0.6200698614120483, + "step": 857 + }, + { + "epoch": 0.197832603181923, + "grad_norm": 0.8987981187104104, + "learning_rate": 1.98702359012827e-06, + "loss": 0.6552712321281433, + "step": 858 + }, + { + "epoch": 0.19806317731150566, + "grad_norm": 0.8832934653512269, + "learning_rate": 1.986962297247519e-06, + "loss": 0.5995951294898987, + "step": 859 + }, + { + "epoch": 0.19829375144108832, + "grad_norm": 1.0415029103173328, + "learning_rate": 1.9869008609017946e-06, + "loss": 0.5903854966163635, + "step": 860 + }, + { + "epoch": 0.19852432557067098, + "grad_norm": 0.7946410320386238, + "learning_rate": 1.986839281100029e-06, + "loss": 0.49756956100463867, + "step": 861 + }, + { + "epoch": 0.19875489970025362, + "grad_norm": 0.8989937288923138, + "learning_rate": 
1.986777557851172e-06, + "loss": 0.6726386547088623, + "step": 862 + }, + { + "epoch": 0.19898547382983628, + "grad_norm": 1.066877002121069, + "learning_rate": 1.9867156911641963e-06, + "loss": 0.5941756963729858, + "step": 863 + }, + { + "epoch": 0.19921604795941894, + "grad_norm": 1.1426428571577942, + "learning_rate": 1.986653681048095e-06, + "loss": 0.6148152351379395, + "step": 864 + }, + { + "epoch": 0.1994466220890016, + "grad_norm": 0.8574337846446602, + "learning_rate": 1.9865915275118815e-06, + "loss": 0.5484675765037537, + "step": 865 + }, + { + "epoch": 0.19967719621858426, + "grad_norm": 1.279305752369778, + "learning_rate": 1.986529230564591e-06, + "loss": 0.5835011601448059, + "step": 866 + }, + { + "epoch": 0.19990777034816692, + "grad_norm": 1.2828587747963143, + "learning_rate": 1.9864667902152785e-06, + "loss": 0.5505619049072266, + "step": 867 + }, + { + "epoch": 0.20013834447774959, + "grad_norm": 0.978792866059614, + "learning_rate": 1.986404206473021e-06, + "loss": 0.6170759797096252, + "step": 868 + }, + { + "epoch": 0.20036891860733225, + "grad_norm": 0.9063283607010307, + "learning_rate": 1.9863414793469144e-06, + "loss": 0.6302823424339294, + "step": 869 + }, + { + "epoch": 0.2005994927369149, + "grad_norm": 0.9919923586713045, + "learning_rate": 1.9862786088460778e-06, + "loss": 0.6265357732772827, + "step": 870 + }, + { + "epoch": 0.20083006686649757, + "grad_norm": 0.8288163853607481, + "learning_rate": 1.9862155949796497e-06, + "loss": 0.5346760749816895, + "step": 871 + }, + { + "epoch": 0.20106064099608023, + "grad_norm": 1.0613032711669241, + "learning_rate": 1.98615243775679e-06, + "loss": 0.5480276346206665, + "step": 872 + }, + { + "epoch": 0.2012912151256629, + "grad_norm": 1.0504212966242243, + "learning_rate": 1.986089137186679e-06, + "loss": 0.615007758140564, + "step": 873 + }, + { + "epoch": 0.20152178925524555, + "grad_norm": 1.0424303204478471, + "learning_rate": 1.986025693278518e-06, + "loss": 0.598671555519104, + 
"step": 874 + }, + { + "epoch": 0.20175236338482821, + "grad_norm": 1.1162570964298844, + "learning_rate": 1.98596210604153e-06, + "loss": 0.6029553413391113, + "step": 875 + }, + { + "epoch": 0.20198293751441088, + "grad_norm": 0.9723766835428509, + "learning_rate": 1.985898375484957e-06, + "loss": 0.6854428052902222, + "step": 876 + }, + { + "epoch": 0.20221351164399354, + "grad_norm": 0.7502030102171089, + "learning_rate": 1.9858345016180636e-06, + "loss": 0.5032496452331543, + "step": 877 + }, + { + "epoch": 0.2024440857735762, + "grad_norm": 0.910423493721141, + "learning_rate": 1.9857704844501343e-06, + "loss": 0.5521007776260376, + "step": 878 + }, + { + "epoch": 0.20267465990315886, + "grad_norm": 0.9861926154372014, + "learning_rate": 1.9857063239904742e-06, + "loss": 0.6473567485809326, + "step": 879 + }, + { + "epoch": 0.20290523403274152, + "grad_norm": 0.9973567674127126, + "learning_rate": 1.9856420202484103e-06, + "loss": 0.528810977935791, + "step": 880 + }, + { + "epoch": 0.20313580816232418, + "grad_norm": 1.0663389238750165, + "learning_rate": 1.9855775732332898e-06, + "loss": 0.681857705116272, + "step": 881 + }, + { + "epoch": 0.20336638229190684, + "grad_norm": 0.9199566615284357, + "learning_rate": 1.9855129829544805e-06, + "loss": 0.6510526537895203, + "step": 882 + }, + { + "epoch": 0.2035969564214895, + "grad_norm": 1.0847608945381821, + "learning_rate": 1.985448249421371e-06, + "loss": 0.5690885782241821, + "step": 883 + }, + { + "epoch": 0.20382753055107217, + "grad_norm": 0.9067033263808438, + "learning_rate": 1.985383372643371e-06, + "loss": 0.6451331973075867, + "step": 884 + }, + { + "epoch": 0.20405810468065483, + "grad_norm": 0.7596187493834748, + "learning_rate": 1.9853183526299117e-06, + "loss": 0.493961900472641, + "step": 885 + }, + { + "epoch": 0.2042886788102375, + "grad_norm": 1.031307930072274, + "learning_rate": 1.9852531893904434e-06, + "loss": 0.5390207767486572, + "step": 886 + }, + { + "epoch": 0.20451925293982015, + 
"grad_norm": 0.9671201783822709, + "learning_rate": 1.9851878829344395e-06, + "loss": 0.5976558923721313, + "step": 887 + }, + { + "epoch": 0.2047498270694028, + "grad_norm": 0.9832697265495778, + "learning_rate": 1.9851224332713917e-06, + "loss": 0.539776623249054, + "step": 888 + }, + { + "epoch": 0.20498040119898547, + "grad_norm": 1.1606849770347532, + "learning_rate": 1.9850568404108144e-06, + "loss": 0.6791383624076843, + "step": 889 + }, + { + "epoch": 0.20521097532856813, + "grad_norm": 1.1599404347752247, + "learning_rate": 1.984991104362242e-06, + "loss": 0.6195741891860962, + "step": 890 + }, + { + "epoch": 0.2054415494581508, + "grad_norm": 1.0295013801913249, + "learning_rate": 1.9849252251352303e-06, + "loss": 0.5792666673660278, + "step": 891 + }, + { + "epoch": 0.20567212358773346, + "grad_norm": 0.7871401361859056, + "learning_rate": 1.984859202739355e-06, + "loss": 0.5633316040039062, + "step": 892 + }, + { + "epoch": 0.20590269771731612, + "grad_norm": 0.9078754261167402, + "learning_rate": 1.9847930371842137e-06, + "loss": 0.6152814626693726, + "step": 893 + }, + { + "epoch": 0.20613327184689878, + "grad_norm": 1.0024181714804654, + "learning_rate": 1.9847267284794234e-06, + "loss": 0.5584526658058167, + "step": 894 + }, + { + "epoch": 0.20636384597648144, + "grad_norm": 0.9442571191896375, + "learning_rate": 1.9846602766346235e-06, + "loss": 0.5526787042617798, + "step": 895 + }, + { + "epoch": 0.2065944201060641, + "grad_norm": 1.114741515810547, + "learning_rate": 1.984593681659473e-06, + "loss": 0.6851564049720764, + "step": 896 + }, + { + "epoch": 0.20682499423564676, + "grad_norm": 0.9529867069899208, + "learning_rate": 1.9845269435636524e-06, + "loss": 0.6012386083602905, + "step": 897 + }, + { + "epoch": 0.20705556836522943, + "grad_norm": 0.9587418141612076, + "learning_rate": 1.9844600623568626e-06, + "loss": 0.5515716075897217, + "step": 898 + }, + { + "epoch": 0.2072861424948121, + "grad_norm": 1.0489716310270325, + "learning_rate": 
1.9843930380488255e-06, + "loss": 0.6534323692321777, + "step": 899 + }, + { + "epoch": 0.20751671662439475, + "grad_norm": 0.9795829214559992, + "learning_rate": 1.9843258706492836e-06, + "loss": 0.726966381072998, + "step": 900 + }, + { + "epoch": 0.2077472907539774, + "grad_norm": 1.0154014646465384, + "learning_rate": 1.984258560168001e-06, + "loss": 0.6692399978637695, + "step": 901 + }, + { + "epoch": 0.20797786488356007, + "grad_norm": 0.8361205321250001, + "learning_rate": 1.9841911066147614e-06, + "loss": 0.5815941095352173, + "step": 902 + }, + { + "epoch": 0.20820843901314273, + "grad_norm": 0.8093430372283338, + "learning_rate": 1.98412350999937e-06, + "loss": 0.4850257933139801, + "step": 903 + }, + { + "epoch": 0.2084390131427254, + "grad_norm": 0.9321751727050823, + "learning_rate": 1.9840557703316524e-06, + "loss": 0.7309345006942749, + "step": 904 + }, + { + "epoch": 0.20866958727230805, + "grad_norm": 0.9487721653557605, + "learning_rate": 1.9839878876214556e-06, + "loss": 0.6246342658996582, + "step": 905 + }, + { + "epoch": 0.20890016140189072, + "grad_norm": 0.923401773715514, + "learning_rate": 1.983919861878647e-06, + "loss": 0.503870964050293, + "step": 906 + }, + { + "epoch": 0.20913073553147338, + "grad_norm": 0.9277576649885639, + "learning_rate": 1.9838516931131147e-06, + "loss": 0.5316766500473022, + "step": 907 + }, + { + "epoch": 0.20936130966105604, + "grad_norm": 0.9488124820166146, + "learning_rate": 1.983783381334768e-06, + "loss": 0.5707069039344788, + "step": 908 + }, + { + "epoch": 0.2095918837906387, + "grad_norm": 1.1481758251998657, + "learning_rate": 1.983714926553536e-06, + "loss": 0.5482156276702881, + "step": 909 + }, + { + "epoch": 0.20982245792022136, + "grad_norm": 0.8868748652499737, + "learning_rate": 1.98364632877937e-06, + "loss": 0.45747748017311096, + "step": 910 + }, + { + "epoch": 0.21005303204980402, + "grad_norm": 1.070435205795932, + "learning_rate": 1.9835775880222414e-06, + "loss": 0.5599262118339539, + 
"step": 911 + }, + { + "epoch": 0.21028360617938668, + "grad_norm": 0.8833178195747919, + "learning_rate": 1.9835087042921416e-06, + "loss": 0.5115377902984619, + "step": 912 + }, + { + "epoch": 0.21051418030896935, + "grad_norm": 1.0026720443060566, + "learning_rate": 1.9834396775990846e-06, + "loss": 0.6577836275100708, + "step": 913 + }, + { + "epoch": 0.210744754438552, + "grad_norm": 1.0996458728397183, + "learning_rate": 1.9833705079531033e-06, + "loss": 0.4979211091995239, + "step": 914 + }, + { + "epoch": 0.21097532856813467, + "grad_norm": 0.9038590231228891, + "learning_rate": 1.983301195364252e-06, + "loss": 0.5052670240402222, + "step": 915 + }, + { + "epoch": 0.21120590269771733, + "grad_norm": 0.9375736925419242, + "learning_rate": 1.9832317398426076e-06, + "loss": 0.5480808019638062, + "step": 916 + }, + { + "epoch": 0.2114364768273, + "grad_norm": 1.1234174619828885, + "learning_rate": 1.983162141398264e-06, + "loss": 0.5328841209411621, + "step": 917 + }, + { + "epoch": 0.21166705095688262, + "grad_norm": 1.0661654042909894, + "learning_rate": 1.98309240004134e-06, + "loss": 0.5572643280029297, + "step": 918 + }, + { + "epoch": 0.21189762508646529, + "grad_norm": 0.7370595537346776, + "learning_rate": 1.983022515781972e-06, + "loss": 0.5180699825286865, + "step": 919 + }, + { + "epoch": 0.21212819921604795, + "grad_norm": 0.9467461169752135, + "learning_rate": 1.9829524886303182e-06, + "loss": 0.5031566619873047, + "step": 920 + }, + { + "epoch": 0.2123587733456306, + "grad_norm": 1.0924744776428812, + "learning_rate": 1.9828823185965587e-06, + "loss": 0.6579925417900085, + "step": 921 + }, + { + "epoch": 0.21258934747521327, + "grad_norm": 1.0635734753276387, + "learning_rate": 1.982812005690893e-06, + "loss": 0.6107230186462402, + "step": 922 + }, + { + "epoch": 0.21281992160479593, + "grad_norm": 0.8209241554677639, + "learning_rate": 1.982741549923542e-06, + "loss": 0.5244725942611694, + "step": 923 + }, + { + "epoch": 0.2130504957343786, + 
"grad_norm": 0.8970249012108504, + "learning_rate": 1.9826709513047466e-06, + "loss": 0.5857048630714417, + "step": 924 + }, + { + "epoch": 0.21328106986396125, + "grad_norm": 1.1702999413512643, + "learning_rate": 1.9826002098447694e-06, + "loss": 0.6417914628982544, + "step": 925 + }, + { + "epoch": 0.21351164399354391, + "grad_norm": 1.025740647317304, + "learning_rate": 1.982529325553893e-06, + "loss": 0.6062248945236206, + "step": 926 + }, + { + "epoch": 0.21374221812312658, + "grad_norm": 0.8397411976395659, + "learning_rate": 1.982458298442422e-06, + "loss": 0.4870455265045166, + "step": 927 + }, + { + "epoch": 0.21397279225270924, + "grad_norm": 0.8931294029793581, + "learning_rate": 1.9823871285206802e-06, + "loss": 0.6552037000656128, + "step": 928 + }, + { + "epoch": 0.2142033663822919, + "grad_norm": 0.9703019761386622, + "learning_rate": 1.9823158157990133e-06, + "loss": 0.531679093837738, + "step": 929 + }, + { + "epoch": 0.21443394051187456, + "grad_norm": 1.2664544243150397, + "learning_rate": 1.982244360287787e-06, + "loss": 0.516847550868988, + "step": 930 + }, + { + "epoch": 0.21466451464145722, + "grad_norm": 0.810392988957607, + "learning_rate": 1.982172761997388e-06, + "loss": 0.47147709131240845, + "step": 931 + }, + { + "epoch": 0.21489508877103988, + "grad_norm": 0.8771741979565738, + "learning_rate": 1.982101020938224e-06, + "loss": 0.627938985824585, + "step": 932 + }, + { + "epoch": 0.21512566290062254, + "grad_norm": 1.0257080856710215, + "learning_rate": 1.9820291371207233e-06, + "loss": 0.639348030090332, + "step": 933 + }, + { + "epoch": 0.2153562370302052, + "grad_norm": 0.9702705556217962, + "learning_rate": 1.9819571105553354e-06, + "loss": 0.6480363607406616, + "step": 934 + }, + { + "epoch": 0.21558681115978787, + "grad_norm": 0.9260617050921398, + "learning_rate": 1.9818849412525293e-06, + "loss": 0.5776711702346802, + "step": 935 + }, + { + "epoch": 0.21581738528937053, + "grad_norm": 0.9042487017557694, + "learning_rate": 
1.9818126292227957e-06, + "loss": 0.5891472101211548, + "step": 936 + }, + { + "epoch": 0.2160479594189532, + "grad_norm": 0.8905401941241984, + "learning_rate": 1.9817401744766465e-06, + "loss": 0.5977755784988403, + "step": 937 + }, + { + "epoch": 0.21627853354853585, + "grad_norm": 0.8626457448308078, + "learning_rate": 1.981667577024613e-06, + "loss": 0.5263733863830566, + "step": 938 + }, + { + "epoch": 0.2165091076781185, + "grad_norm": 1.0627291912482457, + "learning_rate": 1.9815948368772484e-06, + "loss": 0.5440605878829956, + "step": 939 + }, + { + "epoch": 0.21673968180770117, + "grad_norm": 0.9629159186929203, + "learning_rate": 1.9815219540451263e-06, + "loss": 0.5140440464019775, + "step": 940 + }, + { + "epoch": 0.21697025593728383, + "grad_norm": 1.0494365886675714, + "learning_rate": 1.9814489285388402e-06, + "loss": 0.6741353273391724, + "step": 941 + }, + { + "epoch": 0.2172008300668665, + "grad_norm": 1.1329427006993176, + "learning_rate": 1.981375760369006e-06, + "loss": 0.6243258714675903, + "step": 942 + }, + { + "epoch": 0.21743140419644916, + "grad_norm": 1.1054961559311265, + "learning_rate": 1.981302449546259e-06, + "loss": 0.6363699436187744, + "step": 943 + }, + { + "epoch": 0.21766197832603182, + "grad_norm": 0.9214231813217233, + "learning_rate": 1.981228996081256e-06, + "loss": 0.5849490165710449, + "step": 944 + }, + { + "epoch": 0.21789255245561448, + "grad_norm": 0.8824229032075002, + "learning_rate": 1.9811553999846736e-06, + "loss": 0.43679118156433105, + "step": 945 + }, + { + "epoch": 0.21812312658519714, + "grad_norm": 0.8524209104471582, + "learning_rate": 1.9810816612672104e-06, + "loss": 0.5575870275497437, + "step": 946 + }, + { + "epoch": 0.2183537007147798, + "grad_norm": 1.2313981009960802, + "learning_rate": 1.9810077799395846e-06, + "loss": 0.5288122296333313, + "step": 947 + }, + { + "epoch": 0.21858427484436246, + "grad_norm": 0.9413824588491826, + "learning_rate": 1.9809337560125357e-06, + "loss": 
0.5618559718132019, + "step": 948 + }, + { + "epoch": 0.21881484897394513, + "grad_norm": 0.900237395227137, + "learning_rate": 1.980859589496824e-06, + "loss": 0.6346654891967773, + "step": 949 + }, + { + "epoch": 0.2190454231035278, + "grad_norm": 0.7859619018047411, + "learning_rate": 1.98078528040323e-06, + "loss": 0.5456810593605042, + "step": 950 + }, + { + "epoch": 0.21927599723311045, + "grad_norm": 1.096845447650345, + "learning_rate": 1.980710828742556e-06, + "loss": 0.6463650465011597, + "step": 951 + }, + { + "epoch": 0.2195065713626931, + "grad_norm": 0.8708852946707265, + "learning_rate": 1.980636234525624e-06, + "loss": 0.5013638734817505, + "step": 952 + }, + { + "epoch": 0.21973714549227577, + "grad_norm": 1.0813749561311563, + "learning_rate": 1.9805614977632763e-06, + "loss": 0.6522110104560852, + "step": 953 + }, + { + "epoch": 0.21996771962185843, + "grad_norm": 1.1282712003155921, + "learning_rate": 1.9804866184663775e-06, + "loss": 0.5864803791046143, + "step": 954 + }, + { + "epoch": 0.2201982937514411, + "grad_norm": 1.0131587624930238, + "learning_rate": 1.9804115966458116e-06, + "loss": 0.5261500477790833, + "step": 955 + }, + { + "epoch": 0.22042886788102375, + "grad_norm": 0.9727651996633074, + "learning_rate": 1.980336432312484e-06, + "loss": 0.585462212562561, + "step": 956 + }, + { + "epoch": 0.22065944201060642, + "grad_norm": 0.913173290527313, + "learning_rate": 1.9802611254773207e-06, + "loss": 0.5889539122581482, + "step": 957 + }, + { + "epoch": 0.22089001614018908, + "grad_norm": 0.9844451118331555, + "learning_rate": 1.980185676151268e-06, + "loss": 0.665162205696106, + "step": 958 + }, + { + "epoch": 0.22112059026977174, + "grad_norm": 0.9378356304402508, + "learning_rate": 1.9801100843452935e-06, + "loss": 0.5344980359077454, + "step": 959 + }, + { + "epoch": 0.2213511643993544, + "grad_norm": 0.9210142542004092, + "learning_rate": 1.980034350070385e-06, + "loss": 0.6301499009132385, + "step": 960 + }, + { + "epoch": 
0.22158173852893706, + "grad_norm": 1.0404902143094334, + "learning_rate": 1.9799584733375512e-06, + "loss": 0.5114584565162659, + "step": 961 + }, + { + "epoch": 0.22181231265851972, + "grad_norm": 1.0168872016124533, + "learning_rate": 1.979882454157822e-06, + "loss": 0.5199861526489258, + "step": 962 + }, + { + "epoch": 0.22204288678810238, + "grad_norm": 1.1826380086118446, + "learning_rate": 1.9798062925422472e-06, + "loss": 0.5336212515830994, + "step": 963 + }, + { + "epoch": 0.22227346091768505, + "grad_norm": 1.0189277044162137, + "learning_rate": 1.9797299885018977e-06, + "loss": 0.535847544670105, + "step": 964 + }, + { + "epoch": 0.2225040350472677, + "grad_norm": 1.1943664941065335, + "learning_rate": 1.979653542047865e-06, + "loss": 0.6234130859375, + "step": 965 + }, + { + "epoch": 0.22273460917685037, + "grad_norm": 0.9414245062598806, + "learning_rate": 1.979576953191262e-06, + "loss": 0.5017205476760864, + "step": 966 + }, + { + "epoch": 0.22296518330643303, + "grad_norm": 0.8271602877368085, + "learning_rate": 1.9795002219432204e-06, + "loss": 0.4982973337173462, + "step": 967 + }, + { + "epoch": 0.2231957574360157, + "grad_norm": 1.0821521338057418, + "learning_rate": 1.979423348314895e-06, + "loss": 0.47946417331695557, + "step": 968 + }, + { + "epoch": 0.22342633156559835, + "grad_norm": 0.9333636639659694, + "learning_rate": 1.97934633231746e-06, + "loss": 0.5431856513023376, + "step": 969 + }, + { + "epoch": 0.223656905695181, + "grad_norm": 1.010615347342822, + "learning_rate": 1.9792691739621097e-06, + "loss": 0.5355685949325562, + "step": 970 + }, + { + "epoch": 0.22388747982476367, + "grad_norm": 0.9115391310212676, + "learning_rate": 1.979191873260061e-06, + "loss": 0.6103906631469727, + "step": 971 + }, + { + "epoch": 0.22411805395434634, + "grad_norm": 0.9295016548118124, + "learning_rate": 1.9791144302225493e-06, + "loss": 0.538421094417572, + "step": 972 + }, + { + "epoch": 0.224348628083929, + "grad_norm": 1.2200934433979187, + 
"learning_rate": 1.9790368448608322e-06, + "loss": 0.6068445444107056, + "step": 973 + }, + { + "epoch": 0.22457920221351163, + "grad_norm": 0.8606144159525476, + "learning_rate": 1.9789591171861874e-06, + "loss": 0.463737815618515, + "step": 974 + }, + { + "epoch": 0.2248097763430943, + "grad_norm": 1.0217946560153375, + "learning_rate": 1.9788812472099135e-06, + "loss": 0.6588588953018188, + "step": 975 + }, + { + "epoch": 0.22504035047267695, + "grad_norm": 1.0288343828209117, + "learning_rate": 1.9788032349433297e-06, + "loss": 0.678712010383606, + "step": 976 + }, + { + "epoch": 0.22527092460225961, + "grad_norm": 1.1695805252394589, + "learning_rate": 1.9787250803977757e-06, + "loss": 0.6397948265075684, + "step": 977 + }, + { + "epoch": 0.22550149873184228, + "grad_norm": 1.029054993282064, + "learning_rate": 1.978646783584612e-06, + "loss": 0.5422782897949219, + "step": 978 + }, + { + "epoch": 0.22573207286142494, + "grad_norm": 0.9969509169785887, + "learning_rate": 1.9785683445152204e-06, + "loss": 0.5314444303512573, + "step": 979 + }, + { + "epoch": 0.2259626469910076, + "grad_norm": 1.0816366548169771, + "learning_rate": 1.9784897632010026e-06, + "loss": 0.6260710954666138, + "step": 980 + }, + { + "epoch": 0.22619322112059026, + "grad_norm": 1.6140506138107567, + "learning_rate": 1.9784110396533804e-06, + "loss": 0.6765384078025818, + "step": 981 + }, + { + "epoch": 0.22642379525017292, + "grad_norm": 0.9741870993027198, + "learning_rate": 1.9783321738837983e-06, + "loss": 0.6716702580451965, + "step": 982 + }, + { + "epoch": 0.22665436937975558, + "grad_norm": 0.9800524570597025, + "learning_rate": 1.978253165903719e-06, + "loss": 0.5537375211715698, + "step": 983 + }, + { + "epoch": 0.22688494350933824, + "grad_norm": 1.2650751897909203, + "learning_rate": 1.9781740157246285e-06, + "loss": 0.525878369808197, + "step": 984 + }, + { + "epoch": 0.2271155176389209, + "grad_norm": 1.1285639712327624, + "learning_rate": 1.978094723358031e-06, + "loss": 
0.6349027156829834, + "step": 985 + }, + { + "epoch": 0.22734609176850357, + "grad_norm": 0.9922350297605812, + "learning_rate": 1.9780152888154525e-06, + "loss": 0.5777440071105957, + "step": 986 + }, + { + "epoch": 0.22757666589808623, + "grad_norm": 0.8792919247604332, + "learning_rate": 1.9779357121084402e-06, + "loss": 0.6181483268737793, + "step": 987 + }, + { + "epoch": 0.2278072400276689, + "grad_norm": 1.113677830579263, + "learning_rate": 1.9778559932485606e-06, + "loss": 0.6364198923110962, + "step": 988 + }, + { + "epoch": 0.22803781415725155, + "grad_norm": 1.0528039871957056, + "learning_rate": 1.9777761322474024e-06, + "loss": 0.623460054397583, + "step": 989 + }, + { + "epoch": 0.2282683882868342, + "grad_norm": 1.0042426162492055, + "learning_rate": 1.977696129116574e-06, + "loss": 0.504749059677124, + "step": 990 + }, + { + "epoch": 0.22849896241641687, + "grad_norm": 0.9462650071116105, + "learning_rate": 1.9776159838677048e-06, + "loss": 0.5228890180587769, + "step": 991 + }, + { + "epoch": 0.22872953654599953, + "grad_norm": 0.983638268661895, + "learning_rate": 1.977535696512444e-06, + "loss": 0.5765929222106934, + "step": 992 + }, + { + "epoch": 0.2289601106755822, + "grad_norm": 1.0000819039461677, + "learning_rate": 1.977455267062463e-06, + "loss": 0.5165348052978516, + "step": 993 + }, + { + "epoch": 0.22919068480516486, + "grad_norm": 1.0528189784184039, + "learning_rate": 1.9773746955294525e-06, + "loss": 0.6056735515594482, + "step": 994 + }, + { + "epoch": 0.22942125893474752, + "grad_norm": 1.0625954437167437, + "learning_rate": 1.9772939819251245e-06, + "loss": 0.5430403351783752, + "step": 995 + }, + { + "epoch": 0.22965183306433018, + "grad_norm": 1.2611536344776966, + "learning_rate": 1.977213126261212e-06, + "loss": 0.5710945129394531, + "step": 996 + }, + { + "epoch": 0.22988240719391284, + "grad_norm": 0.9590894945496666, + "learning_rate": 1.977132128549468e-06, + "loss": 0.5189366936683655, + "step": 997 + }, + { + "epoch": 
0.2301129813234955, + "grad_norm": 1.229825794085491, + "learning_rate": 1.977050988801666e-06, + "loss": 0.6578037738800049, + "step": 998 + }, + { + "epoch": 0.23034355545307816, + "grad_norm": 1.0761110723698188, + "learning_rate": 1.9769697070296006e-06, + "loss": 0.5787034034729004, + "step": 999 + }, + { + "epoch": 0.23057412958266083, + "grad_norm": 1.0414208441736372, + "learning_rate": 1.976888283245087e-06, + "loss": 0.5169408321380615, + "step": 1000 + }, + { + "epoch": 0.2308047037122435, + "grad_norm": 1.1228864795023747, + "learning_rate": 1.976806717459961e-06, + "loss": 0.6326704025268555, + "step": 1001 + }, + { + "epoch": 0.23103527784182615, + "grad_norm": 1.2998118201322668, + "learning_rate": 1.9767250096860785e-06, + "loss": 0.5188414454460144, + "step": 1002 + }, + { + "epoch": 0.2312658519714088, + "grad_norm": 0.9684429634366722, + "learning_rate": 1.9766431599353173e-06, + "loss": 0.5788798928260803, + "step": 1003 + }, + { + "epoch": 0.23149642610099147, + "grad_norm": 1.011079377555661, + "learning_rate": 1.976561168219575e-06, + "loss": 0.5513355731964111, + "step": 1004 + }, + { + "epoch": 0.23172700023057413, + "grad_norm": 0.9242770139183195, + "learning_rate": 1.97647903455077e-06, + "loss": 0.5810542106628418, + "step": 1005 + }, + { + "epoch": 0.2319575743601568, + "grad_norm": 0.9036081245550505, + "learning_rate": 1.9763967589408407e-06, + "loss": 0.6541746854782104, + "step": 1006 + }, + { + "epoch": 0.23218814848973945, + "grad_norm": 0.972339176589073, + "learning_rate": 1.976314341401747e-06, + "loss": 0.48837774991989136, + "step": 1007 + }, + { + "epoch": 0.23241872261932212, + "grad_norm": 1.0622732331560878, + "learning_rate": 1.976231781945469e-06, + "loss": 0.514664888381958, + "step": 1008 + }, + { + "epoch": 0.23264929674890478, + "grad_norm": 1.1476741578183667, + "learning_rate": 1.976149080584008e-06, + "loss": 0.48295027017593384, + "step": 1009 + }, + { + "epoch": 0.23287987087848744, + "grad_norm": 
0.9532553897028984, + "learning_rate": 1.9760662373293847e-06, + "loss": 0.5975791811943054, + "step": 1010 + }, + { + "epoch": 0.2331104450080701, + "grad_norm": 1.0101722687438028, + "learning_rate": 1.9759832521936424e-06, + "loss": 0.4810718297958374, + "step": 1011 + }, + { + "epoch": 0.23334101913765276, + "grad_norm": 0.8377461102160731, + "learning_rate": 1.9759001251888425e-06, + "loss": 0.5984642505645752, + "step": 1012 + }, + { + "epoch": 0.23357159326723542, + "grad_norm": 1.1428510363276687, + "learning_rate": 1.975816856327069e-06, + "loss": 0.600128710269928, + "step": 1013 + }, + { + "epoch": 0.23380216739681808, + "grad_norm": 0.976646115631477, + "learning_rate": 1.9757334456204263e-06, + "loss": 0.5036175847053528, + "step": 1014 + }, + { + "epoch": 0.23403274152640074, + "grad_norm": 0.781296299293608, + "learning_rate": 1.975649893081038e-06, + "loss": 0.49270063638687134, + "step": 1015 + }, + { + "epoch": 0.2342633156559834, + "grad_norm": 1.0782515218974933, + "learning_rate": 1.97556619872105e-06, + "loss": 0.5337218642234802, + "step": 1016 + }, + { + "epoch": 0.23449388978556607, + "grad_norm": 1.279305397178248, + "learning_rate": 1.9754823625526277e-06, + "loss": 0.5263136625289917, + "step": 1017 + }, + { + "epoch": 0.23472446391514873, + "grad_norm": 1.1321753640293293, + "learning_rate": 1.975398384587958e-06, + "loss": 0.6271284818649292, + "step": 1018 + }, + { + "epoch": 0.2349550380447314, + "grad_norm": 0.9524936816808555, + "learning_rate": 1.975314264839248e-06, + "loss": 0.7009197473526001, + "step": 1019 + }, + { + "epoch": 0.23518561217431405, + "grad_norm": 1.0291281498015452, + "learning_rate": 1.9752300033187248e-06, + "loss": 0.5781605839729309, + "step": 1020 + }, + { + "epoch": 0.2354161863038967, + "grad_norm": 1.0439195983844425, + "learning_rate": 1.9751456000386367e-06, + "loss": 0.549934446811676, + "step": 1021 + }, + { + "epoch": 0.23564676043347937, + "grad_norm": 1.1313488046553661, + "learning_rate": 
1.9750610550112535e-06, + "loss": 0.5856816172599792, + "step": 1022 + }, + { + "epoch": 0.23587733456306204, + "grad_norm": 1.1355877980298148, + "learning_rate": 1.9749763682488638e-06, + "loss": 0.6225322484970093, + "step": 1023 + }, + { + "epoch": 0.2361079086926447, + "grad_norm": 0.8829653489765357, + "learning_rate": 1.9748915397637775e-06, + "loss": 0.5533155202865601, + "step": 1024 + }, + { + "epoch": 0.23633848282222736, + "grad_norm": 0.9964032830251005, + "learning_rate": 1.974806569568326e-06, + "loss": 0.4960908889770508, + "step": 1025 + }, + { + "epoch": 0.23656905695181002, + "grad_norm": 1.0642112431572752, + "learning_rate": 1.97472145767486e-06, + "loss": 0.5960450768470764, + "step": 1026 + }, + { + "epoch": 0.23679963108139268, + "grad_norm": 1.0609331852795814, + "learning_rate": 1.9746362040957517e-06, + "loss": 0.5653714537620544, + "step": 1027 + }, + { + "epoch": 0.23703020521097534, + "grad_norm": 0.9636699324332547, + "learning_rate": 1.9745508088433936e-06, + "loss": 0.6400578022003174, + "step": 1028 + }, + { + "epoch": 0.23726077934055798, + "grad_norm": 1.0105210896498236, + "learning_rate": 1.9744652719301987e-06, + "loss": 0.5459057092666626, + "step": 1029 + }, + { + "epoch": 0.23749135347014064, + "grad_norm": 1.0859828591491134, + "learning_rate": 1.9743795933686005e-06, + "loss": 0.46735280752182007, + "step": 1030 + }, + { + "epoch": 0.2377219275997233, + "grad_norm": 0.9440768334185448, + "learning_rate": 1.9742937731710533e-06, + "loss": 0.526339590549469, + "step": 1031 + }, + { + "epoch": 0.23795250172930596, + "grad_norm": 1.013077702945683, + "learning_rate": 1.9742078113500323e-06, + "loss": 0.5976641178131104, + "step": 1032 + }, + { + "epoch": 0.23818307585888862, + "grad_norm": 0.9655038700233691, + "learning_rate": 1.9741217079180325e-06, + "loss": 0.5331728458404541, + "step": 1033 + }, + { + "epoch": 0.23841364998847128, + "grad_norm": 0.9368079955738086, + "learning_rate": 1.9740354628875696e-06, + "loss": 
0.5743261575698853, + "step": 1034 + }, + { + "epoch": 0.23864422411805394, + "grad_norm": 0.9982653104570526, + "learning_rate": 1.973949076271181e-06, + "loss": 0.54700767993927, + "step": 1035 + }, + { + "epoch": 0.2388747982476366, + "grad_norm": 0.8919318869448586, + "learning_rate": 1.9738625480814235e-06, + "loss": 0.5483411550521851, + "step": 1036 + }, + { + "epoch": 0.23910537237721927, + "grad_norm": 0.9314153856468148, + "learning_rate": 1.973775878330875e-06, + "loss": 0.5677193403244019, + "step": 1037 + }, + { + "epoch": 0.23933594650680193, + "grad_norm": 0.9867371078797748, + "learning_rate": 1.973689067032133e-06, + "loss": 0.5092767477035522, + "step": 1038 + }, + { + "epoch": 0.2395665206363846, + "grad_norm": 0.9526587430164372, + "learning_rate": 1.973602114197818e-06, + "loss": 0.5618614554405212, + "step": 1039 + }, + { + "epoch": 0.23979709476596725, + "grad_norm": 1.1304270434054837, + "learning_rate": 1.9735150198405677e-06, + "loss": 0.5601966977119446, + "step": 1040 + }, + { + "epoch": 0.2400276688955499, + "grad_norm": 1.2376653334727166, + "learning_rate": 1.973427783973043e-06, + "loss": 0.5945397019386292, + "step": 1041 + }, + { + "epoch": 0.24025824302513257, + "grad_norm": 1.084452486357135, + "learning_rate": 1.9733404066079253e-06, + "loss": 0.42448002099990845, + "step": 1042 + }, + { + "epoch": 0.24048881715471523, + "grad_norm": 1.0671556472806993, + "learning_rate": 1.9732528877579146e-06, + "loss": 0.5237313508987427, + "step": 1043 + }, + { + "epoch": 0.2407193912842979, + "grad_norm": 1.085642930506958, + "learning_rate": 1.973165227435733e-06, + "loss": 0.6006743907928467, + "step": 1044 + }, + { + "epoch": 0.24094996541388056, + "grad_norm": 0.9267133414742948, + "learning_rate": 1.973077425654123e-06, + "loss": 0.547584056854248, + "step": 1045 + }, + { + "epoch": 0.24118053954346322, + "grad_norm": 1.0824218376223906, + "learning_rate": 1.972989482425847e-06, + "loss": 0.5472346544265747, + "step": 1046 + }, + { + 
"epoch": 0.24141111367304588, + "grad_norm": 1.1106806941355478, + "learning_rate": 1.972901397763689e-06, + "loss": 0.5962260365486145, + "step": 1047 + }, + { + "epoch": 0.24164168780262854, + "grad_norm": 0.9770536598072448, + "learning_rate": 1.9728131716804525e-06, + "loss": 0.561386227607727, + "step": 1048 + }, + { + "epoch": 0.2418722619322112, + "grad_norm": 1.2169602038706573, + "learning_rate": 1.9727248041889624e-06, + "loss": 0.46618524193763733, + "step": 1049 + }, + { + "epoch": 0.24210283606179386, + "grad_norm": 0.9641011081185654, + "learning_rate": 1.9726362953020643e-06, + "loss": 0.4684019088745117, + "step": 1050 + }, + { + "epoch": 0.24233341019137652, + "grad_norm": 1.1116892767931694, + "learning_rate": 1.9725476450326227e-06, + "loss": 0.5670303106307983, + "step": 1051 + }, + { + "epoch": 0.2425639843209592, + "grad_norm": 1.0413794589983083, + "learning_rate": 1.9724588533935246e-06, + "loss": 0.5451534986495972, + "step": 1052 + }, + { + "epoch": 0.24279455845054185, + "grad_norm": 1.3028651104025368, + "learning_rate": 1.9723699203976766e-06, + "loss": 0.578605592250824, + "step": 1053 + }, + { + "epoch": 0.2430251325801245, + "grad_norm": 1.072521418141734, + "learning_rate": 1.972280846058006e-06, + "loss": 0.5844857692718506, + "step": 1054 + }, + { + "epoch": 0.24325570670970717, + "grad_norm": 0.8882845471690917, + "learning_rate": 1.9721916303874603e-06, + "loss": 0.5152320861816406, + "step": 1055 + }, + { + "epoch": 0.24348628083928983, + "grad_norm": 0.994596822062513, + "learning_rate": 1.9721022733990087e-06, + "loss": 0.5108952522277832, + "step": 1056 + }, + { + "epoch": 0.2437168549688725, + "grad_norm": 1.2179028657479944, + "learning_rate": 1.97201277510564e-06, + "loss": 0.6345964670181274, + "step": 1057 + }, + { + "epoch": 0.24394742909845515, + "grad_norm": 1.0322609868377797, + "learning_rate": 1.9719231355203627e-06, + "loss": 0.6699639558792114, + "step": 1058 + }, + { + "epoch": 0.24417800322803782, + 
"grad_norm": 1.0786593444912098, + "learning_rate": 1.971833354656208e-06, + "loss": 0.5426750779151917, + "step": 1059 + }, + { + "epoch": 0.24440857735762048, + "grad_norm": 0.9469348439661489, + "learning_rate": 1.9717434325262253e-06, + "loss": 0.45797908306121826, + "step": 1060 + }, + { + "epoch": 0.24463915148720314, + "grad_norm": 0.9212142090514559, + "learning_rate": 1.9716533691434872e-06, + "loss": 0.46754708886146545, + "step": 1061 + }, + { + "epoch": 0.2448697256167858, + "grad_norm": 1.0419375830533737, + "learning_rate": 1.9715631645210838e-06, + "loss": 0.6593209505081177, + "step": 1062 + }, + { + "epoch": 0.24510029974636846, + "grad_norm": 0.8714440933836988, + "learning_rate": 1.9714728186721287e-06, + "loss": 0.5634866952896118, + "step": 1063 + }, + { + "epoch": 0.24533087387595112, + "grad_norm": 1.3414429697713321, + "learning_rate": 1.971382331609753e-06, + "loss": 0.5066277980804443, + "step": 1064 + }, + { + "epoch": 0.24556144800553378, + "grad_norm": 0.9735373407478976, + "learning_rate": 1.9712917033471113e-06, + "loss": 0.5721756219863892, + "step": 1065 + }, + { + "epoch": 0.24579202213511644, + "grad_norm": 0.9116883309182201, + "learning_rate": 1.9712009338973765e-06, + "loss": 0.5188664197921753, + "step": 1066 + }, + { + "epoch": 0.2460225962646991, + "grad_norm": 1.1314636983505006, + "learning_rate": 1.9711100232737434e-06, + "loss": 0.4879762828350067, + "step": 1067 + }, + { + "epoch": 0.24625317039428177, + "grad_norm": 1.2412816829375237, + "learning_rate": 1.971018971489426e-06, + "loss": 0.5169111490249634, + "step": 1068 + }, + { + "epoch": 0.24648374452386443, + "grad_norm": 1.2239551353327036, + "learning_rate": 1.9709277785576605e-06, + "loss": 0.7341418862342834, + "step": 1069 + }, + { + "epoch": 0.2467143186534471, + "grad_norm": 0.9353793197150668, + "learning_rate": 1.970836444491702e-06, + "loss": 0.48676228523254395, + "step": 1070 + }, + { + "epoch": 0.24694489278302975, + "grad_norm": 1.1049152340951753, + 
"learning_rate": 1.9707449693048277e-06, + "loss": 0.5594040751457214, + "step": 1071 + }, + { + "epoch": 0.2471754669126124, + "grad_norm": 1.1275772388460679, + "learning_rate": 1.970653353010334e-06, + "loss": 0.575579047203064, + "step": 1072 + }, + { + "epoch": 0.24740604104219507, + "grad_norm": 0.9990792550863451, + "learning_rate": 1.9705615956215375e-06, + "loss": 0.5212938189506531, + "step": 1073 + }, + { + "epoch": 0.24763661517177774, + "grad_norm": 1.2242480620016798, + "learning_rate": 1.970469697151777e-06, + "loss": 0.49838072061538696, + "step": 1074 + }, + { + "epoch": 0.2478671893013604, + "grad_norm": 1.0069439526224342, + "learning_rate": 1.9703776576144106e-06, + "loss": 0.505547285079956, + "step": 1075 + }, + { + "epoch": 0.24809776343094306, + "grad_norm": 0.9320138812686547, + "learning_rate": 1.970285477022817e-06, + "loss": 0.5236082077026367, + "step": 1076 + }, + { + "epoch": 0.24832833756052572, + "grad_norm": 1.1096851604663263, + "learning_rate": 1.9701931553903963e-06, + "loss": 0.5417677760124207, + "step": 1077 + }, + { + "epoch": 0.24855891169010838, + "grad_norm": 1.4437484296393372, + "learning_rate": 1.9701006927305676e-06, + "loss": 0.624547004699707, + "step": 1078 + }, + { + "epoch": 0.24878948581969104, + "grad_norm": 1.1814609406249081, + "learning_rate": 1.9700080890567713e-06, + "loss": 0.7127759456634521, + "step": 1079 + }, + { + "epoch": 0.2490200599492737, + "grad_norm": 1.1432146079503174, + "learning_rate": 1.9699153443824686e-06, + "loss": 0.44590264558792114, + "step": 1080 + }, + { + "epoch": 0.24925063407885636, + "grad_norm": 0.9565451374538135, + "learning_rate": 1.9698224587211407e-06, + "loss": 0.6311746835708618, + "step": 1081 + }, + { + "epoch": 0.24948120820843903, + "grad_norm": 0.870591902169041, + "learning_rate": 1.9697294320862898e-06, + "loss": 0.4837970733642578, + "step": 1082 + }, + { + "epoch": 0.2497117823380217, + "grad_norm": 0.8760016768814028, + "learning_rate": 1.969636264491438e-06, 
+ "loss": 0.5749634504318237, + "step": 1083 + }, + { + "epoch": 0.24994235646760435, + "grad_norm": 0.9733867387062589, + "learning_rate": 1.9695429559501283e-06, + "loss": 0.5002774000167847, + "step": 1084 + }, + { + "epoch": 0.250172930597187, + "grad_norm": 0.9904270135981337, + "learning_rate": 1.9694495064759236e-06, + "loss": 0.5407592058181763, + "step": 1085 + }, + { + "epoch": 0.25040350472676964, + "grad_norm": 0.9112103184885231, + "learning_rate": 1.969355916082408e-06, + "loss": 0.5557315349578857, + "step": 1086 + }, + { + "epoch": 0.2506340788563523, + "grad_norm": 1.073902907739282, + "learning_rate": 1.9692621847831865e-06, + "loss": 0.4710160493850708, + "step": 1087 + }, + { + "epoch": 0.25086465298593497, + "grad_norm": 0.946965380647112, + "learning_rate": 1.969168312591883e-06, + "loss": 0.5935187339782715, + "step": 1088 + }, + { + "epoch": 0.2510952271155176, + "grad_norm": 0.9849357353961209, + "learning_rate": 1.969074299522143e-06, + "loss": 0.5358916521072388, + "step": 1089 + }, + { + "epoch": 0.2513258012451003, + "grad_norm": 0.9196749680008564, + "learning_rate": 1.968980145587632e-06, + "loss": 0.40736621618270874, + "step": 1090 + }, + { + "epoch": 0.25155637537468295, + "grad_norm": 0.8048789415521217, + "learning_rate": 1.968885850802037e-06, + "loss": 0.4986698627471924, + "step": 1091 + }, + { + "epoch": 0.2517869495042656, + "grad_norm": 0.9340127152994311, + "learning_rate": 1.968791415179064e-06, + "loss": 0.5547258853912354, + "step": 1092 + }, + { + "epoch": 0.2520175236338483, + "grad_norm": 1.0477998347740531, + "learning_rate": 1.96869683873244e-06, + "loss": 0.5187167525291443, + "step": 1093 + }, + { + "epoch": 0.25224809776343093, + "grad_norm": 0.9456931065936238, + "learning_rate": 1.9686021214759136e-06, + "loss": 0.560575008392334, + "step": 1094 + }, + { + "epoch": 0.2524786718930136, + "grad_norm": 1.0595767044992972, + "learning_rate": 1.968507263423252e-06, + "loss": 0.6441233158111572, + "step": 1095 + }, 
+ { + "epoch": 0.25270924602259626, + "grad_norm": 1.1650850474563572, + "learning_rate": 1.9684122645882446e-06, + "loss": 0.6693669557571411, + "step": 1096 + }, + { + "epoch": 0.2529398201521789, + "grad_norm": 0.9107773905688578, + "learning_rate": 1.9683171249846992e-06, + "loss": 0.4713742434978485, + "step": 1097 + }, + { + "epoch": 0.2531703942817616, + "grad_norm": 1.0855755163203802, + "learning_rate": 1.9682218446264466e-06, + "loss": 0.5393046140670776, + "step": 1098 + }, + { + "epoch": 0.25340096841134424, + "grad_norm": 0.8304628447343301, + "learning_rate": 1.968126423527336e-06, + "loss": 0.44416874647140503, + "step": 1099 + }, + { + "epoch": 0.2536315425409269, + "grad_norm": 0.8560775526129268, + "learning_rate": 1.9680308617012383e-06, + "loss": 0.486186683177948, + "step": 1100 + }, + { + "epoch": 0.25386211667050956, + "grad_norm": 0.8812542184427957, + "learning_rate": 1.9679351591620446e-06, + "loss": 0.5523893237113953, + "step": 1101 + }, + { + "epoch": 0.2540926908000922, + "grad_norm": 0.9964866126205207, + "learning_rate": 1.967839315923665e-06, + "loss": 0.49889492988586426, + "step": 1102 + }, + { + "epoch": 0.2543232649296749, + "grad_norm": 1.1438608764608638, + "learning_rate": 1.9677433320000325e-06, + "loss": 0.6084630489349365, + "step": 1103 + }, + { + "epoch": 0.25455383905925755, + "grad_norm": 0.9684259335546852, + "learning_rate": 1.967647207405099e-06, + "loss": 0.5458555221557617, + "step": 1104 + }, + { + "epoch": 0.2547844131888402, + "grad_norm": 1.3299718075912128, + "learning_rate": 1.9675509421528367e-06, + "loss": 0.5453877449035645, + "step": 1105 + }, + { + "epoch": 0.25501498731842287, + "grad_norm": 1.0404901274691463, + "learning_rate": 1.9674545362572393e-06, + "loss": 0.5226954221725464, + "step": 1106 + }, + { + "epoch": 0.25524556144800553, + "grad_norm": 1.0740163604419912, + "learning_rate": 1.96735798973232e-06, + "loss": 0.5736720561981201, + "step": 1107 + }, + { + "epoch": 0.2554761355775882, + 
"grad_norm": 0.9184855028566775, + "learning_rate": 1.9672613025921135e-06, + "loss": 0.5474177598953247, + "step": 1108 + }, + { + "epoch": 0.25570670970717085, + "grad_norm": 1.2485055919980548, + "learning_rate": 1.967164474850673e-06, + "loss": 0.5146498084068298, + "step": 1109 + }, + { + "epoch": 0.2559372838367535, + "grad_norm": 1.1137167951471605, + "learning_rate": 1.967067506522075e-06, + "loss": 0.6319057941436768, + "step": 1110 + }, + { + "epoch": 0.2561678579663362, + "grad_norm": 0.9087550652455604, + "learning_rate": 1.9669703976204136e-06, + "loss": 0.44495588541030884, + "step": 1111 + }, + { + "epoch": 0.25639843209591884, + "grad_norm": 0.9108509097161608, + "learning_rate": 1.9668731481598052e-06, + "loss": 0.5331558585166931, + "step": 1112 + }, + { + "epoch": 0.2566290062255015, + "grad_norm": 0.9795245602848469, + "learning_rate": 1.9667757581543856e-06, + "loss": 0.5409468412399292, + "step": 1113 + }, + { + "epoch": 0.25685958035508416, + "grad_norm": 1.054007279778104, + "learning_rate": 1.9666782276183112e-06, + "loss": 0.5743308663368225, + "step": 1114 + }, + { + "epoch": 0.2570901544846668, + "grad_norm": 1.004577427685411, + "learning_rate": 1.96658055656576e-06, + "loss": 0.5612793564796448, + "step": 1115 + }, + { + "epoch": 0.2573207286142495, + "grad_norm": 0.9750416454144903, + "learning_rate": 1.9664827450109285e-06, + "loss": 0.554356575012207, + "step": 1116 + }, + { + "epoch": 0.25755130274383214, + "grad_norm": 0.9682247695156199, + "learning_rate": 1.9663847929680352e-06, + "loss": 0.5999840497970581, + "step": 1117 + }, + { + "epoch": 0.2577818768734148, + "grad_norm": 1.0370889815397122, + "learning_rate": 1.9662867004513184e-06, + "loss": 0.5152497291564941, + "step": 1118 + }, + { + "epoch": 0.25801245100299747, + "grad_norm": 1.098663296506931, + "learning_rate": 1.966188467475036e-06, + "loss": 0.6333990097045898, + "step": 1119 + }, + { + "epoch": 0.25824302513258013, + "grad_norm": 0.9734180757824468, + 
"learning_rate": 1.9660900940534685e-06, + "loss": 0.5826340913772583, + "step": 1120 + }, + { + "epoch": 0.2584735992621628, + "grad_norm": 1.0258650855361047, + "learning_rate": 1.965991580200915e-06, + "loss": 0.5968586206436157, + "step": 1121 + }, + { + "epoch": 0.25870417339174545, + "grad_norm": 1.1400845768454182, + "learning_rate": 1.9658929259316945e-06, + "loss": 0.6164212226867676, + "step": 1122 + }, + { + "epoch": 0.2589347475213281, + "grad_norm": 0.9979393096335119, + "learning_rate": 1.9657941312601487e-06, + "loss": 0.6115970611572266, + "step": 1123 + }, + { + "epoch": 0.2591653216509108, + "grad_norm": 1.0595728674513747, + "learning_rate": 1.9656951962006376e-06, + "loss": 0.5490012168884277, + "step": 1124 + }, + { + "epoch": 0.25939589578049344, + "grad_norm": 0.9502072685023252, + "learning_rate": 1.9655961207675425e-06, + "loss": 0.6350439786911011, + "step": 1125 + }, + { + "epoch": 0.2596264699100761, + "grad_norm": 1.0657411847577343, + "learning_rate": 1.965496904975266e-06, + "loss": 0.5667803287506104, + "step": 1126 + }, + { + "epoch": 0.25985704403965876, + "grad_norm": 1.1821679518558437, + "learning_rate": 1.9653975488382287e-06, + "loss": 0.6443949937820435, + "step": 1127 + }, + { + "epoch": 0.2600876181692414, + "grad_norm": 0.9716559479774245, + "learning_rate": 1.965298052370874e-06, + "loss": 0.6085849404335022, + "step": 1128 + }, + { + "epoch": 0.2603181922988241, + "grad_norm": 1.0823001356947075, + "learning_rate": 1.9651984155876644e-06, + "loss": 0.6633332967758179, + "step": 1129 + }, + { + "epoch": 0.26054876642840674, + "grad_norm": 1.2848504053653516, + "learning_rate": 1.965098638503083e-06, + "loss": 0.5997219085693359, + "step": 1130 + }, + { + "epoch": 0.2607793405579894, + "grad_norm": 1.0454096533900064, + "learning_rate": 1.9649987211316333e-06, + "loss": 0.5425878167152405, + "step": 1131 + }, + { + "epoch": 0.26100991468757206, + "grad_norm": 1.1511928917305188, + "learning_rate": 1.9648986634878397e-06, + 
"loss": 0.5894105434417725, + "step": 1132 + }, + { + "epoch": 0.2612404888171547, + "grad_norm": 1.0098199878370706, + "learning_rate": 1.9647984655862464e-06, + "loss": 0.5967395901679993, + "step": 1133 + }, + { + "epoch": 0.2614710629467374, + "grad_norm": 1.026032503619318, + "learning_rate": 1.964698127441418e-06, + "loss": 0.5129253268241882, + "step": 1134 + }, + { + "epoch": 0.26170163707632005, + "grad_norm": 0.8680242413092717, + "learning_rate": 1.96459764906794e-06, + "loss": 0.4503140449523926, + "step": 1135 + }, + { + "epoch": 0.2619322112059027, + "grad_norm": 1.3487730716398616, + "learning_rate": 1.964497030480418e-06, + "loss": 0.5533326864242554, + "step": 1136 + }, + { + "epoch": 0.26216278533548537, + "grad_norm": 1.020191268815397, + "learning_rate": 1.9643962716934776e-06, + "loss": 0.695278525352478, + "step": 1137 + }, + { + "epoch": 0.26239335946506803, + "grad_norm": 1.0637915159693183, + "learning_rate": 1.9642953727217654e-06, + "loss": 0.5198212265968323, + "step": 1138 + }, + { + "epoch": 0.2626239335946507, + "grad_norm": 0.8691408428805534, + "learning_rate": 1.9641943335799476e-06, + "loss": 0.4348503351211548, + "step": 1139 + }, + { + "epoch": 0.26285450772423335, + "grad_norm": 1.075781292907759, + "learning_rate": 1.9640931542827116e-06, + "loss": 0.5241343975067139, + "step": 1140 + }, + { + "epoch": 0.263085081853816, + "grad_norm": 1.1170175690927264, + "learning_rate": 1.9639918348447654e-06, + "loss": 0.6621984839439392, + "step": 1141 + }, + { + "epoch": 0.2633156559833987, + "grad_norm": 0.9797970310895017, + "learning_rate": 1.9638903752808358e-06, + "loss": 0.6091395020484924, + "step": 1142 + }, + { + "epoch": 0.26354623011298134, + "grad_norm": 1.358580155566318, + "learning_rate": 1.963788775605671e-06, + "loss": 0.4857162833213806, + "step": 1143 + }, + { + "epoch": 0.263776804242564, + "grad_norm": 1.155872598215321, + "learning_rate": 1.9636870358340408e-06, + "loss": 0.5912413597106934, + "step": 1144 + }, + { 
+ "epoch": 0.26400737837214666, + "grad_norm": 0.9493926626803307, + "learning_rate": 1.9635851559807326e-06, + "loss": 0.6006268858909607, + "step": 1145 + }, + { + "epoch": 0.2642379525017293, + "grad_norm": 1.0095494395510323, + "learning_rate": 1.9634831360605567e-06, + "loss": 0.5580735802650452, + "step": 1146 + }, + { + "epoch": 0.264468526631312, + "grad_norm": 1.09443652681985, + "learning_rate": 1.9633809760883423e-06, + "loss": 0.5554602146148682, + "step": 1147 + }, + { + "epoch": 0.26469910076089465, + "grad_norm": 1.0073361110439816, + "learning_rate": 1.9632786760789393e-06, + "loss": 0.5648301839828491, + "step": 1148 + }, + { + "epoch": 0.2649296748904773, + "grad_norm": 0.9958775096480507, + "learning_rate": 1.9631762360472186e-06, + "loss": 0.5317412614822388, + "step": 1149 + }, + { + "epoch": 0.26516024902005997, + "grad_norm": 0.8377541227122274, + "learning_rate": 1.96307365600807e-06, + "loss": 0.5608310699462891, + "step": 1150 + }, + { + "epoch": 0.26539082314964263, + "grad_norm": 0.9709108194630034, + "learning_rate": 1.962970935976405e-06, + "loss": 0.49922698736190796, + "step": 1151 + }, + { + "epoch": 0.2656213972792253, + "grad_norm": 1.0372577064435262, + "learning_rate": 1.9628680759671556e-06, + "loss": 0.5840054750442505, + "step": 1152 + }, + { + "epoch": 0.26585197140880795, + "grad_norm": 1.1264168952681184, + "learning_rate": 1.9627650759952727e-06, + "loss": 0.6038475632667542, + "step": 1153 + }, + { + "epoch": 0.2660825455383906, + "grad_norm": 0.969212515968761, + "learning_rate": 1.9626619360757284e-06, + "loss": 0.5923193097114563, + "step": 1154 + }, + { + "epoch": 0.2663131196679733, + "grad_norm": 1.1606889211687668, + "learning_rate": 1.962558656223516e-06, + "loss": 0.5278598666191101, + "step": 1155 + }, + { + "epoch": 0.26654369379755594, + "grad_norm": 0.9873103600473375, + "learning_rate": 1.9624552364536472e-06, + "loss": 0.47691023349761963, + "step": 1156 + }, + { + "epoch": 0.2667742679271386, + 
"grad_norm": 0.9087676067471127, + "learning_rate": 1.962351676781156e-06, + "loss": 0.5801899433135986, + "step": 1157 + }, + { + "epoch": 0.26700484205672126, + "grad_norm": 1.253961482177072, + "learning_rate": 1.962247977221095e-06, + "loss": 0.5170506238937378, + "step": 1158 + }, + { + "epoch": 0.2672354161863039, + "grad_norm": 1.0951542684812736, + "learning_rate": 1.9621441377885387e-06, + "loss": 0.6114981174468994, + "step": 1159 + }, + { + "epoch": 0.2674659903158866, + "grad_norm": 1.0027892727643062, + "learning_rate": 1.9620401584985807e-06, + "loss": 0.6377004384994507, + "step": 1160 + }, + { + "epoch": 0.26769656444546924, + "grad_norm": 0.9961094597216124, + "learning_rate": 1.9619360393663356e-06, + "loss": 0.6177431344985962, + "step": 1161 + }, + { + "epoch": 0.2679271385750519, + "grad_norm": 1.1384478708718946, + "learning_rate": 1.9618317804069384e-06, + "loss": 0.579784095287323, + "step": 1162 + }, + { + "epoch": 0.26815771270463457, + "grad_norm": 0.8744752952973797, + "learning_rate": 1.9617273816355444e-06, + "loss": 0.6078776121139526, + "step": 1163 + }, + { + "epoch": 0.2683882868342172, + "grad_norm": 0.9801356210694869, + "learning_rate": 1.961622843067328e-06, + "loss": 0.5583093166351318, + "step": 1164 + }, + { + "epoch": 0.2686188609637999, + "grad_norm": 0.8741287294678143, + "learning_rate": 1.961518164717486e-06, + "loss": 0.46033143997192383, + "step": 1165 + }, + { + "epoch": 0.26884943509338255, + "grad_norm": 1.250568820610365, + "learning_rate": 1.961413346601234e-06, + "loss": 0.5637123584747314, + "step": 1166 + }, + { + "epoch": 0.2690800092229652, + "grad_norm": 1.0360456860810905, + "learning_rate": 1.9613083887338085e-06, + "loss": 0.5943595170974731, + "step": 1167 + }, + { + "epoch": 0.2693105833525478, + "grad_norm": 1.0495419121458136, + "learning_rate": 1.961203291130466e-06, + "loss": 0.5440319776535034, + "step": 1168 + }, + { + "epoch": 0.2695411574821305, + "grad_norm": 0.9704830315061433, + 
"learning_rate": 1.961098053806484e-06, + "loss": 0.5665608048439026, + "step": 1169 + }, + { + "epoch": 0.26977173161171314, + "grad_norm": 1.0522625707521382, + "learning_rate": 1.960992676777159e-06, + "loss": 0.5707683563232422, + "step": 1170 + }, + { + "epoch": 0.2700023057412958, + "grad_norm": 1.034604689259721, + "learning_rate": 1.9608871600578093e-06, + "loss": 0.5447777509689331, + "step": 1171 + }, + { + "epoch": 0.27023287987087846, + "grad_norm": 1.1920689559592121, + "learning_rate": 1.9607815036637726e-06, + "loss": 0.5598857402801514, + "step": 1172 + }, + { + "epoch": 0.2704634540004611, + "grad_norm": 1.208701571232948, + "learning_rate": 1.960675707610407e-06, + "loss": 0.558403491973877, + "step": 1173 + }, + { + "epoch": 0.2706940281300438, + "grad_norm": 1.3006493228897391, + "learning_rate": 1.960569771913091e-06, + "loss": 0.6696962118148804, + "step": 1174 + }, + { + "epoch": 0.27092460225962645, + "grad_norm": 1.0597715788538418, + "learning_rate": 1.960463696587224e-06, + "loss": 0.519884467124939, + "step": 1175 + }, + { + "epoch": 0.2711551763892091, + "grad_norm": 1.0090714718428708, + "learning_rate": 1.9603574816482243e-06, + "loss": 0.6440261602401733, + "step": 1176 + }, + { + "epoch": 0.27138575051879177, + "grad_norm": 1.1163188497552168, + "learning_rate": 1.9602511271115317e-06, + "loss": 0.48713982105255127, + "step": 1177 + }, + { + "epoch": 0.27161632464837443, + "grad_norm": 0.9570997011710476, + "learning_rate": 1.960144632992606e-06, + "loss": 0.5257129073143005, + "step": 1178 + }, + { + "epoch": 0.2718468987779571, + "grad_norm": 1.3308862733434774, + "learning_rate": 1.9600379993069272e-06, + "loss": 0.5220426917076111, + "step": 1179 + }, + { + "epoch": 0.27207747290753975, + "grad_norm": 1.0690404222828096, + "learning_rate": 1.9599312260699955e-06, + "loss": 0.569817304611206, + "step": 1180 + }, + { + "epoch": 0.2723080470371224, + "grad_norm": 1.0650857331550394, + "learning_rate": 1.9598243132973317e-06, + 
"loss": 0.4370031952857971, + "step": 1181 + }, + { + "epoch": 0.2725386211667051, + "grad_norm": 1.125403283606087, + "learning_rate": 1.959717261004476e-06, + "loss": 0.6060882210731506, + "step": 1182 + }, + { + "epoch": 0.27276919529628774, + "grad_norm": 0.9065361051198069, + "learning_rate": 1.9596100692069905e-06, + "loss": 0.5830891132354736, + "step": 1183 + }, + { + "epoch": 0.2729997694258704, + "grad_norm": 1.4570032441462188, + "learning_rate": 1.9595027379204556e-06, + "loss": 0.5689493417739868, + "step": 1184 + }, + { + "epoch": 0.27323034355545306, + "grad_norm": 1.3244280690129522, + "learning_rate": 1.9593952671604735e-06, + "loss": 0.5550887584686279, + "step": 1185 + }, + { + "epoch": 0.2734609176850357, + "grad_norm": 1.0207521269848765, + "learning_rate": 1.9592876569426665e-06, + "loss": 0.48127567768096924, + "step": 1186 + }, + { + "epoch": 0.2736914918146184, + "grad_norm": 1.071211669612227, + "learning_rate": 1.9591799072826764e-06, + "loss": 0.640753984451294, + "step": 1187 + }, + { + "epoch": 0.27392206594420104, + "grad_norm": 1.1730143666350425, + "learning_rate": 1.959072018196165e-06, + "loss": 0.5266000032424927, + "step": 1188 + }, + { + "epoch": 0.2741526400737837, + "grad_norm": 0.927867514508325, + "learning_rate": 1.958963989698817e-06, + "loss": 0.5586614608764648, + "step": 1189 + }, + { + "epoch": 0.27438321420336637, + "grad_norm": 1.1860842675481242, + "learning_rate": 1.9588558218063336e-06, + "loss": 0.5937967896461487, + "step": 1190 + }, + { + "epoch": 0.274613788332949, + "grad_norm": 1.3761930600193095, + "learning_rate": 1.958747514534439e-06, + "loss": 0.5887218713760376, + "step": 1191 + }, + { + "epoch": 0.2748443624625317, + "grad_norm": 1.0541442430853707, + "learning_rate": 1.9586390678988766e-06, + "loss": 0.5151614546775818, + "step": 1192 + }, + { + "epoch": 0.27507493659211435, + "grad_norm": 0.9782419657689414, + "learning_rate": 1.95853048191541e-06, + "loss": 0.5392748713493347, + "step": 1193 + }, 
+ { + "epoch": 0.275305510721697, + "grad_norm": 1.330179141409128, + "learning_rate": 1.9584217565998237e-06, + "loss": 0.5649560689926147, + "step": 1194 + }, + { + "epoch": 0.2755360848512797, + "grad_norm": 1.0628047614804303, + "learning_rate": 1.9583128919679213e-06, + "loss": 0.4888305962085724, + "step": 1195 + }, + { + "epoch": 0.27576665898086233, + "grad_norm": 0.8838567368205815, + "learning_rate": 1.9582038880355282e-06, + "loss": 0.5026978850364685, + "step": 1196 + }, + { + "epoch": 0.275997233110445, + "grad_norm": 1.094585503881071, + "learning_rate": 1.9580947448184887e-06, + "loss": 0.5358047485351562, + "step": 1197 + }, + { + "epoch": 0.27622780724002766, + "grad_norm": 1.0838231861798517, + "learning_rate": 1.957985462332668e-06, + "loss": 0.6145739555358887, + "step": 1198 + }, + { + "epoch": 0.2764583813696103, + "grad_norm": 1.1469394336927528, + "learning_rate": 1.957876040593952e-06, + "loss": 0.5155332684516907, + "step": 1199 + }, + { + "epoch": 0.276688955499193, + "grad_norm": 0.9936014396625975, + "learning_rate": 1.957766479618245e-06, + "loss": 0.48794522881507874, + "step": 1200 + }, + { + "epoch": 0.27691952962877564, + "grad_norm": 1.135029138979863, + "learning_rate": 1.957656779421474e-06, + "loss": 0.5851761102676392, + "step": 1201 + }, + { + "epoch": 0.2771501037583583, + "grad_norm": 1.0236207003793518, + "learning_rate": 1.957546940019584e-06, + "loss": 0.603874683380127, + "step": 1202 + }, + { + "epoch": 0.27738067788794096, + "grad_norm": 1.0658787224753152, + "learning_rate": 1.9574369614285426e-06, + "loss": 0.5022559762001038, + "step": 1203 + }, + { + "epoch": 0.2776112520175236, + "grad_norm": 1.4179237341040045, + "learning_rate": 1.9573268436643347e-06, + "loss": 0.6469730138778687, + "step": 1204 + }, + { + "epoch": 0.2778418261471063, + "grad_norm": 0.9207501665109726, + "learning_rate": 1.9572165867429685e-06, + "loss": 0.49918532371520996, + "step": 1205 + }, + { + "epoch": 0.27807240027668895, + 
"grad_norm": 0.9656836684424259, + "learning_rate": 1.95710619068047e-06, + "loss": 0.48623788356781006, + "step": 1206 + }, + { + "epoch": 0.2783029744062716, + "grad_norm": 0.9837814076450196, + "learning_rate": 1.956995655492887e-06, + "loss": 0.4868438243865967, + "step": 1207 + }, + { + "epoch": 0.27853354853585427, + "grad_norm": 1.3533879485069031, + "learning_rate": 1.9568849811962862e-06, + "loss": 0.5989904403686523, + "step": 1208 + }, + { + "epoch": 0.27876412266543693, + "grad_norm": 1.3345070230968985, + "learning_rate": 1.956774167806756e-06, + "loss": 0.5125104188919067, + "step": 1209 + }, + { + "epoch": 0.2789946967950196, + "grad_norm": 1.0305365483781255, + "learning_rate": 1.956663215340404e-06, + "loss": 0.5126978158950806, + "step": 1210 + }, + { + "epoch": 0.27922527092460225, + "grad_norm": 0.9524616726362105, + "learning_rate": 1.9565521238133576e-06, + "loss": 0.5009375810623169, + "step": 1211 + }, + { + "epoch": 0.2794558450541849, + "grad_norm": 1.0762476710184214, + "learning_rate": 1.956440893241766e-06, + "loss": 0.5601603984832764, + "step": 1212 + }, + { + "epoch": 0.2796864191837676, + "grad_norm": 1.2962045971613827, + "learning_rate": 1.956329523641797e-06, + "loss": 0.6310690641403198, + "step": 1213 + }, + { + "epoch": 0.27991699331335024, + "grad_norm": 1.0395130987242733, + "learning_rate": 1.95621801502964e-06, + "loss": 0.498830646276474, + "step": 1214 + }, + { + "epoch": 0.2801475674429329, + "grad_norm": 1.0547121574701517, + "learning_rate": 1.9561063674215036e-06, + "loss": 0.6612650156021118, + "step": 1215 + }, + { + "epoch": 0.28037814157251556, + "grad_norm": 1.0369778810130763, + "learning_rate": 1.9559945808336166e-06, + "loss": 0.5651615858078003, + "step": 1216 + }, + { + "epoch": 0.2806087157020982, + "grad_norm": 1.0028009497915646, + "learning_rate": 1.955882655282229e-06, + "loss": 0.5675203800201416, + "step": 1217 + }, + { + "epoch": 0.2808392898316809, + "grad_norm": 1.0910384567165883, + 
"learning_rate": 1.9557705907836095e-06, + "loss": 0.5691455006599426, + "step": 1218 + }, + { + "epoch": 0.28106986396126354, + "grad_norm": 1.2440322291047097, + "learning_rate": 1.955658387354048e-06, + "loss": 0.6018673181533813, + "step": 1219 + }, + { + "epoch": 0.2813004380908462, + "grad_norm": 0.8594681913500082, + "learning_rate": 1.955546045009855e-06, + "loss": 0.5188831090927124, + "step": 1220 + }, + { + "epoch": 0.28153101222042887, + "grad_norm": 0.9611802055135819, + "learning_rate": 1.9554335637673596e-06, + "loss": 0.5161044597625732, + "step": 1221 + }, + { + "epoch": 0.28176158635001153, + "grad_norm": 1.0764912433641416, + "learning_rate": 1.9553209436429132e-06, + "loss": 0.5651452541351318, + "step": 1222 + }, + { + "epoch": 0.2819921604795942, + "grad_norm": 1.0362033432012678, + "learning_rate": 1.9552081846528858e-06, + "loss": 0.5763273239135742, + "step": 1223 + }, + { + "epoch": 0.28222273460917685, + "grad_norm": 1.0512305083546745, + "learning_rate": 1.9550952868136677e-06, + "loss": 0.6379664540290833, + "step": 1224 + }, + { + "epoch": 0.2824533087387595, + "grad_norm": 0.966358468685478, + "learning_rate": 1.95498225014167e-06, + "loss": 0.4021342396736145, + "step": 1225 + }, + { + "epoch": 0.2826838828683422, + "grad_norm": 1.3065298085361052, + "learning_rate": 1.954869074653324e-06, + "loss": 0.49230247735977173, + "step": 1226 + }, + { + "epoch": 0.28291445699792483, + "grad_norm": 0.9198430971109288, + "learning_rate": 1.954755760365081e-06, + "loss": 0.5921554565429688, + "step": 1227 + }, + { + "epoch": 0.2831450311275075, + "grad_norm": 1.2338068239582654, + "learning_rate": 1.954642307293412e-06, + "loss": 0.6495868563652039, + "step": 1228 + }, + { + "epoch": 0.28337560525709016, + "grad_norm": 1.0310593371372254, + "learning_rate": 1.954528715454808e-06, + "loss": 0.5699795484542847, + "step": 1229 + }, + { + "epoch": 0.2836061793866728, + "grad_norm": 1.3462988930710962, + "learning_rate": 1.9544149848657816e-06, + 
"loss": 0.582231879234314, + "step": 1230 + }, + { + "epoch": 0.2838367535162555, + "grad_norm": 1.0033811085419764, + "learning_rate": 1.9543011155428647e-06, + "loss": 0.5952359437942505, + "step": 1231 + }, + { + "epoch": 0.28406732764583814, + "grad_norm": 1.150479906025031, + "learning_rate": 1.9541871075026092e-06, + "loss": 0.646816611289978, + "step": 1232 + }, + { + "epoch": 0.2842979017754208, + "grad_norm": 1.2509776515814615, + "learning_rate": 1.9540729607615866e-06, + "loss": 0.5781043767929077, + "step": 1233 + }, + { + "epoch": 0.28452847590500346, + "grad_norm": 1.1718295930905136, + "learning_rate": 1.95395867533639e-06, + "loss": 0.609764814376831, + "step": 1234 + }, + { + "epoch": 0.2847590500345861, + "grad_norm": 1.2826152398089232, + "learning_rate": 1.9538442512436325e-06, + "loss": 0.4673759341239929, + "step": 1235 + }, + { + "epoch": 0.2849896241641688, + "grad_norm": 1.1343052125955835, + "learning_rate": 1.953729688499946e-06, + "loss": 0.6310999393463135, + "step": 1236 + }, + { + "epoch": 0.28522019829375145, + "grad_norm": 1.075568996273352, + "learning_rate": 1.953614987121983e-06, + "loss": 0.5103853344917297, + "step": 1237 + }, + { + "epoch": 0.2854507724233341, + "grad_norm": 1.1329951189185654, + "learning_rate": 1.9535001471264178e-06, + "loss": 0.5735328197479248, + "step": 1238 + }, + { + "epoch": 0.28568134655291677, + "grad_norm": 1.010063337652323, + "learning_rate": 1.953385168529942e-06, + "loss": 0.5617454051971436, + "step": 1239 + }, + { + "epoch": 0.28591192068249943, + "grad_norm": 1.1392481671873862, + "learning_rate": 1.9532700513492705e-06, + "loss": 0.49873489141464233, + "step": 1240 + }, + { + "epoch": 0.2861424948120821, + "grad_norm": 0.9923008758606798, + "learning_rate": 1.9531547956011353e-06, + "loss": 0.49185073375701904, + "step": 1241 + }, + { + "epoch": 0.28637306894166475, + "grad_norm": 1.1119890456844754, + "learning_rate": 1.9530394013022907e-06, + "loss": 0.6016734838485718, + "step": 1242 + 
}, + { + "epoch": 0.2866036430712474, + "grad_norm": 0.984310677257317, + "learning_rate": 1.9529238684695105e-06, + "loss": 0.5922054052352905, + "step": 1243 + }, + { + "epoch": 0.2868342172008301, + "grad_norm": 1.2933601588161594, + "learning_rate": 1.952808197119588e-06, + "loss": 0.6498355269432068, + "step": 1244 + }, + { + "epoch": 0.28706479133041274, + "grad_norm": 1.106145681286101, + "learning_rate": 1.9526923872693382e-06, + "loss": 0.5564426183700562, + "step": 1245 + }, + { + "epoch": 0.2872953654599954, + "grad_norm": 1.0410162813090216, + "learning_rate": 1.9525764389355945e-06, + "loss": 0.6144154071807861, + "step": 1246 + }, + { + "epoch": 0.28752593958957806, + "grad_norm": 0.9304288925500919, + "learning_rate": 1.9524603521352116e-06, + "loss": 0.5958914756774902, + "step": 1247 + }, + { + "epoch": 0.2877565137191607, + "grad_norm": 1.167763375182377, + "learning_rate": 1.952344126885063e-06, + "loss": 0.5471549034118652, + "step": 1248 + }, + { + "epoch": 0.2879870878487434, + "grad_norm": 1.0658282088084226, + "learning_rate": 1.952227763202044e-06, + "loss": 0.5512329936027527, + "step": 1249 + }, + { + "epoch": 0.28821766197832605, + "grad_norm": 0.9336952567830841, + "learning_rate": 1.9521112611030695e-06, + "loss": 0.5545130968093872, + "step": 1250 + }, + { + "epoch": 0.2884482361079087, + "grad_norm": 0.9540157404500241, + "learning_rate": 1.9519946206050734e-06, + "loss": 0.5409479737281799, + "step": 1251 + }, + { + "epoch": 0.28867881023749137, + "grad_norm": 1.0425656776824677, + "learning_rate": 1.9518778417250114e-06, + "loss": 0.5248778462409973, + "step": 1252 + }, + { + "epoch": 0.28890938436707403, + "grad_norm": 1.1108036883068904, + "learning_rate": 1.951760924479858e-06, + "loss": 0.4985620975494385, + "step": 1253 + }, + { + "epoch": 0.2891399584966567, + "grad_norm": 1.1956376798663733, + "learning_rate": 1.951643868886608e-06, + "loss": 0.5470424890518188, + "step": 1254 + }, + { + "epoch": 0.28937053262623935, + 
"grad_norm": 0.830517770820401, + "learning_rate": 1.9515266749622776e-06, + "loss": 0.5082905292510986, + "step": 1255 + }, + { + "epoch": 0.289601106755822, + "grad_norm": 1.1321002460273393, + "learning_rate": 1.9514093427239013e-06, + "loss": 0.5734596252441406, + "step": 1256 + }, + { + "epoch": 0.2898316808854047, + "grad_norm": 1.133005147672039, + "learning_rate": 1.951291872188535e-06, + "loss": 0.4727100431919098, + "step": 1257 + }, + { + "epoch": 0.29006225501498734, + "grad_norm": 1.044180363768592, + "learning_rate": 1.951174263373254e-06, + "loss": 0.6727551221847534, + "step": 1258 + }, + { + "epoch": 0.29029282914457, + "grad_norm": 0.9491498247436025, + "learning_rate": 1.9510565162951534e-06, + "loss": 0.5225725173950195, + "step": 1259 + }, + { + "epoch": 0.29052340327415266, + "grad_norm": 0.9861385624887246, + "learning_rate": 1.95093863097135e-06, + "loss": 0.46537530422210693, + "step": 1260 + }, + { + "epoch": 0.2907539774037353, + "grad_norm": 1.0433291271591505, + "learning_rate": 1.950820607418979e-06, + "loss": 0.4729498624801636, + "step": 1261 + }, + { + "epoch": 0.290984551533318, + "grad_norm": 1.0319083654914931, + "learning_rate": 1.950702445655196e-06, + "loss": 0.519434928894043, + "step": 1262 + }, + { + "epoch": 0.29121512566290064, + "grad_norm": 1.0839075745171884, + "learning_rate": 1.9505841456971784e-06, + "loss": 0.5487297177314758, + "step": 1263 + }, + { + "epoch": 0.2914456997924833, + "grad_norm": 0.9970964597897494, + "learning_rate": 1.9504657075621207e-06, + "loss": 0.6228574514389038, + "step": 1264 + }, + { + "epoch": 0.29167627392206597, + "grad_norm": 1.076219157850212, + "learning_rate": 1.95034713126724e-06, + "loss": 0.486205518245697, + "step": 1265 + }, + { + "epoch": 0.2919068480516486, + "grad_norm": 1.220321517878089, + "learning_rate": 1.950228416829772e-06, + "loss": 0.6465567350387573, + "step": 1266 + }, + { + "epoch": 0.2921374221812313, + "grad_norm": 1.0227736343783316, + "learning_rate": 
1.9501095642669735e-06, + "loss": 0.5160506963729858, + "step": 1267 + }, + { + "epoch": 0.29236799631081395, + "grad_norm": 1.0494858452172506, + "learning_rate": 1.9499905735961206e-06, + "loss": 0.47334107756614685, + "step": 1268 + }, + { + "epoch": 0.2925985704403966, + "grad_norm": 1.1563719640673416, + "learning_rate": 1.9498714448345103e-06, + "loss": 0.46453380584716797, + "step": 1269 + }, + { + "epoch": 0.29282914456997927, + "grad_norm": 0.9754273704287023, + "learning_rate": 1.9497521779994582e-06, + "loss": 0.5617728233337402, + "step": 1270 + }, + { + "epoch": 0.29305971869956193, + "grad_norm": 1.3129160300173046, + "learning_rate": 1.9496327731083026e-06, + "loss": 0.6129153966903687, + "step": 1271 + }, + { + "epoch": 0.2932902928291446, + "grad_norm": 1.2949114738936178, + "learning_rate": 1.9495132301783983e-06, + "loss": 0.4903183579444885, + "step": 1272 + }, + { + "epoch": 0.29352086695872726, + "grad_norm": 1.1167146830002543, + "learning_rate": 1.9493935492271235e-06, + "loss": 0.5087980628013611, + "step": 1273 + }, + { + "epoch": 0.2937514410883099, + "grad_norm": 1.0447162269466075, + "learning_rate": 1.949273730271874e-06, + "loss": 0.5102910399436951, + "step": 1274 + }, + { + "epoch": 0.2939820152178926, + "grad_norm": 1.0971342006057034, + "learning_rate": 1.9491537733300674e-06, + "loss": 0.5581132769584656, + "step": 1275 + }, + { + "epoch": 0.29421258934747524, + "grad_norm": 1.0166201989797772, + "learning_rate": 1.949033678419141e-06, + "loss": 0.5668213367462158, + "step": 1276 + }, + { + "epoch": 0.2944431634770579, + "grad_norm": 1.1646263878722904, + "learning_rate": 1.9489134455565503e-06, + "loss": 0.5352080464363098, + "step": 1277 + }, + { + "epoch": 0.29467373760664056, + "grad_norm": 1.0375138174364513, + "learning_rate": 1.948793074759774e-06, + "loss": 0.47343915700912476, + "step": 1278 + }, + { + "epoch": 0.29490431173622317, + "grad_norm": 1.2395532163204355, + "learning_rate": 1.9486725660463084e-06, + "loss": 
0.5169435143470764, + "step": 1279 + }, + { + "epoch": 0.29513488586580583, + "grad_norm": 1.2035025560649288, + "learning_rate": 1.9485519194336707e-06, + "loss": 0.4801402688026428, + "step": 1280 + }, + { + "epoch": 0.2953654599953885, + "grad_norm": 1.2115883619737033, + "learning_rate": 1.9484311349393984e-06, + "loss": 0.6537381410598755, + "step": 1281 + }, + { + "epoch": 0.29559603412497115, + "grad_norm": 0.9306094110342265, + "learning_rate": 1.9483102125810483e-06, + "loss": 0.5160089135169983, + "step": 1282 + }, + { + "epoch": 0.2958266082545538, + "grad_norm": 1.0525832312633145, + "learning_rate": 1.9481891523761985e-06, + "loss": 0.5332320332527161, + "step": 1283 + }, + { + "epoch": 0.2960571823841365, + "grad_norm": 0.9112280719646961, + "learning_rate": 1.9480679543424453e-06, + "loss": 0.5076215267181396, + "step": 1284 + }, + { + "epoch": 0.29628775651371914, + "grad_norm": 1.1265706213450601, + "learning_rate": 1.947946618497407e-06, + "loss": 0.607105016708374, + "step": 1285 + }, + { + "epoch": 0.2965183306433018, + "grad_norm": 1.076771624610464, + "learning_rate": 1.9478251448587203e-06, + "loss": 0.6265846490859985, + "step": 1286 + }, + { + "epoch": 0.29674890477288446, + "grad_norm": 1.164803442921585, + "learning_rate": 1.9477035334440426e-06, + "loss": 0.5313390493392944, + "step": 1287 + }, + { + "epoch": 0.2969794789024671, + "grad_norm": 1.0583207692233336, + "learning_rate": 1.947581784271052e-06, + "loss": 0.5059833526611328, + "step": 1288 + }, + { + "epoch": 0.2972100530320498, + "grad_norm": 1.171630953302918, + "learning_rate": 1.9474598973574455e-06, + "loss": 0.5550922155380249, + "step": 1289 + }, + { + "epoch": 0.29744062716163244, + "grad_norm": 0.9941233964259298, + "learning_rate": 1.947337872720941e-06, + "loss": 0.5594801306724548, + "step": 1290 + }, + { + "epoch": 0.2976712012912151, + "grad_norm": 1.1672729516761162, + "learning_rate": 1.9472157103792753e-06, + "loss": 0.6404933333396912, + "step": 1291 + }, + { + 
"epoch": 0.29790177542079777, + "grad_norm": 1.216836258446271, + "learning_rate": 1.947093410350206e-06, + "loss": 0.5884830355644226, + "step": 1292 + }, + { + "epoch": 0.2981323495503804, + "grad_norm": 1.313520165154308, + "learning_rate": 1.9469709726515114e-06, + "loss": 0.5723487138748169, + "step": 1293 + }, + { + "epoch": 0.2983629236799631, + "grad_norm": 1.047985941483805, + "learning_rate": 1.946848397300989e-06, + "loss": 0.5298895239830017, + "step": 1294 + }, + { + "epoch": 0.29859349780954575, + "grad_norm": 1.009793366380185, + "learning_rate": 1.9467256843164557e-06, + "loss": 0.6118877530097961, + "step": 1295 + }, + { + "epoch": 0.2988240719391284, + "grad_norm": 1.2369344702112195, + "learning_rate": 1.9466028337157498e-06, + "loss": 0.6014599800109863, + "step": 1296 + }, + { + "epoch": 0.29905464606871107, + "grad_norm": 0.9889478752374168, + "learning_rate": 1.9464798455167278e-06, + "loss": 0.5861071944236755, + "step": 1297 + }, + { + "epoch": 0.29928522019829373, + "grad_norm": 1.238998066636259, + "learning_rate": 1.9463567197372684e-06, + "loss": 0.5863409042358398, + "step": 1298 + }, + { + "epoch": 0.2995157943278764, + "grad_norm": 1.217300214744882, + "learning_rate": 1.9462334563952687e-06, + "loss": 0.6576352119445801, + "step": 1299 + }, + { + "epoch": 0.29974636845745906, + "grad_norm": 1.074029788035818, + "learning_rate": 1.9461100555086463e-06, + "loss": 0.5458395481109619, + "step": 1300 + }, + { + "epoch": 0.2999769425870417, + "grad_norm": 1.2759220903954522, + "learning_rate": 1.945986517095339e-06, + "loss": 0.48430997133255005, + "step": 1301 + }, + { + "epoch": 0.3002075167166244, + "grad_norm": 1.2436119574902915, + "learning_rate": 1.945862841173304e-06, + "loss": 0.4212522506713867, + "step": 1302 + }, + { + "epoch": 0.30043809084620704, + "grad_norm": 1.1823128908009017, + "learning_rate": 1.9457390277605188e-06, + "loss": 0.5671685934066772, + "step": 1303 + }, + { + "epoch": 0.3006686649757897, + "grad_norm": 
1.0831721181422946, + "learning_rate": 1.945615076874981e-06, + "loss": 0.5350982546806335, + "step": 1304 + }, + { + "epoch": 0.30089923910537236, + "grad_norm": 0.9247033101108441, + "learning_rate": 1.9454909885347088e-06, + "loss": 0.45792657136917114, + "step": 1305 + }, + { + "epoch": 0.301129813234955, + "grad_norm": 1.0473073919925908, + "learning_rate": 1.9453667627577387e-06, + "loss": 0.5644106864929199, + "step": 1306 + }, + { + "epoch": 0.3013603873645377, + "grad_norm": 1.3332547603439018, + "learning_rate": 1.945242399562129e-06, + "loss": 0.554198145866394, + "step": 1307 + }, + { + "epoch": 0.30159096149412035, + "grad_norm": 0.9232575644574793, + "learning_rate": 1.9451178989659565e-06, + "loss": 0.5073474049568176, + "step": 1308 + }, + { + "epoch": 0.301821535623703, + "grad_norm": 1.0206284762622284, + "learning_rate": 1.944993260987319e-06, + "loss": 0.569359302520752, + "step": 1309 + }, + { + "epoch": 0.30205210975328567, + "grad_norm": 1.0382686851233573, + "learning_rate": 1.944868485644334e-06, + "loss": 0.5011791586875916, + "step": 1310 + }, + { + "epoch": 0.30228268388286833, + "grad_norm": 0.9869955270819804, + "learning_rate": 1.9447435729551384e-06, + "loss": 0.41121986508369446, + "step": 1311 + }, + { + "epoch": 0.302513258012451, + "grad_norm": 1.3489170954309295, + "learning_rate": 1.9446185229378896e-06, + "loss": 0.5615876913070679, + "step": 1312 + }, + { + "epoch": 0.30274383214203365, + "grad_norm": 1.2244043366760826, + "learning_rate": 1.9444933356107652e-06, + "loss": 0.5450695157051086, + "step": 1313 + }, + { + "epoch": 0.3029744062716163, + "grad_norm": 1.0371383598149113, + "learning_rate": 1.9443680109919626e-06, + "loss": 0.522222101688385, + "step": 1314 + }, + { + "epoch": 0.303204980401199, + "grad_norm": 0.9638880730108786, + "learning_rate": 1.9442425490996984e-06, + "loss": 0.5081876516342163, + "step": 1315 + }, + { + "epoch": 0.30343555453078164, + "grad_norm": 1.1506604859779093, + "learning_rate": 
1.9441169499522104e-06, + "loss": 0.4955870509147644, + "step": 1316 + }, + { + "epoch": 0.3036661286603643, + "grad_norm": 1.0185303369767542, + "learning_rate": 1.9439912135677553e-06, + "loss": 0.5098991990089417, + "step": 1317 + }, + { + "epoch": 0.30389670278994696, + "grad_norm": 0.9949182918503017, + "learning_rate": 1.94386533996461e-06, + "loss": 0.5686191320419312, + "step": 1318 + }, + { + "epoch": 0.3041272769195296, + "grad_norm": 1.180090494573931, + "learning_rate": 1.943739329161072e-06, + "loss": 0.606401264667511, + "step": 1319 + }, + { + "epoch": 0.3043578510491123, + "grad_norm": 1.0411002752171188, + "learning_rate": 1.9436131811754576e-06, + "loss": 0.49249163269996643, + "step": 1320 + }, + { + "epoch": 0.30458842517869494, + "grad_norm": 1.1079741007732102, + "learning_rate": 1.9434868960261047e-06, + "loss": 0.5373499989509583, + "step": 1321 + }, + { + "epoch": 0.3048189993082776, + "grad_norm": 1.4236897413447511, + "learning_rate": 1.943360473731369e-06, + "loss": 0.4568977355957031, + "step": 1322 + }, + { + "epoch": 0.30504957343786027, + "grad_norm": 1.034905077800575, + "learning_rate": 1.943233914309628e-06, + "loss": 0.562126636505127, + "step": 1323 + }, + { + "epoch": 0.3052801475674429, + "grad_norm": 1.343019932527111, + "learning_rate": 1.943107217779278e-06, + "loss": 0.5795382261276245, + "step": 1324 + }, + { + "epoch": 0.3055107216970256, + "grad_norm": 0.9852538064889438, + "learning_rate": 1.942980384158736e-06, + "loss": 0.5671530365943909, + "step": 1325 + }, + { + "epoch": 0.30574129582660825, + "grad_norm": 0.8981413519731547, + "learning_rate": 1.942853413466438e-06, + "loss": 0.5511401891708374, + "step": 1326 + }, + { + "epoch": 0.3059718699561909, + "grad_norm": 1.1491379693233763, + "learning_rate": 1.942726305720841e-06, + "loss": 0.5712149739265442, + "step": 1327 + }, + { + "epoch": 0.3062024440857736, + "grad_norm": 1.171535283311252, + "learning_rate": 1.9425990609404215e-06, + "loss": 0.5181496739387512, 
+ "step": 1328 + }, + { + "epoch": 0.30643301821535623, + "grad_norm": 1.1968505005842098, + "learning_rate": 1.9424716791436753e-06, + "loss": 0.5758726596832275, + "step": 1329 + }, + { + "epoch": 0.3066635923449389, + "grad_norm": 0.9714627365066287, + "learning_rate": 1.942344160349119e-06, + "loss": 0.5757049322128296, + "step": 1330 + }, + { + "epoch": 0.30689416647452156, + "grad_norm": 0.9271633895158528, + "learning_rate": 1.9422165045752886e-06, + "loss": 0.47352534532546997, + "step": 1331 + }, + { + "epoch": 0.3071247406041042, + "grad_norm": 1.1418817146577889, + "learning_rate": 1.94208871184074e-06, + "loss": 0.5940845012664795, + "step": 1332 + }, + { + "epoch": 0.3073553147336869, + "grad_norm": 1.0590875448509756, + "learning_rate": 1.9419607821640496e-06, + "loss": 0.5225652456283569, + "step": 1333 + }, + { + "epoch": 0.30758588886326954, + "grad_norm": 1.0803440664833228, + "learning_rate": 1.9418327155638126e-06, + "loss": 0.5253404378890991, + "step": 1334 + }, + { + "epoch": 0.3078164629928522, + "grad_norm": 0.9995333811538123, + "learning_rate": 1.941704512058646e-06, + "loss": 0.5637744665145874, + "step": 1335 + }, + { + "epoch": 0.30804703712243486, + "grad_norm": 0.9947267518967771, + "learning_rate": 1.941576171667184e-06, + "loss": 0.48273587226867676, + "step": 1336 + }, + { + "epoch": 0.3082776112520175, + "grad_norm": 0.9569882979404835, + "learning_rate": 1.9414476944080833e-06, + "loss": 0.5989019870758057, + "step": 1337 + }, + { + "epoch": 0.3085081853816002, + "grad_norm": 1.1125936950721667, + "learning_rate": 1.9413190803000183e-06, + "loss": 0.5231547951698303, + "step": 1338 + }, + { + "epoch": 0.30873875951118285, + "grad_norm": 1.0300527191348772, + "learning_rate": 1.9411903293616853e-06, + "loss": 0.5125160217285156, + "step": 1339 + }, + { + "epoch": 0.3089693336407655, + "grad_norm": 1.251133475270548, + "learning_rate": 1.9410614416117993e-06, + "loss": 0.50664883852005, + "step": 1340 + }, + { + "epoch": 
0.30919990777034817, + "grad_norm": 1.063411016331963, + "learning_rate": 1.9409324170690955e-06, + "loss": 0.5555824637413025, + "step": 1341 + }, + { + "epoch": 0.30943048189993083, + "grad_norm": 0.9621002533491156, + "learning_rate": 1.940803255752329e-06, + "loss": 0.5182096362113953, + "step": 1342 + }, + { + "epoch": 0.3096610560295135, + "grad_norm": 1.0359415249922332, + "learning_rate": 1.940673957680274e-06, + "loss": 0.5202751159667969, + "step": 1343 + }, + { + "epoch": 0.30989163015909615, + "grad_norm": 0.9908809268815285, + "learning_rate": 1.940544522871726e-06, + "loss": 0.49791598320007324, + "step": 1344 + }, + { + "epoch": 0.3101222042886788, + "grad_norm": 0.990495096784543, + "learning_rate": 1.9404149513454995e-06, + "loss": 0.48691657185554504, + "step": 1345 + }, + { + "epoch": 0.3103527784182615, + "grad_norm": 1.0649987362093034, + "learning_rate": 1.9402852431204293e-06, + "loss": 0.5726481676101685, + "step": 1346 + }, + { + "epoch": 0.31058335254784414, + "grad_norm": 0.9750258824279312, + "learning_rate": 1.940155398215369e-06, + "loss": 0.5443148016929626, + "step": 1347 + }, + { + "epoch": 0.3108139266774268, + "grad_norm": 1.1005441671416878, + "learning_rate": 1.9400254166491935e-06, + "loss": 0.5767767429351807, + "step": 1348 + }, + { + "epoch": 0.31104450080700946, + "grad_norm": 1.059167179602632, + "learning_rate": 1.9398952984407967e-06, + "loss": 0.5208882689476013, + "step": 1349 + }, + { + "epoch": 0.3112750749365921, + "grad_norm": 0.8304820941291429, + "learning_rate": 1.939765043609093e-06, + "loss": 0.5152548551559448, + "step": 1350 + }, + { + "epoch": 0.3115056490661748, + "grad_norm": 1.1875548530259965, + "learning_rate": 1.939634652173016e-06, + "loss": 0.42542198300361633, + "step": 1351 + }, + { + "epoch": 0.31173622319575744, + "grad_norm": 1.1424220130032787, + "learning_rate": 1.9395041241515197e-06, + "loss": 0.6471734046936035, + "step": 1352 + }, + { + "epoch": 0.3119667973253401, + "grad_norm": 
1.1191897598164906, + "learning_rate": 1.9393734595635767e-06, + "loss": 0.6257486343383789, + "step": 1353 + }, + { + "epoch": 0.31219737145492277, + "grad_norm": 1.1348942815080005, + "learning_rate": 1.9392426584281815e-06, + "loss": 0.562118649482727, + "step": 1354 + }, + { + "epoch": 0.31242794558450543, + "grad_norm": 1.223083488663697, + "learning_rate": 1.939111720764347e-06, + "loss": 0.5602811574935913, + "step": 1355 + }, + { + "epoch": 0.3126585197140881, + "grad_norm": 1.041642546930775, + "learning_rate": 1.9389806465911056e-06, + "loss": 0.54469895362854, + "step": 1356 + }, + { + "epoch": 0.31288909384367075, + "grad_norm": 1.159034123821878, + "learning_rate": 1.9388494359275115e-06, + "loss": 0.5262914896011353, + "step": 1357 + }, + { + "epoch": 0.3131196679732534, + "grad_norm": 1.184281074720895, + "learning_rate": 1.938718088792637e-06, + "loss": 0.6137207746505737, + "step": 1358 + }, + { + "epoch": 0.3133502421028361, + "grad_norm": 1.0740150522099046, + "learning_rate": 1.9385866052055744e-06, + "loss": 0.5792986750602722, + "step": 1359 + }, + { + "epoch": 0.31358081623241874, + "grad_norm": 0.9946259290534466, + "learning_rate": 1.938454985185437e-06, + "loss": 0.4953799843788147, + "step": 1360 + }, + { + "epoch": 0.3138113903620014, + "grad_norm": 1.2906978669163651, + "learning_rate": 1.938323228751356e-06, + "loss": 0.5722379684448242, + "step": 1361 + }, + { + "epoch": 0.31404196449158406, + "grad_norm": 0.9996513214249106, + "learning_rate": 1.938191335922484e-06, + "loss": 0.513651967048645, + "step": 1362 + }, + { + "epoch": 0.3142725386211667, + "grad_norm": 1.0509635344773647, + "learning_rate": 1.9380593067179935e-06, + "loss": 0.4911235272884369, + "step": 1363 + }, + { + "epoch": 0.3145031127507494, + "grad_norm": 1.0029036193486218, + "learning_rate": 1.9379271411570753e-06, + "loss": 0.5478678941726685, + "step": 1364 + }, + { + "epoch": 0.31473368688033204, + "grad_norm": 0.8901015021428158, + "learning_rate": 
1.9377948392589417e-06, + "loss": 0.46698129177093506, + "step": 1365 + }, + { + "epoch": 0.3149642610099147, + "grad_norm": 1.3327357773387452, + "learning_rate": 1.9376624010428243e-06, + "loss": 0.5081343650817871, + "step": 1366 + }, + { + "epoch": 0.31519483513949736, + "grad_norm": 1.1172038301784757, + "learning_rate": 1.9375298265279735e-06, + "loss": 0.583903431892395, + "step": 1367 + }, + { + "epoch": 0.31542540926908, + "grad_norm": 1.0403870552320973, + "learning_rate": 1.937397115733661e-06, + "loss": 0.5249435901641846, + "step": 1368 + }, + { + "epoch": 0.3156559833986627, + "grad_norm": 1.184866053048378, + "learning_rate": 1.9372642686791777e-06, + "loss": 0.5463817119598389, + "step": 1369 + }, + { + "epoch": 0.31588655752824535, + "grad_norm": 1.2179956171685966, + "learning_rate": 1.9371312853838338e-06, + "loss": 0.4634520709514618, + "step": 1370 + }, + { + "epoch": 0.316117131657828, + "grad_norm": 1.2606144259751904, + "learning_rate": 1.93699816586696e-06, + "loss": 0.6018840074539185, + "step": 1371 + }, + { + "epoch": 0.31634770578741067, + "grad_norm": 1.1911067691024062, + "learning_rate": 1.9368649101479072e-06, + "loss": 0.5507885813713074, + "step": 1372 + }, + { + "epoch": 0.31657827991699333, + "grad_norm": 0.9991148637431415, + "learning_rate": 1.9367315182460442e-06, + "loss": 0.5520491600036621, + "step": 1373 + }, + { + "epoch": 0.316808854046576, + "grad_norm": 1.2455223208218802, + "learning_rate": 1.936597990180762e-06, + "loss": 0.5410347580909729, + "step": 1374 + }, + { + "epoch": 0.31703942817615866, + "grad_norm": 1.6049117927004484, + "learning_rate": 1.9364643259714694e-06, + "loss": 0.5771749019622803, + "step": 1375 + }, + { + "epoch": 0.3172700023057413, + "grad_norm": 1.123905862633382, + "learning_rate": 1.9363305256375965e-06, + "loss": 0.5071828365325928, + "step": 1376 + }, + { + "epoch": 0.317500576435324, + "grad_norm": 1.1240180544134455, + "learning_rate": 1.936196589198592e-06, + "loss": 
0.558908224105835, + "step": 1377 + }, + { + "epoch": 0.31773115056490664, + "grad_norm": 1.1984781772064843, + "learning_rate": 1.9360625166739256e-06, + "loss": 0.5509803295135498, + "step": 1378 + }, + { + "epoch": 0.3179617246944893, + "grad_norm": 1.1703050385431384, + "learning_rate": 1.935928308083085e-06, + "loss": 0.5333945155143738, + "step": 1379 + }, + { + "epoch": 0.31819229882407196, + "grad_norm": 1.2141630137674275, + "learning_rate": 1.93579396344558e-06, + "loss": 0.5337819457054138, + "step": 1380 + }, + { + "epoch": 0.3184228729536546, + "grad_norm": 1.161230429960398, + "learning_rate": 1.9356594827809387e-06, + "loss": 0.5286899209022522, + "step": 1381 + }, + { + "epoch": 0.3186534470832373, + "grad_norm": 1.3042082103630104, + "learning_rate": 1.9355248661087083e-06, + "loss": 0.5915369987487793, + "step": 1382 + }, + { + "epoch": 0.31888402121281995, + "grad_norm": 1.2725859277548193, + "learning_rate": 1.9353901134484575e-06, + "loss": 0.5843492746353149, + "step": 1383 + }, + { + "epoch": 0.3191145953424026, + "grad_norm": 1.0723106790063142, + "learning_rate": 1.935255224819774e-06, + "loss": 0.5015528202056885, + "step": 1384 + }, + { + "epoch": 0.31934516947198527, + "grad_norm": 1.2053658641154292, + "learning_rate": 1.935120200242265e-06, + "loss": 0.5650957822799683, + "step": 1385 + }, + { + "epoch": 0.31957574360156793, + "grad_norm": 0.9993056241167617, + "learning_rate": 1.9349850397355576e-06, + "loss": 0.5452740788459778, + "step": 1386 + }, + { + "epoch": 0.3198063177311506, + "grad_norm": 1.138341645042275, + "learning_rate": 1.934849743319299e-06, + "loss": 0.5069071054458618, + "step": 1387 + }, + { + "epoch": 0.32003689186073325, + "grad_norm": 1.3097523217194937, + "learning_rate": 1.934714311013156e-06, + "loss": 0.5350260734558105, + "step": 1388 + }, + { + "epoch": 0.3202674659903159, + "grad_norm": 1.065882395696928, + "learning_rate": 1.9345787428368146e-06, + "loss": 0.6002014875411987, + "step": 1389 + }, + { + 
"epoch": 0.3204980401198986, + "grad_norm": 1.0951548438177328, + "learning_rate": 1.9344430388099813e-06, + "loss": 0.5111383199691772, + "step": 1390 + }, + { + "epoch": 0.3207286142494812, + "grad_norm": 1.3896947100609738, + "learning_rate": 1.934307198952382e-06, + "loss": 0.6029741168022156, + "step": 1391 + }, + { + "epoch": 0.32095918837906384, + "grad_norm": 1.0076386708324083, + "learning_rate": 1.9341712232837628e-06, + "loss": 0.48339328169822693, + "step": 1392 + }, + { + "epoch": 0.3211897625086465, + "grad_norm": 1.5017597017671664, + "learning_rate": 1.9340351118238882e-06, + "loss": 0.6080894470214844, + "step": 1393 + }, + { + "epoch": 0.32142033663822916, + "grad_norm": 1.1935202429445742, + "learning_rate": 1.9338988645925444e-06, + "loss": 0.46375036239624023, + "step": 1394 + }, + { + "epoch": 0.3216509107678118, + "grad_norm": 1.2397479694281224, + "learning_rate": 1.9337624816095357e-06, + "loss": 0.5974088907241821, + "step": 1395 + }, + { + "epoch": 0.3218814848973945, + "grad_norm": 1.4525926184759388, + "learning_rate": 1.9336259628946865e-06, + "loss": 0.5759298801422119, + "step": 1396 + }, + { + "epoch": 0.32211205902697715, + "grad_norm": 1.0361695525185906, + "learning_rate": 1.9334893084678417e-06, + "loss": 0.6050859689712524, + "step": 1397 + }, + { + "epoch": 0.3223426331565598, + "grad_norm": 1.1306650773102374, + "learning_rate": 1.9333525183488657e-06, + "loss": 0.5879993438720703, + "step": 1398 + }, + { + "epoch": 0.32257320728614247, + "grad_norm": 1.055350398289763, + "learning_rate": 1.933215592557642e-06, + "loss": 0.5496323108673096, + "step": 1399 + }, + { + "epoch": 0.32280378141572513, + "grad_norm": 1.2847712135798797, + "learning_rate": 1.9330785311140732e-06, + "loss": 0.48447534441947937, + "step": 1400 + }, + { + "epoch": 0.3230343555453078, + "grad_norm": 1.2583031445613762, + "learning_rate": 1.932941334038084e-06, + "loss": 0.5687322020530701, + "step": 1401 + }, + { + "epoch": 0.32326492967489046, + 
"grad_norm": 1.1545356458260727, + "learning_rate": 1.9328040013496166e-06, + "loss": 0.4070928990840912, + "step": 1402 + }, + { + "epoch": 0.3234955038044731, + "grad_norm": 0.9643847324304846, + "learning_rate": 1.9326665330686344e-06, + "loss": 0.5131539106369019, + "step": 1403 + }, + { + "epoch": 0.3237260779340558, + "grad_norm": 1.0846567553359194, + "learning_rate": 1.932528929215119e-06, + "loss": 0.47571802139282227, + "step": 1404 + }, + { + "epoch": 0.32395665206363844, + "grad_norm": 1.095169764239565, + "learning_rate": 1.9323911898090728e-06, + "loss": 0.5676391124725342, + "step": 1405 + }, + { + "epoch": 0.3241872261932211, + "grad_norm": 1.0653010445083047, + "learning_rate": 1.9322533148705177e-06, + "loss": 0.5464721322059631, + "step": 1406 + }, + { + "epoch": 0.32441780032280376, + "grad_norm": 1.044728614529827, + "learning_rate": 1.9321153044194953e-06, + "loss": 0.6130954027175903, + "step": 1407 + }, + { + "epoch": 0.3246483744523864, + "grad_norm": 1.6513732337511444, + "learning_rate": 1.9319771584760666e-06, + "loss": 0.6058028936386108, + "step": 1408 + }, + { + "epoch": 0.3248789485819691, + "grad_norm": 1.1251884535657009, + "learning_rate": 1.9318388770603123e-06, + "loss": 0.5326286554336548, + "step": 1409 + }, + { + "epoch": 0.32510952271155175, + "grad_norm": 1.2184625691329178, + "learning_rate": 1.9317004601923337e-06, + "loss": 0.6046053767204285, + "step": 1410 + }, + { + "epoch": 0.3253400968411344, + "grad_norm": 1.058617017669887, + "learning_rate": 1.931561907892251e-06, + "loss": 0.4597975015640259, + "step": 1411 + }, + { + "epoch": 0.32557067097071707, + "grad_norm": 1.1843983331118075, + "learning_rate": 1.9314232201802035e-06, + "loss": 0.6024897694587708, + "step": 1412 + }, + { + "epoch": 0.32580124510029973, + "grad_norm": 1.037552834044261, + "learning_rate": 1.9312843970763512e-06, + "loss": 0.45463523268699646, + "step": 1413 + }, + { + "epoch": 0.3260318192298824, + "grad_norm": 0.9412245310618959, + 
"learning_rate": 1.9311454386008736e-06, + "loss": 0.512498140335083, + "step": 1414 + }, + { + "epoch": 0.32626239335946505, + "grad_norm": 0.8929271577435476, + "learning_rate": 1.9310063447739695e-06, + "loss": 0.4851795434951782, + "step": 1415 + }, + { + "epoch": 0.3264929674890477, + "grad_norm": 1.1131717345806365, + "learning_rate": 1.930867115615858e-06, + "loss": 0.5464169979095459, + "step": 1416 + }, + { + "epoch": 0.3267235416186304, + "grad_norm": 0.9649299588738096, + "learning_rate": 1.930727751146777e-06, + "loss": 0.5614463090896606, + "step": 1417 + }, + { + "epoch": 0.32695411574821304, + "grad_norm": 1.1279163828506724, + "learning_rate": 1.930588251386985e-06, + "loss": 0.635399341583252, + "step": 1418 + }, + { + "epoch": 0.3271846898777957, + "grad_norm": 1.0116750083389472, + "learning_rate": 1.9304486163567588e-06, + "loss": 0.4862840175628662, + "step": 1419 + }, + { + "epoch": 0.32741526400737836, + "grad_norm": 1.3810849020281415, + "learning_rate": 1.930308846076397e-06, + "loss": 0.6548877954483032, + "step": 1420 + }, + { + "epoch": 0.327645838136961, + "grad_norm": 0.9726550652757486, + "learning_rate": 1.9301689405662154e-06, + "loss": 0.5781031250953674, + "step": 1421 + }, + { + "epoch": 0.3278764122665437, + "grad_norm": 1.0075078554250574, + "learning_rate": 1.930028899846552e-06, + "loss": 0.4945180118083954, + "step": 1422 + }, + { + "epoch": 0.32810698639612634, + "grad_norm": 1.1661473529435082, + "learning_rate": 1.9298887239377623e-06, + "loss": 0.548690915107727, + "step": 1423 + }, + { + "epoch": 0.328337560525709, + "grad_norm": 1.0120278252177992, + "learning_rate": 1.929748412860222e-06, + "loss": 0.44515126943588257, + "step": 1424 + }, + { + "epoch": 0.32856813465529167, + "grad_norm": 0.8968526552864172, + "learning_rate": 1.9296079666343273e-06, + "loss": 0.433849573135376, + "step": 1425 + }, + { + "epoch": 0.3287987087848743, + "grad_norm": 1.185097032812299, + "learning_rate": 1.9294673852804938e-06, + "loss": 
0.5600666403770447, + "step": 1426 + }, + { + "epoch": 0.329029282914457, + "grad_norm": 1.1490365285996864, + "learning_rate": 1.9293266688191555e-06, + "loss": 0.5302737355232239, + "step": 1427 + }, + { + "epoch": 0.32925985704403965, + "grad_norm": 1.1854633228597617, + "learning_rate": 1.929185817270768e-06, + "loss": 0.5590239763259888, + "step": 1428 + }, + { + "epoch": 0.3294904311736223, + "grad_norm": 0.9322915581005059, + "learning_rate": 1.929044830655804e-06, + "loss": 0.43225252628326416, + "step": 1429 + }, + { + "epoch": 0.329721005303205, + "grad_norm": 1.0987581728513967, + "learning_rate": 1.9289037089947595e-06, + "loss": 0.4932950735092163, + "step": 1430 + }, + { + "epoch": 0.32995157943278763, + "grad_norm": 1.1539316791656467, + "learning_rate": 1.9287624523081457e-06, + "loss": 0.48358941078186035, + "step": 1431 + }, + { + "epoch": 0.3301821535623703, + "grad_norm": 1.1348341469716536, + "learning_rate": 1.928621060616497e-06, + "loss": 0.48359012603759766, + "step": 1432 + }, + { + "epoch": 0.33041272769195296, + "grad_norm": 0.9278501695529541, + "learning_rate": 1.9284795339403663e-06, + "loss": 0.48462390899658203, + "step": 1433 + }, + { + "epoch": 0.3306433018215356, + "grad_norm": 1.439376655816269, + "learning_rate": 1.9283378723003253e-06, + "loss": 0.5167088508605957, + "step": 1434 + }, + { + "epoch": 0.3308738759511183, + "grad_norm": 1.0184323306356053, + "learning_rate": 1.928196075716966e-06, + "loss": 0.47352856397628784, + "step": 1435 + }, + { + "epoch": 0.33110445008070094, + "grad_norm": 0.9676467825700396, + "learning_rate": 1.9280541442109e-06, + "loss": 0.5013144016265869, + "step": 1436 + }, + { + "epoch": 0.3313350242102836, + "grad_norm": 1.1746874818237374, + "learning_rate": 1.927912077802759e-06, + "loss": 0.5061586499214172, + "step": 1437 + }, + { + "epoch": 0.33156559833986626, + "grad_norm": 1.3055289684633111, + "learning_rate": 1.9277698765131927e-06, + "loss": 0.5718814134597778, + "step": 1438 + }, + { 
+ "epoch": 0.3317961724694489, + "grad_norm": 1.147604660511156, + "learning_rate": 1.9276275403628727e-06, + "loss": 0.47547006607055664, + "step": 1439 + }, + { + "epoch": 0.3320267465990316, + "grad_norm": 1.1585259805283974, + "learning_rate": 1.9274850693724884e-06, + "loss": 0.5387942790985107, + "step": 1440 + }, + { + "epoch": 0.33225732072861425, + "grad_norm": 1.013907046172662, + "learning_rate": 1.9273424635627494e-06, + "loss": 0.524285078048706, + "step": 1441 + }, + { + "epoch": 0.3324878948581969, + "grad_norm": 1.1737357855070976, + "learning_rate": 1.927199722954385e-06, + "loss": 0.5073943138122559, + "step": 1442 + }, + { + "epoch": 0.33271846898777957, + "grad_norm": 1.2047946851654725, + "learning_rate": 1.927056847568144e-06, + "loss": 0.4609600007534027, + "step": 1443 + }, + { + "epoch": 0.33294904311736223, + "grad_norm": 1.0416538135601094, + "learning_rate": 1.926913837424795e-06, + "loss": 0.4861013889312744, + "step": 1444 + }, + { + "epoch": 0.3331796172469449, + "grad_norm": 1.0835107342484427, + "learning_rate": 1.9267706925451253e-06, + "loss": 0.5255436897277832, + "step": 1445 + }, + { + "epoch": 0.33341019137652755, + "grad_norm": 1.4634923921780199, + "learning_rate": 1.9266274129499434e-06, + "loss": 0.6673840880393982, + "step": 1446 + }, + { + "epoch": 0.3336407655061102, + "grad_norm": 0.9656915858584796, + "learning_rate": 1.9264839986600757e-06, + "loss": 0.38582634925842285, + "step": 1447 + }, + { + "epoch": 0.3338713396356929, + "grad_norm": 0.9567963925410773, + "learning_rate": 1.926340449696369e-06, + "loss": 0.4597562253475189, + "step": 1448 + }, + { + "epoch": 0.33410191376527554, + "grad_norm": 1.130778436617546, + "learning_rate": 1.92619676607969e-06, + "loss": 0.5901148319244385, + "step": 1449 + }, + { + "epoch": 0.3343324878948582, + "grad_norm": 1.2252206522255358, + "learning_rate": 1.9260529478309242e-06, + "loss": 0.49872028827667236, + "step": 1450 + }, + { + "epoch": 0.33456306202444086, + 
"grad_norm": 0.9242619738807548, + "learning_rate": 1.925908994970977e-06, + "loss": 0.4611232578754425, + "step": 1451 + }, + { + "epoch": 0.3347936361540235, + "grad_norm": 1.1122995891321772, + "learning_rate": 1.9257649075207738e-06, + "loss": 0.5671408176422119, + "step": 1452 + }, + { + "epoch": 0.3350242102836062, + "grad_norm": 1.2073453603933548, + "learning_rate": 1.925620685501259e-06, + "loss": 0.4892054498195648, + "step": 1453 + }, + { + "epoch": 0.33525478441318884, + "grad_norm": 1.1748595063207394, + "learning_rate": 1.9254763289333966e-06, + "loss": 0.5506503582000732, + "step": 1454 + }, + { + "epoch": 0.3354853585427715, + "grad_norm": 1.4352362120603241, + "learning_rate": 1.9253318378381702e-06, + "loss": 0.6233078241348267, + "step": 1455 + }, + { + "epoch": 0.33571593267235417, + "grad_norm": 1.2159230168553836, + "learning_rate": 1.9251872122365835e-06, + "loss": 0.5551373958587646, + "step": 1456 + }, + { + "epoch": 0.33594650680193683, + "grad_norm": 1.0308435059717576, + "learning_rate": 1.925042452149659e-06, + "loss": 0.5561612844467163, + "step": 1457 + }, + { + "epoch": 0.3361770809315195, + "grad_norm": 1.0286600789295617, + "learning_rate": 1.924897557598439e-06, + "loss": 0.613766074180603, + "step": 1458 + }, + { + "epoch": 0.33640765506110215, + "grad_norm": 1.092154153863493, + "learning_rate": 1.9247525286039852e-06, + "loss": 0.5767652988433838, + "step": 1459 + }, + { + "epoch": 0.3366382291906848, + "grad_norm": 1.1221153049255785, + "learning_rate": 1.9246073651873795e-06, + "loss": 0.49292564392089844, + "step": 1460 + }, + { + "epoch": 0.3368688033202675, + "grad_norm": 1.2909262812986786, + "learning_rate": 1.9244620673697224e-06, + "loss": 0.5901867151260376, + "step": 1461 + }, + { + "epoch": 0.33709937744985013, + "grad_norm": 1.1013040204716718, + "learning_rate": 1.924316635172135e-06, + "loss": 0.5543808937072754, + "step": 1462 + }, + { + "epoch": 0.3373299515794328, + "grad_norm": 1.3433064818976315, + 
"learning_rate": 1.9241710686157568e-06, + "loss": 0.528805136680603, + "step": 1463 + }, + { + "epoch": 0.33756052570901546, + "grad_norm": 1.2569454583762516, + "learning_rate": 1.924025367721748e-06, + "loss": 0.6396733522415161, + "step": 1464 + }, + { + "epoch": 0.3377910998385981, + "grad_norm": 0.9764691877916688, + "learning_rate": 1.9238795325112867e-06, + "loss": 0.5558862686157227, + "step": 1465 + }, + { + "epoch": 0.3380216739681808, + "grad_norm": 1.2329860923893396, + "learning_rate": 1.9237335630055724e-06, + "loss": 0.5863986015319824, + "step": 1466 + }, + { + "epoch": 0.33825224809776344, + "grad_norm": 1.0929132974739206, + "learning_rate": 1.923587459225823e-06, + "loss": 0.5636321306228638, + "step": 1467 + }, + { + "epoch": 0.3384828222273461, + "grad_norm": 1.1286586205882263, + "learning_rate": 1.923441221193276e-06, + "loss": 0.6065811514854431, + "step": 1468 + }, + { + "epoch": 0.33871339635692876, + "grad_norm": 1.4147716425908794, + "learning_rate": 1.9232948489291886e-06, + "loss": 0.580939769744873, + "step": 1469 + }, + { + "epoch": 0.3389439704865114, + "grad_norm": 1.1018333541876169, + "learning_rate": 1.9231483424548377e-06, + "loss": 0.5429994463920593, + "step": 1470 + }, + { + "epoch": 0.3391745446160941, + "grad_norm": 1.1834314239894592, + "learning_rate": 1.92300170179152e-06, + "loss": 0.5090892910957336, + "step": 1471 + }, + { + "epoch": 0.33940511874567675, + "grad_norm": 1.053685812356228, + "learning_rate": 1.9228549269605498e-06, + "loss": 0.5280312299728394, + "step": 1472 + }, + { + "epoch": 0.3396356928752594, + "grad_norm": 0.992641626439364, + "learning_rate": 1.9227080179832634e-06, + "loss": 0.5098810195922852, + "step": 1473 + }, + { + "epoch": 0.33986626700484207, + "grad_norm": 1.110706876976592, + "learning_rate": 1.922560974881015e-06, + "loss": 0.4554474353790283, + "step": 1474 + }, + { + "epoch": 0.34009684113442473, + "grad_norm": 1.042826154870894, + "learning_rate": 1.9224137976751793e-06, + 
"loss": 0.4492517113685608, + "step": 1475 + }, + { + "epoch": 0.3403274152640074, + "grad_norm": 1.3050966518961793, + "learning_rate": 1.9222664863871495e-06, + "loss": 0.47606343030929565, + "step": 1476 + }, + { + "epoch": 0.34055798939359005, + "grad_norm": 1.331553847580159, + "learning_rate": 1.9221190410383394e-06, + "loss": 0.5939435362815857, + "step": 1477 + }, + { + "epoch": 0.3407885635231727, + "grad_norm": 1.0156905582890146, + "learning_rate": 1.921971461650181e-06, + "loss": 0.5418350696563721, + "step": 1478 + }, + { + "epoch": 0.3410191376527554, + "grad_norm": 1.258400628812999, + "learning_rate": 1.9218237482441265e-06, + "loss": 0.5307733416557312, + "step": 1479 + }, + { + "epoch": 0.34124971178233804, + "grad_norm": 1.097634429758053, + "learning_rate": 1.9216759008416483e-06, + "loss": 0.5102016925811768, + "step": 1480 + }, + { + "epoch": 0.3414802859119207, + "grad_norm": 1.6070497683125828, + "learning_rate": 1.9215279194642366e-06, + "loss": 0.5043876767158508, + "step": 1481 + }, + { + "epoch": 0.34171086004150336, + "grad_norm": 1.0925329335071103, + "learning_rate": 1.9213798041334025e-06, + "loss": 0.5365253686904907, + "step": 1482 + }, + { + "epoch": 0.341941434171086, + "grad_norm": 1.1923005853358424, + "learning_rate": 1.921231554870676e-06, + "loss": 0.4938368797302246, + "step": 1483 + }, + { + "epoch": 0.3421720083006687, + "grad_norm": 1.0865439416616147, + "learning_rate": 1.921083171697607e-06, + "loss": 0.5274159908294678, + "step": 1484 + }, + { + "epoch": 0.34240258243025135, + "grad_norm": 1.1913792015364102, + "learning_rate": 1.9209346546357637e-06, + "loss": 0.4720276892185211, + "step": 1485 + }, + { + "epoch": 0.342633156559834, + "grad_norm": 0.9383641214181552, + "learning_rate": 1.920786003706735e-06, + "loss": 0.42276352643966675, + "step": 1486 + }, + { + "epoch": 0.34286373068941667, + "grad_norm": 1.0581324959121157, + "learning_rate": 1.920637218932129e-06, + "loss": 0.5319294333457947, + "step": 1487 + 
}, + { + "epoch": 0.34309430481899933, + "grad_norm": 1.1819330354237378, + "learning_rate": 1.920488300333572e-06, + "loss": 0.5197560787200928, + "step": 1488 + }, + { + "epoch": 0.343324878948582, + "grad_norm": 1.5013538667422215, + "learning_rate": 1.9203392479327127e-06, + "loss": 0.550025463104248, + "step": 1489 + }, + { + "epoch": 0.34355545307816465, + "grad_norm": 1.0981284345294107, + "learning_rate": 1.920190061751216e-06, + "loss": 0.50255286693573, + "step": 1490 + }, + { + "epoch": 0.3437860272077473, + "grad_norm": 1.1895622589876538, + "learning_rate": 1.9200407418107678e-06, + "loss": 0.5952906608581543, + "step": 1491 + }, + { + "epoch": 0.34401660133733, + "grad_norm": 0.9421522918126589, + "learning_rate": 1.9198912881330737e-06, + "loss": 0.48161056637763977, + "step": 1492 + }, + { + "epoch": 0.34424717546691264, + "grad_norm": 1.177243819966174, + "learning_rate": 1.919741700739858e-06, + "loss": 0.5490972995758057, + "step": 1493 + }, + { + "epoch": 0.3444777495964953, + "grad_norm": 1.4788962836499655, + "learning_rate": 1.9195919796528647e-06, + "loss": 0.45651519298553467, + "step": 1494 + }, + { + "epoch": 0.34470832372607796, + "grad_norm": 1.2203060266370191, + "learning_rate": 1.919442124893857e-06, + "loss": 0.5318460464477539, + "step": 1495 + }, + { + "epoch": 0.3449388978556606, + "grad_norm": 1.0748079339537138, + "learning_rate": 1.9192921364846187e-06, + "loss": 0.5052516460418701, + "step": 1496 + }, + { + "epoch": 0.3451694719852433, + "grad_norm": 1.3171544150804408, + "learning_rate": 1.9191420144469515e-06, + "loss": 0.6653434038162231, + "step": 1497 + }, + { + "epoch": 0.34540004611482594, + "grad_norm": 0.962422061512943, + "learning_rate": 1.9189917588026774e-06, + "loss": 0.47182875871658325, + "step": 1498 + }, + { + "epoch": 0.3456306202444086, + "grad_norm": 1.0305251609345925, + "learning_rate": 1.9188413695736376e-06, + "loss": 0.5257801413536072, + "step": 1499 + }, + { + "epoch": 0.34586119437399127, + 
"grad_norm": 1.1090254531285808, + "learning_rate": 1.918690846781692e-06, + "loss": 0.565075695514679, + "step": 1500 + }, + { + "epoch": 0.3460917685035739, + "grad_norm": 1.1909717210416553, + "learning_rate": 1.9185401904487214e-06, + "loss": 0.49737876653671265, + "step": 1501 + }, + { + "epoch": 0.34632234263315653, + "grad_norm": 1.021716441788736, + "learning_rate": 1.918389400596625e-06, + "loss": 0.5136237144470215, + "step": 1502 + }, + { + "epoch": 0.3465529167627392, + "grad_norm": 1.011829912931323, + "learning_rate": 1.9182384772473216e-06, + "loss": 0.5122819542884827, + "step": 1503 + }, + { + "epoch": 0.34678349089232186, + "grad_norm": 1.1232586653417744, + "learning_rate": 1.91808742042275e-06, + "loss": 0.4586041271686554, + "step": 1504 + }, + { + "epoch": 0.3470140650219045, + "grad_norm": 1.0599756649712084, + "learning_rate": 1.9179362301448666e-06, + "loss": 0.49752146005630493, + "step": 1505 + }, + { + "epoch": 0.3472446391514872, + "grad_norm": 1.0110535685015802, + "learning_rate": 1.917784906435649e-06, + "loss": 0.4423530101776123, + "step": 1506 + }, + { + "epoch": 0.34747521328106984, + "grad_norm": 1.2828635133632034, + "learning_rate": 1.9176334493170946e-06, + "loss": 0.4979468882083893, + "step": 1507 + }, + { + "epoch": 0.3477057874106525, + "grad_norm": 1.0086748218378025, + "learning_rate": 1.9174818588112178e-06, + "loss": 0.5229524374008179, + "step": 1508 + }, + { + "epoch": 0.34793636154023516, + "grad_norm": 1.006104946386604, + "learning_rate": 1.9173301349400546e-06, + "loss": 0.47884654998779297, + "step": 1509 + }, + { + "epoch": 0.3481669356698178, + "grad_norm": 1.161430061405767, + "learning_rate": 1.9171782777256594e-06, + "loss": 0.5204922556877136, + "step": 1510 + }, + { + "epoch": 0.3483975097994005, + "grad_norm": 1.1268415177845295, + "learning_rate": 1.917026287190106e-06, + "loss": 0.5077674984931946, + "step": 1511 + }, + { + "epoch": 0.34862808392898315, + "grad_norm": 0.9750269271228661, + 
"learning_rate": 1.9168741633554885e-06, + "loss": 0.4171299934387207, + "step": 1512 + }, + { + "epoch": 0.3488586580585658, + "grad_norm": 1.065613083459404, + "learning_rate": 1.9167219062439187e-06, + "loss": 0.5228694081306458, + "step": 1513 + }, + { + "epoch": 0.34908923218814847, + "grad_norm": 1.188410464922724, + "learning_rate": 1.916569515877529e-06, + "loss": 0.5496635437011719, + "step": 1514 + }, + { + "epoch": 0.34931980631773113, + "grad_norm": 0.969674279609777, + "learning_rate": 1.9164169922784716e-06, + "loss": 0.5197573900222778, + "step": 1515 + }, + { + "epoch": 0.3495503804473138, + "grad_norm": 1.3265152215611398, + "learning_rate": 1.9162643354689163e-06, + "loss": 0.5726813077926636, + "step": 1516 + }, + { + "epoch": 0.34978095457689645, + "grad_norm": 1.0368094455843846, + "learning_rate": 1.916111545471054e-06, + "loss": 0.53382408618927, + "step": 1517 + }, + { + "epoch": 0.3500115287064791, + "grad_norm": 1.0676291023728657, + "learning_rate": 1.915958622307094e-06, + "loss": 0.5535515546798706, + "step": 1518 + }, + { + "epoch": 0.3502421028360618, + "grad_norm": 1.183098293067818, + "learning_rate": 1.9158055659992648e-06, + "loss": 0.5295307040214539, + "step": 1519 + }, + { + "epoch": 0.35047267696564444, + "grad_norm": 1.3231709310936663, + "learning_rate": 1.9156523765698158e-06, + "loss": 0.5397933125495911, + "step": 1520 + }, + { + "epoch": 0.3507032510952271, + "grad_norm": 1.217082341703879, + "learning_rate": 1.915499054041014e-06, + "loss": 0.5614666938781738, + "step": 1521 + }, + { + "epoch": 0.35093382522480976, + "grad_norm": 1.155125291987811, + "learning_rate": 1.915345598435146e-06, + "loss": 0.5321720838546753, + "step": 1522 + }, + { + "epoch": 0.3511643993543924, + "grad_norm": 1.172353935810673, + "learning_rate": 1.9151920097745185e-06, + "loss": 0.51869797706604, + "step": 1523 + }, + { + "epoch": 0.3513949734839751, + "grad_norm": 1.0936179296558388, + "learning_rate": 1.9150382880814577e-06, + "loss": 
0.58238685131073, + "step": 1524 + }, + { + "epoch": 0.35162554761355774, + "grad_norm": 1.135142968184709, + "learning_rate": 1.914884433378308e-06, + "loss": 0.5617767572402954, + "step": 1525 + }, + { + "epoch": 0.3518561217431404, + "grad_norm": 0.9232400306777988, + "learning_rate": 1.9147304456874336e-06, + "loss": 0.5207428932189941, + "step": 1526 + }, + { + "epoch": 0.35208669587272307, + "grad_norm": 1.0829138732821308, + "learning_rate": 1.914576325031218e-06, + "loss": 0.5929840207099915, + "step": 1527 + }, + { + "epoch": 0.3523172700023057, + "grad_norm": 1.0372438860332964, + "learning_rate": 1.914422071432065e-06, + "loss": 0.510567307472229, + "step": 1528 + }, + { + "epoch": 0.3525478441318884, + "grad_norm": 1.2529291445912578, + "learning_rate": 1.914267684912397e-06, + "loss": 0.5524177551269531, + "step": 1529 + }, + { + "epoch": 0.35277841826147105, + "grad_norm": 1.0844290023080794, + "learning_rate": 1.9141131654946548e-06, + "loss": 0.5622289180755615, + "step": 1530 + }, + { + "epoch": 0.3530089923910537, + "grad_norm": 1.1655531028574153, + "learning_rate": 1.9139585132012995e-06, + "loss": 0.5085979700088501, + "step": 1531 + }, + { + "epoch": 0.35323956652063637, + "grad_norm": 1.0367412290626608, + "learning_rate": 1.9138037280548117e-06, + "loss": 0.47232770919799805, + "step": 1532 + }, + { + "epoch": 0.35347014065021903, + "grad_norm": 1.3584148636864177, + "learning_rate": 1.913648810077691e-06, + "loss": 0.535300612449646, + "step": 1533 + }, + { + "epoch": 0.3537007147798017, + "grad_norm": 1.1457507125445123, + "learning_rate": 1.9134937592924562e-06, + "loss": 0.4351940155029297, + "step": 1534 + }, + { + "epoch": 0.35393128890938436, + "grad_norm": 0.9891980196576595, + "learning_rate": 1.9133385757216456e-06, + "loss": 0.4691917896270752, + "step": 1535 + }, + { + "epoch": 0.354161863038967, + "grad_norm": 1.03905005054118, + "learning_rate": 1.9131832593878167e-06, + "loss": 0.4911034107208252, + "step": 1536 + }, + { + 
"epoch": 0.3543924371685497, + "grad_norm": 0.9599946260153974, + "learning_rate": 1.9130278103135458e-06, + "loss": 0.3954068422317505, + "step": 1537 + }, + { + "epoch": 0.35462301129813234, + "grad_norm": 1.2512488183212185, + "learning_rate": 1.9128722285214297e-06, + "loss": 0.5541605949401855, + "step": 1538 + }, + { + "epoch": 0.354853585427715, + "grad_norm": 1.2362059407886639, + "learning_rate": 1.9127165140340832e-06, + "loss": 0.5719314217567444, + "step": 1539 + }, + { + "epoch": 0.35508415955729766, + "grad_norm": 1.342530930822934, + "learning_rate": 1.9125606668741418e-06, + "loss": 0.60889732837677, + "step": 1540 + }, + { + "epoch": 0.3553147336868803, + "grad_norm": 1.2098741685807175, + "learning_rate": 1.9124046870642587e-06, + "loss": 0.5247465968132019, + "step": 1541 + }, + { + "epoch": 0.355545307816463, + "grad_norm": 1.3096766952611592, + "learning_rate": 1.912248574627107e-06, + "loss": 0.5681591033935547, + "step": 1542 + }, + { + "epoch": 0.35577588194604565, + "grad_norm": 1.0008372683888578, + "learning_rate": 1.91209232958538e-06, + "loss": 0.5995845794677734, + "step": 1543 + }, + { + "epoch": 0.3560064560756283, + "grad_norm": 1.0463229098086306, + "learning_rate": 1.9119359519617893e-06, + "loss": 0.514456033706665, + "step": 1544 + }, + { + "epoch": 0.35623703020521097, + "grad_norm": 1.0680000709528683, + "learning_rate": 1.9117794417790657e-06, + "loss": 0.45192602276802063, + "step": 1545 + }, + { + "epoch": 0.35646760433479363, + "grad_norm": 1.042670075197141, + "learning_rate": 1.911622799059959e-06, + "loss": 0.5529573559761047, + "step": 1546 + }, + { + "epoch": 0.3566981784643763, + "grad_norm": 1.2129822836493795, + "learning_rate": 1.9114660238272403e-06, + "loss": 0.4544152021408081, + "step": 1547 + }, + { + "epoch": 0.35692875259395895, + "grad_norm": 1.516629148023364, + "learning_rate": 1.9113091161036974e-06, + "loss": 0.5676225423812866, + "step": 1548 + }, + { + "epoch": 0.3571593267235416, + "grad_norm": 
1.1320627323756525, + "learning_rate": 1.9111520759121384e-06, + "loss": 0.5571830868721008, + "step": 1549 + }, + { + "epoch": 0.3573899008531243, + "grad_norm": 1.1377531274302592, + "learning_rate": 1.910994903275391e-06, + "loss": 0.5091487765312195, + "step": 1550 + }, + { + "epoch": 0.35762047498270694, + "grad_norm": 1.107456889270875, + "learning_rate": 1.9108375982163015e-06, + "loss": 0.5484684705734253, + "step": 1551 + }, + { + "epoch": 0.3578510491122896, + "grad_norm": 1.261905478374622, + "learning_rate": 1.9106801607577364e-06, + "loss": 0.49742424488067627, + "step": 1552 + }, + { + "epoch": 0.35808162324187226, + "grad_norm": 1.2341261046425518, + "learning_rate": 1.9105225909225804e-06, + "loss": 0.5871520638465881, + "step": 1553 + }, + { + "epoch": 0.3583121973714549, + "grad_norm": 1.2329576492287886, + "learning_rate": 1.910364888733738e-06, + "loss": 0.5096076726913452, + "step": 1554 + }, + { + "epoch": 0.3585427715010376, + "grad_norm": 1.3375416968847058, + "learning_rate": 1.910207054214133e-06, + "loss": 0.7168693542480469, + "step": 1555 + }, + { + "epoch": 0.35877334563062024, + "grad_norm": 1.126707169388949, + "learning_rate": 1.910049087386707e-06, + "loss": 0.5603561997413635, + "step": 1556 + }, + { + "epoch": 0.3590039197602029, + "grad_norm": 1.299433383477777, + "learning_rate": 1.909890988274424e-06, + "loss": 0.5857734680175781, + "step": 1557 + }, + { + "epoch": 0.35923449388978557, + "grad_norm": 1.040543925807462, + "learning_rate": 1.9097327569002642e-06, + "loss": 0.5612708926200867, + "step": 1558 + }, + { + "epoch": 0.35946506801936823, + "grad_norm": 1.146949414139332, + "learning_rate": 1.909574393287228e-06, + "loss": 0.5264564752578735, + "step": 1559 + }, + { + "epoch": 0.3596956421489509, + "grad_norm": 0.9390137754415148, + "learning_rate": 1.9094158974583357e-06, + "loss": 0.4163395166397095, + "step": 1560 + }, + { + "epoch": 0.35992621627853355, + "grad_norm": 1.0884801214343747, + "learning_rate": 
1.909257269436626e-06, + "loss": 0.483236163854599, + "step": 1561 + }, + { + "epoch": 0.3601567904081162, + "grad_norm": 1.0086049535834347, + "learning_rate": 1.9090985092451572e-06, + "loss": 0.48892003297805786, + "step": 1562 + }, + { + "epoch": 0.3603873645376989, + "grad_norm": 1.0090138133688373, + "learning_rate": 1.908939616907007e-06, + "loss": 0.45310860872268677, + "step": 1563 + }, + { + "epoch": 0.36061793866728153, + "grad_norm": 1.0130833457744266, + "learning_rate": 1.908780592445271e-06, + "loss": 0.5242425799369812, + "step": 1564 + }, + { + "epoch": 0.3608485127968642, + "grad_norm": 1.0425805251353624, + "learning_rate": 1.9086214358830663e-06, + "loss": 0.47026845812797546, + "step": 1565 + }, + { + "epoch": 0.36107908692644686, + "grad_norm": 1.2209406413770176, + "learning_rate": 1.9084621472435267e-06, + "loss": 0.5783924460411072, + "step": 1566 + }, + { + "epoch": 0.3613096610560295, + "grad_norm": 1.0139793238266448, + "learning_rate": 1.9083027265498073e-06, + "loss": 0.5534437894821167, + "step": 1567 + }, + { + "epoch": 0.3615402351856122, + "grad_norm": 1.27522834837266, + "learning_rate": 1.9081431738250815e-06, + "loss": 0.49131953716278076, + "step": 1568 + }, + { + "epoch": 0.36177080931519484, + "grad_norm": 1.0466765845853998, + "learning_rate": 1.9079834890925412e-06, + "loss": 0.4798020124435425, + "step": 1569 + }, + { + "epoch": 0.3620013834447775, + "grad_norm": 1.1201181573638213, + "learning_rate": 1.9078236723753987e-06, + "loss": 0.4928893446922302, + "step": 1570 + }, + { + "epoch": 0.36223195757436016, + "grad_norm": 0.884047440430311, + "learning_rate": 1.9076637236968847e-06, + "loss": 0.4483630657196045, + "step": 1571 + }, + { + "epoch": 0.3624625317039428, + "grad_norm": 1.0983581542959335, + "learning_rate": 1.90750364308025e-06, + "loss": 0.593490481376648, + "step": 1572 + }, + { + "epoch": 0.3626931058335255, + "grad_norm": 1.1430514811975505, + "learning_rate": 1.9073434305487631e-06, + "loss": 
0.5944634675979614, + "step": 1573 + }, + { + "epoch": 0.36292367996310815, + "grad_norm": 1.003698560447405, + "learning_rate": 1.9071830861257134e-06, + "loss": 0.5010452270507812, + "step": 1574 + }, + { + "epoch": 0.3631542540926908, + "grad_norm": 1.0687566975761509, + "learning_rate": 1.9070226098344078e-06, + "loss": 0.5128473043441772, + "step": 1575 + }, + { + "epoch": 0.36338482822227347, + "grad_norm": 1.0854169038402666, + "learning_rate": 1.9068620016981733e-06, + "loss": 0.6256363987922668, + "step": 1576 + }, + { + "epoch": 0.36361540235185613, + "grad_norm": 1.0796360454107574, + "learning_rate": 1.9067012617403565e-06, + "loss": 0.5502322912216187, + "step": 1577 + }, + { + "epoch": 0.3638459764814388, + "grad_norm": 1.2842731628323776, + "learning_rate": 1.906540389984322e-06, + "loss": 0.5756800174713135, + "step": 1578 + }, + { + "epoch": 0.36407655061102145, + "grad_norm": 1.135643566986845, + "learning_rate": 1.9063793864534543e-06, + "loss": 0.5131359696388245, + "step": 1579 + }, + { + "epoch": 0.3643071247406041, + "grad_norm": 0.9714084254330834, + "learning_rate": 1.9062182511711567e-06, + "loss": 0.5776810646057129, + "step": 1580 + }, + { + "epoch": 0.3645376988701868, + "grad_norm": 1.0973639487789169, + "learning_rate": 1.9060569841608523e-06, + "loss": 0.49460822343826294, + "step": 1581 + }, + { + "epoch": 0.36476827299976944, + "grad_norm": 0.942012419923591, + "learning_rate": 1.9058955854459823e-06, + "loss": 0.5031022429466248, + "step": 1582 + }, + { + "epoch": 0.3649988471293521, + "grad_norm": 1.2106661637014209, + "learning_rate": 1.9057340550500082e-06, + "loss": 0.4957816004753113, + "step": 1583 + }, + { + "epoch": 0.36522942125893476, + "grad_norm": 0.9363710565312526, + "learning_rate": 1.9055723929964102e-06, + "loss": 0.47861093282699585, + "step": 1584 + }, + { + "epoch": 0.3654599953885174, + "grad_norm": 1.027272725701274, + "learning_rate": 1.9054105993086868e-06, + "loss": 0.44517919421195984, + "step": 1585 + }, 
+ { + "epoch": 0.3656905695181001, + "grad_norm": 1.1724343492985738, + "learning_rate": 1.9052486740103568e-06, + "loss": 0.46661484241485596, + "step": 1586 + }, + { + "epoch": 0.36592114364768275, + "grad_norm": 0.9788001147307338, + "learning_rate": 1.9050866171249575e-06, + "loss": 0.517694890499115, + "step": 1587 + }, + { + "epoch": 0.3661517177772654, + "grad_norm": 1.1284193922698917, + "learning_rate": 1.904924428676046e-06, + "loss": 0.49465644359588623, + "step": 1588 + }, + { + "epoch": 0.36638229190684807, + "grad_norm": 1.0036913999315975, + "learning_rate": 1.9047621086871971e-06, + "loss": 0.41830652952194214, + "step": 1589 + }, + { + "epoch": 0.36661286603643073, + "grad_norm": 1.1944977036427056, + "learning_rate": 1.9045996571820067e-06, + "loss": 0.5540663003921509, + "step": 1590 + }, + { + "epoch": 0.3668434401660134, + "grad_norm": 1.072580109375711, + "learning_rate": 1.9044370741840882e-06, + "loss": 0.5619527101516724, + "step": 1591 + }, + { + "epoch": 0.36707401429559605, + "grad_norm": 1.1509533440805209, + "learning_rate": 1.9042743597170746e-06, + "loss": 0.5086055994033813, + "step": 1592 + }, + { + "epoch": 0.3673045884251787, + "grad_norm": 1.050425223739088, + "learning_rate": 1.9041115138046183e-06, + "loss": 0.5839927196502686, + "step": 1593 + }, + { + "epoch": 0.3675351625547614, + "grad_norm": 1.0464789939377692, + "learning_rate": 1.9039485364703904e-06, + "loss": 0.508616030216217, + "step": 1594 + }, + { + "epoch": 0.36776573668434404, + "grad_norm": 1.15877506638183, + "learning_rate": 1.903785427738082e-06, + "loss": 0.46514832973480225, + "step": 1595 + }, + { + "epoch": 0.3679963108139267, + "grad_norm": 1.525284603977575, + "learning_rate": 1.9036221876314016e-06, + "loss": 0.42142176628112793, + "step": 1596 + }, + { + "epoch": 0.36822688494350936, + "grad_norm": 1.3114380851226077, + "learning_rate": 1.9034588161740786e-06, + "loss": 0.42195791006088257, + "step": 1597 + }, + { + "epoch": 0.368457459073092, + 
"grad_norm": 1.0276642661247686, + "learning_rate": 1.9032953133898601e-06, + "loss": 0.46705931425094604, + "step": 1598 + }, + { + "epoch": 0.3686880332026747, + "grad_norm": 1.1002100436754347, + "learning_rate": 1.9031316793025134e-06, + "loss": 0.4741164743900299, + "step": 1599 + }, + { + "epoch": 0.36891860733225734, + "grad_norm": 1.269728601723268, + "learning_rate": 1.902967913935824e-06, + "loss": 0.49730339646339417, + "step": 1600 + }, + { + "epoch": 0.36914918146184, + "grad_norm": 0.9594474153361355, + "learning_rate": 1.902804017313597e-06, + "loss": 0.47678127884864807, + "step": 1601 + }, + { + "epoch": 0.36937975559142266, + "grad_norm": 1.1964394586929104, + "learning_rate": 1.9026399894596565e-06, + "loss": 0.4954279661178589, + "step": 1602 + }, + { + "epoch": 0.3696103297210053, + "grad_norm": 0.9685506818723637, + "learning_rate": 1.9024758303978456e-06, + "loss": 0.5115381479263306, + "step": 1603 + }, + { + "epoch": 0.369840903850588, + "grad_norm": 1.0632901548704432, + "learning_rate": 1.9023115401520264e-06, + "loss": 0.6147117614746094, + "step": 1604 + }, + { + "epoch": 0.37007147798017065, + "grad_norm": 1.4566806194426465, + "learning_rate": 1.9021471187460802e-06, + "loss": 0.5334371328353882, + "step": 1605 + }, + { + "epoch": 0.3703020521097533, + "grad_norm": 1.2820059739478686, + "learning_rate": 1.9019825662039073e-06, + "loss": 0.4702361226081848, + "step": 1606 + }, + { + "epoch": 0.37053262623933597, + "grad_norm": 1.1889012346736458, + "learning_rate": 1.901817882549427e-06, + "loss": 0.5049586892127991, + "step": 1607 + }, + { + "epoch": 0.37076320036891863, + "grad_norm": 1.2055092488358514, + "learning_rate": 1.901653067806578e-06, + "loss": 0.5063170194625854, + "step": 1608 + }, + { + "epoch": 0.3709937744985013, + "grad_norm": 1.1599393359430212, + "learning_rate": 1.9014881219993175e-06, + "loss": 0.540824294090271, + "step": 1609 + }, + { + "epoch": 0.37122434862808396, + "grad_norm": 1.372148291928607, + 
"learning_rate": 1.901323045151622e-06, + "loss": 0.4744170904159546, + "step": 1610 + }, + { + "epoch": 0.3714549227576666, + "grad_norm": 1.2144026597364277, + "learning_rate": 1.9011578372874876e-06, + "loss": 0.5090929269790649, + "step": 1611 + }, + { + "epoch": 0.3716854968872493, + "grad_norm": 1.0610635938586983, + "learning_rate": 1.9009924984309284e-06, + "loss": 0.3886772394180298, + "step": 1612 + }, + { + "epoch": 0.3719160710168319, + "grad_norm": 1.1192663585328575, + "learning_rate": 1.9008270286059782e-06, + "loss": 0.4976482391357422, + "step": 1613 + }, + { + "epoch": 0.37214664514641455, + "grad_norm": 1.0577168176218985, + "learning_rate": 1.9006614278366898e-06, + "loss": 0.4629209041595459, + "step": 1614 + }, + { + "epoch": 0.3723772192759972, + "grad_norm": 1.0381238100092287, + "learning_rate": 1.9004956961471352e-06, + "loss": 0.49334412813186646, + "step": 1615 + }, + { + "epoch": 0.37260779340557987, + "grad_norm": 1.2336018114177745, + "learning_rate": 1.9003298335614047e-06, + "loss": 0.614592432975769, + "step": 1616 + }, + { + "epoch": 0.37283836753516253, + "grad_norm": 0.9895019344615126, + "learning_rate": 1.9001638401036082e-06, + "loss": 0.5339843034744263, + "step": 1617 + }, + { + "epoch": 0.3730689416647452, + "grad_norm": 0.9743667038154072, + "learning_rate": 1.8999977157978749e-06, + "loss": 0.5516937375068665, + "step": 1618 + }, + { + "epoch": 0.37329951579432785, + "grad_norm": 1.2149293301312265, + "learning_rate": 1.8998314606683522e-06, + "loss": 0.5034124255180359, + "step": 1619 + }, + { + "epoch": 0.3735300899239105, + "grad_norm": 0.9412969527830801, + "learning_rate": 1.8996650747392073e-06, + "loss": 0.49766790866851807, + "step": 1620 + }, + { + "epoch": 0.3737606640534932, + "grad_norm": 1.1063112007683722, + "learning_rate": 1.899498558034626e-06, + "loss": 0.6662446856498718, + "step": 1621 + }, + { + "epoch": 0.37399123818307584, + "grad_norm": 1.3692241861945424, + "learning_rate": 
1.8993319105788129e-06, + "loss": 0.5416747331619263, + "step": 1622 + }, + { + "epoch": 0.3742218123126585, + "grad_norm": 1.2377768970666951, + "learning_rate": 1.8991651323959922e-06, + "loss": 0.5137313604354858, + "step": 1623 + }, + { + "epoch": 0.37445238644224116, + "grad_norm": 1.0509326993065755, + "learning_rate": 1.8989982235104072e-06, + "loss": 0.566002607345581, + "step": 1624 + }, + { + "epoch": 0.3746829605718238, + "grad_norm": 1.314391237074608, + "learning_rate": 1.8988311839463188e-06, + "loss": 0.5201380252838135, + "step": 1625 + }, + { + "epoch": 0.3749135347014065, + "grad_norm": 1.2844709164103703, + "learning_rate": 1.8986640137280087e-06, + "loss": 0.5103918313980103, + "step": 1626 + }, + { + "epoch": 0.37514410883098914, + "grad_norm": 1.081063959726764, + "learning_rate": 1.8984967128797763e-06, + "loss": 0.47900843620300293, + "step": 1627 + }, + { + "epoch": 0.3753746829605718, + "grad_norm": 1.0524739811683044, + "learning_rate": 1.898329281425941e-06, + "loss": 0.42991960048675537, + "step": 1628 + }, + { + "epoch": 0.37560525709015447, + "grad_norm": 1.2087969734027784, + "learning_rate": 1.89816171939084e-06, + "loss": 0.5707317590713501, + "step": 1629 + }, + { + "epoch": 0.3758358312197371, + "grad_norm": 1.0714171850017424, + "learning_rate": 1.8979940267988309e-06, + "loss": 0.565521240234375, + "step": 1630 + }, + { + "epoch": 0.3760664053493198, + "grad_norm": 1.2721353238917528, + "learning_rate": 1.8978262036742888e-06, + "loss": 0.6584400534629822, + "step": 1631 + }, + { + "epoch": 0.37629697947890245, + "grad_norm": 1.1181726564305359, + "learning_rate": 1.897658250041609e-06, + "loss": 0.4749317169189453, + "step": 1632 + }, + { + "epoch": 0.3765275536084851, + "grad_norm": 1.3732616000652873, + "learning_rate": 1.8974901659252048e-06, + "loss": 0.5495604872703552, + "step": 1633 + }, + { + "epoch": 0.37675812773806777, + "grad_norm": 1.6408199477459455, + "learning_rate": 1.8973219513495094e-06, + "loss": 
0.465708464384079, + "step": 1634 + }, + { + "epoch": 0.37698870186765043, + "grad_norm": 1.1887777428919946, + "learning_rate": 1.8971536063389742e-06, + "loss": 0.4599069058895111, + "step": 1635 + }, + { + "epoch": 0.3772192759972331, + "grad_norm": 1.1348638946303797, + "learning_rate": 1.89698513091807e-06, + "loss": 0.4716145694255829, + "step": 1636 + }, + { + "epoch": 0.37744985012681576, + "grad_norm": 0.990973234996169, + "learning_rate": 1.8968165251112863e-06, + "loss": 0.594079852104187, + "step": 1637 + }, + { + "epoch": 0.3776804242563984, + "grad_norm": 1.3300173886007076, + "learning_rate": 1.8966477889431317e-06, + "loss": 0.4588915705680847, + "step": 1638 + }, + { + "epoch": 0.3779109983859811, + "grad_norm": 1.5111913527277292, + "learning_rate": 1.8964789224381337e-06, + "loss": 0.5236901044845581, + "step": 1639 + }, + { + "epoch": 0.37814157251556374, + "grad_norm": 1.067104402214014, + "learning_rate": 1.8963099256208388e-06, + "loss": 0.4954737424850464, + "step": 1640 + }, + { + "epoch": 0.3783721466451464, + "grad_norm": 1.066408318154628, + "learning_rate": 1.8961407985158125e-06, + "loss": 0.4194701910018921, + "step": 1641 + }, + { + "epoch": 0.37860272077472906, + "grad_norm": 0.9999478144515371, + "learning_rate": 1.8959715411476388e-06, + "loss": 0.5368303060531616, + "step": 1642 + }, + { + "epoch": 0.3788332949043117, + "grad_norm": 1.2178837934755509, + "learning_rate": 1.8958021535409214e-06, + "loss": 0.5181677341461182, + "step": 1643 + }, + { + "epoch": 0.3790638690338944, + "grad_norm": 1.0342390187480546, + "learning_rate": 1.8956326357202821e-06, + "loss": 0.4755169749259949, + "step": 1644 + }, + { + "epoch": 0.37929444316347705, + "grad_norm": 1.1097461588236448, + "learning_rate": 1.8954629877103625e-06, + "loss": 0.5460895299911499, + "step": 1645 + }, + { + "epoch": 0.3795250172930597, + "grad_norm": 1.090972908814234, + "learning_rate": 1.8952932095358224e-06, + "loss": 0.47811684012413025, + "step": 1646 + }, + { + 
"epoch": 0.37975559142264237, + "grad_norm": 1.1794844360929688, + "learning_rate": 1.8951233012213405e-06, + "loss": 0.5791733860969543, + "step": 1647 + }, + { + "epoch": 0.37998616555222503, + "grad_norm": 1.1163036430533217, + "learning_rate": 1.8949532627916151e-06, + "loss": 0.4996911585330963, + "step": 1648 + }, + { + "epoch": 0.3802167396818077, + "grad_norm": 1.3190959058791496, + "learning_rate": 1.8947830942713628e-06, + "loss": 0.6108353137969971, + "step": 1649 + }, + { + "epoch": 0.38044731381139035, + "grad_norm": 1.2084081721604487, + "learning_rate": 1.8946127956853195e-06, + "loss": 0.5303040742874146, + "step": 1650 + }, + { + "epoch": 0.380677887940973, + "grad_norm": 1.0581391679258725, + "learning_rate": 1.8944423670582397e-06, + "loss": 0.4651896357536316, + "step": 1651 + }, + { + "epoch": 0.3809084620705557, + "grad_norm": 1.1464415021916683, + "learning_rate": 1.8942718084148969e-06, + "loss": 0.6321637630462646, + "step": 1652 + }, + { + "epoch": 0.38113903620013834, + "grad_norm": 1.1535120052175352, + "learning_rate": 1.8941011197800836e-06, + "loss": 0.5124787092208862, + "step": 1653 + }, + { + "epoch": 0.381369610329721, + "grad_norm": 1.2712538370269149, + "learning_rate": 1.893930301178611e-06, + "loss": 0.5779180526733398, + "step": 1654 + }, + { + "epoch": 0.38160018445930366, + "grad_norm": 1.2579128550158534, + "learning_rate": 1.8937593526353096e-06, + "loss": 0.5723867416381836, + "step": 1655 + }, + { + "epoch": 0.3818307585888863, + "grad_norm": 1.0216965854263103, + "learning_rate": 1.8935882741750281e-06, + "loss": 0.4312398433685303, + "step": 1656 + }, + { + "epoch": 0.382061332718469, + "grad_norm": 1.7195703110538068, + "learning_rate": 1.893417065822635e-06, + "loss": 0.6503756046295166, + "step": 1657 + }, + { + "epoch": 0.38229190684805164, + "grad_norm": 1.2691180997694498, + "learning_rate": 1.8932457276030166e-06, + "loss": 0.508478045463562, + "step": 1658 + }, + { + "epoch": 0.3825224809776343, + "grad_norm": 
0.9328619594784499, + "learning_rate": 1.8930742595410792e-06, + "loss": 0.46552446484565735, + "step": 1659 + }, + { + "epoch": 0.38275305510721697, + "grad_norm": 0.983497277362264, + "learning_rate": 1.8929026616617467e-06, + "loss": 0.4739278256893158, + "step": 1660 + }, + { + "epoch": 0.3829836292367996, + "grad_norm": 1.2642164913655083, + "learning_rate": 1.8927309339899634e-06, + "loss": 0.5584233403205872, + "step": 1661 + }, + { + "epoch": 0.3832142033663823, + "grad_norm": 1.0681648876128738, + "learning_rate": 1.8925590765506911e-06, + "loss": 0.6155074238777161, + "step": 1662 + }, + { + "epoch": 0.38344477749596495, + "grad_norm": 1.1479148469369402, + "learning_rate": 1.8923870893689112e-06, + "loss": 0.5253106951713562, + "step": 1663 + }, + { + "epoch": 0.3836753516255476, + "grad_norm": 1.2179992400932398, + "learning_rate": 1.8922149724696238e-06, + "loss": 0.4190565347671509, + "step": 1664 + }, + { + "epoch": 0.3839059257551303, + "grad_norm": 1.124098215736467, + "learning_rate": 1.892042725877848e-06, + "loss": 0.5263853073120117, + "step": 1665 + }, + { + "epoch": 0.38413649988471293, + "grad_norm": 1.0385777204325046, + "learning_rate": 1.8918703496186214e-06, + "loss": 0.4492432773113251, + "step": 1666 + }, + { + "epoch": 0.3843670740142956, + "grad_norm": 1.3356308613758272, + "learning_rate": 1.8916978437170004e-06, + "loss": 0.49745023250579834, + "step": 1667 + }, + { + "epoch": 0.38459764814387826, + "grad_norm": 1.2023114319635457, + "learning_rate": 1.891525208198061e-06, + "loss": 0.6003707647323608, + "step": 1668 + }, + { + "epoch": 0.3848282222734609, + "grad_norm": 1.6371184982518272, + "learning_rate": 1.8913524430868973e-06, + "loss": 0.5430049300193787, + "step": 1669 + }, + { + "epoch": 0.3850587964030436, + "grad_norm": 1.0715049923324578, + "learning_rate": 1.8911795484086222e-06, + "loss": 0.5561289191246033, + "step": 1670 + }, + { + "epoch": 0.38528937053262624, + "grad_norm": 1.1416350409171048, + "learning_rate": 
1.8910065241883678e-06, + "loss": 0.5488184690475464, + "step": 1671 + }, + { + "epoch": 0.3855199446622089, + "grad_norm": 1.0082475661815067, + "learning_rate": 1.890833370451285e-06, + "loss": 0.46347010135650635, + "step": 1672 + }, + { + "epoch": 0.38575051879179156, + "grad_norm": 1.0668592703569681, + "learning_rate": 1.8906600872225438e-06, + "loss": 0.553687334060669, + "step": 1673 + }, + { + "epoch": 0.3859810929213742, + "grad_norm": 1.1035800532005071, + "learning_rate": 1.8904866745273323e-06, + "loss": 0.46162208914756775, + "step": 1674 + }, + { + "epoch": 0.3862116670509569, + "grad_norm": 1.076914158561248, + "learning_rate": 1.8903131323908576e-06, + "loss": 0.4478996992111206, + "step": 1675 + }, + { + "epoch": 0.38644224118053955, + "grad_norm": 1.1488135535707533, + "learning_rate": 1.8901394608383463e-06, + "loss": 0.5857031345367432, + "step": 1676 + }, + { + "epoch": 0.3866728153101222, + "grad_norm": 1.5929334393746841, + "learning_rate": 1.8899656598950432e-06, + "loss": 0.592833399772644, + "step": 1677 + }, + { + "epoch": 0.38690338943970487, + "grad_norm": 1.0232228390237461, + "learning_rate": 1.8897917295862117e-06, + "loss": 0.6007786989212036, + "step": 1678 + }, + { + "epoch": 0.38713396356928753, + "grad_norm": 1.109869111259485, + "learning_rate": 1.8896176699371343e-06, + "loss": 0.5248164534568787, + "step": 1679 + }, + { + "epoch": 0.3873645376988702, + "grad_norm": 0.856016560201164, + "learning_rate": 1.8894434809731128e-06, + "loss": 0.43112409114837646, + "step": 1680 + }, + { + "epoch": 0.38759511182845285, + "grad_norm": 1.318795823918729, + "learning_rate": 1.8892691627194673e-06, + "loss": 0.56545090675354, + "step": 1681 + }, + { + "epoch": 0.3878256859580355, + "grad_norm": 1.1470159881146635, + "learning_rate": 1.8890947152015363e-06, + "loss": 0.6287904977798462, + "step": 1682 + }, + { + "epoch": 0.3880562600876182, + "grad_norm": 1.155806897456587, + "learning_rate": 1.8889201384446775e-06, + "loss": 
0.48461633920669556, + "step": 1683 + }, + { + "epoch": 0.38828683421720084, + "grad_norm": 1.2251476021613918, + "learning_rate": 1.888745432474268e-06, + "loss": 0.5089331865310669, + "step": 1684 + }, + { + "epoch": 0.3885174083467835, + "grad_norm": 0.9661641286318025, + "learning_rate": 1.8885705973157027e-06, + "loss": 0.4805281162261963, + "step": 1685 + }, + { + "epoch": 0.38874798247636616, + "grad_norm": 1.070887780603473, + "learning_rate": 1.8883956329943955e-06, + "loss": 0.5243096947669983, + "step": 1686 + }, + { + "epoch": 0.3889785566059488, + "grad_norm": 1.240979728566986, + "learning_rate": 1.8882205395357795e-06, + "loss": 0.5808781981468201, + "step": 1687 + }, + { + "epoch": 0.3892091307355315, + "grad_norm": 1.2574299318006046, + "learning_rate": 1.8880453169653063e-06, + "loss": 0.5397018194198608, + "step": 1688 + }, + { + "epoch": 0.38943970486511414, + "grad_norm": 1.182945649827907, + "learning_rate": 1.8878699653084462e-06, + "loss": 0.4475638270378113, + "step": 1689 + }, + { + "epoch": 0.3896702789946968, + "grad_norm": 1.3095447574792232, + "learning_rate": 1.8876944845906884e-06, + "loss": 0.6212958693504333, + "step": 1690 + }, + { + "epoch": 0.38990085312427947, + "grad_norm": 1.1726349359481907, + "learning_rate": 1.8875188748375407e-06, + "loss": 0.44465404748916626, + "step": 1691 + }, + { + "epoch": 0.39013142725386213, + "grad_norm": 1.2650698772045321, + "learning_rate": 1.8873431360745297e-06, + "loss": 0.5711641311645508, + "step": 1692 + }, + { + "epoch": 0.3903620013834448, + "grad_norm": 1.2039233000565408, + "learning_rate": 1.8871672683272012e-06, + "loss": 0.4527866244316101, + "step": 1693 + }, + { + "epoch": 0.39059257551302745, + "grad_norm": 1.515756125658867, + "learning_rate": 1.8869912716211188e-06, + "loss": 0.6242899894714355, + "step": 1694 + }, + { + "epoch": 0.3908231496426101, + "grad_norm": 1.6198907712835393, + "learning_rate": 1.8868151459818656e-06, + "loss": 0.6294416189193726, + "step": 1695 + }, 
+ { + "epoch": 0.3910537237721928, + "grad_norm": 1.2238875456694314, + "learning_rate": 1.8866388914350435e-06, + "loss": 0.49869638681411743, + "step": 1696 + }, + { + "epoch": 0.39128429790177544, + "grad_norm": 1.1755814842525432, + "learning_rate": 1.886462508006273e-06, + "loss": 0.5456752777099609, + "step": 1697 + }, + { + "epoch": 0.3915148720313581, + "grad_norm": 1.0114016306766007, + "learning_rate": 1.8862859957211926e-06, + "loss": 0.4197172224521637, + "step": 1698 + }, + { + "epoch": 0.39174544616094076, + "grad_norm": 1.0278658872450297, + "learning_rate": 1.8861093546054603e-06, + "loss": 0.5012276768684387, + "step": 1699 + }, + { + "epoch": 0.3919760202905234, + "grad_norm": 1.2065880303446173, + "learning_rate": 1.8859325846847531e-06, + "loss": 0.48108845949172974, + "step": 1700 + }, + { + "epoch": 0.3922065944201061, + "grad_norm": 1.1190986847477769, + "learning_rate": 1.885755685984766e-06, + "loss": 0.48592355847358704, + "step": 1701 + }, + { + "epoch": 0.39243716854968874, + "grad_norm": 1.136053467553038, + "learning_rate": 1.8855786585312132e-06, + "loss": 0.5744791030883789, + "step": 1702 + }, + { + "epoch": 0.3926677426792714, + "grad_norm": 1.1435558229801501, + "learning_rate": 1.8854015023498273e-06, + "loss": 0.5378769040107727, + "step": 1703 + }, + { + "epoch": 0.39289831680885406, + "grad_norm": 1.0710678493453967, + "learning_rate": 1.8852242174663594e-06, + "loss": 0.5630123615264893, + "step": 1704 + }, + { + "epoch": 0.3931288909384367, + "grad_norm": 1.0913466409725974, + "learning_rate": 1.8850468039065806e-06, + "loss": 0.5247849225997925, + "step": 1705 + }, + { + "epoch": 0.3933594650680194, + "grad_norm": 1.282307381217427, + "learning_rate": 1.884869261696279e-06, + "loss": 0.5679286122322083, + "step": 1706 + }, + { + "epoch": 0.39359003919760205, + "grad_norm": 1.0140902583392881, + "learning_rate": 1.8846915908612622e-06, + "loss": 0.4505179524421692, + "step": 1707 + }, + { + "epoch": 0.3938206133271847, + 
"grad_norm": 1.233342858229108, + "learning_rate": 1.8845137914273566e-06, + "loss": 0.6077077388763428, + "step": 1708 + }, + { + "epoch": 0.39405118745676737, + "grad_norm": 1.1523756442286543, + "learning_rate": 1.8843358634204069e-06, + "loss": 0.4703037738800049, + "step": 1709 + }, + { + "epoch": 0.39428176158635003, + "grad_norm": 1.3467147447696661, + "learning_rate": 1.8841578068662773e-06, + "loss": 0.6085091829299927, + "step": 1710 + }, + { + "epoch": 0.3945123357159327, + "grad_norm": 1.3769264461225226, + "learning_rate": 1.8839796217908498e-06, + "loss": 0.6075730919837952, + "step": 1711 + }, + { + "epoch": 0.39474290984551536, + "grad_norm": 1.4068518720273175, + "learning_rate": 1.8838013082200252e-06, + "loss": 0.581851601600647, + "step": 1712 + }, + { + "epoch": 0.394973483975098, + "grad_norm": 0.9365976129961602, + "learning_rate": 1.8836228661797234e-06, + "loss": 0.555284857749939, + "step": 1713 + }, + { + "epoch": 0.3952040581046807, + "grad_norm": 1.205134330479215, + "learning_rate": 1.8834442956958832e-06, + "loss": 0.5342675447463989, + "step": 1714 + }, + { + "epoch": 0.39543463223426334, + "grad_norm": 1.2329889286532099, + "learning_rate": 1.8832655967944605e-06, + "loss": 0.47501081228256226, + "step": 1715 + }, + { + "epoch": 0.395665206363846, + "grad_norm": 1.1350943426800137, + "learning_rate": 1.8830867695014323e-06, + "loss": 0.592293918132782, + "step": 1716 + }, + { + "epoch": 0.39589578049342866, + "grad_norm": 1.2591938264724012, + "learning_rate": 1.8829078138427921e-06, + "loss": 0.5903242826461792, + "step": 1717 + }, + { + "epoch": 0.3961263546230113, + "grad_norm": 1.203385992389072, + "learning_rate": 1.882728729844553e-06, + "loss": 0.5292568206787109, + "step": 1718 + }, + { + "epoch": 0.396356928752594, + "grad_norm": 1.070652075724697, + "learning_rate": 1.8825495175327468e-06, + "loss": 0.5748786926269531, + "step": 1719 + }, + { + "epoch": 0.39658750288217665, + "grad_norm": 1.230421737483, + "learning_rate": 
1.8823701769334242e-06, + "loss": 0.6191601753234863, + "step": 1720 + }, + { + "epoch": 0.3968180770117593, + "grad_norm": 1.180452919869617, + "learning_rate": 1.8821907080726535e-06, + "loss": 0.5569231510162354, + "step": 1721 + }, + { + "epoch": 0.39704865114134197, + "grad_norm": 1.291275382361216, + "learning_rate": 1.882011110976523e-06, + "loss": 0.5103349089622498, + "step": 1722 + }, + { + "epoch": 0.39727922527092463, + "grad_norm": 1.1952555855906501, + "learning_rate": 1.8818313856711382e-06, + "loss": 0.4981175363063812, + "step": 1723 + }, + { + "epoch": 0.39750979940050724, + "grad_norm": 1.5157833486690673, + "learning_rate": 1.8816515321826248e-06, + "loss": 0.5429514050483704, + "step": 1724 + }, + { + "epoch": 0.3977403735300899, + "grad_norm": 1.1377768164918185, + "learning_rate": 1.8814715505371254e-06, + "loss": 0.5318386554718018, + "step": 1725 + }, + { + "epoch": 0.39797094765967256, + "grad_norm": 1.0451576127270763, + "learning_rate": 1.881291440760803e-06, + "loss": 0.47451460361480713, + "step": 1726 + }, + { + "epoch": 0.3982015217892552, + "grad_norm": 1.2815255131055066, + "learning_rate": 1.8811112028798384e-06, + "loss": 0.5141372680664062, + "step": 1727 + }, + { + "epoch": 0.3984320959188379, + "grad_norm": 1.0864089006893662, + "learning_rate": 1.8809308369204302e-06, + "loss": 0.4950217008590698, + "step": 1728 + }, + { + "epoch": 0.39866267004842054, + "grad_norm": 0.9530925154379366, + "learning_rate": 1.880750342908797e-06, + "loss": 0.4961693286895752, + "step": 1729 + }, + { + "epoch": 0.3988932441780032, + "grad_norm": 1.1860643451162984, + "learning_rate": 1.8805697208711752e-06, + "loss": 0.43443650007247925, + "step": 1730 + }, + { + "epoch": 0.39912381830758586, + "grad_norm": 1.1332453377909741, + "learning_rate": 1.8803889708338203e-06, + "loss": 0.6116896867752075, + "step": 1731 + }, + { + "epoch": 0.3993543924371685, + "grad_norm": 0.9403622624868753, + "learning_rate": 1.8802080928230062e-06, + "loss": 
0.46244728565216064, + "step": 1732 + }, + { + "epoch": 0.3995849665667512, + "grad_norm": 1.3180964068285155, + "learning_rate": 1.880027086865025e-06, + "loss": 0.5728162527084351, + "step": 1733 + }, + { + "epoch": 0.39981554069633385, + "grad_norm": 1.1310284579414278, + "learning_rate": 1.8798459529861876e-06, + "loss": 0.4472135901451111, + "step": 1734 + }, + { + "epoch": 0.4000461148259165, + "grad_norm": 1.4100215542732757, + "learning_rate": 1.8796646912128246e-06, + "loss": 0.5862090587615967, + "step": 1735 + }, + { + "epoch": 0.40027668895549917, + "grad_norm": 1.428537555998266, + "learning_rate": 1.8794833015712831e-06, + "loss": 0.6406301259994507, + "step": 1736 + }, + { + "epoch": 0.40050726308508183, + "grad_norm": 1.3320783455965834, + "learning_rate": 1.8793017840879306e-06, + "loss": 0.5865743160247803, + "step": 1737 + }, + { + "epoch": 0.4007378372146645, + "grad_norm": 1.2736301947050057, + "learning_rate": 1.8791201387891524e-06, + "loss": 0.5521814823150635, + "step": 1738 + }, + { + "epoch": 0.40096841134424716, + "grad_norm": 0.9710129928143749, + "learning_rate": 1.8789383657013522e-06, + "loss": 0.40027791261672974, + "step": 1739 + }, + { + "epoch": 0.4011989854738298, + "grad_norm": 1.213730124395359, + "learning_rate": 1.8787564648509528e-06, + "loss": 0.5594751238822937, + "step": 1740 + }, + { + "epoch": 0.4014295596034125, + "grad_norm": 1.2077878384788876, + "learning_rate": 1.8785744362643955e-06, + "loss": 0.5029730796813965, + "step": 1741 + }, + { + "epoch": 0.40166013373299514, + "grad_norm": 1.086599940670418, + "learning_rate": 1.8783922799681397e-06, + "loss": 0.6089034676551819, + "step": 1742 + }, + { + "epoch": 0.4018907078625778, + "grad_norm": 1.178028157014987, + "learning_rate": 1.8782099959886639e-06, + "loss": 0.5238372683525085, + "step": 1743 + }, + { + "epoch": 0.40212128199216046, + "grad_norm": 1.0430681899893623, + "learning_rate": 1.8780275843524643e-06, + "loss": 0.47281232476234436, + "step": 1744 + }, 
+ { + "epoch": 0.4023518561217431, + "grad_norm": 1.0603667709126336, + "learning_rate": 1.8778450450860571e-06, + "loss": 0.44885876774787903, + "step": 1745 + }, + { + "epoch": 0.4025824302513258, + "grad_norm": 1.1187549409367323, + "learning_rate": 1.8776623782159762e-06, + "loss": 0.5915139317512512, + "step": 1746 + }, + { + "epoch": 0.40281300438090845, + "grad_norm": 1.6743224234561098, + "learning_rate": 1.8774795837687736e-06, + "loss": 0.49341484904289246, + "step": 1747 + }, + { + "epoch": 0.4030435785104911, + "grad_norm": 1.1133076324661322, + "learning_rate": 1.8772966617710205e-06, + "loss": 0.43253493309020996, + "step": 1748 + }, + { + "epoch": 0.40327415264007377, + "grad_norm": 1.2596810310862556, + "learning_rate": 1.8771136122493064e-06, + "loss": 0.48660045862197876, + "step": 1749 + }, + { + "epoch": 0.40350472676965643, + "grad_norm": 1.158836920018239, + "learning_rate": 1.8769304352302396e-06, + "loss": 0.4493838846683502, + "step": 1750 + }, + { + "epoch": 0.4037353008992391, + "grad_norm": 1.1033409495303377, + "learning_rate": 1.8767471307404464e-06, + "loss": 0.5656435489654541, + "step": 1751 + }, + { + "epoch": 0.40396587502882175, + "grad_norm": 1.1945430976561655, + "learning_rate": 1.876563698806572e-06, + "loss": 0.48047327995300293, + "step": 1752 + }, + { + "epoch": 0.4041964491584044, + "grad_norm": 1.117811372759575, + "learning_rate": 1.8763801394552806e-06, + "loss": 0.5314204692840576, + "step": 1753 + }, + { + "epoch": 0.4044270232879871, + "grad_norm": 1.212293607312766, + "learning_rate": 1.876196452713254e-06, + "loss": 0.5436627864837646, + "step": 1754 + }, + { + "epoch": 0.40465759741756974, + "grad_norm": 1.1748084841171984, + "learning_rate": 1.8760126386071933e-06, + "loss": 0.5383991599082947, + "step": 1755 + }, + { + "epoch": 0.4048881715471524, + "grad_norm": 1.1737559222863878, + "learning_rate": 1.8758286971638171e-06, + "loss": 0.48271507024765015, + "step": 1756 + }, + { + "epoch": 0.40511874567673506, + 
"grad_norm": 1.0323965631837329, + "learning_rate": 1.8756446284098638e-06, + "loss": 0.5920745134353638, + "step": 1757 + }, + { + "epoch": 0.4053493198063177, + "grad_norm": 1.1254236464300211, + "learning_rate": 1.875460432372089e-06, + "loss": 0.4467526078224182, + "step": 1758 + }, + { + "epoch": 0.4055798939359004, + "grad_norm": 0.9503211623796617, + "learning_rate": 1.875276109077268e-06, + "loss": 0.425409734249115, + "step": 1759 + }, + { + "epoch": 0.40581046806548304, + "grad_norm": 1.1318149217921376, + "learning_rate": 1.8750916585521938e-06, + "loss": 0.4911944568157196, + "step": 1760 + }, + { + "epoch": 0.4060410421950657, + "grad_norm": 1.5865124774001016, + "learning_rate": 1.8749070808236787e-06, + "loss": 0.49605780839920044, + "step": 1761 + }, + { + "epoch": 0.40627161632464837, + "grad_norm": 1.322640956813398, + "learning_rate": 1.874722375918552e-06, + "loss": 0.5582889914512634, + "step": 1762 + }, + { + "epoch": 0.406502190454231, + "grad_norm": 1.0487904765861873, + "learning_rate": 1.874537543863663e-06, + "loss": 0.4867294132709503, + "step": 1763 + }, + { + "epoch": 0.4067327645838137, + "grad_norm": 1.062364022734449, + "learning_rate": 1.8743525846858787e-06, + "loss": 0.5050587058067322, + "step": 1764 + }, + { + "epoch": 0.40696333871339635, + "grad_norm": 1.0581562602291477, + "learning_rate": 1.8741674984120852e-06, + "loss": 0.4380977749824524, + "step": 1765 + }, + { + "epoch": 0.407193912842979, + "grad_norm": 1.326690473297383, + "learning_rate": 1.8739822850691865e-06, + "loss": 0.5159280300140381, + "step": 1766 + }, + { + "epoch": 0.4074244869725617, + "grad_norm": 1.3542586293022822, + "learning_rate": 1.8737969446841046e-06, + "loss": 0.6999780535697937, + "step": 1767 + }, + { + "epoch": 0.40765506110214433, + "grad_norm": 1.110421221417803, + "learning_rate": 1.8736114772837816e-06, + "loss": 0.5844931602478027, + "step": 1768 + }, + { + "epoch": 0.407885635231727, + "grad_norm": 1.2621793403708754, + 
"learning_rate": 1.8734258828951764e-06, + "loss": 0.5078610181808472, + "step": 1769 + }, + { + "epoch": 0.40811620936130966, + "grad_norm": 1.1260800835324682, + "learning_rate": 1.8732401615452673e-06, + "loss": 0.564793586730957, + "step": 1770 + }, + { + "epoch": 0.4083467834908923, + "grad_norm": 1.2906459398399637, + "learning_rate": 1.8730543132610506e-06, + "loss": 0.6145100593566895, + "step": 1771 + }, + { + "epoch": 0.408577357620475, + "grad_norm": 1.181953537531204, + "learning_rate": 1.8728683380695414e-06, + "loss": 0.45434027910232544, + "step": 1772 + }, + { + "epoch": 0.40880793175005764, + "grad_norm": 1.0716516851559217, + "learning_rate": 1.872682235997773e-06, + "loss": 0.4917553961277008, + "step": 1773 + }, + { + "epoch": 0.4090385058796403, + "grad_norm": 1.0983534367258283, + "learning_rate": 1.872496007072797e-06, + "loss": 0.5677252411842346, + "step": 1774 + }, + { + "epoch": 0.40926908000922296, + "grad_norm": 1.042591224606922, + "learning_rate": 1.872309651321684e-06, + "loss": 0.5516688823699951, + "step": 1775 + }, + { + "epoch": 0.4094996541388056, + "grad_norm": 0.9746786592567609, + "learning_rate": 1.8721231687715227e-06, + "loss": 0.46755337715148926, + "step": 1776 + }, + { + "epoch": 0.4097302282683883, + "grad_norm": 1.3130136596789415, + "learning_rate": 1.8719365594494202e-06, + "loss": 0.6575521230697632, + "step": 1777 + }, + { + "epoch": 0.40996080239797095, + "grad_norm": 1.147271087293654, + "learning_rate": 1.8717498233825019e-06, + "loss": 0.6088716983795166, + "step": 1778 + }, + { + "epoch": 0.4101913765275536, + "grad_norm": 0.9692417840942277, + "learning_rate": 1.8715629605979118e-06, + "loss": 0.39476478099823, + "step": 1779 + }, + { + "epoch": 0.41042195065713627, + "grad_norm": 1.1915743629339146, + "learning_rate": 1.8713759711228123e-06, + "loss": 0.4893898665904999, + "step": 1780 + }, + { + "epoch": 0.41065252478671893, + "grad_norm": 1.298092223223541, + "learning_rate": 1.8711888549843842e-06, + 
"loss": 0.5077828764915466, + "step": 1781 + }, + { + "epoch": 0.4108830989163016, + "grad_norm": 1.0084481520460131, + "learning_rate": 1.8710016122098269e-06, + "loss": 0.5212582349777222, + "step": 1782 + }, + { + "epoch": 0.41111367304588425, + "grad_norm": 1.1325685052130308, + "learning_rate": 1.870814242826358e-06, + "loss": 0.5135321617126465, + "step": 1783 + }, + { + "epoch": 0.4113442471754669, + "grad_norm": 1.3281766258765773, + "learning_rate": 1.8706267468612133e-06, + "loss": 0.5398930311203003, + "step": 1784 + }, + { + "epoch": 0.4115748213050496, + "grad_norm": 1.3736547238310808, + "learning_rate": 1.8704391243416477e-06, + "loss": 0.49205562472343445, + "step": 1785 + }, + { + "epoch": 0.41180539543463224, + "grad_norm": 1.1386437791047925, + "learning_rate": 1.8702513752949335e-06, + "loss": 0.5145718455314636, + "step": 1786 + }, + { + "epoch": 0.4120359695642149, + "grad_norm": 0.9532031818658743, + "learning_rate": 1.8700634997483622e-06, + "loss": 0.4868374466896057, + "step": 1787 + }, + { + "epoch": 0.41226654369379756, + "grad_norm": 1.3881400467911258, + "learning_rate": 1.8698754977292435e-06, + "loss": 0.5409311652183533, + "step": 1788 + }, + { + "epoch": 0.4124971178233802, + "grad_norm": 1.307800898328953, + "learning_rate": 1.8696873692649052e-06, + "loss": 0.5476658344268799, + "step": 1789 + }, + { + "epoch": 0.4127276919529629, + "grad_norm": 1.251951597359409, + "learning_rate": 1.8694991143826937e-06, + "loss": 0.5545511245727539, + "step": 1790 + }, + { + "epoch": 0.41295826608254554, + "grad_norm": 1.1923559975321376, + "learning_rate": 1.869310733109974e-06, + "loss": 0.5479267835617065, + "step": 1791 + }, + { + "epoch": 0.4131888402121282, + "grad_norm": 1.1567279350887396, + "learning_rate": 1.8691222254741289e-06, + "loss": 0.5261585712432861, + "step": 1792 + }, + { + "epoch": 0.41341941434171087, + "grad_norm": 1.035636889065738, + "learning_rate": 1.8689335915025599e-06, + "loss": 0.5478091239929199, + "step": 1793 
+ }, + { + "epoch": 0.41364998847129353, + "grad_norm": 1.5699808716332777, + "learning_rate": 1.8687448312226872e-06, + "loss": 0.6739054322242737, + "step": 1794 + }, + { + "epoch": 0.4138805626008762, + "grad_norm": 1.2236857571837823, + "learning_rate": 1.8685559446619487e-06, + "loss": 0.613865315914154, + "step": 1795 + }, + { + "epoch": 0.41411113673045885, + "grad_norm": 1.0357788562325108, + "learning_rate": 1.8683669318478012e-06, + "loss": 0.3936721384525299, + "step": 1796 + }, + { + "epoch": 0.4143417108600415, + "grad_norm": 1.2330991076599302, + "learning_rate": 1.8681777928077197e-06, + "loss": 0.5508556365966797, + "step": 1797 + }, + { + "epoch": 0.4145722849896242, + "grad_norm": 1.1597942164225867, + "learning_rate": 1.867988527569197e-06, + "loss": 0.47734567523002625, + "step": 1798 + }, + { + "epoch": 0.41480285911920683, + "grad_norm": 1.0741273588884312, + "learning_rate": 1.8677991361597449e-06, + "loss": 0.46847039461135864, + "step": 1799 + }, + { + "epoch": 0.4150334332487895, + "grad_norm": 1.0364595457718502, + "learning_rate": 1.8676096186068937e-06, + "loss": 0.5202786326408386, + "step": 1800 + }, + { + "epoch": 0.41526400737837216, + "grad_norm": 1.2972392907268704, + "learning_rate": 1.8674199749381914e-06, + "loss": 0.5144700407981873, + "step": 1801 + }, + { + "epoch": 0.4154945815079548, + "grad_norm": 1.1959128972921023, + "learning_rate": 1.8672302051812048e-06, + "loss": 0.4394092559814453, + "step": 1802 + }, + { + "epoch": 0.4157251556375375, + "grad_norm": 1.159378410595036, + "learning_rate": 1.8670403093635185e-06, + "loss": 0.5017338991165161, + "step": 1803 + }, + { + "epoch": 0.41595572976712014, + "grad_norm": 1.173120824085894, + "learning_rate": 1.8668502875127366e-06, + "loss": 0.409381628036499, + "step": 1804 + }, + { + "epoch": 0.4161863038967028, + "grad_norm": 1.0538601271665184, + "learning_rate": 1.8666601396564795e-06, + "loss": 0.5193957090377808, + "step": 1805 + }, + { + "epoch": 0.41641687802628546, 
+ "grad_norm": 1.1338279816499315, + "learning_rate": 1.8664698658223882e-06, + "loss": 0.5933586359024048, + "step": 1806 + }, + { + "epoch": 0.4166474521558681, + "grad_norm": 1.1304820859227924, + "learning_rate": 1.8662794660381204e-06, + "loss": 0.5283366441726685, + "step": 1807 + }, + { + "epoch": 0.4168780262854508, + "grad_norm": 1.118558214164988, + "learning_rate": 1.8660889403313526e-06, + "loss": 0.5063748359680176, + "step": 1808 + }, + { + "epoch": 0.41710860041503345, + "grad_norm": 1.087893149342631, + "learning_rate": 1.86589828872978e-06, + "loss": 0.6386028528213501, + "step": 1809 + }, + { + "epoch": 0.4173391745446161, + "grad_norm": 1.0041938541729358, + "learning_rate": 1.8657075112611153e-06, + "loss": 0.4618440270423889, + "step": 1810 + }, + { + "epoch": 0.41756974867419877, + "grad_norm": 1.3214046412105014, + "learning_rate": 1.8655166079530903e-06, + "loss": 0.4523535966873169, + "step": 1811 + }, + { + "epoch": 0.41780032280378143, + "grad_norm": 1.0747078557029888, + "learning_rate": 1.8653255788334544e-06, + "loss": 0.501311719417572, + "step": 1812 + }, + { + "epoch": 0.4180308969333641, + "grad_norm": 1.112333239244982, + "learning_rate": 1.865134423929976e-06, + "loss": 0.5504614114761353, + "step": 1813 + }, + { + "epoch": 0.41826147106294675, + "grad_norm": 1.0979124892402103, + "learning_rate": 1.864943143270441e-06, + "loss": 0.44275063276290894, + "step": 1814 + }, + { + "epoch": 0.4184920451925294, + "grad_norm": 1.2558217334961832, + "learning_rate": 1.8647517368826545e-06, + "loss": 0.5628173351287842, + "step": 1815 + }, + { + "epoch": 0.4187226193221121, + "grad_norm": 1.032119999950418, + "learning_rate": 1.864560204794439e-06, + "loss": 0.489221453666687, + "step": 1816 + }, + { + "epoch": 0.41895319345169474, + "grad_norm": 1.2211401188891802, + "learning_rate": 1.8643685470336355e-06, + "loss": 0.5440137386322021, + "step": 1817 + }, + { + "epoch": 0.4191837675812774, + "grad_norm": 1.169073111073683, + 
"learning_rate": 1.8641767636281035e-06, + "loss": 0.4518952965736389, + "step": 1818 + }, + { + "epoch": 0.41941434171086006, + "grad_norm": 1.3403542594346476, + "learning_rate": 1.8639848546057209e-06, + "loss": 0.591090977191925, + "step": 1819 + }, + { + "epoch": 0.4196449158404427, + "grad_norm": 1.1775626126130905, + "learning_rate": 1.8637928199943836e-06, + "loss": 0.5622411966323853, + "step": 1820 + }, + { + "epoch": 0.4198754899700254, + "grad_norm": 1.1913164061698733, + "learning_rate": 1.8636006598220052e-06, + "loss": 0.5086779594421387, + "step": 1821 + }, + { + "epoch": 0.42010606409960805, + "grad_norm": 1.1334153574078034, + "learning_rate": 1.8634083741165188e-06, + "loss": 0.5055384635925293, + "step": 1822 + }, + { + "epoch": 0.4203366382291907, + "grad_norm": 1.129676706405598, + "learning_rate": 1.863215962905875e-06, + "loss": 0.5076277852058411, + "step": 1823 + }, + { + "epoch": 0.42056721235877337, + "grad_norm": 1.2637764937692704, + "learning_rate": 1.8630234262180424e-06, + "loss": 0.5378403067588806, + "step": 1824 + }, + { + "epoch": 0.42079778648835603, + "grad_norm": 1.0886873342980177, + "learning_rate": 1.8628307640810083e-06, + "loss": 0.6133165955543518, + "step": 1825 + }, + { + "epoch": 0.4210283606179387, + "grad_norm": 1.1726755470049002, + "learning_rate": 1.8626379765227782e-06, + "loss": 0.4978156089782715, + "step": 1826 + }, + { + "epoch": 0.42125893474752135, + "grad_norm": 1.0651427070474233, + "learning_rate": 1.8624450635713759e-06, + "loss": 0.43159037828445435, + "step": 1827 + }, + { + "epoch": 0.421489508877104, + "grad_norm": 1.0498543002649237, + "learning_rate": 1.8622520252548424e-06, + "loss": 0.48821642994880676, + "step": 1828 + }, + { + "epoch": 0.4217200830066867, + "grad_norm": 1.016883491579865, + "learning_rate": 1.8620588616012387e-06, + "loss": 0.4666696786880493, + "step": 1829 + }, + { + "epoch": 0.42195065713626934, + "grad_norm": 1.3621906870852534, + "learning_rate": 1.8618655726386425e-06, 
+ "loss": 0.5278067588806152, + "step": 1830 + }, + { + "epoch": 0.422181231265852, + "grad_norm": 1.0791230542588068, + "learning_rate": 1.8616721583951512e-06, + "loss": 0.4357749819755554, + "step": 1831 + }, + { + "epoch": 0.42241180539543466, + "grad_norm": 1.2299213864410639, + "learning_rate": 1.8614786188988782e-06, + "loss": 0.5388439893722534, + "step": 1832 + }, + { + "epoch": 0.4226423795250173, + "grad_norm": 1.4108572710321559, + "learning_rate": 1.8612849541779573e-06, + "loss": 0.5443956255912781, + "step": 1833 + }, + { + "epoch": 0.4228729536546, + "grad_norm": 1.2641105463427431, + "learning_rate": 1.86109116426054e-06, + "loss": 0.5614160895347595, + "step": 1834 + }, + { + "epoch": 0.4231035277841826, + "grad_norm": 1.2744746751945835, + "learning_rate": 1.8608972491747943e-06, + "loss": 0.45780229568481445, + "step": 1835 + }, + { + "epoch": 0.42333410191376525, + "grad_norm": 1.4638598184796152, + "learning_rate": 1.8607032089489088e-06, + "loss": 0.6354867219924927, + "step": 1836 + }, + { + "epoch": 0.4235646760433479, + "grad_norm": 1.2548140048045007, + "learning_rate": 1.860509043611089e-06, + "loss": 0.5172948241233826, + "step": 1837 + }, + { + "epoch": 0.42379525017293057, + "grad_norm": 1.1235697857312772, + "learning_rate": 1.8603147531895586e-06, + "loss": 0.4353157877922058, + "step": 1838 + }, + { + "epoch": 0.42402582430251323, + "grad_norm": 1.1680682893696177, + "learning_rate": 1.8601203377125599e-06, + "loss": 0.4971036911010742, + "step": 1839 + }, + { + "epoch": 0.4242563984320959, + "grad_norm": 1.0750331417799794, + "learning_rate": 1.859925797208353e-06, + "loss": 0.5037736296653748, + "step": 1840 + }, + { + "epoch": 0.42448697256167855, + "grad_norm": 1.052234823772871, + "learning_rate": 1.8597311317052165e-06, + "loss": 0.4480808675289154, + "step": 1841 + }, + { + "epoch": 0.4247175466912612, + "grad_norm": 1.2441100874175304, + "learning_rate": 1.8595363412314468e-06, + "loss": 0.5102680325508118, + "step": 1842 + 
}, + { + "epoch": 0.4249481208208439, + "grad_norm": 1.1806961844163353, + "learning_rate": 1.8593414258153585e-06, + "loss": 0.5979090929031372, + "step": 1843 + }, + { + "epoch": 0.42517869495042654, + "grad_norm": 1.0776260642041309, + "learning_rate": 1.8591463854852854e-06, + "loss": 0.4616047143936157, + "step": 1844 + }, + { + "epoch": 0.4254092690800092, + "grad_norm": 1.0059742827824252, + "learning_rate": 1.8589512202695773e-06, + "loss": 0.4893925189971924, + "step": 1845 + }, + { + "epoch": 0.42563984320959186, + "grad_norm": 1.0527785435538273, + "learning_rate": 1.8587559301966045e-06, + "loss": 0.49619823694229126, + "step": 1846 + }, + { + "epoch": 0.4258704173391745, + "grad_norm": 1.0558967393125807, + "learning_rate": 1.858560515294754e-06, + "loss": 0.5205181837081909, + "step": 1847 + }, + { + "epoch": 0.4261009914687572, + "grad_norm": 1.3589791827910958, + "learning_rate": 1.8583649755924315e-06, + "loss": 0.5910394191741943, + "step": 1848 + }, + { + "epoch": 0.42633156559833985, + "grad_norm": 1.0092224062378152, + "learning_rate": 1.8581693111180603e-06, + "loss": 0.4916709363460541, + "step": 1849 + }, + { + "epoch": 0.4265621397279225, + "grad_norm": 1.261654259944108, + "learning_rate": 1.8579735219000824e-06, + "loss": 0.5728994011878967, + "step": 1850 + }, + { + "epoch": 0.42679271385750517, + "grad_norm": 1.162885813109175, + "learning_rate": 1.857777607966958e-06, + "loss": 0.49620527029037476, + "step": 1851 + }, + { + "epoch": 0.42702328798708783, + "grad_norm": 1.2230754640158692, + "learning_rate": 1.8575815693471649e-06, + "loss": 0.5100233554840088, + "step": 1852 + }, + { + "epoch": 0.4272538621166705, + "grad_norm": 1.1713081386962017, + "learning_rate": 1.8573854060691994e-06, + "loss": 0.48981544375419617, + "step": 1853 + }, + { + "epoch": 0.42748443624625315, + "grad_norm": 1.0875128431195988, + "learning_rate": 1.8571891181615755e-06, + "loss": 0.44190293550491333, + "step": 1854 + }, + { + "epoch": 0.4277150103758358, 
+ "grad_norm": 1.2645757986317834, + "learning_rate": 1.8569927056528264e-06, + "loss": 0.42867448925971985, + "step": 1855 + }, + { + "epoch": 0.4279455845054185, + "grad_norm": 1.849182592399251, + "learning_rate": 1.8567961685715016e-06, + "loss": 0.4873782694339752, + "step": 1856 + }, + { + "epoch": 0.42817615863500114, + "grad_norm": 1.2007241803680166, + "learning_rate": 1.8565995069461706e-06, + "loss": 0.4985312819480896, + "step": 1857 + }, + { + "epoch": 0.4284067327645838, + "grad_norm": 1.2242163730204847, + "learning_rate": 1.85640272080542e-06, + "loss": 0.5525496006011963, + "step": 1858 + }, + { + "epoch": 0.42863730689416646, + "grad_norm": 1.293851624108558, + "learning_rate": 1.8562058101778547e-06, + "loss": 0.5645877122879028, + "step": 1859 + }, + { + "epoch": 0.4288678810237491, + "grad_norm": 1.0805291431045556, + "learning_rate": 1.856008775092097e-06, + "loss": 0.4304332137107849, + "step": 1860 + }, + { + "epoch": 0.4290984551533318, + "grad_norm": 1.14759009112306, + "learning_rate": 1.8558116155767888e-06, + "loss": 0.4970170259475708, + "step": 1861 + }, + { + "epoch": 0.42932902928291444, + "grad_norm": 1.344010966492771, + "learning_rate": 1.8556143316605888e-06, + "loss": 0.5718003511428833, + "step": 1862 + }, + { + "epoch": 0.4295596034124971, + "grad_norm": 1.3157067542574963, + "learning_rate": 1.8554169233721741e-06, + "loss": 0.4445415139198303, + "step": 1863 + }, + { + "epoch": 0.42979017754207977, + "grad_norm": 1.1001033203387223, + "learning_rate": 1.8552193907402404e-06, + "loss": 0.5297178626060486, + "step": 1864 + }, + { + "epoch": 0.4300207516716624, + "grad_norm": 0.9618626645905404, + "learning_rate": 1.8550217337935013e-06, + "loss": 0.4564483165740967, + "step": 1865 + }, + { + "epoch": 0.4302513258012451, + "grad_norm": 1.2509575429906847, + "learning_rate": 1.8548239525606872e-06, + "loss": 0.4789202809333801, + "step": 1866 + }, + { + "epoch": 0.43048189993082775, + "grad_norm": 1.0950598228304256, + 
"learning_rate": 1.8546260470705485e-06, + "loss": 0.5240263938903809, + "step": 1867 + }, + { + "epoch": 0.4307124740604104, + "grad_norm": 1.0326884664902543, + "learning_rate": 1.8544280173518523e-06, + "loss": 0.4190866947174072, + "step": 1868 + }, + { + "epoch": 0.43094304818999307, + "grad_norm": 1.098749197470929, + "learning_rate": 1.8542298634333844e-06, + "loss": 0.502301812171936, + "step": 1869 + }, + { + "epoch": 0.43117362231957573, + "grad_norm": 1.3711612309046508, + "learning_rate": 1.8540315853439488e-06, + "loss": 0.5752545595169067, + "step": 1870 + }, + { + "epoch": 0.4314041964491584, + "grad_norm": 0.9641480143185914, + "learning_rate": 1.8538331831123667e-06, + "loss": 0.44959962368011475, + "step": 1871 + }, + { + "epoch": 0.43163477057874106, + "grad_norm": 1.2299121621798328, + "learning_rate": 1.8536346567674782e-06, + "loss": 0.5320106148719788, + "step": 1872 + }, + { + "epoch": 0.4318653447083237, + "grad_norm": 1.393182956860924, + "learning_rate": 1.8534360063381407e-06, + "loss": 0.5981979966163635, + "step": 1873 + }, + { + "epoch": 0.4320959188379064, + "grad_norm": 1.350381662747622, + "learning_rate": 1.8532372318532306e-06, + "loss": 0.5567579865455627, + "step": 1874 + }, + { + "epoch": 0.43232649296748904, + "grad_norm": 1.4350681093951811, + "learning_rate": 1.8530383333416415e-06, + "loss": 0.5604764223098755, + "step": 1875 + }, + { + "epoch": 0.4325570670970717, + "grad_norm": 1.4048444099270982, + "learning_rate": 1.8528393108322852e-06, + "loss": 0.5410721302032471, + "step": 1876 + }, + { + "epoch": 0.43278764122665436, + "grad_norm": 1.1191045271107989, + "learning_rate": 1.852640164354092e-06, + "loss": 0.5417271852493286, + "step": 1877 + }, + { + "epoch": 0.433018215356237, + "grad_norm": 1.1925092385457925, + "learning_rate": 1.8524408939360096e-06, + "loss": 0.5831471681594849, + "step": 1878 + }, + { + "epoch": 0.4332487894858197, + "grad_norm": 1.0939224950949575, + "learning_rate": 1.8522414996070045e-06, + 
"loss": 0.45030760765075684, + "step": 1879 + }, + { + "epoch": 0.43347936361540235, + "grad_norm": 1.1520994484307991, + "learning_rate": 1.8520419813960596e-06, + "loss": 0.44657936692237854, + "step": 1880 + }, + { + "epoch": 0.433709937744985, + "grad_norm": 1.1691007631884454, + "learning_rate": 1.851842339332178e-06, + "loss": 0.5472795963287354, + "step": 1881 + }, + { + "epoch": 0.43394051187456767, + "grad_norm": 1.1388268257083902, + "learning_rate": 1.8516425734443786e-06, + "loss": 0.4883359968662262, + "step": 1882 + }, + { + "epoch": 0.43417108600415033, + "grad_norm": 1.0473976151781044, + "learning_rate": 1.8514426837617006e-06, + "loss": 0.5172675848007202, + "step": 1883 + }, + { + "epoch": 0.434401660133733, + "grad_norm": 1.2812470936666533, + "learning_rate": 1.851242670313199e-06, + "loss": 0.5253418684005737, + "step": 1884 + }, + { + "epoch": 0.43463223426331565, + "grad_norm": 1.2940121862284113, + "learning_rate": 1.8510425331279485e-06, + "loss": 0.4684918522834778, + "step": 1885 + }, + { + "epoch": 0.4348628083928983, + "grad_norm": 1.7313907662218715, + "learning_rate": 1.8508422722350404e-06, + "loss": 0.522485077381134, + "step": 1886 + }, + { + "epoch": 0.435093382522481, + "grad_norm": 1.0862530759153244, + "learning_rate": 1.8506418876635852e-06, + "loss": 0.5123787522315979, + "step": 1887 + }, + { + "epoch": 0.43532395665206364, + "grad_norm": 1.2812741997977775, + "learning_rate": 1.8504413794427106e-06, + "loss": 0.5195976495742798, + "step": 1888 + }, + { + "epoch": 0.4355545307816463, + "grad_norm": 1.081503403719265, + "learning_rate": 1.8502407476015626e-06, + "loss": 0.48394906520843506, + "step": 1889 + }, + { + "epoch": 0.43578510491122896, + "grad_norm": 1.2031421687566246, + "learning_rate": 1.850039992169305e-06, + "loss": 0.5083323121070862, + "step": 1890 + }, + { + "epoch": 0.4360156790408116, + "grad_norm": 1.2379097603599272, + "learning_rate": 1.8498391131751196e-06, + "loss": 0.5303651094436646, + "step": 1891 
+ }, + { + "epoch": 0.4362462531703943, + "grad_norm": 1.010820397187413, + "learning_rate": 1.8496381106482062e-06, + "loss": 0.49429047107696533, + "step": 1892 + }, + { + "epoch": 0.43647682729997694, + "grad_norm": 1.2506572926955764, + "learning_rate": 1.8494369846177826e-06, + "loss": 0.5263347625732422, + "step": 1893 + }, + { + "epoch": 0.4367074014295596, + "grad_norm": 1.3195849148516783, + "learning_rate": 1.8492357351130848e-06, + "loss": 0.5332654714584351, + "step": 1894 + }, + { + "epoch": 0.43693797555914227, + "grad_norm": 1.1692381501686961, + "learning_rate": 1.8490343621633657e-06, + "loss": 0.5598278045654297, + "step": 1895 + }, + { + "epoch": 0.43716854968872493, + "grad_norm": 1.0323293964159153, + "learning_rate": 1.8488328657978975e-06, + "loss": 0.4026976227760315, + "step": 1896 + }, + { + "epoch": 0.4373991238183076, + "grad_norm": 1.3568102099956687, + "learning_rate": 1.8486312460459698e-06, + "loss": 0.4277791380882263, + "step": 1897 + }, + { + "epoch": 0.43762969794789025, + "grad_norm": 1.2550644818276735, + "learning_rate": 1.8484295029368896e-06, + "loss": 0.49567973613739014, + "step": 1898 + }, + { + "epoch": 0.4378602720774729, + "grad_norm": 1.3750960531365106, + "learning_rate": 1.8482276364999828e-06, + "loss": 0.4659258723258972, + "step": 1899 + }, + { + "epoch": 0.4380908462070556, + "grad_norm": 1.4921650354400726, + "learning_rate": 1.8480256467645923e-06, + "loss": 0.4950314164161682, + "step": 1900 + }, + { + "epoch": 0.43832142033663823, + "grad_norm": 1.2407118809889077, + "learning_rate": 1.8478235337600796e-06, + "loss": 0.5584981441497803, + "step": 1901 + }, + { + "epoch": 0.4385519944662209, + "grad_norm": 1.4539173472262998, + "learning_rate": 1.847621297515824e-06, + "loss": 0.6322404146194458, + "step": 1902 + }, + { + "epoch": 0.43878256859580356, + "grad_norm": 1.6859923054790666, + "learning_rate": 1.8474189380612225e-06, + "loss": 0.49535471200942993, + "step": 1903 + }, + { + "epoch": 
0.4390131427253862, + "grad_norm": 1.0079272515569784, + "learning_rate": 1.8472164554256897e-06, + "loss": 0.40703707933425903, + "step": 1904 + }, + { + "epoch": 0.4392437168549689, + "grad_norm": 1.1125525506446694, + "learning_rate": 1.8470138496386588e-06, + "loss": 0.4540821313858032, + "step": 1905 + }, + { + "epoch": 0.43947429098455154, + "grad_norm": 1.1572392182622382, + "learning_rate": 1.846811120729581e-06, + "loss": 0.45964252948760986, + "step": 1906 + }, + { + "epoch": 0.4397048651141342, + "grad_norm": 1.018497744556974, + "learning_rate": 1.8466082687279244e-06, + "loss": 0.4604472517967224, + "step": 1907 + }, + { + "epoch": 0.43993543924371686, + "grad_norm": 1.114828518838774, + "learning_rate": 1.8464052936631758e-06, + "loss": 0.44585052132606506, + "step": 1908 + }, + { + "epoch": 0.4401660133732995, + "grad_norm": 1.2189161284011176, + "learning_rate": 1.8462021955648397e-06, + "loss": 0.43862414360046387, + "step": 1909 + }, + { + "epoch": 0.4403965875028822, + "grad_norm": 1.0484346475063675, + "learning_rate": 1.8459989744624386e-06, + "loss": 0.5148224234580994, + "step": 1910 + }, + { + "epoch": 0.44062716163246485, + "grad_norm": 1.3041727396087255, + "learning_rate": 1.8457956303855124e-06, + "loss": 0.6201390027999878, + "step": 1911 + }, + { + "epoch": 0.4408577357620475, + "grad_norm": 1.322348681007624, + "learning_rate": 1.8455921633636196e-06, + "loss": 0.5828813314437866, + "step": 1912 + }, + { + "epoch": 0.44108830989163017, + "grad_norm": 1.2413839772395276, + "learning_rate": 1.845388573426336e-06, + "loss": 0.5491579174995422, + "step": 1913 + }, + { + "epoch": 0.44131888402121283, + "grad_norm": 1.135006469141378, + "learning_rate": 1.8451848606032554e-06, + "loss": 0.4204079508781433, + "step": 1914 + }, + { + "epoch": 0.4415494581507955, + "grad_norm": 1.3248528862326203, + "learning_rate": 1.8449810249239898e-06, + "loss": 0.5734649300575256, + "step": 1915 + }, + { + "epoch": 0.44178003228037815, + "grad_norm": 
1.1101812599659409, + "learning_rate": 1.8447770664181684e-06, + "loss": 0.48931679129600525, + "step": 1916 + }, + { + "epoch": 0.4420106064099608, + "grad_norm": 1.292831898773596, + "learning_rate": 1.8445729851154392e-06, + "loss": 0.5206375122070312, + "step": 1917 + }, + { + "epoch": 0.4422411805395435, + "grad_norm": 1.3590503413541226, + "learning_rate": 1.8443687810454666e-06, + "loss": 0.4916420578956604, + "step": 1918 + }, + { + "epoch": 0.44247175466912614, + "grad_norm": 1.0963843972341092, + "learning_rate": 1.8441644542379348e-06, + "loss": 0.5021753311157227, + "step": 1919 + }, + { + "epoch": 0.4427023287987088, + "grad_norm": 1.2556127492378621, + "learning_rate": 1.8439600047225441e-06, + "loss": 0.4615249037742615, + "step": 1920 + }, + { + "epoch": 0.44293290292829146, + "grad_norm": 1.3251855444784397, + "learning_rate": 1.8437554325290133e-06, + "loss": 0.4849514365196228, + "step": 1921 + }, + { + "epoch": 0.4431634770578741, + "grad_norm": 1.3926092312086646, + "learning_rate": 1.843550737687079e-06, + "loss": 0.5872727632522583, + "step": 1922 + }, + { + "epoch": 0.4433940511874568, + "grad_norm": 1.1422193923698303, + "learning_rate": 1.843345920226496e-06, + "loss": 0.48469966650009155, + "step": 1923 + }, + { + "epoch": 0.44362462531703944, + "grad_norm": 1.1078885152995024, + "learning_rate": 1.8431409801770364e-06, + "loss": 0.45931774377822876, + "step": 1924 + }, + { + "epoch": 0.4438551994466221, + "grad_norm": 1.0630184817249293, + "learning_rate": 1.8429359175684907e-06, + "loss": 0.5138596296310425, + "step": 1925 + }, + { + "epoch": 0.44408577357620477, + "grad_norm": 1.1576378783801253, + "learning_rate": 1.8427307324306661e-06, + "loss": 0.5586874485015869, + "step": 1926 + }, + { + "epoch": 0.44431634770578743, + "grad_norm": 0.9982496919132913, + "learning_rate": 1.8425254247933887e-06, + "loss": 0.5373901724815369, + "step": 1927 + }, + { + "epoch": 0.4445469218353701, + "grad_norm": 1.3044317948619655, + "learning_rate": 
1.8423199946865022e-06, + "loss": 0.46104729175567627, + "step": 1928 + }, + { + "epoch": 0.44477749596495275, + "grad_norm": 1.2637964058278408, + "learning_rate": 1.8421144421398678e-06, + "loss": 0.4837646782398224, + "step": 1929 + }, + { + "epoch": 0.4450080700945354, + "grad_norm": 1.0579849017335872, + "learning_rate": 1.8419087671833647e-06, + "loss": 0.47685718536376953, + "step": 1930 + }, + { + "epoch": 0.4452386442241181, + "grad_norm": 1.3061309074235694, + "learning_rate": 1.8417029698468897e-06, + "loss": 0.5904572606086731, + "step": 1931 + }, + { + "epoch": 0.44546921835370074, + "grad_norm": 1.0698778232309683, + "learning_rate": 1.8414970501603577e-06, + "loss": 0.5434018969535828, + "step": 1932 + }, + { + "epoch": 0.4456997924832834, + "grad_norm": 1.0813116335575876, + "learning_rate": 1.8412910081537012e-06, + "loss": 0.5532705783843994, + "step": 1933 + }, + { + "epoch": 0.44593036661286606, + "grad_norm": 1.2746241772853588, + "learning_rate": 1.8410848438568704e-06, + "loss": 0.4900597929954529, + "step": 1934 + }, + { + "epoch": 0.4461609407424487, + "grad_norm": 1.1321871851277807, + "learning_rate": 1.8408785572998334e-06, + "loss": 0.40426892042160034, + "step": 1935 + }, + { + "epoch": 0.4463915148720314, + "grad_norm": 1.2056959007702837, + "learning_rate": 1.840672148512576e-06, + "loss": 0.48805081844329834, + "step": 1936 + }, + { + "epoch": 0.44662208900161404, + "grad_norm": 1.247599925173634, + "learning_rate": 1.8404656175251019e-06, + "loss": 0.4997096657752991, + "step": 1937 + }, + { + "epoch": 0.4468526631311967, + "grad_norm": 1.1300078883402307, + "learning_rate": 1.8402589643674325e-06, + "loss": 0.5113422274589539, + "step": 1938 + }, + { + "epoch": 0.44708323726077936, + "grad_norm": 1.2034211237767165, + "learning_rate": 1.8400521890696065e-06, + "loss": 0.44080060720443726, + "step": 1939 + }, + { + "epoch": 0.447313811390362, + "grad_norm": 1.1365386964776252, + "learning_rate": 1.8398452916616816e-06, + "loss": 
0.4477943778038025, + "step": 1940 + }, + { + "epoch": 0.4475443855199447, + "grad_norm": 1.2171142668463, + "learning_rate": 1.8396382721737318e-06, + "loss": 0.4597470760345459, + "step": 1941 + }, + { + "epoch": 0.44777495964952735, + "grad_norm": 1.1079547319265362, + "learning_rate": 1.8394311306358494e-06, + "loss": 0.4758293628692627, + "step": 1942 + }, + { + "epoch": 0.44800553377911, + "grad_norm": 1.1579717682654027, + "learning_rate": 1.8392238670781453e-06, + "loss": 0.4573550224304199, + "step": 1943 + }, + { + "epoch": 0.44823610790869267, + "grad_norm": 1.318176172591765, + "learning_rate": 1.8390164815307465e-06, + "loss": 0.504696786403656, + "step": 1944 + }, + { + "epoch": 0.44846668203827533, + "grad_norm": 1.176904108457006, + "learning_rate": 1.8388089740237991e-06, + "loss": 0.4936453700065613, + "step": 1945 + }, + { + "epoch": 0.448697256167858, + "grad_norm": 1.0847569291854338, + "learning_rate": 1.8386013445874661e-06, + "loss": 0.4851078987121582, + "step": 1946 + }, + { + "epoch": 0.4489278302974406, + "grad_norm": 1.184810595622898, + "learning_rate": 1.8383935932519288e-06, + "loss": 0.4881519377231598, + "step": 1947 + }, + { + "epoch": 0.44915840442702326, + "grad_norm": 1.2389121525709461, + "learning_rate": 1.8381857200473859e-06, + "loss": 0.5604408979415894, + "step": 1948 + }, + { + "epoch": 0.4493889785566059, + "grad_norm": 1.2909928460674411, + "learning_rate": 1.8379777250040535e-06, + "loss": 0.5022269487380981, + "step": 1949 + }, + { + "epoch": 0.4496195526861886, + "grad_norm": 1.5074815200191058, + "learning_rate": 1.8377696081521666e-06, + "loss": 0.6519315242767334, + "step": 1950 + }, + { + "epoch": 0.44985012681577125, + "grad_norm": 1.0636886048128833, + "learning_rate": 1.8375613695219766e-06, + "loss": 0.3820997476577759, + "step": 1951 + }, + { + "epoch": 0.4500807009453539, + "grad_norm": 1.2705283632306288, + "learning_rate": 1.8373530091437526e-06, + "loss": 0.5473283529281616, + "step": 1952 + }, + { + 
"epoch": 0.45031127507493657, + "grad_norm": 1.3245130391551474, + "learning_rate": 1.8371445270477828e-06, + "loss": 0.5835955142974854, + "step": 1953 + }, + { + "epoch": 0.45054184920451923, + "grad_norm": 0.9645583101230016, + "learning_rate": 1.8369359232643716e-06, + "loss": 0.5398194789886475, + "step": 1954 + }, + { + "epoch": 0.4507724233341019, + "grad_norm": 1.363319289299188, + "learning_rate": 1.8367271978238418e-06, + "loss": 0.36561834812164307, + "step": 1955 + }, + { + "epoch": 0.45100299746368455, + "grad_norm": 1.212738724980002, + "learning_rate": 1.8365183507565342e-06, + "loss": 0.319802463054657, + "step": 1956 + }, + { + "epoch": 0.4512335715932672, + "grad_norm": 1.2303957915062576, + "learning_rate": 1.8363093820928063e-06, + "loss": 0.46466606855392456, + "step": 1957 + }, + { + "epoch": 0.4514641457228499, + "grad_norm": 1.0793723825771542, + "learning_rate": 1.8361002918630338e-06, + "loss": 0.5839806199073792, + "step": 1958 + }, + { + "epoch": 0.45169471985243254, + "grad_norm": 1.1018651408043991, + "learning_rate": 1.8358910800976105e-06, + "loss": 0.4472346603870392, + "step": 1959 + }, + { + "epoch": 0.4519252939820152, + "grad_norm": 1.2384424942976882, + "learning_rate": 1.835681746826947e-06, + "loss": 0.5191199779510498, + "step": 1960 + }, + { + "epoch": 0.45215586811159786, + "grad_norm": 1.199344967008703, + "learning_rate": 1.8354722920814722e-06, + "loss": 0.5832456350326538, + "step": 1961 + }, + { + "epoch": 0.4523864422411805, + "grad_norm": 1.17539846221013, + "learning_rate": 1.8352627158916326e-06, + "loss": 0.604708194732666, + "step": 1962 + }, + { + "epoch": 0.4526170163707632, + "grad_norm": 1.0362921929144542, + "learning_rate": 1.8350530182878924e-06, + "loss": 0.5640981793403625, + "step": 1963 + }, + { + "epoch": 0.45284759050034584, + "grad_norm": 1.6578766467164143, + "learning_rate": 1.8348431993007326e-06, + "loss": 0.4816977381706238, + "step": 1964 + }, + { + "epoch": 0.4530781646299285, + "grad_norm": 
1.1374005988930347, + "learning_rate": 1.8346332589606526e-06, + "loss": 0.4226726293563843, + "step": 1965 + }, + { + "epoch": 0.45330873875951117, + "grad_norm": 1.1547528745449813, + "learning_rate": 1.8344231972981701e-06, + "loss": 0.49635130167007446, + "step": 1966 + }, + { + "epoch": 0.4535393128890938, + "grad_norm": 1.1372879426647424, + "learning_rate": 1.8342130143438193e-06, + "loss": 0.5275523662567139, + "step": 1967 + }, + { + "epoch": 0.4537698870186765, + "grad_norm": 1.202496816282669, + "learning_rate": 1.834002710128152e-06, + "loss": 0.48517313599586487, + "step": 1968 + }, + { + "epoch": 0.45400046114825915, + "grad_norm": 1.1968500607132941, + "learning_rate": 1.8337922846817388e-06, + "loss": 0.4352126717567444, + "step": 1969 + }, + { + "epoch": 0.4542310352778418, + "grad_norm": 1.116289808278095, + "learning_rate": 1.8335817380351668e-06, + "loss": 0.48131102323532104, + "step": 1970 + }, + { + "epoch": 0.45446160940742447, + "grad_norm": 1.1124663257243492, + "learning_rate": 1.8333710702190408e-06, + "loss": 0.48989611864089966, + "step": 1971 + }, + { + "epoch": 0.45469218353700713, + "grad_norm": 1.4370850989895667, + "learning_rate": 1.8331602812639839e-06, + "loss": 0.4841296076774597, + "step": 1972 + }, + { + "epoch": 0.4549227576665898, + "grad_norm": 1.1830445801916494, + "learning_rate": 1.8329493712006364e-06, + "loss": 0.5479841232299805, + "step": 1973 + }, + { + "epoch": 0.45515333179617246, + "grad_norm": 1.1923903658380426, + "learning_rate": 1.8327383400596559e-06, + "loss": 0.4732212424278259, + "step": 1974 + }, + { + "epoch": 0.4553839059257551, + "grad_norm": 1.0628413230145501, + "learning_rate": 1.8325271878717183e-06, + "loss": 0.46675610542297363, + "step": 1975 + }, + { + "epoch": 0.4556144800553378, + "grad_norm": 1.0416293786228703, + "learning_rate": 1.8323159146675163e-06, + "loss": 0.5464143753051758, + "step": 1976 + }, + { + "epoch": 0.45584505418492044, + "grad_norm": 1.0345078154587666, + 
"learning_rate": 1.832104520477761e-06, + "loss": 0.3888660669326782, + "step": 1977 + }, + { + "epoch": 0.4560756283145031, + "grad_norm": 1.4241654424068988, + "learning_rate": 1.8318930053331805e-06, + "loss": 0.5163271427154541, + "step": 1978 + }, + { + "epoch": 0.45630620244408576, + "grad_norm": 1.2347472844947731, + "learning_rate": 1.8316813692645208e-06, + "loss": 0.5471124649047852, + "step": 1979 + }, + { + "epoch": 0.4565367765736684, + "grad_norm": 1.1473833654009267, + "learning_rate": 1.8314696123025452e-06, + "loss": 0.5907406210899353, + "step": 1980 + }, + { + "epoch": 0.4567673507032511, + "grad_norm": 1.298768820373183, + "learning_rate": 1.8312577344780346e-06, + "loss": 0.5249447226524353, + "step": 1981 + }, + { + "epoch": 0.45699792483283375, + "grad_norm": 1.2135802460189444, + "learning_rate": 1.8310457358217879e-06, + "loss": 0.5063247084617615, + "step": 1982 + }, + { + "epoch": 0.4572284989624164, + "grad_norm": 1.361065103282706, + "learning_rate": 1.830833616364621e-06, + "loss": 0.4448107182979584, + "step": 1983 + }, + { + "epoch": 0.45745907309199907, + "grad_norm": 1.1036363497718666, + "learning_rate": 1.830621376137368e-06, + "loss": 0.5699697732925415, + "step": 1984 + }, + { + "epoch": 0.45768964722158173, + "grad_norm": 1.246349122018957, + "learning_rate": 1.8304090151708794e-06, + "loss": 0.5701720118522644, + "step": 1985 + }, + { + "epoch": 0.4579202213511644, + "grad_norm": 1.2319947144837158, + "learning_rate": 1.830196533496025e-06, + "loss": 0.4754391014575958, + "step": 1986 + }, + { + "epoch": 0.45815079548074705, + "grad_norm": 1.3528306833221286, + "learning_rate": 1.8299839311436903e-06, + "loss": 0.47649019956588745, + "step": 1987 + }, + { + "epoch": 0.4583813696103297, + "grad_norm": 1.3311097062461437, + "learning_rate": 1.8297712081447797e-06, + "loss": 0.5524393320083618, + "step": 1988 + }, + { + "epoch": 0.4586119437399124, + "grad_norm": 1.0762480086961639, + "learning_rate": 1.8295583645302144e-06, + 
"loss": 0.45731648802757263, + "step": 1989 + }, + { + "epoch": 0.45884251786949504, + "grad_norm": 1.130533269973984, + "learning_rate": 1.8293454003309336e-06, + "loss": 0.4999742805957794, + "step": 1990 + }, + { + "epoch": 0.4590730919990777, + "grad_norm": 1.1313506863251181, + "learning_rate": 1.829132315577894e-06, + "loss": 0.49084147810935974, + "step": 1991 + }, + { + "epoch": 0.45930366612866036, + "grad_norm": 1.2521400943324308, + "learning_rate": 1.828919110302069e-06, + "loss": 0.45332348346710205, + "step": 1992 + }, + { + "epoch": 0.459534240258243, + "grad_norm": 1.0776738520694769, + "learning_rate": 1.8287057845344504e-06, + "loss": 0.5029363632202148, + "step": 1993 + }, + { + "epoch": 0.4597648143878257, + "grad_norm": 1.1554006749910666, + "learning_rate": 1.8284923383060475e-06, + "loss": 0.5373274087905884, + "step": 1994 + }, + { + "epoch": 0.45999538851740834, + "grad_norm": 1.372219905846735, + "learning_rate": 1.8282787716478867e-06, + "loss": 0.5022158622741699, + "step": 1995 + }, + { + "epoch": 0.460225962646991, + "grad_norm": 1.5170390306548123, + "learning_rate": 1.828065084591012e-06, + "loss": 0.5093190670013428, + "step": 1996 + }, + { + "epoch": 0.46045653677657367, + "grad_norm": 1.1628780385550688, + "learning_rate": 1.827851277166485e-06, + "loss": 0.5406581163406372, + "step": 1997 + }, + { + "epoch": 0.4606871109061563, + "grad_norm": 1.0838824930169186, + "learning_rate": 1.8276373494053852e-06, + "loss": 0.4403364062309265, + "step": 1998 + }, + { + "epoch": 0.460917685035739, + "grad_norm": 1.0663930849179153, + "learning_rate": 1.8274233013388085e-06, + "loss": 0.48383134603500366, + "step": 1999 + }, + { + "epoch": 0.46114825916532165, + "grad_norm": 1.278024022767056, + "learning_rate": 1.8272091329978693e-06, + "loss": 0.5177836418151855, + "step": 2000 + }, + { + "epoch": 0.4613788332949043, + "grad_norm": 1.3026255484345248, + "learning_rate": 1.8269948444136991e-06, + "loss": 0.5699004530906677, + "step": 2001 + 
}, + { + "epoch": 0.461609407424487, + "grad_norm": 1.0712598167444656, + "learning_rate": 1.826780435617447e-06, + "loss": 0.5415153503417969, + "step": 2002 + }, + { + "epoch": 0.46183998155406963, + "grad_norm": 1.3243429308154806, + "learning_rate": 1.8265659066402792e-06, + "loss": 0.5521166920661926, + "step": 2003 + }, + { + "epoch": 0.4620705556836523, + "grad_norm": 1.0401918069659792, + "learning_rate": 1.8263512575133802e-06, + "loss": 0.4518507122993469, + "step": 2004 + }, + { + "epoch": 0.46230112981323496, + "grad_norm": 1.4036586027704223, + "learning_rate": 1.8261364882679508e-06, + "loss": 0.5997140407562256, + "step": 2005 + }, + { + "epoch": 0.4625317039428176, + "grad_norm": 1.2297832096563293, + "learning_rate": 1.8259215989352103e-06, + "loss": 0.5105265974998474, + "step": 2006 + }, + { + "epoch": 0.4627622780724003, + "grad_norm": 1.3620575066378895, + "learning_rate": 1.825706589546395e-06, + "loss": 0.5229371190071106, + "step": 2007 + }, + { + "epoch": 0.46299285220198294, + "grad_norm": 1.323713226525437, + "learning_rate": 1.825491460132759e-06, + "loss": 0.4833800792694092, + "step": 2008 + }, + { + "epoch": 0.4632234263315656, + "grad_norm": 1.443684310899243, + "learning_rate": 1.8252762107255727e-06, + "loss": 0.4323253035545349, + "step": 2009 + }, + { + "epoch": 0.46345400046114826, + "grad_norm": 1.0890999093716327, + "learning_rate": 1.8250608413561253e-06, + "loss": 0.4563494026660919, + "step": 2010 + }, + { + "epoch": 0.4636845745907309, + "grad_norm": 1.5474519259744821, + "learning_rate": 1.8248453520557228e-06, + "loss": 0.5656196475028992, + "step": 2011 + }, + { + "epoch": 0.4639151487203136, + "grad_norm": 1.4798653425077055, + "learning_rate": 1.8246297428556887e-06, + "loss": 0.5448226928710938, + "step": 2012 + }, + { + "epoch": 0.46414572284989625, + "grad_norm": 1.1620535147248132, + "learning_rate": 1.8244140137873645e-06, + "loss": 0.4692860543727875, + "step": 2013 + }, + { + "epoch": 0.4643762969794789, + 
"grad_norm": 1.1643805671555858, + "learning_rate": 1.8241981648821079e-06, + "loss": 0.5948643088340759, + "step": 2014 + }, + { + "epoch": 0.46460687110906157, + "grad_norm": 1.1853722372788744, + "learning_rate": 1.823982196171295e-06, + "loss": 0.54410719871521, + "step": 2015 + }, + { + "epoch": 0.46483744523864423, + "grad_norm": 1.1149495485691443, + "learning_rate": 1.8237661076863192e-06, + "loss": 0.430447518825531, + "step": 2016 + }, + { + "epoch": 0.4650680193682269, + "grad_norm": 1.2520273819748522, + "learning_rate": 1.8235498994585913e-06, + "loss": 0.5420910716056824, + "step": 2017 + }, + { + "epoch": 0.46529859349780955, + "grad_norm": 1.119152189162338, + "learning_rate": 1.823333571519539e-06, + "loss": 0.5140334963798523, + "step": 2018 + }, + { + "epoch": 0.4655291676273922, + "grad_norm": 1.1399919106847334, + "learning_rate": 1.8231171239006075e-06, + "loss": 0.5901660323143005, + "step": 2019 + }, + { + "epoch": 0.4657597417569749, + "grad_norm": 1.174060044130563, + "learning_rate": 1.8229005566332603e-06, + "loss": 0.5025908350944519, + "step": 2020 + }, + { + "epoch": 0.46599031588655754, + "grad_norm": 1.3363070549997977, + "learning_rate": 1.8226838697489772e-06, + "loss": 0.4884544909000397, + "step": 2021 + }, + { + "epoch": 0.4662208900161402, + "grad_norm": 1.1349219249551332, + "learning_rate": 1.822467063279256e-06, + "loss": 0.46449869871139526, + "step": 2022 + }, + { + "epoch": 0.46645146414572286, + "grad_norm": 1.2563720378844234, + "learning_rate": 1.8222501372556116e-06, + "loss": 0.49463552236557007, + "step": 2023 + }, + { + "epoch": 0.4666820382753055, + "grad_norm": 1.285405581097111, + "learning_rate": 1.8220330917095768e-06, + "loss": 0.5027149319648743, + "step": 2024 + }, + { + "epoch": 0.4669126124048882, + "grad_norm": 1.3048909901236199, + "learning_rate": 1.8218159266727007e-06, + "loss": 0.564018726348877, + "step": 2025 + }, + { + "epoch": 0.46714318653447084, + "grad_norm": 1.1965631228875364, + 
"learning_rate": 1.821598642176551e-06, + "loss": 0.4235766530036926, + "step": 2026 + }, + { + "epoch": 0.4673737606640535, + "grad_norm": 1.3354885477125742, + "learning_rate": 1.8213812382527118e-06, + "loss": 0.5696560144424438, + "step": 2027 + }, + { + "epoch": 0.46760433479363617, + "grad_norm": 1.2879943344932543, + "learning_rate": 1.8211637149327856e-06, + "loss": 0.6101738214492798, + "step": 2028 + }, + { + "epoch": 0.46783490892321883, + "grad_norm": 1.2787382273760666, + "learning_rate": 1.820946072248391e-06, + "loss": 0.46749603748321533, + "step": 2029 + }, + { + "epoch": 0.4680654830528015, + "grad_norm": 1.0137433334051962, + "learning_rate": 1.8207283102311646e-06, + "loss": 0.4713476300239563, + "step": 2030 + }, + { + "epoch": 0.46829605718238415, + "grad_norm": 1.1924917748606811, + "learning_rate": 1.8205104289127607e-06, + "loss": 0.5381859540939331, + "step": 2031 + }, + { + "epoch": 0.4685266313119668, + "grad_norm": 1.1753816722161505, + "learning_rate": 1.82029242832485e-06, + "loss": 0.4871833324432373, + "step": 2032 + }, + { + "epoch": 0.4687572054415495, + "grad_norm": 1.2889177236993268, + "learning_rate": 1.8200743084991217e-06, + "loss": 0.520627498626709, + "step": 2033 + }, + { + "epoch": 0.46898777957113214, + "grad_norm": 1.1168475824168262, + "learning_rate": 1.8198560694672813e-06, + "loss": 0.5382364392280579, + "step": 2034 + }, + { + "epoch": 0.4692183537007148, + "grad_norm": 1.0953401197844614, + "learning_rate": 1.8196377112610524e-06, + "loss": 0.384588360786438, + "step": 2035 + }, + { + "epoch": 0.46944892783029746, + "grad_norm": 1.3337847292368636, + "learning_rate": 1.8194192339121752e-06, + "loss": 0.5515186786651611, + "step": 2036 + }, + { + "epoch": 0.4696795019598801, + "grad_norm": 1.2634192136555153, + "learning_rate": 1.819200637452408e-06, + "loss": 0.5405331254005432, + "step": 2037 + }, + { + "epoch": 0.4699100760894628, + "grad_norm": 1.3408838607377604, + "learning_rate": 1.818981921913526e-06, + 
"loss": 0.5565645694732666, + "step": 2038 + }, + { + "epoch": 0.47014065021904544, + "grad_norm": 1.1845986031026676, + "learning_rate": 1.818763087327321e-06, + "loss": 0.4856358468532562, + "step": 2039 + }, + { + "epoch": 0.4703712243486281, + "grad_norm": 1.1018414398540533, + "learning_rate": 1.8185441337256035e-06, + "loss": 0.5495761632919312, + "step": 2040 + }, + { + "epoch": 0.47060179847821076, + "grad_norm": 1.1792744067343253, + "learning_rate": 1.8183250611402007e-06, + "loss": 0.509435772895813, + "step": 2041 + }, + { + "epoch": 0.4708323726077934, + "grad_norm": 1.0107628293119386, + "learning_rate": 1.8181058696029564e-06, + "loss": 0.4663920998573303, + "step": 2042 + }, + { + "epoch": 0.4710629467373761, + "grad_norm": 1.5093599722992523, + "learning_rate": 1.817886559145733e-06, + "loss": 0.5976128578186035, + "step": 2043 + }, + { + "epoch": 0.47129352086695875, + "grad_norm": 1.2084791393616294, + "learning_rate": 1.817667129800409e-06, + "loss": 0.49167966842651367, + "step": 2044 + }, + { + "epoch": 0.4715240949965414, + "grad_norm": 1.1457657477052965, + "learning_rate": 1.817447581598881e-06, + "loss": 0.5889153480529785, + "step": 2045 + }, + { + "epoch": 0.47175466912612407, + "grad_norm": 1.206584712735091, + "learning_rate": 1.8172279145730622e-06, + "loss": 0.4970330595970154, + "step": 2046 + }, + { + "epoch": 0.47198524325570673, + "grad_norm": 1.1497751548880843, + "learning_rate": 1.817008128754884e-06, + "loss": 0.4840531051158905, + "step": 2047 + }, + { + "epoch": 0.4722158173852894, + "grad_norm": 1.0450687693806986, + "learning_rate": 1.816788224176294e-06, + "loss": 0.48297861218452454, + "step": 2048 + }, + { + "epoch": 0.47244639151487205, + "grad_norm": 1.184218710920589, + "learning_rate": 1.8165682008692578e-06, + "loss": 0.540350079536438, + "step": 2049 + }, + { + "epoch": 0.4726769656444547, + "grad_norm": 1.0359041945652345, + "learning_rate": 1.8163480588657578e-06, + "loss": 0.46405351161956787, + "step": 2050 + 
}, + { + "epoch": 0.4729075397740374, + "grad_norm": 1.1107404730922064, + "learning_rate": 1.816127798197794e-06, + "loss": 0.5175468921661377, + "step": 2051 + }, + { + "epoch": 0.47313811390362004, + "grad_norm": 1.3876726162535544, + "learning_rate": 1.8159074188973836e-06, + "loss": 0.5923771858215332, + "step": 2052 + }, + { + "epoch": 0.4733686880332027, + "grad_norm": 1.135618311389398, + "learning_rate": 1.815686920996561e-06, + "loss": 0.4999024569988251, + "step": 2053 + }, + { + "epoch": 0.47359926216278536, + "grad_norm": 1.260203747569289, + "learning_rate": 1.8154663045273775e-06, + "loss": 0.5630939602851868, + "step": 2054 + }, + { + "epoch": 0.473829836292368, + "grad_norm": 1.0446947469213006, + "learning_rate": 1.8152455695219021e-06, + "loss": 0.5505836009979248, + "step": 2055 + }, + { + "epoch": 0.4740604104219507, + "grad_norm": 1.0593378648910954, + "learning_rate": 1.8150247160122213e-06, + "loss": 0.44550588726997375, + "step": 2056 + }, + { + "epoch": 0.47429098455153335, + "grad_norm": 1.3784716647825315, + "learning_rate": 1.8148037440304375e-06, + "loss": 0.5387516021728516, + "step": 2057 + }, + { + "epoch": 0.47452155868111595, + "grad_norm": 1.2100168024707112, + "learning_rate": 1.814582653608672e-06, + "loss": 0.5941788554191589, + "step": 2058 + }, + { + "epoch": 0.4747521328106986, + "grad_norm": 1.3537451578676338, + "learning_rate": 1.8143614447790622e-06, + "loss": 0.552179217338562, + "step": 2059 + }, + { + "epoch": 0.4749827069402813, + "grad_norm": 1.4352695047482156, + "learning_rate": 1.8141401175737632e-06, + "loss": 0.4475885033607483, + "step": 2060 + }, + { + "epoch": 0.47521328106986394, + "grad_norm": 1.560782042661122, + "learning_rate": 1.813918672024947e-06, + "loss": 0.5821356773376465, + "step": 2061 + }, + { + "epoch": 0.4754438551994466, + "grad_norm": 1.0378834941031638, + "learning_rate": 1.8136971081648027e-06, + "loss": 0.4673501253128052, + "step": 2062 + }, + { + "epoch": 0.47567442932902926, + 
"grad_norm": 1.278556049660224, + "learning_rate": 1.8134754260255373e-06, + "loss": 0.582427978515625, + "step": 2063 + }, + { + "epoch": 0.4759050034586119, + "grad_norm": 1.050202225169388, + "learning_rate": 1.8132536256393744e-06, + "loss": 0.4494328498840332, + "step": 2064 + }, + { + "epoch": 0.4761355775881946, + "grad_norm": 1.2125688329070163, + "learning_rate": 1.8130317070385552e-06, + "loss": 0.44775205850601196, + "step": 2065 + }, + { + "epoch": 0.47636615171777724, + "grad_norm": 1.6939798990457848, + "learning_rate": 1.8128096702553372e-06, + "loss": 0.5456822514533997, + "step": 2066 + }, + { + "epoch": 0.4765967258473599, + "grad_norm": 1.3273956589633653, + "learning_rate": 1.8125875153219963e-06, + "loss": 0.46396178007125854, + "step": 2067 + }, + { + "epoch": 0.47682729997694256, + "grad_norm": 1.1515186039412058, + "learning_rate": 1.8123652422708247e-06, + "loss": 0.4479365944862366, + "step": 2068 + }, + { + "epoch": 0.4770578741065252, + "grad_norm": 1.2802069282774096, + "learning_rate": 1.8121428511341322e-06, + "loss": 0.4633978605270386, + "step": 2069 + }, + { + "epoch": 0.4772884482361079, + "grad_norm": 1.0517363876370052, + "learning_rate": 1.811920341944245e-06, + "loss": 0.5190213918685913, + "step": 2070 + }, + { + "epoch": 0.47751902236569055, + "grad_norm": 1.1502023331468956, + "learning_rate": 1.811697714733508e-06, + "loss": 0.3900855779647827, + "step": 2071 + }, + { + "epoch": 0.4777495964952732, + "grad_norm": 1.1255517906685018, + "learning_rate": 1.8114749695342816e-06, + "loss": 0.5130020380020142, + "step": 2072 + }, + { + "epoch": 0.47798017062485587, + "grad_norm": 1.181934216759251, + "learning_rate": 1.8112521063789444e-06, + "loss": 0.5279096364974976, + "step": 2073 + }, + { + "epoch": 0.47821074475443853, + "grad_norm": 1.1536132669518966, + "learning_rate": 1.8110291252998918e-06, + "loss": 0.5048732161521912, + "step": 2074 + }, + { + "epoch": 0.4784413188840212, + "grad_norm": 1.3979756779725594, + 
"learning_rate": 1.8108060263295362e-06, + "loss": 0.5410048365592957, + "step": 2075 + }, + { + "epoch": 0.47867189301360386, + "grad_norm": 1.2583345285712537, + "learning_rate": 1.8105828095003073e-06, + "loss": 0.5144593715667725, + "step": 2076 + }, + { + "epoch": 0.4789024671431865, + "grad_norm": 1.427505910251362, + "learning_rate": 1.810359474844652e-06, + "loss": 0.543846845626831, + "step": 2077 + }, + { + "epoch": 0.4791330412727692, + "grad_norm": 1.3389957969723305, + "learning_rate": 1.8101360223950346e-06, + "loss": 0.5628032684326172, + "step": 2078 + }, + { + "epoch": 0.47936361540235184, + "grad_norm": 1.2233623869672197, + "learning_rate": 1.8099124521839358e-06, + "loss": 0.5248516201972961, + "step": 2079 + }, + { + "epoch": 0.4795941895319345, + "grad_norm": 1.1882395736191633, + "learning_rate": 1.8096887642438537e-06, + "loss": 0.44171589612960815, + "step": 2080 + }, + { + "epoch": 0.47982476366151716, + "grad_norm": 1.1226478747483744, + "learning_rate": 1.809464958607304e-06, + "loss": 0.5003859996795654, + "step": 2081 + }, + { + "epoch": 0.4800553377910998, + "grad_norm": 1.2241972764897475, + "learning_rate": 1.8092410353068183e-06, + "loss": 0.5271269679069519, + "step": 2082 + }, + { + "epoch": 0.4802859119206825, + "grad_norm": 1.390627459359596, + "learning_rate": 1.8090169943749474e-06, + "loss": 0.5191465616226196, + "step": 2083 + }, + { + "epoch": 0.48051648605026515, + "grad_norm": 1.229186901325219, + "learning_rate": 1.8087928358442567e-06, + "loss": 0.4569256007671356, + "step": 2084 + }, + { + "epoch": 0.4807470601798478, + "grad_norm": 1.2586566204343959, + "learning_rate": 1.8085685597473307e-06, + "loss": 0.521030068397522, + "step": 2085 + }, + { + "epoch": 0.48097763430943047, + "grad_norm": 1.8616539280014968, + "learning_rate": 1.80834416611677e-06, + "loss": 0.48959439992904663, + "step": 2086 + }, + { + "epoch": 0.48120820843901313, + "grad_norm": 1.37464754051939, + "learning_rate": 1.8081196549851925e-06, + 
"loss": 0.6536514163017273, + "step": 2087 + }, + { + "epoch": 0.4814387825685958, + "grad_norm": 1.2292193685806807, + "learning_rate": 1.8078950263852327e-06, + "loss": 0.5746080875396729, + "step": 2088 + }, + { + "epoch": 0.48166935669817845, + "grad_norm": 1.244000490897379, + "learning_rate": 1.8076702803495437e-06, + "loss": 0.5518802404403687, + "step": 2089 + }, + { + "epoch": 0.4818999308277611, + "grad_norm": 1.0641823457217219, + "learning_rate": 1.8074454169107934e-06, + "loss": 0.49385470151901245, + "step": 2090 + }, + { + "epoch": 0.4821305049573438, + "grad_norm": 1.0197781900207734, + "learning_rate": 1.8072204361016688e-06, + "loss": 0.4488806426525116, + "step": 2091 + }, + { + "epoch": 0.48236107908692644, + "grad_norm": 1.1424753749617582, + "learning_rate": 1.8069953379548727e-06, + "loss": 0.4167511761188507, + "step": 2092 + }, + { + "epoch": 0.4825916532165091, + "grad_norm": 1.0650805504939584, + "learning_rate": 1.8067701225031258e-06, + "loss": 0.4181321859359741, + "step": 2093 + }, + { + "epoch": 0.48282222734609176, + "grad_norm": 1.4930083094447149, + "learning_rate": 1.806544789779165e-06, + "loss": 0.5257805585861206, + "step": 2094 + }, + { + "epoch": 0.4830528014756744, + "grad_norm": 1.2055270290247748, + "learning_rate": 1.806319339815745e-06, + "loss": 0.4687056541442871, + "step": 2095 + }, + { + "epoch": 0.4832833756052571, + "grad_norm": 1.4682007990950796, + "learning_rate": 1.8060937726456373e-06, + "loss": 0.48070380091667175, + "step": 2096 + }, + { + "epoch": 0.48351394973483974, + "grad_norm": 1.1555932423285984, + "learning_rate": 1.80586808830163e-06, + "loss": 0.516263484954834, + "step": 2097 + }, + { + "epoch": 0.4837445238644224, + "grad_norm": 1.1676344701764343, + "learning_rate": 1.805642286816529e-06, + "loss": 0.44018858671188354, + "step": 2098 + }, + { + "epoch": 0.48397509799400507, + "grad_norm": 1.1426045047454896, + "learning_rate": 1.8054163682231565e-06, + "loss": 0.469373881816864, + "step": 2099 
+ }, + { + "epoch": 0.4842056721235877, + "grad_norm": 1.2080131082183756, + "learning_rate": 1.8051903325543525e-06, + "loss": 0.4759753346443176, + "step": 2100 + }, + { + "epoch": 0.4844362462531704, + "grad_norm": 1.210070128706108, + "learning_rate": 1.804964179842973e-06, + "loss": 0.5002714395523071, + "step": 2101 + }, + { + "epoch": 0.48466682038275305, + "grad_norm": 1.5442585246670464, + "learning_rate": 1.804737910121892e-06, + "loss": 0.4869537353515625, + "step": 2102 + }, + { + "epoch": 0.4848973945123357, + "grad_norm": 1.0025531891942765, + "learning_rate": 1.804511523424e-06, + "loss": 0.4840247929096222, + "step": 2103 + }, + { + "epoch": 0.4851279686419184, + "grad_norm": 1.2125955941110753, + "learning_rate": 1.8042850197822049e-06, + "loss": 0.48390740156173706, + "step": 2104 + }, + { + "epoch": 0.48535854277150103, + "grad_norm": 1.2581816256760507, + "learning_rate": 1.8040583992294305e-06, + "loss": 0.5875431895256042, + "step": 2105 + }, + { + "epoch": 0.4855891169010837, + "grad_norm": 1.1530238586197006, + "learning_rate": 1.803831661798619e-06, + "loss": 0.4599287211894989, + "step": 2106 + }, + { + "epoch": 0.48581969103066636, + "grad_norm": 1.120967919274212, + "learning_rate": 1.803604807522729e-06, + "loss": 0.5266382694244385, + "step": 2107 + }, + { + "epoch": 0.486050265160249, + "grad_norm": 1.6402953005136756, + "learning_rate": 1.8033778364347359e-06, + "loss": 0.5592058897018433, + "step": 2108 + }, + { + "epoch": 0.4862808392898317, + "grad_norm": 1.278433491122833, + "learning_rate": 1.8031507485676324e-06, + "loss": 0.4385683834552765, + "step": 2109 + }, + { + "epoch": 0.48651141341941434, + "grad_norm": 0.9409152493815139, + "learning_rate": 1.8029235439544277e-06, + "loss": 0.4205859303474426, + "step": 2110 + }, + { + "epoch": 0.486741987548997, + "grad_norm": 1.2334271425613326, + "learning_rate": 1.8026962226281484e-06, + "loss": 0.4179378151893616, + "step": 2111 + }, + { + "epoch": 0.48697256167857966, + 
"grad_norm": 1.3018247329424364, + "learning_rate": 1.8024687846218382e-06, + "loss": 0.5022565126419067, + "step": 2112 + }, + { + "epoch": 0.4872031358081623, + "grad_norm": 1.092822670373115, + "learning_rate": 1.8022412299685574e-06, + "loss": 0.4591484069824219, + "step": 2113 + }, + { + "epoch": 0.487433709937745, + "grad_norm": 1.135644170855214, + "learning_rate": 1.8020135587013836e-06, + "loss": 0.44381004571914673, + "step": 2114 + }, + { + "epoch": 0.48766428406732765, + "grad_norm": 1.4882998519827229, + "learning_rate": 1.8017857708534106e-06, + "loss": 0.5418124198913574, + "step": 2115 + }, + { + "epoch": 0.4878948581969103, + "grad_norm": 1.1899076485341344, + "learning_rate": 1.80155786645775e-06, + "loss": 0.45836228132247925, + "step": 2116 + }, + { + "epoch": 0.48812543232649297, + "grad_norm": 1.0900529156655503, + "learning_rate": 1.80132984554753e-06, + "loss": 0.6028016805648804, + "step": 2117 + }, + { + "epoch": 0.48835600645607563, + "grad_norm": 1.2082046720219188, + "learning_rate": 1.8011017081558956e-06, + "loss": 0.461037814617157, + "step": 2118 + }, + { + "epoch": 0.4885865805856583, + "grad_norm": 1.2201342507223627, + "learning_rate": 1.8008734543160092e-06, + "loss": 0.45145073533058167, + "step": 2119 + }, + { + "epoch": 0.48881715471524095, + "grad_norm": 1.0786402560770025, + "learning_rate": 1.8006450840610495e-06, + "loss": 0.5074604153633118, + "step": 2120 + }, + { + "epoch": 0.4890477288448236, + "grad_norm": 1.047533414614444, + "learning_rate": 1.8004165974242124e-06, + "loss": 0.48518210649490356, + "step": 2121 + }, + { + "epoch": 0.4892783029744063, + "grad_norm": 1.3858118136014763, + "learning_rate": 1.800187994438711e-06, + "loss": 0.5427801609039307, + "step": 2122 + }, + { + "epoch": 0.48950887710398894, + "grad_norm": 1.1550068575676335, + "learning_rate": 1.799959275137775e-06, + "loss": 0.5002918839454651, + "step": 2123 + }, + { + "epoch": 0.4897394512335716, + "grad_norm": 1.1639768741422865, + 
"learning_rate": 1.799730439554651e-06, + "loss": 0.4417838454246521, + "step": 2124 + }, + { + "epoch": 0.48997002536315426, + "grad_norm": 1.1441558832004912, + "learning_rate": 1.7995014877226024e-06, + "loss": 0.4260700047016144, + "step": 2125 + }, + { + "epoch": 0.4902005994927369, + "grad_norm": 1.2965264900873492, + "learning_rate": 1.79927241967491e-06, + "loss": 0.5480694770812988, + "step": 2126 + }, + { + "epoch": 0.4904311736223196, + "grad_norm": 1.1303746553940783, + "learning_rate": 1.7990432354448713e-06, + "loss": 0.3911926746368408, + "step": 2127 + }, + { + "epoch": 0.49066174775190224, + "grad_norm": 1.6919718962195622, + "learning_rate": 1.7988139350657997e-06, + "loss": 0.5269262194633484, + "step": 2128 + }, + { + "epoch": 0.4908923218814849, + "grad_norm": 1.1850805062858767, + "learning_rate": 1.7985845185710272e-06, + "loss": 0.47482216358184814, + "step": 2129 + }, + { + "epoch": 0.49112289601106757, + "grad_norm": 1.1047509042558772, + "learning_rate": 1.7983549859939018e-06, + "loss": 0.5663374662399292, + "step": 2130 + }, + { + "epoch": 0.49135347014065023, + "grad_norm": 1.3067402879954033, + "learning_rate": 1.7981253373677875e-06, + "loss": 0.5322546362876892, + "step": 2131 + }, + { + "epoch": 0.4915840442702329, + "grad_norm": 1.3127111295082199, + "learning_rate": 1.797895572726067e-06, + "loss": 0.4238794445991516, + "step": 2132 + }, + { + "epoch": 0.49181461839981555, + "grad_norm": 1.3803934905983801, + "learning_rate": 1.7976656921021384e-06, + "loss": 0.49363791942596436, + "step": 2133 + }, + { + "epoch": 0.4920451925293982, + "grad_norm": 1.2075981604593182, + "learning_rate": 1.7974356955294178e-06, + "loss": 0.5079565048217773, + "step": 2134 + }, + { + "epoch": 0.4922757666589809, + "grad_norm": 1.2533809097279895, + "learning_rate": 1.7972055830413369e-06, + "loss": 0.5259063243865967, + "step": 2135 + }, + { + "epoch": 0.49250634078856353, + "grad_norm": 1.1936271771370206, + "learning_rate": 
1.7969753546713448e-06, + "loss": 0.49021831154823303, + "step": 2136 + }, + { + "epoch": 0.4927369149181462, + "grad_norm": 1.1560183810694227, + "learning_rate": 1.7967450104529078e-06, + "loss": 0.49721387028694153, + "step": 2137 + }, + { + "epoch": 0.49296748904772886, + "grad_norm": 1.523657234221405, + "learning_rate": 1.796514550419509e-06, + "loss": 0.6129348278045654, + "step": 2138 + }, + { + "epoch": 0.4931980631773115, + "grad_norm": 1.245217894172975, + "learning_rate": 1.7962839746046479e-06, + "loss": 0.5034269094467163, + "step": 2139 + }, + { + "epoch": 0.4934286373068942, + "grad_norm": 1.2009412202372387, + "learning_rate": 1.7960532830418408e-06, + "loss": 0.490216463804245, + "step": 2140 + }, + { + "epoch": 0.49365921143647684, + "grad_norm": 1.3063386967377661, + "learning_rate": 1.7958224757646212e-06, + "loss": 0.5609744787216187, + "step": 2141 + }, + { + "epoch": 0.4938897855660595, + "grad_norm": 1.2989425251267097, + "learning_rate": 1.7955915528065395e-06, + "loss": 0.4438238739967346, + "step": 2142 + }, + { + "epoch": 0.49412035969564216, + "grad_norm": 1.1724755739495214, + "learning_rate": 1.7953605142011626e-06, + "loss": 0.4704767167568207, + "step": 2143 + }, + { + "epoch": 0.4943509338252248, + "grad_norm": 1.0972580275821462, + "learning_rate": 1.795129359982074e-06, + "loss": 0.44819536805152893, + "step": 2144 + }, + { + "epoch": 0.4945815079548075, + "grad_norm": 1.4390962273022694, + "learning_rate": 1.7948980901828746e-06, + "loss": 0.5311752557754517, + "step": 2145 + }, + { + "epoch": 0.49481208208439015, + "grad_norm": 1.524280309497039, + "learning_rate": 1.7946667048371818e-06, + "loss": 0.46144258975982666, + "step": 2146 + }, + { + "epoch": 0.4950426562139728, + "grad_norm": 1.719231407355215, + "learning_rate": 1.7944352039786297e-06, + "loss": 0.5973725914955139, + "step": 2147 + }, + { + "epoch": 0.49527323034355547, + "grad_norm": 1.4078850153564488, + "learning_rate": 1.7942035876408693e-06, + "loss": 
0.4930835962295532, + "step": 2148 + }, + { + "epoch": 0.49550380447313813, + "grad_norm": 1.3404357985733748, + "learning_rate": 1.7939718558575685e-06, + "loss": 0.39137697219848633, + "step": 2149 + }, + { + "epoch": 0.4957343786027208, + "grad_norm": 1.364926902591579, + "learning_rate": 1.7937400086624117e-06, + "loss": 0.47618329524993896, + "step": 2150 + }, + { + "epoch": 0.49596495273230345, + "grad_norm": 1.1307446090872737, + "learning_rate": 1.7935080460891005e-06, + "loss": 0.4751483201980591, + "step": 2151 + }, + { + "epoch": 0.4961955268618861, + "grad_norm": 1.05862482163457, + "learning_rate": 1.7932759681713528e-06, + "loss": 0.4654052257537842, + "step": 2152 + }, + { + "epoch": 0.4964261009914688, + "grad_norm": 1.5078817597304273, + "learning_rate": 1.7930437749429035e-06, + "loss": 0.551579475402832, + "step": 2153 + }, + { + "epoch": 0.49665667512105144, + "grad_norm": 1.1496698915645684, + "learning_rate": 1.792811466437504e-06, + "loss": 0.4967789053916931, + "step": 2154 + }, + { + "epoch": 0.4968872492506341, + "grad_norm": 1.2983844202508301, + "learning_rate": 1.7925790426889234e-06, + "loss": 0.5826432108879089, + "step": 2155 + }, + { + "epoch": 0.49711782338021676, + "grad_norm": 1.1680445889037752, + "learning_rate": 1.792346503730946e-06, + "loss": 0.4260643720626831, + "step": 2156 + }, + { + "epoch": 0.4973483975097994, + "grad_norm": 1.287300561489553, + "learning_rate": 1.7921138495973741e-06, + "loss": 0.48679620027542114, + "step": 2157 + }, + { + "epoch": 0.4975789716393821, + "grad_norm": 1.219223301068072, + "learning_rate": 1.7918810803220266e-06, + "loss": 0.5048027634620667, + "step": 2158 + }, + { + "epoch": 0.49780954576896475, + "grad_norm": 1.3507694371861767, + "learning_rate": 1.7916481959387384e-06, + "loss": 0.5073787569999695, + "step": 2159 + }, + { + "epoch": 0.4980401198985474, + "grad_norm": 1.1692017846177098, + "learning_rate": 1.791415196481362e-06, + "loss": 0.47361671924591064, + "step": 2160 + }, + { 
+ "epoch": 0.49827069402813007, + "grad_norm": 1.2422906508724816, + "learning_rate": 1.7911820819837659e-06, + "loss": 0.46382519602775574, + "step": 2161 + }, + { + "epoch": 0.49850126815771273, + "grad_norm": 1.2239936361904968, + "learning_rate": 1.7909488524798357e-06, + "loss": 0.5167688727378845, + "step": 2162 + }, + { + "epoch": 0.4987318422872954, + "grad_norm": 1.125831583037744, + "learning_rate": 1.7907155080034739e-06, + "loss": 0.4486730992794037, + "step": 2163 + }, + { + "epoch": 0.49896241641687805, + "grad_norm": 1.1343310195374692, + "learning_rate": 1.7904820485885991e-06, + "loss": 0.508470356464386, + "step": 2164 + }, + { + "epoch": 0.4991929905464607, + "grad_norm": 1.2928862741310794, + "learning_rate": 1.790248474269148e-06, + "loss": 0.4752856492996216, + "step": 2165 + }, + { + "epoch": 0.4994235646760434, + "grad_norm": 1.4158256008874892, + "learning_rate": 1.7900147850790713e-06, + "loss": 0.47191953659057617, + "step": 2166 + }, + { + "epoch": 0.49965413880562604, + "grad_norm": 1.2139421208311327, + "learning_rate": 1.7897809810523396e-06, + "loss": 0.48935621976852417, + "step": 2167 + }, + { + "epoch": 0.4998847129352087, + "grad_norm": 1.0547512942585364, + "learning_rate": 1.789547062222938e-06, + "loss": 0.5455219149589539, + "step": 2168 + }, + { + "epoch": 0.5001152870647914, + "grad_norm": 1.3471138253822197, + "learning_rate": 1.789313028624869e-06, + "loss": 0.5068193078041077, + "step": 2169 + }, + { + "epoch": 0.500345861194374, + "grad_norm": 1.354177516749214, + "learning_rate": 1.789078880292152e-06, + "loss": 0.5868322253227234, + "step": 2170 + }, + { + "epoch": 0.5005764353239567, + "grad_norm": 1.2474005261331733, + "learning_rate": 1.7888446172588222e-06, + "loss": 0.5132089853286743, + "step": 2171 + }, + { + "epoch": 0.5008070094535393, + "grad_norm": 1.6917901077948925, + "learning_rate": 1.788610239558933e-06, + "loss": 0.5673823356628418, + "step": 2172 + }, + { + "epoch": 0.501037583583122, + "grad_norm": 
1.1902561905753382, + "learning_rate": 1.7883757472265533e-06, + "loss": 0.47085779905319214, + "step": 2173 + }, + { + "epoch": 0.5012681577127046, + "grad_norm": 1.38526914772559, + "learning_rate": 1.7881411402957685e-06, + "loss": 0.5286725163459778, + "step": 2174 + }, + { + "epoch": 0.5014987318422873, + "grad_norm": 1.1910792946448119, + "learning_rate": 1.7879064188006817e-06, + "loss": 0.5044010877609253, + "step": 2175 + }, + { + "epoch": 0.5017293059718699, + "grad_norm": 1.8451305262061892, + "learning_rate": 1.7876715827754113e-06, + "loss": 0.5329761505126953, + "step": 2176 + }, + { + "epoch": 0.5019598801014526, + "grad_norm": 1.1057498562542696, + "learning_rate": 1.7874366322540937e-06, + "loss": 0.5025275349617004, + "step": 2177 + }, + { + "epoch": 0.5021904542310353, + "grad_norm": 1.1913338911250846, + "learning_rate": 1.7872015672708814e-06, + "loss": 0.48466378450393677, + "step": 2178 + }, + { + "epoch": 0.502421028360618, + "grad_norm": 1.298497377256874, + "learning_rate": 1.7869663878599427e-06, + "loss": 0.505358099937439, + "step": 2179 + }, + { + "epoch": 0.5026516024902006, + "grad_norm": 1.3974305011742736, + "learning_rate": 1.7867310940554643e-06, + "loss": 0.4934875965118408, + "step": 2180 + }, + { + "epoch": 0.5028821766197833, + "grad_norm": 0.9670109365307766, + "learning_rate": 1.7864956858916482e-06, + "loss": 0.4726678133010864, + "step": 2181 + }, + { + "epoch": 0.5031127507493659, + "grad_norm": 1.3043022336942207, + "learning_rate": 1.786260163402713e-06, + "loss": 0.4619986414909363, + "step": 2182 + }, + { + "epoch": 0.5033433248789486, + "grad_norm": 1.17201330946801, + "learning_rate": 1.7860245266228946e-06, + "loss": 0.4483926594257355, + "step": 2183 + }, + { + "epoch": 0.5035738990085312, + "grad_norm": 1.0474549975114675, + "learning_rate": 1.7857887755864451e-06, + "loss": 0.4756368100643158, + "step": 2184 + }, + { + "epoch": 0.5038044731381139, + "grad_norm": 1.248404397964203, + "learning_rate": 
1.7855529103276334e-06, + "loss": 0.5610564351081848, + "step": 2185 + }, + { + "epoch": 0.5040350472676965, + "grad_norm": 1.178944045969772, + "learning_rate": 1.7853169308807447e-06, + "loss": 0.49948322772979736, + "step": 2186 + }, + { + "epoch": 0.5042656213972793, + "grad_norm": 1.203613939490818, + "learning_rate": 1.7850808372800813e-06, + "loss": 0.5023819208145142, + "step": 2187 + }, + { + "epoch": 0.5044961955268619, + "grad_norm": 1.1738403952666703, + "learning_rate": 1.7848446295599617e-06, + "loss": 0.45893096923828125, + "step": 2188 + }, + { + "epoch": 0.5047267696564446, + "grad_norm": 1.2621327179460875, + "learning_rate": 1.7846083077547212e-06, + "loss": 0.39129459857940674, + "step": 2189 + }, + { + "epoch": 0.5049573437860272, + "grad_norm": 0.9495823494613052, + "learning_rate": 1.784371871898711e-06, + "loss": 0.42348673939704895, + "step": 2190 + }, + { + "epoch": 0.5051879179156099, + "grad_norm": 1.4438634303858584, + "learning_rate": 1.7841353220263e-06, + "loss": 0.5760704278945923, + "step": 2191 + }, + { + "epoch": 0.5054184920451925, + "grad_norm": 1.1475240268019702, + "learning_rate": 1.7838986581718731e-06, + "loss": 0.5281997323036194, + "step": 2192 + }, + { + "epoch": 0.5056490661747752, + "grad_norm": 1.3139768062702608, + "learning_rate": 1.7836618803698315e-06, + "loss": 0.543775200843811, + "step": 2193 + }, + { + "epoch": 0.5058796403043578, + "grad_norm": 1.2497491249667418, + "learning_rate": 1.7834249886545934e-06, + "loss": 0.4148549437522888, + "step": 2194 + }, + { + "epoch": 0.5061102144339406, + "grad_norm": 1.183178207015322, + "learning_rate": 1.7831879830605936e-06, + "loss": 0.5165001153945923, + "step": 2195 + }, + { + "epoch": 0.5063407885635232, + "grad_norm": 1.0854657175123028, + "learning_rate": 1.782950863622283e-06, + "loss": 0.4183283746242523, + "step": 2196 + }, + { + "epoch": 0.5065713626931059, + "grad_norm": 1.2476527930959387, + "learning_rate": 1.7827136303741292e-06, + "loss": 
0.46558016538619995, + "step": 2197 + }, + { + "epoch": 0.5068019368226885, + "grad_norm": 1.2829595269176914, + "learning_rate": 1.782476283350617e-06, + "loss": 0.5491806268692017, + "step": 2198 + }, + { + "epoch": 0.5070325109522712, + "grad_norm": 1.3547672961051511, + "learning_rate": 1.7822388225862466e-06, + "loss": 0.42999008297920227, + "step": 2199 + }, + { + "epoch": 0.5072630850818538, + "grad_norm": 1.2776437457035281, + "learning_rate": 1.7820012481155358e-06, + "loss": 0.42478299140930176, + "step": 2200 + }, + { + "epoch": 0.5074936592114365, + "grad_norm": 4.51069636831696, + "learning_rate": 1.781763559973018e-06, + "loss": 0.4175076186656952, + "step": 2201 + }, + { + "epoch": 0.5077242333410191, + "grad_norm": 1.1985836355289028, + "learning_rate": 1.7815257581932439e-06, + "loss": 0.42197084426879883, + "step": 2202 + }, + { + "epoch": 0.5079548074706018, + "grad_norm": 1.2175005553032592, + "learning_rate": 1.7812878428107803e-06, + "loss": 0.39872926473617554, + "step": 2203 + }, + { + "epoch": 0.5081853816001844, + "grad_norm": 1.2908474732070376, + "learning_rate": 1.7810498138602106e-06, + "loss": 0.4572516977787018, + "step": 2204 + }, + { + "epoch": 0.5084159557297672, + "grad_norm": 1.1254873587347531, + "learning_rate": 1.780811671376135e-06, + "loss": 0.5261520147323608, + "step": 2205 + }, + { + "epoch": 0.5086465298593498, + "grad_norm": 1.8336847349223555, + "learning_rate": 1.7805734153931696e-06, + "loss": 0.4714658260345459, + "step": 2206 + }, + { + "epoch": 0.5088771039889325, + "grad_norm": 1.0757806041139168, + "learning_rate": 1.7803350459459472e-06, + "loss": 0.46184858679771423, + "step": 2207 + }, + { + "epoch": 0.5091076781185151, + "grad_norm": 1.2531712345918984, + "learning_rate": 1.7800965630691173e-06, + "loss": 0.48189157247543335, + "step": 2208 + }, + { + "epoch": 0.5093382522480978, + "grad_norm": 1.5363179586848308, + "learning_rate": 1.7798579667973463e-06, + "loss": 0.47865352034568787, + "step": 2209 + }, 
+ { + "epoch": 0.5095688263776804, + "grad_norm": 1.1589101806191746, + "learning_rate": 1.7796192571653162e-06, + "loss": 0.46073317527770996, + "step": 2210 + }, + { + "epoch": 0.5097994005072631, + "grad_norm": 1.1781605500578527, + "learning_rate": 1.7793804342077253e-06, + "loss": 0.5099648237228394, + "step": 2211 + }, + { + "epoch": 0.5100299746368457, + "grad_norm": 1.2319682423717142, + "learning_rate": 1.7791414979592903e-06, + "loss": 0.5436147451400757, + "step": 2212 + }, + { + "epoch": 0.5102605487664285, + "grad_norm": 1.2305699349330186, + "learning_rate": 1.7789024484547417e-06, + "loss": 0.5455893278121948, + "step": 2213 + }, + { + "epoch": 0.5104911228960111, + "grad_norm": 1.2918560641722026, + "learning_rate": 1.7786632857288284e-06, + "loss": 0.4886546730995178, + "step": 2214 + }, + { + "epoch": 0.5107216970255938, + "grad_norm": 1.1611199451436964, + "learning_rate": 1.778424009816315e-06, + "loss": 0.4793723225593567, + "step": 2215 + }, + { + "epoch": 0.5109522711551764, + "grad_norm": 1.3312189289078886, + "learning_rate": 1.7781846207519826e-06, + "loss": 0.5814248323440552, + "step": 2216 + }, + { + "epoch": 0.5111828452847591, + "grad_norm": 1.1560984097631717, + "learning_rate": 1.777945118570629e-06, + "loss": 0.5057421326637268, + "step": 2217 + }, + { + "epoch": 0.5114134194143417, + "grad_norm": 1.3009634347843195, + "learning_rate": 1.7777055033070682e-06, + "loss": 0.3913435935974121, + "step": 2218 + }, + { + "epoch": 0.5116439935439244, + "grad_norm": 0.9761581598604525, + "learning_rate": 1.7774657749961305e-06, + "loss": 0.4450770616531372, + "step": 2219 + }, + { + "epoch": 0.511874567673507, + "grad_norm": 1.731999332658399, + "learning_rate": 1.7772259336726636e-06, + "loss": 0.5164940357208252, + "step": 2220 + }, + { + "epoch": 0.5121051418030897, + "grad_norm": 1.257043827333845, + "learning_rate": 1.7769859793715298e-06, + "loss": 0.44231802225112915, + "step": 2221 + }, + { + "epoch": 0.5123357159326724, + 
"grad_norm": 1.2521439253976214, + "learning_rate": 1.7767459121276093e-06, + "loss": 0.516791820526123, + "step": 2222 + }, + { + "epoch": 0.5125662900622551, + "grad_norm": 1.2456616904380073, + "learning_rate": 1.7765057319757989e-06, + "loss": 0.4180450737476349, + "step": 2223 + }, + { + "epoch": 0.5127968641918377, + "grad_norm": 1.1350275613249636, + "learning_rate": 1.77626543895101e-06, + "loss": 0.49246734380722046, + "step": 2224 + }, + { + "epoch": 0.5130274383214203, + "grad_norm": 1.1582721424765736, + "learning_rate": 1.7760250330881728e-06, + "loss": 0.5058225393295288, + "step": 2225 + }, + { + "epoch": 0.513258012451003, + "grad_norm": 1.4118813849041838, + "learning_rate": 1.7757845144222321e-06, + "loss": 0.4752033054828644, + "step": 2226 + }, + { + "epoch": 0.5134885865805856, + "grad_norm": 1.2950831387397626, + "learning_rate": 1.77554388298815e-06, + "loss": 0.45163947343826294, + "step": 2227 + }, + { + "epoch": 0.5137191607101683, + "grad_norm": 1.387042973653302, + "learning_rate": 1.7753031388209044e-06, + "loss": 0.46295779943466187, + "step": 2228 + }, + { + "epoch": 0.5139497348397509, + "grad_norm": 1.2958875463664286, + "learning_rate": 1.7750622819554903e-06, + "loss": 0.5682947635650635, + "step": 2229 + }, + { + "epoch": 0.5141803089693336, + "grad_norm": 1.353052791820573, + "learning_rate": 1.7748213124269187e-06, + "loss": 0.4890878200531006, + "step": 2230 + }, + { + "epoch": 0.5144108830989162, + "grad_norm": 1.4612536503294715, + "learning_rate": 1.7745802302702164e-06, + "loss": 0.5952332615852356, + "step": 2231 + }, + { + "epoch": 0.514641457228499, + "grad_norm": 1.1928368431775584, + "learning_rate": 1.7743390355204278e-06, + "loss": 0.43224406242370605, + "step": 2232 + }, + { + "epoch": 0.5148720313580816, + "grad_norm": 1.1851533508030387, + "learning_rate": 1.7740977282126122e-06, + "loss": 0.5010303258895874, + "step": 2233 + }, + { + "epoch": 0.5151026054876643, + "grad_norm": 1.105983766082305, + 
"learning_rate": 1.7738563083818469e-06, + "loss": 0.5166633725166321, + "step": 2234 + }, + { + "epoch": 0.5153331796172469, + "grad_norm": 1.0533784617555741, + "learning_rate": 1.7736147760632245e-06, + "loss": 0.4748263359069824, + "step": 2235 + }, + { + "epoch": 0.5155637537468296, + "grad_norm": 0.9010011595528595, + "learning_rate": 1.773373131291854e-06, + "loss": 0.46462053060531616, + "step": 2236 + }, + { + "epoch": 0.5157943278764122, + "grad_norm": 1.1288843437350349, + "learning_rate": 1.7731313741028608e-06, + "loss": 0.47799748182296753, + "step": 2237 + }, + { + "epoch": 0.5160249020059949, + "grad_norm": 1.2958124494051022, + "learning_rate": 1.772889504531387e-06, + "loss": 0.43448662757873535, + "step": 2238 + }, + { + "epoch": 0.5162554761355775, + "grad_norm": 1.2781442130344307, + "learning_rate": 1.7726475226125905e-06, + "loss": 0.4609360098838806, + "step": 2239 + }, + { + "epoch": 0.5164860502651603, + "grad_norm": 1.123946418980165, + "learning_rate": 1.7724054283816463e-06, + "loss": 0.505261242389679, + "step": 2240 + }, + { + "epoch": 0.5167166243947429, + "grad_norm": 1.1143888709548355, + "learning_rate": 1.772163221873745e-06, + "loss": 0.3812851905822754, + "step": 2241 + }, + { + "epoch": 0.5169471985243256, + "grad_norm": 1.1698544335678498, + "learning_rate": 1.7719209031240938e-06, + "loss": 0.42545294761657715, + "step": 2242 + }, + { + "epoch": 0.5171777726539082, + "grad_norm": 1.3964979839005025, + "learning_rate": 1.771678472167916e-06, + "loss": 0.45135340094566345, + "step": 2243 + }, + { + "epoch": 0.5174083467834909, + "grad_norm": 1.1118819857040387, + "learning_rate": 1.7714359290404514e-06, + "loss": 0.4499250650405884, + "step": 2244 + }, + { + "epoch": 0.5176389209130735, + "grad_norm": 1.2793420965554383, + "learning_rate": 1.7711932737769564e-06, + "loss": 0.4355557858943939, + "step": 2245 + }, + { + "epoch": 0.5178694950426562, + "grad_norm": 1.3068878220482505, + "learning_rate": 1.7709505064127036e-06, + 
"loss": 0.4140744209289551, + "step": 2246 + }, + { + "epoch": 0.5181000691722388, + "grad_norm": 1.2538619837975196, + "learning_rate": 1.7707076269829809e-06, + "loss": 0.5108504891395569, + "step": 2247 + }, + { + "epoch": 0.5183306433018215, + "grad_norm": 1.0866593797381727, + "learning_rate": 1.7704646355230936e-06, + "loss": 0.5064615607261658, + "step": 2248 + }, + { + "epoch": 0.5185612174314042, + "grad_norm": 1.4034267264652582, + "learning_rate": 1.7702215320683636e-06, + "loss": 0.5922794342041016, + "step": 2249 + }, + { + "epoch": 0.5187917915609869, + "grad_norm": 1.236045367714828, + "learning_rate": 1.7699783166541279e-06, + "loss": 0.3890082836151123, + "step": 2250 + }, + { + "epoch": 0.5190223656905695, + "grad_norm": 1.1663861833023768, + "learning_rate": 1.7697349893157402e-06, + "loss": 0.5585668087005615, + "step": 2251 + }, + { + "epoch": 0.5192529398201522, + "grad_norm": 1.2125542528327162, + "learning_rate": 1.7694915500885706e-06, + "loss": 0.3904608488082886, + "step": 2252 + }, + { + "epoch": 0.5194835139497348, + "grad_norm": 1.3213509465151734, + "learning_rate": 1.7692479990080056e-06, + "loss": 0.4764491617679596, + "step": 2253 + }, + { + "epoch": 0.5197140880793175, + "grad_norm": 1.3113796870909902, + "learning_rate": 1.769004336109448e-06, + "loss": 0.49443554878234863, + "step": 2254 + }, + { + "epoch": 0.5199446622089001, + "grad_norm": 1.2196571448758133, + "learning_rate": 1.7687605614283165e-06, + "loss": 0.4679003357887268, + "step": 2255 + }, + { + "epoch": 0.5201752363384828, + "grad_norm": 1.6767016497784393, + "learning_rate": 1.7685166750000465e-06, + "loss": 0.6968683004379272, + "step": 2256 + }, + { + "epoch": 0.5204058104680654, + "grad_norm": 1.406455012631932, + "learning_rate": 1.7682726768600888e-06, + "loss": 0.5688217878341675, + "step": 2257 + }, + { + "epoch": 0.5206363845976482, + "grad_norm": 1.176050025614157, + "learning_rate": 1.7680285670439115e-06, + "loss": 0.4688011705875397, + "step": 2258 + 
}, + { + "epoch": 0.5208669587272308, + "grad_norm": 1.1772680288415673, + "learning_rate": 1.7677843455869984e-06, + "loss": 0.6447713971138, + "step": 2259 + }, + { + "epoch": 0.5210975328568135, + "grad_norm": 1.3187686937196665, + "learning_rate": 1.767540012524849e-06, + "loss": 0.578650951385498, + "step": 2260 + }, + { + "epoch": 0.5213281069863961, + "grad_norm": 1.4425748519700892, + "learning_rate": 1.76729556789298e-06, + "loss": 0.5001357197761536, + "step": 2261 + }, + { + "epoch": 0.5215586811159788, + "grad_norm": 1.2145912604177214, + "learning_rate": 1.7670510117269242e-06, + "loss": 0.5336331129074097, + "step": 2262 + }, + { + "epoch": 0.5217892552455614, + "grad_norm": 1.2105621787494676, + "learning_rate": 1.76680634406223e-06, + "loss": 0.5628900527954102, + "step": 2263 + }, + { + "epoch": 0.5220198293751441, + "grad_norm": 1.2476030455409495, + "learning_rate": 1.766561564934462e-06, + "loss": 0.46497443318367004, + "step": 2264 + }, + { + "epoch": 0.5222504035047267, + "grad_norm": 1.4921989012106511, + "learning_rate": 1.7663166743792019e-06, + "loss": 0.617607831954956, + "step": 2265 + }, + { + "epoch": 0.5224809776343095, + "grad_norm": 1.1582259137476871, + "learning_rate": 1.7660716724320468e-06, + "loss": 0.5236914157867432, + "step": 2266 + }, + { + "epoch": 0.5227115517638921, + "grad_norm": 1.2919028654437321, + "learning_rate": 1.76582655912861e-06, + "loss": 0.5527941584587097, + "step": 2267 + }, + { + "epoch": 0.5229421258934748, + "grad_norm": 1.208274388494889, + "learning_rate": 1.7655813345045218e-06, + "loss": 0.5394654273986816, + "step": 2268 + }, + { + "epoch": 0.5231727000230574, + "grad_norm": 1.1822216818330542, + "learning_rate": 1.7653359985954275e-06, + "loss": 0.47050246596336365, + "step": 2269 + }, + { + "epoch": 0.5234032741526401, + "grad_norm": 1.2893306401147882, + "learning_rate": 1.7650905514369894e-06, + "loss": 0.49413689970970154, + "step": 2270 + }, + { + "epoch": 0.5236338482822227, + "grad_norm": 
1.3086960549802995, + "learning_rate": 1.7648449930648856e-06, + "loss": 0.5568829774856567, + "step": 2271 + }, + { + "epoch": 0.5238644224118054, + "grad_norm": 1.2475799557753502, + "learning_rate": 1.7645993235148107e-06, + "loss": 0.49238815903663635, + "step": 2272 + }, + { + "epoch": 0.524094996541388, + "grad_norm": 1.16612817534413, + "learning_rate": 1.7643535428224752e-06, + "loss": 0.5580959320068359, + "step": 2273 + }, + { + "epoch": 0.5243255706709707, + "grad_norm": 1.4921637909191205, + "learning_rate": 1.7641076510236052e-06, + "loss": 0.5853499174118042, + "step": 2274 + }, + { + "epoch": 0.5245561448005533, + "grad_norm": 1.3988944269011947, + "learning_rate": 1.7638616481539448e-06, + "loss": 0.5638653635978699, + "step": 2275 + }, + { + "epoch": 0.5247867189301361, + "grad_norm": 1.2859178438597552, + "learning_rate": 1.7636155342492521e-06, + "loss": 0.5197241306304932, + "step": 2276 + }, + { + "epoch": 0.5250172930597187, + "grad_norm": 1.1094174928372944, + "learning_rate": 1.7633693093453026e-06, + "loss": 0.4137725234031677, + "step": 2277 + }, + { + "epoch": 0.5252478671893014, + "grad_norm": 1.2940062745509122, + "learning_rate": 1.7631229734778872e-06, + "loss": 0.54244065284729, + "step": 2278 + }, + { + "epoch": 0.525478441318884, + "grad_norm": 1.1871875469955007, + "learning_rate": 1.7628765266828137e-06, + "loss": 0.5215432047843933, + "step": 2279 + }, + { + "epoch": 0.5257090154484667, + "grad_norm": 1.1984410258580116, + "learning_rate": 1.7626299689959057e-06, + "loss": 0.5559565424919128, + "step": 2280 + }, + { + "epoch": 0.5259395895780493, + "grad_norm": 1.1663711332671047, + "learning_rate": 1.7623833004530026e-06, + "loss": 0.5251328945159912, + "step": 2281 + }, + { + "epoch": 0.526170163707632, + "grad_norm": 1.241523894329925, + "learning_rate": 1.7621365210899598e-06, + "loss": 0.5351072549819946, + "step": 2282 + }, + { + "epoch": 0.5264007378372146, + "grad_norm": 1.1901641374825476, + "learning_rate": 
1.7618896309426504e-06, + "loss": 0.46850037574768066, + "step": 2283 + }, + { + "epoch": 0.5266313119667974, + "grad_norm": 1.1697893294442419, + "learning_rate": 1.761642630046961e-06, + "loss": 0.5001033544540405, + "step": 2284 + }, + { + "epoch": 0.52686188609638, + "grad_norm": 0.9279299862604019, + "learning_rate": 1.7613955184387968e-06, + "loss": 0.47946250438690186, + "step": 2285 + }, + { + "epoch": 0.5270924602259627, + "grad_norm": 1.0539631796672029, + "learning_rate": 1.761148296154077e-06, + "loss": 0.4743049144744873, + "step": 2286 + }, + { + "epoch": 0.5273230343555453, + "grad_norm": 1.154224335020326, + "learning_rate": 1.7609009632287389e-06, + "loss": 0.4518652558326721, + "step": 2287 + }, + { + "epoch": 0.527553608485128, + "grad_norm": 1.0859896497705106, + "learning_rate": 1.7606535196987338e-06, + "loss": 0.5021224617958069, + "step": 2288 + }, + { + "epoch": 0.5277841826147106, + "grad_norm": 1.4832483769951506, + "learning_rate": 1.760405965600031e-06, + "loss": 0.4848078489303589, + "step": 2289 + }, + { + "epoch": 0.5280147567442933, + "grad_norm": 1.22421773905119, + "learning_rate": 1.7601583009686142e-06, + "loss": 0.49077051877975464, + "step": 2290 + }, + { + "epoch": 0.5282453308738759, + "grad_norm": 1.2916718452438969, + "learning_rate": 1.7599105258404848e-06, + "loss": 0.4802943468093872, + "step": 2291 + }, + { + "epoch": 0.5284759050034586, + "grad_norm": 1.4055248895326071, + "learning_rate": 1.7596626402516589e-06, + "loss": 0.5397455096244812, + "step": 2292 + }, + { + "epoch": 0.5287064791330413, + "grad_norm": 1.0497017336135974, + "learning_rate": 1.759414644238169e-06, + "loss": 0.478559672832489, + "step": 2293 + }, + { + "epoch": 0.528937053262624, + "grad_norm": 1.112359888255478, + "learning_rate": 1.7591665378360644e-06, + "loss": 0.5080797672271729, + "step": 2294 + }, + { + "epoch": 0.5291676273922066, + "grad_norm": 1.0468621326779766, + "learning_rate": 1.7589183210814093e-06, + "loss": 0.4959479868412018, 
+ "step": 2295 + }, + { + "epoch": 0.5293982015217893, + "grad_norm": 1.1985868339045591, + "learning_rate": 1.7586699940102853e-06, + "loss": 0.512288510799408, + "step": 2296 + }, + { + "epoch": 0.5296287756513719, + "grad_norm": 1.1129893572343195, + "learning_rate": 1.7584215566587886e-06, + "loss": 0.525113046169281, + "step": 2297 + }, + { + "epoch": 0.5298593497809546, + "grad_norm": 1.2088844531850982, + "learning_rate": 1.7581730090630322e-06, + "loss": 0.3715069890022278, + "step": 2298 + }, + { + "epoch": 0.5300899239105372, + "grad_norm": 1.3852845244524983, + "learning_rate": 1.757924351259145e-06, + "loss": 0.5833072662353516, + "step": 2299 + }, + { + "epoch": 0.5303204980401199, + "grad_norm": 1.638098016270419, + "learning_rate": 1.7576755832832721e-06, + "loss": 0.5942450761795044, + "step": 2300 + }, + { + "epoch": 0.5305510721697025, + "grad_norm": 1.1523961468173722, + "learning_rate": 1.7574267051715745e-06, + "loss": 0.4754432737827301, + "step": 2301 + }, + { + "epoch": 0.5307816462992853, + "grad_norm": 1.3593694553922624, + "learning_rate": 1.7571777169602287e-06, + "loss": 0.5272700190544128, + "step": 2302 + }, + { + "epoch": 0.5310122204288679, + "grad_norm": 1.137089307163323, + "learning_rate": 1.7569286186854283e-06, + "loss": 0.48376554250717163, + "step": 2303 + }, + { + "epoch": 0.5312427945584506, + "grad_norm": 1.324023805933818, + "learning_rate": 1.7566794103833816e-06, + "loss": 0.4324077367782593, + "step": 2304 + }, + { + "epoch": 0.5314733686880332, + "grad_norm": 1.2843168925212602, + "learning_rate": 1.7564300920903142e-06, + "loss": 0.44939202070236206, + "step": 2305 + }, + { + "epoch": 0.5317039428176159, + "grad_norm": 1.2413807013846574, + "learning_rate": 1.7561806638424662e-06, + "loss": 0.5256277322769165, + "step": 2306 + }, + { + "epoch": 0.5319345169471985, + "grad_norm": 1.0855894350628046, + "learning_rate": 1.7559311256760955e-06, + "loss": 0.43901991844177246, + "step": 2307 + }, + { + "epoch": 
0.5321650910767812, + "grad_norm": 1.3134089338347328, + "learning_rate": 1.7556814776274746e-06, + "loss": 0.5256138443946838, + "step": 2308 + }, + { + "epoch": 0.5323956652063638, + "grad_norm": 1.3769537654510517, + "learning_rate": 1.7554317197328922e-06, + "loss": 0.4664478600025177, + "step": 2309 + }, + { + "epoch": 0.5326262393359465, + "grad_norm": 1.1227476903728313, + "learning_rate": 1.7551818520286532e-06, + "loss": 0.5042726397514343, + "step": 2310 + }, + { + "epoch": 0.5328568134655292, + "grad_norm": 1.3417267355052607, + "learning_rate": 1.754931874551079e-06, + "loss": 0.5682350397109985, + "step": 2311 + }, + { + "epoch": 0.5330873875951119, + "grad_norm": 1.2416043105842551, + "learning_rate": 1.754681787336505e-06, + "loss": 0.5082807540893555, + "step": 2312 + }, + { + "epoch": 0.5333179617246945, + "grad_norm": 1.4255568276367208, + "learning_rate": 1.754431590421285e-06, + "loss": 0.6020215749740601, + "step": 2313 + }, + { + "epoch": 0.5335485358542772, + "grad_norm": 1.4104154799235167, + "learning_rate": 1.7541812838417877e-06, + "loss": 0.5004276633262634, + "step": 2314 + }, + { + "epoch": 0.5337791099838598, + "grad_norm": 1.060415170291065, + "learning_rate": 1.753930867634397e-06, + "loss": 0.4889993667602539, + "step": 2315 + }, + { + "epoch": 0.5340096841134425, + "grad_norm": 1.0849217066026469, + "learning_rate": 1.7536803418355141e-06, + "loss": 0.4179444909095764, + "step": 2316 + }, + { + "epoch": 0.5342402582430251, + "grad_norm": 1.2618059778728548, + "learning_rate": 1.7534297064815554e-06, + "loss": 0.46807605028152466, + "step": 2317 + }, + { + "epoch": 0.5344708323726078, + "grad_norm": 1.2827117317411258, + "learning_rate": 1.7531789616089528e-06, + "loss": 0.39173221588134766, + "step": 2318 + }, + { + "epoch": 0.5347014065021904, + "grad_norm": 1.2820357654319097, + "learning_rate": 1.7529281072541548e-06, + "loss": 0.4290514886379242, + "step": 2319 + }, + { + "epoch": 0.5349319806317732, + "grad_norm": 
1.3778694052072273, + "learning_rate": 1.752677143453626e-06, + "loss": 0.6052347421646118, + "step": 2320 + }, + { + "epoch": 0.5351625547613558, + "grad_norm": 1.054542888313722, + "learning_rate": 1.752426070243846e-06, + "loss": 0.47622209787368774, + "step": 2321 + }, + { + "epoch": 0.5353931288909385, + "grad_norm": 1.128157779747108, + "learning_rate": 1.7521748876613112e-06, + "loss": 0.4216923415660858, + "step": 2322 + }, + { + "epoch": 0.5356237030205211, + "grad_norm": 2.0737049391078384, + "learning_rate": 1.751923595742533e-06, + "loss": 0.5527430772781372, + "step": 2323 + }, + { + "epoch": 0.5358542771501038, + "grad_norm": 1.1406433043117166, + "learning_rate": 1.75167219452404e-06, + "loss": 0.5562101602554321, + "step": 2324 + }, + { + "epoch": 0.5360848512796864, + "grad_norm": 1.2183539446117024, + "learning_rate": 1.7514206840423757e-06, + "loss": 0.546181321144104, + "step": 2325 + }, + { + "epoch": 0.5363154254092691, + "grad_norm": 1.5216852196360238, + "learning_rate": 1.7511690643340995e-06, + "loss": 0.5883532762527466, + "step": 2326 + }, + { + "epoch": 0.5365459995388517, + "grad_norm": 1.2667138111118152, + "learning_rate": 1.750917335435787e-06, + "loss": 0.5231350660324097, + "step": 2327 + }, + { + "epoch": 0.5367765736684345, + "grad_norm": 1.200525241411545, + "learning_rate": 1.7506654973840292e-06, + "loss": 0.4846429228782654, + "step": 2328 + }, + { + "epoch": 0.5370071477980171, + "grad_norm": 1.0815584734915895, + "learning_rate": 1.7504135502154335e-06, + "loss": 0.43692171573638916, + "step": 2329 + }, + { + "epoch": 0.5372377219275998, + "grad_norm": 1.0658062374834336, + "learning_rate": 1.7501614939666234e-06, + "loss": 0.5076167583465576, + "step": 2330 + }, + { + "epoch": 0.5374682960571824, + "grad_norm": 1.2658937157989252, + "learning_rate": 1.7499093286742373e-06, + "loss": 0.5302891135215759, + "step": 2331 + }, + { + "epoch": 0.5376988701867651, + "grad_norm": 1.3200406937261826, + "learning_rate": 
1.7496570543749303e-06, + "loss": 0.5827817916870117, + "step": 2332 + }, + { + "epoch": 0.5379294443163477, + "grad_norm": 1.3684047155196064, + "learning_rate": 1.7494046711053726e-06, + "loss": 0.6765470504760742, + "step": 2333 + }, + { + "epoch": 0.5381600184459304, + "grad_norm": 1.3001315312834418, + "learning_rate": 1.7491521789022513e-06, + "loss": 0.48666322231292725, + "step": 2334 + }, + { + "epoch": 0.538390592575513, + "grad_norm": 1.0490910849362622, + "learning_rate": 1.7488995778022685e-06, + "loss": 0.5163695812225342, + "step": 2335 + }, + { + "epoch": 0.5386211667050956, + "grad_norm": 1.1765286879203154, + "learning_rate": 1.748646867842142e-06, + "loss": 0.44487982988357544, + "step": 2336 + }, + { + "epoch": 0.5388517408346783, + "grad_norm": 1.2992285046307706, + "learning_rate": 1.7483940490586058e-06, + "loss": 0.5512663722038269, + "step": 2337 + }, + { + "epoch": 0.539082314964261, + "grad_norm": 1.1533551829707172, + "learning_rate": 1.7481411214884098e-06, + "loss": 0.461128294467926, + "step": 2338 + }, + { + "epoch": 0.5393128890938437, + "grad_norm": 1.2239639921661383, + "learning_rate": 1.7478880851683197e-06, + "loss": 0.47291088104248047, + "step": 2339 + }, + { + "epoch": 0.5395434632234263, + "grad_norm": 1.1568837363453548, + "learning_rate": 1.747634940135117e-06, + "loss": 0.5900166034698486, + "step": 2340 + }, + { + "epoch": 0.539774037353009, + "grad_norm": 1.0385421801821113, + "learning_rate": 1.7473816864255983e-06, + "loss": 0.3878340721130371, + "step": 2341 + }, + { + "epoch": 0.5400046114825916, + "grad_norm": 1.442772155197814, + "learning_rate": 1.7471283240765775e-06, + "loss": 0.5671564340591431, + "step": 2342 + }, + { + "epoch": 0.5402351856121743, + "grad_norm": 1.1602673867587185, + "learning_rate": 1.7468748531248824e-06, + "loss": 0.5153918266296387, + "step": 2343 + }, + { + "epoch": 0.5404657597417569, + "grad_norm": 1.2187996046056446, + "learning_rate": 1.7466212736073585e-06, + "loss": 
0.49520084261894226, + "step": 2344 + }, + { + "epoch": 0.5406963338713396, + "grad_norm": 1.0955374839449357, + "learning_rate": 1.7463675855608654e-06, + "loss": 0.4884970784187317, + "step": 2345 + }, + { + "epoch": 0.5409269080009222, + "grad_norm": 1.401002336922335, + "learning_rate": 1.7461137890222798e-06, + "loss": 0.5233277678489685, + "step": 2346 + }, + { + "epoch": 0.541157482130505, + "grad_norm": 1.272363275240415, + "learning_rate": 1.7458598840284928e-06, + "loss": 0.44011372327804565, + "step": 2347 + }, + { + "epoch": 0.5413880562600876, + "grad_norm": 1.1593134205382656, + "learning_rate": 1.745605870616413e-06, + "loss": 0.4833263158798218, + "step": 2348 + }, + { + "epoch": 0.5416186303896703, + "grad_norm": 1.186578949511732, + "learning_rate": 1.7453517488229634e-06, + "loss": 0.4852379262447357, + "step": 2349 + }, + { + "epoch": 0.5418492045192529, + "grad_norm": 1.527590855990685, + "learning_rate": 1.7450975186850831e-06, + "loss": 0.4710320830345154, + "step": 2350 + }, + { + "epoch": 0.5420797786488356, + "grad_norm": 1.4382691899722804, + "learning_rate": 1.744843180239727e-06, + "loss": 0.5144790410995483, + "step": 2351 + }, + { + "epoch": 0.5423103527784182, + "grad_norm": 1.3784898997392558, + "learning_rate": 1.7445887335238663e-06, + "loss": 0.5815445184707642, + "step": 2352 + }, + { + "epoch": 0.5425409269080009, + "grad_norm": 1.1629274836022288, + "learning_rate": 1.7443341785744864e-06, + "loss": 0.5101407170295715, + "step": 2353 + }, + { + "epoch": 0.5427715010375835, + "grad_norm": 1.1760272227987194, + "learning_rate": 1.7440795154285905e-06, + "loss": 0.4584839940071106, + "step": 2354 + }, + { + "epoch": 0.5430020751671663, + "grad_norm": 1.323122873632264, + "learning_rate": 1.743824744123196e-06, + "loss": 0.482247531414032, + "step": 2355 + }, + { + "epoch": 0.5432326492967489, + "grad_norm": 1.1361176263052393, + "learning_rate": 1.7435698646953364e-06, + "loss": 0.5503325462341309, + "step": 2356 + }, + { + 
"epoch": 0.5434632234263316, + "grad_norm": 1.2952580221197654, + "learning_rate": 1.7433148771820612e-06, + "loss": 0.4803489148616791, + "step": 2357 + }, + { + "epoch": 0.5436937975559142, + "grad_norm": 1.303291620807208, + "learning_rate": 1.7430597816204351e-06, + "loss": 0.5388872027397156, + "step": 2358 + }, + { + "epoch": 0.5439243716854969, + "grad_norm": 1.6209081192397237, + "learning_rate": 1.742804578047539e-06, + "loss": 0.512636125087738, + "step": 2359 + }, + { + "epoch": 0.5441549458150795, + "grad_norm": 1.5943501598581358, + "learning_rate": 1.7425492665004699e-06, + "loss": 0.49154865741729736, + "step": 2360 + }, + { + "epoch": 0.5443855199446622, + "grad_norm": 1.1498651594774036, + "learning_rate": 1.7422938470163389e-06, + "loss": 0.5185250639915466, + "step": 2361 + }, + { + "epoch": 0.5446160940742448, + "grad_norm": 1.5663688017502957, + "learning_rate": 1.7420383196322747e-06, + "loss": 0.5474511384963989, + "step": 2362 + }, + { + "epoch": 0.5448466682038275, + "grad_norm": 1.3465441719791955, + "learning_rate": 1.7417826843854202e-06, + "loss": 0.48212137818336487, + "step": 2363 + }, + { + "epoch": 0.5450772423334102, + "grad_norm": 1.1320785808666363, + "learning_rate": 1.7415269413129348e-06, + "loss": 0.47983086109161377, + "step": 2364 + }, + { + "epoch": 0.5453078164629929, + "grad_norm": 1.1314426678618292, + "learning_rate": 1.7412710904519932e-06, + "loss": 0.4935225546360016, + "step": 2365 + }, + { + "epoch": 0.5455383905925755, + "grad_norm": 1.2528535153373956, + "learning_rate": 1.7410151318397862e-06, + "loss": 0.5167664289474487, + "step": 2366 + }, + { + "epoch": 0.5457689647221582, + "grad_norm": 1.1782327982922274, + "learning_rate": 1.74075906551352e-06, + "loss": 0.5116056799888611, + "step": 2367 + }, + { + "epoch": 0.5459995388517408, + "grad_norm": 1.1184728717072068, + "learning_rate": 1.7405028915104158e-06, + "loss": 0.4709595739841461, + "step": 2368 + }, + { + "epoch": 0.5462301129813235, + "grad_norm": 
1.560534410686712, + "learning_rate": 1.7402466098677118e-06, + "loss": 0.3989061117172241, + "step": 2369 + }, + { + "epoch": 0.5464606871109061, + "grad_norm": 1.1397817693321244, + "learning_rate": 1.739990220622661e-06, + "loss": 0.45720764994621277, + "step": 2370 + }, + { + "epoch": 0.5466912612404888, + "grad_norm": 1.6154705847610804, + "learning_rate": 1.739733723812532e-06, + "loss": 0.5865384936332703, + "step": 2371 + }, + { + "epoch": 0.5469218353700714, + "grad_norm": 1.3129437136284077, + "learning_rate": 1.7394771194746092e-06, + "loss": 0.4451501965522766, + "step": 2372 + }, + { + "epoch": 0.5471524094996542, + "grad_norm": 1.2213938230584949, + "learning_rate": 1.7392204076461928e-06, + "loss": 0.4628486633300781, + "step": 2373 + }, + { + "epoch": 0.5473829836292368, + "grad_norm": 1.2854198948482758, + "learning_rate": 1.7389635883645984e-06, + "loss": 0.4797760248184204, + "step": 2374 + }, + { + "epoch": 0.5476135577588195, + "grad_norm": 1.2890601616689177, + "learning_rate": 1.7387066616671571e-06, + "loss": 0.4716770648956299, + "step": 2375 + }, + { + "epoch": 0.5478441318884021, + "grad_norm": 1.071991179643841, + "learning_rate": 1.738449627591216e-06, + "loss": 0.504901647567749, + "step": 2376 + }, + { + "epoch": 0.5480747060179848, + "grad_norm": 1.259141194312177, + "learning_rate": 1.7381924861741375e-06, + "loss": 0.5248615145683289, + "step": 2377 + }, + { + "epoch": 0.5483052801475674, + "grad_norm": 1.1551298194401718, + "learning_rate": 1.7379352374532998e-06, + "loss": 0.41704076528549194, + "step": 2378 + }, + { + "epoch": 0.5485358542771501, + "grad_norm": 1.1093382819710802, + "learning_rate": 1.7376778814660966e-06, + "loss": 0.42278197407722473, + "step": 2379 + }, + { + "epoch": 0.5487664284067327, + "grad_norm": 1.3240414194175114, + "learning_rate": 1.7374204182499372e-06, + "loss": 0.4104729890823364, + "step": 2380 + }, + { + "epoch": 0.5489970025363154, + "grad_norm": 1.237574436817826, + "learning_rate": 
1.7371628478422467e-06, + "loss": 0.5205684304237366, + "step": 2381 + }, + { + "epoch": 0.549227576665898, + "grad_norm": 1.2914374831424469, + "learning_rate": 1.7369051702804648e-06, + "loss": 0.4743306040763855, + "step": 2382 + }, + { + "epoch": 0.5494581507954808, + "grad_norm": 1.4263628155545096, + "learning_rate": 1.7366473856020486e-06, + "loss": 0.6324253678321838, + "step": 2383 + }, + { + "epoch": 0.5496887249250634, + "grad_norm": 1.2093119037905458, + "learning_rate": 1.736389493844469e-06, + "loss": 0.46466588973999023, + "step": 2384 + }, + { + "epoch": 0.5499192990546461, + "grad_norm": 1.257464863029373, + "learning_rate": 1.7361314950452136e-06, + "loss": 0.4117918014526367, + "step": 2385 + }, + { + "epoch": 0.5501498731842287, + "grad_norm": 1.0582357147304537, + "learning_rate": 1.7358733892417848e-06, + "loss": 0.40341615676879883, + "step": 2386 + }, + { + "epoch": 0.5503804473138114, + "grad_norm": 1.2083128590610215, + "learning_rate": 1.735615176471701e-06, + "loss": 0.642855167388916, + "step": 2387 + }, + { + "epoch": 0.550611021443394, + "grad_norm": 1.3821025749968947, + "learning_rate": 1.7353568567724959e-06, + "loss": 0.5490958094596863, + "step": 2388 + }, + { + "epoch": 0.5508415955729767, + "grad_norm": 1.0972882559163057, + "learning_rate": 1.7350984301817192e-06, + "loss": 0.5154834985733032, + "step": 2389 + }, + { + "epoch": 0.5510721697025593, + "grad_norm": 1.5156914347306212, + "learning_rate": 1.7348398967369358e-06, + "loss": 0.49488651752471924, + "step": 2390 + }, + { + "epoch": 0.5513027438321421, + "grad_norm": 1.097164324799634, + "learning_rate": 1.7345812564757257e-06, + "loss": 0.4211215674877167, + "step": 2391 + }, + { + "epoch": 0.5515333179617247, + "grad_norm": 1.1060429845011046, + "learning_rate": 1.7343225094356855e-06, + "loss": 0.41840964555740356, + "step": 2392 + }, + { + "epoch": 0.5517638920913074, + "grad_norm": 1.1213399734290006, + "learning_rate": 1.7340636556544264e-06, + "loss": 
0.540780782699585, + "step": 2393 + }, + { + "epoch": 0.55199446622089, + "grad_norm": 1.328334535307567, + "learning_rate": 1.7338046951695754e-06, + "loss": 0.4967775046825409, + "step": 2394 + }, + { + "epoch": 0.5522250403504727, + "grad_norm": 1.337457775660936, + "learning_rate": 1.733545628018775e-06, + "loss": 0.5155577659606934, + "step": 2395 + }, + { + "epoch": 0.5524556144800553, + "grad_norm": 1.3409169497631646, + "learning_rate": 1.7332864542396832e-06, + "loss": 0.5106005072593689, + "step": 2396 + }, + { + "epoch": 0.552686188609638, + "grad_norm": 1.106469342539302, + "learning_rate": 1.7330271738699737e-06, + "loss": 0.3459712862968445, + "step": 2397 + }, + { + "epoch": 0.5529167627392206, + "grad_norm": 1.238811250755909, + "learning_rate": 1.7327677869473356e-06, + "loss": 0.4877927303314209, + "step": 2398 + }, + { + "epoch": 0.5531473368688034, + "grad_norm": 1.298959309949219, + "learning_rate": 1.7325082935094732e-06, + "loss": 0.5183857679367065, + "step": 2399 + }, + { + "epoch": 0.553377910998386, + "grad_norm": 1.1165163437308863, + "learning_rate": 1.7322486935941068e-06, + "loss": 0.4326491057872772, + "step": 2400 + }, + { + "epoch": 0.5536084851279687, + "grad_norm": 1.2472729786065346, + "learning_rate": 1.7319889872389716e-06, + "loss": 0.4688712954521179, + "step": 2401 + }, + { + "epoch": 0.5538390592575513, + "grad_norm": 1.2787851295656323, + "learning_rate": 1.7317291744818184e-06, + "loss": 0.4997788071632385, + "step": 2402 + }, + { + "epoch": 0.554069633387134, + "grad_norm": 1.3085189564145994, + "learning_rate": 1.731469255360414e-06, + "loss": 0.5271172523498535, + "step": 2403 + }, + { + "epoch": 0.5543002075167166, + "grad_norm": 1.3689434717845856, + "learning_rate": 1.73120922991254e-06, + "loss": 0.5339269042015076, + "step": 2404 + }, + { + "epoch": 0.5545307816462993, + "grad_norm": 1.2181123008680574, + "learning_rate": 1.7309490981759938e-06, + "loss": 0.47052568197250366, + "step": 2405 + }, + { + "epoch": 
0.5547613557758819, + "grad_norm": 1.2508289898124627, + "learning_rate": 1.7306888601885885e-06, + "loss": 0.4112280309200287, + "step": 2406 + }, + { + "epoch": 0.5549919299054646, + "grad_norm": 1.1812487853939355, + "learning_rate": 1.730428515988152e-06, + "loss": 0.5473710298538208, + "step": 2407 + }, + { + "epoch": 0.5552225040350472, + "grad_norm": 1.6509587018432181, + "learning_rate": 1.7301680656125277e-06, + "loss": 0.5079115629196167, + "step": 2408 + }, + { + "epoch": 0.55545307816463, + "grad_norm": 1.193259996108104, + "learning_rate": 1.7299075090995755e-06, + "loss": 0.4805012345314026, + "step": 2409 + }, + { + "epoch": 0.5556836522942126, + "grad_norm": 1.1958830357632493, + "learning_rate": 1.729646846487169e-06, + "loss": 0.4657474756240845, + "step": 2410 + }, + { + "epoch": 0.5559142264237953, + "grad_norm": 1.2442110767414496, + "learning_rate": 1.729386077813199e-06, + "loss": 0.5887978076934814, + "step": 2411 + }, + { + "epoch": 0.5561448005533779, + "grad_norm": 1.0093517139206267, + "learning_rate": 1.7291252031155704e-06, + "loss": 0.43841421604156494, + "step": 2412 + }, + { + "epoch": 0.5563753746829606, + "grad_norm": 1.304380451031228, + "learning_rate": 1.728864222432204e-06, + "loss": 0.5026551485061646, + "step": 2413 + }, + { + "epoch": 0.5566059488125432, + "grad_norm": 1.2344100865196312, + "learning_rate": 1.728603135801036e-06, + "loss": 0.4525277614593506, + "step": 2414 + }, + { + "epoch": 0.5568365229421259, + "grad_norm": 1.3128956010351178, + "learning_rate": 1.7283419432600182e-06, + "loss": 0.4095644950866699, + "step": 2415 + }, + { + "epoch": 0.5570670970717085, + "grad_norm": 1.2351186073808627, + "learning_rate": 1.7280806448471173e-06, + "loss": 0.5098834037780762, + "step": 2416 + }, + { + "epoch": 0.5572976712012913, + "grad_norm": 0.9689174321932323, + "learning_rate": 1.7278192406003159e-06, + "loss": 0.42802777886390686, + "step": 2417 + }, + { + "epoch": 0.5575282453308739, + "grad_norm": 
1.283644069549869, + "learning_rate": 1.7275577305576113e-06, + "loss": 0.5036378502845764, + "step": 2418 + }, + { + "epoch": 0.5577588194604566, + "grad_norm": 1.2960652355454445, + "learning_rate": 1.7272961147570175e-06, + "loss": 0.5324885249137878, + "step": 2419 + }, + { + "epoch": 0.5579893935900392, + "grad_norm": 1.6334614504341187, + "learning_rate": 1.727034393236562e-06, + "loss": 0.5763842463493347, + "step": 2420 + }, + { + "epoch": 0.5582199677196219, + "grad_norm": 1.343133312027108, + "learning_rate": 1.7267725660342895e-06, + "loss": 0.49291908740997314, + "step": 2421 + }, + { + "epoch": 0.5584505418492045, + "grad_norm": 1.651006143174213, + "learning_rate": 1.7265106331882588e-06, + "loss": 0.5114868879318237, + "step": 2422 + }, + { + "epoch": 0.5586811159787872, + "grad_norm": 1.1152807378164393, + "learning_rate": 1.7262485947365449e-06, + "loss": 0.42442530393600464, + "step": 2423 + }, + { + "epoch": 0.5589116901083698, + "grad_norm": 1.1309517905090323, + "learning_rate": 1.725986450717237e-06, + "loss": 0.3680551052093506, + "step": 2424 + }, + { + "epoch": 0.5591422642379525, + "grad_norm": 1.2183025106634426, + "learning_rate": 1.725724201168441e-06, + "loss": 0.5849576592445374, + "step": 2425 + }, + { + "epoch": 0.5593728383675352, + "grad_norm": 1.3597945996239442, + "learning_rate": 1.7254618461282773e-06, + "loss": 0.48919233679771423, + "step": 2426 + }, + { + "epoch": 0.5596034124971179, + "grad_norm": 1.1753552641156777, + "learning_rate": 1.7251993856348821e-06, + "loss": 0.4857720732688904, + "step": 2427 + }, + { + "epoch": 0.5598339866267005, + "grad_norm": 1.3324934167522995, + "learning_rate": 1.7249368197264062e-06, + "loss": 0.5106808543205261, + "step": 2428 + }, + { + "epoch": 0.5600645607562832, + "grad_norm": 1.305986731975411, + "learning_rate": 1.724674148441017e-06, + "loss": 0.500100314617157, + "step": 2429 + }, + { + "epoch": 0.5602951348858658, + "grad_norm": 1.226560051936561, + "learning_rate": 
1.7244113718168957e-06, + "loss": 0.5389110445976257, + "step": 2430 + }, + { + "epoch": 0.5605257090154485, + "grad_norm": 1.2848731557614161, + "learning_rate": 1.72414848989224e-06, + "loss": 0.42860496044158936, + "step": 2431 + }, + { + "epoch": 0.5607562831450311, + "grad_norm": 1.2392935426075953, + "learning_rate": 1.723885502705262e-06, + "loss": 0.4867728352546692, + "step": 2432 + }, + { + "epoch": 0.5609868572746138, + "grad_norm": 1.215687300161219, + "learning_rate": 1.7236224102941899e-06, + "loss": 0.49194633960723877, + "step": 2433 + }, + { + "epoch": 0.5612174314041964, + "grad_norm": 1.278802988367442, + "learning_rate": 1.7233592126972667e-06, + "loss": 0.5194358229637146, + "step": 2434 + }, + { + "epoch": 0.5614480055337792, + "grad_norm": 1.518126298536734, + "learning_rate": 1.723095909952751e-06, + "loss": 0.4738645553588867, + "step": 2435 + }, + { + "epoch": 0.5616785796633618, + "grad_norm": 1.1842233457279843, + "learning_rate": 1.7228325020989165e-06, + "loss": 0.48232927918434143, + "step": 2436 + }, + { + "epoch": 0.5619091537929445, + "grad_norm": 1.0590325088103263, + "learning_rate": 1.7225689891740522e-06, + "loss": 0.5192145109176636, + "step": 2437 + }, + { + "epoch": 0.5621397279225271, + "grad_norm": 1.2756639382228332, + "learning_rate": 1.7223053712164621e-06, + "loss": 0.4934930205345154, + "step": 2438 + }, + { + "epoch": 0.5623703020521098, + "grad_norm": 1.294610704846241, + "learning_rate": 1.722041648264466e-06, + "loss": 0.5022200345993042, + "step": 2439 + }, + { + "epoch": 0.5626008761816924, + "grad_norm": 1.15319893327068, + "learning_rate": 1.7217778203563986e-06, + "loss": 0.45300528407096863, + "step": 2440 + }, + { + "epoch": 0.5628314503112751, + "grad_norm": 1.1335234735988557, + "learning_rate": 1.7215138875306103e-06, + "loss": 0.4965200126171112, + "step": 2441 + }, + { + "epoch": 0.5630620244408577, + "grad_norm": 1.3081789750993726, + "learning_rate": 1.721249849825466e-06, + "loss": 
0.4618280231952667, + "step": 2442 + }, + { + "epoch": 0.5632925985704405, + "grad_norm": 1.255070715358214, + "learning_rate": 1.7209857072793464e-06, + "loss": 0.42270147800445557, + "step": 2443 + }, + { + "epoch": 0.5635231727000231, + "grad_norm": 1.0830436199918496, + "learning_rate": 1.720721459930647e-06, + "loss": 0.5200725793838501, + "step": 2444 + }, + { + "epoch": 0.5637537468296058, + "grad_norm": 1.1368018551382484, + "learning_rate": 1.7204571078177792e-06, + "loss": 0.47475337982177734, + "step": 2445 + }, + { + "epoch": 0.5639843209591884, + "grad_norm": 1.5482537414338693, + "learning_rate": 1.7201926509791693e-06, + "loss": 0.5493113994598389, + "step": 2446 + }, + { + "epoch": 0.564214895088771, + "grad_norm": 1.2861044506324582, + "learning_rate": 1.719928089453259e-06, + "loss": 0.4743562340736389, + "step": 2447 + }, + { + "epoch": 0.5644454692183537, + "grad_norm": 1.2343956116266135, + "learning_rate": 1.7196634232785038e-06, + "loss": 0.5145455598831177, + "step": 2448 + }, + { + "epoch": 0.5646760433479363, + "grad_norm": 1.5340568803714763, + "learning_rate": 1.719398652493377e-06, + "loss": 0.45072540640830994, + "step": 2449 + }, + { + "epoch": 0.564906617477519, + "grad_norm": 1.2363775684809537, + "learning_rate": 1.7191337771363651e-06, + "loss": 0.5150895714759827, + "step": 2450 + }, + { + "epoch": 0.5651371916071016, + "grad_norm": 1.4238500687035243, + "learning_rate": 1.7188687972459705e-06, + "loss": 0.5025302171707153, + "step": 2451 + }, + { + "epoch": 0.5653677657366843, + "grad_norm": 1.2149895801108108, + "learning_rate": 1.7186037128607107e-06, + "loss": 0.618930459022522, + "step": 2452 + }, + { + "epoch": 0.565598339866267, + "grad_norm": 1.1681250836374313, + "learning_rate": 1.7183385240191183e-06, + "loss": 0.5841591358184814, + "step": 2453 + }, + { + "epoch": 0.5658289139958497, + "grad_norm": 1.2481599814364495, + "learning_rate": 1.7180732307597413e-06, + "loss": 0.4915233850479126, + "step": 2454 + }, + { + 
"epoch": 0.5660594881254323, + "grad_norm": 1.127625184290067, + "learning_rate": 1.7178078331211429e-06, + "loss": 0.46732476353645325, + "step": 2455 + }, + { + "epoch": 0.566290062255015, + "grad_norm": 1.1121526599443385, + "learning_rate": 1.7175423311419013e-06, + "loss": 0.4640737771987915, + "step": 2456 + }, + { + "epoch": 0.5665206363845976, + "grad_norm": 1.2800685498732043, + "learning_rate": 1.7172767248606095e-06, + "loss": 0.39535683393478394, + "step": 2457 + }, + { + "epoch": 0.5667512105141803, + "grad_norm": 1.196636942462094, + "learning_rate": 1.7170110143158766e-06, + "loss": 0.4782179594039917, + "step": 2458 + }, + { + "epoch": 0.5669817846437629, + "grad_norm": 1.5731644028680265, + "learning_rate": 1.7167451995463258e-06, + "loss": 0.6186003684997559, + "step": 2459 + }, + { + "epoch": 0.5672123587733456, + "grad_norm": 1.3163111292704002, + "learning_rate": 1.7164792805905965e-06, + "loss": 0.4915347099304199, + "step": 2460 + }, + { + "epoch": 0.5674429329029282, + "grad_norm": 1.2683630708246802, + "learning_rate": 1.7162132574873422e-06, + "loss": 0.4789005517959595, + "step": 2461 + }, + { + "epoch": 0.567673507032511, + "grad_norm": 1.6928847577315913, + "learning_rate": 1.7159471302752326e-06, + "loss": 0.6307233572006226, + "step": 2462 + }, + { + "epoch": 0.5679040811620936, + "grad_norm": 1.240574680316347, + "learning_rate": 1.7156808989929514e-06, + "loss": 0.5278424024581909, + "step": 2463 + }, + { + "epoch": 0.5681346552916763, + "grad_norm": 1.4388020329709479, + "learning_rate": 1.7154145636791988e-06, + "loss": 0.48552995920181274, + "step": 2464 + }, + { + "epoch": 0.5683652294212589, + "grad_norm": 1.3679954470869684, + "learning_rate": 1.7151481243726885e-06, + "loss": 0.5125370621681213, + "step": 2465 + }, + { + "epoch": 0.5685958035508416, + "grad_norm": 1.3448408660581435, + "learning_rate": 1.7148815811121506e-06, + "loss": 0.44231730699539185, + "step": 2466 + }, + { + "epoch": 0.5688263776804242, + "grad_norm": 
1.367567415522102, + "learning_rate": 1.7146149339363296e-06, + "loss": 0.5593529939651489, + "step": 2467 + }, + { + "epoch": 0.5690569518100069, + "grad_norm": 1.347377301704866, + "learning_rate": 1.714348182883986e-06, + "loss": 0.4830925464630127, + "step": 2468 + }, + { + "epoch": 0.5692875259395895, + "grad_norm": 1.4913136319748062, + "learning_rate": 1.714081327993894e-06, + "loss": 0.5538743734359741, + "step": 2469 + }, + { + "epoch": 0.5695181000691723, + "grad_norm": 1.4135532975212044, + "learning_rate": 1.7138143693048441e-06, + "loss": 0.5145905613899231, + "step": 2470 + }, + { + "epoch": 0.5697486741987549, + "grad_norm": 1.301183082915478, + "learning_rate": 1.713547306855641e-06, + "loss": 0.47706612944602966, + "step": 2471 + }, + { + "epoch": 0.5699792483283376, + "grad_norm": 1.2528774428968483, + "learning_rate": 1.7132801406851056e-06, + "loss": 0.45162689685821533, + "step": 2472 + }, + { + "epoch": 0.5702098224579202, + "grad_norm": 1.5721475156494655, + "learning_rate": 1.7130128708320727e-06, + "loss": 0.5141111612319946, + "step": 2473 + }, + { + "epoch": 0.5704403965875029, + "grad_norm": 1.0845779630695374, + "learning_rate": 1.7127454973353932e-06, + "loss": 0.4443173408508301, + "step": 2474 + }, + { + "epoch": 0.5706709707170855, + "grad_norm": 1.2704796440823871, + "learning_rate": 1.7124780202339317e-06, + "loss": 0.4162046015262604, + "step": 2475 + }, + { + "epoch": 0.5709015448466682, + "grad_norm": 1.100254820278883, + "learning_rate": 1.7122104395665695e-06, + "loss": 0.44526439905166626, + "step": 2476 + }, + { + "epoch": 0.5711321189762508, + "grad_norm": 1.3237501807128542, + "learning_rate": 1.7119427553722016e-06, + "loss": 0.5069452524185181, + "step": 2477 + }, + { + "epoch": 0.5713626931058335, + "grad_norm": 1.2833720010816703, + "learning_rate": 1.7116749676897393e-06, + "loss": 0.46709829568862915, + "step": 2478 + }, + { + "epoch": 0.5715932672354161, + "grad_norm": 1.2011083992406753, + "learning_rate": 
1.7114070765581078e-06, + "loss": 0.5443992614746094, + "step": 2479 + }, + { + "epoch": 0.5718238413649989, + "grad_norm": 1.5805836267397864, + "learning_rate": 1.7111390820162477e-06, + "loss": 0.4307284653186798, + "step": 2480 + }, + { + "epoch": 0.5720544154945815, + "grad_norm": 1.272693158326629, + "learning_rate": 1.7108709841031148e-06, + "loss": 0.4753509759902954, + "step": 2481 + }, + { + "epoch": 0.5722849896241642, + "grad_norm": 1.3966851487133662, + "learning_rate": 1.7106027828576798e-06, + "loss": 0.5689436197280884, + "step": 2482 + }, + { + "epoch": 0.5725155637537468, + "grad_norm": 1.3535603859222731, + "learning_rate": 1.710334478318929e-06, + "loss": 0.47182410955429077, + "step": 2483 + }, + { + "epoch": 0.5727461378833295, + "grad_norm": 1.4415402220476166, + "learning_rate": 1.7100660705258623e-06, + "loss": 0.4418888986110687, + "step": 2484 + }, + { + "epoch": 0.5729767120129121, + "grad_norm": 1.0842485548099412, + "learning_rate": 1.709797559517496e-06, + "loss": 0.4315544366836548, + "step": 2485 + }, + { + "epoch": 0.5732072861424948, + "grad_norm": 1.136143164844157, + "learning_rate": 1.709528945332861e-06, + "loss": 0.34541741013526917, + "step": 2486 + }, + { + "epoch": 0.5734378602720774, + "grad_norm": 1.444798755487831, + "learning_rate": 1.709260228011003e-06, + "loss": 0.5380317568778992, + "step": 2487 + }, + { + "epoch": 0.5736684344016602, + "grad_norm": 1.1490218932398577, + "learning_rate": 1.7089914075909824e-06, + "loss": 0.5017478466033936, + "step": 2488 + }, + { + "epoch": 0.5738990085312428, + "grad_norm": 1.317791376396268, + "learning_rate": 1.7087224841118756e-06, + "loss": 0.5608090162277222, + "step": 2489 + }, + { + "epoch": 0.5741295826608255, + "grad_norm": 1.3491498137629283, + "learning_rate": 1.708453457612773e-06, + "loss": 0.5360782146453857, + "step": 2490 + }, + { + "epoch": 0.5743601567904081, + "grad_norm": 1.3100243824681166, + "learning_rate": 1.7081843281327802e-06, + "loss": 
0.5638090372085571, + "step": 2491 + }, + { + "epoch": 0.5745907309199908, + "grad_norm": 1.2532603581217905, + "learning_rate": 1.707915095711018e-06, + "loss": 0.45777082443237305, + "step": 2492 + }, + { + "epoch": 0.5748213050495734, + "grad_norm": 1.2028357712850113, + "learning_rate": 1.7076457603866224e-06, + "loss": 0.5423707962036133, + "step": 2493 + }, + { + "epoch": 0.5750518791791561, + "grad_norm": 1.3752974790416335, + "learning_rate": 1.7073763221987436e-06, + "loss": 0.4286508560180664, + "step": 2494 + }, + { + "epoch": 0.5752824533087387, + "grad_norm": 1.1304014566480758, + "learning_rate": 1.7071067811865474e-06, + "loss": 0.4197548031806946, + "step": 2495 + }, + { + "epoch": 0.5755130274383214, + "grad_norm": 1.1820720623961845, + "learning_rate": 1.7068371373892142e-06, + "loss": 0.47944843769073486, + "step": 2496 + }, + { + "epoch": 0.575743601567904, + "grad_norm": 1.5454364363464301, + "learning_rate": 1.7065673908459396e-06, + "loss": 0.49708908796310425, + "step": 2497 + }, + { + "epoch": 0.5759741756974868, + "grad_norm": 1.2002677488287707, + "learning_rate": 1.706297541595934e-06, + "loss": 0.46402662992477417, + "step": 2498 + }, + { + "epoch": 0.5762047498270694, + "grad_norm": 1.2375577528106843, + "learning_rate": 1.7060275896784222e-06, + "loss": 0.4665846824645996, + "step": 2499 + }, + { + "epoch": 0.5764353239566521, + "grad_norm": 1.333335025499966, + "learning_rate": 1.7057575351326452e-06, + "loss": 0.511766791343689, + "step": 2500 + }, + { + "epoch": 0.5766658980862347, + "grad_norm": 1.3129729051878996, + "learning_rate": 1.7054873779978578e-06, + "loss": 0.5731323957443237, + "step": 2501 + }, + { + "epoch": 0.5768964722158174, + "grad_norm": 1.208575824869893, + "learning_rate": 1.70521711831333e-06, + "loss": 0.43246185779571533, + "step": 2502 + }, + { + "epoch": 0.5771270463454, + "grad_norm": 1.3743994267646191, + "learning_rate": 1.704946756118347e-06, + "loss": 0.5062395334243774, + "step": 2503 + }, + { + 
"epoch": 0.5773576204749827, + "grad_norm": 1.2169597850499592, + "learning_rate": 1.7046762914522087e-06, + "loss": 0.5010061264038086, + "step": 2504 + }, + { + "epoch": 0.5775881946045653, + "grad_norm": 1.1915100175955862, + "learning_rate": 1.7044057243542293e-06, + "loss": 0.5118759870529175, + "step": 2505 + }, + { + "epoch": 0.5778187687341481, + "grad_norm": 1.2406153903833703, + "learning_rate": 1.7041350548637392e-06, + "loss": 0.5796714425086975, + "step": 2506 + }, + { + "epoch": 0.5780493428637307, + "grad_norm": 1.198072830487735, + "learning_rate": 1.7038642830200828e-06, + "loss": 0.43587976694107056, + "step": 2507 + }, + { + "epoch": 0.5782799169933134, + "grad_norm": 1.0836383921827997, + "learning_rate": 1.7035934088626193e-06, + "loss": 0.4780135154724121, + "step": 2508 + }, + { + "epoch": 0.578510491122896, + "grad_norm": 1.2949967246283594, + "learning_rate": 1.7033224324307232e-06, + "loss": 0.48039600253105164, + "step": 2509 + }, + { + "epoch": 0.5787410652524787, + "grad_norm": 1.4288262034065056, + "learning_rate": 1.7030513537637835e-06, + "loss": 0.48075419664382935, + "step": 2510 + }, + { + "epoch": 0.5789716393820613, + "grad_norm": 1.294455603546607, + "learning_rate": 1.7027801729012044e-06, + "loss": 0.5006246566772461, + "step": 2511 + }, + { + "epoch": 0.579202213511644, + "grad_norm": 1.3239915881424993, + "learning_rate": 1.7025088898824046e-06, + "loss": 0.550139307975769, + "step": 2512 + }, + { + "epoch": 0.5794327876412266, + "grad_norm": 1.273345251271078, + "learning_rate": 1.7022375047468178e-06, + "loss": 0.5228495001792908, + "step": 2513 + }, + { + "epoch": 0.5796633617708093, + "grad_norm": 1.223108155250479, + "learning_rate": 1.701966017533893e-06, + "loss": 0.4783739149570465, + "step": 2514 + }, + { + "epoch": 0.579893935900392, + "grad_norm": 1.3364695116135945, + "learning_rate": 1.701694428283093e-06, + "loss": 0.47218769788742065, + "step": 2515 + }, + { + "epoch": 0.5801245100299747, + "grad_norm": 
1.271458214482931, + "learning_rate": 1.7014227370338967e-06, + "loss": 0.5340671539306641, + "step": 2516 + }, + { + "epoch": 0.5803550841595573, + "grad_norm": 1.1389068048001012, + "learning_rate": 1.7011509438257967e-06, + "loss": 0.4629259407520294, + "step": 2517 + }, + { + "epoch": 0.58058565828914, + "grad_norm": 1.6036419177897663, + "learning_rate": 1.7008790486983013e-06, + "loss": 0.6334242820739746, + "step": 2518 + }, + { + "epoch": 0.5808162324187226, + "grad_norm": 1.3328081079482175, + "learning_rate": 1.7006070516909327e-06, + "loss": 0.544147789478302, + "step": 2519 + }, + { + "epoch": 0.5810468065483053, + "grad_norm": 1.2269860514972317, + "learning_rate": 1.700334952843229e-06, + "loss": 0.47045618295669556, + "step": 2520 + }, + { + "epoch": 0.5812773806778879, + "grad_norm": 1.4613594501045561, + "learning_rate": 1.700062752194742e-06, + "loss": 0.4582393169403076, + "step": 2521 + }, + { + "epoch": 0.5815079548074706, + "grad_norm": 1.335231293513905, + "learning_rate": 1.699790449785039e-06, + "loss": 0.507327139377594, + "step": 2522 + }, + { + "epoch": 0.5817385289370532, + "grad_norm": 1.3812182502399277, + "learning_rate": 1.6995180456537022e-06, + "loss": 0.5345891714096069, + "step": 2523 + }, + { + "epoch": 0.581969103066636, + "grad_norm": 1.3766088909590293, + "learning_rate": 1.6992455398403277e-06, + "loss": 0.4847550094127655, + "step": 2524 + }, + { + "epoch": 0.5821996771962186, + "grad_norm": 1.2694420906725428, + "learning_rate": 1.6989729323845276e-06, + "loss": 0.4472479820251465, + "step": 2525 + }, + { + "epoch": 0.5824302513258013, + "grad_norm": 1.1676894033843348, + "learning_rate": 1.698700223325928e-06, + "loss": 0.4426107108592987, + "step": 2526 + }, + { + "epoch": 0.5826608254553839, + "grad_norm": 1.3669509353012406, + "learning_rate": 1.6984274127041696e-06, + "loss": 0.4814276099205017, + "step": 2527 + }, + { + "epoch": 0.5828913995849666, + "grad_norm": 1.3849093780882, + "learning_rate": 
1.6981545005589084e-06, + "loss": 0.5286451578140259, + "step": 2528 + }, + { + "epoch": 0.5831219737145492, + "grad_norm": 1.3586645163698117, + "learning_rate": 1.6978814869298152e-06, + "loss": 0.5291767120361328, + "step": 2529 + }, + { + "epoch": 0.5833525478441319, + "grad_norm": 1.4376369092272532, + "learning_rate": 1.6976083718565748e-06, + "loss": 0.5807399749755859, + "step": 2530 + }, + { + "epoch": 0.5835831219737145, + "grad_norm": 1.5620885730430554, + "learning_rate": 1.6973351553788878e-06, + "loss": 0.5489222407341003, + "step": 2531 + }, + { + "epoch": 0.5838136961032973, + "grad_norm": 1.5080367455114985, + "learning_rate": 1.6970618375364683e-06, + "loss": 0.5295521020889282, + "step": 2532 + }, + { + "epoch": 0.5840442702328799, + "grad_norm": 1.281498688581256, + "learning_rate": 1.6967884183690467e-06, + "loss": 0.4979495406150818, + "step": 2533 + }, + { + "epoch": 0.5842748443624626, + "grad_norm": 1.0681769287073983, + "learning_rate": 1.6965148979163661e-06, + "loss": 0.45667344331741333, + "step": 2534 + }, + { + "epoch": 0.5845054184920452, + "grad_norm": 1.1552847245372566, + "learning_rate": 1.6962412762181866e-06, + "loss": 0.42687737941741943, + "step": 2535 + }, + { + "epoch": 0.5847359926216279, + "grad_norm": 1.2720388462434997, + "learning_rate": 1.6959675533142815e-06, + "loss": 0.5616278648376465, + "step": 2536 + }, + { + "epoch": 0.5849665667512105, + "grad_norm": 1.245024966542371, + "learning_rate": 1.6956937292444386e-06, + "loss": 0.4961121678352356, + "step": 2537 + }, + { + "epoch": 0.5851971408807932, + "grad_norm": 1.1864554840937962, + "learning_rate": 1.6954198040484617e-06, + "loss": 0.5115770101547241, + "step": 2538 + }, + { + "epoch": 0.5854277150103758, + "grad_norm": 1.41778667190123, + "learning_rate": 1.6951457777661686e-06, + "loss": 0.540202260017395, + "step": 2539 + }, + { + "epoch": 0.5856582891399585, + "grad_norm": 1.3238570605319384, + "learning_rate": 1.6948716504373914e-06, + "loss": 
0.5312114357948303, + "step": 2540 + }, + { + "epoch": 0.5858888632695411, + "grad_norm": 1.1842147435507233, + "learning_rate": 1.694597422101978e-06, + "loss": 0.49323517084121704, + "step": 2541 + }, + { + "epoch": 0.5861194373991239, + "grad_norm": 1.3138451660312804, + "learning_rate": 1.6943230927997894e-06, + "loss": 0.42929738759994507, + "step": 2542 + }, + { + "epoch": 0.5863500115287065, + "grad_norm": 1.2474057622168624, + "learning_rate": 1.6940486625707021e-06, + "loss": 0.45236462354660034, + "step": 2543 + }, + { + "epoch": 0.5865805856582892, + "grad_norm": 1.1944700996273265, + "learning_rate": 1.6937741314546084e-06, + "loss": 0.5129071474075317, + "step": 2544 + }, + { + "epoch": 0.5868111597878718, + "grad_norm": 1.303867373152147, + "learning_rate": 1.693499499491413e-06, + "loss": 0.5562577247619629, + "step": 2545 + }, + { + "epoch": 0.5870417339174545, + "grad_norm": 1.472236761409707, + "learning_rate": 1.6932247667210372e-06, + "loss": 0.5593177080154419, + "step": 2546 + }, + { + "epoch": 0.5872723080470371, + "grad_norm": 1.666463518969871, + "learning_rate": 1.692949933183416e-06, + "loss": 0.5536680221557617, + "step": 2547 + }, + { + "epoch": 0.5875028821766198, + "grad_norm": 1.552275933236934, + "learning_rate": 1.6926749989184993e-06, + "loss": 0.5523338317871094, + "step": 2548 + }, + { + "epoch": 0.5877334563062024, + "grad_norm": 1.3066438958077835, + "learning_rate": 1.692399963966251e-06, + "loss": 0.41815924644470215, + "step": 2549 + }, + { + "epoch": 0.5879640304357852, + "grad_norm": 1.1800035534558937, + "learning_rate": 1.6921248283666508e-06, + "loss": 0.46959248185157776, + "step": 2550 + }, + { + "epoch": 0.5881946045653678, + "grad_norm": 1.2343992191174948, + "learning_rate": 1.6918495921596928e-06, + "loss": 0.4748489260673523, + "step": 2551 + }, + { + "epoch": 0.5884251786949505, + "grad_norm": 1.853505775613954, + "learning_rate": 1.6915742553853845e-06, + "loss": 0.4541524052619934, + "step": 2552 + }, + { + 
"epoch": 0.5886557528245331, + "grad_norm": 1.2688298570187295, + "learning_rate": 1.691298818083749e-06, + "loss": 0.47106000781059265, + "step": 2553 + }, + { + "epoch": 0.5888863269541158, + "grad_norm": 1.6112122400264717, + "learning_rate": 1.6910232802948246e-06, + "loss": 0.5364842414855957, + "step": 2554 + }, + { + "epoch": 0.5891169010836984, + "grad_norm": 1.402469759006704, + "learning_rate": 1.690747642058663e-06, + "loss": 0.48388350009918213, + "step": 2555 + }, + { + "epoch": 0.5893474752132811, + "grad_norm": 1.1992143425994695, + "learning_rate": 1.690471903415331e-06, + "loss": 0.5075609683990479, + "step": 2556 + }, + { + "epoch": 0.5895780493428637, + "grad_norm": 1.2039147901396619, + "learning_rate": 1.6901960644049102e-06, + "loss": 0.45098066329956055, + "step": 2557 + }, + { + "epoch": 0.5898086234724463, + "grad_norm": 1.1869247135212617, + "learning_rate": 1.6899201250674966e-06, + "loss": 0.5329077243804932, + "step": 2558 + }, + { + "epoch": 0.590039197602029, + "grad_norm": 1.2771607201573625, + "learning_rate": 1.6896440854432005e-06, + "loss": 0.4632904529571533, + "step": 2559 + }, + { + "epoch": 0.5902697717316117, + "grad_norm": 1.3016593794447966, + "learning_rate": 1.6893679455721474e-06, + "loss": 0.5302451848983765, + "step": 2560 + }, + { + "epoch": 0.5905003458611944, + "grad_norm": 1.1349040723062418, + "learning_rate": 1.6890917054944768e-06, + "loss": 0.45363447070121765, + "step": 2561 + }, + { + "epoch": 0.590730919990777, + "grad_norm": 1.3869965053274627, + "learning_rate": 1.688815365250343e-06, + "loss": 0.5103914737701416, + "step": 2562 + }, + { + "epoch": 0.5909614941203597, + "grad_norm": 1.2859854063949494, + "learning_rate": 1.6885389248799152e-06, + "loss": 0.45474469661712646, + "step": 2563 + }, + { + "epoch": 0.5911920682499423, + "grad_norm": 1.3905925832105772, + "learning_rate": 1.6882623844233766e-06, + "loss": 0.517952024936676, + "step": 2564 + }, + { + "epoch": 0.591422642379525, + "grad_norm": 
1.456181517852448, + "learning_rate": 1.6879857439209245e-06, + "loss": 0.4872232973575592, + "step": 2565 + }, + { + "epoch": 0.5916532165091076, + "grad_norm": 1.146992588808451, + "learning_rate": 1.6877090034127726e-06, + "loss": 0.4938408136367798, + "step": 2566 + }, + { + "epoch": 0.5918837906386903, + "grad_norm": 0.9819996395503116, + "learning_rate": 1.6874321629391469e-06, + "loss": 0.42687565088272095, + "step": 2567 + }, + { + "epoch": 0.592114364768273, + "grad_norm": 1.8882181325825955, + "learning_rate": 1.6871552225402896e-06, + "loss": 0.5272493362426758, + "step": 2568 + }, + { + "epoch": 0.5923449388978557, + "grad_norm": 1.265485903227574, + "learning_rate": 1.6868781822564565e-06, + "loss": 0.4643193185329437, + "step": 2569 + }, + { + "epoch": 0.5925755130274383, + "grad_norm": 1.5054555077342378, + "learning_rate": 1.6866010421279183e-06, + "loss": 0.4957782030105591, + "step": 2570 + }, + { + "epoch": 0.592806087157021, + "grad_norm": 1.2319191303045371, + "learning_rate": 1.6863238021949605e-06, + "loss": 0.442360520362854, + "step": 2571 + }, + { + "epoch": 0.5930366612866036, + "grad_norm": 1.365610357460579, + "learning_rate": 1.6860464624978824e-06, + "loss": 0.5108935832977295, + "step": 2572 + }, + { + "epoch": 0.5932672354161863, + "grad_norm": 1.1047616502548026, + "learning_rate": 1.6857690230769976e-06, + "loss": 0.46559715270996094, + "step": 2573 + }, + { + "epoch": 0.5934978095457689, + "grad_norm": 1.2296310276846145, + "learning_rate": 1.6854914839726356e-06, + "loss": 0.44752076268196106, + "step": 2574 + }, + { + "epoch": 0.5937283836753516, + "grad_norm": 1.6735698653712807, + "learning_rate": 1.6852138452251387e-06, + "loss": 0.4018149971961975, + "step": 2575 + }, + { + "epoch": 0.5939589578049342, + "grad_norm": 1.407358523561205, + "learning_rate": 1.6849361068748652e-06, + "loss": 0.47711417078971863, + "step": 2576 + }, + { + "epoch": 0.594189531934517, + "grad_norm": 1.3386417354625197, + "learning_rate": 
1.684658268962187e-06, + "loss": 0.4671875834465027, + "step": 2577 + }, + { + "epoch": 0.5944201060640996, + "grad_norm": 1.2780841808458634, + "learning_rate": 1.6843803315274906e-06, + "loss": 0.48041921854019165, + "step": 2578 + }, + { + "epoch": 0.5946506801936823, + "grad_norm": 1.105183308056311, + "learning_rate": 1.6841022946111772e-06, + "loss": 0.3444385528564453, + "step": 2579 + }, + { + "epoch": 0.5948812543232649, + "grad_norm": 1.3054472047651338, + "learning_rate": 1.6838241582536619e-06, + "loss": 0.46800029277801514, + "step": 2580 + }, + { + "epoch": 0.5951118284528476, + "grad_norm": 1.7022638621771704, + "learning_rate": 1.683545922495375e-06, + "loss": 0.4362339377403259, + "step": 2581 + }, + { + "epoch": 0.5953424025824302, + "grad_norm": 1.5138702229312708, + "learning_rate": 1.6832675873767606e-06, + "loss": 0.4818536043167114, + "step": 2582 + }, + { + "epoch": 0.5955729767120129, + "grad_norm": 1.1464685816902647, + "learning_rate": 1.6829891529382775e-06, + "loss": 0.47899681329727173, + "step": 2583 + }, + { + "epoch": 0.5958035508415955, + "grad_norm": 1.028545290493661, + "learning_rate": 1.6827106192203995e-06, + "loss": 0.4239576458930969, + "step": 2584 + }, + { + "epoch": 0.5960341249711782, + "grad_norm": 1.299757224081726, + "learning_rate": 1.6824319862636136e-06, + "loss": 0.545168399810791, + "step": 2585 + }, + { + "epoch": 0.5962646991007609, + "grad_norm": 1.1433294908143323, + "learning_rate": 1.6821532541084228e-06, + "loss": 0.4238642156124115, + "step": 2586 + }, + { + "epoch": 0.5964952732303436, + "grad_norm": 1.1214453575304018, + "learning_rate": 1.6818744227953422e-06, + "loss": 0.39589810371398926, + "step": 2587 + }, + { + "epoch": 0.5967258473599262, + "grad_norm": 1.1696584305728281, + "learning_rate": 1.6815954923649044e-06, + "loss": 0.4358367919921875, + "step": 2588 + }, + { + "epoch": 0.5969564214895089, + "grad_norm": 1.232714944175718, + "learning_rate": 1.6813164628576538e-06, + "loss": 
0.5012080073356628, + "step": 2589 + }, + { + "epoch": 0.5971869956190915, + "grad_norm": 1.0762630624781258, + "learning_rate": 1.6810373343141503e-06, + "loss": 0.4637286365032196, + "step": 2590 + }, + { + "epoch": 0.5974175697486742, + "grad_norm": 1.4947457348694884, + "learning_rate": 1.6807581067749684e-06, + "loss": 0.6130828261375427, + "step": 2591 + }, + { + "epoch": 0.5976481438782568, + "grad_norm": 1.538167494741888, + "learning_rate": 1.680478780280696e-06, + "loss": 0.5430021286010742, + "step": 2592 + }, + { + "epoch": 0.5978787180078395, + "grad_norm": 1.4318445545867842, + "learning_rate": 1.6801993548719368e-06, + "loss": 0.5195741653442383, + "step": 2593 + }, + { + "epoch": 0.5981092921374221, + "grad_norm": 1.4741188457279395, + "learning_rate": 1.6799198305893077e-06, + "loss": 0.5452337265014648, + "step": 2594 + }, + { + "epoch": 0.5983398662670049, + "grad_norm": 1.1858829095847359, + "learning_rate": 1.6796402074734402e-06, + "loss": 0.4802110493183136, + "step": 2595 + }, + { + "epoch": 0.5985704403965875, + "grad_norm": 1.114234548006963, + "learning_rate": 1.679360485564981e-06, + "loss": 0.48554790019989014, + "step": 2596 + }, + { + "epoch": 0.5988010145261702, + "grad_norm": 1.3519600489481014, + "learning_rate": 1.6790806649045896e-06, + "loss": 0.5151324272155762, + "step": 2597 + }, + { + "epoch": 0.5990315886557528, + "grad_norm": 1.4134149785589025, + "learning_rate": 1.6788007455329419e-06, + "loss": 0.5122699737548828, + "step": 2598 + }, + { + "epoch": 0.5992621627853355, + "grad_norm": 1.0762809832802989, + "learning_rate": 1.6785207274907258e-06, + "loss": 0.47776496410369873, + "step": 2599 + }, + { + "epoch": 0.5994927369149181, + "grad_norm": 1.3625217888513212, + "learning_rate": 1.6782406108186455e-06, + "loss": 0.5653492212295532, + "step": 2600 + }, + { + "epoch": 0.5997233110445008, + "grad_norm": 1.2197147141619178, + "learning_rate": 1.677960395557419e-06, + "loss": 0.44313424825668335, + "step": 2601 + }, + { + 
"epoch": 0.5999538851740834, + "grad_norm": 1.137470066753919, + "learning_rate": 1.677680081747778e-06, + "loss": 0.40465259552001953, + "step": 2602 + }, + { + "epoch": 0.6001844593036662, + "grad_norm": 1.4481779333184874, + "learning_rate": 1.6773996694304687e-06, + "loss": 0.5488068461418152, + "step": 2603 + }, + { + "epoch": 0.6004150334332488, + "grad_norm": 1.2545703783665254, + "learning_rate": 1.6771191586462523e-06, + "loss": 0.5122859477996826, + "step": 2604 + }, + { + "epoch": 0.6006456075628315, + "grad_norm": 1.2685821503383574, + "learning_rate": 1.6768385494359039e-06, + "loss": 0.47173869609832764, + "step": 2605 + }, + { + "epoch": 0.6008761816924141, + "grad_norm": 1.342808103655164, + "learning_rate": 1.6765578418402129e-06, + "loss": 0.527764081954956, + "step": 2606 + }, + { + "epoch": 0.6011067558219968, + "grad_norm": 1.7106657610470863, + "learning_rate": 1.6762770358999826e-06, + "loss": 0.5399610996246338, + "step": 2607 + }, + { + "epoch": 0.6013373299515794, + "grad_norm": 1.1677908773060481, + "learning_rate": 1.6759961316560314e-06, + "loss": 0.3441581428050995, + "step": 2608 + }, + { + "epoch": 0.6015679040811621, + "grad_norm": 1.2546350672529525, + "learning_rate": 1.6757151291491916e-06, + "loss": 0.5027580857276917, + "step": 2609 + }, + { + "epoch": 0.6017984782107447, + "grad_norm": 1.6099655975362483, + "learning_rate": 1.6754340284203095e-06, + "loss": 0.3898310363292694, + "step": 2610 + }, + { + "epoch": 0.6020290523403274, + "grad_norm": 1.5075448921993653, + "learning_rate": 1.675152829510246e-06, + "loss": 0.5577199459075928, + "step": 2611 + }, + { + "epoch": 0.60225962646991, + "grad_norm": 1.178797634573082, + "learning_rate": 1.6748715324598763e-06, + "loss": 0.47849035263061523, + "step": 2612 + }, + { + "epoch": 0.6024902005994928, + "grad_norm": 1.2674537093214957, + "learning_rate": 1.6745901373100896e-06, + "loss": 0.46845290064811707, + "step": 2613 + }, + { + "epoch": 0.6027207747290754, + "grad_norm": 
1.4078882858329094, + "learning_rate": 1.6743086441017899e-06, + "loss": 0.46008870005607605, + "step": 2614 + }, + { + "epoch": 0.6029513488586581, + "grad_norm": 1.3347721564783812, + "learning_rate": 1.6740270528758948e-06, + "loss": 0.44386154413223267, + "step": 2615 + }, + { + "epoch": 0.6031819229882407, + "grad_norm": 1.2103476019651458, + "learning_rate": 1.6737453636733364e-06, + "loss": 0.495368629693985, + "step": 2616 + }, + { + "epoch": 0.6034124971178234, + "grad_norm": 1.257056760083973, + "learning_rate": 1.6734635765350613e-06, + "loss": 0.519428551197052, + "step": 2617 + }, + { + "epoch": 0.603643071247406, + "grad_norm": 1.5181965589957365, + "learning_rate": 1.6731816915020302e-06, + "loss": 0.49346470832824707, + "step": 2618 + }, + { + "epoch": 0.6038736453769887, + "grad_norm": 1.3323089431428572, + "learning_rate": 1.6728997086152173e-06, + "loss": 0.554854691028595, + "step": 2619 + }, + { + "epoch": 0.6041042195065713, + "grad_norm": 1.503361315997137, + "learning_rate": 1.6726176279156125e-06, + "loss": 0.4930881857872009, + "step": 2620 + }, + { + "epoch": 0.604334793636154, + "grad_norm": 1.1576996092953873, + "learning_rate": 1.6723354494442186e-06, + "loss": 0.4082447588443756, + "step": 2621 + }, + { + "epoch": 0.6045653677657367, + "grad_norm": 1.2572245396068074, + "learning_rate": 1.6720531732420531e-06, + "loss": 0.5151821374893188, + "step": 2622 + }, + { + "epoch": 0.6047959418953194, + "grad_norm": 1.6316483356509275, + "learning_rate": 1.671770799350148e-06, + "loss": 0.44579264521598816, + "step": 2623 + }, + { + "epoch": 0.605026516024902, + "grad_norm": 1.5349454914737826, + "learning_rate": 1.6714883278095489e-06, + "loss": 0.4937717020511627, + "step": 2624 + }, + { + "epoch": 0.6052570901544847, + "grad_norm": 1.4939841287703146, + "learning_rate": 1.671205758661316e-06, + "loss": 0.46298685669898987, + "step": 2625 + }, + { + "epoch": 0.6054876642840673, + "grad_norm": 1.3089529059854432, + "learning_rate": 
1.6709230919465233e-06, + "loss": 0.5535221695899963, + "step": 2626 + }, + { + "epoch": 0.60571823841365, + "grad_norm": 1.2781536932155106, + "learning_rate": 1.6706403277062599e-06, + "loss": 0.5289112329483032, + "step": 2627 + }, + { + "epoch": 0.6059488125432326, + "grad_norm": 1.2619858231183905, + "learning_rate": 1.6703574659816285e-06, + "loss": 0.506280779838562, + "step": 2628 + }, + { + "epoch": 0.6061793866728153, + "grad_norm": 1.366142383501645, + "learning_rate": 1.6700745068137451e-06, + "loss": 0.504257082939148, + "step": 2629 + }, + { + "epoch": 0.606409960802398, + "grad_norm": 1.2835196483556859, + "learning_rate": 1.6697914502437411e-06, + "loss": 0.624682605266571, + "step": 2630 + }, + { + "epoch": 0.6066405349319807, + "grad_norm": 1.1715096985967743, + "learning_rate": 1.6695082963127617e-06, + "loss": 0.4539645314216614, + "step": 2631 + }, + { + "epoch": 0.6068711090615633, + "grad_norm": 1.2852717924915888, + "learning_rate": 1.6692250450619665e-06, + "loss": 0.5461890697479248, + "step": 2632 + }, + { + "epoch": 0.607101683191146, + "grad_norm": 1.2251930368732282, + "learning_rate": 1.6689416965325282e-06, + "loss": 0.615606427192688, + "step": 2633 + }, + { + "epoch": 0.6073322573207286, + "grad_norm": 1.3904526684847855, + "learning_rate": 1.668658250765635e-06, + "loss": 0.5355387926101685, + "step": 2634 + }, + { + "epoch": 0.6075628314503113, + "grad_norm": 1.1464900003631002, + "learning_rate": 1.6683747078024886e-06, + "loss": 0.5804985165596008, + "step": 2635 + }, + { + "epoch": 0.6077934055798939, + "grad_norm": 1.1983123193544134, + "learning_rate": 1.6680910676843042e-06, + "loss": 0.4514031410217285, + "step": 2636 + }, + { + "epoch": 0.6080239797094766, + "grad_norm": 1.3446092692413514, + "learning_rate": 1.6678073304523123e-06, + "loss": 0.5621001720428467, + "step": 2637 + }, + { + "epoch": 0.6082545538390592, + "grad_norm": 1.3749875179413227, + "learning_rate": 1.667523496147757e-06, + "loss": 0.49387669563293457, 
+ "step": 2638 + }, + { + "epoch": 0.608485127968642, + "grad_norm": 1.0479438264918854, + "learning_rate": 1.6672395648118966e-06, + "loss": 0.5857938528060913, + "step": 2639 + }, + { + "epoch": 0.6087157020982246, + "grad_norm": 1.149056345239141, + "learning_rate": 1.6669555364860029e-06, + "loss": 0.46403199434280396, + "step": 2640 + }, + { + "epoch": 0.6089462762278073, + "grad_norm": 1.2068025098167319, + "learning_rate": 1.6666714112113627e-06, + "loss": 0.4998488128185272, + "step": 2641 + }, + { + "epoch": 0.6091768503573899, + "grad_norm": 1.3686546841392573, + "learning_rate": 1.6663871890292765e-06, + "loss": 0.6291745901107788, + "step": 2642 + }, + { + "epoch": 0.6094074244869726, + "grad_norm": 1.7034971765108011, + "learning_rate": 1.6661028699810587e-06, + "loss": 0.6326058506965637, + "step": 2643 + }, + { + "epoch": 0.6096379986165552, + "grad_norm": 1.2748339439376004, + "learning_rate": 1.6658184541080378e-06, + "loss": 0.5737805366516113, + "step": 2644 + }, + { + "epoch": 0.6098685727461379, + "grad_norm": 1.435593858390691, + "learning_rate": 1.6655339414515568e-06, + "loss": 0.565047025680542, + "step": 2645 + }, + { + "epoch": 0.6100991468757205, + "grad_norm": 1.154269897254632, + "learning_rate": 1.6652493320529724e-06, + "loss": 0.5157296061515808, + "step": 2646 + }, + { + "epoch": 0.6103297210053032, + "grad_norm": 1.2671967095996914, + "learning_rate": 1.6649646259536554e-06, + "loss": 0.4475112855434418, + "step": 2647 + }, + { + "epoch": 0.6105602951348859, + "grad_norm": 1.4397592539357233, + "learning_rate": 1.6646798231949911e-06, + "loss": 0.5072107315063477, + "step": 2648 + }, + { + "epoch": 0.6107908692644686, + "grad_norm": 1.3901386223871963, + "learning_rate": 1.6643949238183778e-06, + "loss": 0.44673952460289, + "step": 2649 + }, + { + "epoch": 0.6110214433940512, + "grad_norm": 1.4046630639478026, + "learning_rate": 1.6641099278652293e-06, + "loss": 0.47460734844207764, + "step": 2650 + }, + { + "epoch": 
0.6112520175236339, + "grad_norm": 1.251836663583678, + "learning_rate": 1.6638248353769718e-06, + "loss": 0.4529770612716675, + "step": 2651 + }, + { + "epoch": 0.6114825916532165, + "grad_norm": 1.4298404685971746, + "learning_rate": 1.6635396463950473e-06, + "loss": 0.5200958251953125, + "step": 2652 + }, + { + "epoch": 0.6117131657827992, + "grad_norm": 1.4871792439140996, + "learning_rate": 1.66325436096091e-06, + "loss": 0.465969979763031, + "step": 2653 + }, + { + "epoch": 0.6119437399123818, + "grad_norm": 1.1085493213804483, + "learning_rate": 1.6629689791160298e-06, + "loss": 0.5173276662826538, + "step": 2654 + }, + { + "epoch": 0.6121743140419645, + "grad_norm": 1.246647464420017, + "learning_rate": 1.6626835009018892e-06, + "loss": 0.5539907217025757, + "step": 2655 + }, + { + "epoch": 0.6124048881715471, + "grad_norm": 1.1686862955670068, + "learning_rate": 1.6623979263599857e-06, + "loss": 0.5617278814315796, + "step": 2656 + }, + { + "epoch": 0.6126354623011299, + "grad_norm": 1.3640942620216159, + "learning_rate": 1.6621122555318304e-06, + "loss": 0.46238285303115845, + "step": 2657 + }, + { + "epoch": 0.6128660364307125, + "grad_norm": 1.4695540598112733, + "learning_rate": 1.6618264884589484e-06, + "loss": 0.49247878789901733, + "step": 2658 + }, + { + "epoch": 0.6130966105602952, + "grad_norm": 1.0811892876151687, + "learning_rate": 1.6615406251828793e-06, + "loss": 0.4844072163105011, + "step": 2659 + }, + { + "epoch": 0.6133271846898778, + "grad_norm": 1.2024921886284354, + "learning_rate": 1.6612546657451754e-06, + "loss": 0.47372323274612427, + "step": 2660 + }, + { + "epoch": 0.6135577588194605, + "grad_norm": 1.299485129998275, + "learning_rate": 1.660968610187404e-06, + "loss": 0.5287426114082336, + "step": 2661 + }, + { + "epoch": 0.6137883329490431, + "grad_norm": 1.4640884136716181, + "learning_rate": 1.6606824585511471e-06, + "loss": 0.5862994194030762, + "step": 2662 + }, + { + "epoch": 0.6140189070786258, + "grad_norm": 
1.0158009777389652, + "learning_rate": 1.6603962108779986e-06, + "loss": 0.4866197109222412, + "step": 2663 + }, + { + "epoch": 0.6142494812082084, + "grad_norm": 1.408246184243547, + "learning_rate": 1.660109867209568e-06, + "loss": 0.5561861991882324, + "step": 2664 + }, + { + "epoch": 0.6144800553377912, + "grad_norm": 1.214620364544681, + "learning_rate": 1.659823427587478e-06, + "loss": 0.4878644645214081, + "step": 2665 + }, + { + "epoch": 0.6147106294673738, + "grad_norm": 1.3262957238727335, + "learning_rate": 1.659536892053366e-06, + "loss": 0.5371976494789124, + "step": 2666 + }, + { + "epoch": 0.6149412035969565, + "grad_norm": 1.2817478175527077, + "learning_rate": 1.6592502606488824e-06, + "loss": 0.4816581606864929, + "step": 2667 + }, + { + "epoch": 0.6151717777265391, + "grad_norm": 1.1536826566839264, + "learning_rate": 1.6589635334156919e-06, + "loss": 0.5105183124542236, + "step": 2668 + }, + { + "epoch": 0.6154023518561217, + "grad_norm": 1.4584261311401567, + "learning_rate": 1.6586767103954737e-06, + "loss": 0.5524129271507263, + "step": 2669 + }, + { + "epoch": 0.6156329259857044, + "grad_norm": 1.3107384301518328, + "learning_rate": 1.6583897916299204e-06, + "loss": 0.42373913526535034, + "step": 2670 + }, + { + "epoch": 0.615863500115287, + "grad_norm": 1.3724263799580212, + "learning_rate": 1.658102777160738e-06, + "loss": 0.5620803833007812, + "step": 2671 + }, + { + "epoch": 0.6160940742448697, + "grad_norm": 1.3004346965884186, + "learning_rate": 1.6578156670296472e-06, + "loss": 0.38180166482925415, + "step": 2672 + }, + { + "epoch": 0.6163246483744523, + "grad_norm": 1.2109058692777805, + "learning_rate": 1.6575284612783825e-06, + "loss": 0.48596519231796265, + "step": 2673 + }, + { + "epoch": 0.616555222504035, + "grad_norm": 1.1846928230852602, + "learning_rate": 1.657241159948692e-06, + "loss": 0.5098127126693726, + "step": 2674 + }, + { + "epoch": 0.6167857966336177, + "grad_norm": 1.5943292852368571, + "learning_rate": 
1.6569537630823382e-06, + "loss": 0.5650018453598022, + "step": 2675 + }, + { + "epoch": 0.6170163707632004, + "grad_norm": 1.1501551859696775, + "learning_rate": 1.6566662707210967e-06, + "loss": 0.45061948895454407, + "step": 2676 + }, + { + "epoch": 0.617246944892783, + "grad_norm": 1.3028951742766879, + "learning_rate": 1.6563786829067576e-06, + "loss": 0.4292137622833252, + "step": 2677 + }, + { + "epoch": 0.6174775190223657, + "grad_norm": 1.269567036808456, + "learning_rate": 1.656090999681125e-06, + "loss": 0.4837046265602112, + "step": 2678 + }, + { + "epoch": 0.6177080931519483, + "grad_norm": 1.9486185906204885, + "learning_rate": 1.6558032210860162e-06, + "loss": 0.43580353260040283, + "step": 2679 + }, + { + "epoch": 0.617938667281531, + "grad_norm": 1.2529677917985589, + "learning_rate": 1.6555153471632628e-06, + "loss": 0.47321656346321106, + "step": 2680 + }, + { + "epoch": 0.6181692414111136, + "grad_norm": 1.1423229113084605, + "learning_rate": 1.65522737795471e-06, + "loss": 0.47431111335754395, + "step": 2681 + }, + { + "epoch": 0.6183998155406963, + "grad_norm": 0.9698177160310311, + "learning_rate": 1.6549393135022181e-06, + "loss": 0.38062599301338196, + "step": 2682 + }, + { + "epoch": 0.618630389670279, + "grad_norm": 1.2758905094442272, + "learning_rate": 1.6546511538476584e-06, + "loss": 0.5941839218139648, + "step": 2683 + }, + { + "epoch": 0.6188609637998617, + "grad_norm": 1.453087551621585, + "learning_rate": 1.6543628990329195e-06, + "loss": 0.5323158502578735, + "step": 2684 + }, + { + "epoch": 0.6190915379294443, + "grad_norm": 1.100143863509344, + "learning_rate": 1.654074549099901e-06, + "loss": 0.3814772367477417, + "step": 2685 + }, + { + "epoch": 0.619322112059027, + "grad_norm": 1.5499952709692644, + "learning_rate": 1.6537861040905181e-06, + "loss": 0.5520694255828857, + "step": 2686 + }, + { + "epoch": 0.6195526861886096, + "grad_norm": 1.297782443862308, + "learning_rate": 1.653497564046699e-06, + "loss": 
0.5514999628067017, + "step": 2687 + }, + { + "epoch": 0.6197832603181923, + "grad_norm": 1.2170603559624027, + "learning_rate": 1.653208929010386e-06, + "loss": 0.39057493209838867, + "step": 2688 + }, + { + "epoch": 0.6200138344477749, + "grad_norm": 1.0224470752428403, + "learning_rate": 1.6529201990235352e-06, + "loss": 0.4941304922103882, + "step": 2689 + }, + { + "epoch": 0.6202444085773576, + "grad_norm": 1.2590211215766611, + "learning_rate": 1.6526313741281164e-06, + "loss": 0.539762020111084, + "step": 2690 + }, + { + "epoch": 0.6204749827069402, + "grad_norm": 1.3801421787603734, + "learning_rate": 1.6523424543661127e-06, + "loss": 0.49524787068367004, + "step": 2691 + }, + { + "epoch": 0.620705556836523, + "grad_norm": 1.2158625492501351, + "learning_rate": 1.6520534397795225e-06, + "loss": 0.4261528253555298, + "step": 2692 + }, + { + "epoch": 0.6209361309661056, + "grad_norm": 1.3188986304771895, + "learning_rate": 1.6517643304103563e-06, + "loss": 0.578548789024353, + "step": 2693 + }, + { + "epoch": 0.6211667050956883, + "grad_norm": 1.24168526725964, + "learning_rate": 1.6514751263006393e-06, + "loss": 0.4766680598258972, + "step": 2694 + }, + { + "epoch": 0.6213972792252709, + "grad_norm": 1.135518406763033, + "learning_rate": 1.6511858274924098e-06, + "loss": 0.4146459996700287, + "step": 2695 + }, + { + "epoch": 0.6216278533548536, + "grad_norm": 1.4632792907408574, + "learning_rate": 1.650896434027721e-06, + "loss": 0.5148390531539917, + "step": 2696 + }, + { + "epoch": 0.6218584274844362, + "grad_norm": 1.1678475162221296, + "learning_rate": 1.6506069459486388e-06, + "loss": 0.4830890893936157, + "step": 2697 + }, + { + "epoch": 0.6220890016140189, + "grad_norm": 1.2027318756470287, + "learning_rate": 1.6503173632972434e-06, + "loss": 0.4550463557243347, + "step": 2698 + }, + { + "epoch": 0.6223195757436015, + "grad_norm": 1.3023820822101895, + "learning_rate": 1.6500276861156284e-06, + "loss": 0.5811448097229004, + "step": 2699 + }, + { + 
"epoch": 0.6225501498731842, + "grad_norm": 1.3807858518585416, + "learning_rate": 1.6497379144459014e-06, + "loss": 0.44733545184135437, + "step": 2700 + }, + { + "epoch": 0.6227807240027669, + "grad_norm": 1.103384717152327, + "learning_rate": 1.6494480483301835e-06, + "loss": 0.4379687011241913, + "step": 2701 + }, + { + "epoch": 0.6230112981323496, + "grad_norm": 1.326644045971959, + "learning_rate": 1.6491580878106102e-06, + "loss": 0.5163959860801697, + "step": 2702 + }, + { + "epoch": 0.6232418722619322, + "grad_norm": 1.2037310331107272, + "learning_rate": 1.6488680329293297e-06, + "loss": 0.5636980533599854, + "step": 2703 + }, + { + "epoch": 0.6234724463915149, + "grad_norm": 1.1847301227909297, + "learning_rate": 1.6485778837285044e-06, + "loss": 0.46942776441574097, + "step": 2704 + }, + { + "epoch": 0.6237030205210975, + "grad_norm": 1.3867166397057658, + "learning_rate": 1.6482876402503103e-06, + "loss": 0.5104436278343201, + "step": 2705 + }, + { + "epoch": 0.6239335946506802, + "grad_norm": 1.2701601489299654, + "learning_rate": 1.6479973025369379e-06, + "loss": 0.4689507484436035, + "step": 2706 + }, + { + "epoch": 0.6241641687802628, + "grad_norm": 1.2388644364900292, + "learning_rate": 1.64770687063059e-06, + "loss": 0.4009973406791687, + "step": 2707 + }, + { + "epoch": 0.6243947429098455, + "grad_norm": 1.4958191711517836, + "learning_rate": 1.6474163445734846e-06, + "loss": 0.4938286542892456, + "step": 2708 + }, + { + "epoch": 0.6246253170394281, + "grad_norm": 1.2939637643231117, + "learning_rate": 1.6471257244078519e-06, + "loss": 0.4756525754928589, + "step": 2709 + }, + { + "epoch": 0.6248558911690109, + "grad_norm": 1.0308841763344028, + "learning_rate": 1.6468350101759366e-06, + "loss": 0.4322332739830017, + "step": 2710 + }, + { + "epoch": 0.6250864652985935, + "grad_norm": 1.381148895283306, + "learning_rate": 1.6465442019199972e-06, + "loss": 0.4605666995048523, + "step": 2711 + }, + { + "epoch": 0.6253170394281762, + "grad_norm": 
1.3288993921232848, + "learning_rate": 1.6462532996823053e-06, + "loss": 0.4576036334037781, + "step": 2712 + }, + { + "epoch": 0.6255476135577588, + "grad_norm": 1.1587792990864858, + "learning_rate": 1.645962303505147e-06, + "loss": 0.4860233664512634, + "step": 2713 + }, + { + "epoch": 0.6257781876873415, + "grad_norm": 1.2195714743605923, + "learning_rate": 1.6456712134308213e-06, + "loss": 0.4717915654182434, + "step": 2714 + }, + { + "epoch": 0.6260087618169241, + "grad_norm": 1.1008237671202603, + "learning_rate": 1.645380029501641e-06, + "loss": 0.49637067317962646, + "step": 2715 + }, + { + "epoch": 0.6262393359465068, + "grad_norm": 1.2218828759453872, + "learning_rate": 1.6450887517599326e-06, + "loss": 0.45388346910476685, + "step": 2716 + }, + { + "epoch": 0.6264699100760894, + "grad_norm": 1.6333623536070287, + "learning_rate": 1.6447973802480362e-06, + "loss": 0.5549031496047974, + "step": 2717 + }, + { + "epoch": 0.6267004842056721, + "grad_norm": 1.333805192555573, + "learning_rate": 1.644505915008306e-06, + "loss": 0.39759719371795654, + "step": 2718 + }, + { + "epoch": 0.6269310583352548, + "grad_norm": 1.2648542744381963, + "learning_rate": 1.644214356083109e-06, + "loss": 0.5126739740371704, + "step": 2719 + }, + { + "epoch": 0.6271616324648375, + "grad_norm": 1.1846129595938097, + "learning_rate": 1.6439227035148265e-06, + "loss": 0.41424083709716797, + "step": 2720 + }, + { + "epoch": 0.6273922065944201, + "grad_norm": 1.2295786085250646, + "learning_rate": 1.643630957345853e-06, + "loss": 0.5829803943634033, + "step": 2721 + }, + { + "epoch": 0.6276227807240028, + "grad_norm": 1.2114307243350246, + "learning_rate": 1.6433391176185972e-06, + "loss": 0.4736567437648773, + "step": 2722 + }, + { + "epoch": 0.6278533548535854, + "grad_norm": 1.4670818430092263, + "learning_rate": 1.6430471843754804e-06, + "loss": 0.41305306553840637, + "step": 2723 + }, + { + "epoch": 0.6280839289831681, + "grad_norm": 1.5480231340195962, + "learning_rate": 
1.6427551576589383e-06, + "loss": 0.38422563672065735, + "step": 2724 + }, + { + "epoch": 0.6283145031127507, + "grad_norm": 1.3725795006115715, + "learning_rate": 1.6424630375114199e-06, + "loss": 0.48302626609802246, + "step": 2725 + }, + { + "epoch": 0.6285450772423334, + "grad_norm": 1.2880102228926575, + "learning_rate": 1.6421708239753875e-06, + "loss": 0.4657328128814697, + "step": 2726 + }, + { + "epoch": 0.628775651371916, + "grad_norm": 1.4057295929235551, + "learning_rate": 1.641878517093318e-06, + "loss": 0.46126431226730347, + "step": 2727 + }, + { + "epoch": 0.6290062255014988, + "grad_norm": 1.3246078376538457, + "learning_rate": 1.6415861169077007e-06, + "loss": 0.5196214914321899, + "step": 2728 + }, + { + "epoch": 0.6292367996310814, + "grad_norm": 1.4794856753558834, + "learning_rate": 1.641293623461039e-06, + "loss": 0.5007073879241943, + "step": 2729 + }, + { + "epoch": 0.6294673737606641, + "grad_norm": 1.1543847272279724, + "learning_rate": 1.64100103679585e-06, + "loss": 0.4699769616127014, + "step": 2730 + }, + { + "epoch": 0.6296979478902467, + "grad_norm": 1.3221766888407216, + "learning_rate": 1.6407083569546636e-06, + "loss": 0.5487842559814453, + "step": 2731 + }, + { + "epoch": 0.6299285220198294, + "grad_norm": 1.0556125358940756, + "learning_rate": 1.6404155839800244e-06, + "loss": 0.42733538150787354, + "step": 2732 + }, + { + "epoch": 0.630159096149412, + "grad_norm": 1.1933689155818472, + "learning_rate": 1.64012271791449e-06, + "loss": 0.5105363726615906, + "step": 2733 + }, + { + "epoch": 0.6303896702789947, + "grad_norm": 1.3185367260440977, + "learning_rate": 1.6398297588006305e-06, + "loss": 0.5836968421936035, + "step": 2734 + }, + { + "epoch": 0.6306202444085773, + "grad_norm": 1.3830049962050668, + "learning_rate": 1.639536706681031e-06, + "loss": 0.4350558817386627, + "step": 2735 + }, + { + "epoch": 0.63085081853816, + "grad_norm": 1.4225393539645832, + "learning_rate": 1.63924356159829e-06, + "loss": 
0.5388341546058655, + "step": 2736 + }, + { + "epoch": 0.6310813926677427, + "grad_norm": 1.1218759160612528, + "learning_rate": 1.6389503235950186e-06, + "loss": 0.4576529860496521, + "step": 2737 + }, + { + "epoch": 0.6313119667973254, + "grad_norm": 1.524583554785293, + "learning_rate": 1.6386569927138422e-06, + "loss": 0.4525975286960602, + "step": 2738 + }, + { + "epoch": 0.631542540926908, + "grad_norm": 1.56840988374272, + "learning_rate": 1.6383635689973993e-06, + "loss": 0.42143142223358154, + "step": 2739 + }, + { + "epoch": 0.6317731150564907, + "grad_norm": 1.0672209595897675, + "learning_rate": 1.6380700524883423e-06, + "loss": 0.4440336227416992, + "step": 2740 + }, + { + "epoch": 0.6320036891860733, + "grad_norm": 1.2412570194863743, + "learning_rate": 1.637776443229336e-06, + "loss": 0.5009843707084656, + "step": 2741 + }, + { + "epoch": 0.632234263315656, + "grad_norm": 1.6736573631214935, + "learning_rate": 1.6374827412630604e-06, + "loss": 0.538151741027832, + "step": 2742 + }, + { + "epoch": 0.6324648374452386, + "grad_norm": 1.1895254537976463, + "learning_rate": 1.6371889466322077e-06, + "loss": 0.550201416015625, + "step": 2743 + }, + { + "epoch": 0.6326954115748213, + "grad_norm": 1.3861259597044466, + "learning_rate": 1.6368950593794836e-06, + "loss": 0.5707399845123291, + "step": 2744 + }, + { + "epoch": 0.632925985704404, + "grad_norm": 1.393827128295071, + "learning_rate": 1.6366010795476082e-06, + "loss": 0.5196787714958191, + "step": 2745 + }, + { + "epoch": 0.6331565598339867, + "grad_norm": 1.171378891149435, + "learning_rate": 1.636307007179314e-06, + "loss": 0.5243285894393921, + "step": 2746 + }, + { + "epoch": 0.6333871339635693, + "grad_norm": 1.249132441469792, + "learning_rate": 1.6360128423173473e-06, + "loss": 0.4202825427055359, + "step": 2747 + }, + { + "epoch": 0.633617708093152, + "grad_norm": 1.2547380834154716, + "learning_rate": 1.6357185850044681e-06, + "loss": 0.49080896377563477, + "step": 2748 + }, + { + "epoch": 
0.6338482822227346, + "grad_norm": 1.2234752623414968, + "learning_rate": 1.6354242352834502e-06, + "loss": 0.5537371635437012, + "step": 2749 + }, + { + "epoch": 0.6340788563523173, + "grad_norm": 1.1077493127634728, + "learning_rate": 1.6351297931970796e-06, + "loss": 0.3744293451309204, + "step": 2750 + }, + { + "epoch": 0.6343094304818999, + "grad_norm": 1.237975564408939, + "learning_rate": 1.634835258788157e-06, + "loss": 0.5176748037338257, + "step": 2751 + }, + { + "epoch": 0.6345400046114826, + "grad_norm": 1.321137847220575, + "learning_rate": 1.6345406320994952e-06, + "loss": 0.5179395079612732, + "step": 2752 + }, + { + "epoch": 0.6347705787410652, + "grad_norm": 1.3158476651008661, + "learning_rate": 1.634245913173922e-06, + "loss": 0.4810818135738373, + "step": 2753 + }, + { + "epoch": 0.635001152870648, + "grad_norm": 1.2760288557710286, + "learning_rate": 1.6339511020542775e-06, + "loss": 0.5188307762145996, + "step": 2754 + }, + { + "epoch": 0.6352317270002306, + "grad_norm": 1.662662743900965, + "learning_rate": 1.6336561987834151e-06, + "loss": 0.41170865297317505, + "step": 2755 + }, + { + "epoch": 0.6354623011298133, + "grad_norm": 1.1982414473393, + "learning_rate": 1.6333612034042025e-06, + "loss": 0.48726415634155273, + "step": 2756 + }, + { + "epoch": 0.6356928752593959, + "grad_norm": 1.1790415390507374, + "learning_rate": 1.63306611595952e-06, + "loss": 0.4483524560928345, + "step": 2757 + }, + { + "epoch": 0.6359234493889786, + "grad_norm": 1.2150870765180466, + "learning_rate": 1.6327709364922618e-06, + "loss": 0.3979623019695282, + "step": 2758 + }, + { + "epoch": 0.6361540235185612, + "grad_norm": 1.2093786796022739, + "learning_rate": 1.6324756650453346e-06, + "loss": 0.461483895778656, + "step": 2759 + }, + { + "epoch": 0.6363845976481439, + "grad_norm": 1.2350751043575534, + "learning_rate": 1.6321803016616598e-06, + "loss": 0.40054333209991455, + "step": 2760 + }, + { + "epoch": 0.6366151717777265, + "grad_norm": 
1.1196609017801307, + "learning_rate": 1.6318848463841712e-06, + "loss": 0.534996747970581, + "step": 2761 + }, + { + "epoch": 0.6368457459073092, + "grad_norm": 1.260260551672407, + "learning_rate": 1.631589299255816e-06, + "loss": 0.49408137798309326, + "step": 2762 + }, + { + "epoch": 0.6370763200368919, + "grad_norm": 1.305230846296416, + "learning_rate": 1.6312936603195557e-06, + "loss": 0.49098217487335205, + "step": 2763 + }, + { + "epoch": 0.6373068941664746, + "grad_norm": 1.1344163970655265, + "learning_rate": 1.6309979296183636e-06, + "loss": 0.4990113377571106, + "step": 2764 + }, + { + "epoch": 0.6375374682960572, + "grad_norm": 1.2952446438426217, + "learning_rate": 1.6307021071952276e-06, + "loss": 0.49399930238723755, + "step": 2765 + }, + { + "epoch": 0.6377680424256399, + "grad_norm": 1.320323762194689, + "learning_rate": 1.6304061930931478e-06, + "loss": 0.5029928684234619, + "step": 2766 + }, + { + "epoch": 0.6379986165552225, + "grad_norm": 1.2455728900211775, + "learning_rate": 1.6301101873551396e-06, + "loss": 0.5732289552688599, + "step": 2767 + }, + { + "epoch": 0.6382291906848052, + "grad_norm": 1.2965522975146178, + "learning_rate": 1.6298140900242293e-06, + "loss": 0.47334790229797363, + "step": 2768 + }, + { + "epoch": 0.6384597648143878, + "grad_norm": 1.2464510374223752, + "learning_rate": 1.6295179011434578e-06, + "loss": 0.44271016120910645, + "step": 2769 + }, + { + "epoch": 0.6386903389439705, + "grad_norm": 1.8250225519339747, + "learning_rate": 1.6292216207558798e-06, + "loss": 0.5768353939056396, + "step": 2770 + }, + { + "epoch": 0.6389209130735531, + "grad_norm": 1.074704735340539, + "learning_rate": 1.6289252489045625e-06, + "loss": 0.48315417766571045, + "step": 2771 + }, + { + "epoch": 0.6391514872031359, + "grad_norm": 1.338382007112913, + "learning_rate": 1.6286287856325855e-06, + "loss": 0.5745590925216675, + "step": 2772 + }, + { + "epoch": 0.6393820613327185, + "grad_norm": 1.473033213400145, + "learning_rate": 
1.6283322309830444e-06, + "loss": 0.6084291934967041, + "step": 2773 + }, + { + "epoch": 0.6396126354623012, + "grad_norm": 1.083816855400547, + "learning_rate": 1.6280355849990451e-06, + "loss": 0.4995007812976837, + "step": 2774 + }, + { + "epoch": 0.6398432095918838, + "grad_norm": 1.1962451309299882, + "learning_rate": 1.6277388477237084e-06, + "loss": 0.45811381936073303, + "step": 2775 + }, + { + "epoch": 0.6400737837214665, + "grad_norm": 1.448203316971052, + "learning_rate": 1.6274420192001689e-06, + "loss": 0.5666211247444153, + "step": 2776 + }, + { + "epoch": 0.6403043578510491, + "grad_norm": 1.3871415999727634, + "learning_rate": 1.6271450994715723e-06, + "loss": 0.5059396028518677, + "step": 2777 + }, + { + "epoch": 0.6405349319806318, + "grad_norm": 1.4444216130733851, + "learning_rate": 1.6268480885810798e-06, + "loss": 0.5418530702590942, + "step": 2778 + }, + { + "epoch": 0.6407655061102144, + "grad_norm": 1.4034133564890543, + "learning_rate": 1.6265509865718647e-06, + "loss": 0.5047061443328857, + "step": 2779 + }, + { + "epoch": 0.6409960802397972, + "grad_norm": 1.6003350461542336, + "learning_rate": 1.6262537934871138e-06, + "loss": 0.5104432702064514, + "step": 2780 + }, + { + "epoch": 0.6412266543693798, + "grad_norm": 1.3065683677222188, + "learning_rate": 1.625956509370027e-06, + "loss": 0.44423484802246094, + "step": 2781 + }, + { + "epoch": 0.6414572284989624, + "grad_norm": 1.1820302321160245, + "learning_rate": 1.6256591342638179e-06, + "loss": 0.47618383169174194, + "step": 2782 + }, + { + "epoch": 0.6416878026285451, + "grad_norm": 1.3796601981562324, + "learning_rate": 1.625361668211713e-06, + "loss": 0.5423145890235901, + "step": 2783 + }, + { + "epoch": 0.6419183767581277, + "grad_norm": 1.380895745392916, + "learning_rate": 1.6250641112569515e-06, + "loss": 0.517102837562561, + "step": 2784 + }, + { + "epoch": 0.6421489508877104, + "grad_norm": 1.2388489917279923, + "learning_rate": 1.6247664634427864e-06, + "loss": 
0.39601820707321167, + "step": 2785 + }, + { + "epoch": 0.642379525017293, + "grad_norm": 1.296572577942614, + "learning_rate": 1.6244687248124843e-06, + "loss": 0.5480250120162964, + "step": 2786 + }, + { + "epoch": 0.6426100991468757, + "grad_norm": 1.1105051491643492, + "learning_rate": 1.624170895409324e-06, + "loss": 0.4743092656135559, + "step": 2787 + }, + { + "epoch": 0.6428406732764583, + "grad_norm": 1.463202362201621, + "learning_rate": 1.6238729752765985e-06, + "loss": 0.4595726728439331, + "step": 2788 + }, + { + "epoch": 0.643071247406041, + "grad_norm": 1.2909676791556273, + "learning_rate": 1.6235749644576132e-06, + "loss": 0.5058779716491699, + "step": 2789 + }, + { + "epoch": 0.6433018215356237, + "grad_norm": 1.3145538108383794, + "learning_rate": 1.623276862995687e-06, + "loss": 0.5075543522834778, + "step": 2790 + }, + { + "epoch": 0.6435323956652064, + "grad_norm": 1.3185436913231439, + "learning_rate": 1.622978670934152e-06, + "loss": 0.5623351335525513, + "step": 2791 + }, + { + "epoch": 0.643762969794789, + "grad_norm": 1.1682118545924238, + "learning_rate": 1.6226803883163536e-06, + "loss": 0.3645760118961334, + "step": 2792 + }, + { + "epoch": 0.6439935439243717, + "grad_norm": 1.4617740663680228, + "learning_rate": 1.6223820151856501e-06, + "loss": 0.5666004419326782, + "step": 2793 + }, + { + "epoch": 0.6442241180539543, + "grad_norm": 1.3342697895697784, + "learning_rate": 1.6220835515854133e-06, + "loss": 0.6571217775344849, + "step": 2794 + }, + { + "epoch": 0.644454692183537, + "grad_norm": 1.4229199895470708, + "learning_rate": 1.6217849975590271e-06, + "loss": 0.5684333443641663, + "step": 2795 + }, + { + "epoch": 0.6446852663131196, + "grad_norm": 1.5289890556459427, + "learning_rate": 1.62148635314989e-06, + "loss": 0.43374937772750854, + "step": 2796 + }, + { + "epoch": 0.6449158404427023, + "grad_norm": 1.1182458179152783, + "learning_rate": 1.6211876184014134e-06, + "loss": 0.5102420449256897, + "step": 2797 + }, + { + 
"epoch": 0.6451464145722849, + "grad_norm": 1.0775475511417847, + "learning_rate": 1.6208887933570203e-06, + "loss": 0.39345985651016235, + "step": 2798 + }, + { + "epoch": 0.6453769887018677, + "grad_norm": 1.4503631372644623, + "learning_rate": 1.620589878060149e-06, + "loss": 0.47554945945739746, + "step": 2799 + }, + { + "epoch": 0.6456075628314503, + "grad_norm": 1.601431882721041, + "learning_rate": 1.6202908725542495e-06, + "loss": 0.4385503828525543, + "step": 2800 + }, + { + "epoch": 0.645838136961033, + "grad_norm": 1.1168858860640334, + "learning_rate": 1.619991776882785e-06, + "loss": 0.5589696168899536, + "step": 2801 + }, + { + "epoch": 0.6460687110906156, + "grad_norm": 1.265570460008291, + "learning_rate": 1.619692591089232e-06, + "loss": 0.4827546179294586, + "step": 2802 + }, + { + "epoch": 0.6462992852201983, + "grad_norm": 1.3309974001593363, + "learning_rate": 1.6193933152170809e-06, + "loss": 0.491131067276001, + "step": 2803 + }, + { + "epoch": 0.6465298593497809, + "grad_norm": 1.2647545815457555, + "learning_rate": 1.6190939493098341e-06, + "loss": 0.47185173630714417, + "step": 2804 + }, + { + "epoch": 0.6467604334793636, + "grad_norm": 1.235826049412326, + "learning_rate": 1.6187944934110072e-06, + "loss": 0.4411182701587677, + "step": 2805 + }, + { + "epoch": 0.6469910076089462, + "grad_norm": 1.2245067812038697, + "learning_rate": 1.6184949475641295e-06, + "loss": 0.47243285179138184, + "step": 2806 + }, + { + "epoch": 0.647221581738529, + "grad_norm": 1.3311536114931484, + "learning_rate": 1.6181953118127428e-06, + "loss": 0.4449295401573181, + "step": 2807 + }, + { + "epoch": 0.6474521558681116, + "grad_norm": 1.2292361204281614, + "learning_rate": 1.6178955862004024e-06, + "loss": 0.5148872137069702, + "step": 2808 + }, + { + "epoch": 0.6476827299976943, + "grad_norm": 1.2738055603189895, + "learning_rate": 1.6175957707706762e-06, + "loss": 0.5017277598381042, + "step": 2809 + }, + { + "epoch": 0.6479133041272769, + "grad_norm": 
1.1324070696899262, + "learning_rate": 1.6172958655671458e-06, + "loss": 0.44220247864723206, + "step": 2810 + }, + { + "epoch": 0.6481438782568596, + "grad_norm": 1.215492495713019, + "learning_rate": 1.6169958706334053e-06, + "loss": 0.45421087741851807, + "step": 2811 + }, + { + "epoch": 0.6483744523864422, + "grad_norm": 1.5167053281985836, + "learning_rate": 1.6166957860130618e-06, + "loss": 0.4772147536277771, + "step": 2812 + }, + { + "epoch": 0.6486050265160249, + "grad_norm": 1.1252103890770975, + "learning_rate": 1.6163956117497357e-06, + "loss": 0.5319628715515137, + "step": 2813 + }, + { + "epoch": 0.6488356006456075, + "grad_norm": 1.2663721872672429, + "learning_rate": 1.6160953478870608e-06, + "loss": 0.5109438896179199, + "step": 2814 + }, + { + "epoch": 0.6490661747751902, + "grad_norm": 1.33543378668276, + "learning_rate": 1.6157949944686827e-06, + "loss": 0.4417513608932495, + "step": 2815 + }, + { + "epoch": 0.6492967489047728, + "grad_norm": 1.2535935822359765, + "learning_rate": 1.6154945515382616e-06, + "loss": 0.5013085007667542, + "step": 2816 + }, + { + "epoch": 0.6495273230343556, + "grad_norm": 1.1191581438601172, + "learning_rate": 1.6151940191394693e-06, + "loss": 0.5197368860244751, + "step": 2817 + }, + { + "epoch": 0.6497578971639382, + "grad_norm": 1.4218758858652996, + "learning_rate": 1.6148933973159914e-06, + "loss": 0.46540898084640503, + "step": 2818 + }, + { + "epoch": 0.6499884712935209, + "grad_norm": 1.2080431861739462, + "learning_rate": 1.6145926861115268e-06, + "loss": 0.4867633581161499, + "step": 2819 + }, + { + "epoch": 0.6502190454231035, + "grad_norm": 1.1380395234486869, + "learning_rate": 1.6142918855697864e-06, + "loss": 0.426607221364975, + "step": 2820 + }, + { + "epoch": 0.6504496195526862, + "grad_norm": 1.2737116095131904, + "learning_rate": 1.613990995734495e-06, + "loss": 0.5183024406433105, + "step": 2821 + }, + { + "epoch": 0.6506801936822688, + "grad_norm": 1.3839354752611597, + "learning_rate": 
1.6136900166493893e-06, + "loss": 0.48635101318359375, + "step": 2822 + }, + { + "epoch": 0.6509107678118515, + "grad_norm": 1.5911912747422927, + "learning_rate": 1.6133889483582204e-06, + "loss": 0.47468632459640503, + "step": 2823 + }, + { + "epoch": 0.6511413419414341, + "grad_norm": 1.1598857858501956, + "learning_rate": 1.6130877909047515e-06, + "loss": 0.4665389358997345, + "step": 2824 + }, + { + "epoch": 0.6513719160710169, + "grad_norm": 1.1793258331020087, + "learning_rate": 1.6127865443327585e-06, + "loss": 0.5069966316223145, + "step": 2825 + }, + { + "epoch": 0.6516024902005995, + "grad_norm": 1.4107626754859688, + "learning_rate": 1.612485208686031e-06, + "loss": 0.47820740938186646, + "step": 2826 + }, + { + "epoch": 0.6518330643301822, + "grad_norm": 1.2189859420338702, + "learning_rate": 1.612183784008371e-06, + "loss": 0.43017104268074036, + "step": 2827 + }, + { + "epoch": 0.6520636384597648, + "grad_norm": 1.158515500774614, + "learning_rate": 1.6118822703435937e-06, + "loss": 0.45495298504829407, + "step": 2828 + }, + { + "epoch": 0.6522942125893475, + "grad_norm": 1.7108375139007879, + "learning_rate": 1.6115806677355272e-06, + "loss": 0.4624331593513489, + "step": 2829 + }, + { + "epoch": 0.6525247867189301, + "grad_norm": 1.0788742222165304, + "learning_rate": 1.6112789762280125e-06, + "loss": 0.39458876848220825, + "step": 2830 + }, + { + "epoch": 0.6527553608485128, + "grad_norm": 1.4194134450814206, + "learning_rate": 1.6109771958649035e-06, + "loss": 0.45552846789360046, + "step": 2831 + }, + { + "epoch": 0.6529859349780954, + "grad_norm": 1.4199555723058743, + "learning_rate": 1.6106753266900671e-06, + "loss": 0.4579755663871765, + "step": 2832 + }, + { + "epoch": 0.6532165091076781, + "grad_norm": 1.2589449636358518, + "learning_rate": 1.6103733687473823e-06, + "loss": 0.5164625644683838, + "step": 2833 + }, + { + "epoch": 0.6534470832372608, + "grad_norm": 1.3635551079325425, + "learning_rate": 1.6100713220807432e-06, + "loss": 
0.43071237206459045, + "step": 2834 + }, + { + "epoch": 0.6536776573668435, + "grad_norm": 1.2757429725484968, + "learning_rate": 1.6097691867340543e-06, + "loss": 0.5174099802970886, + "step": 2835 + }, + { + "epoch": 0.6539082314964261, + "grad_norm": 1.31351831375575, + "learning_rate": 1.609466962751234e-06, + "loss": 0.5944932699203491, + "step": 2836 + }, + { + "epoch": 0.6541388056260088, + "grad_norm": 1.312815606757786, + "learning_rate": 1.6091646501762145e-06, + "loss": 0.45203912258148193, + "step": 2837 + }, + { + "epoch": 0.6543693797555914, + "grad_norm": 1.292859531347235, + "learning_rate": 1.6088622490529386e-06, + "loss": 0.4197826683521271, + "step": 2838 + }, + { + "epoch": 0.6545999538851741, + "grad_norm": 1.3008648230701247, + "learning_rate": 1.6085597594253649e-06, + "loss": 0.4806807339191437, + "step": 2839 + }, + { + "epoch": 0.6548305280147567, + "grad_norm": 1.233893928808971, + "learning_rate": 1.608257181337462e-06, + "loss": 0.4618797302246094, + "step": 2840 + }, + { + "epoch": 0.6550611021443394, + "grad_norm": 1.1215282144992917, + "learning_rate": 1.6079545148332137e-06, + "loss": 0.4901892840862274, + "step": 2841 + }, + { + "epoch": 0.655291676273922, + "grad_norm": 1.250624448026336, + "learning_rate": 1.607651759956615e-06, + "loss": 0.44869139790534973, + "step": 2842 + }, + { + "epoch": 0.6555222504035048, + "grad_norm": 1.1064395173732657, + "learning_rate": 1.6073489167516747e-06, + "loss": 0.41470903158187866, + "step": 2843 + }, + { + "epoch": 0.6557528245330874, + "grad_norm": 1.2796938856852533, + "learning_rate": 1.6070459852624143e-06, + "loss": 0.5498615503311157, + "step": 2844 + }, + { + "epoch": 0.6559833986626701, + "grad_norm": 1.4741717641783516, + "learning_rate": 1.6067429655328675e-06, + "loss": 0.5462392568588257, + "step": 2845 + }, + { + "epoch": 0.6562139727922527, + "grad_norm": 1.5147243124828937, + "learning_rate": 1.6064398576070815e-06, + "loss": 0.3775100111961365, + "step": 2846 + }, + { + 
"epoch": 0.6564445469218354, + "grad_norm": 1.3806942156086204, + "learning_rate": 1.6061366615291161e-06, + "loss": 0.4712100028991699, + "step": 2847 + }, + { + "epoch": 0.656675121051418, + "grad_norm": 1.1320542857842297, + "learning_rate": 1.6058333773430439e-06, + "loss": 0.5152161121368408, + "step": 2848 + }, + { + "epoch": 0.6569056951810007, + "grad_norm": 1.2222287817453417, + "learning_rate": 1.6055300050929502e-06, + "loss": 0.46678972244262695, + "step": 2849 + }, + { + "epoch": 0.6571362693105833, + "grad_norm": 1.1948519980696821, + "learning_rate": 1.6052265448229338e-06, + "loss": 0.4622490108013153, + "step": 2850 + }, + { + "epoch": 0.657366843440166, + "grad_norm": 1.2601521252962713, + "learning_rate": 1.6049229965771052e-06, + "loss": 0.49909311532974243, + "step": 2851 + }, + { + "epoch": 0.6575974175697487, + "grad_norm": 1.1801405687475501, + "learning_rate": 1.6046193603995884e-06, + "loss": 0.4428306221961975, + "step": 2852 + }, + { + "epoch": 0.6578279916993314, + "grad_norm": 1.5295557154716768, + "learning_rate": 1.6043156363345196e-06, + "loss": 0.5842458009719849, + "step": 2853 + }, + { + "epoch": 0.658058565828914, + "grad_norm": 1.4945011678677886, + "learning_rate": 1.604011824426049e-06, + "loss": 0.47183722257614136, + "step": 2854 + }, + { + "epoch": 0.6582891399584967, + "grad_norm": 1.2843309395390234, + "learning_rate": 1.6037079247183379e-06, + "loss": 0.44225364923477173, + "step": 2855 + }, + { + "epoch": 0.6585197140880793, + "grad_norm": 1.3795669225253144, + "learning_rate": 1.6034039372555617e-06, + "loss": 0.4820272922515869, + "step": 2856 + }, + { + "epoch": 0.658750288217662, + "grad_norm": 1.6263387244434722, + "learning_rate": 1.6030998620819075e-06, + "loss": 0.48118168115615845, + "step": 2857 + }, + { + "epoch": 0.6589808623472446, + "grad_norm": 1.4704169894155685, + "learning_rate": 1.6027956992415764e-06, + "loss": 0.4386011064052582, + "step": 2858 + }, + { + "epoch": 0.6592114364768273, + "grad_norm": 
1.4148356020107666, + "learning_rate": 1.6024914487787814e-06, + "loss": 0.48740649223327637, + "step": 2859 + }, + { + "epoch": 0.65944201060641, + "grad_norm": 1.436235867684013, + "learning_rate": 1.602187110737748e-06, + "loss": 0.46782761812210083, + "step": 2860 + }, + { + "epoch": 0.6596725847359927, + "grad_norm": 1.2796166668007127, + "learning_rate": 1.6018826851627155e-06, + "loss": 0.5086358189582825, + "step": 2861 + }, + { + "epoch": 0.6599031588655753, + "grad_norm": 1.1582673721463366, + "learning_rate": 1.6015781720979344e-06, + "loss": 0.5631915330886841, + "step": 2862 + }, + { + "epoch": 0.660133732995158, + "grad_norm": 1.462417648098582, + "learning_rate": 1.6012735715876693e-06, + "loss": 0.5134458541870117, + "step": 2863 + }, + { + "epoch": 0.6603643071247406, + "grad_norm": 1.1268653967137703, + "learning_rate": 1.6009688836761969e-06, + "loss": 0.4308784008026123, + "step": 2864 + }, + { + "epoch": 0.6605948812543233, + "grad_norm": 1.3112517816231024, + "learning_rate": 1.6006641084078068e-06, + "loss": 0.5149765610694885, + "step": 2865 + }, + { + "epoch": 0.6608254553839059, + "grad_norm": 1.6101510783439525, + "learning_rate": 1.6003592458268005e-06, + "loss": 0.521892786026001, + "step": 2866 + }, + { + "epoch": 0.6610560295134886, + "grad_norm": 1.247084334907296, + "learning_rate": 1.6000542959774937e-06, + "loss": 0.46611008048057556, + "step": 2867 + }, + { + "epoch": 0.6612866036430712, + "grad_norm": 1.2517698630875118, + "learning_rate": 1.5997492589042135e-06, + "loss": 0.43080392479896545, + "step": 2868 + }, + { + "epoch": 0.661517177772654, + "grad_norm": 1.2239680444750303, + "learning_rate": 1.5994441346513003e-06, + "loss": 0.48026901483535767, + "step": 2869 + }, + { + "epoch": 0.6617477519022366, + "grad_norm": 1.1948228818170457, + "learning_rate": 1.5991389232631068e-06, + "loss": 0.48706555366516113, + "step": 2870 + }, + { + "epoch": 0.6619783260318193, + "grad_norm": 1.205848115890533, + "learning_rate": 
1.598833624783999e-06, + "loss": 0.5093512535095215, + "step": 2871 + }, + { + "epoch": 0.6622089001614019, + "grad_norm": 1.37517746631934, + "learning_rate": 1.5985282392583542e-06, + "loss": 0.5197086930274963, + "step": 2872 + }, + { + "epoch": 0.6624394742909846, + "grad_norm": 1.3389415544634544, + "learning_rate": 1.5982227667305646e-06, + "loss": 0.497372031211853, + "step": 2873 + }, + { + "epoch": 0.6626700484205672, + "grad_norm": 1.6851191621911175, + "learning_rate": 1.597917207245033e-06, + "loss": 0.4746604561805725, + "step": 2874 + }, + { + "epoch": 0.6629006225501499, + "grad_norm": 1.2864362072574318, + "learning_rate": 1.5976115608461755e-06, + "loss": 0.5531996488571167, + "step": 2875 + }, + { + "epoch": 0.6631311966797325, + "grad_norm": 1.2032344825838508, + "learning_rate": 1.5973058275784208e-06, + "loss": 0.44950544834136963, + "step": 2876 + }, + { + "epoch": 0.6633617708093152, + "grad_norm": 1.231321509427461, + "learning_rate": 1.597000007486211e-06, + "loss": 0.45596158504486084, + "step": 2877 + }, + { + "epoch": 0.6635923449388978, + "grad_norm": 1.1813154846400662, + "learning_rate": 1.596694100613999e-06, + "loss": 0.5243046879768372, + "step": 2878 + }, + { + "epoch": 0.6638229190684806, + "grad_norm": 1.2111771126184059, + "learning_rate": 1.5963881070062528e-06, + "loss": 0.46450644731521606, + "step": 2879 + }, + { + "epoch": 0.6640534931980632, + "grad_norm": 1.286085494147619, + "learning_rate": 1.5960820267074509e-06, + "loss": 0.5565767288208008, + "step": 2880 + }, + { + "epoch": 0.6642840673276459, + "grad_norm": 1.574495375498682, + "learning_rate": 1.595775859762085e-06, + "loss": 0.4351605176925659, + "step": 2881 + }, + { + "epoch": 0.6645146414572285, + "grad_norm": 1.3382136213218339, + "learning_rate": 1.5954696062146603e-06, + "loss": 0.5113346576690674, + "step": 2882 + }, + { + "epoch": 0.6647452155868112, + "grad_norm": 1.203285083111209, + "learning_rate": 1.5951632661096932e-06, + "loss": 
0.5005035996437073, + "step": 2883 + }, + { + "epoch": 0.6649757897163938, + "grad_norm": 1.1502074786882042, + "learning_rate": 1.5948568394917138e-06, + "loss": 0.4539811611175537, + "step": 2884 + }, + { + "epoch": 0.6652063638459765, + "grad_norm": 1.234546797786613, + "learning_rate": 1.5945503264052637e-06, + "loss": 0.4519865810871124, + "step": 2885 + }, + { + "epoch": 0.6654369379755591, + "grad_norm": 1.1932724883335695, + "learning_rate": 1.5942437268948985e-06, + "loss": 0.5688626766204834, + "step": 2886 + }, + { + "epoch": 0.6656675121051419, + "grad_norm": 1.1582733834983177, + "learning_rate": 1.5939370410051846e-06, + "loss": 0.5038400888442993, + "step": 2887 + }, + { + "epoch": 0.6658980862347245, + "grad_norm": 1.4308591259843988, + "learning_rate": 1.5936302687807028e-06, + "loss": 0.6332568526268005, + "step": 2888 + }, + { + "epoch": 0.6661286603643072, + "grad_norm": 1.2020172387992982, + "learning_rate": 1.593323410266045e-06, + "loss": 0.4994644820690155, + "step": 2889 + }, + { + "epoch": 0.6663592344938898, + "grad_norm": 1.3423031921779223, + "learning_rate": 1.5930164655058165e-06, + "loss": 0.4952617883682251, + "step": 2890 + }, + { + "epoch": 0.6665898086234725, + "grad_norm": 1.1769489968231674, + "learning_rate": 1.5927094345446345e-06, + "loss": 0.4188910722732544, + "step": 2891 + }, + { + "epoch": 0.6668203827530551, + "grad_norm": 1.319346697910086, + "learning_rate": 1.5924023174271295e-06, + "loss": 0.47160637378692627, + "step": 2892 + }, + { + "epoch": 0.6670509568826377, + "grad_norm": 1.0773369781050426, + "learning_rate": 1.592095114197944e-06, + "loss": 0.44884049892425537, + "step": 2893 + }, + { + "epoch": 0.6672815310122204, + "grad_norm": 1.3166895153069564, + "learning_rate": 1.5917878249017327e-06, + "loss": 0.4105216860771179, + "step": 2894 + }, + { + "epoch": 0.667512105141803, + "grad_norm": 1.3288589826448391, + "learning_rate": 1.5914804495831634e-06, + "loss": 0.5000967383384705, + "step": 2895 + }, + { + 
"epoch": 0.6677426792713858, + "grad_norm": 1.4772652615504442, + "learning_rate": 1.5911729882869163e-06, + "loss": 0.45515477657318115, + "step": 2896 + }, + { + "epoch": 0.6679732534009684, + "grad_norm": 1.2034912342077588, + "learning_rate": 1.590865441057684e-06, + "loss": 0.4492835998535156, + "step": 2897 + }, + { + "epoch": 0.6682038275305511, + "grad_norm": 1.5637287950189662, + "learning_rate": 1.5905578079401716e-06, + "loss": 0.553781270980835, + "step": 2898 + }, + { + "epoch": 0.6684344016601337, + "grad_norm": 1.235173143749482, + "learning_rate": 1.5902500889790967e-06, + "loss": 0.5085616111755371, + "step": 2899 + }, + { + "epoch": 0.6686649757897164, + "grad_norm": 1.2766607551584273, + "learning_rate": 1.5899422842191891e-06, + "loss": 0.4651145935058594, + "step": 2900 + }, + { + "epoch": 0.668895549919299, + "grad_norm": 1.3114841240621398, + "learning_rate": 1.5896343937051921e-06, + "loss": 0.5503841638565063, + "step": 2901 + }, + { + "epoch": 0.6691261240488817, + "grad_norm": 1.1881721760666544, + "learning_rate": 1.5893264174818599e-06, + "loss": 0.48213839530944824, + "step": 2902 + }, + { + "epoch": 0.6693566981784643, + "grad_norm": 1.2726619976847688, + "learning_rate": 1.5890183555939604e-06, + "loss": 0.4602949023246765, + "step": 2903 + }, + { + "epoch": 0.669587272308047, + "grad_norm": 1.213092004639277, + "learning_rate": 1.5887102080862736e-06, + "loss": 0.43991196155548096, + "step": 2904 + }, + { + "epoch": 0.6698178464376296, + "grad_norm": 1.2472416336517922, + "learning_rate": 1.5884019750035914e-06, + "loss": 0.48186323046684265, + "step": 2905 + }, + { + "epoch": 0.6700484205672124, + "grad_norm": 1.3445409358829308, + "learning_rate": 1.5880936563907189e-06, + "loss": 0.44907671213150024, + "step": 2906 + }, + { + "epoch": 0.670278994696795, + "grad_norm": 1.874421138474627, + "learning_rate": 1.587785252292473e-06, + "loss": 0.4475386142730713, + "step": 2907 + }, + { + "epoch": 0.6705095688263777, + "grad_norm": 
1.2649536391923781, + "learning_rate": 1.587476762753684e-06, + "loss": 0.4504704475402832, + "step": 2908 + }, + { + "epoch": 0.6707401429559603, + "grad_norm": 2.0624210450483376, + "learning_rate": 1.5871681878191937e-06, + "loss": 0.5090106129646301, + "step": 2909 + }, + { + "epoch": 0.670970717085543, + "grad_norm": 1.3010076823717651, + "learning_rate": 1.5868595275338561e-06, + "loss": 0.46150895953178406, + "step": 2910 + }, + { + "epoch": 0.6712012912151256, + "grad_norm": 1.2556909013752833, + "learning_rate": 1.586550781942539e-06, + "loss": 0.5499979257583618, + "step": 2911 + }, + { + "epoch": 0.6714318653447083, + "grad_norm": 1.2089730243488483, + "learning_rate": 1.5862419510901211e-06, + "loss": 0.46628689765930176, + "step": 2912 + }, + { + "epoch": 0.6716624394742909, + "grad_norm": 1.2998808024776154, + "learning_rate": 1.5859330350214941e-06, + "loss": 0.4517399072647095, + "step": 2913 + }, + { + "epoch": 0.6718930136038737, + "grad_norm": 1.0879313971673985, + "learning_rate": 1.5856240337815621e-06, + "loss": 0.4696923792362213, + "step": 2914 + }, + { + "epoch": 0.6721235877334563, + "grad_norm": 1.5676723620382764, + "learning_rate": 1.585314947415242e-06, + "loss": 0.41357535123825073, + "step": 2915 + }, + { + "epoch": 0.672354161863039, + "grad_norm": 1.2988881169526059, + "learning_rate": 1.5850057759674621e-06, + "loss": 0.5223745107650757, + "step": 2916 + }, + { + "epoch": 0.6725847359926216, + "grad_norm": 1.5751566352241433, + "learning_rate": 1.584696519483164e-06, + "loss": 0.48562729358673096, + "step": 2917 + }, + { + "epoch": 0.6728153101222043, + "grad_norm": 1.147456021361514, + "learning_rate": 1.5843871780073009e-06, + "loss": 0.3675496280193329, + "step": 2918 + }, + { + "epoch": 0.6730458842517869, + "grad_norm": 1.4691177353786786, + "learning_rate": 1.5840777515848389e-06, + "loss": 0.5782667994499207, + "step": 2919 + }, + { + "epoch": 0.6732764583813696, + "grad_norm": 1.110911745804502, + "learning_rate": 
1.583768240260756e-06, + "loss": 0.419716477394104, + "step": 2920 + }, + { + "epoch": 0.6735070325109522, + "grad_norm": 1.2625181785612978, + "learning_rate": 1.5834586440800434e-06, + "loss": 0.4004133939743042, + "step": 2921 + }, + { + "epoch": 0.673737606640535, + "grad_norm": 1.3860644175168617, + "learning_rate": 1.5831489630877037e-06, + "loss": 0.4917314350605011, + "step": 2922 + }, + { + "epoch": 0.6739681807701176, + "grad_norm": 1.3350109690747092, + "learning_rate": 1.5828391973287522e-06, + "loss": 0.5488141179084778, + "step": 2923 + }, + { + "epoch": 0.6741987548997003, + "grad_norm": 1.2547850876004316, + "learning_rate": 1.5825293468482163e-06, + "loss": 0.5047071576118469, + "step": 2924 + }, + { + "epoch": 0.6744293290292829, + "grad_norm": 1.3178326140677985, + "learning_rate": 1.5822194116911364e-06, + "loss": 0.4830411672592163, + "step": 2925 + }, + { + "epoch": 0.6746599031588656, + "grad_norm": 1.2591886503495524, + "learning_rate": 1.5819093919025641e-06, + "loss": 0.47517114877700806, + "step": 2926 + }, + { + "epoch": 0.6748904772884482, + "grad_norm": 1.3603729738722081, + "learning_rate": 1.5815992875275642e-06, + "loss": 0.5617963075637817, + "step": 2927 + }, + { + "epoch": 0.6751210514180309, + "grad_norm": 1.1752484838801127, + "learning_rate": 1.5812890986112137e-06, + "loss": 0.4360186457633972, + "step": 2928 + }, + { + "epoch": 0.6753516255476135, + "grad_norm": 1.5551926866200483, + "learning_rate": 1.5809788251986014e-06, + "loss": 0.49538636207580566, + "step": 2929 + }, + { + "epoch": 0.6755821996771962, + "grad_norm": 1.1285780293266063, + "learning_rate": 1.5806684673348288e-06, + "loss": 0.538766622543335, + "step": 2930 + }, + { + "epoch": 0.6758127738067788, + "grad_norm": 1.5395880930573347, + "learning_rate": 1.5803580250650094e-06, + "loss": 0.4113287329673767, + "step": 2931 + }, + { + "epoch": 0.6760433479363616, + "grad_norm": 1.4441179706006158, + "learning_rate": 1.5800474984342698e-06, + "loss": 
0.5298923254013062, + "step": 2932 + }, + { + "epoch": 0.6762739220659442, + "grad_norm": 1.2285488161220737, + "learning_rate": 1.5797368874877472e-06, + "loss": 0.4891100227832794, + "step": 2933 + }, + { + "epoch": 0.6765044961955269, + "grad_norm": 1.3809520207822814, + "learning_rate": 1.579426192270593e-06, + "loss": 0.4412326216697693, + "step": 2934 + }, + { + "epoch": 0.6767350703251095, + "grad_norm": 1.3386538114869513, + "learning_rate": 1.5791154128279693e-06, + "loss": 0.5514793395996094, + "step": 2935 + }, + { + "epoch": 0.6769656444546922, + "grad_norm": 1.2065068425398038, + "learning_rate": 1.578804549205051e-06, + "loss": 0.44050243496894836, + "step": 2936 + }, + { + "epoch": 0.6771962185842748, + "grad_norm": 1.3084516018872256, + "learning_rate": 1.5784936014470256e-06, + "loss": 0.47503453493118286, + "step": 2937 + }, + { + "epoch": 0.6774267927138575, + "grad_norm": 1.445992727647949, + "learning_rate": 1.5781825695990922e-06, + "loss": 0.524544894695282, + "step": 2938 + }, + { + "epoch": 0.6776573668434401, + "grad_norm": 1.2672201923678605, + "learning_rate": 1.5778714537064628e-06, + "loss": 0.4203689694404602, + "step": 2939 + }, + { + "epoch": 0.6778879409730229, + "grad_norm": 1.255678429788082, + "learning_rate": 1.577560253814361e-06, + "loss": 0.4305247664451599, + "step": 2940 + }, + { + "epoch": 0.6781185151026055, + "grad_norm": 1.2383698343036857, + "learning_rate": 1.577248969968023e-06, + "loss": 0.6129249930381775, + "step": 2941 + }, + { + "epoch": 0.6783490892321882, + "grad_norm": 1.4217586280781416, + "learning_rate": 1.5769376022126969e-06, + "loss": 0.44431981444358826, + "step": 2942 + }, + { + "epoch": 0.6785796633617708, + "grad_norm": 1.2327303005745092, + "learning_rate": 1.576626150593643e-06, + "loss": 0.4394958019256592, + "step": 2943 + }, + { + "epoch": 0.6788102374913535, + "grad_norm": 1.2593798978560244, + "learning_rate": 1.5763146151561345e-06, + "loss": 0.44481268525123596, + "step": 2944 + }, + { + 
"epoch": 0.6790408116209361, + "grad_norm": 1.4440486279504336, + "learning_rate": 1.5760029959454556e-06, + "loss": 0.4251822829246521, + "step": 2945 + }, + { + "epoch": 0.6792713857505188, + "grad_norm": 1.338830252556874, + "learning_rate": 1.575691293006904e-06, + "loss": 0.41041696071624756, + "step": 2946 + }, + { + "epoch": 0.6795019598801014, + "grad_norm": 1.357017341106407, + "learning_rate": 1.5753795063857883e-06, + "loss": 0.5710239410400391, + "step": 2947 + }, + { + "epoch": 0.6797325340096841, + "grad_norm": 1.2834985119403657, + "learning_rate": 1.57506763612743e-06, + "loss": 0.48825210332870483, + "step": 2948 + }, + { + "epoch": 0.6799631081392667, + "grad_norm": 1.263284608882453, + "learning_rate": 1.5747556822771628e-06, + "loss": 0.37077784538269043, + "step": 2949 + }, + { + "epoch": 0.6801936822688495, + "grad_norm": 1.2458271352531185, + "learning_rate": 1.5744436448803322e-06, + "loss": 0.4618649482727051, + "step": 2950 + }, + { + "epoch": 0.6804242563984321, + "grad_norm": 1.0624348057433408, + "learning_rate": 1.574131523982296e-06, + "loss": 0.4415496289730072, + "step": 2951 + }, + { + "epoch": 0.6806548305280148, + "grad_norm": 1.4732593030941656, + "learning_rate": 1.5738193196284239e-06, + "loss": 0.440029501914978, + "step": 2952 + }, + { + "epoch": 0.6808854046575974, + "grad_norm": 1.3992294210480754, + "learning_rate": 1.5735070318640986e-06, + "loss": 0.5149378776550293, + "step": 2953 + }, + { + "epoch": 0.6811159787871801, + "grad_norm": 1.3173119180782331, + "learning_rate": 1.5731946607347136e-06, + "loss": 0.4838085174560547, + "step": 2954 + }, + { + "epoch": 0.6813465529167627, + "grad_norm": 1.3500402916158631, + "learning_rate": 1.5728822062856757e-06, + "loss": 0.48472005128860474, + "step": 2955 + }, + { + "epoch": 0.6815771270463454, + "grad_norm": 1.163167888868214, + "learning_rate": 1.572569668562403e-06, + "loss": 0.5154656767845154, + "step": 2956 + }, + { + "epoch": 0.681807701175928, + "grad_norm": 
1.1906599654401737, + "learning_rate": 1.5722570476103263e-06, + "loss": 0.4094988703727722, + "step": 2957 + }, + { + "epoch": 0.6820382753055108, + "grad_norm": 1.2324943837281264, + "learning_rate": 1.5719443434748877e-06, + "loss": 0.5125937461853027, + "step": 2958 + }, + { + "epoch": 0.6822688494350934, + "grad_norm": 1.2538269370063608, + "learning_rate": 1.5716315562015428e-06, + "loss": 0.4807034730911255, + "step": 2959 + }, + { + "epoch": 0.6824994235646761, + "grad_norm": 1.3513545314522855, + "learning_rate": 1.5713186858357577e-06, + "loss": 0.6126741170883179, + "step": 2960 + }, + { + "epoch": 0.6827299976942587, + "grad_norm": 2.1674593801056887, + "learning_rate": 1.5710057324230113e-06, + "loss": 0.5450708866119385, + "step": 2961 + }, + { + "epoch": 0.6829605718238414, + "grad_norm": 1.8355809144200355, + "learning_rate": 1.5706926960087948e-06, + "loss": 0.47740328311920166, + "step": 2962 + }, + { + "epoch": 0.683191145953424, + "grad_norm": 1.311529987995532, + "learning_rate": 1.5703795766386112e-06, + "loss": 0.4731057584285736, + "step": 2963 + }, + { + "epoch": 0.6834217200830067, + "grad_norm": 1.3162153678952433, + "learning_rate": 1.5700663743579754e-06, + "loss": 0.49735045433044434, + "step": 2964 + }, + { + "epoch": 0.6836522942125893, + "grad_norm": 1.2346637447285915, + "learning_rate": 1.569753089212415e-06, + "loss": 0.5257318019866943, + "step": 2965 + }, + { + "epoch": 0.683882868342172, + "grad_norm": 1.1458467925306592, + "learning_rate": 1.5694397212474685e-06, + "loss": 0.3947733938694, + "step": 2966 + }, + { + "epoch": 0.6841134424717547, + "grad_norm": 1.424176183527685, + "learning_rate": 1.5691262705086875e-06, + "loss": 0.5078107714653015, + "step": 2967 + }, + { + "epoch": 0.6843440166013374, + "grad_norm": 1.7316538509871626, + "learning_rate": 1.5688127370416351e-06, + "loss": 0.5921520590782166, + "step": 2968 + }, + { + "epoch": 0.68457459073092, + "grad_norm": 1.2277129646213039, + "learning_rate": 
1.5684991208918866e-06, + "loss": 0.45995181798934937, + "step": 2969 + }, + { + "epoch": 0.6848051648605027, + "grad_norm": 1.1894548452861071, + "learning_rate": 1.5681854221050293e-06, + "loss": 0.4874386787414551, + "step": 2970 + }, + { + "epoch": 0.6850357389900853, + "grad_norm": 1.3695475422493124, + "learning_rate": 1.5678716407266625e-06, + "loss": 0.4522739052772522, + "step": 2971 + }, + { + "epoch": 0.685266313119668, + "grad_norm": 1.3244142914830208, + "learning_rate": 1.5675577768023977e-06, + "loss": 0.4596391022205353, + "step": 2972 + }, + { + "epoch": 0.6854968872492506, + "grad_norm": 1.6847382830263626, + "learning_rate": 1.567243830377858e-06, + "loss": 0.5391427278518677, + "step": 2973 + }, + { + "epoch": 0.6857274613788333, + "grad_norm": 1.2164543996098884, + "learning_rate": 1.5669298014986786e-06, + "loss": 0.5583066940307617, + "step": 2974 + }, + { + "epoch": 0.6859580355084159, + "grad_norm": 1.3656527800334406, + "learning_rate": 1.566615690210507e-06, + "loss": 0.5410330295562744, + "step": 2975 + }, + { + "epoch": 0.6861886096379987, + "grad_norm": 1.2007908045124778, + "learning_rate": 1.566301496559002e-06, + "loss": 0.5145233273506165, + "step": 2976 + }, + { + "epoch": 0.6864191837675813, + "grad_norm": 1.4168885241389684, + "learning_rate": 1.5659872205898356e-06, + "loss": 0.5021970272064209, + "step": 2977 + }, + { + "epoch": 0.686649757897164, + "grad_norm": 1.0896663307775538, + "learning_rate": 1.5656728623486903e-06, + "loss": 0.48251593112945557, + "step": 2978 + }, + { + "epoch": 0.6868803320267466, + "grad_norm": 1.2502610536872558, + "learning_rate": 1.5653584218812617e-06, + "loss": 0.4228450655937195, + "step": 2979 + }, + { + "epoch": 0.6871109061563293, + "grad_norm": 1.4048596098114436, + "learning_rate": 1.5650438992332567e-06, + "loss": 0.3975197374820709, + "step": 2980 + }, + { + "epoch": 0.6873414802859119, + "grad_norm": 1.386478606714872, + "learning_rate": 1.5647292944503945e-06, + "loss": 
0.5441234707832336, + "step": 2981 + }, + { + "epoch": 0.6875720544154946, + "grad_norm": 1.3552115877356068, + "learning_rate": 1.5644146075784057e-06, + "loss": 0.5357148051261902, + "step": 2982 + }, + { + "epoch": 0.6878026285450772, + "grad_norm": 1.2605289404512496, + "learning_rate": 1.5640998386630337e-06, + "loss": 0.530154824256897, + "step": 2983 + }, + { + "epoch": 0.68803320267466, + "grad_norm": 1.3830405468746736, + "learning_rate": 1.563784987750033e-06, + "loss": 0.480657696723938, + "step": 2984 + }, + { + "epoch": 0.6882637768042426, + "grad_norm": 1.2595390052779563, + "learning_rate": 1.5634700548851712e-06, + "loss": 0.4822859764099121, + "step": 2985 + }, + { + "epoch": 0.6884943509338253, + "grad_norm": 1.4511024891592457, + "learning_rate": 1.5631550401142257e-06, + "loss": 0.48551490902900696, + "step": 2986 + }, + { + "epoch": 0.6887249250634079, + "grad_norm": 1.252088599015217, + "learning_rate": 1.562839943482988e-06, + "loss": 0.43080294132232666, + "step": 2987 + }, + { + "epoch": 0.6889554991929906, + "grad_norm": 1.1661214157780933, + "learning_rate": 1.56252476503726e-06, + "loss": 0.42780637741088867, + "step": 2988 + }, + { + "epoch": 0.6891860733225732, + "grad_norm": 1.3057809079761946, + "learning_rate": 1.5622095048228565e-06, + "loss": 0.539027214050293, + "step": 2989 + }, + { + "epoch": 0.6894166474521559, + "grad_norm": 1.2289425463506802, + "learning_rate": 1.5618941628856037e-06, + "loss": 0.4529460668563843, + "step": 2990 + }, + { + "epoch": 0.6896472215817385, + "grad_norm": 1.4016140654354556, + "learning_rate": 1.5615787392713395e-06, + "loss": 0.49724727869033813, + "step": 2991 + }, + { + "epoch": 0.6898777957113212, + "grad_norm": 1.25157972103927, + "learning_rate": 1.5612632340259144e-06, + "loss": 0.4711928963661194, + "step": 2992 + }, + { + "epoch": 0.6901083698409038, + "grad_norm": 1.3707143585352468, + "learning_rate": 1.56094764719519e-06, + "loss": 0.42258220911026, + "step": 2993 + }, + { + "epoch": 
0.6903389439704866, + "grad_norm": 1.371187363460567, + "learning_rate": 1.5606319788250398e-06, + "loss": 0.47754064202308655, + "step": 2994 + }, + { + "epoch": 0.6905695181000692, + "grad_norm": 1.307708883093593, + "learning_rate": 1.5603162289613501e-06, + "loss": 0.47200560569763184, + "step": 2995 + }, + { + "epoch": 0.6908000922296519, + "grad_norm": 1.359798809074, + "learning_rate": 1.5600003976500173e-06, + "loss": 0.5194537043571472, + "step": 2996 + }, + { + "epoch": 0.6910306663592345, + "grad_norm": 1.707437655194179, + "learning_rate": 1.5596844849369518e-06, + "loss": 0.4874703586101532, + "step": 2997 + }, + { + "epoch": 0.6912612404888172, + "grad_norm": 1.262990523197611, + "learning_rate": 1.5593684908680738e-06, + "loss": 0.5028672218322754, + "step": 2998 + }, + { + "epoch": 0.6914918146183998, + "grad_norm": 1.2420345591817543, + "learning_rate": 1.5590524154893169e-06, + "loss": 0.44250521063804626, + "step": 2999 + }, + { + "epoch": 0.6917223887479825, + "grad_norm": 1.6089998258276121, + "learning_rate": 1.5587362588466253e-06, + "loss": 0.536510705947876, + "step": 3000 + }, + { + "epoch": 0.6919529628775651, + "grad_norm": 1.3333649931769909, + "learning_rate": 1.5584200209859558e-06, + "loss": 0.4514959752559662, + "step": 3001 + }, + { + "epoch": 0.6921835370071479, + "grad_norm": 1.1923376457733827, + "learning_rate": 1.5581037019532773e-06, + "loss": 0.4402197301387787, + "step": 3002 + }, + { + "epoch": 0.6924141111367305, + "grad_norm": 1.1940429657833775, + "learning_rate": 1.5577873017945691e-06, + "loss": 0.508256196975708, + "step": 3003 + }, + { + "epoch": 0.6926446852663131, + "grad_norm": 1.2600794916577294, + "learning_rate": 1.5574708205558236e-06, + "loss": 0.5123175978660583, + "step": 3004 + }, + { + "epoch": 0.6928752593958958, + "grad_norm": 1.4303227599201425, + "learning_rate": 1.5571542582830447e-06, + "loss": 0.4874982237815857, + "step": 3005 + }, + { + "epoch": 0.6931058335254784, + "grad_norm": 
1.314228379499143, + "learning_rate": 1.556837615022248e-06, + "loss": 0.44554391503334045, + "step": 3006 + }, + { + "epoch": 0.6933364076550611, + "grad_norm": 1.5428941228634732, + "learning_rate": 1.5565208908194603e-06, + "loss": 0.5899895429611206, + "step": 3007 + }, + { + "epoch": 0.6935669817846437, + "grad_norm": 1.2685614762262514, + "learning_rate": 1.5562040857207208e-06, + "loss": 0.5137951374053955, + "step": 3008 + }, + { + "epoch": 0.6937975559142264, + "grad_norm": 1.2863812659603593, + "learning_rate": 1.5558871997720805e-06, + "loss": 0.5435892343521118, + "step": 3009 + }, + { + "epoch": 0.694028130043809, + "grad_norm": 1.4463505314835092, + "learning_rate": 1.5555702330196021e-06, + "loss": 0.45998525619506836, + "step": 3010 + }, + { + "epoch": 0.6942587041733917, + "grad_norm": 1.324515476398786, + "learning_rate": 1.5552531855093597e-06, + "loss": 0.4676332473754883, + "step": 3011 + }, + { + "epoch": 0.6944892783029744, + "grad_norm": 1.2595225568514163, + "learning_rate": 1.5549360572874397e-06, + "loss": 0.48250633478164673, + "step": 3012 + }, + { + "epoch": 0.6947198524325571, + "grad_norm": 1.4537609539003187, + "learning_rate": 1.5546188483999396e-06, + "loss": 0.4841402769088745, + "step": 3013 + }, + { + "epoch": 0.6949504265621397, + "grad_norm": 1.401637069375295, + "learning_rate": 1.5543015588929688e-06, + "loss": 0.4717336893081665, + "step": 3014 + }, + { + "epoch": 0.6951810006917224, + "grad_norm": 1.3276052543558161, + "learning_rate": 1.5539841888126488e-06, + "loss": 0.48844897747039795, + "step": 3015 + }, + { + "epoch": 0.695411574821305, + "grad_norm": 1.539947517538627, + "learning_rate": 1.5536667382051127e-06, + "loss": 0.5244781970977783, + "step": 3016 + }, + { + "epoch": 0.6956421489508877, + "grad_norm": 1.2794123200247822, + "learning_rate": 1.5533492071165046e-06, + "loss": 0.4612278938293457, + "step": 3017 + }, + { + "epoch": 0.6958727230804703, + "grad_norm": 1.1978546028008836, + "learning_rate": 
1.5530315955929817e-06, + "loss": 0.40461257100105286, + "step": 3018 + }, + { + "epoch": 0.696103297210053, + "grad_norm": 1.387518032200497, + "learning_rate": 1.5527139036807112e-06, + "loss": 0.5191174745559692, + "step": 3019 + }, + { + "epoch": 0.6963338713396356, + "grad_norm": 1.510370534054042, + "learning_rate": 1.5523961314258731e-06, + "loss": 0.45882558822631836, + "step": 3020 + }, + { + "epoch": 0.6965644454692184, + "grad_norm": 1.230362803290169, + "learning_rate": 1.552078278874659e-06, + "loss": 0.4766819477081299, + "step": 3021 + }, + { + "epoch": 0.696795019598801, + "grad_norm": 1.2822436220739486, + "learning_rate": 1.5517603460732724e-06, + "loss": 0.4572867751121521, + "step": 3022 + }, + { + "epoch": 0.6970255937283837, + "grad_norm": 1.5677891937472022, + "learning_rate": 1.5514423330679272e-06, + "loss": 0.4689183235168457, + "step": 3023 + }, + { + "epoch": 0.6972561678579663, + "grad_norm": 1.18549719550499, + "learning_rate": 1.5511242399048504e-06, + "loss": 0.45769914984703064, + "step": 3024 + }, + { + "epoch": 0.697486741987549, + "grad_norm": 1.3095011770493485, + "learning_rate": 1.5508060666302796e-06, + "loss": 0.47367236018180847, + "step": 3025 + }, + { + "epoch": 0.6977173161171316, + "grad_norm": 1.5441644429162589, + "learning_rate": 1.550487813290465e-06, + "loss": 0.40873080492019653, + "step": 3026 + }, + { + "epoch": 0.6979478902467143, + "grad_norm": 1.2349195465907241, + "learning_rate": 1.5501694799316671e-06, + "loss": 0.42366844415664673, + "step": 3027 + }, + { + "epoch": 0.6981784643762969, + "grad_norm": 1.2587292360565243, + "learning_rate": 1.5498510666001602e-06, + "loss": 0.3133828043937683, + "step": 3028 + }, + { + "epoch": 0.6984090385058797, + "grad_norm": 1.5168032500602213, + "learning_rate": 1.549532573342228e-06, + "loss": 0.5188712477684021, + "step": 3029 + }, + { + "epoch": 0.6986396126354623, + "grad_norm": 1.2707264640547211, + "learning_rate": 1.5492140002041668e-06, + "loss": 
0.4374960660934448, + "step": 3030 + }, + { + "epoch": 0.698870186765045, + "grad_norm": 1.6828882278794643, + "learning_rate": 1.5488953472322845e-06, + "loss": 0.5285592079162598, + "step": 3031 + }, + { + "epoch": 0.6991007608946276, + "grad_norm": 1.5111090584536853, + "learning_rate": 1.5485766144729006e-06, + "loss": 0.5331767797470093, + "step": 3032 + }, + { + "epoch": 0.6993313350242103, + "grad_norm": 1.3626863062762309, + "learning_rate": 1.5482578019723462e-06, + "loss": 0.4546147584915161, + "step": 3033 + }, + { + "epoch": 0.6995619091537929, + "grad_norm": 1.2127032724557087, + "learning_rate": 1.5479389097769639e-06, + "loss": 0.47674182057380676, + "step": 3034 + }, + { + "epoch": 0.6997924832833756, + "grad_norm": 1.2042624102453106, + "learning_rate": 1.5476199379331078e-06, + "loss": 0.496138334274292, + "step": 3035 + }, + { + "epoch": 0.7000230574129582, + "grad_norm": 1.367736432364491, + "learning_rate": 1.547300886487144e-06, + "loss": 0.4843756854534149, + "step": 3036 + }, + { + "epoch": 0.7002536315425409, + "grad_norm": 1.5043582093976149, + "learning_rate": 1.5469817554854494e-06, + "loss": 0.6028264760971069, + "step": 3037 + }, + { + "epoch": 0.7004842056721235, + "grad_norm": 1.4959257460685322, + "learning_rate": 1.5466625449744134e-06, + "loss": 0.49528858065605164, + "step": 3038 + }, + { + "epoch": 0.7007147798017063, + "grad_norm": 1.1403876193260207, + "learning_rate": 1.5463432550004358e-06, + "loss": 0.466439425945282, + "step": 3039 + }, + { + "epoch": 0.7009453539312889, + "grad_norm": 1.1012676712945453, + "learning_rate": 1.5460238856099292e-06, + "loss": 0.4196532368659973, + "step": 3040 + }, + { + "epoch": 0.7011759280608716, + "grad_norm": 1.40353983379054, + "learning_rate": 1.5457044368493173e-06, + "loss": 0.47679999470710754, + "step": 3041 + }, + { + "epoch": 0.7014065021904542, + "grad_norm": 1.2594197008827683, + "learning_rate": 1.5453849087650346e-06, + "loss": 0.4368046522140503, + "step": 3042 + }, + { + 
"epoch": 0.7016370763200369, + "grad_norm": 1.2211703865137815, + "learning_rate": 1.5450653014035285e-06, + "loss": 0.45165273547172546, + "step": 3043 + }, + { + "epoch": 0.7018676504496195, + "grad_norm": 1.1456058151260982, + "learning_rate": 1.5447456148112563e-06, + "loss": 0.44813454151153564, + "step": 3044 + }, + { + "epoch": 0.7020982245792022, + "grad_norm": 1.269275990698592, + "learning_rate": 1.5444258490346882e-06, + "loss": 0.44681504368782043, + "step": 3045 + }, + { + "epoch": 0.7023287987087848, + "grad_norm": 1.3036360811480283, + "learning_rate": 1.5441060041203057e-06, + "loss": 0.44788169860839844, + "step": 3046 + }, + { + "epoch": 0.7025593728383676, + "grad_norm": 1.3232925218771132, + "learning_rate": 1.5437860801146013e-06, + "loss": 0.3754178285598755, + "step": 3047 + }, + { + "epoch": 0.7027899469679502, + "grad_norm": 1.001044690167693, + "learning_rate": 1.5434660770640787e-06, + "loss": 0.3582305908203125, + "step": 3048 + }, + { + "epoch": 0.7030205210975329, + "grad_norm": 1.3449464333610996, + "learning_rate": 1.543145995015254e-06, + "loss": 0.42649000883102417, + "step": 3049 + }, + { + "epoch": 0.7032510952271155, + "grad_norm": 1.2880551855073363, + "learning_rate": 1.5428258340146543e-06, + "loss": 0.5164098143577576, + "step": 3050 + }, + { + "epoch": 0.7034816693566982, + "grad_norm": 1.2456398303270981, + "learning_rate": 1.5425055941088181e-06, + "loss": 0.4193584620952606, + "step": 3051 + }, + { + "epoch": 0.7037122434862808, + "grad_norm": 1.3825374305431077, + "learning_rate": 1.5421852753442957e-06, + "loss": 0.5230807662010193, + "step": 3052 + }, + { + "epoch": 0.7039428176158635, + "grad_norm": 1.466681367301644, + "learning_rate": 1.5418648777676488e-06, + "loss": 0.4573478102684021, + "step": 3053 + }, + { + "epoch": 0.7041733917454461, + "grad_norm": 1.1343088214156583, + "learning_rate": 1.5415444014254503e-06, + "loss": 0.47031426429748535, + "step": 3054 + }, + { + "epoch": 0.7044039658750288, + 
"grad_norm": 1.3599997528041683, + "learning_rate": 1.5412238463642844e-06, + "loss": 0.4499198794364929, + "step": 3055 + }, + { + "epoch": 0.7046345400046115, + "grad_norm": 1.4014132343100743, + "learning_rate": 1.5409032126307477e-06, + "loss": 0.4775800406932831, + "step": 3056 + }, + { + "epoch": 0.7048651141341942, + "grad_norm": 1.4264420683743835, + "learning_rate": 1.540582500271447e-06, + "loss": 0.535969614982605, + "step": 3057 + }, + { + "epoch": 0.7050956882637768, + "grad_norm": 1.3808494199198469, + "learning_rate": 1.5402617093330013e-06, + "loss": 0.5358741283416748, + "step": 3058 + }, + { + "epoch": 0.7053262623933595, + "grad_norm": 1.2492824573732915, + "learning_rate": 1.5399408398620406e-06, + "loss": 0.5392765998840332, + "step": 3059 + }, + { + "epoch": 0.7055568365229421, + "grad_norm": 1.275809486426879, + "learning_rate": 1.5396198919052066e-06, + "loss": 0.47976016998291016, + "step": 3060 + }, + { + "epoch": 0.7057874106525248, + "grad_norm": 1.2226120465526635, + "learning_rate": 1.5392988655091526e-06, + "loss": 0.39919328689575195, + "step": 3061 + }, + { + "epoch": 0.7060179847821074, + "grad_norm": 1.6011371731611943, + "learning_rate": 1.538977760720543e-06, + "loss": 0.4503553509712219, + "step": 3062 + }, + { + "epoch": 0.7062485589116901, + "grad_norm": 1.2363983734925073, + "learning_rate": 1.5386565775860531e-06, + "loss": 0.4570388197898865, + "step": 3063 + }, + { + "epoch": 0.7064791330412727, + "grad_norm": 1.2640125065615475, + "learning_rate": 1.5383353161523706e-06, + "loss": 0.54588782787323, + "step": 3064 + }, + { + "epoch": 0.7067097071708555, + "grad_norm": 1.3495245665399438, + "learning_rate": 1.5380139764661945e-06, + "loss": 0.40369170904159546, + "step": 3065 + }, + { + "epoch": 0.7069402813004381, + "grad_norm": 1.40505470554238, + "learning_rate": 1.5376925585742341e-06, + "loss": 0.5079206228256226, + "step": 3066 + }, + { + "epoch": 0.7071708554300208, + "grad_norm": 1.2407138703812135, + 
"learning_rate": 1.5373710625232107e-06, + "loss": 0.41418159008026123, + "step": 3067 + }, + { + "epoch": 0.7074014295596034, + "grad_norm": 1.2523103492462024, + "learning_rate": 1.5370494883598575e-06, + "loss": 0.4546199142932892, + "step": 3068 + }, + { + "epoch": 0.7076320036891861, + "grad_norm": 1.1794904786936184, + "learning_rate": 1.5367278361309183e-06, + "loss": 0.48041367530822754, + "step": 3069 + }, + { + "epoch": 0.7078625778187687, + "grad_norm": 1.3468711432386478, + "learning_rate": 1.5364061058831486e-06, + "loss": 0.47676384449005127, + "step": 3070 + }, + { + "epoch": 0.7080931519483514, + "grad_norm": 1.1888236379295274, + "learning_rate": 1.5360842976633148e-06, + "loss": 0.47341692447662354, + "step": 3071 + }, + { + "epoch": 0.708323726077934, + "grad_norm": 1.3227579498868685, + "learning_rate": 1.5357624115181956e-06, + "loss": 0.38436269760131836, + "step": 3072 + }, + { + "epoch": 0.7085543002075168, + "grad_norm": 1.4827200040386144, + "learning_rate": 1.5354404474945798e-06, + "loss": 0.5369806289672852, + "step": 3073 + }, + { + "epoch": 0.7087848743370994, + "grad_norm": 1.404704151375413, + "learning_rate": 1.535118405639269e-06, + "loss": 0.5314677953720093, + "step": 3074 + }, + { + "epoch": 0.7090154484666821, + "grad_norm": 1.1927563297298747, + "learning_rate": 1.5347962859990742e-06, + "loss": 0.49233007431030273, + "step": 3075 + }, + { + "epoch": 0.7092460225962647, + "grad_norm": 1.3477590726762334, + "learning_rate": 1.5344740886208194e-06, + "loss": 0.4834766983985901, + "step": 3076 + }, + { + "epoch": 0.7094765967258474, + "grad_norm": 1.432138793969477, + "learning_rate": 1.534151813551339e-06, + "loss": 0.505670428276062, + "step": 3077 + }, + { + "epoch": 0.70970717085543, + "grad_norm": 1.3290190812046396, + "learning_rate": 1.533829460837479e-06, + "loss": 0.5256010293960571, + "step": 3078 + }, + { + "epoch": 0.7099377449850127, + "grad_norm": 1.463108893430833, + "learning_rate": 1.5335070305260967e-06, + 
"loss": 0.4186098873615265, + "step": 3079 + }, + { + "epoch": 0.7101683191145953, + "grad_norm": 1.2048981968166306, + "learning_rate": 1.5331845226640607e-06, + "loss": 0.4034464359283447, + "step": 3080 + }, + { + "epoch": 0.710398893244178, + "grad_norm": 1.346673761335588, + "learning_rate": 1.5328619372982505e-06, + "loss": 0.4521537721157074, + "step": 3081 + }, + { + "epoch": 0.7106294673737606, + "grad_norm": 1.5250190734837208, + "learning_rate": 1.5325392744755574e-06, + "loss": 0.4919602572917938, + "step": 3082 + }, + { + "epoch": 0.7108600415033434, + "grad_norm": 1.1734195700346683, + "learning_rate": 1.5322165342428835e-06, + "loss": 0.4464415907859802, + "step": 3083 + }, + { + "epoch": 0.711090615632926, + "grad_norm": 1.2610549525832775, + "learning_rate": 1.5318937166471427e-06, + "loss": 0.47444385290145874, + "step": 3084 + }, + { + "epoch": 0.7113211897625087, + "grad_norm": 1.1782687896584645, + "learning_rate": 1.5315708217352595e-06, + "loss": 0.4014730453491211, + "step": 3085 + }, + { + "epoch": 0.7115517638920913, + "grad_norm": 1.1806273152667501, + "learning_rate": 1.5312478495541703e-06, + "loss": 0.4528852701187134, + "step": 3086 + }, + { + "epoch": 0.711782338021674, + "grad_norm": 1.4716504682159035, + "learning_rate": 1.5309248001508216e-06, + "loss": 0.4919637441635132, + "step": 3087 + }, + { + "epoch": 0.7120129121512566, + "grad_norm": 1.3824738486934829, + "learning_rate": 1.530601673572173e-06, + "loss": 0.5630985498428345, + "step": 3088 + }, + { + "epoch": 0.7122434862808393, + "grad_norm": 1.4462966182250279, + "learning_rate": 1.5302784698651935e-06, + "loss": 0.3920522630214691, + "step": 3089 + }, + { + "epoch": 0.7124740604104219, + "grad_norm": 1.3282823423467587, + "learning_rate": 1.5299551890768642e-06, + "loss": 0.5502145290374756, + "step": 3090 + }, + { + "epoch": 0.7127046345400047, + "grad_norm": 1.2547204060730106, + "learning_rate": 1.5296318312541767e-06, + "loss": 0.4839448928833008, + "step": 3091 + }, 
+ { + "epoch": 0.7129352086695873, + "grad_norm": 1.3486430423834108, + "learning_rate": 1.5293083964441355e-06, + "loss": 0.5029735565185547, + "step": 3092 + }, + { + "epoch": 0.71316578279917, + "grad_norm": 1.2299483009823662, + "learning_rate": 1.5289848846937544e-06, + "loss": 0.4724803566932678, + "step": 3093 + }, + { + "epoch": 0.7133963569287526, + "grad_norm": 1.1015042263762262, + "learning_rate": 1.528661296050059e-06, + "loss": 0.4609840512275696, + "step": 3094 + }, + { + "epoch": 0.7136269310583353, + "grad_norm": 1.4829248198628113, + "learning_rate": 1.5283376305600863e-06, + "loss": 0.49763959646224976, + "step": 3095 + }, + { + "epoch": 0.7138575051879179, + "grad_norm": 1.2090810088725865, + "learning_rate": 1.5280138882708847e-06, + "loss": 0.42384523153305054, + "step": 3096 + }, + { + "epoch": 0.7140880793175006, + "grad_norm": 1.3550047979469209, + "learning_rate": 1.5276900692295134e-06, + "loss": 0.5034611225128174, + "step": 3097 + }, + { + "epoch": 0.7143186534470832, + "grad_norm": 1.3321189275554508, + "learning_rate": 1.5273661734830423e-06, + "loss": 0.5617417097091675, + "step": 3098 + }, + { + "epoch": 0.714549227576666, + "grad_norm": 1.320340684589947, + "learning_rate": 1.527042201078553e-06, + "loss": 0.4562014937400818, + "step": 3099 + }, + { + "epoch": 0.7147798017062486, + "grad_norm": 1.6932438225785027, + "learning_rate": 1.5267181520631386e-06, + "loss": 0.5626288056373596, + "step": 3100 + }, + { + "epoch": 0.7150103758358313, + "grad_norm": 1.4526784651389733, + "learning_rate": 1.5263940264839028e-06, + "loss": 0.4882054924964905, + "step": 3101 + }, + { + "epoch": 0.7152409499654139, + "grad_norm": 1.523666745804484, + "learning_rate": 1.5260698243879603e-06, + "loss": 0.5371058583259583, + "step": 3102 + }, + { + "epoch": 0.7154715240949966, + "grad_norm": 1.1599798656247362, + "learning_rate": 1.5257455458224368e-06, + "loss": 0.4683259129524231, + "step": 3103 + }, + { + "epoch": 0.7157020982245792, + 
"grad_norm": 1.223986374608111, + "learning_rate": 1.5254211908344704e-06, + "loss": 0.4894726872444153, + "step": 3104 + }, + { + "epoch": 0.7159326723541619, + "grad_norm": 1.3226351110788483, + "learning_rate": 1.5250967594712089e-06, + "loss": 0.4517880082130432, + "step": 3105 + }, + { + "epoch": 0.7161632464837445, + "grad_norm": 1.162528176566508, + "learning_rate": 1.5247722517798118e-06, + "loss": 0.5062767267227173, + "step": 3106 + }, + { + "epoch": 0.7163938206133272, + "grad_norm": 1.6349408984878264, + "learning_rate": 1.5244476678074494e-06, + "loss": 0.5029302835464478, + "step": 3107 + }, + { + "epoch": 0.7166243947429098, + "grad_norm": 1.3765367207185526, + "learning_rate": 1.5241230076013035e-06, + "loss": 0.44112175703048706, + "step": 3108 + }, + { + "epoch": 0.7168549688724926, + "grad_norm": 1.3847966627377115, + "learning_rate": 1.5237982712085665e-06, + "loss": 0.43693509697914124, + "step": 3109 + }, + { + "epoch": 0.7170855430020752, + "grad_norm": 1.3509946026255297, + "learning_rate": 1.5234734586764422e-06, + "loss": 0.4544166922569275, + "step": 3110 + }, + { + "epoch": 0.7173161171316579, + "grad_norm": 1.1949924477500942, + "learning_rate": 1.5231485700521451e-06, + "loss": 0.5470178127288818, + "step": 3111 + }, + { + "epoch": 0.7175466912612405, + "grad_norm": 1.5007057362656466, + "learning_rate": 1.5228236053829017e-06, + "loss": 0.5215972065925598, + "step": 3112 + }, + { + "epoch": 0.7177772653908232, + "grad_norm": 1.1400006826022246, + "learning_rate": 1.5224985647159488e-06, + "loss": 0.3922381103038788, + "step": 3113 + }, + { + "epoch": 0.7180078395204058, + "grad_norm": 1.3432802481675237, + "learning_rate": 1.5221734480985341e-06, + "loss": 0.47455158829689026, + "step": 3114 + }, + { + "epoch": 0.7182384136499884, + "grad_norm": 1.517078162476979, + "learning_rate": 1.5218482555779164e-06, + "loss": 0.5776175260543823, + "step": 3115 + }, + { + "epoch": 0.7184689877795711, + "grad_norm": 1.4757174936390305, + 
"learning_rate": 1.521522987201366e-06, + "loss": 0.40414175391197205, + "step": 3116 + }, + { + "epoch": 0.7186995619091537, + "grad_norm": 1.5441693701407133, + "learning_rate": 1.5211976430161643e-06, + "loss": 0.44597384333610535, + "step": 3117 + }, + { + "epoch": 0.7189301360387365, + "grad_norm": 1.6495022083145716, + "learning_rate": 1.5208722230696024e-06, + "loss": 0.50276118516922, + "step": 3118 + }, + { + "epoch": 0.7191607101683191, + "grad_norm": 1.255966386168249, + "learning_rate": 1.5205467274089844e-06, + "loss": 0.43281811475753784, + "step": 3119 + }, + { + "epoch": 0.7193912842979018, + "grad_norm": 1.196003407991791, + "learning_rate": 1.5202211560816243e-06, + "loss": 0.3796764016151428, + "step": 3120 + }, + { + "epoch": 0.7196218584274844, + "grad_norm": 1.1855608567240021, + "learning_rate": 1.5198955091348463e-06, + "loss": 0.47820231318473816, + "step": 3121 + }, + { + "epoch": 0.7198524325570671, + "grad_norm": 1.3809241508956476, + "learning_rate": 1.5195697866159875e-06, + "loss": 0.4737284779548645, + "step": 3122 + }, + { + "epoch": 0.7200830066866497, + "grad_norm": 1.3019928778593748, + "learning_rate": 1.519243988572394e-06, + "loss": 0.44652169942855835, + "step": 3123 + }, + { + "epoch": 0.7203135808162324, + "grad_norm": 1.0393403987452434, + "learning_rate": 1.518918115051425e-06, + "loss": 0.42702072858810425, + "step": 3124 + }, + { + "epoch": 0.720544154945815, + "grad_norm": 1.3835329760109338, + "learning_rate": 1.5185921661004483e-06, + "loss": 0.5003541707992554, + "step": 3125 + }, + { + "epoch": 0.7207747290753977, + "grad_norm": 1.3444035589789487, + "learning_rate": 1.518266141766845e-06, + "loss": 0.5045102834701538, + "step": 3126 + }, + { + "epoch": 0.7210053032049804, + "grad_norm": 1.3069630488439725, + "learning_rate": 1.5179400420980052e-06, + "loss": 0.46619412302970886, + "step": 3127 + }, + { + "epoch": 0.7212358773345631, + "grad_norm": 1.7755918931491346, + "learning_rate": 1.5176138671413314e-06, + 
"loss": 0.5006855726242065, + "step": 3128 + }, + { + "epoch": 0.7214664514641457, + "grad_norm": 1.4202077937995432, + "learning_rate": 1.5172876169442362e-06, + "loss": 0.4394634962081909, + "step": 3129 + }, + { + "epoch": 0.7216970255937284, + "grad_norm": 1.203576429459206, + "learning_rate": 1.5169612915541428e-06, + "loss": 0.49311593174934387, + "step": 3130 + }, + { + "epoch": 0.721927599723311, + "grad_norm": 1.2610358507024448, + "learning_rate": 1.5166348910184868e-06, + "loss": 0.38406768441200256, + "step": 3131 + }, + { + "epoch": 0.7221581738528937, + "grad_norm": 1.52088025341024, + "learning_rate": 1.5163084153847132e-06, + "loss": 0.547613799571991, + "step": 3132 + }, + { + "epoch": 0.7223887479824763, + "grad_norm": 1.4599825671580298, + "learning_rate": 1.515981864700279e-06, + "loss": 0.43875589966773987, + "step": 3133 + }, + { + "epoch": 0.722619322112059, + "grad_norm": 1.3276172293945816, + "learning_rate": 1.5156552390126516e-06, + "loss": 0.41515982151031494, + "step": 3134 + }, + { + "epoch": 0.7228498962416416, + "grad_norm": 1.400170522869638, + "learning_rate": 1.5153285383693088e-06, + "loss": 0.43297481536865234, + "step": 3135 + }, + { + "epoch": 0.7230804703712244, + "grad_norm": 1.3346402467183769, + "learning_rate": 1.5150017628177408e-06, + "loss": 0.5059916377067566, + "step": 3136 + }, + { + "epoch": 0.723311044500807, + "grad_norm": 1.4474439218451525, + "learning_rate": 1.514674912405447e-06, + "loss": 0.4776325225830078, + "step": 3137 + }, + { + "epoch": 0.7235416186303897, + "grad_norm": 1.4332410620248028, + "learning_rate": 1.5143479871799381e-06, + "loss": 0.4925272464752197, + "step": 3138 + }, + { + "epoch": 0.7237721927599723, + "grad_norm": 0.9806444224416654, + "learning_rate": 1.5140209871887368e-06, + "loss": 0.3825960159301758, + "step": 3139 + }, + { + "epoch": 0.724002766889555, + "grad_norm": 1.811554812935443, + "learning_rate": 1.513693912479376e-06, + "loss": 0.5582098960876465, + "step": 3140 + }, + { 
+ "epoch": 0.7242333410191376, + "grad_norm": 1.4229587145535472, + "learning_rate": 1.5133667630993983e-06, + "loss": 0.4079757630825043, + "step": 3141 + }, + { + "epoch": 0.7244639151487203, + "grad_norm": 1.3307764336864334, + "learning_rate": 1.513039539096359e-06, + "loss": 0.4996449947357178, + "step": 3142 + }, + { + "epoch": 0.7246944892783029, + "grad_norm": 1.2360600034220603, + "learning_rate": 1.5127122405178233e-06, + "loss": 0.4822157323360443, + "step": 3143 + }, + { + "epoch": 0.7249250634078857, + "grad_norm": 1.2687974509229507, + "learning_rate": 1.512384867411367e-06, + "loss": 0.43123728036880493, + "step": 3144 + }, + { + "epoch": 0.7251556375374683, + "grad_norm": 1.2723246094506335, + "learning_rate": 1.5120574198245776e-06, + "loss": 0.4942808151245117, + "step": 3145 + }, + { + "epoch": 0.725386211667051, + "grad_norm": 1.1117112525626116, + "learning_rate": 1.5117298978050525e-06, + "loss": 0.49165093898773193, + "step": 3146 + }, + { + "epoch": 0.7256167857966336, + "grad_norm": 1.2668452294382095, + "learning_rate": 1.5114023014004008e-06, + "loss": 0.4700804352760315, + "step": 3147 + }, + { + "epoch": 0.7258473599262163, + "grad_norm": 1.9638712043686382, + "learning_rate": 1.5110746306582413e-06, + "loss": 0.4703143835067749, + "step": 3148 + }, + { + "epoch": 0.7260779340557989, + "grad_norm": 1.2418379131661055, + "learning_rate": 1.5107468856262048e-06, + "loss": 0.47312211990356445, + "step": 3149 + }, + { + "epoch": 0.7263085081853816, + "grad_norm": 1.3558937860977873, + "learning_rate": 1.5104190663519323e-06, + "loss": 0.49607813358306885, + "step": 3150 + }, + { + "epoch": 0.7265390823149642, + "grad_norm": 1.2747447528869889, + "learning_rate": 1.5100911728830754e-06, + "loss": 0.4401499629020691, + "step": 3151 + }, + { + "epoch": 0.7267696564445469, + "grad_norm": 1.3050498169083122, + "learning_rate": 1.5097632052672973e-06, + "loss": 0.4979579448699951, + "step": 3152 + }, + { + "epoch": 0.7270002305741295, + 
"grad_norm": 1.1477032098667286, + "learning_rate": 1.5094351635522706e-06, + "loss": 0.42917048931121826, + "step": 3153 + }, + { + "epoch": 0.7272308047037123, + "grad_norm": 1.2688450847611672, + "learning_rate": 1.50910704778568e-06, + "loss": 0.41664260625839233, + "step": 3154 + }, + { + "epoch": 0.7274613788332949, + "grad_norm": 1.4083630490412662, + "learning_rate": 1.5087788580152206e-06, + "loss": 0.5000253915786743, + "step": 3155 + }, + { + "epoch": 0.7276919529628776, + "grad_norm": 1.2424572303309531, + "learning_rate": 1.5084505942885976e-06, + "loss": 0.5075093507766724, + "step": 3156 + }, + { + "epoch": 0.7279225270924602, + "grad_norm": 1.319578470826436, + "learning_rate": 1.508122256653528e-06, + "loss": 0.44975680112838745, + "step": 3157 + }, + { + "epoch": 0.7281531012220429, + "grad_norm": 1.1450711263341298, + "learning_rate": 1.5077938451577383e-06, + "loss": 0.44494926929473877, + "step": 3158 + }, + { + "epoch": 0.7283836753516255, + "grad_norm": 1.3333716905743178, + "learning_rate": 1.5074653598489673e-06, + "loss": 0.5664352178573608, + "step": 3159 + }, + { + "epoch": 0.7286142494812082, + "grad_norm": 1.1840094617058035, + "learning_rate": 1.507136800774963e-06, + "loss": 0.5694705247879028, + "step": 3160 + }, + { + "epoch": 0.7288448236107908, + "grad_norm": 1.5658434570152957, + "learning_rate": 1.506808167983485e-06, + "loss": 0.5121151804924011, + "step": 3161 + }, + { + "epoch": 0.7290753977403736, + "grad_norm": 1.3559529766390859, + "learning_rate": 1.5064794615223034e-06, + "loss": 0.45935380458831787, + "step": 3162 + }, + { + "epoch": 0.7293059718699562, + "grad_norm": 1.2036749528520703, + "learning_rate": 1.506150681439199e-06, + "loss": 0.517521858215332, + "step": 3163 + }, + { + "epoch": 0.7295365459995389, + "grad_norm": 1.271352713883254, + "learning_rate": 1.5058218277819638e-06, + "loss": 0.5078546404838562, + "step": 3164 + }, + { + "epoch": 0.7297671201291215, + "grad_norm": 1.4877111530715366, + 
"learning_rate": 1.5054929005983992e-06, + "loss": 0.47892552614212036, + "step": 3165 + }, + { + "epoch": 0.7299976942587042, + "grad_norm": 1.5569470487033794, + "learning_rate": 1.5051638999363185e-06, + "loss": 0.48825597763061523, + "step": 3166 + }, + { + "epoch": 0.7302282683882868, + "grad_norm": 1.2181600327145499, + "learning_rate": 1.5048348258435457e-06, + "loss": 0.488031804561615, + "step": 3167 + }, + { + "epoch": 0.7304588425178695, + "grad_norm": 1.178638754387744, + "learning_rate": 1.5045056783679143e-06, + "loss": 0.4669504761695862, + "step": 3168 + }, + { + "epoch": 0.7306894166474521, + "grad_norm": 1.364305786110939, + "learning_rate": 1.5041764575572695e-06, + "loss": 0.45620614290237427, + "step": 3169 + }, + { + "epoch": 0.7309199907770348, + "grad_norm": 1.4607481202185084, + "learning_rate": 1.5038471634594667e-06, + "loss": 0.4271177649497986, + "step": 3170 + }, + { + "epoch": 0.7311505649066175, + "grad_norm": 1.4441980354968733, + "learning_rate": 1.5035177961223726e-06, + "loss": 0.5170531272888184, + "step": 3171 + }, + { + "epoch": 0.7313811390362002, + "grad_norm": 1.046719642579895, + "learning_rate": 1.5031883555938638e-06, + "loss": 0.4261493682861328, + "step": 3172 + }, + { + "epoch": 0.7316117131657828, + "grad_norm": 1.4357281868096983, + "learning_rate": 1.502858841921828e-06, + "loss": 0.4958994686603546, + "step": 3173 + }, + { + "epoch": 0.7318422872953655, + "grad_norm": 1.631538220078115, + "learning_rate": 1.502529255154163e-06, + "loss": 0.49798572063446045, + "step": 3174 + }, + { + "epoch": 0.7320728614249481, + "grad_norm": 1.3524076496726538, + "learning_rate": 1.502199595338778e-06, + "loss": 0.4067850708961487, + "step": 3175 + }, + { + "epoch": 0.7323034355545308, + "grad_norm": 1.2000506588677564, + "learning_rate": 1.5018698625235916e-06, + "loss": 0.4680994153022766, + "step": 3176 + }, + { + "epoch": 0.7325340096841134, + "grad_norm": 1.3054261583860276, + "learning_rate": 1.501540056756535e-06, + 
"loss": 0.49181580543518066, + "step": 3177 + }, + { + "epoch": 0.7327645838136961, + "grad_norm": 1.485479754545564, + "learning_rate": 1.501210178085548e-06, + "loss": 0.5425546169281006, + "step": 3178 + }, + { + "epoch": 0.7329951579432787, + "grad_norm": 1.1514309763496005, + "learning_rate": 1.500880226558582e-06, + "loss": 0.4869355261325836, + "step": 3179 + }, + { + "epoch": 0.7332257320728615, + "grad_norm": 1.5737536993523387, + "learning_rate": 1.500550202223599e-06, + "loss": 0.5157885551452637, + "step": 3180 + }, + { + "epoch": 0.7334563062024441, + "grad_norm": 1.4471157017235972, + "learning_rate": 1.5002201051285707e-06, + "loss": 0.528350293636322, + "step": 3181 + }, + { + "epoch": 0.7336868803320268, + "grad_norm": 1.0924579051997452, + "learning_rate": 1.499889935321481e-06, + "loss": 0.3963279128074646, + "step": 3182 + }, + { + "epoch": 0.7339174544616094, + "grad_norm": 1.0536411378011648, + "learning_rate": 1.499559692850323e-06, + "loss": 0.36777108907699585, + "step": 3183 + }, + { + "epoch": 0.7341480285911921, + "grad_norm": 1.3572066258310391, + "learning_rate": 1.4992293777631004e-06, + "loss": 0.4592905044555664, + "step": 3184 + }, + { + "epoch": 0.7343786027207747, + "grad_norm": 1.3801194879873266, + "learning_rate": 1.4988989901078285e-06, + "loss": 0.458257257938385, + "step": 3185 + }, + { + "epoch": 0.7346091768503574, + "grad_norm": 1.2823442631336313, + "learning_rate": 1.4985685299325316e-06, + "loss": 0.4844989478588104, + "step": 3186 + }, + { + "epoch": 0.73483975097994, + "grad_norm": 1.3019212093413413, + "learning_rate": 1.498237997285247e-06, + "loss": 0.381417453289032, + "step": 3187 + }, + { + "epoch": 0.7350703251095227, + "grad_norm": 1.267517645310936, + "learning_rate": 1.4979073922140196e-06, + "loss": 0.42452555894851685, + "step": 3188 + }, + { + "epoch": 0.7353008992391054, + "grad_norm": 1.2143530957836637, + "learning_rate": 1.4975767147669063e-06, + "loss": 0.4660685956478119, + "step": 3189 + }, + { + 
"epoch": 0.7355314733686881, + "grad_norm": 1.243568614271109, + "learning_rate": 1.4972459649919748e-06, + "loss": 0.4332653880119324, + "step": 3190 + }, + { + "epoch": 0.7357620474982707, + "grad_norm": 1.4818958085574696, + "learning_rate": 1.496915142937303e-06, + "loss": 0.5580132007598877, + "step": 3191 + }, + { + "epoch": 0.7359926216278534, + "grad_norm": 1.102415574688255, + "learning_rate": 1.4965842486509792e-06, + "loss": 0.43711793422698975, + "step": 3192 + }, + { + "epoch": 0.736223195757436, + "grad_norm": 1.1786805187530485, + "learning_rate": 1.496253282181102e-06, + "loss": 0.44969767332077026, + "step": 3193 + }, + { + "epoch": 0.7364537698870187, + "grad_norm": 1.5017804708887366, + "learning_rate": 1.4959222435757809e-06, + "loss": 0.5288668870925903, + "step": 3194 + }, + { + "epoch": 0.7366843440166013, + "grad_norm": 1.2442315862489326, + "learning_rate": 1.4955911328831353e-06, + "loss": 0.45993220806121826, + "step": 3195 + }, + { + "epoch": 0.736914918146184, + "grad_norm": 1.6618645292728147, + "learning_rate": 1.4952599501512963e-06, + "loss": 0.5360512733459473, + "step": 3196 + }, + { + "epoch": 0.7371454922757666, + "grad_norm": 1.2833906478614454, + "learning_rate": 1.4949286954284044e-06, + "loss": 0.3923282325267792, + "step": 3197 + }, + { + "epoch": 0.7373760664053494, + "grad_norm": 1.2830570803742403, + "learning_rate": 1.4945973687626103e-06, + "loss": 0.5051449537277222, + "step": 3198 + }, + { + "epoch": 0.737606640534932, + "grad_norm": 1.288727241344276, + "learning_rate": 1.4942659702020763e-06, + "loss": 0.5035187602043152, + "step": 3199 + }, + { + "epoch": 0.7378372146645147, + "grad_norm": 1.1929311231536464, + "learning_rate": 1.4939344997949742e-06, + "loss": 0.4922195076942444, + "step": 3200 + }, + { + "epoch": 0.7380677887940973, + "grad_norm": 1.1654414900260779, + "learning_rate": 1.4936029575894865e-06, + "loss": 0.49664247035980225, + "step": 3201 + }, + { + "epoch": 0.73829836292368, + "grad_norm": 
1.2090144084254086, + "learning_rate": 1.4932713436338065e-06, + "loss": 0.4240155816078186, + "step": 3202 + }, + { + "epoch": 0.7385289370532626, + "grad_norm": 1.150655085488804, + "learning_rate": 1.4929396579761376e-06, + "loss": 0.3830781579017639, + "step": 3203 + }, + { + "epoch": 0.7387595111828453, + "grad_norm": 1.2626520886498587, + "learning_rate": 1.4926079006646936e-06, + "loss": 0.37983447313308716, + "step": 3204 + }, + { + "epoch": 0.7389900853124279, + "grad_norm": 1.37294258180721, + "learning_rate": 1.4922760717476989e-06, + "loss": 0.4680769443511963, + "step": 3205 + }, + { + "epoch": 0.7392206594420107, + "grad_norm": 1.0992782157194299, + "learning_rate": 1.4919441712733878e-06, + "loss": 0.3801664710044861, + "step": 3206 + }, + { + "epoch": 0.7394512335715933, + "grad_norm": 1.2101909370157682, + "learning_rate": 1.4916121992900062e-06, + "loss": 0.5506627559661865, + "step": 3207 + }, + { + "epoch": 0.739681807701176, + "grad_norm": 1.4326210599966231, + "learning_rate": 1.4912801558458087e-06, + "loss": 0.4976215660572052, + "step": 3208 + }, + { + "epoch": 0.7399123818307586, + "grad_norm": 1.269851030633043, + "learning_rate": 1.4909480409890615e-06, + "loss": 0.42806485295295715, + "step": 3209 + }, + { + "epoch": 0.7401429559603413, + "grad_norm": 1.5738327378318604, + "learning_rate": 1.4906158547680413e-06, + "loss": 0.3850712180137634, + "step": 3210 + }, + { + "epoch": 0.7403735300899239, + "grad_norm": 1.1706966056418486, + "learning_rate": 1.4902835972310342e-06, + "loss": 0.4356945753097534, + "step": 3211 + }, + { + "epoch": 0.7406041042195066, + "grad_norm": 1.3196733008465567, + "learning_rate": 1.4899512684263373e-06, + "loss": 0.4806904196739197, + "step": 3212 + }, + { + "epoch": 0.7408346783490892, + "grad_norm": 1.6634902313002624, + "learning_rate": 1.489618868402258e-06, + "loss": 0.544597327709198, + "step": 3213 + }, + { + "epoch": 0.7410652524786719, + "grad_norm": 1.2400106880376924, + "learning_rate": 
1.4892863972071141e-06, + "loss": 0.39847469329833984, + "step": 3214 + }, + { + "epoch": 0.7412958266082545, + "grad_norm": 1.165782132875825, + "learning_rate": 1.4889538548892336e-06, + "loss": 0.4959847331047058, + "step": 3215 + }, + { + "epoch": 0.7415264007378373, + "grad_norm": 1.1727701470106202, + "learning_rate": 1.488621241496955e-06, + "loss": 0.3839089870452881, + "step": 3216 + }, + { + "epoch": 0.7417569748674199, + "grad_norm": 1.4119004491894294, + "learning_rate": 1.4882885570786266e-06, + "loss": 0.5187599658966064, + "step": 3217 + }, + { + "epoch": 0.7419875489970026, + "grad_norm": 1.1715648701346035, + "learning_rate": 1.4879558016826082e-06, + "loss": 0.45735663175582886, + "step": 3218 + }, + { + "epoch": 0.7422181231265852, + "grad_norm": 1.2093385209256575, + "learning_rate": 1.4876229753572687e-06, + "loss": 0.5635267496109009, + "step": 3219 + }, + { + "epoch": 0.7424486972561679, + "grad_norm": 1.5737635031230153, + "learning_rate": 1.4872900781509876e-06, + "loss": 0.5255833268165588, + "step": 3220 + }, + { + "epoch": 0.7426792713857505, + "grad_norm": 1.3608013352784492, + "learning_rate": 1.486957110112155e-06, + "loss": 0.4563497304916382, + "step": 3221 + }, + { + "epoch": 0.7429098455153332, + "grad_norm": 1.2494840959741684, + "learning_rate": 1.4866240712891714e-06, + "loss": 0.3737669885158539, + "step": 3222 + }, + { + "epoch": 0.7431404196449158, + "grad_norm": 1.3341042787752078, + "learning_rate": 1.4862909617304473e-06, + "loss": 0.48965659737586975, + "step": 3223 + }, + { + "epoch": 0.7433709937744986, + "grad_norm": 1.138792861067833, + "learning_rate": 1.4859577814844036e-06, + "loss": 0.40867483615875244, + "step": 3224 + }, + { + "epoch": 0.7436015679040812, + "grad_norm": 1.6873709244395776, + "learning_rate": 1.4856245305994711e-06, + "loss": 0.5870566368103027, + "step": 3225 + }, + { + "epoch": 0.7438321420336638, + "grad_norm": 1.9479920905112817, + "learning_rate": 1.4852912091240914e-06, + "loss": 
0.5424025654792786, + "step": 3226 + }, + { + "epoch": 0.7440627161632465, + "grad_norm": 1.3117337551828157, + "learning_rate": 1.4849578171067166e-06, + "loss": 0.5305285453796387, + "step": 3227 + }, + { + "epoch": 0.7442932902928291, + "grad_norm": 1.6524409541791285, + "learning_rate": 1.4846243545958078e-06, + "loss": 0.4189227819442749, + "step": 3228 + }, + { + "epoch": 0.7445238644224118, + "grad_norm": 1.3163917938675591, + "learning_rate": 1.4842908216398379e-06, + "loss": 0.44568121433258057, + "step": 3229 + }, + { + "epoch": 0.7447544385519944, + "grad_norm": 1.57546318763007, + "learning_rate": 1.4839572182872883e-06, + "loss": 0.5177523493766785, + "step": 3230 + }, + { + "epoch": 0.7449850126815771, + "grad_norm": 2.0231485633083213, + "learning_rate": 1.4836235445866528e-06, + "loss": 0.5100630521774292, + "step": 3231 + }, + { + "epoch": 0.7452155868111597, + "grad_norm": 1.2988766977840327, + "learning_rate": 1.4832898005864336e-06, + "loss": 0.45731791853904724, + "step": 3232 + }, + { + "epoch": 0.7454461609407425, + "grad_norm": 1.4418312758556044, + "learning_rate": 1.4829559863351437e-06, + "loss": 0.5161736011505127, + "step": 3233 + }, + { + "epoch": 0.7456767350703251, + "grad_norm": 1.2131599613200943, + "learning_rate": 1.4826221018813067e-06, + "loss": 0.4778611660003662, + "step": 3234 + }, + { + "epoch": 0.7459073091999078, + "grad_norm": 1.208766404583587, + "learning_rate": 1.482288147273456e-06, + "loss": 0.467506468296051, + "step": 3235 + }, + { + "epoch": 0.7461378833294904, + "grad_norm": 1.3564852786094337, + "learning_rate": 1.4819541225601352e-06, + "loss": 0.5061084032058716, + "step": 3236 + }, + { + "epoch": 0.7463684574590731, + "grad_norm": 1.3693293129226278, + "learning_rate": 1.4816200277898983e-06, + "loss": 0.5066365599632263, + "step": 3237 + }, + { + "epoch": 0.7465990315886557, + "grad_norm": 1.2091939411250054, + "learning_rate": 1.4812858630113093e-06, + "loss": 0.44285398721694946, + "step": 3238 + }, + { + 
"epoch": 0.7468296057182384, + "grad_norm": 1.3395886619598594, + "learning_rate": 1.4809516282729426e-06, + "loss": 0.5325936079025269, + "step": 3239 + }, + { + "epoch": 0.747060179847821, + "grad_norm": 1.2575363206535257, + "learning_rate": 1.4806173236233818e-06, + "loss": 0.37296950817108154, + "step": 3240 + }, + { + "epoch": 0.7472907539774037, + "grad_norm": 1.3466058050144787, + "learning_rate": 1.4802829491112228e-06, + "loss": 0.4596887230873108, + "step": 3241 + }, + { + "epoch": 0.7475213281069863, + "grad_norm": 1.4791727382559166, + "learning_rate": 1.4799485047850693e-06, + "loss": 0.4344385266304016, + "step": 3242 + }, + { + "epoch": 0.7477519022365691, + "grad_norm": 1.235031250671636, + "learning_rate": 1.4796139906935365e-06, + "loss": 0.458631306886673, + "step": 3243 + }, + { + "epoch": 0.7479824763661517, + "grad_norm": 1.3676048590005543, + "learning_rate": 1.4792794068852494e-06, + "loss": 0.5425032377243042, + "step": 3244 + }, + { + "epoch": 0.7482130504957344, + "grad_norm": 1.1764717045773245, + "learning_rate": 1.478944753408843e-06, + "loss": 0.4240065813064575, + "step": 3245 + }, + { + "epoch": 0.748443624625317, + "grad_norm": 1.3527342191314002, + "learning_rate": 1.478610030312963e-06, + "loss": 0.5533365607261658, + "step": 3246 + }, + { + "epoch": 0.7486741987548997, + "grad_norm": 1.4574041701217884, + "learning_rate": 1.4782752376462647e-06, + "loss": 0.4089345335960388, + "step": 3247 + }, + { + "epoch": 0.7489047728844823, + "grad_norm": 1.3793731191813918, + "learning_rate": 1.4779403754574131e-06, + "loss": 0.5098259449005127, + "step": 3248 + }, + { + "epoch": 0.749135347014065, + "grad_norm": 1.3041128935188901, + "learning_rate": 1.4776054437950842e-06, + "loss": 0.4615677297115326, + "step": 3249 + }, + { + "epoch": 0.7493659211436476, + "grad_norm": 1.3216071057711354, + "learning_rate": 1.4772704427079639e-06, + "loss": 0.460266649723053, + "step": 3250 + }, + { + "epoch": 0.7495964952732304, + "grad_norm": 
1.4054347579351087, + "learning_rate": 1.4769353722447476e-06, + "loss": 0.4727064371109009, + "step": 3251 + }, + { + "epoch": 0.749827069402813, + "grad_norm": 1.3954753679563598, + "learning_rate": 1.4766002324541411e-06, + "loss": 0.4733152985572815, + "step": 3252 + }, + { + "epoch": 0.7500576435323957, + "grad_norm": 1.408517900798552, + "learning_rate": 1.4762650233848609e-06, + "loss": 0.5055218935012817, + "step": 3253 + }, + { + "epoch": 0.7502882176619783, + "grad_norm": 1.3285058616446128, + "learning_rate": 1.4759297450856324e-06, + "loss": 0.6129124164581299, + "step": 3254 + }, + { + "epoch": 0.750518791791561, + "grad_norm": 1.6354094862337523, + "learning_rate": 1.4755943976051926e-06, + "loss": 0.46197545528411865, + "step": 3255 + }, + { + "epoch": 0.7507493659211436, + "grad_norm": 1.3239897164772563, + "learning_rate": 1.4752589809922868e-06, + "loss": 0.5227653980255127, + "step": 3256 + }, + { + "epoch": 0.7509799400507263, + "grad_norm": 1.4638577740242362, + "learning_rate": 1.4749234952956715e-06, + "loss": 0.5189518928527832, + "step": 3257 + }, + { + "epoch": 0.7512105141803089, + "grad_norm": 1.2059107130307087, + "learning_rate": 1.474587940564113e-06, + "loss": 0.4850584864616394, + "step": 3258 + }, + { + "epoch": 0.7514410883098916, + "grad_norm": 1.4809027704015267, + "learning_rate": 1.4742523168463876e-06, + "loss": 0.5218943357467651, + "step": 3259 + }, + { + "epoch": 0.7516716624394743, + "grad_norm": 1.130064311367936, + "learning_rate": 1.4739166241912814e-06, + "loss": 0.4311223030090332, + "step": 3260 + }, + { + "epoch": 0.751902236569057, + "grad_norm": 1.372801682112421, + "learning_rate": 1.473580862647591e-06, + "loss": 0.525306224822998, + "step": 3261 + }, + { + "epoch": 0.7521328106986396, + "grad_norm": 1.291063350632538, + "learning_rate": 1.4732450322641225e-06, + "loss": 0.506609320640564, + "step": 3262 + }, + { + "epoch": 0.7523633848282223, + "grad_norm": 1.4043846834415283, + "learning_rate": 
1.4729091330896926e-06, + "loss": 0.5477846264839172, + "step": 3263 + }, + { + "epoch": 0.7525939589578049, + "grad_norm": 1.1342853276703964, + "learning_rate": 1.4725731651731268e-06, + "loss": 0.48802629113197327, + "step": 3264 + }, + { + "epoch": 0.7528245330873876, + "grad_norm": 1.5090127096652195, + "learning_rate": 1.4722371285632626e-06, + "loss": 0.4774906635284424, + "step": 3265 + }, + { + "epoch": 0.7530551072169702, + "grad_norm": 1.4537920297241385, + "learning_rate": 1.4719010233089458e-06, + "loss": 0.4220488667488098, + "step": 3266 + }, + { + "epoch": 0.7532856813465529, + "grad_norm": 1.441465153643324, + "learning_rate": 1.4715648494590324e-06, + "loss": 0.43912187218666077, + "step": 3267 + }, + { + "epoch": 0.7535162554761355, + "grad_norm": 1.3653901674246531, + "learning_rate": 1.4712286070623892e-06, + "loss": 0.5302494764328003, + "step": 3268 + }, + { + "epoch": 0.7537468296057183, + "grad_norm": 1.3282339539348487, + "learning_rate": 1.4708922961678923e-06, + "loss": 0.4800306260585785, + "step": 3269 + }, + { + "epoch": 0.7539774037353009, + "grad_norm": 1.2634165352126685, + "learning_rate": 1.4705559168244275e-06, + "loss": 0.3993161618709564, + "step": 3270 + }, + { + "epoch": 0.7542079778648836, + "grad_norm": 1.446141365903489, + "learning_rate": 1.4702194690808916e-06, + "loss": 0.37037837505340576, + "step": 3271 + }, + { + "epoch": 0.7544385519944662, + "grad_norm": 1.3105522613811469, + "learning_rate": 1.4698829529861898e-06, + "loss": 0.44288602471351624, + "step": 3272 + }, + { + "epoch": 0.7546691261240489, + "grad_norm": 1.542566998549956, + "learning_rate": 1.469546368589239e-06, + "loss": 0.5480727553367615, + "step": 3273 + }, + { + "epoch": 0.7548997002536315, + "grad_norm": 1.5093924463506492, + "learning_rate": 1.4692097159389649e-06, + "loss": 0.4964104890823364, + "step": 3274 + }, + { + "epoch": 0.7551302743832142, + "grad_norm": 1.5912503319666471, + "learning_rate": 1.4688729950843033e-06, + "loss": 
0.4744144082069397, + "step": 3275 + }, + { + "epoch": 0.7553608485127968, + "grad_norm": 1.1258853516330976, + "learning_rate": 1.4685362060741997e-06, + "loss": 0.44675350189208984, + "step": 3276 + }, + { + "epoch": 0.7555914226423796, + "grad_norm": 1.4768191837188436, + "learning_rate": 1.46819934895761e-06, + "loss": 0.45261216163635254, + "step": 3277 + }, + { + "epoch": 0.7558219967719622, + "grad_norm": 1.3183121513891758, + "learning_rate": 1.4678624237835005e-06, + "loss": 0.4180977940559387, + "step": 3278 + }, + { + "epoch": 0.7560525709015449, + "grad_norm": 1.34629761070606, + "learning_rate": 1.4675254306008456e-06, + "loss": 0.39477843046188354, + "step": 3279 + }, + { + "epoch": 0.7562831450311275, + "grad_norm": 1.439585323315283, + "learning_rate": 1.467188369458631e-06, + "loss": 0.5033801198005676, + "step": 3280 + }, + { + "epoch": 0.7565137191607102, + "grad_norm": 1.3522884656136929, + "learning_rate": 1.4668512404058527e-06, + "loss": 0.5719846487045288, + "step": 3281 + }, + { + "epoch": 0.7567442932902928, + "grad_norm": 1.6993262990855147, + "learning_rate": 1.4665140434915147e-06, + "loss": 0.5198945999145508, + "step": 3282 + }, + { + "epoch": 0.7569748674198755, + "grad_norm": 1.6486008286234453, + "learning_rate": 1.4661767787646326e-06, + "loss": 0.4641912579536438, + "step": 3283 + }, + { + "epoch": 0.7572054415494581, + "grad_norm": 1.542363438136225, + "learning_rate": 1.4658394462742309e-06, + "loss": 0.44070225954055786, + "step": 3284 + }, + { + "epoch": 0.7574360156790408, + "grad_norm": 1.1923089532877131, + "learning_rate": 1.465502046069345e-06, + "loss": 0.4324581027030945, + "step": 3285 + }, + { + "epoch": 0.7576665898086234, + "grad_norm": 1.5168087965785, + "learning_rate": 1.4651645781990187e-06, + "loss": 0.5789060592651367, + "step": 3286 + }, + { + "epoch": 0.7578971639382062, + "grad_norm": 1.7886030443223944, + "learning_rate": 1.4648270427123068e-06, + "loss": 0.45642149448394775, + "step": 3287 + }, + { + 
"epoch": 0.7581277380677888, + "grad_norm": 1.222780244920245, + "learning_rate": 1.4644894396582732e-06, + "loss": 0.4587763547897339, + "step": 3288 + }, + { + "epoch": 0.7583583121973715, + "grad_norm": 1.570757900264253, + "learning_rate": 1.4641517690859924e-06, + "loss": 0.5472866892814636, + "step": 3289 + }, + { + "epoch": 0.7585888863269541, + "grad_norm": 1.4662287757114318, + "learning_rate": 1.4638140310445476e-06, + "loss": 0.5274207592010498, + "step": 3290 + }, + { + "epoch": 0.7588194604565368, + "grad_norm": 1.5317060576828687, + "learning_rate": 1.4634762255830326e-06, + "loss": 0.46280741691589355, + "step": 3291 + }, + { + "epoch": 0.7590500345861194, + "grad_norm": 1.357303550008307, + "learning_rate": 1.4631383527505515e-06, + "loss": 0.5395090579986572, + "step": 3292 + }, + { + "epoch": 0.7592806087157021, + "grad_norm": 1.3556569618907826, + "learning_rate": 1.4628004125962168e-06, + "loss": 0.49923229217529297, + "step": 3293 + }, + { + "epoch": 0.7595111828452847, + "grad_norm": 1.437270857620585, + "learning_rate": 1.462462405169152e-06, + "loss": 0.5414037108421326, + "step": 3294 + }, + { + "epoch": 0.7597417569748675, + "grad_norm": 1.2450139122326453, + "learning_rate": 1.4621243305184895e-06, + "loss": 0.4246688485145569, + "step": 3295 + }, + { + "epoch": 0.7599723311044501, + "grad_norm": 1.2346000309431113, + "learning_rate": 1.461786188693372e-06, + "loss": 0.4997994005680084, + "step": 3296 + }, + { + "epoch": 0.7602029052340328, + "grad_norm": 1.2539682682883548, + "learning_rate": 1.4614479797429523e-06, + "loss": 0.4571123719215393, + "step": 3297 + }, + { + "epoch": 0.7604334793636154, + "grad_norm": 1.3546747118119653, + "learning_rate": 1.4611097037163917e-06, + "loss": 0.5178083181381226, + "step": 3298 + }, + { + "epoch": 0.7606640534931981, + "grad_norm": 1.438807896221459, + "learning_rate": 1.4607713606628625e-06, + "loss": 0.538001298904419, + "step": 3299 + }, + { + "epoch": 0.7608946276227807, + "grad_norm": 
1.6495208547410056, + "learning_rate": 1.4604329506315464e-06, + "loss": 0.45941218733787537, + "step": 3300 + }, + { + "epoch": 0.7611252017523634, + "grad_norm": 1.469904127152949, + "learning_rate": 1.4600944736716344e-06, + "loss": 0.619648277759552, + "step": 3301 + }, + { + "epoch": 0.761355775881946, + "grad_norm": 1.3648924598961014, + "learning_rate": 1.4597559298323281e-06, + "loss": 0.4035170376300812, + "step": 3302 + }, + { + "epoch": 0.7615863500115287, + "grad_norm": 1.4623041349874883, + "learning_rate": 1.4594173191628374e-06, + "loss": 0.48657041788101196, + "step": 3303 + }, + { + "epoch": 0.7618169241411114, + "grad_norm": 1.3486514765257445, + "learning_rate": 1.4590786417123838e-06, + "loss": 0.43324801325798035, + "step": 3304 + }, + { + "epoch": 0.7620474982706941, + "grad_norm": 1.3543990457839288, + "learning_rate": 1.4587398975301968e-06, + "loss": 0.5020644664764404, + "step": 3305 + }, + { + "epoch": 0.7622780724002767, + "grad_norm": 1.4758408294809282, + "learning_rate": 1.4584010866655163e-06, + "loss": 0.4123230576515198, + "step": 3306 + }, + { + "epoch": 0.7625086465298594, + "grad_norm": 1.4629462638568174, + "learning_rate": 1.4580622091675925e-06, + "loss": 0.5110459327697754, + "step": 3307 + }, + { + "epoch": 0.762739220659442, + "grad_norm": 1.3128675599733384, + "learning_rate": 1.4577232650856842e-06, + "loss": 0.3956744074821472, + "step": 3308 + }, + { + "epoch": 0.7629697947890247, + "grad_norm": 1.028092913473986, + "learning_rate": 1.4573842544690602e-06, + "loss": 0.44418880343437195, + "step": 3309 + }, + { + "epoch": 0.7632003689186073, + "grad_norm": 1.2935675774179733, + "learning_rate": 1.4570451773669993e-06, + "loss": 0.46690821647644043, + "step": 3310 + }, + { + "epoch": 0.76343094304819, + "grad_norm": 1.7250402170715877, + "learning_rate": 1.45670603382879e-06, + "loss": 0.5631324052810669, + "step": 3311 + }, + { + "epoch": 0.7636615171777726, + "grad_norm": 1.3197309301962783, + "learning_rate": 
1.4563668239037301e-06, + "loss": 0.42355209589004517, + "step": 3312 + }, + { + "epoch": 0.7638920913073554, + "grad_norm": 1.1819135136971526, + "learning_rate": 1.4560275476411273e-06, + "loss": 0.4509078860282898, + "step": 3313 + }, + { + "epoch": 0.764122665436938, + "grad_norm": 1.2704317123198696, + "learning_rate": 1.4556882050902986e-06, + "loss": 0.48707491159439087, + "step": 3314 + }, + { + "epoch": 0.7643532395665207, + "grad_norm": 1.2817274130067733, + "learning_rate": 1.455348796300571e-06, + "loss": 0.4768955707550049, + "step": 3315 + }, + { + "epoch": 0.7645838136961033, + "grad_norm": 1.1995539933150834, + "learning_rate": 1.4550093213212812e-06, + "loss": 0.44231370091438293, + "step": 3316 + }, + { + "epoch": 0.764814387825686, + "grad_norm": 1.283098801050818, + "learning_rate": 1.4546697802017752e-06, + "loss": 0.41919445991516113, + "step": 3317 + }, + { + "epoch": 0.7650449619552686, + "grad_norm": 1.3370966440445557, + "learning_rate": 1.4543301729914086e-06, + "loss": 0.5004634857177734, + "step": 3318 + }, + { + "epoch": 0.7652755360848513, + "grad_norm": 1.3058062554730827, + "learning_rate": 1.4539904997395467e-06, + "loss": 0.5327651500701904, + "step": 3319 + }, + { + "epoch": 0.7655061102144339, + "grad_norm": 1.2690140519120048, + "learning_rate": 1.4536507604955647e-06, + "loss": 0.4571789801120758, + "step": 3320 + }, + { + "epoch": 0.7657366843440166, + "grad_norm": 1.4712336124149359, + "learning_rate": 1.4533109553088474e-06, + "loss": 0.3989352583885193, + "step": 3321 + }, + { + "epoch": 0.7659672584735993, + "grad_norm": 1.390525487190819, + "learning_rate": 1.452971084228788e-06, + "loss": 0.4661702513694763, + "step": 3322 + }, + { + "epoch": 0.766197832603182, + "grad_norm": 1.4525582608827485, + "learning_rate": 1.4526311473047911e-06, + "loss": 0.5007051825523376, + "step": 3323 + }, + { + "epoch": 0.7664284067327646, + "grad_norm": 1.4087277102322913, + "learning_rate": 1.4522911445862697e-06, + "loss": 
0.44391199946403503, + "step": 3324 + }, + { + "epoch": 0.7666589808623473, + "grad_norm": 1.5508781982933997, + "learning_rate": 1.4519510761226466e-06, + "loss": 0.48606377840042114, + "step": 3325 + }, + { + "epoch": 0.7668895549919299, + "grad_norm": 1.4942248011879364, + "learning_rate": 1.4516109419633543e-06, + "loss": 0.4831564426422119, + "step": 3326 + }, + { + "epoch": 0.7671201291215126, + "grad_norm": 1.2492238673667777, + "learning_rate": 1.4512707421578344e-06, + "loss": 0.5033055543899536, + "step": 3327 + }, + { + "epoch": 0.7673507032510952, + "grad_norm": 1.268639260981401, + "learning_rate": 1.4509304767555385e-06, + "loss": 0.40440869331359863, + "step": 3328 + }, + { + "epoch": 0.7675812773806779, + "grad_norm": 1.154540060885232, + "learning_rate": 1.4505901458059282e-06, + "loss": 0.4281578063964844, + "step": 3329 + }, + { + "epoch": 0.7678118515102605, + "grad_norm": 1.2646658661078, + "learning_rate": 1.4502497493584735e-06, + "loss": 0.45301395654678345, + "step": 3330 + }, + { + "epoch": 0.7680424256398433, + "grad_norm": 1.2708958618179473, + "learning_rate": 1.4499092874626545e-06, + "loss": 0.3971232771873474, + "step": 3331 + }, + { + "epoch": 0.7682729997694259, + "grad_norm": 1.470304815457328, + "learning_rate": 1.4495687601679607e-06, + "loss": 0.45382559299468994, + "step": 3332 + }, + { + "epoch": 0.7685035738990086, + "grad_norm": 1.5230375908041864, + "learning_rate": 1.4492281675238916e-06, + "loss": 0.4101349711418152, + "step": 3333 + }, + { + "epoch": 0.7687341480285912, + "grad_norm": 1.7708001369907398, + "learning_rate": 1.4488875095799555e-06, + "loss": 0.5322436690330505, + "step": 3334 + }, + { + "epoch": 0.7689647221581739, + "grad_norm": 1.4488936734065874, + "learning_rate": 1.4485467863856703e-06, + "loss": 0.5497866272926331, + "step": 3335 + }, + { + "epoch": 0.7691952962877565, + "grad_norm": 1.5286830910755105, + "learning_rate": 1.4482059979905642e-06, + "loss": 0.5088074207305908, + "step": 3336 + }, + { 
+ "epoch": 0.7694258704173391, + "grad_norm": 1.2530470288119384, + "learning_rate": 1.4478651444441736e-06, + "loss": 0.4444946050643921, + "step": 3337 + }, + { + "epoch": 0.7696564445469218, + "grad_norm": 1.1602955966590311, + "learning_rate": 1.4475242257960454e-06, + "loss": 0.41257357597351074, + "step": 3338 + }, + { + "epoch": 0.7698870186765044, + "grad_norm": 1.3512416855290101, + "learning_rate": 1.4471832420957356e-06, + "loss": 0.47933512926101685, + "step": 3339 + }, + { + "epoch": 0.7701175928060872, + "grad_norm": 1.204411185284335, + "learning_rate": 1.4468421933928093e-06, + "loss": 0.41331803798675537, + "step": 3340 + }, + { + "epoch": 0.7703481669356698, + "grad_norm": 1.3617384100749454, + "learning_rate": 1.4465010797368416e-06, + "loss": 0.5047392845153809, + "step": 3341 + }, + { + "epoch": 0.7705787410652525, + "grad_norm": 1.2651645489335748, + "learning_rate": 1.446159901177417e-06, + "loss": 0.5265953540802002, + "step": 3342 + }, + { + "epoch": 0.7708093151948351, + "grad_norm": 1.5538943468041178, + "learning_rate": 1.4458186577641285e-06, + "loss": 0.48366689682006836, + "step": 3343 + }, + { + "epoch": 0.7710398893244178, + "grad_norm": 1.3170443751716914, + "learning_rate": 1.4454773495465805e-06, + "loss": 0.4303058087825775, + "step": 3344 + }, + { + "epoch": 0.7712704634540004, + "grad_norm": 1.2782967712931992, + "learning_rate": 1.4451359765743845e-06, + "loss": 0.44936758279800415, + "step": 3345 + }, + { + "epoch": 0.7715010375835831, + "grad_norm": 1.1273529926323729, + "learning_rate": 1.4447945388971631e-06, + "loss": 0.37891095876693726, + "step": 3346 + }, + { + "epoch": 0.7717316117131657, + "grad_norm": 1.3818395750162065, + "learning_rate": 1.4444530365645477e-06, + "loss": 0.4958759546279907, + "step": 3347 + }, + { + "epoch": 0.7719621858427484, + "grad_norm": 1.2809802910956953, + "learning_rate": 1.4441114696261791e-06, + "loss": 0.5180525183677673, + "step": 3348 + }, + { + "epoch": 0.772192759972331, + 
"grad_norm": 1.3137706702012002, + "learning_rate": 1.4437698381317076e-06, + "loss": 0.4760133624076843, + "step": 3349 + }, + { + "epoch": 0.7724233341019138, + "grad_norm": 1.6019634089420207, + "learning_rate": 1.4434281421307923e-06, + "loss": 0.5095269680023193, + "step": 3350 + }, + { + "epoch": 0.7726539082314964, + "grad_norm": 1.3897770832286553, + "learning_rate": 1.443086381673103e-06, + "loss": 0.41132962703704834, + "step": 3351 + }, + { + "epoch": 0.7728844823610791, + "grad_norm": 2.1191686086439687, + "learning_rate": 1.442744556808317e-06, + "loss": 0.5617398023605347, + "step": 3352 + }, + { + "epoch": 0.7731150564906617, + "grad_norm": 1.3926070515875653, + "learning_rate": 1.4424026675861229e-06, + "loss": 0.4421590566635132, + "step": 3353 + }, + { + "epoch": 0.7733456306202444, + "grad_norm": 1.3079796762796725, + "learning_rate": 1.4420607140562175e-06, + "loss": 0.5533363223075867, + "step": 3354 + }, + { + "epoch": 0.773576204749827, + "grad_norm": 1.2259362177236217, + "learning_rate": 1.441718696268307e-06, + "loss": 0.3703731298446655, + "step": 3355 + }, + { + "epoch": 0.7738067788794097, + "grad_norm": 1.3132566837825874, + "learning_rate": 1.4413766142721074e-06, + "loss": 0.4078833758831024, + "step": 3356 + }, + { + "epoch": 0.7740373530089923, + "grad_norm": 1.3669338987803128, + "learning_rate": 1.4410344681173436e-06, + "loss": 0.47297823429107666, + "step": 3357 + }, + { + "epoch": 0.7742679271385751, + "grad_norm": 1.44476399239333, + "learning_rate": 1.4406922578537501e-06, + "loss": 0.4586789309978485, + "step": 3358 + }, + { + "epoch": 0.7744985012681577, + "grad_norm": 2.005996053014414, + "learning_rate": 1.440349983531071e-06, + "loss": 0.5284359455108643, + "step": 3359 + }, + { + "epoch": 0.7747290753977404, + "grad_norm": 1.453810263762319, + "learning_rate": 1.4400076451990585e-06, + "loss": 0.47153323888778687, + "step": 3360 + }, + { + "epoch": 0.774959649527323, + "grad_norm": 1.277395230723769, + "learning_rate": 
1.4396652429074758e-06, + "loss": 0.3862396478652954, + "step": 3361 + }, + { + "epoch": 0.7751902236569057, + "grad_norm": 1.4585054412515979, + "learning_rate": 1.4393227767060938e-06, + "loss": 0.48918354511260986, + "step": 3362 + }, + { + "epoch": 0.7754207977864883, + "grad_norm": 1.2680408475983538, + "learning_rate": 1.4389802466446942e-06, + "loss": 0.5541480779647827, + "step": 3363 + }, + { + "epoch": 0.775651371916071, + "grad_norm": 1.3507983643401953, + "learning_rate": 1.4386376527730665e-06, + "loss": 0.48972445726394653, + "step": 3364 + }, + { + "epoch": 0.7758819460456536, + "grad_norm": 1.7557497204808084, + "learning_rate": 1.4382949951410109e-06, + "loss": 0.5016083717346191, + "step": 3365 + }, + { + "epoch": 0.7761125201752364, + "grad_norm": 1.3196221720148595, + "learning_rate": 1.4379522737983351e-06, + "loss": 0.40227651596069336, + "step": 3366 + }, + { + "epoch": 0.776343094304819, + "grad_norm": 1.596207218013102, + "learning_rate": 1.4376094887948584e-06, + "loss": 0.42994722723960876, + "step": 3367 + }, + { + "epoch": 0.7765736684344017, + "grad_norm": 1.516975070106083, + "learning_rate": 1.4372666401804073e-06, + "loss": 0.5087350010871887, + "step": 3368 + }, + { + "epoch": 0.7768042425639843, + "grad_norm": 1.2618017709219296, + "learning_rate": 1.4369237280048186e-06, + "loss": 0.39419132471084595, + "step": 3369 + }, + { + "epoch": 0.777034816693567, + "grad_norm": 1.3456260179482487, + "learning_rate": 1.4365807523179376e-06, + "loss": 0.500682532787323, + "step": 3370 + }, + { + "epoch": 0.7772653908231496, + "grad_norm": 1.4316905894274476, + "learning_rate": 1.4362377131696198e-06, + "loss": 0.49243754148483276, + "step": 3371 + }, + { + "epoch": 0.7774959649527323, + "grad_norm": 1.4395314935622772, + "learning_rate": 1.4358946106097295e-06, + "loss": 0.5479283332824707, + "step": 3372 + }, + { + "epoch": 0.7777265390823149, + "grad_norm": 1.08521870178353, + "learning_rate": 1.4355514446881396e-06, + "loss": 
0.43217700719833374, + "step": 3373 + }, + { + "epoch": 0.7779571132118976, + "grad_norm": 1.292406809665349, + "learning_rate": 1.435208215454733e-06, + "loss": 0.5351289510726929, + "step": 3374 + }, + { + "epoch": 0.7781876873414802, + "grad_norm": 1.2023765125576906, + "learning_rate": 1.4348649229594016e-06, + "loss": 0.45523375272750854, + "step": 3375 + }, + { + "epoch": 0.778418261471063, + "grad_norm": 1.1345172738470508, + "learning_rate": 1.4345215672520465e-06, + "loss": 0.49811118841171265, + "step": 3376 + }, + { + "epoch": 0.7786488356006456, + "grad_norm": 1.3017016981868919, + "learning_rate": 1.434178148382578e-06, + "loss": 0.40621131658554077, + "step": 3377 + }, + { + "epoch": 0.7788794097302283, + "grad_norm": 1.322929743849566, + "learning_rate": 1.4338346664009152e-06, + "loss": 0.43339842557907104, + "step": 3378 + }, + { + "epoch": 0.7791099838598109, + "grad_norm": 1.4276417953872829, + "learning_rate": 1.433491121356987e-06, + "loss": 0.4397253096103668, + "step": 3379 + }, + { + "epoch": 0.7793405579893936, + "grad_norm": 1.3957946390360352, + "learning_rate": 1.433147513300731e-06, + "loss": 0.5146217942237854, + "step": 3380 + }, + { + "epoch": 0.7795711321189762, + "grad_norm": 1.3181842447854462, + "learning_rate": 1.432803842282094e-06, + "loss": 0.46328768134117126, + "step": 3381 + }, + { + "epoch": 0.7798017062485589, + "grad_norm": 1.4008272791948313, + "learning_rate": 1.432460108351032e-06, + "loss": 0.47743386030197144, + "step": 3382 + }, + { + "epoch": 0.7800322803781415, + "grad_norm": 1.4765555896470939, + "learning_rate": 1.4321163115575105e-06, + "loss": 0.467747300863266, + "step": 3383 + }, + { + "epoch": 0.7802628545077243, + "grad_norm": 1.2334202034705792, + "learning_rate": 1.431772451951504e-06, + "loss": 0.4269976019859314, + "step": 3384 + }, + { + "epoch": 0.7804934286373069, + "grad_norm": 1.4332482963337814, + "learning_rate": 1.4314285295829956e-06, + "loss": 0.5440881252288818, + "step": 3385 + }, + { + 
"epoch": 0.7807240027668896, + "grad_norm": 1.5634188347498899, + "learning_rate": 1.431084544501978e-06, + "loss": 0.42413994669914246, + "step": 3386 + }, + { + "epoch": 0.7809545768964722, + "grad_norm": 1.250472551312306, + "learning_rate": 1.4307404967584528e-06, + "loss": 0.5563687086105347, + "step": 3387 + }, + { + "epoch": 0.7811851510260549, + "grad_norm": 1.2530390736213655, + "learning_rate": 1.4303963864024314e-06, + "loss": 0.4822027087211609, + "step": 3388 + }, + { + "epoch": 0.7814157251556375, + "grad_norm": 1.265644144731409, + "learning_rate": 1.430052213483933e-06, + "loss": 0.5267205834388733, + "step": 3389 + }, + { + "epoch": 0.7816462992852202, + "grad_norm": 1.464631682134491, + "learning_rate": 1.4297079780529868e-06, + "loss": 0.49257054924964905, + "step": 3390 + }, + { + "epoch": 0.7818768734148028, + "grad_norm": 1.4967498256417051, + "learning_rate": 1.4293636801596314e-06, + "loss": 0.45225608348846436, + "step": 3391 + }, + { + "epoch": 0.7821074475443855, + "grad_norm": 1.3090966398510886, + "learning_rate": 1.4290193198539133e-06, + "loss": 0.4891412854194641, + "step": 3392 + }, + { + "epoch": 0.7823380216739682, + "grad_norm": 1.2913501590758174, + "learning_rate": 1.4286748971858893e-06, + "loss": 0.4411062002182007, + "step": 3393 + }, + { + "epoch": 0.7825685958035509, + "grad_norm": 1.3634871078304074, + "learning_rate": 1.4283304122056242e-06, + "loss": 0.4584164619445801, + "step": 3394 + }, + { + "epoch": 0.7827991699331335, + "grad_norm": 1.2884433704058607, + "learning_rate": 1.4279858649631928e-06, + "loss": 0.46913737058639526, + "step": 3395 + }, + { + "epoch": 0.7830297440627162, + "grad_norm": 1.320207574562506, + "learning_rate": 1.4276412555086786e-06, + "loss": 0.40582767128944397, + "step": 3396 + }, + { + "epoch": 0.7832603181922988, + "grad_norm": 1.4930886994867976, + "learning_rate": 1.4272965838921737e-06, + "loss": 0.5089453458786011, + "step": 3397 + }, + { + "epoch": 0.7834908923218815, + "grad_norm": 
1.3151641529095257, + "learning_rate": 1.4269518501637798e-06, + "loss": 0.4744444489479065, + "step": 3398 + }, + { + "epoch": 0.7837214664514641, + "grad_norm": 1.3271165993445435, + "learning_rate": 1.426607054373608e-06, + "loss": 0.49168163537979126, + "step": 3399 + }, + { + "epoch": 0.7839520405810468, + "grad_norm": 1.4774301348156431, + "learning_rate": 1.4262621965717768e-06, + "loss": 0.4423940181732178, + "step": 3400 + }, + { + "epoch": 0.7841826147106294, + "grad_norm": 1.541226385884193, + "learning_rate": 1.4259172768084152e-06, + "loss": 0.5138403177261353, + "step": 3401 + }, + { + "epoch": 0.7844131888402122, + "grad_norm": 1.5691210214340656, + "learning_rate": 1.425572295133661e-06, + "loss": 0.5248140096664429, + "step": 3402 + }, + { + "epoch": 0.7846437629697948, + "grad_norm": 1.4659537352972094, + "learning_rate": 1.4252272515976607e-06, + "loss": 0.39161059260368347, + "step": 3403 + }, + { + "epoch": 0.7848743370993775, + "grad_norm": 1.307338649596764, + "learning_rate": 1.4248821462505699e-06, + "loss": 0.46826744079589844, + "step": 3404 + }, + { + "epoch": 0.7851049112289601, + "grad_norm": 1.3428424961182877, + "learning_rate": 1.424536979142553e-06, + "loss": 0.4329161047935486, + "step": 3405 + }, + { + "epoch": 0.7853354853585428, + "grad_norm": 1.3831028347986385, + "learning_rate": 1.4241917503237834e-06, + "loss": 0.4691393971443176, + "step": 3406 + }, + { + "epoch": 0.7855660594881254, + "grad_norm": 1.819344171969547, + "learning_rate": 1.423846459844444e-06, + "loss": 0.5130072236061096, + "step": 3407 + }, + { + "epoch": 0.7857966336177081, + "grad_norm": 1.4381134289937085, + "learning_rate": 1.4235011077547264e-06, + "loss": 0.37478166818618774, + "step": 3408 + }, + { + "epoch": 0.7860272077472907, + "grad_norm": 1.1654669583674488, + "learning_rate": 1.4231556941048307e-06, + "loss": 0.46112769842147827, + "step": 3409 + }, + { + "epoch": 0.7862577818768735, + "grad_norm": 1.3711520199030207, + "learning_rate": 
1.422810218944966e-06, + "loss": 0.5095282793045044, + "step": 3410 + }, + { + "epoch": 0.7864883560064561, + "grad_norm": 1.4830709787042864, + "learning_rate": 1.422464682325351e-06, + "loss": 0.4182342290878296, + "step": 3411 + }, + { + "epoch": 0.7867189301360388, + "grad_norm": 1.4898619625675633, + "learning_rate": 1.422119084296213e-06, + "loss": 0.3892830014228821, + "step": 3412 + }, + { + "epoch": 0.7869495042656214, + "grad_norm": 1.655445800570714, + "learning_rate": 1.4217734249077877e-06, + "loss": 0.5294528603553772, + "step": 3413 + }, + { + "epoch": 0.7871800783952041, + "grad_norm": 1.501568458574139, + "learning_rate": 1.4214277042103208e-06, + "loss": 0.471803218126297, + "step": 3414 + }, + { + "epoch": 0.7874106525247867, + "grad_norm": 1.2078819401351728, + "learning_rate": 1.4210819222540662e-06, + "loss": 0.4363842010498047, + "step": 3415 + }, + { + "epoch": 0.7876412266543694, + "grad_norm": 1.191025232167839, + "learning_rate": 1.4207360790892867e-06, + "loss": 0.3834928870201111, + "step": 3416 + }, + { + "epoch": 0.787871800783952, + "grad_norm": 1.342904245190706, + "learning_rate": 1.4203901747662539e-06, + "loss": 0.4639194905757904, + "step": 3417 + }, + { + "epoch": 0.7881023749135347, + "grad_norm": 1.4526860275619324, + "learning_rate": 1.4200442093352486e-06, + "loss": 0.47130632400512695, + "step": 3418 + }, + { + "epoch": 0.7883329490431173, + "grad_norm": 1.2585342771790389, + "learning_rate": 1.4196981828465606e-06, + "loss": 0.4848192632198334, + "step": 3419 + }, + { + "epoch": 0.7885635231727001, + "grad_norm": 1.2424140051596944, + "learning_rate": 1.4193520953504884e-06, + "loss": 0.5137286186218262, + "step": 3420 + }, + { + "epoch": 0.7887940973022827, + "grad_norm": 1.4833943072924853, + "learning_rate": 1.4190059468973385e-06, + "loss": 0.47639960050582886, + "step": 3421 + }, + { + "epoch": 0.7890246714318654, + "grad_norm": 1.3974399628621321, + "learning_rate": 1.418659737537428e-06, + "loss": 
0.4300975799560547, + "step": 3422 + }, + { + "epoch": 0.789255245561448, + "grad_norm": 1.6248920549834995, + "learning_rate": 1.4183134673210817e-06, + "loss": 0.5669160485267639, + "step": 3423 + }, + { + "epoch": 0.7894858196910307, + "grad_norm": 1.3431432318053507, + "learning_rate": 1.4179671362986336e-06, + "loss": 0.4113837480545044, + "step": 3424 + }, + { + "epoch": 0.7897163938206133, + "grad_norm": 1.3611327690280945, + "learning_rate": 1.417620744520426e-06, + "loss": 0.4992315173149109, + "step": 3425 + }, + { + "epoch": 0.789946967950196, + "grad_norm": 1.6418572453635272, + "learning_rate": 1.417274292036811e-06, + "loss": 0.5556696653366089, + "step": 3426 + }, + { + "epoch": 0.7901775420797786, + "grad_norm": 1.367999541896107, + "learning_rate": 1.4169277788981485e-06, + "loss": 0.47911009192466736, + "step": 3427 + }, + { + "epoch": 0.7904081162093614, + "grad_norm": 1.2100320134669527, + "learning_rate": 1.416581205154808e-06, + "loss": 0.45395466685295105, + "step": 3428 + }, + { + "epoch": 0.790638690338944, + "grad_norm": 1.5386887400015699, + "learning_rate": 1.4162345708571674e-06, + "loss": 0.4404561519622803, + "step": 3429 + }, + { + "epoch": 0.7908692644685267, + "grad_norm": 1.3845404606780534, + "learning_rate": 1.4158878760556136e-06, + "loss": 0.5541578531265259, + "step": 3430 + }, + { + "epoch": 0.7910998385981093, + "grad_norm": 1.4234082473199938, + "learning_rate": 1.4155411208005422e-06, + "loss": 0.5517834424972534, + "step": 3431 + }, + { + "epoch": 0.791330412727692, + "grad_norm": 1.2851916229874634, + "learning_rate": 1.4151943051423574e-06, + "loss": 0.42650169134140015, + "step": 3432 + }, + { + "epoch": 0.7915609868572746, + "grad_norm": 1.7886227172970943, + "learning_rate": 1.414847429131472e-06, + "loss": 0.42724043130874634, + "step": 3433 + }, + { + "epoch": 0.7917915609868573, + "grad_norm": 1.3978336018588784, + "learning_rate": 1.414500492818309e-06, + "loss": 0.41757941246032715, + "step": 3434 + }, + { + 
"epoch": 0.7920221351164399, + "grad_norm": 1.4250040620354028, + "learning_rate": 1.4141534962532984e-06, + "loss": 0.47318267822265625, + "step": 3435 + }, + { + "epoch": 0.7922527092460226, + "grad_norm": 1.5092267765141392, + "learning_rate": 1.41380643948688e-06, + "loss": 0.5540967583656311, + "step": 3436 + }, + { + "epoch": 0.7924832833756053, + "grad_norm": 1.2943595959957308, + "learning_rate": 1.4134593225695013e-06, + "loss": 0.4459697902202606, + "step": 3437 + }, + { + "epoch": 0.792713857505188, + "grad_norm": 1.2950911274447663, + "learning_rate": 1.41311214555162e-06, + "loss": 0.5263698101043701, + "step": 3438 + }, + { + "epoch": 0.7929444316347706, + "grad_norm": 1.321260987570187, + "learning_rate": 1.4127649084837016e-06, + "loss": 0.40453940629959106, + "step": 3439 + }, + { + "epoch": 0.7931750057643533, + "grad_norm": 1.4138023773004598, + "learning_rate": 1.412417611416221e-06, + "loss": 0.3859207034111023, + "step": 3440 + }, + { + "epoch": 0.7934055798939359, + "grad_norm": 1.3373104076984894, + "learning_rate": 1.4120702543996603e-06, + "loss": 0.4604511260986328, + "step": 3441 + }, + { + "epoch": 0.7936361540235186, + "grad_norm": 1.2912472996688542, + "learning_rate": 1.411722837484512e-06, + "loss": 0.40292084217071533, + "step": 3442 + }, + { + "epoch": 0.7938667281531012, + "grad_norm": 1.3099743009304052, + "learning_rate": 1.4113753607212766e-06, + "loss": 0.40447625517845154, + "step": 3443 + }, + { + "epoch": 0.7940973022826839, + "grad_norm": 1.1711578682822494, + "learning_rate": 1.4110278241604635e-06, + "loss": 0.48472997546195984, + "step": 3444 + }, + { + "epoch": 0.7943278764122665, + "grad_norm": 1.304688924593958, + "learning_rate": 1.4106802278525902e-06, + "loss": 0.5404670238494873, + "step": 3445 + }, + { + "epoch": 0.7945584505418493, + "grad_norm": 1.2201185877258616, + "learning_rate": 1.4103325718481838e-06, + "loss": 0.5885064005851746, + "step": 3446 + }, + { + "epoch": 0.7947890246714319, + "grad_norm": 
1.2045708529585497, + "learning_rate": 1.4099848561977794e-06, + "loss": 0.47806939482688904, + "step": 3447 + }, + { + "epoch": 0.7950195988010145, + "grad_norm": 1.2183758256079422, + "learning_rate": 1.4096370809519213e-06, + "loss": 0.4247834086418152, + "step": 3448 + }, + { + "epoch": 0.7952501729305972, + "grad_norm": 1.4701805176850054, + "learning_rate": 1.409289246161162e-06, + "loss": 0.508902370929718, + "step": 3449 + }, + { + "epoch": 0.7954807470601798, + "grad_norm": 1.3709386014599791, + "learning_rate": 1.4089413518760626e-06, + "loss": 0.4866124987602234, + "step": 3450 + }, + { + "epoch": 0.7957113211897625, + "grad_norm": 1.4351510328158692, + "learning_rate": 1.408593398147193e-06, + "loss": 0.5168731212615967, + "step": 3451 + }, + { + "epoch": 0.7959418953193451, + "grad_norm": 1.257672253058261, + "learning_rate": 1.4082453850251326e-06, + "loss": 0.5039271712303162, + "step": 3452 + }, + { + "epoch": 0.7961724694489278, + "grad_norm": 1.3767040030777011, + "learning_rate": 1.4078973125604674e-06, + "loss": 0.3660929799079895, + "step": 3453 + }, + { + "epoch": 0.7964030435785104, + "grad_norm": 1.5330992916300397, + "learning_rate": 1.407549180803794e-06, + "loss": 0.514503538608551, + "step": 3454 + }, + { + "epoch": 0.7966336177080932, + "grad_norm": 1.5704286671243526, + "learning_rate": 1.4072009898057172e-06, + "loss": 0.4803028702735901, + "step": 3455 + }, + { + "epoch": 0.7968641918376758, + "grad_norm": 1.2332119133725918, + "learning_rate": 1.4068527396168492e-06, + "loss": 0.43116262555122375, + "step": 3456 + }, + { + "epoch": 0.7970947659672585, + "grad_norm": 1.522287028583898, + "learning_rate": 1.4065044302878125e-06, + "loss": 0.5009680986404419, + "step": 3457 + }, + { + "epoch": 0.7973253400968411, + "grad_norm": 1.1307500814268987, + "learning_rate": 1.406156061869237e-06, + "loss": 0.4047713875770569, + "step": 3458 + }, + { + "epoch": 0.7975559142264238, + "grad_norm": 1.348066090689188, + "learning_rate": 
1.4058076344117615e-06, + "loss": 0.5287230014801025, + "step": 3459 + }, + { + "epoch": 0.7977864883560064, + "grad_norm": 1.7810979263679612, + "learning_rate": 1.4054591479660335e-06, + "loss": 0.5602750778198242, + "step": 3460 + }, + { + "epoch": 0.7980170624855891, + "grad_norm": 1.0587308388288128, + "learning_rate": 1.4051106025827096e-06, + "loss": 0.4178144335746765, + "step": 3461 + }, + { + "epoch": 0.7982476366151717, + "grad_norm": 1.408691487644406, + "learning_rate": 1.4047619983124536e-06, + "loss": 0.5061960220336914, + "step": 3462 + }, + { + "epoch": 0.7984782107447544, + "grad_norm": 1.5043212480263244, + "learning_rate": 1.4044133352059392e-06, + "loss": 0.5091691017150879, + "step": 3463 + }, + { + "epoch": 0.798708784874337, + "grad_norm": 1.3793897642043385, + "learning_rate": 1.4040646133138478e-06, + "loss": 0.5100894570350647, + "step": 3464 + }, + { + "epoch": 0.7989393590039198, + "grad_norm": 1.2188849241203001, + "learning_rate": 1.4037158326868697e-06, + "loss": 0.47493505477905273, + "step": 3465 + }, + { + "epoch": 0.7991699331335024, + "grad_norm": 1.637846674977116, + "learning_rate": 1.4033669933757038e-06, + "loss": 0.5561350584030151, + "step": 3466 + }, + { + "epoch": 0.7994005072630851, + "grad_norm": 1.4971197328143675, + "learning_rate": 1.4030180954310574e-06, + "loss": 0.44552814960479736, + "step": 3467 + }, + { + "epoch": 0.7996310813926677, + "grad_norm": 1.219192969590734, + "learning_rate": 1.4026691389036465e-06, + "loss": 0.4624238908290863, + "step": 3468 + }, + { + "epoch": 0.7998616555222504, + "grad_norm": 1.348458578104898, + "learning_rate": 1.4023201238441951e-06, + "loss": 0.5424448251724243, + "step": 3469 + }, + { + "epoch": 0.800092229651833, + "grad_norm": 1.2410568882309463, + "learning_rate": 1.4019710503034367e-06, + "loss": 0.4629395008087158, + "step": 3470 + }, + { + "epoch": 0.8003228037814157, + "grad_norm": 1.3564725845833965, + "learning_rate": 1.401621918332112e-06, + "loss": 
0.4375717043876648, + "step": 3471 + }, + { + "epoch": 0.8005533779109983, + "grad_norm": 1.5212509367699154, + "learning_rate": 1.401272727980971e-06, + "loss": 0.4419640302658081, + "step": 3472 + }, + { + "epoch": 0.8007839520405811, + "grad_norm": 1.3621301015547722, + "learning_rate": 1.4009234793007724e-06, + "loss": 0.42077577114105225, + "step": 3473 + }, + { + "epoch": 0.8010145261701637, + "grad_norm": 1.394506766094276, + "learning_rate": 1.400574172342283e-06, + "loss": 0.3735182583332062, + "step": 3474 + }, + { + "epoch": 0.8012451002997464, + "grad_norm": 1.3325918102604086, + "learning_rate": 1.4002248071562778e-06, + "loss": 0.4263458251953125, + "step": 3475 + }, + { + "epoch": 0.801475674429329, + "grad_norm": 1.3278985843191269, + "learning_rate": 1.3998753837935406e-06, + "loss": 0.42377904057502747, + "step": 3476 + }, + { + "epoch": 0.8017062485589117, + "grad_norm": 1.4415172635554745, + "learning_rate": 1.399525902304864e-06, + "loss": 0.5017589330673218, + "step": 3477 + }, + { + "epoch": 0.8019368226884943, + "grad_norm": 1.2695777372701094, + "learning_rate": 1.3991763627410485e-06, + "loss": 0.41022592782974243, + "step": 3478 + }, + { + "epoch": 0.802167396818077, + "grad_norm": 1.6097549722001219, + "learning_rate": 1.3988267651529028e-06, + "loss": 0.49957793951034546, + "step": 3479 + }, + { + "epoch": 0.8023979709476596, + "grad_norm": 1.4695518489034636, + "learning_rate": 1.398477109591245e-06, + "loss": 0.5065722465515137, + "step": 3480 + }, + { + "epoch": 0.8026285450772424, + "grad_norm": 1.264735145451503, + "learning_rate": 1.398127396106901e-06, + "loss": 0.4353798031806946, + "step": 3481 + }, + { + "epoch": 0.802859119206825, + "grad_norm": 1.5800938751579423, + "learning_rate": 1.3977776247507049e-06, + "loss": 0.41438236832618713, + "step": 3482 + }, + { + "epoch": 0.8030896933364077, + "grad_norm": 1.2712154799989346, + "learning_rate": 1.3974277955734996e-06, + "loss": 0.4348248839378357, + "step": 3483 + }, + { + 
"epoch": 0.8033202674659903, + "grad_norm": 1.3020033760882643, + "learning_rate": 1.3970779086261363e-06, + "loss": 0.49369150400161743, + "step": 3484 + }, + { + "epoch": 0.803550841595573, + "grad_norm": 1.445427514378273, + "learning_rate": 1.396727963959475e-06, + "loss": 0.5694580078125, + "step": 3485 + }, + { + "epoch": 0.8037814157251556, + "grad_norm": 1.3859575121879733, + "learning_rate": 1.3963779616243834e-06, + "loss": 0.5357070565223694, + "step": 3486 + }, + { + "epoch": 0.8040119898547383, + "grad_norm": 1.3071217267808923, + "learning_rate": 1.3960279016717377e-06, + "loss": 0.41300907731056213, + "step": 3487 + }, + { + "epoch": 0.8042425639843209, + "grad_norm": 1.4713226080636248, + "learning_rate": 1.395677784152423e-06, + "loss": 0.5058030486106873, + "step": 3488 + }, + { + "epoch": 0.8044731381139036, + "grad_norm": 1.394990226330868, + "learning_rate": 1.3953276091173326e-06, + "loss": 0.5225522518157959, + "step": 3489 + }, + { + "epoch": 0.8047037122434862, + "grad_norm": 1.3669211701935395, + "learning_rate": 1.3949773766173675e-06, + "loss": 0.43893736600875854, + "step": 3490 + }, + { + "epoch": 0.804934286373069, + "grad_norm": 1.575168458794386, + "learning_rate": 1.3946270867034375e-06, + "loss": 0.4583659768104553, + "step": 3491 + }, + { + "epoch": 0.8051648605026516, + "grad_norm": 1.2728568882138123, + "learning_rate": 1.394276739426461e-06, + "loss": 0.49550747871398926, + "step": 3492 + }, + { + "epoch": 0.8053954346322343, + "grad_norm": 1.9438900883437185, + "learning_rate": 1.3939263348373648e-06, + "loss": 0.5637674331665039, + "step": 3493 + }, + { + "epoch": 0.8056260087618169, + "grad_norm": 1.3206034443977903, + "learning_rate": 1.3935758729870835e-06, + "loss": 0.4853670299053192, + "step": 3494 + }, + { + "epoch": 0.8058565828913996, + "grad_norm": 1.479029501570459, + "learning_rate": 1.3932253539265603e-06, + "loss": 0.4535500407218933, + "step": 3495 + }, + { + "epoch": 0.8060871570209822, + "grad_norm": 
1.4461411101486477, + "learning_rate": 1.3928747777067464e-06, + "loss": 0.4198870062828064, + "step": 3496 + }, + { + "epoch": 0.8063177311505649, + "grad_norm": 1.3336585529006162, + "learning_rate": 1.392524144378602e-06, + "loss": 0.45773670077323914, + "step": 3497 + }, + { + "epoch": 0.8065483052801475, + "grad_norm": 1.718264798623436, + "learning_rate": 1.3921734539930952e-06, + "loss": 0.45263248682022095, + "step": 3498 + }, + { + "epoch": 0.8067788794097303, + "grad_norm": 1.300886470112164, + "learning_rate": 1.3918227066012025e-06, + "loss": 0.473066508769989, + "step": 3499 + }, + { + "epoch": 0.8070094535393129, + "grad_norm": 1.1261914460441818, + "learning_rate": 1.3914719022539082e-06, + "loss": 0.35737159848213196, + "step": 3500 + }, + { + "epoch": 0.8072400276688956, + "grad_norm": 1.4095537979750905, + "learning_rate": 1.3911210410022054e-06, + "loss": 0.5162703394889832, + "step": 3501 + }, + { + "epoch": 0.8074706017984782, + "grad_norm": 1.494617165800155, + "learning_rate": 1.3907701228970955e-06, + "loss": 0.5347551703453064, + "step": 3502 + }, + { + "epoch": 0.8077011759280609, + "grad_norm": 1.7642790890319513, + "learning_rate": 1.390419147989588e-06, + "loss": 0.4889448881149292, + "step": 3503 + }, + { + "epoch": 0.8079317500576435, + "grad_norm": 1.380092267420659, + "learning_rate": 1.3900681163306999e-06, + "loss": 0.47468650341033936, + "step": 3504 + }, + { + "epoch": 0.8081623241872262, + "grad_norm": 1.4749480234582377, + "learning_rate": 1.3897170279714585e-06, + "loss": 0.43236857652664185, + "step": 3505 + }, + { + "epoch": 0.8083928983168088, + "grad_norm": 1.4419786763918543, + "learning_rate": 1.3893658829628974e-06, + "loss": 0.46778976917266846, + "step": 3506 + }, + { + "epoch": 0.8086234724463915, + "grad_norm": 1.353368455676612, + "learning_rate": 1.389014681356059e-06, + "loss": 0.49447667598724365, + "step": 3507 + }, + { + "epoch": 0.8088540465759742, + "grad_norm": 1.3574196281726325, + "learning_rate": 
1.388663423201994e-06, + "loss": 0.5221220254898071, + "step": 3508 + }, + { + "epoch": 0.8090846207055569, + "grad_norm": 1.8319434066548141, + "learning_rate": 1.3883121085517615e-06, + "loss": 0.5037325620651245, + "step": 3509 + }, + { + "epoch": 0.8093151948351395, + "grad_norm": 1.1547190760847952, + "learning_rate": 1.387960737456429e-06, + "loss": 0.46879589557647705, + "step": 3510 + }, + { + "epoch": 0.8095457689647222, + "grad_norm": 1.3552976314399992, + "learning_rate": 1.387609309967071e-06, + "loss": 0.44216716289520264, + "step": 3511 + }, + { + "epoch": 0.8097763430943048, + "grad_norm": 1.2016377736710804, + "learning_rate": 1.3872578261347716e-06, + "loss": 0.4525749981403351, + "step": 3512 + }, + { + "epoch": 0.8100069172238875, + "grad_norm": 1.3138421579944453, + "learning_rate": 1.3869062860106224e-06, + "loss": 0.44681644439697266, + "step": 3513 + }, + { + "epoch": 0.8102374913534701, + "grad_norm": 1.5030736189155554, + "learning_rate": 1.3865546896457233e-06, + "loss": 0.4162617325782776, + "step": 3514 + }, + { + "epoch": 0.8104680654830528, + "grad_norm": 1.4360914568156404, + "learning_rate": 1.3862030370911827e-06, + "loss": 0.5262776613235474, + "step": 3515 + }, + { + "epoch": 0.8106986396126354, + "grad_norm": 1.3010389916824352, + "learning_rate": 1.3858513283981163e-06, + "loss": 0.48102372884750366, + "step": 3516 + }, + { + "epoch": 0.8109292137422182, + "grad_norm": 1.41037363508679, + "learning_rate": 1.385499563617649e-06, + "loss": 0.46166497468948364, + "step": 3517 + }, + { + "epoch": 0.8111597878718008, + "grad_norm": 1.4145741054815544, + "learning_rate": 1.3851477428009133e-06, + "loss": 0.43523284792900085, + "step": 3518 + }, + { + "epoch": 0.8113903620013835, + "grad_norm": 1.3662294611202825, + "learning_rate": 1.3847958659990497e-06, + "loss": 0.5413048267364502, + "step": 3519 + }, + { + "epoch": 0.8116209361309661, + "grad_norm": 1.1462124150969017, + "learning_rate": 1.3844439332632073e-06, + "loss": 
0.4257383346557617, + "step": 3520 + }, + { + "epoch": 0.8118515102605488, + "grad_norm": 1.5928313905350753, + "learning_rate": 1.3840919446445427e-06, + "loss": 0.4812018871307373, + "step": 3521 + }, + { + "epoch": 0.8120820843901314, + "grad_norm": 1.5231442697754751, + "learning_rate": 1.3837399001942216e-06, + "loss": 0.4890254735946655, + "step": 3522 + }, + { + "epoch": 0.8123126585197141, + "grad_norm": 1.7091323269762855, + "learning_rate": 1.3833877999634166e-06, + "loss": 0.5079991817474365, + "step": 3523 + }, + { + "epoch": 0.8125432326492967, + "grad_norm": 1.6148941470526432, + "learning_rate": 1.3830356440033096e-06, + "loss": 0.44703438878059387, + "step": 3524 + }, + { + "epoch": 0.8127738067788794, + "grad_norm": 1.4685605039032132, + "learning_rate": 1.3826834323650898e-06, + "loss": 0.4218645989894867, + "step": 3525 + }, + { + "epoch": 0.813004380908462, + "grad_norm": 1.585977018929449, + "learning_rate": 1.3823311650999547e-06, + "loss": 0.4544546902179718, + "step": 3526 + }, + { + "epoch": 0.8132349550380448, + "grad_norm": 1.2954656146833265, + "learning_rate": 1.3819788422591099e-06, + "loss": 0.4978422224521637, + "step": 3527 + }, + { + "epoch": 0.8134655291676274, + "grad_norm": 1.3262250095489831, + "learning_rate": 1.3816264638937688e-06, + "loss": 0.42122140526771545, + "step": 3528 + }, + { + "epoch": 0.8136961032972101, + "grad_norm": 1.0995613789441223, + "learning_rate": 1.381274030055154e-06, + "loss": 0.45674729347229004, + "step": 3529 + }, + { + "epoch": 0.8139266774267927, + "grad_norm": 1.5614041042611542, + "learning_rate": 1.3809215407944947e-06, + "loss": 0.5075385570526123, + "step": 3530 + }, + { + "epoch": 0.8141572515563754, + "grad_norm": 1.4231357002591019, + "learning_rate": 1.380568996163029e-06, + "loss": 0.45952552556991577, + "step": 3531 + }, + { + "epoch": 0.814387825685958, + "grad_norm": 1.239122573849665, + "learning_rate": 1.3802163962120025e-06, + "loss": 0.5062624216079712, + "step": 3532 + }, + { + 
"epoch": 0.8146183998155407, + "grad_norm": 1.4910945652834293, + "learning_rate": 1.3798637409926698e-06, + "loss": 0.49294552206993103, + "step": 3533 + }, + { + "epoch": 0.8148489739451233, + "grad_norm": 1.347255149566569, + "learning_rate": 1.3795110305562926e-06, + "loss": 0.4389861822128296, + "step": 3534 + }, + { + "epoch": 0.8150795480747061, + "grad_norm": 1.5704776908584448, + "learning_rate": 1.3791582649541401e-06, + "loss": 0.47733181715011597, + "step": 3535 + }, + { + "epoch": 0.8153101222042887, + "grad_norm": 1.3661823105841888, + "learning_rate": 1.3788054442374918e-06, + "loss": 0.5007725358009338, + "step": 3536 + }, + { + "epoch": 0.8155406963338714, + "grad_norm": 1.617600694156108, + "learning_rate": 1.378452568457633e-06, + "loss": 0.4857913553714752, + "step": 3537 + }, + { + "epoch": 0.815771270463454, + "grad_norm": 1.4509204702050165, + "learning_rate": 1.3780996376658577e-06, + "loss": 0.5330549478530884, + "step": 3538 + }, + { + "epoch": 0.8160018445930367, + "grad_norm": 1.283827597345967, + "learning_rate": 1.3777466519134684e-06, + "loss": 0.45034217834472656, + "step": 3539 + }, + { + "epoch": 0.8162324187226193, + "grad_norm": 1.313177908039173, + "learning_rate": 1.3773936112517746e-06, + "loss": 0.4442213773727417, + "step": 3540 + }, + { + "epoch": 0.816462992852202, + "grad_norm": 1.479375223581317, + "learning_rate": 1.377040515732095e-06, + "loss": 0.5000369548797607, + "step": 3541 + }, + { + "epoch": 0.8166935669817846, + "grad_norm": 1.3177535399447533, + "learning_rate": 1.3766873654057551e-06, + "loss": 0.5117775797843933, + "step": 3542 + }, + { + "epoch": 0.8169241411113674, + "grad_norm": 1.4163300067502158, + "learning_rate": 1.3763341603240889e-06, + "loss": 0.431648850440979, + "step": 3543 + }, + { + "epoch": 0.81715471524095, + "grad_norm": 1.230235072546183, + "learning_rate": 1.3759809005384387e-06, + "loss": 0.39463019371032715, + "step": 3544 + }, + { + "epoch": 0.8173852893705327, + "grad_norm": 
1.4412595458793114, + "learning_rate": 1.375627586100154e-06, + "loss": 0.38739651441574097, + "step": 3545 + }, + { + "epoch": 0.8176158635001153, + "grad_norm": 1.1409525851258608, + "learning_rate": 1.3752742170605927e-06, + "loss": 0.3973360061645508, + "step": 3546 + }, + { + "epoch": 0.817846437629698, + "grad_norm": 1.3276328290635366, + "learning_rate": 1.3749207934711207e-06, + "loss": 0.4791724383831024, + "step": 3547 + }, + { + "epoch": 0.8180770117592806, + "grad_norm": 1.2963607541712077, + "learning_rate": 1.3745673153831114e-06, + "loss": 0.5245905518531799, + "step": 3548 + }, + { + "epoch": 0.8183075858888633, + "grad_norm": 1.4724838776986868, + "learning_rate": 1.3742137828479472e-06, + "loss": 0.5507007241249084, + "step": 3549 + }, + { + "epoch": 0.8185381600184459, + "grad_norm": 1.6416778504866436, + "learning_rate": 1.373860195917017e-06, + "loss": 0.4555748701095581, + "step": 3550 + }, + { + "epoch": 0.8187687341480286, + "grad_norm": 1.2633428656921684, + "learning_rate": 1.3735065546417182e-06, + "loss": 0.39309239387512207, + "step": 3551 + }, + { + "epoch": 0.8189993082776112, + "grad_norm": 1.205265119124541, + "learning_rate": 1.3731528590734564e-06, + "loss": 0.4984157681465149, + "step": 3552 + }, + { + "epoch": 0.819229882407194, + "grad_norm": 1.4373490041823445, + "learning_rate": 1.3727991092636448e-06, + "loss": 0.45853057503700256, + "step": 3553 + }, + { + "epoch": 0.8194604565367766, + "grad_norm": 1.427750473352885, + "learning_rate": 1.3724453052637043e-06, + "loss": 0.47412237524986267, + "step": 3554 + }, + { + "epoch": 0.8196910306663593, + "grad_norm": 1.5140095273509309, + "learning_rate": 1.3720914471250642e-06, + "loss": 0.46433544158935547, + "step": 3555 + }, + { + "epoch": 0.8199216047959419, + "grad_norm": 1.3530305082066354, + "learning_rate": 1.3717375348991612e-06, + "loss": 0.5773437023162842, + "step": 3556 + }, + { + "epoch": 0.8201521789255246, + "grad_norm": 1.519657617219548, + "learning_rate": 
1.37138356863744e-06, + "loss": 0.5943500995635986, + "step": 3557 + }, + { + "epoch": 0.8203827530551072, + "grad_norm": 1.1903323655602067, + "learning_rate": 1.3710295483913533e-06, + "loss": 0.4970731735229492, + "step": 3558 + }, + { + "epoch": 0.8206133271846898, + "grad_norm": 1.3936455952745408, + "learning_rate": 1.3706754742123611e-06, + "loss": 0.44726189970970154, + "step": 3559 + }, + { + "epoch": 0.8208439013142725, + "grad_norm": 1.257368755928624, + "learning_rate": 1.3703213461519325e-06, + "loss": 0.3980759382247925, + "step": 3560 + }, + { + "epoch": 0.8210744754438551, + "grad_norm": 1.510740752003684, + "learning_rate": 1.3699671642615434e-06, + "loss": 0.5521829724311829, + "step": 3561 + }, + { + "epoch": 0.8213050495734379, + "grad_norm": 1.4257916187791417, + "learning_rate": 1.3696129285926769e-06, + "loss": 0.42630624771118164, + "step": 3562 + }, + { + "epoch": 0.8215356237030205, + "grad_norm": 1.3813571407602123, + "learning_rate": 1.3692586391968254e-06, + "loss": 0.5060243606567383, + "step": 3563 + }, + { + "epoch": 0.8217661978326032, + "grad_norm": 1.553405319049413, + "learning_rate": 1.3689042961254884e-06, + "loss": 0.5803407430648804, + "step": 3564 + }, + { + "epoch": 0.8219967719621858, + "grad_norm": 1.1610478816524794, + "learning_rate": 1.3685498994301735e-06, + "loss": 0.4510403871536255, + "step": 3565 + }, + { + "epoch": 0.8222273460917685, + "grad_norm": 1.668001711945016, + "learning_rate": 1.3681954491623953e-06, + "loss": 0.5350467562675476, + "step": 3566 + }, + { + "epoch": 0.8224579202213511, + "grad_norm": 1.4589682016059282, + "learning_rate": 1.367840945373677e-06, + "loss": 0.5194679498672485, + "step": 3567 + }, + { + "epoch": 0.8226884943509338, + "grad_norm": 1.5164701950999842, + "learning_rate": 1.3674863881155495e-06, + "loss": 0.43574345111846924, + "step": 3568 + }, + { + "epoch": 0.8229190684805164, + "grad_norm": 1.2235692010100727, + "learning_rate": 1.367131777439551e-06, + "loss": 
0.43051451444625854, + "step": 3569 + }, + { + "epoch": 0.8231496426100992, + "grad_norm": 1.4294583851960962, + "learning_rate": 1.3667771133972278e-06, + "loss": 0.44449925422668457, + "step": 3570 + }, + { + "epoch": 0.8233802167396818, + "grad_norm": 1.4281775124274958, + "learning_rate": 1.3664223960401342e-06, + "loss": 0.4466608464717865, + "step": 3571 + }, + { + "epoch": 0.8236107908692645, + "grad_norm": 1.506734312309144, + "learning_rate": 1.3660676254198318e-06, + "loss": 0.6172389984130859, + "step": 3572 + }, + { + "epoch": 0.8238413649988471, + "grad_norm": 1.3071294444794341, + "learning_rate": 1.36571280158789e-06, + "loss": 0.3789742588996887, + "step": 3573 + }, + { + "epoch": 0.8240719391284298, + "grad_norm": 1.2713531694738989, + "learning_rate": 1.365357924595886e-06, + "loss": 0.3871726095676422, + "step": 3574 + }, + { + "epoch": 0.8243025132580124, + "grad_norm": 1.3659394637334186, + "learning_rate": 1.3650029944954047e-06, + "loss": 0.5464534759521484, + "step": 3575 + }, + { + "epoch": 0.8245330873875951, + "grad_norm": 1.4254183485118588, + "learning_rate": 1.3646480113380392e-06, + "loss": 0.4924513101577759, + "step": 3576 + }, + { + "epoch": 0.8247636615171777, + "grad_norm": 1.3350624286567714, + "learning_rate": 1.3642929751753896e-06, + "loss": 0.39648669958114624, + "step": 3577 + }, + { + "epoch": 0.8249942356467604, + "grad_norm": 1.155634552535419, + "learning_rate": 1.3639378860590642e-06, + "loss": 0.44139498472213745, + "step": 3578 + }, + { + "epoch": 0.825224809776343, + "grad_norm": 1.4016430263315434, + "learning_rate": 1.3635827440406784e-06, + "loss": 0.4477856159210205, + "step": 3579 + }, + { + "epoch": 0.8254553839059258, + "grad_norm": 1.2543072909410065, + "learning_rate": 1.363227549171856e-06, + "loss": 0.48722583055496216, + "step": 3580 + }, + { + "epoch": 0.8256859580355084, + "grad_norm": 1.5407337854642607, + "learning_rate": 1.3628723015042285e-06, + "loss": 0.44485795497894287, + "step": 3581 + }, + { 
+ "epoch": 0.8259165321650911, + "grad_norm": 1.481687909768813, + "learning_rate": 1.362517001089434e-06, + "loss": 0.510918140411377, + "step": 3582 + }, + { + "epoch": 0.8261471062946737, + "grad_norm": 1.4714123899535927, + "learning_rate": 1.3621616479791196e-06, + "loss": 0.5157535076141357, + "step": 3583 + }, + { + "epoch": 0.8263776804242564, + "grad_norm": 1.601097277197277, + "learning_rate": 1.361806242224939e-06, + "loss": 0.6120826005935669, + "step": 3584 + }, + { + "epoch": 0.826608254553839, + "grad_norm": 1.379062804125132, + "learning_rate": 1.3614507838785545e-06, + "loss": 0.47521674633026123, + "step": 3585 + }, + { + "epoch": 0.8268388286834217, + "grad_norm": 1.2544051986437676, + "learning_rate": 1.3610952729916352e-06, + "loss": 0.431441068649292, + "step": 3586 + }, + { + "epoch": 0.8270694028130043, + "grad_norm": 1.4333858511847595, + "learning_rate": 1.3607397096158587e-06, + "loss": 0.5168293118476868, + "step": 3587 + }, + { + "epoch": 0.8272999769425871, + "grad_norm": 1.4075386997192105, + "learning_rate": 1.3603840938029092e-06, + "loss": 0.47669821977615356, + "step": 3588 + }, + { + "epoch": 0.8275305510721697, + "grad_norm": 1.6345113020695277, + "learning_rate": 1.3600284256044791e-06, + "loss": 0.5170806050300598, + "step": 3589 + }, + { + "epoch": 0.8277611252017524, + "grad_norm": 1.3443972777893194, + "learning_rate": 1.359672705072269e-06, + "loss": 0.5578932762145996, + "step": 3590 + }, + { + "epoch": 0.827991699331335, + "grad_norm": 1.2931790064355784, + "learning_rate": 1.3593169322579855e-06, + "loss": 0.45000678300857544, + "step": 3591 + }, + { + "epoch": 0.8282222734609177, + "grad_norm": 1.7408157234389992, + "learning_rate": 1.3589611072133448e-06, + "loss": 0.47859635949134827, + "step": 3592 + }, + { + "epoch": 0.8284528475905003, + "grad_norm": 1.629320946493551, + "learning_rate": 1.3586052299900693e-06, + "loss": 0.5373919606208801, + "step": 3593 + }, + { + "epoch": 0.828683421720083, + "grad_norm": 
1.4093194136520946, + "learning_rate": 1.3582493006398888e-06, + "loss": 0.5461571216583252, + "step": 3594 + }, + { + "epoch": 0.8289139958496656, + "grad_norm": 1.4221547222488737, + "learning_rate": 1.357893319214542e-06, + "loss": 0.522891640663147, + "step": 3595 + }, + { + "epoch": 0.8291445699792483, + "grad_norm": 1.3931497044748549, + "learning_rate": 1.3575372857657739e-06, + "loss": 0.503441572189331, + "step": 3596 + }, + { + "epoch": 0.829375144108831, + "grad_norm": 1.4755218467347275, + "learning_rate": 1.357181200345338e-06, + "loss": 0.45475268363952637, + "step": 3597 + }, + { + "epoch": 0.8296057182384137, + "grad_norm": 1.3529340787561033, + "learning_rate": 1.3568250630049944e-06, + "loss": 0.4626728296279907, + "step": 3598 + }, + { + "epoch": 0.8298362923679963, + "grad_norm": 1.5106243497530205, + "learning_rate": 1.3564688737965118e-06, + "loss": 0.590618371963501, + "step": 3599 + }, + { + "epoch": 0.830066866497579, + "grad_norm": 1.1729232075760356, + "learning_rate": 1.3561126327716658e-06, + "loss": 0.4252029061317444, + "step": 3600 + }, + { + "epoch": 0.8302974406271616, + "grad_norm": 1.5093126003070163, + "learning_rate": 1.3557563399822396e-06, + "loss": 0.5741503238677979, + "step": 3601 + }, + { + "epoch": 0.8305280147567443, + "grad_norm": 1.346541706093541, + "learning_rate": 1.3553999954800236e-06, + "loss": 0.4591038227081299, + "step": 3602 + }, + { + "epoch": 0.8307585888863269, + "grad_norm": 1.5342817778823432, + "learning_rate": 1.3550435993168164e-06, + "loss": 0.5761657953262329, + "step": 3603 + }, + { + "epoch": 0.8309891630159096, + "grad_norm": 1.4873747737215213, + "learning_rate": 1.3546871515444239e-06, + "loss": 0.4835323691368103, + "step": 3604 + }, + { + "epoch": 0.8312197371454922, + "grad_norm": 1.3474153162620106, + "learning_rate": 1.3543306522146594e-06, + "loss": 0.6152533292770386, + "step": 3605 + }, + { + "epoch": 0.831450311275075, + "grad_norm": 1.7615931586989606, + "learning_rate": 
1.3539741013793431e-06, + "loss": 0.48106616735458374, + "step": 3606 + }, + { + "epoch": 0.8316808854046576, + "grad_norm": 1.3977429311647935, + "learning_rate": 1.3536174990903042e-06, + "loss": 0.48128771781921387, + "step": 3607 + }, + { + "epoch": 0.8319114595342403, + "grad_norm": 1.5624866131401935, + "learning_rate": 1.353260845399378e-06, + "loss": 0.4395609498023987, + "step": 3608 + }, + { + "epoch": 0.8321420336638229, + "grad_norm": 1.6243424583265862, + "learning_rate": 1.3529041403584076e-06, + "loss": 0.5298231840133667, + "step": 3609 + }, + { + "epoch": 0.8323726077934056, + "grad_norm": 1.610376085646533, + "learning_rate": 1.3525473840192436e-06, + "loss": 0.4694434404373169, + "step": 3610 + }, + { + "epoch": 0.8326031819229882, + "grad_norm": 1.3870293085196028, + "learning_rate": 1.3521905764337449e-06, + "loss": 0.4264890253543854, + "step": 3611 + }, + { + "epoch": 0.8328337560525709, + "grad_norm": 1.3900907609641087, + "learning_rate": 1.3518337176537762e-06, + "loss": 0.3266828656196594, + "step": 3612 + }, + { + "epoch": 0.8330643301821535, + "grad_norm": 1.548598004244933, + "learning_rate": 1.351476807731211e-06, + "loss": 0.5554935336112976, + "step": 3613 + }, + { + "epoch": 0.8332949043117363, + "grad_norm": 1.3139574983210685, + "learning_rate": 1.3511198467179295e-06, + "loss": 0.4375999867916107, + "step": 3614 + }, + { + "epoch": 0.8335254784413189, + "grad_norm": 1.3568296792682797, + "learning_rate": 1.35076283466582e-06, + "loss": 0.564457893371582, + "step": 3615 + }, + { + "epoch": 0.8337560525709016, + "grad_norm": 1.5648573569840147, + "learning_rate": 1.3504057716267776e-06, + "loss": 0.5141148567199707, + "step": 3616 + }, + { + "epoch": 0.8339866267004842, + "grad_norm": 1.2607282701974722, + "learning_rate": 1.350048657652705e-06, + "loss": 0.45514535903930664, + "step": 3617 + }, + { + "epoch": 0.8342172008300669, + "grad_norm": 1.298858308641179, + "learning_rate": 1.3496914927955122e-06, + "loss": 
0.5224772691726685, + "step": 3618 + }, + { + "epoch": 0.8344477749596495, + "grad_norm": 1.3773935543957632, + "learning_rate": 1.349334277107117e-06, + "loss": 0.45185205340385437, + "step": 3619 + }, + { + "epoch": 0.8346783490892322, + "grad_norm": 1.3400411570126707, + "learning_rate": 1.3489770106394444e-06, + "loss": 0.47232794761657715, + "step": 3620 + }, + { + "epoch": 0.8349089232188148, + "grad_norm": 1.3564585933268873, + "learning_rate": 1.3486196934444264e-06, + "loss": 0.44031190872192383, + "step": 3621 + }, + { + "epoch": 0.8351394973483975, + "grad_norm": 1.2921832515242213, + "learning_rate": 1.3482623255740028e-06, + "loss": 0.4594510793685913, + "step": 3622 + }, + { + "epoch": 0.8353700714779801, + "grad_norm": 1.3491628541071723, + "learning_rate": 1.347904907080121e-06, + "loss": 0.38726723194122314, + "step": 3623 + }, + { + "epoch": 0.8356006456075629, + "grad_norm": 1.4086239991990677, + "learning_rate": 1.3475474380147347e-06, + "loss": 0.544617772102356, + "step": 3624 + }, + { + "epoch": 0.8358312197371455, + "grad_norm": 1.5645995914963535, + "learning_rate": 1.347189918429806e-06, + "loss": 0.503423810005188, + "step": 3625 + }, + { + "epoch": 0.8360617938667282, + "grad_norm": 1.3950432339665733, + "learning_rate": 1.3468323483773038e-06, + "loss": 0.4395143985748291, + "step": 3626 + }, + { + "epoch": 0.8362923679963108, + "grad_norm": 1.6308000434387062, + "learning_rate": 1.346474727909205e-06, + "loss": 0.41464856266975403, + "step": 3627 + }, + { + "epoch": 0.8365229421258935, + "grad_norm": 1.4008674771220466, + "learning_rate": 1.346117057077493e-06, + "loss": 0.4782845079898834, + "step": 3628 + }, + { + "epoch": 0.8367535162554761, + "grad_norm": 1.2484540580184977, + "learning_rate": 1.345759335934159e-06, + "loss": 0.48308104276657104, + "step": 3629 + }, + { + "epoch": 0.8369840903850588, + "grad_norm": 1.3935764281095124, + "learning_rate": 1.345401564531201e-06, + "loss": 0.5759967565536499, + "step": 3630 + }, + { + 
"epoch": 0.8372146645146414, + "grad_norm": 1.421077506310717, + "learning_rate": 1.3450437429206256e-06, + "loss": 0.5900512337684631, + "step": 3631 + }, + { + "epoch": 0.8374452386442242, + "grad_norm": 1.3643346247687353, + "learning_rate": 1.3446858711544451e-06, + "loss": 0.4776286482810974, + "step": 3632 + }, + { + "epoch": 0.8376758127738068, + "grad_norm": 1.5796891796446009, + "learning_rate": 1.34432794928468e-06, + "loss": 0.5123563408851624, + "step": 3633 + }, + { + "epoch": 0.8379063869033895, + "grad_norm": 1.6272139775850447, + "learning_rate": 1.3439699773633574e-06, + "loss": 0.5505821108818054, + "step": 3634 + }, + { + "epoch": 0.8381369610329721, + "grad_norm": 1.4456391396483874, + "learning_rate": 1.343611955442513e-06, + "loss": 0.5525364875793457, + "step": 3635 + }, + { + "epoch": 0.8383675351625548, + "grad_norm": 1.1644228181066894, + "learning_rate": 1.3432538835741884e-06, + "loss": 0.44074952602386475, + "step": 3636 + }, + { + "epoch": 0.8385981092921374, + "grad_norm": 1.3792820862390651, + "learning_rate": 1.3428957618104331e-06, + "loss": 0.5488649606704712, + "step": 3637 + }, + { + "epoch": 0.8388286834217201, + "grad_norm": 1.159150884236996, + "learning_rate": 1.3425375902033034e-06, + "loss": 0.4427725672721863, + "step": 3638 + }, + { + "epoch": 0.8390592575513027, + "grad_norm": 1.5753495335559473, + "learning_rate": 1.3421793688048636e-06, + "loss": 0.5244250297546387, + "step": 3639 + }, + { + "epoch": 0.8392898316808854, + "grad_norm": 1.2853956216426152, + "learning_rate": 1.3418210976671845e-06, + "loss": 0.4684640169143677, + "step": 3640 + }, + { + "epoch": 0.839520405810468, + "grad_norm": 1.4767228704961965, + "learning_rate": 1.3414627768423449e-06, + "loss": 0.4518035054206848, + "step": 3641 + }, + { + "epoch": 0.8397509799400508, + "grad_norm": 1.5338085000094812, + "learning_rate": 1.34110440638243e-06, + "loss": 0.47504323720932007, + "step": 3642 + }, + { + "epoch": 0.8399815540696334, + "grad_norm": 
1.7182899921711987, + "learning_rate": 1.3407459863395326e-06, + "loss": 0.3835057020187378, + "step": 3643 + }, + { + "epoch": 0.8402121281992161, + "grad_norm": 1.4517538314936977, + "learning_rate": 1.3403875167657529e-06, + "loss": 0.4103546738624573, + "step": 3644 + }, + { + "epoch": 0.8404427023287987, + "grad_norm": 1.3338056576205999, + "learning_rate": 1.3400289977131974e-06, + "loss": 0.48064136505126953, + "step": 3645 + }, + { + "epoch": 0.8406732764583814, + "grad_norm": 1.5606949897639386, + "learning_rate": 1.3396704292339813e-06, + "loss": 0.49655234813690186, + "step": 3646 + }, + { + "epoch": 0.840903850587964, + "grad_norm": 1.3180737586627664, + "learning_rate": 1.3393118113802259e-06, + "loss": 0.5559303760528564, + "step": 3647 + }, + { + "epoch": 0.8411344247175467, + "grad_norm": 1.3902505896601203, + "learning_rate": 1.3389531442040599e-06, + "loss": 0.5173505544662476, + "step": 3648 + }, + { + "epoch": 0.8413649988471293, + "grad_norm": 1.4997400095057662, + "learning_rate": 1.338594427757619e-06, + "loss": 0.500524640083313, + "step": 3649 + }, + { + "epoch": 0.8415955729767121, + "grad_norm": 1.3017945585861477, + "learning_rate": 1.3382356620930467e-06, + "loss": 0.5167285203933716, + "step": 3650 + }, + { + "epoch": 0.8418261471062947, + "grad_norm": 1.4661199659605932, + "learning_rate": 1.3378768472624929e-06, + "loss": 0.5006825923919678, + "step": 3651 + }, + { + "epoch": 0.8420567212358774, + "grad_norm": 1.5253217794534257, + "learning_rate": 1.3375179833181153e-06, + "loss": 0.5421864986419678, + "step": 3652 + }, + { + "epoch": 0.84228729536546, + "grad_norm": 1.5304567180850979, + "learning_rate": 1.337159070312078e-06, + "loss": 0.4964475929737091, + "step": 3653 + }, + { + "epoch": 0.8425178694950427, + "grad_norm": 1.2795061721511742, + "learning_rate": 1.3368001082965528e-06, + "loss": 0.4020928144454956, + "step": 3654 + }, + { + "epoch": 0.8427484436246253, + "grad_norm": 1.3457912405228358, + "learning_rate": 
1.3364410973237183e-06, + "loss": 0.43009278178215027, + "step": 3655 + }, + { + "epoch": 0.842979017754208, + "grad_norm": 1.3663101783603413, + "learning_rate": 1.3360820374457608e-06, + "loss": 0.5939761400222778, + "step": 3656 + }, + { + "epoch": 0.8432095918837906, + "grad_norm": 1.3723718945789372, + "learning_rate": 1.335722928714873e-06, + "loss": 0.43889346718788147, + "step": 3657 + }, + { + "epoch": 0.8434401660133733, + "grad_norm": 1.510811137049935, + "learning_rate": 1.335363771183255e-06, + "loss": 0.5125945806503296, + "step": 3658 + }, + { + "epoch": 0.843670740142956, + "grad_norm": 1.2988273180041983, + "learning_rate": 1.3350045649031143e-06, + "loss": 0.516818642616272, + "step": 3659 + }, + { + "epoch": 0.8439013142725387, + "grad_norm": 1.2172726171902464, + "learning_rate": 1.3346453099266649e-06, + "loss": 0.5098299980163574, + "step": 3660 + }, + { + "epoch": 0.8441318884021213, + "grad_norm": 1.4809835823543989, + "learning_rate": 1.334286006306128e-06, + "loss": 0.46228134632110596, + "step": 3661 + }, + { + "epoch": 0.844362462531704, + "grad_norm": 1.518730905252404, + "learning_rate": 1.3339266540937324e-06, + "loss": 0.38364481925964355, + "step": 3662 + }, + { + "epoch": 0.8445930366612866, + "grad_norm": 1.2447229933483466, + "learning_rate": 1.3335672533417134e-06, + "loss": 0.4363073706626892, + "step": 3663 + }, + { + "epoch": 0.8448236107908693, + "grad_norm": 1.5445839123019949, + "learning_rate": 1.3332078041023133e-06, + "loss": 0.463603675365448, + "step": 3664 + }, + { + "epoch": 0.8450541849204519, + "grad_norm": 1.118250112497339, + "learning_rate": 1.3328483064277816e-06, + "loss": 0.4173084795475006, + "step": 3665 + }, + { + "epoch": 0.8452847590500346, + "grad_norm": 1.2905398126594152, + "learning_rate": 1.3324887603703756e-06, + "loss": 0.41451913118362427, + "step": 3666 + }, + { + "epoch": 0.8455153331796172, + "grad_norm": 1.3301474043831027, + "learning_rate": 1.3321291659823587e-06, + "loss": 
0.49418264627456665, + "step": 3667 + }, + { + "epoch": 0.8457459073092, + "grad_norm": 1.323747824550861, + "learning_rate": 1.3317695233160015e-06, + "loss": 0.48787444829940796, + "step": 3668 + }, + { + "epoch": 0.8459764814387826, + "grad_norm": 1.419516654753041, + "learning_rate": 1.3314098324235814e-06, + "loss": 0.484865665435791, + "step": 3669 + }, + { + "epoch": 0.8462070555683652, + "grad_norm": 1.4996660725713626, + "learning_rate": 1.3310500933573837e-06, + "loss": 0.44162076711654663, + "step": 3670 + }, + { + "epoch": 0.8464376296979479, + "grad_norm": 1.4496595059902684, + "learning_rate": 1.3306903061696999e-06, + "loss": 0.39880990982055664, + "step": 3671 + }, + { + "epoch": 0.8466682038275305, + "grad_norm": 1.596735486600776, + "learning_rate": 1.3303304709128288e-06, + "loss": 0.4405972957611084, + "step": 3672 + }, + { + "epoch": 0.8468987779571132, + "grad_norm": 1.8476371944591239, + "learning_rate": 1.3299705876390755e-06, + "loss": 0.4228917956352234, + "step": 3673 + }, + { + "epoch": 0.8471293520866958, + "grad_norm": 1.3245854918753257, + "learning_rate": 1.3296106564007532e-06, + "loss": 0.44533059000968933, + "step": 3674 + }, + { + "epoch": 0.8473599262162785, + "grad_norm": 1.324480419314636, + "learning_rate": 1.3292506772501816e-06, + "loss": 0.4672505855560303, + "step": 3675 + }, + { + "epoch": 0.8475905003458611, + "grad_norm": 1.5345690520656405, + "learning_rate": 1.3288906502396873e-06, + "loss": 0.5651025772094727, + "step": 3676 + }, + { + "epoch": 0.8478210744754439, + "grad_norm": 1.4113200785742674, + "learning_rate": 1.3285305754216034e-06, + "loss": 0.4877372086048126, + "step": 3677 + }, + { + "epoch": 0.8480516486050265, + "grad_norm": 1.6156626909271148, + "learning_rate": 1.3281704528482713e-06, + "loss": 0.43767499923706055, + "step": 3678 + }, + { + "epoch": 0.8482822227346092, + "grad_norm": 1.6309175000442955, + "learning_rate": 1.3278102825720376e-06, + "loss": 0.5077182650566101, + "step": 3679 + }, + { + 
"epoch": 0.8485127968641918, + "grad_norm": 1.5150502093819094, + "learning_rate": 1.3274500646452573e-06, + "loss": 0.4814456105232239, + "step": 3680 + }, + { + "epoch": 0.8487433709937745, + "grad_norm": 1.3626740483959299, + "learning_rate": 1.3270897991202913e-06, + "loss": 0.4454193115234375, + "step": 3681 + }, + { + "epoch": 0.8489739451233571, + "grad_norm": 1.1173863119708762, + "learning_rate": 1.3267294860495084e-06, + "loss": 0.3973482549190521, + "step": 3682 + }, + { + "epoch": 0.8492045192529398, + "grad_norm": 1.5337644837004238, + "learning_rate": 1.3263691254852834e-06, + "loss": 0.5115909576416016, + "step": 3683 + }, + { + "epoch": 0.8494350933825224, + "grad_norm": 1.2962888350788886, + "learning_rate": 1.3260087174799982e-06, + "loss": 0.4217768907546997, + "step": 3684 + }, + { + "epoch": 0.8496656675121051, + "grad_norm": 1.5676465439666392, + "learning_rate": 1.3256482620860414e-06, + "loss": 0.4462714195251465, + "step": 3685 + }, + { + "epoch": 0.8498962416416878, + "grad_norm": 1.278085511550712, + "learning_rate": 1.32528775935581e-06, + "loss": 0.4617312550544739, + "step": 3686 + }, + { + "epoch": 0.8501268157712705, + "grad_norm": 1.2760475898780375, + "learning_rate": 1.324927209341706e-06, + "loss": 0.4774616062641144, + "step": 3687 + }, + { + "epoch": 0.8503573899008531, + "grad_norm": 1.389927333157612, + "learning_rate": 1.3245666120961389e-06, + "loss": 0.38730189204216003, + "step": 3688 + }, + { + "epoch": 0.8505879640304358, + "grad_norm": 1.5164687032364252, + "learning_rate": 1.324205967671525e-06, + "loss": 0.45189517736434937, + "step": 3689 + }, + { + "epoch": 0.8508185381600184, + "grad_norm": 1.489462413187487, + "learning_rate": 1.3238452761202887e-06, + "loss": 0.4965584874153137, + "step": 3690 + }, + { + "epoch": 0.8510491122896011, + "grad_norm": 1.2283217886481297, + "learning_rate": 1.3234845374948591e-06, + "loss": 0.4409075975418091, + "step": 3691 + }, + { + "epoch": 0.8512796864191837, + "grad_norm": 
1.3545920303070538, + "learning_rate": 1.3231237518476737e-06, + "loss": 0.4457218647003174, + "step": 3692 + }, + { + "epoch": 0.8515102605487664, + "grad_norm": 1.2432481704868787, + "learning_rate": 1.3227629192311762e-06, + "loss": 0.42810603976249695, + "step": 3693 + }, + { + "epoch": 0.851740834678349, + "grad_norm": 1.3504737245283156, + "learning_rate": 1.3224020396978172e-06, + "loss": 0.40753173828125, + "step": 3694 + }, + { + "epoch": 0.8519714088079318, + "grad_norm": 1.5063309076640758, + "learning_rate": 1.3220411133000542e-06, + "loss": 0.5057830810546875, + "step": 3695 + }, + { + "epoch": 0.8522019829375144, + "grad_norm": 1.4625648008354504, + "learning_rate": 1.3216801400903515e-06, + "loss": 0.42498981952667236, + "step": 3696 + }, + { + "epoch": 0.8524325570670971, + "grad_norm": 1.736302707969947, + "learning_rate": 1.3213191201211806e-06, + "loss": 0.44985881447792053, + "step": 3697 + }, + { + "epoch": 0.8526631311966797, + "grad_norm": 1.5257289791960187, + "learning_rate": 1.3209580534450192e-06, + "loss": 0.39984816312789917, + "step": 3698 + }, + { + "epoch": 0.8528937053262624, + "grad_norm": 1.4859934204912078, + "learning_rate": 1.3205969401143516e-06, + "loss": 0.4773896038532257, + "step": 3699 + }, + { + "epoch": 0.853124279455845, + "grad_norm": 1.5299580963987478, + "learning_rate": 1.3202357801816698e-06, + "loss": 0.5699855089187622, + "step": 3700 + }, + { + "epoch": 0.8533548535854277, + "grad_norm": 1.5124437197630332, + "learning_rate": 1.3198745736994714e-06, + "loss": 0.4486675262451172, + "step": 3701 + }, + { + "epoch": 0.8535854277150103, + "grad_norm": 1.3641053506348044, + "learning_rate": 1.3195133207202625e-06, + "loss": 0.47909995913505554, + "step": 3702 + }, + { + "epoch": 0.853816001844593, + "grad_norm": 1.3267279385735278, + "learning_rate": 1.3191520212965542e-06, + "loss": 0.4356222450733185, + "step": 3703 + }, + { + "epoch": 0.8540465759741757, + "grad_norm": 1.5161594053893233, + "learning_rate": 
1.3187906754808646e-06, + "loss": 0.4734821319580078, + "step": 3704 + }, + { + "epoch": 0.8542771501037584, + "grad_norm": 1.1414361983546972, + "learning_rate": 1.3184292833257197e-06, + "loss": 0.4164031744003296, + "step": 3705 + }, + { + "epoch": 0.854507724233341, + "grad_norm": 1.5194682024268111, + "learning_rate": 1.3180678448836516e-06, + "loss": 0.505548357963562, + "step": 3706 + }, + { + "epoch": 0.8547382983629237, + "grad_norm": 1.4180879233512311, + "learning_rate": 1.3177063602071985e-06, + "loss": 0.4443202316761017, + "step": 3707 + }, + { + "epoch": 0.8549688724925063, + "grad_norm": 1.4808642334806548, + "learning_rate": 1.317344829348906e-06, + "loss": 0.4594070017337799, + "step": 3708 + }, + { + "epoch": 0.855199446622089, + "grad_norm": 1.595149298191138, + "learning_rate": 1.3169832523613265e-06, + "loss": 0.5346768498420715, + "step": 3709 + }, + { + "epoch": 0.8554300207516716, + "grad_norm": 1.4211934536480004, + "learning_rate": 1.3166216292970185e-06, + "loss": 0.44471168518066406, + "step": 3710 + }, + { + "epoch": 0.8556605948812543, + "grad_norm": 1.3967510109946715, + "learning_rate": 1.3162599602085482e-06, + "loss": 0.4414154589176178, + "step": 3711 + }, + { + "epoch": 0.855891169010837, + "grad_norm": 1.2591243363727789, + "learning_rate": 1.3158982451484873e-06, + "loss": 0.4267842769622803, + "step": 3712 + }, + { + "epoch": 0.8561217431404197, + "grad_norm": 1.5517519524370356, + "learning_rate": 1.315536484169415e-06, + "loss": 0.5282812118530273, + "step": 3713 + }, + { + "epoch": 0.8563523172700023, + "grad_norm": 1.3747848129200213, + "learning_rate": 1.3151746773239167e-06, + "loss": 0.3831692934036255, + "step": 3714 + }, + { + "epoch": 0.856582891399585, + "grad_norm": 1.3399055617764033, + "learning_rate": 1.3148128246645848e-06, + "loss": 0.4714779853820801, + "step": 3715 + }, + { + "epoch": 0.8568134655291676, + "grad_norm": 1.5957966977407376, + "learning_rate": 1.3144509262440185e-06, + "loss": 
0.515029788017273, + "step": 3716 + }, + { + "epoch": 0.8570440396587503, + "grad_norm": 1.6565005005078866, + "learning_rate": 1.314088982114823e-06, + "loss": 0.48407065868377686, + "step": 3717 + }, + { + "epoch": 0.8572746137883329, + "grad_norm": 1.2250893853794216, + "learning_rate": 1.3137269923296111e-06, + "loss": 0.4756847620010376, + "step": 3718 + }, + { + "epoch": 0.8575051879179156, + "grad_norm": 1.4417516161095163, + "learning_rate": 1.313364956941001e-06, + "loss": 0.47744277119636536, + "step": 3719 + }, + { + "epoch": 0.8577357620474982, + "grad_norm": 1.4540506451139732, + "learning_rate": 1.3130028760016187e-06, + "loss": 0.4967440366744995, + "step": 3720 + }, + { + "epoch": 0.857966336177081, + "grad_norm": 1.5755023694033539, + "learning_rate": 1.312640749564096e-06, + "loss": 0.44999921321868896, + "step": 3721 + }, + { + "epoch": 0.8581969103066636, + "grad_norm": 1.1829331105101752, + "learning_rate": 1.3122785776810723e-06, + "loss": 0.4454652667045593, + "step": 3722 + }, + { + "epoch": 0.8584274844362463, + "grad_norm": 1.220523426514953, + "learning_rate": 1.3119163604051923e-06, + "loss": 0.37483078241348267, + "step": 3723 + }, + { + "epoch": 0.8586580585658289, + "grad_norm": 1.45963624909142, + "learning_rate": 1.3115540977891076e-06, + "loss": 0.3732140064239502, + "step": 3724 + }, + { + "epoch": 0.8588886326954116, + "grad_norm": 1.5667872254799649, + "learning_rate": 1.3111917898854779e-06, + "loss": 0.5709421634674072, + "step": 3725 + }, + { + "epoch": 0.8591192068249942, + "grad_norm": 2.0482790256244514, + "learning_rate": 1.3108294367469677e-06, + "loss": 0.5301297307014465, + "step": 3726 + }, + { + "epoch": 0.8593497809545769, + "grad_norm": 1.2253994153188903, + "learning_rate": 1.3104670384262484e-06, + "loss": 0.45979735255241394, + "step": 3727 + }, + { + "epoch": 0.8595803550841595, + "grad_norm": 1.5172885339612137, + "learning_rate": 1.3101045949759985e-06, + "loss": 0.5051921606063843, + "step": 3728 + }, + { + 
"epoch": 0.8598109292137422, + "grad_norm": 1.5432212262669465, + "learning_rate": 1.309742106448903e-06, + "loss": 0.5057204365730286, + "step": 3729 + }, + { + "epoch": 0.8600415033433249, + "grad_norm": 1.3029916397805466, + "learning_rate": 1.3093795728976535e-06, + "loss": 0.4265059530735016, + "step": 3730 + }, + { + "epoch": 0.8602720774729076, + "grad_norm": 1.2392416355330595, + "learning_rate": 1.3090169943749473e-06, + "loss": 0.39166492223739624, + "step": 3731 + }, + { + "epoch": 0.8605026516024902, + "grad_norm": 1.4335892651385718, + "learning_rate": 1.308654370933489e-06, + "loss": 0.4321832060813904, + "step": 3732 + }, + { + "epoch": 0.8607332257320729, + "grad_norm": 1.4026009292758175, + "learning_rate": 1.3082917026259906e-06, + "loss": 0.5028939247131348, + "step": 3733 + }, + { + "epoch": 0.8609637998616555, + "grad_norm": 1.461263824354524, + "learning_rate": 1.3079289895051681e-06, + "loss": 0.4642373323440552, + "step": 3734 + }, + { + "epoch": 0.8611943739912382, + "grad_norm": 1.2616373488525174, + "learning_rate": 1.3075662316237464e-06, + "loss": 0.416348397731781, + "step": 3735 + }, + { + "epoch": 0.8614249481208208, + "grad_norm": 1.9156143459520234, + "learning_rate": 1.3072034290344556e-06, + "loss": 0.48442524671554565, + "step": 3736 + }, + { + "epoch": 0.8616555222504035, + "grad_norm": 1.4675369296005183, + "learning_rate": 1.3068405817900332e-06, + "loss": 0.46903935074806213, + "step": 3737 + }, + { + "epoch": 0.8618860963799861, + "grad_norm": 1.433982633948309, + "learning_rate": 1.3064776899432224e-06, + "loss": 0.48172008991241455, + "step": 3738 + }, + { + "epoch": 0.8621166705095689, + "grad_norm": 1.4697783322173945, + "learning_rate": 1.3061147535467734e-06, + "loss": 0.44460922479629517, + "step": 3739 + }, + { + "epoch": 0.8623472446391515, + "grad_norm": 1.4552688390934359, + "learning_rate": 1.3057517726534423e-06, + "loss": 0.4728608727455139, + "step": 3740 + }, + { + "epoch": 0.8625778187687342, + "grad_norm": 
1.2981084774118934, + "learning_rate": 1.3053887473159928e-06, + "loss": 0.36457544565200806, + "step": 3741 + }, + { + "epoch": 0.8628083928983168, + "grad_norm": 1.3219603285138386, + "learning_rate": 1.3050256775871936e-06, + "loss": 0.3753359317779541, + "step": 3742 + }, + { + "epoch": 0.8630389670278995, + "grad_norm": 1.71764180047156, + "learning_rate": 1.304662563519821e-06, + "loss": 0.38679057359695435, + "step": 3743 + }, + { + "epoch": 0.8632695411574821, + "grad_norm": 1.2517686459377946, + "learning_rate": 1.304299405166657e-06, + "loss": 0.5008635520935059, + "step": 3744 + }, + { + "epoch": 0.8635001152870648, + "grad_norm": 1.6524585351681906, + "learning_rate": 1.3039362025804903e-06, + "loss": 0.3723052740097046, + "step": 3745 + }, + { + "epoch": 0.8637306894166474, + "grad_norm": 1.4101013037777343, + "learning_rate": 1.3035729558141166e-06, + "loss": 0.4227592945098877, + "step": 3746 + }, + { + "epoch": 0.8639612635462302, + "grad_norm": 1.2385954175555658, + "learning_rate": 1.3032096649203369e-06, + "loss": 0.44072139263153076, + "step": 3747 + }, + { + "epoch": 0.8641918376758128, + "grad_norm": 1.330285491132409, + "learning_rate": 1.3028463299519594e-06, + "loss": 0.49321871995925903, + "step": 3748 + }, + { + "epoch": 0.8644224118053955, + "grad_norm": 1.1777120494442346, + "learning_rate": 1.3024829509617987e-06, + "loss": 0.3751382827758789, + "step": 3749 + }, + { + "epoch": 0.8646529859349781, + "grad_norm": 1.2092220891938048, + "learning_rate": 1.3021195280026755e-06, + "loss": 0.43967729806900024, + "step": 3750 + }, + { + "epoch": 0.8648835600645608, + "grad_norm": 1.2227774970491123, + "learning_rate": 1.3017560611274172e-06, + "loss": 0.4102880358695984, + "step": 3751 + }, + { + "epoch": 0.8651141341941434, + "grad_norm": 1.4524327131347594, + "learning_rate": 1.301392550388857e-06, + "loss": 0.5225233435630798, + "step": 3752 + }, + { + "epoch": 0.8653447083237261, + "grad_norm": 1.7121734467218848, + "learning_rate": 
1.3010289958398352e-06, + "loss": 0.6021677255630493, + "step": 3753 + }, + { + "epoch": 0.8655752824533087, + "grad_norm": 1.294116122042798, + "learning_rate": 1.300665397533198e-06, + "loss": 0.5031560063362122, + "step": 3754 + }, + { + "epoch": 0.8658058565828914, + "grad_norm": 1.2573123861588813, + "learning_rate": 1.300301755521798e-06, + "loss": 0.5406110286712646, + "step": 3755 + }, + { + "epoch": 0.866036430712474, + "grad_norm": 1.3123644187859618, + "learning_rate": 1.2999380698584945e-06, + "loss": 0.5359587669372559, + "step": 3756 + }, + { + "epoch": 0.8662670048420568, + "grad_norm": 1.4006997771166723, + "learning_rate": 1.2995743405961525e-06, + "loss": 0.46089720726013184, + "step": 3757 + }, + { + "epoch": 0.8664975789716394, + "grad_norm": 1.3064464980724229, + "learning_rate": 1.2992105677876444e-06, + "loss": 0.4611746668815613, + "step": 3758 + }, + { + "epoch": 0.8667281531012221, + "grad_norm": 1.3860871410802968, + "learning_rate": 1.2988467514858478e-06, + "loss": 0.47040778398513794, + "step": 3759 + }, + { + "epoch": 0.8669587272308047, + "grad_norm": 1.4624604845389892, + "learning_rate": 1.2984828917436469e-06, + "loss": 0.5118452310562134, + "step": 3760 + }, + { + "epoch": 0.8671893013603874, + "grad_norm": 1.3248325273306294, + "learning_rate": 1.2981189886139326e-06, + "loss": 0.42349302768707275, + "step": 3761 + }, + { + "epoch": 0.86741987548997, + "grad_norm": 1.4983666129317725, + "learning_rate": 1.2977550421496022e-06, + "loss": 0.4888027310371399, + "step": 3762 + }, + { + "epoch": 0.8676504496195527, + "grad_norm": 1.5557430857836938, + "learning_rate": 1.2973910524035587e-06, + "loss": 0.5637897849082947, + "step": 3763 + }, + { + "epoch": 0.8678810237491353, + "grad_norm": 1.2906063231523421, + "learning_rate": 1.2970270194287119e-06, + "loss": 0.4159572124481201, + "step": 3764 + }, + { + "epoch": 0.868111597878718, + "grad_norm": 1.613449710248156, + "learning_rate": 1.2966629432779775e-06, + "loss": 
0.4558612108230591, + "step": 3765 + }, + { + "epoch": 0.8683421720083007, + "grad_norm": 1.229959300374187, + "learning_rate": 1.2962988240042775e-06, + "loss": 0.4235115647315979, + "step": 3766 + }, + { + "epoch": 0.8685727461378834, + "grad_norm": 1.5042750051225975, + "learning_rate": 1.2959346616605404e-06, + "loss": 0.5096476078033447, + "step": 3767 + }, + { + "epoch": 0.868803320267466, + "grad_norm": 1.3849812365321899, + "learning_rate": 1.2955704562997013e-06, + "loss": 0.47097906470298767, + "step": 3768 + }, + { + "epoch": 0.8690338943970487, + "grad_norm": 1.2057643302548011, + "learning_rate": 1.2952062079747008e-06, + "loss": 0.4508157968521118, + "step": 3769 + }, + { + "epoch": 0.8692644685266313, + "grad_norm": 1.3904260388472953, + "learning_rate": 1.2948419167384864e-06, + "loss": 0.43800675868988037, + "step": 3770 + }, + { + "epoch": 0.869495042656214, + "grad_norm": 1.3552023829739699, + "learning_rate": 1.2944775826440108e-06, + "loss": 0.5512480735778809, + "step": 3771 + }, + { + "epoch": 0.8697256167857966, + "grad_norm": 1.4428129453899297, + "learning_rate": 1.2941132057442342e-06, + "loss": 0.4654430150985718, + "step": 3772 + }, + { + "epoch": 0.8699561909153793, + "grad_norm": 1.3297596373891312, + "learning_rate": 1.293748786092123e-06, + "loss": 0.5429458618164062, + "step": 3773 + }, + { + "epoch": 0.870186765044962, + "grad_norm": 1.7953090529311853, + "learning_rate": 1.2933843237406481e-06, + "loss": 0.415671169757843, + "step": 3774 + }, + { + "epoch": 0.8704173391745447, + "grad_norm": 1.3784118855195835, + "learning_rate": 1.2930198187427884e-06, + "loss": 0.4347325563430786, + "step": 3775 + }, + { + "epoch": 0.8706479133041273, + "grad_norm": 1.3858530201589612, + "learning_rate": 1.2926552711515287e-06, + "loss": 0.41997528076171875, + "step": 3776 + }, + { + "epoch": 0.87087848743371, + "grad_norm": 1.4475652450278216, + "learning_rate": 1.292290681019859e-06, + "loss": 0.45956090092658997, + "step": 3777 + }, + { + 
"epoch": 0.8711090615632926, + "grad_norm": 1.3318373392521217, + "learning_rate": 1.2919260484007767e-06, + "loss": 0.4615165889263153, + "step": 3778 + }, + { + "epoch": 0.8713396356928753, + "grad_norm": 1.5526291007190895, + "learning_rate": 1.2915613733472848e-06, + "loss": 0.3919866681098938, + "step": 3779 + }, + { + "epoch": 0.8715702098224579, + "grad_norm": 1.5182901628405527, + "learning_rate": 1.2911966559123922e-06, + "loss": 0.5324772000312805, + "step": 3780 + }, + { + "epoch": 0.8718007839520405, + "grad_norm": 1.4899431097732017, + "learning_rate": 1.2908318961491147e-06, + "loss": 0.4813354015350342, + "step": 3781 + }, + { + "epoch": 0.8720313580816232, + "grad_norm": 1.6904916219237236, + "learning_rate": 1.2904670941104735e-06, + "loss": 0.5617851614952087, + "step": 3782 + }, + { + "epoch": 0.8722619322112058, + "grad_norm": 1.5869523154671146, + "learning_rate": 1.2901022498494963e-06, + "loss": 0.5369905233383179, + "step": 3783 + }, + { + "epoch": 0.8724925063407886, + "grad_norm": 1.4103839502113327, + "learning_rate": 1.289737363419217e-06, + "loss": 0.469723641872406, + "step": 3784 + }, + { + "epoch": 0.8727230804703712, + "grad_norm": 1.5392452648373567, + "learning_rate": 1.2893724348726757e-06, + "loss": 0.5100580453872681, + "step": 3785 + }, + { + "epoch": 0.8729536545999539, + "grad_norm": 1.4522390007049084, + "learning_rate": 1.289007464262918e-06, + "loss": 0.3959219455718994, + "step": 3786 + }, + { + "epoch": 0.8731842287295365, + "grad_norm": 1.3370969443139462, + "learning_rate": 1.2886424516429967e-06, + "loss": 0.4237936735153198, + "step": 3787 + }, + { + "epoch": 0.8734148028591192, + "grad_norm": 1.6505369649722645, + "learning_rate": 1.2882773970659693e-06, + "loss": 0.4604552984237671, + "step": 3788 + }, + { + "epoch": 0.8736453769887018, + "grad_norm": 1.4408188813706955, + "learning_rate": 1.287912300584901e-06, + "loss": 0.4265769124031067, + "step": 3789 + }, + { + "epoch": 0.8738759511182845, + "grad_norm": 
1.185765484689313, + "learning_rate": 1.2875471622528617e-06, + "loss": 0.4644312262535095, + "step": 3790 + }, + { + "epoch": 0.8741065252478671, + "grad_norm": 1.5605966972230738, + "learning_rate": 1.2871819821229282e-06, + "loss": 0.5520300269126892, + "step": 3791 + }, + { + "epoch": 0.8743370993774499, + "grad_norm": 1.2172431342127952, + "learning_rate": 1.2868167602481831e-06, + "loss": 0.42350637912750244, + "step": 3792 + }, + { + "epoch": 0.8745676735070325, + "grad_norm": 1.3605025828289865, + "learning_rate": 1.2864514966817155e-06, + "loss": 0.5148683786392212, + "step": 3793 + }, + { + "epoch": 0.8747982476366152, + "grad_norm": 1.2825363473778824, + "learning_rate": 1.2860861914766191e-06, + "loss": 0.4506865441799164, + "step": 3794 + }, + { + "epoch": 0.8750288217661978, + "grad_norm": 1.240014068038836, + "learning_rate": 1.2857208446859957e-06, + "loss": 0.4042026996612549, + "step": 3795 + }, + { + "epoch": 0.8752593958957805, + "grad_norm": 1.749789157467437, + "learning_rate": 1.2853554563629521e-06, + "loss": 0.4601382613182068, + "step": 3796 + }, + { + "epoch": 0.8754899700253631, + "grad_norm": 1.1956968937229655, + "learning_rate": 1.2849900265606007e-06, + "loss": 0.3387809097766876, + "step": 3797 + }, + { + "epoch": 0.8757205441549458, + "grad_norm": 1.3296970918872935, + "learning_rate": 1.2846245553320604e-06, + "loss": 0.5295180082321167, + "step": 3798 + }, + { + "epoch": 0.8759511182845284, + "grad_norm": 1.518762035085977, + "learning_rate": 1.2842590427304564e-06, + "loss": 0.47733891010284424, + "step": 3799 + }, + { + "epoch": 0.8761816924141111, + "grad_norm": 1.3675518552119075, + "learning_rate": 1.2838934888089198e-06, + "loss": 0.46294957399368286, + "step": 3800 + }, + { + "epoch": 0.8764122665436938, + "grad_norm": 1.3892016156570253, + "learning_rate": 1.2835278936205877e-06, + "loss": 0.4638972580432892, + "step": 3801 + }, + { + "epoch": 0.8766428406732765, + "grad_norm": 1.2670627732920314, + "learning_rate": 
1.2831622572186027e-06, + "loss": 0.5078087449073792, + "step": 3802 + }, + { + "epoch": 0.8768734148028591, + "grad_norm": 1.2490466990727205, + "learning_rate": 1.2827965796561138e-06, + "loss": 0.49626827239990234, + "step": 3803 + }, + { + "epoch": 0.8771039889324418, + "grad_norm": 1.3784871825818807, + "learning_rate": 1.2824308609862758e-06, + "loss": 0.4857192635536194, + "step": 3804 + }, + { + "epoch": 0.8773345630620244, + "grad_norm": 1.5003545684747548, + "learning_rate": 1.2820651012622498e-06, + "loss": 0.5403131246566772, + "step": 3805 + }, + { + "epoch": 0.8775651371916071, + "grad_norm": 1.532730699853752, + "learning_rate": 1.2816993005372029e-06, + "loss": 0.519463837146759, + "step": 3806 + }, + { + "epoch": 0.8777957113211897, + "grad_norm": 1.648937105926222, + "learning_rate": 1.2813334588643077e-06, + "loss": 0.6038607954978943, + "step": 3807 + }, + { + "epoch": 0.8780262854507724, + "grad_norm": 1.5251750284604964, + "learning_rate": 1.280967576296743e-06, + "loss": 0.4892663359642029, + "step": 3808 + }, + { + "epoch": 0.878256859580355, + "grad_norm": 1.4437992115831912, + "learning_rate": 1.2806016528876934e-06, + "loss": 0.47872501611709595, + "step": 3809 + }, + { + "epoch": 0.8784874337099378, + "grad_norm": 1.401497704596745, + "learning_rate": 1.28023568869035e-06, + "loss": 0.4863993227481842, + "step": 3810 + }, + { + "epoch": 0.8787180078395204, + "grad_norm": 1.2319881889422357, + "learning_rate": 1.2798696837579088e-06, + "loss": 0.45241546630859375, + "step": 3811 + }, + { + "epoch": 0.8789485819691031, + "grad_norm": 1.26957816055566, + "learning_rate": 1.2795036381435728e-06, + "loss": 0.48720863461494446, + "step": 3812 + }, + { + "epoch": 0.8791791560986857, + "grad_norm": 1.4244000796725484, + "learning_rate": 1.2791375519005507e-06, + "loss": 0.49139827489852905, + "step": 3813 + }, + { + "epoch": 0.8794097302282684, + "grad_norm": 1.1021730064681352, + "learning_rate": 1.278771425082056e-06, + "loss": 
0.41915225982666016, + "step": 3814 + }, + { + "epoch": 0.879640304357851, + "grad_norm": 1.164668093587021, + "learning_rate": 1.2784052577413095e-06, + "loss": 0.41831016540527344, + "step": 3815 + }, + { + "epoch": 0.8798708784874337, + "grad_norm": 1.392466935090571, + "learning_rate": 1.2780390499315374e-06, + "loss": 0.49456197023391724, + "step": 3816 + }, + { + "epoch": 0.8801014526170163, + "grad_norm": 1.4645341817096265, + "learning_rate": 1.2776728017059714e-06, + "loss": 0.4656866192817688, + "step": 3817 + }, + { + "epoch": 0.880332026746599, + "grad_norm": 1.375452516729426, + "learning_rate": 1.2773065131178494e-06, + "loss": 0.449514776468277, + "step": 3818 + }, + { + "epoch": 0.8805626008761817, + "grad_norm": 1.320026502962018, + "learning_rate": 1.2769401842204156e-06, + "loss": 0.3762073516845703, + "step": 3819 + }, + { + "epoch": 0.8807931750057644, + "grad_norm": 1.6471923718834367, + "learning_rate": 1.2765738150669192e-06, + "loss": 0.5680521130561829, + "step": 3820 + }, + { + "epoch": 0.881023749135347, + "grad_norm": 1.227867578043664, + "learning_rate": 1.276207405710616e-06, + "loss": 0.35371482372283936, + "step": 3821 + }, + { + "epoch": 0.8812543232649297, + "grad_norm": 1.6584454245429339, + "learning_rate": 1.2758409562047669e-06, + "loss": 0.5145018100738525, + "step": 3822 + }, + { + "epoch": 0.8814848973945123, + "grad_norm": 1.4264603788288566, + "learning_rate": 1.2754744666026392e-06, + "loss": 0.5425234436988831, + "step": 3823 + }, + { + "epoch": 0.881715471524095, + "grad_norm": 1.605664005655016, + "learning_rate": 1.275107936957506e-06, + "loss": 0.48439931869506836, + "step": 3824 + }, + { + "epoch": 0.8819460456536776, + "grad_norm": 1.4836193722422002, + "learning_rate": 1.2747413673226462e-06, + "loss": 0.5177323818206787, + "step": 3825 + }, + { + "epoch": 0.8821766197832603, + "grad_norm": 1.4672524591279896, + "learning_rate": 1.2743747577513437e-06, + "loss": 0.4718499779701233, + "step": 3826 + }, + { + 
"epoch": 0.882407193912843, + "grad_norm": 1.3580668132517044, + "learning_rate": 1.27400810829689e-06, + "loss": 0.5140804648399353, + "step": 3827 + }, + { + "epoch": 0.8826377680424257, + "grad_norm": 1.2476007061260952, + "learning_rate": 1.2736414190125805e-06, + "loss": 0.4611731767654419, + "step": 3828 + }, + { + "epoch": 0.8828683421720083, + "grad_norm": 1.3574827964922753, + "learning_rate": 1.2732746899517175e-06, + "loss": 0.526127815246582, + "step": 3829 + }, + { + "epoch": 0.883098916301591, + "grad_norm": 1.3368001624765957, + "learning_rate": 1.2729079211676085e-06, + "loss": 0.4039766192436218, + "step": 3830 + }, + { + "epoch": 0.8833294904311736, + "grad_norm": 1.5033466347185125, + "learning_rate": 1.2725411127135676e-06, + "loss": 0.4232807159423828, + "step": 3831 + }, + { + "epoch": 0.8835600645607563, + "grad_norm": 1.2556638937655993, + "learning_rate": 1.2721742646429142e-06, + "loss": 0.48490262031555176, + "step": 3832 + }, + { + "epoch": 0.8837906386903389, + "grad_norm": 1.278298782194165, + "learning_rate": 1.2718073770089729e-06, + "loss": 0.4664677083492279, + "step": 3833 + }, + { + "epoch": 0.8840212128199216, + "grad_norm": 1.3387833207328181, + "learning_rate": 1.2714404498650742e-06, + "loss": 0.4402846097946167, + "step": 3834 + }, + { + "epoch": 0.8842517869495042, + "grad_norm": 1.195436797590032, + "learning_rate": 1.2710734832645555e-06, + "loss": 0.45942988991737366, + "step": 3835 + }, + { + "epoch": 0.884482361079087, + "grad_norm": 1.3235253441897963, + "learning_rate": 1.2707064772607587e-06, + "loss": 0.45924365520477295, + "step": 3836 + }, + { + "epoch": 0.8847129352086696, + "grad_norm": 1.2350134713864223, + "learning_rate": 1.270339431907032e-06, + "loss": 0.3877851963043213, + "step": 3837 + }, + { + "epoch": 0.8849435093382523, + "grad_norm": 1.381311043724791, + "learning_rate": 1.2699723472567288e-06, + "loss": 0.45364105701446533, + "step": 3838 + }, + { + "epoch": 0.8851740834678349, + "grad_norm": 
1.2798000201692457, + "learning_rate": 1.2696052233632089e-06, + "loss": 0.3527877926826477, + "step": 3839 + }, + { + "epoch": 0.8854046575974176, + "grad_norm": 1.7105597319107566, + "learning_rate": 1.2692380602798375e-06, + "loss": 0.499268501996994, + "step": 3840 + }, + { + "epoch": 0.8856352317270002, + "grad_norm": 1.2823188650483364, + "learning_rate": 1.2688708580599854e-06, + "loss": 0.39443689584732056, + "step": 3841 + }, + { + "epoch": 0.8858658058565829, + "grad_norm": 1.442355552170661, + "learning_rate": 1.268503616757029e-06, + "loss": 0.5262328386306763, + "step": 3842 + }, + { + "epoch": 0.8860963799861655, + "grad_norm": 1.4602798515117177, + "learning_rate": 1.2681363364243509e-06, + "loss": 0.4761236608028412, + "step": 3843 + }, + { + "epoch": 0.8863269541157482, + "grad_norm": 1.3806283660695482, + "learning_rate": 1.2677690171153391e-06, + "loss": 0.5173169374465942, + "step": 3844 + }, + { + "epoch": 0.8865575282453309, + "grad_norm": 1.4796905287439253, + "learning_rate": 1.2674016588833866e-06, + "loss": 0.5304574966430664, + "step": 3845 + }, + { + "epoch": 0.8867881023749136, + "grad_norm": 1.2451043989470143, + "learning_rate": 1.2670342617818925e-06, + "loss": 0.44707632064819336, + "step": 3846 + }, + { + "epoch": 0.8870186765044962, + "grad_norm": 1.4327430501013436, + "learning_rate": 1.2666668258642628e-06, + "loss": 0.44395360350608826, + "step": 3847 + }, + { + "epoch": 0.8872492506340789, + "grad_norm": 1.5382701800989709, + "learning_rate": 1.266299351183907e-06, + "loss": 0.4993078112602234, + "step": 3848 + }, + { + "epoch": 0.8874798247636615, + "grad_norm": 1.447761685140105, + "learning_rate": 1.2659318377942418e-06, + "loss": 0.4836229681968689, + "step": 3849 + }, + { + "epoch": 0.8877103988932442, + "grad_norm": 1.1586406035440977, + "learning_rate": 1.2655642857486885e-06, + "loss": 0.4898098111152649, + "step": 3850 + }, + { + "epoch": 0.8879409730228268, + "grad_norm": 1.4550595650341691, + "learning_rate": 
1.2651966951006753e-06, + "loss": 0.5117218494415283, + "step": 3851 + }, + { + "epoch": 0.8881715471524095, + "grad_norm": 1.1751749847019868, + "learning_rate": 1.2648290659036347e-06, + "loss": 0.3920857906341553, + "step": 3852 + }, + { + "epoch": 0.8884021212819921, + "grad_norm": 1.2103531492140316, + "learning_rate": 1.2644613982110055e-06, + "loss": 0.42527467012405396, + "step": 3853 + }, + { + "epoch": 0.8886326954115749, + "grad_norm": 1.4673474591941762, + "learning_rate": 1.2640936920762318e-06, + "loss": 0.5283650159835815, + "step": 3854 + }, + { + "epoch": 0.8888632695411575, + "grad_norm": 1.1384795561192926, + "learning_rate": 1.2637259475527634e-06, + "loss": 0.3976718783378601, + "step": 3855 + }, + { + "epoch": 0.8890938436707402, + "grad_norm": 1.3777221980377923, + "learning_rate": 1.2633581646940555e-06, + "loss": 0.3767106533050537, + "step": 3856 + }, + { + "epoch": 0.8893244178003228, + "grad_norm": 1.2421308508382682, + "learning_rate": 1.2629903435535695e-06, + "loss": 0.4002486765384674, + "step": 3857 + }, + { + "epoch": 0.8895549919299055, + "grad_norm": 1.7761729251417224, + "learning_rate": 1.2626224841847718e-06, + "loss": 0.3829443156719208, + "step": 3858 + }, + { + "epoch": 0.8897855660594881, + "grad_norm": 1.6906089339859913, + "learning_rate": 1.2622545866411342e-06, + "loss": 0.5338312983512878, + "step": 3859 + }, + { + "epoch": 0.8900161401890708, + "grad_norm": 1.3435755743208722, + "learning_rate": 1.2618866509761347e-06, + "loss": 0.49615299701690674, + "step": 3860 + }, + { + "epoch": 0.8902467143186534, + "grad_norm": 1.3772165276715471, + "learning_rate": 1.2615186772432562e-06, + "loss": 0.5080281496047974, + "step": 3861 + }, + { + "epoch": 0.8904772884482361, + "grad_norm": 1.3191602759544514, + "learning_rate": 1.2611506654959877e-06, + "loss": 0.4631335139274597, + "step": 3862 + }, + { + "epoch": 0.8907078625778188, + "grad_norm": 1.6754337710064344, + "learning_rate": 1.2607826157878232e-06, + "loss": 
0.5179207921028137, + "step": 3863 + }, + { + "epoch": 0.8909384367074015, + "grad_norm": 1.8689690583071528, + "learning_rate": 1.260414528172263e-06, + "loss": 0.5107406973838806, + "step": 3864 + }, + { + "epoch": 0.8911690108369841, + "grad_norm": 1.4263135964434357, + "learning_rate": 1.2600464027028112e-06, + "loss": 0.3719855844974518, + "step": 3865 + }, + { + "epoch": 0.8913995849665668, + "grad_norm": 1.2717821474296322, + "learning_rate": 1.2596782394329797e-06, + "loss": 0.4703129231929779, + "step": 3866 + }, + { + "epoch": 0.8916301590961494, + "grad_norm": 1.4971801597034615, + "learning_rate": 1.2593100384162842e-06, + "loss": 0.49239644408226013, + "step": 3867 + }, + { + "epoch": 0.8918607332257321, + "grad_norm": 1.505796830220407, + "learning_rate": 1.2589417997062468e-06, + "loss": 0.5194324851036072, + "step": 3868 + }, + { + "epoch": 0.8920913073553147, + "grad_norm": 1.2722329079463401, + "learning_rate": 1.2585735233563943e-06, + "loss": 0.4224633574485779, + "step": 3869 + }, + { + "epoch": 0.8923218814848974, + "grad_norm": 1.7020995758876771, + "learning_rate": 1.2582052094202594e-06, + "loss": 0.4377749562263489, + "step": 3870 + }, + { + "epoch": 0.89255245561448, + "grad_norm": 1.2037908365106704, + "learning_rate": 1.2578368579513809e-06, + "loss": 0.42847269773483276, + "step": 3871 + }, + { + "epoch": 0.8927830297440628, + "grad_norm": 1.4087908465200083, + "learning_rate": 1.2574684690033018e-06, + "loss": 0.5194802284240723, + "step": 3872 + }, + { + "epoch": 0.8930136038736454, + "grad_norm": 1.3553883811442613, + "learning_rate": 1.2571000426295716e-06, + "loss": 0.4401082396507263, + "step": 3873 + }, + { + "epoch": 0.8932441780032281, + "grad_norm": 1.5117708123403886, + "learning_rate": 1.2567315788837442e-06, + "loss": 0.38890570402145386, + "step": 3874 + }, + { + "epoch": 0.8934747521328107, + "grad_norm": 1.4931972330534145, + "learning_rate": 1.2563630778193802e-06, + "loss": 0.522612452507019, + "step": 3875 + }, + { + 
"epoch": 0.8937053262623934, + "grad_norm": 1.757870637645656, + "learning_rate": 1.2559945394900447e-06, + "loss": 0.516444981098175, + "step": 3876 + }, + { + "epoch": 0.893935900391976, + "grad_norm": 1.193092685346779, + "learning_rate": 1.255625963949308e-06, + "loss": 0.4084436297416687, + "step": 3877 + }, + { + "epoch": 0.8941664745215587, + "grad_norm": 1.4364911954858623, + "learning_rate": 1.2552573512507474e-06, + "loss": 0.4561755657196045, + "step": 3878 + }, + { + "epoch": 0.8943970486511413, + "grad_norm": 1.3498949478529019, + "learning_rate": 1.2548887014479435e-06, + "loss": 0.44372665882110596, + "step": 3879 + }, + { + "epoch": 0.894627622780724, + "grad_norm": 1.4181034577590674, + "learning_rate": 1.2545200145944837e-06, + "loss": 0.4714791774749756, + "step": 3880 + }, + { + "epoch": 0.8948581969103067, + "grad_norm": 1.506508633299638, + "learning_rate": 1.25415129074396e-06, + "loss": 0.48050814867019653, + "step": 3881 + }, + { + "epoch": 0.8950887710398894, + "grad_norm": 1.7788226663138391, + "learning_rate": 1.2537825299499708e-06, + "loss": 0.4078127145767212, + "step": 3882 + }, + { + "epoch": 0.895319345169472, + "grad_norm": 1.1273639481853348, + "learning_rate": 1.2534137322661187e-06, + "loss": 0.41556763648986816, + "step": 3883 + }, + { + "epoch": 0.8955499192990547, + "grad_norm": 1.2916565664076916, + "learning_rate": 1.2530448977460127e-06, + "loss": 0.3862306475639343, + "step": 3884 + }, + { + "epoch": 0.8957804934286373, + "grad_norm": 1.2417402269481763, + "learning_rate": 1.2526760264432656e-06, + "loss": 0.4071112871170044, + "step": 3885 + }, + { + "epoch": 0.89601106755822, + "grad_norm": 1.2074121865816745, + "learning_rate": 1.2523071184114978e-06, + "loss": 0.36956706643104553, + "step": 3886 + }, + { + "epoch": 0.8962416416878026, + "grad_norm": 1.5187969981751328, + "learning_rate": 1.251938173704333e-06, + "loss": 0.5087941884994507, + "step": 3887 + }, + { + "epoch": 0.8964722158173853, + "grad_norm": 
1.5300476571906632, + "learning_rate": 1.2515691923754017e-06, + "loss": 0.5636804103851318, + "step": 3888 + }, + { + "epoch": 0.896702789946968, + "grad_norm": 1.2028947296679213, + "learning_rate": 1.2512001744783383e-06, + "loss": 0.40899237990379333, + "step": 3889 + }, + { + "epoch": 0.8969333640765507, + "grad_norm": 1.2319974158201112, + "learning_rate": 1.2508311200667839e-06, + "loss": 0.3964187800884247, + "step": 3890 + }, + { + "epoch": 0.8971639382061333, + "grad_norm": 1.1881521968898023, + "learning_rate": 1.2504620291943838e-06, + "loss": 0.43190568685531616, + "step": 3891 + }, + { + "epoch": 0.897394512335716, + "grad_norm": 1.5323277954151004, + "learning_rate": 1.25009290191479e-06, + "loss": 0.5640079379081726, + "step": 3892 + }, + { + "epoch": 0.8976250864652986, + "grad_norm": 1.5228387521540339, + "learning_rate": 1.2497237382816577e-06, + "loss": 0.4969727396965027, + "step": 3893 + }, + { + "epoch": 0.8978556605948812, + "grad_norm": 1.438395912517929, + "learning_rate": 1.2493545383486497e-06, + "loss": 0.43710076808929443, + "step": 3894 + }, + { + "epoch": 0.8980862347244639, + "grad_norm": 1.217545409086522, + "learning_rate": 1.248985302169432e-06, + "loss": 0.4246212840080261, + "step": 3895 + }, + { + "epoch": 0.8983168088540465, + "grad_norm": 1.1837244532547113, + "learning_rate": 1.2486160297976776e-06, + "loss": 0.3812369108200073, + "step": 3896 + }, + { + "epoch": 0.8985473829836292, + "grad_norm": 2.1554879190255685, + "learning_rate": 1.248246721287063e-06, + "loss": 0.6407653093338013, + "step": 3897 + }, + { + "epoch": 0.8987779571132118, + "grad_norm": 1.6947319293322312, + "learning_rate": 1.247877376691272e-06, + "loss": 0.47748661041259766, + "step": 3898 + }, + { + "epoch": 0.8990085312427946, + "grad_norm": 1.5504399903750061, + "learning_rate": 1.2475079960639922e-06, + "loss": 0.5047964453697205, + "step": 3899 + }, + { + "epoch": 0.8992391053723772, + "grad_norm": 1.1781117181895115, + "learning_rate": 
1.2471385794589167e-06, + "loss": 0.37989485263824463, + "step": 3900 + }, + { + "epoch": 0.8994696795019599, + "grad_norm": 1.2955755733611327, + "learning_rate": 1.2467691269297437e-06, + "loss": 0.38857924938201904, + "step": 3901 + }, + { + "epoch": 0.8997002536315425, + "grad_norm": 1.2312069291338004, + "learning_rate": 1.2463996385301776e-06, + "loss": 0.45452386140823364, + "step": 3902 + }, + { + "epoch": 0.8999308277611252, + "grad_norm": 1.5565774035889273, + "learning_rate": 1.2460301143139267e-06, + "loss": 0.41920900344848633, + "step": 3903 + }, + { + "epoch": 0.9001614018907078, + "grad_norm": 1.542875547138451, + "learning_rate": 1.2456605543347051e-06, + "loss": 0.5979125499725342, + "step": 3904 + }, + { + "epoch": 0.9003919760202905, + "grad_norm": 1.5505304900467811, + "learning_rate": 1.2452909586462323e-06, + "loss": 0.5517082214355469, + "step": 3905 + }, + { + "epoch": 0.9006225501498731, + "grad_norm": 1.2381443535248697, + "learning_rate": 1.244921327302233e-06, + "loss": 0.4558248519897461, + "step": 3906 + }, + { + "epoch": 0.9008531242794559, + "grad_norm": 1.5503878716470787, + "learning_rate": 1.2445516603564362e-06, + "loss": 0.5637399554252625, + "step": 3907 + }, + { + "epoch": 0.9010836984090385, + "grad_norm": 1.2396897738245216, + "learning_rate": 1.2441819578625775e-06, + "loss": 0.5208043456077576, + "step": 3908 + }, + { + "epoch": 0.9013142725386212, + "grad_norm": 1.400218770913741, + "learning_rate": 1.243812219874396e-06, + "loss": 0.3901744484901428, + "step": 3909 + }, + { + "epoch": 0.9015448466682038, + "grad_norm": 1.4025338042989108, + "learning_rate": 1.2434424464456376e-06, + "loss": 0.5770972967147827, + "step": 3910 + }, + { + "epoch": 0.9017754207977865, + "grad_norm": 1.375223010916462, + "learning_rate": 1.2430726376300525e-06, + "loss": 0.3457295894622803, + "step": 3911 + }, + { + "epoch": 0.9020059949273691, + "grad_norm": 1.3118554362154196, + "learning_rate": 1.242702793481396e-06, + "loss": 
0.4487595558166504, + "step": 3912 + }, + { + "epoch": 0.9022365690569518, + "grad_norm": 1.2548104794507453, + "learning_rate": 1.2423329140534286e-06, + "loss": 0.4369876980781555, + "step": 3913 + }, + { + "epoch": 0.9024671431865344, + "grad_norm": 1.5693012853497335, + "learning_rate": 1.2419629993999165e-06, + "loss": 0.43154388666152954, + "step": 3914 + }, + { + "epoch": 0.9026977173161171, + "grad_norm": 1.313977531855456, + "learning_rate": 1.24159304957463e-06, + "loss": 0.4528294801712036, + "step": 3915 + }, + { + "epoch": 0.9029282914456997, + "grad_norm": 1.4152554930408472, + "learning_rate": 1.2412230646313452e-06, + "loss": 0.4204830527305603, + "step": 3916 + }, + { + "epoch": 0.9031588655752825, + "grad_norm": 1.3117655747531898, + "learning_rate": 1.2408530446238433e-06, + "loss": 0.46544623374938965, + "step": 3917 + }, + { + "epoch": 0.9033894397048651, + "grad_norm": 1.19103055945586, + "learning_rate": 1.2404829896059107e-06, + "loss": 0.39419203996658325, + "step": 3918 + }, + { + "epoch": 0.9036200138344478, + "grad_norm": 1.3085505059347724, + "learning_rate": 1.240112899631338e-06, + "loss": 0.4214451014995575, + "step": 3919 + }, + { + "epoch": 0.9038505879640304, + "grad_norm": 1.310156094815825, + "learning_rate": 1.239742774753922e-06, + "loss": 0.42385220527648926, + "step": 3920 + }, + { + "epoch": 0.9040811620936131, + "grad_norm": 1.4457769612459037, + "learning_rate": 1.2393726150274636e-06, + "loss": 0.5206592082977295, + "step": 3921 + }, + { + "epoch": 0.9043117362231957, + "grad_norm": 1.4602545667694231, + "learning_rate": 1.23900242050577e-06, + "loss": 0.4358803629875183, + "step": 3922 + }, + { + "epoch": 0.9045423103527784, + "grad_norm": 1.3596132034754325, + "learning_rate": 1.2386321912426524e-06, + "loss": 0.4525173306465149, + "step": 3923 + }, + { + "epoch": 0.904772884482361, + "grad_norm": 1.4736466426478543, + "learning_rate": 1.2382619272919273e-06, + "loss": 0.48877185583114624, + "step": 3924 + }, + { + 
"epoch": 0.9050034586119438, + "grad_norm": 1.152358955118646, + "learning_rate": 1.2378916287074162e-06, + "loss": 0.4401814341545105, + "step": 3925 + }, + { + "epoch": 0.9052340327415264, + "grad_norm": 1.337265572878916, + "learning_rate": 1.2375212955429459e-06, + "loss": 0.37818846106529236, + "step": 3926 + }, + { + "epoch": 0.9054646068711091, + "grad_norm": 1.285760527835995, + "learning_rate": 1.2371509278523482e-06, + "loss": 0.36472904682159424, + "step": 3927 + }, + { + "epoch": 0.9056951810006917, + "grad_norm": 1.2999097028645303, + "learning_rate": 1.2367805256894596e-06, + "loss": 0.5113309025764465, + "step": 3928 + }, + { + "epoch": 0.9059257551302744, + "grad_norm": 1.2052405163032573, + "learning_rate": 1.2364100891081218e-06, + "loss": 0.36074432730674744, + "step": 3929 + }, + { + "epoch": 0.906156329259857, + "grad_norm": 1.3493065976556424, + "learning_rate": 1.2360396181621819e-06, + "loss": 0.39177048206329346, + "step": 3930 + }, + { + "epoch": 0.9063869033894397, + "grad_norm": 1.3736058093352046, + "learning_rate": 1.2356691129054912e-06, + "loss": 0.4758113622665405, + "step": 3931 + }, + { + "epoch": 0.9066174775190223, + "grad_norm": 1.3614234520329223, + "learning_rate": 1.2352985733919065e-06, + "loss": 0.3840598464012146, + "step": 3932 + }, + { + "epoch": 0.906848051648605, + "grad_norm": 1.510763334369694, + "learning_rate": 1.2349279996752892e-06, + "loss": 0.5103816986083984, + "step": 3933 + }, + { + "epoch": 0.9070786257781877, + "grad_norm": 1.466046011323441, + "learning_rate": 1.234557391809507e-06, + "loss": 0.4175255298614502, + "step": 3934 + }, + { + "epoch": 0.9073091999077704, + "grad_norm": 2.627411026682294, + "learning_rate": 1.2341867498484302e-06, + "loss": 0.4504377245903015, + "step": 3935 + }, + { + "epoch": 0.907539774037353, + "grad_norm": 1.2868923632717955, + "learning_rate": 1.2338160738459355e-06, + "loss": 0.45868122577667236, + "step": 3936 + }, + { + "epoch": 0.9077703481669357, + "grad_norm": 
1.3231771761325972, + "learning_rate": 1.2334453638559054e-06, + "loss": 0.5161639451980591, + "step": 3937 + }, + { + "epoch": 0.9080009222965183, + "grad_norm": 1.5486748129834036, + "learning_rate": 1.2330746199322257e-06, + "loss": 0.44561630487442017, + "step": 3938 + }, + { + "epoch": 0.908231496426101, + "grad_norm": 1.595486700598371, + "learning_rate": 1.2327038421287876e-06, + "loss": 0.4780126214027405, + "step": 3939 + }, + { + "epoch": 0.9084620705556836, + "grad_norm": 1.2226582649026916, + "learning_rate": 1.2323330304994877e-06, + "loss": 0.505066990852356, + "step": 3940 + }, + { + "epoch": 0.9086926446852663, + "grad_norm": 1.3041405659013958, + "learning_rate": 1.2319621850982274e-06, + "loss": 0.5053813457489014, + "step": 3941 + }, + { + "epoch": 0.9089232188148489, + "grad_norm": 1.178162092657054, + "learning_rate": 1.2315913059789125e-06, + "loss": 0.3579134941101074, + "step": 3942 + }, + { + "epoch": 0.9091537929444317, + "grad_norm": 1.4949007072050957, + "learning_rate": 1.2312203931954543e-06, + "loss": 0.5703507661819458, + "step": 3943 + }, + { + "epoch": 0.9093843670740143, + "grad_norm": 1.4141867956521472, + "learning_rate": 1.2308494468017685e-06, + "loss": 0.4972035884857178, + "step": 3944 + }, + { + "epoch": 0.909614941203597, + "grad_norm": 1.8338477540837272, + "learning_rate": 1.230478466851776e-06, + "loss": 0.5528955459594727, + "step": 3945 + }, + { + "epoch": 0.9098455153331796, + "grad_norm": 1.4009292239467905, + "learning_rate": 1.2301074533994024e-06, + "loss": 0.4099786877632141, + "step": 3946 + }, + { + "epoch": 0.9100760894627623, + "grad_norm": 1.3414325662099453, + "learning_rate": 1.2297364064985786e-06, + "loss": 0.41020166873931885, + "step": 3947 + }, + { + "epoch": 0.9103066635923449, + "grad_norm": 1.4112377219226224, + "learning_rate": 1.2293653262032395e-06, + "loss": 0.4340355694293976, + "step": 3948 + }, + { + "epoch": 0.9105372377219276, + "grad_norm": 1.376446280407005, + "learning_rate": 
1.2289942125673261e-06, + "loss": 0.4369847774505615, + "step": 3949 + }, + { + "epoch": 0.9107678118515102, + "grad_norm": 1.4688076477466663, + "learning_rate": 1.228623065644783e-06, + "loss": 0.406423956155777, + "step": 3950 + }, + { + "epoch": 0.910998385981093, + "grad_norm": 1.4230223897567287, + "learning_rate": 1.22825188548956e-06, + "loss": 0.5081946849822998, + "step": 3951 + }, + { + "epoch": 0.9112289601106756, + "grad_norm": 1.7017899930713631, + "learning_rate": 1.2278806721556124e-06, + "loss": 0.43494492769241333, + "step": 3952 + }, + { + "epoch": 0.9114595342402583, + "grad_norm": 1.348884752431283, + "learning_rate": 1.2275094256968996e-06, + "loss": 0.35356831550598145, + "step": 3953 + }, + { + "epoch": 0.9116901083698409, + "grad_norm": 1.2260567341450548, + "learning_rate": 1.227138146167386e-06, + "loss": 0.36741551756858826, + "step": 3954 + }, + { + "epoch": 0.9119206824994236, + "grad_norm": 1.4686302016765889, + "learning_rate": 1.226766833621041e-06, + "loss": 0.491504430770874, + "step": 3955 + }, + { + "epoch": 0.9121512566290062, + "grad_norm": 1.266294151631501, + "learning_rate": 1.2263954881118384e-06, + "loss": 0.4558037519454956, + "step": 3956 + }, + { + "epoch": 0.9123818307585889, + "grad_norm": 1.398276341256052, + "learning_rate": 1.2260241096937571e-06, + "loss": 0.3941671848297119, + "step": 3957 + }, + { + "epoch": 0.9126124048881715, + "grad_norm": 1.7133993603535684, + "learning_rate": 1.2256526984207809e-06, + "loss": 0.40505191683769226, + "step": 3958 + }, + { + "epoch": 0.9128429790177542, + "grad_norm": 1.3369540241008888, + "learning_rate": 1.2252812543468982e-06, + "loss": 0.4669588804244995, + "step": 3959 + }, + { + "epoch": 0.9130735531473368, + "grad_norm": 1.6346862522902008, + "learning_rate": 1.2249097775261014e-06, + "loss": 0.535057544708252, + "step": 3960 + }, + { + "epoch": 0.9133041272769196, + "grad_norm": 1.465530924269544, + "learning_rate": 1.2245382680123898e-06, + "loss": 
0.5127478837966919, + "step": 3961 + }, + { + "epoch": 0.9135347014065022, + "grad_norm": 1.239878706419753, + "learning_rate": 1.224166725859765e-06, + "loss": 0.5004767179489136, + "step": 3962 + }, + { + "epoch": 0.9137652755360849, + "grad_norm": 1.3382850542269662, + "learning_rate": 1.2237951511222346e-06, + "loss": 0.47929924726486206, + "step": 3963 + }, + { + "epoch": 0.9139958496656675, + "grad_norm": 1.3650943807220162, + "learning_rate": 1.2234235438538109e-06, + "loss": 0.5619359016418457, + "step": 3964 + }, + { + "epoch": 0.9142264237952502, + "grad_norm": 2.173999313160228, + "learning_rate": 1.223051904108511e-06, + "loss": 0.44648507237434387, + "step": 3965 + }, + { + "epoch": 0.9144569979248328, + "grad_norm": 1.5081082363333118, + "learning_rate": 1.2226802319403562e-06, + "loss": 0.4451872706413269, + "step": 3966 + }, + { + "epoch": 0.9146875720544155, + "grad_norm": 1.1999813764066747, + "learning_rate": 1.222308527403373e-06, + "loss": 0.44295474886894226, + "step": 3967 + }, + { + "epoch": 0.9149181461839981, + "grad_norm": 1.4510785821223537, + "learning_rate": 1.221936790551592e-06, + "loss": 0.517430305480957, + "step": 3968 + }, + { + "epoch": 0.9151487203135809, + "grad_norm": 1.2648448897941866, + "learning_rate": 1.2215650214390493e-06, + "loss": 0.4819454252719879, + "step": 3969 + }, + { + "epoch": 0.9153792944431635, + "grad_norm": 1.40726836834287, + "learning_rate": 1.2211932201197855e-06, + "loss": 0.41739264130592346, + "step": 3970 + }, + { + "epoch": 0.9156098685727462, + "grad_norm": 1.214750449543567, + "learning_rate": 1.2208213866478452e-06, + "loss": 0.38833269476890564, + "step": 3971 + }, + { + "epoch": 0.9158404427023288, + "grad_norm": 1.4780394203565799, + "learning_rate": 1.2204495210772784e-06, + "loss": 0.48899054527282715, + "step": 3972 + }, + { + "epoch": 0.9160710168319115, + "grad_norm": 1.4236888721907983, + "learning_rate": 1.2200776234621395e-06, + "loss": 0.5201622247695923, + "step": 3973 + }, + { + 
"epoch": 0.9163015909614941, + "grad_norm": 1.4696703280770271, + "learning_rate": 1.219705693856488e-06, + "loss": 0.4105098843574524, + "step": 3974 + }, + { + "epoch": 0.9165321650910768, + "grad_norm": 1.2658629585457457, + "learning_rate": 1.2193337323143865e-06, + "loss": 0.45458245277404785, + "step": 3975 + }, + { + "epoch": 0.9167627392206594, + "grad_norm": 1.4906657502786624, + "learning_rate": 1.2189617388899049e-06, + "loss": 0.5013390779495239, + "step": 3976 + }, + { + "epoch": 0.9169933133502421, + "grad_norm": 1.3837275498584536, + "learning_rate": 1.218589713637115e-06, + "loss": 0.37065303325653076, + "step": 3977 + }, + { + "epoch": 0.9172238874798248, + "grad_norm": 1.4237915808433583, + "learning_rate": 1.218217656610095e-06, + "loss": 0.45158177614212036, + "step": 3978 + }, + { + "epoch": 0.9174544616094075, + "grad_norm": 1.3261399530988285, + "learning_rate": 1.2178455678629271e-06, + "loss": 0.4439426064491272, + "step": 3979 + }, + { + "epoch": 0.9176850357389901, + "grad_norm": 1.4056969202356144, + "learning_rate": 1.217473447449698e-06, + "loss": 0.42215704917907715, + "step": 3980 + }, + { + "epoch": 0.9179156098685728, + "grad_norm": 1.6572776500354818, + "learning_rate": 1.2171012954244991e-06, + "loss": 0.42273545265197754, + "step": 3981 + }, + { + "epoch": 0.9181461839981554, + "grad_norm": 1.5659197643503024, + "learning_rate": 1.216729111841427e-06, + "loss": 0.6045219898223877, + "step": 3982 + }, + { + "epoch": 0.9183767581277381, + "grad_norm": 1.318642532575583, + "learning_rate": 1.216356896754582e-06, + "loss": 0.49316874146461487, + "step": 3983 + }, + { + "epoch": 0.9186073322573207, + "grad_norm": 1.2984174252340932, + "learning_rate": 1.2159846502180692e-06, + "loss": 0.5222599506378174, + "step": 3984 + }, + { + "epoch": 0.9188379063869034, + "grad_norm": 1.21924477747188, + "learning_rate": 1.2156123722859988e-06, + "loss": 0.4513903856277466, + "step": 3985 + }, + { + "epoch": 0.919068480516486, + "grad_norm": 
1.5286242494549134, + "learning_rate": 1.2152400630124846e-06, + "loss": 0.4946150779724121, + "step": 3986 + }, + { + "epoch": 0.9192990546460688, + "grad_norm": 1.6287340554518628, + "learning_rate": 1.2148677224516458e-06, + "loss": 0.5482569336891174, + "step": 3987 + }, + { + "epoch": 0.9195296287756514, + "grad_norm": 1.4490082622042646, + "learning_rate": 1.2144953506576061e-06, + "loss": 0.457091361284256, + "step": 3988 + }, + { + "epoch": 0.9197602029052341, + "grad_norm": 1.378032718586854, + "learning_rate": 1.2141229476844933e-06, + "loss": 0.4262084364891052, + "step": 3989 + }, + { + "epoch": 0.9199907770348167, + "grad_norm": 1.2394422456854066, + "learning_rate": 1.2137505135864402e-06, + "loss": 0.4905529022216797, + "step": 3990 + }, + { + "epoch": 0.9202213511643994, + "grad_norm": 1.3246738813802295, + "learning_rate": 1.2133780484175833e-06, + "loss": 0.5001873970031738, + "step": 3991 + }, + { + "epoch": 0.920451925293982, + "grad_norm": 1.4663495799657225, + "learning_rate": 1.2130055522320647e-06, + "loss": 0.396418035030365, + "step": 3992 + }, + { + "epoch": 0.9206824994235647, + "grad_norm": 1.5742445852004807, + "learning_rate": 1.2126330250840302e-06, + "loss": 0.5743722915649414, + "step": 3993 + }, + { + "epoch": 0.9209130735531473, + "grad_norm": 1.720134285882963, + "learning_rate": 1.212260467027631e-06, + "loss": 0.5134707689285278, + "step": 3994 + }, + { + "epoch": 0.92114364768273, + "grad_norm": 1.2913764867867046, + "learning_rate": 1.2118878781170213e-06, + "loss": 0.4191853404045105, + "step": 3995 + }, + { + "epoch": 0.9213742218123127, + "grad_norm": 1.8061166260156263, + "learning_rate": 1.2115152584063613e-06, + "loss": 0.3430103063583374, + "step": 3996 + }, + { + "epoch": 0.9216047959418954, + "grad_norm": 1.491788048135039, + "learning_rate": 1.2111426079498147e-06, + "loss": 0.5229896903038025, + "step": 3997 + }, + { + "epoch": 0.921835370071478, + "grad_norm": 1.9288487767080142, + "learning_rate": 
1.2107699268015501e-06, + "loss": 0.5028181076049805, + "step": 3998 + }, + { + "epoch": 0.9220659442010607, + "grad_norm": 1.8323250729268132, + "learning_rate": 1.2103972150157407e-06, + "loss": 0.4662501811981201, + "step": 3999 + }, + { + "epoch": 0.9222965183306433, + "grad_norm": 1.7877363086665337, + "learning_rate": 1.2100244726465636e-06, + "loss": 0.5581385493278503, + "step": 4000 + }, + { + "epoch": 0.922527092460226, + "grad_norm": 1.5059656153682595, + "learning_rate": 1.2096516997482012e-06, + "loss": 0.3925841450691223, + "step": 4001 + }, + { + "epoch": 0.9227576665898086, + "grad_norm": 1.4478402824011334, + "learning_rate": 1.2092788963748393e-06, + "loss": 0.4021197557449341, + "step": 4002 + }, + { + "epoch": 0.9229882407193913, + "grad_norm": 1.5875480480080288, + "learning_rate": 1.2089060625806683e-06, + "loss": 0.5519800186157227, + "step": 4003 + }, + { + "epoch": 0.923218814848974, + "grad_norm": 1.4740215502095901, + "learning_rate": 1.2085331984198847e-06, + "loss": 0.4426038861274719, + "step": 4004 + }, + { + "epoch": 0.9234493889785566, + "grad_norm": 1.3127950735735558, + "learning_rate": 1.2081603039466872e-06, + "loss": 0.4370608925819397, + "step": 4005 + }, + { + "epoch": 0.9236799631081393, + "grad_norm": 1.6270244555647773, + "learning_rate": 1.2077873792152797e-06, + "loss": 0.5535042881965637, + "step": 4006 + }, + { + "epoch": 0.9239105372377219, + "grad_norm": 1.4254025319676356, + "learning_rate": 1.2074144242798708e-06, + "loss": 0.45786774158477783, + "step": 4007 + }, + { + "epoch": 0.9241411113673046, + "grad_norm": 1.305332226115227, + "learning_rate": 1.207041439194673e-06, + "loss": 0.38189244270324707, + "step": 4008 + }, + { + "epoch": 0.9243716854968872, + "grad_norm": 1.4825176983109143, + "learning_rate": 1.206668424013904e-06, + "loss": 0.48782190680503845, + "step": 4009 + }, + { + "epoch": 0.9246022596264699, + "grad_norm": 1.4182276344304934, + "learning_rate": 1.2062953787917852e-06, + "loss": 
0.46295344829559326, + "step": 4010 + }, + { + "epoch": 0.9248328337560525, + "grad_norm": 1.370453601452758, + "learning_rate": 1.205922303582542e-06, + "loss": 0.5205795764923096, + "step": 4011 + }, + { + "epoch": 0.9250634078856352, + "grad_norm": 1.431830816120071, + "learning_rate": 1.205549198440405e-06, + "loss": 0.47622987627983093, + "step": 4012 + }, + { + "epoch": 0.9252939820152178, + "grad_norm": 1.3190370245605134, + "learning_rate": 1.2051760634196091e-06, + "loss": 0.4826146960258484, + "step": 4013 + }, + { + "epoch": 0.9255245561448006, + "grad_norm": 1.608771307027525, + "learning_rate": 1.2048028985743928e-06, + "loss": 0.46193474531173706, + "step": 4014 + }, + { + "epoch": 0.9257551302743832, + "grad_norm": 1.4926107871852312, + "learning_rate": 1.2044297039589996e-06, + "loss": 0.523394763469696, + "step": 4015 + }, + { + "epoch": 0.9259857044039659, + "grad_norm": 1.3096026982819484, + "learning_rate": 1.2040564796276773e-06, + "loss": 0.3963446617126465, + "step": 4016 + }, + { + "epoch": 0.9262162785335485, + "grad_norm": 1.3803899653039033, + "learning_rate": 1.2036832256346774e-06, + "loss": 0.5016456842422485, + "step": 4017 + }, + { + "epoch": 0.9264468526631312, + "grad_norm": 1.2198633348825472, + "learning_rate": 1.2033099420342566e-06, + "loss": 0.47298160195350647, + "step": 4018 + }, + { + "epoch": 0.9266774267927138, + "grad_norm": 1.5448162104307424, + "learning_rate": 1.2029366288806748e-06, + "loss": 0.387129545211792, + "step": 4019 + }, + { + "epoch": 0.9269080009222965, + "grad_norm": 1.4210281769521962, + "learning_rate": 1.2025632862281976e-06, + "loss": 0.46101367473602295, + "step": 4020 + }, + { + "epoch": 0.9271385750518791, + "grad_norm": 1.364554371793265, + "learning_rate": 1.2021899141310938e-06, + "loss": 0.4242950677871704, + "step": 4021 + }, + { + "epoch": 0.9273691491814618, + "grad_norm": 1.5524341283687932, + "learning_rate": 1.201816512643637e-06, + "loss": 0.45983830094337463, + "step": 4022 + }, + { + 
"epoch": 0.9275997233110445, + "grad_norm": 1.3760025635830133, + "learning_rate": 1.2014430818201044e-06, + "loss": 0.39785802364349365, + "step": 4023 + }, + { + "epoch": 0.9278302974406272, + "grad_norm": 1.254017871701417, + "learning_rate": 1.2010696217147783e-06, + "loss": 0.39265739917755127, + "step": 4024 + }, + { + "epoch": 0.9280608715702098, + "grad_norm": 1.4761130221315304, + "learning_rate": 1.2006961323819455e-06, + "loss": 0.49783703684806824, + "step": 4025 + }, + { + "epoch": 0.9282914456997925, + "grad_norm": 1.3764899481486361, + "learning_rate": 1.2003226138758953e-06, + "loss": 0.4479181170463562, + "step": 4026 + }, + { + "epoch": 0.9285220198293751, + "grad_norm": 1.4404345233811269, + "learning_rate": 1.199949066250923e-06, + "loss": 0.5205901265144348, + "step": 4027 + }, + { + "epoch": 0.9287525939589578, + "grad_norm": 1.3718010528366764, + "learning_rate": 1.1995754895613277e-06, + "loss": 0.5163009762763977, + "step": 4028 + }, + { + "epoch": 0.9289831680885404, + "grad_norm": 1.6219891318512447, + "learning_rate": 1.1992018838614124e-06, + "loss": 0.5746268033981323, + "step": 4029 + }, + { + "epoch": 0.9292137422181231, + "grad_norm": 1.2896226756922917, + "learning_rate": 1.1988282492054844e-06, + "loss": 0.5306442975997925, + "step": 4030 + }, + { + "epoch": 0.9294443163477057, + "grad_norm": 1.1978686339854372, + "learning_rate": 1.198454585647855e-06, + "loss": 0.4219534993171692, + "step": 4031 + }, + { + "epoch": 0.9296748904772885, + "grad_norm": 1.3997557750947305, + "learning_rate": 1.1980808932428406e-06, + "loss": 0.4167936444282532, + "step": 4032 + }, + { + "epoch": 0.9299054646068711, + "grad_norm": 1.2271684703243566, + "learning_rate": 1.197707172044761e-06, + "loss": 0.42376089096069336, + "step": 4033 + }, + { + "epoch": 0.9301360387364538, + "grad_norm": 1.5370602561856461, + "learning_rate": 1.1973334221079398e-06, + "loss": 0.48729848861694336, + "step": 4034 + }, + { + "epoch": 0.9303666128660364, + 
"grad_norm": 1.2353226603771892, + "learning_rate": 1.1969596434867062e-06, + "loss": 0.45877987146377563, + "step": 4035 + }, + { + "epoch": 0.9305971869956191, + "grad_norm": 1.2531522631367908, + "learning_rate": 1.196585836235392e-06, + "loss": 0.504621684551239, + "step": 4036 + }, + { + "epoch": 0.9308277611252017, + "grad_norm": 1.202880043912139, + "learning_rate": 1.1962120004083342e-06, + "loss": 0.45170748233795166, + "step": 4037 + }, + { + "epoch": 0.9310583352547844, + "grad_norm": 1.3604906368473153, + "learning_rate": 1.1958381360598737e-06, + "loss": 0.3969152569770813, + "step": 4038 + }, + { + "epoch": 0.931288909384367, + "grad_norm": 1.2718279913855612, + "learning_rate": 1.1954642432443553e-06, + "loss": 0.4286048412322998, + "step": 4039 + }, + { + "epoch": 0.9315194835139498, + "grad_norm": 1.4261317138789782, + "learning_rate": 1.1950903220161284e-06, + "loss": 0.3755400776863098, + "step": 4040 + }, + { + "epoch": 0.9317500576435324, + "grad_norm": 1.7559058405972485, + "learning_rate": 1.1947163724295457e-06, + "loss": 0.553135871887207, + "step": 4041 + }, + { + "epoch": 0.9319806317731151, + "grad_norm": 1.3529681190465184, + "learning_rate": 1.194342394538965e-06, + "loss": 0.53995281457901, + "step": 4042 + }, + { + "epoch": 0.9322112059026977, + "grad_norm": 1.3239114086556873, + "learning_rate": 1.1939683883987476e-06, + "loss": 0.4405739903450012, + "step": 4043 + }, + { + "epoch": 0.9324417800322804, + "grad_norm": 1.4320084668753248, + "learning_rate": 1.1935943540632591e-06, + "loss": 0.5046489238739014, + "step": 4044 + }, + { + "epoch": 0.932672354161863, + "grad_norm": 1.63220562819442, + "learning_rate": 1.1932202915868694e-06, + "loss": 0.4699453115463257, + "step": 4045 + }, + { + "epoch": 0.9329029282914457, + "grad_norm": 1.791152379500816, + "learning_rate": 1.192846201023952e-06, + "loss": 0.5643539428710938, + "step": 4046 + }, + { + "epoch": 0.9331335024210283, + "grad_norm": 1.3213038373558907, + "learning_rate": 
1.192472082428885e-06, + "loss": 0.4423527121543884, + "step": 4047 + }, + { + "epoch": 0.933364076550611, + "grad_norm": 1.488626793530787, + "learning_rate": 1.1920979358560498e-06, + "loss": 0.4446362257003784, + "step": 4048 + }, + { + "epoch": 0.9335946506801936, + "grad_norm": 1.6284188135746005, + "learning_rate": 1.1917237613598332e-06, + "loss": 0.48347601294517517, + "step": 4049 + }, + { + "epoch": 0.9338252248097764, + "grad_norm": 1.339621886087554, + "learning_rate": 1.1913495589946243e-06, + "loss": 0.4736206531524658, + "step": 4050 + }, + { + "epoch": 0.934055798939359, + "grad_norm": 1.5821523477294297, + "learning_rate": 1.1909753288148181e-06, + "loss": 0.4896177053451538, + "step": 4051 + }, + { + "epoch": 0.9342863730689417, + "grad_norm": 1.3503870180183308, + "learning_rate": 1.1906010708748124e-06, + "loss": 0.3953405022621155, + "step": 4052 + }, + { + "epoch": 0.9345169471985243, + "grad_norm": 1.75805064255455, + "learning_rate": 1.1902267852290092e-06, + "loss": 0.30871689319610596, + "step": 4053 + }, + { + "epoch": 0.934747521328107, + "grad_norm": 1.4966149449301516, + "learning_rate": 1.1898524719318151e-06, + "loss": 0.44187474250793457, + "step": 4054 + }, + { + "epoch": 0.9349780954576896, + "grad_norm": 1.3440011557143472, + "learning_rate": 1.1894781310376396e-06, + "loss": 0.4069768488407135, + "step": 4055 + }, + { + "epoch": 0.9352086695872723, + "grad_norm": 1.2938244564986259, + "learning_rate": 1.1891037626008982e-06, + "loss": 0.36307692527770996, + "step": 4056 + }, + { + "epoch": 0.9354392437168549, + "grad_norm": 1.2107088826138788, + "learning_rate": 1.188729366676008e-06, + "loss": 0.38535594940185547, + "step": 4057 + }, + { + "epoch": 0.9356698178464377, + "grad_norm": 1.416105966319888, + "learning_rate": 1.1883549433173916e-06, + "loss": 0.46454256772994995, + "step": 4058 + }, + { + "epoch": 0.9359003919760203, + "grad_norm": 1.5618282514551205, + "learning_rate": 1.1879804925794752e-06, + "loss": 
0.48537465929985046, + "step": 4059 + }, + { + "epoch": 0.936130966105603, + "grad_norm": 1.4027831120439134, + "learning_rate": 1.1876060145166893e-06, + "loss": 0.4355062246322632, + "step": 4060 + }, + { + "epoch": 0.9363615402351856, + "grad_norm": 1.4619447190479122, + "learning_rate": 1.1872315091834676e-06, + "loss": 0.47248804569244385, + "step": 4061 + }, + { + "epoch": 0.9365921143647683, + "grad_norm": 1.4336627602293526, + "learning_rate": 1.1868569766342488e-06, + "loss": 0.4896939992904663, + "step": 4062 + }, + { + "epoch": 0.9368226884943509, + "grad_norm": 1.7008224797561309, + "learning_rate": 1.1864824169234744e-06, + "loss": 0.4259600043296814, + "step": 4063 + }, + { + "epoch": 0.9370532626239336, + "grad_norm": 1.4119659383453314, + "learning_rate": 1.186107830105591e-06, + "loss": 0.4228817820549011, + "step": 4064 + }, + { + "epoch": 0.9372838367535162, + "grad_norm": 1.4911543620584802, + "learning_rate": 1.1857332162350484e-06, + "loss": 0.44750750064849854, + "step": 4065 + }, + { + "epoch": 0.937514410883099, + "grad_norm": 1.4424129451647476, + "learning_rate": 1.1853585753663003e-06, + "loss": 0.49125558137893677, + "step": 4066 + }, + { + "epoch": 0.9377449850126816, + "grad_norm": 1.2540485430842725, + "learning_rate": 1.1849839075538048e-06, + "loss": 0.446805477142334, + "step": 4067 + }, + { + "epoch": 0.9379755591422643, + "grad_norm": 1.6527694351266196, + "learning_rate": 1.1846092128520235e-06, + "loss": 0.4516616463661194, + "step": 4068 + }, + { + "epoch": 0.9382061332718469, + "grad_norm": 1.2461495462560317, + "learning_rate": 1.1842344913154223e-06, + "loss": 0.5271207690238953, + "step": 4069 + }, + { + "epoch": 0.9384367074014296, + "grad_norm": 1.3340471888093621, + "learning_rate": 1.1838597429984702e-06, + "loss": 0.46718811988830566, + "step": 4070 + }, + { + "epoch": 0.9386672815310122, + "grad_norm": 1.6970586095771742, + "learning_rate": 1.1834849679556416e-06, + "loss": 0.4948880672454834, + "step": 4071 + }, + 
{ + "epoch": 0.9388978556605949, + "grad_norm": 1.570925891079885, + "learning_rate": 1.183110166241413e-06, + "loss": 0.5141744613647461, + "step": 4072 + }, + { + "epoch": 0.9391284297901775, + "grad_norm": 1.683475962747206, + "learning_rate": 1.1827353379102662e-06, + "loss": 0.43921130895614624, + "step": 4073 + }, + { + "epoch": 0.9393590039197602, + "grad_norm": 1.458461387708897, + "learning_rate": 1.182360483016686e-06, + "loss": 0.35931193828582764, + "step": 4074 + }, + { + "epoch": 0.9395895780493428, + "grad_norm": 1.4562814179425503, + "learning_rate": 1.1819856016151615e-06, + "loss": 0.4376310408115387, + "step": 4075 + }, + { + "epoch": 0.9398201521789256, + "grad_norm": 1.1615675527476144, + "learning_rate": 1.1816106937601856e-06, + "loss": 0.45419907569885254, + "step": 4076 + }, + { + "epoch": 0.9400507263085082, + "grad_norm": 1.447994335613413, + "learning_rate": 1.1812357595062545e-06, + "loss": 0.4077754616737366, + "step": 4077 + }, + { + "epoch": 0.9402813004380909, + "grad_norm": 1.4463033622550583, + "learning_rate": 1.1808607989078686e-06, + "loss": 0.5555585622787476, + "step": 4078 + }, + { + "epoch": 0.9405118745676735, + "grad_norm": 1.4616481074430372, + "learning_rate": 1.1804858120195334e-06, + "loss": 0.4566183090209961, + "step": 4079 + }, + { + "epoch": 0.9407424486972562, + "grad_norm": 1.3314435652232666, + "learning_rate": 1.180110798895756e-06, + "loss": 0.39149847626686096, + "step": 4080 + }, + { + "epoch": 0.9409730228268388, + "grad_norm": 1.3122400287018474, + "learning_rate": 1.1797357595910485e-06, + "loss": 0.42695966362953186, + "step": 4081 + }, + { + "epoch": 0.9412035969564215, + "grad_norm": 1.4264504061469645, + "learning_rate": 1.1793606941599266e-06, + "loss": 0.49673956632614136, + "step": 4082 + }, + { + "epoch": 0.9414341710860041, + "grad_norm": 1.3703442162376693, + "learning_rate": 1.17898560265691e-06, + "loss": 0.44765836000442505, + "step": 4083 + }, + { + "epoch": 0.9416647452155869, + 
"grad_norm": 1.2694691955405566, + "learning_rate": 1.1786104851365227e-06, + "loss": 0.40580642223358154, + "step": 4084 + }, + { + "epoch": 0.9418953193451695, + "grad_norm": 1.6554640938571203, + "learning_rate": 1.1782353416532907e-06, + "loss": 0.5389235019683838, + "step": 4085 + }, + { + "epoch": 0.9421258934747522, + "grad_norm": 1.4858385739097846, + "learning_rate": 1.1778601722617456e-06, + "loss": 0.5130764245986938, + "step": 4086 + }, + { + "epoch": 0.9423564676043348, + "grad_norm": 1.4406092108567712, + "learning_rate": 1.1774849770164218e-06, + "loss": 0.5031291842460632, + "step": 4087 + }, + { + "epoch": 0.9425870417339175, + "grad_norm": 1.474863885181778, + "learning_rate": 1.1771097559718581e-06, + "loss": 0.463434636592865, + "step": 4088 + }, + { + "epoch": 0.9428176158635001, + "grad_norm": 1.3059771334220434, + "learning_rate": 1.1767345091825962e-06, + "loss": 0.4249681234359741, + "step": 4089 + }, + { + "epoch": 0.9430481899930828, + "grad_norm": 1.322875104249168, + "learning_rate": 1.176359236703182e-06, + "loss": 0.39353805780410767, + "step": 4090 + }, + { + "epoch": 0.9432787641226654, + "grad_norm": 1.1645299347166784, + "learning_rate": 1.1759839385881657e-06, + "loss": 0.4554273188114166, + "step": 4091 + }, + { + "epoch": 0.9435093382522481, + "grad_norm": 1.5935626726835685, + "learning_rate": 1.1756086148921005e-06, + "loss": 0.6275606155395508, + "step": 4092 + }, + { + "epoch": 0.9437399123818307, + "grad_norm": 1.40548177481024, + "learning_rate": 1.1752332656695432e-06, + "loss": 0.5058892965316772, + "step": 4093 + }, + { + "epoch": 0.9439704865114135, + "grad_norm": 1.4618963991295721, + "learning_rate": 1.1748578909750547e-06, + "loss": 0.4318118095397949, + "step": 4094 + }, + { + "epoch": 0.9442010606409961, + "grad_norm": 1.5133013388223657, + "learning_rate": 1.1744824908631996e-06, + "loss": 0.4873964190483093, + "step": 4095 + }, + { + "epoch": 0.9444316347705788, + "grad_norm": 1.7199346017960337, + 
"learning_rate": 1.1741070653885467e-06, + "loss": 0.5026696920394897, + "step": 4096 + }, + { + "epoch": 0.9446622089001614, + "grad_norm": 1.1838920009196625, + "learning_rate": 1.1737316146056667e-06, + "loss": 0.4337490200996399, + "step": 4097 + }, + { + "epoch": 0.9448927830297441, + "grad_norm": 1.4841621540296046, + "learning_rate": 1.173356138569136e-06, + "loss": 0.4552634358406067, + "step": 4098 + }, + { + "epoch": 0.9451233571593267, + "grad_norm": 1.50340660176824, + "learning_rate": 1.1729806373335336e-06, + "loss": 0.4631303548812866, + "step": 4099 + }, + { + "epoch": 0.9453539312889094, + "grad_norm": 1.2840677998534646, + "learning_rate": 1.1726051109534424e-06, + "loss": 0.5004513263702393, + "step": 4100 + }, + { + "epoch": 0.945584505418492, + "grad_norm": 1.4218926297879624, + "learning_rate": 1.172229559483449e-06, + "loss": 0.4634668827056885, + "step": 4101 + }, + { + "epoch": 0.9458150795480748, + "grad_norm": 1.3580815662313042, + "learning_rate": 1.171853982978144e-06, + "loss": 0.4034295678138733, + "step": 4102 + }, + { + "epoch": 0.9460456536776574, + "grad_norm": 1.4066326558267837, + "learning_rate": 1.1714783814921206e-06, + "loss": 0.4981224536895752, + "step": 4103 + }, + { + "epoch": 0.9462762278072401, + "grad_norm": 1.637441573047362, + "learning_rate": 1.1711027550799767e-06, + "loss": 0.460249125957489, + "step": 4104 + }, + { + "epoch": 0.9465068019368227, + "grad_norm": 1.7282687422797383, + "learning_rate": 1.170727103796313e-06, + "loss": 0.4794936180114746, + "step": 4105 + }, + { + "epoch": 0.9467373760664054, + "grad_norm": 1.679442128589896, + "learning_rate": 1.170351427695735e-06, + "loss": 0.42724454402923584, + "step": 4106 + }, + { + "epoch": 0.946967950195988, + "grad_norm": 1.5092304593591768, + "learning_rate": 1.16997572683285e-06, + "loss": 0.4612593948841095, + "step": 4107 + }, + { + "epoch": 0.9471985243255707, + "grad_norm": 1.4462371891962704, + "learning_rate": 1.169600001262271e-06, + "loss": 
0.49512046575546265, + "step": 4108 + }, + { + "epoch": 0.9474290984551533, + "grad_norm": 1.382963972341291, + "learning_rate": 1.1692242510386124e-06, + "loss": 0.49438196420669556, + "step": 4109 + }, + { + "epoch": 0.947659672584736, + "grad_norm": 1.246967438511099, + "learning_rate": 1.1688484762164938e-06, + "loss": 0.4833865165710449, + "step": 4110 + }, + { + "epoch": 0.9478902467143187, + "grad_norm": 1.6394354229670154, + "learning_rate": 1.1684726768505385e-06, + "loss": 0.49647942185401917, + "step": 4111 + }, + { + "epoch": 0.9481208208439014, + "grad_norm": 1.3141370309593936, + "learning_rate": 1.1680968529953718e-06, + "loss": 0.4299147129058838, + "step": 4112 + }, + { + "epoch": 0.948351394973484, + "grad_norm": 1.2751791494481195, + "learning_rate": 1.167721004705624e-06, + "loss": 0.42613041400909424, + "step": 4113 + }, + { + "epoch": 0.9485819691030667, + "grad_norm": 1.5850112492057793, + "learning_rate": 1.1673451320359284e-06, + "loss": 0.3989883065223694, + "step": 4114 + }, + { + "epoch": 0.9488125432326493, + "grad_norm": 1.6195345588406382, + "learning_rate": 1.1669692350409222e-06, + "loss": 0.41362684965133667, + "step": 4115 + }, + { + "epoch": 0.9490431173622319, + "grad_norm": 1.3043186455514282, + "learning_rate": 1.1665933137752452e-06, + "loss": 0.3807048201560974, + "step": 4116 + }, + { + "epoch": 0.9492736914918146, + "grad_norm": 1.452270133487064, + "learning_rate": 1.1662173682935414e-06, + "loss": 0.3440876007080078, + "step": 4117 + }, + { + "epoch": 0.9495042656213972, + "grad_norm": 1.5051121617765968, + "learning_rate": 1.165841398650459e-06, + "loss": 0.43534499406814575, + "step": 4118 + }, + { + "epoch": 0.9497348397509799, + "grad_norm": 1.2124174426672352, + "learning_rate": 1.1654654049006484e-06, + "loss": 0.4900544285774231, + "step": 4119 + }, + { + "epoch": 0.9499654138805625, + "grad_norm": 1.4219346573372744, + "learning_rate": 1.1650893870987643e-06, + "loss": 0.5189288854598999, + "step": 4120 + }, + { 
+ "epoch": 0.9501959880101453, + "grad_norm": 1.5561303354373495, + "learning_rate": 1.1647133452994643e-06, + "loss": 0.587873101234436, + "step": 4121 + }, + { + "epoch": 0.9504265621397279, + "grad_norm": 1.2947612520331362, + "learning_rate": 1.1643372795574106e-06, + "loss": 0.4367108941078186, + "step": 4122 + }, + { + "epoch": 0.9506571362693106, + "grad_norm": 1.3855876287330298, + "learning_rate": 1.1639611899272679e-06, + "loss": 0.4121246635913849, + "step": 4123 + }, + { + "epoch": 0.9508877103988932, + "grad_norm": 1.371083137252789, + "learning_rate": 1.1635850764637042e-06, + "loss": 0.4993973672389984, + "step": 4124 + }, + { + "epoch": 0.9511182845284759, + "grad_norm": 1.3729377845652901, + "learning_rate": 1.163208939221392e-06, + "loss": 0.39145413041114807, + "step": 4125 + }, + { + "epoch": 0.9513488586580585, + "grad_norm": 1.5515816392895183, + "learning_rate": 1.1628327782550065e-06, + "loss": 0.45954760909080505, + "step": 4126 + }, + { + "epoch": 0.9515794327876412, + "grad_norm": 1.5137997254417062, + "learning_rate": 1.1624565936192263e-06, + "loss": 0.5159680843353271, + "step": 4127 + }, + { + "epoch": 0.9518100069172238, + "grad_norm": 1.5429829982679306, + "learning_rate": 1.1620803853687337e-06, + "loss": 0.4441346228122711, + "step": 4128 + }, + { + "epoch": 0.9520405810468066, + "grad_norm": 1.1994992888255296, + "learning_rate": 1.1617041535582144e-06, + "loss": 0.3842248320579529, + "step": 4129 + }, + { + "epoch": 0.9522711551763892, + "grad_norm": 1.5742838715827387, + "learning_rate": 1.1613278982423577e-06, + "loss": 0.5332437753677368, + "step": 4130 + }, + { + "epoch": 0.9525017293059719, + "grad_norm": 1.416443461852387, + "learning_rate": 1.160951619475856e-06, + "loss": 0.4265931248664856, + "step": 4131 + }, + { + "epoch": 0.9527323034355545, + "grad_norm": 1.344407559333665, + "learning_rate": 1.1605753173134052e-06, + "loss": 0.47442418336868286, + "step": 4132 + }, + { + "epoch": 0.9529628775651372, + "grad_norm": 
1.4385000789860496, + "learning_rate": 1.1601989918097044e-06, + "loss": 0.6128898859024048, + "step": 4133 + }, + { + "epoch": 0.9531934516947198, + "grad_norm": 1.3167710707989233, + "learning_rate": 1.159822643019457e-06, + "loss": 0.5347775220870972, + "step": 4134 + }, + { + "epoch": 0.9534240258243025, + "grad_norm": 1.1478699481046142, + "learning_rate": 1.1594462709973682e-06, + "loss": 0.39984625577926636, + "step": 4135 + }, + { + "epoch": 0.9536545999538851, + "grad_norm": 1.411910940206958, + "learning_rate": 1.1590698757981483e-06, + "loss": 0.5146951675415039, + "step": 4136 + }, + { + "epoch": 0.9538851740834678, + "grad_norm": 1.4057451726772026, + "learning_rate": 1.1586934574765097e-06, + "loss": 0.3589641749858856, + "step": 4137 + }, + { + "epoch": 0.9541157482130505, + "grad_norm": 1.4047870659239305, + "learning_rate": 1.1583170160871689e-06, + "loss": 0.428930401802063, + "step": 4138 + }, + { + "epoch": 0.9543463223426332, + "grad_norm": 1.3760779428564116, + "learning_rate": 1.1579405516848452e-06, + "loss": 0.46921080350875854, + "step": 4139 + }, + { + "epoch": 0.9545768964722158, + "grad_norm": 1.462957669946579, + "learning_rate": 1.1575640643242616e-06, + "loss": 0.39079514145851135, + "step": 4140 + }, + { + "epoch": 0.9548074706017985, + "grad_norm": 1.5322762323160557, + "learning_rate": 1.1571875540601443e-06, + "loss": 0.4475102424621582, + "step": 4141 + }, + { + "epoch": 0.9550380447313811, + "grad_norm": 1.3964952325110702, + "learning_rate": 1.1568110209472232e-06, + "loss": 0.43881016969680786, + "step": 4142 + }, + { + "epoch": 0.9552686188609638, + "grad_norm": 1.2846843095885363, + "learning_rate": 1.156434465040231e-06, + "loss": 0.4382214844226837, + "step": 4143 + }, + { + "epoch": 0.9554991929905464, + "grad_norm": 1.6590322564778253, + "learning_rate": 1.1560578863939037e-06, + "loss": 0.5390958786010742, + "step": 4144 + }, + { + "epoch": 0.9557297671201291, + "grad_norm": 1.2966408722030756, + "learning_rate": 
1.155681285062981e-06, + "loss": 0.4276137948036194, + "step": 4145 + }, + { + "epoch": 0.9559603412497117, + "grad_norm": 1.3756682316204962, + "learning_rate": 1.1553046611022058e-06, + "loss": 0.4541968107223511, + "step": 4146 + }, + { + "epoch": 0.9561909153792945, + "grad_norm": 1.4806679512404375, + "learning_rate": 1.1549280145663242e-06, + "loss": 0.43287473917007446, + "step": 4147 + }, + { + "epoch": 0.9564214895088771, + "grad_norm": 1.5507500145218385, + "learning_rate": 1.1545513455100855e-06, + "loss": 0.432822585105896, + "step": 4148 + }, + { + "epoch": 0.9566520636384598, + "grad_norm": 1.4662390355071035, + "learning_rate": 1.1541746539882424e-06, + "loss": 0.519271969795227, + "step": 4149 + }, + { + "epoch": 0.9568826377680424, + "grad_norm": 1.4521470663351335, + "learning_rate": 1.1537979400555506e-06, + "loss": 0.4158627390861511, + "step": 4150 + }, + { + "epoch": 0.9571132118976251, + "grad_norm": 1.4834584070713739, + "learning_rate": 1.1534212037667698e-06, + "loss": 0.42122989892959595, + "step": 4151 + }, + { + "epoch": 0.9573437860272077, + "grad_norm": 1.696588703842723, + "learning_rate": 1.1530444451766623e-06, + "loss": 0.4141794443130493, + "step": 4152 + }, + { + "epoch": 0.9575743601567904, + "grad_norm": 1.3149219500885996, + "learning_rate": 1.1526676643399933e-06, + "loss": 0.4935780167579651, + "step": 4153 + }, + { + "epoch": 0.957804934286373, + "grad_norm": 1.3661965645097156, + "learning_rate": 1.152290861311532e-06, + "loss": 0.5075733661651611, + "step": 4154 + }, + { + "epoch": 0.9580355084159557, + "grad_norm": 1.37824406851626, + "learning_rate": 1.151914036146051e-06, + "loss": 0.4852841794490814, + "step": 4155 + }, + { + "epoch": 0.9582660825455384, + "grad_norm": 1.2576277022731817, + "learning_rate": 1.151537188898325e-06, + "loss": 0.46114620566368103, + "step": 4156 + }, + { + "epoch": 0.9584966566751211, + "grad_norm": 1.6662322349225411, + "learning_rate": 1.1511603196231327e-06, + "loss": 
0.519254207611084, + "step": 4157 + }, + { + "epoch": 0.9587272308047037, + "grad_norm": 1.3283960828325414, + "learning_rate": 1.1507834283752562e-06, + "loss": 0.43635690212249756, + "step": 4158 + }, + { + "epoch": 0.9589578049342864, + "grad_norm": 1.3730336798021219, + "learning_rate": 1.1504065152094802e-06, + "loss": 0.48448023200035095, + "step": 4159 + }, + { + "epoch": 0.959188379063869, + "grad_norm": 1.320755520801986, + "learning_rate": 1.1500295801805927e-06, + "loss": 0.4461054801940918, + "step": 4160 + }, + { + "epoch": 0.9594189531934517, + "grad_norm": 1.3183810948385437, + "learning_rate": 1.1496526233433852e-06, + "loss": 0.44869595766067505, + "step": 4161 + }, + { + "epoch": 0.9596495273230343, + "grad_norm": 1.5137169599039804, + "learning_rate": 1.1492756447526524e-06, + "loss": 0.4592103660106659, + "step": 4162 + }, + { + "epoch": 0.959880101452617, + "grad_norm": 1.3625000210250673, + "learning_rate": 1.1488986444631918e-06, + "loss": 0.48352301120758057, + "step": 4163 + }, + { + "epoch": 0.9601106755821996, + "grad_norm": 1.2039059688900335, + "learning_rate": 1.1485216225298043e-06, + "loss": 0.44718503952026367, + "step": 4164 + }, + { + "epoch": 0.9603412497117824, + "grad_norm": 1.7796976813489804, + "learning_rate": 1.1481445790072933e-06, + "loss": 0.44659486413002014, + "step": 4165 + }, + { + "epoch": 0.960571823841365, + "grad_norm": 1.464260426957605, + "learning_rate": 1.1477675139504665e-06, + "loss": 0.5143063068389893, + "step": 4166 + }, + { + "epoch": 0.9608023979709477, + "grad_norm": 1.825014649582591, + "learning_rate": 1.1473904274141344e-06, + "loss": 0.6708887815475464, + "step": 4167 + }, + { + "epoch": 0.9610329721005303, + "grad_norm": 1.4397638416262573, + "learning_rate": 1.1470133194531094e-06, + "loss": 0.3889666199684143, + "step": 4168 + }, + { + "epoch": 0.961263546230113, + "grad_norm": 1.2805774485856607, + "learning_rate": 1.1466361901222086e-06, + "loss": 0.4610622227191925, + "step": 4169 + }, + { + 
"epoch": 0.9614941203596956, + "grad_norm": 1.4320030308850267, + "learning_rate": 1.1462590394762514e-06, + "loss": 0.46372538805007935, + "step": 4170 + }, + { + "epoch": 0.9617246944892783, + "grad_norm": 1.5638922992309852, + "learning_rate": 1.1458818675700607e-06, + "loss": 0.5197097063064575, + "step": 4171 + }, + { + "epoch": 0.9619552686188609, + "grad_norm": 1.2417860513603916, + "learning_rate": 1.145504674458462e-06, + "loss": 0.3849745988845825, + "step": 4172 + }, + { + "epoch": 0.9621858427484437, + "grad_norm": 1.5196854039542969, + "learning_rate": 1.1451274601962841e-06, + "loss": 0.4572817385196686, + "step": 4173 + }, + { + "epoch": 0.9624164168780263, + "grad_norm": 1.4154832612934123, + "learning_rate": 1.1447502248383594e-06, + "loss": 0.4383746385574341, + "step": 4174 + }, + { + "epoch": 0.962646991007609, + "grad_norm": 1.473681287130909, + "learning_rate": 1.1443729684395222e-06, + "loss": 0.5319672226905823, + "step": 4175 + }, + { + "epoch": 0.9628775651371916, + "grad_norm": 1.2307542062760268, + "learning_rate": 1.143995691054611e-06, + "loss": 0.4351249933242798, + "step": 4176 + }, + { + "epoch": 0.9631081392667743, + "grad_norm": 1.42416527435209, + "learning_rate": 1.1436183927384668e-06, + "loss": 0.5453774929046631, + "step": 4177 + }, + { + "epoch": 0.9633387133963569, + "grad_norm": 1.569291329857932, + "learning_rate": 1.1432410735459336e-06, + "loss": 0.5605905055999756, + "step": 4178 + }, + { + "epoch": 0.9635692875259396, + "grad_norm": 1.3825364023898294, + "learning_rate": 1.1428637335318587e-06, + "loss": 0.4556693434715271, + "step": 4179 + }, + { + "epoch": 0.9637998616555222, + "grad_norm": 1.316766347101971, + "learning_rate": 1.142486372751092e-06, + "loss": 0.45428892970085144, + "step": 4180 + }, + { + "epoch": 0.9640304357851049, + "grad_norm": 1.4252168865652697, + "learning_rate": 1.142108991258487e-06, + "loss": 0.4897412657737732, + "step": 4181 + }, + { + "epoch": 0.9642610099146876, + "grad_norm": 
1.984637391356181, + "learning_rate": 1.1417315891089004e-06, + "loss": 0.5478836894035339, + "step": 4182 + }, + { + "epoch": 0.9644915840442703, + "grad_norm": 1.4620834191298895, + "learning_rate": 1.1413541663571904e-06, + "loss": 0.42394131422042847, + "step": 4183 + }, + { + "epoch": 0.9647221581738529, + "grad_norm": 1.585175673978148, + "learning_rate": 1.1409767230582199e-06, + "loss": 0.5047104954719543, + "step": 4184 + }, + { + "epoch": 0.9649527323034356, + "grad_norm": 1.4749915601759833, + "learning_rate": 1.1405992592668538e-06, + "loss": 0.43985825777053833, + "step": 4185 + }, + { + "epoch": 0.9651833064330182, + "grad_norm": 1.3061643078097422, + "learning_rate": 1.1402217750379608e-06, + "loss": 0.4338407516479492, + "step": 4186 + }, + { + "epoch": 0.9654138805626009, + "grad_norm": 1.5404850502320075, + "learning_rate": 1.1398442704264118e-06, + "loss": 0.4532614052295685, + "step": 4187 + }, + { + "epoch": 0.9656444546921835, + "grad_norm": 1.2345047018331374, + "learning_rate": 1.1394667454870802e-06, + "loss": 0.4546123445034027, + "step": 4188 + }, + { + "epoch": 0.9658750288217662, + "grad_norm": 1.5321856096614175, + "learning_rate": 1.139089200274844e-06, + "loss": 0.44743451476097107, + "step": 4189 + }, + { + "epoch": 0.9661056029513488, + "grad_norm": 1.3411063865526411, + "learning_rate": 1.138711634844583e-06, + "loss": 0.4566968083381653, + "step": 4190 + }, + { + "epoch": 0.9663361770809316, + "grad_norm": 1.481468600614622, + "learning_rate": 1.13833404925118e-06, + "loss": 0.46385467052459717, + "step": 4191 + }, + { + "epoch": 0.9665667512105142, + "grad_norm": 1.2411450691863102, + "learning_rate": 1.137956443549521e-06, + "loss": 0.4614461660385132, + "step": 4192 + }, + { + "epoch": 0.9667973253400969, + "grad_norm": 1.3326432316915904, + "learning_rate": 1.1375788177944945e-06, + "loss": 0.4351955056190491, + "step": 4193 + }, + { + "epoch": 0.9670278994696795, + "grad_norm": 1.368161025215393, + "learning_rate": 
1.1372011720409927e-06, + "loss": 0.4172135591506958, + "step": 4194 + }, + { + "epoch": 0.9672584735992622, + "grad_norm": 1.6941620223477674, + "learning_rate": 1.1368235063439102e-06, + "loss": 0.5482916831970215, + "step": 4195 + }, + { + "epoch": 0.9674890477288448, + "grad_norm": 1.3508434751874687, + "learning_rate": 1.136445820758144e-06, + "loss": 0.4336891770362854, + "step": 4196 + }, + { + "epoch": 0.9677196218584275, + "grad_norm": 1.5072664158429512, + "learning_rate": 1.1360681153385956e-06, + "loss": 0.42612385749816895, + "step": 4197 + }, + { + "epoch": 0.9679501959880101, + "grad_norm": 1.5000454097568379, + "learning_rate": 1.135690390140167e-06, + "loss": 0.513736367225647, + "step": 4198 + }, + { + "epoch": 0.9681807701175928, + "grad_norm": 1.8279069537189752, + "learning_rate": 1.1353126452177656e-06, + "loss": 0.45551058650016785, + "step": 4199 + }, + { + "epoch": 0.9684113442471755, + "grad_norm": 1.3479770342549766, + "learning_rate": 1.1349348806262994e-06, + "loss": 0.45450061559677124, + "step": 4200 + }, + { + "epoch": 0.9686419183767582, + "grad_norm": 1.5942392384347237, + "learning_rate": 1.1345570964206807e-06, + "loss": 0.43962353467941284, + "step": 4201 + }, + { + "epoch": 0.9688724925063408, + "grad_norm": 1.4695533515040724, + "learning_rate": 1.1341792926558245e-06, + "loss": 0.5304821729660034, + "step": 4202 + }, + { + "epoch": 0.9691030666359235, + "grad_norm": 1.57215629996827, + "learning_rate": 1.1338014693866483e-06, + "loss": 0.6079045534133911, + "step": 4203 + }, + { + "epoch": 0.9693336407655061, + "grad_norm": 1.3451772860900804, + "learning_rate": 1.1334236266680724e-06, + "loss": 0.39895182847976685, + "step": 4204 + }, + { + "epoch": 0.9695642148950888, + "grad_norm": 1.4224201035305835, + "learning_rate": 1.1330457645550202e-06, + "loss": 0.5264945030212402, + "step": 4205 + }, + { + "epoch": 0.9697947890246714, + "grad_norm": 1.3209691457440123, + "learning_rate": 1.1326678831024178e-06, + "loss": 
0.4794533848762512, + "step": 4206 + }, + { + "epoch": 0.9700253631542541, + "grad_norm": 1.472204632290126, + "learning_rate": 1.1322899823651938e-06, + "loss": 0.42917680740356445, + "step": 4207 + }, + { + "epoch": 0.9702559372838367, + "grad_norm": 1.4163025348687577, + "learning_rate": 1.1319120623982804e-06, + "loss": 0.42155951261520386, + "step": 4208 + }, + { + "epoch": 0.9704865114134195, + "grad_norm": 1.455345134423215, + "learning_rate": 1.1315341232566121e-06, + "loss": 0.5119719505310059, + "step": 4209 + }, + { + "epoch": 0.9707170855430021, + "grad_norm": 1.4441630965274395, + "learning_rate": 1.1311561649951255e-06, + "loss": 0.5261529684066772, + "step": 4210 + }, + { + "epoch": 0.9709476596725848, + "grad_norm": 1.3046857195112773, + "learning_rate": 1.1307781876687609e-06, + "loss": 0.5133010149002075, + "step": 4211 + }, + { + "epoch": 0.9711782338021674, + "grad_norm": 1.4061037707348525, + "learning_rate": 1.1304001913324617e-06, + "loss": 0.5214196443557739, + "step": 4212 + }, + { + "epoch": 0.9714088079317501, + "grad_norm": 1.4191122003483587, + "learning_rate": 1.1300221760411732e-06, + "loss": 0.4665095806121826, + "step": 4213 + }, + { + "epoch": 0.9716393820613327, + "grad_norm": 1.2917310787961995, + "learning_rate": 1.1296441418498435e-06, + "loss": 0.44912537932395935, + "step": 4214 + }, + { + "epoch": 0.9718699561909154, + "grad_norm": 1.384060094796334, + "learning_rate": 1.1292660888134241e-06, + "loss": 0.48622840642929077, + "step": 4215 + }, + { + "epoch": 0.972100530320498, + "grad_norm": 1.3952506250953003, + "learning_rate": 1.128888016986868e-06, + "loss": 0.40099745988845825, + "step": 4216 + }, + { + "epoch": 0.9723311044500808, + "grad_norm": 1.6661609433762745, + "learning_rate": 1.1285099264251331e-06, + "loss": 0.4981631934642792, + "step": 4217 + }, + { + "epoch": 0.9725616785796634, + "grad_norm": 1.3061541456837051, + "learning_rate": 1.1281318171831778e-06, + "loss": 0.3902980387210846, + "step": 4218 + }, + { 
+ "epoch": 0.9727922527092461, + "grad_norm": 1.646940009523485, + "learning_rate": 1.1277536893159641e-06, + "loss": 0.5120723843574524, + "step": 4219 + }, + { + "epoch": 0.9730228268388287, + "grad_norm": 1.4050676349560098, + "learning_rate": 1.1273755428784568e-06, + "loss": 0.47908157110214233, + "step": 4220 + }, + { + "epoch": 0.9732534009684114, + "grad_norm": 1.3980215754858654, + "learning_rate": 1.126997377925624e-06, + "loss": 0.44935697317123413, + "step": 4221 + }, + { + "epoch": 0.973483975097994, + "grad_norm": 1.7936737063106103, + "learning_rate": 1.1266191945124345e-06, + "loss": 0.46883124113082886, + "step": 4222 + }, + { + "epoch": 0.9737145492275767, + "grad_norm": 1.3605023071963889, + "learning_rate": 1.1262409926938622e-06, + "loss": 0.41385799646377563, + "step": 4223 + }, + { + "epoch": 0.9739451233571593, + "grad_norm": 1.352097187992639, + "learning_rate": 1.1258627725248821e-06, + "loss": 0.5450118780136108, + "step": 4224 + }, + { + "epoch": 0.974175697486742, + "grad_norm": 1.3149598759310381, + "learning_rate": 1.1254845340604725e-06, + "loss": 0.4728820323944092, + "step": 4225 + }, + { + "epoch": 0.9744062716163246, + "grad_norm": 1.490906480143449, + "learning_rate": 1.1251062773556143e-06, + "loss": 0.5111296772956848, + "step": 4226 + }, + { + "epoch": 0.9746368457459073, + "grad_norm": 1.6529549144482583, + "learning_rate": 1.1247280024652908e-06, + "loss": 0.4538743793964386, + "step": 4227 + }, + { + "epoch": 0.97486741987549, + "grad_norm": 1.4130886870951611, + "learning_rate": 1.1243497094444877e-06, + "loss": 0.4917091131210327, + "step": 4228 + }, + { + "epoch": 0.9750979940050726, + "grad_norm": 1.387244231549714, + "learning_rate": 1.1239713983481945e-06, + "loss": 0.40376198291778564, + "step": 4229 + }, + { + "epoch": 0.9753285681346553, + "grad_norm": 1.4554658551428983, + "learning_rate": 1.1235930692314019e-06, + "loss": 0.5356566905975342, + "step": 4230 + }, + { + "epoch": 0.9755591422642379, + "grad_norm": 
1.4359135131794967, + "learning_rate": 1.123214722149104e-06, + "loss": 0.4374624490737915, + "step": 4231 + }, + { + "epoch": 0.9757897163938206, + "grad_norm": 1.4746549529981767, + "learning_rate": 1.1228363571562976e-06, + "loss": 0.4225429594516754, + "step": 4232 + }, + { + "epoch": 0.9760202905234032, + "grad_norm": 1.4500544144002923, + "learning_rate": 1.1224579743079819e-06, + "loss": 0.5389699935913086, + "step": 4233 + }, + { + "epoch": 0.9762508646529859, + "grad_norm": 1.39848035447059, + "learning_rate": 1.1220795736591584e-06, + "loss": 0.4925463795661926, + "step": 4234 + }, + { + "epoch": 0.9764814387825685, + "grad_norm": 1.2916834361485914, + "learning_rate": 1.1217011552648315e-06, + "loss": 0.4694328308105469, + "step": 4235 + }, + { + "epoch": 0.9767120129121513, + "grad_norm": 1.377557176325016, + "learning_rate": 1.1213227191800086e-06, + "loss": 0.39887624979019165, + "step": 4236 + }, + { + "epoch": 0.9769425870417339, + "grad_norm": 1.5555659299458584, + "learning_rate": 1.120944265459699e-06, + "loss": 0.4930388927459717, + "step": 4237 + }, + { + "epoch": 0.9771731611713166, + "grad_norm": 1.2486101676760866, + "learning_rate": 1.1205657941589143e-06, + "loss": 0.4595404863357544, + "step": 4238 + }, + { + "epoch": 0.9774037353008992, + "grad_norm": 1.4574273243269236, + "learning_rate": 1.1201873053326695e-06, + "loss": 0.44177496433258057, + "step": 4239 + }, + { + "epoch": 0.9776343094304819, + "grad_norm": 1.4308970126871865, + "learning_rate": 1.119808799035982e-06, + "loss": 0.47095373272895813, + "step": 4240 + }, + { + "epoch": 0.9778648835600645, + "grad_norm": 1.4049777741841016, + "learning_rate": 1.1194302753238716e-06, + "loss": 0.4649583697319031, + "step": 4241 + }, + { + "epoch": 0.9780954576896472, + "grad_norm": 1.5269711326381101, + "learning_rate": 1.1190517342513598e-06, + "loss": 0.44815266132354736, + "step": 4242 + }, + { + "epoch": 0.9783260318192298, + "grad_norm": 1.462868793648971, + "learning_rate": 
1.118673175873472e-06, + "loss": 0.4861665368080139, + "step": 4243 + }, + { + "epoch": 0.9785566059488126, + "grad_norm": 1.3395897424173215, + "learning_rate": 1.1182946002452354e-06, + "loss": 0.5196468830108643, + "step": 4244 + }, + { + "epoch": 0.9787871800783952, + "grad_norm": 1.5910002582718288, + "learning_rate": 1.11791600742168e-06, + "loss": 0.49746841192245483, + "step": 4245 + }, + { + "epoch": 0.9790177542079779, + "grad_norm": 1.2919062217717159, + "learning_rate": 1.1175373974578377e-06, + "loss": 0.4637739956378937, + "step": 4246 + }, + { + "epoch": 0.9792483283375605, + "grad_norm": 1.228394275609753, + "learning_rate": 1.1171587704087434e-06, + "loss": 0.46009692549705505, + "step": 4247 + }, + { + "epoch": 0.9794789024671432, + "grad_norm": 2.1569798034684706, + "learning_rate": 1.1167801263294346e-06, + "loss": 0.49036258459091187, + "step": 4248 + }, + { + "epoch": 0.9797094765967258, + "grad_norm": 1.395933426650918, + "learning_rate": 1.1164014652749509e-06, + "loss": 0.4730580449104309, + "step": 4249 + }, + { + "epoch": 0.9799400507263085, + "grad_norm": 1.618438538763921, + "learning_rate": 1.1160227873003345e-06, + "loss": 0.5029968023300171, + "step": 4250 + }, + { + "epoch": 0.9801706248558911, + "grad_norm": 1.4870951402562973, + "learning_rate": 1.1156440924606299e-06, + "loss": 0.5149805545806885, + "step": 4251 + }, + { + "epoch": 0.9804011989854738, + "grad_norm": 1.6248587467562292, + "learning_rate": 1.1152653808108845e-06, + "loss": 0.5017384886741638, + "step": 4252 + }, + { + "epoch": 0.9806317731150564, + "grad_norm": 1.486462967422998, + "learning_rate": 1.114886652406148e-06, + "loss": 0.47569048404693604, + "step": 4253 + }, + { + "epoch": 0.9808623472446392, + "grad_norm": 1.4476623501612873, + "learning_rate": 1.1145079073014722e-06, + "loss": 0.5127655863761902, + "step": 4254 + }, + { + "epoch": 0.9810929213742218, + "grad_norm": 1.4943063660203757, + "learning_rate": 1.1141291455519114e-06, + "loss": 
0.4014360308647156, + "step": 4255 + }, + { + "epoch": 0.9813234955038045, + "grad_norm": 1.4814879590427052, + "learning_rate": 1.1137503672125228e-06, + "loss": 0.43737465143203735, + "step": 4256 + }, + { + "epoch": 0.9815540696333871, + "grad_norm": 1.413525212350489, + "learning_rate": 1.1133715723383655e-06, + "loss": 0.4389764070510864, + "step": 4257 + }, + { + "epoch": 0.9817846437629698, + "grad_norm": 1.3532173754404184, + "learning_rate": 1.112992760984501e-06, + "loss": 0.5105381608009338, + "step": 4258 + }, + { + "epoch": 0.9820152178925524, + "grad_norm": 1.4052776017835835, + "learning_rate": 1.1126139332059937e-06, + "loss": 0.4393002688884735, + "step": 4259 + }, + { + "epoch": 0.9822457920221351, + "grad_norm": 1.3179147448132482, + "learning_rate": 1.1122350890579102e-06, + "loss": 0.541419267654419, + "step": 4260 + }, + { + "epoch": 0.9824763661517177, + "grad_norm": 1.5177150542407778, + "learning_rate": 1.1118562285953186e-06, + "loss": 0.4153546094894409, + "step": 4261 + }, + { + "epoch": 0.9827069402813005, + "grad_norm": 1.4649176443917427, + "learning_rate": 1.1114773518732907e-06, + "loss": 0.5060696601867676, + "step": 4262 + }, + { + "epoch": 0.9829375144108831, + "grad_norm": 1.6266321171712574, + "learning_rate": 1.1110984589468998e-06, + "loss": 0.5975456237792969, + "step": 4263 + }, + { + "epoch": 0.9831680885404658, + "grad_norm": 1.4920078622156363, + "learning_rate": 1.110719549871222e-06, + "loss": 0.5729621648788452, + "step": 4264 + }, + { + "epoch": 0.9833986626700484, + "grad_norm": 1.3838030985279757, + "learning_rate": 1.1103406247013356e-06, + "loss": 0.3948165476322174, + "step": 4265 + }, + { + "epoch": 0.9836292367996311, + "grad_norm": 1.3893062538653607, + "learning_rate": 1.1099616834923212e-06, + "loss": 0.41744932532310486, + "step": 4266 + }, + { + "epoch": 0.9838598109292137, + "grad_norm": 1.3638196246051946, + "learning_rate": 1.1095827262992611e-06, + "loss": 0.4701330065727234, + "step": 4267 + }, + { + 
"epoch": 0.9840903850587964, + "grad_norm": 1.4764746527882953, + "learning_rate": 1.109203753177242e-06, + "loss": 0.4841681718826294, + "step": 4268 + }, + { + "epoch": 0.984320959188379, + "grad_norm": 1.3604414964396274, + "learning_rate": 1.10882476418135e-06, + "loss": 0.4180435538291931, + "step": 4269 + }, + { + "epoch": 0.9845515333179617, + "grad_norm": 1.4211218067668543, + "learning_rate": 1.1084457593666758e-06, + "loss": 0.39362633228302, + "step": 4270 + }, + { + "epoch": 0.9847821074475444, + "grad_norm": 1.4239354595534417, + "learning_rate": 1.1080667387883116e-06, + "loss": 0.5192993879318237, + "step": 4271 + }, + { + "epoch": 0.9850126815771271, + "grad_norm": 1.5201720088447181, + "learning_rate": 1.1076877025013517e-06, + "loss": 0.48835504055023193, + "step": 4272 + }, + { + "epoch": 0.9852432557067097, + "grad_norm": 1.5142338003412266, + "learning_rate": 1.1073086505608925e-06, + "loss": 0.44442474842071533, + "step": 4273 + }, + { + "epoch": 0.9854738298362924, + "grad_norm": 1.3436041344969518, + "learning_rate": 1.1069295830220339e-06, + "loss": 0.4544455409049988, + "step": 4274 + }, + { + "epoch": 0.985704403965875, + "grad_norm": 1.5833831369807498, + "learning_rate": 1.106550499939876e-06, + "loss": 0.482341468334198, + "step": 4275 + }, + { + "epoch": 0.9859349780954577, + "grad_norm": 1.421534858967002, + "learning_rate": 1.1061714013695236e-06, + "loss": 0.5251357555389404, + "step": 4276 + }, + { + "epoch": 0.9861655522250403, + "grad_norm": 1.2537356796939523, + "learning_rate": 1.1057922873660819e-06, + "loss": 0.4538683295249939, + "step": 4277 + }, + { + "epoch": 0.986396126354623, + "grad_norm": 2.0128553783671728, + "learning_rate": 1.105413157984659e-06, + "loss": 0.5112448930740356, + "step": 4278 + }, + { + "epoch": 0.9866267004842056, + "grad_norm": 1.4914994042257563, + "learning_rate": 1.1050340132803654e-06, + "loss": 0.48863890767097473, + "step": 4279 + }, + { + "epoch": 0.9868572746137884, + "grad_norm": 
1.494741313695512, + "learning_rate": 1.1046548533083134e-06, + "loss": 0.43637439608573914, + "step": 4280 + }, + { + "epoch": 0.987087848743371, + "grad_norm": 1.5727176113962202, + "learning_rate": 1.104275678123618e-06, + "loss": 0.5231983065605164, + "step": 4281 + }, + { + "epoch": 0.9873184228729537, + "grad_norm": 1.7169447967595874, + "learning_rate": 1.1038964877813955e-06, + "loss": 0.46838122606277466, + "step": 4282 + }, + { + "epoch": 0.9875489970025363, + "grad_norm": 1.3537630033218837, + "learning_rate": 1.1035172823367658e-06, + "loss": 0.4330589473247528, + "step": 4283 + }, + { + "epoch": 0.987779571132119, + "grad_norm": 1.4178119046272273, + "learning_rate": 1.1031380618448501e-06, + "loss": 0.44962531328201294, + "step": 4284 + }, + { + "epoch": 0.9880101452617016, + "grad_norm": 1.3547255909489988, + "learning_rate": 1.1027588263607719e-06, + "loss": 0.44549795985221863, + "step": 4285 + }, + { + "epoch": 0.9882407193912843, + "grad_norm": 1.7082954293487662, + "learning_rate": 1.1023795759396568e-06, + "loss": 0.43510758876800537, + "step": 4286 + }, + { + "epoch": 0.9884712935208669, + "grad_norm": 1.3135837847563279, + "learning_rate": 1.1020003106366324e-06, + "loss": 0.4369906187057495, + "step": 4287 + }, + { + "epoch": 0.9887018676504497, + "grad_norm": 1.416650593568537, + "learning_rate": 1.1016210305068296e-06, + "loss": 0.42049574851989746, + "step": 4288 + }, + { + "epoch": 0.9889324417800323, + "grad_norm": 1.6285692706476314, + "learning_rate": 1.10124173560538e-06, + "loss": 0.449156790971756, + "step": 4289 + }, + { + "epoch": 0.989163015909615, + "grad_norm": 1.5784410678150576, + "learning_rate": 1.1008624259874177e-06, + "loss": 0.4736451506614685, + "step": 4290 + }, + { + "epoch": 0.9893935900391976, + "grad_norm": 1.3029401584123959, + "learning_rate": 1.10048310170808e-06, + "loss": 0.3988722860813141, + "step": 4291 + }, + { + "epoch": 0.9896241641687803, + "grad_norm": 1.4221756045070393, + "learning_rate": 
1.100103762822505e-06, + "loss": 0.44330862164497375, + "step": 4292 + }, + { + "epoch": 0.9898547382983629, + "grad_norm": 1.5471015099626197, + "learning_rate": 1.0997244093858336e-06, + "loss": 0.5294286608695984, + "step": 4293 + }, + { + "epoch": 0.9900853124279456, + "grad_norm": 1.3808712553027187, + "learning_rate": 1.0993450414532082e-06, + "loss": 0.463120698928833, + "step": 4294 + }, + { + "epoch": 0.9903158865575282, + "grad_norm": 1.294463919332552, + "learning_rate": 1.0989656590797747e-06, + "loss": 0.4481865167617798, + "step": 4295 + }, + { + "epoch": 0.9905464606871109, + "grad_norm": 1.4153337646078945, + "learning_rate": 1.0985862623206794e-06, + "loss": 0.4467630386352539, + "step": 4296 + }, + { + "epoch": 0.9907770348166935, + "grad_norm": 1.8865527079498654, + "learning_rate": 1.0982068512310717e-06, + "loss": 0.43485027551651, + "step": 4297 + }, + { + "epoch": 0.9910076089462763, + "grad_norm": 1.5277390713389145, + "learning_rate": 1.0978274258661032e-06, + "loss": 0.4556450843811035, + "step": 4298 + }, + { + "epoch": 0.9912381830758589, + "grad_norm": 1.4768070925377026, + "learning_rate": 1.0974479862809268e-06, + "loss": 0.48326122760772705, + "step": 4299 + }, + { + "epoch": 0.9914687572054416, + "grad_norm": 1.1782147993424035, + "learning_rate": 1.097068532530698e-06, + "loss": 0.42254534363746643, + "step": 4300 + }, + { + "epoch": 0.9916993313350242, + "grad_norm": 1.3623288149981243, + "learning_rate": 1.096689064670574e-06, + "loss": 0.4076887369155884, + "step": 4301 + }, + { + "epoch": 0.9919299054646069, + "grad_norm": 1.4246737986617306, + "learning_rate": 1.0963095827557146e-06, + "loss": 0.40615612268447876, + "step": 4302 + }, + { + "epoch": 0.9921604795941895, + "grad_norm": 1.391998245639926, + "learning_rate": 1.095930086841281e-06, + "loss": 0.47794467210769653, + "step": 4303 + }, + { + "epoch": 0.9923910537237722, + "grad_norm": 1.479591301344316, + "learning_rate": 1.0955505769824375e-06, + "loss": 
0.4927758574485779, + "step": 4304 + }, + { + "epoch": 0.9926216278533548, + "grad_norm": 1.1962407216416377, + "learning_rate": 1.0951710532343493e-06, + "loss": 0.40777790546417236, + "step": 4305 + }, + { + "epoch": 0.9928522019829376, + "grad_norm": 1.2781565166204398, + "learning_rate": 1.0947915156521837e-06, + "loss": 0.41996532678604126, + "step": 4306 + }, + { + "epoch": 0.9930827761125202, + "grad_norm": 1.3495931588969972, + "learning_rate": 1.0944119642911107e-06, + "loss": 0.4366680383682251, + "step": 4307 + }, + { + "epoch": 0.9933133502421029, + "grad_norm": 1.4609250216040512, + "learning_rate": 1.094032399206302e-06, + "loss": 0.5350530743598938, + "step": 4308 + }, + { + "epoch": 0.9935439243716855, + "grad_norm": 1.5545326791900604, + "learning_rate": 1.093652820452931e-06, + "loss": 0.5166209936141968, + "step": 4309 + }, + { + "epoch": 0.9937744985012682, + "grad_norm": 1.3624754056256652, + "learning_rate": 1.0932732280861734e-06, + "loss": 0.5104992389678955, + "step": 4310 + }, + { + "epoch": 0.9940050726308508, + "grad_norm": 1.293281056582964, + "learning_rate": 1.0928936221612068e-06, + "loss": 0.38249820470809937, + "step": 4311 + }, + { + "epoch": 0.9942356467604335, + "grad_norm": 1.5718744647134053, + "learning_rate": 1.0925140027332107e-06, + "loss": 0.4930746555328369, + "step": 4312 + }, + { + "epoch": 0.9944662208900161, + "grad_norm": 1.5006868919231642, + "learning_rate": 1.092134369857367e-06, + "loss": 0.46536654233932495, + "step": 4313 + }, + { + "epoch": 0.9946967950195988, + "grad_norm": 1.5384946564391833, + "learning_rate": 1.0917547235888582e-06, + "loss": 0.4591559171676636, + "step": 4314 + }, + { + "epoch": 0.9949273691491815, + "grad_norm": 1.609102883203802, + "learning_rate": 1.0913750639828709e-06, + "loss": 0.5034719705581665, + "step": 4315 + }, + { + "epoch": 0.9951579432787642, + "grad_norm": 1.3461654572756176, + "learning_rate": 1.0909953910945921e-06, + "loss": 0.5289135575294495, + "step": 4316 + }, + { 
+ "epoch": 0.9953885174083468, + "grad_norm": 1.5181970245510374, + "learning_rate": 1.090615704979211e-06, + "loss": 0.48736900091171265, + "step": 4317 + }, + { + "epoch": 0.9956190915379295, + "grad_norm": 1.347314123709775, + "learning_rate": 1.0902360056919186e-06, + "loss": 0.44812899827957153, + "step": 4318 + }, + { + "epoch": 0.9958496656675121, + "grad_norm": 1.717313100956624, + "learning_rate": 1.0898562932879083e-06, + "loss": 0.42837953567504883, + "step": 4319 + }, + { + "epoch": 0.9960802397970948, + "grad_norm": 1.3616068420969312, + "learning_rate": 1.089476567822375e-06, + "loss": 0.4946538805961609, + "step": 4320 + }, + { + "epoch": 0.9963108139266774, + "grad_norm": 1.3738772638549184, + "learning_rate": 1.089096829350516e-06, + "loss": 0.472694993019104, + "step": 4321 + }, + { + "epoch": 0.9965413880562601, + "grad_norm": 1.51102718471871, + "learning_rate": 1.0887170779275297e-06, + "loss": 0.546560525894165, + "step": 4322 + }, + { + "epoch": 0.9967719621858427, + "grad_norm": 1.7144585803126207, + "learning_rate": 1.088337313608617e-06, + "loss": 0.5098580718040466, + "step": 4323 + }, + { + "epoch": 0.9970025363154255, + "grad_norm": 1.4511718916783138, + "learning_rate": 1.0879575364489807e-06, + "loss": 0.4127371907234192, + "step": 4324 + }, + { + "epoch": 0.9972331104450081, + "grad_norm": 1.361622993253284, + "learning_rate": 1.0875777465038249e-06, + "loss": 0.41234201192855835, + "step": 4325 + }, + { + "epoch": 0.9974636845745908, + "grad_norm": 1.334187068919988, + "learning_rate": 1.087197943828356e-06, + "loss": 0.42657697200775146, + "step": 4326 + }, + { + "epoch": 0.9976942587041734, + "grad_norm": 1.5731685077464828, + "learning_rate": 1.0868181284777825e-06, + "loss": 0.5168975591659546, + "step": 4327 + }, + { + "epoch": 0.9979248328337561, + "grad_norm": 1.3417267376651396, + "learning_rate": 1.0864383005073142e-06, + "loss": 0.4712294340133667, + "step": 4328 + }, + { + "epoch": 0.9981554069633387, + "grad_norm": 
1.514146578387226, + "learning_rate": 1.0860584599721624e-06, + "loss": 0.4685649871826172, + "step": 4329 + }, + { + "epoch": 0.9983859810929214, + "grad_norm": 1.4104009699586146, + "learning_rate": 1.0856786069275417e-06, + "loss": 0.4699268937110901, + "step": 4330 + }, + { + "epoch": 0.998616555222504, + "grad_norm": 1.5072273981885642, + "learning_rate": 1.0852987414286669e-06, + "loss": 0.44216299057006836, + "step": 4331 + }, + { + "epoch": 0.9988471293520867, + "grad_norm": 1.489870947647978, + "learning_rate": 1.0849188635307558e-06, + "loss": 0.4374035894870758, + "step": 4332 + }, + { + "epoch": 0.9990777034816694, + "grad_norm": 1.396380314188184, + "learning_rate": 1.0845389732890269e-06, + "loss": 0.4538502097129822, + "step": 4333 + }, + { + "epoch": 0.9993082776112521, + "grad_norm": 1.5201233043344708, + "learning_rate": 1.0841590707587017e-06, + "loss": 0.4432523250579834, + "step": 4334 + }, + { + "epoch": 0.9995388517408347, + "grad_norm": 1.3401246835224159, + "learning_rate": 1.0837791559950026e-06, + "loss": 0.3614054322242737, + "step": 4335 + }, + { + "epoch": 0.9997694258704174, + "grad_norm": 1.5241184734301618, + "learning_rate": 1.0833992290531542e-06, + "loss": 0.5412651300430298, + "step": 4336 + }, + { + "epoch": 1.0, + "grad_norm": 1.3961487739465548, + "learning_rate": 1.0830192899883825e-06, + "loss": 0.43333327770233154, + "step": 4337 + }, + { + "epoch": 1.0002305741295827, + "grad_norm": 1.3739097269887006, + "learning_rate": 1.0826393388559156e-06, + "loss": 0.40433377027511597, + "step": 4338 + }, + { + "epoch": 1.0004611482591652, + "grad_norm": 1.5246903566917884, + "learning_rate": 1.0822593757109835e-06, + "loss": 0.49699902534484863, + "step": 4339 + }, + { + "epoch": 1.000691722388748, + "grad_norm": 1.4093275236950669, + "learning_rate": 1.0818794006088174e-06, + "loss": 0.4992629289627075, + "step": 4340 + }, + { + "epoch": 1.0009222965183306, + "grad_norm": 1.546985643456235, + "learning_rate": 
1.0814994136046503e-06, + "loss": 0.39532744884490967, + "step": 4341 + }, + { + "epoch": 1.0011528706479134, + "grad_norm": 1.4715614082094945, + "learning_rate": 1.0811194147537177e-06, + "loss": 0.48260024189949036, + "step": 4342 + }, + { + "epoch": 1.0013834447774959, + "grad_norm": 1.1813818983438111, + "learning_rate": 1.0807394041112562e-06, + "loss": 0.40896737575531006, + "step": 4343 + }, + { + "epoch": 1.0016140189070786, + "grad_norm": 1.373003199387245, + "learning_rate": 1.0803593817325037e-06, + "loss": 0.361757755279541, + "step": 4344 + }, + { + "epoch": 1.0018445930366613, + "grad_norm": 1.3113582417275997, + "learning_rate": 1.0799793476727006e-06, + "loss": 0.5524640083312988, + "step": 4345 + }, + { + "epoch": 1.002075167166244, + "grad_norm": 1.4504745740569693, + "learning_rate": 1.0795993019870891e-06, + "loss": 0.4798622727394104, + "step": 4346 + }, + { + "epoch": 1.0023057412958265, + "grad_norm": 1.1125620580650875, + "learning_rate": 1.079219244730912e-06, + "loss": 0.3408532440662384, + "step": 4347 + }, + { + "epoch": 1.0025363154254092, + "grad_norm": 1.6198320758392701, + "learning_rate": 1.0788391759594152e-06, + "loss": 0.4185452461242676, + "step": 4348 + }, + { + "epoch": 1.002766889554992, + "grad_norm": 1.4569047754589481, + "learning_rate": 1.078459095727845e-06, + "loss": 0.4656596779823303, + "step": 4349 + }, + { + "epoch": 1.0029974636845747, + "grad_norm": 1.2861299587948707, + "learning_rate": 1.07807900409145e-06, + "loss": 0.45649081468582153, + "step": 4350 + }, + { + "epoch": 1.0032280378141571, + "grad_norm": 1.4368410869138808, + "learning_rate": 1.0776989011054806e-06, + "loss": 0.4732903242111206, + "step": 4351 + }, + { + "epoch": 1.0034586119437399, + "grad_norm": 1.4875640347613817, + "learning_rate": 1.0773187868251882e-06, + "loss": 0.5313757658004761, + "step": 4352 + }, + { + "epoch": 1.0036891860733226, + "grad_norm": 1.7663418153227872, + "learning_rate": 1.0769386613058267e-06, + "loss": 
0.5373719334602356, + "step": 4353 + }, + { + "epoch": 1.0039197602029053, + "grad_norm": 1.4108655227977445, + "learning_rate": 1.076558524602651e-06, + "loss": 0.4530528783798218, + "step": 4354 + }, + { + "epoch": 1.0041503343324878, + "grad_norm": 2.0172927781638816, + "learning_rate": 1.076178376770918e-06, + "loss": 0.361511766910553, + "step": 4355 + }, + { + "epoch": 1.0043809084620705, + "grad_norm": 1.5430566364369291, + "learning_rate": 1.0757982178658857e-06, + "loss": 0.4260486364364624, + "step": 4356 + }, + { + "epoch": 1.0046114825916532, + "grad_norm": 1.4352564218347874, + "learning_rate": 1.0754180479428142e-06, + "loss": 0.4765712320804596, + "step": 4357 + }, + { + "epoch": 1.004842056721236, + "grad_norm": 1.408849526827852, + "learning_rate": 1.0750378670569652e-06, + "loss": 0.485443115234375, + "step": 4358 + }, + { + "epoch": 1.0050726308508184, + "grad_norm": 1.3833154190721015, + "learning_rate": 1.074657675263602e-06, + "loss": 0.5010418891906738, + "step": 4359 + }, + { + "epoch": 1.0053032049804012, + "grad_norm": 1.2138138176978153, + "learning_rate": 1.074277472617989e-06, + "loss": 0.42195719480514526, + "step": 4360 + }, + { + "epoch": 1.0055337791099839, + "grad_norm": 1.4341592826356415, + "learning_rate": 1.073897259175392e-06, + "loss": 0.48555606603622437, + "step": 4361 + }, + { + "epoch": 1.0057643532395666, + "grad_norm": 1.4030257216310642, + "learning_rate": 1.07351703499108e-06, + "loss": 0.4991112947463989, + "step": 4362 + }, + { + "epoch": 1.005994927369149, + "grad_norm": 1.365972754336138, + "learning_rate": 1.0731368001203217e-06, + "loss": 0.43016430735588074, + "step": 4363 + }, + { + "epoch": 1.0062255014987318, + "grad_norm": 1.635861674358112, + "learning_rate": 1.0727565546183883e-06, + "loss": 0.47147876024246216, + "step": 4364 + }, + { + "epoch": 1.0064560756283145, + "grad_norm": 1.4724107461573035, + "learning_rate": 1.0723762985405522e-06, + "loss": 0.4695407748222351, + "step": 4365 + }, + { + 
"epoch": 1.0066866497578972, + "grad_norm": 1.4167512288976294, + "learning_rate": 1.0719960319420878e-06, + "loss": 0.42666512727737427, + "step": 4366 + }, + { + "epoch": 1.0069172238874797, + "grad_norm": 1.4965231034133355, + "learning_rate": 1.0716157548782705e-06, + "loss": 0.5685237050056458, + "step": 4367 + }, + { + "epoch": 1.0071477980170624, + "grad_norm": 1.2856237164503312, + "learning_rate": 1.0712354674043774e-06, + "loss": 0.45181894302368164, + "step": 4368 + }, + { + "epoch": 1.0073783721466452, + "grad_norm": 1.479568259964695, + "learning_rate": 1.070855169575687e-06, + "loss": 0.4079795479774475, + "step": 4369 + }, + { + "epoch": 1.0076089462762279, + "grad_norm": 1.196685278300245, + "learning_rate": 1.0704748614474798e-06, + "loss": 0.4011094570159912, + "step": 4370 + }, + { + "epoch": 1.0078395204058104, + "grad_norm": 1.5280378960817975, + "learning_rate": 1.0700945430750373e-06, + "loss": 0.48842671513557434, + "step": 4371 + }, + { + "epoch": 1.008070094535393, + "grad_norm": 1.237232307792151, + "learning_rate": 1.0697142145136425e-06, + "loss": 0.5183907151222229, + "step": 4372 + }, + { + "epoch": 1.0083006686649758, + "grad_norm": 1.4080736997180416, + "learning_rate": 1.0693338758185797e-06, + "loss": 0.5022784471511841, + "step": 4373 + }, + { + "epoch": 1.0085312427945585, + "grad_norm": 1.5160750764739457, + "learning_rate": 1.0689535270451358e-06, + "loss": 0.500054121017456, + "step": 4374 + }, + { + "epoch": 1.008761816924141, + "grad_norm": 1.331407944528498, + "learning_rate": 1.068573168248598e-06, + "loss": 0.43674880266189575, + "step": 4375 + }, + { + "epoch": 1.0089923910537237, + "grad_norm": 1.3441260000045296, + "learning_rate": 1.068192799484255e-06, + "loss": 0.4272059202194214, + "step": 4376 + }, + { + "epoch": 1.0092229651833065, + "grad_norm": 1.3188087584834265, + "learning_rate": 1.0678124208073972e-06, + "loss": 0.41053932905197144, + "step": 4377 + }, + { + "epoch": 1.0094535393128892, + "grad_norm": 
1.3285405544041065, + "learning_rate": 1.0674320322733173e-06, + "loss": 0.4571593701839447, + "step": 4378 + }, + { + "epoch": 1.0096841134424717, + "grad_norm": 1.2947195973212757, + "learning_rate": 1.0670516339373081e-06, + "loss": 0.464965283870697, + "step": 4379 + }, + { + "epoch": 1.0099146875720544, + "grad_norm": 1.2757697611295247, + "learning_rate": 1.0666712258546639e-06, + "loss": 0.4086726903915405, + "step": 4380 + }, + { + "epoch": 1.010145261701637, + "grad_norm": 1.3664230084580502, + "learning_rate": 1.0662908080806815e-06, + "loss": 0.49988412857055664, + "step": 4381 + }, + { + "epoch": 1.0103758358312198, + "grad_norm": 1.33263070405775, + "learning_rate": 1.0659103806706587e-06, + "loss": 0.3976360559463501, + "step": 4382 + }, + { + "epoch": 1.0106064099608023, + "grad_norm": 1.3554444243435904, + "learning_rate": 1.065529943679894e-06, + "loss": 0.4500683546066284, + "step": 4383 + }, + { + "epoch": 1.010836984090385, + "grad_norm": 1.4532099828866123, + "learning_rate": 1.0651494971636875e-06, + "loss": 0.5617754459381104, + "step": 4384 + }, + { + "epoch": 1.0110675582199677, + "grad_norm": 1.2285766706051995, + "learning_rate": 1.0647690411773414e-06, + "loss": 0.4180886745452881, + "step": 4385 + }, + { + "epoch": 1.0112981323495505, + "grad_norm": 1.3797895213155087, + "learning_rate": 1.0643885757761588e-06, + "loss": 0.406663179397583, + "step": 4386 + }, + { + "epoch": 1.011528706479133, + "grad_norm": 1.2899676326462104, + "learning_rate": 1.0640081010154443e-06, + "loss": 0.4698946475982666, + "step": 4387 + }, + { + "epoch": 1.0117592806087157, + "grad_norm": 1.2421672055806043, + "learning_rate": 1.0636276169505034e-06, + "loss": 0.4845995306968689, + "step": 4388 + }, + { + "epoch": 1.0119898547382984, + "grad_norm": 1.7127723444190444, + "learning_rate": 1.0632471236366435e-06, + "loss": 0.5065066814422607, + "step": 4389 + }, + { + "epoch": 1.012220428867881, + "grad_norm": 1.5183614166838566, + "learning_rate": 
1.0628666211291735e-06, + "loss": 0.4302946925163269, + "step": 4390 + }, + { + "epoch": 1.0124510029974636, + "grad_norm": 1.682116735922279, + "learning_rate": 1.0624861094834029e-06, + "loss": 0.5772345066070557, + "step": 4391 + }, + { + "epoch": 1.0126815771270463, + "grad_norm": 1.3399536785573158, + "learning_rate": 1.0621055887546425e-06, + "loss": 0.5294336080551147, + "step": 4392 + }, + { + "epoch": 1.012912151256629, + "grad_norm": 1.1967430772955985, + "learning_rate": 1.0617250589982059e-06, + "loss": 0.5028249621391296, + "step": 4393 + }, + { + "epoch": 1.0131427253862118, + "grad_norm": 1.3120231857267954, + "learning_rate": 1.0613445202694065e-06, + "loss": 0.5072348713874817, + "step": 4394 + }, + { + "epoch": 1.0133732995157942, + "grad_norm": 1.3107230472369709, + "learning_rate": 1.060963972623559e-06, + "loss": 0.3632262945175171, + "step": 4395 + }, + { + "epoch": 1.013603873645377, + "grad_norm": 1.4739700660925632, + "learning_rate": 1.06058341611598e-06, + "loss": 0.419277161359787, + "step": 4396 + }, + { + "epoch": 1.0138344477749597, + "grad_norm": 1.4201089967708693, + "learning_rate": 1.060202850801988e-06, + "loss": 0.4056069850921631, + "step": 4397 + }, + { + "epoch": 1.0140650219045424, + "grad_norm": 1.4908298419223913, + "learning_rate": 1.0598222767369014e-06, + "loss": 0.5591505765914917, + "step": 4398 + }, + { + "epoch": 1.014295596034125, + "grad_norm": 1.2646885984398546, + "learning_rate": 1.0594416939760408e-06, + "loss": 0.38529443740844727, + "step": 4399 + }, + { + "epoch": 1.0145261701637076, + "grad_norm": 1.3255980825912217, + "learning_rate": 1.0590611025747272e-06, + "loss": 0.3609437644481659, + "step": 4400 + }, + { + "epoch": 1.0147567442932903, + "grad_norm": 1.3538282738769345, + "learning_rate": 1.058680502588284e-06, + "loss": 0.4849050045013428, + "step": 4401 + }, + { + "epoch": 1.014987318422873, + "grad_norm": 1.4516377120705455, + "learning_rate": 1.058299894072035e-06, + "loss": 0.39454251527786255, 
+ "step": 4402 + }, + { + "epoch": 1.0152178925524555, + "grad_norm": 1.5578248119945644, + "learning_rate": 1.0579192770813052e-06, + "loss": 0.39726459980010986, + "step": 4403 + }, + { + "epoch": 1.0154484666820383, + "grad_norm": 1.4398814364290877, + "learning_rate": 1.0575386516714218e-06, + "loss": 0.4730626940727234, + "step": 4404 + }, + { + "epoch": 1.015679040811621, + "grad_norm": 1.5842749126492264, + "learning_rate": 1.0571580178977123e-06, + "loss": 0.5436214804649353, + "step": 4405 + }, + { + "epoch": 1.0159096149412037, + "grad_norm": 1.4188700773135285, + "learning_rate": 1.0567773758155055e-06, + "loss": 0.4197273850440979, + "step": 4406 + }, + { + "epoch": 1.0161401890707862, + "grad_norm": 1.2873423308659837, + "learning_rate": 1.0563967254801316e-06, + "loss": 0.46460944414138794, + "step": 4407 + }, + { + "epoch": 1.016370763200369, + "grad_norm": 1.3771325056314752, + "learning_rate": 1.056016066946922e-06, + "loss": 0.3504630923271179, + "step": 4408 + }, + { + "epoch": 1.0166013373299516, + "grad_norm": 1.3484234762530152, + "learning_rate": 1.0556354002712098e-06, + "loss": 0.4620180130004883, + "step": 4409 + }, + { + "epoch": 1.0168319114595343, + "grad_norm": 1.414975730602458, + "learning_rate": 1.0552547255083283e-06, + "loss": 0.5642764568328857, + "step": 4410 + }, + { + "epoch": 1.0170624855891168, + "grad_norm": 1.3858649703726607, + "learning_rate": 1.054874042713612e-06, + "loss": 0.48283201456069946, + "step": 4411 + }, + { + "epoch": 1.0172930597186995, + "grad_norm": 1.3477248933257546, + "learning_rate": 1.0544933519423976e-06, + "loss": 0.5346091985702515, + "step": 4412 + }, + { + "epoch": 1.0175236338482823, + "grad_norm": 1.216774984460132, + "learning_rate": 1.0541126532500224e-06, + "loss": 0.4710259437561035, + "step": 4413 + }, + { + "epoch": 1.017754207977865, + "grad_norm": 1.6611025915045114, + "learning_rate": 1.0537319466918243e-06, + "loss": 0.535955548286438, + "step": 4414 + }, + { + "epoch": 
1.0179847821074475, + "grad_norm": 1.298601209078171, + "learning_rate": 1.0533512323231438e-06, + "loss": 0.4127902388572693, + "step": 4415 + }, + { + "epoch": 1.0182153562370302, + "grad_norm": 1.6222892430544704, + "learning_rate": 1.0529705101993203e-06, + "loss": 0.5209894180297852, + "step": 4416 + }, + { + "epoch": 1.018445930366613, + "grad_norm": 1.5702821211846574, + "learning_rate": 1.0525897803756967e-06, + "loss": 0.45600390434265137, + "step": 4417 + }, + { + "epoch": 1.0186765044961956, + "grad_norm": 1.6858904509627837, + "learning_rate": 1.0522090429076155e-06, + "loss": 0.5043426156044006, + "step": 4418 + }, + { + "epoch": 1.0189070786257781, + "grad_norm": 1.8442717417612486, + "learning_rate": 1.0518282978504207e-06, + "loss": 0.43386173248291016, + "step": 4419 + }, + { + "epoch": 1.0191376527553608, + "grad_norm": 1.4810433748538916, + "learning_rate": 1.0514475452594578e-06, + "loss": 0.44956767559051514, + "step": 4420 + }, + { + "epoch": 1.0193682268849436, + "grad_norm": 1.4162663845873593, + "learning_rate": 1.0510667851900726e-06, + "loss": 0.47164878249168396, + "step": 4421 + }, + { + "epoch": 1.0195988010145263, + "grad_norm": 1.3111398742961289, + "learning_rate": 1.0506860176976127e-06, + "loss": 0.4977136552333832, + "step": 4422 + }, + { + "epoch": 1.0198293751441088, + "grad_norm": 1.2272027402421368, + "learning_rate": 1.0503052428374264e-06, + "loss": 0.4344305396080017, + "step": 4423 + }, + { + "epoch": 1.0200599492736915, + "grad_norm": 1.4594484344103595, + "learning_rate": 1.049924460664863e-06, + "loss": 0.46536487340927124, + "step": 4424 + }, + { + "epoch": 1.0202905234032742, + "grad_norm": 1.5676489928965973, + "learning_rate": 1.0495436712352733e-06, + "loss": 0.4583844840526581, + "step": 4425 + }, + { + "epoch": 1.020521097532857, + "grad_norm": 1.3353943490467204, + "learning_rate": 1.049162874604009e-06, + "loss": 0.4098002314567566, + "step": 4426 + }, + { + "epoch": 1.0207516716624394, + "grad_norm": 
1.5212892459953231, + "learning_rate": 1.0487820708264227e-06, + "loss": 0.48168665170669556, + "step": 4427 + }, + { + "epoch": 1.0209822457920221, + "grad_norm": 1.575752706874104, + "learning_rate": 1.048401259957868e-06, + "loss": 0.5517562627792358, + "step": 4428 + }, + { + "epoch": 1.0212128199216048, + "grad_norm": 1.4762864972879257, + "learning_rate": 1.0480204420536998e-06, + "loss": 0.5131476521492004, + "step": 4429 + }, + { + "epoch": 1.0214433940511876, + "grad_norm": 1.3669237261259728, + "learning_rate": 1.0476396171692734e-06, + "loss": 0.4590519666671753, + "step": 4430 + }, + { + "epoch": 1.02167396818077, + "grad_norm": 1.6209541549743127, + "learning_rate": 1.0472587853599458e-06, + "loss": 0.5581461191177368, + "step": 4431 + }, + { + "epoch": 1.0219045423103528, + "grad_norm": 1.9464318549736228, + "learning_rate": 1.046877946681075e-06, + "loss": 0.4169657826423645, + "step": 4432 + }, + { + "epoch": 1.0221351164399355, + "grad_norm": 1.6990409231148407, + "learning_rate": 1.0464971011880195e-06, + "loss": 0.48135459423065186, + "step": 4433 + }, + { + "epoch": 1.0223656905695182, + "grad_norm": 1.5888684830629844, + "learning_rate": 1.046116248936139e-06, + "loss": 0.5116040706634521, + "step": 4434 + }, + { + "epoch": 1.0225962646991007, + "grad_norm": 1.2239425777755701, + "learning_rate": 1.0457353899807946e-06, + "loss": 0.4369809329509735, + "step": 4435 + }, + { + "epoch": 1.0228268388286834, + "grad_norm": 1.3094581394180187, + "learning_rate": 1.0453545243773474e-06, + "loss": 0.42936772108078003, + "step": 4436 + }, + { + "epoch": 1.0230574129582661, + "grad_norm": 1.4191745941139933, + "learning_rate": 1.0449736521811605e-06, + "loss": 0.3614712357521057, + "step": 4437 + }, + { + "epoch": 1.0232879870878488, + "grad_norm": 1.4958077731615864, + "learning_rate": 1.0445927734475977e-06, + "loss": 0.40728119015693665, + "step": 4438 + }, + { + "epoch": 1.0235185612174313, + "grad_norm": 1.6199665099354292, + "learning_rate": 
1.0442118882320233e-06, + "loss": 0.4940561056137085, + "step": 4439 + }, + { + "epoch": 1.023749135347014, + "grad_norm": 1.5292135898443935, + "learning_rate": 1.0438309965898027e-06, + "loss": 0.49529674649238586, + "step": 4440 + }, + { + "epoch": 1.0239797094765968, + "grad_norm": 1.3839632419664316, + "learning_rate": 1.0434500985763027e-06, + "loss": 0.4849408268928528, + "step": 4441 + }, + { + "epoch": 1.0242102836061795, + "grad_norm": 1.2306090654878221, + "learning_rate": 1.0430691942468903e-06, + "loss": 0.4121132791042328, + "step": 4442 + }, + { + "epoch": 1.024440857735762, + "grad_norm": 1.3788405992777184, + "learning_rate": 1.042688283656934e-06, + "loss": 0.4348478317260742, + "step": 4443 + }, + { + "epoch": 1.0246714318653447, + "grad_norm": 1.4946594419770094, + "learning_rate": 1.0423073668618033e-06, + "loss": 0.46817919611930847, + "step": 4444 + }, + { + "epoch": 1.0249020059949274, + "grad_norm": 1.4309128927667782, + "learning_rate": 1.041926443916868e-06, + "loss": 0.4422008991241455, + "step": 4445 + }, + { + "epoch": 1.02513258012451, + "grad_norm": 1.4766353003575698, + "learning_rate": 1.041545514877499e-06, + "loss": 0.5108183026313782, + "step": 4446 + }, + { + "epoch": 1.0253631542540926, + "grad_norm": 1.4287581583003561, + "learning_rate": 1.0411645797990685e-06, + "loss": 0.4759529232978821, + "step": 4447 + }, + { + "epoch": 1.0255937283836754, + "grad_norm": 1.4822019265627726, + "learning_rate": 1.040783638736949e-06, + "loss": 0.44447648525238037, + "step": 4448 + }, + { + "epoch": 1.025824302513258, + "grad_norm": 1.9820121270715096, + "learning_rate": 1.0404026917465144e-06, + "loss": 0.4558752477169037, + "step": 4449 + }, + { + "epoch": 1.0260548766428408, + "grad_norm": 1.5117188074263472, + "learning_rate": 1.0400217388831393e-06, + "loss": 0.4728459417819977, + "step": 4450 + }, + { + "epoch": 1.0262854507724233, + "grad_norm": 1.2832295949174854, + "learning_rate": 1.0396407802021985e-06, + "loss": 
0.4815519452095032, + "step": 4451 + }, + { + "epoch": 1.026516024902006, + "grad_norm": 1.493224641636315, + "learning_rate": 1.0392598157590685e-06, + "loss": 0.5173656344413757, + "step": 4452 + }, + { + "epoch": 1.0267465990315887, + "grad_norm": 1.389267472286255, + "learning_rate": 1.0388788456091267e-06, + "loss": 0.5280762910842896, + "step": 4453 + }, + { + "epoch": 1.0269771731611712, + "grad_norm": 1.3239342530675255, + "learning_rate": 1.0384978698077506e-06, + "loss": 0.4524118900299072, + "step": 4454 + }, + { + "epoch": 1.027207747290754, + "grad_norm": 1.3855017021962426, + "learning_rate": 1.0381168884103186e-06, + "loss": 0.4011715054512024, + "step": 4455 + }, + { + "epoch": 1.0274383214203366, + "grad_norm": 1.6664926632341406, + "learning_rate": 1.0377359014722108e-06, + "loss": 0.518020749092102, + "step": 4456 + }, + { + "epoch": 1.0276688955499194, + "grad_norm": 1.3443799803410221, + "learning_rate": 1.0373549090488073e-06, + "loss": 0.44726112484931946, + "step": 4457 + }, + { + "epoch": 1.0278994696795019, + "grad_norm": 1.5697915792497608, + "learning_rate": 1.0369739111954894e-06, + "loss": 0.5344264507293701, + "step": 4458 + }, + { + "epoch": 1.0281300438090846, + "grad_norm": 1.3300732692572412, + "learning_rate": 1.0365929079676387e-06, + "loss": 0.4902813732624054, + "step": 4459 + }, + { + "epoch": 1.0283606179386673, + "grad_norm": 1.6676294678142136, + "learning_rate": 1.0362118994206378e-06, + "loss": 0.38346555829048157, + "step": 4460 + }, + { + "epoch": 1.02859119206825, + "grad_norm": 1.4992112279059755, + "learning_rate": 1.0358308856098705e-06, + "loss": 0.4232872724533081, + "step": 4461 + }, + { + "epoch": 1.0288217661978325, + "grad_norm": 1.4973168899301483, + "learning_rate": 1.0354498665907207e-06, + "loss": 0.5184470415115356, + "step": 4462 + }, + { + "epoch": 1.0290523403274152, + "grad_norm": 1.3344202325848402, + "learning_rate": 1.0350688424185733e-06, + "loss": 0.4989054203033447, + "step": 4463 + }, + { + 
"epoch": 1.029282914456998, + "grad_norm": 1.4348006325476266, + "learning_rate": 1.0346878131488145e-06, + "loss": 0.5204064249992371, + "step": 4464 + }, + { + "epoch": 1.0295134885865806, + "grad_norm": 1.5066284997527284, + "learning_rate": 1.0343067788368307e-06, + "loss": 0.47872811555862427, + "step": 4465 + }, + { + "epoch": 1.0297440627161631, + "grad_norm": 1.4195028916227292, + "learning_rate": 1.0339257395380087e-06, + "loss": 0.4104915261268616, + "step": 4466 + }, + { + "epoch": 1.0299746368457459, + "grad_norm": 1.3696214178005537, + "learning_rate": 1.0335446953077366e-06, + "loss": 0.39327263832092285, + "step": 4467 + }, + { + "epoch": 1.0302052109753286, + "grad_norm": 1.4702497550106948, + "learning_rate": 1.033163646201403e-06, + "loss": 0.4395657777786255, + "step": 4468 + }, + { + "epoch": 1.0304357851049113, + "grad_norm": 1.419425725268843, + "learning_rate": 1.0327825922743976e-06, + "loss": 0.462537944316864, + "step": 4469 + }, + { + "epoch": 1.0306663592344938, + "grad_norm": 1.3686105119540095, + "learning_rate": 1.03240153358211e-06, + "loss": 0.4399976134300232, + "step": 4470 + }, + { + "epoch": 1.0308969333640765, + "grad_norm": 1.2004518913155955, + "learning_rate": 1.0320204701799311e-06, + "loss": 0.4289684593677521, + "step": 4471 + }, + { + "epoch": 1.0311275074936592, + "grad_norm": 1.700414177665105, + "learning_rate": 1.0316394021232524e-06, + "loss": 0.4771305322647095, + "step": 4472 + }, + { + "epoch": 1.031358081623242, + "grad_norm": 1.3381367861828992, + "learning_rate": 1.031258329467466e-06, + "loss": 0.4544849395751953, + "step": 4473 + }, + { + "epoch": 1.0315886557528244, + "grad_norm": 1.7319531178301495, + "learning_rate": 1.0308772522679646e-06, + "loss": 0.5362099409103394, + "step": 4474 + }, + { + "epoch": 1.0318192298824072, + "grad_norm": 1.564907240947497, + "learning_rate": 1.0304961705801413e-06, + "loss": 0.48966753482818604, + "step": 4475 + }, + { + "epoch": 1.0320498040119899, + "grad_norm": 
1.379783010020372, + "learning_rate": 1.0301150844593908e-06, + "loss": 0.3750344216823578, + "step": 4476 + }, + { + "epoch": 1.0322803781415726, + "grad_norm": 1.3651499470494945, + "learning_rate": 1.0297339939611076e-06, + "loss": 0.453983873128891, + "step": 4477 + }, + { + "epoch": 1.032510952271155, + "grad_norm": 1.837467998410361, + "learning_rate": 1.029352899140687e-06, + "loss": 0.5096027255058289, + "step": 4478 + }, + { + "epoch": 1.0327415264007378, + "grad_norm": 1.395622916901131, + "learning_rate": 1.028971800053525e-06, + "loss": 0.4387558698654175, + "step": 4479 + }, + { + "epoch": 1.0329721005303205, + "grad_norm": 1.324708629656248, + "learning_rate": 1.0285906967550184e-06, + "loss": 0.45710843801498413, + "step": 4480 + }, + { + "epoch": 1.0332026746599032, + "grad_norm": 1.631576144246761, + "learning_rate": 1.0282095893005643e-06, + "loss": 0.5258994102478027, + "step": 4481 + }, + { + "epoch": 1.0334332487894857, + "grad_norm": 1.320456527047697, + "learning_rate": 1.0278284777455603e-06, + "loss": 0.5037236213684082, + "step": 4482 + }, + { + "epoch": 1.0336638229190684, + "grad_norm": 1.3671446032683054, + "learning_rate": 1.027447362145405e-06, + "loss": 0.4730300307273865, + "step": 4483 + }, + { + "epoch": 1.0338943970486512, + "grad_norm": 1.5284074958618745, + "learning_rate": 1.0270662425554974e-06, + "loss": 0.4373326301574707, + "step": 4484 + }, + { + "epoch": 1.0341249711782339, + "grad_norm": 1.379045843622324, + "learning_rate": 1.0266851190312373e-06, + "loss": 0.3915579319000244, + "step": 4485 + }, + { + "epoch": 1.0343555453078164, + "grad_norm": 1.3482794503547837, + "learning_rate": 1.0263039916280247e-06, + "loss": 0.36588191986083984, + "step": 4486 + }, + { + "epoch": 1.034586119437399, + "grad_norm": 1.2333606023937755, + "learning_rate": 1.0259228604012602e-06, + "loss": 0.4287286400794983, + "step": 4487 + }, + { + "epoch": 1.0348166935669818, + "grad_norm": 1.3775270616642934, + "learning_rate": 
1.0255417254063454e-06, + "loss": 0.4405861496925354, + "step": 4488 + }, + { + "epoch": 1.0350472676965645, + "grad_norm": 1.443831892269548, + "learning_rate": 1.0251605866986818e-06, + "loss": 0.4859738349914551, + "step": 4489 + }, + { + "epoch": 1.035277841826147, + "grad_norm": 1.4103288990509777, + "learning_rate": 1.0247794443336722e-06, + "loss": 0.40879446268081665, + "step": 4490 + }, + { + "epoch": 1.0355084159557297, + "grad_norm": 1.4900612923986292, + "learning_rate": 1.024398298366719e-06, + "loss": 0.44872337579727173, + "step": 4491 + }, + { + "epoch": 1.0357389900853124, + "grad_norm": 1.3707597883324278, + "learning_rate": 1.0240171488532258e-06, + "loss": 0.41155117750167847, + "step": 4492 + }, + { + "epoch": 1.0359695642148952, + "grad_norm": 1.4935319402234073, + "learning_rate": 1.0236359958485966e-06, + "loss": 0.48941487073898315, + "step": 4493 + }, + { + "epoch": 1.0362001383444777, + "grad_norm": 1.3889526979110256, + "learning_rate": 1.0232548394082362e-06, + "loss": 0.4462544322013855, + "step": 4494 + }, + { + "epoch": 1.0364307124740604, + "grad_norm": 1.7635931454030804, + "learning_rate": 1.0228736795875487e-06, + "loss": 0.3791837692260742, + "step": 4495 + }, + { + "epoch": 1.036661286603643, + "grad_norm": 1.7988283203699307, + "learning_rate": 1.0224925164419404e-06, + "loss": 0.5037285685539246, + "step": 4496 + }, + { + "epoch": 1.0368918607332258, + "grad_norm": 1.5033654685782605, + "learning_rate": 1.0221113500268169e-06, + "loss": 0.4762890636920929, + "step": 4497 + }, + { + "epoch": 1.0371224348628083, + "grad_norm": 1.2678994584792878, + "learning_rate": 1.0217301803975844e-06, + "loss": 0.4673793315887451, + "step": 4498 + }, + { + "epoch": 1.037353008992391, + "grad_norm": 1.4491139066226089, + "learning_rate": 1.0213490076096501e-06, + "loss": 0.37522250413894653, + "step": 4499 + }, + { + "epoch": 1.0375835831219737, + "grad_norm": 1.4197729369573655, + "learning_rate": 1.020967831718421e-06, + "loss": 
0.4986375570297241, + "step": 4500 + }, + { + "epoch": 1.0378141572515565, + "grad_norm": 1.3424622189818292, + "learning_rate": 1.0205866527793053e-06, + "loss": 0.488337904214859, + "step": 4501 + }, + { + "epoch": 1.038044731381139, + "grad_norm": 1.2513264252251595, + "learning_rate": 1.0202054708477107e-06, + "loss": 0.37420767545700073, + "step": 4502 + }, + { + "epoch": 1.0382753055107217, + "grad_norm": 1.1901249454864467, + "learning_rate": 1.0198242859790465e-06, + "loss": 0.42453843355178833, + "step": 4503 + }, + { + "epoch": 1.0385058796403044, + "grad_norm": 1.5998980096348292, + "learning_rate": 1.0194430982287211e-06, + "loss": 0.4431978166103363, + "step": 4504 + }, + { + "epoch": 1.038736453769887, + "grad_norm": 1.2584649975167521, + "learning_rate": 1.0190619076521445e-06, + "loss": 0.5079195499420166, + "step": 4505 + }, + { + "epoch": 1.0389670278994696, + "grad_norm": 1.3630757915855334, + "learning_rate": 1.0186807143047263e-06, + "loss": 0.442915678024292, + "step": 4506 + }, + { + "epoch": 1.0391976020290523, + "grad_norm": 1.4946032354137926, + "learning_rate": 1.018299518241877e-06, + "loss": 0.4720972180366516, + "step": 4507 + }, + { + "epoch": 1.039428176158635, + "grad_norm": 1.407838633939113, + "learning_rate": 1.0179183195190073e-06, + "loss": 0.4637352526187897, + "step": 4508 + }, + { + "epoch": 1.0396587502882177, + "grad_norm": 1.3457342565284411, + "learning_rate": 1.0175371181915283e-06, + "loss": 0.4207759499549866, + "step": 4509 + }, + { + "epoch": 1.0398893244178002, + "grad_norm": 1.5872196626053143, + "learning_rate": 1.0171559143148514e-06, + "loss": 0.49227845668792725, + "step": 4510 + }, + { + "epoch": 1.040119898547383, + "grad_norm": 1.4565076836431372, + "learning_rate": 1.0167747079443884e-06, + "loss": 0.5006893873214722, + "step": 4511 + }, + { + "epoch": 1.0403504726769657, + "grad_norm": 1.4618469895611303, + "learning_rate": 1.016393499135552e-06, + "loss": 0.42048192024230957, + "step": 4512 + }, + { + 
"epoch": 1.0405810468065484, + "grad_norm": 1.5634742093932859, + "learning_rate": 1.0160122879437538e-06, + "loss": 0.5275895595550537, + "step": 4513 + }, + { + "epoch": 1.0408116209361309, + "grad_norm": 1.1544305266604897, + "learning_rate": 1.0156310744244073e-06, + "loss": 0.4677985906600952, + "step": 4514 + }, + { + "epoch": 1.0410421950657136, + "grad_norm": 1.422644417212902, + "learning_rate": 1.015249858632926e-06, + "loss": 0.5214150547981262, + "step": 4515 + }, + { + "epoch": 1.0412727691952963, + "grad_norm": 1.2418435857264525, + "learning_rate": 1.0148686406247232e-06, + "loss": 0.40790024399757385, + "step": 4516 + }, + { + "epoch": 1.041503343324879, + "grad_norm": 1.6199751141856524, + "learning_rate": 1.0144874204552125e-06, + "loss": 0.5943785309791565, + "step": 4517 + }, + { + "epoch": 1.0417339174544615, + "grad_norm": 1.531988684910503, + "learning_rate": 1.0141061981798086e-06, + "loss": 0.4590263366699219, + "step": 4518 + }, + { + "epoch": 1.0419644915840443, + "grad_norm": 1.3212940799821826, + "learning_rate": 1.0137249738539257e-06, + "loss": 0.4106098413467407, + "step": 4519 + }, + { + "epoch": 1.042195065713627, + "grad_norm": 1.4102973636174063, + "learning_rate": 1.013343747532979e-06, + "loss": 0.4730203151702881, + "step": 4520 + }, + { + "epoch": 1.0424256398432097, + "grad_norm": 1.2769276209650842, + "learning_rate": 1.0129625192723833e-06, + "loss": 0.43245944380760193, + "step": 4521 + }, + { + "epoch": 1.0426562139727922, + "grad_norm": 1.3088740452256564, + "learning_rate": 1.012581289127554e-06, + "loss": 0.40828272700309753, + "step": 4522 + }, + { + "epoch": 1.042886788102375, + "grad_norm": 1.5940499075267438, + "learning_rate": 1.0122000571539069e-06, + "loss": 0.4232874810695648, + "step": 4523 + }, + { + "epoch": 1.0431173622319576, + "grad_norm": 1.45477003479617, + "learning_rate": 1.0118188234068579e-06, + "loss": 0.43044984340667725, + "step": 4524 + }, + { + "epoch": 1.0433479363615403, + "grad_norm": 
1.6545172631907663, + "learning_rate": 1.011437587941823e-06, + "loss": 0.4502897262573242, + "step": 4525 + }, + { + "epoch": 1.0435785104911228, + "grad_norm": 2.0995258586192467, + "learning_rate": 1.0110563508142185e-06, + "loss": 0.5505340099334717, + "step": 4526 + }, + { + "epoch": 1.0438090846207055, + "grad_norm": 1.5629586322344833, + "learning_rate": 1.0106751120794617e-06, + "loss": 0.4026086628437042, + "step": 4527 + }, + { + "epoch": 1.0440396587502883, + "grad_norm": 1.5105039899180257, + "learning_rate": 1.0102938717929692e-06, + "loss": 0.3910222053527832, + "step": 4528 + }, + { + "epoch": 1.044270232879871, + "grad_norm": 1.6830902678008934, + "learning_rate": 1.009912630010158e-06, + "loss": 0.4134068191051483, + "step": 4529 + }, + { + "epoch": 1.0445008070094535, + "grad_norm": 1.4825250898714368, + "learning_rate": 1.0095313867864457e-06, + "loss": 0.4801563024520874, + "step": 4530 + }, + { + "epoch": 1.0447313811390362, + "grad_norm": 1.2424640239796358, + "learning_rate": 1.0091501421772495e-06, + "loss": 0.4269358515739441, + "step": 4531 + }, + { + "epoch": 1.044961955268619, + "grad_norm": 1.3485994976026512, + "learning_rate": 1.0087688962379877e-06, + "loss": 0.5300281047821045, + "step": 4532 + }, + { + "epoch": 1.0451925293982016, + "grad_norm": 1.6865287595757648, + "learning_rate": 1.0083876490240777e-06, + "loss": 0.4634189009666443, + "step": 4533 + }, + { + "epoch": 1.0454231035277841, + "grad_norm": 1.5187760856795984, + "learning_rate": 1.0080064005909379e-06, + "loss": 0.37037551403045654, + "step": 4534 + }, + { + "epoch": 1.0456536776573668, + "grad_norm": 1.2977267015714409, + "learning_rate": 1.0076251509939867e-06, + "loss": 0.4740016460418701, + "step": 4535 + }, + { + "epoch": 1.0458842517869495, + "grad_norm": 1.4686161726335998, + "learning_rate": 1.0072439002886426e-06, + "loss": 0.4824775159358978, + "step": 4536 + }, + { + "epoch": 1.0461148259165323, + "grad_norm": 1.4032368341998698, + "learning_rate": 
1.0068626485303242e-06, + "loss": 0.4891430735588074, + "step": 4537 + }, + { + "epoch": 1.0463454000461148, + "grad_norm": 1.440410031419601, + "learning_rate": 1.00648139577445e-06, + "loss": 0.48089975118637085, + "step": 4538 + }, + { + "epoch": 1.0465759741756975, + "grad_norm": 1.3280505427696812, + "learning_rate": 1.0061001420764395e-06, + "loss": 0.4353799521923065, + "step": 4539 + }, + { + "epoch": 1.0468065483052802, + "grad_norm": 1.5425308952951848, + "learning_rate": 1.0057188874917117e-06, + "loss": 0.4259982705116272, + "step": 4540 + }, + { + "epoch": 1.047037122434863, + "grad_norm": 1.502788920344227, + "learning_rate": 1.0053376320756852e-06, + "loss": 0.4400532841682434, + "step": 4541 + }, + { + "epoch": 1.0472676965644454, + "grad_norm": 1.398609267878258, + "learning_rate": 1.00495637588378e-06, + "loss": 0.48598533868789673, + "step": 4542 + }, + { + "epoch": 1.0474982706940281, + "grad_norm": 1.7261761893493324, + "learning_rate": 1.0045751189714153e-06, + "loss": 0.6310586929321289, + "step": 4543 + }, + { + "epoch": 1.0477288448236108, + "grad_norm": 1.4822203646620422, + "learning_rate": 1.0041938613940108e-06, + "loss": 0.49084293842315674, + "step": 4544 + }, + { + "epoch": 1.0479594189531936, + "grad_norm": 1.6167393331453148, + "learning_rate": 1.003812603206986e-06, + "loss": 0.5144428014755249, + "step": 4545 + }, + { + "epoch": 1.048189993082776, + "grad_norm": 1.4962485615696877, + "learning_rate": 1.0034313444657605e-06, + "loss": 0.4480917155742645, + "step": 4546 + }, + { + "epoch": 1.0484205672123588, + "grad_norm": 1.4833727438286728, + "learning_rate": 1.0030500852257545e-06, + "loss": 0.4505491852760315, + "step": 4547 + }, + { + "epoch": 1.0486511413419415, + "grad_norm": 1.3728340651335322, + "learning_rate": 1.0026688255423876e-06, + "loss": 0.3344930410385132, + "step": 4548 + }, + { + "epoch": 1.0488817154715242, + "grad_norm": 1.3493238342876126, + "learning_rate": 1.0022875654710801e-06, + "loss": 
0.4006739854812622, + "step": 4549 + }, + { + "epoch": 1.0491122896011067, + "grad_norm": 1.4777604777161095, + "learning_rate": 1.0019063050672517e-06, + "loss": 0.4815717935562134, + "step": 4550 + }, + { + "epoch": 1.0493428637306894, + "grad_norm": 1.4182246513528267, + "learning_rate": 1.0015250443863223e-06, + "loss": 0.4660469889640808, + "step": 4551 + }, + { + "epoch": 1.0495734378602721, + "grad_norm": 1.4298035442899577, + "learning_rate": 1.0011437834837125e-06, + "loss": 0.5233521461486816, + "step": 4552 + }, + { + "epoch": 1.0498040119898548, + "grad_norm": 1.7530768174577198, + "learning_rate": 1.0007625224148418e-06, + "loss": 0.6037864685058594, + "step": 4553 + }, + { + "epoch": 1.0500345861194373, + "grad_norm": 1.726860458569315, + "learning_rate": 1.000381261235131e-06, + "loss": 0.469952255487442, + "step": 4554 + }, + { + "epoch": 1.05026516024902, + "grad_norm": 1.302712404041117, + "learning_rate": 1e-06, + "loss": 0.4577752649784088, + "step": 4555 + }, + { + "epoch": 1.0504957343786028, + "grad_norm": 1.537724574807554, + "learning_rate": 9.996187387648692e-07, + "loss": 0.46796074509620667, + "step": 4556 + }, + { + "epoch": 1.0507263085081853, + "grad_norm": 1.3633141581703183, + "learning_rate": 9.992374775851583e-07, + "loss": 0.40709036588668823, + "step": 4557 + }, + { + "epoch": 1.050956882637768, + "grad_norm": 1.2121351653860253, + "learning_rate": 9.988562165162878e-07, + "loss": 0.3997795879840851, + "step": 4558 + }, + { + "epoch": 1.0511874567673507, + "grad_norm": 1.6938685288563167, + "learning_rate": 9.984749556136779e-07, + "loss": 0.4677845239639282, + "step": 4559 + }, + { + "epoch": 1.0514180308969334, + "grad_norm": 1.315537055431831, + "learning_rate": 9.980936949327487e-07, + "loss": 0.40411800146102905, + "step": 4560 + }, + { + "epoch": 1.0516486050265161, + "grad_norm": 1.3999939149032237, + "learning_rate": 9.9771243452892e-07, + "loss": 0.50546795129776, + "step": 4561 + }, + { + "epoch": 1.0518791791560986, + 
"grad_norm": 1.5468163611837324, + "learning_rate": 9.973311744576125e-07, + "loss": 0.4116637110710144, + "step": 4562 + }, + { + "epoch": 1.0521097532856813, + "grad_norm": 1.2997915019544943, + "learning_rate": 9.969499147742454e-07, + "loss": 0.4271109700202942, + "step": 4563 + }, + { + "epoch": 1.052340327415264, + "grad_norm": 1.1760164248835672, + "learning_rate": 9.965686555342396e-07, + "loss": 0.37195074558258057, + "step": 4564 + }, + { + "epoch": 1.0525709015448466, + "grad_norm": 1.6759945376385115, + "learning_rate": 9.96187396793014e-07, + "loss": 0.4020707607269287, + "step": 4565 + }, + { + "epoch": 1.0528014756744293, + "grad_norm": 1.5880882887273124, + "learning_rate": 9.95806138605989e-07, + "loss": 0.4980151951313019, + "step": 4566 + }, + { + "epoch": 1.053032049804012, + "grad_norm": 1.419377079967674, + "learning_rate": 9.95424881028585e-07, + "loss": 0.39553767442703247, + "step": 4567 + }, + { + "epoch": 1.0532626239335947, + "grad_norm": 1.3361167736969362, + "learning_rate": 9.9504362411622e-07, + "loss": 0.47618645429611206, + "step": 4568 + }, + { + "epoch": 1.0534931980631772, + "grad_norm": 1.6469408967264108, + "learning_rate": 9.94662367924315e-07, + "loss": 0.4613817036151886, + "step": 4569 + }, + { + "epoch": 1.05372377219276, + "grad_norm": 1.4563205269464143, + "learning_rate": 9.942811125082884e-07, + "loss": 0.35888034105300903, + "step": 4570 + }, + { + "epoch": 1.0539543463223426, + "grad_norm": 1.896669698951033, + "learning_rate": 9.938998579235606e-07, + "loss": 0.45810097455978394, + "step": 4571 + }, + { + "epoch": 1.0541849204519254, + "grad_norm": 1.4115626759758866, + "learning_rate": 9.935186042255499e-07, + "loss": 0.5351384878158569, + "step": 4572 + }, + { + "epoch": 1.0544154945815079, + "grad_norm": 1.4888165757644622, + "learning_rate": 9.931373514696759e-07, + "loss": 0.5261274576187134, + "step": 4573 + }, + { + "epoch": 1.0546460687110906, + "grad_norm": 1.368295507669899, + "learning_rate": 
9.927560997113573e-07, + "loss": 0.483295202255249, + "step": 4574 + }, + { + "epoch": 1.0548766428406733, + "grad_norm": 1.5639325535974613, + "learning_rate": 9.923748490060132e-07, + "loss": 0.5371580719947815, + "step": 4575 + }, + { + "epoch": 1.055107216970256, + "grad_norm": 1.8721225876517977, + "learning_rate": 9.919935994090622e-07, + "loss": 0.4863673746585846, + "step": 4576 + }, + { + "epoch": 1.0553377910998385, + "grad_norm": 1.5391981555318386, + "learning_rate": 9.916123509759224e-07, + "loss": 0.47929099202156067, + "step": 4577 + }, + { + "epoch": 1.0555683652294212, + "grad_norm": 1.3884034720788059, + "learning_rate": 9.912311037620126e-07, + "loss": 0.4687851667404175, + "step": 4578 + }, + { + "epoch": 1.055798939359004, + "grad_norm": 1.5841867302150618, + "learning_rate": 9.908498578227504e-07, + "loss": 0.5308720469474792, + "step": 4579 + }, + { + "epoch": 1.0560295134885866, + "grad_norm": 1.8691314272616926, + "learning_rate": 9.904686132135546e-07, + "loss": 0.45900580286979675, + "step": 4580 + }, + { + "epoch": 1.0562600876181691, + "grad_norm": 1.4586686619480431, + "learning_rate": 9.900873699898422e-07, + "loss": 0.49392157793045044, + "step": 4581 + }, + { + "epoch": 1.0564906617477519, + "grad_norm": 1.6139111586944341, + "learning_rate": 9.89706128207031e-07, + "loss": 0.47190070152282715, + "step": 4582 + }, + { + "epoch": 1.0567212358773346, + "grad_norm": 1.7781894650458763, + "learning_rate": 9.893248879205382e-07, + "loss": 0.4431575834751129, + "step": 4583 + }, + { + "epoch": 1.0569518100069173, + "grad_norm": 1.293421470994464, + "learning_rate": 9.889436491857814e-07, + "loss": 0.49873441457748413, + "step": 4584 + }, + { + "epoch": 1.0571823841364998, + "grad_norm": 1.4263954197349762, + "learning_rate": 9.885624120581772e-07, + "loss": 0.41190844774246216, + "step": 4585 + }, + { + "epoch": 1.0574129582660825, + "grad_norm": 1.5698735406284627, + "learning_rate": 9.881811765931423e-07, + "loss": 0.5164123773574829, + 
"step": 4586 + }, + { + "epoch": 1.0576435323956652, + "grad_norm": 1.5034141006108586, + "learning_rate": 9.877999428460933e-07, + "loss": 0.4141567349433899, + "step": 4587 + }, + { + "epoch": 1.057874106525248, + "grad_norm": 1.557658840701198, + "learning_rate": 9.87418710872446e-07, + "loss": 0.457628458738327, + "step": 4588 + }, + { + "epoch": 1.0581046806548304, + "grad_norm": 1.4732865673601758, + "learning_rate": 9.870374807276168e-07, + "loss": 0.41788995265960693, + "step": 4589 + }, + { + "epoch": 1.0583352547844131, + "grad_norm": 1.6240063497851516, + "learning_rate": 9.866562524670209e-07, + "loss": 0.5124667882919312, + "step": 4590 + }, + { + "epoch": 1.0585658289139959, + "grad_norm": 1.1619873853554898, + "learning_rate": 9.862750261460742e-07, + "loss": 0.4192196726799011, + "step": 4591 + }, + { + "epoch": 1.0587964030435786, + "grad_norm": 1.3804521479784477, + "learning_rate": 9.858938018201913e-07, + "loss": 0.4345153868198395, + "step": 4592 + }, + { + "epoch": 1.059026977173161, + "grad_norm": 1.3186049119261667, + "learning_rate": 9.855125795447874e-07, + "loss": 0.391804963350296, + "step": 4593 + }, + { + "epoch": 1.0592575513027438, + "grad_norm": 1.3394610780120433, + "learning_rate": 9.851313593752767e-07, + "loss": 0.3904710114002228, + "step": 4594 + }, + { + "epoch": 1.0594881254323265, + "grad_norm": 1.4234043935357816, + "learning_rate": 9.847501413670742e-07, + "loss": 0.37314411997795105, + "step": 4595 + }, + { + "epoch": 1.0597186995619092, + "grad_norm": 1.7572920451540888, + "learning_rate": 9.843689255755926e-07, + "loss": 0.5402779579162598, + "step": 4596 + }, + { + "epoch": 1.0599492736914917, + "grad_norm": 1.4688689617213957, + "learning_rate": 9.839877120562463e-07, + "loss": 0.4243565797805786, + "step": 4597 + }, + { + "epoch": 1.0601798478210744, + "grad_norm": 1.6330717694890693, + "learning_rate": 9.836065008644484e-07, + "loss": 0.4504585564136505, + "step": 4598 + }, + { + "epoch": 1.0604104219506572, + 
"grad_norm": 1.3073319656874434, + "learning_rate": 9.832252920556115e-07, + "loss": 0.46487870812416077, + "step": 4599 + }, + { + "epoch": 1.0606409960802399, + "grad_norm": 1.452752590173503, + "learning_rate": 9.828440856851487e-07, + "loss": 0.470059871673584, + "step": 4600 + }, + { + "epoch": 1.0608715702098224, + "grad_norm": 1.4580866952416336, + "learning_rate": 9.824628818084716e-07, + "loss": 0.4307391047477722, + "step": 4601 + }, + { + "epoch": 1.061102144339405, + "grad_norm": 1.545423985207434, + "learning_rate": 9.820816804809927e-07, + "loss": 0.49449142813682556, + "step": 4602 + }, + { + "epoch": 1.0613327184689878, + "grad_norm": 1.4803985945664777, + "learning_rate": 9.817004817581229e-07, + "loss": 0.4932701885700226, + "step": 4603 + }, + { + "epoch": 1.0615632925985705, + "grad_norm": 1.4502372729626234, + "learning_rate": 9.813192856952739e-07, + "loss": 0.49543553590774536, + "step": 4604 + }, + { + "epoch": 1.061793866728153, + "grad_norm": 1.1578379554584357, + "learning_rate": 9.809380923478554e-07, + "loss": 0.3906818926334381, + "step": 4605 + }, + { + "epoch": 1.0620244408577357, + "grad_norm": 1.4436425775524195, + "learning_rate": 9.80556901771279e-07, + "loss": 0.41667112708091736, + "step": 4606 + }, + { + "epoch": 1.0622550149873184, + "grad_norm": 1.475010908303335, + "learning_rate": 9.801757140209538e-07, + "loss": 0.36195361614227295, + "step": 4607 + }, + { + "epoch": 1.0624855891169012, + "grad_norm": 1.4053500417900708, + "learning_rate": 9.797945291522892e-07, + "loss": 0.4056081175804138, + "step": 4608 + }, + { + "epoch": 1.0627161632464837, + "grad_norm": 1.4310559040175581, + "learning_rate": 9.794133472206948e-07, + "loss": 0.5048736929893494, + "step": 4609 + }, + { + "epoch": 1.0629467373760664, + "grad_norm": 1.3896886111265523, + "learning_rate": 9.790321682815788e-07, + "loss": 0.4846169352531433, + "step": 4610 + }, + { + "epoch": 1.063177311505649, + "grad_norm": 1.3569892439901554, + "learning_rate": 
9.7865099239035e-07, + "loss": 0.5149316787719727, + "step": 4611 + }, + { + "epoch": 1.0634078856352318, + "grad_norm": 1.5344870466099163, + "learning_rate": 9.782698196024155e-07, + "loss": 0.3816874623298645, + "step": 4612 + }, + { + "epoch": 1.0636384597648143, + "grad_norm": 1.39688044025804, + "learning_rate": 9.77888649973183e-07, + "loss": 0.5469645261764526, + "step": 4613 + }, + { + "epoch": 1.063869033894397, + "grad_norm": 1.2954034757094786, + "learning_rate": 9.775074835580593e-07, + "loss": 0.42796647548675537, + "step": 4614 + }, + { + "epoch": 1.0640996080239797, + "grad_norm": 1.4924945772778404, + "learning_rate": 9.771263204124512e-07, + "loss": 0.4931715726852417, + "step": 4615 + }, + { + "epoch": 1.0643301821535625, + "grad_norm": 1.367565961969811, + "learning_rate": 9.767451605917641e-07, + "loss": 0.5435268878936768, + "step": 4616 + }, + { + "epoch": 1.064560756283145, + "grad_norm": 1.6066093331363582, + "learning_rate": 9.763640041514033e-07, + "loss": 0.46361953020095825, + "step": 4617 + }, + { + "epoch": 1.0647913304127277, + "grad_norm": 1.240667858579194, + "learning_rate": 9.759828511467743e-07, + "loss": 0.3742775619029999, + "step": 4618 + }, + { + "epoch": 1.0650219045423104, + "grad_norm": 1.5520509510364326, + "learning_rate": 9.75601701633281e-07, + "loss": 0.4060659408569336, + "step": 4619 + }, + { + "epoch": 1.065252478671893, + "grad_norm": 1.2052909018096978, + "learning_rate": 9.75220555666328e-07, + "loss": 0.45316505432128906, + "step": 4620 + }, + { + "epoch": 1.0654830528014756, + "grad_norm": 1.4180749825165042, + "learning_rate": 9.748394133013179e-07, + "loss": 0.4548850655555725, + "step": 4621 + }, + { + "epoch": 1.0657136269310583, + "grad_norm": 1.2793215690458788, + "learning_rate": 9.744582745936547e-07, + "loss": 0.5065705180168152, + "step": 4622 + }, + { + "epoch": 1.065944201060641, + "grad_norm": 1.4912306578981507, + "learning_rate": 9.740771395987395e-07, + "loss": 0.4114503860473633, + "step": 
4623 + }, + { + "epoch": 1.0661747751902237, + "grad_norm": 1.4280192292492455, + "learning_rate": 9.736960083719752e-07, + "loss": 0.4568501114845276, + "step": 4624 + }, + { + "epoch": 1.0664053493198062, + "grad_norm": 1.2972553921673455, + "learning_rate": 9.733148809687624e-07, + "loss": 0.49967026710510254, + "step": 4625 + }, + { + "epoch": 1.066635923449389, + "grad_norm": 1.4642812597554793, + "learning_rate": 9.729337574445025e-07, + "loss": 0.529681384563446, + "step": 4626 + }, + { + "epoch": 1.0668664975789717, + "grad_norm": 1.4791668180519966, + "learning_rate": 9.72552637854595e-07, + "loss": 0.4819791316986084, + "step": 4627 + }, + { + "epoch": 1.0670970717085544, + "grad_norm": 1.3549019355661691, + "learning_rate": 9.721715222544396e-07, + "loss": 0.4186001718044281, + "step": 4628 + }, + { + "epoch": 1.0673276458381369, + "grad_norm": 1.221767945169434, + "learning_rate": 9.717904106994359e-07, + "loss": 0.4442529082298279, + "step": 4629 + }, + { + "epoch": 1.0675582199677196, + "grad_norm": 1.886711265076429, + "learning_rate": 9.714093032449815e-07, + "loss": 0.4655953049659729, + "step": 4630 + }, + { + "epoch": 1.0677887940973023, + "grad_norm": 1.2641786187672595, + "learning_rate": 9.71028199946475e-07, + "loss": 0.45248714089393616, + "step": 4631 + }, + { + "epoch": 1.068019368226885, + "grad_norm": 1.547270813258376, + "learning_rate": 9.706471008593128e-07, + "loss": 0.4244336485862732, + "step": 4632 + }, + { + "epoch": 1.0682499423564675, + "grad_norm": 1.441914160495435, + "learning_rate": 9.702660060388923e-07, + "loss": 0.4396495819091797, + "step": 4633 + }, + { + "epoch": 1.0684805164860502, + "grad_norm": 1.3832490714301353, + "learning_rate": 9.698849155406089e-07, + "loss": 0.4504232406616211, + "step": 4634 + }, + { + "epoch": 1.068711090615633, + "grad_norm": 1.5660708185651993, + "learning_rate": 9.695038294198588e-07, + "loss": 0.40112000703811646, + "step": 4635 + }, + { + "epoch": 1.0689416647452157, + "grad_norm": 
1.5797332497697052, + "learning_rate": 9.691227477320357e-07, + "loss": 0.4511067271232605, + "step": 4636 + }, + { + "epoch": 1.0691722388747982, + "grad_norm": 1.4624732720511697, + "learning_rate": 9.687416705325342e-07, + "loss": 0.44541406631469727, + "step": 4637 + }, + { + "epoch": 1.069402813004381, + "grad_norm": 1.3872197811900322, + "learning_rate": 9.68360597876748e-07, + "loss": 0.5038847327232361, + "step": 4638 + }, + { + "epoch": 1.0696333871339636, + "grad_norm": 1.2356986255488158, + "learning_rate": 9.67979529820069e-07, + "loss": 0.41960060596466064, + "step": 4639 + }, + { + "epoch": 1.0698639612635463, + "grad_norm": 1.6121133741192841, + "learning_rate": 9.6759846641789e-07, + "loss": 0.49760064482688904, + "step": 4640 + }, + { + "epoch": 1.0700945353931288, + "grad_norm": 1.7920934015909264, + "learning_rate": 9.672174077256023e-07, + "loss": 0.46513333916664124, + "step": 4641 + }, + { + "epoch": 1.0703251095227115, + "grad_norm": 1.5128396951273724, + "learning_rate": 9.66836353798597e-07, + "loss": 0.41129356622695923, + "step": 4642 + }, + { + "epoch": 1.0705556836522943, + "grad_norm": 1.1803503202020598, + "learning_rate": 9.664553046922634e-07, + "loss": 0.5021853446960449, + "step": 4643 + }, + { + "epoch": 1.070786257781877, + "grad_norm": 1.7444146178498035, + "learning_rate": 9.660742604619912e-07, + "loss": 0.5184302926063538, + "step": 4644 + }, + { + "epoch": 1.0710168319114595, + "grad_norm": 1.8278981381437267, + "learning_rate": 9.65693221163169e-07, + "loss": 0.4793940484523773, + "step": 4645 + }, + { + "epoch": 1.0712474060410422, + "grad_norm": 1.6157027564363053, + "learning_rate": 9.653121868511854e-07, + "loss": 0.43454456329345703, + "step": 4646 + }, + { + "epoch": 1.071477980170625, + "grad_norm": 1.3605748894383922, + "learning_rate": 9.649311575814266e-07, + "loss": 0.49123185873031616, + "step": 4647 + }, + { + "epoch": 1.0717085543002076, + "grad_norm": 1.2316654311751212, + "learning_rate": 
9.645501334092792e-07, + "loss": 0.37020617723464966, + "step": 4648 + }, + { + "epoch": 1.0719391284297901, + "grad_norm": 1.3370776970957903, + "learning_rate": 9.641691143901296e-07, + "loss": 0.461778849363327, + "step": 4649 + }, + { + "epoch": 1.0721697025593728, + "grad_norm": 1.7402606402657241, + "learning_rate": 9.63788100579362e-07, + "loss": 0.46640273928642273, + "step": 4650 + }, + { + "epoch": 1.0724002766889555, + "grad_norm": 1.543123481033078, + "learning_rate": 9.634070920323614e-07, + "loss": 0.44978517293930054, + "step": 4651 + }, + { + "epoch": 1.0726308508185383, + "grad_norm": 1.5280216878422028, + "learning_rate": 9.630260888045103e-07, + "loss": 0.5070945024490356, + "step": 4652 + }, + { + "epoch": 1.0728614249481208, + "grad_norm": 1.3361545028178132, + "learning_rate": 9.626450909511926e-07, + "loss": 0.4513545334339142, + "step": 4653 + }, + { + "epoch": 1.0730919990777035, + "grad_norm": 1.2352969540055843, + "learning_rate": 9.622640985277889e-07, + "loss": 0.4430030584335327, + "step": 4654 + }, + { + "epoch": 1.0733225732072862, + "grad_norm": 1.7185507494111099, + "learning_rate": 9.618831115896814e-07, + "loss": 0.45619165897369385, + "step": 4655 + }, + { + "epoch": 1.073553147336869, + "grad_norm": 1.3452693944435885, + "learning_rate": 9.615021301922497e-07, + "loss": 0.411594033241272, + "step": 4656 + }, + { + "epoch": 1.0737837214664514, + "grad_norm": 1.696260647190632, + "learning_rate": 9.611211543908732e-07, + "loss": 0.5230164527893066, + "step": 4657 + }, + { + "epoch": 1.0740142955960341, + "grad_norm": 1.2546383850728546, + "learning_rate": 9.607401842409316e-07, + "loss": 0.45379406213760376, + "step": 4658 + }, + { + "epoch": 1.0742448697256168, + "grad_norm": 1.4465974878955368, + "learning_rate": 9.603592197978016e-07, + "loss": 0.47254839539527893, + "step": 4659 + }, + { + "epoch": 1.0744754438551993, + "grad_norm": 1.4899733507525732, + "learning_rate": 9.59978261116861e-07, + "loss": 0.3990492820739746, + 
"step": 4660 + }, + { + "epoch": 1.074706017984782, + "grad_norm": 1.2629235312972213, + "learning_rate": 9.595973082534855e-07, + "loss": 0.41671720147132874, + "step": 4661 + }, + { + "epoch": 1.0749365921143648, + "grad_norm": 1.3769486256402874, + "learning_rate": 9.59216361263051e-07, + "loss": 0.4269324839115143, + "step": 4662 + }, + { + "epoch": 1.0751671662439475, + "grad_norm": 1.7548425902665015, + "learning_rate": 9.588354202009314e-07, + "loss": 0.42989516258239746, + "step": 4663 + }, + { + "epoch": 1.0753977403735302, + "grad_norm": 1.5474664125691167, + "learning_rate": 9.584544851225008e-07, + "loss": 0.5224605798721313, + "step": 4664 + }, + { + "epoch": 1.0756283145031127, + "grad_norm": 1.393419713492626, + "learning_rate": 9.580735560831318e-07, + "loss": 0.3853871524333954, + "step": 4665 + }, + { + "epoch": 1.0758588886326954, + "grad_norm": 1.360242198109215, + "learning_rate": 9.576926331381968e-07, + "loss": 0.4460698366165161, + "step": 4666 + }, + { + "epoch": 1.0760894627622781, + "grad_norm": 1.524802030014046, + "learning_rate": 9.57311716343066e-07, + "loss": 0.45617812871932983, + "step": 4667 + }, + { + "epoch": 1.0763200368918606, + "grad_norm": 1.7079854681006486, + "learning_rate": 9.569308057531096e-07, + "loss": 0.5631355047225952, + "step": 4668 + }, + { + "epoch": 1.0765506110214433, + "grad_norm": 1.3155596598859882, + "learning_rate": 9.565499014236977e-07, + "loss": 0.4197179973125458, + "step": 4669 + }, + { + "epoch": 1.076781185151026, + "grad_norm": 1.5894301477582775, + "learning_rate": 9.561690034101973e-07, + "loss": 0.4262646436691284, + "step": 4670 + }, + { + "epoch": 1.0770117592806088, + "grad_norm": 1.4805271814916348, + "learning_rate": 9.557881117679768e-07, + "loss": 0.42719966173171997, + "step": 4671 + }, + { + "epoch": 1.0772423334101915, + "grad_norm": 1.3479731294807211, + "learning_rate": 9.554072265524022e-07, + "loss": 0.4278491735458374, + "step": 4672 + }, + { + "epoch": 1.077472907539774, + 
"grad_norm": 1.4324931591130032, + "learning_rate": 9.550263478188396e-07, + "loss": 0.3915478587150574, + "step": 4673 + }, + { + "epoch": 1.0777034816693567, + "grad_norm": 1.4807606218185139, + "learning_rate": 9.546454756226525e-07, + "loss": 0.4391477704048157, + "step": 4674 + }, + { + "epoch": 1.0779340557989394, + "grad_norm": 1.6230153652074522, + "learning_rate": 9.542646100192055e-07, + "loss": 0.47325795888900757, + "step": 4675 + }, + { + "epoch": 1.078164629928522, + "grad_norm": 1.3326185339285364, + "learning_rate": 9.538837510638607e-07, + "loss": 0.4698373079299927, + "step": 4676 + }, + { + "epoch": 1.0783952040581046, + "grad_norm": 1.5843176103578385, + "learning_rate": 9.535028988119805e-07, + "loss": 0.4252272844314575, + "step": 4677 + }, + { + "epoch": 1.0786257781876873, + "grad_norm": 1.4642476960881914, + "learning_rate": 9.531220533189253e-07, + "loss": 0.46726179122924805, + "step": 4678 + }, + { + "epoch": 1.07885635231727, + "grad_norm": 1.3792408296611596, + "learning_rate": 9.527412146400542e-07, + "loss": 0.46616411209106445, + "step": 4679 + }, + { + "epoch": 1.0790869264468528, + "grad_norm": 1.3938952826758202, + "learning_rate": 9.523603828307268e-07, + "loss": 0.5607181787490845, + "step": 4680 + }, + { + "epoch": 1.0793175005764353, + "grad_norm": 1.6234566687004295, + "learning_rate": 9.519795579463002e-07, + "loss": 0.5039520859718323, + "step": 4681 + }, + { + "epoch": 1.079548074706018, + "grad_norm": 1.6358698645091259, + "learning_rate": 9.515987400421322e-07, + "loss": 0.45532113313674927, + "step": 4682 + }, + { + "epoch": 1.0797786488356007, + "grad_norm": 1.3987490622653254, + "learning_rate": 9.512179291735772e-07, + "loss": 0.4198398292064667, + "step": 4683 + }, + { + "epoch": 1.0800092229651832, + "grad_norm": 2.0745649369110577, + "learning_rate": 9.508371253959909e-07, + "loss": 0.371380090713501, + "step": 4684 + }, + { + "epoch": 1.080239797094766, + "grad_norm": 1.6602368865180097, + "learning_rate": 
9.504563287647265e-07, + "loss": 0.44341978430747986, + "step": 4685 + }, + { + "epoch": 1.0804703712243486, + "grad_norm": 1.3233390600316475, + "learning_rate": 9.500755393351372e-07, + "loss": 0.4184574484825134, + "step": 4686 + }, + { + "epoch": 1.0807009453539314, + "grad_norm": 1.554478033670439, + "learning_rate": 9.496947571625739e-07, + "loss": 0.5584033727645874, + "step": 4687 + }, + { + "epoch": 1.0809315194835138, + "grad_norm": 1.4303675439776025, + "learning_rate": 9.493139823023874e-07, + "loss": 0.44405317306518555, + "step": 4688 + }, + { + "epoch": 1.0811620936130966, + "grad_norm": 1.5109921870756446, + "learning_rate": 9.489332148099277e-07, + "loss": 0.41137009859085083, + "step": 4689 + }, + { + "epoch": 1.0813926677426793, + "grad_norm": 1.5933695881826222, + "learning_rate": 9.485524547405424e-07, + "loss": 0.4831092357635498, + "step": 4690 + }, + { + "epoch": 1.081623241872262, + "grad_norm": 1.3224307777817799, + "learning_rate": 9.481717021495793e-07, + "loss": 0.41243845224380493, + "step": 4691 + }, + { + "epoch": 1.0818538160018445, + "grad_norm": 1.506253034871724, + "learning_rate": 9.477909570923844e-07, + "loss": 0.33649003505706787, + "step": 4692 + }, + { + "epoch": 1.0820843901314272, + "grad_norm": 1.3759728989311568, + "learning_rate": 9.474102196243033e-07, + "loss": 0.4959014654159546, + "step": 4693 + }, + { + "epoch": 1.08231496426101, + "grad_norm": 1.4717496348190642, + "learning_rate": 9.470294898006795e-07, + "loss": 0.43924248218536377, + "step": 4694 + }, + { + "epoch": 1.0825455383905926, + "grad_norm": 1.5425758669304555, + "learning_rate": 9.466487676768563e-07, + "loss": 0.4777243137359619, + "step": 4695 + }, + { + "epoch": 1.0827761125201751, + "grad_norm": 1.7258911046059784, + "learning_rate": 9.462680533081752e-07, + "loss": 0.4488077759742737, + "step": 4696 + }, + { + "epoch": 1.0830066866497579, + "grad_norm": 1.5375128445555653, + "learning_rate": 9.458873467499778e-07, + "loss": 0.5058270692825317, + 
"step": 4697 + }, + { + "epoch": 1.0832372607793406, + "grad_norm": 1.5052517610014813, + "learning_rate": 9.455066480576025e-07, + "loss": 0.4537619650363922, + "step": 4698 + }, + { + "epoch": 1.0834678349089233, + "grad_norm": 1.5194044905455244, + "learning_rate": 9.45125957286388e-07, + "loss": 0.4725874960422516, + "step": 4699 + }, + { + "epoch": 1.0836984090385058, + "grad_norm": 1.61840988882087, + "learning_rate": 9.447452744916722e-07, + "loss": 0.4967196583747864, + "step": 4700 + }, + { + "epoch": 1.0839289831680885, + "grad_norm": 1.3272496966479597, + "learning_rate": 9.443645997287902e-07, + "loss": 0.43682345747947693, + "step": 4701 + }, + { + "epoch": 1.0841595572976712, + "grad_norm": 1.4038050893134464, + "learning_rate": 9.439839330530781e-07, + "loss": 0.48844271898269653, + "step": 4702 + }, + { + "epoch": 1.084390131427254, + "grad_norm": 1.3581740542884078, + "learning_rate": 9.436032745198682e-07, + "loss": 0.43654918670654297, + "step": 4703 + }, + { + "epoch": 1.0846207055568364, + "grad_norm": 1.6070546851567389, + "learning_rate": 9.432226241844947e-07, + "loss": 0.5034382939338684, + "step": 4704 + }, + { + "epoch": 1.0848512796864191, + "grad_norm": 1.9516449815592325, + "learning_rate": 9.428419821022877e-07, + "loss": 0.5407527089118958, + "step": 4705 + }, + { + "epoch": 1.0850818538160019, + "grad_norm": 1.3188521673213394, + "learning_rate": 9.424613483285783e-07, + "loss": 0.4372078478336334, + "step": 4706 + }, + { + "epoch": 1.0853124279455846, + "grad_norm": 1.3673238165045705, + "learning_rate": 9.420807229186949e-07, + "loss": 0.5264855623245239, + "step": 4707 + }, + { + "epoch": 1.085543002075167, + "grad_norm": 1.2884056915833075, + "learning_rate": 9.417001059279652e-07, + "loss": 0.3810223937034607, + "step": 4708 + }, + { + "epoch": 1.0857735762047498, + "grad_norm": 1.318670262430079, + "learning_rate": 9.413194974117163e-07, + "loss": 0.368865430355072, + "step": 4709 + }, + { + "epoch": 1.0860041503343325, + 
"grad_norm": 1.3202107346651724, + "learning_rate": 9.409388974252729e-07, + "loss": 0.41845810413360596, + "step": 4710 + }, + { + "epoch": 1.0862347244639152, + "grad_norm": 1.4709870024189373, + "learning_rate": 9.405583060239594e-07, + "loss": 0.5185590982437134, + "step": 4711 + }, + { + "epoch": 1.0864652985934977, + "grad_norm": 1.7793671382372165, + "learning_rate": 9.401777232630983e-07, + "loss": 0.4848501682281494, + "step": 4712 + }, + { + "epoch": 1.0866958727230804, + "grad_norm": 1.5218788678149173, + "learning_rate": 9.397971491980119e-07, + "loss": 0.5581566691398621, + "step": 4713 + }, + { + "epoch": 1.0869264468526632, + "grad_norm": 1.475012350727374, + "learning_rate": 9.394165838840196e-07, + "loss": 0.42043447494506836, + "step": 4714 + }, + { + "epoch": 1.0871570209822459, + "grad_norm": 1.3731967040929853, + "learning_rate": 9.39036027376441e-07, + "loss": 0.45076289772987366, + "step": 4715 + }, + { + "epoch": 1.0873875951118284, + "grad_norm": 1.353578451117457, + "learning_rate": 9.386554797305934e-07, + "loss": 0.3650796413421631, + "step": 4716 + }, + { + "epoch": 1.087618169241411, + "grad_norm": 1.436571768450736, + "learning_rate": 9.38274941001794e-07, + "loss": 0.4837912321090698, + "step": 4717 + }, + { + "epoch": 1.0878487433709938, + "grad_norm": 1.5272898845570653, + "learning_rate": 9.378944112453574e-07, + "loss": 0.41277679800987244, + "step": 4718 + }, + { + "epoch": 1.0880793175005765, + "grad_norm": 1.7344713328668464, + "learning_rate": 9.375138905165973e-07, + "loss": 0.48409390449523926, + "step": 4719 + }, + { + "epoch": 1.088309891630159, + "grad_norm": 1.360949967282617, + "learning_rate": 9.371333788708268e-07, + "loss": 0.3952450752258301, + "step": 4720 + }, + { + "epoch": 1.0885404657597417, + "grad_norm": 1.6450358552008089, + "learning_rate": 9.367528763633563e-07, + "loss": 0.42314866185188293, + "step": 4721 + }, + { + "epoch": 1.0887710398893244, + "grad_norm": 1.492846868063658, + "learning_rate": 
9.363723830494966e-07, + "loss": 0.5322449207305908, + "step": 4722 + }, + { + "epoch": 1.0890016140189072, + "grad_norm": 1.3552869600155872, + "learning_rate": 9.359918989845557e-07, + "loss": 0.42307883501052856, + "step": 4723 + }, + { + "epoch": 1.0892321881484897, + "grad_norm": 1.3481901437941268, + "learning_rate": 9.356114242238413e-07, + "loss": 0.39321061968803406, + "step": 4724 + }, + { + "epoch": 1.0894627622780724, + "grad_norm": 1.6333273110158268, + "learning_rate": 9.352309588226585e-07, + "loss": 0.5064421892166138, + "step": 4725 + }, + { + "epoch": 1.089693336407655, + "grad_norm": 1.4475724274606394, + "learning_rate": 9.348505028363125e-07, + "loss": 0.44825220108032227, + "step": 4726 + }, + { + "epoch": 1.0899239105372378, + "grad_norm": 1.384316241889946, + "learning_rate": 9.344700563201065e-07, + "loss": 0.4323306679725647, + "step": 4727 + }, + { + "epoch": 1.0901544846668203, + "grad_norm": 1.3254947105842285, + "learning_rate": 9.340896193293414e-07, + "loss": 0.44907987117767334, + "step": 4728 + }, + { + "epoch": 1.090385058796403, + "grad_norm": 1.3161326376052391, + "learning_rate": 9.337091919193185e-07, + "loss": 0.416559636592865, + "step": 4729 + }, + { + "epoch": 1.0906156329259857, + "grad_norm": 1.6044534711260028, + "learning_rate": 9.33328774145336e-07, + "loss": 0.5361836552619934, + "step": 4730 + }, + { + "epoch": 1.0908462070555685, + "grad_norm": 1.3742080048163032, + "learning_rate": 9.329483660626922e-07, + "loss": 0.4815465211868286, + "step": 4731 + }, + { + "epoch": 1.091076781185151, + "grad_norm": 1.4553535934080677, + "learning_rate": 9.325679677266826e-07, + "loss": 0.5205050110816956, + "step": 4732 + }, + { + "epoch": 1.0913073553147337, + "grad_norm": 1.9887709257052897, + "learning_rate": 9.321875791926028e-07, + "loss": 0.4830896258354187, + "step": 4733 + }, + { + "epoch": 1.0915379294443164, + "grad_norm": 1.3739860439026885, + "learning_rate": 9.318072005157451e-07, + "loss": 0.4394579827785492, + 
"step": 4734 + }, + { + "epoch": 1.091768503573899, + "grad_norm": 1.6664317769247758, + "learning_rate": 9.314268317514022e-07, + "loss": 0.4614049792289734, + "step": 4735 + }, + { + "epoch": 1.0919990777034816, + "grad_norm": 1.5989711566807139, + "learning_rate": 9.31046472954864e-07, + "loss": 0.5123867988586426, + "step": 4736 + }, + { + "epoch": 1.0922296518330643, + "grad_norm": 1.879970895540274, + "learning_rate": 9.306661241814204e-07, + "loss": 0.43548035621643066, + "step": 4737 + }, + { + "epoch": 1.092460225962647, + "grad_norm": 1.4190205685105515, + "learning_rate": 9.302857854863579e-07, + "loss": 0.4102709889411926, + "step": 4738 + }, + { + "epoch": 1.0926908000922297, + "grad_norm": 1.7007344632271022, + "learning_rate": 9.299054569249628e-07, + "loss": 0.46276605129241943, + "step": 4739 + }, + { + "epoch": 1.0929213742218122, + "grad_norm": 1.5950261365712695, + "learning_rate": 9.295251385525204e-07, + "loss": 0.47700244188308716, + "step": 4740 + }, + { + "epoch": 1.093151948351395, + "grad_norm": 1.5081940540312389, + "learning_rate": 9.29144830424313e-07, + "loss": 0.5492758750915527, + "step": 4741 + }, + { + "epoch": 1.0933825224809777, + "grad_norm": 1.6521559747103167, + "learning_rate": 9.287645325956228e-07, + "loss": 0.3846803307533264, + "step": 4742 + }, + { + "epoch": 1.0936130966105604, + "grad_norm": 1.4300122822608972, + "learning_rate": 9.283842451217294e-07, + "loss": 0.47237372398376465, + "step": 4743 + }, + { + "epoch": 1.0938436707401429, + "grad_norm": 1.6996074936661776, + "learning_rate": 9.280039680579122e-07, + "loss": 0.4651675820350647, + "step": 4744 + }, + { + "epoch": 1.0940742448697256, + "grad_norm": 1.6397662048344088, + "learning_rate": 9.276237014594476e-07, + "loss": 0.5472640991210938, + "step": 4745 + }, + { + "epoch": 1.0943048189993083, + "grad_norm": 1.3158004626748314, + "learning_rate": 9.272434453816117e-07, + "loss": 0.45672351121902466, + "step": 4746 + }, + { + "epoch": 1.094535393128891, + 
"grad_norm": 1.4246135812847533, + "learning_rate": 9.268631998796785e-07, + "loss": 0.4589729905128479, + "step": 4747 + }, + { + "epoch": 1.0947659672584735, + "grad_norm": 1.4398967186683822, + "learning_rate": 9.264829650089201e-07, + "loss": 0.45882588624954224, + "step": 4748 + }, + { + "epoch": 1.0949965413880562, + "grad_norm": 1.8586265213095916, + "learning_rate": 9.26102740824608e-07, + "loss": 0.6183863282203674, + "step": 4749 + }, + { + "epoch": 1.095227115517639, + "grad_norm": 1.4631882562588927, + "learning_rate": 9.257225273820112e-07, + "loss": 0.4512014389038086, + "step": 4750 + }, + { + "epoch": 1.0954576896472217, + "grad_norm": 1.5706161838979387, + "learning_rate": 9.253423247363983e-07, + "loss": 0.5006139874458313, + "step": 4751 + }, + { + "epoch": 1.0956882637768042, + "grad_norm": 1.4110458948787974, + "learning_rate": 9.249621329430346e-07, + "loss": 0.5394018888473511, + "step": 4752 + }, + { + "epoch": 1.095918837906387, + "grad_norm": 1.5150959480945791, + "learning_rate": 9.245819520571858e-07, + "loss": 0.35523056983947754, + "step": 4753 + }, + { + "epoch": 1.0961494120359696, + "grad_norm": 1.3819812548856059, + "learning_rate": 9.242017821341143e-07, + "loss": 0.44379743933677673, + "step": 4754 + }, + { + "epoch": 1.0963799861655523, + "grad_norm": 1.6129174796361336, + "learning_rate": 9.238216232290821e-07, + "loss": 0.4190908968448639, + "step": 4755 + }, + { + "epoch": 1.0966105602951348, + "grad_norm": 1.6222067534589701, + "learning_rate": 9.234414753973488e-07, + "loss": 0.44818970561027527, + "step": 4756 + }, + { + "epoch": 1.0968411344247175, + "grad_norm": 1.4925644141379035, + "learning_rate": 9.230613386941734e-07, + "loss": 0.4134204685688019, + "step": 4757 + }, + { + "epoch": 1.0970717085543003, + "grad_norm": 1.2148478016107016, + "learning_rate": 9.226812131748118e-07, + "loss": 0.3554952144622803, + "step": 4758 + }, + { + "epoch": 1.097302282683883, + "grad_norm": 1.674922299722459, + "learning_rate": 
9.223010988945194e-07, + "loss": 0.522594690322876, + "step": 4759 + }, + { + "epoch": 1.0975328568134655, + "grad_norm": 1.4320622438584156, + "learning_rate": 9.219209959085502e-07, + "loss": 0.44814133644104004, + "step": 4760 + }, + { + "epoch": 1.0977634309430482, + "grad_norm": 1.4723286174250931, + "learning_rate": 9.215409042721551e-07, + "loss": 0.42479634284973145, + "step": 4761 + }, + { + "epoch": 1.097994005072631, + "grad_norm": 1.5414891522514993, + "learning_rate": 9.211608240405849e-07, + "loss": 0.4384934902191162, + "step": 4762 + }, + { + "epoch": 1.0982245792022136, + "grad_norm": 1.4811013868533904, + "learning_rate": 9.207807552690878e-07, + "loss": 0.5378658771514893, + "step": 4763 + }, + { + "epoch": 1.098455153331796, + "grad_norm": 1.4445039209024981, + "learning_rate": 9.204006980129111e-07, + "loss": 0.5071386694908142, + "step": 4764 + }, + { + "epoch": 1.0986857274613788, + "grad_norm": 1.5460474623164162, + "learning_rate": 9.200206523272992e-07, + "loss": 0.46085822582244873, + "step": 4765 + }, + { + "epoch": 1.0989163015909615, + "grad_norm": 1.544747382675103, + "learning_rate": 9.196406182674964e-07, + "loss": 0.5083057880401611, + "step": 4766 + }, + { + "epoch": 1.0991468757205443, + "grad_norm": 1.2845065354356755, + "learning_rate": 9.192605958887438e-07, + "loss": 0.48307740688323975, + "step": 4767 + }, + { + "epoch": 1.0993774498501268, + "grad_norm": 1.8405581264672015, + "learning_rate": 9.188805852462824e-07, + "loss": 0.5195509791374207, + "step": 4768 + }, + { + "epoch": 1.0996080239797095, + "grad_norm": 1.5537273798526559, + "learning_rate": 9.185005863953498e-07, + "loss": 0.5161266326904297, + "step": 4769 + }, + { + "epoch": 1.0998385981092922, + "grad_norm": 1.5985708455901557, + "learning_rate": 9.181205993911827e-07, + "loss": 0.4757764935493469, + "step": 4770 + }, + { + "epoch": 1.1000691722388747, + "grad_norm": 1.5307887938016926, + "learning_rate": 9.177406242890167e-07, + "loss": 0.4071381688117981, + 
"step": 4771 + }, + { + "epoch": 1.1002997463684574, + "grad_norm": 1.3525378547606768, + "learning_rate": 9.173606611440842e-07, + "loss": 0.4794449210166931, + "step": 4772 + }, + { + "epoch": 1.1005303204980401, + "grad_norm": 1.3205547171467464, + "learning_rate": 9.169807100116175e-07, + "loss": 0.4678712487220764, + "step": 4773 + }, + { + "epoch": 1.1007608946276228, + "grad_norm": 1.2863487713029464, + "learning_rate": 9.166007709468456e-07, + "loss": 0.43200960755348206, + "step": 4774 + }, + { + "epoch": 1.1009914687572055, + "grad_norm": 1.8114336882311408, + "learning_rate": 9.162208440049974e-07, + "loss": 0.49283260107040405, + "step": 4775 + }, + { + "epoch": 1.101222042886788, + "grad_norm": 1.2265456496064566, + "learning_rate": 9.158409292412982e-07, + "loss": 0.4430215358734131, + "step": 4776 + }, + { + "epoch": 1.1014526170163708, + "grad_norm": 1.282698473472426, + "learning_rate": 9.154610267109731e-07, + "loss": 0.4529581069946289, + "step": 4777 + }, + { + "epoch": 1.1016831911459535, + "grad_norm": 1.3698366211761768, + "learning_rate": 9.150811364692446e-07, + "loss": 0.3872554302215576, + "step": 4778 + }, + { + "epoch": 1.101913765275536, + "grad_norm": 1.4034579683870105, + "learning_rate": 9.147012585713331e-07, + "loss": 0.466983437538147, + "step": 4779 + }, + { + "epoch": 1.1021443394051187, + "grad_norm": 1.3799350437064777, + "learning_rate": 9.143213930724587e-07, + "loss": 0.4841456115245819, + "step": 4780 + }, + { + "epoch": 1.1023749135347014, + "grad_norm": 2.083063073101601, + "learning_rate": 9.139415400278376e-07, + "loss": 0.4506613612174988, + "step": 4781 + }, + { + "epoch": 1.1026054876642841, + "grad_norm": 1.5047320834529434, + "learning_rate": 9.135616994926861e-07, + "loss": 0.428241491317749, + "step": 4782 + }, + { + "epoch": 1.1028360617938668, + "grad_norm": 1.3329992006000018, + "learning_rate": 9.131818715222175e-07, + "loss": 0.46940821409225464, + "step": 4783 + }, + { + "epoch": 1.1030666359234493, + 
"grad_norm": 1.5416614978551508, + "learning_rate": 9.12802056171644e-07, + "loss": 0.4527658224105835, + "step": 4784 + }, + { + "epoch": 1.103297210053032, + "grad_norm": 1.3412511641642377, + "learning_rate": 9.124222534961749e-07, + "loss": 0.3284989893436432, + "step": 4785 + }, + { + "epoch": 1.1035277841826148, + "grad_norm": 1.497248247266052, + "learning_rate": 9.120424635510193e-07, + "loss": 0.448346883058548, + "step": 4786 + }, + { + "epoch": 1.1037583583121973, + "grad_norm": 1.5413647461227613, + "learning_rate": 9.116626863913826e-07, + "loss": 0.4625587463378906, + "step": 4787 + }, + { + "epoch": 1.10398893244178, + "grad_norm": 1.398727589269655, + "learning_rate": 9.112829220724703e-07, + "loss": 0.37891942262649536, + "step": 4788 + }, + { + "epoch": 1.1042195065713627, + "grad_norm": 1.510309439727558, + "learning_rate": 9.109031706494841e-07, + "loss": 0.48719239234924316, + "step": 4789 + }, + { + "epoch": 1.1044500807009454, + "grad_norm": 1.695631911449914, + "learning_rate": 9.105234321776247e-07, + "loss": 0.5341615676879883, + "step": 4790 + }, + { + "epoch": 1.1046806548305281, + "grad_norm": 1.30752453253924, + "learning_rate": 9.101437067120918e-07, + "loss": 0.36677777767181396, + "step": 4791 + }, + { + "epoch": 1.1049112289601106, + "grad_norm": 1.3000512165603213, + "learning_rate": 9.097639943080813e-07, + "loss": 0.4348159432411194, + "step": 4792 + }, + { + "epoch": 1.1051418030896933, + "grad_norm": 1.3763164723830184, + "learning_rate": 9.093842950207891e-07, + "loss": 0.44912683963775635, + "step": 4793 + }, + { + "epoch": 1.105372377219276, + "grad_norm": 1.655048045877048, + "learning_rate": 9.090046089054077e-07, + "loss": 0.5576057434082031, + "step": 4794 + }, + { + "epoch": 1.1056029513488586, + "grad_norm": 1.4655907130631036, + "learning_rate": 9.08624936017129e-07, + "loss": 0.43964770436286926, + "step": 4795 + }, + { + "epoch": 1.1058335254784413, + "grad_norm": 1.3648059541391266, + "learning_rate": 
9.082452764111415e-07, + "loss": 0.4285386800765991, + "step": 4796 + }, + { + "epoch": 1.106064099608024, + "grad_norm": 1.6322901017927212, + "learning_rate": 9.078656301426332e-07, + "loss": 0.4257868230342865, + "step": 4797 + }, + { + "epoch": 1.1062946737376067, + "grad_norm": 1.9314022304382554, + "learning_rate": 9.074859972667895e-07, + "loss": 0.4540346562862396, + "step": 4798 + }, + { + "epoch": 1.1065252478671892, + "grad_norm": 1.6801359554397164, + "learning_rate": 9.071063778387933e-07, + "loss": 0.5273457765579224, + "step": 4799 + }, + { + "epoch": 1.106755821996772, + "grad_norm": 1.4107980839711056, + "learning_rate": 9.067267719138268e-07, + "loss": 0.391310453414917, + "step": 4800 + }, + { + "epoch": 1.1069863961263546, + "grad_norm": 1.4182050274963418, + "learning_rate": 9.063471795470691e-07, + "loss": 0.47945383191108704, + "step": 4801 + }, + { + "epoch": 1.1072169702559373, + "grad_norm": 1.7087277476088294, + "learning_rate": 9.05967600793698e-07, + "loss": 0.49561476707458496, + "step": 4802 + }, + { + "epoch": 1.1074475443855198, + "grad_norm": 1.3070252929290396, + "learning_rate": 9.05588035708889e-07, + "loss": 0.4505256414413452, + "step": 4803 + }, + { + "epoch": 1.1076781185151026, + "grad_norm": 1.6864844579974707, + "learning_rate": 9.052084843478164e-07, + "loss": 0.37591490149497986, + "step": 4804 + }, + { + "epoch": 1.1079086926446853, + "grad_norm": 1.486226704077577, + "learning_rate": 9.048289467656508e-07, + "loss": 0.478586345911026, + "step": 4805 + }, + { + "epoch": 1.108139266774268, + "grad_norm": 1.3819959446941394, + "learning_rate": 9.044494230175625e-07, + "loss": 0.4373725354671478, + "step": 4806 + }, + { + "epoch": 1.1083698409038505, + "grad_norm": 1.4091791216138099, + "learning_rate": 9.040699131587186e-07, + "loss": 0.3976345360279083, + "step": 4807 + }, + { + "epoch": 1.1086004150334332, + "grad_norm": 1.3848852740812903, + "learning_rate": 9.036904172442857e-07, + "loss": 0.44611310958862305, + 
"step": 4808 + }, + { + "epoch": 1.108830989163016, + "grad_norm": 1.3117584806534919, + "learning_rate": 9.033109353294262e-07, + "loss": 0.40816667675971985, + "step": 4809 + }, + { + "epoch": 1.1090615632925986, + "grad_norm": 1.359605756890841, + "learning_rate": 9.029314674693023e-07, + "loss": 0.37462317943573, + "step": 4810 + }, + { + "epoch": 1.1092921374221811, + "grad_norm": 1.3641846963299056, + "learning_rate": 9.025520137190735e-07, + "loss": 0.3856509327888489, + "step": 4811 + }, + { + "epoch": 1.1095227115517639, + "grad_norm": 1.5740711616700624, + "learning_rate": 9.021725741338969e-07, + "loss": 0.4728443920612335, + "step": 4812 + }, + { + "epoch": 1.1097532856813466, + "grad_norm": 2.0717537833557773, + "learning_rate": 9.017931487689282e-07, + "loss": 0.4614938795566559, + "step": 4813 + }, + { + "epoch": 1.1099838598109293, + "grad_norm": 1.4925546437709947, + "learning_rate": 9.014137376793203e-07, + "loss": 0.4137331247329712, + "step": 4814 + }, + { + "epoch": 1.1102144339405118, + "grad_norm": 1.2481779358565226, + "learning_rate": 9.010343409202255e-07, + "loss": 0.42436620593070984, + "step": 4815 + }, + { + "epoch": 1.1104450080700945, + "grad_norm": 1.3339513565407848, + "learning_rate": 9.006549585467916e-07, + "loss": 0.43592822551727295, + "step": 4816 + }, + { + "epoch": 1.1106755821996772, + "grad_norm": 1.3742872645989155, + "learning_rate": 9.002755906141666e-07, + "loss": 0.45627349615097046, + "step": 4817 + }, + { + "epoch": 1.11090615632926, + "grad_norm": 1.819907938722267, + "learning_rate": 8.998962371774953e-07, + "loss": 0.5103771686553955, + "step": 4818 + }, + { + "epoch": 1.1111367304588424, + "grad_norm": 1.4418115437773273, + "learning_rate": 8.995168982919201e-07, + "loss": 0.470276802778244, + "step": 4819 + }, + { + "epoch": 1.1113673045884251, + "grad_norm": 1.3186176277536419, + "learning_rate": 8.991375740125823e-07, + "loss": 0.49486416578292847, + "step": 4820 + }, + { + "epoch": 1.1115978787180079, + 
"grad_norm": 1.143316450397621, + "learning_rate": 8.987582643946201e-07, + "loss": 0.338329017162323, + "step": 4821 + }, + { + "epoch": 1.1118284528475906, + "grad_norm": 1.4885392176771477, + "learning_rate": 8.983789694931706e-07, + "loss": 0.38252198696136475, + "step": 4822 + }, + { + "epoch": 1.112059026977173, + "grad_norm": 1.4537319037859584, + "learning_rate": 8.979996893633675e-07, + "loss": 0.47691571712493896, + "step": 4823 + }, + { + "epoch": 1.1122896011067558, + "grad_norm": 1.41954873904419, + "learning_rate": 8.976204240603433e-07, + "loss": 0.40156808495521545, + "step": 4824 + }, + { + "epoch": 1.1125201752363385, + "grad_norm": 1.312743475511893, + "learning_rate": 8.97241173639228e-07, + "loss": 0.3837090730667114, + "step": 4825 + }, + { + "epoch": 1.1127507493659212, + "grad_norm": 1.6300077035939553, + "learning_rate": 8.968619381551499e-07, + "loss": 0.5094380378723145, + "step": 4826 + }, + { + "epoch": 1.1129813234955037, + "grad_norm": 1.4389159508234053, + "learning_rate": 8.964827176632339e-07, + "loss": 0.48674100637435913, + "step": 4827 + }, + { + "epoch": 1.1132118976250864, + "grad_norm": 1.7742534070601, + "learning_rate": 8.961035122186045e-07, + "loss": 0.49288761615753174, + "step": 4828 + }, + { + "epoch": 1.1134424717546691, + "grad_norm": 1.4156686622304593, + "learning_rate": 8.957243218763824e-07, + "loss": 0.42933952808380127, + "step": 4829 + }, + { + "epoch": 1.1136730458842519, + "grad_norm": 1.838762036908513, + "learning_rate": 8.953451466916866e-07, + "loss": 0.39244914054870605, + "step": 4830 + }, + { + "epoch": 1.1139036200138344, + "grad_norm": 1.3776049792093739, + "learning_rate": 8.949659867196348e-07, + "loss": 0.44688090682029724, + "step": 4831 + }, + { + "epoch": 1.114134194143417, + "grad_norm": 1.6923430022628052, + "learning_rate": 8.945868420153409e-07, + "loss": 0.5388743877410889, + "step": 4832 + }, + { + "epoch": 1.1143647682729998, + "grad_norm": 1.6108426528928312, + "learning_rate": 
8.942077126339182e-07, + "loss": 0.4320666193962097, + "step": 4833 + }, + { + "epoch": 1.1145953424025825, + "grad_norm": 1.3700008221476991, + "learning_rate": 8.938285986304762e-07, + "loss": 0.37623411417007446, + "step": 4834 + }, + { + "epoch": 1.114825916532165, + "grad_norm": 1.4274453986312428, + "learning_rate": 8.93449500060124e-07, + "loss": 0.4743962287902832, + "step": 4835 + }, + { + "epoch": 1.1150564906617477, + "grad_norm": 1.4687481503878526, + "learning_rate": 8.930704169779663e-07, + "loss": 0.4833221435546875, + "step": 4836 + }, + { + "epoch": 1.1152870647913304, + "grad_norm": 1.580828459296504, + "learning_rate": 8.926913494391074e-07, + "loss": 0.48811084032058716, + "step": 4837 + }, + { + "epoch": 1.1155176389209132, + "grad_norm": 1.4663777441823886, + "learning_rate": 8.923122974986487e-07, + "loss": 0.42525774240493774, + "step": 4838 + }, + { + "epoch": 1.1157482130504957, + "grad_norm": 1.4773669175093567, + "learning_rate": 8.919332612116884e-07, + "loss": 0.4347909688949585, + "step": 4839 + }, + { + "epoch": 1.1159787871800784, + "grad_norm": 1.9619203877260345, + "learning_rate": 8.915542406333241e-07, + "loss": 0.5085601806640625, + "step": 4840 + }, + { + "epoch": 1.116209361309661, + "grad_norm": 1.4214902735687815, + "learning_rate": 8.911752358186497e-07, + "loss": 0.4620482325553894, + "step": 4841 + }, + { + "epoch": 1.1164399354392438, + "grad_norm": 1.3147570239530335, + "learning_rate": 8.907962468227582e-07, + "loss": 0.44923216104507446, + "step": 4842 + }, + { + "epoch": 1.1166705095688263, + "grad_norm": 1.6422580107908513, + "learning_rate": 8.904172737007386e-07, + "loss": 0.547439694404602, + "step": 4843 + }, + { + "epoch": 1.116901083698409, + "grad_norm": 1.7769022711207687, + "learning_rate": 8.900383165076789e-07, + "loss": 0.4609268307685852, + "step": 4844 + }, + { + "epoch": 1.1171316578279917, + "grad_norm": 1.4046866803141593, + "learning_rate": 8.896593752986642e-07, + "loss": 0.41780030727386475, + 
"step": 4845 + }, + { + "epoch": 1.1173622319575744, + "grad_norm": 1.3641825367692086, + "learning_rate": 8.89280450128778e-07, + "loss": 0.506212592124939, + "step": 4846 + }, + { + "epoch": 1.117592806087157, + "grad_norm": 1.4049897839890735, + "learning_rate": 8.889015410531001e-07, + "loss": 0.4436545968055725, + "step": 4847 + }, + { + "epoch": 1.1178233802167397, + "grad_norm": 1.3856199735325436, + "learning_rate": 8.885226481267093e-07, + "loss": 0.4473826289176941, + "step": 4848 + }, + { + "epoch": 1.1180539543463224, + "grad_norm": 1.42622736433257, + "learning_rate": 8.881437714046815e-07, + "loss": 0.43499836325645447, + "step": 4849 + }, + { + "epoch": 1.118284528475905, + "grad_norm": 1.5927469786677344, + "learning_rate": 8.877649109420899e-07, + "loss": 0.522705078125, + "step": 4850 + }, + { + "epoch": 1.1185151026054876, + "grad_norm": 1.5596781330511842, + "learning_rate": 8.873860667940064e-07, + "loss": 0.42146036028862, + "step": 4851 + }, + { + "epoch": 1.1187456767350703, + "grad_norm": 1.649425162171124, + "learning_rate": 8.870072390154989e-07, + "loss": 0.5875130891799927, + "step": 4852 + }, + { + "epoch": 1.118976250864653, + "grad_norm": 1.6372722830693418, + "learning_rate": 8.866284276616345e-07, + "loss": 0.5187985301017761, + "step": 4853 + }, + { + "epoch": 1.1192068249942357, + "grad_norm": 2.6266893474509474, + "learning_rate": 8.86249632787477e-07, + "loss": 0.46115952730178833, + "step": 4854 + }, + { + "epoch": 1.1194373991238182, + "grad_norm": 1.4714921061709185, + "learning_rate": 8.858708544480886e-07, + "loss": 0.4926493167877197, + "step": 4855 + }, + { + "epoch": 1.119667973253401, + "grad_norm": 1.5525331026142626, + "learning_rate": 8.854920926985278e-07, + "loss": 0.44512006640434265, + "step": 4856 + }, + { + "epoch": 1.1198985473829837, + "grad_norm": 1.5145408688074757, + "learning_rate": 8.85113347593852e-07, + "loss": 0.45973241329193115, + "step": 4857 + }, + { + "epoch": 1.1201291215125664, + "grad_norm": 
1.5400172209521554, + "learning_rate": 8.847346191891157e-07, + "loss": 0.4915385842323303, + "step": 4858 + }, + { + "epoch": 1.1203596956421489, + "grad_norm": 1.4900152202768027, + "learning_rate": 8.843559075393701e-07, + "loss": 0.4457864463329315, + "step": 4859 + }, + { + "epoch": 1.1205902697717316, + "grad_norm": 1.3414730221020197, + "learning_rate": 8.839772126996658e-07, + "loss": 0.4782453775405884, + "step": 4860 + }, + { + "epoch": 1.1208208439013143, + "grad_norm": 1.3591384899787133, + "learning_rate": 8.835985347250492e-07, + "loss": 0.42789584398269653, + "step": 4861 + }, + { + "epoch": 1.121051418030897, + "grad_norm": 1.8532602863182117, + "learning_rate": 8.832198736705657e-07, + "loss": 0.49990910291671753, + "step": 4862 + }, + { + "epoch": 1.1212819921604795, + "grad_norm": 1.4158258863269764, + "learning_rate": 8.828412295912566e-07, + "loss": 0.3735005855560303, + "step": 4863 + }, + { + "epoch": 1.1215125662900622, + "grad_norm": 1.3744374187815367, + "learning_rate": 8.824626025421624e-07, + "loss": 0.402673602104187, + "step": 4864 + }, + { + "epoch": 1.121743140419645, + "grad_norm": 1.57241412674585, + "learning_rate": 8.820839925783198e-07, + "loss": 0.4675491452217102, + "step": 4865 + }, + { + "epoch": 1.1219737145492277, + "grad_norm": 2.0200104658377254, + "learning_rate": 8.817053997547645e-07, + "loss": 0.5098662376403809, + "step": 4866 + }, + { + "epoch": 1.1222042886788102, + "grad_norm": 1.3880207155981488, + "learning_rate": 8.813268241265278e-07, + "loss": 0.44478029012680054, + "step": 4867 + }, + { + "epoch": 1.1224348628083929, + "grad_norm": 1.4983402004688406, + "learning_rate": 8.809482657486401e-07, + "loss": 0.410754919052124, + "step": 4868 + }, + { + "epoch": 1.1226654369379756, + "grad_norm": 1.193726420763111, + "learning_rate": 8.805697246761288e-07, + "loss": 0.4198191165924072, + "step": 4869 + }, + { + "epoch": 1.1228960110675583, + "grad_norm": 1.6015778378598091, + "learning_rate": 
8.801912009640178e-07, + "loss": 0.5399911403656006, + "step": 4870 + }, + { + "epoch": 1.1231265851971408, + "grad_norm": 1.3209581029003303, + "learning_rate": 8.798126946673305e-07, + "loss": 0.3879680633544922, + "step": 4871 + }, + { + "epoch": 1.1233571593267235, + "grad_norm": 1.7893299917127135, + "learning_rate": 8.794342058410856e-07, + "loss": 0.4629073739051819, + "step": 4872 + }, + { + "epoch": 1.1235877334563062, + "grad_norm": 1.25180398717926, + "learning_rate": 8.790557345403013e-07, + "loss": 0.42299884557724, + "step": 4873 + }, + { + "epoch": 1.123818307585889, + "grad_norm": 1.5467146262725529, + "learning_rate": 8.786772808199912e-07, + "loss": 0.509437620639801, + "step": 4874 + }, + { + "epoch": 1.1240488817154715, + "grad_norm": 1.3436359029840506, + "learning_rate": 8.782988447351684e-07, + "loss": 0.4682687222957611, + "step": 4875 + }, + { + "epoch": 1.1242794558450542, + "grad_norm": 1.2884743737928093, + "learning_rate": 8.779204263408416e-07, + "loss": 0.41155606508255005, + "step": 4876 + }, + { + "epoch": 1.124510029974637, + "grad_norm": 1.6449136860944156, + "learning_rate": 8.775420256920182e-07, + "loss": 0.4705810844898224, + "step": 4877 + }, + { + "epoch": 1.1247406041042196, + "grad_norm": 1.4648471947605348, + "learning_rate": 8.771636428437022e-07, + "loss": 0.36571264266967773, + "step": 4878 + }, + { + "epoch": 1.124971178233802, + "grad_norm": 1.1768139651906544, + "learning_rate": 8.76785277850896e-07, + "loss": 0.36618396639823914, + "step": 4879 + }, + { + "epoch": 1.1252017523633848, + "grad_norm": 1.5334328638730685, + "learning_rate": 8.764069307685983e-07, + "loss": 0.4861210584640503, + "step": 4880 + }, + { + "epoch": 1.1254323264929675, + "grad_norm": 1.457839206264918, + "learning_rate": 8.760286016518056e-07, + "loss": 0.43346846103668213, + "step": 4881 + }, + { + "epoch": 1.12566290062255, + "grad_norm": 1.28421921022301, + "learning_rate": 8.756502905555123e-07, + "loss": 0.40088707208633423, + "step": 
4882 + }, + { + "epoch": 1.1258934747521328, + "grad_norm": 1.4643062187844458, + "learning_rate": 8.752719975347092e-07, + "loss": 0.4088619649410248, + "step": 4883 + }, + { + "epoch": 1.1261240488817155, + "grad_norm": 1.5527291710325282, + "learning_rate": 8.748937226443857e-07, + "loss": 0.4988909661769867, + "step": 4884 + }, + { + "epoch": 1.1263546230112982, + "grad_norm": 1.5377239167998313, + "learning_rate": 8.745154659395271e-07, + "loss": 0.47022196650505066, + "step": 4885 + }, + { + "epoch": 1.126585197140881, + "grad_norm": 1.3259626220698026, + "learning_rate": 8.741372274751178e-07, + "loss": 0.45005398988723755, + "step": 4886 + }, + { + "epoch": 1.1268157712704634, + "grad_norm": 1.5001674672720546, + "learning_rate": 8.737590073061376e-07, + "loss": 0.4632537364959717, + "step": 4887 + }, + { + "epoch": 1.1270463454000461, + "grad_norm": 1.2983235840008036, + "learning_rate": 8.733808054875653e-07, + "loss": 0.41034963726997375, + "step": 4888 + }, + { + "epoch": 1.1272769195296288, + "grad_norm": 1.423352740140202, + "learning_rate": 8.730026220743765e-07, + "loss": 0.5169668793678284, + "step": 4889 + }, + { + "epoch": 1.1275074936592113, + "grad_norm": 1.46630659535839, + "learning_rate": 8.726244571215431e-07, + "loss": 0.44972485303878784, + "step": 4890 + }, + { + "epoch": 1.127738067788794, + "grad_norm": 1.5712937661942725, + "learning_rate": 8.722463106840361e-07, + "loss": 0.4854368567466736, + "step": 4891 + }, + { + "epoch": 1.1279686419183768, + "grad_norm": 1.0525840961962005, + "learning_rate": 8.718681828168223e-07, + "loss": 0.3029147982597351, + "step": 4892 + }, + { + "epoch": 1.1281992160479595, + "grad_norm": 1.5856241308624208, + "learning_rate": 8.714900735748671e-07, + "loss": 0.4770504832267761, + "step": 4893 + }, + { + "epoch": 1.1284297901775422, + "grad_norm": 1.3799690323722245, + "learning_rate": 8.711119830131317e-07, + "loss": 0.48508110642433167, + "step": 4894 + }, + { + "epoch": 1.1286603643071247, + 
"grad_norm": 1.4227656672873528, + "learning_rate": 8.707339111865761e-07, + "loss": 0.43302488327026367, + "step": 4895 + }, + { + "epoch": 1.1288909384367074, + "grad_norm": 1.3481652076868464, + "learning_rate": 8.703558581501563e-07, + "loss": 0.5720575451850891, + "step": 4896 + }, + { + "epoch": 1.1291215125662901, + "grad_norm": 1.1736572520471924, + "learning_rate": 8.69977823958827e-07, + "loss": 0.48236098885536194, + "step": 4897 + }, + { + "epoch": 1.1293520866958726, + "grad_norm": 1.6539784416028527, + "learning_rate": 8.69599808667538e-07, + "loss": 0.48531901836395264, + "step": 4898 + }, + { + "epoch": 1.1295826608254553, + "grad_norm": 1.390226643422974, + "learning_rate": 8.69221812331239e-07, + "loss": 0.4150174856185913, + "step": 4899 + }, + { + "epoch": 1.129813234955038, + "grad_norm": 1.4594360531114157, + "learning_rate": 8.688438350048748e-07, + "loss": 0.4729560911655426, + "step": 4900 + }, + { + "epoch": 1.1300438090846208, + "grad_norm": 1.5805161631694824, + "learning_rate": 8.684658767433881e-07, + "loss": 0.5081748962402344, + "step": 4901 + }, + { + "epoch": 1.1302743832142035, + "grad_norm": 1.3577399194161552, + "learning_rate": 8.680879376017197e-07, + "loss": 0.4552333354949951, + "step": 4902 + }, + { + "epoch": 1.130504957343786, + "grad_norm": 1.666206186626053, + "learning_rate": 8.67710017634806e-07, + "loss": 0.4784387946128845, + "step": 4903 + }, + { + "epoch": 1.1307355314733687, + "grad_norm": 1.7781011363806714, + "learning_rate": 8.673321168975823e-07, + "loss": 0.46922338008880615, + "step": 4904 + }, + { + "epoch": 1.1309661056029514, + "grad_norm": 1.414520843561148, + "learning_rate": 8.669542354449797e-07, + "loss": 0.38181525468826294, + "step": 4905 + }, + { + "epoch": 1.131196679732534, + "grad_norm": 1.409807627526861, + "learning_rate": 8.665763733319278e-07, + "loss": 0.4729689359664917, + "step": 4906 + }, + { + "epoch": 1.1314272538621166, + "grad_norm": 1.3128859029806206, + "learning_rate": 
8.661985306133517e-07, + "loss": 0.3934294581413269, + "step": 4907 + }, + { + "epoch": 1.1316578279916993, + "grad_norm": 1.1525332387894895, + "learning_rate": 8.658207073441754e-07, + "loss": 0.40270352363586426, + "step": 4908 + }, + { + "epoch": 1.131888402121282, + "grad_norm": 1.245477282269021, + "learning_rate": 8.654429035793196e-07, + "loss": 0.43291163444519043, + "step": 4909 + }, + { + "epoch": 1.1321189762508648, + "grad_norm": 1.8011937733870678, + "learning_rate": 8.650651193737009e-07, + "loss": 0.5054877996444702, + "step": 4910 + }, + { + "epoch": 1.1323495503804473, + "grad_norm": 1.4188548576207016, + "learning_rate": 8.646873547822347e-07, + "loss": 0.5043776035308838, + "step": 4911 + }, + { + "epoch": 1.13258012451003, + "grad_norm": 1.511127988179462, + "learning_rate": 8.643096098598328e-07, + "loss": 0.4246225953102112, + "step": 4912 + }, + { + "epoch": 1.1328106986396127, + "grad_norm": 1.3198976342579845, + "learning_rate": 8.639318846614048e-07, + "loss": 0.4514849781990051, + "step": 4913 + }, + { + "epoch": 1.1330412727691952, + "grad_norm": 1.5409054507370947, + "learning_rate": 8.635541792418557e-07, + "loss": 0.4780477285385132, + "step": 4914 + }, + { + "epoch": 1.133271846898778, + "grad_norm": 1.4447509965410514, + "learning_rate": 8.631764936560899e-07, + "loss": 0.47164270281791687, + "step": 4915 + }, + { + "epoch": 1.1335024210283606, + "grad_norm": 1.4642572467177732, + "learning_rate": 8.62798827959007e-07, + "loss": 0.5462276339530945, + "step": 4916 + }, + { + "epoch": 1.1337329951579433, + "grad_norm": 1.3611348332418316, + "learning_rate": 8.624211822055055e-07, + "loss": 0.37229591608047485, + "step": 4917 + }, + { + "epoch": 1.133963569287526, + "grad_norm": 1.6004056206114348, + "learning_rate": 8.620435564504791e-07, + "loss": 0.46595901250839233, + "step": 4918 + }, + { + "epoch": 1.1341941434171086, + "grad_norm": 1.899603419019246, + "learning_rate": 8.616659507488201e-07, + "loss": 0.4645708203315735, + 
"step": 4919 + }, + { + "epoch": 1.1344247175466913, + "grad_norm": 1.3014565799840314, + "learning_rate": 8.612883651554173e-07, + "loss": 0.4309888482093811, + "step": 4920 + }, + { + "epoch": 1.134655291676274, + "grad_norm": 1.2254662174184374, + "learning_rate": 8.60910799725156e-07, + "loss": 0.4000548720359802, + "step": 4921 + }, + { + "epoch": 1.1348858658058565, + "grad_norm": 1.2990272231335294, + "learning_rate": 8.6053325451292e-07, + "loss": 0.41321274638175964, + "step": 4922 + }, + { + "epoch": 1.1351164399354392, + "grad_norm": 1.7479036509525407, + "learning_rate": 8.601557295735884e-07, + "loss": 0.38982951641082764, + "step": 4923 + }, + { + "epoch": 1.135347014065022, + "grad_norm": 1.3265126570648142, + "learning_rate": 8.597782249620394e-07, + "loss": 0.44623300433158875, + "step": 4924 + }, + { + "epoch": 1.1355775881946046, + "grad_norm": 1.6004563551212632, + "learning_rate": 8.594007407331458e-07, + "loss": 0.46876993775367737, + "step": 4925 + }, + { + "epoch": 1.1358081623241871, + "grad_norm": 1.4785026933128127, + "learning_rate": 8.590232769417803e-07, + "loss": 0.41345149278640747, + "step": 4926 + }, + { + "epoch": 1.1360387364537698, + "grad_norm": 1.6712340860086734, + "learning_rate": 8.586458336428095e-07, + "loss": 0.4199402332305908, + "step": 4927 + }, + { + "epoch": 1.1362693105833526, + "grad_norm": 1.5807454346525946, + "learning_rate": 8.582684108910998e-07, + "loss": 0.4424753785133362, + "step": 4928 + }, + { + "epoch": 1.1364998847129353, + "grad_norm": 1.5318763722061228, + "learning_rate": 8.57891008741513e-07, + "loss": 0.5066598057746887, + "step": 4929 + }, + { + "epoch": 1.1367304588425178, + "grad_norm": 1.409045447069904, + "learning_rate": 8.575136272489081e-07, + "loss": 0.45959407091140747, + "step": 4930 + }, + { + "epoch": 1.1369610329721005, + "grad_norm": 1.191773933725539, + "learning_rate": 8.571362664681415e-07, + "loss": 0.4579051733016968, + "step": 4931 + }, + { + "epoch": 1.1371916071016832, + 
"grad_norm": 1.4061203144708347, + "learning_rate": 8.567589264540665e-07, + "loss": 0.5125559568405151, + "step": 4932 + }, + { + "epoch": 1.137422181231266, + "grad_norm": 1.484125992313306, + "learning_rate": 8.563816072615335e-07, + "loss": 0.4236595630645752, + "step": 4933 + }, + { + "epoch": 1.1376527553608484, + "grad_norm": 1.3909472723060943, + "learning_rate": 8.56004308945389e-07, + "loss": 0.40187013149261475, + "step": 4934 + }, + { + "epoch": 1.1378833294904311, + "grad_norm": 1.7306785223672838, + "learning_rate": 8.556270315604778e-07, + "loss": 0.5069487690925598, + "step": 4935 + }, + { + "epoch": 1.1381139036200139, + "grad_norm": 1.2666499948179348, + "learning_rate": 8.552497751616406e-07, + "loss": 0.4032680094242096, + "step": 4936 + }, + { + "epoch": 1.1383444777495966, + "grad_norm": 1.5147949059405765, + "learning_rate": 8.548725398037158e-07, + "loss": 0.4745323061943054, + "step": 4937 + }, + { + "epoch": 1.138575051879179, + "grad_norm": 1.6025857024716508, + "learning_rate": 8.544953255415379e-07, + "loss": 0.5203470587730408, + "step": 4938 + }, + { + "epoch": 1.1388056260087618, + "grad_norm": 1.3018365690111693, + "learning_rate": 8.541181324299392e-07, + "loss": 0.3780457079410553, + "step": 4939 + }, + { + "epoch": 1.1390362001383445, + "grad_norm": 1.4908739703097478, + "learning_rate": 8.537409605237486e-07, + "loss": 0.4544069766998291, + "step": 4940 + }, + { + "epoch": 1.1392667742679272, + "grad_norm": 1.3726631913286653, + "learning_rate": 8.533638098777914e-07, + "loss": 0.3692469000816345, + "step": 4941 + }, + { + "epoch": 1.1394973483975097, + "grad_norm": 1.7461198015621147, + "learning_rate": 8.529866805468907e-07, + "loss": 0.4733508825302124, + "step": 4942 + }, + { + "epoch": 1.1397279225270924, + "grad_norm": 1.7055847796006547, + "learning_rate": 8.526095725858658e-07, + "loss": 0.5165152549743652, + "step": 4943 + }, + { + "epoch": 1.1399584966566751, + "grad_norm": 1.5781652989183093, + "learning_rate": 
8.522324860495336e-07, + "loss": 0.40220290422439575, + "step": 4944 + }, + { + "epoch": 1.1401890707862579, + "grad_norm": 1.676524129553008, + "learning_rate": 8.518554209927066e-07, + "loss": 0.511976957321167, + "step": 4945 + }, + { + "epoch": 1.1404196449158404, + "grad_norm": 1.4578766238891505, + "learning_rate": 8.514783774701959e-07, + "loss": 0.4472247362136841, + "step": 4946 + }, + { + "epoch": 1.140650219045423, + "grad_norm": 1.3731717985494665, + "learning_rate": 8.51101355536808e-07, + "loss": 0.4368797242641449, + "step": 4947 + }, + { + "epoch": 1.1408807931750058, + "grad_norm": 1.3383514367818596, + "learning_rate": 8.507243552473476e-07, + "loss": 0.3794320225715637, + "step": 4948 + }, + { + "epoch": 1.1411113673045885, + "grad_norm": 1.7604514892248042, + "learning_rate": 8.50347376656615e-07, + "loss": 0.5229817628860474, + "step": 4949 + }, + { + "epoch": 1.141341941434171, + "grad_norm": 1.4803188344976619, + "learning_rate": 8.499704198194075e-07, + "loss": 0.4771326780319214, + "step": 4950 + }, + { + "epoch": 1.1415725155637537, + "grad_norm": 1.406078110966921, + "learning_rate": 8.495934847905201e-07, + "loss": 0.45151978731155396, + "step": 4951 + }, + { + "epoch": 1.1418030896933364, + "grad_norm": 1.3579359781108167, + "learning_rate": 8.492165716247439e-07, + "loss": 0.3963208496570587, + "step": 4952 + }, + { + "epoch": 1.1420336638229192, + "grad_norm": 1.2797227148111936, + "learning_rate": 8.488396803768675e-07, + "loss": 0.37465882301330566, + "step": 4953 + }, + { + "epoch": 1.1422642379525016, + "grad_norm": 1.7257432451816517, + "learning_rate": 8.484628111016752e-07, + "loss": 0.437372088432312, + "step": 4954 + }, + { + "epoch": 1.1424948120820844, + "grad_norm": 1.3198726990576308, + "learning_rate": 8.480859638539492e-07, + "loss": 0.40495651960372925, + "step": 4955 + }, + { + "epoch": 1.142725386211667, + "grad_norm": 1.5937176142563847, + "learning_rate": 8.477091386884677e-07, + "loss": 0.5346927642822266, + 
"step": 4956 + }, + { + "epoch": 1.1429559603412498, + "grad_norm": 1.7035083737998966, + "learning_rate": 8.473323356600068e-07, + "loss": 0.42448925971984863, + "step": 4957 + }, + { + "epoch": 1.1431865344708323, + "grad_norm": 1.4329878189218077, + "learning_rate": 8.469555548233378e-07, + "loss": 0.4715193808078766, + "step": 4958 + }, + { + "epoch": 1.143417108600415, + "grad_norm": 1.5249370547485697, + "learning_rate": 8.465787962332301e-07, + "loss": 0.4721440076828003, + "step": 4959 + }, + { + "epoch": 1.1436476827299977, + "grad_norm": 1.4963659204960478, + "learning_rate": 8.462020599444495e-07, + "loss": 0.5478333234786987, + "step": 4960 + }, + { + "epoch": 1.1438782568595804, + "grad_norm": 1.5534391969085817, + "learning_rate": 8.458253460117577e-07, + "loss": 0.4005582928657532, + "step": 4961 + }, + { + "epoch": 1.144108830989163, + "grad_norm": 1.4816205297794078, + "learning_rate": 8.454486544899146e-07, + "loss": 0.43886178731918335, + "step": 4962 + }, + { + "epoch": 1.1443394051187457, + "grad_norm": 1.2296294541393762, + "learning_rate": 8.450719854336758e-07, + "loss": 0.4404095709323883, + "step": 4963 + }, + { + "epoch": 1.1445699792483284, + "grad_norm": 1.5412493838775327, + "learning_rate": 8.446953388977943e-07, + "loss": 0.5386335849761963, + "step": 4964 + }, + { + "epoch": 1.144800553377911, + "grad_norm": 1.5969922474986569, + "learning_rate": 8.44318714937019e-07, + "loss": 0.4576258659362793, + "step": 4965 + }, + { + "epoch": 1.1450311275074936, + "grad_norm": 1.2968718824878773, + "learning_rate": 8.439421136060964e-07, + "loss": 0.4619024991989136, + "step": 4966 + }, + { + "epoch": 1.1452617016370763, + "grad_norm": 1.4106895392209726, + "learning_rate": 8.435655349597689e-07, + "loss": 0.4071081876754761, + "step": 4967 + }, + { + "epoch": 1.145492275766659, + "grad_norm": 1.3534750631649812, + "learning_rate": 8.431889790527769e-07, + "loss": 0.4605948328971863, + "step": 4968 + }, + { + "epoch": 1.1457228498962417, + 
"grad_norm": 1.4715761177473734, + "learning_rate": 8.428124459398554e-07, + "loss": 0.46706438064575195, + "step": 4969 + }, + { + "epoch": 1.1459534240258242, + "grad_norm": 1.480784825415981, + "learning_rate": 8.424359356757383e-07, + "loss": 0.39674657583236694, + "step": 4970 + }, + { + "epoch": 1.146183998155407, + "grad_norm": 1.4606371633345823, + "learning_rate": 8.42059448315155e-07, + "loss": 0.4421246647834778, + "step": 4971 + }, + { + "epoch": 1.1464145722849897, + "grad_norm": 1.6921922922853865, + "learning_rate": 8.416829839128312e-07, + "loss": 0.5220682621002197, + "step": 4972 + }, + { + "epoch": 1.1466451464145724, + "grad_norm": 1.338254387958773, + "learning_rate": 8.413065425234904e-07, + "loss": 0.40189129114151, + "step": 4973 + }, + { + "epoch": 1.1468757205441549, + "grad_norm": 1.3011913252808138, + "learning_rate": 8.409301242018517e-07, + "loss": 0.448421835899353, + "step": 4974 + }, + { + "epoch": 1.1471062946737376, + "grad_norm": 1.5996651322296722, + "learning_rate": 8.405537290026318e-07, + "loss": 0.49476757645606995, + "step": 4975 + }, + { + "epoch": 1.1473368688033203, + "grad_norm": 1.4573872381246367, + "learning_rate": 8.401773569805431e-07, + "loss": 0.3888528347015381, + "step": 4976 + }, + { + "epoch": 1.1475674429329028, + "grad_norm": 1.4760563096119323, + "learning_rate": 8.398010081902956e-07, + "loss": 0.49057653546333313, + "step": 4977 + }, + { + "epoch": 1.1477980170624855, + "grad_norm": 1.3851559333900214, + "learning_rate": 8.39424682686595e-07, + "loss": 0.41700610518455505, + "step": 4978 + }, + { + "epoch": 1.1480285911920682, + "grad_norm": 1.5382531029836037, + "learning_rate": 8.390483805241441e-07, + "loss": 0.4801902770996094, + "step": 4979 + }, + { + "epoch": 1.148259165321651, + "grad_norm": 1.5691797878096674, + "learning_rate": 8.386721017576426e-07, + "loss": 0.5438926219940186, + "step": 4980 + }, + { + "epoch": 1.1484897394512337, + "grad_norm": 1.3886510011393631, + "learning_rate": 
8.382958464417857e-07, + "loss": 0.3991735577583313, + "step": 4981 + }, + { + "epoch": 1.1487203135808162, + "grad_norm": 1.5064271527131172, + "learning_rate": 8.379196146312664e-07, + "loss": 0.4918370246887207, + "step": 4982 + }, + { + "epoch": 1.1489508877103989, + "grad_norm": 1.713149481922198, + "learning_rate": 8.375434063807737e-07, + "loss": 0.5280467867851257, + "step": 4983 + }, + { + "epoch": 1.1491814618399816, + "grad_norm": 1.2990876069782782, + "learning_rate": 8.371672217449936e-07, + "loss": 0.4186179041862488, + "step": 4984 + }, + { + "epoch": 1.149412035969564, + "grad_norm": 1.3742464834005608, + "learning_rate": 8.367910607786079e-07, + "loss": 0.3698224723339081, + "step": 4985 + }, + { + "epoch": 1.1496426100991468, + "grad_norm": 1.4766762383505605, + "learning_rate": 8.364149235362956e-07, + "loss": 0.45402267575263977, + "step": 4986 + }, + { + "epoch": 1.1498731842287295, + "grad_norm": 1.530758978566143, + "learning_rate": 8.36038810072732e-07, + "loss": 0.5145484209060669, + "step": 4987 + }, + { + "epoch": 1.1501037583583122, + "grad_norm": 1.2257671687651395, + "learning_rate": 8.356627204425893e-07, + "loss": 0.4293951392173767, + "step": 4988 + }, + { + "epoch": 1.150334332487895, + "grad_norm": 1.5415847348488914, + "learning_rate": 8.352866547005354e-07, + "loss": 0.3916272521018982, + "step": 4989 + }, + { + "epoch": 1.1505649066174775, + "grad_norm": 1.6777087516004896, + "learning_rate": 8.349106129012357e-07, + "loss": 0.40171611309051514, + "step": 4990 + }, + { + "epoch": 1.1507954807470602, + "grad_norm": 1.5767244212385862, + "learning_rate": 8.345345950993518e-07, + "loss": 0.49580252170562744, + "step": 4991 + }, + { + "epoch": 1.151026054876643, + "grad_norm": 1.491822308561489, + "learning_rate": 8.34158601349541e-07, + "loss": 0.4521256685256958, + "step": 4992 + }, + { + "epoch": 1.1512566290062254, + "grad_norm": 1.5317445246777317, + "learning_rate": 8.337826317064585e-07, + "loss": 0.3920813798904419, + 
"step": 4993 + }, + { + "epoch": 1.151487203135808, + "grad_norm": 1.4336055128806646, + "learning_rate": 8.334066862247547e-07, + "loss": 0.4263145923614502, + "step": 4994 + }, + { + "epoch": 1.1517177772653908, + "grad_norm": 1.513949850078891, + "learning_rate": 8.330307649590779e-07, + "loss": 0.4746140241622925, + "step": 4995 + }, + { + "epoch": 1.1519483513949735, + "grad_norm": 1.6708741885004843, + "learning_rate": 8.326548679640713e-07, + "loss": 0.37520158290863037, + "step": 4996 + }, + { + "epoch": 1.1521789255245563, + "grad_norm": 1.4060610690176367, + "learning_rate": 8.322789952943759e-07, + "loss": 0.4481951892375946, + "step": 4997 + }, + { + "epoch": 1.1524094996541387, + "grad_norm": 1.4336851088246751, + "learning_rate": 8.319031470046281e-07, + "loss": 0.40319859981536865, + "step": 4998 + }, + { + "epoch": 1.1526400737837215, + "grad_norm": 1.805948160607668, + "learning_rate": 8.315273231494615e-07, + "loss": 0.47720152139663696, + "step": 4999 + }, + { + "epoch": 1.1528706479133042, + "grad_norm": 1.2994404231083814, + "learning_rate": 8.311515237835063e-07, + "loss": 0.4027557969093323, + "step": 5000 + }, + { + "epoch": 1.1531012220428867, + "grad_norm": 1.5346692874582604, + "learning_rate": 8.307757489613878e-07, + "loss": 0.3939552307128906, + "step": 5001 + }, + { + "epoch": 1.1533317961724694, + "grad_norm": 1.541801101637957, + "learning_rate": 8.303999987377295e-07, + "loss": 0.379425585269928, + "step": 5002 + }, + { + "epoch": 1.153562370302052, + "grad_norm": 1.3222707927494204, + "learning_rate": 8.300242731671499e-07, + "loss": 0.46231499314308167, + "step": 5003 + }, + { + "epoch": 1.1537929444316348, + "grad_norm": 1.5623820882279815, + "learning_rate": 8.296485723042654e-07, + "loss": 0.4639621675014496, + "step": 5004 + }, + { + "epoch": 1.1540235185612175, + "grad_norm": 1.4577901713449948, + "learning_rate": 8.29272896203687e-07, + "loss": 0.49264025688171387, + "step": 5005 + }, + { + "epoch": 1.1542540926908, + 
"grad_norm": 1.2796677798690286, + "learning_rate": 8.288972449200233e-07, + "loss": 0.4145156145095825, + "step": 5006 + }, + { + "epoch": 1.1544846668203828, + "grad_norm": 1.3338594060824709, + "learning_rate": 8.285216185078792e-07, + "loss": 0.39693811535835266, + "step": 5007 + }, + { + "epoch": 1.1547152409499655, + "grad_norm": 1.356694069152444, + "learning_rate": 8.281460170218561e-07, + "loss": 0.46224820613861084, + "step": 5008 + }, + { + "epoch": 1.154945815079548, + "grad_norm": 1.5380330607680774, + "learning_rate": 8.277704405165506e-07, + "loss": 0.48868128657341003, + "step": 5009 + }, + { + "epoch": 1.1551763892091307, + "grad_norm": 1.4024811483349113, + "learning_rate": 8.273948890465574e-07, + "loss": 0.5127776265144348, + "step": 5010 + }, + { + "epoch": 1.1554069633387134, + "grad_norm": 1.4092381840768406, + "learning_rate": 8.270193626664665e-07, + "loss": 0.4039389491081238, + "step": 5011 + }, + { + "epoch": 1.1556375374682961, + "grad_norm": 1.5807780806971976, + "learning_rate": 8.266438614308641e-07, + "loss": 0.4224502444267273, + "step": 5012 + }, + { + "epoch": 1.1558681115978788, + "grad_norm": 1.42726619115002, + "learning_rate": 8.262683853943335e-07, + "loss": 0.4392918050289154, + "step": 5013 + }, + { + "epoch": 1.1560986857274613, + "grad_norm": 1.5001771531608157, + "learning_rate": 8.258929346114534e-07, + "loss": 0.5055289268493652, + "step": 5014 + }, + { + "epoch": 1.156329259857044, + "grad_norm": 1.3839083181087675, + "learning_rate": 8.255175091368003e-07, + "loss": 0.43851351737976074, + "step": 5015 + }, + { + "epoch": 1.1565598339866268, + "grad_norm": 1.576893376736649, + "learning_rate": 8.251421090249451e-07, + "loss": 0.4557814598083496, + "step": 5016 + }, + { + "epoch": 1.1567904081162093, + "grad_norm": 1.2994912796642604, + "learning_rate": 8.247667343304568e-07, + "loss": 0.4288882613182068, + "step": 5017 + }, + { + "epoch": 1.157020982245792, + "grad_norm": 1.4237104241903844, + "learning_rate": 
8.243913851078994e-07, + "loss": 0.42711886763572693, + "step": 5018 + }, + { + "epoch": 1.1572515563753747, + "grad_norm": 1.8597293679946851, + "learning_rate": 8.240160614118342e-07, + "loss": 0.515809953212738, + "step": 5019 + }, + { + "epoch": 1.1574821305049574, + "grad_norm": 1.828777504717114, + "learning_rate": 8.236407632968182e-07, + "loss": 0.5754632949829102, + "step": 5020 + }, + { + "epoch": 1.1577127046345401, + "grad_norm": 1.553176542229762, + "learning_rate": 8.232654908174038e-07, + "loss": 0.4601830244064331, + "step": 5021 + }, + { + "epoch": 1.1579432787641226, + "grad_norm": 1.500802040492981, + "learning_rate": 8.228902440281422e-07, + "loss": 0.4740797281265259, + "step": 5022 + }, + { + "epoch": 1.1581738528937053, + "grad_norm": 1.688304974088827, + "learning_rate": 8.225150229835781e-07, + "loss": 0.4066367745399475, + "step": 5023 + }, + { + "epoch": 1.158404427023288, + "grad_norm": 1.357187761009418, + "learning_rate": 8.221398277382546e-07, + "loss": 0.4664362668991089, + "step": 5024 + }, + { + "epoch": 1.1586350011528705, + "grad_norm": 1.3912425171719864, + "learning_rate": 8.217646583467093e-07, + "loss": 0.5204637050628662, + "step": 5025 + }, + { + "epoch": 1.1588655752824533, + "grad_norm": 1.4227227145637968, + "learning_rate": 8.213895148634775e-07, + "loss": 0.4991419017314911, + "step": 5026 + }, + { + "epoch": 1.159096149412036, + "grad_norm": 1.2844880437163813, + "learning_rate": 8.210143973430896e-07, + "loss": 0.40420424938201904, + "step": 5027 + }, + { + "epoch": 1.1593267235416187, + "grad_norm": 1.4946107412544847, + "learning_rate": 8.206393058400736e-07, + "loss": 0.523331880569458, + "step": 5028 + }, + { + "epoch": 1.1595572976712014, + "grad_norm": 1.4908780499938201, + "learning_rate": 8.202642404089516e-07, + "loss": 0.5019216537475586, + "step": 5029 + }, + { + "epoch": 1.159787871800784, + "grad_norm": 1.6451488656605473, + "learning_rate": 8.198892011042442e-07, + "loss": 0.522672712802887, + "step": 
5030 + }, + { + "epoch": 1.1600184459303666, + "grad_norm": 1.505727418733034, + "learning_rate": 8.195141879804668e-07, + "loss": 0.418377548456192, + "step": 5031 + }, + { + "epoch": 1.1602490200599493, + "grad_norm": 1.5635210393713965, + "learning_rate": 8.191392010921312e-07, + "loss": 0.4914432764053345, + "step": 5032 + }, + { + "epoch": 1.1604795941895318, + "grad_norm": 1.3929576184838368, + "learning_rate": 8.187642404937459e-07, + "loss": 0.42149683833122253, + "step": 5033 + }, + { + "epoch": 1.1607101683191146, + "grad_norm": 1.6811040317548793, + "learning_rate": 8.183893062398145e-07, + "loss": 0.5637058019638062, + "step": 5034 + }, + { + "epoch": 1.1609407424486973, + "grad_norm": 1.2252559322458123, + "learning_rate": 8.180143983848387e-07, + "loss": 0.49930211901664734, + "step": 5035 + }, + { + "epoch": 1.16117131657828, + "grad_norm": 1.626369547940987, + "learning_rate": 8.176395169833139e-07, + "loss": 0.4217071235179901, + "step": 5036 + }, + { + "epoch": 1.1614018907078625, + "grad_norm": 1.9654976691842632, + "learning_rate": 8.172646620897336e-07, + "loss": 0.4208733141422272, + "step": 5037 + }, + { + "epoch": 1.1616324648374452, + "grad_norm": 1.434216808832, + "learning_rate": 8.168898337585866e-07, + "loss": 0.42970529198646545, + "step": 5038 + }, + { + "epoch": 1.161863038967028, + "grad_norm": 1.429859410744686, + "learning_rate": 8.165150320443584e-07, + "loss": 0.49482622742652893, + "step": 5039 + }, + { + "epoch": 1.1620936130966106, + "grad_norm": 1.2888747437309156, + "learning_rate": 8.161402570015297e-07, + "loss": 0.4106384217739105, + "step": 5040 + }, + { + "epoch": 1.1623241872261931, + "grad_norm": 1.8632515092828725, + "learning_rate": 8.157655086845778e-07, + "loss": 0.4550397992134094, + "step": 5041 + }, + { + "epoch": 1.1625547613557758, + "grad_norm": 1.4636128502892785, + "learning_rate": 8.153907871479768e-07, + "loss": 0.5144504308700562, + "step": 5042 + }, + { + "epoch": 1.1627853354853586, + "grad_norm": 
1.4308354935014596, + "learning_rate": 8.150160924461953e-07, + "loss": 0.3970009684562683, + "step": 5043 + }, + { + "epoch": 1.1630159096149413, + "grad_norm": 1.4674063038688332, + "learning_rate": 8.146414246336998e-07, + "loss": 0.45825856924057007, + "step": 5044 + }, + { + "epoch": 1.1632464837445238, + "grad_norm": 1.6850972190756333, + "learning_rate": 8.142667837649515e-07, + "loss": 0.4515247344970703, + "step": 5045 + }, + { + "epoch": 1.1634770578741065, + "grad_norm": 1.347770803032681, + "learning_rate": 8.13892169894409e-07, + "loss": 0.41265833377838135, + "step": 5046 + }, + { + "epoch": 1.1637076320036892, + "grad_norm": 1.4117996459358377, + "learning_rate": 8.135175830765254e-07, + "loss": 0.39820557832717896, + "step": 5047 + }, + { + "epoch": 1.163938206133272, + "grad_norm": 1.4272016239744356, + "learning_rate": 8.131430233657514e-07, + "loss": 0.41528987884521484, + "step": 5048 + }, + { + "epoch": 1.1641687802628544, + "grad_norm": 1.3404996701874776, + "learning_rate": 8.127684908165323e-07, + "loss": 0.4453636407852173, + "step": 5049 + }, + { + "epoch": 1.1643993543924371, + "grad_norm": 1.846029547761043, + "learning_rate": 8.123939854833107e-07, + "loss": 0.45008519291877747, + "step": 5050 + }, + { + "epoch": 1.1646299285220199, + "grad_norm": 1.7254544812081525, + "learning_rate": 8.120195074205249e-07, + "loss": 0.456550657749176, + "step": 5051 + }, + { + "epoch": 1.1648605026516026, + "grad_norm": 1.4455041595835194, + "learning_rate": 8.116450566826086e-07, + "loss": 0.44465887546539307, + "step": 5052 + }, + { + "epoch": 1.165091076781185, + "grad_norm": 1.4606872040412728, + "learning_rate": 8.112706333239923e-07, + "loss": 0.4769172668457031, + "step": 5053 + }, + { + "epoch": 1.1653216509107678, + "grad_norm": 1.5800176181940382, + "learning_rate": 8.108962373991019e-07, + "loss": 0.42662739753723145, + "step": 5054 + }, + { + "epoch": 1.1655522250403505, + "grad_norm": 1.533727299161298, + "learning_rate": 
8.105218689623603e-07, + "loss": 0.4923250079154968, + "step": 5055 + }, + { + "epoch": 1.1657827991699332, + "grad_norm": 1.5783599756682145, + "learning_rate": 8.10147528068185e-07, + "loss": 0.42462587356567383, + "step": 5056 + }, + { + "epoch": 1.1660133732995157, + "grad_norm": 1.3458818448335859, + "learning_rate": 8.097732147709908e-07, + "loss": 0.47610223293304443, + "step": 5057 + }, + { + "epoch": 1.1662439474290984, + "grad_norm": 1.6207397386125497, + "learning_rate": 8.093989291251875e-07, + "loss": 0.47519630193710327, + "step": 5058 + }, + { + "epoch": 1.1664745215586811, + "grad_norm": 1.3901575117179885, + "learning_rate": 8.090246711851819e-07, + "loss": 0.38865840435028076, + "step": 5059 + }, + { + "epoch": 1.1667050956882639, + "grad_norm": 1.271312682478528, + "learning_rate": 8.086504410053757e-07, + "loss": 0.39990776777267456, + "step": 5060 + }, + { + "epoch": 1.1669356698178464, + "grad_norm": 1.4665951386644982, + "learning_rate": 8.082762386401669e-07, + "loss": 0.4330836534500122, + "step": 5061 + }, + { + "epoch": 1.167166243947429, + "grad_norm": 1.286707043518209, + "learning_rate": 8.079020641439504e-07, + "loss": 0.4285934865474701, + "step": 5062 + }, + { + "epoch": 1.1673968180770118, + "grad_norm": 1.7499199825760443, + "learning_rate": 8.075279175711152e-07, + "loss": 0.3900645077228546, + "step": 5063 + }, + { + "epoch": 1.1676273922065945, + "grad_norm": 1.3606445329404238, + "learning_rate": 8.07153798976048e-07, + "loss": 0.48145759105682373, + "step": 5064 + }, + { + "epoch": 1.167857966336177, + "grad_norm": 1.7592322949259351, + "learning_rate": 8.067797084131305e-07, + "loss": 0.4239045977592468, + "step": 5065 + }, + { + "epoch": 1.1680885404657597, + "grad_norm": 1.7501505795878665, + "learning_rate": 8.064056459367409e-07, + "loss": 0.55517578125, + "step": 5066 + }, + { + "epoch": 1.1683191145953424, + "grad_norm": 1.588400616006081, + "learning_rate": 8.060316116012524e-07, + "loss": 0.4956046938896179, + 
"step": 5067 + }, + { + "epoch": 1.1685496887249252, + "grad_norm": 1.3607022789051413, + "learning_rate": 8.05657605461035e-07, + "loss": 0.4051878750324249, + "step": 5068 + }, + { + "epoch": 1.1687802628545076, + "grad_norm": 1.6471264462607456, + "learning_rate": 8.052836275704541e-07, + "loss": 0.47389912605285645, + "step": 5069 + }, + { + "epoch": 1.1690108369840904, + "grad_norm": 1.3462872241997197, + "learning_rate": 8.049096779838717e-07, + "loss": 0.5023842453956604, + "step": 5070 + }, + { + "epoch": 1.169241411113673, + "grad_norm": 1.3943514778037218, + "learning_rate": 8.045357567556449e-07, + "loss": 0.4895137548446655, + "step": 5071 + }, + { + "epoch": 1.1694719852432558, + "grad_norm": 1.5328176046123796, + "learning_rate": 8.041618639401264e-07, + "loss": 0.47874224185943604, + "step": 5072 + }, + { + "epoch": 1.1697025593728383, + "grad_norm": 1.4666773972258982, + "learning_rate": 8.037879995916659e-07, + "loss": 0.4784395694732666, + "step": 5073 + }, + { + "epoch": 1.169933133502421, + "grad_norm": 1.4433652991816976, + "learning_rate": 8.034141637646079e-07, + "loss": 0.45289772748947144, + "step": 5074 + }, + { + "epoch": 1.1701637076320037, + "grad_norm": 1.931933746015264, + "learning_rate": 8.030403565132942e-07, + "loss": 0.5375204682350159, + "step": 5075 + }, + { + "epoch": 1.1703942817615864, + "grad_norm": 1.4956339972756536, + "learning_rate": 8.026665778920602e-07, + "loss": 0.45003899931907654, + "step": 5076 + }, + { + "epoch": 1.170624855891169, + "grad_norm": 1.348037979358877, + "learning_rate": 8.022928279552392e-07, + "loss": 0.4236389994621277, + "step": 5077 + }, + { + "epoch": 1.1708554300207517, + "grad_norm": 1.3333943245649609, + "learning_rate": 8.019191067571592e-07, + "loss": 0.43182557821273804, + "step": 5078 + }, + { + "epoch": 1.1710860041503344, + "grad_norm": 1.7521692166476222, + "learning_rate": 8.01545414352145e-07, + "loss": 0.5171953439712524, + "step": 5079 + }, + { + "epoch": 1.171316578279917, + 
"grad_norm": 1.5319548219026522, + "learning_rate": 8.011717507945157e-07, + "loss": 0.5084770321846008, + "step": 5080 + }, + { + "epoch": 1.1715471524094996, + "grad_norm": 1.6342595542262888, + "learning_rate": 8.007981161385876e-07, + "loss": 0.4685532748699188, + "step": 5081 + }, + { + "epoch": 1.1717777265390823, + "grad_norm": 1.5086552244362486, + "learning_rate": 8.004245104386724e-07, + "loss": 0.4647448658943176, + "step": 5082 + }, + { + "epoch": 1.172008300668665, + "grad_norm": 1.4914913702780284, + "learning_rate": 8.000509337490768e-07, + "loss": 0.4038098454475403, + "step": 5083 + }, + { + "epoch": 1.1722388747982477, + "grad_norm": 1.435384500623052, + "learning_rate": 7.996773861241047e-07, + "loss": 0.4153759479522705, + "step": 5084 + }, + { + "epoch": 1.1724694489278302, + "grad_norm": 1.5573715225755111, + "learning_rate": 7.993038676180545e-07, + "loss": 0.4569447636604309, + "step": 5085 + }, + { + "epoch": 1.172700023057413, + "grad_norm": 1.4307958679817, + "learning_rate": 7.989303782852215e-07, + "loss": 0.4419426918029785, + "step": 5086 + }, + { + "epoch": 1.1729305971869957, + "grad_norm": 1.4177391878017933, + "learning_rate": 7.985569181798955e-07, + "loss": 0.3902894854545593, + "step": 5087 + }, + { + "epoch": 1.1731611713165782, + "grad_norm": 1.3935681641299988, + "learning_rate": 7.981834873563631e-07, + "loss": 0.4066358208656311, + "step": 5088 + }, + { + "epoch": 1.1733917454461609, + "grad_norm": 1.579270038843054, + "learning_rate": 7.978100858689059e-07, + "loss": 0.4589639902114868, + "step": 5089 + }, + { + "epoch": 1.1736223195757436, + "grad_norm": 1.5868805646941586, + "learning_rate": 7.974367137718024e-07, + "loss": 0.4431188106536865, + "step": 5090 + }, + { + "epoch": 1.1738528937053263, + "grad_norm": 1.3420666663317198, + "learning_rate": 7.970633711193252e-07, + "loss": 0.43412742018699646, + "step": 5091 + }, + { + "epoch": 1.174083467834909, + "grad_norm": 1.360898150528172, + "learning_rate": 
7.966900579657435e-07, + "loss": 0.40296387672424316, + "step": 5092 + }, + { + "epoch": 1.1743140419644915, + "grad_norm": 1.4702894316239854, + "learning_rate": 7.963167743653228e-07, + "loss": 0.4814741611480713, + "step": 5093 + }, + { + "epoch": 1.1745446160940742, + "grad_norm": 1.7678935112109417, + "learning_rate": 7.959435203723228e-07, + "loss": 0.4412423372268677, + "step": 5094 + }, + { + "epoch": 1.174775190223657, + "grad_norm": 1.698823813376211, + "learning_rate": 7.955702960410006e-07, + "loss": 0.49773266911506653, + "step": 5095 + }, + { + "epoch": 1.1750057643532394, + "grad_norm": 1.445996901779518, + "learning_rate": 7.951971014256073e-07, + "loss": 0.4657529592514038, + "step": 5096 + }, + { + "epoch": 1.1752363384828222, + "grad_norm": 1.4844953949134, + "learning_rate": 7.94823936580391e-07, + "loss": 0.4062782824039459, + "step": 5097 + }, + { + "epoch": 1.1754669126124049, + "grad_norm": 1.3280643963390701, + "learning_rate": 7.944508015595948e-07, + "loss": 0.4154980182647705, + "step": 5098 + }, + { + "epoch": 1.1756974867419876, + "grad_norm": 1.3235405382692107, + "learning_rate": 7.940776964174582e-07, + "loss": 0.4724680185317993, + "step": 5099 + }, + { + "epoch": 1.1759280608715703, + "grad_norm": 1.4212228031547876, + "learning_rate": 7.937046212082149e-07, + "loss": 0.48808538913726807, + "step": 5100 + }, + { + "epoch": 1.1761586350011528, + "grad_norm": 1.3949555418133748, + "learning_rate": 7.933315759860959e-07, + "loss": 0.4985845983028412, + "step": 5101 + }, + { + "epoch": 1.1763892091307355, + "grad_norm": 1.2192149824969183, + "learning_rate": 7.92958560805327e-07, + "loss": 0.3735587000846863, + "step": 5102 + }, + { + "epoch": 1.1766197832603182, + "grad_norm": 1.3793872147262238, + "learning_rate": 7.925855757201294e-07, + "loss": 0.4198414385318756, + "step": 5103 + }, + { + "epoch": 1.1768503573899007, + "grad_norm": 1.7231390796467927, + "learning_rate": 7.922126207847204e-07, + "loss": 0.41973787546157837, + 
"step": 5104 + }, + { + "epoch": 1.1770809315194835, + "grad_norm": 1.8258365265115961, + "learning_rate": 7.918396960533128e-07, + "loss": 0.5179545283317566, + "step": 5105 + }, + { + "epoch": 1.1773115056490662, + "grad_norm": 1.5757377934881964, + "learning_rate": 7.914668015801153e-07, + "loss": 0.4917227625846863, + "step": 5106 + }, + { + "epoch": 1.1775420797786489, + "grad_norm": 1.5132865673859617, + "learning_rate": 7.910939374193312e-07, + "loss": 0.41775548458099365, + "step": 5107 + }, + { + "epoch": 1.1777726539082316, + "grad_norm": 1.484971286444874, + "learning_rate": 7.907211036251608e-07, + "loss": 0.45468997955322266, + "step": 5108 + }, + { + "epoch": 1.178003228037814, + "grad_norm": 1.292166499414124, + "learning_rate": 7.903483002517988e-07, + "loss": 0.3749620318412781, + "step": 5109 + }, + { + "epoch": 1.1782338021673968, + "grad_norm": 1.3945828421286317, + "learning_rate": 7.899755273534365e-07, + "loss": 0.48940956592559814, + "step": 5110 + }, + { + "epoch": 1.1784643762969795, + "grad_norm": 1.3575927994558319, + "learning_rate": 7.896027849842594e-07, + "loss": 0.4561386704444885, + "step": 5111 + }, + { + "epoch": 1.178694950426562, + "grad_norm": 1.4968176209501343, + "learning_rate": 7.892300731984498e-07, + "loss": 0.441898375749588, + "step": 5112 + }, + { + "epoch": 1.1789255245561447, + "grad_norm": 1.7617220832230103, + "learning_rate": 7.888573920501856e-07, + "loss": 0.43445056676864624, + "step": 5113 + }, + { + "epoch": 1.1791560986857275, + "grad_norm": 1.4680500200302005, + "learning_rate": 7.884847415936389e-07, + "loss": 0.42653167247772217, + "step": 5114 + }, + { + "epoch": 1.1793866728153102, + "grad_norm": 1.3867120793190437, + "learning_rate": 7.881121218829787e-07, + "loss": 0.42003321647644043, + "step": 5115 + }, + { + "epoch": 1.179617246944893, + "grad_norm": 1.613544333660259, + "learning_rate": 7.87739532972369e-07, + "loss": 0.4920128881931305, + "step": 5116 + }, + { + "epoch": 1.1798478210744754, + 
"grad_norm": 1.430783098871577, + "learning_rate": 7.873669749159697e-07, + "loss": 0.49529707431793213, + "step": 5117 + }, + { + "epoch": 1.180078395204058, + "grad_norm": 1.4915607575501106, + "learning_rate": 7.869944477679351e-07, + "loss": 0.4813005328178406, + "step": 5118 + }, + { + "epoch": 1.1803089693336408, + "grad_norm": 1.4923304237688, + "learning_rate": 7.866219515824168e-07, + "loss": 0.47239556908607483, + "step": 5119 + }, + { + "epoch": 1.1805395434632233, + "grad_norm": 1.7203098580351979, + "learning_rate": 7.862494864135596e-07, + "loss": 0.4808405935764313, + "step": 5120 + }, + { + "epoch": 1.180770117592806, + "grad_norm": 1.5206410201181635, + "learning_rate": 7.858770523155066e-07, + "loss": 0.44946521520614624, + "step": 5121 + }, + { + "epoch": 1.1810006917223888, + "grad_norm": 1.8958199353441048, + "learning_rate": 7.85504649342394e-07, + "loss": 0.5344874858856201, + "step": 5122 + }, + { + "epoch": 1.1812312658519715, + "grad_norm": 1.729692211161555, + "learning_rate": 7.851322775483542e-07, + "loss": 0.49354079365730286, + "step": 5123 + }, + { + "epoch": 1.1814618399815542, + "grad_norm": 1.6407900723292905, + "learning_rate": 7.847599369875155e-07, + "loss": 0.414085328578949, + "step": 5124 + }, + { + "epoch": 1.1816924141111367, + "grad_norm": 1.51838750003237, + "learning_rate": 7.843876277140013e-07, + "loss": 0.4638150632381439, + "step": 5125 + }, + { + "epoch": 1.1819229882407194, + "grad_norm": 1.5309477954820934, + "learning_rate": 7.84015349781931e-07, + "loss": 0.39239877462387085, + "step": 5126 + }, + { + "epoch": 1.1821535623703021, + "grad_norm": 1.456140160914471, + "learning_rate": 7.83643103245418e-07, + "loss": 0.46846455335617065, + "step": 5127 + }, + { + "epoch": 1.1823841364998846, + "grad_norm": 1.7368044200229882, + "learning_rate": 7.832708881585729e-07, + "loss": 0.5257229804992676, + "step": 5128 + }, + { + "epoch": 1.1826147106294673, + "grad_norm": 1.246852967804398, + "learning_rate": 
7.828987045755006e-07, + "loss": 0.3858698904514313, + "step": 5129 + }, + { + "epoch": 1.18284528475905, + "grad_norm": 1.526790126487461, + "learning_rate": 7.82526552550302e-07, + "loss": 0.48664575815200806, + "step": 5130 + }, + { + "epoch": 1.1830758588886328, + "grad_norm": 1.4370667079865387, + "learning_rate": 7.821544321370731e-07, + "loss": 0.5246836543083191, + "step": 5131 + }, + { + "epoch": 1.1833064330182155, + "grad_norm": 1.6695741670894575, + "learning_rate": 7.817823433899049e-07, + "loss": 0.5538516640663147, + "step": 5132 + }, + { + "epoch": 1.183537007147798, + "grad_norm": 1.5154692060299837, + "learning_rate": 7.814102863628852e-07, + "loss": 0.4563618302345276, + "step": 5133 + }, + { + "epoch": 1.1837675812773807, + "grad_norm": 1.6013623117191365, + "learning_rate": 7.810382611100952e-07, + "loss": 0.48093757033348083, + "step": 5134 + }, + { + "epoch": 1.1839981554069634, + "grad_norm": 1.4079128694512013, + "learning_rate": 7.806662676856133e-07, + "loss": 0.41152772307395935, + "step": 5135 + }, + { + "epoch": 1.184228729536546, + "grad_norm": 1.470828934761741, + "learning_rate": 7.802943061435121e-07, + "loss": 0.4429926574230194, + "step": 5136 + }, + { + "epoch": 1.1844593036661286, + "grad_norm": 1.6844871985058756, + "learning_rate": 7.799223765378604e-07, + "loss": 0.5795058012008667, + "step": 5137 + }, + { + "epoch": 1.1846898777957113, + "grad_norm": 1.3964078038325152, + "learning_rate": 7.795504789227214e-07, + "loss": 0.43219637870788574, + "step": 5138 + }, + { + "epoch": 1.184920451925294, + "grad_norm": 1.3120429368988666, + "learning_rate": 7.791786133521547e-07, + "loss": 0.472915917634964, + "step": 5139 + }, + { + "epoch": 1.1851510260548768, + "grad_norm": 1.8547533260703066, + "learning_rate": 7.788067798802144e-07, + "loss": 0.609251081943512, + "step": 5140 + }, + { + "epoch": 1.1853816001844593, + "grad_norm": 1.5647854614729606, + "learning_rate": 7.784349785609506e-07, + "loss": 0.5051882266998291, + 
"step": 5141 + }, + { + "epoch": 1.185612174314042, + "grad_norm": 1.8256847598733492, + "learning_rate": 7.780632094484081e-07, + "loss": 0.5062044858932495, + "step": 5142 + }, + { + "epoch": 1.1858427484436247, + "grad_norm": 1.6792228276022907, + "learning_rate": 7.77691472596627e-07, + "loss": 0.48717936873435974, + "step": 5143 + }, + { + "epoch": 1.1860733225732072, + "grad_norm": 1.4962691739334948, + "learning_rate": 7.773197680596439e-07, + "loss": 0.4755759537220001, + "step": 5144 + }, + { + "epoch": 1.18630389670279, + "grad_norm": 1.5701944534084074, + "learning_rate": 7.769480958914889e-07, + "loss": 0.4549487829208374, + "step": 5145 + }, + { + "epoch": 1.1865344708323726, + "grad_norm": 1.3416043214582947, + "learning_rate": 7.765764561461891e-07, + "loss": 0.39759546518325806, + "step": 5146 + }, + { + "epoch": 1.1867650449619553, + "grad_norm": 1.7321999626139561, + "learning_rate": 7.762048488777654e-07, + "loss": 0.5151915550231934, + "step": 5147 + }, + { + "epoch": 1.1869956190915378, + "grad_norm": 1.739537041268416, + "learning_rate": 7.758332741402351e-07, + "loss": 0.4555166959762573, + "step": 5148 + }, + { + "epoch": 1.1872261932211206, + "grad_norm": 1.246823148309275, + "learning_rate": 7.754617319876102e-07, + "loss": 0.3639993667602539, + "step": 5149 + }, + { + "epoch": 1.1874567673507033, + "grad_norm": 1.4228626603425891, + "learning_rate": 7.750902224738984e-07, + "loss": 0.4158916473388672, + "step": 5150 + }, + { + "epoch": 1.187687341480286, + "grad_norm": 1.5159845507016538, + "learning_rate": 7.747187456531021e-07, + "loss": 0.44933754205703735, + "step": 5151 + }, + { + "epoch": 1.1879179156098685, + "grad_norm": 1.1574431418082898, + "learning_rate": 7.74347301579219e-07, + "loss": 0.35436397790908813, + "step": 5152 + }, + { + "epoch": 1.1881484897394512, + "grad_norm": 1.7559371420298944, + "learning_rate": 7.73975890306243e-07, + "loss": 0.40650928020477295, + "step": 5153 + }, + { + "epoch": 1.188379063869034, + 
"grad_norm": 1.655955114095899, + "learning_rate": 7.736045118881615e-07, + "loss": 0.424211710691452, + "step": 5154 + }, + { + "epoch": 1.1886096379986166, + "grad_norm": 1.386370427214692, + "learning_rate": 7.73233166378959e-07, + "loss": 0.38909512758255005, + "step": 5155 + }, + { + "epoch": 1.1888402121281991, + "grad_norm": 1.6273556393891413, + "learning_rate": 7.728618538326139e-07, + "loss": 0.4452083110809326, + "step": 5156 + }, + { + "epoch": 1.1890707862577818, + "grad_norm": 1.7325341862894768, + "learning_rate": 7.724905743031005e-07, + "loss": 0.45061540603637695, + "step": 5157 + }, + { + "epoch": 1.1893013603873646, + "grad_norm": 1.875195364158454, + "learning_rate": 7.721193278443875e-07, + "loss": 0.5301374197006226, + "step": 5158 + }, + { + "epoch": 1.1895319345169473, + "grad_norm": 1.32653936253781, + "learning_rate": 7.717481145104398e-07, + "loss": 0.4386521577835083, + "step": 5159 + }, + { + "epoch": 1.1897625086465298, + "grad_norm": 1.5893013583646332, + "learning_rate": 7.713769343552169e-07, + "loss": 0.447623074054718, + "step": 5160 + }, + { + "epoch": 1.1899930827761125, + "grad_norm": 1.4757184491338362, + "learning_rate": 7.71005787432674e-07, + "loss": 0.44326454401016235, + "step": 5161 + }, + { + "epoch": 1.1902236569056952, + "grad_norm": 1.4868394681814385, + "learning_rate": 7.706346737967603e-07, + "loss": 0.564007043838501, + "step": 5162 + }, + { + "epoch": 1.190454231035278, + "grad_norm": 1.4497565739191507, + "learning_rate": 7.702635935014213e-07, + "loss": 0.5338540077209473, + "step": 5163 + }, + { + "epoch": 1.1906848051648604, + "grad_norm": 1.5430964424900424, + "learning_rate": 7.698925466005977e-07, + "loss": 0.45307862758636475, + "step": 5164 + }, + { + "epoch": 1.1909153792944431, + "grad_norm": 1.4703583168080245, + "learning_rate": 7.69521533148224e-07, + "loss": 0.5383142232894897, + "step": 5165 + }, + { + "epoch": 1.1911459534240258, + "grad_norm": 1.46357622305891, + "learning_rate": 
7.691505531982316e-07, + "loss": 0.3794770836830139, + "step": 5166 + }, + { + "epoch": 1.1913765275536086, + "grad_norm": 1.73725405615964, + "learning_rate": 7.687796068045455e-07, + "loss": 0.4633198082447052, + "step": 5167 + }, + { + "epoch": 1.191607101683191, + "grad_norm": 1.4824242158713679, + "learning_rate": 7.684086940210875e-07, + "loss": 0.5080294609069824, + "step": 5168 + }, + { + "epoch": 1.1918376758127738, + "grad_norm": 1.4742940614632714, + "learning_rate": 7.680378149017724e-07, + "loss": 0.3952289819717407, + "step": 5169 + }, + { + "epoch": 1.1920682499423565, + "grad_norm": 1.6284523488523228, + "learning_rate": 7.676669695005122e-07, + "loss": 0.4518551528453827, + "step": 5170 + }, + { + "epoch": 1.1922988240719392, + "grad_norm": 1.3915500318606786, + "learning_rate": 7.672961578712125e-07, + "loss": 0.4752943515777588, + "step": 5171 + }, + { + "epoch": 1.1925293982015217, + "grad_norm": 1.4424968675316805, + "learning_rate": 7.669253800677744e-07, + "loss": 0.5059680342674255, + "step": 5172 + }, + { + "epoch": 1.1927599723311044, + "grad_norm": 1.4513506332822887, + "learning_rate": 7.665546361440949e-07, + "loss": 0.47073960304260254, + "step": 5173 + }, + { + "epoch": 1.1929905464606871, + "grad_norm": 1.6974826094600077, + "learning_rate": 7.661839261540644e-07, + "loss": 0.5851496458053589, + "step": 5174 + }, + { + "epoch": 1.1932211205902699, + "grad_norm": 1.4255244135326766, + "learning_rate": 7.658132501515701e-07, + "loss": 0.44255387783050537, + "step": 5175 + }, + { + "epoch": 1.1934516947198524, + "grad_norm": 1.7360033352973823, + "learning_rate": 7.654426081904931e-07, + "loss": 0.543785810470581, + "step": 5176 + }, + { + "epoch": 1.193682268849435, + "grad_norm": 1.697289945139709, + "learning_rate": 7.650720003247107e-07, + "loss": 0.503501296043396, + "step": 5177 + }, + { + "epoch": 1.1939128429790178, + "grad_norm": 1.6448034142146566, + "learning_rate": 7.647014266080935e-07, + "loss": 0.43894368410110474, + 
"step": 5178 + }, + { + "epoch": 1.1941434171086005, + "grad_norm": 1.9780925681836061, + "learning_rate": 7.643308870945088e-07, + "loss": 0.5014036297798157, + "step": 5179 + }, + { + "epoch": 1.194373991238183, + "grad_norm": 1.3813934145743847, + "learning_rate": 7.639603818378178e-07, + "loss": 0.4859309196472168, + "step": 5180 + }, + { + "epoch": 1.1946045653677657, + "grad_norm": 1.611175852060371, + "learning_rate": 7.635899108918781e-07, + "loss": 0.40631920099258423, + "step": 5181 + }, + { + "epoch": 1.1948351394973484, + "grad_norm": 1.923584573200039, + "learning_rate": 7.632194743105405e-07, + "loss": 0.5206565856933594, + "step": 5182 + }, + { + "epoch": 1.1950657136269311, + "grad_norm": 1.659582338573284, + "learning_rate": 7.628490721476517e-07, + "loss": 0.5052351355552673, + "step": 5183 + }, + { + "epoch": 1.1952962877565136, + "grad_norm": 1.3967739180573415, + "learning_rate": 7.624787044570543e-07, + "loss": 0.4921465516090393, + "step": 5184 + }, + { + "epoch": 1.1955268618860964, + "grad_norm": 1.2706689377506823, + "learning_rate": 7.621083712925839e-07, + "loss": 0.3307859003543854, + "step": 5185 + }, + { + "epoch": 1.195757436015679, + "grad_norm": 1.5942715812711645, + "learning_rate": 7.617380727080728e-07, + "loss": 0.4276743531227112, + "step": 5186 + }, + { + "epoch": 1.1959880101452618, + "grad_norm": 1.434739100338101, + "learning_rate": 7.613678087573475e-07, + "loss": 0.5065702795982361, + "step": 5187 + }, + { + "epoch": 1.1962185842748443, + "grad_norm": 1.2918886211693255, + "learning_rate": 7.609975794942301e-07, + "loss": 0.3588709533214569, + "step": 5188 + }, + { + "epoch": 1.196449158404427, + "grad_norm": 1.4907134183008088, + "learning_rate": 7.606273849725362e-07, + "loss": 0.4296506941318512, + "step": 5189 + }, + { + "epoch": 1.1966797325340097, + "grad_norm": 1.5501182036176049, + "learning_rate": 7.602572252460782e-07, + "loss": 0.517792820930481, + "step": 5190 + }, + { + "epoch": 1.1969103066635924, + 
"grad_norm": 1.6883448687359832, + "learning_rate": 7.598871003686619e-07, + "loss": 0.38939881324768066, + "step": 5191 + }, + { + "epoch": 1.197140880793175, + "grad_norm": 1.5288548185908284, + "learning_rate": 7.595170103940896e-07, + "loss": 0.5759290456771851, + "step": 5192 + }, + { + "epoch": 1.1973714549227576, + "grad_norm": 1.975229876516129, + "learning_rate": 7.591469553761569e-07, + "loss": 0.4705851078033447, + "step": 5193 + }, + { + "epoch": 1.1976020290523404, + "grad_norm": 1.4820736709912923, + "learning_rate": 7.587769353686548e-07, + "loss": 0.5137619972229004, + "step": 5194 + }, + { + "epoch": 1.197832603181923, + "grad_norm": 1.426346211238444, + "learning_rate": 7.584069504253701e-07, + "loss": 0.43207496404647827, + "step": 5195 + }, + { + "epoch": 1.1980631773115056, + "grad_norm": 1.7446559629267169, + "learning_rate": 7.580370006000835e-07, + "loss": 0.3976139426231384, + "step": 5196 + }, + { + "epoch": 1.1982937514410883, + "grad_norm": 1.3117053560833851, + "learning_rate": 7.576670859465715e-07, + "loss": 0.41323673725128174, + "step": 5197 + }, + { + "epoch": 1.198524325570671, + "grad_norm": 1.5110343718270132, + "learning_rate": 7.57297206518604e-07, + "loss": 0.404024600982666, + "step": 5198 + }, + { + "epoch": 1.1987548997002535, + "grad_norm": 1.3684281900258655, + "learning_rate": 7.569273623699475e-07, + "loss": 0.4010540843009949, + "step": 5199 + }, + { + "epoch": 1.1989854738298362, + "grad_norm": 1.5739020793077496, + "learning_rate": 7.565575535543623e-07, + "loss": 0.44299256801605225, + "step": 5200 + }, + { + "epoch": 1.199216047959419, + "grad_norm": 1.5204166282494558, + "learning_rate": 7.561877801256041e-07, + "loss": 0.5217546820640564, + "step": 5201 + }, + { + "epoch": 1.1994466220890017, + "grad_norm": 1.868873770331591, + "learning_rate": 7.558180421374229e-07, + "loss": 0.5192688703536987, + "step": 5202 + }, + { + "epoch": 1.1996771962185844, + "grad_norm": 1.5743910950617057, + "learning_rate": 
7.554483396435637e-07, + "loss": 0.38272884488105774, + "step": 5203 + }, + { + "epoch": 1.1999077703481669, + "grad_norm": 1.4246723536184043, + "learning_rate": 7.550786726977673e-07, + "loss": 0.474464476108551, + "step": 5204 + }, + { + "epoch": 1.2001383444777496, + "grad_norm": 1.6360159300410695, + "learning_rate": 7.547090413537676e-07, + "loss": 0.540134072303772, + "step": 5205 + }, + { + "epoch": 1.2003689186073323, + "grad_norm": 1.4752644193711169, + "learning_rate": 7.543394456652948e-07, + "loss": 0.4662882089614868, + "step": 5206 + }, + { + "epoch": 1.2005994927369148, + "grad_norm": 1.6858064119472538, + "learning_rate": 7.539698856860732e-07, + "loss": 0.440970778465271, + "step": 5207 + }, + { + "epoch": 1.2008300668664975, + "grad_norm": 1.3786365004169476, + "learning_rate": 7.536003614698225e-07, + "loss": 0.41787397861480713, + "step": 5208 + }, + { + "epoch": 1.2010606409960802, + "grad_norm": 1.4726677497641942, + "learning_rate": 7.532308730702561e-07, + "loss": 0.5503408908843994, + "step": 5209 + }, + { + "epoch": 1.201291215125663, + "grad_norm": 1.4739960164302617, + "learning_rate": 7.528614205410833e-07, + "loss": 0.43713903427124023, + "step": 5210 + }, + { + "epoch": 1.2015217892552457, + "grad_norm": 1.5362481289460599, + "learning_rate": 7.524920039360076e-07, + "loss": 0.4145667552947998, + "step": 5211 + }, + { + "epoch": 1.2017523633848282, + "grad_norm": 1.4800845890771783, + "learning_rate": 7.521226233087279e-07, + "loss": 0.4307587146759033, + "step": 5212 + }, + { + "epoch": 1.2019829375144109, + "grad_norm": 1.436182742461266, + "learning_rate": 7.517532787129369e-07, + "loss": 0.43784570693969727, + "step": 5213 + }, + { + "epoch": 1.2022135116439936, + "grad_norm": 1.3395031095564736, + "learning_rate": 7.513839702023226e-07, + "loss": 0.40003830194473267, + "step": 5214 + }, + { + "epoch": 1.202444085773576, + "grad_norm": 1.4786298792735793, + "learning_rate": 7.510146978305682e-07, + "loss": 0.4880738854408264, + 
"step": 5215 + }, + { + "epoch": 1.2026746599031588, + "grad_norm": 1.31895753202322, + "learning_rate": 7.506454616513505e-07, + "loss": 0.39548349380493164, + "step": 5216 + }, + { + "epoch": 1.2029052340327415, + "grad_norm": 1.5189592384869435, + "learning_rate": 7.502762617183425e-07, + "loss": 0.4060090184211731, + "step": 5217 + }, + { + "epoch": 1.2031358081623242, + "grad_norm": 1.6902238907281657, + "learning_rate": 7.499070980852101e-07, + "loss": 0.44657808542251587, + "step": 5218 + }, + { + "epoch": 1.203366382291907, + "grad_norm": 1.553015362629627, + "learning_rate": 7.495379708056161e-07, + "loss": 0.5283595323562622, + "step": 5219 + }, + { + "epoch": 1.2035969564214895, + "grad_norm": 1.5940858647104894, + "learning_rate": 7.49168879933216e-07, + "loss": 0.4424205422401428, + "step": 5220 + }, + { + "epoch": 1.2038275305510722, + "grad_norm": 1.4929497446465205, + "learning_rate": 7.487998255216619e-07, + "loss": 0.4998319745063782, + "step": 5221 + }, + { + "epoch": 1.2040581046806549, + "grad_norm": 1.3437939609448373, + "learning_rate": 7.484308076245987e-07, + "loss": 0.3821876645088196, + "step": 5222 + }, + { + "epoch": 1.2042886788102374, + "grad_norm": 1.4227177114495277, + "learning_rate": 7.480618262956669e-07, + "loss": 0.4567919373512268, + "step": 5223 + }, + { + "epoch": 1.20451925293982, + "grad_norm": 1.4207326358395804, + "learning_rate": 7.476928815885026e-07, + "loss": 0.4561428427696228, + "step": 5224 + }, + { + "epoch": 1.2047498270694028, + "grad_norm": 1.5720016799439587, + "learning_rate": 7.473239735567344e-07, + "loss": 0.4384823739528656, + "step": 5225 + }, + { + "epoch": 1.2049804011989855, + "grad_norm": 1.518914607229236, + "learning_rate": 7.469551022539877e-07, + "loss": 0.42840123176574707, + "step": 5226 + }, + { + "epoch": 1.2052109753285682, + "grad_norm": 1.4031825092609558, + "learning_rate": 7.465862677338812e-07, + "loss": 0.39553213119506836, + "step": 5227 + }, + { + "epoch": 1.2054415494581507, + 
"grad_norm": 1.521464998921144, + "learning_rate": 7.462174700500295e-07, + "loss": 0.4325043559074402, + "step": 5228 + }, + { + "epoch": 1.2056721235877335, + "grad_norm": 1.7451009485961195, + "learning_rate": 7.4584870925604e-07, + "loss": 0.5004623532295227, + "step": 5229 + }, + { + "epoch": 1.2059026977173162, + "grad_norm": 1.6975060246760258, + "learning_rate": 7.454799854055165e-07, + "loss": 0.42296791076660156, + "step": 5230 + }, + { + "epoch": 1.2061332718468987, + "grad_norm": 1.7859122255595659, + "learning_rate": 7.451112985520565e-07, + "loss": 0.45638370513916016, + "step": 5231 + }, + { + "epoch": 1.2063638459764814, + "grad_norm": 1.9018837416313183, + "learning_rate": 7.447426487492528e-07, + "loss": 0.5134493112564087, + "step": 5232 + }, + { + "epoch": 1.206594420106064, + "grad_norm": 1.382989024686568, + "learning_rate": 7.443740360506918e-07, + "loss": 0.4132578372955322, + "step": 5233 + }, + { + "epoch": 1.2068249942356468, + "grad_norm": 1.321784021070878, + "learning_rate": 7.440054605099552e-07, + "loss": 0.4363224506378174, + "step": 5234 + }, + { + "epoch": 1.2070555683652295, + "grad_norm": 1.4395608486144074, + "learning_rate": 7.4363692218062e-07, + "loss": 0.44970041513442993, + "step": 5235 + }, + { + "epoch": 1.207286142494812, + "grad_norm": 1.3219627332758312, + "learning_rate": 7.432684211162556e-07, + "loss": 0.39787235856056213, + "step": 5236 + }, + { + "epoch": 1.2075167166243947, + "grad_norm": 1.694639970069785, + "learning_rate": 7.428999573704284e-07, + "loss": 0.46057572960853577, + "step": 5237 + }, + { + "epoch": 1.2077472907539775, + "grad_norm": 1.3954230269661139, + "learning_rate": 7.42531530996698e-07, + "loss": 0.46754559874534607, + "step": 5238 + }, + { + "epoch": 1.20797786488356, + "grad_norm": 1.4060087118514482, + "learning_rate": 7.42163142048619e-07, + "loss": 0.5072697401046753, + "step": 5239 + }, + { + "epoch": 1.2082084390131427, + "grad_norm": 1.5355585762921151, + "learning_rate": 
7.417947905797403e-07, + "loss": 0.4691959023475647, + "step": 5240 + }, + { + "epoch": 1.2084390131427254, + "grad_norm": 1.4596733170422231, + "learning_rate": 7.414264766436056e-07, + "loss": 0.43248072266578674, + "step": 5241 + }, + { + "epoch": 1.208669587272308, + "grad_norm": 1.8386458599943265, + "learning_rate": 7.410582002937534e-07, + "loss": 0.4748457968235016, + "step": 5242 + }, + { + "epoch": 1.2089001614018908, + "grad_norm": 1.413498638420547, + "learning_rate": 7.406899615837157e-07, + "loss": 0.4682820439338684, + "step": 5243 + }, + { + "epoch": 1.2091307355314733, + "grad_norm": 1.3788557575990639, + "learning_rate": 7.403217605670205e-07, + "loss": 0.41747021675109863, + "step": 5244 + }, + { + "epoch": 1.209361309661056, + "grad_norm": 1.5523861247321795, + "learning_rate": 7.399535972971886e-07, + "loss": 0.4968727231025696, + "step": 5245 + }, + { + "epoch": 1.2095918837906388, + "grad_norm": 1.6255626899279143, + "learning_rate": 7.395854718277372e-07, + "loss": 0.486778199672699, + "step": 5246 + }, + { + "epoch": 1.2098224579202213, + "grad_norm": 1.938770231002498, + "learning_rate": 7.392173842121765e-07, + "loss": 0.5153725147247314, + "step": 5247 + }, + { + "epoch": 1.210053032049804, + "grad_norm": 1.6258479412197122, + "learning_rate": 7.388493345040123e-07, + "loss": 0.42352354526519775, + "step": 5248 + }, + { + "epoch": 1.2102836061793867, + "grad_norm": 1.477454043811349, + "learning_rate": 7.384813227567437e-07, + "loss": 0.363994300365448, + "step": 5249 + }, + { + "epoch": 1.2105141803089694, + "grad_norm": 1.3450193947115454, + "learning_rate": 7.381133490238654e-07, + "loss": 0.44195863604545593, + "step": 5250 + }, + { + "epoch": 1.2107447544385521, + "grad_norm": 1.6510262733932026, + "learning_rate": 7.377454133588657e-07, + "loss": 0.5031026601791382, + "step": 5251 + }, + { + "epoch": 1.2109753285681346, + "grad_norm": 1.1126223170422647, + "learning_rate": 7.373775158152284e-07, + "loss": 0.3900304436683655, + 
"step": 5252 + }, + { + "epoch": 1.2112059026977173, + "grad_norm": 1.4718461813811798, + "learning_rate": 7.370096564464308e-07, + "loss": 0.406912624835968, + "step": 5253 + }, + { + "epoch": 1.2114364768273, + "grad_norm": 1.2742945351379469, + "learning_rate": 7.366418353059445e-07, + "loss": 0.407238632440567, + "step": 5254 + }, + { + "epoch": 1.2116670509568825, + "grad_norm": 2.3145771276343625, + "learning_rate": 7.36274052447237e-07, + "loss": 0.5605549216270447, + "step": 5255 + }, + { + "epoch": 1.2118976250864653, + "grad_norm": 1.7547311772877803, + "learning_rate": 7.359063079237684e-07, + "loss": 0.5016111731529236, + "step": 5256 + }, + { + "epoch": 1.212128199216048, + "grad_norm": 1.31999939383151, + "learning_rate": 7.355386017889946e-07, + "loss": 0.38812315464019775, + "step": 5257 + }, + { + "epoch": 1.2123587733456307, + "grad_norm": 1.5177330463551633, + "learning_rate": 7.35170934096365e-07, + "loss": 0.46022963523864746, + "step": 5258 + }, + { + "epoch": 1.2125893474752132, + "grad_norm": 1.4118628857930515, + "learning_rate": 7.348033048993246e-07, + "loss": 0.40029624104499817, + "step": 5259 + }, + { + "epoch": 1.212819921604796, + "grad_norm": 1.4051430521275825, + "learning_rate": 7.344357142513111e-07, + "loss": 0.4331943392753601, + "step": 5260 + }, + { + "epoch": 1.2130504957343786, + "grad_norm": 1.565074125850335, + "learning_rate": 7.340681622057582e-07, + "loss": 0.43757596611976624, + "step": 5261 + }, + { + "epoch": 1.2132810698639613, + "grad_norm": 1.7743971563599887, + "learning_rate": 7.337006488160931e-07, + "loss": 0.49733203649520874, + "step": 5262 + }, + { + "epoch": 1.2135116439935438, + "grad_norm": 1.341577967095045, + "learning_rate": 7.333331741357373e-07, + "loss": 0.35552018880844116, + "step": 5263 + }, + { + "epoch": 1.2137422181231265, + "grad_norm": 1.6321675762702066, + "learning_rate": 7.329657382181074e-07, + "loss": 0.4102798104286194, + "step": 5264 + }, + { + "epoch": 1.2139727922527093, + 
"grad_norm": 1.4184297160567871, + "learning_rate": 7.325983411166136e-07, + "loss": 0.4517349600791931, + "step": 5265 + }, + { + "epoch": 1.214203366382292, + "grad_norm": 1.6427775893660324, + "learning_rate": 7.322309828846613e-07, + "loss": 0.48924458026885986, + "step": 5266 + }, + { + "epoch": 1.2144339405118745, + "grad_norm": 1.4030974508932201, + "learning_rate": 7.31863663575649e-07, + "loss": 0.38971561193466187, + "step": 5267 + }, + { + "epoch": 1.2146645146414572, + "grad_norm": 1.6155044970268224, + "learning_rate": 7.31496383242971e-07, + "loss": 0.6503559350967407, + "step": 5268 + }, + { + "epoch": 1.21489508877104, + "grad_norm": 1.6905359606856467, + "learning_rate": 7.311291419400146e-07, + "loss": 0.4615272879600525, + "step": 5269 + }, + { + "epoch": 1.2151256629006226, + "grad_norm": 1.6629441467357413, + "learning_rate": 7.307619397201625e-07, + "loss": 0.3793429732322693, + "step": 5270 + }, + { + "epoch": 1.2153562370302051, + "grad_norm": 1.3076578533376795, + "learning_rate": 7.303947766367909e-07, + "loss": 0.48186585307121277, + "step": 5271 + }, + { + "epoch": 1.2155868111597878, + "grad_norm": 1.4243590091370186, + "learning_rate": 7.300276527432713e-07, + "loss": 0.4051778018474579, + "step": 5272 + }, + { + "epoch": 1.2158173852893706, + "grad_norm": 1.6820510248806995, + "learning_rate": 7.296605680929684e-07, + "loss": 0.43364250659942627, + "step": 5273 + }, + { + "epoch": 1.2160479594189533, + "grad_norm": 1.6130796939421093, + "learning_rate": 7.292935227392414e-07, + "loss": 0.4893898367881775, + "step": 5274 + }, + { + "epoch": 1.2162785335485358, + "grad_norm": 1.240780138685616, + "learning_rate": 7.289265167354448e-07, + "loss": 0.43125462532043457, + "step": 5275 + }, + { + "epoch": 1.2165091076781185, + "grad_norm": 1.6108443522760163, + "learning_rate": 7.285595501349258e-07, + "loss": 0.4086509943008423, + "step": 5276 + }, + { + "epoch": 1.2167396818077012, + "grad_norm": 1.838256686394942, + "learning_rate": 
7.281926229910274e-07, + "loss": 0.5176471471786499, + "step": 5277 + }, + { + "epoch": 1.216970255937284, + "grad_norm": 1.8145364687667531, + "learning_rate": 7.278257353570857e-07, + "loss": 0.4783210754394531, + "step": 5278 + }, + { + "epoch": 1.2172008300668664, + "grad_norm": 1.5012148176529632, + "learning_rate": 7.274588872864322e-07, + "loss": 0.4847145080566406, + "step": 5279 + }, + { + "epoch": 1.2174314041964491, + "grad_norm": 1.4076947828029491, + "learning_rate": 7.270920788323911e-07, + "loss": 0.4691849946975708, + "step": 5280 + }, + { + "epoch": 1.2176619783260318, + "grad_norm": 1.8729494542899485, + "learning_rate": 7.267253100482824e-07, + "loss": 0.5755687952041626, + "step": 5281 + }, + { + "epoch": 1.2178925524556146, + "grad_norm": 1.3639853941099451, + "learning_rate": 7.263585809874193e-07, + "loss": 0.42995721101760864, + "step": 5282 + }, + { + "epoch": 1.218123126585197, + "grad_norm": 1.4560966669318844, + "learning_rate": 7.259918917031101e-07, + "loss": 0.501590371131897, + "step": 5283 + }, + { + "epoch": 1.2183537007147798, + "grad_norm": 1.5326641731074693, + "learning_rate": 7.256252422486563e-07, + "loss": 0.5499469041824341, + "step": 5284 + }, + { + "epoch": 1.2185842748443625, + "grad_norm": 1.7075536366613502, + "learning_rate": 7.25258632677354e-07, + "loss": 0.4567297399044037, + "step": 5285 + }, + { + "epoch": 1.2188148489739452, + "grad_norm": 1.3251311548344207, + "learning_rate": 7.248920630424942e-07, + "loss": 0.4046020805835724, + "step": 5286 + }, + { + "epoch": 1.2190454231035277, + "grad_norm": 1.4721989927884918, + "learning_rate": 7.245255333973608e-07, + "loss": 0.3534840941429138, + "step": 5287 + }, + { + "epoch": 1.2192759972331104, + "grad_norm": 1.4151850401024268, + "learning_rate": 7.241590437952331e-07, + "loss": 0.45795637369155884, + "step": 5288 + }, + { + "epoch": 1.2195065713626931, + "grad_norm": 1.4921564176260302, + "learning_rate": 7.237925942893839e-07, + "loss": 0.3984150290489197, + 
"step": 5289 + }, + { + "epoch": 1.2197371454922759, + "grad_norm": 1.5617581917582364, + "learning_rate": 7.234261849330807e-07, + "loss": 0.46833336353302, + "step": 5290 + }, + { + "epoch": 1.2199677196218583, + "grad_norm": 1.6200691445613622, + "learning_rate": 7.230598157795842e-07, + "loss": 0.5395709276199341, + "step": 5291 + }, + { + "epoch": 1.220198293751441, + "grad_norm": 1.300141768975315, + "learning_rate": 7.226934868821505e-07, + "loss": 0.4556152820587158, + "step": 5292 + }, + { + "epoch": 1.2204288678810238, + "grad_norm": 1.5916352600329198, + "learning_rate": 7.223271982940287e-07, + "loss": 0.49564266204833984, + "step": 5293 + }, + { + "epoch": 1.2206594420106065, + "grad_norm": 1.5492667362910795, + "learning_rate": 7.219609500684625e-07, + "loss": 0.5389127731323242, + "step": 5294 + }, + { + "epoch": 1.220890016140189, + "grad_norm": 1.3125997254034645, + "learning_rate": 7.215947422586905e-07, + "loss": 0.48815661668777466, + "step": 5295 + }, + { + "epoch": 1.2211205902697717, + "grad_norm": 1.6576709424363434, + "learning_rate": 7.21228574917944e-07, + "loss": 0.4204339385032654, + "step": 5296 + }, + { + "epoch": 1.2213511643993544, + "grad_norm": 1.2807688149232648, + "learning_rate": 7.208624480994494e-07, + "loss": 0.39993199706077576, + "step": 5297 + }, + { + "epoch": 1.2215817385289371, + "grad_norm": 1.7420778835945019, + "learning_rate": 7.204963618564268e-07, + "loss": 0.5679433941841125, + "step": 5298 + }, + { + "epoch": 1.2218123126585196, + "grad_norm": 1.819503614929131, + "learning_rate": 7.201303162420913e-07, + "loss": 0.46620815992355347, + "step": 5299 + }, + { + "epoch": 1.2220428867881024, + "grad_norm": 1.4667553556365653, + "learning_rate": 7.1976431130965e-07, + "loss": 0.44684547185897827, + "step": 5300 + }, + { + "epoch": 1.222273460917685, + "grad_norm": 1.6182813529173974, + "learning_rate": 7.193983471123066e-07, + "loss": 0.4518858790397644, + "step": 5301 + }, + { + "epoch": 1.2225040350472678, + 
"grad_norm": 1.497058969625444, + "learning_rate": 7.190324237032569e-07, + "loss": 0.3966304659843445, + "step": 5302 + }, + { + "epoch": 1.2227346091768503, + "grad_norm": 1.7688402904846452, + "learning_rate": 7.186665411356925e-07, + "loss": 0.5541782379150391, + "step": 5303 + }, + { + "epoch": 1.222965183306433, + "grad_norm": 1.5748150394963076, + "learning_rate": 7.183006994627972e-07, + "loss": 0.3986799120903015, + "step": 5304 + }, + { + "epoch": 1.2231957574360157, + "grad_norm": 1.3179167901427211, + "learning_rate": 7.1793489873775e-07, + "loss": 0.485867977142334, + "step": 5305 + }, + { + "epoch": 1.2234263315655984, + "grad_norm": 1.6264368495030206, + "learning_rate": 7.175691390137244e-07, + "loss": 0.40187692642211914, + "step": 5306 + }, + { + "epoch": 1.223656905695181, + "grad_norm": 1.5085798270078894, + "learning_rate": 7.172034203438864e-07, + "loss": 0.4679393172264099, + "step": 5307 + }, + { + "epoch": 1.2238874798247636, + "grad_norm": 1.3178949369734356, + "learning_rate": 7.168377427813974e-07, + "loss": 0.512301504611969, + "step": 5308 + }, + { + "epoch": 1.2241180539543464, + "grad_norm": 1.4684075358167812, + "learning_rate": 7.164721063794122e-07, + "loss": 0.5340646505355835, + "step": 5309 + }, + { + "epoch": 1.224348628083929, + "grad_norm": 1.6528941936609833, + "learning_rate": 7.1610651119108e-07, + "loss": 0.4757506847381592, + "step": 5310 + }, + { + "epoch": 1.2245792022135116, + "grad_norm": 1.5982652868975813, + "learning_rate": 7.157409572695434e-07, + "loss": 0.5697519779205322, + "step": 5311 + }, + { + "epoch": 1.2248097763430943, + "grad_norm": 1.4427165421847559, + "learning_rate": 7.153754446679395e-07, + "loss": 0.47521811723709106, + "step": 5312 + }, + { + "epoch": 1.225040350472677, + "grad_norm": 1.4092560589123113, + "learning_rate": 7.150099734393997e-07, + "loss": 0.40484973788261414, + "step": 5313 + }, + { + "epoch": 1.2252709246022597, + "grad_norm": 1.4095470452598946, + "learning_rate": 
7.146445436370481e-07, + "loss": 0.4465969204902649, + "step": 5314 + }, + { + "epoch": 1.2255014987318422, + "grad_norm": 1.5543895211488108, + "learning_rate": 7.142791553140044e-07, + "loss": 0.44878089427948, + "step": 5315 + }, + { + "epoch": 1.225732072861425, + "grad_norm": 1.657847170962442, + "learning_rate": 7.139138085233809e-07, + "loss": 0.5049536228179932, + "step": 5316 + }, + { + "epoch": 1.2259626469910077, + "grad_norm": 1.377588971885486, + "learning_rate": 7.135485033182847e-07, + "loss": 0.42945951223373413, + "step": 5317 + }, + { + "epoch": 1.2261932211205901, + "grad_norm": 1.607627236207016, + "learning_rate": 7.131832397518167e-07, + "loss": 0.4668564200401306, + "step": 5318 + }, + { + "epoch": 1.2264237952501729, + "grad_norm": 1.640684584420395, + "learning_rate": 7.128180178770718e-07, + "loss": 0.4691551625728607, + "step": 5319 + }, + { + "epoch": 1.2266543693797556, + "grad_norm": 1.4653351758865718, + "learning_rate": 7.124528377471382e-07, + "loss": 0.4306211769580841, + "step": 5320 + }, + { + "epoch": 1.2268849435093383, + "grad_norm": 1.7130888177954928, + "learning_rate": 7.120876994150991e-07, + "loss": 0.4986322522163391, + "step": 5321 + }, + { + "epoch": 1.227115517638921, + "grad_norm": 1.4775997138779564, + "learning_rate": 7.117226029340304e-07, + "loss": 0.4058566093444824, + "step": 5322 + }, + { + "epoch": 1.2273460917685035, + "grad_norm": 1.3729187298835452, + "learning_rate": 7.113575483570036e-07, + "loss": 0.390174925327301, + "step": 5323 + }, + { + "epoch": 1.2275766658980862, + "grad_norm": 1.3070483816242904, + "learning_rate": 7.109925357370821e-07, + "loss": 0.38822996616363525, + "step": 5324 + }, + { + "epoch": 1.227807240027669, + "grad_norm": 1.3599088173875424, + "learning_rate": 7.106275651273244e-07, + "loss": 0.47792741656303406, + "step": 5325 + }, + { + "epoch": 1.2280378141572514, + "grad_norm": 1.52666177684785, + "learning_rate": 7.102626365807833e-07, + "loss": 0.5332789421081543, + "step": 
5326 + }, + { + "epoch": 1.2282683882868342, + "grad_norm": 1.4337525635961101, + "learning_rate": 7.098977501505036e-07, + "loss": 0.5325096845626831, + "step": 5327 + }, + { + "epoch": 1.2284989624164169, + "grad_norm": 1.6185088994304762, + "learning_rate": 7.095329058895267e-07, + "loss": 0.4184231162071228, + "step": 5328 + }, + { + "epoch": 1.2287295365459996, + "grad_norm": 1.7570013482364435, + "learning_rate": 7.091681038508852e-07, + "loss": 0.43037641048431396, + "step": 5329 + }, + { + "epoch": 1.2289601106755823, + "grad_norm": 1.5067774692843796, + "learning_rate": 7.088033440876078e-07, + "loss": 0.4466821551322937, + "step": 5330 + }, + { + "epoch": 1.2291906848051648, + "grad_norm": 1.5083021571464743, + "learning_rate": 7.084386266527151e-07, + "loss": 0.35853004455566406, + "step": 5331 + }, + { + "epoch": 1.2294212589347475, + "grad_norm": 1.542402337323393, + "learning_rate": 7.080739515992231e-07, + "loss": 0.44986268877983093, + "step": 5332 + }, + { + "epoch": 1.2296518330643302, + "grad_norm": 1.7104999289185845, + "learning_rate": 7.07709318980141e-07, + "loss": 0.3563602566719055, + "step": 5333 + }, + { + "epoch": 1.2298824071939127, + "grad_norm": 1.5401970805558025, + "learning_rate": 7.073447288484715e-07, + "loss": 0.4505435824394226, + "step": 5334 + }, + { + "epoch": 1.2301129813234954, + "grad_norm": 1.3508208021904817, + "learning_rate": 7.069801812572116e-07, + "loss": 0.4477807283401489, + "step": 5335 + }, + { + "epoch": 1.2303435554530782, + "grad_norm": 1.5084663891676386, + "learning_rate": 7.066156762593518e-07, + "loss": 0.4470565915107727, + "step": 5336 + }, + { + "epoch": 1.2305741295826609, + "grad_norm": 1.4627780913359043, + "learning_rate": 7.062512139078773e-07, + "loss": 0.4236464500427246, + "step": 5337 + }, + { + "epoch": 1.2308047037122436, + "grad_norm": 1.3002436810863733, + "learning_rate": 7.058867942557655e-07, + "loss": 0.3221476376056671, + "step": 5338 + }, + { + "epoch": 1.231035277841826, + 
"grad_norm": 1.818660153327524, + "learning_rate": 7.055224173559891e-07, + "loss": 0.502305269241333, + "step": 5339 + }, + { + "epoch": 1.2312658519714088, + "grad_norm": 1.655814956644188, + "learning_rate": 7.051580832615136e-07, + "loss": 0.5121853351593018, + "step": 5340 + }, + { + "epoch": 1.2314964261009915, + "grad_norm": 1.713071870874518, + "learning_rate": 7.047937920252991e-07, + "loss": 0.5468438863754272, + "step": 5341 + }, + { + "epoch": 1.231727000230574, + "grad_norm": 1.2030374980808431, + "learning_rate": 7.044295437002985e-07, + "loss": 0.5026402473449707, + "step": 5342 + }, + { + "epoch": 1.2319575743601567, + "grad_norm": 1.9445671085046203, + "learning_rate": 7.040653383394596e-07, + "loss": 0.5205342173576355, + "step": 5343 + }, + { + "epoch": 1.2321881484897395, + "grad_norm": 1.5970504229179872, + "learning_rate": 7.037011759957228e-07, + "loss": 0.5184727311134338, + "step": 5344 + }, + { + "epoch": 1.2324187226193222, + "grad_norm": 1.3779493729990695, + "learning_rate": 7.033370567220227e-07, + "loss": 0.414316862821579, + "step": 5345 + }, + { + "epoch": 1.2326492967489049, + "grad_norm": 1.4260441300832385, + "learning_rate": 7.029729805712885e-07, + "loss": 0.42133980989456177, + "step": 5346 + }, + { + "epoch": 1.2328798708784874, + "grad_norm": 1.8139584962445312, + "learning_rate": 7.026089475964414e-07, + "loss": 0.4888553321361542, + "step": 5347 + }, + { + "epoch": 1.23311044500807, + "grad_norm": 1.3419182130591616, + "learning_rate": 7.022449578503979e-07, + "loss": 0.4702431857585907, + "step": 5348 + }, + { + "epoch": 1.2333410191376528, + "grad_norm": 1.7237576970327266, + "learning_rate": 7.018810113860672e-07, + "loss": 0.5312628746032715, + "step": 5349 + }, + { + "epoch": 1.2335715932672353, + "grad_norm": 1.3183810824607851, + "learning_rate": 7.015171082563533e-07, + "loss": 0.5297777056694031, + "step": 5350 + }, + { + "epoch": 1.233802167396818, + "grad_norm": 1.4423147751678271, + "learning_rate": 
7.011532485141524e-07, + "loss": 0.5172504782676697, + "step": 5351 + }, + { + "epoch": 1.2340327415264007, + "grad_norm": 1.4663357988839691, + "learning_rate": 7.007894322123556e-07, + "loss": 0.4288995862007141, + "step": 5352 + }, + { + "epoch": 1.2342633156559835, + "grad_norm": 1.373863251988179, + "learning_rate": 7.004256594038475e-07, + "loss": 0.4194108247756958, + "step": 5353 + }, + { + "epoch": 1.2344938897855662, + "grad_norm": 1.6567765897983155, + "learning_rate": 7.000619301415056e-07, + "loss": 0.48825979232788086, + "step": 5354 + }, + { + "epoch": 1.2347244639151487, + "grad_norm": 1.5674749005570563, + "learning_rate": 6.99698244478202e-07, + "loss": 0.4721163213253021, + "step": 5355 + }, + { + "epoch": 1.2349550380447314, + "grad_norm": 1.4292932334311201, + "learning_rate": 6.993346024668019e-07, + "loss": 0.5104520916938782, + "step": 5356 + }, + { + "epoch": 1.235185612174314, + "grad_norm": 1.757397862406759, + "learning_rate": 6.98971004160165e-07, + "loss": 0.5257378816604614, + "step": 5357 + }, + { + "epoch": 1.2354161863038966, + "grad_norm": 1.5756368498047397, + "learning_rate": 6.986074496111429e-07, + "loss": 0.5624911785125732, + "step": 5358 + }, + { + "epoch": 1.2356467604334793, + "grad_norm": 1.4832170020848512, + "learning_rate": 6.982439388725828e-07, + "loss": 0.5186502933502197, + "step": 5359 + }, + { + "epoch": 1.235877334563062, + "grad_norm": 1.4333093290057806, + "learning_rate": 6.978804719973241e-07, + "loss": 0.42711856961250305, + "step": 5360 + }, + { + "epoch": 1.2361079086926448, + "grad_norm": 1.5710112274218073, + "learning_rate": 6.975170490382013e-07, + "loss": 0.525848388671875, + "step": 5361 + }, + { + "epoch": 1.2363384828222275, + "grad_norm": 1.475742371846223, + "learning_rate": 6.971536700480405e-07, + "loss": 0.41279107332229614, + "step": 5362 + }, + { + "epoch": 1.23656905695181, + "grad_norm": 1.381610773190275, + "learning_rate": 6.967903350796632e-07, + "loss": 0.38868075609207153, + "step": 
5363 + }, + { + "epoch": 1.2367996310813927, + "grad_norm": 1.2852056850014901, + "learning_rate": 6.964270441858837e-07, + "loss": 0.41875284910202026, + "step": 5364 + }, + { + "epoch": 1.2370302052109754, + "grad_norm": 1.6506819982730945, + "learning_rate": 6.960637974195096e-07, + "loss": 0.4754808843135834, + "step": 5365 + }, + { + "epoch": 1.237260779340558, + "grad_norm": 1.367170455716087, + "learning_rate": 6.957005948333434e-07, + "loss": 0.5073249340057373, + "step": 5366 + }, + { + "epoch": 1.2374913534701406, + "grad_norm": 1.4682970250918908, + "learning_rate": 6.953374364801792e-07, + "loss": 0.4545915126800537, + "step": 5367 + }, + { + "epoch": 1.2377219275997233, + "grad_norm": 1.4664699450973697, + "learning_rate": 6.949743224128064e-07, + "loss": 0.42797422409057617, + "step": 5368 + }, + { + "epoch": 1.237952501729306, + "grad_norm": 1.7409270878989862, + "learning_rate": 6.946112526840071e-07, + "loss": 0.570556104183197, + "step": 5369 + }, + { + "epoch": 1.2381830758588885, + "grad_norm": 1.21807525986395, + "learning_rate": 6.942482273465577e-07, + "loss": 0.3866136074066162, + "step": 5370 + }, + { + "epoch": 1.2384136499884713, + "grad_norm": 1.385922338157159, + "learning_rate": 6.938852464532267e-07, + "loss": 0.3716529309749603, + "step": 5371 + }, + { + "epoch": 1.238644224118054, + "grad_norm": 1.5756601150848535, + "learning_rate": 6.935223100567776e-07, + "loss": 0.4781096577644348, + "step": 5372 + }, + { + "epoch": 1.2388747982476367, + "grad_norm": 1.5023911555765588, + "learning_rate": 6.931594182099671e-07, + "loss": 0.4262877106666565, + "step": 5373 + }, + { + "epoch": 1.2391053723772192, + "grad_norm": 1.6023295142223875, + "learning_rate": 6.927965709655444e-07, + "loss": 0.49859267473220825, + "step": 5374 + }, + { + "epoch": 1.239335946506802, + "grad_norm": 1.8550612096678925, + "learning_rate": 6.924337683762539e-07, + "loss": 0.4710119664669037, + "step": 5375 + }, + { + "epoch": 1.2395665206363846, + "grad_norm": 
1.518585467890365, + "learning_rate": 6.92071010494832e-07, + "loss": 0.4974974989891052, + "step": 5376 + }, + { + "epoch": 1.2397970947659673, + "grad_norm": 2.029509938602293, + "learning_rate": 6.917082973740098e-07, + "loss": 0.4118514657020569, + "step": 5377 + }, + { + "epoch": 1.2400276688955498, + "grad_norm": 1.391922482329176, + "learning_rate": 6.913456290665106e-07, + "loss": 0.4223165214061737, + "step": 5378 + }, + { + "epoch": 1.2402582430251325, + "grad_norm": 1.5760276199817416, + "learning_rate": 6.909830056250526e-07, + "loss": 0.4896865487098694, + "step": 5379 + }, + { + "epoch": 1.2404888171547153, + "grad_norm": 1.35318854532684, + "learning_rate": 6.906204271023463e-07, + "loss": 0.36112266778945923, + "step": 5380 + }, + { + "epoch": 1.240719391284298, + "grad_norm": 1.4255868593911465, + "learning_rate": 6.902578935510969e-07, + "loss": 0.4665502905845642, + "step": 5381 + }, + { + "epoch": 1.2409499654138805, + "grad_norm": 1.6036447338223971, + "learning_rate": 6.898954050240013e-07, + "loss": 0.46059858798980713, + "step": 5382 + }, + { + "epoch": 1.2411805395434632, + "grad_norm": 1.4844055015741944, + "learning_rate": 6.895329615737515e-07, + "loss": 0.46149420738220215, + "step": 5383 + }, + { + "epoch": 1.241411113673046, + "grad_norm": 1.5602784439666317, + "learning_rate": 6.891705632530327e-07, + "loss": 0.42226743698120117, + "step": 5384 + }, + { + "epoch": 1.2416416878026286, + "grad_norm": 1.4308699177023212, + "learning_rate": 6.88808210114522e-07, + "loss": 0.45789939165115356, + "step": 5385 + }, + { + "epoch": 1.2418722619322111, + "grad_norm": 1.5754200685163184, + "learning_rate": 6.884459022108922e-07, + "loss": 0.44569891691207886, + "step": 5386 + }, + { + "epoch": 1.2421028360617938, + "grad_norm": 1.4099412845136035, + "learning_rate": 6.880836395948078e-07, + "loss": 0.3971112370491028, + "step": 5387 + }, + { + "epoch": 1.2423334101913766, + "grad_norm": 1.6636550459216706, + "learning_rate": 
6.877214223189278e-07, + "loss": 0.46052566170692444, + "step": 5388 + }, + { + "epoch": 1.2425639843209593, + "grad_norm": 1.2735689149473257, + "learning_rate": 6.873592504359037e-07, + "loss": 0.42730599641799927, + "step": 5389 + }, + { + "epoch": 1.2427945584505418, + "grad_norm": 1.5806143555224212, + "learning_rate": 6.869971239983814e-07, + "loss": 0.4391734004020691, + "step": 5390 + }, + { + "epoch": 1.2430251325801245, + "grad_norm": 1.5314248582389964, + "learning_rate": 6.866350430589989e-07, + "loss": 0.4523593485355377, + "step": 5391 + }, + { + "epoch": 1.2432557067097072, + "grad_norm": 1.587550694342246, + "learning_rate": 6.86273007670389e-07, + "loss": 0.5398315787315369, + "step": 5392 + }, + { + "epoch": 1.24348628083929, + "grad_norm": 1.2298139407771986, + "learning_rate": 6.859110178851767e-07, + "loss": 0.40480807423591614, + "step": 5393 + }, + { + "epoch": 1.2437168549688724, + "grad_norm": 1.4233815325100456, + "learning_rate": 6.855490737559816e-07, + "loss": 0.42483675479888916, + "step": 5394 + }, + { + "epoch": 1.2439474290984551, + "grad_norm": 1.611497963721617, + "learning_rate": 6.851871753354153e-07, + "loss": 0.39951619505882263, + "step": 5395 + }, + { + "epoch": 1.2441780032280378, + "grad_norm": 1.5084898015563448, + "learning_rate": 6.848253226760833e-07, + "loss": 0.48650771379470825, + "step": 5396 + }, + { + "epoch": 1.2444085773576206, + "grad_norm": 1.5899141960647352, + "learning_rate": 6.844635158305853e-07, + "loss": 0.5377830266952515, + "step": 5397 + }, + { + "epoch": 1.244639151487203, + "grad_norm": 1.667763606347776, + "learning_rate": 6.841017548515127e-07, + "loss": 0.4365614950656891, + "step": 5398 + }, + { + "epoch": 1.2448697256167858, + "grad_norm": 1.2560105349082187, + "learning_rate": 6.837400397914519e-07, + "loss": 0.39739400148391724, + "step": 5399 + }, + { + "epoch": 1.2451002997463685, + "grad_norm": 1.3287360038901976, + "learning_rate": 6.833783707029812e-07, + "loss": 0.4005683660507202, + 
"step": 5400 + }, + { + "epoch": 1.2453308738759512, + "grad_norm": 1.6646043641444999, + "learning_rate": 6.830167476386737e-07, + "loss": 0.5635108351707458, + "step": 5401 + }, + { + "epoch": 1.2455614480055337, + "grad_norm": 1.6642180514990483, + "learning_rate": 6.82655170651094e-07, + "loss": 0.4332388639450073, + "step": 5402 + }, + { + "epoch": 1.2457920221351164, + "grad_norm": 1.525164084943155, + "learning_rate": 6.822936397928015e-07, + "loss": 0.47506433725357056, + "step": 5403 + }, + { + "epoch": 1.2460225962646991, + "grad_norm": 1.600563207739989, + "learning_rate": 6.819321551163486e-07, + "loss": 0.5081777572631836, + "step": 5404 + }, + { + "epoch": 1.2462531703942819, + "grad_norm": 1.6650056699718765, + "learning_rate": 6.815707166742801e-07, + "loss": 0.4038957953453064, + "step": 5405 + }, + { + "epoch": 1.2464837445238643, + "grad_norm": 1.759676797230376, + "learning_rate": 6.812093245191354e-07, + "loss": 0.4665706753730774, + "step": 5406 + }, + { + "epoch": 1.246714318653447, + "grad_norm": 1.8957165771048585, + "learning_rate": 6.808479787034459e-07, + "loss": 0.45610785484313965, + "step": 5407 + }, + { + "epoch": 1.2469448927830298, + "grad_norm": 1.443572019443965, + "learning_rate": 6.804866792797377e-07, + "loss": 0.4334958493709564, + "step": 5408 + }, + { + "epoch": 1.2471754669126125, + "grad_norm": 1.4719822396111175, + "learning_rate": 6.801254263005283e-07, + "loss": 0.5505996942520142, + "step": 5409 + }, + { + "epoch": 1.247406041042195, + "grad_norm": 1.5261896109132582, + "learning_rate": 6.797642198183303e-07, + "loss": 0.5589424967765808, + "step": 5410 + }, + { + "epoch": 1.2476366151717777, + "grad_norm": 1.892082521677576, + "learning_rate": 6.794030598856483e-07, + "loss": 0.48142847418785095, + "step": 5411 + }, + { + "epoch": 1.2478671893013604, + "grad_norm": 1.6606812394072976, + "learning_rate": 6.790419465549811e-07, + "loss": 0.5549830198287964, + "step": 5412 + }, + { + "epoch": 1.2480977634309431, + 
"grad_norm": 1.6097248774465256, + "learning_rate": 6.786808798788193e-07, + "loss": 0.5974072217941284, + "step": 5413 + }, + { + "epoch": 1.2483283375605256, + "grad_norm": 1.3333137403479542, + "learning_rate": 6.783198599096484e-07, + "loss": 0.38189029693603516, + "step": 5414 + }, + { + "epoch": 1.2485589116901084, + "grad_norm": 1.4543286006354934, + "learning_rate": 6.779588866999459e-07, + "loss": 0.41150039434432983, + "step": 5415 + }, + { + "epoch": 1.248789485819691, + "grad_norm": 1.451215833026304, + "learning_rate": 6.775979603021828e-07, + "loss": 0.4291636645793915, + "step": 5416 + }, + { + "epoch": 1.2490200599492738, + "grad_norm": 1.2798211834451962, + "learning_rate": 6.772370807688242e-07, + "loss": 0.45324140787124634, + "step": 5417 + }, + { + "epoch": 1.2492506340788563, + "grad_norm": 1.3895968147090427, + "learning_rate": 6.768762481523262e-07, + "loss": 0.4748731851577759, + "step": 5418 + }, + { + "epoch": 1.249481208208439, + "grad_norm": 1.618628812481624, + "learning_rate": 6.765154625051408e-07, + "loss": 0.43602505326271057, + "step": 5419 + }, + { + "epoch": 1.2497117823380217, + "grad_norm": 1.4027608933739075, + "learning_rate": 6.761547238797112e-07, + "loss": 0.49135684967041016, + "step": 5420 + }, + { + "epoch": 1.2499423564676044, + "grad_norm": 1.6315360373382408, + "learning_rate": 6.757940323284747e-07, + "loss": 0.47508272528648376, + "step": 5421 + }, + { + "epoch": 1.250172930597187, + "grad_norm": 1.612865868213556, + "learning_rate": 6.754333879038611e-07, + "loss": 0.399259090423584, + "step": 5422 + }, + { + "epoch": 1.2504035047267696, + "grad_norm": 1.6878741312884291, + "learning_rate": 6.750727906582941e-07, + "loss": 0.426364004611969, + "step": 5423 + }, + { + "epoch": 1.2506340788563524, + "grad_norm": 1.4584807010931917, + "learning_rate": 6.747122406441903e-07, + "loss": 0.4641951322555542, + "step": 5424 + }, + { + "epoch": 1.250864652985935, + "grad_norm": 1.3880451781756755, + "learning_rate": 
6.743517379139585e-07, + "loss": 0.35008323192596436, + "step": 5425 + }, + { + "epoch": 1.2510952271155176, + "grad_norm": 1.4485633708895984, + "learning_rate": 6.739912825200022e-07, + "loss": 0.49627771973609924, + "step": 5426 + }, + { + "epoch": 1.2513258012451003, + "grad_norm": 1.628398042874366, + "learning_rate": 6.736308745147168e-07, + "loss": 0.4926851987838745, + "step": 5427 + }, + { + "epoch": 1.251556375374683, + "grad_norm": 1.622960147434406, + "learning_rate": 6.732705139504917e-07, + "loss": 0.44777536392211914, + "step": 5428 + }, + { + "epoch": 1.2517869495042655, + "grad_norm": 1.6523545202218224, + "learning_rate": 6.729102008797085e-07, + "loss": 0.39160430431365967, + "step": 5429 + }, + { + "epoch": 1.2520175236338482, + "grad_norm": 1.5184849781676724, + "learning_rate": 6.725499353547426e-07, + "loss": 0.4585273861885071, + "step": 5430 + }, + { + "epoch": 1.252248097763431, + "grad_norm": 1.5327675196324342, + "learning_rate": 6.721897174279621e-07, + "loss": 0.5245224237442017, + "step": 5431 + }, + { + "epoch": 1.2524786718930137, + "grad_norm": 1.5257069000403813, + "learning_rate": 6.718295471517288e-07, + "loss": 0.4217349886894226, + "step": 5432 + }, + { + "epoch": 1.2527092460225964, + "grad_norm": 1.4826939266004133, + "learning_rate": 6.714694245783963e-07, + "loss": 0.4944193661212921, + "step": 5433 + }, + { + "epoch": 1.2529398201521789, + "grad_norm": 1.387839760206308, + "learning_rate": 6.711093497603127e-07, + "loss": 0.5058057904243469, + "step": 5434 + }, + { + "epoch": 1.2531703942817616, + "grad_norm": 1.381621888753065, + "learning_rate": 6.707493227498186e-07, + "loss": 0.45669037103652954, + "step": 5435 + }, + { + "epoch": 1.2534009684113443, + "grad_norm": 1.5997486257834712, + "learning_rate": 6.703893435992469e-07, + "loss": 0.4248945116996765, + "step": 5436 + }, + { + "epoch": 1.2536315425409268, + "grad_norm": 1.6056111266165571, + "learning_rate": 6.700294123609249e-07, + "loss": 0.3984343707561493, + 
"step": 5437 + }, + { + "epoch": 1.2538621166705095, + "grad_norm": 1.5349078061254786, + "learning_rate": 6.696695290871715e-07, + "loss": 0.435299813747406, + "step": 5438 + }, + { + "epoch": 1.2540926908000922, + "grad_norm": 1.6277363060500583, + "learning_rate": 6.693096938303002e-07, + "loss": 0.4225304126739502, + "step": 5439 + }, + { + "epoch": 1.254323264929675, + "grad_norm": 1.6495416759002697, + "learning_rate": 6.689499066426161e-07, + "loss": 0.4686669111251831, + "step": 5440 + }, + { + "epoch": 1.2545538390592577, + "grad_norm": 1.5168957851404996, + "learning_rate": 6.685901675764186e-07, + "loss": 0.45163553953170776, + "step": 5441 + }, + { + "epoch": 1.2547844131888402, + "grad_norm": 1.3593822737620262, + "learning_rate": 6.682304766839986e-07, + "loss": 0.44223567843437195, + "step": 5442 + }, + { + "epoch": 1.2550149873184229, + "grad_norm": 1.5363469724843986, + "learning_rate": 6.678708340176413e-07, + "loss": 0.4008648991584778, + "step": 5443 + }, + { + "epoch": 1.2552455614480056, + "grad_norm": 1.4199248627467993, + "learning_rate": 6.675112396296245e-07, + "loss": 0.4500792324542999, + "step": 5444 + }, + { + "epoch": 1.255476135577588, + "grad_norm": 1.490145734356762, + "learning_rate": 6.671516935722183e-07, + "loss": 0.42558690905570984, + "step": 5445 + }, + { + "epoch": 1.2557067097071708, + "grad_norm": 1.7098682543926618, + "learning_rate": 6.667921958976871e-07, + "loss": 0.4676043391227722, + "step": 5446 + }, + { + "epoch": 1.2559372838367535, + "grad_norm": 1.8041492407407758, + "learning_rate": 6.664327466582869e-07, + "loss": 0.44114184379577637, + "step": 5447 + }, + { + "epoch": 1.2561678579663362, + "grad_norm": 1.6102069805165957, + "learning_rate": 6.660733459062679e-07, + "loss": 0.33865463733673096, + "step": 5448 + }, + { + "epoch": 1.256398432095919, + "grad_norm": 1.8619975614063338, + "learning_rate": 6.65713993693872e-07, + "loss": 0.5397414565086365, + "step": 5449 + }, + { + "epoch": 1.2566290062255014, + 
"grad_norm": 1.4730562973077854, + "learning_rate": 6.653546900733352e-07, + "loss": 0.49249517917633057, + "step": 5450 + }, + { + "epoch": 1.2568595803550842, + "grad_norm": 1.5757041605280757, + "learning_rate": 6.649954350968855e-07, + "loss": 0.5438433885574341, + "step": 5451 + }, + { + "epoch": 1.2570901544846669, + "grad_norm": 1.4727448576353426, + "learning_rate": 6.646362288167448e-07, + "loss": 0.43725037574768066, + "step": 5452 + }, + { + "epoch": 1.2573207286142494, + "grad_norm": 1.5159104216766552, + "learning_rate": 6.642770712851269e-07, + "loss": 0.5369226336479187, + "step": 5453 + }, + { + "epoch": 1.257551302743832, + "grad_norm": 1.4915531986930697, + "learning_rate": 6.63917962554239e-07, + "loss": 0.45022842288017273, + "step": 5454 + }, + { + "epoch": 1.2577818768734148, + "grad_norm": 1.6219974371712227, + "learning_rate": 6.635589026762818e-07, + "loss": 0.42483362555503845, + "step": 5455 + }, + { + "epoch": 1.2580124510029975, + "grad_norm": 1.4115832140490556, + "learning_rate": 6.631998917034474e-07, + "loss": 0.4909497797489166, + "step": 5456 + }, + { + "epoch": 1.2582430251325802, + "grad_norm": 1.3159817254483799, + "learning_rate": 6.628409296879223e-07, + "loss": 0.4927433431148529, + "step": 5457 + }, + { + "epoch": 1.2584735992621627, + "grad_norm": 1.550356576361105, + "learning_rate": 6.624820166818847e-07, + "loss": 0.4452761113643646, + "step": 5458 + }, + { + "epoch": 1.2587041733917455, + "grad_norm": 1.5683413746620685, + "learning_rate": 6.62123152737507e-07, + "loss": 0.4637982249259949, + "step": 5459 + }, + { + "epoch": 1.2589347475213282, + "grad_norm": 1.3293268937895057, + "learning_rate": 6.617643379069532e-07, + "loss": 0.3189438581466675, + "step": 5460 + }, + { + "epoch": 1.2591653216509107, + "grad_norm": 1.3296675722252447, + "learning_rate": 6.614055722423808e-07, + "loss": 0.420698881149292, + "step": 5461 + }, + { + "epoch": 1.2593958957804934, + "grad_norm": 1.5202476608747133, + "learning_rate": 
6.610468557959398e-07, + "loss": 0.5187642574310303, + "step": 5462 + }, + { + "epoch": 1.259626469910076, + "grad_norm": 1.4954844764147424, + "learning_rate": 6.606881886197741e-07, + "loss": 0.48519381880760193, + "step": 5463 + }, + { + "epoch": 1.2598570440396588, + "grad_norm": 1.4755140585184632, + "learning_rate": 6.60329570766019e-07, + "loss": 0.3930806815624237, + "step": 5464 + }, + { + "epoch": 1.2600876181692415, + "grad_norm": 1.8617928902566707, + "learning_rate": 6.599710022868027e-07, + "loss": 0.4890612065792084, + "step": 5465 + }, + { + "epoch": 1.260318192298824, + "grad_norm": 1.2781262224531547, + "learning_rate": 6.596124832342476e-07, + "loss": 0.4202774465084076, + "step": 5466 + }, + { + "epoch": 1.2605487664284067, + "grad_norm": 1.5196012608537903, + "learning_rate": 6.592540136604674e-07, + "loss": 0.5053761005401611, + "step": 5467 + }, + { + "epoch": 1.2607793405579895, + "grad_norm": 1.4874107682553572, + "learning_rate": 6.588955936175702e-07, + "loss": 0.4827175736427307, + "step": 5468 + }, + { + "epoch": 1.261009914687572, + "grad_norm": 1.4659080652243894, + "learning_rate": 6.585372231576551e-07, + "loss": 0.45179229974746704, + "step": 5469 + }, + { + "epoch": 1.2612404888171547, + "grad_norm": 1.3781712796058982, + "learning_rate": 6.581789023328155e-07, + "loss": 0.4024949073791504, + "step": 5470 + }, + { + "epoch": 1.2614710629467374, + "grad_norm": 1.7288759385339574, + "learning_rate": 6.578206311951363e-07, + "loss": 0.48839491605758667, + "step": 5471 + }, + { + "epoch": 1.26170163707632, + "grad_norm": 1.4778086795689929, + "learning_rate": 6.574624097966968e-07, + "loss": 0.45897620916366577, + "step": 5472 + }, + { + "epoch": 1.2619322112059028, + "grad_norm": 1.5548512112712307, + "learning_rate": 6.571042381895671e-07, + "loss": 0.48471882939338684, + "step": 5473 + }, + { + "epoch": 1.2621627853354853, + "grad_norm": 2.0045804163216414, + "learning_rate": 6.567461164258117e-07, + "loss": 0.44159913063049316, + 
"step": 5474 + }, + { + "epoch": 1.262393359465068, + "grad_norm": 1.5752243442253915, + "learning_rate": 6.563880445574872e-07, + "loss": 0.39186012744903564, + "step": 5475 + }, + { + "epoch": 1.2626239335946507, + "grad_norm": 1.818057995697113, + "learning_rate": 6.560300226366425e-07, + "loss": 0.5332233905792236, + "step": 5476 + }, + { + "epoch": 1.2628545077242332, + "grad_norm": 1.350222227503923, + "learning_rate": 6.556720507153201e-07, + "loss": 0.4252084195613861, + "step": 5477 + }, + { + "epoch": 1.263085081853816, + "grad_norm": 1.4204993118440263, + "learning_rate": 6.553141288455548e-07, + "loss": 0.36927711963653564, + "step": 5478 + }, + { + "epoch": 1.2633156559833987, + "grad_norm": 1.5676826878414558, + "learning_rate": 6.549562570793745e-07, + "loss": 0.4405602216720581, + "step": 5479 + }, + { + "epoch": 1.2635462301129814, + "grad_norm": 1.5245742985153417, + "learning_rate": 6.545984354687986e-07, + "loss": 0.5691590309143066, + "step": 5480 + }, + { + "epoch": 1.2637768042425641, + "grad_norm": 1.468644623890153, + "learning_rate": 6.542406640658411e-07, + "loss": 0.3750354051589966, + "step": 5481 + }, + { + "epoch": 1.2640073783721466, + "grad_norm": 1.5266320276968284, + "learning_rate": 6.538829429225068e-07, + "loss": 0.47816041111946106, + "step": 5482 + }, + { + "epoch": 1.2642379525017293, + "grad_norm": 1.4911563737024116, + "learning_rate": 6.535252720907951e-07, + "loss": 0.42470186948776245, + "step": 5483 + }, + { + "epoch": 1.264468526631312, + "grad_norm": 1.4256480441382235, + "learning_rate": 6.531676516226961e-07, + "loss": 0.37356555461883545, + "step": 5484 + }, + { + "epoch": 1.2646991007608945, + "grad_norm": 1.4604810104028516, + "learning_rate": 6.528100815701942e-07, + "loss": 0.4895293116569519, + "step": 5485 + }, + { + "epoch": 1.2649296748904773, + "grad_norm": 1.9575945537740915, + "learning_rate": 6.524525619852656e-07, + "loss": 0.4963725805282593, + "step": 5486 + }, + { + "epoch": 1.26516024902006, + 
"grad_norm": 1.7629474018170985, + "learning_rate": 6.520950929198792e-07, + "loss": 0.5443764925003052, + "step": 5487 + }, + { + "epoch": 1.2653908231496427, + "grad_norm": 1.2536482779264142, + "learning_rate": 6.517376744259972e-07, + "loss": 0.400549054145813, + "step": 5488 + }, + { + "epoch": 1.2656213972792254, + "grad_norm": 1.8850482793273033, + "learning_rate": 6.513803065555736e-07, + "loss": 0.46384042501449585, + "step": 5489 + }, + { + "epoch": 1.265851971408808, + "grad_norm": 1.4893040501119004, + "learning_rate": 6.510229893605556e-07, + "loss": 0.5044240951538086, + "step": 5490 + }, + { + "epoch": 1.2660825455383906, + "grad_norm": 1.477450831039122, + "learning_rate": 6.506657228928827e-07, + "loss": 0.4544214904308319, + "step": 5491 + }, + { + "epoch": 1.2663131196679733, + "grad_norm": 1.441487086349296, + "learning_rate": 6.503085072044878e-07, + "loss": 0.36688071489334106, + "step": 5492 + }, + { + "epoch": 1.2665436937975558, + "grad_norm": 1.4594163949727883, + "learning_rate": 6.499513423472951e-07, + "loss": 0.4058225154876709, + "step": 5493 + }, + { + "epoch": 1.2667742679271385, + "grad_norm": 1.4647938941101153, + "learning_rate": 6.495942283732225e-07, + "loss": 0.36429229378700256, + "step": 5494 + }, + { + "epoch": 1.2670048420567213, + "grad_norm": 1.7674965095028434, + "learning_rate": 6.492371653341802e-07, + "loss": 0.47116899490356445, + "step": 5495 + }, + { + "epoch": 1.267235416186304, + "grad_norm": 1.4923904627456126, + "learning_rate": 6.488801532820706e-07, + "loss": 0.4437965750694275, + "step": 5496 + }, + { + "epoch": 1.2674659903158867, + "grad_norm": 1.5533994295939695, + "learning_rate": 6.485231922687893e-07, + "loss": 0.4810328483581543, + "step": 5497 + }, + { + "epoch": 1.2676965644454692, + "grad_norm": 1.4632129166419525, + "learning_rate": 6.481662823462238e-07, + "loss": 0.362907350063324, + "step": 5498 + }, + { + "epoch": 1.267927138575052, + "grad_norm": 1.375729756251652, + "learning_rate": 
6.478094235662554e-07, + "loss": 0.43647170066833496, + "step": 5499 + }, + { + "epoch": 1.2681577127046346, + "grad_norm": 1.422215620145209, + "learning_rate": 6.474526159807563e-07, + "loss": 0.4566631317138672, + "step": 5500 + }, + { + "epoch": 1.2683882868342171, + "grad_norm": 1.5097982290449063, + "learning_rate": 6.470958596415925e-07, + "loss": 0.3940081298351288, + "step": 5501 + }, + { + "epoch": 1.2686188609637998, + "grad_norm": 1.617526881385646, + "learning_rate": 6.46739154600622e-07, + "loss": 0.5275603532791138, + "step": 5502 + }, + { + "epoch": 1.2688494350933825, + "grad_norm": 1.846449658895825, + "learning_rate": 6.463825009096959e-07, + "loss": 0.42546436190605164, + "step": 5503 + }, + { + "epoch": 1.2690800092229653, + "grad_norm": 1.6068032996774941, + "learning_rate": 6.460258986206566e-07, + "loss": 0.3833821713924408, + "step": 5504 + }, + { + "epoch": 1.2693105833525478, + "grad_norm": 1.4806797403979666, + "learning_rate": 6.456693477853408e-07, + "loss": 0.5056046843528748, + "step": 5505 + }, + { + "epoch": 1.2695411574821305, + "grad_norm": 1.6345259734279236, + "learning_rate": 6.453128484555764e-07, + "loss": 0.3544192910194397, + "step": 5506 + }, + { + "epoch": 1.2697717316117132, + "grad_norm": 1.684231386275673, + "learning_rate": 6.449564006831836e-07, + "loss": 0.47164130210876465, + "step": 5507 + }, + { + "epoch": 1.2700023057412957, + "grad_norm": 1.3334241214641123, + "learning_rate": 6.446000045199765e-07, + "loss": 0.4580638110637665, + "step": 5508 + }, + { + "epoch": 1.2702328798708784, + "grad_norm": 1.2809631136030655, + "learning_rate": 6.442436600177606e-07, + "loss": 0.45945844054222107, + "step": 5509 + }, + { + "epoch": 1.2704634540004611, + "grad_norm": 1.447660138842985, + "learning_rate": 6.438873672283343e-07, + "loss": 0.5539910793304443, + "step": 5510 + }, + { + "epoch": 1.2706940281300438, + "grad_norm": 1.6550705344684873, + "learning_rate": 6.43531126203488e-07, + "loss": 0.4661790132522583, + 
"step": 5511 + }, + { + "epoch": 1.2709246022596266, + "grad_norm": 1.7015547164246037, + "learning_rate": 6.431749369950057e-07, + "loss": 0.3781178891658783, + "step": 5512 + }, + { + "epoch": 1.271155176389209, + "grad_norm": 1.571227420481097, + "learning_rate": 6.428187996546621e-07, + "loss": 0.4858461618423462, + "step": 5513 + }, + { + "epoch": 1.2713857505187918, + "grad_norm": 1.5308384830726272, + "learning_rate": 6.424627142342262e-07, + "loss": 0.5003963708877563, + "step": 5514 + }, + { + "epoch": 1.2716163246483745, + "grad_norm": 1.3605664168425382, + "learning_rate": 6.421066807854584e-07, + "loss": 0.4620795249938965, + "step": 5515 + }, + { + "epoch": 1.271846898777957, + "grad_norm": 1.385915858471925, + "learning_rate": 6.417506993601114e-07, + "loss": 0.43998581171035767, + "step": 5516 + }, + { + "epoch": 1.2720774729075397, + "grad_norm": 1.6777446711260993, + "learning_rate": 6.413947700099311e-07, + "loss": 0.5204107165336609, + "step": 5517 + }, + { + "epoch": 1.2723080470371224, + "grad_norm": 1.5515853600398104, + "learning_rate": 6.410388927866551e-07, + "loss": 0.46675950288772583, + "step": 5518 + }, + { + "epoch": 1.2725386211667051, + "grad_norm": 1.4020610518461032, + "learning_rate": 6.406830677420146e-07, + "loss": 0.4002436101436615, + "step": 5519 + }, + { + "epoch": 1.2727691952962878, + "grad_norm": 1.6847281008342299, + "learning_rate": 6.403272949277312e-07, + "loss": 0.4051012396812439, + "step": 5520 + }, + { + "epoch": 1.2729997694258703, + "grad_norm": 1.4780078562694616, + "learning_rate": 6.399715743955209e-07, + "loss": 0.4847797751426697, + "step": 5521 + }, + { + "epoch": 1.273230343555453, + "grad_norm": 1.6389704995828815, + "learning_rate": 6.396159061970907e-07, + "loss": 0.4742053151130676, + "step": 5522 + }, + { + "epoch": 1.2734609176850358, + "grad_norm": 1.4123933831310747, + "learning_rate": 6.392602903841415e-07, + "loss": 0.44291001558303833, + "step": 5523 + }, + { + "epoch": 1.2736914918146183, + 
"grad_norm": 1.438016627678946, + "learning_rate": 6.389047270083646e-07, + "loss": 0.38993996381759644, + "step": 5524 + }, + { + "epoch": 1.273922065944201, + "grad_norm": 1.5621491080936318, + "learning_rate": 6.385492161214454e-07, + "loss": 0.5045995116233826, + "step": 5525 + }, + { + "epoch": 1.2741526400737837, + "grad_norm": 1.4769511790871679, + "learning_rate": 6.381937577750611e-07, + "loss": 0.4377788305282593, + "step": 5526 + }, + { + "epoch": 1.2743832142033664, + "grad_norm": 1.470801087764595, + "learning_rate": 6.378383520208806e-07, + "loss": 0.5363353490829468, + "step": 5527 + }, + { + "epoch": 1.2746137883329491, + "grad_norm": 1.340047582641372, + "learning_rate": 6.374829989105661e-07, + "loss": 0.42230546474456787, + "step": 5528 + }, + { + "epoch": 1.2748443624625316, + "grad_norm": 1.2882420810653734, + "learning_rate": 6.371276984957715e-07, + "loss": 0.39565908908843994, + "step": 5529 + }, + { + "epoch": 1.2750749365921143, + "grad_norm": 1.3633189139651096, + "learning_rate": 6.36772450828144e-07, + "loss": 0.4375323951244354, + "step": 5530 + }, + { + "epoch": 1.275305510721697, + "grad_norm": 1.5028848525750826, + "learning_rate": 6.364172559593215e-07, + "loss": 0.4901241660118103, + "step": 5531 + }, + { + "epoch": 1.2755360848512796, + "grad_norm": 1.3653729298225772, + "learning_rate": 6.360621139409359e-07, + "loss": 0.4108780026435852, + "step": 5532 + }, + { + "epoch": 1.2757666589808623, + "grad_norm": 1.4800363393725149, + "learning_rate": 6.357070248246102e-07, + "loss": 0.43631279468536377, + "step": 5533 + }, + { + "epoch": 1.275997233110445, + "grad_norm": 1.5982504223136969, + "learning_rate": 6.353519886619607e-07, + "loss": 0.4623757004737854, + "step": 5534 + }, + { + "epoch": 1.2762278072400277, + "grad_norm": 1.5284512936045929, + "learning_rate": 6.349970055045954e-07, + "loss": 0.41303062438964844, + "step": 5535 + }, + { + "epoch": 1.2764583813696104, + "grad_norm": 1.7689201212047627, + "learning_rate": 
6.34642075404114e-07, + "loss": 0.5157878994941711, + "step": 5536 + }, + { + "epoch": 1.276688955499193, + "grad_norm": 1.6093049161057067, + "learning_rate": 6.342871984121103e-07, + "loss": 0.41295093297958374, + "step": 5537 + }, + { + "epoch": 1.2769195296287756, + "grad_norm": 1.4185213028911483, + "learning_rate": 6.339323745801682e-07, + "loss": 0.4636460542678833, + "step": 5538 + }, + { + "epoch": 1.2771501037583584, + "grad_norm": 1.44057433861511, + "learning_rate": 6.335776039598659e-07, + "loss": 0.45273804664611816, + "step": 5539 + }, + { + "epoch": 1.2773806778879409, + "grad_norm": 1.7212686324453035, + "learning_rate": 6.332228866027721e-07, + "loss": 0.4562758803367615, + "step": 5540 + }, + { + "epoch": 1.2776112520175236, + "grad_norm": 1.5821328258880776, + "learning_rate": 6.328682225604491e-07, + "loss": 0.3162837326526642, + "step": 5541 + }, + { + "epoch": 1.2778418261471063, + "grad_norm": 1.4226618207277133, + "learning_rate": 6.325136118844504e-07, + "loss": 0.48594871163368225, + "step": 5542 + }, + { + "epoch": 1.278072400276689, + "grad_norm": 1.398820126458318, + "learning_rate": 6.321590546263231e-07, + "loss": 0.4346798360347748, + "step": 5543 + }, + { + "epoch": 1.2783029744062717, + "grad_norm": 1.7945463027279862, + "learning_rate": 6.318045508376046e-07, + "loss": 0.5133204460144043, + "step": 5544 + }, + { + "epoch": 1.2785335485358542, + "grad_norm": 1.6462955147402891, + "learning_rate": 6.314501005698266e-07, + "loss": 0.40679338574409485, + "step": 5545 + }, + { + "epoch": 1.278764122665437, + "grad_norm": 1.341754342655084, + "learning_rate": 6.310957038745117e-07, + "loss": 0.363874614238739, + "step": 5546 + }, + { + "epoch": 1.2789946967950196, + "grad_norm": 1.3013776361069782, + "learning_rate": 6.307413608031746e-07, + "loss": 0.43020665645599365, + "step": 5547 + }, + { + "epoch": 1.2792252709246021, + "grad_norm": 1.301444097702827, + "learning_rate": 6.303870714073233e-07, + "loss": 0.5280083417892456, + 
"step": 5548 + }, + { + "epoch": 1.2794558450541849, + "grad_norm": 1.803757705570539, + "learning_rate": 6.300328357384568e-07, + "loss": 0.4584185481071472, + "step": 5549 + }, + { + "epoch": 1.2796864191837676, + "grad_norm": 1.4682285924702114, + "learning_rate": 6.296786538480675e-07, + "loss": 0.4068162441253662, + "step": 5550 + }, + { + "epoch": 1.2799169933133503, + "grad_norm": 1.361515758715701, + "learning_rate": 6.293245257876387e-07, + "loss": 0.4336085915565491, + "step": 5551 + }, + { + "epoch": 1.280147567442933, + "grad_norm": 1.4906971509519245, + "learning_rate": 6.289704516086468e-07, + "loss": 0.4932886064052582, + "step": 5552 + }, + { + "epoch": 1.2803781415725155, + "grad_norm": 1.3660207414526373, + "learning_rate": 6.2861643136256e-07, + "loss": 0.437292218208313, + "step": 5553 + }, + { + "epoch": 1.2806087157020982, + "grad_norm": 1.5017461161180483, + "learning_rate": 6.28262465100839e-07, + "loss": 0.4131085276603699, + "step": 5554 + }, + { + "epoch": 1.280839289831681, + "grad_norm": 1.441603184912447, + "learning_rate": 6.27908552874936e-07, + "loss": 0.4146266579627991, + "step": 5555 + }, + { + "epoch": 1.2810698639612634, + "grad_norm": 1.6115588407174422, + "learning_rate": 6.275546947362957e-07, + "loss": 0.4778539538383484, + "step": 5556 + }, + { + "epoch": 1.2813004380908461, + "grad_norm": 1.4722189673341872, + "learning_rate": 6.272008907363555e-07, + "loss": 0.3989019989967346, + "step": 5557 + }, + { + "epoch": 1.2815310122204289, + "grad_norm": 1.5188067628601776, + "learning_rate": 6.268471409265436e-07, + "loss": 0.4433528184890747, + "step": 5558 + }, + { + "epoch": 1.2817615863500116, + "grad_norm": 1.4551631195697798, + "learning_rate": 6.264934453582817e-07, + "loss": 0.46929931640625, + "step": 5559 + }, + { + "epoch": 1.2819921604795943, + "grad_norm": 1.749202490253535, + "learning_rate": 6.261398040829829e-07, + "loss": 0.4908202886581421, + "step": 5560 + }, + { + "epoch": 1.2822227346091768, + "grad_norm": 
1.766310768413501, + "learning_rate": 6.257862171520528e-07, + "loss": 0.44195377826690674, + "step": 5561 + }, + { + "epoch": 1.2824533087387595, + "grad_norm": 1.8716445464357578, + "learning_rate": 6.254326846168882e-07, + "loss": 0.548696756362915, + "step": 5562 + }, + { + "epoch": 1.2826838828683422, + "grad_norm": 1.6355324229757326, + "learning_rate": 6.250792065288794e-07, + "loss": 0.4015994668006897, + "step": 5563 + }, + { + "epoch": 1.2829144569979247, + "grad_norm": 1.5798153885574688, + "learning_rate": 6.247257829394074e-07, + "loss": 0.4281688928604126, + "step": 5564 + }, + { + "epoch": 1.2831450311275074, + "grad_norm": 1.2159971773233473, + "learning_rate": 6.243724138998462e-07, + "loss": 0.37623634934425354, + "step": 5565 + }, + { + "epoch": 1.2833756052570902, + "grad_norm": 1.7282596196498647, + "learning_rate": 6.240190994615617e-07, + "loss": 0.4753819704055786, + "step": 5566 + }, + { + "epoch": 1.2836061793866729, + "grad_norm": 1.8092084567061366, + "learning_rate": 6.236658396759111e-07, + "loss": 0.4584893584251404, + "step": 5567 + }, + { + "epoch": 1.2838367535162556, + "grad_norm": 1.598249680169706, + "learning_rate": 6.23312634594245e-07, + "loss": 0.445067435503006, + "step": 5568 + }, + { + "epoch": 1.284067327645838, + "grad_norm": 1.402901275205923, + "learning_rate": 6.229594842679049e-07, + "loss": 0.4209640920162201, + "step": 5569 + }, + { + "epoch": 1.2842979017754208, + "grad_norm": 1.3481434606649714, + "learning_rate": 6.226063887482254e-07, + "loss": 0.34620141983032227, + "step": 5570 + }, + { + "epoch": 1.2845284759050035, + "grad_norm": 1.2702834444597235, + "learning_rate": 6.222533480865315e-07, + "loss": 0.43683767318725586, + "step": 5571 + }, + { + "epoch": 1.284759050034586, + "grad_norm": 1.5394879174992184, + "learning_rate": 6.219003623341421e-07, + "loss": 0.45881450176239014, + "step": 5572 + }, + { + "epoch": 1.2849896241641687, + "grad_norm": 1.2015099259152706, + "learning_rate": 
6.215474315423667e-07, + "loss": 0.40115928649902344, + "step": 5573 + }, + { + "epoch": 1.2852201982937514, + "grad_norm": 1.5480428253925462, + "learning_rate": 6.211945557625082e-07, + "loss": 0.4181373119354248, + "step": 5574 + }, + { + "epoch": 1.2854507724233342, + "grad_norm": 1.6874872010842208, + "learning_rate": 6.208417350458598e-07, + "loss": 0.4743300676345825, + "step": 5575 + }, + { + "epoch": 1.2856813465529169, + "grad_norm": 1.6331906817141153, + "learning_rate": 6.204889694437077e-07, + "loss": 0.4236707091331482, + "step": 5576 + }, + { + "epoch": 1.2859119206824994, + "grad_norm": 1.1887995996963334, + "learning_rate": 6.201362590073305e-07, + "loss": 0.4105497896671295, + "step": 5577 + }, + { + "epoch": 1.286142494812082, + "grad_norm": 1.3982883240902815, + "learning_rate": 6.197836037879973e-07, + "loss": 0.4164474606513977, + "step": 5578 + }, + { + "epoch": 1.2863730689416648, + "grad_norm": 1.648111600369129, + "learning_rate": 6.19431003836971e-07, + "loss": 0.49809616804122925, + "step": 5579 + }, + { + "epoch": 1.2866036430712473, + "grad_norm": 1.608787056057215, + "learning_rate": 6.19078459205505e-07, + "loss": 0.4902994632720947, + "step": 5580 + }, + { + "epoch": 1.28683421720083, + "grad_norm": 1.336430500063446, + "learning_rate": 6.18725969944846e-07, + "loss": 0.3697085380554199, + "step": 5581 + }, + { + "epoch": 1.2870647913304127, + "grad_norm": 1.353359914681952, + "learning_rate": 6.183735361062309e-07, + "loss": 0.446627140045166, + "step": 5582 + }, + { + "epoch": 1.2872953654599955, + "grad_norm": 1.590519620379444, + "learning_rate": 6.180211577408901e-07, + "loss": 0.39521220326423645, + "step": 5583 + }, + { + "epoch": 1.2875259395895782, + "grad_norm": 1.7929636253307002, + "learning_rate": 6.176688349000452e-07, + "loss": 0.6308573484420776, + "step": 5584 + }, + { + "epoch": 1.2877565137191607, + "grad_norm": 1.5017758457543093, + "learning_rate": 6.173165676349102e-07, + "loss": 0.4558343291282654, + "step": 
5585 + }, + { + "epoch": 1.2879870878487434, + "grad_norm": 1.4546689222111522, + "learning_rate": 6.169643559966906e-07, + "loss": 0.5487015247344971, + "step": 5586 + }, + { + "epoch": 1.288217661978326, + "grad_norm": 1.3949279502201517, + "learning_rate": 6.166122000365834e-07, + "loss": 0.39074039459228516, + "step": 5587 + }, + { + "epoch": 1.2884482361079086, + "grad_norm": 1.4687466147876906, + "learning_rate": 6.162600998057787e-07, + "loss": 0.5136120915412903, + "step": 5588 + }, + { + "epoch": 1.2886788102374913, + "grad_norm": 1.5457442901158343, + "learning_rate": 6.159080553554572e-07, + "loss": 0.5344336628913879, + "step": 5589 + }, + { + "epoch": 1.288909384367074, + "grad_norm": 1.5840783894802135, + "learning_rate": 6.15556066736793e-07, + "loss": 0.5204205513000488, + "step": 5590 + }, + { + "epoch": 1.2891399584966567, + "grad_norm": 1.588345092971114, + "learning_rate": 6.152041340009504e-07, + "loss": 0.4768211245536804, + "step": 5591 + }, + { + "epoch": 1.2893705326262395, + "grad_norm": 2.0914169507965936, + "learning_rate": 6.148522571990868e-07, + "loss": 0.44098299741744995, + "step": 5592 + }, + { + "epoch": 1.289601106755822, + "grad_norm": 1.6411833405865308, + "learning_rate": 6.145004363823509e-07, + "loss": 0.5038055181503296, + "step": 5593 + }, + { + "epoch": 1.2898316808854047, + "grad_norm": 1.6256634474518743, + "learning_rate": 6.141486716018837e-07, + "loss": 0.417998343706131, + "step": 5594 + }, + { + "epoch": 1.2900622550149874, + "grad_norm": 1.755327490864145, + "learning_rate": 6.137969629088174e-07, + "loss": 0.48858124017715454, + "step": 5595 + }, + { + "epoch": 1.2902928291445699, + "grad_norm": 1.6236287189755654, + "learning_rate": 6.134453103542765e-07, + "loss": 0.46988582611083984, + "step": 5596 + }, + { + "epoch": 1.2905234032741526, + "grad_norm": 1.4715150644247719, + "learning_rate": 6.130937139893779e-07, + "loss": 0.5100589394569397, + "step": 5597 + }, + { + "epoch": 1.2907539774037353, + 
"grad_norm": 1.861124742863941, + "learning_rate": 6.127421738652286e-07, + "loss": 0.490558922290802, + "step": 5598 + }, + { + "epoch": 1.290984551533318, + "grad_norm": 1.624496792014592, + "learning_rate": 6.123906900329291e-07, + "loss": 0.4749597907066345, + "step": 5599 + }, + { + "epoch": 1.2912151256629008, + "grad_norm": 1.4155787175262067, + "learning_rate": 6.12039262543571e-07, + "loss": 0.5006792545318604, + "step": 5600 + }, + { + "epoch": 1.2914456997924832, + "grad_norm": 1.6772265070157861, + "learning_rate": 6.116878914482384e-07, + "loss": 0.46902909874916077, + "step": 5601 + }, + { + "epoch": 1.291676273922066, + "grad_norm": 1.4563548131763813, + "learning_rate": 6.113365767980059e-07, + "loss": 0.46765559911727905, + "step": 5602 + }, + { + "epoch": 1.2919068480516487, + "grad_norm": 1.4143636586875892, + "learning_rate": 6.10985318643941e-07, + "loss": 0.45960646867752075, + "step": 5603 + }, + { + "epoch": 1.2921374221812312, + "grad_norm": 1.578129032516793, + "learning_rate": 6.106341170371024e-07, + "loss": 0.4067912697792053, + "step": 5604 + }, + { + "epoch": 1.292367996310814, + "grad_norm": 1.653263856685772, + "learning_rate": 6.102829720285414e-07, + "loss": 0.45004114508628845, + "step": 5605 + }, + { + "epoch": 1.2925985704403966, + "grad_norm": 1.698803058368325, + "learning_rate": 6.099318836692999e-07, + "loss": 0.5086014270782471, + "step": 5606 + }, + { + "epoch": 1.2928291445699793, + "grad_norm": 1.5400277013654406, + "learning_rate": 6.095808520104122e-07, + "loss": 0.49985191226005554, + "step": 5607 + }, + { + "epoch": 1.293059718699562, + "grad_norm": 1.5622376081366391, + "learning_rate": 6.092298771029047e-07, + "loss": 0.5066381096839905, + "step": 5608 + }, + { + "epoch": 1.2932902928291445, + "grad_norm": 1.5786958248418999, + "learning_rate": 6.088789589977947e-07, + "loss": 0.49626559019088745, + "step": 5609 + }, + { + "epoch": 1.2935208669587273, + "grad_norm": 1.6542820345168319, + "learning_rate": 
6.085280977460921e-07, + "loss": 0.4837498962879181, + "step": 5610 + }, + { + "epoch": 1.29375144108831, + "grad_norm": 1.3607897650960659, + "learning_rate": 6.081772933987977e-07, + "loss": 0.41308102011680603, + "step": 5611 + }, + { + "epoch": 1.2939820152178925, + "grad_norm": 1.4026215025684987, + "learning_rate": 6.078265460069048e-07, + "loss": 0.4453086853027344, + "step": 5612 + }, + { + "epoch": 1.2942125893474752, + "grad_norm": 1.5506248233039113, + "learning_rate": 6.074758556213976e-07, + "loss": 0.4700174927711487, + "step": 5613 + }, + { + "epoch": 1.294443163477058, + "grad_norm": 1.6021152444285431, + "learning_rate": 6.071252222932537e-07, + "loss": 0.578227162361145, + "step": 5614 + }, + { + "epoch": 1.2946737376066406, + "grad_norm": 1.3711009132002785, + "learning_rate": 6.067746460734398e-07, + "loss": 0.36468571424484253, + "step": 5615 + }, + { + "epoch": 1.2949043117362231, + "grad_norm": 1.7197393040240752, + "learning_rate": 6.064241270129166e-07, + "loss": 0.4793199896812439, + "step": 5616 + }, + { + "epoch": 1.2951348858658058, + "grad_norm": 1.4731744493442007, + "learning_rate": 6.060736651626355e-07, + "loss": 0.40342214703559875, + "step": 5617 + }, + { + "epoch": 1.2953654599953885, + "grad_norm": 1.2868571274228024, + "learning_rate": 6.05723260573539e-07, + "loss": 0.4212435185909271, + "step": 5618 + }, + { + "epoch": 1.295596034124971, + "grad_norm": 1.592545901664945, + "learning_rate": 6.053729132965626e-07, + "loss": 0.44668713212013245, + "step": 5619 + }, + { + "epoch": 1.2958266082545538, + "grad_norm": 1.3590289444558108, + "learning_rate": 6.050226233826326e-07, + "loss": 0.5159831643104553, + "step": 5620 + }, + { + "epoch": 1.2960571823841365, + "grad_norm": 1.792827614220507, + "learning_rate": 6.046723908826676e-07, + "loss": 0.5091866850852966, + "step": 5621 + }, + { + "epoch": 1.2962877565137192, + "grad_norm": 1.3636713576072057, + "learning_rate": 6.043222158475767e-07, + "loss": 0.34838563203811646, + 
"step": 5622 + }, + { + "epoch": 1.296518330643302, + "grad_norm": 1.679394698956229, + "learning_rate": 6.039720983282621e-07, + "loss": 0.46576952934265137, + "step": 5623 + }, + { + "epoch": 1.2967489047728844, + "grad_norm": 1.5739745386461328, + "learning_rate": 6.036220383756163e-07, + "loss": 0.4971234202384949, + "step": 5624 + }, + { + "epoch": 1.2969794789024671, + "grad_norm": 1.3832811037885837, + "learning_rate": 6.03272036040525e-07, + "loss": 0.4792482256889343, + "step": 5625 + }, + { + "epoch": 1.2972100530320498, + "grad_norm": 1.5438407741127544, + "learning_rate": 6.029220913738636e-07, + "loss": 0.45584213733673096, + "step": 5626 + }, + { + "epoch": 1.2974406271616323, + "grad_norm": 2.1628056802136686, + "learning_rate": 6.025722044265004e-07, + "loss": 0.5094096064567566, + "step": 5627 + }, + { + "epoch": 1.297671201291215, + "grad_norm": 1.2707985126710273, + "learning_rate": 6.022223752492954e-07, + "loss": 0.33178865909576416, + "step": 5628 + }, + { + "epoch": 1.2979017754207978, + "grad_norm": 1.4977758648466553, + "learning_rate": 6.018726038930991e-07, + "loss": 0.4955121874809265, + "step": 5629 + }, + { + "epoch": 1.2981323495503805, + "grad_norm": 1.9087861970540962, + "learning_rate": 6.01522890408755e-07, + "loss": 0.46253639459609985, + "step": 5630 + }, + { + "epoch": 1.2983629236799632, + "grad_norm": 1.725580686624441, + "learning_rate": 6.011732348470971e-07, + "loss": 0.4760236442089081, + "step": 5631 + }, + { + "epoch": 1.2985934978095457, + "grad_norm": 1.487451213133888, + "learning_rate": 6.008236372589516e-07, + "loss": 0.44413092732429504, + "step": 5632 + }, + { + "epoch": 1.2988240719391284, + "grad_norm": 1.5710401716420814, + "learning_rate": 6.004740976951358e-07, + "loss": 0.5431559681892395, + "step": 5633 + }, + { + "epoch": 1.2990546460687111, + "grad_norm": 1.448678008923642, + "learning_rate": 6.001246162064592e-07, + "loss": 0.41276806592941284, + "step": 5634 + }, + { + "epoch": 1.2992852201982936, + 
"grad_norm": 1.8698453553316883, + "learning_rate": 5.997751928437219e-07, + "loss": 0.3998986482620239, + "step": 5635 + }, + { + "epoch": 1.2995157943278763, + "grad_norm": 1.7019145009400753, + "learning_rate": 5.994258276577169e-07, + "loss": 0.47741782665252686, + "step": 5636 + }, + { + "epoch": 1.299746368457459, + "grad_norm": 1.8471752326794122, + "learning_rate": 5.990765206992277e-07, + "loss": 0.4294115900993347, + "step": 5637 + }, + { + "epoch": 1.2999769425870418, + "grad_norm": 1.2676173155963009, + "learning_rate": 5.987272720190288e-07, + "loss": 0.4717773199081421, + "step": 5638 + }, + { + "epoch": 1.3002075167166245, + "grad_norm": 1.4764264012124577, + "learning_rate": 5.983780816678881e-07, + "loss": 0.5169499516487122, + "step": 5639 + }, + { + "epoch": 1.300438090846207, + "grad_norm": 1.3402196455719508, + "learning_rate": 5.980289496965634e-07, + "loss": 0.3796359598636627, + "step": 5640 + }, + { + "epoch": 1.3006686649757897, + "grad_norm": 1.439771899645747, + "learning_rate": 5.976798761558048e-07, + "loss": 0.44377613067626953, + "step": 5641 + }, + { + "epoch": 1.3008992391053724, + "grad_norm": 1.4787491173073983, + "learning_rate": 5.973308610963534e-07, + "loss": 0.46863383054733276, + "step": 5642 + }, + { + "epoch": 1.301129813234955, + "grad_norm": 1.6231703309548882, + "learning_rate": 5.969819045689426e-07, + "loss": 0.5437184572219849, + "step": 5643 + }, + { + "epoch": 1.3013603873645376, + "grad_norm": 1.3526724102376106, + "learning_rate": 5.96633006624296e-07, + "loss": 0.4487720727920532, + "step": 5644 + }, + { + "epoch": 1.3015909614941203, + "grad_norm": 1.4099594164441491, + "learning_rate": 5.962841673131305e-07, + "loss": 0.42834270000457764, + "step": 5645 + }, + { + "epoch": 1.301821535623703, + "grad_norm": 1.6303538612123332, + "learning_rate": 5.959353866861525e-07, + "loss": 0.5242533087730408, + "step": 5646 + }, + { + "epoch": 1.3020521097532858, + "grad_norm": 1.467793467454458, + "learning_rate": 
5.955866647940609e-07, + "loss": 0.4529950022697449, + "step": 5647 + }, + { + "epoch": 1.3022826838828683, + "grad_norm": 1.704233159172443, + "learning_rate": 5.952380016875465e-07, + "loss": 0.41109561920166016, + "step": 5648 + }, + { + "epoch": 1.302513258012451, + "grad_norm": 2.1978948521850237, + "learning_rate": 5.948893974172904e-07, + "loss": 0.5468418598175049, + "step": 5649 + }, + { + "epoch": 1.3027438321420337, + "grad_norm": 1.6524182777322811, + "learning_rate": 5.945408520339663e-07, + "loss": 0.4594927430152893, + "step": 5650 + }, + { + "epoch": 1.3029744062716162, + "grad_norm": 1.8822005278969978, + "learning_rate": 5.941923655882383e-07, + "loss": 0.5011999011039734, + "step": 5651 + }, + { + "epoch": 1.303204980401199, + "grad_norm": 1.3940543055361847, + "learning_rate": 5.938439381307632e-07, + "loss": 0.519101083278656, + "step": 5652 + }, + { + "epoch": 1.3034355545307816, + "grad_norm": 1.3048743953658823, + "learning_rate": 5.934955697121875e-07, + "loss": 0.521979570388794, + "step": 5653 + }, + { + "epoch": 1.3036661286603644, + "grad_norm": 1.5140544105240696, + "learning_rate": 5.931472603831507e-07, + "loss": 0.5969122648239136, + "step": 5654 + }, + { + "epoch": 1.303896702789947, + "grad_norm": 1.6283257057537612, + "learning_rate": 5.927990101942826e-07, + "loss": 0.47013232111930847, + "step": 5655 + }, + { + "epoch": 1.3041272769195296, + "grad_norm": 1.485470149052559, + "learning_rate": 5.924508191962059e-07, + "loss": 0.4135271906852722, + "step": 5656 + }, + { + "epoch": 1.3043578510491123, + "grad_norm": 1.6826248484124529, + "learning_rate": 5.921026874395327e-07, + "loss": 0.45639151334762573, + "step": 5657 + }, + { + "epoch": 1.304588425178695, + "grad_norm": 1.4851105420204929, + "learning_rate": 5.917546149748676e-07, + "loss": 0.4047633409500122, + "step": 5658 + }, + { + "epoch": 1.3048189993082775, + "grad_norm": 1.470073094956581, + "learning_rate": 5.91406601852807e-07, + "loss": 0.4352290630340576, + "step": 
5659 + }, + { + "epoch": 1.3050495734378602, + "grad_norm": 1.569723084578139, + "learning_rate": 5.910586481239375e-07, + "loss": 0.4912130534648895, + "step": 5660 + }, + { + "epoch": 1.305280147567443, + "grad_norm": 1.4302762159123064, + "learning_rate": 5.907107538388383e-07, + "loss": 0.4114433526992798, + "step": 5661 + }, + { + "epoch": 1.3055107216970256, + "grad_norm": 1.6307461117750972, + "learning_rate": 5.903629190480786e-07, + "loss": 0.4230955243110657, + "step": 5662 + }, + { + "epoch": 1.3057412958266084, + "grad_norm": 1.525164874833489, + "learning_rate": 5.900151438022205e-07, + "loss": 0.5020648241043091, + "step": 5663 + }, + { + "epoch": 1.3059718699561909, + "grad_norm": 1.6834639607808413, + "learning_rate": 5.89667428151816e-07, + "loss": 0.48636388778686523, + "step": 5664 + }, + { + "epoch": 1.3062024440857736, + "grad_norm": 1.376635193773143, + "learning_rate": 5.893197721474099e-07, + "loss": 0.412000447511673, + "step": 5665 + }, + { + "epoch": 1.3064330182153563, + "grad_norm": 1.8328035722486296, + "learning_rate": 5.889721758395369e-07, + "loss": 0.3584952652454376, + "step": 5666 + }, + { + "epoch": 1.3066635923449388, + "grad_norm": 1.599166825150926, + "learning_rate": 5.886246392787234e-07, + "loss": 0.4538918733596802, + "step": 5667 + }, + { + "epoch": 1.3068941664745215, + "grad_norm": 1.3551701558323133, + "learning_rate": 5.882771625154883e-07, + "loss": 0.478498637676239, + "step": 5668 + }, + { + "epoch": 1.3071247406041042, + "grad_norm": 1.5353917292288828, + "learning_rate": 5.879297456003398e-07, + "loss": 0.49535906314849854, + "step": 5669 + }, + { + "epoch": 1.307355314733687, + "grad_norm": 1.4516733372645705, + "learning_rate": 5.875823885837793e-07, + "loss": 0.48975661396980286, + "step": 5670 + }, + { + "epoch": 1.3075858888632697, + "grad_norm": 1.675865776424194, + "learning_rate": 5.87235091516298e-07, + "loss": 0.4870087802410126, + "step": 5671 + }, + { + "epoch": 1.3078164629928521, + "grad_norm": 
1.5358758810801338, + "learning_rate": 5.8688785444838e-07, + "loss": 0.43411481380462646, + "step": 5672 + }, + { + "epoch": 1.3080470371224349, + "grad_norm": 1.5956307221574964, + "learning_rate": 5.865406774304986e-07, + "loss": 0.5108835697174072, + "step": 5673 + }, + { + "epoch": 1.3082776112520176, + "grad_norm": 1.6165992027891032, + "learning_rate": 5.861935605131202e-07, + "loss": 0.47449198365211487, + "step": 5674 + }, + { + "epoch": 1.3085081853816, + "grad_norm": 1.8165499378032328, + "learning_rate": 5.858465037467014e-07, + "loss": 0.5550234913825989, + "step": 5675 + }, + { + "epoch": 1.3087387595111828, + "grad_norm": 1.5758581559369806, + "learning_rate": 5.854995071816911e-07, + "loss": 0.4548208713531494, + "step": 5676 + }, + { + "epoch": 1.3089693336407655, + "grad_norm": 1.4849539841305146, + "learning_rate": 5.851525708685279e-07, + "loss": 0.5176935195922852, + "step": 5677 + }, + { + "epoch": 1.3091999077703482, + "grad_norm": 1.5664760566663032, + "learning_rate": 5.848056948576428e-07, + "loss": 0.4460016191005707, + "step": 5678 + }, + { + "epoch": 1.309430481899931, + "grad_norm": 1.808203061607658, + "learning_rate": 5.84458879199458e-07, + "loss": 0.5344464182853699, + "step": 5679 + }, + { + "epoch": 1.3096610560295134, + "grad_norm": 1.3109840468073877, + "learning_rate": 5.841121239443863e-07, + "loss": 0.48601672053337097, + "step": 5680 + }, + { + "epoch": 1.3098916301590962, + "grad_norm": 1.3467689115963568, + "learning_rate": 5.837654291428327e-07, + "loss": 0.46849286556243896, + "step": 5681 + }, + { + "epoch": 1.3101222042886789, + "grad_norm": 1.2665516862618484, + "learning_rate": 5.834187948451918e-07, + "loss": 0.4353019893169403, + "step": 5682 + }, + { + "epoch": 1.3103527784182614, + "grad_norm": 1.7099740749541261, + "learning_rate": 5.830722211018516e-07, + "loss": 0.5345665812492371, + "step": 5683 + }, + { + "epoch": 1.310583352547844, + "grad_norm": 1.4659221660940824, + "learning_rate": 
5.827257079631886e-07, + "loss": 0.4060036540031433, + "step": 5684 + }, + { + "epoch": 1.3108139266774268, + "grad_norm": 1.3640742579072, + "learning_rate": 5.823792554795738e-07, + "loss": 0.43724536895751953, + "step": 5685 + }, + { + "epoch": 1.3110445008070095, + "grad_norm": 1.550163679413481, + "learning_rate": 5.820328637013665e-07, + "loss": 0.4600690007209778, + "step": 5686 + }, + { + "epoch": 1.3112750749365922, + "grad_norm": 1.5199243554334652, + "learning_rate": 5.816865326789182e-07, + "loss": 0.4352531433105469, + "step": 5687 + }, + { + "epoch": 1.3115056490661747, + "grad_norm": 1.4575114943022274, + "learning_rate": 5.813402624625722e-07, + "loss": 0.39384984970092773, + "step": 5688 + }, + { + "epoch": 1.3117362231957574, + "grad_norm": 1.329194110980277, + "learning_rate": 5.809940531026616e-07, + "loss": 0.44367098808288574, + "step": 5689 + }, + { + "epoch": 1.3119667973253402, + "grad_norm": 1.4497223943190725, + "learning_rate": 5.806479046495123e-07, + "loss": 0.4757416546344757, + "step": 5690 + }, + { + "epoch": 1.3121973714549227, + "grad_norm": 1.5821654764353048, + "learning_rate": 5.803018171534396e-07, + "loss": 0.521708607673645, + "step": 5691 + }, + { + "epoch": 1.3124279455845054, + "grad_norm": 1.3510537988002305, + "learning_rate": 5.799557906647514e-07, + "loss": 0.4127439260482788, + "step": 5692 + }, + { + "epoch": 1.312658519714088, + "grad_norm": 1.4570205213875538, + "learning_rate": 5.79609825233746e-07, + "loss": 0.4809693396091461, + "step": 5693 + }, + { + "epoch": 1.3128890938436708, + "grad_norm": 1.2590938015478794, + "learning_rate": 5.792639209107134e-07, + "loss": 0.5075684189796448, + "step": 5694 + }, + { + "epoch": 1.3131196679732535, + "grad_norm": 1.3738792104421846, + "learning_rate": 5.789180777459336e-07, + "loss": 0.416393518447876, + "step": 5695 + }, + { + "epoch": 1.313350242102836, + "grad_norm": 1.4282126857493198, + "learning_rate": 5.78572295789679e-07, + "loss": 0.4456642270088196, + "step": 
5696 + }, + { + "epoch": 1.3135808162324187, + "grad_norm": 1.327521871832615, + "learning_rate": 5.782265750922124e-07, + "loss": 0.4757812023162842, + "step": 5697 + }, + { + "epoch": 1.3138113903620015, + "grad_norm": 1.6103197546493997, + "learning_rate": 5.778809157037872e-07, + "loss": 0.5081768035888672, + "step": 5698 + }, + { + "epoch": 1.314041964491584, + "grad_norm": 1.6849043068796357, + "learning_rate": 5.775353176746489e-07, + "loss": 0.4604584872722626, + "step": 5699 + }, + { + "epoch": 1.3142725386211667, + "grad_norm": 1.3964100189157245, + "learning_rate": 5.771897810550339e-07, + "loss": 0.4153773784637451, + "step": 5700 + }, + { + "epoch": 1.3145031127507494, + "grad_norm": 1.5346514188080242, + "learning_rate": 5.768443058951695e-07, + "loss": 0.5194085836410522, + "step": 5701 + }, + { + "epoch": 1.314733686880332, + "grad_norm": 1.6610989574168062, + "learning_rate": 5.764988922452733e-07, + "loss": 0.4398482143878937, + "step": 5702 + }, + { + "epoch": 1.3149642610099148, + "grad_norm": 1.747178590910114, + "learning_rate": 5.761535401555558e-07, + "loss": 0.5148836374282837, + "step": 5703 + }, + { + "epoch": 1.3151948351394973, + "grad_norm": 1.8977812861580863, + "learning_rate": 5.758082496762163e-07, + "loss": 0.533142626285553, + "step": 5704 + }, + { + "epoch": 1.31542540926908, + "grad_norm": 1.3488739739710767, + "learning_rate": 5.754630208574473e-07, + "loss": 0.4059423804283142, + "step": 5705 + }, + { + "epoch": 1.3156559833986627, + "grad_norm": 1.3213051571946475, + "learning_rate": 5.751178537494302e-07, + "loss": 0.4685533940792084, + "step": 5706 + }, + { + "epoch": 1.3158865575282452, + "grad_norm": 1.5403217644159128, + "learning_rate": 5.747727484023392e-07, + "loss": 0.4454694986343384, + "step": 5707 + }, + { + "epoch": 1.316117131657828, + "grad_norm": 1.481350859430692, + "learning_rate": 5.74427704866339e-07, + "loss": 0.4058796167373657, + "step": 5708 + }, + { + "epoch": 1.3163477057874107, + "grad_norm": 
1.3294270142641733, + "learning_rate": 5.740827231915847e-07, + "loss": 0.3891766369342804, + "step": 5709 + }, + { + "epoch": 1.3165782799169934, + "grad_norm": 1.5072356875610937, + "learning_rate": 5.737378034282235e-07, + "loss": 0.47912657260894775, + "step": 5710 + }, + { + "epoch": 1.316808854046576, + "grad_norm": 1.5228549079910219, + "learning_rate": 5.733929456263922e-07, + "loss": 0.4221952557563782, + "step": 5711 + }, + { + "epoch": 1.3170394281761586, + "grad_norm": 1.5405159904484362, + "learning_rate": 5.730481498362202e-07, + "loss": 0.39018404483795166, + "step": 5712 + }, + { + "epoch": 1.3172700023057413, + "grad_norm": 1.6184406292698126, + "learning_rate": 5.727034161078262e-07, + "loss": 0.5388307571411133, + "step": 5713 + }, + { + "epoch": 1.317500576435324, + "grad_norm": 1.5278965195377916, + "learning_rate": 5.723587444913216e-07, + "loss": 0.3243408501148224, + "step": 5714 + }, + { + "epoch": 1.3177311505649065, + "grad_norm": 1.6496814482710773, + "learning_rate": 5.720141350368072e-07, + "loss": 0.46480363607406616, + "step": 5715 + }, + { + "epoch": 1.3179617246944892, + "grad_norm": 1.6265951465013608, + "learning_rate": 5.716695877943757e-07, + "loss": 0.5286417603492737, + "step": 5716 + }, + { + "epoch": 1.318192298824072, + "grad_norm": 1.455901542591345, + "learning_rate": 5.71325102814111e-07, + "loss": 0.4170069694519043, + "step": 5717 + }, + { + "epoch": 1.3184228729536547, + "grad_norm": 1.5051159019770526, + "learning_rate": 5.709806801460867e-07, + "loss": 0.5738973617553711, + "step": 5718 + }, + { + "epoch": 1.3186534470832374, + "grad_norm": 1.4473352410585376, + "learning_rate": 5.706363198403689e-07, + "loss": 0.5309658050537109, + "step": 5719 + }, + { + "epoch": 1.31888402121282, + "grad_norm": 1.588487236125564, + "learning_rate": 5.70292021947013e-07, + "loss": 0.4569379389286041, + "step": 5720 + }, + { + "epoch": 1.3191145953424026, + "grad_norm": 1.5641598702256398, + "learning_rate": 5.699477865160674e-07, 
+ "loss": 0.46686258912086487, + "step": 5721 + }, + { + "epoch": 1.3193451694719853, + "grad_norm": 1.551220703032623, + "learning_rate": 5.696036135975688e-07, + "loss": 0.5333213806152344, + "step": 5722 + }, + { + "epoch": 1.3195757436015678, + "grad_norm": 1.6027893782611593, + "learning_rate": 5.69259503241547e-07, + "loss": 0.3519536256790161, + "step": 5723 + }, + { + "epoch": 1.3198063177311505, + "grad_norm": 1.5104260104986362, + "learning_rate": 5.689154554980218e-07, + "loss": 0.4763161242008209, + "step": 5724 + }, + { + "epoch": 1.3200368918607333, + "grad_norm": 1.5061315373489772, + "learning_rate": 5.685714704170044e-07, + "loss": 0.43600207567214966, + "step": 5725 + }, + { + "epoch": 1.320267465990316, + "grad_norm": 1.4992417251350876, + "learning_rate": 5.682275480484958e-07, + "loss": 0.41991305351257324, + "step": 5726 + }, + { + "epoch": 1.3204980401198987, + "grad_norm": 1.663551629444692, + "learning_rate": 5.678836884424894e-07, + "loss": 0.44275131821632385, + "step": 5727 + }, + { + "epoch": 1.3207286142494812, + "grad_norm": 1.65999947024113, + "learning_rate": 5.675398916489682e-07, + "loss": 0.4339372515678406, + "step": 5728 + }, + { + "epoch": 1.320959188379064, + "grad_norm": 1.484455134036602, + "learning_rate": 5.671961577179062e-07, + "loss": 0.4462248384952545, + "step": 5729 + }, + { + "epoch": 1.3211897625086464, + "grad_norm": 1.4704913213821902, + "learning_rate": 5.668524866992693e-07, + "loss": 0.36548441648483276, + "step": 5730 + }, + { + "epoch": 1.321420336638229, + "grad_norm": 1.5370532211440713, + "learning_rate": 5.665088786430129e-07, + "loss": 0.4709678888320923, + "step": 5731 + }, + { + "epoch": 1.3216509107678118, + "grad_norm": 1.4993066403144744, + "learning_rate": 5.661653335990848e-07, + "loss": 0.40125030279159546, + "step": 5732 + }, + { + "epoch": 1.3218814848973945, + "grad_norm": 1.8517319571144346, + "learning_rate": 5.658218516174218e-07, + "loss": 0.5288605690002441, + "step": 5733 + }, + { + 
"epoch": 1.3221120590269773, + "grad_norm": 1.2954018601150643, + "learning_rate": 5.654784327479534e-07, + "loss": 0.41306072473526, + "step": 5734 + }, + { + "epoch": 1.3223426331565598, + "grad_norm": 1.3199807449430407, + "learning_rate": 5.651350770405983e-07, + "loss": 0.34327009320259094, + "step": 5735 + }, + { + "epoch": 1.3225732072861425, + "grad_norm": 1.4524630442098247, + "learning_rate": 5.647917845452671e-07, + "loss": 0.5055800080299377, + "step": 5736 + }, + { + "epoch": 1.3228037814157252, + "grad_norm": 1.7153085926535214, + "learning_rate": 5.644485553118609e-07, + "loss": 0.45496249198913574, + "step": 5737 + }, + { + "epoch": 1.3230343555453077, + "grad_norm": 1.6142993934275558, + "learning_rate": 5.641053893902708e-07, + "loss": 0.4626169502735138, + "step": 5738 + }, + { + "epoch": 1.3232649296748904, + "grad_norm": 1.3569624734396053, + "learning_rate": 5.637622868303802e-07, + "loss": 0.46621328592300415, + "step": 5739 + }, + { + "epoch": 1.3234955038044731, + "grad_norm": 1.5833136701466524, + "learning_rate": 5.634192476820623e-07, + "loss": 0.47793662548065186, + "step": 5740 + }, + { + "epoch": 1.3237260779340558, + "grad_norm": 1.5367680790773321, + "learning_rate": 5.630762719951816e-07, + "loss": 0.42578715085983276, + "step": 5741 + }, + { + "epoch": 1.3239566520636386, + "grad_norm": 1.7421270871218182, + "learning_rate": 5.627333598195927e-07, + "loss": 0.3146113157272339, + "step": 5742 + }, + { + "epoch": 1.324187226193221, + "grad_norm": 1.376620002714832, + "learning_rate": 5.623905112051417e-07, + "loss": 0.39731544256210327, + "step": 5743 + }, + { + "epoch": 1.3244178003228038, + "grad_norm": 1.6655684412604148, + "learning_rate": 5.620477262016647e-07, + "loss": 0.3755846619606018, + "step": 5744 + }, + { + "epoch": 1.3246483744523865, + "grad_norm": 1.5953907301532468, + "learning_rate": 5.617050048589896e-07, + "loss": 0.43060415983200073, + "step": 5745 + }, + { + "epoch": 1.324878948581969, + "grad_norm": 
1.54564820857706, + "learning_rate": 5.613623472269334e-07, + "loss": 0.4213481545448303, + "step": 5746 + }, + { + "epoch": 1.3251095227115517, + "grad_norm": 1.2422408749001486, + "learning_rate": 5.610197533553057e-07, + "loss": 0.3923456072807312, + "step": 5747 + }, + { + "epoch": 1.3253400968411344, + "grad_norm": 1.6088447345623693, + "learning_rate": 5.606772232939061e-07, + "loss": 0.42293328046798706, + "step": 5748 + }, + { + "epoch": 1.3255706709707171, + "grad_norm": 1.596682526932072, + "learning_rate": 5.603347570925242e-07, + "loss": 0.4545479118824005, + "step": 5749 + }, + { + "epoch": 1.3258012451002998, + "grad_norm": 1.4262513090332916, + "learning_rate": 5.599923548009416e-07, + "loss": 0.3969312310218811, + "step": 5750 + }, + { + "epoch": 1.3260318192298823, + "grad_norm": 1.687653911460881, + "learning_rate": 5.59650016468929e-07, + "loss": 0.4296644330024719, + "step": 5751 + }, + { + "epoch": 1.326262393359465, + "grad_norm": 1.4928189267328964, + "learning_rate": 5.5930774214625e-07, + "loss": 0.43291348218917847, + "step": 5752 + }, + { + "epoch": 1.3264929674890478, + "grad_norm": 1.4463941028108167, + "learning_rate": 5.589655318826564e-07, + "loss": 0.47684454917907715, + "step": 5753 + }, + { + "epoch": 1.3267235416186303, + "grad_norm": 1.3515496302725483, + "learning_rate": 5.586233857278924e-07, + "loss": 0.48520004749298096, + "step": 5754 + }, + { + "epoch": 1.326954115748213, + "grad_norm": 1.6127441732883512, + "learning_rate": 5.582813037316926e-07, + "loss": 0.4434587359428406, + "step": 5755 + }, + { + "epoch": 1.3271846898777957, + "grad_norm": 1.7808352880972456, + "learning_rate": 5.579392859437825e-07, + "loss": 0.47306808829307556, + "step": 5756 + }, + { + "epoch": 1.3274152640073784, + "grad_norm": 1.5663021335869645, + "learning_rate": 5.575973324138772e-07, + "loss": 0.4349653720855713, + "step": 5757 + }, + { + "epoch": 1.3276458381369611, + "grad_norm": 1.2914359149982935, + "learning_rate": 
5.572554431916829e-07, + "loss": 0.31277602910995483, + "step": 5758 + }, + { + "epoch": 1.3278764122665436, + "grad_norm": 1.5658319454866303, + "learning_rate": 5.569136183268974e-07, + "loss": 0.4281114637851715, + "step": 5759 + }, + { + "epoch": 1.3281069863961263, + "grad_norm": 1.2867721627127386, + "learning_rate": 5.565718578692076e-07, + "loss": 0.45071113109588623, + "step": 5760 + }, + { + "epoch": 1.328337560525709, + "grad_norm": 1.4460147363867, + "learning_rate": 5.562301618682927e-07, + "loss": 0.426133394241333, + "step": 5761 + }, + { + "epoch": 1.3285681346552916, + "grad_norm": 1.3630920926710801, + "learning_rate": 5.558885303738209e-07, + "loss": 0.3882424235343933, + "step": 5762 + }, + { + "epoch": 1.3287987087848743, + "grad_norm": 1.3878174095068123, + "learning_rate": 5.55546963435452e-07, + "loss": 0.4706958532333374, + "step": 5763 + }, + { + "epoch": 1.329029282914457, + "grad_norm": 1.9122348340273743, + "learning_rate": 5.552054611028365e-07, + "loss": 0.4868433475494385, + "step": 5764 + }, + { + "epoch": 1.3292598570440397, + "grad_norm": 1.4411048310630292, + "learning_rate": 5.548640234256154e-07, + "loss": 0.41839566826820374, + "step": 5765 + }, + { + "epoch": 1.3294904311736224, + "grad_norm": 1.9627530346102546, + "learning_rate": 5.545226504534195e-07, + "loss": 0.4088629484176636, + "step": 5766 + }, + { + "epoch": 1.329721005303205, + "grad_norm": 1.3819218540316194, + "learning_rate": 5.541813422358715e-07, + "loss": 0.34617769718170166, + "step": 5767 + }, + { + "epoch": 1.3299515794327876, + "grad_norm": 1.5711021474470717, + "learning_rate": 5.538400988225835e-07, + "loss": 0.5098900198936462, + "step": 5768 + }, + { + "epoch": 1.3301821535623704, + "grad_norm": 1.5683015797269382, + "learning_rate": 5.534989202631586e-07, + "loss": 0.4294108748435974, + "step": 5769 + }, + { + "epoch": 1.3304127276919528, + "grad_norm": 1.3488716534216894, + "learning_rate": 5.531578066071907e-07, + "loss": 0.42205139994621277, + 
"step": 5770 + }, + { + "epoch": 1.3306433018215356, + "grad_norm": 1.8657910300729754, + "learning_rate": 5.528167579042645e-07, + "loss": 0.5009530186653137, + "step": 5771 + }, + { + "epoch": 1.3308738759511183, + "grad_norm": 1.468249228101101, + "learning_rate": 5.524757742039545e-07, + "loss": 0.554497241973877, + "step": 5772 + }, + { + "epoch": 1.331104450080701, + "grad_norm": 1.711116822757576, + "learning_rate": 5.521348555558263e-07, + "loss": 0.3514432907104492, + "step": 5773 + }, + { + "epoch": 1.3313350242102837, + "grad_norm": 1.4224522574801144, + "learning_rate": 5.51794002009436e-07, + "loss": 0.4712038040161133, + "step": 5774 + }, + { + "epoch": 1.3315655983398662, + "grad_norm": 1.6288850118765847, + "learning_rate": 5.514532136143295e-07, + "loss": 0.48556071519851685, + "step": 5775 + }, + { + "epoch": 1.331796172469449, + "grad_norm": 1.42798680480441, + "learning_rate": 5.511124904200448e-07, + "loss": 0.43158456683158875, + "step": 5776 + }, + { + "epoch": 1.3320267465990316, + "grad_norm": 1.8128360066016722, + "learning_rate": 5.507718324761085e-07, + "loss": 0.5376255512237549, + "step": 5777 + }, + { + "epoch": 1.3322573207286141, + "grad_norm": 1.446480187929883, + "learning_rate": 5.504312398320392e-07, + "loss": 0.3800685405731201, + "step": 5778 + }, + { + "epoch": 1.3324878948581969, + "grad_norm": 1.3675185316121448, + "learning_rate": 5.500907125373458e-07, + "loss": 0.4015260338783264, + "step": 5779 + }, + { + "epoch": 1.3327184689877796, + "grad_norm": 1.7400186621828952, + "learning_rate": 5.497502506415266e-07, + "loss": 0.42762285470962524, + "step": 5780 + }, + { + "epoch": 1.3329490431173623, + "grad_norm": 1.4501572722598215, + "learning_rate": 5.494098541940719e-07, + "loss": 0.4467644691467285, + "step": 5781 + }, + { + "epoch": 1.333179617246945, + "grad_norm": 1.9298171674754279, + "learning_rate": 5.490695232444613e-07, + "loss": 0.42699599266052246, + "step": 5782 + }, + { + "epoch": 1.3334101913765275, + 
"grad_norm": 1.6654850032985582, + "learning_rate": 5.487292578421659e-07, + "loss": 0.586537778377533, + "step": 5783 + }, + { + "epoch": 1.3336407655061102, + "grad_norm": 1.761605169999467, + "learning_rate": 5.48389058036646e-07, + "loss": 0.4525066018104553, + "step": 5784 + }, + { + "epoch": 1.333871339635693, + "grad_norm": 1.4697934550209713, + "learning_rate": 5.480489238773535e-07, + "loss": 0.40520548820495605, + "step": 5785 + }, + { + "epoch": 1.3341019137652754, + "grad_norm": 1.7127717596843188, + "learning_rate": 5.477088554137304e-07, + "loss": 0.3910450339317322, + "step": 5786 + }, + { + "epoch": 1.3343324878948581, + "grad_norm": 1.781985995356997, + "learning_rate": 5.473688526952087e-07, + "loss": 0.45285511016845703, + "step": 5787 + }, + { + "epoch": 1.3345630620244409, + "grad_norm": 1.3079701521023397, + "learning_rate": 5.47028915771212e-07, + "loss": 0.39207279682159424, + "step": 5788 + }, + { + "epoch": 1.3347936361540236, + "grad_norm": 1.3401224496215014, + "learning_rate": 5.466890446911527e-07, + "loss": 0.40281063318252563, + "step": 5789 + }, + { + "epoch": 1.3350242102836063, + "grad_norm": 1.5855589292084546, + "learning_rate": 5.463492395044354e-07, + "loss": 0.5087814927101135, + "step": 5790 + }, + { + "epoch": 1.3352547844131888, + "grad_norm": 1.6443172906836578, + "learning_rate": 5.460095002604532e-07, + "loss": 0.47597891092300415, + "step": 5791 + }, + { + "epoch": 1.3354853585427715, + "grad_norm": 1.656230003127049, + "learning_rate": 5.456698270085917e-07, + "loss": 0.5722953677177429, + "step": 5792 + }, + { + "epoch": 1.3357159326723542, + "grad_norm": 1.6424947586218923, + "learning_rate": 5.45330219798225e-07, + "loss": 0.5133349299430847, + "step": 5793 + }, + { + "epoch": 1.3359465068019367, + "grad_norm": 1.5413030595202453, + "learning_rate": 5.449906786787187e-07, + "loss": 0.46230804920196533, + "step": 5794 + }, + { + "epoch": 1.3361770809315194, + "grad_norm": 1.6839619437291453, + "learning_rate": 
5.446512036994286e-07, + "loss": 0.42002394795417786, + "step": 5795 + }, + { + "epoch": 1.3364076550611022, + "grad_norm": 1.46623243210155, + "learning_rate": 5.443117949097013e-07, + "loss": 0.42281097173690796, + "step": 5796 + }, + { + "epoch": 1.3366382291906849, + "grad_norm": 1.4476698476010996, + "learning_rate": 5.439724523588726e-07, + "loss": 0.511898398399353, + "step": 5797 + }, + { + "epoch": 1.3368688033202676, + "grad_norm": 1.4307520026731049, + "learning_rate": 5.4363317609627e-07, + "loss": 0.4475559592247009, + "step": 5798 + }, + { + "epoch": 1.33709937744985, + "grad_norm": 1.509864957359139, + "learning_rate": 5.432939661712103e-07, + "loss": 0.4872414469718933, + "step": 5799 + }, + { + "epoch": 1.3373299515794328, + "grad_norm": 1.3480605234272842, + "learning_rate": 5.429548226330009e-07, + "loss": 0.40401679277420044, + "step": 5800 + }, + { + "epoch": 1.3375605257090155, + "grad_norm": 2.083088707198395, + "learning_rate": 5.426157455309399e-07, + "loss": 0.43559926748275757, + "step": 5801 + }, + { + "epoch": 1.337791099838598, + "grad_norm": 1.6000855398004097, + "learning_rate": 5.422767349143158e-07, + "loss": 0.44283759593963623, + "step": 5802 + }, + { + "epoch": 1.3380216739681807, + "grad_norm": 1.310277684226626, + "learning_rate": 5.419377908324077e-07, + "loss": 0.3770032525062561, + "step": 5803 + }, + { + "epoch": 1.3382522480977634, + "grad_norm": 1.3856773934136148, + "learning_rate": 5.415989133344834e-07, + "loss": 0.4497501850128174, + "step": 5804 + }, + { + "epoch": 1.3384828222273462, + "grad_norm": 1.49195449044666, + "learning_rate": 5.412601024698033e-07, + "loss": 0.5008253455162048, + "step": 5805 + }, + { + "epoch": 1.3387133963569289, + "grad_norm": 1.3694796854029274, + "learning_rate": 5.409213582876162e-07, + "loss": 0.46178537607192993, + "step": 5806 + }, + { + "epoch": 1.3389439704865114, + "grad_norm": 1.1951838089282807, + "learning_rate": 5.405826808371625e-07, + "loss": 0.39843931794166565, + 
"step": 5807 + }, + { + "epoch": 1.339174544616094, + "grad_norm": 1.4243934050525646, + "learning_rate": 5.402440701676724e-07, + "loss": 0.4829174280166626, + "step": 5808 + }, + { + "epoch": 1.3394051187456768, + "grad_norm": 1.0859530853021675, + "learning_rate": 5.399055263283656e-07, + "loss": 0.36173316836357117, + "step": 5809 + }, + { + "epoch": 1.3396356928752593, + "grad_norm": 1.5741135880130834, + "learning_rate": 5.395670493684536e-07, + "loss": 0.400304913520813, + "step": 5810 + }, + { + "epoch": 1.339866267004842, + "grad_norm": 1.507879612413509, + "learning_rate": 5.392286393371372e-07, + "loss": 0.4536975622177124, + "step": 5811 + }, + { + "epoch": 1.3400968411344247, + "grad_norm": 1.7310508291395992, + "learning_rate": 5.388902962836084e-07, + "loss": 0.6474577188491821, + "step": 5812 + }, + { + "epoch": 1.3403274152640074, + "grad_norm": 1.6348182443046517, + "learning_rate": 5.385520202570477e-07, + "loss": 0.48008009791374207, + "step": 5813 + }, + { + "epoch": 1.3405579893935902, + "grad_norm": 1.6214175923335088, + "learning_rate": 5.38213811306628e-07, + "loss": 0.4518657326698303, + "step": 5814 + }, + { + "epoch": 1.3407885635231727, + "grad_norm": 1.280530895656809, + "learning_rate": 5.378756694815105e-07, + "loss": 0.449008584022522, + "step": 5815 + }, + { + "epoch": 1.3410191376527554, + "grad_norm": 1.689898643370083, + "learning_rate": 5.375375948308483e-07, + "loss": 0.5448319315910339, + "step": 5816 + }, + { + "epoch": 1.341249711782338, + "grad_norm": 1.5166178678578832, + "learning_rate": 5.371995874037832e-07, + "loss": 0.5078369379043579, + "step": 5817 + }, + { + "epoch": 1.3414802859119206, + "grad_norm": 1.611364899344997, + "learning_rate": 5.368616472494482e-07, + "loss": 0.508685290813446, + "step": 5818 + }, + { + "epoch": 1.3417108600415033, + "grad_norm": 1.3809568946566115, + "learning_rate": 5.365237744169672e-07, + "loss": 0.4166705012321472, + "step": 5819 + }, + { + "epoch": 1.341941434171086, + 
"grad_norm": 1.432431964622234, + "learning_rate": 5.361859689554524e-07, + "loss": 0.4741361737251282, + "step": 5820 + }, + { + "epoch": 1.3421720083006687, + "grad_norm": 1.5546451283342237, + "learning_rate": 5.358482309140079e-07, + "loss": 0.36658185720443726, + "step": 5821 + }, + { + "epoch": 1.3424025824302515, + "grad_norm": 1.9632157270552801, + "learning_rate": 5.355105603417267e-07, + "loss": 0.38921263813972473, + "step": 5822 + }, + { + "epoch": 1.342633156559834, + "grad_norm": 1.9732368197118861, + "learning_rate": 5.351729572876935e-07, + "loss": 0.5553977489471436, + "step": 5823 + }, + { + "epoch": 1.3428637306894167, + "grad_norm": 1.4618484003422054, + "learning_rate": 5.348354218009813e-07, + "loss": 0.3968391418457031, + "step": 5824 + }, + { + "epoch": 1.3430943048189994, + "grad_norm": 1.4937275325292458, + "learning_rate": 5.344979539306549e-07, + "loss": 0.4289783239364624, + "step": 5825 + }, + { + "epoch": 1.3433248789485819, + "grad_norm": 1.313862309148984, + "learning_rate": 5.341605537257686e-07, + "loss": 0.45359861850738525, + "step": 5826 + }, + { + "epoch": 1.3435554530781646, + "grad_norm": 1.366684570776694, + "learning_rate": 5.338232212353675e-07, + "loss": 0.3571642339229584, + "step": 5827 + }, + { + "epoch": 1.3437860272077473, + "grad_norm": 1.1954938252676188, + "learning_rate": 5.334859565084855e-07, + "loss": 0.3784096837043762, + "step": 5828 + }, + { + "epoch": 1.34401660133733, + "grad_norm": 1.5372749019268697, + "learning_rate": 5.331487595941475e-07, + "loss": 0.44996407628059387, + "step": 5829 + }, + { + "epoch": 1.3442471754669127, + "grad_norm": 1.4793854978740197, + "learning_rate": 5.32811630541369e-07, + "loss": 0.4466405510902405, + "step": 5830 + }, + { + "epoch": 1.3444777495964952, + "grad_norm": 1.3432081322840168, + "learning_rate": 5.324745693991545e-07, + "loss": 0.34488850831985474, + "step": 5831 + }, + { + "epoch": 1.344708323726078, + "grad_norm": 1.589654871057016, + "learning_rate": 
5.321375762164999e-07, + "loss": 0.5530165433883667, + "step": 5832 + }, + { + "epoch": 1.3449388978556607, + "grad_norm": 1.6555576202053326, + "learning_rate": 5.318006510423898e-07, + "loss": 0.40732342004776, + "step": 5833 + }, + { + "epoch": 1.3451694719852432, + "grad_norm": 1.5528027430812303, + "learning_rate": 5.314637939258002e-07, + "loss": 0.3364611566066742, + "step": 5834 + }, + { + "epoch": 1.3454000461148259, + "grad_norm": 1.4557702222082582, + "learning_rate": 5.311270049156966e-07, + "loss": 0.43964290618896484, + "step": 5835 + }, + { + "epoch": 1.3456306202444086, + "grad_norm": 1.5963363545263636, + "learning_rate": 5.30790284061035e-07, + "loss": 0.5203431844711304, + "step": 5836 + }, + { + "epoch": 1.3458611943739913, + "grad_norm": 1.356219303149177, + "learning_rate": 5.304536314107607e-07, + "loss": 0.4779793620109558, + "step": 5837 + }, + { + "epoch": 1.346091768503574, + "grad_norm": 1.4030454651132978, + "learning_rate": 5.301170470138102e-07, + "loss": 0.4769410490989685, + "step": 5838 + }, + { + "epoch": 1.3463223426331565, + "grad_norm": 1.5437367488200047, + "learning_rate": 5.297805309191089e-07, + "loss": 0.42390304803848267, + "step": 5839 + }, + { + "epoch": 1.3465529167627392, + "grad_norm": 1.6498587295444291, + "learning_rate": 5.294440831755727e-07, + "loss": 0.5550302863121033, + "step": 5840 + }, + { + "epoch": 1.3467834908923217, + "grad_norm": 1.5927381474044073, + "learning_rate": 5.291077038321078e-07, + "loss": 0.4897978901863098, + "step": 5841 + }, + { + "epoch": 1.3470140650219045, + "grad_norm": 1.5707311912828865, + "learning_rate": 5.287713929376105e-07, + "loss": 0.4014284610748291, + "step": 5842 + }, + { + "epoch": 1.3472446391514872, + "grad_norm": 1.61036503253005, + "learning_rate": 5.284351505409675e-07, + "loss": 0.4299513101577759, + "step": 5843 + }, + { + "epoch": 1.34747521328107, + "grad_norm": 1.382725158348277, + "learning_rate": 5.280989766910541e-07, + "loss": 0.44863104820251465, + "step": 
5844 + }, + { + "epoch": 1.3477057874106526, + "grad_norm": 1.4391517424186664, + "learning_rate": 5.277628714367374e-07, + "loss": 0.41933274269104004, + "step": 5845 + }, + { + "epoch": 1.347936361540235, + "grad_norm": 1.5110585127257306, + "learning_rate": 5.274268348268729e-07, + "loss": 0.48257556557655334, + "step": 5846 + }, + { + "epoch": 1.3481669356698178, + "grad_norm": 1.6840388322451993, + "learning_rate": 5.270908669103078e-07, + "loss": 0.435384064912796, + "step": 5847 + }, + { + "epoch": 1.3483975097994005, + "grad_norm": 1.502056490079635, + "learning_rate": 5.267549677358775e-07, + "loss": 0.43291670083999634, + "step": 5848 + }, + { + "epoch": 1.348628083928983, + "grad_norm": 2.07427587572329, + "learning_rate": 5.264191373524089e-07, + "loss": 0.4584086537361145, + "step": 5849 + }, + { + "epoch": 1.3488586580585658, + "grad_norm": 1.4212548389061759, + "learning_rate": 5.260833758087187e-07, + "loss": 0.44879037141799927, + "step": 5850 + }, + { + "epoch": 1.3490892321881485, + "grad_norm": 1.4876230861981237, + "learning_rate": 5.257476831536124e-07, + "loss": 0.48467326164245605, + "step": 5851 + }, + { + "epoch": 1.3493198063177312, + "grad_norm": 1.4803329007154076, + "learning_rate": 5.254120594358871e-07, + "loss": 0.4126189947128296, + "step": 5852 + }, + { + "epoch": 1.349550380447314, + "grad_norm": 1.494164620045959, + "learning_rate": 5.250765047043284e-07, + "loss": 0.5592546463012695, + "step": 5853 + }, + { + "epoch": 1.3497809545768964, + "grad_norm": 1.2572079660485564, + "learning_rate": 5.247410190077134e-07, + "loss": 0.3269529342651367, + "step": 5854 + }, + { + "epoch": 1.3500115287064791, + "grad_norm": 1.4784058003593112, + "learning_rate": 5.244056023948075e-07, + "loss": 0.42812949419021606, + "step": 5855 + }, + { + "epoch": 1.3502421028360618, + "grad_norm": 1.643847647603701, + "learning_rate": 5.240702549143676e-07, + "loss": 0.4266297221183777, + "step": 5856 + }, + { + "epoch": 1.3504726769656443, + 
"grad_norm": 1.6490610440384348, + "learning_rate": 5.237349766151392e-07, + "loss": 0.43848085403442383, + "step": 5857 + }, + { + "epoch": 1.350703251095227, + "grad_norm": 1.5778355488021025, + "learning_rate": 5.233997675458588e-07, + "loss": 0.47512906789779663, + "step": 5858 + }, + { + "epoch": 1.3509338252248098, + "grad_norm": 1.4893970639177625, + "learning_rate": 5.230646277552527e-07, + "loss": 0.3484492897987366, + "step": 5859 + }, + { + "epoch": 1.3511643993543925, + "grad_norm": 1.5529244445697006, + "learning_rate": 5.227295572920363e-07, + "loss": 0.48915669322013855, + "step": 5860 + }, + { + "epoch": 1.3513949734839752, + "grad_norm": 1.687195391171769, + "learning_rate": 5.223945562049159e-07, + "loss": 0.415932834148407, + "step": 5861 + }, + { + "epoch": 1.3516255476135577, + "grad_norm": 1.8036222540660396, + "learning_rate": 5.220596245425869e-07, + "loss": 0.47945982217788696, + "step": 5862 + }, + { + "epoch": 1.3518561217431404, + "grad_norm": 1.7032993247582504, + "learning_rate": 5.217247623537356e-07, + "loss": 0.4322330951690674, + "step": 5863 + }, + { + "epoch": 1.3520866958727231, + "grad_norm": 1.7271334098970212, + "learning_rate": 5.213899696870369e-07, + "loss": 0.4608469605445862, + "step": 5864 + }, + { + "epoch": 1.3523172700023056, + "grad_norm": 1.4726583260713841, + "learning_rate": 5.210552465911566e-07, + "loss": 0.5108528137207031, + "step": 5865 + }, + { + "epoch": 1.3525478441318883, + "grad_norm": 1.3172906919344538, + "learning_rate": 5.207205931147502e-07, + "loss": 0.37947285175323486, + "step": 5866 + }, + { + "epoch": 1.352778418261471, + "grad_norm": 1.5825329658520386, + "learning_rate": 5.203860093064635e-07, + "loss": 0.49094486236572266, + "step": 5867 + }, + { + "epoch": 1.3530089923910538, + "grad_norm": 1.7057097538270483, + "learning_rate": 5.200514952149308e-07, + "loss": 0.34238702058792114, + "step": 5868 + }, + { + "epoch": 1.3532395665206365, + "grad_norm": 1.4815052827701158, + "learning_rate": 
5.197170508887774e-07, + "loss": 0.46390393376350403, + "step": 5869 + }, + { + "epoch": 1.353470140650219, + "grad_norm": 1.517083535949924, + "learning_rate": 5.193826763766183e-07, + "loss": 0.44219160079956055, + "step": 5870 + }, + { + "epoch": 1.3537007147798017, + "grad_norm": 1.2444078580604416, + "learning_rate": 5.190483717270578e-07, + "loss": 0.42801350355148315, + "step": 5871 + }, + { + "epoch": 1.3539312889093844, + "grad_norm": 1.5276855271974423, + "learning_rate": 5.187141369886906e-07, + "loss": 0.43861454725265503, + "step": 5872 + }, + { + "epoch": 1.354161863038967, + "grad_norm": 1.3684710867849712, + "learning_rate": 5.183799722101014e-07, + "loss": 0.4381449222564697, + "step": 5873 + }, + { + "epoch": 1.3543924371685496, + "grad_norm": 1.6990772878337996, + "learning_rate": 5.180458774398646e-07, + "loss": 0.4341619610786438, + "step": 5874 + }, + { + "epoch": 1.3546230112981323, + "grad_norm": 1.5170997767832792, + "learning_rate": 5.177118527265437e-07, + "loss": 0.4376588463783264, + "step": 5875 + }, + { + "epoch": 1.354853585427715, + "grad_norm": 1.4712846387139202, + "learning_rate": 5.173778981186932e-07, + "loss": 0.38568538427352905, + "step": 5876 + }, + { + "epoch": 1.3550841595572978, + "grad_norm": 1.4162179235966152, + "learning_rate": 5.170440136648561e-07, + "loss": 0.44178056716918945, + "step": 5877 + }, + { + "epoch": 1.3553147336868803, + "grad_norm": 1.434763306400174, + "learning_rate": 5.167101994135665e-07, + "loss": 0.49847882986068726, + "step": 5878 + }, + { + "epoch": 1.355545307816463, + "grad_norm": 1.3114035605969607, + "learning_rate": 5.163764554133476e-07, + "loss": 0.33697545528411865, + "step": 5879 + }, + { + "epoch": 1.3557758819460457, + "grad_norm": 1.9314852987462174, + "learning_rate": 5.160427817127117e-07, + "loss": 0.5216578841209412, + "step": 5880 + }, + { + "epoch": 1.3560064560756282, + "grad_norm": 1.5367735086016923, + "learning_rate": 5.157091783601624e-07, + "loss": 0.5101301670074463, 
+ "step": 5881 + }, + { + "epoch": 1.356237030205211, + "grad_norm": 1.4437708354871932, + "learning_rate": 5.15375645404192e-07, + "loss": 0.47876495122909546, + "step": 5882 + }, + { + "epoch": 1.3564676043347936, + "grad_norm": 1.413429948502146, + "learning_rate": 5.150421828932837e-07, + "loss": 0.4656233787536621, + "step": 5883 + }, + { + "epoch": 1.3566981784643763, + "grad_norm": 1.4503708847221477, + "learning_rate": 5.147087908759082e-07, + "loss": 0.4392930269241333, + "step": 5884 + }, + { + "epoch": 1.356928752593959, + "grad_norm": 1.6187538312851866, + "learning_rate": 5.143754694005289e-07, + "loss": 0.5044047832489014, + "step": 5885 + }, + { + "epoch": 1.3571593267235416, + "grad_norm": 1.3914560087628793, + "learning_rate": 5.140422185155964e-07, + "loss": 0.4345476031303406, + "step": 5886 + }, + { + "epoch": 1.3573899008531243, + "grad_norm": 1.768236932460398, + "learning_rate": 5.137090382695528e-07, + "loss": 0.49207669496536255, + "step": 5887 + }, + { + "epoch": 1.357620474982707, + "grad_norm": 1.531417533887488, + "learning_rate": 5.133759287108286e-07, + "loss": 0.4054356813430786, + "step": 5888 + }, + { + "epoch": 1.3578510491122895, + "grad_norm": 1.9704323937726442, + "learning_rate": 5.130428898878449e-07, + "loss": 0.5436004400253296, + "step": 5889 + }, + { + "epoch": 1.3580816232418722, + "grad_norm": 1.521959500035041, + "learning_rate": 5.127099218490127e-07, + "loss": 0.4832550287246704, + "step": 5890 + }, + { + "epoch": 1.358312197371455, + "grad_norm": 1.4438750839498624, + "learning_rate": 5.123770246427315e-07, + "loss": 0.38890475034713745, + "step": 5891 + }, + { + "epoch": 1.3585427715010376, + "grad_norm": 1.3028583829520697, + "learning_rate": 5.12044198317392e-07, + "loss": 0.49784210324287415, + "step": 5892 + }, + { + "epoch": 1.3587733456306204, + "grad_norm": 1.5058620289816076, + "learning_rate": 5.117114429213732e-07, + "loss": 0.5033924579620361, + "step": 5893 + }, + { + "epoch": 1.3590039197602028, + 
"grad_norm": 1.5069016697055244, + "learning_rate": 5.113787585030454e-07, + "loss": 0.4857698678970337, + "step": 5894 + }, + { + "epoch": 1.3592344938897856, + "grad_norm": 1.6430229342698937, + "learning_rate": 5.110461451107663e-07, + "loss": 0.4269944429397583, + "step": 5895 + }, + { + "epoch": 1.3594650680193683, + "grad_norm": 1.5554523008644683, + "learning_rate": 5.107136027928858e-07, + "loss": 0.44045162200927734, + "step": 5896 + }, + { + "epoch": 1.3596956421489508, + "grad_norm": 1.6719472262672752, + "learning_rate": 5.103811315977418e-07, + "loss": 0.5223391056060791, + "step": 5897 + }, + { + "epoch": 1.3599262162785335, + "grad_norm": 1.6234993813736853, + "learning_rate": 5.100487315736627e-07, + "loss": 0.45988473296165466, + "step": 5898 + }, + { + "epoch": 1.3601567904081162, + "grad_norm": 1.3494964030299075, + "learning_rate": 5.097164027689661e-07, + "loss": 0.46342164278030396, + "step": 5899 + }, + { + "epoch": 1.360387364537699, + "grad_norm": 1.6151646749241875, + "learning_rate": 5.093841452319588e-07, + "loss": 0.48150479793548584, + "step": 5900 + }, + { + "epoch": 1.3606179386672816, + "grad_norm": 1.3258214555354595, + "learning_rate": 5.090519590109386e-07, + "loss": 0.3971351981163025, + "step": 5901 + }, + { + "epoch": 1.3608485127968641, + "grad_norm": 1.755266254483419, + "learning_rate": 5.087198441541914e-07, + "loss": 0.44869956374168396, + "step": 5902 + }, + { + "epoch": 1.3610790869264469, + "grad_norm": 1.4425507935259798, + "learning_rate": 5.083878007099943e-07, + "loss": 0.3402775526046753, + "step": 5903 + }, + { + "epoch": 1.3613096610560296, + "grad_norm": 1.3415772700158808, + "learning_rate": 5.080558287266119e-07, + "loss": 0.4031033515930176, + "step": 5904 + }, + { + "epoch": 1.361540235185612, + "grad_norm": 1.6435607583739225, + "learning_rate": 5.077239282523012e-07, + "loss": 0.493259459733963, + "step": 5905 + }, + { + "epoch": 1.3617708093151948, + "grad_norm": 1.4120722192098578, + "learning_rate": 
5.073920993353063e-07, + "loss": 0.39178919792175293, + "step": 5906 + }, + { + "epoch": 1.3620013834447775, + "grad_norm": 1.6684880889475469, + "learning_rate": 5.070603420238624e-07, + "loss": 0.5091253519058228, + "step": 5907 + }, + { + "epoch": 1.3622319575743602, + "grad_norm": 1.3497137288112562, + "learning_rate": 5.067286563661934e-07, + "loss": 0.416462779045105, + "step": 5908 + }, + { + "epoch": 1.362462531703943, + "grad_norm": 1.7821137618482668, + "learning_rate": 5.063970424105137e-07, + "loss": 0.5018768310546875, + "step": 5909 + }, + { + "epoch": 1.3626931058335254, + "grad_norm": 1.4656990143163084, + "learning_rate": 5.060655002050262e-07, + "loss": 0.5512624979019165, + "step": 5910 + }, + { + "epoch": 1.3629236799631081, + "grad_norm": 1.3507263825947706, + "learning_rate": 5.057340297979241e-07, + "loss": 0.3953768014907837, + "step": 5911 + }, + { + "epoch": 1.3631542540926909, + "grad_norm": 1.2807145092132266, + "learning_rate": 5.054026312373896e-07, + "loss": 0.4355456233024597, + "step": 5912 + }, + { + "epoch": 1.3633848282222734, + "grad_norm": 1.7515987196576535, + "learning_rate": 5.050713045715955e-07, + "loss": 0.4826827645301819, + "step": 5913 + }, + { + "epoch": 1.363615402351856, + "grad_norm": 1.5075633708078446, + "learning_rate": 5.047400498487035e-07, + "loss": 0.47084230184555054, + "step": 5914 + }, + { + "epoch": 1.3638459764814388, + "grad_norm": 1.750968751768445, + "learning_rate": 5.044088671168644e-07, + "loss": 0.5273452997207642, + "step": 5915 + }, + { + "epoch": 1.3640765506110215, + "grad_norm": 1.484245498844297, + "learning_rate": 5.040777564242194e-07, + "loss": 0.44878947734832764, + "step": 5916 + }, + { + "epoch": 1.3643071247406042, + "grad_norm": 1.5815904358854045, + "learning_rate": 5.03746717818898e-07, + "loss": 0.47986388206481934, + "step": 5917 + }, + { + "epoch": 1.3645376988701867, + "grad_norm": 1.4148899602283196, + "learning_rate": 5.034157513490211e-07, + "loss": 0.4807628393173218, + 
"step": 5918 + }, + { + "epoch": 1.3647682729997694, + "grad_norm": 1.3747301384734179, + "learning_rate": 5.030848570626969e-07, + "loss": 0.46027708053588867, + "step": 5919 + }, + { + "epoch": 1.3649988471293522, + "grad_norm": 1.517934310152821, + "learning_rate": 5.027540350080249e-07, + "loss": 0.3803088963031769, + "step": 5920 + }, + { + "epoch": 1.3652294212589347, + "grad_norm": 1.7239494972976075, + "learning_rate": 5.024232852330939e-07, + "loss": 0.5530920028686523, + "step": 5921 + }, + { + "epoch": 1.3654599953885174, + "grad_norm": 1.7183928961648565, + "learning_rate": 5.020926077859805e-07, + "loss": 0.45984846353530884, + "step": 5922 + }, + { + "epoch": 1.3656905695181, + "grad_norm": 1.5752429840016822, + "learning_rate": 5.017620027147533e-07, + "loss": 0.4448089301586151, + "step": 5923 + }, + { + "epoch": 1.3659211436476828, + "grad_norm": 1.713335636587649, + "learning_rate": 5.01431470067468e-07, + "loss": 0.4226706326007843, + "step": 5924 + }, + { + "epoch": 1.3661517177772655, + "grad_norm": 1.9953320185051966, + "learning_rate": 5.011010098921718e-07, + "loss": 0.5243814587593079, + "step": 5925 + }, + { + "epoch": 1.366382291906848, + "grad_norm": 1.6278540239253128, + "learning_rate": 5.007706222368995e-07, + "loss": 0.5733383893966675, + "step": 5926 + }, + { + "epoch": 1.3666128660364307, + "grad_norm": 1.373199955472141, + "learning_rate": 5.00440307149677e-07, + "loss": 0.4583539366722107, + "step": 5927 + }, + { + "epoch": 1.3668434401660134, + "grad_norm": 1.5871148090703988, + "learning_rate": 5.001100646785186e-07, + "loss": 0.474712610244751, + "step": 5928 + }, + { + "epoch": 1.367074014295596, + "grad_norm": 1.6888872351824356, + "learning_rate": 4.997798948714291e-07, + "loss": 0.3995950222015381, + "step": 5929 + }, + { + "epoch": 1.3673045884251787, + "grad_norm": 1.7317310910620232, + "learning_rate": 4.994497977764011e-07, + "loss": 0.4236767888069153, + "step": 5930 + }, + { + "epoch": 1.3675351625547614, + 
"grad_norm": 1.6853541022393534, + "learning_rate": 4.991197734414178e-07, + "loss": 0.4972396492958069, + "step": 5931 + }, + { + "epoch": 1.367765736684344, + "grad_norm": 1.503037819471691, + "learning_rate": 4.98789821914452e-07, + "loss": 0.444613516330719, + "step": 5932 + }, + { + "epoch": 1.3679963108139268, + "grad_norm": 1.6912958330957677, + "learning_rate": 4.984599432434649e-07, + "loss": 0.4955690801143646, + "step": 5933 + }, + { + "epoch": 1.3682268849435093, + "grad_norm": 1.559115794882019, + "learning_rate": 4.981301374764084e-07, + "loss": 0.4983398914337158, + "step": 5934 + }, + { + "epoch": 1.368457459073092, + "grad_norm": 1.5588186216828477, + "learning_rate": 4.978004046612223e-07, + "loss": 0.45190921425819397, + "step": 5935 + }, + { + "epoch": 1.3686880332026747, + "grad_norm": 1.757499738470118, + "learning_rate": 4.974707448458369e-07, + "loss": 0.5014151334762573, + "step": 5936 + }, + { + "epoch": 1.3689186073322572, + "grad_norm": 1.5399509659752455, + "learning_rate": 4.971411580781719e-07, + "loss": 0.3868405818939209, + "step": 5937 + }, + { + "epoch": 1.36914918146184, + "grad_norm": 1.42775142494789, + "learning_rate": 4.968116444061363e-07, + "loss": 0.4093654155731201, + "step": 5938 + }, + { + "epoch": 1.3693797555914227, + "grad_norm": 1.318689202230345, + "learning_rate": 4.964822038776276e-07, + "loss": 0.3945506513118744, + "step": 5939 + }, + { + "epoch": 1.3696103297210054, + "grad_norm": 1.5874458283663229, + "learning_rate": 4.961528365405333e-07, + "loss": 0.3645547330379486, + "step": 5940 + }, + { + "epoch": 1.369840903850588, + "grad_norm": 1.760752800086673, + "learning_rate": 4.958235424427309e-07, + "loss": 0.36679786443710327, + "step": 5941 + }, + { + "epoch": 1.3700714779801706, + "grad_norm": 1.5458160371079348, + "learning_rate": 4.954943216320861e-07, + "loss": 0.4892774820327759, + "step": 5942 + }, + { + "epoch": 1.3703020521097533, + "grad_norm": 1.4817693224477149, + "learning_rate": 
4.951651741564544e-07, + "loss": 0.40406349301338196, + "step": 5943 + }, + { + "epoch": 1.370532626239336, + "grad_norm": 1.277384097830529, + "learning_rate": 4.948361000636812e-07, + "loss": 0.4219849407672882, + "step": 5944 + }, + { + "epoch": 1.3707632003689185, + "grad_norm": 1.7190062313169097, + "learning_rate": 4.945070994016008e-07, + "loss": 0.5329363346099854, + "step": 5945 + }, + { + "epoch": 1.3709937744985012, + "grad_norm": 1.5495655705207303, + "learning_rate": 4.941781722180361e-07, + "loss": 0.42577850818634033, + "step": 5946 + }, + { + "epoch": 1.371224348628084, + "grad_norm": 1.3916296167797302, + "learning_rate": 4.938493185608008e-07, + "loss": 0.4157155156135559, + "step": 5947 + }, + { + "epoch": 1.3714549227576667, + "grad_norm": 1.5016286739703502, + "learning_rate": 4.935205384776965e-07, + "loss": 0.46491485834121704, + "step": 5948 + }, + { + "epoch": 1.3716854968872494, + "grad_norm": 1.6766694792768029, + "learning_rate": 4.931918320165151e-07, + "loss": 0.39582759141921997, + "step": 5949 + }, + { + "epoch": 1.3719160710168319, + "grad_norm": 1.3277840228822322, + "learning_rate": 4.928631992250371e-07, + "loss": 0.4380473792552948, + "step": 5950 + }, + { + "epoch": 1.3721466451464146, + "grad_norm": 1.5358043238579873, + "learning_rate": 4.925346401510327e-07, + "loss": 0.5044572949409485, + "step": 5951 + }, + { + "epoch": 1.372377219275997, + "grad_norm": 1.6172521688559274, + "learning_rate": 4.922061548422617e-07, + "loss": 0.4808889627456665, + "step": 5952 + }, + { + "epoch": 1.3726077934055798, + "grad_norm": 1.370713689883329, + "learning_rate": 4.91877743346472e-07, + "loss": 0.4215632677078247, + "step": 5953 + }, + { + "epoch": 1.3728383675351625, + "grad_norm": 1.4640509349497177, + "learning_rate": 4.915494057114025e-07, + "loss": 0.4999268651008606, + "step": 5954 + }, + { + "epoch": 1.3730689416647452, + "grad_norm": 1.593000178254792, + "learning_rate": 4.912211419847793e-07, + "loss": 0.476152241230011, + 
"step": 5955 + }, + { + "epoch": 1.373299515794328, + "grad_norm": 1.5436036358421792, + "learning_rate": 4.908929522143201e-07, + "loss": 0.4253045320510864, + "step": 5956 + }, + { + "epoch": 1.3735300899239105, + "grad_norm": 1.6726587032262756, + "learning_rate": 4.905648364477293e-07, + "loss": 0.4251098036766052, + "step": 5957 + }, + { + "epoch": 1.3737606640534932, + "grad_norm": 1.5635582188699524, + "learning_rate": 4.902367947327029e-07, + "loss": 0.3820844888687134, + "step": 5958 + }, + { + "epoch": 1.373991238183076, + "grad_norm": 1.5563353591748068, + "learning_rate": 4.899088271169245e-07, + "loss": 0.4725508689880371, + "step": 5959 + }, + { + "epoch": 1.3742218123126584, + "grad_norm": 1.4545077693536257, + "learning_rate": 4.895809336480675e-07, + "loss": 0.48313626646995544, + "step": 5960 + }, + { + "epoch": 1.374452386442241, + "grad_norm": 1.6596316713803083, + "learning_rate": 4.892531143737952e-07, + "loss": 0.5344939231872559, + "step": 5961 + }, + { + "epoch": 1.3746829605718238, + "grad_norm": 1.7551620350578117, + "learning_rate": 4.889253693417585e-07, + "loss": 0.4305552840232849, + "step": 5962 + }, + { + "epoch": 1.3749135347014065, + "grad_norm": 1.4302106398553562, + "learning_rate": 4.885976985995996e-07, + "loss": 0.3564034700393677, + "step": 5963 + }, + { + "epoch": 1.3751441088309893, + "grad_norm": 1.4796542999179279, + "learning_rate": 4.882701021949475e-07, + "loss": 0.5498751997947693, + "step": 5964 + }, + { + "epoch": 1.3753746829605717, + "grad_norm": 1.5956710623028654, + "learning_rate": 4.879425801754226e-07, + "loss": 0.4489964246749878, + "step": 5965 + }, + { + "epoch": 1.3756052570901545, + "grad_norm": 1.7595842751992934, + "learning_rate": 4.87615132588633e-07, + "loss": 0.4142688810825348, + "step": 5966 + }, + { + "epoch": 1.3758358312197372, + "grad_norm": 1.483255834477138, + "learning_rate": 4.872877594821767e-07, + "loss": 0.3823632597923279, + "step": 5967 + }, + { + "epoch": 1.3760664053493197, + 
"grad_norm": 1.603982795420405, + "learning_rate": 4.869604609036408e-07, + "loss": 0.39014697074890137, + "step": 5968 + }, + { + "epoch": 1.3762969794789024, + "grad_norm": 1.5363032345717058, + "learning_rate": 4.866332369006016e-07, + "loss": 0.3907933235168457, + "step": 5969 + }, + { + "epoch": 1.376527553608485, + "grad_norm": 1.5125931439342233, + "learning_rate": 4.863060875206244e-07, + "loss": 0.3872087001800537, + "step": 5970 + }, + { + "epoch": 1.3767581277380678, + "grad_norm": 1.5847290584713085, + "learning_rate": 4.85979012811263e-07, + "loss": 0.40380537509918213, + "step": 5971 + }, + { + "epoch": 1.3769887018676505, + "grad_norm": 1.3127541034285726, + "learning_rate": 4.856520128200621e-07, + "loss": 0.39867663383483887, + "step": 5972 + }, + { + "epoch": 1.377219275997233, + "grad_norm": 1.7829413941875683, + "learning_rate": 4.853250875945534e-07, + "loss": 0.5337423086166382, + "step": 5973 + }, + { + "epoch": 1.3774498501268158, + "grad_norm": 1.4903518724810052, + "learning_rate": 4.849982371822593e-07, + "loss": 0.3824300765991211, + "step": 5974 + }, + { + "epoch": 1.3776804242563985, + "grad_norm": 1.4611697760932394, + "learning_rate": 4.846714616306907e-07, + "loss": 0.3613823652267456, + "step": 5975 + }, + { + "epoch": 1.377910998385981, + "grad_norm": 1.5701851835478555, + "learning_rate": 4.843447609873484e-07, + "loss": 0.5040241479873657, + "step": 5976 + }, + { + "epoch": 1.3781415725155637, + "grad_norm": 1.5801365248176698, + "learning_rate": 4.840181352997207e-07, + "loss": 0.4639400243759155, + "step": 5977 + }, + { + "epoch": 1.3783721466451464, + "grad_norm": 1.730401874176074, + "learning_rate": 4.836915846152867e-07, + "loss": 0.503246009349823, + "step": 5978 + }, + { + "epoch": 1.3786027207747291, + "grad_norm": 1.6695377873006745, + "learning_rate": 4.833651089815135e-07, + "loss": 0.3974607586860657, + "step": 5979 + }, + { + "epoch": 1.3788332949043118, + "grad_norm": 1.556324884896908, + "learning_rate": 
4.830387084458573e-07, + "loss": 0.43200844526290894, + "step": 5980 + }, + { + "epoch": 1.3790638690338943, + "grad_norm": 1.8355646307086506, + "learning_rate": 4.827123830557644e-07, + "loss": 0.547272801399231, + "step": 5981 + }, + { + "epoch": 1.379294443163477, + "grad_norm": 1.5723785141918243, + "learning_rate": 4.823861328586688e-07, + "loss": 0.4509696960449219, + "step": 5982 + }, + { + "epoch": 1.3795250172930598, + "grad_norm": 1.53889123165165, + "learning_rate": 4.820599579019946e-07, + "loss": 0.46022483706474304, + "step": 5983 + }, + { + "epoch": 1.3797555914226423, + "grad_norm": 1.5251655198087088, + "learning_rate": 4.817338582331548e-07, + "loss": 0.40973198413848877, + "step": 5984 + }, + { + "epoch": 1.379986165552225, + "grad_norm": 1.6235538954137896, + "learning_rate": 4.814078338995515e-07, + "loss": 0.39012736082077026, + "step": 5985 + }, + { + "epoch": 1.3802167396818077, + "grad_norm": 1.6954879615528178, + "learning_rate": 4.810818849485749e-07, + "loss": 0.40657323598861694, + "step": 5986 + }, + { + "epoch": 1.3804473138113904, + "grad_norm": 1.4158383607530642, + "learning_rate": 4.80756011427606e-07, + "loss": 0.38662189245224, + "step": 5987 + }, + { + "epoch": 1.3806778879409731, + "grad_norm": 1.629559894183336, + "learning_rate": 4.804302133840126e-07, + "loss": 0.4888705015182495, + "step": 5988 + }, + { + "epoch": 1.3809084620705556, + "grad_norm": 1.4732586688358036, + "learning_rate": 4.801044908651537e-07, + "loss": 0.4559556245803833, + "step": 5989 + }, + { + "epoch": 1.3811390362001383, + "grad_norm": 1.773370569584542, + "learning_rate": 4.797788439183757e-07, + "loss": 0.40912386775016785, + "step": 5990 + }, + { + "epoch": 1.381369610329721, + "grad_norm": 1.3364334005028415, + "learning_rate": 4.794532725910152e-07, + "loss": 0.3848627209663391, + "step": 5991 + }, + { + "epoch": 1.3816001844593035, + "grad_norm": 1.3860556916017956, + "learning_rate": 4.791277769303975e-07, + "loss": 0.4995359778404236, + 
"step": 5992 + }, + { + "epoch": 1.3818307585888863, + "grad_norm": 1.3898521995378452, + "learning_rate": 4.788023569838356e-07, + "loss": 0.38717859983444214, + "step": 5993 + }, + { + "epoch": 1.382061332718469, + "grad_norm": 1.7766923949498086, + "learning_rate": 4.784770127986339e-07, + "loss": 0.39855217933654785, + "step": 5994 + }, + { + "epoch": 1.3822919068480517, + "grad_norm": 1.337680228597258, + "learning_rate": 4.781517444220835e-07, + "loss": 0.38494858145713806, + "step": 5995 + }, + { + "epoch": 1.3825224809776344, + "grad_norm": 1.4735802599680248, + "learning_rate": 4.778265519014661e-07, + "loss": 0.44064784049987793, + "step": 5996 + }, + { + "epoch": 1.382753055107217, + "grad_norm": 1.8926413264660993, + "learning_rate": 4.775014352840512e-07, + "loss": 0.39377373456954956, + "step": 5997 + }, + { + "epoch": 1.3829836292367996, + "grad_norm": 1.5108151654480286, + "learning_rate": 4.771763946170979e-07, + "loss": 0.45127296447753906, + "step": 5998 + }, + { + "epoch": 1.3832142033663823, + "grad_norm": 1.4916107560429466, + "learning_rate": 4.768514299478545e-07, + "loss": 0.4999358654022217, + "step": 5999 + }, + { + "epoch": 1.3834447774959648, + "grad_norm": 1.7185286370183794, + "learning_rate": 4.7652654132355784e-07, + "loss": 0.49552851915359497, + "step": 6000 + }, + { + "epoch": 1.3836753516255476, + "grad_norm": 1.7765151369959267, + "learning_rate": 4.762017287914338e-07, + "loss": 0.49196135997772217, + "step": 6001 + }, + { + "epoch": 1.3839059257551303, + "grad_norm": 1.6417248034868954, + "learning_rate": 4.758769923986966e-07, + "loss": 0.3870600461959839, + "step": 6002 + }, + { + "epoch": 1.384136499884713, + "grad_norm": 1.6104154654929026, + "learning_rate": 4.7555233219255074e-07, + "loss": 0.4585425853729248, + "step": 6003 + }, + { + "epoch": 1.3843670740142957, + "grad_norm": 1.3699827425500786, + "learning_rate": 4.752277482201882e-07, + "loss": 0.4332588315010071, + "step": 6004 + }, + { + "epoch": 
1.3845976481438782, + "grad_norm": 1.6005942921335146, + "learning_rate": 4.749032405287913e-07, + "loss": 0.4386274814605713, + "step": 6005 + }, + { + "epoch": 1.384828222273461, + "grad_norm": 1.430715117905666, + "learning_rate": 4.745788091655295e-07, + "loss": 0.5064895749092102, + "step": 6006 + }, + { + "epoch": 1.3850587964030436, + "grad_norm": 1.470846994377081, + "learning_rate": 4.7425445417756295e-07, + "loss": 0.4441327452659607, + "step": 6007 + }, + { + "epoch": 1.3852893705326261, + "grad_norm": 1.6191746478584856, + "learning_rate": 4.7393017561203965e-07, + "loss": 0.4415687918663025, + "step": 6008 + }, + { + "epoch": 1.3855199446622088, + "grad_norm": 1.4021203224812295, + "learning_rate": 4.736059735160973e-07, + "loss": 0.4668382704257965, + "step": 6009 + }, + { + "epoch": 1.3857505187917916, + "grad_norm": 1.6079029250549948, + "learning_rate": 4.732818479368615e-07, + "loss": 0.3981805443763733, + "step": 6010 + }, + { + "epoch": 1.3859810929213743, + "grad_norm": 1.4448652226463723, + "learning_rate": 4.7295779892144694e-07, + "loss": 0.4465348720550537, + "step": 6011 + }, + { + "epoch": 1.386211667050957, + "grad_norm": 1.7530840597871544, + "learning_rate": 4.7263382651695805e-07, + "loss": 0.4844682812690735, + "step": 6012 + }, + { + "epoch": 1.3864422411805395, + "grad_norm": 1.417618664232542, + "learning_rate": 4.723099307704868e-07, + "loss": 0.4261378347873688, + "step": 6013 + }, + { + "epoch": 1.3866728153101222, + "grad_norm": 1.4997543603341101, + "learning_rate": 4.7198611172911506e-07, + "loss": 0.457815945148468, + "step": 6014 + }, + { + "epoch": 1.386903389439705, + "grad_norm": 1.570655771567204, + "learning_rate": 4.7166236943991333e-07, + "loss": 0.46352216601371765, + "step": 6015 + }, + { + "epoch": 1.3871339635692874, + "grad_norm": 1.486567492766103, + "learning_rate": 4.7133870394994104e-07, + "loss": 0.4166485667228699, + "step": 6016 + }, + { + "epoch": 1.3873645376988701, + "grad_norm": 1.6982826579565595, + 
"learning_rate": 4.710151153062456e-07, + "loss": 0.405789852142334, + "step": 6017 + }, + { + "epoch": 1.3875951118284529, + "grad_norm": 1.7459761562612983, + "learning_rate": 4.7069160355586456e-07, + "loss": 0.47718119621276855, + "step": 6018 + }, + { + "epoch": 1.3878256859580356, + "grad_norm": 1.5824023496617, + "learning_rate": 4.7036816874582307e-07, + "loss": 0.5040356516838074, + "step": 6019 + }, + { + "epoch": 1.3880562600876183, + "grad_norm": 1.5657039890557007, + "learning_rate": 4.700448109231362e-07, + "loss": 0.45093637704849243, + "step": 6020 + }, + { + "epoch": 1.3882868342172008, + "grad_norm": 1.4929438188817195, + "learning_rate": 4.6972153013480666e-07, + "loss": 0.5363638997077942, + "step": 6021 + }, + { + "epoch": 1.3885174083467835, + "grad_norm": 1.6076509313088967, + "learning_rate": 4.6939832642782684e-07, + "loss": 0.4917050004005432, + "step": 6022 + }, + { + "epoch": 1.3887479824763662, + "grad_norm": 1.692377103708349, + "learning_rate": 4.690751998491782e-07, + "loss": 0.43033331632614136, + "step": 6023 + }, + { + "epoch": 1.3889785566059487, + "grad_norm": 1.5272594017885164, + "learning_rate": 4.6875215044582973e-07, + "loss": 0.36168330907821655, + "step": 6024 + }, + { + "epoch": 1.3892091307355314, + "grad_norm": 1.693805471797637, + "learning_rate": 4.6842917826474047e-07, + "loss": 0.48347967863082886, + "step": 6025 + }, + { + "epoch": 1.3894397048651141, + "grad_norm": 1.332022962916858, + "learning_rate": 4.681062833528572e-07, + "loss": 0.4493439495563507, + "step": 6026 + }, + { + "epoch": 1.3896702789946969, + "grad_norm": 1.4842335012941816, + "learning_rate": 4.677834657571165e-07, + "loss": 0.385773628950119, + "step": 6027 + }, + { + "epoch": 1.3899008531242796, + "grad_norm": 1.396017775513053, + "learning_rate": 4.674607255244426e-07, + "loss": 0.4254469573497772, + "step": 6028 + }, + { + "epoch": 1.390131427253862, + "grad_norm": 1.6964811881797437, + "learning_rate": 4.671380627017497e-07, + "loss": 
0.5070454478263855, + "step": 6029 + }, + { + "epoch": 1.3903620013834448, + "grad_norm": 1.4647574188657595, + "learning_rate": 4.668154773359394e-07, + "loss": 0.44099801778793335, + "step": 6030 + }, + { + "epoch": 1.3905925755130275, + "grad_norm": 1.6731498815474952, + "learning_rate": 4.6649296947390314e-07, + "loss": 0.4965481162071228, + "step": 6031 + }, + { + "epoch": 1.39082314964261, + "grad_norm": 1.6621123973009748, + "learning_rate": 4.6617053916252116e-07, + "loss": 0.4085753262042999, + "step": 6032 + }, + { + "epoch": 1.3910537237721927, + "grad_norm": 1.473260966023028, + "learning_rate": 4.6584818644866106e-07, + "loss": 0.3768424391746521, + "step": 6033 + }, + { + "epoch": 1.3912842979017754, + "grad_norm": 1.7152094772871185, + "learning_rate": 4.6552591137918087e-07, + "loss": 0.4330044388771057, + "step": 6034 + }, + { + "epoch": 1.3915148720313582, + "grad_norm": 1.5907700374750249, + "learning_rate": 4.6520371400092584e-07, + "loss": 0.4669216275215149, + "step": 6035 + }, + { + "epoch": 1.3917454461609409, + "grad_norm": 1.8634085835731031, + "learning_rate": 4.648815943607314e-07, + "loss": 0.5491182208061218, + "step": 6036 + }, + { + "epoch": 1.3919760202905234, + "grad_norm": 1.439715262819595, + "learning_rate": 4.6455955250542e-07, + "loss": 0.4842255413532257, + "step": 6037 + }, + { + "epoch": 1.392206594420106, + "grad_norm": 1.598726710739168, + "learning_rate": 4.6423758848180427e-07, + "loss": 0.45479631423950195, + "step": 6038 + }, + { + "epoch": 1.3924371685496888, + "grad_norm": 1.5770365297702393, + "learning_rate": 4.6391570233668486e-07, + "loss": 0.4209587574005127, + "step": 6039 + }, + { + "epoch": 1.3926677426792713, + "grad_norm": 1.4722680740741498, + "learning_rate": 4.6359389411685145e-07, + "loss": 0.5061464905738831, + "step": 6040 + }, + { + "epoch": 1.392898316808854, + "grad_norm": 1.5166334201375402, + "learning_rate": 4.6327216386908196e-07, + "loss": 0.39443570375442505, + "step": 6041 + }, + { + 
"epoch": 1.3931288909384367, + "grad_norm": 1.6936024892202146, + "learning_rate": 4.6295051164014256e-07, + "loss": 0.4784463942050934, + "step": 6042 + }, + { + "epoch": 1.3933594650680194, + "grad_norm": 1.623401531095956, + "learning_rate": 4.6262893747678957e-07, + "loss": 0.41256606578826904, + "step": 6043 + }, + { + "epoch": 1.3935900391976022, + "grad_norm": 1.430742297932055, + "learning_rate": 4.623074414257662e-07, + "loss": 0.4507666230201721, + "step": 6044 + }, + { + "epoch": 1.3938206133271847, + "grad_norm": 1.4646678303979026, + "learning_rate": 4.6198602353380545e-07, + "loss": 0.3783376216888428, + "step": 6045 + }, + { + "epoch": 1.3940511874567674, + "grad_norm": 1.5485119918407955, + "learning_rate": 4.616646838476289e-07, + "loss": 0.47854840755462646, + "step": 6046 + }, + { + "epoch": 1.39428176158635, + "grad_norm": 1.506150277535636, + "learning_rate": 4.6134342241394685e-07, + "loss": 0.47121208906173706, + "step": 6047 + }, + { + "epoch": 1.3945123357159326, + "grad_norm": 1.4779397331062858, + "learning_rate": 4.610222392794569e-07, + "loss": 0.5211559534072876, + "step": 6048 + }, + { + "epoch": 1.3947429098455153, + "grad_norm": 2.0522570691736606, + "learning_rate": 4.6070113449084747e-07, + "loss": 0.5846370458602905, + "step": 6049 + }, + { + "epoch": 1.394973483975098, + "grad_norm": 1.6651959806589232, + "learning_rate": 4.6038010809479365e-07, + "loss": 0.4787401854991913, + "step": 6050 + }, + { + "epoch": 1.3952040581046807, + "grad_norm": 1.336725780471279, + "learning_rate": 4.600591601379596e-07, + "loss": 0.36429738998413086, + "step": 6051 + }, + { + "epoch": 1.3954346322342635, + "grad_norm": 1.606284081701607, + "learning_rate": 4.597382906669992e-07, + "loss": 0.49923771619796753, + "step": 6052 + }, + { + "epoch": 1.395665206363846, + "grad_norm": 1.5476584348847333, + "learning_rate": 4.5941749972855326e-07, + "loss": 0.408005028963089, + "step": 6053 + }, + { + "epoch": 1.3958957804934287, + "grad_norm": 
1.72927604568786, + "learning_rate": 4.590967873692523e-07, + "loss": 0.4524402618408203, + "step": 6054 + }, + { + "epoch": 1.3961263546230114, + "grad_norm": 1.5041096845532136, + "learning_rate": 4.587761536357152e-07, + "loss": 0.5264980792999268, + "step": 6055 + }, + { + "epoch": 1.3963569287525939, + "grad_norm": 1.6066275699787076, + "learning_rate": 4.5845559857454976e-07, + "loss": 0.5324279069900513, + "step": 6056 + }, + { + "epoch": 1.3965875028821766, + "grad_norm": 1.4996065290876746, + "learning_rate": 4.581351222323511e-07, + "loss": 0.5197574496269226, + "step": 6057 + }, + { + "epoch": 1.3968180770117593, + "grad_norm": 1.6418756331716369, + "learning_rate": 4.578147246557043e-07, + "loss": 0.4549001157283783, + "step": 6058 + }, + { + "epoch": 1.397048651141342, + "grad_norm": 1.374490396915421, + "learning_rate": 4.5749440589118183e-07, + "loss": 0.38597673177719116, + "step": 6059 + }, + { + "epoch": 1.3972792252709247, + "grad_norm": 1.3707652210777583, + "learning_rate": 4.57174165985346e-07, + "loss": 0.4104316532611847, + "step": 6060 + }, + { + "epoch": 1.3975097994005072, + "grad_norm": 1.7242255092716443, + "learning_rate": 4.5685400498474614e-07, + "loss": 0.5241787433624268, + "step": 6061 + }, + { + "epoch": 1.39774037353009, + "grad_norm": 1.668574015144598, + "learning_rate": 4.565339229359213e-07, + "loss": 0.5033289790153503, + "step": 6062 + }, + { + "epoch": 1.3979709476596724, + "grad_norm": 1.3309384356199967, + "learning_rate": 4.5621391988539894e-07, + "loss": 0.436188280582428, + "step": 6063 + }, + { + "epoch": 1.3982015217892552, + "grad_norm": 1.4783680897212301, + "learning_rate": 4.5589399587969414e-07, + "loss": 0.3885838985443115, + "step": 6064 + }, + { + "epoch": 1.3984320959188379, + "grad_norm": 1.6395174483956128, + "learning_rate": 4.555741509653116e-07, + "loss": 0.5140193104743958, + "step": 6065 + }, + { + "epoch": 1.3986626700484206, + "grad_norm": 1.360236032045127, + "learning_rate": 
4.552543851887436e-07, + "loss": 0.41084468364715576, + "step": 6066 + }, + { + "epoch": 1.3988932441780033, + "grad_norm": 1.417896120601143, + "learning_rate": 4.549346985964718e-07, + "loss": 0.3606417179107666, + "step": 6067 + }, + { + "epoch": 1.3991238183075858, + "grad_norm": 1.5212574193639694, + "learning_rate": 4.546150912349653e-07, + "loss": 0.48518556356430054, + "step": 6068 + }, + { + "epoch": 1.3993543924371685, + "grad_norm": 1.6821671640024862, + "learning_rate": 4.5429556315068264e-07, + "loss": 0.5394424200057983, + "step": 6069 + }, + { + "epoch": 1.3995849665667512, + "grad_norm": 1.3734997636022714, + "learning_rate": 4.539761143900708e-07, + "loss": 0.40272367000579834, + "step": 6070 + }, + { + "epoch": 1.3998155406963337, + "grad_norm": 1.6175896107942709, + "learning_rate": 4.536567449995641e-07, + "loss": 0.4279879331588745, + "step": 6071 + }, + { + "epoch": 1.4000461148259165, + "grad_norm": 1.4620694447822713, + "learning_rate": 4.5333745502558695e-07, + "loss": 0.48560982942581177, + "step": 6072 + }, + { + "epoch": 1.4002766889554992, + "grad_norm": 1.7184355426607418, + "learning_rate": 4.530182445145506e-07, + "loss": 0.49256429076194763, + "step": 6073 + }, + { + "epoch": 1.4005072630850819, + "grad_norm": 1.4236944961072253, + "learning_rate": 4.5269911351285614e-07, + "loss": 0.5015553832054138, + "step": 6074 + }, + { + "epoch": 1.4007378372146646, + "grad_norm": 1.4505255602543088, + "learning_rate": 4.5238006206689204e-07, + "loss": 0.4313800632953644, + "step": 6075 + }, + { + "epoch": 1.400968411344247, + "grad_norm": 1.311079736416616, + "learning_rate": 4.520610902230363e-07, + "loss": 0.3440586030483246, + "step": 6076 + }, + { + "epoch": 1.4011989854738298, + "grad_norm": 1.4064686390113332, + "learning_rate": 4.517421980276538e-07, + "loss": 0.43868017196655273, + "step": 6077 + }, + { + "epoch": 1.4014295596034125, + "grad_norm": 1.6307364330463041, + "learning_rate": 4.5142338552709923e-07, + "loss": 
0.5581029057502747, + "step": 6078 + }, + { + "epoch": 1.401660133732995, + "grad_norm": 1.6962393590938891, + "learning_rate": 4.5110465276771524e-07, + "loss": 0.4543154835700989, + "step": 6079 + }, + { + "epoch": 1.4018907078625777, + "grad_norm": 1.5554679193557313, + "learning_rate": 4.507859997958333e-07, + "loss": 0.5229466557502747, + "step": 6080 + }, + { + "epoch": 1.4021212819921605, + "grad_norm": 1.5285075075955497, + "learning_rate": 4.504674266577724e-07, + "loss": 0.46781739592552185, + "step": 6081 + }, + { + "epoch": 1.4023518561217432, + "grad_norm": 1.6198419428344395, + "learning_rate": 4.5014893339983993e-07, + "loss": 0.48040711879730225, + "step": 6082 + }, + { + "epoch": 1.402582430251326, + "grad_norm": 1.5279313939865138, + "learning_rate": 4.49830520068333e-07, + "loss": 0.5039708018302917, + "step": 6083 + }, + { + "epoch": 1.4028130043809084, + "grad_norm": 1.4998739241266676, + "learning_rate": 4.495121867095354e-07, + "loss": 0.43496155738830566, + "step": 6084 + }, + { + "epoch": 1.403043578510491, + "grad_norm": 1.3838778339679694, + "learning_rate": 4.4919393336972045e-07, + "loss": 0.4603109061717987, + "step": 6085 + }, + { + "epoch": 1.4032741526400738, + "grad_norm": 1.476085268646584, + "learning_rate": 4.488757600951496e-07, + "loss": 0.4571962356567383, + "step": 6086 + }, + { + "epoch": 1.4035047267696563, + "grad_norm": 1.4791952167701867, + "learning_rate": 4.485576669320729e-07, + "loss": 0.46302443742752075, + "step": 6087 + }, + { + "epoch": 1.403735300899239, + "grad_norm": 1.675302072516594, + "learning_rate": 4.482396539267275e-07, + "loss": 0.39066869020462036, + "step": 6088 + }, + { + "epoch": 1.4039658750288218, + "grad_norm": 1.704176039322231, + "learning_rate": 4.4792172112534076e-07, + "loss": 0.4797130823135376, + "step": 6089 + }, + { + "epoch": 1.4041964491584045, + "grad_norm": 1.5835144658620484, + "learning_rate": 4.4760386857412704e-07, + "loss": 0.4578198492527008, + "step": 6090 + }, + { + 
"epoch": 1.4044270232879872, + "grad_norm": 1.3987211085891795, + "learning_rate": 4.472860963192889e-07, + "loss": 0.40768736600875854, + "step": 6091 + }, + { + "epoch": 1.4046575974175697, + "grad_norm": 1.4530633567004236, + "learning_rate": 4.4696840440701846e-07, + "loss": 0.4201413094997406, + "step": 6092 + }, + { + "epoch": 1.4048881715471524, + "grad_norm": 1.3648395822246437, + "learning_rate": 4.466507928834951e-07, + "loss": 0.45901796221733093, + "step": 6093 + }, + { + "epoch": 1.4051187456767351, + "grad_norm": 1.6465847208416895, + "learning_rate": 4.463332617948874e-07, + "loss": 0.4699435830116272, + "step": 6094 + }, + { + "epoch": 1.4053493198063176, + "grad_norm": 1.4755445259366653, + "learning_rate": 4.46015811187351e-07, + "loss": 0.4526669383049011, + "step": 6095 + }, + { + "epoch": 1.4055798939359003, + "grad_norm": 1.5721685230021194, + "learning_rate": 4.456984411070313e-07, + "loss": 0.46754884719848633, + "step": 6096 + }, + { + "epoch": 1.405810468065483, + "grad_norm": 2.1874728205075495, + "learning_rate": 4.453811516000604e-07, + "loss": 0.5119268894195557, + "step": 6097 + }, + { + "epoch": 1.4060410421950658, + "grad_norm": 2.056110026644097, + "learning_rate": 4.4506394271256043e-07, + "loss": 0.42980802059173584, + "step": 6098 + }, + { + "epoch": 1.4062716163246485, + "grad_norm": 1.5339161636381375, + "learning_rate": 4.447468144906401e-07, + "loss": 0.5895063281059265, + "step": 6099 + }, + { + "epoch": 1.406502190454231, + "grad_norm": 1.3796241305160553, + "learning_rate": 4.4442976698039803e-07, + "loss": 0.42768803238868713, + "step": 6100 + }, + { + "epoch": 1.4067327645838137, + "grad_norm": 1.608854909074267, + "learning_rate": 4.4411280022791943e-07, + "loss": 0.44234544038772583, + "step": 6101 + }, + { + "epoch": 1.4069633387133964, + "grad_norm": 1.3028889839673445, + "learning_rate": 4.437959142792791e-07, + "loss": 0.4382736086845398, + "step": 6102 + }, + { + "epoch": 1.407193912842979, + "grad_norm": 
1.6088674485493302, + "learning_rate": 4.4347910918054e-07, + "loss": 0.47603681683540344, + "step": 6103 + }, + { + "epoch": 1.4074244869725616, + "grad_norm": 1.8816511615485159, + "learning_rate": 4.431623849777522e-07, + "loss": 0.5562035441398621, + "step": 6104 + }, + { + "epoch": 1.4076550611021443, + "grad_norm": 2.2517510056002763, + "learning_rate": 4.4284574171695535e-07, + "loss": 0.4153141677379608, + "step": 6105 + }, + { + "epoch": 1.407885635231727, + "grad_norm": 1.2534764690727898, + "learning_rate": 4.425291794441762e-07, + "loss": 0.4825887680053711, + "step": 6106 + }, + { + "epoch": 1.4081162093613098, + "grad_norm": 1.4829126230878127, + "learning_rate": 4.4221269820543104e-07, + "loss": 0.4853668808937073, + "step": 6107 + }, + { + "epoch": 1.4083467834908923, + "grad_norm": 1.6140810272295893, + "learning_rate": 4.418962980467229e-07, + "loss": 0.5615251064300537, + "step": 6108 + }, + { + "epoch": 1.408577357620475, + "grad_norm": 1.8397680714752904, + "learning_rate": 4.4157997901404396e-07, + "loss": 0.38605546951293945, + "step": 6109 + }, + { + "epoch": 1.4088079317500577, + "grad_norm": 1.412066772348378, + "learning_rate": 4.412637411533745e-07, + "loss": 0.41582173109054565, + "step": 6110 + }, + { + "epoch": 1.4090385058796402, + "grad_norm": 1.4963267141581975, + "learning_rate": 4.4094758451068327e-07, + "loss": 0.38091376423835754, + "step": 6111 + }, + { + "epoch": 1.409269080009223, + "grad_norm": 1.5465721612260863, + "learning_rate": 4.4063150913192635e-07, + "loss": 0.43319058418273926, + "step": 6112 + }, + { + "epoch": 1.4094996541388056, + "grad_norm": 1.2123497825560654, + "learning_rate": 4.403155150630484e-07, + "loss": 0.43207013607025146, + "step": 6113 + }, + { + "epoch": 1.4097302282683883, + "grad_norm": 1.7217391258871346, + "learning_rate": 4.399996023499829e-07, + "loss": 0.43750250339508057, + "step": 6114 + }, + { + "epoch": 1.409960802397971, + "grad_norm": 1.5123653802002535, + "learning_rate": 
4.3968377103865016e-07, + "loss": 0.44084444642066956, + "step": 6115 + }, + { + "epoch": 1.4101913765275536, + "grad_norm": 1.4135580211481893, + "learning_rate": 4.3936802117495997e-07, + "loss": 0.4752010405063629, + "step": 6116 + }, + { + "epoch": 1.4104219506571363, + "grad_norm": 1.384945744446678, + "learning_rate": 4.390523528048098e-07, + "loss": 0.39239025115966797, + "step": 6117 + }, + { + "epoch": 1.410652524786719, + "grad_norm": 1.7179287290824201, + "learning_rate": 4.387367659740856e-07, + "loss": 0.46021080017089844, + "step": 6118 + }, + { + "epoch": 1.4108830989163015, + "grad_norm": 1.3751290560349647, + "learning_rate": 4.3842126072866014e-07, + "loss": 0.4079766571521759, + "step": 6119 + }, + { + "epoch": 1.4111136730458842, + "grad_norm": 1.5182170234243058, + "learning_rate": 4.381058371143964e-07, + "loss": 0.4922672510147095, + "step": 6120 + }, + { + "epoch": 1.411344247175467, + "grad_norm": 1.5200373777326295, + "learning_rate": 4.377904951771438e-07, + "loss": 0.3950929045677185, + "step": 6121 + }, + { + "epoch": 1.4115748213050496, + "grad_norm": 1.6189013836504815, + "learning_rate": 4.374752349627402e-07, + "loss": 0.503406286239624, + "step": 6122 + }, + { + "epoch": 1.4118053954346323, + "grad_norm": 1.724327270706253, + "learning_rate": 4.3716005651701215e-07, + "loss": 0.49198317527770996, + "step": 6123 + }, + { + "epoch": 1.4120359695642148, + "grad_norm": 1.424527206510087, + "learning_rate": 4.368449598857742e-07, + "loss": 0.47396305203437805, + "step": 6124 + }, + { + "epoch": 1.4122665436937976, + "grad_norm": 1.7537535213801698, + "learning_rate": 4.365299451148291e-07, + "loss": 0.5248152017593384, + "step": 6125 + }, + { + "epoch": 1.4124971178233803, + "grad_norm": 1.310814657820865, + "learning_rate": 4.362150122499666e-07, + "loss": 0.44327419996261597, + "step": 6126 + }, + { + "epoch": 1.4127276919529628, + "grad_norm": 1.5885906377106098, + "learning_rate": 4.3590016133696626e-07, + "loss": 
0.4628877639770508, + "step": 6127 + }, + { + "epoch": 1.4129582660825455, + "grad_norm": 1.5166490469327556, + "learning_rate": 4.355853924215942e-07, + "loss": 0.5277193188667297, + "step": 6128 + }, + { + "epoch": 1.4131888402121282, + "grad_norm": 1.6202759290555122, + "learning_rate": 4.3527070554960577e-07, + "loss": 0.4675426781177521, + "step": 6129 + }, + { + "epoch": 1.413419414341711, + "grad_norm": 1.668904355836008, + "learning_rate": 4.349561007667433e-07, + "loss": 0.3762160539627075, + "step": 6130 + }, + { + "epoch": 1.4136499884712936, + "grad_norm": 1.5686457690092273, + "learning_rate": 4.346415781187385e-07, + "loss": 0.4797256588935852, + "step": 6131 + }, + { + "epoch": 1.4138805626008761, + "grad_norm": 1.283129438483415, + "learning_rate": 4.3432713765130967e-07, + "loss": 0.4348931312561035, + "step": 6132 + }, + { + "epoch": 1.4141111367304589, + "grad_norm": 1.72495987311985, + "learning_rate": 4.3401277941016435e-07, + "loss": 0.5080585479736328, + "step": 6133 + }, + { + "epoch": 1.4143417108600416, + "grad_norm": 1.5083246190317607, + "learning_rate": 4.33698503440998e-07, + "loss": 0.40223604440689087, + "step": 6134 + }, + { + "epoch": 1.414572284989624, + "grad_norm": 1.5888336584861464, + "learning_rate": 4.3338430978949315e-07, + "loss": 0.4460202753543854, + "step": 6135 + }, + { + "epoch": 1.4148028591192068, + "grad_norm": 1.6992292342961226, + "learning_rate": 4.3307019850132167e-07, + "loss": 0.5814889669418335, + "step": 6136 + }, + { + "epoch": 1.4150334332487895, + "grad_norm": 1.366462724450419, + "learning_rate": 4.3275616962214214e-07, + "loss": 0.39237886667251587, + "step": 6137 + }, + { + "epoch": 1.4152640073783722, + "grad_norm": 1.8844588932900945, + "learning_rate": 4.324422231976025e-07, + "loss": 0.4621772766113281, + "step": 6138 + }, + { + "epoch": 1.415494581507955, + "grad_norm": 1.2090393738968102, + "learning_rate": 4.3212835927333745e-07, + "loss": 0.3722139596939087, + "step": 6139 + }, + { + "epoch": 
1.4157251556375374, + "grad_norm": 1.4849768206374545, + "learning_rate": 4.3181457789497055e-07, + "loss": 0.5007534623146057, + "step": 6140 + }, + { + "epoch": 1.4159557297671201, + "grad_norm": 1.603501037396303, + "learning_rate": 4.315008791081135e-07, + "loss": 0.470672607421875, + "step": 6141 + }, + { + "epoch": 1.4161863038967029, + "grad_norm": 1.6882048347200689, + "learning_rate": 4.3118726295836495e-07, + "loss": 0.5196114778518677, + "step": 6142 + }, + { + "epoch": 1.4164168780262854, + "grad_norm": 1.686399785386393, + "learning_rate": 4.3087372949131275e-07, + "loss": 0.4606804847717285, + "step": 6143 + }, + { + "epoch": 1.416647452155868, + "grad_norm": 1.2427386262927842, + "learning_rate": 4.3056027875253156e-07, + "loss": 0.3926661014556885, + "step": 6144 + }, + { + "epoch": 1.4168780262854508, + "grad_norm": 1.5075319697699416, + "learning_rate": 4.3024691078758536e-07, + "loss": 0.4570828080177307, + "step": 6145 + }, + { + "epoch": 1.4171086004150335, + "grad_norm": 1.4876286685500335, + "learning_rate": 4.299336256420245e-07, + "loss": 0.398615300655365, + "step": 6146 + }, + { + "epoch": 1.4173391745446162, + "grad_norm": 1.5413174329970663, + "learning_rate": 4.2962042336138873e-07, + "loss": 0.47571802139282227, + "step": 6147 + }, + { + "epoch": 1.4175697486741987, + "grad_norm": 1.5960399575320494, + "learning_rate": 4.2930730399120487e-07, + "loss": 0.4266431927680969, + "step": 6148 + }, + { + "epoch": 1.4178003228037814, + "grad_norm": 1.5511638894349447, + "learning_rate": 4.289942675769886e-07, + "loss": 0.47870057821273804, + "step": 6149 + }, + { + "epoch": 1.4180308969333641, + "grad_norm": 1.3514029969532406, + "learning_rate": 4.2868131416424223e-07, + "loss": 0.3947669267654419, + "step": 6150 + }, + { + "epoch": 1.4182614710629466, + "grad_norm": 1.6045441623823578, + "learning_rate": 4.283684437984573e-07, + "loss": 0.49074164032936096, + "step": 6151 + }, + { + "epoch": 1.4184920451925294, + "grad_norm": 
1.5267380397937564, + "learning_rate": 4.280556565251123e-07, + "loss": 0.5540445446968079, + "step": 6152 + }, + { + "epoch": 1.418722619322112, + "grad_norm": 1.4292058799019856, + "learning_rate": 4.2774295238967386e-07, + "loss": 0.4898286461830139, + "step": 6153 + }, + { + "epoch": 1.4189531934516948, + "grad_norm": 1.5872207462828773, + "learning_rate": 4.2743033143759733e-07, + "loss": 0.5432708859443665, + "step": 6154 + }, + { + "epoch": 1.4191837675812775, + "grad_norm": 1.811563729099354, + "learning_rate": 4.2711779371432445e-07, + "loss": 0.4438853859901428, + "step": 6155 + }, + { + "epoch": 1.41941434171086, + "grad_norm": 1.4197202159023756, + "learning_rate": 4.268053392652863e-07, + "loss": 0.4885905385017395, + "step": 6156 + }, + { + "epoch": 1.4196449158404427, + "grad_norm": 2.10234923243058, + "learning_rate": 4.264929681359013e-07, + "loss": 0.4465547204017639, + "step": 6157 + }, + { + "epoch": 1.4198754899700254, + "grad_norm": 1.5987256760741122, + "learning_rate": 4.2618068037157594e-07, + "loss": 0.4392780661582947, + "step": 6158 + }, + { + "epoch": 1.420106064099608, + "grad_norm": 1.7421664904589054, + "learning_rate": 4.258684760177039e-07, + "loss": 0.4501269459724426, + "step": 6159 + }, + { + "epoch": 1.4203366382291907, + "grad_norm": 1.399976858224263, + "learning_rate": 4.2555635511966783e-07, + "loss": 0.38439738750457764, + "step": 6160 + }, + { + "epoch": 1.4205672123587734, + "grad_norm": 1.4211214514262747, + "learning_rate": 4.2524431772283743e-07, + "loss": 0.4679202437400818, + "step": 6161 + }, + { + "epoch": 1.420797786488356, + "grad_norm": 1.3094843029172225, + "learning_rate": 4.2493236387257e-07, + "loss": 0.33505773544311523, + "step": 6162 + }, + { + "epoch": 1.4210283606179388, + "grad_norm": 1.7083049967506945, + "learning_rate": 4.246204936142116e-07, + "loss": 0.39141514897346497, + "step": 6163 + }, + { + "epoch": 1.4212589347475213, + "grad_norm": 1.5786326298364493, + "learning_rate": 
4.243087069930958e-07, + "loss": 0.49278295040130615, + "step": 6164 + }, + { + "epoch": 1.421489508877104, + "grad_norm": 2.2314439595882214, + "learning_rate": 4.239970040545442e-07, + "loss": 0.44093143939971924, + "step": 6165 + }, + { + "epoch": 1.4217200830066867, + "grad_norm": 1.5138193694081605, + "learning_rate": 4.236853848438654e-07, + "loss": 0.3840683102607727, + "step": 6166 + }, + { + "epoch": 1.4219506571362692, + "grad_norm": 1.7654139979291832, + "learning_rate": 4.23373849406357e-07, + "loss": 0.49814748764038086, + "step": 6167 + }, + { + "epoch": 1.422181231265852, + "grad_norm": 1.672205831624779, + "learning_rate": 4.2306239778730314e-07, + "loss": 0.37481504678726196, + "step": 6168 + }, + { + "epoch": 1.4224118053954347, + "grad_norm": 1.6089555356775624, + "learning_rate": 4.227510300319772e-07, + "loss": 0.3936859965324402, + "step": 6169 + }, + { + "epoch": 1.4226423795250174, + "grad_norm": 1.6958111197730896, + "learning_rate": 4.224397461856389e-07, + "loss": 0.4448816478252411, + "step": 6170 + }, + { + "epoch": 1.4228729536546, + "grad_norm": 1.7506080980818486, + "learning_rate": 4.22128546293537e-07, + "loss": 0.5494886040687561, + "step": 6171 + }, + { + "epoch": 1.4231035277841826, + "grad_norm": 1.6093955633210433, + "learning_rate": 4.218174304009078e-07, + "loss": 0.4532161355018616, + "step": 6172 + }, + { + "epoch": 1.4233341019137653, + "grad_norm": 1.5423276922709723, + "learning_rate": 4.215063985529743e-07, + "loss": 0.4771450161933899, + "step": 6173 + }, + { + "epoch": 1.4235646760433478, + "grad_norm": 1.4359456178719159, + "learning_rate": 4.211954507949491e-07, + "loss": 0.40784329175949097, + "step": 6174 + }, + { + "epoch": 1.4237952501729305, + "grad_norm": 1.6548161498628766, + "learning_rate": 4.208845871720308e-07, + "loss": 0.5336268544197083, + "step": 6175 + }, + { + "epoch": 1.4240258243025132, + "grad_norm": 1.495644640745375, + "learning_rate": 4.205738077294072e-07, + "loss": 0.44641751050949097, + 
"step": 6176 + }, + { + "epoch": 1.424256398432096, + "grad_norm": 1.650188328042211, + "learning_rate": 4.2026311251225264e-07, + "loss": 0.4370793104171753, + "step": 6177 + }, + { + "epoch": 1.4244869725616787, + "grad_norm": 1.5423618719597711, + "learning_rate": 4.1995250156573046e-07, + "loss": 0.4290730953216553, + "step": 6178 + }, + { + "epoch": 1.4247175466912612, + "grad_norm": 1.8757556733756044, + "learning_rate": 4.196419749349904e-07, + "loss": 0.5021491646766663, + "step": 6179 + }, + { + "epoch": 1.4249481208208439, + "grad_norm": 1.4243786827618563, + "learning_rate": 4.193315326651711e-07, + "loss": 0.3880186080932617, + "step": 6180 + }, + { + "epoch": 1.4251786949504266, + "grad_norm": 1.6032235222838507, + "learning_rate": 4.1902117480139876e-07, + "loss": 0.46498721837997437, + "step": 6181 + }, + { + "epoch": 1.425409269080009, + "grad_norm": 1.6074916356613946, + "learning_rate": 4.187109013887863e-07, + "loss": 0.45799821615219116, + "step": 6182 + }, + { + "epoch": 1.4256398432095918, + "grad_norm": 1.7936327965955485, + "learning_rate": 4.1840071247243594e-07, + "loss": 0.47459733486175537, + "step": 6183 + }, + { + "epoch": 1.4258704173391745, + "grad_norm": 1.7628830057109544, + "learning_rate": 4.18090608097436e-07, + "loss": 0.47636276483535767, + "step": 6184 + }, + { + "epoch": 1.4261009914687572, + "grad_norm": 1.4575388433663756, + "learning_rate": 4.17780588308864e-07, + "loss": 0.4710165858268738, + "step": 6185 + }, + { + "epoch": 1.42633156559834, + "grad_norm": 1.6068491390352067, + "learning_rate": 4.174706531517836e-07, + "loss": 0.4222904443740845, + "step": 6186 + }, + { + "epoch": 1.4265621397279225, + "grad_norm": 1.6136307494472921, + "learning_rate": 4.171608026712476e-07, + "loss": 0.43496620655059814, + "step": 6187 + }, + { + "epoch": 1.4267927138575052, + "grad_norm": 1.6637888441260775, + "learning_rate": 4.1685103691229597e-07, + "loss": 0.5178344249725342, + "step": 6188 + }, + { + "epoch": 1.4270232879870879, 
+ "grad_norm": 1.2438461713878222, + "learning_rate": 4.1654135591995644e-07, + "loss": 0.4033231735229492, + "step": 6189 + }, + { + "epoch": 1.4272538621166704, + "grad_norm": 1.6711330724791171, + "learning_rate": 4.162317597392436e-07, + "loss": 0.3368793725967407, + "step": 6190 + }, + { + "epoch": 1.427484436246253, + "grad_norm": 1.6185157962363963, + "learning_rate": 4.159222484151612e-07, + "loss": 0.44133609533309937, + "step": 6191 + }, + { + "epoch": 1.4277150103758358, + "grad_norm": 1.4778493402771002, + "learning_rate": 4.1561282199269944e-07, + "loss": 0.431888222694397, + "step": 6192 + }, + { + "epoch": 1.4279455845054185, + "grad_norm": 1.6042487363335018, + "learning_rate": 4.1530348051683615e-07, + "loss": 0.4319697618484497, + "step": 6193 + }, + { + "epoch": 1.4281761586350012, + "grad_norm": 2.1012743912812986, + "learning_rate": 4.1499422403253783e-07, + "loss": 0.5468018054962158, + "step": 6194 + }, + { + "epoch": 1.4284067327645837, + "grad_norm": 1.5851271799276925, + "learning_rate": 4.1468505258475784e-07, + "loss": 0.5083246231079102, + "step": 6195 + }, + { + "epoch": 1.4286373068941665, + "grad_norm": 1.5639019523203612, + "learning_rate": 4.1437596621843774e-07, + "loss": 0.3767821788787842, + "step": 6196 + }, + { + "epoch": 1.4288678810237492, + "grad_norm": 1.7459586887034657, + "learning_rate": 4.140669649785058e-07, + "loss": 0.5210238099098206, + "step": 6197 + }, + { + "epoch": 1.4290984551533317, + "grad_norm": 1.7429606479800976, + "learning_rate": 4.1375804890987907e-07, + "loss": 0.4498119354248047, + "step": 6198 + }, + { + "epoch": 1.4293290292829144, + "grad_norm": 1.8267093368864302, + "learning_rate": 4.134492180574609e-07, + "loss": 0.5093557238578796, + "step": 6199 + }, + { + "epoch": 1.429559603412497, + "grad_norm": 1.422406352052411, + "learning_rate": 4.131404724661438e-07, + "loss": 0.4745742082595825, + "step": 6200 + }, + { + "epoch": 1.4297901775420798, + "grad_norm": 1.506088588333767, + 
"learning_rate": 4.128318121808068e-07, + "loss": 0.45697301626205444, + "step": 6201 + }, + { + "epoch": 1.4300207516716625, + "grad_norm": 1.7309660786915744, + "learning_rate": 4.125232372463161e-07, + "loss": 0.4690994918346405, + "step": 6202 + }, + { + "epoch": 1.430251325801245, + "grad_norm": 1.6241026421208185, + "learning_rate": 4.1221474770752696e-07, + "loss": 0.49369046092033386, + "step": 6203 + }, + { + "epoch": 1.4304818999308277, + "grad_norm": 1.573925179309737, + "learning_rate": 4.1190634360928113e-07, + "loss": 0.5137126445770264, + "step": 6204 + }, + { + "epoch": 1.4307124740604105, + "grad_norm": 1.492371449937338, + "learning_rate": 4.1159802499640883e-07, + "loss": 0.43663549423217773, + "step": 6205 + }, + { + "epoch": 1.430943048189993, + "grad_norm": 1.373244593865611, + "learning_rate": 4.112897919137265e-07, + "loss": 0.40197718143463135, + "step": 6206 + }, + { + "epoch": 1.4311736223195757, + "grad_norm": 1.782636444844866, + "learning_rate": 4.1098164440603967e-07, + "loss": 0.5537480115890503, + "step": 6207 + }, + { + "epoch": 1.4314041964491584, + "grad_norm": 1.415124349915093, + "learning_rate": 4.1067358251814e-07, + "loss": 0.36077365279197693, + "step": 6208 + }, + { + "epoch": 1.4316347705787411, + "grad_norm": 1.8848844116732066, + "learning_rate": 4.103656062948081e-07, + "loss": 0.5421038866043091, + "step": 6209 + }, + { + "epoch": 1.4318653447083238, + "grad_norm": 1.5989095555214856, + "learning_rate": 4.100577157808107e-07, + "loss": 0.4330317974090576, + "step": 6210 + }, + { + "epoch": 1.4320959188379063, + "grad_norm": 1.5778977933757077, + "learning_rate": 4.0974991102090315e-07, + "loss": 0.4734618067741394, + "step": 6211 + }, + { + "epoch": 1.432326492967489, + "grad_norm": 1.7307541730622933, + "learning_rate": 4.0944219205982853e-07, + "loss": 0.4664125442504883, + "step": 6212 + }, + { + "epoch": 1.4325570670970718, + "grad_norm": 1.5163510968488794, + "learning_rate": 4.09134558942316e-07, + "loss": 
0.5214053988456726, + "step": 6213 + }, + { + "epoch": 1.4327876412266543, + "grad_norm": 1.4446024999002893, + "learning_rate": 4.08827011713084e-07, + "loss": 0.4694370627403259, + "step": 6214 + }, + { + "epoch": 1.433018215356237, + "grad_norm": 1.4399092047479434, + "learning_rate": 4.0851955041683674e-07, + "loss": 0.46517378091812134, + "step": 6215 + }, + { + "epoch": 1.4332487894858197, + "grad_norm": 1.589744461016997, + "learning_rate": 4.0821217509826766e-07, + "loss": 0.49152523279190063, + "step": 6216 + }, + { + "epoch": 1.4334793636154024, + "grad_norm": 1.3335404796705832, + "learning_rate": 4.0790488580205616e-07, + "loss": 0.4272884726524353, + "step": 6217 + }, + { + "epoch": 1.4337099377449851, + "grad_norm": 1.7167989658225775, + "learning_rate": 4.075976825728703e-07, + "loss": 0.4585829973220825, + "step": 6218 + }, + { + "epoch": 1.4339405118745676, + "grad_norm": 1.4284884424474726, + "learning_rate": 4.07290565455365e-07, + "loss": 0.33463186025619507, + "step": 6219 + }, + { + "epoch": 1.4341710860041503, + "grad_norm": 1.618873724040505, + "learning_rate": 4.0698353449418344e-07, + "loss": 0.4228953719139099, + "step": 6220 + }, + { + "epoch": 1.434401660133733, + "grad_norm": 1.688194150248175, + "learning_rate": 4.066765897339547e-07, + "loss": 0.5336583256721497, + "step": 6221 + }, + { + "epoch": 1.4346322342633155, + "grad_norm": 1.590308662997971, + "learning_rate": 4.063697312192972e-07, + "loss": 0.4779771864414215, + "step": 6222 + }, + { + "epoch": 1.4348628083928983, + "grad_norm": 1.4786534556099964, + "learning_rate": 4.060629589948155e-07, + "loss": 0.35226666927337646, + "step": 6223 + }, + { + "epoch": 1.435093382522481, + "grad_norm": 1.7110004239307235, + "learning_rate": 4.0575627310510174e-07, + "loss": 0.5006309747695923, + "step": 6224 + }, + { + "epoch": 1.4353239566520637, + "grad_norm": 1.5102552970375984, + "learning_rate": 4.0544967359473645e-07, + "loss": 0.3925382196903229, + "step": 6225 + }, + { + "epoch": 
1.4355545307816464, + "grad_norm": 1.4323897305301354, + "learning_rate": 4.0514316050828643e-07, + "loss": 0.3443659543991089, + "step": 6226 + }, + { + "epoch": 1.435785104911229, + "grad_norm": 1.3832333833383677, + "learning_rate": 4.048367338903067e-07, + "loss": 0.35585030913352966, + "step": 6227 + }, + { + "epoch": 1.4360156790408116, + "grad_norm": 1.551815991519559, + "learning_rate": 4.045303937853395e-07, + "loss": 0.4147206246852875, + "step": 6228 + }, + { + "epoch": 1.4362462531703943, + "grad_norm": 1.2817256800052734, + "learning_rate": 4.0422414023791486e-07, + "loss": 0.4475427567958832, + "step": 6229 + }, + { + "epoch": 1.4364768272999768, + "grad_norm": 1.3842198366935599, + "learning_rate": 4.0391797329254897e-07, + "loss": 0.5235386490821838, + "step": 6230 + }, + { + "epoch": 1.4367074014295595, + "grad_norm": 1.4929978689012695, + "learning_rate": 4.036118929937472e-07, + "loss": 0.3543087840080261, + "step": 6231 + }, + { + "epoch": 1.4369379755591423, + "grad_norm": 1.793735853632873, + "learning_rate": 4.03305899386001e-07, + "loss": 0.4718255400657654, + "step": 6232 + }, + { + "epoch": 1.437168549688725, + "grad_norm": 1.338180352532036, + "learning_rate": 4.0299999251378924e-07, + "loss": 0.41239792108535767, + "step": 6233 + }, + { + "epoch": 1.4373991238183077, + "grad_norm": 1.5900128771725797, + "learning_rate": 4.026941724215791e-07, + "loss": 0.4241238236427307, + "step": 6234 + }, + { + "epoch": 1.4376296979478902, + "grad_norm": 1.4625134538700348, + "learning_rate": 4.0238843915382435e-07, + "loss": 0.43678992986679077, + "step": 6235 + }, + { + "epoch": 1.437860272077473, + "grad_norm": 1.3845075397304552, + "learning_rate": 4.0208279275496706e-07, + "loss": 0.4304202198982239, + "step": 6236 + }, + { + "epoch": 1.4380908462070556, + "grad_norm": 1.4379971371115365, + "learning_rate": 4.0177723326943516e-07, + "loss": 0.4297143816947937, + "step": 6237 + }, + { + "epoch": 1.4383214203366381, + "grad_norm": 
1.4713452003345164, + "learning_rate": 4.0147176074164557e-07, + "loss": 0.4823951721191406, + "step": 6238 + }, + { + "epoch": 1.4385519944662208, + "grad_norm": 1.4766475893290447, + "learning_rate": 4.0116637521600104e-07, + "loss": 0.41384291648864746, + "step": 6239 + }, + { + "epoch": 1.4387825685958036, + "grad_norm": 1.4772189735738515, + "learning_rate": 4.008610767368933e-07, + "loss": 0.5725995898246765, + "step": 6240 + }, + { + "epoch": 1.4390131427253863, + "grad_norm": 1.580155865045121, + "learning_rate": 4.0055586534869976e-07, + "loss": 0.5222553014755249, + "step": 6241 + }, + { + "epoch": 1.439243716854969, + "grad_norm": 1.3886146191032183, + "learning_rate": 4.002507410957864e-07, + "loss": 0.33871912956237793, + "step": 6242 + }, + { + "epoch": 1.4394742909845515, + "grad_norm": 1.6215524550661136, + "learning_rate": 3.9994570402250647e-07, + "loss": 0.423028826713562, + "step": 6243 + }, + { + "epoch": 1.4397048651141342, + "grad_norm": 1.5682836985778081, + "learning_rate": 3.996407541731994e-07, + "loss": 0.4235682785511017, + "step": 6244 + }, + { + "epoch": 1.439935439243717, + "grad_norm": 1.231022526448631, + "learning_rate": 3.993358915921936e-07, + "loss": 0.43758147954940796, + "step": 6245 + }, + { + "epoch": 1.4401660133732994, + "grad_norm": 1.4111669631590298, + "learning_rate": 3.9903111632380314e-07, + "loss": 0.4462485611438751, + "step": 6246 + }, + { + "epoch": 1.4403965875028821, + "grad_norm": 1.4290246546090093, + "learning_rate": 3.9872642841233086e-07, + "loss": 0.4650310277938843, + "step": 6247 + }, + { + "epoch": 1.4406271616324648, + "grad_norm": 1.4998946903017614, + "learning_rate": 3.984218279020656e-07, + "loss": 0.36653342843055725, + "step": 6248 + }, + { + "epoch": 1.4408577357620476, + "grad_norm": 1.4936296304301175, + "learning_rate": 3.9811731483728483e-07, + "loss": 0.4102433919906616, + "step": 6249 + }, + { + "epoch": 1.4410883098916303, + "grad_norm": 1.6065631349936378, + "learning_rate": 
3.9781288926225187e-07, + "loss": 0.46611371636390686, + "step": 6250 + }, + { + "epoch": 1.4413188840212128, + "grad_norm": 1.4339333577964222, + "learning_rate": 3.9750855122121854e-07, + "loss": 0.39757978916168213, + "step": 6251 + }, + { + "epoch": 1.4415494581507955, + "grad_norm": 1.762654016187883, + "learning_rate": 3.972043007584236e-07, + "loss": 0.3736093044281006, + "step": 6252 + }, + { + "epoch": 1.4417800322803782, + "grad_norm": 1.463877920104907, + "learning_rate": 3.9690013791809243e-07, + "loss": 0.4907599091529846, + "step": 6253 + }, + { + "epoch": 1.4420106064099607, + "grad_norm": 1.8306810417206691, + "learning_rate": 3.965960627444387e-07, + "loss": 0.4852679967880249, + "step": 6254 + }, + { + "epoch": 1.4422411805395434, + "grad_norm": 1.379992571943406, + "learning_rate": 3.962920752816622e-07, + "loss": 0.3681846261024475, + "step": 6255 + }, + { + "epoch": 1.4424717546691261, + "grad_norm": 1.3930271555712797, + "learning_rate": 3.9598817557395136e-07, + "loss": 0.36029407382011414, + "step": 6256 + }, + { + "epoch": 1.4427023287987089, + "grad_norm": 1.5468752557100751, + "learning_rate": 3.9568436366548044e-07, + "loss": 0.4156547486782074, + "step": 6257 + }, + { + "epoch": 1.4429329029282916, + "grad_norm": 1.2893479866141693, + "learning_rate": 3.9538063960041155e-07, + "loss": 0.417999804019928, + "step": 6258 + }, + { + "epoch": 1.443163477057874, + "grad_norm": 1.5873772931626444, + "learning_rate": 3.9507700342289454e-07, + "loss": 0.34347790479660034, + "step": 6259 + }, + { + "epoch": 1.4433940511874568, + "grad_norm": 1.6747174695424258, + "learning_rate": 3.9477345517706606e-07, + "loss": 0.5093958973884583, + "step": 6260 + }, + { + "epoch": 1.4436246253170395, + "grad_norm": 1.3786087360846342, + "learning_rate": 3.9446999490704935e-07, + "loss": 0.45406264066696167, + "step": 6261 + }, + { + "epoch": 1.443855199446622, + "grad_norm": 1.4643807349818905, + "learning_rate": 3.941666226569561e-07, + "loss": 
0.35074740648269653, + "step": 6262 + }, + { + "epoch": 1.4440857735762047, + "grad_norm": 1.9209061652207753, + "learning_rate": 3.9386333847088414e-07, + "loss": 0.4588093161582947, + "step": 6263 + }, + { + "epoch": 1.4443163477057874, + "grad_norm": 1.706957598822881, + "learning_rate": 3.935601423929187e-07, + "loss": 0.5431508421897888, + "step": 6264 + }, + { + "epoch": 1.4445469218353701, + "grad_norm": 2.1293944579193744, + "learning_rate": 3.9325703446713253e-07, + "loss": 0.5942284464836121, + "step": 6265 + }, + { + "epoch": 1.4447774959649529, + "grad_norm": 1.563688512589723, + "learning_rate": 3.929540147375856e-07, + "loss": 0.45533287525177, + "step": 6266 + }, + { + "epoch": 1.4450080700945354, + "grad_norm": 1.4069649860322977, + "learning_rate": 3.926510832483252e-07, + "loss": 0.41154634952545166, + "step": 6267 + }, + { + "epoch": 1.445238644224118, + "grad_norm": 1.7442081379649044, + "learning_rate": 3.923482400433847e-07, + "loss": 0.548882246017456, + "step": 6268 + }, + { + "epoch": 1.4454692183537008, + "grad_norm": 1.6064445647457797, + "learning_rate": 3.9204548516678635e-07, + "loss": 0.4062466621398926, + "step": 6269 + }, + { + "epoch": 1.4456997924832833, + "grad_norm": 1.4970160030578672, + "learning_rate": 3.917428186625378e-07, + "loss": 0.39035165309906006, + "step": 6270 + }, + { + "epoch": 1.445930366612866, + "grad_norm": 1.647666751716306, + "learning_rate": 3.9144024057463545e-07, + "loss": 0.44899889826774597, + "step": 6271 + }, + { + "epoch": 1.4461609407424487, + "grad_norm": 1.6865824844286113, + "learning_rate": 3.911377509470616e-07, + "loss": 0.5676968097686768, + "step": 6272 + }, + { + "epoch": 1.4463915148720314, + "grad_norm": 1.5001442753287921, + "learning_rate": 3.9083534982378596e-07, + "loss": 0.5157150626182556, + "step": 6273 + }, + { + "epoch": 1.4466220890016142, + "grad_norm": 1.3999116109701921, + "learning_rate": 3.9053303724876595e-07, + "loss": 0.4405839443206787, + "step": 6274 + }, + { + 
"epoch": 1.4468526631311966, + "grad_norm": 1.4027072316284976, + "learning_rate": 3.9023081326594564e-07, + "loss": 0.4184240400791168, + "step": 6275 + }, + { + "epoch": 1.4470832372607794, + "grad_norm": 1.4676581347164595, + "learning_rate": 3.8992867791925687e-07, + "loss": 0.46825113892555237, + "step": 6276 + }, + { + "epoch": 1.447313811390362, + "grad_norm": 1.5974669468558875, + "learning_rate": 3.896266312526174e-07, + "loss": 0.39870697259902954, + "step": 6277 + }, + { + "epoch": 1.4475443855199446, + "grad_norm": 1.5056097224989398, + "learning_rate": 3.893246733099332e-07, + "loss": 0.5021681785583496, + "step": 6278 + }, + { + "epoch": 1.4477749596495273, + "grad_norm": 1.6448123845050522, + "learning_rate": 3.890228041350966e-07, + "loss": 0.5453378558158875, + "step": 6279 + }, + { + "epoch": 1.44800553377911, + "grad_norm": 1.6411917622938994, + "learning_rate": 3.887210237719877e-07, + "loss": 0.4488704800605774, + "step": 6280 + }, + { + "epoch": 1.4482361079086927, + "grad_norm": 1.5018657352386517, + "learning_rate": 3.8841933226447274e-07, + "loss": 0.45669007301330566, + "step": 6281 + }, + { + "epoch": 1.4484666820382754, + "grad_norm": 1.704954137797073, + "learning_rate": 3.881177296564061e-07, + "loss": 0.43954944610595703, + "step": 6282 + }, + { + "epoch": 1.448697256167858, + "grad_norm": 1.3077525799414271, + "learning_rate": 3.8781621599162896e-07, + "loss": 0.39490729570388794, + "step": 6283 + }, + { + "epoch": 1.4489278302974407, + "grad_norm": 1.8875404119821422, + "learning_rate": 3.875147913139688e-07, + "loss": 0.44206392765045166, + "step": 6284 + }, + { + "epoch": 1.4491584044270232, + "grad_norm": 1.5003627073617865, + "learning_rate": 3.872134556672415e-07, + "loss": 0.3874932527542114, + "step": 6285 + }, + { + "epoch": 1.4493889785566059, + "grad_norm": 1.616983828039009, + "learning_rate": 3.8691220909524847e-07, + "loss": 0.4762042760848999, + "step": 6286 + }, + { + "epoch": 1.4496195526861886, + "grad_norm": 
1.4983771405139852, + "learning_rate": 3.8661105164177955e-07, + "loss": 0.45220378041267395, + "step": 6287 + }, + { + "epoch": 1.4498501268157713, + "grad_norm": 1.5182044259213916, + "learning_rate": 3.863099833506105e-07, + "loss": 0.48711973428726196, + "step": 6288 + }, + { + "epoch": 1.450080700945354, + "grad_norm": 1.795485740865634, + "learning_rate": 3.8600900426550495e-07, + "loss": 0.3985457420349121, + "step": 6289 + }, + { + "epoch": 1.4503112750749365, + "grad_norm": 1.8111920220274738, + "learning_rate": 3.8570811443021324e-07, + "loss": 0.4626576006412506, + "step": 6290 + }, + { + "epoch": 1.4505418492045192, + "grad_norm": 1.3056530217454654, + "learning_rate": 3.8540731388847303e-07, + "loss": 0.49909156560897827, + "step": 6291 + }, + { + "epoch": 1.450772423334102, + "grad_norm": 1.6088418800938844, + "learning_rate": 3.8510660268400853e-07, + "loss": 0.47779160737991333, + "step": 6292 + }, + { + "epoch": 1.4510029974636844, + "grad_norm": 1.7546373602134575, + "learning_rate": 3.8480598086053073e-07, + "loss": 0.41273951530456543, + "step": 6293 + }, + { + "epoch": 1.4512335715932672, + "grad_norm": 1.372334717947673, + "learning_rate": 3.8450544846173873e-07, + "loss": 0.49659836292266846, + "step": 6294 + }, + { + "epoch": 1.4514641457228499, + "grad_norm": 1.5745738888755318, + "learning_rate": 3.842050055313174e-07, + "loss": 0.48864418268203735, + "step": 6295 + }, + { + "epoch": 1.4516947198524326, + "grad_norm": 1.5511685453466029, + "learning_rate": 3.8390465211293964e-07, + "loss": 0.4437263011932373, + "step": 6296 + }, + { + "epoch": 1.4519252939820153, + "grad_norm": 1.425822828962689, + "learning_rate": 3.83604388250264e-07, + "loss": 0.4785847067832947, + "step": 6297 + }, + { + "epoch": 1.4521558681115978, + "grad_norm": 1.4667204310824673, + "learning_rate": 3.8330421398693815e-07, + "loss": 0.4376726746559143, + "step": 6298 + }, + { + "epoch": 1.4523864422411805, + "grad_norm": 1.3570227959381094, + "learning_rate": 
3.8300412936659456e-07, + "loss": 0.39121049642562866, + "step": 6299 + }, + { + "epoch": 1.4526170163707632, + "grad_norm": 1.3658035995507571, + "learning_rate": 3.827041344328541e-07, + "loss": 0.4635738730430603, + "step": 6300 + }, + { + "epoch": 1.4528475905003457, + "grad_norm": 2.0304852722065068, + "learning_rate": 3.8240422922932345e-07, + "loss": 0.502306342124939, + "step": 6301 + }, + { + "epoch": 1.4530781646299284, + "grad_norm": 1.4029845821737765, + "learning_rate": 3.8210441379959765e-07, + "loss": 0.4401247799396515, + "step": 6302 + }, + { + "epoch": 1.4533087387595112, + "grad_norm": 1.3861824238158087, + "learning_rate": 3.8180468818725744e-07, + "loss": 0.5291532874107361, + "step": 6303 + }, + { + "epoch": 1.4535393128890939, + "grad_norm": 1.6276608547131342, + "learning_rate": 3.8150505243587074e-07, + "loss": 0.44658181071281433, + "step": 6304 + }, + { + "epoch": 1.4537698870186766, + "grad_norm": 1.6458326531407963, + "learning_rate": 3.8120550658899284e-07, + "loss": 0.45127803087234497, + "step": 6305 + }, + { + "epoch": 1.454000461148259, + "grad_norm": 1.492007208083286, + "learning_rate": 3.809060506901659e-07, + "loss": 0.42187097668647766, + "step": 6306 + }, + { + "epoch": 1.4542310352778418, + "grad_norm": 1.5038936507089915, + "learning_rate": 3.806066847829191e-07, + "loss": 0.3573130667209625, + "step": 6307 + }, + { + "epoch": 1.4544616094074245, + "grad_norm": 1.9148379623538745, + "learning_rate": 3.8030740891076775e-07, + "loss": 0.4350733757019043, + "step": 6308 + }, + { + "epoch": 1.454692183537007, + "grad_norm": 1.541900067739278, + "learning_rate": 3.8000822311721526e-07, + "loss": 0.48514148592948914, + "step": 6309 + }, + { + "epoch": 1.4549227576665897, + "grad_norm": 1.4827947959124368, + "learning_rate": 3.797091274457507e-07, + "loss": 0.41036373376846313, + "step": 6310 + }, + { + "epoch": 1.4551533317961725, + "grad_norm": 1.494922453363639, + "learning_rate": 3.7941012193985113e-07, + "loss": 
0.4141424298286438, + "step": 6311 + }, + { + "epoch": 1.4553839059257552, + "grad_norm": 1.273366480801725, + "learning_rate": 3.7911120664297947e-07, + "loss": 0.4465962052345276, + "step": 6312 + }, + { + "epoch": 1.455614480055338, + "grad_norm": 1.5781844793110138, + "learning_rate": 3.7881238159858653e-07, + "loss": 0.42370718717575073, + "step": 6313 + }, + { + "epoch": 1.4558450541849204, + "grad_norm": 1.5971127849956464, + "learning_rate": 3.785136468501098e-07, + "loss": 0.5199419260025024, + "step": 6314 + }, + { + "epoch": 1.456075628314503, + "grad_norm": 1.617344004292436, + "learning_rate": 3.782150024409727e-07, + "loss": 0.4802842140197754, + "step": 6315 + }, + { + "epoch": 1.4563062024440858, + "grad_norm": 1.24431475405318, + "learning_rate": 3.77916448414587e-07, + "loss": 0.4640405476093292, + "step": 6316 + }, + { + "epoch": 1.4565367765736683, + "grad_norm": 1.4636172678889559, + "learning_rate": 3.776179848143497e-07, + "loss": 0.4338728189468384, + "step": 6317 + }, + { + "epoch": 1.456767350703251, + "grad_norm": 2.139264242241595, + "learning_rate": 3.7731961168364644e-07, + "loss": 0.42709267139434814, + "step": 6318 + }, + { + "epoch": 1.4569979248328337, + "grad_norm": 1.6617712318798017, + "learning_rate": 3.7702132906584784e-07, + "loss": 0.4985729455947876, + "step": 6319 + }, + { + "epoch": 1.4572284989624165, + "grad_norm": 1.441274937368423, + "learning_rate": 3.7672313700431277e-07, + "loss": 0.46335911750793457, + "step": 6320 + }, + { + "epoch": 1.4574590730919992, + "grad_norm": 1.416712646344965, + "learning_rate": 3.7642503554238657e-07, + "loss": 0.39897364377975464, + "step": 6321 + }, + { + "epoch": 1.4576896472215817, + "grad_norm": 1.7524170106258121, + "learning_rate": 3.761270247234014e-07, + "loss": 0.4338347017765045, + "step": 6322 + }, + { + "epoch": 1.4579202213511644, + "grad_norm": 1.5421394568485456, + "learning_rate": 3.7582910459067607e-07, + "loss": 0.4619752764701843, + "step": 6323 + }, + { + "epoch": 
1.458150795480747, + "grad_norm": 1.6592584693059589, + "learning_rate": 3.7553127518751583e-07, + "loss": 0.4676104784011841, + "step": 6324 + }, + { + "epoch": 1.4583813696103296, + "grad_norm": 1.495504668484879, + "learning_rate": 3.752335365572138e-07, + "loss": 0.37536361813545227, + "step": 6325 + }, + { + "epoch": 1.4586119437399123, + "grad_norm": 1.5747560176376743, + "learning_rate": 3.749358887430487e-07, + "loss": 0.4389209449291229, + "step": 6326 + }, + { + "epoch": 1.458842517869495, + "grad_norm": 1.561809426616513, + "learning_rate": 3.746383317882874e-07, + "loss": 0.44722115993499756, + "step": 6327 + }, + { + "epoch": 1.4590730919990778, + "grad_norm": 1.8177515516918266, + "learning_rate": 3.743408657361821e-07, + "loss": 0.39179277420043945, + "step": 6328 + }, + { + "epoch": 1.4593036661286605, + "grad_norm": 1.5511886302037754, + "learning_rate": 3.7404349062997275e-07, + "loss": 0.4704967737197876, + "step": 6329 + }, + { + "epoch": 1.459534240258243, + "grad_norm": 1.4679557991806869, + "learning_rate": 3.737462065128859e-07, + "loss": 0.4294360876083374, + "step": 6330 + }, + { + "epoch": 1.4597648143878257, + "grad_norm": 1.5082268745032619, + "learning_rate": 3.734490134281353e-07, + "loss": 0.5070170760154724, + "step": 6331 + }, + { + "epoch": 1.4599953885174084, + "grad_norm": 1.4285887900302483, + "learning_rate": 3.7315191141892013e-07, + "loss": 0.3670409023761749, + "step": 6332 + }, + { + "epoch": 1.460225962646991, + "grad_norm": 1.4866250279072872, + "learning_rate": 3.7285490052842785e-07, + "loss": 0.5043025016784668, + "step": 6333 + }, + { + "epoch": 1.4604565367765736, + "grad_norm": 1.5557807366245089, + "learning_rate": 3.725579807998316e-07, + "loss": 0.43942689895629883, + "step": 6334 + }, + { + "epoch": 1.4606871109061563, + "grad_norm": 1.61242194971354, + "learning_rate": 3.7226115227629164e-07, + "loss": 0.3444882035255432, + "step": 6335 + }, + { + "epoch": 1.460917685035739, + "grad_norm": 1.4093154726677697, 
+ "learning_rate": 3.71964415000955e-07, + "loss": 0.3994483947753906, + "step": 6336 + }, + { + "epoch": 1.4611482591653218, + "grad_norm": 1.799524270186483, + "learning_rate": 3.7166776901695564e-07, + "loss": 0.3581928014755249, + "step": 6337 + }, + { + "epoch": 1.4613788332949043, + "grad_norm": 1.4094806965107296, + "learning_rate": 3.7137121436741423e-07, + "loss": 0.4068276286125183, + "step": 6338 + }, + { + "epoch": 1.461609407424487, + "grad_norm": 1.5430920931361498, + "learning_rate": 3.710747510954376e-07, + "loss": 0.4140080213546753, + "step": 6339 + }, + { + "epoch": 1.4618399815540697, + "grad_norm": 1.5667918006300834, + "learning_rate": 3.707783792441201e-07, + "loss": 0.4328460097312927, + "step": 6340 + }, + { + "epoch": 1.4620705556836522, + "grad_norm": 1.7344820768552758, + "learning_rate": 3.704820988565419e-07, + "loss": 0.49252209067344666, + "step": 6341 + }, + { + "epoch": 1.462301129813235, + "grad_norm": 1.4564646974830249, + "learning_rate": 3.7018590997577093e-07, + "loss": 0.43051671981811523, + "step": 6342 + }, + { + "epoch": 1.4625317039428176, + "grad_norm": 1.5901870751351228, + "learning_rate": 3.698898126448605e-07, + "loss": 0.5131059288978577, + "step": 6343 + }, + { + "epoch": 1.4627622780724003, + "grad_norm": 2.025312431684147, + "learning_rate": 3.6959380690685185e-07, + "loss": 0.4633597731590271, + "step": 6344 + }, + { + "epoch": 1.462992852201983, + "grad_norm": 1.5138095102076332, + "learning_rate": 3.6929789280477265e-07, + "loss": 0.3603428602218628, + "step": 6345 + }, + { + "epoch": 1.4632234263315655, + "grad_norm": 1.4981993836978438, + "learning_rate": 3.6900207038163633e-07, + "loss": 0.5337490439414978, + "step": 6346 + }, + { + "epoch": 1.4634540004611483, + "grad_norm": 1.8305905685338713, + "learning_rate": 3.687063396804444e-07, + "loss": 0.4940665066242218, + "step": 6347 + }, + { + "epoch": 1.463684574590731, + "grad_norm": 2.012256207996667, + "learning_rate": 3.6841070074418367e-07, + "loss": 
0.45664387941360474, + "step": 6348 + }, + { + "epoch": 1.4639151487203135, + "grad_norm": 1.6965611532451377, + "learning_rate": 3.681151536158289e-07, + "loss": 0.4546254277229309, + "step": 6349 + }, + { + "epoch": 1.4641457228498962, + "grad_norm": 1.4760234786987596, + "learning_rate": 3.6781969833834015e-07, + "loss": 0.37474149465560913, + "step": 6350 + }, + { + "epoch": 1.464376296979479, + "grad_norm": 1.473821341410815, + "learning_rate": 3.675243349546655e-07, + "loss": 0.38016337156295776, + "step": 6351 + }, + { + "epoch": 1.4646068711090616, + "grad_norm": 1.3725937182091388, + "learning_rate": 3.672290635077384e-07, + "loss": 0.46079233288764954, + "step": 6352 + }, + { + "epoch": 1.4648374452386443, + "grad_norm": 1.754716547965532, + "learning_rate": 3.669338840404799e-07, + "loss": 0.39382117986679077, + "step": 6353 + }, + { + "epoch": 1.4650680193682268, + "grad_norm": 1.5018040161914972, + "learning_rate": 3.6663879659579766e-07, + "loss": 0.4502074718475342, + "step": 6354 + }, + { + "epoch": 1.4652985934978096, + "grad_norm": 1.4446726503170868, + "learning_rate": 3.663438012165848e-07, + "loss": 0.38199833035469055, + "step": 6355 + }, + { + "epoch": 1.4655291676273923, + "grad_norm": 1.4760781012903512, + "learning_rate": 3.660488979457228e-07, + "loss": 0.4340086579322815, + "step": 6356 + }, + { + "epoch": 1.4657597417569748, + "grad_norm": 1.7005769563076596, + "learning_rate": 3.65754086826078e-07, + "loss": 0.5425105094909668, + "step": 6357 + }, + { + "epoch": 1.4659903158865575, + "grad_norm": 1.4480393161895644, + "learning_rate": 3.654593679005048e-07, + "loss": 0.4671604633331299, + "step": 6358 + }, + { + "epoch": 1.4662208900161402, + "grad_norm": 1.6404775976624013, + "learning_rate": 3.6516474121184317e-07, + "loss": 0.4608290195465088, + "step": 6359 + }, + { + "epoch": 1.466451464145723, + "grad_norm": 1.9415349791307541, + "learning_rate": 3.6487020680292023e-07, + "loss": 0.5272650122642517, + "step": 6360 + }, + { + 
"epoch": 1.4666820382753056, + "grad_norm": 1.4115666654764834, + "learning_rate": 3.645757647165495e-07, + "loss": 0.40990152955055237, + "step": 6361 + }, + { + "epoch": 1.4669126124048881, + "grad_norm": 1.405277693008717, + "learning_rate": 3.6428141499553166e-07, + "loss": 0.4723639488220215, + "step": 6362 + }, + { + "epoch": 1.4671431865344708, + "grad_norm": 1.7789473556982454, + "learning_rate": 3.639871576826529e-07, + "loss": 0.5115963220596313, + "step": 6363 + }, + { + "epoch": 1.4673737606640536, + "grad_norm": 1.669989973617769, + "learning_rate": 3.636929928206862e-07, + "loss": 0.44548431038856506, + "step": 6364 + }, + { + "epoch": 1.467604334793636, + "grad_norm": 1.5904330694852653, + "learning_rate": 3.633989204523922e-07, + "loss": 0.48599356412887573, + "step": 6365 + }, + { + "epoch": 1.4678349089232188, + "grad_norm": 1.4664661517676485, + "learning_rate": 3.631049406205164e-07, + "loss": 0.463236004114151, + "step": 6366 + }, + { + "epoch": 1.4680654830528015, + "grad_norm": 1.7238002544119735, + "learning_rate": 3.6281105336779225e-07, + "loss": 0.4840255379676819, + "step": 6367 + }, + { + "epoch": 1.4682960571823842, + "grad_norm": 1.5727046676978498, + "learning_rate": 3.6251725873693926e-07, + "loss": 0.39191675186157227, + "step": 6368 + }, + { + "epoch": 1.468526631311967, + "grad_norm": 1.4333992251496341, + "learning_rate": 3.622235567706637e-07, + "loss": 0.5161769986152649, + "step": 6369 + }, + { + "epoch": 1.4687572054415494, + "grad_norm": 1.811820117175508, + "learning_rate": 3.6192994751165764e-07, + "loss": 0.4579160213470459, + "step": 6370 + }, + { + "epoch": 1.4689877795711321, + "grad_norm": 1.5348364339019953, + "learning_rate": 3.616364310026006e-07, + "loss": 0.4254727363586426, + "step": 6371 + }, + { + "epoch": 1.4692183537007149, + "grad_norm": 1.60846510703603, + "learning_rate": 3.613430072861575e-07, + "loss": 0.3614911139011383, + "step": 6372 + }, + { + "epoch": 1.4694489278302973, + "grad_norm": 
1.332197813540827, + "learning_rate": 3.610496764049814e-07, + "loss": 0.4501386284828186, + "step": 6373 + }, + { + "epoch": 1.46967950195988, + "grad_norm": 1.4207205401720155, + "learning_rate": 3.607564384017102e-07, + "loss": 0.4988802671432495, + "step": 6374 + }, + { + "epoch": 1.4699100760894628, + "grad_norm": 1.5751788296655767, + "learning_rate": 3.6046329331896907e-07, + "loss": 0.4277713894844055, + "step": 6375 + }, + { + "epoch": 1.4701406502190455, + "grad_norm": 1.5414838298104503, + "learning_rate": 3.601702411993697e-07, + "loss": 0.5007919073104858, + "step": 6376 + }, + { + "epoch": 1.4703712243486282, + "grad_norm": 1.5705777345927519, + "learning_rate": 3.5987728208551015e-07, + "loss": 0.4857282042503357, + "step": 6377 + }, + { + "epoch": 1.4706017984782107, + "grad_norm": 1.3913774043642957, + "learning_rate": 3.595844160199756e-07, + "loss": 0.45752188563346863, + "step": 6378 + }, + { + "epoch": 1.4708323726077934, + "grad_norm": 1.3374827793978188, + "learning_rate": 3.592916430453361e-07, + "loss": 0.4364059269428253, + "step": 6379 + }, + { + "epoch": 1.4710629467373761, + "grad_norm": 1.4896729369612345, + "learning_rate": 3.589989632041501e-07, + "loss": 0.48765695095062256, + "step": 6380 + }, + { + "epoch": 1.4712935208669586, + "grad_norm": 1.8321401665511103, + "learning_rate": 3.5870637653896087e-07, + "loss": 0.5505347847938538, + "step": 6381 + }, + { + "epoch": 1.4715240949965414, + "grad_norm": 1.5940287914496154, + "learning_rate": 3.584138830922994e-07, + "loss": 0.4468069076538086, + "step": 6382 + }, + { + "epoch": 1.471754669126124, + "grad_norm": 1.2639532856264213, + "learning_rate": 3.5812148290668186e-07, + "loss": 0.4050968289375305, + "step": 6383 + }, + { + "epoch": 1.4719852432557068, + "grad_norm": 1.6709771008348266, + "learning_rate": 3.578291760246122e-07, + "loss": 0.47324883937835693, + "step": 6384 + }, + { + "epoch": 1.4722158173852895, + "grad_norm": 1.646291535207369, + "learning_rate": 
3.5753696248858025e-07, + "loss": 0.4431450366973877, + "step": 6385 + }, + { + "epoch": 1.472446391514872, + "grad_norm": 1.3398593447687968, + "learning_rate": 3.5724484234106166e-07, + "loss": 0.4599822163581848, + "step": 6386 + }, + { + "epoch": 1.4726769656444547, + "grad_norm": 1.6764694987177748, + "learning_rate": 3.5695281562451964e-07, + "loss": 0.3655046224594116, + "step": 6387 + }, + { + "epoch": 1.4729075397740374, + "grad_norm": 1.925765064850511, + "learning_rate": 3.5666088238140267e-07, + "loss": 0.4543811082839966, + "step": 6388 + }, + { + "epoch": 1.47313811390362, + "grad_norm": 1.7682119668466059, + "learning_rate": 3.563690426541469e-07, + "loss": 0.45380568504333496, + "step": 6389 + }, + { + "epoch": 1.4733686880332026, + "grad_norm": 1.3928278789748259, + "learning_rate": 3.5607729648517336e-07, + "loss": 0.3640294373035431, + "step": 6390 + }, + { + "epoch": 1.4735992621627854, + "grad_norm": 1.4826659174775283, + "learning_rate": 3.557856439168907e-07, + "loss": 0.39890235662460327, + "step": 6391 + }, + { + "epoch": 1.473829836292368, + "grad_norm": 1.7657939773449876, + "learning_rate": 3.5549408499169374e-07, + "loss": 0.47551727294921875, + "step": 6392 + }, + { + "epoch": 1.4740604104219508, + "grad_norm": 1.5946717850777934, + "learning_rate": 3.5520261975196364e-07, + "loss": 0.43851834535598755, + "step": 6393 + }, + { + "epoch": 1.4742909845515333, + "grad_norm": 1.7160257871535318, + "learning_rate": 3.549112482400676e-07, + "loss": 0.45289307832717896, + "step": 6394 + }, + { + "epoch": 1.474521558681116, + "grad_norm": 1.660677297447299, + "learning_rate": 3.546199704983591e-07, + "loss": 0.5229180455207825, + "step": 6395 + }, + { + "epoch": 1.4747521328106985, + "grad_norm": 1.5089259577077747, + "learning_rate": 3.5432878656917884e-07, + "loss": 0.47332310676574707, + "step": 6396 + }, + { + "epoch": 1.4749827069402812, + "grad_norm": 1.402371205517633, + "learning_rate": 3.540376964948529e-07, + "loss": 
0.4079092741012573, + "step": 6397 + }, + { + "epoch": 1.475213281069864, + "grad_norm": 1.607654850710184, + "learning_rate": 3.5374670031769484e-07, + "loss": 0.43366020917892456, + "step": 6398 + }, + { + "epoch": 1.4754438551994467, + "grad_norm": 1.6067458113996615, + "learning_rate": 3.5345579808000294e-07, + "loss": 0.45040106773376465, + "step": 6399 + }, + { + "epoch": 1.4756744293290294, + "grad_norm": 1.584960802510298, + "learning_rate": 3.531649898240634e-07, + "loss": 0.4409756064414978, + "step": 6400 + }, + { + "epoch": 1.4759050034586119, + "grad_norm": 1.5204759785794038, + "learning_rate": 3.528742755921481e-07, + "loss": 0.4141521751880646, + "step": 6401 + }, + { + "epoch": 1.4761355775881946, + "grad_norm": 1.6363482264143396, + "learning_rate": 3.525836554265156e-07, + "loss": 0.4697296619415283, + "step": 6402 + }, + { + "epoch": 1.4763661517177773, + "grad_norm": 1.3771953803345143, + "learning_rate": 3.5229312936941013e-07, + "loss": 0.4369434714317322, + "step": 6403 + }, + { + "epoch": 1.4765967258473598, + "grad_norm": 1.3415133870830294, + "learning_rate": 3.5200269746306224e-07, + "loss": 0.4197359085083008, + "step": 6404 + }, + { + "epoch": 1.4768272999769425, + "grad_norm": 1.8249279231813902, + "learning_rate": 3.5171235974968996e-07, + "loss": 0.495933473110199, + "step": 6405 + }, + { + "epoch": 1.4770578741065252, + "grad_norm": 1.3638396377453934, + "learning_rate": 3.51422116271496e-07, + "loss": 0.4177231192588806, + "step": 6406 + }, + { + "epoch": 1.477288448236108, + "grad_norm": 1.5336568107147823, + "learning_rate": 3.511319670706705e-07, + "loss": 0.5366500020027161, + "step": 6407 + }, + { + "epoch": 1.4775190223656907, + "grad_norm": 1.5479295323166011, + "learning_rate": 3.508419121893897e-07, + "loss": 0.3900446891784668, + "step": 6408 + }, + { + "epoch": 1.4777495964952732, + "grad_norm": 1.8223854522009124, + "learning_rate": 3.5055195166981646e-07, + "loss": 0.40877431631088257, + "step": 6409 + }, + { + 
"epoch": 1.4779801706248559, + "grad_norm": 1.3594177124317366, + "learning_rate": 3.502620855540985e-07, + "loss": 0.4381163716316223, + "step": 6410 + }, + { + "epoch": 1.4782107447544386, + "grad_norm": 1.2256800281998605, + "learning_rate": 3.4997231388437167e-07, + "loss": 0.3449817895889282, + "step": 6411 + }, + { + "epoch": 1.478441318884021, + "grad_norm": 1.4879818959728963, + "learning_rate": 3.4968263670275653e-07, + "loss": 0.4879523515701294, + "step": 6412 + }, + { + "epoch": 1.4786718930136038, + "grad_norm": 1.5651020351069762, + "learning_rate": 3.493930540513613e-07, + "loss": 0.3781365156173706, + "step": 6413 + }, + { + "epoch": 1.4789024671431865, + "grad_norm": 1.6645622352676888, + "learning_rate": 3.49103565972279e-07, + "loss": 0.4505656361579895, + "step": 6414 + }, + { + "epoch": 1.4791330412727692, + "grad_norm": 1.4565716791756764, + "learning_rate": 3.4881417250759006e-07, + "loss": 0.4285612106323242, + "step": 6415 + }, + { + "epoch": 1.479363615402352, + "grad_norm": 1.5357416036601346, + "learning_rate": 3.48524873699361e-07, + "loss": 0.5285177826881409, + "step": 6416 + }, + { + "epoch": 1.4795941895319344, + "grad_norm": 1.6484784065232339, + "learning_rate": 3.482356695896437e-07, + "loss": 0.4504782259464264, + "step": 6417 + }, + { + "epoch": 1.4798247636615172, + "grad_norm": 1.5658620514352724, + "learning_rate": 3.4794656022047765e-07, + "loss": 0.45295125246047974, + "step": 6418 + }, + { + "epoch": 1.4800553377910999, + "grad_norm": 1.3627022105594853, + "learning_rate": 3.47657545633887e-07, + "loss": 0.35889285802841187, + "step": 6419 + }, + { + "epoch": 1.4802859119206824, + "grad_norm": 1.5560865897069756, + "learning_rate": 3.4736862587188384e-07, + "loss": 0.49129703640937805, + "step": 6420 + }, + { + "epoch": 1.480516486050265, + "grad_norm": 1.6626930717329957, + "learning_rate": 3.4707980097646474e-07, + "loss": 0.5018036365509033, + "step": 6421 + }, + { + "epoch": 1.4807470601798478, + "grad_norm": 
1.6557207215915222, + "learning_rate": 3.46791070989614e-07, + "loss": 0.48743095993995667, + "step": 6422 + }, + { + "epoch": 1.4809776343094305, + "grad_norm": 1.5043027194300391, + "learning_rate": 3.46502435953301e-07, + "loss": 0.4876127243041992, + "step": 6423 + }, + { + "epoch": 1.4812082084390132, + "grad_norm": 1.971149486413709, + "learning_rate": 3.462138959094818e-07, + "loss": 0.517420768737793, + "step": 6424 + }, + { + "epoch": 1.4814387825685957, + "grad_norm": 1.8274785313456325, + "learning_rate": 3.4592545090009907e-07, + "loss": 0.49587076902389526, + "step": 6425 + }, + { + "epoch": 1.4816693566981785, + "grad_norm": 1.5362037346917286, + "learning_rate": 3.4563710096708063e-07, + "loss": 0.43007123470306396, + "step": 6426 + }, + { + "epoch": 1.4818999308277612, + "grad_norm": 1.358212427456112, + "learning_rate": 3.4534884615234163e-07, + "loss": 0.41231095790863037, + "step": 6427 + }, + { + "epoch": 1.4821305049573437, + "grad_norm": 1.6451517308598724, + "learning_rate": 3.450606864977822e-07, + "loss": 0.4454977512359619, + "step": 6428 + }, + { + "epoch": 1.4823610790869264, + "grad_norm": 1.3739971676037328, + "learning_rate": 3.447726220452899e-07, + "loss": 0.4432292878627777, + "step": 6429 + }, + { + "epoch": 1.482591653216509, + "grad_norm": 1.6222705799101154, + "learning_rate": 3.444846528367372e-07, + "loss": 0.47547852993011475, + "step": 6430 + }, + { + "epoch": 1.4828222273460918, + "grad_norm": 1.522255385470065, + "learning_rate": 3.441967789139837e-07, + "loss": 0.45712774991989136, + "step": 6431 + }, + { + "epoch": 1.4830528014756745, + "grad_norm": 2.2700209255759107, + "learning_rate": 3.439090003188748e-07, + "loss": 0.4485551714897156, + "step": 6432 + }, + { + "epoch": 1.483283375605257, + "grad_norm": 1.4019614855782472, + "learning_rate": 3.4362131709324225e-07, + "loss": 0.5157139301300049, + "step": 6433 + }, + { + "epoch": 1.4835139497348397, + "grad_norm": 1.6970431173839349, + "learning_rate": 
3.4333372927890346e-07, + "loss": 0.3786337375640869, + "step": 6434 + }, + { + "epoch": 1.4837445238644225, + "grad_norm": 1.430215191007922, + "learning_rate": 3.430462369176619e-07, + "loss": 0.444644033908844, + "step": 6435 + }, + { + "epoch": 1.483975097994005, + "grad_norm": 1.5213084700296855, + "learning_rate": 3.427588400513082e-07, + "loss": 0.450777530670166, + "step": 6436 + }, + { + "epoch": 1.4842056721235877, + "grad_norm": 1.6553650689166306, + "learning_rate": 3.424715387216176e-07, + "loss": 0.4547499418258667, + "step": 6437 + }, + { + "epoch": 1.4844362462531704, + "grad_norm": 1.3603667716838959, + "learning_rate": 3.4218433297035274e-07, + "loss": 0.41394394636154175, + "step": 6438 + }, + { + "epoch": 1.484666820382753, + "grad_norm": 1.3921623882761025, + "learning_rate": 3.4189722283926194e-07, + "loss": 0.46392822265625, + "step": 6439 + }, + { + "epoch": 1.4848973945123358, + "grad_norm": 1.3499969732544597, + "learning_rate": 3.416102083700797e-07, + "loss": 0.443311870098114, + "step": 6440 + }, + { + "epoch": 1.4851279686419183, + "grad_norm": 1.3830140570978715, + "learning_rate": 3.4132328960452594e-07, + "loss": 0.49744826555252075, + "step": 6441 + }, + { + "epoch": 1.485358542771501, + "grad_norm": 1.5191431970911358, + "learning_rate": 3.4103646658430787e-07, + "loss": 0.3906005620956421, + "step": 6442 + }, + { + "epoch": 1.4855891169010838, + "grad_norm": 1.3526583076340324, + "learning_rate": 3.407497393511175e-07, + "loss": 0.4236280918121338, + "step": 6443 + }, + { + "epoch": 1.4858196910306662, + "grad_norm": 1.6787824686307624, + "learning_rate": 3.4046310794663403e-07, + "loss": 0.5457645654678345, + "step": 6444 + }, + { + "epoch": 1.486050265160249, + "grad_norm": 1.7325001007084588, + "learning_rate": 3.4017657241252217e-07, + "loss": 0.541573703289032, + "step": 6445 + }, + { + "epoch": 1.4862808392898317, + "grad_norm": 1.9081537369674455, + "learning_rate": 3.398901327904322e-07, + "loss": 0.496945858001709, + 
"step": 6446 + }, + { + "epoch": 1.4865114134194144, + "grad_norm": 1.5413856714091914, + "learning_rate": 3.3960378912200136e-07, + "loss": 0.46119701862335205, + "step": 6447 + }, + { + "epoch": 1.4867419875489971, + "grad_norm": 1.8976464043536114, + "learning_rate": 3.3931754144885284e-07, + "loss": 0.5169441103935242, + "step": 6448 + }, + { + "epoch": 1.4869725616785796, + "grad_norm": 1.7130869588848308, + "learning_rate": 3.390313898125957e-07, + "loss": 0.525173544883728, + "step": 6449 + }, + { + "epoch": 1.4872031358081623, + "grad_norm": 1.6684348208587065, + "learning_rate": 3.3874533425482457e-07, + "loss": 0.46877139806747437, + "step": 6450 + }, + { + "epoch": 1.487433709937745, + "grad_norm": 1.6810644095850389, + "learning_rate": 3.3845937481712096e-07, + "loss": 0.49436479806900024, + "step": 6451 + }, + { + "epoch": 1.4876642840673275, + "grad_norm": 1.2950679928032611, + "learning_rate": 3.3817351154105145e-07, + "loss": 0.40879231691360474, + "step": 6452 + }, + { + "epoch": 1.4878948581969103, + "grad_norm": 1.5253823933458253, + "learning_rate": 3.378877444681697e-07, + "loss": 0.5060825347900391, + "step": 6453 + }, + { + "epoch": 1.488125432326493, + "grad_norm": 1.4561081118713566, + "learning_rate": 3.3760207364001434e-07, + "loss": 0.4875546097755432, + "step": 6454 + }, + { + "epoch": 1.4883560064560757, + "grad_norm": 1.5036556031092911, + "learning_rate": 3.373164990981108e-07, + "loss": 0.3791916072368622, + "step": 6455 + }, + { + "epoch": 1.4885865805856584, + "grad_norm": 1.4585716739422292, + "learning_rate": 3.370310208839704e-07, + "loss": 0.46757322549819946, + "step": 6456 + }, + { + "epoch": 1.488817154715241, + "grad_norm": 1.4061567541704671, + "learning_rate": 3.3674563903908994e-07, + "loss": 0.4334050416946411, + "step": 6457 + }, + { + "epoch": 1.4890477288448236, + "grad_norm": 1.4217577265821555, + "learning_rate": 3.3646035360495294e-07, + "loss": 0.4408720135688782, + "step": 6458 + }, + { + "epoch": 
1.4892783029744063, + "grad_norm": 1.637938092148249, + "learning_rate": 3.3617516462302795e-07, + "loss": 0.46556228399276733, + "step": 6459 + }, + { + "epoch": 1.4895088771039888, + "grad_norm": 1.3694379850190115, + "learning_rate": 3.3589007213477096e-07, + "loss": 0.5212184190750122, + "step": 6460 + }, + { + "epoch": 1.4897394512335715, + "grad_norm": 1.6425370019041445, + "learning_rate": 3.35605076181622e-07, + "loss": 0.5340084433555603, + "step": 6461 + }, + { + "epoch": 1.4899700253631543, + "grad_norm": 1.4674031830711234, + "learning_rate": 3.353201768050088e-07, + "loss": 0.38049495220184326, + "step": 6462 + }, + { + "epoch": 1.490200599492737, + "grad_norm": 1.5849611777401629, + "learning_rate": 3.350353740463442e-07, + "loss": 0.5480734705924988, + "step": 6463 + }, + { + "epoch": 1.4904311736223197, + "grad_norm": 1.4050939080217109, + "learning_rate": 3.3475066794702756e-07, + "loss": 0.4179231524467468, + "step": 6464 + }, + { + "epoch": 1.4906617477519022, + "grad_norm": 1.8331951463468434, + "learning_rate": 3.3446605854844335e-07, + "loss": 0.5380987524986267, + "step": 6465 + }, + { + "epoch": 1.490892321881485, + "grad_norm": 1.4221970681414315, + "learning_rate": 3.3418154589196226e-07, + "loss": 0.41146454215049744, + "step": 6466 + }, + { + "epoch": 1.4911228960110676, + "grad_norm": 1.5814296524447065, + "learning_rate": 3.3389713001894157e-07, + "loss": 0.4586387276649475, + "step": 6467 + }, + { + "epoch": 1.4913534701406501, + "grad_norm": 1.1757977126470995, + "learning_rate": 3.336128109707236e-07, + "loss": 0.4023931920528412, + "step": 6468 + }, + { + "epoch": 1.4915840442702328, + "grad_norm": 1.6673237012516164, + "learning_rate": 3.333285887886373e-07, + "loss": 0.5373448133468628, + "step": 6469 + }, + { + "epoch": 1.4918146183998156, + "grad_norm": 1.4523946751037105, + "learning_rate": 3.330444635139971e-07, + "loss": 0.4413643479347229, + "step": 6470 + }, + { + "epoch": 1.4920451925293983, + "grad_norm": 
1.3734904271626787, + "learning_rate": 3.3276043518810327e-07, + "loss": 0.399494469165802, + "step": 6471 + }, + { + "epoch": 1.492275766658981, + "grad_norm": 1.4170973987364872, + "learning_rate": 3.3247650385224256e-07, + "loss": 0.4353644847869873, + "step": 6472 + }, + { + "epoch": 1.4925063407885635, + "grad_norm": 1.7462483377307876, + "learning_rate": 3.3219266954768743e-07, + "loss": 0.5231607556343079, + "step": 6473 + }, + { + "epoch": 1.4927369149181462, + "grad_norm": 1.55800999194994, + "learning_rate": 3.3190893231569596e-07, + "loss": 0.414408802986145, + "step": 6474 + }, + { + "epoch": 1.492967489047729, + "grad_norm": 1.6408204727748315, + "learning_rate": 3.3162529219751155e-07, + "loss": 0.3921009302139282, + "step": 6475 + }, + { + "epoch": 1.4931980631773114, + "grad_norm": 1.6197044883986413, + "learning_rate": 3.3134174923436506e-07, + "loss": 0.4317164421081543, + "step": 6476 + }, + { + "epoch": 1.4934286373068941, + "grad_norm": 1.5697343564549593, + "learning_rate": 3.3105830346747175e-07, + "loss": 0.46302181482315063, + "step": 6477 + }, + { + "epoch": 1.4936592114364768, + "grad_norm": 1.464087037907405, + "learning_rate": 3.307749549380335e-07, + "loss": 0.45704615116119385, + "step": 6478 + }, + { + "epoch": 1.4938897855660596, + "grad_norm": 1.5032451370482525, + "learning_rate": 3.304917036872379e-07, + "loss": 0.45455485582351685, + "step": 6479 + }, + { + "epoch": 1.4941203596956423, + "grad_norm": 1.5465084069557762, + "learning_rate": 3.302085497562588e-07, + "loss": 0.41939157247543335, + "step": 6480 + }, + { + "epoch": 1.4943509338252248, + "grad_norm": 1.3682263746176198, + "learning_rate": 3.2992549318625487e-07, + "loss": 0.4109286367893219, + "step": 6481 + }, + { + "epoch": 1.4945815079548075, + "grad_norm": 2.0164734849697, + "learning_rate": 3.2964253401837173e-07, + "loss": 0.44710463285446167, + "step": 6482 + }, + { + "epoch": 1.4948120820843902, + "grad_norm": 1.6884711291100036, + "learning_rate": 
3.2935967229373986e-07, + "loss": 0.4330691695213318, + "step": 6483 + }, + { + "epoch": 1.4950426562139727, + "grad_norm": 1.4066891595951536, + "learning_rate": 3.2907690805347667e-07, + "loss": 0.41174834966659546, + "step": 6484 + }, + { + "epoch": 1.4952732303435554, + "grad_norm": 1.5235589172624593, + "learning_rate": 3.2879424133868406e-07, + "loss": 0.4368870258331299, + "step": 6485 + }, + { + "epoch": 1.4955038044731381, + "grad_norm": 1.581699276196859, + "learning_rate": 3.2851167219045107e-07, + "loss": 0.5155518651008606, + "step": 6486 + }, + { + "epoch": 1.4957343786027208, + "grad_norm": 1.4965040692694338, + "learning_rate": 3.282292006498522e-07, + "loss": 0.47015419602394104, + "step": 6487 + }, + { + "epoch": 1.4959649527323036, + "grad_norm": 1.4271101962383341, + "learning_rate": 3.2794682675794684e-07, + "loss": 0.41059884428977966, + "step": 6488 + }, + { + "epoch": 1.496195526861886, + "grad_norm": 1.7728377181019612, + "learning_rate": 3.2766455055578157e-07, + "loss": 0.4864136278629303, + "step": 6489 + }, + { + "epoch": 1.4964261009914688, + "grad_norm": 1.1780419841322618, + "learning_rate": 3.2738237208438744e-07, + "loss": 0.3599165976047516, + "step": 6490 + }, + { + "epoch": 1.4966566751210515, + "grad_norm": 1.4373611771192503, + "learning_rate": 3.2710029138478267e-07, + "loss": 0.4734029769897461, + "step": 6491 + }, + { + "epoch": 1.496887249250634, + "grad_norm": 1.5053587105753783, + "learning_rate": 3.268183084979699e-07, + "loss": 0.46739861369132996, + "step": 6492 + }, + { + "epoch": 1.4971178233802167, + "grad_norm": 1.745789102022849, + "learning_rate": 3.265364234649387e-07, + "loss": 0.46794670820236206, + "step": 6493 + }, + { + "epoch": 1.4973483975097994, + "grad_norm": 1.6683012395243093, + "learning_rate": 3.262546363266635e-07, + "loss": 0.463203489780426, + "step": 6494 + }, + { + "epoch": 1.4975789716393821, + "grad_norm": 1.4489172807794646, + "learning_rate": 3.2597294712410504e-07, + "loss": 
0.4495059847831726, + "step": 6495 + }, + { + "epoch": 1.4978095457689649, + "grad_norm": 1.464704014292867, + "learning_rate": 3.256913558982101e-07, + "loss": 0.43549245595932007, + "step": 6496 + }, + { + "epoch": 1.4980401198985474, + "grad_norm": 1.552183908593376, + "learning_rate": 3.254098626899102e-07, + "loss": 0.40582704544067383, + "step": 6497 + }, + { + "epoch": 1.49827069402813, + "grad_norm": 1.527774566610999, + "learning_rate": 3.251284675401238e-07, + "loss": 0.3720378279685974, + "step": 6498 + }, + { + "epoch": 1.4985012681577128, + "grad_norm": 1.4814613073983138, + "learning_rate": 3.24847170489754e-07, + "loss": 0.42694520950317383, + "step": 6499 + }, + { + "epoch": 1.4987318422872953, + "grad_norm": 1.4768231117771715, + "learning_rate": 3.2456597157969066e-07, + "loss": 0.442158043384552, + "step": 6500 + }, + { + "epoch": 1.498962416416878, + "grad_norm": 1.4765054194953837, + "learning_rate": 3.2428487085080846e-07, + "loss": 0.44245558977127075, + "step": 6501 + }, + { + "epoch": 1.4991929905464607, + "grad_norm": 1.3559485373971267, + "learning_rate": 3.240038683439684e-07, + "loss": 0.4127236008644104, + "step": 6502 + }, + { + "epoch": 1.4994235646760434, + "grad_norm": 1.4985576311709152, + "learning_rate": 3.237229641000171e-07, + "loss": 0.4262787103652954, + "step": 6503 + }, + { + "epoch": 1.4996541388056261, + "grad_norm": 1.6706445028718073, + "learning_rate": 3.2344215815978714e-07, + "loss": 0.4181264042854309, + "step": 6504 + }, + { + "epoch": 1.4998847129352086, + "grad_norm": 1.6044294628436637, + "learning_rate": 3.2316145056409616e-07, + "loss": 0.4416937530040741, + "step": 6505 + }, + { + "epoch": 1.5001152870647914, + "grad_norm": 1.8850023720212492, + "learning_rate": 3.228808413537476e-07, + "loss": 0.4901489019393921, + "step": 6506 + }, + { + "epoch": 1.5003458611943739, + "grad_norm": 1.3996173090866784, + "learning_rate": 3.2260033056953153e-07, + "loss": 0.37932026386260986, + "step": 6507 + }, + { + 
"epoch": 1.5005764353239566, + "grad_norm": 1.649923361135509, + "learning_rate": 3.223199182522223e-07, + "loss": 0.4680899381637573, + "step": 6508 + }, + { + "epoch": 1.5008070094535393, + "grad_norm": 1.6955418693371036, + "learning_rate": 3.2203960444258105e-07, + "loss": 0.508334219455719, + "step": 6509 + }, + { + "epoch": 1.501037583583122, + "grad_norm": 2.0480591557575685, + "learning_rate": 3.2175938918135415e-07, + "loss": 0.3386784791946411, + "step": 6510 + }, + { + "epoch": 1.5012681577127047, + "grad_norm": 1.860117074212897, + "learning_rate": 3.214792725092741e-07, + "loss": 0.4315892457962036, + "step": 6511 + }, + { + "epoch": 1.5014987318422874, + "grad_norm": 1.4533616152071933, + "learning_rate": 3.211992544670582e-07, + "loss": 0.3709627389907837, + "step": 6512 + }, + { + "epoch": 1.50172930597187, + "grad_norm": 1.6433224440752017, + "learning_rate": 3.2091933509541023e-07, + "loss": 0.5260987877845764, + "step": 6513 + }, + { + "epoch": 1.5019598801014526, + "grad_norm": 1.5201640514539732, + "learning_rate": 3.20639514435019e-07, + "loss": 0.5379073619842529, + "step": 6514 + }, + { + "epoch": 1.5021904542310351, + "grad_norm": 1.2867052063244526, + "learning_rate": 3.2035979252655976e-07, + "loss": 0.47530391812324524, + "step": 6515 + }, + { + "epoch": 1.5024210283606179, + "grad_norm": 1.5201328820105404, + "learning_rate": 3.200801694106926e-07, + "loss": 0.459227979183197, + "step": 6516 + }, + { + "epoch": 1.5026516024902006, + "grad_norm": 1.5330729417783509, + "learning_rate": 3.19800645128063e-07, + "loss": 0.4867238998413086, + "step": 6517 + }, + { + "epoch": 1.5028821766197833, + "grad_norm": 1.4246709864782185, + "learning_rate": 3.195212197193039e-07, + "loss": 0.38478928804397583, + "step": 6518 + }, + { + "epoch": 1.503112750749366, + "grad_norm": 1.625989812299007, + "learning_rate": 3.192418932250316e-07, + "loss": 0.3938423991203308, + "step": 6519 + }, + { + "epoch": 1.5033433248789487, + "grad_norm": 
1.8227844221564524, + "learning_rate": 3.1896266568584975e-07, + "loss": 0.457303911447525, + "step": 6520 + }, + { + "epoch": 1.5035738990085312, + "grad_norm": 1.5422494994233005, + "learning_rate": 3.1868353714234607e-07, + "loss": 0.5007269382476807, + "step": 6521 + }, + { + "epoch": 1.503804473138114, + "grad_norm": 1.4891205198132078, + "learning_rate": 3.1840450763509576e-07, + "loss": 0.3878381848335266, + "step": 6522 + }, + { + "epoch": 1.5040350472676964, + "grad_norm": 1.798955261342233, + "learning_rate": 3.181255772046575e-07, + "loss": 0.488269567489624, + "step": 6523 + }, + { + "epoch": 1.5042656213972792, + "grad_norm": 1.4981578078592954, + "learning_rate": 3.1784674589157767e-07, + "loss": 0.41664889454841614, + "step": 6524 + }, + { + "epoch": 1.5044961955268619, + "grad_norm": 1.6014375227212925, + "learning_rate": 3.175680137363863e-07, + "loss": 0.4862533509731293, + "step": 6525 + }, + { + "epoch": 1.5047267696564446, + "grad_norm": 1.599713126186934, + "learning_rate": 3.172893807796004e-07, + "loss": 0.4629037380218506, + "step": 6526 + }, + { + "epoch": 1.5049573437860273, + "grad_norm": 1.6094632634811818, + "learning_rate": 3.1701084706172245e-07, + "loss": 0.46300196647644043, + "step": 6527 + }, + { + "epoch": 1.50518791791561, + "grad_norm": 1.4186362500626026, + "learning_rate": 3.1673241262323934e-07, + "loss": 0.40698888897895813, + "step": 6528 + }, + { + "epoch": 1.5054184920451925, + "grad_norm": 1.484473947418196, + "learning_rate": 3.1645407750462514e-07, + "loss": 0.4344380497932434, + "step": 6529 + }, + { + "epoch": 1.5056490661747752, + "grad_norm": 1.6200348544461498, + "learning_rate": 3.1617584174633806e-07, + "loss": 0.49757128953933716, + "step": 6530 + }, + { + "epoch": 1.5058796403043577, + "grad_norm": 1.6256839483530447, + "learning_rate": 3.15897705388823e-07, + "loss": 0.4506916105747223, + "step": 6531 + }, + { + "epoch": 1.5061102144339404, + "grad_norm": 1.5009759227514647, + "learning_rate": 
3.156196684725093e-07, + "loss": 0.3941146731376648, + "step": 6532 + }, + { + "epoch": 1.5063407885635232, + "grad_norm": 1.9065405733956409, + "learning_rate": 3.153417310378127e-07, + "loss": 0.5400820374488831, + "step": 6533 + }, + { + "epoch": 1.5065713626931059, + "grad_norm": 1.774411964329925, + "learning_rate": 3.1506389312513435e-07, + "loss": 0.4418470859527588, + "step": 6534 + }, + { + "epoch": 1.5068019368226886, + "grad_norm": 1.3196915654196755, + "learning_rate": 3.1478615477486113e-07, + "loss": 0.3897334933280945, + "step": 6535 + }, + { + "epoch": 1.5070325109522713, + "grad_norm": 1.5772083777596413, + "learning_rate": 3.145085160273647e-07, + "loss": 0.4923437833786011, + "step": 6536 + }, + { + "epoch": 1.5072630850818538, + "grad_norm": 1.575539005736493, + "learning_rate": 3.142309769230025e-07, + "loss": 0.41996920108795166, + "step": 6537 + }, + { + "epoch": 1.5074936592114365, + "grad_norm": 1.5634954618427415, + "learning_rate": 3.1395353750211806e-07, + "loss": 0.38584667444229126, + "step": 6538 + }, + { + "epoch": 1.507724233341019, + "grad_norm": 1.5469052539454182, + "learning_rate": 3.136761978050395e-07, + "loss": 0.5093455910682678, + "step": 6539 + }, + { + "epoch": 1.5079548074706017, + "grad_norm": 1.8844111555093896, + "learning_rate": 3.1339895787208126e-07, + "loss": 0.5592935681343079, + "step": 6540 + }, + { + "epoch": 1.5081853816001844, + "grad_norm": 1.7670191671756568, + "learning_rate": 3.1312181774354306e-07, + "loss": 0.38311779499053955, + "step": 6541 + }, + { + "epoch": 1.5084159557297672, + "grad_norm": 1.6894588927823573, + "learning_rate": 3.1284477745971025e-07, + "loss": 0.4422299265861511, + "step": 6542 + }, + { + "epoch": 1.5086465298593499, + "grad_norm": 1.5653024747826005, + "learning_rate": 3.125678370608528e-07, + "loss": 0.5097527503967285, + "step": 6543 + }, + { + "epoch": 1.5088771039889326, + "grad_norm": 1.4635088499535702, + "learning_rate": 3.1229099658722747e-07, + "loss": 
0.42586642503738403, + "step": 6544 + }, + { + "epoch": 1.509107678118515, + "grad_norm": 1.7853929312810684, + "learning_rate": 3.120142560790755e-07, + "loss": 0.5006861686706543, + "step": 6545 + }, + { + "epoch": 1.5093382522480978, + "grad_norm": 1.292111562170076, + "learning_rate": 3.117376155766237e-07, + "loss": 0.4361686706542969, + "step": 6546 + }, + { + "epoch": 1.5095688263776803, + "grad_norm": 1.4890005224956508, + "learning_rate": 3.11461075120085e-07, + "loss": 0.45466339588165283, + "step": 6547 + }, + { + "epoch": 1.509799400507263, + "grad_norm": 1.4657261766322067, + "learning_rate": 3.1118463474965697e-07, + "loss": 0.39591068029403687, + "step": 6548 + }, + { + "epoch": 1.5100299746368457, + "grad_norm": 1.669083463008409, + "learning_rate": 3.1090829450552316e-07, + "loss": 0.4672427475452423, + "step": 6549 + }, + { + "epoch": 1.5102605487664285, + "grad_norm": 1.6273442700037082, + "learning_rate": 3.1063205442785234e-07, + "loss": 0.4785880148410797, + "step": 6550 + }, + { + "epoch": 1.5104911228960112, + "grad_norm": 1.3915985235576667, + "learning_rate": 3.103559145567994e-07, + "loss": 0.441936731338501, + "step": 6551 + }, + { + "epoch": 1.510721697025594, + "grad_norm": 1.5501390159164539, + "learning_rate": 3.1007987493250334e-07, + "loss": 0.49719512462615967, + "step": 6552 + }, + { + "epoch": 1.5109522711551764, + "grad_norm": 1.7806538694012621, + "learning_rate": 3.098039355950899e-07, + "loss": 0.40702491998672485, + "step": 6553 + }, + { + "epoch": 1.511182845284759, + "grad_norm": 1.4605232780084745, + "learning_rate": 3.0952809658466896e-07, + "loss": 0.44754648208618164, + "step": 6554 + }, + { + "epoch": 1.5114134194143416, + "grad_norm": 1.7119927234849008, + "learning_rate": 3.0925235794133717e-07, + "loss": 0.5370102524757385, + "step": 6555 + }, + { + "epoch": 1.5116439935439243, + "grad_norm": 1.4781444883115034, + "learning_rate": 3.089767197051755e-07, + "loss": 0.46693646907806396, + "step": 6556 + }, + { + 
"epoch": 1.511874567673507, + "grad_norm": 1.3940905139236526, + "learning_rate": 3.0870118191625084e-07, + "loss": 0.3887597322463989, + "step": 6557 + }, + { + "epoch": 1.5121051418030897, + "grad_norm": 1.509297997221229, + "learning_rate": 3.0842574461461577e-07, + "loss": 0.4783397912979126, + "step": 6558 + }, + { + "epoch": 1.5123357159326725, + "grad_norm": 2.254982960205746, + "learning_rate": 3.081504078403073e-07, + "loss": 0.5305588245391846, + "step": 6559 + }, + { + "epoch": 1.5125662900622552, + "grad_norm": 1.867807225680096, + "learning_rate": 3.078751716333492e-07, + "loss": 0.45315784215927124, + "step": 6560 + }, + { + "epoch": 1.5127968641918377, + "grad_norm": 1.6356411182801975, + "learning_rate": 3.0760003603374897e-07, + "loss": 0.4805132746696472, + "step": 6561 + }, + { + "epoch": 1.5130274383214202, + "grad_norm": 1.5579254915377003, + "learning_rate": 3.0732500108150104e-07, + "loss": 0.4956076145172119, + "step": 6562 + }, + { + "epoch": 1.5132580124510029, + "grad_norm": 1.6872988549232402, + "learning_rate": 3.07050066816584e-07, + "loss": 0.3629196882247925, + "step": 6563 + }, + { + "epoch": 1.5134885865805856, + "grad_norm": 1.4271734684348691, + "learning_rate": 3.067752332789626e-07, + "loss": 0.43240371346473694, + "step": 6564 + }, + { + "epoch": 1.5137191607101683, + "grad_norm": 1.4730845718882644, + "learning_rate": 3.065005005085869e-07, + "loss": 0.4933302402496338, + "step": 6565 + }, + { + "epoch": 1.513949734839751, + "grad_norm": 1.5594123406832316, + "learning_rate": 3.0622586854539155e-07, + "loss": 0.47905197739601135, + "step": 6566 + }, + { + "epoch": 1.5141803089693338, + "grad_norm": 1.3120965583955209, + "learning_rate": 3.059513374292978e-07, + "loss": 0.4245232343673706, + "step": 6567 + }, + { + "epoch": 1.5144108830989162, + "grad_norm": 1.6401225191596096, + "learning_rate": 3.0567690720021077e-07, + "loss": 0.40526312589645386, + "step": 6568 + }, + { + "epoch": 1.514641457228499, + "grad_norm": 
1.7208705138340397, + "learning_rate": 3.0540257789802227e-07, + "loss": 0.5808804631233215, + "step": 6569 + }, + { + "epoch": 1.5148720313580815, + "grad_norm": 1.791338069752229, + "learning_rate": 3.0512834956260836e-07, + "loss": 0.44997286796569824, + "step": 6570 + }, + { + "epoch": 1.5151026054876642, + "grad_norm": 1.6800897456169108, + "learning_rate": 3.048542222338315e-07, + "loss": 0.44051581621170044, + "step": 6571 + }, + { + "epoch": 1.515333179617247, + "grad_norm": 1.525217042834723, + "learning_rate": 3.045801959515382e-07, + "loss": 0.5113236308097839, + "step": 6572 + }, + { + "epoch": 1.5155637537468296, + "grad_norm": 1.5439102757372205, + "learning_rate": 3.0430627075556125e-07, + "loss": 0.554703950881958, + "step": 6573 + }, + { + "epoch": 1.5157943278764123, + "grad_norm": 1.600156572288611, + "learning_rate": 3.0403244668571847e-07, + "loss": 0.3819808065891266, + "step": 6574 + }, + { + "epoch": 1.516024902005995, + "grad_norm": 1.4872928405937125, + "learning_rate": 3.037587237818133e-07, + "loss": 0.47970864176750183, + "step": 6575 + }, + { + "epoch": 1.5162554761355775, + "grad_norm": 1.4776778157711579, + "learning_rate": 3.0348510208363386e-07, + "loss": 0.4296469986438751, + "step": 6576 + }, + { + "epoch": 1.5164860502651603, + "grad_norm": 1.462836798021035, + "learning_rate": 3.032115816309535e-07, + "loss": 0.4372752904891968, + "step": 6577 + }, + { + "epoch": 1.5167166243947428, + "grad_norm": 1.673613757204577, + "learning_rate": 3.029381624635318e-07, + "loss": 0.4711950719356537, + "step": 6578 + }, + { + "epoch": 1.5169471985243255, + "grad_norm": 1.3932522433513406, + "learning_rate": 3.026648446211124e-07, + "loss": 0.4448170065879822, + "step": 6579 + }, + { + "epoch": 1.5171777726539082, + "grad_norm": 1.6184181695445041, + "learning_rate": 3.02391628143425e-07, + "loss": 0.4527873992919922, + "step": 6580 + }, + { + "epoch": 1.517408346783491, + "grad_norm": 1.6799725255249693, + "learning_rate": 
3.0211851307018463e-07, + "loss": 0.453765332698822, + "step": 6581 + }, + { + "epoch": 1.5176389209130736, + "grad_norm": 1.686193810125547, + "learning_rate": 3.018454994410915e-07, + "loss": 0.46818265318870544, + "step": 6582 + }, + { + "epoch": 1.5178694950426563, + "grad_norm": 1.6601834563107158, + "learning_rate": 3.0157258729583026e-07, + "loss": 0.38551369309425354, + "step": 6583 + }, + { + "epoch": 1.5181000691722388, + "grad_norm": 1.2759146716130436, + "learning_rate": 3.012997766740721e-07, + "loss": 0.3651260733604431, + "step": 6584 + }, + { + "epoch": 1.5183306433018215, + "grad_norm": 1.4942378521466573, + "learning_rate": 3.010270676154726e-07, + "loss": 0.36894726753234863, + "step": 6585 + }, + { + "epoch": 1.518561217431404, + "grad_norm": 1.5163949110289714, + "learning_rate": 3.007544601596722e-07, + "loss": 0.42595791816711426, + "step": 6586 + }, + { + "epoch": 1.5187917915609868, + "grad_norm": 1.9011368495730705, + "learning_rate": 3.004819543462979e-07, + "loss": 0.4916795492172241, + "step": 6587 + }, + { + "epoch": 1.5190223656905695, + "grad_norm": 3.958756092482824, + "learning_rate": 3.0020955021496073e-07, + "loss": 0.5098932385444641, + "step": 6588 + }, + { + "epoch": 1.5192529398201522, + "grad_norm": 1.7429564765653418, + "learning_rate": 2.9993724780525796e-07, + "loss": 0.6336305737495422, + "step": 6589 + }, + { + "epoch": 1.519483513949735, + "grad_norm": 1.6454779446539551, + "learning_rate": 2.996650471567709e-07, + "loss": 0.4911893606185913, + "step": 6590 + }, + { + "epoch": 1.5197140880793176, + "grad_norm": 1.6053455149976412, + "learning_rate": 2.9939294830906727e-07, + "loss": 0.4388008117675781, + "step": 6591 + }, + { + "epoch": 1.5199446622089001, + "grad_norm": 1.4960203678707569, + "learning_rate": 2.991209513016986e-07, + "loss": 0.392263799905777, + "step": 6592 + }, + { + "epoch": 1.5201752363384828, + "grad_norm": 1.4101720949081316, + "learning_rate": 2.988490561742032e-07, + "loss": 
0.36495402455329895, + "step": 6593 + }, + { + "epoch": 1.5204058104680653, + "grad_norm": 1.6817212910549741, + "learning_rate": 2.985772629661032e-07, + "loss": 0.5280855298042297, + "step": 6594 + }, + { + "epoch": 1.520636384597648, + "grad_norm": 1.4575719708434207, + "learning_rate": 2.9830557171690693e-07, + "loss": 0.43953752517700195, + "step": 6595 + }, + { + "epoch": 1.5208669587272308, + "grad_norm": 1.261754251016282, + "learning_rate": 2.980339824661071e-07, + "loss": 0.41361862421035767, + "step": 6596 + }, + { + "epoch": 1.5210975328568135, + "grad_norm": 1.4525947923531464, + "learning_rate": 2.977624952531821e-07, + "loss": 0.39955854415893555, + "step": 6597 + }, + { + "epoch": 1.5213281069863962, + "grad_norm": 1.664684863463753, + "learning_rate": 2.9749111011759565e-07, + "loss": 0.505165696144104, + "step": 6598 + }, + { + "epoch": 1.521558681115979, + "grad_norm": 1.5619432117854901, + "learning_rate": 2.9721982709879566e-07, + "loss": 0.4388153851032257, + "step": 6599 + }, + { + "epoch": 1.5217892552455614, + "grad_norm": 1.454152411615684, + "learning_rate": 2.969486462362167e-07, + "loss": 0.4479100704193115, + "step": 6600 + }, + { + "epoch": 1.5220198293751441, + "grad_norm": 1.4345831092951191, + "learning_rate": 2.9667756756927686e-07, + "loss": 0.4005380868911743, + "step": 6601 + }, + { + "epoch": 1.5222504035047266, + "grad_norm": 1.707280681236192, + "learning_rate": 2.9640659113738087e-07, + "loss": 0.43774881958961487, + "step": 6602 + }, + { + "epoch": 1.5224809776343093, + "grad_norm": 1.5608510724785551, + "learning_rate": 2.9613571697991725e-07, + "loss": 0.4449707865715027, + "step": 6603 + }, + { + "epoch": 1.522711551763892, + "grad_norm": 1.6567386639534631, + "learning_rate": 2.958649451362606e-07, + "loss": 0.454499751329422, + "step": 6604 + }, + { + "epoch": 1.5229421258934748, + "grad_norm": 1.2977143159727098, + "learning_rate": 2.955942756457707e-07, + "loss": 0.35601305961608887, + "step": 6605 + }, + { + 
"epoch": 1.5231727000230575, + "grad_norm": 1.6684183476509384, + "learning_rate": 2.9532370854779143e-07, + "loss": 0.5252523422241211, + "step": 6606 + }, + { + "epoch": 1.5234032741526402, + "grad_norm": 1.3731317276647081, + "learning_rate": 2.950532438816531e-07, + "loss": 0.4311884939670563, + "step": 6607 + }, + { + "epoch": 1.5236338482822227, + "grad_norm": 1.5784692430456444, + "learning_rate": 2.9478288168667e-07, + "loss": 0.43956485390663147, + "step": 6608 + }, + { + "epoch": 1.5238644224118054, + "grad_norm": 1.4213527447836085, + "learning_rate": 2.9451262200214235e-07, + "loss": 0.400115430355072, + "step": 6609 + }, + { + "epoch": 1.524094996541388, + "grad_norm": 1.6612091081011793, + "learning_rate": 2.942424648673548e-07, + "loss": 0.41738802194595337, + "step": 6610 + }, + { + "epoch": 1.5243255706709706, + "grad_norm": 1.5951584459105572, + "learning_rate": 2.939724103215776e-07, + "loss": 0.412765771150589, + "step": 6611 + }, + { + "epoch": 1.5245561448005533, + "grad_norm": 1.6739308031441762, + "learning_rate": 2.937024584040659e-07, + "loss": 0.44869422912597656, + "step": 6612 + }, + { + "epoch": 1.524786718930136, + "grad_norm": 1.5443554211834334, + "learning_rate": 2.934326091540603e-07, + "loss": 0.39191997051239014, + "step": 6613 + }, + { + "epoch": 1.5250172930597188, + "grad_norm": 1.307209963924962, + "learning_rate": 2.9316286261078547e-07, + "loss": 0.36575692892074585, + "step": 6614 + }, + { + "epoch": 1.5252478671893015, + "grad_norm": 1.5775953874602453, + "learning_rate": 2.9289321881345254e-07, + "loss": 0.49928778409957886, + "step": 6615 + }, + { + "epoch": 1.525478441318884, + "grad_norm": 1.5029437064522762, + "learning_rate": 2.926236778012565e-07, + "loss": 0.49619296193122864, + "step": 6616 + }, + { + "epoch": 1.5257090154484667, + "grad_norm": 1.5175956935877304, + "learning_rate": 2.923542396133777e-07, + "loss": 0.4614447355270386, + "step": 6617 + }, + { + "epoch": 1.5259395895780492, + "grad_norm": 
1.5326379965687464, + "learning_rate": 2.9208490428898213e-07, + "loss": 0.43820804357528687, + "step": 6618 + }, + { + "epoch": 1.526170163707632, + "grad_norm": 1.7297859153701105, + "learning_rate": 2.9181567186722e-07, + "loss": 0.46856528520584106, + "step": 6619 + }, + { + "epoch": 1.5264007378372146, + "grad_norm": 1.5560178508678546, + "learning_rate": 2.915465423872272e-07, + "loss": 0.45428818464279175, + "step": 6620 + }, + { + "epoch": 1.5266313119667974, + "grad_norm": 1.765757281110695, + "learning_rate": 2.912775158881243e-07, + "loss": 0.44715386629104614, + "step": 6621 + }, + { + "epoch": 1.52686188609638, + "grad_norm": 1.845941311143575, + "learning_rate": 2.9100859240901764e-07, + "loss": 0.537441611289978, + "step": 6622 + }, + { + "epoch": 1.5270924602259628, + "grad_norm": 2.100811269468338, + "learning_rate": 2.9073977198899714e-07, + "loss": 0.4430112838745117, + "step": 6623 + }, + { + "epoch": 1.5273230343555453, + "grad_norm": 1.625928583733216, + "learning_rate": 2.904710546671392e-07, + "loss": 0.41713255643844604, + "step": 6624 + }, + { + "epoch": 1.527553608485128, + "grad_norm": 1.639578198355071, + "learning_rate": 2.9020244048250396e-07, + "loss": 0.4313931465148926, + "step": 6625 + }, + { + "epoch": 1.5277841826147105, + "grad_norm": 1.617455818460061, + "learning_rate": 2.899339294741379e-07, + "loss": 0.5038034319877625, + "step": 6626 + }, + { + "epoch": 1.5280147567442932, + "grad_norm": 1.6017224429954546, + "learning_rate": 2.8966552168107127e-07, + "loss": 0.45088762044906616, + "step": 6627 + }, + { + "epoch": 1.528245330873876, + "grad_norm": 1.6027378992570083, + "learning_rate": 2.8939721714232e-07, + "loss": 0.40857064723968506, + "step": 6628 + }, + { + "epoch": 1.5284759050034586, + "grad_norm": 1.5432592985198028, + "learning_rate": 2.891290158968853e-07, + "loss": 0.43766242265701294, + "step": 6629 + }, + { + "epoch": 1.5287064791330414, + "grad_norm": 1.6663524119863393, + "learning_rate": 
2.888609179837523e-07, + "loss": 0.45986247062683105, + "step": 6630 + }, + { + "epoch": 1.528937053262624, + "grad_norm": 1.5102818288035118, + "learning_rate": 2.8859292344189236e-07, + "loss": 0.4681728482246399, + "step": 6631 + }, + { + "epoch": 1.5291676273922066, + "grad_norm": 1.4009274503220306, + "learning_rate": 2.883250323102605e-07, + "loss": 0.36730295419692993, + "step": 6632 + }, + { + "epoch": 1.5293982015217893, + "grad_norm": 1.6785355662696937, + "learning_rate": 2.880572446277982e-07, + "loss": 0.43494418263435364, + "step": 6633 + }, + { + "epoch": 1.5296287756513718, + "grad_norm": 1.6257441783659756, + "learning_rate": 2.877895604334305e-07, + "loss": 0.49145790934562683, + "step": 6634 + }, + { + "epoch": 1.5298593497809545, + "grad_norm": 1.4638603112091872, + "learning_rate": 2.875219797660681e-07, + "loss": 0.4166264832019806, + "step": 6635 + }, + { + "epoch": 1.5300899239105372, + "grad_norm": 1.3504636181719787, + "learning_rate": 2.8725450266460704e-07, + "loss": 0.4336514472961426, + "step": 6636 + }, + { + "epoch": 1.53032049804012, + "grad_norm": 1.6796430942391267, + "learning_rate": 2.869871291679271e-07, + "loss": 0.44186240434646606, + "step": 6637 + }, + { + "epoch": 1.5305510721697027, + "grad_norm": 1.4751166079505253, + "learning_rate": 2.867198593148945e-07, + "loss": 0.40619733929634094, + "step": 6638 + }, + { + "epoch": 1.5307816462992854, + "grad_norm": 1.4034694689938345, + "learning_rate": 2.864526931443588e-07, + "loss": 0.45552101731300354, + "step": 6639 + }, + { + "epoch": 1.5310122204288679, + "grad_norm": 1.3563039501008287, + "learning_rate": 2.861856306951562e-07, + "loss": 0.45153865218162537, + "step": 6640 + }, + { + "epoch": 1.5312427945584506, + "grad_norm": 1.5793746333655185, + "learning_rate": 2.859186720061061e-07, + "loss": 0.5146148204803467, + "step": 6641 + }, + { + "epoch": 1.531473368688033, + "grad_norm": 1.5627792728055054, + "learning_rate": 2.856518171160143e-07, + "loss": 
0.4566080868244171, + "step": 6642 + }, + { + "epoch": 1.5317039428176158, + "grad_norm": 1.93802928616596, + "learning_rate": 2.853850660636703e-07, + "loss": 0.4390585124492645, + "step": 6643 + }, + { + "epoch": 1.5319345169471985, + "grad_norm": 1.7734959004013588, + "learning_rate": 2.851184188878493e-07, + "loss": 0.5508195757865906, + "step": 6644 + }, + { + "epoch": 1.5321650910767812, + "grad_norm": 1.6721581584041076, + "learning_rate": 2.8485187562731126e-07, + "loss": 0.47640183568000793, + "step": 6645 + }, + { + "epoch": 1.532395665206364, + "grad_norm": 1.421769874384772, + "learning_rate": 2.8458543632080123e-07, + "loss": 0.4511566758155823, + "step": 6646 + }, + { + "epoch": 1.5326262393359467, + "grad_norm": 1.5003089507123706, + "learning_rate": 2.843191010070486e-07, + "loss": 0.414367139339447, + "step": 6647 + }, + { + "epoch": 1.5328568134655292, + "grad_norm": 1.5192326893049226, + "learning_rate": 2.840528697247674e-07, + "loss": 0.4611589312553406, + "step": 6648 + }, + { + "epoch": 1.5330873875951119, + "grad_norm": 1.6397285440449882, + "learning_rate": 2.8378674251265787e-07, + "loss": 0.4675883948802948, + "step": 6649 + }, + { + "epoch": 1.5333179617246944, + "grad_norm": 1.6281144487220143, + "learning_rate": 2.835207194094036e-07, + "loss": 0.49039095640182495, + "step": 6650 + }, + { + "epoch": 1.533548535854277, + "grad_norm": 1.6636356702139277, + "learning_rate": 2.832548004536741e-07, + "loss": 0.45641693472862244, + "step": 6651 + }, + { + "epoch": 1.5337791099838598, + "grad_norm": 1.7323507398911224, + "learning_rate": 2.829889856841233e-07, + "loss": 0.4858587682247162, + "step": 6652 + }, + { + "epoch": 1.5340096841134425, + "grad_norm": 1.3640056940377991, + "learning_rate": 2.8272327513939055e-07, + "loss": 0.3640017807483673, + "step": 6653 + }, + { + "epoch": 1.5342402582430252, + "grad_norm": 1.5342226074105705, + "learning_rate": 2.8245766885809865e-07, + "loss": 0.42915207147598267, + "step": 6654 + }, + { + 
"epoch": 1.534470832372608, + "grad_norm": 1.5250515427099394, + "learning_rate": 2.8219216687885707e-07, + "loss": 0.5041407346725464, + "step": 6655 + }, + { + "epoch": 1.5347014065021904, + "grad_norm": 1.479165849869464, + "learning_rate": 2.8192676924025885e-07, + "loss": 0.4748334288597107, + "step": 6656 + }, + { + "epoch": 1.5349319806317732, + "grad_norm": 1.5854109757101433, + "learning_rate": 2.8166147598088173e-07, + "loss": 0.4745975136756897, + "step": 6657 + }, + { + "epoch": 1.5351625547613557, + "grad_norm": 1.6430139570672564, + "learning_rate": 2.813962871392893e-07, + "loss": 0.49246084690093994, + "step": 6658 + }, + { + "epoch": 1.5353931288909384, + "grad_norm": 1.3796442061928538, + "learning_rate": 2.8113120275402936e-07, + "loss": 0.47876033186912537, + "step": 6659 + }, + { + "epoch": 1.535623703020521, + "grad_norm": 1.6460545742229191, + "learning_rate": 2.808662228636348e-07, + "loss": 0.5244987607002258, + "step": 6660 + }, + { + "epoch": 1.5358542771501038, + "grad_norm": 1.6433381019004774, + "learning_rate": 2.8060134750662277e-07, + "loss": 0.44661569595336914, + "step": 6661 + }, + { + "epoch": 1.5360848512796865, + "grad_norm": 1.4583799872096337, + "learning_rate": 2.8033657672149615e-07, + "loss": 0.4508060812950134, + "step": 6662 + }, + { + "epoch": 1.5363154254092692, + "grad_norm": 1.3497148067649773, + "learning_rate": 2.8007191054674117e-07, + "loss": 0.4657326340675354, + "step": 6663 + }, + { + "epoch": 1.5365459995388517, + "grad_norm": 1.4227603766742651, + "learning_rate": 2.798073490208307e-07, + "loss": 0.495077520608902, + "step": 6664 + }, + { + "epoch": 1.5367765736684345, + "grad_norm": 1.4557135691757939, + "learning_rate": 2.795428921822206e-07, + "loss": 0.40721309185028076, + "step": 6665 + }, + { + "epoch": 1.537007147798017, + "grad_norm": 1.4109014285343175, + "learning_rate": 2.7927854006935315e-07, + "loss": 0.3279367685317993, + "step": 6666 + }, + { + "epoch": 1.5372377219275997, + "grad_norm": 
1.6893419118169095, + "learning_rate": 2.790142927206538e-07, + "loss": 0.4849242866039276, + "step": 6667 + }, + { + "epoch": 1.5374682960571824, + "grad_norm": 1.7502055418971636, + "learning_rate": 2.7875015017453394e-07, + "loss": 0.45151397585868835, + "step": 6668 + }, + { + "epoch": 1.537698870186765, + "grad_norm": 1.7275509884274352, + "learning_rate": 2.784861124693898e-07, + "loss": 0.43480992317199707, + "step": 6669 + }, + { + "epoch": 1.5379294443163478, + "grad_norm": 1.606181868361543, + "learning_rate": 2.782221796436012e-07, + "loss": 0.48764440417289734, + "step": 6670 + }, + { + "epoch": 1.5381600184459305, + "grad_norm": 1.5345831310523104, + "learning_rate": 2.7795835173553407e-07, + "loss": 0.4164161682128906, + "step": 6671 + }, + { + "epoch": 1.538390592575513, + "grad_norm": 1.8060994369656536, + "learning_rate": 2.7769462878353777e-07, + "loss": 0.49934858083724976, + "step": 6672 + }, + { + "epoch": 1.5386211667050955, + "grad_norm": 1.4004311994850918, + "learning_rate": 2.77431010825948e-07, + "loss": 0.4877321124076843, + "step": 6673 + }, + { + "epoch": 1.5388517408346782, + "grad_norm": 1.7442704894714258, + "learning_rate": 2.771674979010834e-07, + "loss": 0.44518858194351196, + "step": 6674 + }, + { + "epoch": 1.539082314964261, + "grad_norm": 1.4902795732558884, + "learning_rate": 2.769040900472488e-07, + "loss": 0.4237474203109741, + "step": 6675 + }, + { + "epoch": 1.5393128890938437, + "grad_norm": 1.8818051716593445, + "learning_rate": 2.7664078730273335e-07, + "loss": 0.45270341634750366, + "step": 6676 + }, + { + "epoch": 1.5395434632234264, + "grad_norm": 1.9777420597791724, + "learning_rate": 2.7637758970581004e-07, + "loss": 0.3866819739341736, + "step": 6677 + }, + { + "epoch": 1.539774037353009, + "grad_norm": 1.709571144624541, + "learning_rate": 2.7611449729473825e-07, + "loss": 0.4384220838546753, + "step": 6678 + }, + { + "epoch": 1.5400046114825916, + "grad_norm": 1.523752237168306, + "learning_rate": 
2.758515101077602e-07, + "loss": 0.4462182819843292, + "step": 6679 + }, + { + "epoch": 1.5402351856121743, + "grad_norm": 1.6129576485586044, + "learning_rate": 2.755886281831046e-07, + "loss": 0.3927033245563507, + "step": 6680 + }, + { + "epoch": 1.5404657597417568, + "grad_norm": 1.7095013933604486, + "learning_rate": 2.7532585155898314e-07, + "loss": 0.4678634703159332, + "step": 6681 + }, + { + "epoch": 1.5406963338713395, + "grad_norm": 1.4524055684149206, + "learning_rate": 2.750631802735935e-07, + "loss": 0.4165131151676178, + "step": 6682 + }, + { + "epoch": 1.5409269080009222, + "grad_norm": 1.1494402193253566, + "learning_rate": 2.748006143651178e-07, + "loss": 0.3705793023109436, + "step": 6683 + }, + { + "epoch": 1.541157482130505, + "grad_norm": 1.5819526439113667, + "learning_rate": 2.745381538717226e-07, + "loss": 0.5428882837295532, + "step": 6684 + }, + { + "epoch": 1.5413880562600877, + "grad_norm": 1.6426127293668795, + "learning_rate": 2.742757988315589e-07, + "loss": 0.4116673171520233, + "step": 6685 + }, + { + "epoch": 1.5416186303896704, + "grad_norm": 1.4540567592422353, + "learning_rate": 2.740135492827631e-07, + "loss": 0.4617515802383423, + "step": 6686 + }, + { + "epoch": 1.541849204519253, + "grad_norm": 1.6140828940427878, + "learning_rate": 2.737514052634555e-07, + "loss": 0.5002453923225403, + "step": 6687 + }, + { + "epoch": 1.5420797786488356, + "grad_norm": 1.4130856063185002, + "learning_rate": 2.734893668117412e-07, + "loss": 0.46029362082481384, + "step": 6688 + }, + { + "epoch": 1.542310352778418, + "grad_norm": 1.4809565956171882, + "learning_rate": 2.732274339657107e-07, + "loss": 0.40502026677131653, + "step": 6689 + }, + { + "epoch": 1.5425409269080008, + "grad_norm": 1.6538580711421296, + "learning_rate": 2.7296560676343803e-07, + "loss": 0.5267831087112427, + "step": 6690 + }, + { + "epoch": 1.5427715010375835, + "grad_norm": 1.3087993674480496, + "learning_rate": 2.727038852429826e-07, + "loss": 0.3464335799217224, + 
"step": 6691 + }, + { + "epoch": 1.5430020751671663, + "grad_norm": 1.5384863769893498, + "learning_rate": 2.7244226944238847e-07, + "loss": 0.36635881662368774, + "step": 6692 + }, + { + "epoch": 1.543232649296749, + "grad_norm": 1.7314925345176482, + "learning_rate": 2.7218075939968435e-07, + "loss": 0.4567757844924927, + "step": 6693 + }, + { + "epoch": 1.5434632234263317, + "grad_norm": 1.9452957704897642, + "learning_rate": 2.719193551528827e-07, + "loss": 0.539220929145813, + "step": 6694 + }, + { + "epoch": 1.5436937975559142, + "grad_norm": 1.653206530012829, + "learning_rate": 2.71658056739982e-07, + "loss": 0.48553818464279175, + "step": 6695 + }, + { + "epoch": 1.543924371685497, + "grad_norm": 1.5040526715775615, + "learning_rate": 2.7139686419896424e-07, + "loss": 0.48564499616622925, + "step": 6696 + }, + { + "epoch": 1.5441549458150794, + "grad_norm": 1.3502417010865393, + "learning_rate": 2.7113577756779616e-07, + "loss": 0.4163014590740204, + "step": 6697 + }, + { + "epoch": 1.544385519944662, + "grad_norm": 1.864828438533457, + "learning_rate": 2.708747968844296e-07, + "loss": 0.5686431527137756, + "step": 6698 + }, + { + "epoch": 1.5446160940742448, + "grad_norm": 1.8608147536494253, + "learning_rate": 2.706139221868008e-07, + "loss": 0.5365211963653564, + "step": 6699 + }, + { + "epoch": 1.5448466682038275, + "grad_norm": 1.5480523179756653, + "learning_rate": 2.7035315351283084e-07, + "loss": 0.4147397577762604, + "step": 6700 + }, + { + "epoch": 1.5450772423334103, + "grad_norm": 1.5279455451058772, + "learning_rate": 2.7009249090042454e-07, + "loss": 0.3938590884208679, + "step": 6701 + }, + { + "epoch": 1.545307816462993, + "grad_norm": 1.726862148896079, + "learning_rate": 2.698319343874722e-07, + "loss": 0.3521370589733124, + "step": 6702 + }, + { + "epoch": 1.5455383905925755, + "grad_norm": 1.6305887024948476, + "learning_rate": 2.69571484011848e-07, + "loss": 0.430014967918396, + "step": 6703 + }, + { + "epoch": 1.5457689647221582, + 
"grad_norm": 1.636933956561892, + "learning_rate": 2.6931113981141164e-07, + "loss": 0.4697108864784241, + "step": 6704 + }, + { + "epoch": 1.5459995388517407, + "grad_norm": 1.5552943329509785, + "learning_rate": 2.69050901824006e-07, + "loss": 0.46567851305007935, + "step": 6705 + }, + { + "epoch": 1.5462301129813234, + "grad_norm": 1.620367133120872, + "learning_rate": 2.6879077008745986e-07, + "loss": 0.46061819791793823, + "step": 6706 + }, + { + "epoch": 1.5464606871109061, + "grad_norm": 1.5411435279833592, + "learning_rate": 2.6853074463958614e-07, + "loss": 0.568658709526062, + "step": 6707 + }, + { + "epoch": 1.5466912612404888, + "grad_norm": 1.3834999667432357, + "learning_rate": 2.682708255181815e-07, + "loss": 0.42816412448883057, + "step": 6708 + }, + { + "epoch": 1.5469218353700716, + "grad_norm": 1.576410551372393, + "learning_rate": 2.6801101276102866e-07, + "loss": 0.42515552043914795, + "step": 6709 + }, + { + "epoch": 1.5471524094996543, + "grad_norm": 1.5447523266389376, + "learning_rate": 2.677513064058932e-07, + "loss": 0.46513399481773376, + "step": 6710 + }, + { + "epoch": 1.5473829836292368, + "grad_norm": 1.3853944144224488, + "learning_rate": 2.6749170649052675e-07, + "loss": 0.4194756746292114, + "step": 6711 + }, + { + "epoch": 1.5476135577588195, + "grad_norm": 1.4035563039276318, + "learning_rate": 2.672322130526643e-07, + "loss": 0.4456541836261749, + "step": 6712 + }, + { + "epoch": 1.547844131888402, + "grad_norm": 1.5113453932130136, + "learning_rate": 2.669728261300264e-07, + "loss": 0.493444561958313, + "step": 6713 + }, + { + "epoch": 1.5480747060179847, + "grad_norm": 1.582884732282312, + "learning_rate": 2.6671354576031645e-07, + "loss": 0.47202616930007935, + "step": 6714 + }, + { + "epoch": 1.5483052801475674, + "grad_norm": 1.824788636144565, + "learning_rate": 2.66454371981225e-07, + "loss": 0.4584811329841614, + "step": 6715 + }, + { + "epoch": 1.5485358542771501, + "grad_norm": 1.3167028831683925, + "learning_rate": 
2.6619530483042485e-07, + "loss": 0.4072091579437256, + "step": 6716 + }, + { + "epoch": 1.5487664284067328, + "grad_norm": 1.5656021898929726, + "learning_rate": 2.6593634434557365e-07, + "loss": 0.49742361903190613, + "step": 6717 + }, + { + "epoch": 1.5489970025363156, + "grad_norm": 1.6686846450785309, + "learning_rate": 2.6567749056431467e-07, + "loss": 0.49291643500328064, + "step": 6718 + }, + { + "epoch": 1.549227576665898, + "grad_norm": 1.5234565390584587, + "learning_rate": 2.6541874352427427e-07, + "loss": 0.5210362076759338, + "step": 6719 + }, + { + "epoch": 1.5494581507954808, + "grad_norm": 1.523136615036839, + "learning_rate": 2.651601032630645e-07, + "loss": 0.4489557147026062, + "step": 6720 + }, + { + "epoch": 1.5496887249250633, + "grad_norm": 1.515706035484409, + "learning_rate": 2.649015698182808e-07, + "loss": 0.4417908191680908, + "step": 6721 + }, + { + "epoch": 1.549919299054646, + "grad_norm": 1.5123745571810647, + "learning_rate": 2.6464314322750404e-07, + "loss": 0.45177266001701355, + "step": 6722 + }, + { + "epoch": 1.5501498731842287, + "grad_norm": 1.5422888438788165, + "learning_rate": 2.6438482352829896e-07, + "loss": 0.37720638513565063, + "step": 6723 + }, + { + "epoch": 1.5503804473138114, + "grad_norm": 1.5572735157633186, + "learning_rate": 2.641266107582153e-07, + "loss": 0.5108897089958191, + "step": 6724 + }, + { + "epoch": 1.5506110214433941, + "grad_norm": 1.5098940840101445, + "learning_rate": 2.638685049547863e-07, + "loss": 0.449248731136322, + "step": 6725 + }, + { + "epoch": 1.5508415955729768, + "grad_norm": 1.4667668469814954, + "learning_rate": 2.636105061555309e-07, + "loss": 0.4692652225494385, + "step": 6726 + }, + { + "epoch": 1.5510721697025593, + "grad_norm": 1.5150559633489926, + "learning_rate": 2.6335261439795153e-07, + "loss": 0.49128347635269165, + "step": 6727 + }, + { + "epoch": 1.551302743832142, + "grad_norm": 1.5725646817979666, + "learning_rate": 2.630948297195351e-07, + "loss": 
0.4618053436279297, + "step": 6728 + }, + { + "epoch": 1.5515333179617246, + "grad_norm": 1.5786249232029208, + "learning_rate": 2.6283715215775336e-07, + "loss": 0.4342828094959259, + "step": 6729 + }, + { + "epoch": 1.5517638920913073, + "grad_norm": 1.5592983853420144, + "learning_rate": 2.625795817500626e-07, + "loss": 0.5214434862136841, + "step": 6730 + }, + { + "epoch": 1.55199446622089, + "grad_norm": 1.521395946192631, + "learning_rate": 2.623221185339034e-07, + "loss": 0.4873029589653015, + "step": 6731 + }, + { + "epoch": 1.5522250403504727, + "grad_norm": 1.5014817933254478, + "learning_rate": 2.6206476254670007e-07, + "loss": 0.4510548412799835, + "step": 6732 + }, + { + "epoch": 1.5524556144800554, + "grad_norm": 1.5931454307395074, + "learning_rate": 2.6180751382586265e-07, + "loss": 0.4832548499107361, + "step": 6733 + }, + { + "epoch": 1.5526861886096381, + "grad_norm": 1.8273040799326088, + "learning_rate": 2.6155037240878406e-07, + "loss": 0.5438823699951172, + "step": 6734 + }, + { + "epoch": 1.5529167627392206, + "grad_norm": 1.488758610712305, + "learning_rate": 2.6129333833284315e-07, + "loss": 0.4967566728591919, + "step": 6735 + }, + { + "epoch": 1.5531473368688034, + "grad_norm": 1.419700158234616, + "learning_rate": 2.610364116354018e-07, + "loss": 0.5187437534332275, + "step": 6736 + }, + { + "epoch": 1.5533779109983858, + "grad_norm": 1.3624978155475462, + "learning_rate": 2.607795923538072e-07, + "loss": 0.4199862480163574, + "step": 6737 + }, + { + "epoch": 1.5536084851279686, + "grad_norm": 1.463828508781327, + "learning_rate": 2.6052288052539084e-07, + "loss": 0.5009325742721558, + "step": 6738 + }, + { + "epoch": 1.5538390592575513, + "grad_norm": 1.5361155892650822, + "learning_rate": 2.602662761874679e-07, + "loss": 0.48698678612709045, + "step": 6739 + }, + { + "epoch": 1.554069633387134, + "grad_norm": 1.4600353762817446, + "learning_rate": 2.6000977937733905e-07, + "loss": 0.4845883846282959, + "step": 6740 + }, + { + "epoch": 
1.5543002075167167, + "grad_norm": 1.6153802807658302, + "learning_rate": 2.59753390132288e-07, + "loss": 0.512161135673523, + "step": 6741 + }, + { + "epoch": 1.5545307816462994, + "grad_norm": 1.756231295082545, + "learning_rate": 2.5949710848958415e-07, + "loss": 0.42334964871406555, + "step": 6742 + }, + { + "epoch": 1.554761355775882, + "grad_norm": 1.2927501946290025, + "learning_rate": 2.592409344864801e-07, + "loss": 0.3781980276107788, + "step": 6743 + }, + { + "epoch": 1.5549919299054646, + "grad_norm": 1.5363470406300028, + "learning_rate": 2.5898486816021394e-07, + "loss": 0.4989853501319885, + "step": 6744 + }, + { + "epoch": 1.5552225040350471, + "grad_norm": 1.5873964925893267, + "learning_rate": 2.5872890954800676e-07, + "loss": 0.45715585350990295, + "step": 6745 + }, + { + "epoch": 1.5554530781646299, + "grad_norm": 1.3499060893753405, + "learning_rate": 2.5847305868706515e-07, + "loss": 0.5025684833526611, + "step": 6746 + }, + { + "epoch": 1.5556836522942126, + "grad_norm": 1.5290460697986008, + "learning_rate": 2.5821731561457994e-07, + "loss": 0.47298115491867065, + "step": 6747 + }, + { + "epoch": 1.5559142264237953, + "grad_norm": 1.4250590830459762, + "learning_rate": 2.5796168036772524e-07, + "loss": 0.45412957668304443, + "step": 6748 + }, + { + "epoch": 1.556144800553378, + "grad_norm": 1.6230149340497857, + "learning_rate": 2.5770615298366107e-07, + "loss": 0.3958669602870941, + "step": 6749 + }, + { + "epoch": 1.5563753746829607, + "grad_norm": 1.4992477100706287, + "learning_rate": 2.574507334995302e-07, + "loss": 0.4748396873474121, + "step": 6750 + }, + { + "epoch": 1.5566059488125432, + "grad_norm": 2.1473408883216534, + "learning_rate": 2.5719542195246093e-07, + "loss": 0.4741169810295105, + "step": 6751 + }, + { + "epoch": 1.556836522942126, + "grad_norm": 1.5072269547692108, + "learning_rate": 2.569402183795648e-07, + "loss": 0.4362972378730774, + "step": 6752 + }, + { + "epoch": 1.5570670970717084, + "grad_norm": 
1.5695384848079892, + "learning_rate": 2.5668512281793873e-07, + "loss": 0.48013412952423096, + "step": 6753 + }, + { + "epoch": 1.5572976712012911, + "grad_norm": 1.4514603270444408, + "learning_rate": 2.564301353046634e-07, + "loss": 0.4728567600250244, + "step": 6754 + }, + { + "epoch": 1.5575282453308739, + "grad_norm": 1.7592773476195727, + "learning_rate": 2.56175255876804e-07, + "loss": 0.4304337501525879, + "step": 6755 + }, + { + "epoch": 1.5577588194604566, + "grad_norm": 1.5275686028016913, + "learning_rate": 2.5592048457140926e-07, + "loss": 0.43467870354652405, + "step": 6756 + }, + { + "epoch": 1.5579893935900393, + "grad_norm": 1.9596482130933712, + "learning_rate": 2.556658214255134e-07, + "loss": 0.3912844657897949, + "step": 6757 + }, + { + "epoch": 1.558219967719622, + "grad_norm": 1.5284327791141838, + "learning_rate": 2.5541126647613397e-07, + "loss": 0.4462862014770508, + "step": 6758 + }, + { + "epoch": 1.5584505418492045, + "grad_norm": 1.5847675751494867, + "learning_rate": 2.551568197602729e-07, + "loss": 0.43929487466812134, + "step": 6759 + }, + { + "epoch": 1.5586811159787872, + "grad_norm": 1.5077581986013873, + "learning_rate": 2.549024813149169e-07, + "loss": 0.44473958015441895, + "step": 6760 + }, + { + "epoch": 1.5589116901083697, + "grad_norm": 1.5536876763085832, + "learning_rate": 2.546482511770365e-07, + "loss": 0.5159727931022644, + "step": 6761 + }, + { + "epoch": 1.5591422642379524, + "grad_norm": 1.7371461951042986, + "learning_rate": 2.5439412938358696e-07, + "loss": 0.3975204825401306, + "step": 6762 + }, + { + "epoch": 1.5593728383675352, + "grad_norm": 1.493493619365051, + "learning_rate": 2.54140115971507e-07, + "loss": 0.5198286175727844, + "step": 6763 + }, + { + "epoch": 1.5596034124971179, + "grad_norm": 1.4309109790386, + "learning_rate": 2.5388621097772046e-07, + "loss": 0.4815763831138611, + "step": 6764 + }, + { + "epoch": 1.5598339866267006, + "grad_norm": 1.3803469238514527, + "learning_rate": 
2.5363241443913454e-07, + "loss": 0.365215539932251, + "step": 6765 + }, + { + "epoch": 1.5600645607562833, + "grad_norm": 1.6088793691676593, + "learning_rate": 2.533787263926417e-07, + "loss": 0.486020028591156, + "step": 6766 + }, + { + "epoch": 1.5602951348858658, + "grad_norm": 1.5355383857513338, + "learning_rate": 2.5312514687511766e-07, + "loss": 0.38536715507507324, + "step": 6767 + }, + { + "epoch": 1.5605257090154485, + "grad_norm": 1.649862765507334, + "learning_rate": 2.528716759234227e-07, + "loss": 0.44713371992111206, + "step": 6768 + }, + { + "epoch": 1.560756283145031, + "grad_norm": 1.868794454538197, + "learning_rate": 2.5261831357440154e-07, + "loss": 0.4122806489467621, + "step": 6769 + }, + { + "epoch": 1.5609868572746137, + "grad_norm": 1.6234940940069353, + "learning_rate": 2.523650598648829e-07, + "loss": 0.40514320135116577, + "step": 6770 + }, + { + "epoch": 1.5612174314041964, + "grad_norm": 1.4417973525561176, + "learning_rate": 2.5211191483168027e-07, + "loss": 0.4273102283477783, + "step": 6771 + }, + { + "epoch": 1.5614480055337792, + "grad_norm": 1.4229504510118502, + "learning_rate": 2.5185887851159005e-07, + "loss": 0.4774209260940552, + "step": 6772 + }, + { + "epoch": 1.5616785796633619, + "grad_norm": 1.583645566960067, + "learning_rate": 2.5160595094139436e-07, + "loss": 0.3928600549697876, + "step": 6773 + }, + { + "epoch": 1.5619091537929446, + "grad_norm": 1.6757793450729852, + "learning_rate": 2.5135313215785816e-07, + "loss": 0.4414944052696228, + "step": 6774 + }, + { + "epoch": 1.562139727922527, + "grad_norm": 1.733143939427008, + "learning_rate": 2.5110042219773176e-07, + "loss": 0.36133646965026855, + "step": 6775 + }, + { + "epoch": 1.5623703020521098, + "grad_norm": 1.8443586806925936, + "learning_rate": 2.508478210977486e-07, + "loss": 0.44824904203414917, + "step": 6776 + }, + { + "epoch": 1.5626008761816923, + "grad_norm": 1.1693439456079453, + "learning_rate": 2.5059532889462707e-07, + "loss": 
0.3699820637702942, + "step": 6777 + }, + { + "epoch": 1.562831450311275, + "grad_norm": 1.9309547773144982, + "learning_rate": 2.5034294562506976e-07, + "loss": 0.4809808135032654, + "step": 6778 + }, + { + "epoch": 1.5630620244408577, + "grad_norm": 1.7665230327633363, + "learning_rate": 2.5009067132576256e-07, + "loss": 0.487751841545105, + "step": 6779 + }, + { + "epoch": 1.5632925985704405, + "grad_norm": 1.5839144124062823, + "learning_rate": 2.4983850603337675e-07, + "loss": 0.47932374477386475, + "step": 6780 + }, + { + "epoch": 1.5635231727000232, + "grad_norm": 1.4782012523005248, + "learning_rate": 2.495864497845663e-07, + "loss": 0.42852234840393066, + "step": 6781 + }, + { + "epoch": 1.5637537468296059, + "grad_norm": 1.4802387383863571, + "learning_rate": 2.49334502615971e-07, + "loss": 0.4392131567001343, + "step": 6782 + }, + { + "epoch": 1.5639843209591884, + "grad_norm": 1.5042475261036963, + "learning_rate": 2.4908266456421323e-07, + "loss": 0.45050233602523804, + "step": 6783 + }, + { + "epoch": 1.5642148950887709, + "grad_norm": 1.4962883173938244, + "learning_rate": 2.488309356659004e-07, + "loss": 0.45328110456466675, + "step": 6784 + }, + { + "epoch": 1.5644454692183536, + "grad_norm": 1.451199382042834, + "learning_rate": 2.4857931595762403e-07, + "loss": 0.3851325511932373, + "step": 6785 + }, + { + "epoch": 1.5646760433479363, + "grad_norm": 1.5269726027188475, + "learning_rate": 2.4832780547595976e-07, + "loss": 0.4096960127353668, + "step": 6786 + }, + { + "epoch": 1.564906617477519, + "grad_norm": 1.4158017969205454, + "learning_rate": 2.480764042574669e-07, + "loss": 0.4439825117588043, + "step": 6787 + }, + { + "epoch": 1.5651371916071017, + "grad_norm": 1.5084778231824414, + "learning_rate": 2.4782511233868895e-07, + "loss": 0.4259459972381592, + "step": 6788 + }, + { + "epoch": 1.5653677657366845, + "grad_norm": 1.6383230301383533, + "learning_rate": 2.475739297561542e-07, + "loss": 0.4701216220855713, + "step": 6789 + }, + { + 
"epoch": 1.565598339866267, + "grad_norm": 1.4707071600317903, + "learning_rate": 2.473228565463742e-07, + "loss": 0.4435737133026123, + "step": 6790 + }, + { + "epoch": 1.5658289139958497, + "grad_norm": 1.4361527011832544, + "learning_rate": 2.4707189274584537e-07, + "loss": 0.4476662278175354, + "step": 6791 + }, + { + "epoch": 1.5660594881254322, + "grad_norm": 1.8319243980176085, + "learning_rate": 2.468210383910474e-07, + "loss": 0.4399911165237427, + "step": 6792 + }, + { + "epoch": 1.5662900622550149, + "grad_norm": 1.5617800363149925, + "learning_rate": 2.465702935184446e-07, + "loss": 0.4206039309501648, + "step": 6793 + }, + { + "epoch": 1.5665206363845976, + "grad_norm": 1.5998109403316092, + "learning_rate": 2.463196581644855e-07, + "loss": 0.44936686754226685, + "step": 6794 + }, + { + "epoch": 1.5667512105141803, + "grad_norm": 1.4750351364947134, + "learning_rate": 2.4606913236560277e-07, + "loss": 0.39926016330718994, + "step": 6795 + }, + { + "epoch": 1.566981784643763, + "grad_norm": 1.607414705164721, + "learning_rate": 2.4581871615821216e-07, + "loss": 0.4338487982749939, + "step": 6796 + }, + { + "epoch": 1.5672123587733457, + "grad_norm": 1.6693881073802184, + "learning_rate": 2.455684095787148e-07, + "loss": 0.5047430992126465, + "step": 6797 + }, + { + "epoch": 1.5674429329029282, + "grad_norm": 1.623571142038879, + "learning_rate": 2.4531821266349504e-07, + "loss": 0.46082550287246704, + "step": 6798 + }, + { + "epoch": 1.567673507032511, + "grad_norm": 1.5687485332342288, + "learning_rate": 2.450681254489214e-07, + "loss": 0.44586509466171265, + "step": 6799 + }, + { + "epoch": 1.5679040811620935, + "grad_norm": 1.6011741376497353, + "learning_rate": 2.4481814797134657e-07, + "loss": 0.5167746543884277, + "step": 6800 + }, + { + "epoch": 1.5681346552916762, + "grad_norm": 1.4074512111564024, + "learning_rate": 2.4456828026710753e-07, + "loss": 0.44062116742134094, + "step": 6801 + }, + { + "epoch": 1.5683652294212589, + "grad_norm": 
1.718295945554571, + "learning_rate": 2.4431852237252524e-07, + "loss": 0.5096040368080139, + "step": 6802 + }, + { + "epoch": 1.5685958035508416, + "grad_norm": 1.3369851313651875, + "learning_rate": 2.440688743239042e-07, + "loss": 0.44234153628349304, + "step": 6803 + }, + { + "epoch": 1.5688263776804243, + "grad_norm": 1.7878168925295264, + "learning_rate": 2.4381933615753357e-07, + "loss": 0.431011825799942, + "step": 6804 + }, + { + "epoch": 1.569056951810007, + "grad_norm": 1.5221569168970472, + "learning_rate": 2.435699079096858e-07, + "loss": 0.4903266131877899, + "step": 6805 + }, + { + "epoch": 1.5692875259395895, + "grad_norm": 1.4830626229942445, + "learning_rate": 2.433205896166185e-07, + "loss": 0.4698626399040222, + "step": 6806 + }, + { + "epoch": 1.5695181000691723, + "grad_norm": 1.7678576287420633, + "learning_rate": 2.4307138131457184e-07, + "loss": 0.37576574087142944, + "step": 6807 + }, + { + "epoch": 1.5697486741987547, + "grad_norm": 1.442601981615427, + "learning_rate": 2.4282228303977113e-07, + "loss": 0.47068172693252563, + "step": 6808 + }, + { + "epoch": 1.5699792483283375, + "grad_norm": 1.5121414961596256, + "learning_rate": 2.425732948284257e-07, + "loss": 0.45246315002441406, + "step": 6809 + }, + { + "epoch": 1.5702098224579202, + "grad_norm": 1.670746435704044, + "learning_rate": 2.423244167167278e-07, + "loss": 0.4746376574039459, + "step": 6810 + }, + { + "epoch": 1.570440396587503, + "grad_norm": 1.6491072802367082, + "learning_rate": 2.420756487408551e-07, + "loss": 0.413469135761261, + "step": 6811 + }, + { + "epoch": 1.5706709707170856, + "grad_norm": 1.4392614299059656, + "learning_rate": 2.418269909369678e-07, + "loss": 0.3567890226840973, + "step": 6812 + }, + { + "epoch": 1.5709015448466683, + "grad_norm": 1.9034789277869502, + "learning_rate": 2.415784433412116e-07, + "loss": 0.4676034450531006, + "step": 6813 + }, + { + "epoch": 1.5711321189762508, + "grad_norm": 1.5100461636177536, + "learning_rate": 
2.4133000598971477e-07, + "loss": 0.429337739944458, + "step": 6814 + }, + { + "epoch": 1.5713626931058335, + "grad_norm": 1.657098818036463, + "learning_rate": 2.4108167891859065e-07, + "loss": 0.35861289501190186, + "step": 6815 + }, + { + "epoch": 1.571593267235416, + "grad_norm": 1.7985300174152374, + "learning_rate": 2.4083346216393564e-07, + "loss": 0.43728363513946533, + "step": 6816 + }, + { + "epoch": 1.5718238413649988, + "grad_norm": 1.6655671112295587, + "learning_rate": 2.405853557618308e-07, + "loss": 0.44594380259513855, + "step": 6817 + }, + { + "epoch": 1.5720544154945815, + "grad_norm": 1.430621764890317, + "learning_rate": 2.403373597483414e-07, + "loss": 0.36871337890625, + "step": 6818 + }, + { + "epoch": 1.5722849896241642, + "grad_norm": 1.4284927159530842, + "learning_rate": 2.400894741595152e-07, + "loss": 0.3769477307796478, + "step": 6819 + }, + { + "epoch": 1.572515563753747, + "grad_norm": 1.6803573488891066, + "learning_rate": 2.3984169903138583e-07, + "loss": 0.503145694732666, + "step": 6820 + }, + { + "epoch": 1.5727461378833296, + "grad_norm": 1.552866324250783, + "learning_rate": 2.395940343999691e-07, + "loss": 0.4082655906677246, + "step": 6821 + }, + { + "epoch": 1.5729767120129121, + "grad_norm": 1.4215190376699491, + "learning_rate": 2.3934648030126625e-07, + "loss": 0.4106418192386627, + "step": 6822 + }, + { + "epoch": 1.5732072861424948, + "grad_norm": 1.663561714777188, + "learning_rate": 2.390990367712613e-07, + "loss": 0.45363783836364746, + "step": 6823 + }, + { + "epoch": 1.5734378602720773, + "grad_norm": 1.4253235303875884, + "learning_rate": 2.388517038459227e-07, + "loss": 0.4416825473308563, + "step": 6824 + }, + { + "epoch": 1.57366843440166, + "grad_norm": 1.5727508875619094, + "learning_rate": 2.3860448156120304e-07, + "loss": 0.5106863379478455, + "step": 6825 + }, + { + "epoch": 1.5738990085312428, + "grad_norm": 1.431151413456896, + "learning_rate": 2.3835736995303879e-07, + "loss": 0.4618466794490814, + 
"step": 6826 + }, + { + "epoch": 1.5741295826608255, + "grad_norm": 1.6611294255159201, + "learning_rate": 2.381103690573495e-07, + "loss": 0.414678692817688, + "step": 6827 + }, + { + "epoch": 1.5743601567904082, + "grad_norm": 1.3583782134926532, + "learning_rate": 2.3786347891004e-07, + "loss": 0.39774662256240845, + "step": 6828 + }, + { + "epoch": 1.574590730919991, + "grad_norm": 1.3689702631653482, + "learning_rate": 2.376166995469977e-07, + "loss": 0.4513537287712097, + "step": 6829 + }, + { + "epoch": 1.5748213050495734, + "grad_norm": 1.5433747348092586, + "learning_rate": 2.3737003100409447e-07, + "loss": 0.44062697887420654, + "step": 6830 + }, + { + "epoch": 1.5750518791791561, + "grad_norm": 1.6549219639884087, + "learning_rate": 2.3712347331718617e-07, + "loss": 0.42305582761764526, + "step": 6831 + }, + { + "epoch": 1.5752824533087386, + "grad_norm": 1.628456252942963, + "learning_rate": 2.3687702652211262e-07, + "loss": 0.46731626987457275, + "step": 6832 + }, + { + "epoch": 1.5755130274383213, + "grad_norm": 1.569042371408869, + "learning_rate": 2.3663069065469753e-07, + "loss": 0.4926149845123291, + "step": 6833 + }, + { + "epoch": 1.575743601567904, + "grad_norm": 1.8433451746214373, + "learning_rate": 2.3638446575074777e-07, + "loss": 0.49002933502197266, + "step": 6834 + }, + { + "epoch": 1.5759741756974868, + "grad_norm": 1.9286763636552064, + "learning_rate": 2.3613835184605523e-07, + "loss": 0.47110694646835327, + "step": 6835 + }, + { + "epoch": 1.5762047498270695, + "grad_norm": 1.7003781450027053, + "learning_rate": 2.3589234897639444e-07, + "loss": 0.4257816672325134, + "step": 6836 + }, + { + "epoch": 1.5764353239566522, + "grad_norm": 1.4515610553726317, + "learning_rate": 2.3564645717752506e-07, + "loss": 0.4031051695346832, + "step": 6837 + }, + { + "epoch": 1.5766658980862347, + "grad_norm": 1.7208107126331553, + "learning_rate": 2.3540067648518957e-07, + "loss": 0.5077808499336243, + "step": 6838 + }, + { + "epoch": 
1.5768964722158174, + "grad_norm": 1.4184547433402042, + "learning_rate": 2.3515500693511449e-07, + "loss": 0.3877585232257843, + "step": 6839 + }, + { + "epoch": 1.5771270463454, + "grad_norm": 1.6806127701824354, + "learning_rate": 2.3490944856301064e-07, + "loss": 0.4356805682182312, + "step": 6840 + }, + { + "epoch": 1.5773576204749826, + "grad_norm": 1.5102184976880006, + "learning_rate": 2.346640014045723e-07, + "loss": 0.46679362654685974, + "step": 6841 + }, + { + "epoch": 1.5775881946045653, + "grad_norm": 1.4361079018846885, + "learning_rate": 2.3441866549547817e-07, + "loss": 0.4837648272514343, + "step": 6842 + }, + { + "epoch": 1.577818768734148, + "grad_norm": 1.5395603940472438, + "learning_rate": 2.341734408713897e-07, + "loss": 0.42723533511161804, + "step": 6843 + }, + { + "epoch": 1.5780493428637308, + "grad_norm": 1.7296429757269751, + "learning_rate": 2.3392832756795322e-07, + "loss": 0.3680928647518158, + "step": 6844 + }, + { + "epoch": 1.5782799169933135, + "grad_norm": 1.3398871717628533, + "learning_rate": 2.3368332562079797e-07, + "loss": 0.434980571269989, + "step": 6845 + }, + { + "epoch": 1.578510491122896, + "grad_norm": 1.5976407072584213, + "learning_rate": 2.3343843506553805e-07, + "loss": 0.45552271604537964, + "step": 6846 + }, + { + "epoch": 1.5787410652524787, + "grad_norm": 1.5496903398620734, + "learning_rate": 2.331936559377702e-07, + "loss": 0.4292616844177246, + "step": 6847 + }, + { + "epoch": 1.5789716393820612, + "grad_norm": 1.6907239258434268, + "learning_rate": 2.3294898827307573e-07, + "loss": 0.5025339126586914, + "step": 6848 + }, + { + "epoch": 1.579202213511644, + "grad_norm": 1.434142265629081, + "learning_rate": 2.3270443210701996e-07, + "loss": 0.47567370533943176, + "step": 6849 + }, + { + "epoch": 1.5794327876412266, + "grad_norm": 1.9792768486961878, + "learning_rate": 2.3245998747515095e-07, + "loss": 0.5435467958450317, + "step": 6850 + }, + { + "epoch": 1.5796633617708093, + "grad_norm": 
1.2141081677893035, + "learning_rate": 2.3221565441300194e-07, + "loss": 0.4409145712852478, + "step": 6851 + }, + { + "epoch": 1.579893935900392, + "grad_norm": 1.3643265195449554, + "learning_rate": 2.3197143295608845e-07, + "loss": 0.40482181310653687, + "step": 6852 + }, + { + "epoch": 1.5801245100299748, + "grad_norm": 1.8983898955785605, + "learning_rate": 2.317273231399113e-07, + "loss": 0.40231794118881226, + "step": 6853 + }, + { + "epoch": 1.5803550841595573, + "grad_norm": 1.3860542767537625, + "learning_rate": 2.314833249999535e-07, + "loss": 0.43245166540145874, + "step": 6854 + }, + { + "epoch": 1.58058565828914, + "grad_norm": 1.5386782332278715, + "learning_rate": 2.3123943857168315e-07, + "loss": 0.40237659215927124, + "step": 6855 + }, + { + "epoch": 1.5808162324187225, + "grad_norm": 1.7869361833965254, + "learning_rate": 2.309956638905517e-07, + "loss": 0.48900318145751953, + "step": 6856 + }, + { + "epoch": 1.5810468065483052, + "grad_norm": 1.482622476685355, + "learning_rate": 2.3075200099199422e-07, + "loss": 0.42364567518234253, + "step": 6857 + }, + { + "epoch": 1.581277380677888, + "grad_norm": 1.6159587255295897, + "learning_rate": 2.3050844991142958e-07, + "loss": 0.4658735990524292, + "step": 6858 + }, + { + "epoch": 1.5815079548074706, + "grad_norm": 1.4775627716781476, + "learning_rate": 2.3026501068426007e-07, + "loss": 0.42268991470336914, + "step": 6859 + }, + { + "epoch": 1.5817385289370534, + "grad_norm": 1.4348002511722773, + "learning_rate": 2.3002168334587247e-07, + "loss": 0.44876742362976074, + "step": 6860 + }, + { + "epoch": 1.581969103066636, + "grad_norm": 1.5171591869453156, + "learning_rate": 2.2977846793163646e-07, + "loss": 0.42540132999420166, + "step": 6861 + }, + { + "epoch": 1.5821996771962186, + "grad_norm": 1.4296859038074168, + "learning_rate": 2.2953536447690636e-07, + "loss": 0.48768138885498047, + "step": 6862 + }, + { + "epoch": 1.5824302513258013, + "grad_norm": 1.5445046236967466, + "learning_rate": 
2.292923730170192e-07, + "loss": 0.42905953526496887, + "step": 6863 + }, + { + "epoch": 1.5826608254553838, + "grad_norm": 1.4472242985886439, + "learning_rate": 2.2904949358729653e-07, + "loss": 0.4103778004646301, + "step": 6864 + }, + { + "epoch": 1.5828913995849665, + "grad_norm": 1.5180272333652802, + "learning_rate": 2.2880672622304331e-07, + "loss": 0.39303290843963623, + "step": 6865 + }, + { + "epoch": 1.5831219737145492, + "grad_norm": 1.4702183686842207, + "learning_rate": 2.2856407095954843e-07, + "loss": 0.5087130069732666, + "step": 6866 + }, + { + "epoch": 1.583352547844132, + "grad_norm": 1.5644640444387603, + "learning_rate": 2.283215278320839e-07, + "loss": 0.33117055892944336, + "step": 6867 + }, + { + "epoch": 1.5835831219737146, + "grad_norm": 1.7090383225203818, + "learning_rate": 2.280790968759063e-07, + "loss": 0.41781488060951233, + "step": 6868 + }, + { + "epoch": 1.5838136961032974, + "grad_norm": 1.4121975925065597, + "learning_rate": 2.2783677812625523e-07, + "loss": 0.5104382634162903, + "step": 6869 + }, + { + "epoch": 1.5840442702328799, + "grad_norm": 1.5723614045021508, + "learning_rate": 2.2759457161835372e-07, + "loss": 0.3987969160079956, + "step": 6870 + }, + { + "epoch": 1.5842748443624626, + "grad_norm": 1.705658009146651, + "learning_rate": 2.2735247738740936e-07, + "loss": 0.4723064601421356, + "step": 6871 + }, + { + "epoch": 1.584505418492045, + "grad_norm": 1.707721278006975, + "learning_rate": 2.2711049546861293e-07, + "loss": 0.3942141830921173, + "step": 6872 + }, + { + "epoch": 1.5847359926216278, + "grad_norm": 1.5657011191058785, + "learning_rate": 2.268686258971393e-07, + "loss": 0.38271787762641907, + "step": 6873 + }, + { + "epoch": 1.5849665667512105, + "grad_norm": 1.3977071321322045, + "learning_rate": 2.2662686870814607e-07, + "loss": 0.4944665729999542, + "step": 6874 + }, + { + "epoch": 1.5851971408807932, + "grad_norm": 1.7910306093530013, + "learning_rate": 2.2638522393677562e-07, + "loss": 
0.46695005893707275, + "step": 6875 + }, + { + "epoch": 1.585427715010376, + "grad_norm": 1.7074115790208728, + "learning_rate": 2.2614369161815295e-07, + "loss": 0.4620080888271332, + "step": 6876 + }, + { + "epoch": 1.5856582891399587, + "grad_norm": 1.6877087434684872, + "learning_rate": 2.2590227178738776e-07, + "loss": 0.5650279521942139, + "step": 6877 + }, + { + "epoch": 1.5858888632695411, + "grad_norm": 1.3471081039016284, + "learning_rate": 2.2566096447957227e-07, + "loss": 0.3556622564792633, + "step": 6878 + }, + { + "epoch": 1.5861194373991239, + "grad_norm": 1.3889188451731431, + "learning_rate": 2.254197697297834e-07, + "loss": 0.4978718161582947, + "step": 6879 + }, + { + "epoch": 1.5863500115287064, + "grad_norm": 1.375490517958548, + "learning_rate": 2.2517868757308146e-07, + "loss": 0.4759003520011902, + "step": 6880 + }, + { + "epoch": 1.586580585658289, + "grad_norm": 1.579013983466932, + "learning_rate": 2.2493771804450945e-07, + "loss": 0.5078370571136475, + "step": 6881 + }, + { + "epoch": 1.5868111597878718, + "grad_norm": 1.3607586792133322, + "learning_rate": 2.2469686117909547e-07, + "loss": 0.4188239276409149, + "step": 6882 + }, + { + "epoch": 1.5870417339174545, + "grad_norm": 1.3488510335317552, + "learning_rate": 2.2445611701184997e-07, + "loss": 0.4075232744216919, + "step": 6883 + }, + { + "epoch": 1.5872723080470372, + "grad_norm": 1.5004910712339554, + "learning_rate": 2.2421548557776794e-07, + "loss": 0.3643442988395691, + "step": 6884 + }, + { + "epoch": 1.58750288217662, + "grad_norm": 1.4193604715362476, + "learning_rate": 2.2397496691182716e-07, + "loss": 0.38767147064208984, + "step": 6885 + }, + { + "epoch": 1.5877334563062024, + "grad_norm": 1.6373352976605955, + "learning_rate": 2.2373456104899e-07, + "loss": 0.4874354600906372, + "step": 6886 + }, + { + "epoch": 1.5879640304357852, + "grad_norm": 1.5573200679287742, + "learning_rate": 2.2349426802420134e-07, + "loss": 0.46412762999534607, + "step": 6887 + }, + { + 
"epoch": 1.5881946045653677, + "grad_norm": 1.3720639419051985, + "learning_rate": 2.2325408787239054e-07, + "loss": 0.4299372434616089, + "step": 6888 + }, + { + "epoch": 1.5884251786949504, + "grad_norm": 1.6309152140238423, + "learning_rate": 2.230140206284703e-07, + "loss": 0.3962220549583435, + "step": 6889 + }, + { + "epoch": 1.588655752824533, + "grad_norm": 1.617512400235996, + "learning_rate": 2.2277406632733653e-07, + "loss": 0.5048998594284058, + "step": 6890 + }, + { + "epoch": 1.5888863269541158, + "grad_norm": 2.0443646004817024, + "learning_rate": 2.2253422500386932e-07, + "loss": 0.35463857650756836, + "step": 6891 + }, + { + "epoch": 1.5891169010836985, + "grad_norm": 1.5696832175175914, + "learning_rate": 2.2229449669293165e-07, + "loss": 0.3969672620296478, + "step": 6892 + }, + { + "epoch": 1.5893474752132812, + "grad_norm": 1.5166803382402412, + "learning_rate": 2.22054881429371e-07, + "loss": 0.36300575733184814, + "step": 6893 + }, + { + "epoch": 1.5895780493428637, + "grad_norm": 1.41057555150973, + "learning_rate": 2.2181537924801729e-07, + "loss": 0.45796507596969604, + "step": 6894 + }, + { + "epoch": 1.5898086234724462, + "grad_norm": 1.556089643432737, + "learning_rate": 2.2157599018368488e-07, + "loss": 0.42725688219070435, + "step": 6895 + }, + { + "epoch": 1.590039197602029, + "grad_norm": 1.8436048050065164, + "learning_rate": 2.213367142711714e-07, + "loss": 0.4959419369697571, + "step": 6896 + }, + { + "epoch": 1.5902697717316117, + "grad_norm": 1.6607109480306586, + "learning_rate": 2.2109755154525821e-07, + "loss": 0.3707115948200226, + "step": 6897 + }, + { + "epoch": 1.5905003458611944, + "grad_norm": 1.4025605906760028, + "learning_rate": 2.2085850204070989e-07, + "loss": 0.3647577166557312, + "step": 6898 + }, + { + "epoch": 1.590730919990777, + "grad_norm": 1.505368584241417, + "learning_rate": 2.2061956579227447e-07, + "loss": 0.42227697372436523, + "step": 6899 + }, + { + "epoch": 1.5909614941203598, + "grad_norm": 
1.508703122498175, + "learning_rate": 2.2038074283468412e-07, + "loss": 0.41736292839050293, + "step": 6900 + }, + { + "epoch": 1.5911920682499423, + "grad_norm": 1.6418039973045746, + "learning_rate": 2.201420332026538e-07, + "loss": 0.46005967259407043, + "step": 6901 + }, + { + "epoch": 1.591422642379525, + "grad_norm": 1.4328523009517202, + "learning_rate": 2.1990343693088243e-07, + "loss": 0.3572643995285034, + "step": 6902 + }, + { + "epoch": 1.5916532165091075, + "grad_norm": 1.744760153255399, + "learning_rate": 2.196649540540527e-07, + "loss": 0.5321012735366821, + "step": 6903 + }, + { + "epoch": 1.5918837906386902, + "grad_norm": 1.5415731453823578, + "learning_rate": 2.194265846068305e-07, + "loss": 0.4913836419582367, + "step": 6904 + }, + { + "epoch": 1.592114364768273, + "grad_norm": 1.7016363411577065, + "learning_rate": 2.1918832862386493e-07, + "loss": 0.37674903869628906, + "step": 6905 + }, + { + "epoch": 1.5923449388978557, + "grad_norm": 1.5772289300833298, + "learning_rate": 2.1895018613978934e-07, + "loss": 0.4385930001735687, + "step": 6906 + }, + { + "epoch": 1.5925755130274384, + "grad_norm": 2.224743671968565, + "learning_rate": 2.1871215718921964e-07, + "loss": 0.5219674706459045, + "step": 6907 + }, + { + "epoch": 1.592806087157021, + "grad_norm": 1.5215408344839954, + "learning_rate": 2.1847424180675622e-07, + "loss": 0.4241113066673279, + "step": 6908 + }, + { + "epoch": 1.5930366612866036, + "grad_norm": 1.4296843598144484, + "learning_rate": 2.1823644002698237e-07, + "loss": 0.4008786082267761, + "step": 6909 + }, + { + "epoch": 1.5932672354161863, + "grad_norm": 1.5021365471039205, + "learning_rate": 2.179987518844645e-07, + "loss": 0.3333933651447296, + "step": 6910 + }, + { + "epoch": 1.5934978095457688, + "grad_norm": 1.652596855301234, + "learning_rate": 2.1776117741375343e-07, + "loss": 0.48857730627059937, + "step": 6911 + }, + { + "epoch": 1.5937283836753515, + "grad_norm": 1.4724322236306013, + "learning_rate": 
2.1752371664938306e-07, + "loss": 0.37393617630004883, + "step": 6912 + }, + { + "epoch": 1.5939589578049342, + "grad_norm": 1.4102085657254086, + "learning_rate": 2.172863696258709e-07, + "loss": 0.5365080833435059, + "step": 6913 + }, + { + "epoch": 1.594189531934517, + "grad_norm": 1.7683912421422305, + "learning_rate": 2.1704913637771705e-07, + "loss": 0.49318936467170715, + "step": 6914 + }, + { + "epoch": 1.5944201060640997, + "grad_norm": 1.8200372673393599, + "learning_rate": 2.1681201693940666e-07, + "loss": 0.37682920694351196, + "step": 6915 + }, + { + "epoch": 1.5946506801936824, + "grad_norm": 1.4120260343966702, + "learning_rate": 2.1657501134540657e-07, + "loss": 0.4894877076148987, + "step": 6916 + }, + { + "epoch": 1.5948812543232649, + "grad_norm": 1.5895963005275906, + "learning_rate": 2.1633811963016869e-07, + "loss": 0.4200783967971802, + "step": 6917 + }, + { + "epoch": 1.5951118284528476, + "grad_norm": 1.7361608161591027, + "learning_rate": 2.1610134182812702e-07, + "loss": 0.3953052759170532, + "step": 6918 + }, + { + "epoch": 1.59534240258243, + "grad_norm": 1.4727518091374385, + "learning_rate": 2.158646779736999e-07, + "loss": 0.4006558656692505, + "step": 6919 + }, + { + "epoch": 1.5955729767120128, + "grad_norm": 1.7355475804217702, + "learning_rate": 2.1562812810128906e-07, + "loss": 0.3749210238456726, + "step": 6920 + }, + { + "epoch": 1.5958035508415955, + "grad_norm": 1.5378158592599445, + "learning_rate": 2.1539169224527887e-07, + "loss": 0.4688538610935211, + "step": 6921 + }, + { + "epoch": 1.5960341249711782, + "grad_norm": 1.590308500795848, + "learning_rate": 2.151553704400383e-07, + "loss": 0.4483727216720581, + "step": 6922 + }, + { + "epoch": 1.596264699100761, + "grad_norm": 1.589431373760787, + "learning_rate": 2.149191627199185e-07, + "loss": 0.5118253827095032, + "step": 6923 + }, + { + "epoch": 1.5964952732303437, + "grad_norm": 1.644731800905039, + "learning_rate": 2.1468306911925525e-07, + "loss": 
0.43641170859336853, + "step": 6924 + }, + { + "epoch": 1.5967258473599262, + "grad_norm": 1.4755114053374785, + "learning_rate": 2.1444708967236657e-07, + "loss": 0.38253384828567505, + "step": 6925 + }, + { + "epoch": 1.596956421489509, + "grad_norm": 1.5638213373412855, + "learning_rate": 2.1421122441355476e-07, + "loss": 0.43674635887145996, + "step": 6926 + }, + { + "epoch": 1.5971869956190914, + "grad_norm": 1.3940207891491625, + "learning_rate": 2.1397547337710519e-07, + "loss": 0.37392908334732056, + "step": 6927 + }, + { + "epoch": 1.597417569748674, + "grad_norm": 1.5097907813025324, + "learning_rate": 2.13739836597287e-07, + "loss": 0.4531250298023224, + "step": 6928 + }, + { + "epoch": 1.5976481438782568, + "grad_norm": 1.3308296891253455, + "learning_rate": 2.13504314108352e-07, + "loss": 0.38579899072647095, + "step": 6929 + }, + { + "epoch": 1.5978787180078395, + "grad_norm": 1.8618083111554995, + "learning_rate": 2.1326890594453563e-07, + "loss": 0.5215288400650024, + "step": 6930 + }, + { + "epoch": 1.5981092921374223, + "grad_norm": 1.6019249166669218, + "learning_rate": 2.130336121400572e-07, + "loss": 0.4396743178367615, + "step": 6931 + }, + { + "epoch": 1.598339866267005, + "grad_norm": 1.5371889029106374, + "learning_rate": 2.127984327291188e-07, + "loss": 0.5068432688713074, + "step": 6932 + }, + { + "epoch": 1.5985704403965875, + "grad_norm": 1.7855756215277538, + "learning_rate": 2.1256336774590643e-07, + "loss": 0.48809194564819336, + "step": 6933 + }, + { + "epoch": 1.5988010145261702, + "grad_norm": 1.4166815561679078, + "learning_rate": 2.123284172245885e-07, + "loss": 0.4191613793373108, + "step": 6934 + }, + { + "epoch": 1.5990315886557527, + "grad_norm": 1.5763678308245206, + "learning_rate": 2.1209358119931843e-07, + "loss": 0.41901010274887085, + "step": 6935 + }, + { + "epoch": 1.5992621627853354, + "grad_norm": 1.8296822391624505, + "learning_rate": 2.1185885970423133e-07, + "loss": 0.5046913623809814, + "step": 6936 + }, + { + 
"epoch": 1.5994927369149181, + "grad_norm": 2.1559492699976492, + "learning_rate": 2.1162425277344675e-07, + "loss": 0.5113730430603027, + "step": 6937 + }, + { + "epoch": 1.5997233110445008, + "grad_norm": 1.520077424866564, + "learning_rate": 2.1138976044106672e-07, + "loss": 0.34129637479782104, + "step": 6938 + }, + { + "epoch": 1.5999538851740835, + "grad_norm": 1.5890047902961466, + "learning_rate": 2.1115538274117762e-07, + "loss": 0.4492289423942566, + "step": 6939 + }, + { + "epoch": 1.6001844593036663, + "grad_norm": 1.5532375131614289, + "learning_rate": 2.1092111970784833e-07, + "loss": 0.41002708673477173, + "step": 6940 + }, + { + "epoch": 1.6004150334332488, + "grad_norm": 1.887817008406582, + "learning_rate": 2.1068697137513113e-07, + "loss": 0.5444740056991577, + "step": 6941 + }, + { + "epoch": 1.6006456075628315, + "grad_norm": 1.518981510824895, + "learning_rate": 2.1045293777706196e-07, + "loss": 0.3489699959754944, + "step": 6942 + }, + { + "epoch": 1.600876181692414, + "grad_norm": 1.5115486172446684, + "learning_rate": 2.1021901894766025e-07, + "loss": 0.41807419061660767, + "step": 6943 + }, + { + "epoch": 1.6011067558219967, + "grad_norm": 1.7376028221450257, + "learning_rate": 2.0998521492092857e-07, + "loss": 0.41074657440185547, + "step": 6944 + }, + { + "epoch": 1.6013373299515794, + "grad_norm": 1.370751011576157, + "learning_rate": 2.097515257308521e-07, + "loss": 0.4085312485694885, + "step": 6945 + }, + { + "epoch": 1.6015679040811621, + "grad_norm": 1.6632563260665783, + "learning_rate": 2.095179514114006e-07, + "loss": 0.42699170112609863, + "step": 6946 + }, + { + "epoch": 1.6017984782107448, + "grad_norm": 1.6347540938108835, + "learning_rate": 2.0928449199652597e-07, + "loss": 0.40041583776474, + "step": 6947 + }, + { + "epoch": 1.6020290523403276, + "grad_norm": 1.385214375087801, + "learning_rate": 2.090511475201643e-07, + "loss": 0.47465208172798157, + "step": 6948 + }, + { + "epoch": 1.60225962646991, + "grad_norm": 
1.5233208405026366, + "learning_rate": 2.0881791801623405e-07, + "loss": 0.4338058829307556, + "step": 6949 + }, + { + "epoch": 1.6024902005994928, + "grad_norm": 1.857588116409586, + "learning_rate": 2.0858480351863794e-07, + "loss": 0.5398772954940796, + "step": 6950 + }, + { + "epoch": 1.6027207747290753, + "grad_norm": 1.41461865858101, + "learning_rate": 2.0835180406126151e-07, + "loss": 0.40750259160995483, + "step": 6951 + }, + { + "epoch": 1.602951348858658, + "grad_norm": 1.6330208123854022, + "learning_rate": 2.0811891967797336e-07, + "loss": 0.4365716278553009, + "step": 6952 + }, + { + "epoch": 1.6031819229882407, + "grad_norm": 1.395812913626374, + "learning_rate": 2.078861504026258e-07, + "loss": 0.41537174582481384, + "step": 6953 + }, + { + "epoch": 1.6034124971178234, + "grad_norm": 1.331855885968294, + "learning_rate": 2.0765349626905394e-07, + "loss": 0.3687853217124939, + "step": 6954 + }, + { + "epoch": 1.6036430712474061, + "grad_norm": 1.4291699726024594, + "learning_rate": 2.074209573110769e-07, + "loss": 0.48866790533065796, + "step": 6955 + }, + { + "epoch": 1.6038736453769888, + "grad_norm": 1.7541297686576787, + "learning_rate": 2.0718853356249588e-07, + "loss": 0.4618760347366333, + "step": 6956 + }, + { + "epoch": 1.6041042195065713, + "grad_norm": 1.820272898606224, + "learning_rate": 2.0695622505709654e-07, + "loss": 0.365873247385025, + "step": 6957 + }, + { + "epoch": 1.604334793636154, + "grad_norm": 1.7127779412462347, + "learning_rate": 2.0672403182864706e-07, + "loss": 0.4346495270729065, + "step": 6958 + }, + { + "epoch": 1.6045653677657365, + "grad_norm": 1.4385774019168192, + "learning_rate": 2.0649195391089935e-07, + "loss": 0.3995724618434906, + "step": 6959 + }, + { + "epoch": 1.6047959418953193, + "grad_norm": 1.890499669463449, + "learning_rate": 2.062599913375882e-07, + "loss": 0.4628515839576721, + "step": 6960 + }, + { + "epoch": 1.605026516024902, + "grad_norm": 1.8491035226730044, + "learning_rate": 
2.060281441424314e-07, + "loss": 0.39776262640953064, + "step": 6961 + }, + { + "epoch": 1.6052570901544847, + "grad_norm": 1.6838333142700899, + "learning_rate": 2.057964123591307e-07, + "loss": 0.4622994065284729, + "step": 6962 + }, + { + "epoch": 1.6054876642840674, + "grad_norm": 1.3806987670969462, + "learning_rate": 2.0556479602137033e-07, + "loss": 0.4028933048248291, + "step": 6963 + }, + { + "epoch": 1.6057182384136501, + "grad_norm": 1.592137730506949, + "learning_rate": 2.0533329516281838e-07, + "loss": 0.46639660000801086, + "step": 6964 + }, + { + "epoch": 1.6059488125432326, + "grad_norm": 1.3243378898371028, + "learning_rate": 2.0510190981712537e-07, + "loss": 0.4063863158226013, + "step": 6965 + }, + { + "epoch": 1.6061793866728153, + "grad_norm": 1.6927530193908227, + "learning_rate": 2.0487064001792586e-07, + "loss": 0.471376895904541, + "step": 6966 + }, + { + "epoch": 1.6064099608023978, + "grad_norm": 1.5262354616100662, + "learning_rate": 2.0463948579883727e-07, + "loss": 0.5094102025032043, + "step": 6967 + }, + { + "epoch": 1.6066405349319806, + "grad_norm": 1.613731344454896, + "learning_rate": 2.0440844719346039e-07, + "loss": 0.3922441005706787, + "step": 6968 + }, + { + "epoch": 1.6068711090615633, + "grad_norm": 1.7524315605420397, + "learning_rate": 2.0417752423537882e-07, + "loss": 0.47777149081230164, + "step": 6969 + }, + { + "epoch": 1.607101683191146, + "grad_norm": 2.2487851564601065, + "learning_rate": 2.0394671695815924e-07, + "loss": 0.5780138969421387, + "step": 6970 + }, + { + "epoch": 1.6073322573207287, + "grad_norm": 1.6028588432287403, + "learning_rate": 2.0371602539535237e-07, + "loss": 0.43968862295150757, + "step": 6971 + }, + { + "epoch": 1.6075628314503114, + "grad_norm": 1.877374036184133, + "learning_rate": 2.0348544958049096e-07, + "loss": 0.5204722881317139, + "step": 6972 + }, + { + "epoch": 1.607793405579894, + "grad_norm": 1.5207193577135807, + "learning_rate": 2.0325498954709198e-07, + "loss": 
0.3944805860519409, + "step": 6973 + }, + { + "epoch": 1.6080239797094766, + "grad_norm": 1.454235622222141, + "learning_rate": 2.0302464532865505e-07, + "loss": 0.42686349153518677, + "step": 6974 + }, + { + "epoch": 1.6082545538390591, + "grad_norm": 1.5958289830519565, + "learning_rate": 2.027944169586633e-07, + "loss": 0.3860762119293213, + "step": 6975 + }, + { + "epoch": 1.6084851279686418, + "grad_norm": 1.880005605643703, + "learning_rate": 2.0256430447058215e-07, + "loss": 0.5570458769798279, + "step": 6976 + }, + { + "epoch": 1.6087157020982246, + "grad_norm": 1.8351241687154358, + "learning_rate": 2.0233430789786132e-07, + "loss": 0.4556728005409241, + "step": 6977 + }, + { + "epoch": 1.6089462762278073, + "grad_norm": 1.4746534507162423, + "learning_rate": 2.0210442727393285e-07, + "loss": 0.48365700244903564, + "step": 6978 + }, + { + "epoch": 1.60917685035739, + "grad_norm": 1.7835628524046172, + "learning_rate": 2.018746626322124e-07, + "loss": 0.4456971287727356, + "step": 6979 + }, + { + "epoch": 1.6094074244869727, + "grad_norm": 1.6700237073697568, + "learning_rate": 2.0164501400609835e-07, + "loss": 0.41877123713493347, + "step": 6980 + }, + { + "epoch": 1.6096379986165552, + "grad_norm": 1.3803715462197303, + "learning_rate": 2.0141548142897246e-07, + "loss": 0.4073547124862671, + "step": 6981 + }, + { + "epoch": 1.609868572746138, + "grad_norm": 1.5181775501419725, + "learning_rate": 2.0118606493420021e-07, + "loss": 0.4987693727016449, + "step": 6982 + }, + { + "epoch": 1.6100991468757204, + "grad_norm": 1.603543806365415, + "learning_rate": 2.0095676455512878e-07, + "loss": 0.4391751289367676, + "step": 6983 + }, + { + "epoch": 1.6103297210053031, + "grad_norm": 1.4062982467603231, + "learning_rate": 2.0072758032508996e-07, + "loss": 0.409262478351593, + "step": 6984 + }, + { + "epoch": 1.6105602951348859, + "grad_norm": 1.353394057864669, + "learning_rate": 2.0049851227739744e-07, + "loss": 0.38653457164764404, + "step": 6985 + }, + { + 
"epoch": 1.6107908692644686, + "grad_norm": 1.9189325963312815, + "learning_rate": 2.0026956044534914e-07, + "loss": 0.4824348986148834, + "step": 6986 + }, + { + "epoch": 1.6110214433940513, + "grad_norm": 1.7037748706735498, + "learning_rate": 2.00040724862225e-07, + "loss": 0.45774850249290466, + "step": 6987 + }, + { + "epoch": 1.611252017523634, + "grad_norm": 1.5419477618151842, + "learning_rate": 1.9981200556128906e-07, + "loss": 0.45437830686569214, + "step": 6988 + }, + { + "epoch": 1.6114825916532165, + "grad_norm": 1.4581568342693196, + "learning_rate": 1.9958340257578753e-07, + "loss": 0.4563155770301819, + "step": 6989 + }, + { + "epoch": 1.6117131657827992, + "grad_norm": 1.7363246075229848, + "learning_rate": 1.9935491593895048e-07, + "loss": 0.5786794424057007, + "step": 6990 + }, + { + "epoch": 1.6119437399123817, + "grad_norm": 1.6120161181322603, + "learning_rate": 1.991265456839909e-07, + "loss": 0.5290218591690063, + "step": 6991 + }, + { + "epoch": 1.6121743140419644, + "grad_norm": 1.607774677113548, + "learning_rate": 1.9889829184410434e-07, + "loss": 0.3456650376319885, + "step": 6992 + }, + { + "epoch": 1.6124048881715471, + "grad_norm": 1.414142582496391, + "learning_rate": 1.9867015445247015e-07, + "loss": 0.40869832038879395, + "step": 6993 + }, + { + "epoch": 1.6126354623011299, + "grad_norm": 2.3563881452147992, + "learning_rate": 1.9844213354225004e-07, + "loss": 0.49926644563674927, + "step": 6994 + }, + { + "epoch": 1.6128660364307126, + "grad_norm": 1.904270429684393, + "learning_rate": 1.9821422914658957e-07, + "loss": 0.4874018132686615, + "step": 6995 + }, + { + "epoch": 1.6130966105602953, + "grad_norm": 1.872252891476363, + "learning_rate": 1.9798644129861654e-07, + "loss": 0.4228810667991638, + "step": 6996 + }, + { + "epoch": 1.6133271846898778, + "grad_norm": 1.4437194678200662, + "learning_rate": 1.9775877003144237e-07, + "loss": 0.4309043884277344, + "step": 6997 + }, + { + "epoch": 1.6135577588194605, + "grad_norm": 
1.6133739556944033, + "learning_rate": 1.9753121537816142e-07, + "loss": 0.3917756676673889, + "step": 6998 + }, + { + "epoch": 1.613788332949043, + "grad_norm": 1.492105866056543, + "learning_rate": 1.9730377737185145e-07, + "loss": 0.4074435830116272, + "step": 6999 + }, + { + "epoch": 1.6140189070786257, + "grad_norm": 1.7474889804918834, + "learning_rate": 1.9707645604557243e-07, + "loss": 0.4581322968006134, + "step": 7000 + }, + { + "epoch": 1.6142494812082084, + "grad_norm": 1.5240615238309698, + "learning_rate": 1.9684925143236776e-07, + "loss": 0.4479151666164398, + "step": 7001 + }, + { + "epoch": 1.6144800553377912, + "grad_norm": 1.4379805154063257, + "learning_rate": 1.966221635652643e-07, + "loss": 0.3378838300704956, + "step": 7002 + }, + { + "epoch": 1.6147106294673739, + "grad_norm": 1.6755517427089033, + "learning_rate": 1.96395192477271e-07, + "loss": 0.3383278250694275, + "step": 7003 + }, + { + "epoch": 1.6149412035969566, + "grad_norm": 1.5430108527415651, + "learning_rate": 1.9616833820138091e-07, + "loss": 0.5164717435836792, + "step": 7004 + }, + { + "epoch": 1.615171777726539, + "grad_norm": 1.6927378959186403, + "learning_rate": 1.9594160077056932e-07, + "loss": 0.4548792243003845, + "step": 7005 + }, + { + "epoch": 1.6154023518561216, + "grad_norm": 1.608730816141968, + "learning_rate": 1.9571498021779531e-07, + "loss": 0.41074928641319275, + "step": 7006 + }, + { + "epoch": 1.6156329259857043, + "grad_norm": 1.5384399915677613, + "learning_rate": 1.9548847657599976e-07, + "loss": 0.4156193137168884, + "step": 7007 + }, + { + "epoch": 1.615863500115287, + "grad_norm": 1.742725966102226, + "learning_rate": 1.95262089878108e-07, + "loss": 0.4602770209312439, + "step": 7008 + }, + { + "epoch": 1.6160940742448697, + "grad_norm": 1.5880816009582301, + "learning_rate": 1.9503582015702713e-07, + "loss": 0.4911346733570099, + "step": 7009 + }, + { + "epoch": 1.6163246483744524, + "grad_norm": 1.5007140709934312, + "learning_rate": 
1.9480966744564764e-07, + "loss": 0.394087553024292, + "step": 7010 + }, + { + "epoch": 1.6165552225040352, + "grad_norm": 1.5836059389854649, + "learning_rate": 1.9458363177684367e-07, + "loss": 0.4845706820487976, + "step": 7011 + }, + { + "epoch": 1.6167857966336177, + "grad_norm": 1.7088454795128305, + "learning_rate": 1.9435771318347116e-07, + "loss": 0.49142736196517944, + "step": 7012 + }, + { + "epoch": 1.6170163707632004, + "grad_norm": 1.3798831769041013, + "learning_rate": 1.9413191169836996e-07, + "loss": 0.4408283829689026, + "step": 7013 + }, + { + "epoch": 1.6172469448927829, + "grad_norm": 1.6476950016993046, + "learning_rate": 1.9390622735436268e-07, + "loss": 0.6088640689849854, + "step": 7014 + }, + { + "epoch": 1.6174775190223656, + "grad_norm": 1.912745817268737, + "learning_rate": 1.93680660184255e-07, + "loss": 0.5208842158317566, + "step": 7015 + }, + { + "epoch": 1.6177080931519483, + "grad_norm": 1.7742607180865566, + "learning_rate": 1.9345521022083488e-07, + "loss": 0.5652821660041809, + "step": 7016 + }, + { + "epoch": 1.617938667281531, + "grad_norm": 1.5895189074949856, + "learning_rate": 1.9322987749687437e-07, + "loss": 0.4861832857131958, + "step": 7017 + }, + { + "epoch": 1.6181692414111137, + "grad_norm": 1.5693969535816144, + "learning_rate": 1.930046620451272e-07, + "loss": 0.39583832025527954, + "step": 7018 + }, + { + "epoch": 1.6183998155406965, + "grad_norm": 1.6283824576887038, + "learning_rate": 1.927795638983313e-07, + "loss": 0.5638653039932251, + "step": 7019 + }, + { + "epoch": 1.618630389670279, + "grad_norm": 1.7595661530223012, + "learning_rate": 1.9255458308920648e-07, + "loss": 0.4737275242805481, + "step": 7020 + }, + { + "epoch": 1.6188609637998617, + "grad_norm": 1.3807112997659796, + "learning_rate": 1.923297196504563e-07, + "loss": 0.4526802897453308, + "step": 7021 + }, + { + "epoch": 1.6190915379294442, + "grad_norm": 1.5519742811018764, + "learning_rate": 1.9210497361476708e-07, + "loss": 
0.40800565481185913, + "step": 7022 + }, + { + "epoch": 1.6193221120590269, + "grad_norm": 1.3169867108502276, + "learning_rate": 1.9188034501480744e-07, + "loss": 0.39532414078712463, + "step": 7023 + }, + { + "epoch": 1.6195526861886096, + "grad_norm": 1.3982522966659368, + "learning_rate": 1.9165583388322993e-07, + "loss": 0.40236538648605347, + "step": 7024 + }, + { + "epoch": 1.6197832603181923, + "grad_norm": 1.4838960013292628, + "learning_rate": 1.91431440252669e-07, + "loss": 0.4421047866344452, + "step": 7025 + }, + { + "epoch": 1.620013834447775, + "grad_norm": 1.5688320926864374, + "learning_rate": 1.9120716415574322e-07, + "loss": 0.4149084687232971, + "step": 7026 + }, + { + "epoch": 1.6202444085773577, + "grad_norm": 1.8747733544619556, + "learning_rate": 1.9098300562505264e-07, + "loss": 0.4186127185821533, + "step": 7027 + }, + { + "epoch": 1.6204749827069402, + "grad_norm": 1.5276498671204974, + "learning_rate": 1.9075896469318132e-07, + "loss": 0.4649406671524048, + "step": 7028 + }, + { + "epoch": 1.620705556836523, + "grad_norm": 1.5217002126023946, + "learning_rate": 1.9053504139269593e-07, + "loss": 0.43240052461624146, + "step": 7029 + }, + { + "epoch": 1.6209361309661054, + "grad_norm": 1.7731525747902717, + "learning_rate": 1.9031123575614628e-07, + "loss": 0.4874862730503082, + "step": 7030 + }, + { + "epoch": 1.6211667050956882, + "grad_norm": 1.6133636879972175, + "learning_rate": 1.900875478160644e-07, + "loss": 0.3771815896034241, + "step": 7031 + }, + { + "epoch": 1.6213972792252709, + "grad_norm": 1.548316338784864, + "learning_rate": 1.898639776049653e-07, + "loss": 0.49882376194000244, + "step": 7032 + }, + { + "epoch": 1.6216278533548536, + "grad_norm": 1.5189621230999546, + "learning_rate": 1.896405251553479e-07, + "loss": 0.3813830614089966, + "step": 7033 + }, + { + "epoch": 1.6218584274844363, + "grad_norm": 1.588790821712345, + "learning_rate": 1.8941719049969272e-07, + "loss": 0.41883599758148193, + "step": 7034 + }, + { + 
"epoch": 1.622089001614019, + "grad_norm": 1.4271058877816405, + "learning_rate": 1.8919397367046409e-07, + "loss": 0.42194586992263794, + "step": 7035 + }, + { + "epoch": 1.6223195757436015, + "grad_norm": 1.5957469997065072, + "learning_rate": 1.889708747001084e-07, + "loss": 0.36967700719833374, + "step": 7036 + }, + { + "epoch": 1.6225501498731842, + "grad_norm": 1.4373460175753532, + "learning_rate": 1.887478936210556e-07, + "loss": 0.4493946433067322, + "step": 7037 + }, + { + "epoch": 1.6227807240027667, + "grad_norm": 1.6526676224310628, + "learning_rate": 1.8852503046571833e-07, + "loss": 0.42121458053588867, + "step": 7038 + }, + { + "epoch": 1.6230112981323495, + "grad_norm": 1.430632776113786, + "learning_rate": 1.8830228526649207e-07, + "loss": 0.4529588222503662, + "step": 7039 + }, + { + "epoch": 1.6232418722619322, + "grad_norm": 1.537552702708545, + "learning_rate": 1.88079658055755e-07, + "loss": 0.387844443321228, + "step": 7040 + }, + { + "epoch": 1.623472446391515, + "grad_norm": 1.4872655198554567, + "learning_rate": 1.8785714886586802e-07, + "loss": 0.49954158067703247, + "step": 7041 + }, + { + "epoch": 1.6237030205210976, + "grad_norm": 1.3845875929093436, + "learning_rate": 1.8763475772917548e-07, + "loss": 0.4016296863555908, + "step": 7042 + }, + { + "epoch": 1.6239335946506803, + "grad_norm": 1.5208389143205874, + "learning_rate": 1.8741248467800362e-07, + "loss": 0.358657568693161, + "step": 7043 + }, + { + "epoch": 1.6241641687802628, + "grad_norm": 1.471037478852436, + "learning_rate": 1.8719032974466264e-07, + "loss": 0.434385746717453, + "step": 7044 + }, + { + "epoch": 1.6243947429098455, + "grad_norm": 1.4705602216948914, + "learning_rate": 1.8696829296144466e-07, + "loss": 0.4658992886543274, + "step": 7045 + }, + { + "epoch": 1.624625317039428, + "grad_norm": 1.8724382429627917, + "learning_rate": 1.8674637436062545e-07, + "loss": 0.5438188910484314, + "step": 7046 + }, + { + "epoch": 1.6248558911690107, + "grad_norm": 
1.9024479318941907, + "learning_rate": 1.8652457397446254e-07, + "loss": 0.47364577651023865, + "step": 7047 + }, + { + "epoch": 1.6250864652985935, + "grad_norm": 1.386287471529149, + "learning_rate": 1.8630289183519733e-07, + "loss": 0.3664509654045105, + "step": 7048 + }, + { + "epoch": 1.6253170394281762, + "grad_norm": 1.5676786934992741, + "learning_rate": 1.8608132797505317e-07, + "loss": 0.4226282835006714, + "step": 7049 + }, + { + "epoch": 1.625547613557759, + "grad_norm": 1.4581751590991685, + "learning_rate": 1.8585988242623706e-07, + "loss": 0.47477972507476807, + "step": 7050 + }, + { + "epoch": 1.6257781876873416, + "grad_norm": 2.082606809210874, + "learning_rate": 1.8563855522093786e-07, + "loss": 0.5372269749641418, + "step": 7051 + }, + { + "epoch": 1.626008761816924, + "grad_norm": 1.3565872618977541, + "learning_rate": 1.8541734639132788e-07, + "loss": 0.37929385900497437, + "step": 7052 + }, + { + "epoch": 1.6262393359465068, + "grad_norm": 1.5119164625864447, + "learning_rate": 1.8519625596956244e-07, + "loss": 0.4029538631439209, + "step": 7053 + }, + { + "epoch": 1.6264699100760893, + "grad_norm": 1.5739338248608081, + "learning_rate": 1.8497528398777874e-07, + "loss": 0.3932439982891083, + "step": 7054 + }, + { + "epoch": 1.626700484205672, + "grad_norm": 1.5806776566898322, + "learning_rate": 1.847544304780978e-07, + "loss": 0.45190152525901794, + "step": 7055 + }, + { + "epoch": 1.6269310583352548, + "grad_norm": 1.8629994959724827, + "learning_rate": 1.8453369547262242e-07, + "loss": 0.4852195382118225, + "step": 7056 + }, + { + "epoch": 1.6271616324648375, + "grad_norm": 1.608209634523461, + "learning_rate": 1.8431307900343918e-07, + "loss": 0.41676801443099976, + "step": 7057 + }, + { + "epoch": 1.6273922065944202, + "grad_norm": 1.388166685170728, + "learning_rate": 1.8409258110261626e-07, + "loss": 0.44374561309814453, + "step": 7058 + }, + { + "epoch": 1.627622780724003, + "grad_norm": 1.5975340281654677, + "learning_rate": 
1.838722018022061e-07, + "loss": 0.4348192811012268, + "step": 7059 + }, + { + "epoch": 1.6278533548535854, + "grad_norm": 1.626194256762104, + "learning_rate": 1.836519411342422e-07, + "loss": 0.46572640538215637, + "step": 7060 + }, + { + "epoch": 1.6280839289831681, + "grad_norm": 1.4985871084379754, + "learning_rate": 1.8343179913074214e-07, + "loss": 0.4633631408214569, + "step": 7061 + }, + { + "epoch": 1.6283145031127506, + "grad_norm": 1.3260867645697678, + "learning_rate": 1.8321177582370605e-07, + "loss": 0.44420552253723145, + "step": 7062 + }, + { + "epoch": 1.6285450772423333, + "grad_norm": 1.8207040168707305, + "learning_rate": 1.8299187124511594e-07, + "loss": 0.5628370046615601, + "step": 7063 + }, + { + "epoch": 1.628775651371916, + "grad_norm": 1.7448936691285617, + "learning_rate": 1.8277208542693778e-07, + "loss": 0.5342314839363098, + "step": 7064 + }, + { + "epoch": 1.6290062255014988, + "grad_norm": 1.529076197622531, + "learning_rate": 1.82552418401119e-07, + "loss": 0.440934419631958, + "step": 7065 + }, + { + "epoch": 1.6292367996310815, + "grad_norm": 1.4532572456773438, + "learning_rate": 1.823328701995912e-07, + "loss": 0.45218637585639954, + "step": 7066 + }, + { + "epoch": 1.6294673737606642, + "grad_norm": 1.456173637640115, + "learning_rate": 1.8211344085426716e-07, + "loss": 0.4059211015701294, + "step": 7067 + }, + { + "epoch": 1.6296979478902467, + "grad_norm": 2.0474805024349876, + "learning_rate": 1.818941303970435e-07, + "loss": 0.5036444067955017, + "step": 7068 + }, + { + "epoch": 1.6299285220198294, + "grad_norm": 1.6421868165266436, + "learning_rate": 1.8167493885979935e-07, + "loss": 0.5034196972846985, + "step": 7069 + }, + { + "epoch": 1.630159096149412, + "grad_norm": 1.5247456374523982, + "learning_rate": 1.8145586627439645e-07, + "loss": 0.4199259281158447, + "step": 7070 + }, + { + "epoch": 1.6303896702789946, + "grad_norm": 1.5913722133067008, + "learning_rate": 1.8123691267267915e-07, + "loss": 
0.5439015626907349, + "step": 7071 + }, + { + "epoch": 1.6306202444085773, + "grad_norm": 1.6181852234306913, + "learning_rate": 1.810180780864743e-07, + "loss": 0.4349868893623352, + "step": 7072 + }, + { + "epoch": 1.63085081853816, + "grad_norm": 1.5299206997440553, + "learning_rate": 1.807993625475921e-07, + "loss": 0.39939552545547485, + "step": 7073 + }, + { + "epoch": 1.6310813926677428, + "grad_norm": 1.575600412629914, + "learning_rate": 1.8058076608782468e-07, + "loss": 0.43073540925979614, + "step": 7074 + }, + { + "epoch": 1.6313119667973255, + "grad_norm": 1.6461603718238804, + "learning_rate": 1.8036228873894744e-07, + "loss": 0.4735824465751648, + "step": 7075 + }, + { + "epoch": 1.631542540926908, + "grad_norm": 1.466337846989889, + "learning_rate": 1.8014393053271836e-07, + "loss": 0.42971551418304443, + "step": 7076 + }, + { + "epoch": 1.6317731150564907, + "grad_norm": 1.694502155411865, + "learning_rate": 1.7992569150087823e-07, + "loss": 0.48593759536743164, + "step": 7077 + }, + { + "epoch": 1.6320036891860732, + "grad_norm": 1.55292324755966, + "learning_rate": 1.7970757167514973e-07, + "loss": 0.530194878578186, + "step": 7078 + }, + { + "epoch": 1.632234263315656, + "grad_norm": 1.7324585048939796, + "learning_rate": 1.794895710872394e-07, + "loss": 0.43393629789352417, + "step": 7079 + }, + { + "epoch": 1.6324648374452386, + "grad_norm": 1.5827349286667418, + "learning_rate": 1.7927168976883556e-07, + "loss": 0.4211798906326294, + "step": 7080 + }, + { + "epoch": 1.6326954115748213, + "grad_norm": 1.5939322533043618, + "learning_rate": 1.790539277516091e-07, + "loss": 0.39001476764678955, + "step": 7081 + }, + { + "epoch": 1.632925985704404, + "grad_norm": 1.6028280785725797, + "learning_rate": 1.788362850672146e-07, + "loss": 0.4360283613204956, + "step": 7082 + }, + { + "epoch": 1.6331565598339868, + "grad_norm": 1.6516207153980025, + "learning_rate": 1.7861876174728807e-07, + "loss": 0.47754842042922974, + "step": 7083 + }, + { + 
"epoch": 1.6333871339635693, + "grad_norm": 1.634690883802538, + "learning_rate": 1.7840135782344888e-07, + "loss": 0.35193490982055664, + "step": 7084 + }, + { + "epoch": 1.633617708093152, + "grad_norm": 1.2825662437681398, + "learning_rate": 1.7818407332729912e-07, + "loss": 0.39997392892837524, + "step": 7085 + }, + { + "epoch": 1.6338482822227345, + "grad_norm": 1.324570823301632, + "learning_rate": 1.7796690829042328e-07, + "loss": 0.3255331218242645, + "step": 7086 + }, + { + "epoch": 1.6340788563523172, + "grad_norm": 1.424074701555127, + "learning_rate": 1.777498627443882e-07, + "loss": 0.47072282433509827, + "step": 7087 + }, + { + "epoch": 1.6343094304819, + "grad_norm": 1.5293726959445282, + "learning_rate": 1.775329367207441e-07, + "loss": 0.4231484830379486, + "step": 7088 + }, + { + "epoch": 1.6345400046114826, + "grad_norm": 1.4406985915809287, + "learning_rate": 1.7731613025102276e-07, + "loss": 0.37112197279930115, + "step": 7089 + }, + { + "epoch": 1.6347705787410653, + "grad_norm": 1.5117815815493545, + "learning_rate": 1.7709944336673986e-07, + "loss": 0.5772623419761658, + "step": 7090 + }, + { + "epoch": 1.635001152870648, + "grad_norm": 1.4205344879838042, + "learning_rate": 1.7688287609939244e-07, + "loss": 0.45922917127609253, + "step": 7091 + }, + { + "epoch": 1.6352317270002306, + "grad_norm": 1.6262912271430976, + "learning_rate": 1.7666642848046098e-07, + "loss": 0.42784950137138367, + "step": 7092 + }, + { + "epoch": 1.6354623011298133, + "grad_norm": 1.585709168390131, + "learning_rate": 1.7645010054140873e-07, + "loss": 0.4676967263221741, + "step": 7093 + }, + { + "epoch": 1.6356928752593958, + "grad_norm": 1.4782811209898545, + "learning_rate": 1.7623389231368046e-07, + "loss": 0.434337317943573, + "step": 7094 + }, + { + "epoch": 1.6359234493889785, + "grad_norm": 1.512954791126533, + "learning_rate": 1.760178038287048e-07, + "loss": 0.4667350947856903, + "step": 7095 + }, + { + "epoch": 1.6361540235185612, + "grad_norm": 
1.3397712801467159, + "learning_rate": 1.7580183511789204e-07, + "loss": 0.42233705520629883, + "step": 7096 + }, + { + "epoch": 1.636384597648144, + "grad_norm": 1.5093056460018237, + "learning_rate": 1.7558598621263565e-07, + "loss": 0.4488460421562195, + "step": 7097 + }, + { + "epoch": 1.6366151717777266, + "grad_norm": 1.6708888950919063, + "learning_rate": 1.753702571443112e-07, + "loss": 0.4264194667339325, + "step": 7098 + }, + { + "epoch": 1.6368457459073094, + "grad_norm": 1.414729354018089, + "learning_rate": 1.7515464794427715e-07, + "loss": 0.32695144414901733, + "step": 7099 + }, + { + "epoch": 1.6370763200368919, + "grad_norm": 2.0744464699438825, + "learning_rate": 1.7493915864387487e-07, + "loss": 0.3573018014431, + "step": 7100 + }, + { + "epoch": 1.6373068941664746, + "grad_norm": 1.4506197336511393, + "learning_rate": 1.7472378927442732e-07, + "loss": 0.4545198082923889, + "step": 7101 + }, + { + "epoch": 1.637537468296057, + "grad_norm": 1.59875503504847, + "learning_rate": 1.7450853986724123e-07, + "loss": 0.42589202523231506, + "step": 7102 + }, + { + "epoch": 1.6377680424256398, + "grad_norm": 1.5169081767342318, + "learning_rate": 1.742934104536048e-07, + "loss": 0.4403502345085144, + "step": 7103 + }, + { + "epoch": 1.6379986165552225, + "grad_norm": 1.7606747961526963, + "learning_rate": 1.7407840106478955e-07, + "loss": 0.4262208938598633, + "step": 7104 + }, + { + "epoch": 1.6382291906848052, + "grad_norm": 1.6000265796951778, + "learning_rate": 1.7386351173204905e-07, + "loss": 0.4706578254699707, + "step": 7105 + }, + { + "epoch": 1.638459764814388, + "grad_norm": 1.4657752166922586, + "learning_rate": 1.7364874248661986e-07, + "loss": 0.4526079297065735, + "step": 7106 + }, + { + "epoch": 1.6386903389439706, + "grad_norm": 1.7833403214487409, + "learning_rate": 1.734340933597207e-07, + "loss": 0.42836326360702515, + "step": 7107 + }, + { + "epoch": 1.6389209130735531, + "grad_norm": 1.4453465477500804, + "learning_rate": 
1.7321956438255292e-07, + "loss": 0.42680823802948, + "step": 7108 + }, + { + "epoch": 1.6391514872031359, + "grad_norm": 1.3964828689114657, + "learning_rate": 1.7300515558630068e-07, + "loss": 0.38365036249160767, + "step": 7109 + }, + { + "epoch": 1.6393820613327184, + "grad_norm": 1.4748773918598719, + "learning_rate": 1.7279086700213063e-07, + "loss": 0.4153991937637329, + "step": 7110 + }, + { + "epoch": 1.639612635462301, + "grad_norm": 1.5777502702437645, + "learning_rate": 1.7257669866119163e-07, + "loss": 0.42257291078567505, + "step": 7111 + }, + { + "epoch": 1.6398432095918838, + "grad_norm": 1.7309640190055833, + "learning_rate": 1.7236265059461498e-07, + "loss": 0.34990063309669495, + "step": 7112 + }, + { + "epoch": 1.6400737837214665, + "grad_norm": 1.3939407429934887, + "learning_rate": 1.72148722833515e-07, + "loss": 0.44848760962486267, + "step": 7113 + }, + { + "epoch": 1.6403043578510492, + "grad_norm": 1.4649667660689574, + "learning_rate": 1.7193491540898808e-07, + "loss": 0.4649186134338379, + "step": 7114 + }, + { + "epoch": 1.640534931980632, + "grad_norm": 1.5050161434573055, + "learning_rate": 1.7172122835211333e-07, + "loss": 0.480952650308609, + "step": 7115 + }, + { + "epoch": 1.6407655061102144, + "grad_norm": 1.6101365826637175, + "learning_rate": 1.7150766169395235e-07, + "loss": 0.4669501483440399, + "step": 7116 + }, + { + "epoch": 1.6409960802397972, + "grad_norm": 1.486994174732026, + "learning_rate": 1.7129421546554957e-07, + "loss": 0.4273250102996826, + "step": 7117 + }, + { + "epoch": 1.6412266543693796, + "grad_norm": 1.8106380448833757, + "learning_rate": 1.71080889697931e-07, + "loss": 0.47923076152801514, + "step": 7118 + }, + { + "epoch": 1.6414572284989624, + "grad_norm": 1.5033931180120297, + "learning_rate": 1.708676844221061e-07, + "loss": 0.42801159620285034, + "step": 7119 + }, + { + "epoch": 1.641687802628545, + "grad_norm": 1.4792875147029159, + "learning_rate": 1.7065459966906636e-07, + "loss": 
0.39929044246673584, + "step": 7120 + }, + { + "epoch": 1.6419183767581278, + "grad_norm": 1.4727601001923896, + "learning_rate": 1.7044163546978553e-07, + "loss": 0.4919764995574951, + "step": 7121 + }, + { + "epoch": 1.6421489508877105, + "grad_norm": 1.5018740505050776, + "learning_rate": 1.702287918552202e-07, + "loss": 0.45943617820739746, + "step": 7122 + }, + { + "epoch": 1.642379525017293, + "grad_norm": 1.5202994857697039, + "learning_rate": 1.7001606885630948e-07, + "loss": 0.48078954219818115, + "step": 7123 + }, + { + "epoch": 1.6426100991468757, + "grad_norm": 1.406204806461001, + "learning_rate": 1.6980346650397505e-07, + "loss": 0.4217113256454468, + "step": 7124 + }, + { + "epoch": 1.6428406732764582, + "grad_norm": 1.479814078881505, + "learning_rate": 1.6959098482912037e-07, + "loss": 0.4643937051296234, + "step": 7125 + }, + { + "epoch": 1.643071247406041, + "grad_norm": 1.6157838326637273, + "learning_rate": 1.6937862386263212e-07, + "loss": 0.43977001309394836, + "step": 7126 + }, + { + "epoch": 1.6433018215356237, + "grad_norm": 1.4653862858165947, + "learning_rate": 1.6916638363537882e-07, + "loss": 0.3872392177581787, + "step": 7127 + }, + { + "epoch": 1.6435323956652064, + "grad_norm": 1.4668608493131068, + "learning_rate": 1.6895426417821213e-07, + "loss": 0.44625502824783325, + "step": 7128 + }, + { + "epoch": 1.643762969794789, + "grad_norm": 1.6445652935798991, + "learning_rate": 1.6874226552196523e-07, + "loss": 0.36836186051368713, + "step": 7129 + }, + { + "epoch": 1.6439935439243718, + "grad_norm": 1.5181829131466213, + "learning_rate": 1.6853038769745465e-07, + "loss": 0.35491907596588135, + "step": 7130 + }, + { + "epoch": 1.6442241180539543, + "grad_norm": 1.5107933584098798, + "learning_rate": 1.6831863073547913e-07, + "loss": 0.5210527181625366, + "step": 7131 + }, + { + "epoch": 1.644454692183537, + "grad_norm": 1.5854667470103982, + "learning_rate": 1.6810699466681932e-07, + "loss": 0.3805693984031677, + "step": 7132 + }, + { 
+ "epoch": 1.6446852663131195, + "grad_norm": 1.8089883418272688, + "learning_rate": 1.6789547952223893e-07, + "loss": 0.5768346786499023, + "step": 7133 + }, + { + "epoch": 1.6449158404427022, + "grad_norm": 1.8423402992377882, + "learning_rate": 1.6768408533248356e-07, + "loss": 0.46465635299682617, + "step": 7134 + }, + { + "epoch": 1.645146414572285, + "grad_norm": 1.8710111931219464, + "learning_rate": 1.674728121282819e-07, + "loss": 0.43119215965270996, + "step": 7135 + }, + { + "epoch": 1.6453769887018677, + "grad_norm": 1.4436891948188744, + "learning_rate": 1.6726165994034402e-07, + "loss": 0.42814093828201294, + "step": 7136 + }, + { + "epoch": 1.6456075628314504, + "grad_norm": 1.5822684467576347, + "learning_rate": 1.6705062879936382e-07, + "loss": 0.41762328147888184, + "step": 7137 + }, + { + "epoch": 1.645838136961033, + "grad_norm": 2.059560914873905, + "learning_rate": 1.668397187360161e-07, + "loss": 0.42717012763023376, + "step": 7138 + }, + { + "epoch": 1.6460687110906156, + "grad_norm": 1.3692759576709286, + "learning_rate": 1.666289297809591e-07, + "loss": 0.37660926580429077, + "step": 7139 + }, + { + "epoch": 1.6462992852201983, + "grad_norm": 1.689926156627043, + "learning_rate": 1.664182619648331e-07, + "loss": 0.3905887007713318, + "step": 7140 + }, + { + "epoch": 1.6465298593497808, + "grad_norm": 1.5648955881343065, + "learning_rate": 1.6620771531826117e-07, + "loss": 0.4848547577857971, + "step": 7141 + }, + { + "epoch": 1.6467604334793635, + "grad_norm": 1.5642509939041707, + "learning_rate": 1.659972898718479e-07, + "loss": 0.37895438075065613, + "step": 7142 + }, + { + "epoch": 1.6469910076089462, + "grad_norm": 1.6050388867308452, + "learning_rate": 1.6578698565618075e-07, + "loss": 0.46770527958869934, + "step": 7143 + }, + { + "epoch": 1.647221581738529, + "grad_norm": 1.705579614415488, + "learning_rate": 1.6557680270182995e-07, + "loss": 0.44138044118881226, + "step": 7144 + }, + { + "epoch": 1.6474521558681117, + "grad_norm": 
1.7922951246817975, + "learning_rate": 1.6536674103934734e-07, + "loss": 0.3681126832962036, + "step": 7145 + }, + { + "epoch": 1.6476827299976944, + "grad_norm": 1.454313444949356, + "learning_rate": 1.651568006992675e-07, + "loss": 0.4410884380340576, + "step": 7146 + }, + { + "epoch": 1.6479133041272769, + "grad_norm": 1.444668904765709, + "learning_rate": 1.6494698171210776e-07, + "loss": 0.4161960482597351, + "step": 7147 + }, + { + "epoch": 1.6481438782568596, + "grad_norm": 1.6873012096950248, + "learning_rate": 1.647372841083674e-07, + "loss": 0.4912784695625305, + "step": 7148 + }, + { + "epoch": 1.648374452386442, + "grad_norm": 1.8457570973340096, + "learning_rate": 1.6452770791852766e-07, + "loss": 0.5137985944747925, + "step": 7149 + }, + { + "epoch": 1.6486050265160248, + "grad_norm": 1.845102008062213, + "learning_rate": 1.6431825317305303e-07, + "loss": 0.43644070625305176, + "step": 7150 + }, + { + "epoch": 1.6488356006456075, + "grad_norm": 1.508191131690363, + "learning_rate": 1.6410891990238973e-07, + "loss": 0.4319378733634949, + "step": 7151 + }, + { + "epoch": 1.6490661747751902, + "grad_norm": 1.6137067673031091, + "learning_rate": 1.6389970813696619e-07, + "loss": 0.474090039730072, + "step": 7152 + }, + { + "epoch": 1.649296748904773, + "grad_norm": 1.656766330100741, + "learning_rate": 1.6369061790719375e-07, + "loss": 0.40291503071784973, + "step": 7153 + }, + { + "epoch": 1.6495273230343557, + "grad_norm": 1.5434308580585603, + "learning_rate": 1.6348164924346562e-07, + "loss": 0.51482754945755, + "step": 7154 + }, + { + "epoch": 1.6497578971639382, + "grad_norm": 1.421069671161851, + "learning_rate": 1.632728021761579e-07, + "loss": 0.35308974981307983, + "step": 7155 + }, + { + "epoch": 1.6499884712935209, + "grad_norm": 1.7501565194944115, + "learning_rate": 1.6306407673562815e-07, + "loss": 0.5269055366516113, + "step": 7156 + }, + { + "epoch": 1.6502190454231034, + "grad_norm": 1.4775332310798848, + "learning_rate": 
1.6285547295221724e-07, + "loss": 0.41290512681007385, + "step": 7157 + }, + { + "epoch": 1.650449619552686, + "grad_norm": 1.4513808656924674, + "learning_rate": 1.6264699085624721e-07, + "loss": 0.39930522441864014, + "step": 7158 + }, + { + "epoch": 1.6506801936822688, + "grad_norm": 1.475028134913826, + "learning_rate": 1.6243863047802365e-07, + "loss": 0.4617648422718048, + "step": 7159 + }, + { + "epoch": 1.6509107678118515, + "grad_norm": 1.6583284073308129, + "learning_rate": 1.6223039184783337e-07, + "loss": 0.4618498980998993, + "step": 7160 + }, + { + "epoch": 1.6511413419414342, + "grad_norm": 1.5177380348824272, + "learning_rate": 1.6202227499594635e-07, + "loss": 0.43138834834098816, + "step": 7161 + }, + { + "epoch": 1.651371916071017, + "grad_norm": 1.9944130162827052, + "learning_rate": 1.618142799526141e-07, + "loss": 0.5330632925033569, + "step": 7162 + }, + { + "epoch": 1.6516024902005995, + "grad_norm": 1.4381555357456468, + "learning_rate": 1.6160640674807103e-07, + "loss": 0.45410698652267456, + "step": 7163 + }, + { + "epoch": 1.6518330643301822, + "grad_norm": 1.52256812211894, + "learning_rate": 1.6139865541253384e-07, + "loss": 0.4216715693473816, + "step": 7164 + }, + { + "epoch": 1.6520636384597647, + "grad_norm": 1.6818151368938485, + "learning_rate": 1.6119102597620083e-07, + "loss": 0.3738868832588196, + "step": 7165 + }, + { + "epoch": 1.6522942125893474, + "grad_norm": 1.587335339212439, + "learning_rate": 1.609835184692535e-07, + "loss": 0.44595998525619507, + "step": 7166 + }, + { + "epoch": 1.65252478671893, + "grad_norm": 1.8461813575956394, + "learning_rate": 1.6077613292185466e-07, + "loss": 0.5446096062660217, + "step": 7167 + }, + { + "epoch": 1.6527553608485128, + "grad_norm": 1.5661326715584178, + "learning_rate": 1.605688693641505e-07, + "loss": 0.47280746698379517, + "step": 7168 + }, + { + "epoch": 1.6529859349780955, + "grad_norm": 1.6260653553703972, + "learning_rate": 1.6036172782626823e-07, + "loss": 
0.5280133485794067, + "step": 7169 + }, + { + "epoch": 1.6532165091076783, + "grad_norm": 1.6507744528919734, + "learning_rate": 1.6015470833831835e-07, + "loss": 0.4659959375858307, + "step": 7170 + }, + { + "epoch": 1.6534470832372608, + "grad_norm": 1.5548632331284282, + "learning_rate": 1.5994781093039335e-07, + "loss": 0.5196797251701355, + "step": 7171 + }, + { + "epoch": 1.6536776573668435, + "grad_norm": 1.298650586457363, + "learning_rate": 1.597410356325676e-07, + "loss": 0.41855669021606445, + "step": 7172 + }, + { + "epoch": 1.653908231496426, + "grad_norm": 1.6301682003715197, + "learning_rate": 1.5953438247489814e-07, + "loss": 0.43063706159591675, + "step": 7173 + }, + { + "epoch": 1.6541388056260087, + "grad_norm": 1.556025937846025, + "learning_rate": 1.59327851487424e-07, + "loss": 0.3954850435256958, + "step": 7174 + }, + { + "epoch": 1.6543693797555914, + "grad_norm": 1.6096102290125367, + "learning_rate": 1.591214427001667e-07, + "loss": 0.4497464895248413, + "step": 7175 + }, + { + "epoch": 1.6545999538851741, + "grad_norm": 1.573427243133678, + "learning_rate": 1.5891515614312967e-07, + "loss": 0.47012704610824585, + "step": 7176 + }, + { + "epoch": 1.6548305280147568, + "grad_norm": 1.345166831078004, + "learning_rate": 1.5870899184629872e-07, + "loss": 0.399054616689682, + "step": 7177 + }, + { + "epoch": 1.6550611021443395, + "grad_norm": 1.68897296856965, + "learning_rate": 1.5850294983964208e-07, + "loss": 0.41277164220809937, + "step": 7178 + }, + { + "epoch": 1.655291676273922, + "grad_norm": 1.6410807386564468, + "learning_rate": 1.5829703015311013e-07, + "loss": 0.4735640287399292, + "step": 7179 + }, + { + "epoch": 1.6555222504035048, + "grad_norm": 1.5414168893805387, + "learning_rate": 1.5809123281663516e-07, + "loss": 0.4244140386581421, + "step": 7180 + }, + { + "epoch": 1.6557528245330873, + "grad_norm": 1.6196858148033184, + "learning_rate": 1.5788555786013212e-07, + "loss": 0.4291320741176605, + "step": 7181 + }, + { + 
"epoch": 1.65598339866267, + "grad_norm": 1.8656270771434302, + "learning_rate": 1.576800053134979e-07, + "loss": 0.3965643048286438, + "step": 7182 + }, + { + "epoch": 1.6562139727922527, + "grad_norm": 1.5939688831505687, + "learning_rate": 1.5747457520661123e-07, + "loss": 0.4087764620780945, + "step": 7183 + }, + { + "epoch": 1.6564445469218354, + "grad_norm": 1.523375144006796, + "learning_rate": 1.5726926756933411e-07, + "loss": 0.4207920432090759, + "step": 7184 + }, + { + "epoch": 1.6566751210514181, + "grad_norm": 1.757376584691626, + "learning_rate": 1.570640824315095e-07, + "loss": 0.34311753511428833, + "step": 7185 + }, + { + "epoch": 1.6569056951810008, + "grad_norm": 2.079059544313622, + "learning_rate": 1.5685901982296345e-07, + "loss": 0.44728145003318787, + "step": 7186 + }, + { + "epoch": 1.6571362693105833, + "grad_norm": 1.6933442739443483, + "learning_rate": 1.5665407977350386e-07, + "loss": 0.38300156593322754, + "step": 7187 + }, + { + "epoch": 1.657366843440166, + "grad_norm": 1.4613322908312483, + "learning_rate": 1.56449262312921e-07, + "loss": 0.32724204659461975, + "step": 7188 + }, + { + "epoch": 1.6575974175697485, + "grad_norm": 1.5277123552551555, + "learning_rate": 1.562445674709868e-07, + "loss": 0.4812743067741394, + "step": 7189 + }, + { + "epoch": 1.6578279916993313, + "grad_norm": 1.279031260784297, + "learning_rate": 1.5603999527745615e-07, + "loss": 0.3974485397338867, + "step": 7190 + }, + { + "epoch": 1.658058565828914, + "grad_norm": 1.729819799365075, + "learning_rate": 1.5583554576206536e-07, + "loss": 0.5058138370513916, + "step": 7191 + }, + { + "epoch": 1.6582891399584967, + "grad_norm": 1.451214505055382, + "learning_rate": 1.5563121895453323e-07, + "loss": 0.4442358613014221, + "step": 7192 + }, + { + "epoch": 1.6585197140880794, + "grad_norm": 1.6317499919466611, + "learning_rate": 1.5542701488456077e-07, + "loss": 0.35400623083114624, + "step": 7193 + }, + { + "epoch": 1.6587502882176621, + "grad_norm": 
1.8335890419904581, + "learning_rate": 1.5522293358183125e-07, + "loss": 0.5046352744102478, + "step": 7194 + }, + { + "epoch": 1.6589808623472446, + "grad_norm": 1.8150914477063191, + "learning_rate": 1.5501897507601015e-07, + "loss": 0.45344769954681396, + "step": 7195 + }, + { + "epoch": 1.6592114364768273, + "grad_norm": 1.7111771949579255, + "learning_rate": 1.548151393967444e-07, + "loss": 0.4251500368118286, + "step": 7196 + }, + { + "epoch": 1.6594420106064098, + "grad_norm": 1.4323459769713944, + "learning_rate": 1.5461142657366399e-07, + "loss": 0.3728788495063782, + "step": 7197 + }, + { + "epoch": 1.6596725847359926, + "grad_norm": 1.5246938682723656, + "learning_rate": 1.5440783663638036e-07, + "loss": 0.3143829107284546, + "step": 7198 + }, + { + "epoch": 1.6599031588655753, + "grad_norm": 1.3416076020806418, + "learning_rate": 1.5420436961448758e-07, + "loss": 0.5070813894271851, + "step": 7199 + }, + { + "epoch": 1.660133732995158, + "grad_norm": 1.2380684135092845, + "learning_rate": 1.5400102553756145e-07, + "loss": 0.3644014000892639, + "step": 7200 + }, + { + "epoch": 1.6603643071247407, + "grad_norm": 2.973338937285917, + "learning_rate": 1.5379780443516023e-07, + "loss": 0.4120270609855652, + "step": 7201 + }, + { + "epoch": 1.6605948812543234, + "grad_norm": 1.6150469405356445, + "learning_rate": 1.5359470633682425e-07, + "loss": 0.4327865242958069, + "step": 7202 + }, + { + "epoch": 1.660825455383906, + "grad_norm": 2.011470811225138, + "learning_rate": 1.5339173127207562e-07, + "loss": 0.626624584197998, + "step": 7203 + }, + { + "epoch": 1.6610560295134886, + "grad_norm": 1.6601868604564274, + "learning_rate": 1.5318887927041913e-07, + "loss": 0.45536088943481445, + "step": 7204 + }, + { + "epoch": 1.6612866036430711, + "grad_norm": 1.6789895391694964, + "learning_rate": 1.52986150361341e-07, + "loss": 0.5306276082992554, + "step": 7205 + }, + { + "epoch": 1.6615171777726538, + "grad_norm": 1.5374267124283623, + "learning_rate": 
1.5278354457431043e-07, + "loss": 0.4263244867324829, + "step": 7206 + }, + { + "epoch": 1.6617477519022366, + "grad_norm": 1.5390387444640852, + "learning_rate": 1.5258106193877762e-07, + "loss": 0.4578266143798828, + "step": 7207 + }, + { + "epoch": 1.6619783260318193, + "grad_norm": 1.4963429405053086, + "learning_rate": 1.5237870248417605e-07, + "loss": 0.5120365619659424, + "step": 7208 + }, + { + "epoch": 1.662208900161402, + "grad_norm": 1.7987725718508283, + "learning_rate": 1.521764662399202e-07, + "loss": 0.4491463005542755, + "step": 7209 + }, + { + "epoch": 1.6624394742909847, + "grad_norm": 1.588713571736857, + "learning_rate": 1.5197435323540752e-07, + "loss": 0.4810635447502136, + "step": 7210 + }, + { + "epoch": 1.6626700484205672, + "grad_norm": 1.549550087406024, + "learning_rate": 1.5177236350001722e-07, + "loss": 0.4250200390815735, + "step": 7211 + }, + { + "epoch": 1.66290062255015, + "grad_norm": 1.8619243359226805, + "learning_rate": 1.515704970631102e-07, + "loss": 0.49981385469436646, + "step": 7212 + }, + { + "epoch": 1.6631311966797324, + "grad_norm": 1.621928409701738, + "learning_rate": 1.5136875395403027e-07, + "loss": 0.40204358100891113, + "step": 7213 + }, + { + "epoch": 1.6633617708093151, + "grad_norm": 1.504987607563178, + "learning_rate": 1.5116713420210236e-07, + "loss": 0.514127254486084, + "step": 7214 + }, + { + "epoch": 1.6635923449388978, + "grad_norm": 1.8745773841611948, + "learning_rate": 1.509656378366343e-07, + "loss": 0.5119338631629944, + "step": 7215 + }, + { + "epoch": 1.6638229190684806, + "grad_norm": 1.6137446017437618, + "learning_rate": 1.507642648869153e-07, + "loss": 0.45031970739364624, + "step": 7216 + }, + { + "epoch": 1.6640534931980633, + "grad_norm": 1.427878863576358, + "learning_rate": 1.5056301538221716e-07, + "loss": 0.4503582715988159, + "step": 7217 + }, + { + "epoch": 1.664284067327646, + "grad_norm": 1.4651953746761925, + "learning_rate": 1.503618893517935e-07, + "loss": 0.38793227076530457, 
+ "step": 7218 + }, + { + "epoch": 1.6645146414572285, + "grad_norm": 1.4683280962315126, + "learning_rate": 1.5016088682488026e-07, + "loss": 0.4446987211704254, + "step": 7219 + }, + { + "epoch": 1.6647452155868112, + "grad_norm": 1.7835855909787117, + "learning_rate": 1.4996000783069485e-07, + "loss": 0.4687119722366333, + "step": 7220 + }, + { + "epoch": 1.6649757897163937, + "grad_norm": 1.6205230957470973, + "learning_rate": 1.4975925239843734e-07, + "loss": 0.48283010721206665, + "step": 7221 + }, + { + "epoch": 1.6652063638459764, + "grad_norm": 1.630894562773258, + "learning_rate": 1.4955862055728941e-07, + "loss": 0.510201632976532, + "step": 7222 + }, + { + "epoch": 1.6654369379755591, + "grad_norm": 1.4932233099831633, + "learning_rate": 1.4935811233641471e-07, + "loss": 0.4070482850074768, + "step": 7223 + }, + { + "epoch": 1.6656675121051419, + "grad_norm": 1.5683915035975688, + "learning_rate": 1.4915772776495948e-07, + "loss": 0.44347989559173584, + "step": 7224 + }, + { + "epoch": 1.6658980862347246, + "grad_norm": 1.6817444257008654, + "learning_rate": 1.4895746687205147e-07, + "loss": 0.4160166382789612, + "step": 7225 + }, + { + "epoch": 1.6661286603643073, + "grad_norm": 1.5428277862719844, + "learning_rate": 1.4875732968680098e-07, + "loss": 0.39939236640930176, + "step": 7226 + }, + { + "epoch": 1.6663592344938898, + "grad_norm": 1.8461591057744162, + "learning_rate": 1.4855731623829936e-07, + "loss": 0.4604174494743347, + "step": 7227 + }, + { + "epoch": 1.6665898086234725, + "grad_norm": 1.5963571116977615, + "learning_rate": 1.4835742655562134e-07, + "loss": 0.4691208004951477, + "step": 7228 + }, + { + "epoch": 1.666820382753055, + "grad_norm": 1.358957710417088, + "learning_rate": 1.481576606678222e-07, + "loss": 0.4146147668361664, + "step": 7229 + }, + { + "epoch": 1.6670509568826377, + "grad_norm": 1.4681059084163257, + "learning_rate": 1.4795801860394041e-07, + "loss": 0.4064391255378723, + "step": 7230 + }, + { + "epoch": 
1.6672815310122204, + "grad_norm": 1.233349352710464, + "learning_rate": 1.4775850039299587e-07, + "loss": 0.3696960210800171, + "step": 7231 + }, + { + "epoch": 1.6675121051418031, + "grad_norm": 1.763624641268307, + "learning_rate": 1.4755910606399023e-07, + "loss": 0.4356287121772766, + "step": 7232 + }, + { + "epoch": 1.6677426792713859, + "grad_norm": 1.6119962512147328, + "learning_rate": 1.473598356459078e-07, + "loss": 0.39327436685562134, + "step": 7233 + }, + { + "epoch": 1.6679732534009684, + "grad_norm": 1.4528281796334948, + "learning_rate": 1.4716068916771452e-07, + "loss": 0.4722225069999695, + "step": 7234 + }, + { + "epoch": 1.668203827530551, + "grad_norm": 1.3954919737652625, + "learning_rate": 1.4696166665835852e-07, + "loss": 0.3645583987236023, + "step": 7235 + }, + { + "epoch": 1.6684344016601336, + "grad_norm": 1.628738998914794, + "learning_rate": 1.4676276814676935e-07, + "loss": 0.4153117537498474, + "step": 7236 + }, + { + "epoch": 1.6686649757897163, + "grad_norm": 1.2987847859472657, + "learning_rate": 1.4656399366185933e-07, + "loss": 0.3470612168312073, + "step": 7237 + }, + { + "epoch": 1.668895549919299, + "grad_norm": 1.424067964832139, + "learning_rate": 1.4636534323252203e-07, + "loss": 0.3934207260608673, + "step": 7238 + }, + { + "epoch": 1.6691261240488817, + "grad_norm": 1.6191654953115664, + "learning_rate": 1.4616681688763355e-07, + "loss": 0.35530412197113037, + "step": 7239 + }, + { + "epoch": 1.6693566981784644, + "grad_norm": 1.5867473768730196, + "learning_rate": 1.4596841465605136e-07, + "loss": 0.5218726396560669, + "step": 7240 + }, + { + "epoch": 1.6695872723080472, + "grad_norm": 1.9070671037743527, + "learning_rate": 1.4577013656661542e-07, + "loss": 0.4287494421005249, + "step": 7241 + }, + { + "epoch": 1.6698178464376296, + "grad_norm": 2.099754040079973, + "learning_rate": 1.4557198264814775e-07, + "loss": 0.5161805152893066, + "step": 7242 + }, + { + "epoch": 1.6700484205672124, + "grad_norm": 
1.485709070131558, + "learning_rate": 1.4537395292945153e-07, + "loss": 0.4843006730079651, + "step": 7243 + }, + { + "epoch": 1.6702789946967949, + "grad_norm": 1.416657421952009, + "learning_rate": 1.4517604743931288e-07, + "loss": 0.526993989944458, + "step": 7244 + }, + { + "epoch": 1.6705095688263776, + "grad_norm": 1.318696888956493, + "learning_rate": 1.4497826620649888e-07, + "loss": 0.43705734610557556, + "step": 7245 + }, + { + "epoch": 1.6707401429559603, + "grad_norm": 1.626300355229789, + "learning_rate": 1.4478060925975942e-07, + "loss": 0.6001747846603394, + "step": 7246 + }, + { + "epoch": 1.670970717085543, + "grad_norm": 1.6701240840694564, + "learning_rate": 1.4458307662782564e-07, + "loss": 0.4041635990142822, + "step": 7247 + }, + { + "epoch": 1.6712012912151257, + "grad_norm": 1.6291301094782007, + "learning_rate": 1.4438566833941112e-07, + "loss": 0.4425908923149109, + "step": 7248 + }, + { + "epoch": 1.6714318653447084, + "grad_norm": 1.8234242321709921, + "learning_rate": 1.4418838442321102e-07, + "loss": 0.5202267169952393, + "step": 7249 + }, + { + "epoch": 1.671662439474291, + "grad_norm": 1.3646967283137599, + "learning_rate": 1.4399122490790293e-07, + "loss": 0.44352006912231445, + "step": 7250 + }, + { + "epoch": 1.6718930136038737, + "grad_norm": 1.5745296606833632, + "learning_rate": 1.4379418982214542e-07, + "loss": 0.4757179021835327, + "step": 7251 + }, + { + "epoch": 1.6721235877334562, + "grad_norm": 2.0125776677757825, + "learning_rate": 1.4359727919457998e-07, + "loss": 0.4748988747596741, + "step": 7252 + }, + { + "epoch": 1.6723541618630389, + "grad_norm": 1.4390886859105494, + "learning_rate": 1.434004930538294e-07, + "loss": 0.4280398190021515, + "step": 7253 + }, + { + "epoch": 1.6725847359926216, + "grad_norm": 1.5844583735943714, + "learning_rate": 1.4320383142849834e-07, + "loss": 0.4959871172904968, + "step": 7254 + }, + { + "epoch": 1.6728153101222043, + "grad_norm": 1.6551218088905322, + "learning_rate": 
1.4300729434717396e-07, + "loss": 0.506413996219635, + "step": 7255 + }, + { + "epoch": 1.673045884251787, + "grad_norm": 1.5894513628120581, + "learning_rate": 1.4281088183842448e-07, + "loss": 0.4723675847053528, + "step": 7256 + }, + { + "epoch": 1.6732764583813697, + "grad_norm": 1.5735532616627814, + "learning_rate": 1.4261459393080076e-07, + "loss": 0.41801339387893677, + "step": 7257 + }, + { + "epoch": 1.6735070325109522, + "grad_norm": 1.651784117733762, + "learning_rate": 1.424184306528351e-07, + "loss": 0.4463369846343994, + "step": 7258 + }, + { + "epoch": 1.673737606640535, + "grad_norm": 1.6205372576102755, + "learning_rate": 1.422223920330421e-07, + "loss": 0.4167429506778717, + "step": 7259 + }, + { + "epoch": 1.6739681807701174, + "grad_norm": 1.448285732733219, + "learning_rate": 1.420264780999174e-07, + "loss": 0.48808401823043823, + "step": 7260 + }, + { + "epoch": 1.6741987548997002, + "grad_norm": 1.7994342785579152, + "learning_rate": 1.4183068888193973e-07, + "loss": 0.515659749507904, + "step": 7261 + }, + { + "epoch": 1.6744293290292829, + "grad_norm": 1.6582236339460064, + "learning_rate": 1.416350244075688e-07, + "loss": 0.4393026530742645, + "step": 7262 + }, + { + "epoch": 1.6746599031588656, + "grad_norm": 1.6750398739214198, + "learning_rate": 1.4143948470524602e-07, + "loss": 0.35053056478500366, + "step": 7263 + }, + { + "epoch": 1.6748904772884483, + "grad_norm": 1.1872706234379884, + "learning_rate": 1.4124406980339532e-07, + "loss": 0.35598453879356384, + "step": 7264 + }, + { + "epoch": 1.675121051418031, + "grad_norm": 1.747342634360751, + "learning_rate": 1.410487797304224e-07, + "loss": 0.47989165782928467, + "step": 7265 + }, + { + "epoch": 1.6753516255476135, + "grad_norm": 1.4767801179152846, + "learning_rate": 1.408536145147148e-07, + "loss": 0.4621499180793762, + "step": 7266 + }, + { + "epoch": 1.6755821996771962, + "grad_norm": 1.4469255776490486, + "learning_rate": 1.4065857418464122e-07, + "loss": 
0.40567925572395325, + "step": 7267 + }, + { + "epoch": 1.6758127738067787, + "grad_norm": 2.121901896007684, + "learning_rate": 1.4046365876855326e-07, + "loss": 0.38889849185943604, + "step": 7268 + }, + { + "epoch": 1.6760433479363614, + "grad_norm": 1.8036845925466258, + "learning_rate": 1.4026886829478345e-07, + "loss": 0.516187846660614, + "step": 7269 + }, + { + "epoch": 1.6762739220659442, + "grad_norm": 1.3670995724086425, + "learning_rate": 1.4007420279164706e-07, + "loss": 0.4007910192012787, + "step": 7270 + }, + { + "epoch": 1.6765044961955269, + "grad_norm": 1.4513245632029468, + "learning_rate": 1.3987966228744007e-07, + "loss": 0.4426886737346649, + "step": 7271 + }, + { + "epoch": 1.6767350703251096, + "grad_norm": 1.7767592903800882, + "learning_rate": 1.3968524681044114e-07, + "loss": 0.46890369057655334, + "step": 7272 + }, + { + "epoch": 1.6769656444546923, + "grad_norm": 1.714201330640179, + "learning_rate": 1.3949095638891096e-07, + "loss": 0.510369598865509, + "step": 7273 + }, + { + "epoch": 1.6771962185842748, + "grad_norm": 1.697492362317676, + "learning_rate": 1.3929679105109106e-07, + "loss": 0.47810226678848267, + "step": 7274 + }, + { + "epoch": 1.6774267927138575, + "grad_norm": 1.6234301902278867, + "learning_rate": 1.3910275082520572e-07, + "loss": 0.48592591285705566, + "step": 7275 + }, + { + "epoch": 1.67765736684344, + "grad_norm": 1.5107060260742486, + "learning_rate": 1.3890883573946021e-07, + "loss": 0.4664943814277649, + "step": 7276 + }, + { + "epoch": 1.6778879409730227, + "grad_norm": 1.6514095493299281, + "learning_rate": 1.3871504582204263e-07, + "loss": 0.47146645188331604, + "step": 7277 + }, + { + "epoch": 1.6781185151026055, + "grad_norm": 1.615997642769361, + "learning_rate": 1.3852138110112166e-07, + "loss": 0.5171671509742737, + "step": 7278 + }, + { + "epoch": 1.6783490892321882, + "grad_norm": 1.8275491234958787, + "learning_rate": 1.3832784160484913e-07, + "loss": 0.45887336134910583, + "step": 7279 + }, + { 
+ "epoch": 1.678579663361771, + "grad_norm": 1.494861700798582, + "learning_rate": 1.3813442736135728e-07, + "loss": 0.4363539516925812, + "step": 7280 + }, + { + "epoch": 1.6788102374913536, + "grad_norm": 2.0171892009876147, + "learning_rate": 1.379411383987612e-07, + "loss": 0.4626097083091736, + "step": 7281 + }, + { + "epoch": 1.679040811620936, + "grad_norm": 1.8196525383976765, + "learning_rate": 1.3774797474515766e-07, + "loss": 0.5939204096794128, + "step": 7282 + }, + { + "epoch": 1.6792713857505188, + "grad_norm": 1.6878435890648014, + "learning_rate": 1.3755493642862437e-07, + "loss": 0.5463666915893555, + "step": 7283 + }, + { + "epoch": 1.6795019598801013, + "grad_norm": 1.622691460463702, + "learning_rate": 1.3736202347722182e-07, + "loss": 0.3634001910686493, + "step": 7284 + }, + { + "epoch": 1.679732534009684, + "grad_norm": 1.6327202188647956, + "learning_rate": 1.3716923591899166e-07, + "loss": 0.39512360095977783, + "step": 7285 + }, + { + "epoch": 1.6799631081392667, + "grad_norm": 1.3361978857608434, + "learning_rate": 1.3697657378195772e-07, + "loss": 0.3858473300933838, + "step": 7286 + }, + { + "epoch": 1.6801936822688495, + "grad_norm": 1.4527844976472322, + "learning_rate": 1.36784037094125e-07, + "loss": 0.473757266998291, + "step": 7287 + }, + { + "epoch": 1.6804242563984322, + "grad_norm": 1.410877918262981, + "learning_rate": 1.3659162588348107e-07, + "loss": 0.41679126024246216, + "step": 7288 + }, + { + "epoch": 1.680654830528015, + "grad_norm": 1.7135792249847552, + "learning_rate": 1.363993401779946e-07, + "loss": 0.4267998933792114, + "step": 7289 + }, + { + "epoch": 1.6808854046575974, + "grad_norm": 1.6476835268765473, + "learning_rate": 1.3620718000561648e-07, + "loss": 0.5453667044639587, + "step": 7290 + }, + { + "epoch": 1.68111597878718, + "grad_norm": 1.4347316593862658, + "learning_rate": 1.3601514539427895e-07, + "loss": 0.3882933259010315, + "step": 7291 + }, + { + "epoch": 1.6813465529167626, + "grad_norm": 
1.7177796725752086, + "learning_rate": 1.3582323637189653e-07, + "loss": 0.5565635561943054, + "step": 7292 + }, + { + "epoch": 1.6815771270463453, + "grad_norm": 1.448665873125515, + "learning_rate": 1.356314529663647e-07, + "loss": 0.49807024002075195, + "step": 7293 + }, + { + "epoch": 1.681807701175928, + "grad_norm": 1.5449122885779156, + "learning_rate": 1.3543979520556116e-07, + "loss": 0.40868130326271057, + "step": 7294 + }, + { + "epoch": 1.6820382753055108, + "grad_norm": 1.4045709349742252, + "learning_rate": 1.352482631173455e-07, + "loss": 0.46088406443595886, + "step": 7295 + }, + { + "epoch": 1.6822688494350935, + "grad_norm": 1.7658846162202777, + "learning_rate": 1.3505685672955869e-07, + "loss": 0.44346722960472107, + "step": 7296 + }, + { + "epoch": 1.6824994235646762, + "grad_norm": 1.3703801713050607, + "learning_rate": 1.348655760700239e-07, + "loss": 0.36585044860839844, + "step": 7297 + }, + { + "epoch": 1.6827299976942587, + "grad_norm": 1.8199719530329925, + "learning_rate": 1.3467442116654536e-07, + "loss": 0.46082472801208496, + "step": 7298 + }, + { + "epoch": 1.6829605718238414, + "grad_norm": 1.8043564550526412, + "learning_rate": 1.3448339204690974e-07, + "loss": 0.5011709928512573, + "step": 7299 + }, + { + "epoch": 1.683191145953424, + "grad_norm": 2.1355217293891378, + "learning_rate": 1.3429248873888454e-07, + "loss": 0.4382838010787964, + "step": 7300 + }, + { + "epoch": 1.6834217200830066, + "grad_norm": 1.4118543770807777, + "learning_rate": 1.3410171127022008e-07, + "loss": 0.35204610228538513, + "step": 7301 + }, + { + "epoch": 1.6836522942125893, + "grad_norm": 1.3718001359049319, + "learning_rate": 1.3391105966864745e-07, + "loss": 0.3915257453918457, + "step": 7302 + }, + { + "epoch": 1.683882868342172, + "grad_norm": 1.4102637825932318, + "learning_rate": 1.3372053396187967e-07, + "loss": 0.3945339322090149, + "step": 7303 + }, + { + "epoch": 1.6841134424717548, + "grad_norm": 1.7911618298179695, + "learning_rate": 
1.335301341776117e-07, + "loss": 0.48783642053604126, + "step": 7304 + }, + { + "epoch": 1.6843440166013375, + "grad_norm": 1.745012134293522, + "learning_rate": 1.333398603435203e-07, + "loss": 0.49026161432266235, + "step": 7305 + }, + { + "epoch": 1.68457459073092, + "grad_norm": 1.9699708710220791, + "learning_rate": 1.3314971248726358e-07, + "loss": 0.5035061836242676, + "step": 7306 + }, + { + "epoch": 1.6848051648605027, + "grad_norm": 1.7602149086036532, + "learning_rate": 1.3295969063648126e-07, + "loss": 0.5452826023101807, + "step": 7307 + }, + { + "epoch": 1.6850357389900852, + "grad_norm": 1.7088858518580703, + "learning_rate": 1.3276979481879524e-07, + "loss": 0.4609105885028839, + "step": 7308 + }, + { + "epoch": 1.685266313119668, + "grad_norm": 1.6869514802612067, + "learning_rate": 1.3258002506180855e-07, + "loss": 0.5799046754837036, + "step": 7309 + }, + { + "epoch": 1.6854968872492506, + "grad_norm": 1.6691103426337504, + "learning_rate": 1.3239038139310644e-07, + "loss": 0.42096465826034546, + "step": 7310 + }, + { + "epoch": 1.6857274613788333, + "grad_norm": 1.9781377178498367, + "learning_rate": 1.3220086384025508e-07, + "loss": 0.4741813540458679, + "step": 7311 + }, + { + "epoch": 1.685958035508416, + "grad_norm": 1.5972207301313162, + "learning_rate": 1.3201147243080302e-07, + "loss": 0.4872191250324249, + "step": 7312 + }, + { + "epoch": 1.6861886096379988, + "grad_norm": 1.7767879845396581, + "learning_rate": 1.3182220719228054e-07, + "loss": 0.5210198163986206, + "step": 7313 + }, + { + "epoch": 1.6864191837675813, + "grad_norm": 1.932834262840403, + "learning_rate": 1.3163306815219878e-07, + "loss": 0.4873948395252228, + "step": 7314 + }, + { + "epoch": 1.686649757897164, + "grad_norm": 1.723686253702064, + "learning_rate": 1.3144405533805136e-07, + "loss": 0.46856212615966797, + "step": 7315 + }, + { + "epoch": 1.6868803320267465, + "grad_norm": 1.549399332710726, + "learning_rate": 1.3125516877731279e-07, + "loss": 
0.3931645154953003, + "step": 7316 + }, + { + "epoch": 1.6871109061563292, + "grad_norm": 1.5988122745666866, + "learning_rate": 1.3106640849744023e-07, + "loss": 0.4473317861557007, + "step": 7317 + }, + { + "epoch": 1.687341480285912, + "grad_norm": 1.5841372684708825, + "learning_rate": 1.3087777452587124e-07, + "loss": 0.4499043822288513, + "step": 7318 + }, + { + "epoch": 1.6875720544154946, + "grad_norm": 1.6054649930580802, + "learning_rate": 1.30689266890026e-07, + "loss": 0.4992508292198181, + "step": 7319 + }, + { + "epoch": 1.6878026285450773, + "grad_norm": 1.426896936128743, + "learning_rate": 1.305008856173061e-07, + "loss": 0.4684743583202362, + "step": 7320 + }, + { + "epoch": 1.68803320267466, + "grad_norm": 1.7876602073965717, + "learning_rate": 1.303126307350948e-07, + "loss": 0.5543930530548096, + "step": 7321 + }, + { + "epoch": 1.6882637768042426, + "grad_norm": 1.3482084944505501, + "learning_rate": 1.3012450227075655e-07, + "loss": 0.3812211751937866, + "step": 7322 + }, + { + "epoch": 1.6884943509338253, + "grad_norm": 2.079165248146425, + "learning_rate": 1.299365002516377e-07, + "loss": 0.5455845594406128, + "step": 7323 + }, + { + "epoch": 1.6887249250634078, + "grad_norm": 1.3768890960712863, + "learning_rate": 1.2974862470506654e-07, + "loss": 0.4256778657436371, + "step": 7324 + }, + { + "epoch": 1.6889554991929905, + "grad_norm": 1.9468423520002898, + "learning_rate": 1.2956087565835228e-07, + "loss": 0.4973354637622833, + "step": 7325 + }, + { + "epoch": 1.6891860733225732, + "grad_norm": 1.5779840439512345, + "learning_rate": 1.2937325313878666e-07, + "loss": 0.5141343474388123, + "step": 7326 + }, + { + "epoch": 1.689416647452156, + "grad_norm": 1.5179632497576485, + "learning_rate": 1.2918575717364178e-07, + "loss": 0.3872978687286377, + "step": 7327 + }, + { + "epoch": 1.6896472215817386, + "grad_norm": 1.3857087225021212, + "learning_rate": 1.2899838779017292e-07, + "loss": 0.4333486557006836, + "step": 7328 + }, + { + "epoch": 
1.6898777957113214, + "grad_norm": 1.5624646221048997, + "learning_rate": 1.2881114501561553e-07, + "loss": 0.42979496717453003, + "step": 7329 + }, + { + "epoch": 1.6901083698409038, + "grad_norm": 1.6512939392276094, + "learning_rate": 1.2862402887718771e-07, + "loss": 0.43296414613723755, + "step": 7330 + }, + { + "epoch": 1.6903389439704866, + "grad_norm": 1.4822998528875215, + "learning_rate": 1.2843703940208816e-07, + "loss": 0.41763681173324585, + "step": 7331 + }, + { + "epoch": 1.690569518100069, + "grad_norm": 1.4433304691783968, + "learning_rate": 1.2825017661749814e-07, + "loss": 0.4531592130661011, + "step": 7332 + }, + { + "epoch": 1.6908000922296518, + "grad_norm": 1.5515786608723572, + "learning_rate": 1.2806344055057995e-07, + "loss": 0.4608149826526642, + "step": 7333 + }, + { + "epoch": 1.6910306663592345, + "grad_norm": 1.5678716271625897, + "learning_rate": 1.2787683122847726e-07, + "loss": 0.4298786520957947, + "step": 7334 + }, + { + "epoch": 1.6912612404888172, + "grad_norm": 1.5882305453896473, + "learning_rate": 1.2769034867831586e-07, + "loss": 0.4404297471046448, + "step": 7335 + }, + { + "epoch": 1.6914918146184, + "grad_norm": 1.590662947019878, + "learning_rate": 1.2750399292720281e-07, + "loss": 0.3857702910900116, + "step": 7336 + }, + { + "epoch": 1.6917223887479826, + "grad_norm": 1.5092920813034143, + "learning_rate": 1.2731776400222716e-07, + "loss": 0.351214200258255, + "step": 7337 + }, + { + "epoch": 1.6919529628775651, + "grad_norm": 1.6618460717985095, + "learning_rate": 1.2713166193045854e-07, + "loss": 0.4711484909057617, + "step": 7338 + }, + { + "epoch": 1.6921835370071479, + "grad_norm": 1.605912014604012, + "learning_rate": 1.2694568673894946e-07, + "loss": 0.4819946587085724, + "step": 7339 + }, + { + "epoch": 1.6924141111367303, + "grad_norm": 1.5366035327097678, + "learning_rate": 1.267598384547327e-07, + "loss": 0.39870262145996094, + "step": 7340 + }, + { + "epoch": 1.692644685266313, + "grad_norm": 
1.410709311062986, + "learning_rate": 1.265741171048237e-07, + "loss": 0.4775997996330261, + "step": 7341 + }, + { + "epoch": 1.6928752593958958, + "grad_norm": 1.5031428119722987, + "learning_rate": 1.2638852271621836e-07, + "loss": 0.4166836738586426, + "step": 7342 + }, + { + "epoch": 1.6931058335254785, + "grad_norm": 1.362546283009112, + "learning_rate": 1.2620305531589514e-07, + "loss": 0.396761953830719, + "step": 7343 + }, + { + "epoch": 1.6933364076550612, + "grad_norm": 1.5811036971551204, + "learning_rate": 1.260177149308136e-07, + "loss": 0.36929184198379517, + "step": 7344 + }, + { + "epoch": 1.6935669817846437, + "grad_norm": 1.6142308009439483, + "learning_rate": 1.2583250158791459e-07, + "loss": 0.4664369821548462, + "step": 7345 + }, + { + "epoch": 1.6937975559142264, + "grad_norm": 1.4490673957983151, + "learning_rate": 1.2564741531412115e-07, + "loss": 0.40877625346183777, + "step": 7346 + }, + { + "epoch": 1.694028130043809, + "grad_norm": 1.3363670323915413, + "learning_rate": 1.254624561363369e-07, + "loss": 0.4282684922218323, + "step": 7347 + }, + { + "epoch": 1.6942587041733916, + "grad_norm": 1.7781191335343183, + "learning_rate": 1.2527762408144805e-07, + "loss": 0.5430412292480469, + "step": 7348 + }, + { + "epoch": 1.6944892783029744, + "grad_norm": 1.7384245962384524, + "learning_rate": 1.2509291917632147e-07, + "loss": 0.45990923047065735, + "step": 7349 + }, + { + "epoch": 1.694719852432557, + "grad_norm": 1.5699544039589348, + "learning_rate": 1.2490834144780593e-07, + "loss": 0.38062262535095215, + "step": 7350 + }, + { + "epoch": 1.6949504265621398, + "grad_norm": 1.5427808320923257, + "learning_rate": 1.2472389092273172e-07, + "loss": 0.4704701900482178, + "step": 7351 + }, + { + "epoch": 1.6951810006917225, + "grad_norm": 1.3215044901700805, + "learning_rate": 1.2453956762791084e-07, + "loss": 0.4439951181411743, + "step": 7352 + }, + { + "epoch": 1.695411574821305, + "grad_norm": 1.6827848110964911, + "learning_rate": 
1.2435537159013632e-07, + "loss": 0.49405014514923096, + "step": 7353 + }, + { + "epoch": 1.6956421489508877, + "grad_norm": 1.4071924274505998, + "learning_rate": 1.2417130283618282e-07, + "loss": 0.4282076060771942, + "step": 7354 + }, + { + "epoch": 1.6958727230804702, + "grad_norm": 1.4129187553888694, + "learning_rate": 1.2398736139280687e-07, + "loss": 0.43492811918258667, + "step": 7355 + }, + { + "epoch": 1.696103297210053, + "grad_norm": 1.550272919478409, + "learning_rate": 1.238035472867458e-07, + "loss": 0.37239378690719604, + "step": 7356 + }, + { + "epoch": 1.6963338713396356, + "grad_norm": 1.2721176079849843, + "learning_rate": 1.236198605447194e-07, + "loss": 0.39911961555480957, + "step": 7357 + }, + { + "epoch": 1.6965644454692184, + "grad_norm": 1.911188398718987, + "learning_rate": 1.2343630119342786e-07, + "loss": 0.4962255656719208, + "step": 7358 + }, + { + "epoch": 1.696795019598801, + "grad_norm": 1.3131623819116638, + "learning_rate": 1.2325286925955358e-07, + "loss": 0.37414759397506714, + "step": 7359 + }, + { + "epoch": 1.6970255937283838, + "grad_norm": 1.5092759235813635, + "learning_rate": 1.230695647697604e-07, + "loss": 0.41224929690361023, + "step": 7360 + }, + { + "epoch": 1.6972561678579663, + "grad_norm": 1.3964295729715615, + "learning_rate": 1.228863877506936e-07, + "loss": 0.43184489011764526, + "step": 7361 + }, + { + "epoch": 1.697486741987549, + "grad_norm": 1.6991026917946972, + "learning_rate": 1.227033382289795e-07, + "loss": 0.4741829037666321, + "step": 7362 + }, + { + "epoch": 1.6977173161171315, + "grad_norm": 1.677947901828469, + "learning_rate": 1.2252041623122643e-07, + "loss": 0.43224620819091797, + "step": 7363 + }, + { + "epoch": 1.6979478902467142, + "grad_norm": 1.678576477296345, + "learning_rate": 1.2233762178402386e-07, + "loss": 0.46645525097846985, + "step": 7364 + }, + { + "epoch": 1.698178464376297, + "grad_norm": 1.4201537921120515, + "learning_rate": 1.2215495491394256e-07, + "loss": 
0.4237707555294037, + "step": 7365 + }, + { + "epoch": 1.6984090385058797, + "grad_norm": 1.3069690432597363, + "learning_rate": 1.2197241564753535e-07, + "loss": 0.36378395557403564, + "step": 7366 + }, + { + "epoch": 1.6986396126354624, + "grad_norm": 1.6387935949488672, + "learning_rate": 1.21790004011336e-07, + "loss": 0.4564269185066223, + "step": 7367 + }, + { + "epoch": 1.698870186765045, + "grad_norm": 1.3009015849639454, + "learning_rate": 1.2160772003186027e-07, + "loss": 0.4492420256137848, + "step": 7368 + }, + { + "epoch": 1.6991007608946276, + "grad_norm": 1.6097888974991954, + "learning_rate": 1.214255637356043e-07, + "loss": 0.515146017074585, + "step": 7369 + }, + { + "epoch": 1.6993313350242103, + "grad_norm": 1.5565943453492384, + "learning_rate": 1.2124353514904707e-07, + "loss": 0.41473329067230225, + "step": 7370 + }, + { + "epoch": 1.6995619091537928, + "grad_norm": 1.6571527829218886, + "learning_rate": 1.210616342986477e-07, + "loss": 0.4408412575721741, + "step": 7371 + }, + { + "epoch": 1.6997924832833755, + "grad_norm": 1.6546450900594125, + "learning_rate": 1.208798612108477e-07, + "loss": 0.5370820760726929, + "step": 7372 + }, + { + "epoch": 1.7000230574129582, + "grad_norm": 1.502975927661507, + "learning_rate": 1.206982159120693e-07, + "loss": 0.46518170833587646, + "step": 7373 + }, + { + "epoch": 1.700253631542541, + "grad_norm": 1.5801444025292624, + "learning_rate": 1.205166984287167e-07, + "loss": 0.45063477754592896, + "step": 7374 + }, + { + "epoch": 1.7004842056721237, + "grad_norm": 1.4109266758667123, + "learning_rate": 1.2033530878717546e-07, + "loss": 0.47391965985298157, + "step": 7375 + }, + { + "epoch": 1.7007147798017064, + "grad_norm": 1.680591382104731, + "learning_rate": 1.2015404701381205e-07, + "loss": 0.45812156796455383, + "step": 7376 + }, + { + "epoch": 1.7009453539312889, + "grad_norm": 1.7661450796417113, + "learning_rate": 1.1997291313497503e-07, + "loss": 0.5174708366394043, + "step": 7377 + }, + { + 
"epoch": 1.7011759280608716, + "grad_norm": 1.2379321910437706, + "learning_rate": 1.1979190717699373e-07, + "loss": 0.3412814736366272, + "step": 7378 + }, + { + "epoch": 1.701406502190454, + "grad_norm": 1.6619687091053885, + "learning_rate": 1.196110291661796e-07, + "loss": 0.41912171244621277, + "step": 7379 + }, + { + "epoch": 1.7016370763200368, + "grad_norm": 1.7384039938738447, + "learning_rate": 1.1943027912882464e-07, + "loss": 0.5569772720336914, + "step": 7380 + }, + { + "epoch": 1.7018676504496195, + "grad_norm": 1.309448309717786, + "learning_rate": 1.1924965709120304e-07, + "loss": 0.40875375270843506, + "step": 7381 + }, + { + "epoch": 1.7020982245792022, + "grad_norm": 1.5803953469974217, + "learning_rate": 1.1906916307956983e-07, + "loss": 0.46906760334968567, + "step": 7382 + }, + { + "epoch": 1.702328798708785, + "grad_norm": 1.2850228520937832, + "learning_rate": 1.1888879712016165e-07, + "loss": 0.40830397605895996, + "step": 7383 + }, + { + "epoch": 1.7025593728383677, + "grad_norm": 1.4770811279187035, + "learning_rate": 1.1870855923919687e-07, + "loss": 0.4051646590232849, + "step": 7384 + }, + { + "epoch": 1.7027899469679502, + "grad_norm": 1.696009847928002, + "learning_rate": 1.1852844946287432e-07, + "loss": 0.5042610764503479, + "step": 7385 + }, + { + "epoch": 1.7030205210975329, + "grad_norm": 1.6262740295484197, + "learning_rate": 1.183484678173754e-07, + "loss": 0.5304923057556152, + "step": 7386 + }, + { + "epoch": 1.7032510952271154, + "grad_norm": 1.2604579461831944, + "learning_rate": 1.1816861432886171e-07, + "loss": 0.443366676568985, + "step": 7387 + }, + { + "epoch": 1.703481669356698, + "grad_norm": 1.3836719865657088, + "learning_rate": 1.1798888902347714e-07, + "loss": 0.4527779817581177, + "step": 7388 + }, + { + "epoch": 1.7037122434862808, + "grad_norm": 1.3616715508883823, + "learning_rate": 1.1780929192734634e-07, + "loss": 0.4277183413505554, + "step": 7389 + }, + { + "epoch": 1.7039428176158635, + "grad_norm": 
1.3714415020573154, + "learning_rate": 1.1762982306657577e-07, + "loss": 0.4908677637577057, + "step": 7390 + }, + { + "epoch": 1.7041733917454462, + "grad_norm": 1.4373179697113392, + "learning_rate": 1.1745048246725286e-07, + "loss": 0.398892879486084, + "step": 7391 + }, + { + "epoch": 1.704403965875029, + "grad_norm": 1.801155926723525, + "learning_rate": 1.1727127015544691e-07, + "loss": 0.4654615521430969, + "step": 7392 + }, + { + "epoch": 1.7046345400046115, + "grad_norm": 1.6258673974312492, + "learning_rate": 1.1709218615720806e-07, + "loss": 0.4850313663482666, + "step": 7393 + }, + { + "epoch": 1.7048651141341942, + "grad_norm": 1.3854283292952871, + "learning_rate": 1.1691323049856772e-07, + "loss": 0.4036976099014282, + "step": 7394 + }, + { + "epoch": 1.7050956882637767, + "grad_norm": 1.6824325261066553, + "learning_rate": 1.167344032055394e-07, + "loss": 0.39174383878707886, + "step": 7395 + }, + { + "epoch": 1.7053262623933594, + "grad_norm": 1.49190685623753, + "learning_rate": 1.1655570430411699e-07, + "loss": 0.44915109872817993, + "step": 7396 + }, + { + "epoch": 1.705556836522942, + "grad_norm": 1.4487302731781821, + "learning_rate": 1.1637713382027636e-07, + "loss": 0.4720522165298462, + "step": 7397 + }, + { + "epoch": 1.7057874106525248, + "grad_norm": 1.5236154065511855, + "learning_rate": 1.1619869177997455e-07, + "loss": 0.4452325105667114, + "step": 7398 + }, + { + "epoch": 1.7060179847821075, + "grad_norm": 1.489108876491428, + "learning_rate": 1.1602037820915023e-07, + "loss": 0.4009271562099457, + "step": 7399 + }, + { + "epoch": 1.7062485589116902, + "grad_norm": 1.3320502296097492, + "learning_rate": 1.1584219313372257e-07, + "loss": 0.37518051266670227, + "step": 7400 + }, + { + "epoch": 1.7064791330412727, + "grad_norm": 1.5361245639590775, + "learning_rate": 1.1566413657959295e-07, + "loss": 0.42883241176605225, + "step": 7401 + }, + { + "epoch": 1.7067097071708555, + "grad_norm": 1.5311391941499002, + "learning_rate": 
1.1548620857264346e-07, + "loss": 0.4597551226615906, + "step": 7402 + }, + { + "epoch": 1.706940281300438, + "grad_norm": 1.4815045613998048, + "learning_rate": 1.1530840913873797e-07, + "loss": 0.5491876006126404, + "step": 7403 + }, + { + "epoch": 1.7071708554300207, + "grad_norm": 1.8810828492754625, + "learning_rate": 1.1513073830372122e-07, + "loss": 0.5632074475288391, + "step": 7404 + }, + { + "epoch": 1.7074014295596034, + "grad_norm": 1.557196455612015, + "learning_rate": 1.1495319609341947e-07, + "loss": 0.5251858234405518, + "step": 7405 + }, + { + "epoch": 1.707632003689186, + "grad_norm": 1.7979639485315768, + "learning_rate": 1.1477578253364028e-07, + "loss": 0.5388965606689453, + "step": 7406 + }, + { + "epoch": 1.7078625778187688, + "grad_norm": 1.7322317596816112, + "learning_rate": 1.145984976501726e-07, + "loss": 0.4429551959037781, + "step": 7407 + }, + { + "epoch": 1.7080931519483515, + "grad_norm": 1.5048923212213088, + "learning_rate": 1.144213414687868e-07, + "loss": 0.4702358841896057, + "step": 7408 + }, + { + "epoch": 1.708323726077934, + "grad_norm": 1.616629635802576, + "learning_rate": 1.1424431401523382e-07, + "loss": 0.4506569504737854, + "step": 7409 + }, + { + "epoch": 1.7085543002075168, + "grad_norm": 1.5722880063833475, + "learning_rate": 1.1406741531524689e-07, + "loss": 0.384244441986084, + "step": 7410 + }, + { + "epoch": 1.7087848743370992, + "grad_norm": 1.6254816299222574, + "learning_rate": 1.1389064539453952e-07, + "loss": 0.4642629027366638, + "step": 7411 + }, + { + "epoch": 1.709015448466682, + "grad_norm": 1.5180284715923413, + "learning_rate": 1.1371400427880761e-07, + "loss": 0.4568482041358948, + "step": 7412 + }, + { + "epoch": 1.7092460225962647, + "grad_norm": 1.6058744016500281, + "learning_rate": 1.135374919937272e-07, + "loss": 0.536895215511322, + "step": 7413 + }, + { + "epoch": 1.7094765967258474, + "grad_norm": 1.6944575711634469, + "learning_rate": 1.1336110856495628e-07, + "loss": 0.49696239829063416, 
+ "step": 7414 + }, + { + "epoch": 1.7097071708554301, + "grad_norm": 1.802031783829704, + "learning_rate": 1.1318485401813438e-07, + "loss": 0.3857358694076538, + "step": 7415 + }, + { + "epoch": 1.7099377449850128, + "grad_norm": 1.5410848248596472, + "learning_rate": 1.1300872837888121e-07, + "loss": 0.38111335039138794, + "step": 7416 + }, + { + "epoch": 1.7101683191145953, + "grad_norm": 1.6014644101172142, + "learning_rate": 1.1283273167279906e-07, + "loss": 0.4255755543708801, + "step": 7417 + }, + { + "epoch": 1.710398893244178, + "grad_norm": 1.6646696692039435, + "learning_rate": 1.1265686392547024e-07, + "loss": 0.5048757791519165, + "step": 7418 + }, + { + "epoch": 1.7106294673737605, + "grad_norm": 1.6262992093918878, + "learning_rate": 1.1248112516245944e-07, + "loss": 0.5402916073799133, + "step": 7419 + }, + { + "epoch": 1.7108600415033433, + "grad_norm": 1.6105931834922984, + "learning_rate": 1.1230551540931165e-07, + "loss": 0.3617591857910156, + "step": 7420 + }, + { + "epoch": 1.711090615632926, + "grad_norm": 1.584818843359006, + "learning_rate": 1.1213003469155369e-07, + "loss": 0.4636116921901703, + "step": 7421 + }, + { + "epoch": 1.7113211897625087, + "grad_norm": 1.7626797404606351, + "learning_rate": 1.1195468303469346e-07, + "loss": 0.4675198495388031, + "step": 7422 + }, + { + "epoch": 1.7115517638920914, + "grad_norm": 1.6024517382949015, + "learning_rate": 1.1177946046422038e-07, + "loss": 0.48491787910461426, + "step": 7423 + }, + { + "epoch": 1.7117823380216741, + "grad_norm": 1.5413352133121294, + "learning_rate": 1.1160436700560449e-07, + "loss": 0.3898283839225769, + "step": 7424 + }, + { + "epoch": 1.7120129121512566, + "grad_norm": 1.5514584947710022, + "learning_rate": 1.1142940268429735e-07, + "loss": 0.41522908210754395, + "step": 7425 + }, + { + "epoch": 1.7122434862808393, + "grad_norm": 1.430903522239028, + "learning_rate": 1.1125456752573215e-07, + "loss": 0.4681985378265381, + "step": 7426 + }, + { + "epoch": 
1.7124740604104218, + "grad_norm": 1.8962296460852388, + "learning_rate": 1.1107986155532245e-07, + "loss": 0.4788553714752197, + "step": 7427 + }, + { + "epoch": 1.7127046345400045, + "grad_norm": 1.5072364623848036, + "learning_rate": 1.1090528479846406e-07, + "loss": 0.43853843212127686, + "step": 7428 + }, + { + "epoch": 1.7129352086695873, + "grad_norm": 1.542463594674994, + "learning_rate": 1.107308372805329e-07, + "loss": 0.3736591637134552, + "step": 7429 + }, + { + "epoch": 1.71316578279917, + "grad_norm": 1.8237435289536401, + "learning_rate": 1.1055651902688712e-07, + "loss": 0.5770819783210754, + "step": 7430 + }, + { + "epoch": 1.7133963569287527, + "grad_norm": 1.7972828104133267, + "learning_rate": 1.1038233006286558e-07, + "loss": 0.5906555652618408, + "step": 7431 + }, + { + "epoch": 1.7136269310583354, + "grad_norm": 1.396062928601261, + "learning_rate": 1.1020827041378844e-07, + "loss": 0.4621407389640808, + "step": 7432 + }, + { + "epoch": 1.713857505187918, + "grad_norm": 1.6487194571266346, + "learning_rate": 1.1003434010495705e-07, + "loss": 0.4203164279460907, + "step": 7433 + }, + { + "epoch": 1.7140880793175006, + "grad_norm": 1.59720117870823, + "learning_rate": 1.0986053916165373e-07, + "loss": 0.4607565104961395, + "step": 7434 + }, + { + "epoch": 1.7143186534470831, + "grad_norm": 1.4411738322949479, + "learning_rate": 1.0968686760914248e-07, + "loss": 0.47256794571876526, + "step": 7435 + }, + { + "epoch": 1.7145492275766658, + "grad_norm": 2.1203032230505414, + "learning_rate": 1.0951332547266778e-07, + "loss": 0.479513943195343, + "step": 7436 + }, + { + "epoch": 1.7147798017062486, + "grad_norm": 1.7633354860000339, + "learning_rate": 1.0933991277745614e-07, + "loss": 0.47687965631484985, + "step": 7437 + }, + { + "epoch": 1.7150103758358313, + "grad_norm": 1.6696730348311766, + "learning_rate": 1.091666295487147e-07, + "loss": 0.45799845457077026, + "step": 7438 + }, + { + "epoch": 1.715240949965414, + "grad_norm": 
1.4765505689651048, + "learning_rate": 1.089934758116322e-07, + "loss": 0.43398863077163696, + "step": 7439 + }, + { + "epoch": 1.7154715240949967, + "grad_norm": 1.627580558092534, + "learning_rate": 1.0882045159137788e-07, + "loss": 0.4098217189311981, + "step": 7440 + }, + { + "epoch": 1.7157020982245792, + "grad_norm": 1.8062601643320504, + "learning_rate": 1.086475569131029e-07, + "loss": 0.49889707565307617, + "step": 7441 + }, + { + "epoch": 1.715932672354162, + "grad_norm": 1.4613353368332702, + "learning_rate": 1.0847479180193897e-07, + "loss": 0.4187192916870117, + "step": 7442 + }, + { + "epoch": 1.7161632464837444, + "grad_norm": 2.068945016126778, + "learning_rate": 1.0830215628299954e-07, + "loss": 0.44331133365631104, + "step": 7443 + }, + { + "epoch": 1.7163938206133271, + "grad_norm": 1.6773749938074582, + "learning_rate": 1.0812965038137856e-07, + "loss": 0.4888196587562561, + "step": 7444 + }, + { + "epoch": 1.7166243947429098, + "grad_norm": 1.6578617629701122, + "learning_rate": 1.0795727412215183e-07, + "loss": 0.4884798228740692, + "step": 7445 + }, + { + "epoch": 1.7168549688724926, + "grad_norm": 1.5723023883356735, + "learning_rate": 1.07785027530376e-07, + "loss": 0.45655232667922974, + "step": 7446 + }, + { + "epoch": 1.7170855430020753, + "grad_norm": 1.685893884498356, + "learning_rate": 1.0761291063108857e-07, + "loss": 0.3086237907409668, + "step": 7447 + }, + { + "epoch": 1.717316117131658, + "grad_norm": 1.5738053973393145, + "learning_rate": 1.0744092344930888e-07, + "loss": 0.4279823899269104, + "step": 7448 + }, + { + "epoch": 1.7175466912612405, + "grad_norm": 1.7221029802689058, + "learning_rate": 1.072690660100366e-07, + "loss": 0.4241681396961212, + "step": 7449 + }, + { + "epoch": 1.7177772653908232, + "grad_norm": 1.7874830878272077, + "learning_rate": 1.070973383382533e-07, + "loss": 0.47086501121520996, + "step": 7450 + }, + { + "epoch": 1.7180078395204057, + "grad_norm": 1.3780373187479635, + "learning_rate": 
1.0692574045892099e-07, + "loss": 0.43798619508743286, + "step": 7451 + }, + { + "epoch": 1.7182384136499884, + "grad_norm": 1.7289936352675708, + "learning_rate": 1.0675427239698354e-07, + "loss": 0.5781964659690857, + "step": 7452 + }, + { + "epoch": 1.7184689877795711, + "grad_norm": 1.4621228929512655, + "learning_rate": 1.0658293417736508e-07, + "loss": 0.4850879907608032, + "step": 7453 + }, + { + "epoch": 1.7186995619091539, + "grad_norm": 1.3236244677460836, + "learning_rate": 1.064117258249717e-07, + "loss": 0.40468811988830566, + "step": 7454 + }, + { + "epoch": 1.7189301360387366, + "grad_norm": 1.7069112900372936, + "learning_rate": 1.0624064736469052e-07, + "loss": 0.4054880142211914, + "step": 7455 + }, + { + "epoch": 1.719160710168319, + "grad_norm": 1.7589002706519377, + "learning_rate": 1.0606969882138894e-07, + "loss": 0.38633522391319275, + "step": 7456 + }, + { + "epoch": 1.7193912842979018, + "grad_norm": 1.6917357500409704, + "learning_rate": 1.0589888021991644e-07, + "loss": 0.4287499785423279, + "step": 7457 + }, + { + "epoch": 1.7196218584274843, + "grad_norm": 1.613018561241669, + "learning_rate": 1.0572819158510316e-07, + "loss": 0.49269533157348633, + "step": 7458 + }, + { + "epoch": 1.719852432557067, + "grad_norm": 1.4600608769783265, + "learning_rate": 1.0555763294176045e-07, + "loss": 0.38874679803848267, + "step": 7459 + }, + { + "epoch": 1.7200830066866497, + "grad_norm": 1.5663184097893508, + "learning_rate": 1.0538720431468051e-07, + "loss": 0.4381089508533478, + "step": 7460 + }, + { + "epoch": 1.7203135808162324, + "grad_norm": 1.6242553694361792, + "learning_rate": 1.0521690572863706e-07, + "loss": 0.4550422430038452, + "step": 7461 + }, + { + "epoch": 1.7205441549458151, + "grad_norm": 1.5017985009159773, + "learning_rate": 1.0504673720838476e-07, + "loss": 0.5173785090446472, + "step": 7462 + }, + { + "epoch": 1.7207747290753979, + "grad_norm": 1.4906138636113029, + "learning_rate": 1.0487669877865945e-07, + "loss": 
0.5082184076309204, + "step": 7463 + }, + { + "epoch": 1.7210053032049804, + "grad_norm": 1.7383580581523643, + "learning_rate": 1.0470679046417786e-07, + "loss": 0.49810969829559326, + "step": 7464 + }, + { + "epoch": 1.721235877334563, + "grad_norm": 1.7302456540952424, + "learning_rate": 1.0453701228963751e-07, + "loss": 0.47808337211608887, + "step": 7465 + }, + { + "epoch": 1.7214664514641456, + "grad_norm": 1.6093569631380469, + "learning_rate": 1.0436736427971782e-07, + "loss": 0.5100537538528442, + "step": 7466 + }, + { + "epoch": 1.7216970255937283, + "grad_norm": 1.5019138408689112, + "learning_rate": 1.0419784645907858e-07, + "loss": 0.44948023557662964, + "step": 7467 + }, + { + "epoch": 1.721927599723311, + "grad_norm": 1.3792836042899619, + "learning_rate": 1.040284588523611e-07, + "loss": 0.4653180241584778, + "step": 7468 + }, + { + "epoch": 1.7221581738528937, + "grad_norm": 1.901421358760061, + "learning_rate": 1.0385920148418737e-07, + "loss": 0.4930723309516907, + "step": 7469 + }, + { + "epoch": 1.7223887479824764, + "grad_norm": 1.5964124799736943, + "learning_rate": 1.036900743791611e-07, + "loss": 0.48883867263793945, + "step": 7470 + }, + { + "epoch": 1.7226193221120591, + "grad_norm": 1.27924002772244, + "learning_rate": 1.0352107756186624e-07, + "loss": 0.4030319154262543, + "step": 7471 + }, + { + "epoch": 1.7228498962416416, + "grad_norm": 1.8060139526740588, + "learning_rate": 1.033522110568683e-07, + "loss": 0.4174875319004059, + "step": 7472 + }, + { + "epoch": 1.7230804703712244, + "grad_norm": 1.731157383735833, + "learning_rate": 1.0318347488871371e-07, + "loss": 0.5152361392974854, + "step": 7473 + }, + { + "epoch": 1.7233110445008069, + "grad_norm": 1.3983774946509473, + "learning_rate": 1.0301486908193014e-07, + "loss": 0.43221428990364075, + "step": 7474 + }, + { + "epoch": 1.7235416186303896, + "grad_norm": 1.6931290113673243, + "learning_rate": 1.0284639366102598e-07, + "loss": 0.4239969849586487, + "step": 7475 + }, + { + 
"epoch": 1.7237721927599723, + "grad_norm": 1.5094560861426634, + "learning_rate": 1.0267804865049068e-07, + "loss": 0.5171400904655457, + "step": 7476 + }, + { + "epoch": 1.724002766889555, + "grad_norm": 1.3913671775557208, + "learning_rate": 1.0250983407479518e-07, + "loss": 0.45670178532600403, + "step": 7477 + }, + { + "epoch": 1.7242333410191377, + "grad_norm": 1.3489970844922, + "learning_rate": 1.0234174995839107e-07, + "loss": 0.36458373069763184, + "step": 7478 + }, + { + "epoch": 1.7244639151487204, + "grad_norm": 1.6926167509742018, + "learning_rate": 1.0217379632571122e-07, + "loss": 0.4940750002861023, + "step": 7479 + }, + { + "epoch": 1.724694489278303, + "grad_norm": 1.3742895139526408, + "learning_rate": 1.0200597320116911e-07, + "loss": 0.43453872203826904, + "step": 7480 + }, + { + "epoch": 1.7249250634078857, + "grad_norm": 1.4325916198137496, + "learning_rate": 1.0183828060915989e-07, + "loss": 0.49255162477493286, + "step": 7481 + }, + { + "epoch": 1.7251556375374681, + "grad_norm": 1.5551839406586245, + "learning_rate": 1.0167071857405906e-07, + "loss": 0.46221014857292175, + "step": 7482 + }, + { + "epoch": 1.7253862116670509, + "grad_norm": 1.6044214909369097, + "learning_rate": 1.015032871202236e-07, + "loss": 0.43426087498664856, + "step": 7483 + }, + { + "epoch": 1.7256167857966336, + "grad_norm": 1.3471292376409894, + "learning_rate": 1.0133598627199136e-07, + "loss": 0.45327985286712646, + "step": 7484 + }, + { + "epoch": 1.7258473599262163, + "grad_norm": 1.7300792096053668, + "learning_rate": 1.011688160536811e-07, + "loss": 0.4691676199436188, + "step": 7485 + }, + { + "epoch": 1.726077934055799, + "grad_norm": 1.7168424748125397, + "learning_rate": 1.0100177648959296e-07, + "loss": 0.5080254077911377, + "step": 7486 + }, + { + "epoch": 1.7263085081853817, + "grad_norm": 1.3360541862160926, + "learning_rate": 1.008348676040075e-07, + "loss": 0.34122025966644287, + "step": 7487 + }, + { + "epoch": 1.7265390823149642, + "grad_norm": 
1.650892930499383, + "learning_rate": 1.0066808942118699e-07, + "loss": 0.44408074021339417, + "step": 7488 + }, + { + "epoch": 1.726769656444547, + "grad_norm": 1.4603224951411022, + "learning_rate": 1.0050144196537402e-07, + "loss": 0.3777790665626526, + "step": 7489 + }, + { + "epoch": 1.7270002305741294, + "grad_norm": 1.6365267437093343, + "learning_rate": 1.0033492526079279e-07, + "loss": 0.48730146884918213, + "step": 7490 + }, + { + "epoch": 1.7272308047037122, + "grad_norm": 1.5792338555913825, + "learning_rate": 1.001685393316477e-07, + "loss": 0.35903626680374146, + "step": 7491 + }, + { + "epoch": 1.7274613788332949, + "grad_norm": 1.3953813288199584, + "learning_rate": 1.0000228420212509e-07, + "loss": 0.37729373574256897, + "step": 7492 + }, + { + "epoch": 1.7276919529628776, + "grad_norm": 1.6314801226105193, + "learning_rate": 9.98361598963916e-08, + "loss": 0.4388326406478882, + "step": 7493 + }, + { + "epoch": 1.7279225270924603, + "grad_norm": 1.4829220781258674, + "learning_rate": 9.967016643859527e-08, + "loss": 0.45095232129096985, + "step": 7494 + }, + { + "epoch": 1.728153101222043, + "grad_norm": 1.5130736602015042, + "learning_rate": 9.95043038528649e-08, + "loss": 0.4736475944519043, + "step": 7495 + }, + { + "epoch": 1.7283836753516255, + "grad_norm": 1.6393405202034401, + "learning_rate": 9.933857216330999e-08, + "loss": 0.2984190285205841, + "step": 7496 + }, + { + "epoch": 1.7286142494812082, + "grad_norm": 1.5993261500159095, + "learning_rate": 9.91729713940218e-08, + "loss": 0.45391780138015747, + "step": 7497 + }, + { + "epoch": 1.7288448236107907, + "grad_norm": 1.732905558263472, + "learning_rate": 9.900750156907157e-08, + "loss": 0.5150727033615112, + "step": 7498 + }, + { + "epoch": 1.7290753977403734, + "grad_norm": 1.372519788443724, + "learning_rate": 9.884216271251256e-08, + "loss": 0.41298598051071167, + "step": 7499 + }, + { + "epoch": 1.7293059718699562, + "grad_norm": 1.5310483983437806, + "learning_rate": 
9.86769548483779e-08, + "loss": 0.4820541441440582, + "step": 7500 + }, + { + "epoch": 1.7295365459995389, + "grad_norm": 1.4103659952581913, + "learning_rate": 9.85118780006825e-08, + "loss": 0.4148511290550232, + "step": 7501 + }, + { + "epoch": 1.7297671201291216, + "grad_norm": 1.535383378975012, + "learning_rate": 9.834693219342183e-08, + "loss": 0.39676210284233093, + "step": 7502 + }, + { + "epoch": 1.7299976942587043, + "grad_norm": 1.3969764743432636, + "learning_rate": 9.818211745057292e-08, + "loss": 0.3665908873081207, + "step": 7503 + }, + { + "epoch": 1.7302282683882868, + "grad_norm": 1.5255452230855382, + "learning_rate": 9.801743379609274e-08, + "loss": 0.39340025186538696, + "step": 7504 + }, + { + "epoch": 1.7304588425178695, + "grad_norm": 1.4673439514671116, + "learning_rate": 9.785288125391977e-08, + "loss": 0.4677412807941437, + "step": 7505 + }, + { + "epoch": 1.730689416647452, + "grad_norm": 1.8421716352805986, + "learning_rate": 9.768845984797369e-08, + "loss": 0.49413764476776123, + "step": 7506 + }, + { + "epoch": 1.7309199907770347, + "grad_norm": 2.1097980684598223, + "learning_rate": 9.752416960215437e-08, + "loss": 0.5312438607215881, + "step": 7507 + }, + { + "epoch": 1.7311505649066175, + "grad_norm": 1.408973464564324, + "learning_rate": 9.736001054034338e-08, + "loss": 0.38522863388061523, + "step": 7508 + }, + { + "epoch": 1.7313811390362002, + "grad_norm": 1.4496862609377634, + "learning_rate": 9.719598268640283e-08, + "loss": 0.49167078733444214, + "step": 7509 + }, + { + "epoch": 1.7316117131657829, + "grad_norm": 1.7071655256469307, + "learning_rate": 9.7032086064176e-08, + "loss": 0.4465949535369873, + "step": 7510 + }, + { + "epoch": 1.7318422872953656, + "grad_norm": 1.580755639233498, + "learning_rate": 9.686832069748663e-08, + "loss": 0.4627634882926941, + "step": 7511 + }, + { + "epoch": 1.732072861424948, + "grad_norm": 1.5945960217093318, + "learning_rate": 9.670468661013998e-08, + "loss": 0.4188409447669983, + 
"step": 7512 + }, + { + "epoch": 1.7323034355545308, + "grad_norm": 1.6767285085334622, + "learning_rate": 9.654118382592146e-08, + "loss": 0.5775213241577148, + "step": 7513 + }, + { + "epoch": 1.7325340096841133, + "grad_norm": 1.4889326648746473, + "learning_rate": 9.637781236859843e-08, + "loss": 0.43912672996520996, + "step": 7514 + }, + { + "epoch": 1.732764583813696, + "grad_norm": 1.677177851910315, + "learning_rate": 9.62145722619182e-08, + "loss": 0.5364755392074585, + "step": 7515 + }, + { + "epoch": 1.7329951579432787, + "grad_norm": 1.5135890648676678, + "learning_rate": 9.605146352960935e-08, + "loss": 0.4832648038864136, + "step": 7516 + }, + { + "epoch": 1.7332257320728615, + "grad_norm": 1.640472153194824, + "learning_rate": 9.588848619538182e-08, + "loss": 0.36932459473609924, + "step": 7517 + }, + { + "epoch": 1.7334563062024442, + "grad_norm": 1.4731235594964114, + "learning_rate": 9.57256402829254e-08, + "loss": 0.43458276987075806, + "step": 7518 + }, + { + "epoch": 1.733686880332027, + "grad_norm": 1.457966513875051, + "learning_rate": 9.556292581591196e-08, + "loss": 0.41533568501472473, + "step": 7519 + }, + { + "epoch": 1.7339174544616094, + "grad_norm": 1.4363289807621746, + "learning_rate": 9.540034281799325e-08, + "loss": 0.45898690819740295, + "step": 7520 + }, + { + "epoch": 1.734148028591192, + "grad_norm": 1.610315429506808, + "learning_rate": 9.523789131280279e-08, + "loss": 0.3321181535720825, + "step": 7521 + }, + { + "epoch": 1.7343786027207746, + "grad_norm": 1.5824862936232118, + "learning_rate": 9.507557132395416e-08, + "loss": 0.3926161229610443, + "step": 7522 + }, + { + "epoch": 1.7346091768503573, + "grad_norm": 1.264710302836967, + "learning_rate": 9.491338287504247e-08, + "loss": 0.41051846742630005, + "step": 7523 + }, + { + "epoch": 1.73483975097994, + "grad_norm": 1.3604853902379428, + "learning_rate": 9.47513259896432e-08, + "loss": 0.4440652132034302, + "step": 7524 + }, + { + "epoch": 1.7350703251095227, + 
"grad_norm": 1.5933781203678954, + "learning_rate": 9.458940069131304e-08, + "loss": 0.5175125598907471, + "step": 7525 + }, + { + "epoch": 1.7353008992391055, + "grad_norm": 1.4535445480892137, + "learning_rate": 9.442760700358987e-08, + "loss": 0.45521751046180725, + "step": 7526 + }, + { + "epoch": 1.7355314733686882, + "grad_norm": 1.5707484811695662, + "learning_rate": 9.426594494999151e-08, + "loss": 0.5133911967277527, + "step": 7527 + }, + { + "epoch": 1.7357620474982707, + "grad_norm": 1.8770278394623805, + "learning_rate": 9.410441455401752e-08, + "loss": 0.4397609233856201, + "step": 7528 + }, + { + "epoch": 1.7359926216278534, + "grad_norm": 3.7292879258339693, + "learning_rate": 9.394301583914765e-08, + "loss": 0.4503510594367981, + "step": 7529 + }, + { + "epoch": 1.7362231957574359, + "grad_norm": 1.5909450336667472, + "learning_rate": 9.378174882884327e-08, + "loss": 0.44119834899902344, + "step": 7530 + }, + { + "epoch": 1.7364537698870186, + "grad_norm": 1.5959659498105105, + "learning_rate": 9.362061354654583e-08, + "loss": 0.46257996559143066, + "step": 7531 + }, + { + "epoch": 1.7366843440166013, + "grad_norm": 1.4727698319610416, + "learning_rate": 9.345961001567792e-08, + "loss": 0.4468308687210083, + "step": 7532 + }, + { + "epoch": 1.736914918146184, + "grad_norm": 1.329652616869682, + "learning_rate": 9.32987382596433e-08, + "loss": 0.3837989568710327, + "step": 7533 + }, + { + "epoch": 1.7371454922757668, + "grad_norm": 1.7149798865191848, + "learning_rate": 9.313799830182644e-08, + "loss": 0.4224961996078491, + "step": 7534 + }, + { + "epoch": 1.7373760664053495, + "grad_norm": 1.3527154365554523, + "learning_rate": 9.297739016559225e-08, + "loss": 0.37379956245422363, + "step": 7535 + }, + { + "epoch": 1.737606640534932, + "grad_norm": 1.3983736958193809, + "learning_rate": 9.281691387428658e-08, + "loss": 0.4204242527484894, + "step": 7536 + }, + { + "epoch": 1.7378372146645147, + "grad_norm": 1.550547566194999, + "learning_rate": 
9.265656945123678e-08, + "loss": 0.5270572900772095, + "step": 7537 + }, + { + "epoch": 1.7380677887940972, + "grad_norm": 1.6826850331086136, + "learning_rate": 9.249635691975e-08, + "loss": 0.44208282232284546, + "step": 7538 + }, + { + "epoch": 1.73829836292368, + "grad_norm": 1.158547237110862, + "learning_rate": 9.233627630311502e-08, + "loss": 0.32514283061027527, + "step": 7539 + }, + { + "epoch": 1.7385289370532626, + "grad_norm": 1.42135951118167, + "learning_rate": 9.217632762460126e-08, + "loss": 0.35472434759140015, + "step": 7540 + }, + { + "epoch": 1.7387595111828453, + "grad_norm": 1.9134735814581072, + "learning_rate": 9.201651090745888e-08, + "loss": 0.5034215450286865, + "step": 7541 + }, + { + "epoch": 1.738990085312428, + "grad_norm": 1.4950522917395752, + "learning_rate": 9.185682617491863e-08, + "loss": 0.4779762029647827, + "step": 7542 + }, + { + "epoch": 1.7392206594420108, + "grad_norm": 1.7544463226218252, + "learning_rate": 9.169727345019263e-08, + "loss": 0.4964079260826111, + "step": 7543 + }, + { + "epoch": 1.7394512335715933, + "grad_norm": 1.8208500448761544, + "learning_rate": 9.153785275647319e-08, + "loss": 0.5125068426132202, + "step": 7544 + }, + { + "epoch": 1.739681807701176, + "grad_norm": 1.369096268264849, + "learning_rate": 9.13785641169339e-08, + "loss": 0.39051756262779236, + "step": 7545 + }, + { + "epoch": 1.7399123818307585, + "grad_norm": 1.6132499721446665, + "learning_rate": 9.121940755472901e-08, + "loss": 0.45951950550079346, + "step": 7546 + }, + { + "epoch": 1.7401429559603412, + "grad_norm": 1.402513218333582, + "learning_rate": 9.106038309299302e-08, + "loss": 0.42676979303359985, + "step": 7547 + }, + { + "epoch": 1.740373530089924, + "grad_norm": 1.6248647623340229, + "learning_rate": 9.090149075484255e-08, + "loss": 0.3585033416748047, + "step": 7548 + }, + { + "epoch": 1.7406041042195066, + "grad_norm": 1.5204418845888263, + "learning_rate": 9.074273056337366e-08, + "loss": 0.4613775312900543, + "step": 
7549 + }, + { + "epoch": 1.7408346783490893, + "grad_norm": 1.5756472296671777, + "learning_rate": 9.058410254166415e-08, + "loss": 0.48934412002563477, + "step": 7550 + }, + { + "epoch": 1.741065252478672, + "grad_norm": 2.3682357853653895, + "learning_rate": 9.042560671277177e-08, + "loss": 0.5749069452285767, + "step": 7551 + }, + { + "epoch": 1.7412958266082545, + "grad_norm": 1.4990310296288942, + "learning_rate": 9.026724309973588e-08, + "loss": 0.4760423004627228, + "step": 7552 + }, + { + "epoch": 1.7415264007378373, + "grad_norm": 1.38070744019409, + "learning_rate": 9.010901172557594e-08, + "loss": 0.43080049753189087, + "step": 7553 + }, + { + "epoch": 1.7417569748674198, + "grad_norm": 1.4636238536042068, + "learning_rate": 8.99509126132928e-08, + "loss": 0.44850271940231323, + "step": 7554 + }, + { + "epoch": 1.7419875489970025, + "grad_norm": 1.5357653243690434, + "learning_rate": 8.979294578586738e-08, + "loss": 0.34593498706817627, + "step": 7555 + }, + { + "epoch": 1.7422181231265852, + "grad_norm": 1.3635590695208566, + "learning_rate": 8.963511126626188e-08, + "loss": 0.3738324046134949, + "step": 7556 + }, + { + "epoch": 1.742448697256168, + "grad_norm": 1.6262402635208488, + "learning_rate": 8.947740907741952e-08, + "loss": 0.47988662123680115, + "step": 7557 + }, + { + "epoch": 1.7426792713857506, + "grad_norm": 1.904530616299084, + "learning_rate": 8.931983924226338e-08, + "loss": 0.5863034725189209, + "step": 7558 + }, + { + "epoch": 1.7429098455153333, + "grad_norm": 1.497315511162884, + "learning_rate": 8.916240178369827e-08, + "loss": 0.38455232977867126, + "step": 7559 + }, + { + "epoch": 1.7431404196449158, + "grad_norm": 1.711133818053075, + "learning_rate": 8.900509672460899e-08, + "loss": 0.3919760584831238, + "step": 7560 + }, + { + "epoch": 1.7433709937744986, + "grad_norm": 1.8876361089943499, + "learning_rate": 8.884792408786169e-08, + "loss": 0.4090653657913208, + "step": 7561 + }, + { + "epoch": 1.743601567904081, + 
"grad_norm": 1.458591423296693, + "learning_rate": 8.869088389630264e-08, + "loss": 0.42597073316574097, + "step": 7562 + }, + { + "epoch": 1.7438321420336638, + "grad_norm": 1.4410906971279085, + "learning_rate": 8.853397617275959e-08, + "loss": 0.38760805130004883, + "step": 7563 + }, + { + "epoch": 1.7440627161632465, + "grad_norm": 1.3930314463175644, + "learning_rate": 8.837720094004042e-08, + "loss": 0.3753165900707245, + "step": 7564 + }, + { + "epoch": 1.7442932902928292, + "grad_norm": 1.4708100181524995, + "learning_rate": 8.822055822093432e-08, + "loss": 0.5169536471366882, + "step": 7565 + }, + { + "epoch": 1.744523864422412, + "grad_norm": 1.436339252382814, + "learning_rate": 8.806404803821077e-08, + "loss": 0.3886902332305908, + "step": 7566 + }, + { + "epoch": 1.7447544385519944, + "grad_norm": 1.7378167101447366, + "learning_rate": 8.790767041461977e-08, + "loss": 0.48971402645111084, + "step": 7567 + }, + { + "epoch": 1.7449850126815771, + "grad_norm": 1.3555756556469605, + "learning_rate": 8.775142537289282e-08, + "loss": 0.4656449556350708, + "step": 7568 + }, + { + "epoch": 1.7452155868111596, + "grad_norm": 1.24689144854066, + "learning_rate": 8.75953129357414e-08, + "loss": 0.43197786808013916, + "step": 7569 + }, + { + "epoch": 1.7454461609407423, + "grad_norm": 1.6584429086506909, + "learning_rate": 8.743933312585816e-08, + "loss": 0.5062606930732727, + "step": 7570 + }, + { + "epoch": 1.745676735070325, + "grad_norm": 1.714345013647294, + "learning_rate": 8.728348596591639e-08, + "loss": 0.5489983558654785, + "step": 7571 + }, + { + "epoch": 1.7459073091999078, + "grad_norm": 1.4457283500823468, + "learning_rate": 8.712777147857031e-08, + "loss": 0.4351652264595032, + "step": 7572 + }, + { + "epoch": 1.7461378833294905, + "grad_norm": 2.160367880410759, + "learning_rate": 8.697218968645403e-08, + "loss": 0.5096884965896606, + "step": 7573 + }, + { + "epoch": 1.7463684574590732, + "grad_norm": 1.2837319415683648, + "learning_rate": 
8.681674061218347e-08, + "loss": 0.3127269744873047, + "step": 7574 + }, + { + "epoch": 1.7465990315886557, + "grad_norm": 1.8378362837335938, + "learning_rate": 8.666142427835443e-08, + "loss": 0.4738629460334778, + "step": 7575 + }, + { + "epoch": 1.7468296057182384, + "grad_norm": 1.5090024147723615, + "learning_rate": 8.650624070754375e-08, + "loss": 0.46921902894973755, + "step": 7576 + }, + { + "epoch": 1.747060179847821, + "grad_norm": 1.578667567709185, + "learning_rate": 8.635118992230906e-08, + "loss": 0.5296987891197205, + "step": 7577 + }, + { + "epoch": 1.7472907539774036, + "grad_norm": 1.1732895039201416, + "learning_rate": 8.619627194518819e-08, + "loss": 0.3522387742996216, + "step": 7578 + }, + { + "epoch": 1.7475213281069863, + "grad_norm": 1.550879536093582, + "learning_rate": 8.604148679870049e-08, + "loss": 0.42747724056243896, + "step": 7579 + }, + { + "epoch": 1.747751902236569, + "grad_norm": 1.535695568842986, + "learning_rate": 8.588683450534528e-08, + "loss": 0.399990439414978, + "step": 7580 + }, + { + "epoch": 1.7479824763661518, + "grad_norm": 1.688266581429453, + "learning_rate": 8.573231508760315e-08, + "loss": 0.48220518231391907, + "step": 7581 + }, + { + "epoch": 1.7482130504957345, + "grad_norm": 1.8452105924711204, + "learning_rate": 8.557792856793455e-08, + "loss": 0.5227106213569641, + "step": 7582 + }, + { + "epoch": 1.748443624625317, + "grad_norm": 1.596076015195143, + "learning_rate": 8.542367496878178e-08, + "loss": 0.5436732769012451, + "step": 7583 + }, + { + "epoch": 1.7486741987548997, + "grad_norm": 1.5781135040763308, + "learning_rate": 8.526955431256644e-08, + "loss": 0.48398053646087646, + "step": 7584 + }, + { + "epoch": 1.7489047728844822, + "grad_norm": 1.8109008330023073, + "learning_rate": 8.511556662169217e-08, + "loss": 0.5727924108505249, + "step": 7585 + }, + { + "epoch": 1.749135347014065, + "grad_norm": 1.7451913815699138, + "learning_rate": 8.496171191854229e-08, + "loss": 0.48077693581581116, + 
"step": 7586 + }, + { + "epoch": 1.7493659211436476, + "grad_norm": 1.4513314868999736, + "learning_rate": 8.480799022548113e-08, + "loss": 0.45447635650634766, + "step": 7587 + }, + { + "epoch": 1.7495964952732304, + "grad_norm": 1.7305734402801412, + "learning_rate": 8.465440156485392e-08, + "loss": 0.4605486989021301, + "step": 7588 + }, + { + "epoch": 1.749827069402813, + "grad_norm": 1.6087138586576477, + "learning_rate": 8.450094595898604e-08, + "loss": 0.4229927062988281, + "step": 7589 + }, + { + "epoch": 1.7500576435323958, + "grad_norm": 1.371495589643338, + "learning_rate": 8.434762343018408e-08, + "loss": 0.43005260825157166, + "step": 7590 + }, + { + "epoch": 1.7502882176619783, + "grad_norm": 1.739761797548497, + "learning_rate": 8.41944340007349e-08, + "loss": 0.47446098923683167, + "step": 7591 + }, + { + "epoch": 1.750518791791561, + "grad_norm": 1.6084919754115274, + "learning_rate": 8.40413776929062e-08, + "loss": 0.40554216504096985, + "step": 7592 + }, + { + "epoch": 1.7507493659211435, + "grad_norm": 1.2363538330087616, + "learning_rate": 8.38884545289461e-08, + "loss": 0.4144189953804016, + "step": 7593 + }, + { + "epoch": 1.7509799400507262, + "grad_norm": 1.6677815347140812, + "learning_rate": 8.373566453108361e-08, + "loss": 0.449351966381073, + "step": 7594 + }, + { + "epoch": 1.751210514180309, + "grad_norm": 1.8357616333643774, + "learning_rate": 8.358300772152849e-08, + "loss": 0.4584103226661682, + "step": 7595 + }, + { + "epoch": 1.7514410883098916, + "grad_norm": 1.6545876792386258, + "learning_rate": 8.343048412247066e-08, + "loss": 0.4739362895488739, + "step": 7596 + }, + { + "epoch": 1.7516716624394744, + "grad_norm": 1.3684829539670578, + "learning_rate": 8.327809375608131e-08, + "loss": 0.3970356583595276, + "step": 7597 + }, + { + "epoch": 1.751902236569057, + "grad_norm": 1.390074068538192, + "learning_rate": 8.312583664451157e-08, + "loss": 0.4298238754272461, + "step": 7598 + }, + { + "epoch": 1.7521328106986396, + 
"grad_norm": 1.5218432452457022, + "learning_rate": 8.297371280989385e-08, + "loss": 0.4920361340045929, + "step": 7599 + }, + { + "epoch": 1.7523633848282223, + "grad_norm": 1.6001856104794878, + "learning_rate": 8.282172227434059e-08, + "loss": 0.5035870671272278, + "step": 7600 + }, + { + "epoch": 1.7525939589578048, + "grad_norm": 1.8053658495544915, + "learning_rate": 8.266986505994555e-08, + "loss": 0.373248815536499, + "step": 7601 + }, + { + "epoch": 1.7528245330873875, + "grad_norm": 2.0338367024251345, + "learning_rate": 8.25181411887822e-08, + "loss": 0.48491543531417847, + "step": 7602 + }, + { + "epoch": 1.7530551072169702, + "grad_norm": 1.6403088167242337, + "learning_rate": 8.236655068290554e-08, + "loss": 0.4298476576805115, + "step": 7603 + }, + { + "epoch": 1.753285681346553, + "grad_norm": 1.5503246605292686, + "learning_rate": 8.221509356435064e-08, + "loss": 0.48804932832717896, + "step": 7604 + }, + { + "epoch": 1.7535162554761357, + "grad_norm": 1.595278442494436, + "learning_rate": 8.206376985513353e-08, + "loss": 0.467857301235199, + "step": 7605 + }, + { + "epoch": 1.7537468296057184, + "grad_norm": 1.8978537163965867, + "learning_rate": 8.19125795772504e-08, + "loss": 0.48995548486709595, + "step": 7606 + }, + { + "epoch": 1.7539774037353009, + "grad_norm": 1.488521983097995, + "learning_rate": 8.176152275267823e-08, + "loss": 0.4459487795829773, + "step": 7607 + }, + { + "epoch": 1.7542079778648836, + "grad_norm": 1.4326042778667836, + "learning_rate": 8.1610599403375e-08, + "loss": 0.5054866671562195, + "step": 7608 + }, + { + "epoch": 1.754438551994466, + "grad_norm": 1.4563884146816763, + "learning_rate": 8.145980955127862e-08, + "loss": 0.46223869919776917, + "step": 7609 + }, + { + "epoch": 1.7546691261240488, + "grad_norm": 1.696768225081691, + "learning_rate": 8.1309153218308e-08, + "loss": 0.4743426442146301, + "step": 7610 + }, + { + "epoch": 1.7548997002536315, + "grad_norm": 1.7623915082520603, + "learning_rate": 
8.115863042636262e-08, + "loss": 0.40808072686195374, + "step": 7611 + }, + { + "epoch": 1.7551302743832142, + "grad_norm": 1.3859431275297254, + "learning_rate": 8.100824119732263e-08, + "loss": 0.4452321231365204, + "step": 7612 + }, + { + "epoch": 1.755360848512797, + "grad_norm": 1.556764426976114, + "learning_rate": 8.085798555304824e-08, + "loss": 0.4211857318878174, + "step": 7613 + }, + { + "epoch": 1.7555914226423797, + "grad_norm": 1.5080375348033017, + "learning_rate": 8.070786351538117e-08, + "loss": 0.3356667757034302, + "step": 7614 + }, + { + "epoch": 1.7558219967719622, + "grad_norm": 1.7842469682737618, + "learning_rate": 8.055787510614287e-08, + "loss": 0.4636021852493286, + "step": 7615 + }, + { + "epoch": 1.7560525709015449, + "grad_norm": 1.624229543588168, + "learning_rate": 8.040802034713546e-08, + "loss": 0.4066168963909149, + "step": 7616 + }, + { + "epoch": 1.7562831450311274, + "grad_norm": 1.4896510438449921, + "learning_rate": 8.025829926014216e-08, + "loss": 0.426937460899353, + "step": 7617 + }, + { + "epoch": 1.75651371916071, + "grad_norm": 1.838065393231424, + "learning_rate": 8.010871186692625e-08, + "loss": 0.464493989944458, + "step": 7618 + }, + { + "epoch": 1.7567442932902928, + "grad_norm": 1.7522078931434732, + "learning_rate": 7.995925818923222e-08, + "loss": 0.44130605459213257, + "step": 7619 + }, + { + "epoch": 1.7569748674198755, + "grad_norm": 1.6877219329526134, + "learning_rate": 7.980993824878402e-08, + "loss": 0.5241909027099609, + "step": 7620 + }, + { + "epoch": 1.7572054415494582, + "grad_norm": 1.605603526262718, + "learning_rate": 7.96607520672874e-08, + "loss": 0.45450860261917114, + "step": 7621 + }, + { + "epoch": 1.757436015679041, + "grad_norm": 1.6393742771356723, + "learning_rate": 7.951169966642757e-08, + "loss": 0.443767786026001, + "step": 7622 + }, + { + "epoch": 1.7576665898086234, + "grad_norm": 1.5258486167332923, + "learning_rate": 7.936278106787131e-08, + "loss": 0.3951075077056885, + "step": 
7623 + }, + { + "epoch": 1.7578971639382062, + "grad_norm": 1.8216713225734935, + "learning_rate": 7.921399629326509e-08, + "loss": 0.44628477096557617, + "step": 7624 + }, + { + "epoch": 1.7581277380677887, + "grad_norm": 1.7421703870668572, + "learning_rate": 7.906534536423648e-08, + "loss": 0.38743889331817627, + "step": 7625 + }, + { + "epoch": 1.7583583121973714, + "grad_norm": 1.4726686928375068, + "learning_rate": 7.891682830239311e-08, + "loss": 0.4338032007217407, + "step": 7626 + }, + { + "epoch": 1.758588886326954, + "grad_norm": 1.7605246972541082, + "learning_rate": 7.876844512932367e-08, + "loss": 0.47387874126434326, + "step": 7627 + }, + { + "epoch": 1.7588194604565368, + "grad_norm": 1.6222674378421518, + "learning_rate": 7.86201958665973e-08, + "loss": 0.4082717299461365, + "step": 7628 + }, + { + "epoch": 1.7590500345861195, + "grad_norm": 1.462169761343313, + "learning_rate": 7.847208053576326e-08, + "loss": 0.4254682958126068, + "step": 7629 + }, + { + "epoch": 1.7592806087157022, + "grad_norm": 1.319688989297758, + "learning_rate": 7.832409915835181e-08, + "loss": 0.3572045564651489, + "step": 7630 + }, + { + "epoch": 1.7595111828452847, + "grad_norm": 1.398732808330898, + "learning_rate": 7.817625175587328e-08, + "loss": 0.39110279083251953, + "step": 7631 + }, + { + "epoch": 1.7597417569748675, + "grad_norm": 2.455493892116574, + "learning_rate": 7.802853834981926e-08, + "loss": 0.49292176961898804, + "step": 7632 + }, + { + "epoch": 1.75997233110445, + "grad_norm": 1.460109162216243, + "learning_rate": 7.78809589616608e-08, + "loss": 0.4271275997161865, + "step": 7633 + }, + { + "epoch": 1.7602029052340327, + "grad_norm": 1.5973984242111468, + "learning_rate": 7.77335136128503e-08, + "loss": 0.470772922039032, + "step": 7634 + }, + { + "epoch": 1.7604334793636154, + "grad_norm": 1.5415713448452681, + "learning_rate": 7.758620232482083e-08, + "loss": 0.4872988760471344, + "step": 7635 + }, + { + "epoch": 1.760664053493198, + "grad_norm": 
1.2959777480648245, + "learning_rate": 7.743902511898492e-08, + "loss": 0.4300990104675293, + "step": 7636 + }, + { + "epoch": 1.7608946276227808, + "grad_norm": 1.4331560277043864, + "learning_rate": 7.729198201673682e-08, + "loss": 0.4524795711040497, + "step": 7637 + }, + { + "epoch": 1.7611252017523635, + "grad_norm": 1.580884966063861, + "learning_rate": 7.714507303945028e-08, + "loss": 0.4673241376876831, + "step": 7638 + }, + { + "epoch": 1.761355775881946, + "grad_norm": 1.7656151539321776, + "learning_rate": 7.699829820848048e-08, + "loss": 0.5171443223953247, + "step": 7639 + }, + { + "epoch": 1.7615863500115287, + "grad_norm": 1.5721911288259287, + "learning_rate": 7.68516575451621e-08, + "loss": 0.44416171312332153, + "step": 7640 + }, + { + "epoch": 1.7618169241411112, + "grad_norm": 1.8596688405579505, + "learning_rate": 7.670515107081122e-08, + "loss": 0.4456225633621216, + "step": 7641 + }, + { + "epoch": 1.762047498270694, + "grad_norm": 1.427384194238264, + "learning_rate": 7.65587788067239e-08, + "loss": 0.5235984921455383, + "step": 7642 + }, + { + "epoch": 1.7622780724002767, + "grad_norm": 1.5098894741733768, + "learning_rate": 7.641254077417702e-08, + "loss": 0.4957311749458313, + "step": 7643 + }, + { + "epoch": 1.7625086465298594, + "grad_norm": 1.9524483698152115, + "learning_rate": 7.626643699442748e-08, + "loss": 0.48401015996932983, + "step": 7644 + }, + { + "epoch": 1.762739220659442, + "grad_norm": 1.5925905896008645, + "learning_rate": 7.612046748871326e-08, + "loss": 0.5440249443054199, + "step": 7645 + }, + { + "epoch": 1.7629697947890248, + "grad_norm": 1.5363697612706335, + "learning_rate": 7.597463227825229e-08, + "loss": 0.3922181725502014, + "step": 7646 + }, + { + "epoch": 1.7632003689186073, + "grad_norm": 1.7121602067196948, + "learning_rate": 7.582893138424318e-08, + "loss": 0.4679541289806366, + "step": 7647 + }, + { + "epoch": 1.76343094304819, + "grad_norm": 1.63738592997542, + "learning_rate": 7.568336482786508e-08, + 
"loss": 0.4461076557636261, + "step": 7648 + }, + { + "epoch": 1.7636615171777725, + "grad_norm": 1.769800706819883, + "learning_rate": 7.553793263027752e-08, + "loss": 0.4028201997280121, + "step": 7649 + }, + { + "epoch": 1.7638920913073552, + "grad_norm": 1.6924130336118084, + "learning_rate": 7.53926348126206e-08, + "loss": 0.47307640314102173, + "step": 7650 + }, + { + "epoch": 1.764122665436938, + "grad_norm": 1.7236868707009407, + "learning_rate": 7.524747139601473e-08, + "loss": 0.4763333201408386, + "step": 7651 + }, + { + "epoch": 1.7643532395665207, + "grad_norm": 1.5475351462285587, + "learning_rate": 7.510244240156127e-08, + "loss": 0.5062815546989441, + "step": 7652 + }, + { + "epoch": 1.7645838136961034, + "grad_norm": 1.4648234779945293, + "learning_rate": 7.495754785034114e-08, + "loss": 0.38344740867614746, + "step": 7653 + }, + { + "epoch": 1.7648143878256861, + "grad_norm": 1.5630602768230752, + "learning_rate": 7.48127877634166e-08, + "loss": 0.36255425214767456, + "step": 7654 + }, + { + "epoch": 1.7650449619552686, + "grad_norm": 1.4144647369682326, + "learning_rate": 7.466816216182969e-08, + "loss": 0.4136468172073364, + "step": 7655 + }, + { + "epoch": 1.7652755360848513, + "grad_norm": 1.5589028620208925, + "learning_rate": 7.452367106660351e-08, + "loss": 0.4294041395187378, + "step": 7656 + }, + { + "epoch": 1.7655061102144338, + "grad_norm": 1.5271012787948486, + "learning_rate": 7.437931449874101e-08, + "loss": 0.3865356147289276, + "step": 7657 + }, + { + "epoch": 1.7657366843440165, + "grad_norm": 1.5355711497321805, + "learning_rate": 7.42350924792261e-08, + "loss": 0.44538289308547974, + "step": 7658 + }, + { + "epoch": 1.7659672584735993, + "grad_norm": 1.6285566114230512, + "learning_rate": 7.409100502902299e-08, + "loss": 0.4943844676017761, + "step": 7659 + }, + { + "epoch": 1.766197832603182, + "grad_norm": 1.759721404059002, + "learning_rate": 7.394705216907582e-08, + "loss": 0.41705092787742615, + "step": 7660 + }, + { + 
"epoch": 1.7664284067327647, + "grad_norm": 1.4175389623557053, + "learning_rate": 7.380323392031018e-08, + "loss": 0.4304206967353821, + "step": 7661 + }, + { + "epoch": 1.7666589808623474, + "grad_norm": 1.3933381760031749, + "learning_rate": 7.365955030363102e-08, + "loss": 0.4830179214477539, + "step": 7662 + }, + { + "epoch": 1.76688955499193, + "grad_norm": 1.51616499834235, + "learning_rate": 7.351600133992452e-08, + "loss": 0.47749078273773193, + "step": 7663 + }, + { + "epoch": 1.7671201291215126, + "grad_norm": 1.4074934707168656, + "learning_rate": 7.337258705005667e-08, + "loss": 0.3899204730987549, + "step": 7664 + }, + { + "epoch": 1.7673507032510951, + "grad_norm": 1.4123867126002758, + "learning_rate": 7.322930745487443e-08, + "loss": 0.4621524214744568, + "step": 7665 + }, + { + "epoch": 1.7675812773806778, + "grad_norm": 1.725639837898645, + "learning_rate": 7.308616257520506e-08, + "loss": 0.5305047035217285, + "step": 7666 + }, + { + "epoch": 1.7678118515102605, + "grad_norm": 2.1356750734168646, + "learning_rate": 7.294315243185578e-08, + "loss": 0.5894631147384644, + "step": 7667 + }, + { + "epoch": 1.7680424256398433, + "grad_norm": 1.5389151696841823, + "learning_rate": 7.280027704561498e-08, + "loss": 0.38509970903396606, + "step": 7668 + }, + { + "epoch": 1.768272999769426, + "grad_norm": 1.7309245548099654, + "learning_rate": 7.265753643725048e-08, + "loss": 0.45494410395622253, + "step": 7669 + }, + { + "epoch": 1.7685035738990087, + "grad_norm": 1.7035489800713894, + "learning_rate": 7.251493062751169e-08, + "loss": 0.4819248914718628, + "step": 7670 + }, + { + "epoch": 1.7687341480285912, + "grad_norm": 1.4325571648838293, + "learning_rate": 7.237245963712724e-08, + "loss": 0.43286386132240295, + "step": 7671 + }, + { + "epoch": 1.768964722158174, + "grad_norm": 1.3036122364237743, + "learning_rate": 7.223012348680724e-08, + "loss": 0.4285479187965393, + "step": 7672 + }, + { + "epoch": 1.7691952962877564, + "grad_norm": 
1.6598071005655777, + "learning_rate": 7.208792219724124e-08, + "loss": 0.42678505182266235, + "step": 7673 + }, + { + "epoch": 1.7694258704173391, + "grad_norm": 1.647090361621967, + "learning_rate": 7.194585578909995e-08, + "loss": 0.47091686725616455, + "step": 7674 + }, + { + "epoch": 1.7696564445469218, + "grad_norm": 1.5115484466399114, + "learning_rate": 7.180392428303394e-08, + "loss": 0.41932445764541626, + "step": 7675 + }, + { + "epoch": 1.7698870186765046, + "grad_norm": 1.2463006271885857, + "learning_rate": 7.166212769967483e-08, + "loss": 0.4043616056442261, + "step": 7676 + }, + { + "epoch": 1.7701175928060873, + "grad_norm": 1.5310666660883137, + "learning_rate": 7.15204660596338e-08, + "loss": 0.395826518535614, + "step": 7677 + }, + { + "epoch": 1.7703481669356698, + "grad_norm": 1.4874807127430703, + "learning_rate": 7.13789393835027e-08, + "loss": 0.4684498906135559, + "step": 7678 + }, + { + "epoch": 1.7705787410652525, + "grad_norm": 1.8560085011670902, + "learning_rate": 7.12375476918542e-08, + "loss": 0.4713285565376282, + "step": 7679 + }, + { + "epoch": 1.770809315194835, + "grad_norm": 1.487262641155755, + "learning_rate": 7.109629100524073e-08, + "loss": 0.47559499740600586, + "step": 7680 + }, + { + "epoch": 1.7710398893244177, + "grad_norm": 1.5741914036439861, + "learning_rate": 7.095516934419554e-08, + "loss": 0.5364210605621338, + "step": 7681 + }, + { + "epoch": 1.7712704634540004, + "grad_norm": 1.942648846069337, + "learning_rate": 7.081418272923212e-08, + "loss": 0.5731894969940186, + "step": 7682 + }, + { + "epoch": 1.7715010375835831, + "grad_norm": 1.7006107903804015, + "learning_rate": 7.067333118084428e-08, + "loss": 0.4287458062171936, + "step": 7683 + }, + { + "epoch": 1.7717316117131658, + "grad_norm": 1.5575643616743255, + "learning_rate": 7.053261471950612e-08, + "loss": 0.3849913775920868, + "step": 7684 + }, + { + "epoch": 1.7719621858427486, + "grad_norm": 1.4243498094919005, + "learning_rate": 
7.039203336567245e-08, + "loss": 0.4933156371116638, + "step": 7685 + }, + { + "epoch": 1.772192759972331, + "grad_norm": 1.897795122632639, + "learning_rate": 7.025158713977808e-08, + "loss": 0.5185002088546753, + "step": 7686 + }, + { + "epoch": 1.7724233341019138, + "grad_norm": 1.634847266537775, + "learning_rate": 7.011127606223799e-08, + "loss": 0.514995276927948, + "step": 7687 + }, + { + "epoch": 1.7726539082314963, + "grad_norm": 1.5845868665458605, + "learning_rate": 6.99711001534481e-08, + "loss": 0.4362761676311493, + "step": 7688 + }, + { + "epoch": 1.772884482361079, + "grad_norm": 1.699858455397738, + "learning_rate": 6.983105943378431e-08, + "loss": 0.44117432832717896, + "step": 7689 + }, + { + "epoch": 1.7731150564906617, + "grad_norm": 1.5875521204144505, + "learning_rate": 6.969115392360325e-08, + "loss": 0.4940808415412903, + "step": 7690 + }, + { + "epoch": 1.7733456306202444, + "grad_norm": 1.9046624573594293, + "learning_rate": 6.955138364324109e-08, + "loss": 0.4322758913040161, + "step": 7691 + }, + { + "epoch": 1.7735762047498271, + "grad_norm": 1.467450936859881, + "learning_rate": 6.941174861301536e-08, + "loss": 0.3867933750152588, + "step": 7692 + }, + { + "epoch": 1.7738067788794099, + "grad_norm": 1.6321329987514115, + "learning_rate": 6.927224885322302e-08, + "loss": 0.4380000829696655, + "step": 7693 + }, + { + "epoch": 1.7740373530089923, + "grad_norm": 1.7183023620516549, + "learning_rate": 6.913288438414222e-08, + "loss": 0.46499723196029663, + "step": 7694 + }, + { + "epoch": 1.774267927138575, + "grad_norm": 1.6625572218896962, + "learning_rate": 6.89936552260304e-08, + "loss": 0.4845675230026245, + "step": 7695 + }, + { + "epoch": 1.7744985012681576, + "grad_norm": 1.3920222388819354, + "learning_rate": 6.88545613991266e-08, + "loss": 0.3755526542663574, + "step": 7696 + }, + { + "epoch": 1.7747290753977403, + "grad_norm": 1.358162383242242, + "learning_rate": 6.871560292364887e-08, + "loss": 0.4765484929084778, + "step": 
7697 + }, + { + "epoch": 1.774959649527323, + "grad_norm": 1.5701618596645643, + "learning_rate": 6.857677981979659e-08, + "loss": 0.4176154136657715, + "step": 7698 + }, + { + "epoch": 1.7751902236569057, + "grad_norm": 1.5881043143352427, + "learning_rate": 6.84380921077492e-08, + "loss": 0.410483717918396, + "step": 7699 + }, + { + "epoch": 1.7754207977864884, + "grad_norm": 1.876508092569716, + "learning_rate": 6.829953980766612e-08, + "loss": 0.5188060998916626, + "step": 7700 + }, + { + "epoch": 1.7756513719160711, + "grad_norm": 1.5514145308665186, + "learning_rate": 6.816112293968745e-08, + "loss": 0.47039783000946045, + "step": 7701 + }, + { + "epoch": 1.7758819460456536, + "grad_norm": 1.6296649452825585, + "learning_rate": 6.802284152393345e-08, + "loss": 0.5367648601531982, + "step": 7702 + }, + { + "epoch": 1.7761125201752364, + "grad_norm": 1.55513001656084, + "learning_rate": 6.78846955805048e-08, + "loss": 0.500449538230896, + "step": 7703 + }, + { + "epoch": 1.7763430943048188, + "grad_norm": 1.5060722099238588, + "learning_rate": 6.774668512948234e-08, + "loss": 0.4579819440841675, + "step": 7704 + }, + { + "epoch": 1.7765736684344016, + "grad_norm": 1.7824280377613644, + "learning_rate": 6.760881019092712e-08, + "loss": 0.41459107398986816, + "step": 7705 + }, + { + "epoch": 1.7768042425639843, + "grad_norm": 1.7900526752813857, + "learning_rate": 6.747107078488112e-08, + "loss": 0.46020573377609253, + "step": 7706 + }, + { + "epoch": 1.777034816693567, + "grad_norm": 1.7709884076088374, + "learning_rate": 6.733346693136566e-08, + "loss": 0.48069459199905396, + "step": 7707 + }, + { + "epoch": 1.7772653908231497, + "grad_norm": 1.4499402707441236, + "learning_rate": 6.719599865038328e-08, + "loss": 0.3514458239078522, + "step": 7708 + }, + { + "epoch": 1.7774959649527324, + "grad_norm": 1.7044500533180955, + "learning_rate": 6.705866596191601e-08, + "loss": 0.4696041941642761, + "step": 7709 + }, + { + "epoch": 1.777726539082315, + "grad_norm": 
1.6058185659780073, + "learning_rate": 6.692146888592675e-08, + "loss": 0.45286083221435547, + "step": 7710 + }, + { + "epoch": 1.7779571132118976, + "grad_norm": 1.8525271361461533, + "learning_rate": 6.678440744235848e-08, + "loss": 0.4659677743911743, + "step": 7711 + }, + { + "epoch": 1.7781876873414801, + "grad_norm": 1.5770202034991272, + "learning_rate": 6.664748165113432e-08, + "loss": 0.4030906558036804, + "step": 7712 + }, + { + "epoch": 1.7784182614710629, + "grad_norm": 1.4781448065809968, + "learning_rate": 6.651069153215804e-08, + "loss": 0.4878493547439575, + "step": 7713 + }, + { + "epoch": 1.7786488356006456, + "grad_norm": 2.5716911461046115, + "learning_rate": 6.637403710531352e-08, + "loss": 0.4651924669742584, + "step": 7714 + }, + { + "epoch": 1.7788794097302283, + "grad_norm": 1.5268258649377473, + "learning_rate": 6.623751839046455e-08, + "loss": 0.37795954942703247, + "step": 7715 + }, + { + "epoch": 1.779109983859811, + "grad_norm": 1.8617699048987524, + "learning_rate": 6.610113540745577e-08, + "loss": 0.5722923278808594, + "step": 7716 + }, + { + "epoch": 1.7793405579893937, + "grad_norm": 2.039919155814789, + "learning_rate": 6.59648881761118e-08, + "loss": 0.46933984756469727, + "step": 7717 + }, + { + "epoch": 1.7795711321189762, + "grad_norm": 1.7692714186594531, + "learning_rate": 6.582877671623732e-08, + "loss": 0.5066707134246826, + "step": 7718 + }, + { + "epoch": 1.779801706248559, + "grad_norm": 1.5518843020711044, + "learning_rate": 6.569280104761787e-08, + "loss": 0.5064150094985962, + "step": 7719 + }, + { + "epoch": 1.7800322803781414, + "grad_norm": 1.4858522723338492, + "learning_rate": 6.555696119001853e-08, + "loss": 0.408633828163147, + "step": 7720 + }, + { + "epoch": 1.7802628545077241, + "grad_norm": 1.9460802080180855, + "learning_rate": 6.542125716318514e-08, + "loss": 0.4960691034793854, + "step": 7721 + }, + { + "epoch": 1.7804934286373069, + "grad_norm": 1.609433139750494, + "learning_rate": 
6.528568898684373e-08, + "loss": 0.4275667071342468, + "step": 7722 + }, + { + "epoch": 1.7807240027668896, + "grad_norm": 1.5242191505097453, + "learning_rate": 6.515025668070062e-08, + "loss": 0.5309962630271912, + "step": 7723 + }, + { + "epoch": 1.7809545768964723, + "grad_norm": 1.3218748644597216, + "learning_rate": 6.501496026444197e-08, + "loss": 0.42067253589630127, + "step": 7724 + }, + { + "epoch": 1.781185151026055, + "grad_norm": 1.5205678956011466, + "learning_rate": 6.487979975773484e-08, + "loss": 0.43419337272644043, + "step": 7725 + }, + { + "epoch": 1.7814157251556375, + "grad_norm": 1.728456021255068, + "learning_rate": 6.474477518022592e-08, + "loss": 0.46563541889190674, + "step": 7726 + }, + { + "epoch": 1.7816462992852202, + "grad_norm": 1.2994636821353438, + "learning_rate": 6.460988655154232e-08, + "loss": 0.4233010411262512, + "step": 7727 + }, + { + "epoch": 1.7818768734148027, + "grad_norm": 1.5541073736247684, + "learning_rate": 6.447513389129155e-08, + "loss": 0.47119754552841187, + "step": 7728 + }, + { + "epoch": 1.7821074475443854, + "grad_norm": 1.7457851161988949, + "learning_rate": 6.434051721906142e-08, + "loss": 0.5227707624435425, + "step": 7729 + }, + { + "epoch": 1.7823380216739682, + "grad_norm": 1.6453844551794445, + "learning_rate": 6.42060365544198e-08, + "loss": 0.4521239399909973, + "step": 7730 + }, + { + "epoch": 1.7825685958035509, + "grad_norm": 1.5739071323130231, + "learning_rate": 6.407169191691464e-08, + "loss": 0.36693084239959717, + "step": 7731 + }, + { + "epoch": 1.7827991699331336, + "grad_norm": 1.9032214424835083, + "learning_rate": 6.393748332607463e-08, + "loss": 0.43610745668411255, + "step": 7732 + }, + { + "epoch": 1.7830297440627163, + "grad_norm": 1.4784257370105836, + "learning_rate": 6.380341080140794e-08, + "loss": 0.4471576511859894, + "step": 7733 + }, + { + "epoch": 1.7832603181922988, + "grad_norm": 1.61284007349941, + "learning_rate": 6.366947436240367e-08, + "loss": 0.48119011521339417, 
+ "step": 7734 + }, + { + "epoch": 1.7834908923218815, + "grad_norm": 1.4393647934894105, + "learning_rate": 6.353567402853055e-08, + "loss": 0.44503623247146606, + "step": 7735 + }, + { + "epoch": 1.783721466451464, + "grad_norm": 1.3430253886827939, + "learning_rate": 6.340200981923804e-08, + "loss": 0.3350965678691864, + "step": 7736 + }, + { + "epoch": 1.7839520405810467, + "grad_norm": 1.4031838686370632, + "learning_rate": 6.326848175395572e-08, + "loss": 0.4814649224281311, + "step": 7737 + }, + { + "epoch": 1.7841826147106294, + "grad_norm": 1.3042254858214102, + "learning_rate": 6.313508985209281e-08, + "loss": 0.42114442586898804, + "step": 7738 + }, + { + "epoch": 1.7844131888402122, + "grad_norm": 1.4924201661244643, + "learning_rate": 6.30018341330396e-08, + "loss": 0.5044004917144775, + "step": 7739 + }, + { + "epoch": 1.7846437629697949, + "grad_norm": 1.7211591431218773, + "learning_rate": 6.286871461616594e-08, + "loss": 0.46084678173065186, + "step": 7740 + }, + { + "epoch": 1.7848743370993776, + "grad_norm": 1.8074380950640034, + "learning_rate": 6.273573132082222e-08, + "loss": 0.5159536600112915, + "step": 7741 + }, + { + "epoch": 1.78510491122896, + "grad_norm": 2.6340339816007394, + "learning_rate": 6.260288426633875e-08, + "loss": 0.4394105076789856, + "step": 7742 + }, + { + "epoch": 1.7853354853585428, + "grad_norm": 1.415651636415873, + "learning_rate": 6.247017347202643e-08, + "loss": 0.39798909425735474, + "step": 7743 + }, + { + "epoch": 1.7855660594881253, + "grad_norm": 1.439083218855293, + "learning_rate": 6.23375989571756e-08, + "loss": 0.3865649104118347, + "step": 7744 + }, + { + "epoch": 1.785796633617708, + "grad_norm": 1.3172940172138528, + "learning_rate": 6.220516074105808e-08, + "loss": 0.3641304671764374, + "step": 7745 + }, + { + "epoch": 1.7860272077472907, + "grad_norm": 1.7148086023867872, + "learning_rate": 6.207285884292468e-08, + "loss": 0.5025773644447327, + "step": 7746 + }, + { + "epoch": 1.7862577818768735, + 
"grad_norm": 1.5237733931532715, + "learning_rate": 6.194069328200669e-08, + "loss": 0.4289078414440155, + "step": 7747 + }, + { + "epoch": 1.7864883560064562, + "grad_norm": 1.5368409458369108, + "learning_rate": 6.180866407751595e-08, + "loss": 0.37442147731781006, + "step": 7748 + }, + { + "epoch": 1.7867189301360389, + "grad_norm": 1.6962674881863276, + "learning_rate": 6.167677124864412e-08, + "loss": 0.4975471794605255, + "step": 7749 + }, + { + "epoch": 1.7869495042656214, + "grad_norm": 1.7290797112616507, + "learning_rate": 6.154501481456331e-08, + "loss": 0.42754751443862915, + "step": 7750 + }, + { + "epoch": 1.787180078395204, + "grad_norm": 1.508949301788889, + "learning_rate": 6.141339479442542e-08, + "loss": 0.40203964710235596, + "step": 7751 + }, + { + "epoch": 1.7874106525247866, + "grad_norm": 1.6453479393381845, + "learning_rate": 6.128191120736293e-08, + "loss": 0.46465349197387695, + "step": 7752 + }, + { + "epoch": 1.7876412266543693, + "grad_norm": 1.527112166022553, + "learning_rate": 6.11505640724882e-08, + "loss": 0.43915730714797974, + "step": 7753 + }, + { + "epoch": 1.787871800783952, + "grad_norm": 1.6855929805801586, + "learning_rate": 6.101935340889419e-08, + "loss": 0.5205652713775635, + "step": 7754 + }, + { + "epoch": 1.7881023749135347, + "grad_norm": 1.8024849017160496, + "learning_rate": 6.088827923565321e-08, + "loss": 0.39400190114974976, + "step": 7755 + }, + { + "epoch": 1.7883329490431175, + "grad_norm": 1.585632228373493, + "learning_rate": 6.075734157181855e-08, + "loss": 0.48021531105041504, + "step": 7756 + }, + { + "epoch": 1.7885635231727002, + "grad_norm": 1.313118747015303, + "learning_rate": 6.062654043642334e-08, + "loss": 0.42780327796936035, + "step": 7757 + }, + { + "epoch": 1.7887940973022827, + "grad_norm": 1.5444008946931698, + "learning_rate": 6.049587584848059e-08, + "loss": 0.4307866096496582, + "step": 7758 + }, + { + "epoch": 1.7890246714318654, + "grad_norm": 1.8803266889221286, + "learning_rate": 
6.036534782698377e-08, + "loss": 0.4258533716201782, + "step": 7759 + }, + { + "epoch": 1.7892552455614479, + "grad_norm": 1.7033971690196206, + "learning_rate": 6.02349563909067e-08, + "loss": 0.5159060955047607, + "step": 7760 + }, + { + "epoch": 1.7894858196910306, + "grad_norm": 1.4016246032179807, + "learning_rate": 6.0104701559203e-08, + "loss": 0.4407171308994293, + "step": 7761 + }, + { + "epoch": 1.7897163938206133, + "grad_norm": 1.4060175796774192, + "learning_rate": 5.99745833508063e-08, + "loss": 0.40273964405059814, + "step": 7762 + }, + { + "epoch": 1.789946967950196, + "grad_norm": 1.5929040194351833, + "learning_rate": 5.984460178463102e-08, + "loss": 0.42018163204193115, + "step": 7763 + }, + { + "epoch": 1.7901775420797787, + "grad_norm": 1.5421517490968868, + "learning_rate": 5.971475687957084e-08, + "loss": 0.519807755947113, + "step": 7764 + }, + { + "epoch": 1.7904081162093615, + "grad_norm": 1.4320196013314206, + "learning_rate": 5.9585048654500535e-08, + "loss": 0.42557477951049805, + "step": 7765 + }, + { + "epoch": 1.790638690338944, + "grad_norm": 1.520426042431449, + "learning_rate": 5.9455477128273924e-08, + "loss": 0.39568305015563965, + "step": 7766 + }, + { + "epoch": 1.7908692644685267, + "grad_norm": 1.566797519717712, + "learning_rate": 5.932604231972593e-08, + "loss": 0.43125781416893005, + "step": 7767 + }, + { + "epoch": 1.7910998385981092, + "grad_norm": 1.5764190405770546, + "learning_rate": 5.919674424767129e-08, + "loss": 0.46194958686828613, + "step": 7768 + }, + { + "epoch": 1.791330412727692, + "grad_norm": 1.3811294262508054, + "learning_rate": 5.906758293090441e-08, + "loss": 0.40115779638290405, + "step": 7769 + }, + { + "epoch": 1.7915609868572746, + "grad_norm": 1.4511176958262644, + "learning_rate": 5.893855838820061e-08, + "loss": 0.46589648723602295, + "step": 7770 + }, + { + "epoch": 1.7917915609868573, + "grad_norm": 1.4613820552852321, + "learning_rate": 5.880967063831455e-08, + "loss": 0.3540228605270386, + 
"step": 7771 + }, + { + "epoch": 1.79202213511644, + "grad_norm": 1.3900736631273891, + "learning_rate": 5.868091969998168e-08, + "loss": 0.4324638545513153, + "step": 7772 + }, + { + "epoch": 1.7922527092460228, + "grad_norm": 1.426811730253004, + "learning_rate": 5.855230559191693e-08, + "loss": 0.4301075339317322, + "step": 7773 + }, + { + "epoch": 1.7924832833756053, + "grad_norm": 1.4903234676277026, + "learning_rate": 5.842382833281612e-08, + "loss": 0.4496096670627594, + "step": 7774 + }, + { + "epoch": 1.792713857505188, + "grad_norm": 1.7119132871592322, + "learning_rate": 5.8295487941354195e-08, + "loss": 0.4554907977581024, + "step": 7775 + }, + { + "epoch": 1.7929444316347705, + "grad_norm": 1.6357284914311145, + "learning_rate": 5.816728443618701e-08, + "loss": 0.5020148158073425, + "step": 7776 + }, + { + "epoch": 1.7931750057643532, + "grad_norm": 1.5886767874513543, + "learning_rate": 5.803921783595045e-08, + "loss": 0.4073353409767151, + "step": 7777 + }, + { + "epoch": 1.793405579893936, + "grad_norm": 1.7806143022342438, + "learning_rate": 5.791128815925983e-08, + "loss": 0.4995894432067871, + "step": 7778 + }, + { + "epoch": 1.7936361540235186, + "grad_norm": 1.4290018525481676, + "learning_rate": 5.778349542471139e-08, + "loss": 0.5383706092834473, + "step": 7779 + }, + { + "epoch": 1.7938667281531013, + "grad_norm": 1.5928372327878688, + "learning_rate": 5.765583965088083e-08, + "loss": 0.4206235408782959, + "step": 7780 + }, + { + "epoch": 1.794097302282684, + "grad_norm": 1.516533597399375, + "learning_rate": 5.752832085632453e-08, + "loss": 0.49053555727005005, + "step": 7781 + }, + { + "epoch": 1.7943278764122665, + "grad_norm": 1.4761016261714877, + "learning_rate": 5.740093905957832e-08, + "loss": 0.4372660517692566, + "step": 7782 + }, + { + "epoch": 1.7945584505418493, + "grad_norm": 1.364372499711938, + "learning_rate": 5.727369427915851e-08, + "loss": 0.40125733613967896, + "step": 7783 + }, + { + "epoch": 1.7947890246714318, + 
"grad_norm": 1.5421908029736124, + "learning_rate": 5.714658653356153e-08, + "loss": 0.3595162034034729, + "step": 7784 + }, + { + "epoch": 1.7950195988010145, + "grad_norm": 1.4909078230640012, + "learning_rate": 5.7019615841263915e-08, + "loss": 0.42618101835250854, + "step": 7785 + }, + { + "epoch": 1.7952501729305972, + "grad_norm": 1.2890347032019704, + "learning_rate": 5.6892782220721694e-08, + "loss": 0.39135509729385376, + "step": 7786 + }, + { + "epoch": 1.79548074706018, + "grad_norm": 1.2930421412734876, + "learning_rate": 5.6766085690372004e-08, + "loss": 0.3792929947376251, + "step": 7787 + }, + { + "epoch": 1.7957113211897626, + "grad_norm": 2.137954515105217, + "learning_rate": 5.6639526268631e-08, + "loss": 0.5193231105804443, + "step": 7788 + }, + { + "epoch": 1.7959418953193451, + "grad_norm": 1.3992061535387368, + "learning_rate": 5.6513103973895415e-08, + "loss": 0.3896862268447876, + "step": 7789 + }, + { + "epoch": 1.7961724694489278, + "grad_norm": 1.6107653457361368, + "learning_rate": 5.638681882454211e-08, + "loss": 0.5345273017883301, + "step": 7790 + }, + { + "epoch": 1.7964030435785103, + "grad_norm": 1.597285051654587, + "learning_rate": 5.626067083892794e-08, + "loss": 0.4297627806663513, + "step": 7791 + }, + { + "epoch": 1.796633617708093, + "grad_norm": 1.8890048408663909, + "learning_rate": 5.6134660035389914e-08, + "loss": 0.3176969587802887, + "step": 7792 + }, + { + "epoch": 1.7968641918376758, + "grad_norm": 1.684652354437091, + "learning_rate": 5.600878643224471e-08, + "loss": 0.5449323654174805, + "step": 7793 + }, + { + "epoch": 1.7970947659672585, + "grad_norm": 1.3924882582172304, + "learning_rate": 5.588305004778959e-08, + "loss": 0.38096293807029724, + "step": 7794 + }, + { + "epoch": 1.7973253400968412, + "grad_norm": 1.6284420500901806, + "learning_rate": 5.575745090030137e-08, + "loss": 0.3917475938796997, + "step": 7795 + }, + { + "epoch": 1.797555914226424, + "grad_norm": 1.8012275849309003, + "learning_rate": 
5.563198900803734e-08, + "loss": 0.41522616147994995, + "step": 7796 + }, + { + "epoch": 1.7977864883560064, + "grad_norm": 1.4000666419018515, + "learning_rate": 5.550666438923468e-08, + "loss": 0.46558207273483276, + "step": 7797 + }, + { + "epoch": 1.7980170624855891, + "grad_norm": 1.4562091239424864, + "learning_rate": 5.538147706211038e-08, + "loss": 0.43256324529647827, + "step": 7798 + }, + { + "epoch": 1.7982476366151716, + "grad_norm": 1.5167378404298808, + "learning_rate": 5.5256427044861666e-08, + "loss": 0.37302178144454956, + "step": 7799 + }, + { + "epoch": 1.7984782107447543, + "grad_norm": 1.7103098772379584, + "learning_rate": 5.5131514355666095e-08, + "loss": 0.5247504711151123, + "step": 7800 + }, + { + "epoch": 1.798708784874337, + "grad_norm": 1.3345270008803303, + "learning_rate": 5.5006739012680934e-08, + "loss": 0.3906348943710327, + "step": 7801 + }, + { + "epoch": 1.7989393590039198, + "grad_norm": 1.863821074304618, + "learning_rate": 5.488210103404345e-08, + "loss": 0.5293325185775757, + "step": 7802 + }, + { + "epoch": 1.7991699331335025, + "grad_norm": 1.8021445170106478, + "learning_rate": 5.4757600437871146e-08, + "loss": 0.4189381003379822, + "step": 7803 + }, + { + "epoch": 1.7994005072630852, + "grad_norm": 1.4161978936431723, + "learning_rate": 5.4633237242261207e-08, + "loss": 0.40476128458976746, + "step": 7804 + }, + { + "epoch": 1.7996310813926677, + "grad_norm": 1.6288403815954717, + "learning_rate": 5.45090114652913e-08, + "loss": 0.3908376097679138, + "step": 7805 + }, + { + "epoch": 1.7998616555222504, + "grad_norm": 1.4731211435711635, + "learning_rate": 5.438492312501885e-08, + "loss": 0.42332786321640015, + "step": 7806 + }, + { + "epoch": 1.800092229651833, + "grad_norm": 1.2492034971721793, + "learning_rate": 5.426097223948123e-08, + "loss": 0.3398321866989136, + "step": 7807 + }, + { + "epoch": 1.8003228037814156, + "grad_norm": 1.410970674481118, + "learning_rate": 5.413715882669623e-08, + "loss": 
0.4610673189163208, + "step": 7808 + }, + { + "epoch": 1.8005533779109983, + "grad_norm": 1.4416956666235687, + "learning_rate": 5.401348290466112e-08, + "loss": 0.4149124026298523, + "step": 7809 + }, + { + "epoch": 1.800783952040581, + "grad_norm": 1.4475278396115219, + "learning_rate": 5.388994449135376e-08, + "loss": 0.47464168071746826, + "step": 7810 + }, + { + "epoch": 1.8010145261701638, + "grad_norm": 1.4581354291230397, + "learning_rate": 5.376654360473121e-08, + "loss": 0.4530913829803467, + "step": 7811 + }, + { + "epoch": 1.8012451002997465, + "grad_norm": 1.7198902838066041, + "learning_rate": 5.364328026273157e-08, + "loss": 0.5577078461647034, + "step": 7812 + }, + { + "epoch": 1.801475674429329, + "grad_norm": 1.828526033611825, + "learning_rate": 5.3520154483272075e-08, + "loss": 0.4772539436817169, + "step": 7813 + }, + { + "epoch": 1.8017062485589117, + "grad_norm": 1.690066578469317, + "learning_rate": 5.339716628425039e-08, + "loss": 0.5387610197067261, + "step": 7814 + }, + { + "epoch": 1.8019368226884942, + "grad_norm": 1.7130913599502742, + "learning_rate": 5.327431568354401e-08, + "loss": 0.4505125880241394, + "step": 7815 + }, + { + "epoch": 1.802167396818077, + "grad_norm": 1.5145450098970203, + "learning_rate": 5.3151602699010867e-08, + "loss": 0.43021589517593384, + "step": 7816 + }, + { + "epoch": 1.8023979709476596, + "grad_norm": 1.6184493194868252, + "learning_rate": 5.3029027348488244e-08, + "loss": 0.44107457995414734, + "step": 7817 + }, + { + "epoch": 1.8026285450772424, + "grad_norm": 1.6224833006548345, + "learning_rate": 5.2906589649793666e-08, + "loss": 0.42265504598617554, + "step": 7818 + }, + { + "epoch": 1.802859119206825, + "grad_norm": 1.3828256021454344, + "learning_rate": 5.2784289620724895e-08, + "loss": 0.4814263582229614, + "step": 7819 + }, + { + "epoch": 1.8030896933364078, + "grad_norm": 1.3840958899744187, + "learning_rate": 5.2662127279059275e-08, + "loss": 0.4255106747150421, + "step": 7820 + }, + { + 
"epoch": 1.8033202674659903, + "grad_norm": 1.3789211684549096, + "learning_rate": 5.2540102642554593e-08, + "loss": 0.43405312299728394, + "step": 7821 + }, + { + "epoch": 1.803550841595573, + "grad_norm": 1.5062041567676776, + "learning_rate": 5.2418215728948004e-08, + "loss": 0.3986097574234009, + "step": 7822 + }, + { + "epoch": 1.8037814157251555, + "grad_norm": 1.7653469724585684, + "learning_rate": 5.2296466555957205e-08, + "loss": 0.4988093972206116, + "step": 7823 + }, + { + "epoch": 1.8040119898547382, + "grad_norm": 1.6382094442265007, + "learning_rate": 5.217485514127973e-08, + "loss": 0.5290527939796448, + "step": 7824 + }, + { + "epoch": 1.804242563984321, + "grad_norm": 1.4794199807921353, + "learning_rate": 5.205338150259308e-08, + "loss": 0.3705815076828003, + "step": 7825 + }, + { + "epoch": 1.8044731381139036, + "grad_norm": 1.3872232407887637, + "learning_rate": 5.193204565755449e-08, + "loss": 0.37735384702682495, + "step": 7826 + }, + { + "epoch": 1.8047037122434864, + "grad_norm": 1.38875357732027, + "learning_rate": 5.1810847623801504e-08, + "loss": 0.39033758640289307, + "step": 7827 + }, + { + "epoch": 1.804934286373069, + "grad_norm": 1.5105458662939806, + "learning_rate": 5.168978741895147e-08, + "loss": 0.4669237732887268, + "step": 7828 + }, + { + "epoch": 1.8051648605026516, + "grad_norm": 1.6910832171163468, + "learning_rate": 5.156886506060154e-08, + "loss": 0.5178482532501221, + "step": 7829 + }, + { + "epoch": 1.8053954346322343, + "grad_norm": 1.4473544670706617, + "learning_rate": 5.14480805663291e-08, + "loss": 0.44134122133255005, + "step": 7830 + }, + { + "epoch": 1.8056260087618168, + "grad_norm": 1.5836257156251672, + "learning_rate": 5.132743395369144e-08, + "loss": 0.44371920824050903, + "step": 7831 + }, + { + "epoch": 1.8058565828913995, + "grad_norm": 1.513244295553376, + "learning_rate": 5.1206925240225964e-08, + "loss": 0.43268662691116333, + "step": 7832 + }, + { + "epoch": 1.8060871570209822, + "grad_norm": 
1.736730853895812, + "learning_rate": 5.1086554443449445e-08, + "loss": 0.5035665035247803, + "step": 7833 + }, + { + "epoch": 1.806317731150565, + "grad_norm": 1.3694047806165788, + "learning_rate": 5.0966321580859336e-08, + "loss": 0.4987141191959381, + "step": 7834 + }, + { + "epoch": 1.8065483052801476, + "grad_norm": 1.816085685560109, + "learning_rate": 5.0846226669932437e-08, + "loss": 0.5951617956161499, + "step": 7835 + }, + { + "epoch": 1.8067788794097304, + "grad_norm": 1.464038827862328, + "learning_rate": 5.072626972812599e-08, + "loss": 0.4710814654827118, + "step": 7836 + }, + { + "epoch": 1.8070094535393129, + "grad_norm": 1.6196482413694708, + "learning_rate": 5.060645077287662e-08, + "loss": 0.5173348188400269, + "step": 7837 + }, + { + "epoch": 1.8072400276688956, + "grad_norm": 1.4170272466334293, + "learning_rate": 5.048676982160161e-08, + "loss": 0.49508416652679443, + "step": 7838 + }, + { + "epoch": 1.807470601798478, + "grad_norm": 1.7639395740589152, + "learning_rate": 5.03672268916977e-08, + "loss": 0.4535290598869324, + "step": 7839 + }, + { + "epoch": 1.8077011759280608, + "grad_norm": 1.7696762607003815, + "learning_rate": 5.024782200054145e-08, + "loss": 0.5337553024291992, + "step": 7840 + }, + { + "epoch": 1.8079317500576435, + "grad_norm": 1.6346280356935987, + "learning_rate": 5.012855516548986e-08, + "loss": 0.47118210792541504, + "step": 7841 + }, + { + "epoch": 1.8081623241872262, + "grad_norm": 1.504680600844573, + "learning_rate": 5.0009426403879283e-08, + "loss": 0.4458848237991333, + "step": 7842 + }, + { + "epoch": 1.808392898316809, + "grad_norm": 1.5297682575974059, + "learning_rate": 4.9890435733026536e-08, + "loss": 0.5055558681488037, + "step": 7843 + }, + { + "epoch": 1.8086234724463917, + "grad_norm": 1.4365609441585347, + "learning_rate": 4.9771583170228006e-08, + "loss": 0.43715038895606995, + "step": 7844 + }, + { + "epoch": 1.8088540465759742, + "grad_norm": 1.545411862707653, + "learning_rate": 
4.96528687327602e-08, + "loss": 0.427906334400177, + "step": 7845 + }, + { + "epoch": 1.8090846207055569, + "grad_norm": 1.6703597275780244, + "learning_rate": 4.953429243787932e-08, + "loss": 0.48160994052886963, + "step": 7846 + }, + { + "epoch": 1.8093151948351394, + "grad_norm": 1.3261658854233023, + "learning_rate": 4.941585430282158e-08, + "loss": 0.40856754779815674, + "step": 7847 + }, + { + "epoch": 1.809545768964722, + "grad_norm": 1.3569384823756985, + "learning_rate": 4.929755434480354e-08, + "loss": 0.40482330322265625, + "step": 7848 + }, + { + "epoch": 1.8097763430943048, + "grad_norm": 1.530544362283251, + "learning_rate": 4.9179392581021e-08, + "loss": 0.4286755323410034, + "step": 7849 + }, + { + "epoch": 1.8100069172238875, + "grad_norm": 1.5805205551700128, + "learning_rate": 4.906136902864999e-08, + "loss": 0.4436051547527313, + "step": 7850 + }, + { + "epoch": 1.8102374913534702, + "grad_norm": 1.5320309451669083, + "learning_rate": 4.8943483704846465e-08, + "loss": 0.41794437170028687, + "step": 7851 + }, + { + "epoch": 1.810468065483053, + "grad_norm": 1.4506407579843814, + "learning_rate": 4.8825736626746384e-08, + "loss": 0.4308912754058838, + "step": 7852 + }, + { + "epoch": 1.8106986396126354, + "grad_norm": 1.5274898640972132, + "learning_rate": 4.870812781146516e-08, + "loss": 0.43090081214904785, + "step": 7853 + }, + { + "epoch": 1.8109292137422182, + "grad_norm": 1.3117483081436436, + "learning_rate": 4.859065727609857e-08, + "loss": 0.4329320192337036, + "step": 7854 + }, + { + "epoch": 1.8111597878718007, + "grad_norm": 1.266199300666261, + "learning_rate": 4.8473325037722276e-08, + "loss": 0.3162953853607178, + "step": 7855 + }, + { + "epoch": 1.8113903620013834, + "grad_norm": 1.4534333887380995, + "learning_rate": 4.835613111339165e-08, + "loss": 0.37513065338134766, + "step": 7856 + }, + { + "epoch": 1.811620936130966, + "grad_norm": 1.494207838495638, + "learning_rate": 4.823907552014195e-08, + "loss": 0.4120938181877136, + 
"step": 7857 + }, + { + "epoch": 1.8118515102605488, + "grad_norm": 1.555741011782435, + "learning_rate": 4.8122158274988555e-08, + "loss": 0.4295421242713928, + "step": 7858 + }, + { + "epoch": 1.8120820843901315, + "grad_norm": 1.4697042695976983, + "learning_rate": 4.8005379394926435e-08, + "loss": 0.44738203287124634, + "step": 7859 + }, + { + "epoch": 1.8123126585197142, + "grad_norm": 1.7388489283467792, + "learning_rate": 4.7888738896930456e-08, + "loss": 0.447609007358551, + "step": 7860 + }, + { + "epoch": 1.8125432326492967, + "grad_norm": 1.6367328188270214, + "learning_rate": 4.777223679795561e-08, + "loss": 0.38288167119026184, + "step": 7861 + }, + { + "epoch": 1.8127738067788794, + "grad_norm": 1.5566909994885838, + "learning_rate": 4.765587311493668e-08, + "loss": 0.5003981590270996, + "step": 7862 + }, + { + "epoch": 1.813004380908462, + "grad_norm": 1.5140425774804767, + "learning_rate": 4.7539647864788476e-08, + "loss": 0.5244492888450623, + "step": 7863 + }, + { + "epoch": 1.8132349550380447, + "grad_norm": 1.4098788698269693, + "learning_rate": 4.742356106440526e-08, + "loss": 0.505184531211853, + "step": 7864 + }, + { + "epoch": 1.8134655291676274, + "grad_norm": 2.493869291024891, + "learning_rate": 4.7307612730661636e-08, + "loss": 0.5364291071891785, + "step": 7865 + }, + { + "epoch": 1.81369610329721, + "grad_norm": 1.5655893218937025, + "learning_rate": 4.719180288041158e-08, + "loss": 0.4370742738246918, + "step": 7866 + }, + { + "epoch": 1.8139266774267928, + "grad_norm": 1.3233268572547954, + "learning_rate": 4.7076131530489505e-08, + "loss": 0.37784355878829956, + "step": 7867 + }, + { + "epoch": 1.8141572515563755, + "grad_norm": 1.6040150628213576, + "learning_rate": 4.6960598697709294e-08, + "loss": 0.5184513330459595, + "step": 7868 + }, + { + "epoch": 1.814387825685958, + "grad_norm": 1.6174173359265467, + "learning_rate": 4.6845204398864743e-08, + "loss": 0.41221511363983154, + "step": 7869 + }, + { + "epoch": 
1.8146183998155407, + "grad_norm": 1.960596641519608, + "learning_rate": 4.672994865072965e-08, + "loss": 0.43040651082992554, + "step": 7870 + }, + { + "epoch": 1.8148489739451232, + "grad_norm": 1.887961823292038, + "learning_rate": 4.6614831470057625e-08, + "loss": 0.4681999385356903, + "step": 7871 + }, + { + "epoch": 1.815079548074706, + "grad_norm": 1.5463001442495705, + "learning_rate": 4.649985287358227e-08, + "loss": 0.49752098321914673, + "step": 7872 + }, + { + "epoch": 1.8153101222042887, + "grad_norm": 1.4528059880154254, + "learning_rate": 4.6385012878016663e-08, + "loss": 0.4621706008911133, + "step": 7873 + }, + { + "epoch": 1.8155406963338714, + "grad_norm": 1.339046035541834, + "learning_rate": 4.627031150005401e-08, + "loss": 0.4359724521636963, + "step": 7874 + }, + { + "epoch": 1.815771270463454, + "grad_norm": 1.4288119410903932, + "learning_rate": 4.6155748756367294e-08, + "loss": 0.4901214838027954, + "step": 7875 + }, + { + "epoch": 1.8160018445930368, + "grad_norm": 1.7234395975437273, + "learning_rate": 4.604132466360955e-08, + "loss": 0.5012428760528564, + "step": 7876 + }, + { + "epoch": 1.8162324187226193, + "grad_norm": 1.6768636456338364, + "learning_rate": 4.592703923841323e-08, + "loss": 0.5048446655273438, + "step": 7877 + }, + { + "epoch": 1.816462992852202, + "grad_norm": 1.5761086054200695, + "learning_rate": 4.5812892497390955e-08, + "loss": 0.5025140047073364, + "step": 7878 + }, + { + "epoch": 1.8166935669817845, + "grad_norm": 1.5593886228823222, + "learning_rate": 4.5698884457135324e-08, + "loss": 0.4456709623336792, + "step": 7879 + }, + { + "epoch": 1.8169241411113672, + "grad_norm": 1.4583950124069596, + "learning_rate": 4.5585015134218196e-08, + "loss": 0.38283586502075195, + "step": 7880 + }, + { + "epoch": 1.81715471524095, + "grad_norm": 1.5479198908902716, + "learning_rate": 4.5471284545192004e-08, + "loss": 0.3458648920059204, + "step": 7881 + }, + { + "epoch": 1.8173852893705327, + "grad_norm": 
1.7126815699296334, + "learning_rate": 4.53576927065884e-08, + "loss": 0.4609532952308655, + "step": 7882 + }, + { + "epoch": 1.8176158635001154, + "grad_norm": 1.238404719965568, + "learning_rate": 4.524423963491919e-08, + "loss": 0.4250793159008026, + "step": 7883 + }, + { + "epoch": 1.817846437629698, + "grad_norm": 1.7276559977997992, + "learning_rate": 4.513092534667584e-08, + "loss": 0.41343796253204346, + "step": 7884 + }, + { + "epoch": 1.8180770117592806, + "grad_norm": 1.5863495927207087, + "learning_rate": 4.5017749858329736e-08, + "loss": 0.46575528383255005, + "step": 7885 + }, + { + "epoch": 1.8183075858888633, + "grad_norm": 1.7387493602059383, + "learning_rate": 4.4904713186332156e-08, + "loss": 0.47052180767059326, + "step": 7886 + }, + { + "epoch": 1.8185381600184458, + "grad_norm": 1.4938009961123744, + "learning_rate": 4.479181534711429e-08, + "loss": 0.42979568243026733, + "step": 7887 + }, + { + "epoch": 1.8187687341480285, + "grad_norm": 1.4298617258142596, + "learning_rate": 4.46790563570868e-08, + "loss": 0.4278537929058075, + "step": 7888 + }, + { + "epoch": 1.8189993082776112, + "grad_norm": 1.6571154898401685, + "learning_rate": 4.456643623264022e-08, + "loss": 0.45380616188049316, + "step": 7889 + }, + { + "epoch": 1.819229882407194, + "grad_norm": 1.6141969165708208, + "learning_rate": 4.445395499014526e-08, + "loss": 0.46085125207901, + "step": 7890 + }, + { + "epoch": 1.8194604565367767, + "grad_norm": 1.7363894486391924, + "learning_rate": 4.434161264595204e-08, + "loss": 0.47558531165122986, + "step": 7891 + }, + { + "epoch": 1.8196910306663594, + "grad_norm": 1.552212209885486, + "learning_rate": 4.4229409216390845e-08, + "loss": 0.42082321643829346, + "step": 7892 + }, + { + "epoch": 1.819921604795942, + "grad_norm": 1.6844917452185877, + "learning_rate": 4.411734471777129e-08, + "loss": 0.40222978591918945, + "step": 7893 + }, + { + "epoch": 1.8201521789255246, + "grad_norm": 1.7385505168528088, + "learning_rate": 
4.400541916638323e-08, + "loss": 0.39737701416015625, + "step": 7894 + }, + { + "epoch": 1.820382753055107, + "grad_norm": 1.6976347614290264, + "learning_rate": 4.389363257849632e-08, + "loss": 0.46538835763931274, + "step": 7895 + }, + { + "epoch": 1.8206133271846898, + "grad_norm": 2.034464057065236, + "learning_rate": 4.378198497035979e-08, + "loss": 0.4994567036628723, + "step": 7896 + }, + { + "epoch": 1.8208439013142725, + "grad_norm": 1.517699554285521, + "learning_rate": 4.367047635820264e-08, + "loss": 0.4574298858642578, + "step": 7897 + }, + { + "epoch": 1.8210744754438553, + "grad_norm": 1.7361916973448048, + "learning_rate": 4.3559106758234044e-08, + "loss": 0.4716116786003113, + "step": 7898 + }, + { + "epoch": 1.821305049573438, + "grad_norm": 1.7495776361282012, + "learning_rate": 4.344787618664247e-08, + "loss": 0.35549741983413696, + "step": 7899 + }, + { + "epoch": 1.8215356237030205, + "grad_norm": 1.673931935617008, + "learning_rate": 4.3336784659596226e-08, + "loss": 0.44955599308013916, + "step": 7900 + }, + { + "epoch": 1.8217661978326032, + "grad_norm": 1.2588104675314307, + "learning_rate": 4.322583219324394e-08, + "loss": 0.4047467112541199, + "step": 7901 + }, + { + "epoch": 1.8219967719621857, + "grad_norm": 1.3892625958432285, + "learning_rate": 4.3115018803713596e-08, + "loss": 0.40367889404296875, + "step": 7902 + }, + { + "epoch": 1.8222273460917684, + "grad_norm": 1.3189968956301878, + "learning_rate": 4.3004344507113096e-08, + "loss": 0.32705235481262207, + "step": 7903 + }, + { + "epoch": 1.8224579202213511, + "grad_norm": 1.3777118561947166, + "learning_rate": 4.2893809319529794e-08, + "loss": 0.3845488727092743, + "step": 7904 + }, + { + "epoch": 1.8226884943509338, + "grad_norm": 1.4977030222677208, + "learning_rate": 4.2783413257031495e-08, + "loss": 0.49070197343826294, + "step": 7905 + }, + { + "epoch": 1.8229190684805165, + "grad_norm": 1.729181630904155, + "learning_rate": 4.267315633566493e-08, + "loss": 
0.550437867641449, + "step": 7906 + }, + { + "epoch": 1.8231496426100993, + "grad_norm": 1.6119404797366197, + "learning_rate": 4.25630385714576e-08, + "loss": 0.5042926073074341, + "step": 7907 + }, + { + "epoch": 1.8233802167396818, + "grad_norm": 1.5956788246532367, + "learning_rate": 4.245305998041571e-08, + "loss": 0.48839205503463745, + "step": 7908 + }, + { + "epoch": 1.8236107908692645, + "grad_norm": 1.6028821186444346, + "learning_rate": 4.234322057852602e-08, + "loss": 0.4754030108451843, + "step": 7909 + }, + { + "epoch": 1.823841364998847, + "grad_norm": 1.5406282114264656, + "learning_rate": 4.223352038175487e-08, + "loss": 0.394174188375473, + "step": 7910 + }, + { + "epoch": 1.8240719391284297, + "grad_norm": 1.3144512253416945, + "learning_rate": 4.2123959406048183e-08, + "loss": 0.39882469177246094, + "step": 7911 + }, + { + "epoch": 1.8243025132580124, + "grad_norm": 1.3036980510979261, + "learning_rate": 4.201453766733176e-08, + "loss": 0.4611927270889282, + "step": 7912 + }, + { + "epoch": 1.8245330873875951, + "grad_norm": 1.3717750651706109, + "learning_rate": 4.190525518151122e-08, + "loss": 0.4164184331893921, + "step": 7913 + }, + { + "epoch": 1.8247636615171778, + "grad_norm": 1.7048234275294294, + "learning_rate": 4.179611196447186e-08, + "loss": 0.41586828231811523, + "step": 7914 + }, + { + "epoch": 1.8249942356467606, + "grad_norm": 1.486464242852147, + "learning_rate": 4.168710803207864e-08, + "loss": 0.4707748591899872, + "step": 7915 + }, + { + "epoch": 1.825224809776343, + "grad_norm": 1.6925426332325308, + "learning_rate": 4.157824340017657e-08, + "loss": 0.4235571622848511, + "step": 7916 + }, + { + "epoch": 1.8254553839059258, + "grad_norm": 1.5746767320284107, + "learning_rate": 4.146951808458998e-08, + "loss": 0.3761681914329529, + "step": 7917 + }, + { + "epoch": 1.8256859580355083, + "grad_norm": 1.9541083814793623, + "learning_rate": 4.136093210112346e-08, + "loss": 0.45545494556427, + "step": 7918 + }, + { + "epoch": 
1.825916532165091, + "grad_norm": 1.4946968371557119, + "learning_rate": 4.1252485465561035e-08, + "loss": 0.4154251515865326, + "step": 7919 + }, + { + "epoch": 1.8261471062946737, + "grad_norm": 1.4442817043721163, + "learning_rate": 4.114417819366633e-08, + "loss": 0.3664330244064331, + "step": 7920 + }, + { + "epoch": 1.8263776804242564, + "grad_norm": 1.4915985489350694, + "learning_rate": 4.10360103011832e-08, + "loss": 0.4527730643749237, + "step": 7921 + }, + { + "epoch": 1.8266082545538391, + "grad_norm": 1.6683615123339999, + "learning_rate": 4.092798180383461e-08, + "loss": 0.5245767831802368, + "step": 7922 + }, + { + "epoch": 1.8268388286834218, + "grad_norm": 1.6122193238326974, + "learning_rate": 4.0820092717323894e-08, + "loss": 0.39781343936920166, + "step": 7923 + }, + { + "epoch": 1.8270694028130043, + "grad_norm": 1.592304216861808, + "learning_rate": 4.071234305733362e-08, + "loss": 0.4173957109451294, + "step": 7924 + }, + { + "epoch": 1.827299976942587, + "grad_norm": 1.7592031102615102, + "learning_rate": 4.0604732839526256e-08, + "loss": 0.38840869069099426, + "step": 7925 + }, + { + "epoch": 1.8275305510721696, + "grad_norm": 1.777360398097105, + "learning_rate": 4.0497262079544294e-08, + "loss": 0.4107547998428345, + "step": 7926 + }, + { + "epoch": 1.8277611252017523, + "grad_norm": 1.5475583296259725, + "learning_rate": 4.038993079300956e-08, + "loss": 0.41102874279022217, + "step": 7927 + }, + { + "epoch": 1.827991699331335, + "grad_norm": 1.4229533643496446, + "learning_rate": 4.028273899552381e-08, + "loss": 0.3393939733505249, + "step": 7928 + }, + { + "epoch": 1.8282222734609177, + "grad_norm": 1.4844610719466356, + "learning_rate": 4.017568670266835e-08, + "loss": 0.42469024658203125, + "step": 7929 + }, + { + "epoch": 1.8284528475905004, + "grad_norm": 1.316542585504155, + "learning_rate": 4.006877393000441e-08, + "loss": 0.4869099259376526, + "step": 7930 + }, + { + "epoch": 1.8286834217200831, + "grad_norm": 1.3905230120628338, 
+ "learning_rate": 3.996200069307265e-08, + "loss": 0.4463779926300049, + "step": 7931 + }, + { + "epoch": 1.8289139958496656, + "grad_norm": 1.908726864953878, + "learning_rate": 3.985536700739378e-08, + "loss": 0.429579496383667, + "step": 7932 + }, + { + "epoch": 1.8291445699792483, + "grad_norm": 1.555687929117211, + "learning_rate": 3.9748872888468065e-08, + "loss": 0.38837558031082153, + "step": 7933 + }, + { + "epoch": 1.8293751441088308, + "grad_norm": 1.467502995951613, + "learning_rate": 3.964251835177568e-08, + "loss": 0.4444499909877777, + "step": 7934 + }, + { + "epoch": 1.8296057182384136, + "grad_norm": 1.5836026531003116, + "learning_rate": 3.953630341277603e-08, + "loss": 0.5216259360313416, + "step": 7935 + }, + { + "epoch": 1.8298362923679963, + "grad_norm": 1.316614330242316, + "learning_rate": 3.943022808690888e-08, + "loss": 0.46454817056655884, + "step": 7936 + }, + { + "epoch": 1.830066866497579, + "grad_norm": 1.5390661326727673, + "learning_rate": 3.9324292389593005e-08, + "loss": 0.38960570096969604, + "step": 7937 + }, + { + "epoch": 1.8302974406271617, + "grad_norm": 1.2960127878271992, + "learning_rate": 3.9218496336227426e-08, + "loss": 0.3318006992340088, + "step": 7938 + }, + { + "epoch": 1.8305280147567444, + "grad_norm": 1.501585055160058, + "learning_rate": 3.9112839942190725e-08, + "loss": 0.41555076837539673, + "step": 7939 + }, + { + "epoch": 1.830758588886327, + "grad_norm": 1.4035625255113318, + "learning_rate": 3.900732322284095e-08, + "loss": 0.4296320080757141, + "step": 7940 + }, + { + "epoch": 1.8309891630159096, + "grad_norm": 1.6738155247978692, + "learning_rate": 3.8901946193516055e-08, + "loss": 0.4416658282279968, + "step": 7941 + }, + { + "epoch": 1.8312197371454921, + "grad_norm": 1.885789179393057, + "learning_rate": 3.8796708869533676e-08, + "loss": 0.4539029598236084, + "step": 7942 + }, + { + "epoch": 1.8314503112750748, + "grad_norm": 1.4867619575158202, + "learning_rate": 3.869161126619136e-08, + "loss": 
0.4526992440223694, + "step": 7943 + }, + { + "epoch": 1.8316808854046576, + "grad_norm": 1.5927522884216676, + "learning_rate": 3.8586653398765766e-08, + "loss": 0.3991963863372803, + "step": 7944 + }, + { + "epoch": 1.8319114595342403, + "grad_norm": 1.4460483349984772, + "learning_rate": 3.848183528251381e-08, + "loss": 0.44474589824676514, + "step": 7945 + }, + { + "epoch": 1.832142033663823, + "grad_norm": 1.7969739964524274, + "learning_rate": 3.837715693267174e-08, + "loss": 0.5022028684616089, + "step": 7946 + }, + { + "epoch": 1.8323726077934057, + "grad_norm": 1.6274178723126447, + "learning_rate": 3.8272618364455836e-08, + "loss": 0.4839058518409729, + "step": 7947 + }, + { + "epoch": 1.8326031819229882, + "grad_norm": 1.7924980398771633, + "learning_rate": 3.8168219593061376e-08, + "loss": 0.3580874800682068, + "step": 7948 + }, + { + "epoch": 1.832833756052571, + "grad_norm": 1.6096517551702718, + "learning_rate": 3.806396063366424e-08, + "loss": 0.4350799024105072, + "step": 7949 + }, + { + "epoch": 1.8330643301821534, + "grad_norm": 1.3546161389632028, + "learning_rate": 3.79598415014194e-08, + "loss": 0.4386145770549774, + "step": 7950 + }, + { + "epoch": 1.8332949043117361, + "grad_norm": 1.4421267919386862, + "learning_rate": 3.785586221146142e-08, + "loss": 0.5122627019882202, + "step": 7951 + }, + { + "epoch": 1.8335254784413189, + "grad_norm": 1.3507016201924953, + "learning_rate": 3.77520227789051e-08, + "loss": 0.41197121143341064, + "step": 7952 + }, + { + "epoch": 1.8337560525709016, + "grad_norm": 1.7729553069577912, + "learning_rate": 3.764832321884426e-08, + "loss": 0.5508084297180176, + "step": 7953 + }, + { + "epoch": 1.8339866267004843, + "grad_norm": 1.3788371713361898, + "learning_rate": 3.754476354635283e-08, + "loss": 0.40791934728622437, + "step": 7954 + }, + { + "epoch": 1.834217200830067, + "grad_norm": 1.4693932480728087, + "learning_rate": 3.7441343776484113e-08, + "loss": 0.3880457878112793, + "step": 7955 + }, + { + 
"epoch": 1.8344477749596495, + "grad_norm": 1.4561569110121497, + "learning_rate": 3.7338063924271304e-08, + "loss": 0.40519118309020996, + "step": 7956 + }, + { + "epoch": 1.8346783490892322, + "grad_norm": 1.4799489730655653, + "learning_rate": 3.723492400472716e-08, + "loss": 0.46081095933914185, + "step": 7957 + }, + { + "epoch": 1.8349089232188147, + "grad_norm": 1.3167338346767847, + "learning_rate": 3.713192403284438e-08, + "loss": 0.3946321904659271, + "step": 7958 + }, + { + "epoch": 1.8351394973483974, + "grad_norm": 1.743632986191688, + "learning_rate": 3.702906402359474e-08, + "loss": 0.4699859023094177, + "step": 7959 + }, + { + "epoch": 1.8353700714779801, + "grad_norm": 1.4691817330554993, + "learning_rate": 3.692634399192995e-08, + "loss": 0.43031781911849976, + "step": 7960 + }, + { + "epoch": 1.8356006456075629, + "grad_norm": 1.5694622813964751, + "learning_rate": 3.6823763952781636e-08, + "loss": 0.4072418212890625, + "step": 7961 + }, + { + "epoch": 1.8358312197371456, + "grad_norm": 1.7009922761684866, + "learning_rate": 3.672132392106053e-08, + "loss": 0.40659528970718384, + "step": 7962 + }, + { + "epoch": 1.8360617938667283, + "grad_norm": 1.2845193385628964, + "learning_rate": 3.661902391165772e-08, + "loss": 0.41279205679893494, + "step": 7963 + }, + { + "epoch": 1.8362923679963108, + "grad_norm": 1.407521764327922, + "learning_rate": 3.65168639394432e-08, + "loss": 0.43887826800346375, + "step": 7964 + }, + { + "epoch": 1.8365229421258935, + "grad_norm": 1.585883988281566, + "learning_rate": 3.6414844019267196e-08, + "loss": 0.46111762523651123, + "step": 7965 + }, + { + "epoch": 1.836753516255476, + "grad_norm": 1.5089060420061358, + "learning_rate": 3.63129641659593e-08, + "loss": 0.42694801092147827, + "step": 7966 + }, + { + "epoch": 1.8369840903850587, + "grad_norm": 1.563222995065882, + "learning_rate": 3.6211224394328775e-08, + "loss": 0.4674855172634125, + "step": 7967 + }, + { + "epoch": 1.8372146645146414, + "grad_norm": 
1.6612957725595774, + "learning_rate": 3.610962471916435e-08, + "loss": 0.48998844623565674, + "step": 7968 + }, + { + "epoch": 1.8374452386442242, + "grad_norm": 1.517118505836267, + "learning_rate": 3.600816515523486e-08, + "loss": 0.4162273406982422, + "step": 7969 + }, + { + "epoch": 1.8376758127738069, + "grad_norm": 1.6498845355681542, + "learning_rate": 3.5906845717288304e-08, + "loss": 0.4446166753768921, + "step": 7970 + }, + { + "epoch": 1.8379063869033896, + "grad_norm": 1.6723175784368125, + "learning_rate": 3.580566642005245e-08, + "loss": 0.4782527983188629, + "step": 7971 + }, + { + "epoch": 1.838136961032972, + "grad_norm": 1.667138689471541, + "learning_rate": 3.570462727823476e-08, + "loss": 0.43014609813690186, + "step": 7972 + }, + { + "epoch": 1.8383675351625548, + "grad_norm": 1.5808858327085533, + "learning_rate": 3.560372830652225e-08, + "loss": 0.5155357122421265, + "step": 7973 + }, + { + "epoch": 1.8385981092921373, + "grad_norm": 1.4181681095350445, + "learning_rate": 3.5502969519581984e-08, + "loss": 0.4231104254722595, + "step": 7974 + }, + { + "epoch": 1.83882868342172, + "grad_norm": 1.8426199170185766, + "learning_rate": 3.540235093205979e-08, + "loss": 0.529877245426178, + "step": 7975 + }, + { + "epoch": 1.8390592575513027, + "grad_norm": 1.5632800597633676, + "learning_rate": 3.530187255858186e-08, + "loss": 0.4841991662979126, + "step": 7976 + }, + { + "epoch": 1.8392898316808854, + "grad_norm": 1.5770240615602402, + "learning_rate": 3.520153441375362e-08, + "loss": 0.40202534198760986, + "step": 7977 + }, + { + "epoch": 1.8395204058104682, + "grad_norm": 1.4104759549786023, + "learning_rate": 3.51013365121603e-08, + "loss": 0.398551344871521, + "step": 7978 + }, + { + "epoch": 1.8397509799400509, + "grad_norm": 1.5102819529399165, + "learning_rate": 3.500127886836668e-08, + "loss": 0.49139225482940674, + "step": 7979 + }, + { + "epoch": 1.8399815540696334, + "grad_norm": 1.7659081046335245, + "learning_rate": 
3.4901361496917135e-08, + "loss": 0.4708287715911865, + "step": 7980 + }, + { + "epoch": 1.840212128199216, + "grad_norm": 1.3491474153090526, + "learning_rate": 3.4801584412335714e-08, + "loss": 0.4174381494522095, + "step": 7981 + }, + { + "epoch": 1.8404427023287986, + "grad_norm": 1.6453019064878467, + "learning_rate": 3.470194762912593e-08, + "loss": 0.535778284072876, + "step": 7982 + }, + { + "epoch": 1.8406732764583813, + "grad_norm": 1.7228199406120377, + "learning_rate": 3.4602451161771186e-08, + "loss": 0.540034294128418, + "step": 7983 + }, + { + "epoch": 1.840903850587964, + "grad_norm": 1.794022377740068, + "learning_rate": 3.450309502473403e-08, + "loss": 0.4399121403694153, + "step": 7984 + }, + { + "epoch": 1.8411344247175467, + "grad_norm": 1.6932512977389786, + "learning_rate": 3.4403879232457134e-08, + "loss": 0.5011022686958313, + "step": 7985 + }, + { + "epoch": 1.8413649988471295, + "grad_norm": 1.580497796669037, + "learning_rate": 3.4304803799362405e-08, + "loss": 0.392477810382843, + "step": 7986 + }, + { + "epoch": 1.8415955729767122, + "grad_norm": 1.5439573803469637, + "learning_rate": 3.420586873985132e-08, + "loss": 0.4734686315059662, + "step": 7987 + }, + { + "epoch": 1.8418261471062947, + "grad_norm": 1.3285059669744466, + "learning_rate": 3.410707406830537e-08, + "loss": 0.37347573041915894, + "step": 7988 + }, + { + "epoch": 1.8420567212358774, + "grad_norm": 1.6328708193086845, + "learning_rate": 3.400841979908531e-08, + "loss": 0.38837599754333496, + "step": 7989 + }, + { + "epoch": 1.8422872953654599, + "grad_norm": 1.6277616294407593, + "learning_rate": 3.390990594653142e-08, + "loss": 0.38598424196243286, + "step": 7990 + }, + { + "epoch": 1.8425178694950426, + "grad_norm": 1.584379501910531, + "learning_rate": 3.381153252496371e-08, + "loss": 0.48508739471435547, + "step": 7991 + }, + { + "epoch": 1.8427484436246253, + "grad_norm": 1.609395355542375, + "learning_rate": 3.3713299548681736e-08, + "loss": 0.41946491599082947, 
+ "step": 7992 + }, + { + "epoch": 1.842979017754208, + "grad_norm": 1.4959274640542461, + "learning_rate": 3.3615207031964744e-08, + "loss": 0.4803915023803711, + "step": 7993 + }, + { + "epoch": 1.8432095918837907, + "grad_norm": 1.3835076847275678, + "learning_rate": 3.351725498907143e-08, + "loss": 0.39463797211647034, + "step": 7994 + }, + { + "epoch": 1.8434401660133735, + "grad_norm": 1.5742658557245284, + "learning_rate": 3.341944343424008e-08, + "loss": 0.43345123529434204, + "step": 7995 + }, + { + "epoch": 1.843670740142956, + "grad_norm": 1.7826616989180466, + "learning_rate": 3.332177238168854e-08, + "loss": 0.5164570212364197, + "step": 7996 + }, + { + "epoch": 1.8439013142725387, + "grad_norm": 1.71354580792071, + "learning_rate": 3.322424184561445e-08, + "loss": 0.5313355922698975, + "step": 7997 + }, + { + "epoch": 1.8441318884021212, + "grad_norm": 1.901316143248936, + "learning_rate": 3.3126851840194815e-08, + "loss": 0.4488258361816406, + "step": 7998 + }, + { + "epoch": 1.8443624625317039, + "grad_norm": 1.479116299891256, + "learning_rate": 3.30296023795863e-08, + "loss": 0.5122581720352173, + "step": 7999 + }, + { + "epoch": 1.8445930366612866, + "grad_norm": 1.4735639536720297, + "learning_rate": 3.293249347792493e-08, + "loss": 0.4619610905647278, + "step": 8000 + }, + { + "epoch": 1.8448236107908693, + "grad_norm": 1.3540260330438945, + "learning_rate": 3.2835525149326636e-08, + "loss": 0.4214603006839752, + "step": 8001 + }, + { + "epoch": 1.845054184920452, + "grad_norm": 1.4074387483331625, + "learning_rate": 3.2738697407886485e-08, + "loss": 0.40279510617256165, + "step": 8002 + }, + { + "epoch": 1.8452847590500348, + "grad_norm": 1.4474967943141424, + "learning_rate": 3.264201026767977e-08, + "loss": 0.4797242283821106, + "step": 8003 + }, + { + "epoch": 1.8455153331796172, + "grad_norm": 1.3554973222515974, + "learning_rate": 3.254546374276057e-08, + "loss": 0.3833237588405609, + "step": 8004 + }, + { + "epoch": 1.8457459073092, + 
"grad_norm": 1.4594426546625732, + "learning_rate": 3.244905784716323e-08, + "loss": 0.41461342573165894, + "step": 8005 + }, + { + "epoch": 1.8459764814387825, + "grad_norm": 1.5177617199741877, + "learning_rate": 3.235279259490109e-08, + "loss": 0.592107892036438, + "step": 8006 + }, + { + "epoch": 1.8462070555683652, + "grad_norm": 1.684042887917187, + "learning_rate": 3.2256667999967405e-08, + "loss": 0.39025670289993286, + "step": 8007 + }, + { + "epoch": 1.846437629697948, + "grad_norm": 1.286539298720562, + "learning_rate": 3.2160684076334766e-08, + "loss": 0.40197378396987915, + "step": 8008 + }, + { + "epoch": 1.8466682038275306, + "grad_norm": 1.8155125046022762, + "learning_rate": 3.206484083795558e-08, + "loss": 0.4013815224170685, + "step": 8009 + }, + { + "epoch": 1.8468987779571133, + "grad_norm": 1.5762142363003944, + "learning_rate": 3.1969138298761356e-08, + "loss": 0.45386412739753723, + "step": 8010 + }, + { + "epoch": 1.8471293520866958, + "grad_norm": 1.8756892627173425, + "learning_rate": 3.187357647266353e-08, + "loss": 0.43034985661506653, + "step": 8011 + }, + { + "epoch": 1.8473599262162785, + "grad_norm": 1.6730495727197179, + "learning_rate": 3.177815537355322e-08, + "loss": 0.4346637725830078, + "step": 8012 + }, + { + "epoch": 1.847590500345861, + "grad_norm": 1.8461631710642654, + "learning_rate": 3.1682875015300535e-08, + "loss": 0.5203511118888855, + "step": 8013 + }, + { + "epoch": 1.8478210744754437, + "grad_norm": 1.5817324628827356, + "learning_rate": 3.1587735411755636e-08, + "loss": 0.37658393383026123, + "step": 8014 + }, + { + "epoch": 1.8480516486050265, + "grad_norm": 1.6304961028131815, + "learning_rate": 3.149273657674789e-08, + "loss": 0.5473518371582031, + "step": 8015 + }, + { + "epoch": 1.8482822227346092, + "grad_norm": 1.800633884327913, + "learning_rate": 3.1397878524086484e-08, + "loss": 0.5171597599983215, + "step": 8016 + }, + { + "epoch": 1.848512796864192, + "grad_norm": 1.585245081928725, + "learning_rate": 
3.130316126755983e-08, + "loss": 0.46588706970214844, + "step": 8017 + }, + { + "epoch": 1.8487433709937746, + "grad_norm": 1.496582071882617, + "learning_rate": 3.1208584820936244e-08, + "loss": 0.5571366548538208, + "step": 8018 + }, + { + "epoch": 1.848973945123357, + "grad_norm": 1.5249372170069353, + "learning_rate": 3.111414919796318e-08, + "loss": 0.45803195238113403, + "step": 8019 + }, + { + "epoch": 1.8492045192529398, + "grad_norm": 1.4834943043987898, + "learning_rate": 3.1019854412367875e-08, + "loss": 0.4732629060745239, + "step": 8020 + }, + { + "epoch": 1.8494350933825223, + "grad_norm": 1.7625144420898597, + "learning_rate": 3.092570047785714e-08, + "loss": 0.5268767476081848, + "step": 8021 + }, + { + "epoch": 1.849665667512105, + "grad_norm": 1.5017810734056087, + "learning_rate": 3.0831687408117035e-08, + "loss": 0.5179537534713745, + "step": 8022 + }, + { + "epoch": 1.8498962416416878, + "grad_norm": 1.7406452748153565, + "learning_rate": 3.073781521681351e-08, + "loss": 0.5110389590263367, + "step": 8023 + }, + { + "epoch": 1.8501268157712705, + "grad_norm": 1.442631804804713, + "learning_rate": 3.064408391759154e-08, + "loss": 0.4078633189201355, + "step": 8024 + }, + { + "epoch": 1.8503573899008532, + "grad_norm": 1.6619024740283894, + "learning_rate": 3.055049352407624e-08, + "loss": 0.4632648229598999, + "step": 8025 + }, + { + "epoch": 1.850587964030436, + "grad_norm": 1.577432813868154, + "learning_rate": 3.0457044049871705e-08, + "loss": 0.41569265723228455, + "step": 8026 + }, + { + "epoch": 1.8508185381600184, + "grad_norm": 1.3795657287644, + "learning_rate": 3.036373550856186e-08, + "loss": 0.4105853736400604, + "step": 8027 + }, + { + "epoch": 1.8510491122896011, + "grad_norm": 1.6584799060214424, + "learning_rate": 3.027056791370996e-08, + "loss": 0.4415978789329529, + "step": 8028 + }, + { + "epoch": 1.8512796864191836, + "grad_norm": 1.571030596092026, + "learning_rate": 3.017754127885908e-08, + "loss": 0.3990614414215088, + 
"step": 8029 + }, + { + "epoch": 1.8515102605487663, + "grad_norm": 1.5323241652532567, + "learning_rate": 3.0084655617531376e-08, + "loss": 0.42349040508270264, + "step": 8030 + }, + { + "epoch": 1.851740834678349, + "grad_norm": 1.4436112405033301, + "learning_rate": 2.9991910943228725e-08, + "loss": 0.4687228798866272, + "step": 8031 + }, + { + "epoch": 1.8519714088079318, + "grad_norm": 1.91227305815919, + "learning_rate": 2.989930726943268e-08, + "loss": 0.6091229915618896, + "step": 8032 + }, + { + "epoch": 1.8522019829375145, + "grad_norm": 1.527659992048368, + "learning_rate": 2.980684460960381e-08, + "loss": 0.43401795625686646, + "step": 8033 + }, + { + "epoch": 1.8524325570670972, + "grad_norm": 1.521615388244922, + "learning_rate": 2.9714522977182688e-08, + "loss": 0.47280481457710266, + "step": 8034 + }, + { + "epoch": 1.8526631311966797, + "grad_norm": 1.6019291161476, + "learning_rate": 2.962234238558925e-08, + "loss": 0.5078729391098022, + "step": 8035 + }, + { + "epoch": 1.8528937053262624, + "grad_norm": 1.8353491661496104, + "learning_rate": 2.9530302848223e-08, + "loss": 0.4279085695743561, + "step": 8036 + }, + { + "epoch": 1.853124279455845, + "grad_norm": 1.4587208506754334, + "learning_rate": 2.9438404378462455e-08, + "loss": 0.3720093369483948, + "step": 8037 + }, + { + "epoch": 1.8533548535854276, + "grad_norm": 1.810026420285634, + "learning_rate": 2.934664698966627e-08, + "loss": 0.26778513193130493, + "step": 8038 + }, + { + "epoch": 1.8535854277150103, + "grad_norm": 1.569617242169025, + "learning_rate": 2.9255030695172324e-08, + "loss": 0.47606828808784485, + "step": 8039 + }, + { + "epoch": 1.853816001844593, + "grad_norm": 1.8330928647910023, + "learning_rate": 2.9163555508297632e-08, + "loss": 0.437153160572052, + "step": 8040 + }, + { + "epoch": 1.8540465759741758, + "grad_norm": 1.3219241142527494, + "learning_rate": 2.907222144233945e-08, + "loss": 0.408009797334671, + "step": 8041 + }, + { + "epoch": 1.8542771501037585, + 
"grad_norm": 1.3761080217774861, + "learning_rate": 2.8981028510573824e-08, + "loss": 0.3435688018798828, + "step": 8042 + }, + { + "epoch": 1.854507724233341, + "grad_norm": 1.881646492298394, + "learning_rate": 2.8889976726256705e-08, + "loss": 0.4829018712043762, + "step": 8043 + }, + { + "epoch": 1.8547382983629237, + "grad_norm": 1.5758694223281, + "learning_rate": 2.879906610262339e-08, + "loss": 0.44579288363456726, + "step": 8044 + }, + { + "epoch": 1.8549688724925062, + "grad_norm": 1.3922554430382053, + "learning_rate": 2.8708296652888764e-08, + "loss": 0.4952869415283203, + "step": 8045 + }, + { + "epoch": 1.855199446622089, + "grad_norm": 1.4450922871815606, + "learning_rate": 2.8617668390246818e-08, + "loss": 0.4870997965335846, + "step": 8046 + }, + { + "epoch": 1.8554300207516716, + "grad_norm": 1.5651252792966914, + "learning_rate": 2.8527181327871465e-08, + "loss": 0.5009135603904724, + "step": 8047 + }, + { + "epoch": 1.8556605948812543, + "grad_norm": 1.3977550991376733, + "learning_rate": 2.8436835478915954e-08, + "loss": 0.4837114214897156, + "step": 8048 + }, + { + "epoch": 1.855891169010837, + "grad_norm": 1.6474653449248091, + "learning_rate": 2.8346630856512897e-08, + "loss": 0.47955578565597534, + "step": 8049 + }, + { + "epoch": 1.8561217431404198, + "grad_norm": 1.705788106947518, + "learning_rate": 2.8256567473774363e-08, + "loss": 0.4882965385913849, + "step": 8050 + }, + { + "epoch": 1.8563523172700023, + "grad_norm": 1.5940097685845425, + "learning_rate": 2.8166645343792094e-08, + "loss": 0.4542367458343506, + "step": 8051 + }, + { + "epoch": 1.856582891399585, + "grad_norm": 1.5880265061576002, + "learning_rate": 2.8076864479637198e-08, + "loss": 0.4506416916847229, + "step": 8052 + }, + { + "epoch": 1.8568134655291675, + "grad_norm": 1.699970116686096, + "learning_rate": 2.798722489436012e-08, + "loss": 0.5043084025382996, + "step": 8053 + }, + { + "epoch": 1.8570440396587502, + "grad_norm": 1.397398070036947, + "learning_rate": 
2.78977266009911e-08, + "loss": 0.3711032271385193, + "step": 8054 + }, + { + "epoch": 1.857274613788333, + "grad_norm": 1.3008294527362816, + "learning_rate": 2.7808369612539405e-08, + "loss": 0.33371198177337646, + "step": 8055 + }, + { + "epoch": 1.8575051879179156, + "grad_norm": 1.7364482681056421, + "learning_rate": 2.771915394199409e-08, + "loss": 0.5328178405761719, + "step": 8056 + }, + { + "epoch": 1.8577357620474984, + "grad_norm": 1.925308909381556, + "learning_rate": 2.7630079602323443e-08, + "loss": 0.4615975618362427, + "step": 8057 + }, + { + "epoch": 1.857966336177081, + "grad_norm": 1.506605490676224, + "learning_rate": 2.754114660647533e-08, + "loss": 0.4667460024356842, + "step": 8058 + }, + { + "epoch": 1.8581969103066636, + "grad_norm": 1.7246190337812906, + "learning_rate": 2.745235496737719e-08, + "loss": 0.483825147151947, + "step": 8059 + }, + { + "epoch": 1.8584274844362463, + "grad_norm": 1.7802094460466942, + "learning_rate": 2.736370469793592e-08, + "loss": 0.4376814365386963, + "step": 8060 + }, + { + "epoch": 1.8586580585658288, + "grad_norm": 1.4605341926622646, + "learning_rate": 2.7275195811037432e-08, + "loss": 0.4862465262413025, + "step": 8061 + }, + { + "epoch": 1.8588886326954115, + "grad_norm": 1.6497121576486102, + "learning_rate": 2.718682831954744e-08, + "loss": 0.48104172945022583, + "step": 8062 + }, + { + "epoch": 1.8591192068249942, + "grad_norm": 1.3643295104524422, + "learning_rate": 2.709860223631122e-08, + "loss": 0.43358030915260315, + "step": 8063 + }, + { + "epoch": 1.859349780954577, + "grad_norm": 1.3052220670178016, + "learning_rate": 2.701051757415307e-08, + "loss": 0.44614607095718384, + "step": 8064 + }, + { + "epoch": 1.8595803550841596, + "grad_norm": 1.8220525339474862, + "learning_rate": 2.6922574345877303e-08, + "loss": 0.49824249744415283, + "step": 8065 + }, + { + "epoch": 1.8598109292137424, + "grad_norm": 1.3314333068504594, + "learning_rate": 2.683477256426714e-08, + "loss": 0.39621901512145996, 
+ "step": 8066 + }, + { + "epoch": 1.8600415033433249, + "grad_norm": 1.3391032368154236, + "learning_rate": 2.6747112242085478e-08, + "loss": 0.40166205167770386, + "step": 8067 + }, + { + "epoch": 1.8602720774729076, + "grad_norm": 1.720101921843303, + "learning_rate": 2.6659593392074575e-08, + "loss": 0.4249534606933594, + "step": 8068 + }, + { + "epoch": 1.86050265160249, + "grad_norm": 1.3203085704476971, + "learning_rate": 2.6572216026956473e-08, + "loss": 0.4015510678291321, + "step": 8069 + }, + { + "epoch": 1.8607332257320728, + "grad_norm": 1.8982655978960439, + "learning_rate": 2.6484980159432236e-08, + "loss": 0.4691264033317566, + "step": 8070 + }, + { + "epoch": 1.8609637998616555, + "grad_norm": 1.6363630573411998, + "learning_rate": 2.639788580218216e-08, + "loss": 0.5095053315162659, + "step": 8071 + }, + { + "epoch": 1.8611943739912382, + "grad_norm": 1.707433776183968, + "learning_rate": 2.6310932967866794e-08, + "loss": 0.4658794403076172, + "step": 8072 + }, + { + "epoch": 1.861424948120821, + "grad_norm": 1.7622547433521365, + "learning_rate": 2.622412166912513e-08, + "loss": 0.495827853679657, + "step": 8073 + }, + { + "epoch": 1.8616555222504036, + "grad_norm": 1.6584095706736666, + "learning_rate": 2.6137451918576413e-08, + "loss": 0.43652772903442383, + "step": 8074 + }, + { + "epoch": 1.8618860963799861, + "grad_norm": 1.410927084601702, + "learning_rate": 2.6050923728818784e-08, + "loss": 0.4636423587799072, + "step": 8075 + }, + { + "epoch": 1.8621166705095689, + "grad_norm": 1.6137478822178715, + "learning_rate": 2.5964537112430186e-08, + "loss": 0.4572441577911377, + "step": 8076 + }, + { + "epoch": 1.8623472446391514, + "grad_norm": 1.5268149737583054, + "learning_rate": 2.587829208196757e-08, + "loss": 0.4549320340156555, + "step": 8077 + }, + { + "epoch": 1.862577818768734, + "grad_norm": 1.4757300368438027, + "learning_rate": 2.5792188649967795e-08, + "loss": 0.46412795782089233, + "step": 8078 + }, + { + "epoch": 
1.8628083928983168, + "grad_norm": 1.566100546942984, + "learning_rate": 2.570622682894652e-08, + "loss": 0.40059781074523926, + "step": 8079 + }, + { + "epoch": 1.8630389670278995, + "grad_norm": 1.8382248312833556, + "learning_rate": 2.5620406631399416e-08, + "loss": 0.5396246910095215, + "step": 8080 + }, + { + "epoch": 1.8632695411574822, + "grad_norm": 1.630240250521673, + "learning_rate": 2.553472806980128e-08, + "loss": 0.4793856143951416, + "step": 8081 + }, + { + "epoch": 1.863500115287065, + "grad_norm": 1.7081981493499068, + "learning_rate": 2.5449191156606264e-08, + "loss": 0.4428815543651581, + "step": 8082 + }, + { + "epoch": 1.8637306894166474, + "grad_norm": 1.3161952024113066, + "learning_rate": 2.5363795904248086e-08, + "loss": 0.4024256467819214, + "step": 8083 + }, + { + "epoch": 1.8639612635462302, + "grad_norm": 1.7334425937535092, + "learning_rate": 2.5278542325139818e-08, + "loss": 0.4868123531341553, + "step": 8084 + }, + { + "epoch": 1.8641918376758126, + "grad_norm": 1.8199560965911645, + "learning_rate": 2.519343043167399e-08, + "loss": 0.602108359336853, + "step": 8085 + }, + { + "epoch": 1.8644224118053954, + "grad_norm": 1.8527423308196338, + "learning_rate": 2.510846023622237e-08, + "loss": 0.4500008225440979, + "step": 8086 + }, + { + "epoch": 1.864652985934978, + "grad_norm": 1.4521386296534855, + "learning_rate": 2.502363175113642e-08, + "loss": 0.3894640803337097, + "step": 8087 + }, + { + "epoch": 1.8648835600645608, + "grad_norm": 1.471988486213167, + "learning_rate": 2.493894498874649e-08, + "loss": 0.4525550305843353, + "step": 8088 + }, + { + "epoch": 1.8651141341941435, + "grad_norm": 1.362693221908779, + "learning_rate": 2.485439996136296e-08, + "loss": 0.3908608555793762, + "step": 8089 + }, + { + "epoch": 1.8653447083237262, + "grad_norm": 1.5537540661666722, + "learning_rate": 2.4769996681275106e-08, + "loss": 0.4551984667778015, + "step": 8090 + }, + { + "epoch": 1.8655752824533087, + "grad_norm": 1.3331466559033927, + 
"learning_rate": 2.468573516075201e-08, + "loss": 0.34474045038223267, + "step": 8091 + }, + { + "epoch": 1.8658058565828914, + "grad_norm": 1.675344505563735, + "learning_rate": 2.4601615412041755e-08, + "loss": 0.41480594873428345, + "step": 8092 + }, + { + "epoch": 1.866036430712474, + "grad_norm": 1.6368782805002868, + "learning_rate": 2.4517637447372007e-08, + "loss": 0.5043104887008667, + "step": 8093 + }, + { + "epoch": 1.8662670048420567, + "grad_norm": 1.7139805676568358, + "learning_rate": 2.4433801278950007e-08, + "loss": 0.4467152953147888, + "step": 8094 + }, + { + "epoch": 1.8664975789716394, + "grad_norm": 1.5274424401661542, + "learning_rate": 2.4350106918962e-08, + "loss": 0.454445481300354, + "step": 8095 + }, + { + "epoch": 1.866728153101222, + "grad_norm": 1.5661075903861215, + "learning_rate": 2.426655437957392e-08, + "loss": 0.4639291763305664, + "step": 8096 + }, + { + "epoch": 1.8669587272308048, + "grad_norm": 1.6251687636184629, + "learning_rate": 2.418314367293084e-08, + "loss": 0.46178731322288513, + "step": 8097 + }, + { + "epoch": 1.8671893013603875, + "grad_norm": 1.5047265923361783, + "learning_rate": 2.4099874811157383e-08, + "loss": 0.43832290172576904, + "step": 8098 + }, + { + "epoch": 1.86741987548997, + "grad_norm": 1.569040322283118, + "learning_rate": 2.4016747806357652e-08, + "loss": 0.4586114287376404, + "step": 8099 + }, + { + "epoch": 1.8676504496195527, + "grad_norm": 1.403368540081911, + "learning_rate": 2.3933762670614978e-08, + "loss": 0.37975889444351196, + "step": 8100 + }, + { + "epoch": 1.8678810237491352, + "grad_norm": 1.6666819300781532, + "learning_rate": 2.3850919415992042e-08, + "loss": 0.4579748511314392, + "step": 8101 + }, + { + "epoch": 1.868111597878718, + "grad_norm": 1.5976733248377182, + "learning_rate": 2.3768218054530775e-08, + "loss": 0.5120238661766052, + "step": 8102 + }, + { + "epoch": 1.8683421720083007, + "grad_norm": 1.47865092584181, + "learning_rate": 2.3685658598253e-08, + "loss": 
0.41514822840690613, + "step": 8103 + }, + { + "epoch": 1.8685727461378834, + "grad_norm": 1.6132937806442644, + "learning_rate": 2.360324105915934e-08, + "loss": 0.49480026960372925, + "step": 8104 + }, + { + "epoch": 1.868803320267466, + "grad_norm": 1.516759878457302, + "learning_rate": 2.352096544922999e-08, + "loss": 0.41115111112594604, + "step": 8105 + }, + { + "epoch": 1.8690338943970488, + "grad_norm": 1.8593225608723183, + "learning_rate": 2.3438831780424607e-08, + "loss": 0.44793501496315, + "step": 8106 + }, + { + "epoch": 1.8692644685266313, + "grad_norm": 2.087747863463927, + "learning_rate": 2.3356840064682305e-08, + "loss": 0.4197582006454468, + "step": 8107 + }, + { + "epoch": 1.869495042656214, + "grad_norm": 1.3708560469219937, + "learning_rate": 2.3274990313921218e-08, + "loss": 0.3654597997665405, + "step": 8108 + }, + { + "epoch": 1.8697256167857965, + "grad_norm": 1.6733057347639861, + "learning_rate": 2.319328254003927e-08, + "loss": 0.5105487704277039, + "step": 8109 + }, + { + "epoch": 1.8699561909153792, + "grad_norm": 1.6787548385436994, + "learning_rate": 2.3111716754913192e-08, + "loss": 0.5202287435531616, + "step": 8110 + }, + { + "epoch": 1.870186765044962, + "grad_norm": 1.5305524386936447, + "learning_rate": 2.303029297039949e-08, + "loss": 0.4475836753845215, + "step": 8111 + }, + { + "epoch": 1.8704173391745447, + "grad_norm": 1.579007380002247, + "learning_rate": 2.2949011198334144e-08, + "loss": 0.5010285973548889, + "step": 8112 + }, + { + "epoch": 1.8706479133041274, + "grad_norm": 1.4473541177707174, + "learning_rate": 2.286787145053204e-08, + "loss": 0.41949477791786194, + "step": 8113 + }, + { + "epoch": 1.87087848743371, + "grad_norm": 1.3276801089952157, + "learning_rate": 2.2786873738787738e-08, + "loss": 0.38505449891090393, + "step": 8114 + }, + { + "epoch": 1.8711090615632926, + "grad_norm": 1.8776948972547884, + "learning_rate": 2.2706018074875043e-08, + "loss": 0.4854990839958191, + "step": 8115 + }, + { + 
"epoch": 1.8713396356928753, + "grad_norm": 1.3982424394333428, + "learning_rate": 2.2625304470547336e-08, + "loss": 0.3846585154533386, + "step": 8116 + }, + { + "epoch": 1.8715702098224578, + "grad_norm": 1.7499321509858707, + "learning_rate": 2.2544732937537003e-08, + "loss": 0.48948657512664795, + "step": 8117 + }, + { + "epoch": 1.8718007839520405, + "grad_norm": 2.062408637955344, + "learning_rate": 2.2464303487555902e-08, + "loss": 0.5571197867393494, + "step": 8118 + }, + { + "epoch": 1.8720313580816232, + "grad_norm": 1.6301482456607912, + "learning_rate": 2.2384016132295345e-08, + "loss": 0.514819324016571, + "step": 8119 + }, + { + "epoch": 1.872261932211206, + "grad_norm": 1.5677432247071832, + "learning_rate": 2.230387088342589e-08, + "loss": 0.4411713182926178, + "step": 8120 + }, + { + "epoch": 1.8724925063407887, + "grad_norm": 1.4508146354194726, + "learning_rate": 2.2223867752597437e-08, + "loss": 0.4494340717792511, + "step": 8121 + }, + { + "epoch": 1.8727230804703712, + "grad_norm": 1.6205003929883524, + "learning_rate": 2.2144006751439236e-08, + "loss": 0.4186316132545471, + "step": 8122 + }, + { + "epoch": 1.8729536545999539, + "grad_norm": 1.5017815147990925, + "learning_rate": 2.2064287891560007e-08, + "loss": 0.45932692289352417, + "step": 8123 + }, + { + "epoch": 1.8731842287295364, + "grad_norm": 1.475598332139336, + "learning_rate": 2.1984711184547477e-08, + "loss": 0.4095005989074707, + "step": 8124 + }, + { + "epoch": 1.873414802859119, + "grad_norm": 1.4633944208901333, + "learning_rate": 2.1905276641969284e-08, + "loss": 0.3822292685508728, + "step": 8125 + }, + { + "epoch": 1.8736453769887018, + "grad_norm": 1.5993925787143786, + "learning_rate": 2.1825984275371633e-08, + "loss": 0.41837501525878906, + "step": 8126 + }, + { + "epoch": 1.8738759511182845, + "grad_norm": 1.6176173713553115, + "learning_rate": 2.1746834096280752e-08, + "loss": 0.3903341591358185, + "step": 8127 + }, + { + "epoch": 1.8741065252478672, + "grad_norm": 
1.4079834631265329, + "learning_rate": 2.166782611620177e-08, + "loss": 0.4760533571243286, + "step": 8128 + }, + { + "epoch": 1.87433709937745, + "grad_norm": 1.4208864897990974, + "learning_rate": 2.1588960346619388e-08, + "loss": 0.43960827589035034, + "step": 8129 + }, + { + "epoch": 1.8745676735070325, + "grad_norm": 1.7654096006141957, + "learning_rate": 2.151023679899755e-08, + "loss": 0.47941142320632935, + "step": 8130 + }, + { + "epoch": 1.8747982476366152, + "grad_norm": 1.41048993466122, + "learning_rate": 2.143165548477943e-08, + "loss": 0.4467000961303711, + "step": 8131 + }, + { + "epoch": 1.8750288217661977, + "grad_norm": 1.4796609851220597, + "learning_rate": 2.1353216415387788e-08, + "loss": 0.42472416162490845, + "step": 8132 + }, + { + "epoch": 1.8752593958957804, + "grad_norm": 1.9200971165248846, + "learning_rate": 2.1274919602224273e-08, + "loss": 0.5127208232879639, + "step": 8133 + }, + { + "epoch": 1.875489970025363, + "grad_norm": 1.8325759046238386, + "learning_rate": 2.119676505667045e-08, + "loss": 0.5362575650215149, + "step": 8134 + }, + { + "epoch": 1.8757205441549458, + "grad_norm": 1.2983178226172876, + "learning_rate": 2.111875279008657e-08, + "loss": 0.4025413990020752, + "step": 8135 + }, + { + "epoch": 1.8759511182845285, + "grad_norm": 1.5647543555868217, + "learning_rate": 2.1040882813812667e-08, + "loss": 0.49126237630844116, + "step": 8136 + }, + { + "epoch": 1.8761816924141113, + "grad_norm": 1.64373423682739, + "learning_rate": 2.096315513916791e-08, + "loss": 0.40609198808670044, + "step": 8137 + }, + { + "epoch": 1.8764122665436938, + "grad_norm": 1.4881317882345182, + "learning_rate": 2.0885569777450707e-08, + "loss": 0.47826945781707764, + "step": 8138 + }, + { + "epoch": 1.8766428406732765, + "grad_norm": 1.4578062807690564, + "learning_rate": 2.0808126739939035e-08, + "loss": 0.39987948536872864, + "step": 8139 + }, + { + "epoch": 1.876873414802859, + "grad_norm": 1.6010627164873539, + "learning_rate": 
2.0730826037890003e-08, + "loss": 0.5727471113204956, + "step": 8140 + }, + { + "epoch": 1.8771039889324417, + "grad_norm": 1.3737495035065335, + "learning_rate": 2.0653667682540066e-08, + "loss": 0.4772847294807434, + "step": 8141 + }, + { + "epoch": 1.8773345630620244, + "grad_norm": 1.54097710668183, + "learning_rate": 2.0576651685104697e-08, + "loss": 0.3258974552154541, + "step": 8142 + }, + { + "epoch": 1.8775651371916071, + "grad_norm": 1.4067173519179077, + "learning_rate": 2.049977805677938e-08, + "loss": 0.5220766067504883, + "step": 8143 + }, + { + "epoch": 1.8777957113211898, + "grad_norm": 1.2918102910413813, + "learning_rate": 2.0423046808738077e-08, + "loss": 0.39550334215164185, + "step": 8144 + }, + { + "epoch": 1.8780262854507725, + "grad_norm": 2.3983596335767334, + "learning_rate": 2.034645795213463e-08, + "loss": 0.4487137198448181, + "step": 8145 + }, + { + "epoch": 1.878256859580355, + "grad_norm": 1.3947776950768658, + "learning_rate": 2.0270011498102147e-08, + "loss": 0.3363339304924011, + "step": 8146 + }, + { + "epoch": 1.8784874337099378, + "grad_norm": 1.5333942075668883, + "learning_rate": 2.019370745775273e-08, + "loss": 0.5161975026130676, + "step": 8147 + }, + { + "epoch": 1.8787180078395203, + "grad_norm": 1.4587907721196531, + "learning_rate": 2.011754584217784e-08, + "loss": 0.359643816947937, + "step": 8148 + }, + { + "epoch": 1.878948581969103, + "grad_norm": 1.3696377552673178, + "learning_rate": 2.0041526662448625e-08, + "loss": 0.4472349286079407, + "step": 8149 + }, + { + "epoch": 1.8791791560986857, + "grad_norm": 1.6693442042315434, + "learning_rate": 1.9965649929615135e-08, + "loss": 0.40363550186157227, + "step": 8150 + }, + { + "epoch": 1.8794097302282684, + "grad_norm": 1.7598833036688746, + "learning_rate": 1.9889915654706656e-08, + "loss": 0.46063172817230225, + "step": 8151 + }, + { + "epoch": 1.8796403043578511, + "grad_norm": 1.6348416553504144, + "learning_rate": 1.981432384873205e-08, + "loss": 
0.4478832483291626, + "step": 8152 + }, + { + "epoch": 1.8798708784874338, + "grad_norm": 1.7016857171242656, + "learning_rate": 1.9738874522679304e-08, + "loss": 0.3438538908958435, + "step": 8153 + }, + { + "epoch": 1.8801014526170163, + "grad_norm": 2.2031337611169435, + "learning_rate": 1.966356768751598e-08, + "loss": 0.6035101413726807, + "step": 8154 + }, + { + "epoch": 1.880332026746599, + "grad_norm": 1.6642481554824737, + "learning_rate": 1.958840335418832e-08, + "loss": 0.42533814907073975, + "step": 8155 + }, + { + "epoch": 1.8805626008761815, + "grad_norm": 1.5825430260849223, + "learning_rate": 1.9513381533622587e-08, + "loss": 0.4117417633533478, + "step": 8156 + }, + { + "epoch": 1.8807931750057643, + "grad_norm": 1.6218701576707837, + "learning_rate": 1.943850223672361e-08, + "loss": 0.4353973865509033, + "step": 8157 + }, + { + "epoch": 1.881023749135347, + "grad_norm": 1.5613174256794196, + "learning_rate": 1.9363765474376125e-08, + "loss": 0.46115410327911377, + "step": 8158 + }, + { + "epoch": 1.8812543232649297, + "grad_norm": 1.4415196194001674, + "learning_rate": 1.9289171257443782e-08, + "loss": 0.3851476311683655, + "step": 8159 + }, + { + "epoch": 1.8814848973945124, + "grad_norm": 1.5586436794771006, + "learning_rate": 1.921471959676957e-08, + "loss": 0.4786919355392456, + "step": 8160 + }, + { + "epoch": 1.8817154715240951, + "grad_norm": 1.6398537249529117, + "learning_rate": 1.914041050317583e-08, + "loss": 0.4427906274795532, + "step": 8161 + }, + { + "epoch": 1.8819460456536776, + "grad_norm": 1.495606046913042, + "learning_rate": 1.906624398746415e-08, + "loss": 0.37774696946144104, + "step": 8162 + }, + { + "epoch": 1.8821766197832603, + "grad_norm": 1.5733237369323263, + "learning_rate": 1.8992220060415343e-08, + "loss": 0.43793195486068726, + "step": 8163 + }, + { + "epoch": 1.8824071939128428, + "grad_norm": 1.2904039749569203, + "learning_rate": 1.8918338732789587e-08, + "loss": 0.3869394063949585, + "step": 8164 + }, + { + 
"epoch": 1.8826377680424256, + "grad_norm": 1.9325019962539283, + "learning_rate": 1.8844600015326283e-08, + "loss": 0.4963928461074829, + "step": 8165 + }, + { + "epoch": 1.8828683421720083, + "grad_norm": 1.5945637624217548, + "learning_rate": 1.8771003918743978e-08, + "loss": 0.45727187395095825, + "step": 8166 + }, + { + "epoch": 1.883098916301591, + "grad_norm": 1.8455372682093192, + "learning_rate": 1.8697550453740884e-08, + "loss": 0.4878919720649719, + "step": 8167 + }, + { + "epoch": 1.8833294904311737, + "grad_norm": 1.7826396913976752, + "learning_rate": 1.862423963099391e-08, + "loss": 0.5376998782157898, + "step": 8168 + }, + { + "epoch": 1.8835600645607564, + "grad_norm": 1.4765870494853872, + "learning_rate": 1.8551071461159638e-08, + "loss": 0.4534180760383606, + "step": 8169 + }, + { + "epoch": 1.883790638690339, + "grad_norm": 1.561114582514347, + "learning_rate": 1.847804595487379e-08, + "loss": 0.43389183282852173, + "step": 8170 + }, + { + "epoch": 1.8840212128199216, + "grad_norm": 1.535519375075225, + "learning_rate": 1.8405163122751532e-08, + "loss": 0.4833742678165436, + "step": 8171 + }, + { + "epoch": 1.8842517869495041, + "grad_norm": 1.622186588307033, + "learning_rate": 1.833242297538695e-08, + "loss": 0.49344220757484436, + "step": 8172 + }, + { + "epoch": 1.8844823610790868, + "grad_norm": 1.4984978840285303, + "learning_rate": 1.8259825523353478e-08, + "loss": 0.49290287494659424, + "step": 8173 + }, + { + "epoch": 1.8847129352086696, + "grad_norm": 1.3380486770022888, + "learning_rate": 1.8187370777204115e-08, + "loss": 0.3971661627292633, + "step": 8174 + }, + { + "epoch": 1.8849435093382523, + "grad_norm": 1.5640300636460862, + "learning_rate": 1.811505874747066e-08, + "loss": 0.4984559416770935, + "step": 8175 + }, + { + "epoch": 1.885174083467835, + "grad_norm": 1.5865101985098036, + "learning_rate": 1.804288944466459e-08, + "loss": 0.38448822498321533, + "step": 8176 + }, + { + "epoch": 1.8854046575974177, + "grad_norm": 
1.9477188873182039, + "learning_rate": 1.7970862879276406e-08, + "loss": 0.5468838214874268, + "step": 8177 + }, + { + "epoch": 1.8856352317270002, + "grad_norm": 1.4768596083300787, + "learning_rate": 1.7898979061775844e-08, + "loss": 0.46132227778434753, + "step": 8178 + }, + { + "epoch": 1.885865805856583, + "grad_norm": 1.436520509516384, + "learning_rate": 1.782723800261199e-08, + "loss": 0.4636603593826294, + "step": 8179 + }, + { + "epoch": 1.8860963799861654, + "grad_norm": 1.5429934177783204, + "learning_rate": 1.7755639712213057e-08, + "loss": 0.5302075147628784, + "step": 8180 + }, + { + "epoch": 1.8863269541157481, + "grad_norm": 1.6563780466455296, + "learning_rate": 1.7684184200986718e-08, + "loss": 0.4817178249359131, + "step": 8181 + }, + { + "epoch": 1.8865575282453309, + "grad_norm": 1.4897334937072715, + "learning_rate": 1.7612871479319668e-08, + "loss": 0.4535263180732727, + "step": 8182 + }, + { + "epoch": 1.8867881023749136, + "grad_norm": 1.6029244875460678, + "learning_rate": 1.7541701557577837e-08, + "loss": 0.5260534286499023, + "step": 8183 + }, + { + "epoch": 1.8870186765044963, + "grad_norm": 1.4065276330082377, + "learning_rate": 1.7470674446106614e-08, + "loss": 0.4526366591453552, + "step": 8184 + }, + { + "epoch": 1.887249250634079, + "grad_norm": 1.663451618032215, + "learning_rate": 1.7399790155230632e-08, + "loss": 0.4721973240375519, + "step": 8185 + }, + { + "epoch": 1.8874798247636615, + "grad_norm": 1.6510288712519465, + "learning_rate": 1.7329048695253422e-08, + "loss": 0.4331268072128296, + "step": 8186 + }, + { + "epoch": 1.8877103988932442, + "grad_norm": 1.9623503418050199, + "learning_rate": 1.7258450076458097e-08, + "loss": 0.5175650119781494, + "step": 8187 + }, + { + "epoch": 1.8879409730228267, + "grad_norm": 1.3640756960267433, + "learning_rate": 1.718799430910678e-08, + "loss": 0.45537033677101135, + "step": 8188 + }, + { + "epoch": 1.8881715471524094, + "grad_norm": 1.540072753548263, + "learning_rate": 
1.7117681403441054e-08, + "loss": 0.5055547952651978, + "step": 8189 + }, + { + "epoch": 1.8884021212819921, + "grad_norm": 1.5849214553434074, + "learning_rate": 1.7047511369681522e-08, + "loss": 0.45514553785324097, + "step": 8190 + }, + { + "epoch": 1.8886326954115749, + "grad_norm": 1.4821599822935887, + "learning_rate": 1.6977484218028136e-08, + "loss": 0.44227129220962524, + "step": 8191 + }, + { + "epoch": 1.8888632695411576, + "grad_norm": 1.7163429603820965, + "learning_rate": 1.690759995866009e-08, + "loss": 0.4916682839393616, + "step": 8192 + }, + { + "epoch": 1.8890938436707403, + "grad_norm": 1.8219225402151713, + "learning_rate": 1.683785860173559e-08, + "loss": 0.48626652359962463, + "step": 8193 + }, + { + "epoch": 1.8893244178003228, + "grad_norm": 1.491517373721971, + "learning_rate": 1.676826015739252e-08, + "loss": 0.39982378482818604, + "step": 8194 + }, + { + "epoch": 1.8895549919299055, + "grad_norm": 1.8710391095575285, + "learning_rate": 1.6698804635747576e-08, + "loss": 0.49218645691871643, + "step": 8195 + }, + { + "epoch": 1.889785566059488, + "grad_norm": 1.5127362254029266, + "learning_rate": 1.6629492046896897e-08, + "loss": 0.38896578550338745, + "step": 8196 + }, + { + "epoch": 1.8900161401890707, + "grad_norm": 1.5870268370960243, + "learning_rate": 1.6560322400915538e-08, + "loss": 0.4217762053012848, + "step": 8197 + }, + { + "epoch": 1.8902467143186534, + "grad_norm": 1.5231528042475502, + "learning_rate": 1.6491295707858343e-08, + "loss": 0.4020112156867981, + "step": 8198 + }, + { + "epoch": 1.8904772884482361, + "grad_norm": 2.1189678944561954, + "learning_rate": 1.6422411977758843e-08, + "loss": 0.4630794823169708, + "step": 8199 + }, + { + "epoch": 1.8907078625778189, + "grad_norm": 1.526138087578761, + "learning_rate": 1.6353671220629917e-08, + "loss": 0.3673272132873535, + "step": 8200 + }, + { + "epoch": 1.8909384367074016, + "grad_norm": 1.4930616058109705, + "learning_rate": 1.6285073446463903e-08, + "loss": 
0.4677228331565857, + "step": 8201 + }, + { + "epoch": 1.891169010836984, + "grad_norm": 1.718939922651036, + "learning_rate": 1.621661866523216e-08, + "loss": 0.4532579183578491, + "step": 8202 + }, + { + "epoch": 1.8913995849665668, + "grad_norm": 1.4990742550855458, + "learning_rate": 1.6148306886885287e-08, + "loss": 0.3011256456375122, + "step": 8203 + }, + { + "epoch": 1.8916301590961493, + "grad_norm": 1.731114486954807, + "learning_rate": 1.6080138121352892e-08, + "loss": 0.43071651458740234, + "step": 8204 + }, + { + "epoch": 1.891860733225732, + "grad_norm": 1.4183554819693576, + "learning_rate": 1.6012112378544272e-08, + "loss": 0.3180675506591797, + "step": 8205 + }, + { + "epoch": 1.8920913073553147, + "grad_norm": 1.6038525214828652, + "learning_rate": 1.594422966834741e-08, + "loss": 0.35130774974823, + "step": 8206 + }, + { + "epoch": 1.8923218814848974, + "grad_norm": 1.388613528735296, + "learning_rate": 1.587649000062996e-08, + "loss": 0.4953269958496094, + "step": 8207 + }, + { + "epoch": 1.8925524556144802, + "grad_norm": 1.5668590048532676, + "learning_rate": 1.5808893385238388e-08, + "loss": 0.3713166415691376, + "step": 8208 + }, + { + "epoch": 1.8927830297440629, + "grad_norm": 1.4824855259294067, + "learning_rate": 1.5741439831998827e-08, + "loss": 0.4273546040058136, + "step": 8209 + }, + { + "epoch": 1.8930136038736454, + "grad_norm": 1.8212221910711959, + "learning_rate": 1.5674129350715994e-08, + "loss": 0.45312386751174927, + "step": 8210 + }, + { + "epoch": 1.893244178003228, + "grad_norm": 1.4687276423683582, + "learning_rate": 1.560696195117439e-08, + "loss": 0.40246695280075073, + "step": 8211 + }, + { + "epoch": 1.8934747521328106, + "grad_norm": 1.9323139227263069, + "learning_rate": 1.5539937643137325e-08, + "loss": 0.5229366421699524, + "step": 8212 + }, + { + "epoch": 1.8937053262623933, + "grad_norm": 1.4419033757005335, + "learning_rate": 1.5473056436347554e-08, + "loss": 0.43834251165390015, + "step": 8213 + }, + { + 
"epoch": 1.893935900391976, + "grad_norm": 1.5176292463299432, + "learning_rate": 1.540631834052697e-08, + "loss": 0.4423528015613556, + "step": 8214 + }, + { + "epoch": 1.8941664745215587, + "grad_norm": 1.6176606345399394, + "learning_rate": 1.5339723365376478e-08, + "loss": 0.49888452887535095, + "step": 8215 + }, + { + "epoch": 1.8943970486511414, + "grad_norm": 1.7422668701695732, + "learning_rate": 1.5273271520576448e-08, + "loss": 0.44023919105529785, + "step": 8216 + }, + { + "epoch": 1.8946276227807242, + "grad_norm": 1.5430241161700802, + "learning_rate": 1.5206962815786262e-08, + "loss": 0.4733201861381531, + "step": 8217 + }, + { + "epoch": 1.8948581969103067, + "grad_norm": 1.992567039765999, + "learning_rate": 1.5140797260644768e-08, + "loss": 0.5393285751342773, + "step": 8218 + }, + { + "epoch": 1.8950887710398894, + "grad_norm": 1.5439154792235448, + "learning_rate": 1.507477486476949e-08, + "loss": 0.4240071773529053, + "step": 8219 + }, + { + "epoch": 1.8953193451694719, + "grad_norm": 1.4272355688005478, + "learning_rate": 1.5008895637757647e-08, + "loss": 0.42983078956604004, + "step": 8220 + }, + { + "epoch": 1.8955499192990546, + "grad_norm": 1.470069283076572, + "learning_rate": 1.4943159589185462e-08, + "loss": 0.47513502836227417, + "step": 8221 + }, + { + "epoch": 1.8957804934286373, + "grad_norm": 1.49966428795426, + "learning_rate": 1.4877566728608293e-08, + "loss": 0.41938167810440063, + "step": 8222 + }, + { + "epoch": 1.89601106755822, + "grad_norm": 1.513306290399523, + "learning_rate": 1.4812117065560625e-08, + "loss": 0.44817137718200684, + "step": 8223 + }, + { + "epoch": 1.8962416416878027, + "grad_norm": 1.6563869108965783, + "learning_rate": 1.4746810609556292e-08, + "loss": 0.46840909123420715, + "step": 8224 + }, + { + "epoch": 1.8964722158173855, + "grad_norm": 1.4822882914533433, + "learning_rate": 1.4681647370088369e-08, + "loss": 0.377409964799881, + "step": 8225 + }, + { + "epoch": 1.896702789946968, + "grad_norm": 
1.595495246407856, + "learning_rate": 1.4616627356628831e-08, + "loss": 0.41149425506591797, + "step": 8226 + }, + { + "epoch": 1.8969333640765507, + "grad_norm": 1.548113444870098, + "learning_rate": 1.455175057862923e-08, + "loss": 0.39183878898620605, + "step": 8227 + }, + { + "epoch": 1.8971639382061332, + "grad_norm": 1.3643453838150799, + "learning_rate": 1.448701704551969e-08, + "loss": 0.3629387617111206, + "step": 8228 + }, + { + "epoch": 1.8973945123357159, + "grad_norm": 1.6546771139251113, + "learning_rate": 1.4422426766710239e-08, + "loss": 0.4007713794708252, + "step": 8229 + }, + { + "epoch": 1.8976250864652986, + "grad_norm": 1.648419698601457, + "learning_rate": 1.4357979751589477e-08, + "loss": 0.42354586720466614, + "step": 8230 + }, + { + "epoch": 1.8978556605948813, + "grad_norm": 1.9683167812350795, + "learning_rate": 1.429367600952558e-08, + "loss": 0.5321829319000244, + "step": 8231 + }, + { + "epoch": 1.898086234724464, + "grad_norm": 1.5240649560541817, + "learning_rate": 1.4229515549865845e-08, + "loss": 0.4840988218784332, + "step": 8232 + }, + { + "epoch": 1.8983168088540465, + "grad_norm": 1.6587626955063286, + "learning_rate": 1.4165498381936369e-08, + "loss": 0.5006803870201111, + "step": 8233 + }, + { + "epoch": 1.8985473829836292, + "grad_norm": 1.855334923621547, + "learning_rate": 1.4101624515042821e-08, + "loss": 0.40582865476608276, + "step": 8234 + }, + { + "epoch": 1.8987779571132117, + "grad_norm": 1.6458084674224973, + "learning_rate": 1.4037893958469993e-08, + "loss": 0.38199514150619507, + "step": 8235 + }, + { + "epoch": 1.8990085312427945, + "grad_norm": 1.4513711417071327, + "learning_rate": 1.3974306721481699e-08, + "loss": 0.39234936237335205, + "step": 8236 + }, + { + "epoch": 1.8992391053723772, + "grad_norm": 1.661857153956049, + "learning_rate": 1.391086281332099e-08, + "loss": 0.42211759090423584, + "step": 8237 + }, + { + "epoch": 1.8994696795019599, + "grad_norm": 1.5171507269414566, + "learning_rate": 
1.3847562243210043e-08, + "loss": 0.4519961476325989, + "step": 8238 + }, + { + "epoch": 1.8997002536315426, + "grad_norm": 1.618394005210342, + "learning_rate": 1.3784405020350276e-08, + "loss": 0.4795762896537781, + "step": 8239 + }, + { + "epoch": 1.8999308277611253, + "grad_norm": 1.5749927795923588, + "learning_rate": 1.3721391153922235e-08, + "loss": 0.4549542963504791, + "step": 8240 + }, + { + "epoch": 1.9001614018907078, + "grad_norm": 1.759482125374446, + "learning_rate": 1.3658520653085703e-08, + "loss": 0.5253233313560486, + "step": 8241 + }, + { + "epoch": 1.9003919760202905, + "grad_norm": 1.4274315163192688, + "learning_rate": 1.3595793526979371e-08, + "loss": 0.44850921630859375, + "step": 8242 + }, + { + "epoch": 1.900622550149873, + "grad_norm": 1.5448941620644567, + "learning_rate": 1.35332097847215e-08, + "loss": 0.4416281580924988, + "step": 8243 + }, + { + "epoch": 1.9008531242794557, + "grad_norm": 1.932595440608825, + "learning_rate": 1.3470769435409036e-08, + "loss": 0.5567417740821838, + "step": 8244 + }, + { + "epoch": 1.9010836984090385, + "grad_norm": 1.4810071060864598, + "learning_rate": 1.3408472488118383e-08, + "loss": 0.43554848432540894, + "step": 8245 + }, + { + "epoch": 1.9013142725386212, + "grad_norm": 1.6729713604736038, + "learning_rate": 1.3346318951905077e-08, + "loss": 0.4219995141029358, + "step": 8246 + }, + { + "epoch": 1.901544846668204, + "grad_norm": 1.5600368865419485, + "learning_rate": 1.328430883580367e-08, + "loss": 0.45862913131713867, + "step": 8247 + }, + { + "epoch": 1.9017754207977866, + "grad_norm": 1.5932092717655322, + "learning_rate": 1.3222442148828172e-08, + "loss": 0.5026064515113831, + "step": 8248 + }, + { + "epoch": 1.902005994927369, + "grad_norm": 1.6308659122795583, + "learning_rate": 1.316071889997139e-08, + "loss": 0.46948713064193726, + "step": 8249 + }, + { + "epoch": 1.9022365690569518, + "grad_norm": 1.5718314790268124, + "learning_rate": 1.3099139098205258e-08, + "loss": 
0.4263686537742615, + "step": 8250 + }, + { + "epoch": 1.9024671431865343, + "grad_norm": 1.516002170215572, + "learning_rate": 1.3037702752481394e-08, + "loss": 0.4652191400527954, + "step": 8251 + }, + { + "epoch": 1.902697717316117, + "grad_norm": 1.553138573631746, + "learning_rate": 1.2976409871729987e-08, + "loss": 0.4918743371963501, + "step": 8252 + }, + { + "epoch": 1.9029282914456997, + "grad_norm": 1.4916920711393407, + "learning_rate": 1.2915260464860466e-08, + "loss": 0.5297696590423584, + "step": 8253 + }, + { + "epoch": 1.9031588655752825, + "grad_norm": 1.7049232652010609, + "learning_rate": 1.2854254540761722e-08, + "loss": 0.5320281982421875, + "step": 8254 + }, + { + "epoch": 1.9033894397048652, + "grad_norm": 1.6403951625522013, + "learning_rate": 1.2793392108301437e-08, + "loss": 0.4424601197242737, + "step": 8255 + }, + { + "epoch": 1.903620013834448, + "grad_norm": 1.7301429652605729, + "learning_rate": 1.2732673176326758e-08, + "loss": 0.4811365008354187, + "step": 8256 + }, + { + "epoch": 1.9038505879640304, + "grad_norm": 1.4707627617860477, + "learning_rate": 1.2672097753663624e-08, + "loss": 0.3744504451751709, + "step": 8257 + }, + { + "epoch": 1.904081162093613, + "grad_norm": 1.4178929694153364, + "learning_rate": 1.2611665849117326e-08, + "loss": 0.4703986644744873, + "step": 8258 + }, + { + "epoch": 1.9043117362231956, + "grad_norm": 1.7267205141598052, + "learning_rate": 1.255137747147228e-08, + "loss": 0.5431181192398071, + "step": 8259 + }, + { + "epoch": 1.9045423103527783, + "grad_norm": 1.8088892551764337, + "learning_rate": 1.2491232629492143e-08, + "loss": 0.5066450238227844, + "step": 8260 + }, + { + "epoch": 1.904772884482361, + "grad_norm": 1.4945728049455276, + "learning_rate": 1.2431231331919368e-08, + "loss": 0.4374620020389557, + "step": 8261 + }, + { + "epoch": 1.9050034586119438, + "grad_norm": 1.5574450804582989, + "learning_rate": 1.2371373587475753e-08, + "loss": 0.3628976345062256, + "step": 8262 + }, + { + 
"epoch": 1.9052340327415265, + "grad_norm": 1.6159357629155715, + "learning_rate": 1.231165940486234e-08, + "loss": 0.43471890687942505, + "step": 8263 + }, + { + "epoch": 1.9054646068711092, + "grad_norm": 1.4892272896008858, + "learning_rate": 1.2252088792759074e-08, + "loss": 0.5038785934448242, + "step": 8264 + }, + { + "epoch": 1.9056951810006917, + "grad_norm": 1.388813738509663, + "learning_rate": 1.2192661759825363e-08, + "loss": 0.44022035598754883, + "step": 8265 + }, + { + "epoch": 1.9059257551302744, + "grad_norm": 1.8473214990080156, + "learning_rate": 1.2133378314699294e-08, + "loss": 0.4924722909927368, + "step": 8266 + }, + { + "epoch": 1.906156329259857, + "grad_norm": 1.525292247487046, + "learning_rate": 1.2074238465998532e-08, + "loss": 0.3824247121810913, + "step": 8267 + }, + { + "epoch": 1.9063869033894396, + "grad_norm": 1.821466956277618, + "learning_rate": 1.2015242222319422e-08, + "loss": 0.47094473242759705, + "step": 8268 + }, + { + "epoch": 1.9066174775190223, + "grad_norm": 1.7313158547849, + "learning_rate": 1.1956389592237881e-08, + "loss": 0.5653735399246216, + "step": 8269 + }, + { + "epoch": 1.906848051648605, + "grad_norm": 1.7620428814203788, + "learning_rate": 1.1897680584308512e-08, + "loss": 0.4763476848602295, + "step": 8270 + }, + { + "epoch": 1.9070786257781878, + "grad_norm": 1.5194232107831984, + "learning_rate": 1.1839115207065487e-08, + "loss": 0.3845449686050415, + "step": 8271 + }, + { + "epoch": 1.9073091999077705, + "grad_norm": 1.5881713237890829, + "learning_rate": 1.1780693469021775e-08, + "loss": 0.43071988224983215, + "step": 8272 + }, + { + "epoch": 1.907539774037353, + "grad_norm": 1.4466344827167648, + "learning_rate": 1.172241537866947e-08, + "loss": 0.43860751390457153, + "step": 8273 + }, + { + "epoch": 1.9077703481669357, + "grad_norm": 1.7623171007667486, + "learning_rate": 1.1664280944480132e-08, + "loss": 0.5077678561210632, + "step": 8274 + }, + { + "epoch": 1.9080009222965182, + "grad_norm": 
1.4297374268054954, + "learning_rate": 1.1606290174903888e-08, + "loss": 0.3832993805408478, + "step": 8275 + }, + { + "epoch": 1.908231496426101, + "grad_norm": 1.629527864713481, + "learning_rate": 1.1548443078370551e-08, + "loss": 0.48003530502319336, + "step": 8276 + }, + { + "epoch": 1.9084620705556836, + "grad_norm": 1.5503547776003848, + "learning_rate": 1.1490739663288618e-08, + "loss": 0.6109439134597778, + "step": 8277 + }, + { + "epoch": 1.9086926446852663, + "grad_norm": 1.9064677948637023, + "learning_rate": 1.1433179938045823e-08, + "loss": 0.4559859037399292, + "step": 8278 + }, + { + "epoch": 1.908923218814849, + "grad_norm": 1.4670877218502, + "learning_rate": 1.137576391100925e-08, + "loss": 0.3935600221157074, + "step": 8279 + }, + { + "epoch": 1.9091537929444318, + "grad_norm": 1.6460426557554972, + "learning_rate": 1.1318491590524782e-08, + "loss": 0.44477611780166626, + "step": 8280 + }, + { + "epoch": 1.9093843670740143, + "grad_norm": 1.652813391764361, + "learning_rate": 1.1261362984917533e-08, + "loss": 0.47065627574920654, + "step": 8281 + }, + { + "epoch": 1.909614941203597, + "grad_norm": 1.567401132156008, + "learning_rate": 1.1204378102491862e-08, + "loss": 0.44851434230804443, + "step": 8282 + }, + { + "epoch": 1.9098455153331795, + "grad_norm": 1.6119259284309502, + "learning_rate": 1.1147536951530923e-08, + "loss": 0.38606488704681396, + "step": 8283 + }, + { + "epoch": 1.9100760894627622, + "grad_norm": 1.7145601291142103, + "learning_rate": 1.1090839540297103e-08, + "loss": 0.5400182008743286, + "step": 8284 + }, + { + "epoch": 1.910306663592345, + "grad_norm": 1.5193110263706777, + "learning_rate": 1.1034285877032146e-08, + "loss": 0.4225059449672699, + "step": 8285 + }, + { + "epoch": 1.9105372377219276, + "grad_norm": 1.8787563951518915, + "learning_rate": 1.0977875969956584e-08, + "loss": 0.5111556649208069, + "step": 8286 + }, + { + "epoch": 1.9107678118515103, + "grad_norm": 1.583999151547768, + "learning_rate": 
1.0921609827270196e-08, + "loss": 0.40596213936805725, + "step": 8287 + }, + { + "epoch": 1.910998385981093, + "grad_norm": 1.619272502884341, + "learning_rate": 1.0865487457151768e-08, + "loss": 0.47917360067367554, + "step": 8288 + }, + { + "epoch": 1.9112289601106756, + "grad_norm": 1.8556422558472565, + "learning_rate": 1.0809508867759331e-08, + "loss": 0.45154574513435364, + "step": 8289 + }, + { + "epoch": 1.9114595342402583, + "grad_norm": 1.7391028962680364, + "learning_rate": 1.0753674067229935e-08, + "loss": 0.5024373531341553, + "step": 8290 + }, + { + "epoch": 1.9116901083698408, + "grad_norm": 1.6003253992080113, + "learning_rate": 1.069798306367975e-08, + "loss": 0.5084686875343323, + "step": 8291 + }, + { + "epoch": 1.9119206824994235, + "grad_norm": 1.5906220140950642, + "learning_rate": 1.064243586520408e-08, + "loss": 0.3947920501232147, + "step": 8292 + }, + { + "epoch": 1.9121512566290062, + "grad_norm": 1.5037329879323602, + "learning_rate": 1.0587032479877023e-08, + "loss": 0.5011960864067078, + "step": 8293 + }, + { + "epoch": 1.912381830758589, + "grad_norm": 1.6116996984750152, + "learning_rate": 1.0531772915752247e-08, + "loss": 0.43622612953186035, + "step": 8294 + }, + { + "epoch": 1.9126124048881716, + "grad_norm": 1.664400790122745, + "learning_rate": 1.0476657180862325e-08, + "loss": 0.380764365196228, + "step": 8295 + }, + { + "epoch": 1.9128429790177544, + "grad_norm": 1.59176785573853, + "learning_rate": 1.042168528321874e-08, + "loss": 0.4183109700679779, + "step": 8296 + }, + { + "epoch": 1.9130735531473368, + "grad_norm": 1.7993335153125511, + "learning_rate": 1.036685723081221e-08, + "loss": 0.4221222698688507, + "step": 8297 + }, + { + "epoch": 1.9133041272769196, + "grad_norm": 1.7816315005923467, + "learning_rate": 1.0312173031612804e-08, + "loss": 0.543656051158905, + "step": 8298 + }, + { + "epoch": 1.913534701406502, + "grad_norm": 1.5681621709441897, + "learning_rate": 1.0257632693569052e-08, + "loss": 
0.48872441053390503, + "step": 8299 + }, + { + "epoch": 1.9137652755360848, + "grad_norm": 1.5640812032082956, + "learning_rate": 1.0203236224609169e-08, + "loss": 0.5447995662689209, + "step": 8300 + }, + { + "epoch": 1.9139958496656675, + "grad_norm": 1.4954141524676323, + "learning_rate": 1.0148983632640162e-08, + "loss": 0.39448055624961853, + "step": 8301 + }, + { + "epoch": 1.9142264237952502, + "grad_norm": 1.755968676337724, + "learning_rate": 1.009487492554828e-08, + "loss": 0.44735193252563477, + "step": 8302 + }, + { + "epoch": 1.914456997924833, + "grad_norm": 1.6151813931913763, + "learning_rate": 1.0040910111198786e-08, + "loss": 0.4747859537601471, + "step": 8303 + }, + { + "epoch": 1.9146875720544156, + "grad_norm": 1.6130507888649155, + "learning_rate": 9.987089197435739e-09, + "loss": 0.5120220184326172, + "step": 8304 + }, + { + "epoch": 1.9149181461839981, + "grad_norm": 1.6267491510418168, + "learning_rate": 9.933412192082991e-09, + "loss": 0.3889455795288086, + "step": 8305 + }, + { + "epoch": 1.9151487203135809, + "grad_norm": 1.497355606160038, + "learning_rate": 9.879879102942635e-09, + "loss": 0.36584073305130005, + "step": 8306 + }, + { + "epoch": 1.9153792944431633, + "grad_norm": 2.0010610263228643, + "learning_rate": 9.826489937796556e-09, + "loss": 0.6259280443191528, + "step": 8307 + }, + { + "epoch": 1.915609868572746, + "grad_norm": 1.780257440356438, + "learning_rate": 9.773244704405104e-09, + "loss": 0.45160970091819763, + "step": 8308 + }, + { + "epoch": 1.9158404427023288, + "grad_norm": 1.559258218463348, + "learning_rate": 9.720143410508309e-09, + "loss": 0.47028589248657227, + "step": 8309 + }, + { + "epoch": 1.9160710168319115, + "grad_norm": 1.7146410364961069, + "learning_rate": 9.667186063824773e-09, + "loss": 0.3850802183151245, + "step": 8310 + }, + { + "epoch": 1.9163015909614942, + "grad_norm": 1.69252010891113, + "learning_rate": 9.614372672052451e-09, + "loss": 0.4134417772293091, + "step": 8311 + }, + { + "epoch": 
1.916532165091077, + "grad_norm": 1.4197660481073355, + "learning_rate": 9.561703242868425e-09, + "loss": 0.5340328216552734, + "step": 8312 + }, + { + "epoch": 1.9167627392206594, + "grad_norm": 1.5089395557239718, + "learning_rate": 9.509177783928569e-09, + "loss": 0.4580942392349243, + "step": 8313 + }, + { + "epoch": 1.9169933133502421, + "grad_norm": 1.559427035261756, + "learning_rate": 9.45679630286811e-09, + "loss": 0.4227365553379059, + "step": 8314 + }, + { + "epoch": 1.9172238874798246, + "grad_norm": 1.462151537342571, + "learning_rate": 9.404558807301065e-09, + "loss": 0.42711400985717773, + "step": 8315 + }, + { + "epoch": 1.9174544616094074, + "grad_norm": 1.6466969798320865, + "learning_rate": 9.352465304820811e-09, + "loss": 0.41088467836380005, + "step": 8316 + }, + { + "epoch": 1.91768503573899, + "grad_norm": 1.7161905508950221, + "learning_rate": 9.30051580299962e-09, + "loss": 0.4669058918952942, + "step": 8317 + }, + { + "epoch": 1.9179156098685728, + "grad_norm": 1.8956617878589224, + "learning_rate": 9.248710309388896e-09, + "loss": 0.34129124879837036, + "step": 8318 + }, + { + "epoch": 1.9181461839981555, + "grad_norm": 1.6346151888813216, + "learning_rate": 9.19704883151906e-09, + "loss": 0.5538367033004761, + "step": 8319 + }, + { + "epoch": 1.9183767581277382, + "grad_norm": 1.8993289351204807, + "learning_rate": 9.145531376899773e-09, + "loss": 0.4591939151287079, + "step": 8320 + }, + { + "epoch": 1.9186073322573207, + "grad_norm": 1.531598340011727, + "learning_rate": 9.094157953019376e-09, + "loss": 0.38709723949432373, + "step": 8321 + }, + { + "epoch": 1.9188379063869034, + "grad_norm": 1.7947823187484588, + "learning_rate": 9.042928567345787e-09, + "loss": 0.503919780254364, + "step": 8322 + }, + { + "epoch": 1.919068480516486, + "grad_norm": 1.6367087262197295, + "learning_rate": 8.991843227325491e-09, + "loss": 0.510110080242157, + "step": 8323 + }, + { + "epoch": 1.9192990546460686, + "grad_norm": 1.6066272425773898, + 
"learning_rate": 8.940901940384437e-09, + "loss": 0.5100687146186829, + "step": 8324 + }, + { + "epoch": 1.9195296287756514, + "grad_norm": 1.513750458500578, + "learning_rate": 8.89010471392726e-09, + "loss": 0.44701308012008667, + "step": 8325 + }, + { + "epoch": 1.919760202905234, + "grad_norm": 1.563320875474341, + "learning_rate": 8.83945155533794e-09, + "loss": 0.4657078981399536, + "step": 8326 + }, + { + "epoch": 1.9199907770348168, + "grad_norm": 1.9297827676028427, + "learning_rate": 8.788942471979588e-09, + "loss": 0.510329008102417, + "step": 8327 + }, + { + "epoch": 1.9202213511643995, + "grad_norm": 1.471307451139604, + "learning_rate": 8.738577471193997e-09, + "loss": 0.5373008847236633, + "step": 8328 + }, + { + "epoch": 1.920451925293982, + "grad_norm": 1.9012550118721963, + "learning_rate": 8.688356560302313e-09, + "loss": 0.46517014503479004, + "step": 8329 + }, + { + "epoch": 1.9206824994235647, + "grad_norm": 1.6705233787528915, + "learning_rate": 8.638279746604582e-09, + "loss": 0.3993692398071289, + "step": 8330 + }, + { + "epoch": 1.9209130735531472, + "grad_norm": 1.366585505535673, + "learning_rate": 8.588347037380095e-09, + "loss": 0.42480504512786865, + "step": 8331 + }, + { + "epoch": 1.92114364768273, + "grad_norm": 1.7413386006663227, + "learning_rate": 8.538558439887044e-09, + "loss": 0.44433218240737915, + "step": 8332 + }, + { + "epoch": 1.9213742218123127, + "grad_norm": 1.59463524320548, + "learning_rate": 8.488913961362643e-09, + "loss": 0.4645090103149414, + "step": 8333 + }, + { + "epoch": 1.9216047959418954, + "grad_norm": 1.7690127959905497, + "learning_rate": 8.439413609023227e-09, + "loss": 0.47265806794166565, + "step": 8334 + }, + { + "epoch": 1.921835370071478, + "grad_norm": 1.6930025984848287, + "learning_rate": 8.390057390064265e-09, + "loss": 0.46389561891555786, + "step": 8335 + }, + { + "epoch": 1.9220659442010608, + "grad_norm": 1.8286869444988214, + "learning_rate": 8.340845311660127e-09, + "loss": 
0.45355337858200073, + "step": 8336 + }, + { + "epoch": 1.9222965183306433, + "grad_norm": 1.6861508362464954, + "learning_rate": 8.291777380964315e-09, + "loss": 0.47136229276657104, + "step": 8337 + }, + { + "epoch": 1.922527092460226, + "grad_norm": 1.7162470073135112, + "learning_rate": 8.242853605109234e-09, + "loss": 0.4914461374282837, + "step": 8338 + }, + { + "epoch": 1.9227576665898085, + "grad_norm": 1.5896610300054894, + "learning_rate": 8.194073991206641e-09, + "loss": 0.48298412561416626, + "step": 8339 + }, + { + "epoch": 1.9229882407193912, + "grad_norm": 1.591559243664797, + "learning_rate": 8.145438546346971e-09, + "loss": 0.5316052436828613, + "step": 8340 + }, + { + "epoch": 1.923218814848974, + "grad_norm": 1.530763445371585, + "learning_rate": 8.09694727760002e-09, + "loss": 0.45742303133010864, + "step": 8341 + }, + { + "epoch": 1.9234493889785567, + "grad_norm": 1.800664891434664, + "learning_rate": 8.048600192014365e-09, + "loss": 0.41579365730285645, + "step": 8342 + }, + { + "epoch": 1.9236799631081394, + "grad_norm": 1.4284255731817002, + "learning_rate": 8.000397296617834e-09, + "loss": 0.37775835394859314, + "step": 8343 + }, + { + "epoch": 1.9239105372377219, + "grad_norm": 1.7051685129810905, + "learning_rate": 7.95233859841704e-09, + "loss": 0.4720783531665802, + "step": 8344 + }, + { + "epoch": 1.9241411113673046, + "grad_norm": 1.608380789109436, + "learning_rate": 7.904424104398067e-09, + "loss": 0.5015095472335815, + "step": 8345 + }, + { + "epoch": 1.924371685496887, + "grad_norm": 1.5886093342032406, + "learning_rate": 7.856653821525672e-09, + "loss": 0.6053783893585205, + "step": 8346 + }, + { + "epoch": 1.9246022596264698, + "grad_norm": 1.71106607476921, + "learning_rate": 7.809027756743635e-09, + "loss": 0.47775521874427795, + "step": 8347 + }, + { + "epoch": 1.9248328337560525, + "grad_norm": 1.559597916397487, + "learning_rate": 7.761545916974976e-09, + "loss": 0.36487245559692383, + "step": 8348 + }, + { + "epoch": 
1.9250634078856352, + "grad_norm": 1.6596969619350017, + "learning_rate": 7.714208309121617e-09, + "loss": 0.48085975646972656, + "step": 8349 + }, + { + "epoch": 1.925293982015218, + "grad_norm": 1.6156245324091865, + "learning_rate": 7.667014940064609e-09, + "loss": 0.48800790309906006, + "step": 8350 + }, + { + "epoch": 1.9255245561448007, + "grad_norm": 1.654653168113963, + "learning_rate": 7.61996581666402e-09, + "loss": 0.5294181704521179, + "step": 8351 + }, + { + "epoch": 1.9257551302743832, + "grad_norm": 1.4725020612800932, + "learning_rate": 7.573060945758936e-09, + "loss": 0.44024503231048584, + "step": 8352 + }, + { + "epoch": 1.9259857044039659, + "grad_norm": 1.8377372608503795, + "learning_rate": 7.526300334167235e-09, + "loss": 0.4359186887741089, + "step": 8353 + }, + { + "epoch": 1.9262162785335484, + "grad_norm": 1.6594669465231893, + "learning_rate": 7.479683988686259e-09, + "loss": 0.4803895652294159, + "step": 8354 + }, + { + "epoch": 1.926446852663131, + "grad_norm": 1.5824042504509404, + "learning_rate": 7.433211916092141e-09, + "loss": 0.43153274059295654, + "step": 8355 + }, + { + "epoch": 1.9266774267927138, + "grad_norm": 1.812737055881384, + "learning_rate": 7.386884123140036e-09, + "loss": 0.38263070583343506, + "step": 8356 + }, + { + "epoch": 1.9269080009222965, + "grad_norm": 1.42789662226475, + "learning_rate": 7.340700616564e-09, + "loss": 0.42121192812919617, + "step": 8357 + }, + { + "epoch": 1.9271385750518792, + "grad_norm": 1.6902764865159838, + "learning_rate": 7.294661403077662e-09, + "loss": 0.46008965373039246, + "step": 8358 + }, + { + "epoch": 1.927369149181462, + "grad_norm": 1.5923895901686829, + "learning_rate": 7.248766489372893e-09, + "loss": 0.48495203256607056, + "step": 8359 + }, + { + "epoch": 1.9275997233110445, + "grad_norm": 1.6833123633851883, + "learning_rate": 7.203015882121244e-09, + "loss": 0.5004169940948486, + "step": 8360 + }, + { + "epoch": 1.9278302974406272, + "grad_norm": 1.4732497687996942, + 
"learning_rate": 7.15740958797284e-09, + "loss": 0.5660319328308105, + "step": 8361 + }, + { + "epoch": 1.9280608715702097, + "grad_norm": 1.588922332622674, + "learning_rate": 7.111947613557268e-09, + "loss": 0.43854010105133057, + "step": 8362 + }, + { + "epoch": 1.9282914456997924, + "grad_norm": 2.093362311602714, + "learning_rate": 7.066629965482574e-09, + "loss": 0.44730937480926514, + "step": 8363 + }, + { + "epoch": 1.928522019829375, + "grad_norm": 1.6568658526601971, + "learning_rate": 7.021456650336377e-09, + "loss": 0.45642590522766113, + "step": 8364 + }, + { + "epoch": 1.9287525939589578, + "grad_norm": 1.9173353497487595, + "learning_rate": 6.976427674684871e-09, + "loss": 0.5613523721694946, + "step": 8365 + }, + { + "epoch": 1.9289831680885405, + "grad_norm": 1.7976713831697748, + "learning_rate": 6.931543045073706e-09, + "loss": 0.4231454133987427, + "step": 8366 + }, + { + "epoch": 1.9292137422181233, + "grad_norm": 1.9184335289270926, + "learning_rate": 6.886802768027223e-09, + "loss": 0.464144766330719, + "step": 8367 + }, + { + "epoch": 1.9294443163477057, + "grad_norm": 1.6282751196601715, + "learning_rate": 6.8422068500487705e-09, + "loss": 0.4303344488143921, + "step": 8368 + }, + { + "epoch": 1.9296748904772885, + "grad_norm": 1.5717538042291814, + "learning_rate": 6.797755297620944e-09, + "loss": 0.4333549737930298, + "step": 8369 + }, + { + "epoch": 1.929905464606871, + "grad_norm": 1.5673646456508366, + "learning_rate": 6.753448117205241e-09, + "loss": 0.4656146466732025, + "step": 8370 + }, + { + "epoch": 1.9301360387364537, + "grad_norm": 2.0556236314521077, + "learning_rate": 6.709285315242063e-09, + "loss": 0.3823866844177246, + "step": 8371 + }, + { + "epoch": 1.9303666128660364, + "grad_norm": 1.5412445917312292, + "learning_rate": 6.665266898150946e-09, + "loss": 0.4552363157272339, + "step": 8372 + }, + { + "epoch": 1.930597186995619, + "grad_norm": 1.5304233694461045, + "learning_rate": 6.6213928723304335e-09, + "loss": 
0.48757460713386536, + "step": 8373 + }, + { + "epoch": 1.9308277611252018, + "grad_norm": 1.0877844091844102, + "learning_rate": 6.577663244158094e-09, + "loss": 0.3263235092163086, + "step": 8374 + }, + { + "epoch": 1.9310583352547845, + "grad_norm": 1.6065207890727204, + "learning_rate": 6.534078019990397e-09, + "loss": 0.510450541973114, + "step": 8375 + }, + { + "epoch": 1.931288909384367, + "grad_norm": 1.4737968731950963, + "learning_rate": 6.490637206162941e-09, + "loss": 0.37407904863357544, + "step": 8376 + }, + { + "epoch": 1.9315194835139498, + "grad_norm": 1.5691906942234775, + "learning_rate": 6.4473408089902315e-09, + "loss": 0.4216376543045044, + "step": 8377 + }, + { + "epoch": 1.9317500576435322, + "grad_norm": 1.647678033925203, + "learning_rate": 6.404188834766011e-09, + "loss": 0.41611379384994507, + "step": 8378 + }, + { + "epoch": 1.931980631773115, + "grad_norm": 1.6406917387427478, + "learning_rate": 6.361181289762596e-09, + "loss": 0.5301774740219116, + "step": 8379 + }, + { + "epoch": 1.9322112059026977, + "grad_norm": 1.457780743812755, + "learning_rate": 6.3183181802317635e-09, + "loss": 0.43767407536506653, + "step": 8380 + }, + { + "epoch": 1.9324417800322804, + "grad_norm": 1.5497586314138279, + "learning_rate": 6.275599512404084e-09, + "loss": 0.417082279920578, + "step": 8381 + }, + { + "epoch": 1.9326723541618631, + "grad_norm": 1.646560289289956, + "learning_rate": 6.233025292489147e-09, + "loss": 0.41670864820480347, + "step": 8382 + }, + { + "epoch": 1.9329029282914458, + "grad_norm": 1.4085441335066406, + "learning_rate": 6.190595526675446e-09, + "loss": 0.48778587579727173, + "step": 8383 + }, + { + "epoch": 1.9331335024210283, + "grad_norm": 1.39299487584749, + "learning_rate": 6.148310221130604e-09, + "loss": 0.44433802366256714, + "step": 8384 + }, + { + "epoch": 1.933364076550611, + "grad_norm": 1.7057166388160585, + "learning_rate": 6.106169382001369e-09, + "loss": 0.46826764941215515, + "step": 8385 + }, + { + "epoch": 
1.9335946506801935, + "grad_norm": 1.6832081073908207, + "learning_rate": 6.064173015413177e-09, + "loss": 0.5509334802627563, + "step": 8386 + }, + { + "epoch": 1.9338252248097763, + "grad_norm": 1.4200036599053338, + "learning_rate": 6.022321127470698e-09, + "loss": 0.4436245560646057, + "step": 8387 + }, + { + "epoch": 1.934055798939359, + "grad_norm": 1.4658061886752614, + "learning_rate": 5.9806137242574e-09, + "loss": 0.3577145040035248, + "step": 8388 + }, + { + "epoch": 1.9342863730689417, + "grad_norm": 1.3485508447539643, + "learning_rate": 5.939050811835988e-09, + "loss": 0.39893999695777893, + "step": 8389 + }, + { + "epoch": 1.9345169471985244, + "grad_norm": 1.4373848732418595, + "learning_rate": 5.897632396248075e-09, + "loss": 0.4109868109226227, + "step": 8390 + }, + { + "epoch": 1.9347475213281071, + "grad_norm": 1.6148537069486861, + "learning_rate": 5.85635848351429e-09, + "loss": 0.4193134307861328, + "step": 8391 + }, + { + "epoch": 1.9349780954576896, + "grad_norm": 1.774944389887914, + "learning_rate": 5.8152290796340545e-09, + "loss": 0.44189178943634033, + "step": 8392 + }, + { + "epoch": 1.9352086695872723, + "grad_norm": 1.7653802191556502, + "learning_rate": 5.774244190586141e-09, + "loss": 0.5014302730560303, + "step": 8393 + }, + { + "epoch": 1.9354392437168548, + "grad_norm": 1.5565367331009852, + "learning_rate": 5.733403822328009e-09, + "loss": 0.4962024688720703, + "step": 8394 + }, + { + "epoch": 1.9356698178464375, + "grad_norm": 1.585877874844532, + "learning_rate": 5.69270798079613e-09, + "loss": 0.45495474338531494, + "step": 8395 + }, + { + "epoch": 1.9359003919760203, + "grad_norm": 1.4665884192601668, + "learning_rate": 5.652156671906105e-09, + "loss": 0.49062758684158325, + "step": 8396 + }, + { + "epoch": 1.936130966105603, + "grad_norm": 1.6573434385643893, + "learning_rate": 5.611749901552554e-09, + "loss": 0.45899879932403564, + "step": 8397 + }, + { + "epoch": 1.9363615402351857, + "grad_norm": 1.511951038657192, + 
"learning_rate": 5.57148767560911e-09, + "loss": 0.47287002205848694, + "step": 8398 + }, + { + "epoch": 1.9365921143647684, + "grad_norm": 1.5970704539129832, + "learning_rate": 5.531369999927982e-09, + "loss": 0.439136266708374, + "step": 8399 + }, + { + "epoch": 1.936822688494351, + "grad_norm": 1.2795152915391526, + "learning_rate": 5.4913968803410594e-09, + "loss": 0.3920954465866089, + "step": 8400 + }, + { + "epoch": 1.9370532626239336, + "grad_norm": 1.254790295470771, + "learning_rate": 5.451568322658473e-09, + "loss": 0.4608895480632782, + "step": 8401 + }, + { + "epoch": 1.9372838367535161, + "grad_norm": 1.4389672316514175, + "learning_rate": 5.4118843326699246e-09, + "loss": 0.4617875814437866, + "step": 8402 + }, + { + "epoch": 1.9375144108830988, + "grad_norm": 1.8398027260263112, + "learning_rate": 5.372344916143912e-09, + "loss": 0.5293254852294922, + "step": 8403 + }, + { + "epoch": 1.9377449850126816, + "grad_norm": 1.2603762011573385, + "learning_rate": 5.332950078827725e-09, + "loss": 0.3935343623161316, + "step": 8404 + }, + { + "epoch": 1.9379755591422643, + "grad_norm": 1.3159194137267558, + "learning_rate": 5.293699826447895e-09, + "loss": 0.4612414240837097, + "step": 8405 + }, + { + "epoch": 1.938206133271847, + "grad_norm": 1.5616222982589931, + "learning_rate": 5.254594164709858e-09, + "loss": 0.4779428243637085, + "step": 8406 + }, + { + "epoch": 1.9384367074014297, + "grad_norm": 1.3393838173044101, + "learning_rate": 5.215633099298067e-09, + "loss": 0.37436819076538086, + "step": 8407 + }, + { + "epoch": 1.9386672815310122, + "grad_norm": 1.5367283978531407, + "learning_rate": 5.1768166358757695e-09, + "loss": 0.458698570728302, + "step": 8408 + }, + { + "epoch": 1.938897855660595, + "grad_norm": 1.52395102556278, + "learning_rate": 5.1381447800854515e-09, + "loss": 0.39365172386169434, + "step": 8409 + }, + { + "epoch": 1.9391284297901774, + "grad_norm": 1.6915141620999796, + "learning_rate": 5.099617537548284e-09, + "loss": 
0.46358722448349, + "step": 8410 + }, + { + "epoch": 1.9393590039197601, + "grad_norm": 1.4920931037664487, + "learning_rate": 5.061234913864898e-09, + "loss": 0.4286697506904602, + "step": 8411 + }, + { + "epoch": 1.9395895780493428, + "grad_norm": 1.2865245997479036, + "learning_rate": 5.022996914614275e-09, + "loss": 0.4925898015499115, + "step": 8412 + }, + { + "epoch": 1.9398201521789256, + "grad_norm": 1.5226712255874009, + "learning_rate": 4.984903545354857e-09, + "loss": 0.46924275159835815, + "step": 8413 + }, + { + "epoch": 1.9400507263085083, + "grad_norm": 1.5857623247989538, + "learning_rate": 4.946954811623994e-09, + "loss": 0.5326268672943115, + "step": 8414 + }, + { + "epoch": 1.940281300438091, + "grad_norm": 1.5901041586459477, + "learning_rate": 4.909150718937716e-09, + "loss": 0.4367690682411194, + "step": 8415 + }, + { + "epoch": 1.9405118745676735, + "grad_norm": 1.5390541996103484, + "learning_rate": 4.8714912727914055e-09, + "loss": 0.45579224824905396, + "step": 8416 + }, + { + "epoch": 1.9407424486972562, + "grad_norm": 1.5246826105956603, + "learning_rate": 4.8339764786590186e-09, + "loss": 0.4420431852340698, + "step": 8417 + }, + { + "epoch": 1.9409730228268387, + "grad_norm": 1.7713819487127218, + "learning_rate": 4.79660634199397e-09, + "loss": 0.4175274670124054, + "step": 8418 + }, + { + "epoch": 1.9412035969564214, + "grad_norm": 1.4046803968065067, + "learning_rate": 4.759380868228246e-09, + "loss": 0.41451364755630493, + "step": 8419 + }, + { + "epoch": 1.9414341710860041, + "grad_norm": 1.5394804899846177, + "learning_rate": 4.722300062772966e-09, + "loss": 0.4211805462837219, + "step": 8420 + }, + { + "epoch": 1.9416647452155869, + "grad_norm": 1.5805052208208792, + "learning_rate": 4.68536393101826e-09, + "loss": 0.4458296000957489, + "step": 8421 + }, + { + "epoch": 1.9418953193451696, + "grad_norm": 1.8263114249420374, + "learning_rate": 4.648572478333057e-09, + "loss": 0.6226488351821899, + "step": 8422 + }, + { + "epoch": 
1.9421258934747523, + "grad_norm": 1.467298573422793, + "learning_rate": 4.611925710065523e-09, + "loss": 0.343037486076355, + "step": 8423 + }, + { + "epoch": 1.9423564676043348, + "grad_norm": 1.4279799784372957, + "learning_rate": 4.575423631542397e-09, + "loss": 0.42478299140930176, + "step": 8424 + }, + { + "epoch": 1.9425870417339175, + "grad_norm": 1.4809253602160373, + "learning_rate": 4.539066248069878e-09, + "loss": 0.4467424750328064, + "step": 8425 + }, + { + "epoch": 1.9428176158635, + "grad_norm": 1.5230213064501263, + "learning_rate": 4.50285356493274e-09, + "loss": 0.4598960876464844, + "step": 8426 + }, + { + "epoch": 1.9430481899930827, + "grad_norm": 1.767389183054306, + "learning_rate": 4.466785587394883e-09, + "loss": 0.43005913496017456, + "step": 8427 + }, + { + "epoch": 1.9432787641226654, + "grad_norm": 1.6819998310369073, + "learning_rate": 4.430862320699114e-09, + "loss": 0.4259253740310669, + "step": 8428 + }, + { + "epoch": 1.9435093382522481, + "grad_norm": 1.4809575809160866, + "learning_rate": 4.395083770067476e-09, + "loss": 0.4275285601615906, + "step": 8429 + }, + { + "epoch": 1.9437399123818309, + "grad_norm": 1.5009509074634573, + "learning_rate": 4.3594499407003656e-09, + "loss": 0.42151302099227905, + "step": 8430 + }, + { + "epoch": 1.9439704865114136, + "grad_norm": 1.2121055184272223, + "learning_rate": 4.3239608377778625e-09, + "loss": 0.41727957129478455, + "step": 8431 + }, + { + "epoch": 1.944201060640996, + "grad_norm": 1.6993320655678226, + "learning_rate": 4.288616466458395e-09, + "loss": 0.5026905536651611, + "step": 8432 + }, + { + "epoch": 1.9444316347705788, + "grad_norm": 1.7732059667125062, + "learning_rate": 4.2534168318798524e-09, + "loss": 0.5170408487319946, + "step": 8433 + }, + { + "epoch": 1.9446622089001613, + "grad_norm": 1.4027101607713113, + "learning_rate": 4.21836193915881e-09, + "loss": 0.3918447196483612, + "step": 8434 + }, + { + "epoch": 1.944892783029744, + "grad_norm": 1.6652823795220828, + 
"learning_rate": 4.183451793390747e-09, + "loss": 0.49871906638145447, + "step": 8435 + }, + { + "epoch": 1.9451233571593267, + "grad_norm": 1.4696705484226025, + "learning_rate": 4.1486863996502694e-09, + "loss": 0.43729400634765625, + "step": 8436 + }, + { + "epoch": 1.9453539312889094, + "grad_norm": 1.6971586346839116, + "learning_rate": 4.114065762990781e-09, + "loss": 0.49198442697525024, + "step": 8437 + }, + { + "epoch": 1.9455845054184921, + "grad_norm": 1.7555960999646751, + "learning_rate": 4.079589888444923e-09, + "loss": 0.48610788583755493, + "step": 8438 + }, + { + "epoch": 1.9458150795480749, + "grad_norm": 1.4385738810997333, + "learning_rate": 4.045258781024019e-09, + "loss": 0.43962734937667847, + "step": 8439 + }, + { + "epoch": 1.9460456536776574, + "grad_norm": 1.5800303425440292, + "learning_rate": 4.011072445718522e-09, + "loss": 0.3320704400539398, + "step": 8440 + }, + { + "epoch": 1.94627622780724, + "grad_norm": 1.6634559640737916, + "learning_rate": 3.977030887497568e-09, + "loss": 0.4773918092250824, + "step": 8441 + }, + { + "epoch": 1.9465068019368226, + "grad_norm": 1.6386159776295786, + "learning_rate": 3.9431341113096425e-09, + "loss": 0.424363911151886, + "step": 8442 + }, + { + "epoch": 1.9467373760664053, + "grad_norm": 1.9939094308024221, + "learning_rate": 3.9093821220818055e-09, + "loss": 0.5321601033210754, + "step": 8443 + }, + { + "epoch": 1.946967950195988, + "grad_norm": 1.7091737329216896, + "learning_rate": 3.875774924720465e-09, + "loss": 0.48579344153404236, + "step": 8444 + }, + { + "epoch": 1.9471985243255707, + "grad_norm": 1.4617398717494952, + "learning_rate": 3.842312524110603e-09, + "loss": 0.39313316345214844, + "step": 8445 + }, + { + "epoch": 1.9474290984551534, + "grad_norm": 1.6233833617742501, + "learning_rate": 3.8089949251163264e-09, + "loss": 0.522427499294281, + "step": 8446 + }, + { + "epoch": 1.9476596725847362, + "grad_norm": 1.601217744469266, + "learning_rate": 3.775822132580875e-09, + "loss": 
0.3822653889656067, + "step": 8447 + }, + { + "epoch": 1.9478902467143187, + "grad_norm": 1.5787465509087006, + "learning_rate": 3.7427941513259454e-09, + "loss": 0.4322483241558075, + "step": 8448 + }, + { + "epoch": 1.9481208208439014, + "grad_norm": 1.6934897718136162, + "learning_rate": 3.7099109861528087e-09, + "loss": 0.4862939715385437, + "step": 8449 + }, + { + "epoch": 1.9483513949734839, + "grad_norm": 1.5875963080752307, + "learning_rate": 3.6771726418410863e-09, + "loss": 0.45388323068618774, + "step": 8450 + }, + { + "epoch": 1.9485819691030666, + "grad_norm": 1.5187043160616758, + "learning_rate": 3.644579123149749e-09, + "loss": 0.3937215805053711, + "step": 8451 + }, + { + "epoch": 1.9488125432326493, + "grad_norm": 1.5446261991465484, + "learning_rate": 3.6121304348165628e-09, + "loss": 0.46887993812561035, + "step": 8452 + }, + { + "epoch": 1.949043117362232, + "grad_norm": 1.763834546986469, + "learning_rate": 3.5798265815584204e-09, + "loss": 0.4444226026535034, + "step": 8453 + }, + { + "epoch": 1.9492736914918147, + "grad_norm": 1.639572253352884, + "learning_rate": 3.5476675680709e-09, + "loss": 0.4938625991344452, + "step": 8454 + }, + { + "epoch": 1.9495042656213972, + "grad_norm": 1.456362188758518, + "learning_rate": 3.5156533990285953e-09, + "loss": 0.37632471323013306, + "step": 8455 + }, + { + "epoch": 1.94973483975098, + "grad_norm": 1.8608548289842328, + "learning_rate": 3.483784079085117e-09, + "loss": 0.4345025420188904, + "step": 8456 + }, + { + "epoch": 1.9499654138805624, + "grad_norm": 1.4598938490767328, + "learning_rate": 3.4520596128729818e-09, + "loss": 0.3721727132797241, + "step": 8457 + }, + { + "epoch": 1.9501959880101452, + "grad_norm": 1.6409042038383927, + "learning_rate": 3.4204800050037232e-09, + "loss": 0.4871670603752136, + "step": 8458 + }, + { + "epoch": 1.9504265621397279, + "grad_norm": 1.8307964169711943, + "learning_rate": 3.38904526006778e-09, + "loss": 0.578133225440979, + "step": 8459 + }, + { + "epoch": 
1.9506571362693106, + "grad_norm": 1.5202457315236042, + "learning_rate": 3.357755382634386e-09, + "loss": 0.4721870422363281, + "step": 8460 + }, + { + "epoch": 1.9508877103988933, + "grad_norm": 1.798795599183991, + "learning_rate": 3.3266103772519037e-09, + "loss": 0.4569184184074402, + "step": 8461 + }, + { + "epoch": 1.951118284528476, + "grad_norm": 1.7311036262190431, + "learning_rate": 3.2956102484477112e-09, + "loss": 0.48763811588287354, + "step": 8462 + }, + { + "epoch": 1.9513488586580585, + "grad_norm": 1.5898725581558353, + "learning_rate": 3.264755000727759e-09, + "loss": 0.45957818627357483, + "step": 8463 + }, + { + "epoch": 1.9515794327876412, + "grad_norm": 1.661536076059429, + "learning_rate": 3.234044638577238e-09, + "loss": 0.49398598074913025, + "step": 8464 + }, + { + "epoch": 1.9518100069172237, + "grad_norm": 1.8367269278410805, + "learning_rate": 3.2034791664603544e-09, + "loss": 0.48884931206703186, + "step": 8465 + }, + { + "epoch": 1.9520405810468064, + "grad_norm": 1.4322798652039197, + "learning_rate": 3.173058588819999e-09, + "loss": 0.45171886682510376, + "step": 8466 + }, + { + "epoch": 1.9522711551763892, + "grad_norm": 1.7896431151356735, + "learning_rate": 3.142782910077968e-09, + "loss": 0.45110028982162476, + "step": 8467 + }, + { + "epoch": 1.9525017293059719, + "grad_norm": 1.6339596386172939, + "learning_rate": 3.1126521346354074e-09, + "loss": 0.4602523446083069, + "step": 8468 + }, + { + "epoch": 1.9527323034355546, + "grad_norm": 1.4993439724695443, + "learning_rate": 3.082666266872036e-09, + "loss": 0.3908727169036865, + "step": 8469 + }, + { + "epoch": 1.9529628775651373, + "grad_norm": 1.6588394319404383, + "learning_rate": 3.0528253111464786e-09, + "loss": 0.4886831045150757, + "step": 8470 + }, + { + "epoch": 1.9531934516947198, + "grad_norm": 1.8142188930520524, + "learning_rate": 3.023129271796598e-09, + "loss": 0.4407721161842346, + "step": 8471 + }, + { + "epoch": 1.9534240258243025, + "grad_norm": 
1.545809203271424, + "learning_rate": 2.9935781531389425e-09, + "loss": 0.46958622336387634, + "step": 8472 + }, + { + "epoch": 1.953654599953885, + "grad_norm": 1.5632050072309709, + "learning_rate": 2.964171959469075e-09, + "loss": 0.4642796516418457, + "step": 8473 + }, + { + "epoch": 1.9538851740834677, + "grad_norm": 1.5522529280671595, + "learning_rate": 2.9349106950613545e-09, + "loss": 0.5124588012695312, + "step": 8474 + }, + { + "epoch": 1.9541157482130505, + "grad_norm": 1.7441462887025347, + "learning_rate": 2.9057943641693784e-09, + "loss": 0.516730546951294, + "step": 8475 + }, + { + "epoch": 1.9543463223426332, + "grad_norm": 1.6015713883307108, + "learning_rate": 2.876822971025428e-09, + "loss": 0.47847944498062134, + "step": 8476 + }, + { + "epoch": 1.9545768964722159, + "grad_norm": 1.9133896423438201, + "learning_rate": 2.8479965198408007e-09, + "loss": 0.5167095065116882, + "step": 8477 + }, + { + "epoch": 1.9548074706017986, + "grad_norm": 1.4489948600651796, + "learning_rate": 2.819315014805812e-09, + "loss": 0.40728163719177246, + "step": 8478 + }, + { + "epoch": 1.955038044731381, + "grad_norm": 1.4413821780207463, + "learning_rate": 2.790778460089349e-09, + "loss": 0.49741852283477783, + "step": 8479 + }, + { + "epoch": 1.9552686188609638, + "grad_norm": 1.3759130199865537, + "learning_rate": 2.7623868598397603e-09, + "loss": 0.33847475051879883, + "step": 8480 + }, + { + "epoch": 1.9554991929905463, + "grad_norm": 1.6995475203184411, + "learning_rate": 2.734140218183856e-09, + "loss": 0.39727652072906494, + "step": 8481 + }, + { + "epoch": 1.955729767120129, + "grad_norm": 1.7012108842781224, + "learning_rate": 2.706038539227795e-09, + "loss": 0.40332260727882385, + "step": 8482 + }, + { + "epoch": 1.9559603412497117, + "grad_norm": 1.3388931691886075, + "learning_rate": 2.6780818270562e-09, + "loss": 0.40296924114227295, + "step": 8483 + }, + { + "epoch": 1.9561909153792945, + "grad_norm": 1.4889010944404621, + "learning_rate": 
2.650270085732931e-09, + "loss": 0.4253476858139038, + "step": 8484 + }, + { + "epoch": 1.9564214895088772, + "grad_norm": 1.5794301308382195, + "learning_rate": 2.6226033193007535e-09, + "loss": 0.448941171169281, + "step": 8485 + }, + { + "epoch": 1.95665206363846, + "grad_norm": 1.9411463996799059, + "learning_rate": 2.59508153178134e-09, + "loss": 0.48213180899620056, + "step": 8486 + }, + { + "epoch": 1.9568826377680424, + "grad_norm": 1.6243019689896288, + "learning_rate": 2.5677047271752683e-09, + "loss": 0.48886558413505554, + "step": 8487 + }, + { + "epoch": 1.957113211897625, + "grad_norm": 1.4212209484619593, + "learning_rate": 2.5404729094619103e-09, + "loss": 0.49786341190338135, + "step": 8488 + }, + { + "epoch": 1.9573437860272076, + "grad_norm": 2.1312601270605365, + "learning_rate": 2.5133860825997667e-09, + "loss": 0.4487866163253784, + "step": 8489 + }, + { + "epoch": 1.9575743601567903, + "grad_norm": 1.7672945087914924, + "learning_rate": 2.486444250526243e-09, + "loss": 0.46193206310272217, + "step": 8490 + }, + { + "epoch": 1.957804934286373, + "grad_norm": 1.5923899778865398, + "learning_rate": 2.459647417157429e-09, + "loss": 0.44729042053222656, + "step": 8491 + }, + { + "epoch": 1.9580355084159557, + "grad_norm": 1.8298057614969963, + "learning_rate": 2.432995586388764e-09, + "loss": 0.4646851718425751, + "step": 8492 + }, + { + "epoch": 1.9582660825455385, + "grad_norm": 1.6514495959092017, + "learning_rate": 2.40648876209415e-09, + "loss": 0.49538400769233704, + "step": 8493 + }, + { + "epoch": 1.9584966566751212, + "grad_norm": 1.7330889796307278, + "learning_rate": 2.3801269481267262e-09, + "loss": 0.5548783540725708, + "step": 8494 + }, + { + "epoch": 1.9587272308047037, + "grad_norm": 1.65108674708811, + "learning_rate": 2.3539101483184277e-09, + "loss": 0.4390280544757843, + "step": 8495 + }, + { + "epoch": 1.9589578049342864, + "grad_norm": 1.323831070791993, + "learning_rate": 2.327838366480095e-09, + "loss": 0.3079942464828491, 
+ "step": 8496 + }, + { + "epoch": 1.959188379063869, + "grad_norm": 2.030408303723105, + "learning_rate": 2.301911606401585e-09, + "loss": 0.5199894309043884, + "step": 8497 + }, + { + "epoch": 1.9594189531934516, + "grad_norm": 1.6402740340647268, + "learning_rate": 2.276129871851662e-09, + "loss": 0.3403523564338684, + "step": 8498 + }, + { + "epoch": 1.9596495273230343, + "grad_norm": 1.785907762491574, + "learning_rate": 2.2504931665777714e-09, + "loss": 0.49699991941452026, + "step": 8499 + }, + { + "epoch": 1.959880101452617, + "grad_norm": 1.5969429106714301, + "learning_rate": 2.2250014943066e-09, + "loss": 0.4178547263145447, + "step": 8500 + }, + { + "epoch": 1.9601106755821998, + "grad_norm": 1.8924231136601524, + "learning_rate": 2.199654858743627e-09, + "loss": 0.5622760057449341, + "step": 8501 + }, + { + "epoch": 1.9603412497117825, + "grad_norm": 1.4610200259542554, + "learning_rate": 2.1744532635733505e-09, + "loss": 0.4072464406490326, + "step": 8502 + }, + { + "epoch": 1.960571823841365, + "grad_norm": 1.5401248564682235, + "learning_rate": 2.1493967124587287e-09, + "loss": 0.475033164024353, + "step": 8503 + }, + { + "epoch": 1.9608023979709477, + "grad_norm": 1.7291130993603476, + "learning_rate": 2.1244852090424035e-09, + "loss": 0.4734419584274292, + "step": 8504 + }, + { + "epoch": 1.9610329721005302, + "grad_norm": 1.7230208360471804, + "learning_rate": 2.099718756945257e-09, + "loss": 0.42523911595344543, + "step": 8505 + }, + { + "epoch": 1.961263546230113, + "grad_norm": 1.510126016418521, + "learning_rate": 2.075097359767297e-09, + "loss": 0.5085049867630005, + "step": 8506 + }, + { + "epoch": 1.9614941203596956, + "grad_norm": 1.6269226735706044, + "learning_rate": 2.0506210210877728e-09, + "loss": 0.5682120323181152, + "step": 8507 + }, + { + "epoch": 1.9617246944892783, + "grad_norm": 1.5852715445159862, + "learning_rate": 2.0262897444642823e-09, + "loss": 0.4550264775753021, + "step": 8508 + }, + { + "epoch": 1.961955268618861, + 
"grad_norm": 1.560540594785291, + "learning_rate": 2.0021035334337745e-09, + "loss": 0.43745940923690796, + "step": 8509 + }, + { + "epoch": 1.9621858427484438, + "grad_norm": 1.421824915655791, + "learning_rate": 1.9780623915118812e-09, + "loss": 0.4523237347602844, + "step": 8510 + }, + { + "epoch": 1.9624164168780263, + "grad_norm": 1.354930266701351, + "learning_rate": 1.9541663221933623e-09, + "loss": 0.43080687522888184, + "step": 8511 + }, + { + "epoch": 1.962646991007609, + "grad_norm": 1.6208010256189354, + "learning_rate": 1.930415328951551e-09, + "loss": 0.5265613794326782, + "step": 8512 + }, + { + "epoch": 1.9628775651371915, + "grad_norm": 1.6858160892782517, + "learning_rate": 1.906809415239019e-09, + "loss": 0.5482667684555054, + "step": 8513 + }, + { + "epoch": 1.9631081392667742, + "grad_norm": 1.8258400073226166, + "learning_rate": 1.8833485844871322e-09, + "loss": 0.43548330664634705, + "step": 8514 + }, + { + "epoch": 1.963338713396357, + "grad_norm": 1.4726232338870595, + "learning_rate": 1.8600328401061627e-09, + "loss": 0.45715010166168213, + "step": 8515 + }, + { + "epoch": 1.9635692875259396, + "grad_norm": 1.4143739917928304, + "learning_rate": 1.8368621854852884e-09, + "loss": 0.48137760162353516, + "step": 8516 + }, + { + "epoch": 1.9637998616555223, + "grad_norm": 1.5443669851131265, + "learning_rate": 1.8138366239924818e-09, + "loss": 0.4607926607131958, + "step": 8517 + }, + { + "epoch": 1.964030435785105, + "grad_norm": 1.2018843862548443, + "learning_rate": 1.7909561589749545e-09, + "loss": 0.3551321029663086, + "step": 8518 + }, + { + "epoch": 1.9642610099146876, + "grad_norm": 1.4318523604861806, + "learning_rate": 1.7682207937583792e-09, + "loss": 0.4075126647949219, + "step": 8519 + }, + { + "epoch": 1.9644915840442703, + "grad_norm": 1.5238435411050293, + "learning_rate": 1.7456305316477793e-09, + "loss": 0.4470815658569336, + "step": 8520 + }, + { + "epoch": 1.9647221581738528, + "grad_norm": 1.7248235582994178, + 
"learning_rate": 1.72318537592675e-09, + "loss": 0.5074938535690308, + "step": 8521 + }, + { + "epoch": 1.9649527323034355, + "grad_norm": 1.684987227657268, + "learning_rate": 1.700885329857904e-09, + "loss": 0.4799109697341919, + "step": 8522 + }, + { + "epoch": 1.9651833064330182, + "grad_norm": 1.6217891186344597, + "learning_rate": 1.6787303966828703e-09, + "loss": 0.5603263974189758, + "step": 8523 + }, + { + "epoch": 1.965413880562601, + "grad_norm": 1.386089333333111, + "learning_rate": 1.656720579622073e-09, + "loss": 0.45492851734161377, + "step": 8524 + }, + { + "epoch": 1.9656444546921836, + "grad_norm": 1.9563157820273458, + "learning_rate": 1.6348558818748414e-09, + "loss": 0.47700050473213196, + "step": 8525 + }, + { + "epoch": 1.9658750288217663, + "grad_norm": 1.7426284772598926, + "learning_rate": 1.6131363066194115e-09, + "loss": 0.5105462074279785, + "step": 8526 + }, + { + "epoch": 1.9661056029513488, + "grad_norm": 1.6514750536849407, + "learning_rate": 1.5915618570130351e-09, + "loss": 0.47818124294281006, + "step": 8527 + }, + { + "epoch": 1.9663361770809316, + "grad_norm": 1.7136861974622173, + "learning_rate": 1.5701325361916484e-09, + "loss": 0.4549172520637512, + "step": 8528 + }, + { + "epoch": 1.966566751210514, + "grad_norm": 1.7152545383952742, + "learning_rate": 1.5488483472703151e-09, + "loss": 0.406271755695343, + "step": 8529 + }, + { + "epoch": 1.9667973253400968, + "grad_norm": 1.772427841344589, + "learning_rate": 1.5277092933427827e-09, + "loss": 0.4452788829803467, + "step": 8530 + }, + { + "epoch": 1.9670278994696795, + "grad_norm": 1.7369674304649072, + "learning_rate": 1.5067153774820374e-09, + "loss": 0.46621495485305786, + "step": 8531 + }, + { + "epoch": 1.9672584735992622, + "grad_norm": 1.294422205793256, + "learning_rate": 1.4858666027395272e-09, + "loss": 0.47837382555007935, + "step": 8532 + }, + { + "epoch": 1.967489047728845, + "grad_norm": 1.754058349269308, + "learning_rate": 1.4651629721460501e-09, + "loss": 
0.5690933465957642, + "step": 8533 + }, + { + "epoch": 1.9677196218584276, + "grad_norm": 1.7627173783003411, + "learning_rate": 1.4446044887109764e-09, + "loss": 0.478906512260437, + "step": 8534 + }, + { + "epoch": 1.9679501959880101, + "grad_norm": 1.7296669537147416, + "learning_rate": 1.4241911554225827e-09, + "loss": 0.5024028420448303, + "step": 8535 + }, + { + "epoch": 1.9681807701175928, + "grad_norm": 1.6971062366905785, + "learning_rate": 1.4039229752483839e-09, + "loss": 0.4430769979953766, + "step": 8536 + }, + { + "epoch": 1.9684113442471753, + "grad_norm": 1.5177256060076265, + "learning_rate": 1.3837999511343567e-09, + "loss": 0.34506234526634216, + "step": 8537 + }, + { + "epoch": 1.968641918376758, + "grad_norm": 1.6051884301428612, + "learning_rate": 1.363822086005717e-09, + "loss": 0.47483426332473755, + "step": 8538 + }, + { + "epoch": 1.9688724925063408, + "grad_norm": 1.4685071017788778, + "learning_rate": 1.343989382766475e-09, + "loss": 0.3902367651462555, + "step": 8539 + }, + { + "epoch": 1.9691030666359235, + "grad_norm": 1.5919563191923878, + "learning_rate": 1.3243018442994358e-09, + "loss": 0.5114254951477051, + "step": 8540 + }, + { + "epoch": 1.9693336407655062, + "grad_norm": 1.6064476628756739, + "learning_rate": 1.3047594734663104e-09, + "loss": 0.4048948287963867, + "step": 8541 + }, + { + "epoch": 1.969564214895089, + "grad_norm": 1.3533697409791567, + "learning_rate": 1.2853622731079372e-09, + "loss": 0.4168536067008972, + "step": 8542 + }, + { + "epoch": 1.9697947890246714, + "grad_norm": 1.459175077584749, + "learning_rate": 1.2661102460437279e-09, + "loss": 0.38410186767578125, + "step": 8543 + }, + { + "epoch": 1.9700253631542541, + "grad_norm": 1.5096843994913236, + "learning_rate": 1.2470033950724435e-09, + "loss": 0.4931117296218872, + "step": 8544 + }, + { + "epoch": 1.9702559372838366, + "grad_norm": 1.863771997387379, + "learning_rate": 1.228041722971085e-09, + "loss": 0.41142135858535767, + "step": 8545 + }, + { + 
"epoch": 1.9704865114134194, + "grad_norm": 1.7868633908108185, + "learning_rate": 1.209225232496225e-09, + "loss": 0.5165313482284546, + "step": 8546 + }, + { + "epoch": 1.970717085543002, + "grad_norm": 1.284821780038077, + "learning_rate": 1.190553926382898e-09, + "loss": 0.3330427408218384, + "step": 8547 + }, + { + "epoch": 1.9709476596725848, + "grad_norm": 1.5242411906386457, + "learning_rate": 1.172027807345155e-09, + "loss": 0.43116509914398193, + "step": 8548 + }, + { + "epoch": 1.9711782338021675, + "grad_norm": 1.8011852071569119, + "learning_rate": 1.1536468780760643e-09, + "loss": 0.43564409017562866, + "step": 8549 + }, + { + "epoch": 1.9714088079317502, + "grad_norm": 1.7422483041269035, + "learning_rate": 1.1354111412472666e-09, + "loss": 0.5361013412475586, + "step": 8550 + }, + { + "epoch": 1.9716393820613327, + "grad_norm": 1.6110906687473352, + "learning_rate": 1.1173205995097524e-09, + "loss": 0.4049466550350189, + "step": 8551 + }, + { + "epoch": 1.9718699561909154, + "grad_norm": 1.6636539568656024, + "learning_rate": 1.0993752554930847e-09, + "loss": 0.45090144872665405, + "step": 8552 + }, + { + "epoch": 1.972100530320498, + "grad_norm": 1.5627616190247176, + "learning_rate": 1.0815751118057326e-09, + "loss": 0.43933606147766113, + "step": 8553 + }, + { + "epoch": 1.9723311044500806, + "grad_norm": 1.672183185343667, + "learning_rate": 1.063920171035182e-09, + "loss": 0.5254300832748413, + "step": 8554 + }, + { + "epoch": 1.9725616785796634, + "grad_norm": 1.4309558177904258, + "learning_rate": 1.0464104357477132e-09, + "loss": 0.45544567704200745, + "step": 8555 + }, + { + "epoch": 1.972792252709246, + "grad_norm": 1.9479324504983593, + "learning_rate": 1.0290459084886238e-09, + "loss": 0.5177001357078552, + "step": 8556 + }, + { + "epoch": 1.9730228268388288, + "grad_norm": 1.585288183336846, + "learning_rate": 1.0118265917818946e-09, + "loss": 0.4669674038887024, + "step": 8557 + }, + { + "epoch": 1.9732534009684115, + "grad_norm": 
1.5203759714638625, + "learning_rate": 9.947524881307456e-10, + "loss": 0.4244263172149658, + "step": 8558 + }, + { + "epoch": 1.973483975097994, + "grad_norm": 1.810087521792982, + "learning_rate": 9.778236000168583e-10, + "loss": 0.44121527671813965, + "step": 8559 + }, + { + "epoch": 1.9737145492275767, + "grad_norm": 1.59326202559186, + "learning_rate": 9.610399299010418e-10, + "loss": 0.44209837913513184, + "step": 8560 + }, + { + "epoch": 1.9739451233571592, + "grad_norm": 1.5399236076354037, + "learning_rate": 9.444014802231226e-10, + "loss": 0.4036273956298828, + "step": 8561 + }, + { + "epoch": 1.974175697486742, + "grad_norm": 1.5589230288439277, + "learning_rate": 9.279082534014992e-10, + "loss": 0.47106266021728516, + "step": 8562 + }, + { + "epoch": 1.9744062716163246, + "grad_norm": 1.6389105898260865, + "learning_rate": 9.115602518338095e-10, + "loss": 0.41080260276794434, + "step": 8563 + }, + { + "epoch": 1.9746368457459074, + "grad_norm": 2.0418613187292918, + "learning_rate": 8.953574778962635e-10, + "loss": 0.4333069920539856, + "step": 8564 + }, + { + "epoch": 1.97486741987549, + "grad_norm": 1.4286669807437469, + "learning_rate": 8.792999339440887e-10, + "loss": 0.3939141631126404, + "step": 8565 + }, + { + "epoch": 1.9750979940050726, + "grad_norm": 1.7648959719228037, + "learning_rate": 8.633876223114178e-10, + "loss": 0.4202404022216797, + "step": 8566 + }, + { + "epoch": 1.9753285681346553, + "grad_norm": 1.6239377555078118, + "learning_rate": 8.476205453114005e-10, + "loss": 0.44722893834114075, + "step": 8567 + }, + { + "epoch": 1.9755591422642378, + "grad_norm": 1.6159852265335335, + "learning_rate": 8.319987052357591e-10, + "loss": 0.4095258414745331, + "step": 8568 + }, + { + "epoch": 1.9757897163938205, + "grad_norm": 1.359270850467109, + "learning_rate": 8.165221043553439e-10, + "loss": 0.43372297286987305, + "step": 8569 + }, + { + "epoch": 1.9760202905234032, + "grad_norm": 1.7602005237852472, + "learning_rate": 
8.011907449199106e-10, + "loss": 0.4697731137275696, + "step": 8570 + }, + { + "epoch": 1.976250864652986, + "grad_norm": 1.759646277514859, + "learning_rate": 7.860046291580103e-10, + "loss": 0.49179136753082275, + "step": 8571 + }, + { + "epoch": 1.9764814387825687, + "grad_norm": 1.5966011788910657, + "learning_rate": 7.70963759277099e-10, + "loss": 0.35898157954216003, + "step": 8572 + }, + { + "epoch": 1.9767120129121514, + "grad_norm": 1.5427594087958296, + "learning_rate": 7.560681374634282e-10, + "loss": 0.48293429613113403, + "step": 8573 + }, + { + "epoch": 1.9769425870417339, + "grad_norm": 1.4911498565229593, + "learning_rate": 7.413177658822656e-10, + "loss": 0.39636045694351196, + "step": 8574 + }, + { + "epoch": 1.9771731611713166, + "grad_norm": 1.294544438076297, + "learning_rate": 7.267126466777851e-10, + "loss": 0.375876784324646, + "step": 8575 + }, + { + "epoch": 1.977403735300899, + "grad_norm": 1.438449662082489, + "learning_rate": 7.122527819729551e-10, + "loss": 0.4064311385154724, + "step": 8576 + }, + { + "epoch": 1.9776343094304818, + "grad_norm": 1.3024542737808098, + "learning_rate": 6.979381738696499e-10, + "loss": 0.4373857378959656, + "step": 8577 + }, + { + "epoch": 1.9778648835600645, + "grad_norm": 2.013857406007071, + "learning_rate": 6.837688244486494e-10, + "loss": 0.5008025765419006, + "step": 8578 + }, + { + "epoch": 1.9780954576896472, + "grad_norm": 1.5523385427514034, + "learning_rate": 6.697447357695285e-10, + "loss": 0.4286271035671234, + "step": 8579 + }, + { + "epoch": 1.97832603181923, + "grad_norm": 1.6941567857927917, + "learning_rate": 6.558659098711006e-10, + "loss": 0.4420759081840515, + "step": 8580 + }, + { + "epoch": 1.9785566059488127, + "grad_norm": 1.314306142904572, + "learning_rate": 6.421323487705299e-10, + "loss": 0.3946709632873535, + "step": 8581 + }, + { + "epoch": 1.9787871800783952, + "grad_norm": 1.6731376396011677, + "learning_rate": 6.285440544641085e-10, + "loss": 0.42874544858932495, + 
"step": 8582 + }, + { + "epoch": 1.9790177542079779, + "grad_norm": 1.5147129393930194, + "learning_rate": 6.151010289272563e-10, + "loss": 0.4728921055793762, + "step": 8583 + }, + { + "epoch": 1.9792483283375604, + "grad_norm": 1.4681942656331504, + "learning_rate": 6.018032741139656e-10, + "loss": 0.3756295442581177, + "step": 8584 + }, + { + "epoch": 1.979478902467143, + "grad_norm": 1.5314225760860438, + "learning_rate": 5.886507919570239e-10, + "loss": 0.48663657903671265, + "step": 8585 + }, + { + "epoch": 1.9797094765967258, + "grad_norm": 2.0571870297763377, + "learning_rate": 5.756435843685681e-10, + "loss": 0.46127766370773315, + "step": 8586 + }, + { + "epoch": 1.9799400507263085, + "grad_norm": 1.4783867212667936, + "learning_rate": 5.627816532390862e-10, + "loss": 0.493796169757843, + "step": 8587 + }, + { + "epoch": 1.9801706248558912, + "grad_norm": 1.2639174296233155, + "learning_rate": 5.500650004383045e-10, + "loss": 0.3703004717826843, + "step": 8588 + }, + { + "epoch": 1.980401198985474, + "grad_norm": 1.6202036973245495, + "learning_rate": 5.374936278146336e-10, + "loss": 0.5385284423828125, + "step": 8589 + }, + { + "epoch": 1.9806317731150564, + "grad_norm": 1.5325088206554112, + "learning_rate": 5.250675371956115e-10, + "loss": 0.3996584713459015, + "step": 8590 + }, + { + "epoch": 1.9808623472446392, + "grad_norm": 1.6001328200790206, + "learning_rate": 5.12786730387349e-10, + "loss": 0.4513227641582489, + "step": 8591 + }, + { + "epoch": 1.9810929213742217, + "grad_norm": 1.5317035339628575, + "learning_rate": 5.006512091750848e-10, + "loss": 0.46632474660873413, + "step": 8592 + }, + { + "epoch": 1.9813234955038044, + "grad_norm": 1.5599775050602098, + "learning_rate": 4.886609753227411e-10, + "loss": 0.5379712581634521, + "step": 8593 + }, + { + "epoch": 1.981554069633387, + "grad_norm": 1.6572300992446405, + "learning_rate": 4.768160305732572e-10, + "loss": 0.3606422543525696, + "step": 8594 + }, + { + "epoch": 1.9817846437629698, + 
"grad_norm": 1.927352159029303, + "learning_rate": 4.651163766484778e-10, + "loss": 0.39339596033096313, + "step": 8595 + }, + { + "epoch": 1.9820152178925525, + "grad_norm": 1.5930436461957604, + "learning_rate": 4.535620152489317e-10, + "loss": 0.4606707692146301, + "step": 8596 + }, + { + "epoch": 1.9822457920221352, + "grad_norm": 1.484957242621252, + "learning_rate": 4.421529480543862e-10, + "loss": 0.4234154522418976, + "step": 8597 + }, + { + "epoch": 1.9824763661517177, + "grad_norm": 1.3985130447330405, + "learning_rate": 4.308891767229594e-10, + "loss": 0.49317437410354614, + "step": 8598 + }, + { + "epoch": 1.9827069402813005, + "grad_norm": 1.5795407686648721, + "learning_rate": 4.197707028922304e-10, + "loss": 0.47756847739219666, + "step": 8599 + }, + { + "epoch": 1.982937514410883, + "grad_norm": 1.437347041692997, + "learning_rate": 4.0879752817823963e-10, + "loss": 0.37664321064949036, + "step": 8600 + }, + { + "epoch": 1.9831680885404657, + "grad_norm": 1.4684607347638514, + "learning_rate": 3.9796965417604465e-10, + "loss": 0.3927830457687378, + "step": 8601 + }, + { + "epoch": 1.9833986626700484, + "grad_norm": 1.5410832268522827, + "learning_rate": 3.8728708245971966e-10, + "loss": 0.41071420907974243, + "step": 8602 + }, + { + "epoch": 1.983629236799631, + "grad_norm": 1.7060421891461264, + "learning_rate": 3.7674981458191145e-10, + "loss": 0.49516505002975464, + "step": 8603 + }, + { + "epoch": 1.9838598109292138, + "grad_norm": 1.451667871155561, + "learning_rate": 3.6635785207439486e-10, + "loss": 0.474129855632782, + "step": 8604 + }, + { + "epoch": 1.9840903850587965, + "grad_norm": 1.6840089122105588, + "learning_rate": 3.5611119644773923e-10, + "loss": 0.4445813298225403, + "step": 8605 + }, + { + "epoch": 1.984320959188379, + "grad_norm": 2.027307915892804, + "learning_rate": 3.4600984919141987e-10, + "loss": 0.46165329217910767, + "step": 8606 + }, + { + "epoch": 1.9845515333179617, + "grad_norm": 1.3540207698004456, + 
"learning_rate": 3.3605381177381764e-10, + "loss": 0.4073392152786255, + "step": 8607 + }, + { + "epoch": 1.9847821074475442, + "grad_norm": 1.5051036984917558, + "learning_rate": 3.262430856419973e-10, + "loss": 0.46712470054626465, + "step": 8608 + }, + { + "epoch": 1.985012681577127, + "grad_norm": 1.4968737511198085, + "learning_rate": 3.165776722222624e-10, + "loss": 0.49993449449539185, + "step": 8609 + }, + { + "epoch": 1.9852432557067097, + "grad_norm": 1.67576101698744, + "learning_rate": 3.0705757291926705e-10, + "loss": 0.40737634897232056, + "step": 8610 + }, + { + "epoch": 1.9854738298362924, + "grad_norm": 1.5973815539324434, + "learning_rate": 2.976827891172373e-10, + "loss": 0.3714853823184967, + "step": 8611 + }, + { + "epoch": 1.985704403965875, + "grad_norm": 1.425745294363986, + "learning_rate": 2.884533221785279e-10, + "loss": 0.3818984925746918, + "step": 8612 + }, + { + "epoch": 1.9859349780954578, + "grad_norm": 1.892989564850047, + "learning_rate": 2.7936917344495435e-10, + "loss": 0.4529988765716553, + "step": 8613 + }, + { + "epoch": 1.9861655522250403, + "grad_norm": 1.3609709522865416, + "learning_rate": 2.7043034423701595e-10, + "loss": 0.44964706897735596, + "step": 8614 + }, + { + "epoch": 1.986396126354623, + "grad_norm": 1.5417500191784284, + "learning_rate": 2.616368358538956e-10, + "loss": 0.49079659581184387, + "step": 8615 + }, + { + "epoch": 1.9866267004842055, + "grad_norm": 1.5113331636323986, + "learning_rate": 2.529886495739042e-10, + "loss": 0.4411408305168152, + "step": 8616 + }, + { + "epoch": 1.9868572746137882, + "grad_norm": 1.5784526966638346, + "learning_rate": 2.444857866541472e-10, + "loss": 0.4386615455150604, + "step": 8617 + }, + { + "epoch": 1.987087848743371, + "grad_norm": 1.4030710400001012, + "learning_rate": 2.3612824833063594e-10, + "loss": 0.4545249342918396, + "step": 8618 + }, + { + "epoch": 1.9873184228729537, + "grad_norm": 1.3707438995019952, + "learning_rate": 2.2791603581817643e-10, + "loss": 
0.40094703435897827, + "step": 8619 + }, + { + "epoch": 1.9875489970025364, + "grad_norm": 1.3947569997576104, + "learning_rate": 2.1984915031048047e-10, + "loss": 0.40233147144317627, + "step": 8620 + }, + { + "epoch": 1.9877795711321191, + "grad_norm": 1.6068677090202075, + "learning_rate": 2.1192759298016562e-10, + "loss": 0.460537314414978, + "step": 8621 + }, + { + "epoch": 1.9880101452617016, + "grad_norm": 1.5790092282402457, + "learning_rate": 2.0415136497875518e-10, + "loss": 0.4602966904640198, + "step": 8622 + }, + { + "epoch": 1.9882407193912843, + "grad_norm": 1.3484786116390262, + "learning_rate": 1.9652046743656724e-10, + "loss": 0.5004392266273499, + "step": 8623 + }, + { + "epoch": 1.9884712935208668, + "grad_norm": 1.6014865414140482, + "learning_rate": 1.8903490146282564e-10, + "loss": 0.48196107149124146, + "step": 8624 + }, + { + "epoch": 1.9887018676504495, + "grad_norm": 1.6074894882455422, + "learning_rate": 1.8169466814565992e-10, + "loss": 0.45684510469436646, + "step": 8625 + }, + { + "epoch": 1.9889324417800323, + "grad_norm": 1.60134146592956, + "learning_rate": 1.7449976855199444e-10, + "loss": 0.44381850957870483, + "step": 8626 + }, + { + "epoch": 1.989163015909615, + "grad_norm": 2.135748914298638, + "learning_rate": 1.674502037277703e-10, + "loss": 0.5301632881164551, + "step": 8627 + }, + { + "epoch": 1.9893935900391977, + "grad_norm": 1.6146386939845652, + "learning_rate": 1.6054597469761233e-10, + "loss": 0.5154398679733276, + "step": 8628 + }, + { + "epoch": 1.9896241641687804, + "grad_norm": 1.9992239097696207, + "learning_rate": 1.5378708246516215e-10, + "loss": 0.4334644079208374, + "step": 8629 + }, + { + "epoch": 1.989854738298363, + "grad_norm": 1.615721145436376, + "learning_rate": 1.4717352801296713e-10, + "loss": 0.45578733086586, + "step": 8630 + }, + { + "epoch": 1.9900853124279456, + "grad_norm": 1.9680117779038706, + "learning_rate": 1.4070531230225834e-10, + "loss": 0.48997777700424194, + "step": 8631 + }, + { + 
"epoch": 1.9903158865575281, + "grad_norm": 1.6305724090422111, + "learning_rate": 1.3438243627328371e-10, + "loss": 0.4760161340236664, + "step": 8632 + }, + { + "epoch": 1.9905464606871108, + "grad_norm": 1.628677759157358, + "learning_rate": 1.2820490084508583e-10, + "loss": 0.43040308356285095, + "step": 8633 + }, + { + "epoch": 1.9907770348166935, + "grad_norm": 1.4320674775365163, + "learning_rate": 1.2217270691583514e-10, + "loss": 0.4588020443916321, + "step": 8634 + }, + { + "epoch": 1.9910076089462763, + "grad_norm": 1.562424742526405, + "learning_rate": 1.1628585536216374e-10, + "loss": 0.46267229318618774, + "step": 8635 + }, + { + "epoch": 1.991238183075859, + "grad_norm": 1.5109131359979342, + "learning_rate": 1.1054434703994253e-10, + "loss": 0.4159420132637024, + "step": 8636 + }, + { + "epoch": 1.9914687572054417, + "grad_norm": 1.5987294041380085, + "learning_rate": 1.0494818278361518e-10, + "loss": 0.47950947284698486, + "step": 8637 + }, + { + "epoch": 1.9916993313350242, + "grad_norm": 1.6664716034008127, + "learning_rate": 9.949736340664206e-11, + "loss": 0.4912334680557251, + "step": 8638 + }, + { + "epoch": 1.991929905464607, + "grad_norm": 1.5249112719703917, + "learning_rate": 9.419188970150038e-11, + "loss": 0.4895044267177582, + "step": 8639 + }, + { + "epoch": 1.9921604795941894, + "grad_norm": 1.6059730233512621, + "learning_rate": 8.903176243935106e-11, + "loss": 0.4822810888290405, + "step": 8640 + }, + { + "epoch": 1.9923910537237721, + "grad_norm": 1.6775671432311143, + "learning_rate": 8.401698237014975e-11, + "loss": 0.4739280045032501, + "step": 8641 + }, + { + "epoch": 1.9926216278533548, + "grad_norm": 1.5254015473001428, + "learning_rate": 7.91475502228689e-11, + "loss": 0.5394953489303589, + "step": 8642 + }, + { + "epoch": 1.9928522019829376, + "grad_norm": 1.5656411080833423, + "learning_rate": 7.44234667054977e-11, + "loss": 0.38446712493896484, + "step": 8643 + }, + { + "epoch": 1.9930827761125203, + "grad_norm": 
1.817887515771179, + "learning_rate": 6.98447325045981e-11, + "loss": 0.46814244985580444, + "step": 8644 + }, + { + "epoch": 1.993313350242103, + "grad_norm": 2.1046790616702284, + "learning_rate": 6.541134828574879e-11, + "loss": 0.5420444011688232, + "step": 8645 + }, + { + "epoch": 1.9935439243716855, + "grad_norm": 1.7622016760188661, + "learning_rate": 6.112331469332321e-11, + "loss": 0.45574939250946045, + "step": 8646 + }, + { + "epoch": 1.9937744985012682, + "grad_norm": 1.3457673361522478, + "learning_rate": 5.69806323507116e-11, + "loss": 0.37707841396331787, + "step": 8647 + }, + { + "epoch": 1.9940050726308507, + "grad_norm": 1.713931828869125, + "learning_rate": 5.298330186020994e-11, + "loss": 0.5139172077178955, + "step": 8648 + }, + { + "epoch": 1.9942356467604334, + "grad_norm": 1.5184794936547403, + "learning_rate": 4.913132380268692e-11, + "loss": 0.5251332521438599, + "step": 8649 + }, + { + "epoch": 1.9944662208900161, + "grad_norm": 1.603884960010875, + "learning_rate": 4.542469873802801e-11, + "loss": 0.38396936655044556, + "step": 8650 + }, + { + "epoch": 1.9946967950195988, + "grad_norm": 1.3451562633349459, + "learning_rate": 4.1863427205246495e-11, + "loss": 0.42507076263427734, + "step": 8651 + }, + { + "epoch": 1.9949273691491816, + "grad_norm": 1.3938730213086719, + "learning_rate": 3.8447509721817316e-11, + "loss": 0.3914533257484436, + "step": 8652 + }, + { + "epoch": 1.9951579432787643, + "grad_norm": 1.8085258279642746, + "learning_rate": 3.5176946784343245e-11, + "loss": 0.46923860907554626, + "step": 8653 + }, + { + "epoch": 1.9953885174083468, + "grad_norm": 1.5078315206639539, + "learning_rate": 3.205173886822177e-11, + "loss": 0.35363346338272095, + "step": 8654 + }, + { + "epoch": 1.9956190915379295, + "grad_norm": 1.8022263810516201, + "learning_rate": 2.9071886427867175e-11, + "loss": 0.4142746925354004, + "step": 8655 + }, + { + "epoch": 1.995849665667512, + "grad_norm": 1.4186888258792274, + "learning_rate": 
2.623738989626645e-11, + "loss": 0.34989133477211, + "step": 8656 + }, + { + "epoch": 1.9960802397970947, + "grad_norm": 1.6690344619586774, + "learning_rate": 2.354824968542335e-11, + "loss": 0.5059055089950562, + "step": 8657 + }, + { + "epoch": 1.9963108139266774, + "grad_norm": 1.6307598945991617, + "learning_rate": 2.1004466186358426e-11, + "loss": 0.4772738516330719, + "step": 8658 + }, + { + "epoch": 1.9965413880562601, + "grad_norm": 2.0155808953661456, + "learning_rate": 1.860603976877595e-11, + "loss": 0.5055459141731262, + "step": 8659 + }, + { + "epoch": 1.9967719621858429, + "grad_norm": 1.634582725028991, + "learning_rate": 1.6352970781285946e-11, + "loss": 0.3764510154724121, + "step": 8660 + }, + { + "epoch": 1.9970025363154256, + "grad_norm": 1.5414383762022799, + "learning_rate": 1.424525955140421e-11, + "loss": 0.42315495014190674, + "step": 8661 + }, + { + "epoch": 1.997233110445008, + "grad_norm": 1.435478088309439, + "learning_rate": 1.2282906385552295e-11, + "loss": 0.3647070527076721, + "step": 8662 + }, + { + "epoch": 1.9974636845745908, + "grad_norm": 1.5518093691270274, + "learning_rate": 1.0465911568946495e-11, + "loss": 0.3832179307937622, + "step": 8663 + }, + { + "epoch": 1.9976942587041733, + "grad_norm": 1.6197061226224263, + "learning_rate": 8.79427536570887e-12, + "loss": 0.46649307012557983, + "step": 8664 + }, + { + "epoch": 1.997924832833756, + "grad_norm": 1.4914895158884427, + "learning_rate": 7.267998018867238e-12, + "loss": 0.5101447701454163, + "step": 8665 + }, + { + "epoch": 1.9981554069633387, + "grad_norm": 1.7518527885996649, + "learning_rate": 5.8870797502441615e-12, + "loss": 0.48426300287246704, + "step": 8666 + }, + { + "epoch": 1.9983859810929214, + "grad_norm": 1.74396723859127, + "learning_rate": 4.65152076045694e-12, + "loss": 0.5109666585922241, + "step": 8667 + }, + { + "epoch": 1.9986165552225041, + "grad_norm": 1.5553329658335424, + "learning_rate": 3.5613212293617023e-12, + "loss": 0.36605560779571533, + 
"step": 8668 + }, + { + "epoch": 1.9988471293520869, + "grad_norm": 1.635805382712207, + "learning_rate": 2.6164813152762533e-12, + "loss": 0.515751302242279, + "step": 8669 + }, + { + "epoch": 1.9990777034816694, + "grad_norm": 1.530462345782049, + "learning_rate": 1.8170011554241582e-12, + "loss": 0.48570311069488525, + "step": 8670 + }, + { + "epoch": 1.999308277611252, + "grad_norm": 1.643766587262656, + "learning_rate": 1.1628808662678124e-12, + "loss": 0.5033636093139648, + "step": 8671 + }, + { + "epoch": 1.9995388517408346, + "grad_norm": 1.839455005664103, + "learning_rate": 6.541205427312846e-13, + "loss": 0.4581984281539917, + "step": 8672 + }, + { + "epoch": 1.9997694258704173, + "grad_norm": 1.6255864134270288, + "learning_rate": 2.9072025886645037e-13, + "loss": 0.4574134945869446, + "step": 8673 + }, + { + "epoch": 2.0, + "grad_norm": 1.4400793609212648, + "learning_rate": 7.268006729788112e-14, + "loss": 0.39279258251190186, + "step": 8674 + } + ], + "logging_steps": 1, + "max_steps": 8674, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2994036868841472.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8674/training_args.bin b/checkpoint-8674/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9d22a9f5260d66a35a24391e4e9c5ae1d42e2bf --- /dev/null +++ b/checkpoint-8674/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b4d99570e121a32da71712aa554f3b32e79266529670ac42e5a5b8fc07e99d +size 6968 diff --git a/checkpoint-8674/zero_to_fp32.py b/checkpoint-8674/zero_to_fp32.py new file mode 100644 index 
0000000000000000000000000000000000000000..5995d6e6f04e43b989587aa9022a3aef0c66d694 --- /dev/null +++ b/checkpoint-8674/zero_to_fp32.py @@ -0,0 +1,760 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: +# python zero_to_fp32.py . output_dir/ +# or +# python zero_to_fp32.py . output_dir/ --safe_serialization + +import argparse +import torch +import glob +import math +import os +import re +import gc +import json +import numpy as np +from tqdm import tqdm +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
+from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def 
parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device, weights_only=False) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + total_files = len(files) + state_dicts = [] + for f in tqdm(files, desc='Loading checkpoint shards'): + state_dict = torch.load(f, map_location=device, mmap=True, weights_only=False) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", 
None) + state_dicts.append(state_dict) + + if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = 
parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in 
range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. 
Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for 
s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +class GatheredTensor: + """ + A pseudo tensor that collects partitioned weights. + It is more memory efficient when there are multiple groups. + """ + + def __init__(self, flat_groups, flat_groups_offset, offset, partitioned_numel, shape): + self.flat_groups = flat_groups + self.flat_groups_offset = flat_groups_offset + self.offset = offset + self.partitioned_numel = partitioned_numel + self.shape = shape + self.dtype = self.flat_groups[0][0].dtype + + def contiguous(self): + """ + Merge partitioned weights from flat_groups into a single tensor. 
+ """ + end_idx = self.offset + self.partitioned_numel + world_size = len(self.flat_groups) + pad_flat_param_chunks = [] + + for rank_i in range(world_size): + # for each rank, we need to collect weights from related group/groups + flat_groups_at_rank_i = self.flat_groups[rank_i] + start_group_id = None + end_group_id = None + for group_id in range(len(self.flat_groups_offset)): + if self.flat_groups_offset[group_id] <= self.offset < self.flat_groups_offset[group_id + 1]: + start_group_id = group_id + if self.flat_groups_offset[group_id] < end_idx <= self.flat_groups_offset[group_id + 1]: + end_group_id = group_id + break + # collect weights from related group/groups + for group_id in range(start_group_id, end_group_id + 1): + flat_tensor = flat_groups_at_rank_i[group_id] + start_offset = self.offset - self.flat_groups_offset[group_id] + end_offset = min(end_idx, self.flat_groups_offset[group_id + 1]) - self.flat_groups_offset[group_id] + pad_flat_param_chunks.append(flat_tensor[start_offset:end_offset]) + + # collect weights from all ranks + pad_flat_param = torch.cat(pad_flat_param_chunks, dim=0) + param = pad_flat_param[:self.shape.numel()].view(self.shape).contiguous() + return param + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = sum([flat_group.numel() for flat_group in fp32_flat_groups[0]]) * world_size + + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a 
mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + flat_groups_offset = [0] + list(np.cumsum([flat_tensor.numel() for flat_tensor in fp32_flat_groups[0]])) + for name, shape in tqdm(param_shapes.items(), desc='Gathering sharded weights'): + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # memory efficient tensor + tensor = GatheredTensor(fp32_flat_groups, flat_groups_offset, offset, partitioned_numel, shape) + state_dict[name] = tensor + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared 
parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def to_torch_tensor(state_dict, return_empty_tensor=False): + """ + Convert state_dict of GatheredTensor to torch tensor + """ + torch_state_dict = {} + converted_tensors = {} + for name, tensor in state_dict.items(): + tensor_id = id(tensor) + if tensor_id in converted_tensors: # shared tensors + shared_tensor = torch_state_dict[converted_tensors[tensor_id]] + torch_state_dict[name] = shared_tensor + else: + converted_tensors[tensor_id] = name + if return_empty_tensor: + torch_state_dict[name] = torch.empty(tensor.shape, dtype=tensor.dtype) + else: + torch_state_dict[name] = tensor.contiguous() + return torch_state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag=None, + exclude_frozen_parameters=False, + lazy_mode=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + - ``lazy_mode``: get state_dict in lazy mode. It returns a dict of pesduo tensor instead of torch tensor, which is more memory efficient. 
+ Convert the pesduo tensor to torch tensor by ``.contiguous()`` + + Returns: + - pytorch ``state_dict`` + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + Note: the above usage may not work if your application doesn't have sufficient free CPU memory. + You may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. 
Or you can load state_dict in lazy mode :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, lazy_mode=True) # not on cpu + for name, lazy_tensor in state_dict.item(): + tensor = lazy_tensor.contiguous() # to cpu + print(name, tensor) + # del tensor to release memory if it no longer in use + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + state_dict = _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + if lazy_mode: + return state_dict + else: + return to_torch_tensor(state_dict) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, + output_dir, + max_shard_size="5GB", + safe_serialization=False, + tag=None, + exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_dir``: directory to the pytorch fp32 state_dict output files + - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB + - ``safe_serialization``: whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`). + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. 
If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + # Dependency pre-check + if safe_serialization: + try: + from safetensors.torch import save_file + except ImportError: + print('If you want to use `safe_serialization`, please `pip install safetensors`') + raise + if max_shard_size is not None: + try: + from huggingface_hub import split_torch_state_dict_into_shards + except ImportError: + print('If you want to use `max_shard_size`, please `pip install huggingface_hub`') + raise + + # Convert zero checkpoint to state_dict + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, + tag, + exclude_frozen_parameters, + lazy_mode=True) + + # Shard the model if it is too big. + weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin" + if max_shard_size is not None: + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + # an memory-efficient approach for sharding + empty_state_dict = to_torch_tensor(state_dict, return_empty_tensor=True) + state_dict_split = split_torch_state_dict_into_shards(empty_state_dict, + filename_pattern=filename_pattern, + max_shard_size=max_shard_size) + else: + from collections import namedtuple + StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"]) + state_dict_split = StateDictSplit(is_sharded=False, + filename_to_tensors={weights_name: list(state_dict.keys())}) + + # Save the model by shard + os.makedirs(output_dir, exist_ok=True) + filename_to_tensors = state_dict_split.filename_to_tensors.items() + for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"): + shard_state_dict = {tensor_name: state_dict[tensor_name] for tensor_name in tensors} + shard_state_dict = to_torch_tensor(shard_state_dict) + output_path = os.path.join(output_dir, 
shard_file) + if safe_serialization: + save_file(shard_state_dict, output_path, metadata={"format": "pt"}) + else: + torch.save(shard_state_dict, output_path) + # release the memory of current shard + for tensor_name in list(shard_state_dict.keys()): + del state_dict[tensor_name] + del shard_state_dict[tensor_name] + del shard_state_dict + gc.collect() + + # Save index if sharded + if state_dict_split.is_sharded: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } + save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json" + save_index_file = os.path.join(output_dir, save_index_file) + with open(save_index_file, "w", encoding="utf-8") as f: + content = json.dumps(index, indent=2, sort_keys=True) + "\n" + f.write(content) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. 
+ + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info("Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info("Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument("output_dir", + type=str, + help="directory to the pytorch fp32 state_dict output files" + "(e.g. path/checkpoint-12-output/)") + parser.add_argument( + "--max_shard_size", + type=str, + default="5GB", + help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size" + "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`" + "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances" + "without CPU OOM issues.") + parser.add_argument( + "--safe_serialization", + default=False, + action='store_true', + help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. 
e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_dir, + max_shard_size=args.max_shard_size, + safe_serialization=args.safe_serialization, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters) diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e717d9bf475c411369034636e82e48cf79108a8 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,63 @@ +{ + "image_processor": { + "data_format": "channels_first", + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessorFast", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "merge_size": 2, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2 + }, + "processor_class": "Qwen2_5_VLProcessor", + "video_processor": { + "data_format": "channels_first", + "default_to_square": true, + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": false, + "image_mean": [ + 0.48145466, + 0.4578275, + 0.40821073 + ], + "image_processor_type": "Qwen2VLImageProcessor", + "image_std": [ + 0.26862954, + 0.26130258, + 0.27577711 + ], + "max_frames": 768, + "merge_size": 2, + "min_frames": 4, + "patch_size": 14, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "size": { + "longest_edge": 12845056, + "shortest_edge": 3136 + }, + "temporal_patch_size": 2, + "video_processor_type": 
"Qwen2VLVideoProcessor" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..34510ff0037cd50428af467a17ead5a96140a32c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8 +size 11421892 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f7322db3e15385c79a5a29523dd1ccad6d343278 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,31 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "processor_class": "Qwen2_5_VLProcessor", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..96117cbcbd37faadb325e68d5cd0eadb0d1ef7da --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 2.0, + "total_flos": 2994036868841472.0, + "train_loss": 0.5227575608908595, + "train_runtime": 21685.2, + "train_samples_per_second": 1.6, + "train_steps_per_second": 0.4 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c5c6927a057e2175121c41c13327c965f36ec4e7 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,8675 @@ +{"current_steps": 1, 
"total_steps": 8674, "loss": 1.2793785333633423, "lr": 0.0, "epoch": 0.00023057412958266084, "percentage": 0.01, "elapsed_time": "0:00:06", "remaining_time": "16:08:19"} +{"current_steps": 2, "total_steps": 8674, "loss": 1.2810249328613281, "lr": 4.6082949308755755e-09, "epoch": 0.0004611482591653217, "percentage": 0.02, "elapsed_time": "0:00:09", "remaining_time": "11:57:20"} +{"current_steps": 3, "total_steps": 8674, "loss": 1.5180970430374146, "lr": 9.216589861751151e-09, "epoch": 0.0006917223887479825, "percentage": 0.03, "elapsed_time": "0:00:13", "remaining_time": "10:31:03"} +{"current_steps": 4, "total_steps": 8674, "loss": 1.2771815061569214, "lr": 1.3824884792626728e-08, "epoch": 0.0009222965183306433, "percentage": 0.05, "elapsed_time": "0:00:16", "remaining_time": "9:43:32"} +{"current_steps": 5, "total_steps": 8674, "loss": 1.6275714635849, "lr": 1.8433179723502302e-08, "epoch": 0.001152870647913304, "percentage": 0.06, "elapsed_time": "0:00:18", "remaining_time": "8:56:30"} +{"current_steps": 6, "total_steps": 8674, "loss": 1.4852838516235352, "lr": 2.304147465437788e-08, "epoch": 0.001383444777495965, "percentage": 0.07, "elapsed_time": "0:00:20", "remaining_time": "8:24:17"} +{"current_steps": 7, "total_steps": 8674, "loss": 1.3845010995864868, "lr": 2.7649769585253456e-08, "epoch": 0.0016140189070786258, "percentage": 0.08, "elapsed_time": "0:00:23", "remaining_time": "8:01:50"} +{"current_steps": 8, "total_steps": 8674, "loss": 1.2716574668884277, "lr": 3.225806451612903e-08, "epoch": 0.0018445930366612867, "percentage": 0.09, "elapsed_time": "0:00:26", "remaining_time": "7:50:37"} +{"current_steps": 9, "total_steps": 8674, "loss": 1.4046194553375244, "lr": 3.6866359447004604e-08, "epoch": 0.0020751671662439476, "percentage": 0.1, "elapsed_time": "0:00:28", "remaining_time": "7:36:41"} +{"current_steps": 10, "total_steps": 8674, "loss": 1.4988269805908203, "lr": 4.1474654377880186e-08, "epoch": 0.002305741295826608, "percentage": 0.12, 
"elapsed_time": "0:00:30", "remaining_time": "7:25:26"} +{"current_steps": 11, "total_steps": 8674, "loss": 1.3523340225219727, "lr": 4.608294930875576e-08, "epoch": 0.002536315425409269, "percentage": 0.13, "elapsed_time": "0:00:33", "remaining_time": "7:16:07"} +{"current_steps": 12, "total_steps": 8674, "loss": 1.3664941787719727, "lr": 5.069124423963134e-08, "epoch": 0.00276688955499193, "percentage": 0.14, "elapsed_time": "0:00:35", "remaining_time": "7:08:35"} +{"current_steps": 13, "total_steps": 8674, "loss": 1.4892609119415283, "lr": 5.529953917050691e-08, "epoch": 0.0029974636845745907, "percentage": 0.15, "elapsed_time": "0:00:38", "remaining_time": "7:02:00"} +{"current_steps": 14, "total_steps": 8674, "loss": 1.305836796760559, "lr": 5.990783410138249e-08, "epoch": 0.0032280378141572516, "percentage": 0.16, "elapsed_time": "0:00:40", "remaining_time": "6:57:04"} +{"current_steps": 15, "total_steps": 8674, "loss": 1.3458774089813232, "lr": 6.451612903225806e-08, "epoch": 0.0034586119437399125, "percentage": 0.17, "elapsed_time": "0:00:42", "remaining_time": "6:52:28"} +{"current_steps": 16, "total_steps": 8674, "loss": 1.4305222034454346, "lr": 6.912442396313364e-08, "epoch": 0.0036891860733225734, "percentage": 0.18, "elapsed_time": "0:00:45", "remaining_time": "6:50:51"} +{"current_steps": 17, "total_steps": 8674, "loss": 1.4247705936431885, "lr": 7.373271889400921e-08, "epoch": 0.003919760202905234, "percentage": 0.2, "elapsed_time": "0:00:47", "remaining_time": "6:47:04"} +{"current_steps": 18, "total_steps": 8674, "loss": 1.4151098728179932, "lr": 7.834101382488478e-08, "epoch": 0.004150334332487895, "percentage": 0.21, "elapsed_time": "0:00:50", "remaining_time": "6:43:44"} +{"current_steps": 19, "total_steps": 8674, "loss": 1.4633708000183105, "lr": 8.294930875576037e-08, "epoch": 0.004380908462070556, "percentage": 0.22, "elapsed_time": "0:00:52", "remaining_time": "6:40:40"} +{"current_steps": 20, "total_steps": 8674, "loss": 
1.2271082401275635, "lr": 8.755760368663594e-08, "epoch": 0.004611482591653216, "percentage": 0.23, "elapsed_time": "0:00:55", "remaining_time": "6:37:58"} +{"current_steps": 21, "total_steps": 8674, "loss": 1.493757724761963, "lr": 9.216589861751152e-08, "epoch": 0.004842056721235877, "percentage": 0.24, "elapsed_time": "0:00:57", "remaining_time": "6:35:32"} +{"current_steps": 22, "total_steps": 8674, "loss": 1.446916103363037, "lr": 9.677419354838709e-08, "epoch": 0.005072630850818538, "percentage": 0.25, "elapsed_time": "0:01:00", "remaining_time": "6:35:00"} +{"current_steps": 23, "total_steps": 8674, "loss": 1.4575269222259521, "lr": 1.0138248847926267e-07, "epoch": 0.005303204980401199, "percentage": 0.27, "elapsed_time": "0:01:02", "remaining_time": "6:33:03"} +{"current_steps": 24, "total_steps": 8674, "loss": 1.5000505447387695, "lr": 1.0599078341013824e-07, "epoch": 0.00553377910998386, "percentage": 0.28, "elapsed_time": "0:01:05", "remaining_time": "6:31:21"} +{"current_steps": 25, "total_steps": 8674, "loss": 1.32895827293396, "lr": 1.1059907834101383e-07, "epoch": 0.005764353239566521, "percentage": 0.29, "elapsed_time": "0:01:07", "remaining_time": "6:31:16"} +{"current_steps": 26, "total_steps": 8674, "loss": 1.479337215423584, "lr": 1.152073732718894e-07, "epoch": 0.005994927369149181, "percentage": 0.3, "elapsed_time": "0:01:10", "remaining_time": "6:29:31"} +{"current_steps": 27, "total_steps": 8674, "loss": 1.3543293476104736, "lr": 1.1981566820276498e-07, "epoch": 0.006225501498731842, "percentage": 0.31, "elapsed_time": "0:01:12", "remaining_time": "6:28:30"} +{"current_steps": 28, "total_steps": 8674, "loss": 1.3075106143951416, "lr": 1.2442396313364054e-07, "epoch": 0.006456075628314503, "percentage": 0.32, "elapsed_time": "0:01:15", "remaining_time": "6:27:08"} +{"current_steps": 29, "total_steps": 8674, "loss": 1.2077248096466064, "lr": 1.2903225806451611e-07, "epoch": 0.006686649757897164, "percentage": 0.33, "elapsed_time": "0:01:17", 
"remaining_time": "6:25:59"} +{"current_steps": 30, "total_steps": 8674, "loss": 1.2841781377792358, "lr": 1.336405529953917e-07, "epoch": 0.006917223887479825, "percentage": 0.35, "elapsed_time": "0:01:20", "remaining_time": "6:24:44"} +{"current_steps": 31, "total_steps": 8674, "loss": 1.4022557735443115, "lr": 1.3824884792626728e-07, "epoch": 0.0071477980170624855, "percentage": 0.36, "elapsed_time": "0:01:22", "remaining_time": "6:23:28"} +{"current_steps": 32, "total_steps": 8674, "loss": 1.4971141815185547, "lr": 1.4285714285714285e-07, "epoch": 0.007378372146645147, "percentage": 0.37, "elapsed_time": "0:01:24", "remaining_time": "6:22:13"} +{"current_steps": 33, "total_steps": 8674, "loss": 1.3699426651000977, "lr": 1.4746543778801842e-07, "epoch": 0.007608946276227807, "percentage": 0.38, "elapsed_time": "0:01:27", "remaining_time": "6:22:10"} +{"current_steps": 34, "total_steps": 8674, "loss": 1.466570258140564, "lr": 1.52073732718894e-07, "epoch": 0.007839520405810468, "percentage": 0.39, "elapsed_time": "0:01:29", "remaining_time": "6:21:00"} +{"current_steps": 35, "total_steps": 8674, "loss": 1.3031455278396606, "lr": 1.5668202764976955e-07, "epoch": 0.008070094535393129, "percentage": 0.4, "elapsed_time": "0:01:32", "remaining_time": "6:19:58"} +{"current_steps": 36, "total_steps": 8674, "loss": 1.3989369869232178, "lr": 1.6129032258064515e-07, "epoch": 0.00830066866497579, "percentage": 0.42, "elapsed_time": "0:01:34", "remaining_time": "6:19:16"} +{"current_steps": 37, "total_steps": 8674, "loss": 1.41139817237854, "lr": 1.6589861751152074e-07, "epoch": 0.00853124279455845, "percentage": 0.43, "elapsed_time": "0:01:37", "remaining_time": "6:18:14"} +{"current_steps": 38, "total_steps": 8674, "loss": 1.305363655090332, "lr": 1.705069124423963e-07, "epoch": 0.008761816924141111, "percentage": 0.44, "elapsed_time": "0:01:39", "remaining_time": "6:17:23"} +{"current_steps": 39, "total_steps": 8674, "loss": 1.3931915760040283, "lr": 
1.7511520737327188e-07, "epoch": 0.008992391053723773, "percentage": 0.45, "elapsed_time": "0:01:41", "remaining_time": "6:16:22"} +{"current_steps": 40, "total_steps": 8674, "loss": 1.4728009700775146, "lr": 1.7972350230414745e-07, "epoch": 0.009222965183306432, "percentage": 0.46, "elapsed_time": "0:01:44", "remaining_time": "6:15:38"} +{"current_steps": 41, "total_steps": 8674, "loss": 1.4165544509887695, "lr": 1.8433179723502305e-07, "epoch": 0.009453539312889093, "percentage": 0.47, "elapsed_time": "0:01:47", "remaining_time": "6:15:45"} +{"current_steps": 42, "total_steps": 8674, "loss": 1.3693115711212158, "lr": 1.889400921658986e-07, "epoch": 0.009684113442471755, "percentage": 0.48, "elapsed_time": "0:01:49", "remaining_time": "6:15:06"} +{"current_steps": 43, "total_steps": 8674, "loss": 1.468721866607666, "lr": 1.9354838709677418e-07, "epoch": 0.009914687572054416, "percentage": 0.5, "elapsed_time": "0:01:51", "remaining_time": "6:14:26"} +{"current_steps": 44, "total_steps": 8674, "loss": 1.4732704162597656, "lr": 1.9815668202764975e-07, "epoch": 0.010145261701637076, "percentage": 0.51, "elapsed_time": "0:01:54", "remaining_time": "6:13:45"} +{"current_steps": 45, "total_steps": 8674, "loss": 1.2579209804534912, "lr": 2.0276497695852535e-07, "epoch": 0.010375835831219737, "percentage": 0.52, "elapsed_time": "0:01:56", "remaining_time": "6:13:06"} +{"current_steps": 46, "total_steps": 8674, "loss": 1.3587429523468018, "lr": 2.073732718894009e-07, "epoch": 0.010606409960802398, "percentage": 0.53, "elapsed_time": "0:01:59", "remaining_time": "6:12:43"} +{"current_steps": 47, "total_steps": 8674, "loss": 1.368800401687622, "lr": 2.1198156682027649e-07, "epoch": 0.010836984090385058, "percentage": 0.54, "elapsed_time": "0:02:01", "remaining_time": "6:12:07"} +{"current_steps": 48, "total_steps": 8674, "loss": 1.2960132360458374, "lr": 2.1658986175115208e-07, "epoch": 0.01106755821996772, "percentage": 0.55, "elapsed_time": "0:02:04", "remaining_time": 
"6:11:34"} +{"current_steps": 49, "total_steps": 8674, "loss": 1.3035235404968262, "lr": 2.2119815668202765e-07, "epoch": 0.01129813234955038, "percentage": 0.56, "elapsed_time": "0:02:06", "remaining_time": "6:11:02"} +{"current_steps": 50, "total_steps": 8674, "loss": 1.5074443817138672, "lr": 2.2580645161290322e-07, "epoch": 0.011528706479133042, "percentage": 0.58, "elapsed_time": "0:02:09", "remaining_time": "6:11:09"} +{"current_steps": 51, "total_steps": 8674, "loss": 1.4689760208129883, "lr": 2.304147465437788e-07, "epoch": 0.011759280608715702, "percentage": 0.59, "elapsed_time": "0:02:11", "remaining_time": "6:10:34"} +{"current_steps": 52, "total_steps": 8674, "loss": 1.3542251586914062, "lr": 2.3502304147465438e-07, "epoch": 0.011989854738298363, "percentage": 0.6, "elapsed_time": "0:02:13", "remaining_time": "6:10:14"} +{"current_steps": 53, "total_steps": 8674, "loss": 1.3521728515625, "lr": 2.3963133640552995e-07, "epoch": 0.012220428867881024, "percentage": 0.61, "elapsed_time": "0:02:16", "remaining_time": "6:09:46"} +{"current_steps": 54, "total_steps": 8674, "loss": 1.3577494621276855, "lr": 2.442396313364055e-07, "epoch": 0.012451002997463684, "percentage": 0.62, "elapsed_time": "0:02:18", "remaining_time": "6:09:18"} +{"current_steps": 55, "total_steps": 8674, "loss": 1.459476351737976, "lr": 2.488479262672811e-07, "epoch": 0.012681577127046345, "percentage": 0.63, "elapsed_time": "0:02:21", "remaining_time": "6:08:50"} +{"current_steps": 56, "total_steps": 8674, "loss": 1.484410047531128, "lr": 2.534562211981567e-07, "epoch": 0.012912151256629006, "percentage": 0.65, "elapsed_time": "0:02:23", "remaining_time": "6:08:19"} +{"current_steps": 57, "total_steps": 8674, "loss": 1.3589065074920654, "lr": 2.5806451612903223e-07, "epoch": 0.013142725386211668, "percentage": 0.66, "elapsed_time": "0:02:26", "remaining_time": "6:07:53"} +{"current_steps": 58, "total_steps": 8674, "loss": 1.3558262586593628, "lr": 2.6267281105990777e-07, "epoch": 
0.013373299515794327, "percentage": 0.67, "elapsed_time": "0:02:28", "remaining_time": "6:07:56"} +{"current_steps": 59, "total_steps": 8674, "loss": 1.5165367126464844, "lr": 2.672811059907834e-07, "epoch": 0.013603873645376989, "percentage": 0.68, "elapsed_time": "0:02:30", "remaining_time": "6:07:27"} +{"current_steps": 60, "total_steps": 8674, "loss": 1.3051776885986328, "lr": 2.7188940092165896e-07, "epoch": 0.01383444777495965, "percentage": 0.69, "elapsed_time": "0:02:33", "remaining_time": "6:06:56"} +{"current_steps": 61, "total_steps": 8674, "loss": 1.2916524410247803, "lr": 2.7649769585253456e-07, "epoch": 0.01406502190454231, "percentage": 0.7, "elapsed_time": "0:02:35", "remaining_time": "6:06:36"} +{"current_steps": 62, "total_steps": 8674, "loss": 1.440261721611023, "lr": 2.8110599078341015e-07, "epoch": 0.014295596034124971, "percentage": 0.71, "elapsed_time": "0:02:38", "remaining_time": "6:06:14"} +{"current_steps": 63, "total_steps": 8674, "loss": 1.3422625064849854, "lr": 2.857142857142857e-07, "epoch": 0.014526170163707632, "percentage": 0.73, "elapsed_time": "0:02:40", "remaining_time": "6:05:51"} +{"current_steps": 64, "total_steps": 8674, "loss": 1.374439001083374, "lr": 2.903225806451613e-07, "epoch": 0.014756744293290294, "percentage": 0.74, "elapsed_time": "0:02:43", "remaining_time": "6:05:29"} +{"current_steps": 65, "total_steps": 8674, "loss": 1.4382294416427612, "lr": 2.9493087557603683e-07, "epoch": 0.014987318422872953, "percentage": 0.75, "elapsed_time": "0:02:45", "remaining_time": "6:05:01"} +{"current_steps": 66, "total_steps": 8674, "loss": 1.3971002101898193, "lr": 2.9953917050691243e-07, "epoch": 0.015217892552455614, "percentage": 0.76, "elapsed_time": "0:02:47", "remaining_time": "6:04:36"} +{"current_steps": 67, "total_steps": 8674, "loss": 1.376272439956665, "lr": 3.04147465437788e-07, "epoch": 0.015448466682038276, "percentage": 0.77, "elapsed_time": "0:02:50", "remaining_time": "6:04:42"} +{"current_steps": 68, 
"total_steps": 8674, "loss": 1.3135097026824951, "lr": 3.0875576036866356e-07, "epoch": 0.015679040811620935, "percentage": 0.78, "elapsed_time": "0:02:52", "remaining_time": "6:04:17"} +{"current_steps": 69, "total_steps": 8674, "loss": 1.2688875198364258, "lr": 3.133640552995391e-07, "epoch": 0.015909614941203597, "percentage": 0.8, "elapsed_time": "0:02:55", "remaining_time": "6:03:55"} +{"current_steps": 70, "total_steps": 8674, "loss": 1.4380691051483154, "lr": 3.1797235023041476e-07, "epoch": 0.016140189070786258, "percentage": 0.81, "elapsed_time": "0:02:57", "remaining_time": "6:03:37"} +{"current_steps": 71, "total_steps": 8674, "loss": 1.319260835647583, "lr": 3.225806451612903e-07, "epoch": 0.01637076320036892, "percentage": 0.82, "elapsed_time": "0:02:59", "remaining_time": "6:03:29"} +{"current_steps": 72, "total_steps": 8674, "loss": 1.4083738327026367, "lr": 3.271889400921659e-07, "epoch": 0.01660133732995158, "percentage": 0.83, "elapsed_time": "0:03:02", "remaining_time": "6:03:08"} +{"current_steps": 73, "total_steps": 8674, "loss": 1.4904775619506836, "lr": 3.317972350230415e-07, "epoch": 0.01683191145953424, "percentage": 0.84, "elapsed_time": "0:03:04", "remaining_time": "6:02:50"} +{"current_steps": 74, "total_steps": 8674, "loss": 1.4534616470336914, "lr": 3.3640552995391703e-07, "epoch": 0.0170624855891169, "percentage": 0.85, "elapsed_time": "0:03:07", "remaining_time": "6:02:30"} +{"current_steps": 75, "total_steps": 8674, "loss": 1.6545689105987549, "lr": 3.410138248847926e-07, "epoch": 0.01729305971869956, "percentage": 0.86, "elapsed_time": "0:03:09", "remaining_time": "6:02:35"} +{"current_steps": 76, "total_steps": 8674, "loss": 1.2267192602157593, "lr": 3.4562211981566817e-07, "epoch": 0.017523633848282223, "percentage": 0.88, "elapsed_time": "0:03:12", "remaining_time": "6:02:18"} +{"current_steps": 77, "total_steps": 8674, "loss": 1.4207227230072021, "lr": 3.5023041474654376e-07, "epoch": 0.017754207977864884, "percentage": 0.89, 
"elapsed_time": "0:03:14", "remaining_time": "6:01:56"} +{"current_steps": 78, "total_steps": 8674, "loss": 1.4952092170715332, "lr": 3.5483870967741936e-07, "epoch": 0.017984782107447545, "percentage": 0.9, "elapsed_time": "0:03:16", "remaining_time": "6:01:42"} +{"current_steps": 79, "total_steps": 8674, "loss": 1.2932121753692627, "lr": 3.594470046082949e-07, "epoch": 0.018215356237030206, "percentage": 0.91, "elapsed_time": "0:03:19", "remaining_time": "6:01:29"} +{"current_steps": 80, "total_steps": 8674, "loss": 1.3855851888656616, "lr": 3.6405529953917044e-07, "epoch": 0.018445930366612864, "percentage": 0.92, "elapsed_time": "0:03:21", "remaining_time": "6:01:27"} +{"current_steps": 81, "total_steps": 8674, "loss": 1.3645650148391724, "lr": 3.686635944700461e-07, "epoch": 0.018676504496195526, "percentage": 0.93, "elapsed_time": "0:03:24", "remaining_time": "6:01:10"} +{"current_steps": 82, "total_steps": 8674, "loss": 1.322283387184143, "lr": 3.7327188940092163e-07, "epoch": 0.018907078625778187, "percentage": 0.95, "elapsed_time": "0:03:26", "remaining_time": "6:00:50"} +{"current_steps": 83, "total_steps": 8674, "loss": 1.3363629579544067, "lr": 3.778801843317972e-07, "epoch": 0.01913765275536085, "percentage": 0.96, "elapsed_time": "0:03:29", "remaining_time": "6:00:59"} +{"current_steps": 84, "total_steps": 8674, "loss": 1.553279161453247, "lr": 3.824884792626728e-07, "epoch": 0.01936822688494351, "percentage": 0.97, "elapsed_time": "0:03:31", "remaining_time": "6:00:40"} +{"current_steps": 85, "total_steps": 8674, "loss": 1.4434814453125, "lr": 3.8709677419354837e-07, "epoch": 0.01959880101452617, "percentage": 0.98, "elapsed_time": "0:03:34", "remaining_time": "6:00:29"} +{"current_steps": 86, "total_steps": 8674, "loss": 1.5134285688400269, "lr": 3.9170506912442396e-07, "epoch": 0.019829375144108832, "percentage": 0.99, "elapsed_time": "0:03:36", "remaining_time": "6:00:16"} +{"current_steps": 87, "total_steps": 8674, "loss": 1.4244651794433594, 
"lr": 3.963133640552995e-07, "epoch": 0.02005994927369149, "percentage": 1.0, "elapsed_time": "0:03:38", "remaining_time": "6:00:02"} +{"current_steps": 88, "total_steps": 8674, "loss": 1.4571855068206787, "lr": 4.009216589861751e-07, "epoch": 0.02029052340327415, "percentage": 1.01, "elapsed_time": "0:03:41", "remaining_time": "5:59:51"} +{"current_steps": 89, "total_steps": 8674, "loss": 1.2940685749053955, "lr": 4.055299539170507e-07, "epoch": 0.020521097532856813, "percentage": 1.03, "elapsed_time": "0:03:43", "remaining_time": "5:59:45"} +{"current_steps": 90, "total_steps": 8674, "loss": 1.3537572622299194, "lr": 4.1013824884792624e-07, "epoch": 0.020751671662439474, "percentage": 1.04, "elapsed_time": "0:03:46", "remaining_time": "5:59:32"} +{"current_steps": 91, "total_steps": 8674, "loss": 1.524500846862793, "lr": 4.147465437788018e-07, "epoch": 0.020982245792022135, "percentage": 1.05, "elapsed_time": "0:03:48", "remaining_time": "5:59:24"} +{"current_steps": 92, "total_steps": 8674, "loss": 1.4414368867874146, "lr": 4.1935483870967743e-07, "epoch": 0.021212819921604797, "percentage": 1.06, "elapsed_time": "0:03:51", "remaining_time": "5:59:37"} +{"current_steps": 93, "total_steps": 8674, "loss": 1.284010887145996, "lr": 4.2396313364055297e-07, "epoch": 0.021443394051187458, "percentage": 1.07, "elapsed_time": "0:03:53", "remaining_time": "5:59:21"} +{"current_steps": 94, "total_steps": 8674, "loss": 1.5901892185211182, "lr": 4.285714285714285e-07, "epoch": 0.021673968180770116, "percentage": 1.08, "elapsed_time": "0:03:56", "remaining_time": "5:59:07"} +{"current_steps": 95, "total_steps": 8674, "loss": 1.4408211708068848, "lr": 4.3317972350230416e-07, "epoch": 0.021904542310352777, "percentage": 1.1, "elapsed_time": "0:03:58", "remaining_time": "5:58:54"} +{"current_steps": 96, "total_steps": 8674, "loss": 1.2293554544448853, "lr": 4.377880184331797e-07, "epoch": 0.02213511643993544, "percentage": 1.11, "elapsed_time": "0:04:00", "remaining_time": 
"5:58:41"} +{"current_steps": 97, "total_steps": 8674, "loss": 1.4144377708435059, "lr": 4.423963133640553e-07, "epoch": 0.0223656905695181, "percentage": 1.12, "elapsed_time": "0:04:03", "remaining_time": "5:58:29"} +{"current_steps": 98, "total_steps": 8674, "loss": 1.359034776687622, "lr": 4.4700460829493084e-07, "epoch": 0.02259626469910076, "percentage": 1.13, "elapsed_time": "0:04:05", "remaining_time": "5:58:16"} +{"current_steps": 99, "total_steps": 8674, "loss": 1.3299517631530762, "lr": 4.5161290322580644e-07, "epoch": 0.022826838828683423, "percentage": 1.14, "elapsed_time": "0:04:08", "remaining_time": "5:58:08"} +{"current_steps": 100, "total_steps": 8674, "loss": 1.3072423934936523, "lr": 4.5622119815668203e-07, "epoch": 0.023057412958266084, "percentage": 1.15, "elapsed_time": "0:04:10", "remaining_time": "5:58:19"} +{"current_steps": 101, "total_steps": 8674, "loss": 1.5683096647262573, "lr": 4.608294930875576e-07, "epoch": 0.023287987087848742, "percentage": 1.16, "elapsed_time": "0:04:14", "remaining_time": "6:00:00"} +{"current_steps": 102, "total_steps": 8674, "loss": 1.6294015645980835, "lr": 4.654377880184331e-07, "epoch": 0.023518561217431403, "percentage": 1.18, "elapsed_time": "0:04:16", "remaining_time": "5:59:44"} +{"current_steps": 103, "total_steps": 8674, "loss": 1.424511194229126, "lr": 4.7004608294930877e-07, "epoch": 0.023749135347014064, "percentage": 1.19, "elapsed_time": "0:04:19", "remaining_time": "5:59:28"} +{"current_steps": 104, "total_steps": 8674, "loss": 1.4169164896011353, "lr": 4.746543778801843e-07, "epoch": 0.023979709476596726, "percentage": 1.2, "elapsed_time": "0:04:21", "remaining_time": "5:59:14"} +{"current_steps": 105, "total_steps": 8674, "loss": 1.3933480978012085, "lr": 4.792626728110599e-07, "epoch": 0.024210283606179387, "percentage": 1.21, "elapsed_time": "0:04:23", "remaining_time": "5:59:03"} +{"current_steps": 106, "total_steps": 8674, "loss": 1.488750696182251, "lr": 4.838709677419355e-07, "epoch": 
0.02444085773576205, "percentage": 1.22, "elapsed_time": "0:04:26", "remaining_time": "5:58:47"} +{"current_steps": 107, "total_steps": 8674, "loss": 1.4852150678634644, "lr": 4.88479262672811e-07, "epoch": 0.02467143186534471, "percentage": 1.23, "elapsed_time": "0:04:28", "remaining_time": "5:58:32"} +{"current_steps": 108, "total_steps": 8674, "loss": 1.4256765842437744, "lr": 4.930875576036866e-07, "epoch": 0.024902005994927368, "percentage": 1.25, "elapsed_time": "0:04:31", "remaining_time": "5:58:17"} +{"current_steps": 109, "total_steps": 8674, "loss": 1.3063642978668213, "lr": 4.976958525345622e-07, "epoch": 0.02513258012451003, "percentage": 1.26, "elapsed_time": "0:04:33", "remaining_time": "5:58:22"} +{"current_steps": 110, "total_steps": 8674, "loss": 1.549802303314209, "lr": 5.023041474654378e-07, "epoch": 0.02536315425409269, "percentage": 1.27, "elapsed_time": "0:04:36", "remaining_time": "5:58:09"} +{"current_steps": 111, "total_steps": 8674, "loss": 1.301950454711914, "lr": 5.069124423963134e-07, "epoch": 0.02559372838367535, "percentage": 1.28, "elapsed_time": "0:04:38", "remaining_time": "5:57:56"} +{"current_steps": 112, "total_steps": 8674, "loss": 1.3025325536727905, "lr": 5.11520737327189e-07, "epoch": 0.025824302513258013, "percentage": 1.29, "elapsed_time": "0:04:40", "remaining_time": "5:57:44"} +{"current_steps": 113, "total_steps": 8674, "loss": 1.40749192237854, "lr": 5.161290322580645e-07, "epoch": 0.026054876642840674, "percentage": 1.3, "elapsed_time": "0:04:43", "remaining_time": "5:57:34"} +{"current_steps": 114, "total_steps": 8674, "loss": 1.3216793537139893, "lr": 5.2073732718894e-07, "epoch": 0.026285450772423335, "percentage": 1.31, "elapsed_time": "0:04:45", "remaining_time": "5:57:22"} +{"current_steps": 115, "total_steps": 8674, "loss": 1.3189308643341064, "lr": 5.253456221198155e-07, "epoch": 0.026516024902005993, "percentage": 1.33, "elapsed_time": "0:04:47", "remaining_time": "5:57:12"} +{"current_steps": 116, 
"total_steps": 8674, "loss": 1.430384635925293, "lr": 5.299539170506912e-07, "epoch": 0.026746599031588655, "percentage": 1.34, "elapsed_time": "0:04:50", "remaining_time": "5:56:59"} +{"current_steps": 117, "total_steps": 8674, "loss": 1.4081478118896484, "lr": 5.345622119815668e-07, "epoch": 0.026977173161171316, "percentage": 1.35, "elapsed_time": "0:04:52", "remaining_time": "5:57:02"} +{"current_steps": 118, "total_steps": 8674, "loss": 1.434388518333435, "lr": 5.391705069124423e-07, "epoch": 0.027207747290753977, "percentage": 1.36, "elapsed_time": "0:04:55", "remaining_time": "5:56:50"} +{"current_steps": 119, "total_steps": 8674, "loss": 1.4139282703399658, "lr": 5.437788018433179e-07, "epoch": 0.02743832142033664, "percentage": 1.37, "elapsed_time": "0:04:57", "remaining_time": "5:56:38"} +{"current_steps": 120, "total_steps": 8674, "loss": 1.4511487483978271, "lr": 5.483870967741935e-07, "epoch": 0.0276688955499193, "percentage": 1.38, "elapsed_time": "0:05:00", "remaining_time": "5:56:29"} +{"current_steps": 121, "total_steps": 8674, "loss": 1.5713481903076172, "lr": 5.529953917050691e-07, "epoch": 0.02789946967950196, "percentage": 1.39, "elapsed_time": "0:05:02", "remaining_time": "5:56:17"} +{"current_steps": 122, "total_steps": 8674, "loss": 1.4315730333328247, "lr": 5.576036866359447e-07, "epoch": 0.02813004380908462, "percentage": 1.41, "elapsed_time": "0:05:04", "remaining_time": "5:56:06"} +{"current_steps": 123, "total_steps": 8674, "loss": 1.3283708095550537, "lr": 5.622119815668203e-07, "epoch": 0.02836061793866728, "percentage": 1.42, "elapsed_time": "0:05:07", "remaining_time": "5:55:56"} +{"current_steps": 124, "total_steps": 8674, "loss": 1.4329016208648682, "lr": 5.668202764976958e-07, "epoch": 0.028591192068249942, "percentage": 1.43, "elapsed_time": "0:05:09", "remaining_time": "5:55:44"} +{"current_steps": 125, "total_steps": 8674, "loss": 1.444648265838623, "lr": 5.714285714285714e-07, "epoch": 0.028821766197832603, "percentage": 
1.44, "elapsed_time": "0:05:12", "remaining_time": "5:55:48"} +{"current_steps": 126, "total_steps": 8674, "loss": 1.3584785461425781, "lr": 5.760368663594469e-07, "epoch": 0.029052340327415264, "percentage": 1.45, "elapsed_time": "0:05:14", "remaining_time": "5:55:41"} +{"current_steps": 127, "total_steps": 8674, "loss": 1.2815918922424316, "lr": 5.806451612903226e-07, "epoch": 0.029282914456997926, "percentage": 1.46, "elapsed_time": "0:05:16", "remaining_time": "5:55:30"} +{"current_steps": 128, "total_steps": 8674, "loss": 1.3332037925720215, "lr": 5.852534562211982e-07, "epoch": 0.029513488586580587, "percentage": 1.48, "elapsed_time": "0:05:19", "remaining_time": "5:55:20"} +{"current_steps": 129, "total_steps": 8674, "loss": 1.4522390365600586, "lr": 5.898617511520737e-07, "epoch": 0.029744062716163245, "percentage": 1.49, "elapsed_time": "0:05:21", "remaining_time": "5:55:09"} +{"current_steps": 130, "total_steps": 8674, "loss": 1.4362024068832397, "lr": 5.944700460829493e-07, "epoch": 0.029974636845745906, "percentage": 1.5, "elapsed_time": "0:05:24", "remaining_time": "5:55:02"} +{"current_steps": 131, "total_steps": 8674, "loss": 1.3271276950836182, "lr": 5.990783410138249e-07, "epoch": 0.030205210975328568, "percentage": 1.51, "elapsed_time": "0:05:26", "remaining_time": "5:54:54"} +{"current_steps": 132, "total_steps": 8674, "loss": 1.5936369895935059, "lr": 6.036866359447004e-07, "epoch": 0.03043578510491123, "percentage": 1.52, "elapsed_time": "0:05:28", "remaining_time": "5:54:46"} +{"current_steps": 133, "total_steps": 8674, "loss": 1.4786381721496582, "lr": 6.08294930875576e-07, "epoch": 0.03066635923449389, "percentage": 1.53, "elapsed_time": "0:05:31", "remaining_time": "5:54:54"} +{"current_steps": 134, "total_steps": 8674, "loss": 1.3499064445495605, "lr": 6.129032258064516e-07, "epoch": 0.03089693336407655, "percentage": 1.54, "elapsed_time": "0:05:34", "remaining_time": "5:54:47"} +{"current_steps": 135, "total_steps": 8674, "loss": 
1.4434795379638672, "lr": 6.175115207373271e-07, "epoch": 0.031127507493659213, "percentage": 1.56, "elapsed_time": "0:05:36", "remaining_time": "5:54:36"} +{"current_steps": 136, "total_steps": 8674, "loss": 1.4064602851867676, "lr": 6.221198156682027e-07, "epoch": 0.03135808162324187, "percentage": 1.57, "elapsed_time": "0:05:38", "remaining_time": "5:54:27"} +{"current_steps": 137, "total_steps": 8674, "loss": 1.3325507640838623, "lr": 6.267281105990782e-07, "epoch": 0.03158865575282453, "percentage": 1.58, "elapsed_time": "0:05:41", "remaining_time": "5:54:21"} +{"current_steps": 138, "total_steps": 8674, "loss": 1.2584879398345947, "lr": 6.313364055299539e-07, "epoch": 0.03181922988240719, "percentage": 1.59, "elapsed_time": "0:05:43", "remaining_time": "5:54:12"} +{"current_steps": 139, "total_steps": 8674, "loss": 1.3754582405090332, "lr": 6.359447004608295e-07, "epoch": 0.032049804011989855, "percentage": 1.6, "elapsed_time": "0:05:45", "remaining_time": "5:54:04"} +{"current_steps": 140, "total_steps": 8674, "loss": 1.2700412273406982, "lr": 6.40552995391705e-07, "epoch": 0.032280378141572516, "percentage": 1.61, "elapsed_time": "0:05:48", "remaining_time": "5:53:56"} +{"current_steps": 141, "total_steps": 8674, "loss": 1.395858645439148, "lr": 6.451612903225806e-07, "epoch": 0.03251095227115518, "percentage": 1.63, "elapsed_time": "0:05:50", "remaining_time": "5:53:47"} +{"current_steps": 142, "total_steps": 8674, "loss": 1.402890682220459, "lr": 6.497695852534562e-07, "epoch": 0.03274152640073784, "percentage": 1.64, "elapsed_time": "0:05:53", "remaining_time": "5:54:20"} +{"current_steps": 143, "total_steps": 8674, "loss": 1.5405397415161133, "lr": 6.543778801843318e-07, "epoch": 0.0329721005303205, "percentage": 1.65, "elapsed_time": "0:05:56", "remaining_time": "5:54:48"} +{"current_steps": 144, "total_steps": 8674, "loss": 1.2394921779632568, "lr": 6.589861751152074e-07, "epoch": 0.03320267465990316, "percentage": 1.66, "elapsed_time": "0:05:59", 
"remaining_time": "5:54:42"} +{"current_steps": 145, "total_steps": 8674, "loss": 1.453255295753479, "lr": 6.63594470046083e-07, "epoch": 0.03343324878948582, "percentage": 1.67, "elapsed_time": "0:06:01", "remaining_time": "5:54:30"} +{"current_steps": 146, "total_steps": 8674, "loss": 1.3661112785339355, "lr": 6.682027649769585e-07, "epoch": 0.03366382291906848, "percentage": 1.68, "elapsed_time": "0:06:04", "remaining_time": "5:54:21"} +{"current_steps": 147, "total_steps": 8674, "loss": 1.2952282428741455, "lr": 6.728110599078341e-07, "epoch": 0.03389439704865114, "percentage": 1.69, "elapsed_time": "0:06:06", "remaining_time": "5:54:14"} +{"current_steps": 148, "total_steps": 8674, "loss": 1.396565318107605, "lr": 6.774193548387096e-07, "epoch": 0.0341249711782338, "percentage": 1.71, "elapsed_time": "0:06:08", "remaining_time": "5:54:04"} +{"current_steps": 149, "total_steps": 8674, "loss": 1.3207082748413086, "lr": 6.820276497695853e-07, "epoch": 0.03435554530781646, "percentage": 1.72, "elapsed_time": "0:06:11", "remaining_time": "5:53:59"} +{"current_steps": 150, "total_steps": 8674, "loss": 1.4085125923156738, "lr": 6.866359447004608e-07, "epoch": 0.03458611943739912, "percentage": 1.73, "elapsed_time": "0:06:13", "remaining_time": "5:54:08"} +{"current_steps": 151, "total_steps": 8674, "loss": 1.5698528289794922, "lr": 6.912442396313363e-07, "epoch": 0.034816693566981784, "percentage": 1.74, "elapsed_time": "0:06:16", "remaining_time": "5:54:00"} +{"current_steps": 152, "total_steps": 8674, "loss": 1.4091004133224487, "lr": 6.958525345622119e-07, "epoch": 0.035047267696564445, "percentage": 1.75, "elapsed_time": "0:06:18", "remaining_time": "5:53:51"} +{"current_steps": 153, "total_steps": 8674, "loss": 1.2392504215240479, "lr": 7.004608294930875e-07, "epoch": 0.035277841826147106, "percentage": 1.76, "elapsed_time": "0:06:21", "remaining_time": "5:53:44"} +{"current_steps": 154, "total_steps": 8674, "loss": 1.3355891704559326, "lr": 
7.05069124423963e-07, "epoch": 0.03550841595572977, "percentage": 1.78, "elapsed_time": "0:06:23", "remaining_time": "5:53:35"} +{"current_steps": 155, "total_steps": 8674, "loss": 1.330599308013916, "lr": 7.096774193548387e-07, "epoch": 0.03573899008531243, "percentage": 1.79, "elapsed_time": "0:06:25", "remaining_time": "5:53:27"} +{"current_steps": 156, "total_steps": 8674, "loss": 1.344653844833374, "lr": 7.142857142857143e-07, "epoch": 0.03596956421489509, "percentage": 1.8, "elapsed_time": "0:06:28", "remaining_time": "5:53:24"} +{"current_steps": 157, "total_steps": 8674, "loss": 1.341560959815979, "lr": 7.188940092165898e-07, "epoch": 0.03620013834447775, "percentage": 1.81, "elapsed_time": "0:06:30", "remaining_time": "5:53:15"} +{"current_steps": 158, "total_steps": 8674, "loss": 1.2242077589035034, "lr": 7.235023041474654e-07, "epoch": 0.03643071247406041, "percentage": 1.82, "elapsed_time": "0:06:33", "remaining_time": "5:53:07"} +{"current_steps": 159, "total_steps": 8674, "loss": 1.2858202457427979, "lr": 7.281105990783409e-07, "epoch": 0.036661286603643074, "percentage": 1.83, "elapsed_time": "0:06:35", "remaining_time": "5:53:15"} +{"current_steps": 160, "total_steps": 8674, "loss": 1.479524850845337, "lr": 7.327188940092166e-07, "epoch": 0.03689186073322573, "percentage": 1.84, "elapsed_time": "0:06:38", "remaining_time": "5:53:08"} +{"current_steps": 161, "total_steps": 8674, "loss": 1.43915855884552, "lr": 7.373271889400922e-07, "epoch": 0.03712243486280839, "percentage": 1.86, "elapsed_time": "0:06:40", "remaining_time": "5:52:57"} +{"current_steps": 162, "total_steps": 8674, "loss": 1.3939034938812256, "lr": 7.419354838709677e-07, "epoch": 0.03735300899239105, "percentage": 1.87, "elapsed_time": "0:06:42", "remaining_time": "5:52:50"} +{"current_steps": 163, "total_steps": 8674, "loss": 1.2733443975448608, "lr": 7.465437788018433e-07, "epoch": 0.03758358312197371, "percentage": 1.88, "elapsed_time": "0:06:45", "remaining_time": "5:52:41"} 
+{"current_steps": 164, "total_steps": 8674, "loss": 1.3436474800109863, "lr": 7.511520737327189e-07, "epoch": 0.037814157251556374, "percentage": 1.89, "elapsed_time": "0:06:47", "remaining_time": "5:52:33"} +{"current_steps": 165, "total_steps": 8674, "loss": 1.4250465631484985, "lr": 7.557603686635944e-07, "epoch": 0.038044731381139035, "percentage": 1.9, "elapsed_time": "0:06:50", "remaining_time": "5:52:25"} +{"current_steps": 166, "total_steps": 8674, "loss": 1.4244422912597656, "lr": 7.603686635944701e-07, "epoch": 0.0382753055107217, "percentage": 1.91, "elapsed_time": "0:06:52", "remaining_time": "5:52:17"} +{"current_steps": 167, "total_steps": 8674, "loss": 1.5487544536590576, "lr": 7.649769585253457e-07, "epoch": 0.03850587964030436, "percentage": 1.93, "elapsed_time": "0:06:55", "remaining_time": "5:52:24"} +{"current_steps": 168, "total_steps": 8674, "loss": 1.3282281160354614, "lr": 7.695852534562211e-07, "epoch": 0.03873645376988702, "percentage": 1.94, "elapsed_time": "0:06:57", "remaining_time": "5:52:18"} +{"current_steps": 169, "total_steps": 8674, "loss": 1.2823774814605713, "lr": 7.741935483870967e-07, "epoch": 0.03896702789946968, "percentage": 1.95, "elapsed_time": "0:06:59", "remaining_time": "5:52:11"} +{"current_steps": 170, "total_steps": 8674, "loss": 1.2402329444885254, "lr": 7.788018433179722e-07, "epoch": 0.03919760202905234, "percentage": 1.96, "elapsed_time": "0:07:02", "remaining_time": "5:52:06"} +{"current_steps": 171, "total_steps": 8674, "loss": 1.3755587339401245, "lr": 7.834101382488479e-07, "epoch": 0.039428176158635, "percentage": 1.97, "elapsed_time": "0:07:04", "remaining_time": "5:52:05"} +{"current_steps": 172, "total_steps": 8674, "loss": 1.3403921127319336, "lr": 7.880184331797235e-07, "epoch": 0.039658750288217665, "percentage": 1.98, "elapsed_time": "0:07:07", "remaining_time": "5:51:59"} +{"current_steps": 173, "total_steps": 8674, "loss": 1.3742129802703857, "lr": 7.92626728110599e-07, "epoch": 
0.039889324417800326, "percentage": 1.99, "elapsed_time": "0:07:09", "remaining_time": "5:51:52"} +{"current_steps": 174, "total_steps": 8674, "loss": 1.6444599628448486, "lr": 7.972350230414746e-07, "epoch": 0.04011989854738298, "percentage": 2.01, "elapsed_time": "0:07:12", "remaining_time": "5:51:45"} +{"current_steps": 175, "total_steps": 8674, "loss": 1.3891929388046265, "lr": 8.018433179723502e-07, "epoch": 0.04035047267696564, "percentage": 2.02, "elapsed_time": "0:07:14", "remaining_time": "5:51:50"} +{"current_steps": 176, "total_steps": 8674, "loss": 1.2279409170150757, "lr": 8.064516129032257e-07, "epoch": 0.0405810468065483, "percentage": 2.03, "elapsed_time": "0:07:17", "remaining_time": "5:51:43"} +{"current_steps": 177, "total_steps": 8674, "loss": 1.4576997756958008, "lr": 8.110599078341014e-07, "epoch": 0.040811620936130964, "percentage": 2.04, "elapsed_time": "0:07:19", "remaining_time": "5:51:41"} +{"current_steps": 178, "total_steps": 8674, "loss": 1.3585199117660522, "lr": 8.15668202764977e-07, "epoch": 0.041042195065713626, "percentage": 2.05, "elapsed_time": "0:07:21", "remaining_time": "5:51:32"} +{"current_steps": 179, "total_steps": 8674, "loss": 1.3056905269622803, "lr": 8.202764976958525e-07, "epoch": 0.04127276919529629, "percentage": 2.06, "elapsed_time": "0:07:24", "remaining_time": "5:51:26"} +{"current_steps": 180, "total_steps": 8674, "loss": 1.3029698133468628, "lr": 8.248847926267281e-07, "epoch": 0.04150334332487895, "percentage": 2.08, "elapsed_time": "0:07:26", "remaining_time": "5:51:22"} +{"current_steps": 181, "total_steps": 8674, "loss": 1.4368736743927002, "lr": 8.294930875576036e-07, "epoch": 0.04173391745446161, "percentage": 2.09, "elapsed_time": "0:07:29", "remaining_time": "5:51:17"} +{"current_steps": 182, "total_steps": 8674, "loss": 1.3243422508239746, "lr": 8.341013824884793e-07, "epoch": 0.04196449158404427, "percentage": 2.1, "elapsed_time": "0:07:31", "remaining_time": "5:51:10"} +{"current_steps": 183, 
"total_steps": 8674, "loss": 1.232081413269043, "lr": 8.387096774193549e-07, "epoch": 0.04219506571362693, "percentage": 2.11, "elapsed_time": "0:07:34", "remaining_time": "5:51:06"} +{"current_steps": 184, "total_steps": 8674, "loss": 1.4601390361785889, "lr": 8.433179723502303e-07, "epoch": 0.042425639843209594, "percentage": 2.12, "elapsed_time": "0:07:36", "remaining_time": "5:51:12"} +{"current_steps": 185, "total_steps": 8674, "loss": 1.3778860569000244, "lr": 8.479262672811059e-07, "epoch": 0.042656213972792255, "percentage": 2.13, "elapsed_time": "0:07:39", "remaining_time": "5:51:04"} +{"current_steps": 186, "total_steps": 8674, "loss": 1.3235092163085938, "lr": 8.525345622119815e-07, "epoch": 0.042886788102374916, "percentage": 2.14, "elapsed_time": "0:07:41", "remaining_time": "5:50:56"} +{"current_steps": 187, "total_steps": 8674, "loss": 1.4480581283569336, "lr": 8.57142857142857e-07, "epoch": 0.04311736223195758, "percentage": 2.16, "elapsed_time": "0:07:43", "remaining_time": "5:50:49"} +{"current_steps": 188, "total_steps": 8674, "loss": 1.4530816078186035, "lr": 8.617511520737327e-07, "epoch": 0.04334793636154023, "percentage": 2.17, "elapsed_time": "0:07:46", "remaining_time": "5:50:42"} +{"current_steps": 189, "total_steps": 8674, "loss": 1.4447407722473145, "lr": 8.663594470046083e-07, "epoch": 0.04357851049112289, "percentage": 2.18, "elapsed_time": "0:07:48", "remaining_time": "5:50:34"} +{"current_steps": 190, "total_steps": 8674, "loss": 1.3610244989395142, "lr": 8.709677419354838e-07, "epoch": 0.043809084620705555, "percentage": 2.19, "elapsed_time": "0:07:50", "remaining_time": "5:50:26"} +{"current_steps": 191, "total_steps": 8674, "loss": 1.4084277153015137, "lr": 8.755760368663594e-07, "epoch": 0.044039658750288216, "percentage": 2.2, "elapsed_time": "0:07:53", "remaining_time": "5:50:17"} +{"current_steps": 192, "total_steps": 8674, "loss": 1.3652758598327637, "lr": 8.801843317972349e-07, "epoch": 0.04427023287987088, "percentage": 
2.21, "elapsed_time": "0:07:55", "remaining_time": "5:50:22"} +{"current_steps": 193, "total_steps": 8674, "loss": 1.4791496992111206, "lr": 8.847926267281106e-07, "epoch": 0.04450080700945354, "percentage": 2.23, "elapsed_time": "0:07:58", "remaining_time": "5:50:16"} +{"current_steps": 194, "total_steps": 8674, "loss": 1.3958008289337158, "lr": 8.894009216589862e-07, "epoch": 0.0447313811390362, "percentage": 2.24, "elapsed_time": "0:08:00", "remaining_time": "5:50:10"} +{"current_steps": 195, "total_steps": 8674, "loss": 1.4134410619735718, "lr": 8.940092165898617e-07, "epoch": 0.04496195526861886, "percentage": 2.25, "elapsed_time": "0:08:03", "remaining_time": "5:50:05"} +{"current_steps": 196, "total_steps": 8674, "loss": 1.4062776565551758, "lr": 8.986175115207373e-07, "epoch": 0.04519252939820152, "percentage": 2.26, "elapsed_time": "0:08:05", "remaining_time": "5:49:57"} +{"current_steps": 197, "total_steps": 8674, "loss": 1.375224232673645, "lr": 9.032258064516129e-07, "epoch": 0.045423103527784184, "percentage": 2.27, "elapsed_time": "0:08:07", "remaining_time": "5:49:51"} +{"current_steps": 198, "total_steps": 8674, "loss": 1.2440606355667114, "lr": 9.078341013824884e-07, "epoch": 0.045653677657366845, "percentage": 2.28, "elapsed_time": "0:08:10", "remaining_time": "5:49:44"} +{"current_steps": 199, "total_steps": 8674, "loss": 1.4068349599838257, "lr": 9.124423963133641e-07, "epoch": 0.045884251786949506, "percentage": 2.29, "elapsed_time": "0:08:12", "remaining_time": "5:49:38"} +{"current_steps": 200, "total_steps": 8674, "loss": 1.3797581195831299, "lr": 9.170506912442397e-07, "epoch": 0.04611482591653217, "percentage": 2.31, "elapsed_time": "0:08:15", "remaining_time": "5:49:44"} +{"current_steps": 201, "total_steps": 8674, "loss": 1.4441678524017334, "lr": 9.216589861751152e-07, "epoch": 0.04634540004611483, "percentage": 2.32, "elapsed_time": "0:08:18", "remaining_time": "5:50:32"} +{"current_steps": 202, "total_steps": 8674, "loss": 
1.4727370738983154, "lr": 9.262672811059907e-07, "epoch": 0.046575974175697483, "percentage": 2.33, "elapsed_time": "0:08:21", "remaining_time": "5:50:28"} +{"current_steps": 203, "total_steps": 8674, "loss": 1.448495864868164, "lr": 9.308755760368662e-07, "epoch": 0.046806548305280145, "percentage": 2.34, "elapsed_time": "0:08:23", "remaining_time": "5:50:22"} +{"current_steps": 204, "total_steps": 8674, "loss": 1.3727293014526367, "lr": 9.354838709677418e-07, "epoch": 0.047037122434862806, "percentage": 2.35, "elapsed_time": "0:08:26", "remaining_time": "5:50:14"} +{"current_steps": 205, "total_steps": 8674, "loss": 1.4234352111816406, "lr": 9.400921658986175e-07, "epoch": 0.04726769656444547, "percentage": 2.36, "elapsed_time": "0:08:28", "remaining_time": "5:50:08"} +{"current_steps": 206, "total_steps": 8674, "loss": 1.2407056093215942, "lr": 9.44700460829493e-07, "epoch": 0.04749827069402813, "percentage": 2.37, "elapsed_time": "0:08:30", "remaining_time": "5:50:02"} +{"current_steps": 207, "total_steps": 8674, "loss": 1.3502311706542969, "lr": 9.493087557603686e-07, "epoch": 0.04772884482361079, "percentage": 2.39, "elapsed_time": "0:08:33", "remaining_time": "5:49:58"} +{"current_steps": 208, "total_steps": 8674, "loss": 1.4618254899978638, "lr": 9.539170506912442e-07, "epoch": 0.04795941895319345, "percentage": 2.4, "elapsed_time": "0:08:35", "remaining_time": "5:49:50"} +{"current_steps": 209, "total_steps": 8674, "loss": 1.3624317646026611, "lr": 9.585253456221198e-07, "epoch": 0.04818999308277611, "percentage": 2.41, "elapsed_time": "0:08:38", "remaining_time": "5:49:56"} +{"current_steps": 210, "total_steps": 8674, "loss": 1.512046456336975, "lr": 9.631336405529954e-07, "epoch": 0.048420567212358774, "percentage": 2.42, "elapsed_time": "0:08:40", "remaining_time": "5:49:49"} +{"current_steps": 211, "total_steps": 8674, "loss": 1.2896164655685425, "lr": 9.67741935483871e-07, "epoch": 0.048651141341941435, "percentage": 2.43, "elapsed_time": "0:08:43", 
"remaining_time": "5:49:44"} +{"current_steps": 212, "total_steps": 8674, "loss": 1.5507850646972656, "lr": 9.723502304147466e-07, "epoch": 0.0488817154715241, "percentage": 2.44, "elapsed_time": "0:08:45", "remaining_time": "5:49:49"} +{"current_steps": 213, "total_steps": 8674, "loss": 1.425408124923706, "lr": 9.76958525345622e-07, "epoch": 0.04911228960110676, "percentage": 2.46, "elapsed_time": "0:08:48", "remaining_time": "5:49:56"} +{"current_steps": 214, "total_steps": 8674, "loss": 1.347771406173706, "lr": 9.815668202764976e-07, "epoch": 0.04934286373068942, "percentage": 2.47, "elapsed_time": "0:08:50", "remaining_time": "5:49:51"} +{"current_steps": 215, "total_steps": 8674, "loss": 1.4044904708862305, "lr": 9.861751152073732e-07, "epoch": 0.04957343786027208, "percentage": 2.48, "elapsed_time": "0:08:53", "remaining_time": "5:49:43"} +{"current_steps": 216, "total_steps": 8674, "loss": 1.3507332801818848, "lr": 9.907834101382488e-07, "epoch": 0.049804011989854735, "percentage": 2.49, "elapsed_time": "0:08:55", "remaining_time": "5:49:37"} +{"current_steps": 217, "total_steps": 8674, "loss": 1.3022946119308472, "lr": 9.953917050691244e-07, "epoch": 0.050034586119437396, "percentage": 2.5, "elapsed_time": "0:08:58", "remaining_time": "5:49:40"} +{"current_steps": 218, "total_steps": 8674, "loss": 1.284754991531372, "lr": 1e-06, "epoch": 0.05026516024902006, "percentage": 2.51, "elapsed_time": "0:09:00", "remaining_time": "5:49:33"} +{"current_steps": 219, "total_steps": 8674, "loss": 1.2985923290252686, "lr": 1.0046082949308756e-06, "epoch": 0.05049573437860272, "percentage": 2.52, "elapsed_time": "0:09:03", "remaining_time": "5:49:29"} +{"current_steps": 220, "total_steps": 8674, "loss": 1.3855717182159424, "lr": 1.0092165898617511e-06, "epoch": 0.05072630850818538, "percentage": 2.54, "elapsed_time": "0:09:05", "remaining_time": "5:49:22"} +{"current_steps": 221, "total_steps": 8674, "loss": 1.357919692993164, "lr": 1.0138248847926267e-06, "epoch": 
0.05095688263776804, "percentage": 2.55, "elapsed_time": "0:09:07", "remaining_time": "5:49:15"} +{"current_steps": 222, "total_steps": 8674, "loss": 1.2818949222564697, "lr": 1.0184331797235021e-06, "epoch": 0.0511874567673507, "percentage": 2.56, "elapsed_time": "0:09:10", "remaining_time": "5:49:08"} +{"current_steps": 223, "total_steps": 8674, "loss": 1.2488511800765991, "lr": 1.023041474654378e-06, "epoch": 0.051418030896933364, "percentage": 2.57, "elapsed_time": "0:09:12", "remaining_time": "5:49:03"} +{"current_steps": 224, "total_steps": 8674, "loss": 1.3824148178100586, "lr": 1.0276497695852535e-06, "epoch": 0.051648605026516026, "percentage": 2.58, "elapsed_time": "0:09:15", "remaining_time": "5:48:56"} +{"current_steps": 225, "total_steps": 8674, "loss": 1.3114633560180664, "lr": 1.032258064516129e-06, "epoch": 0.05187917915609869, "percentage": 2.59, "elapsed_time": "0:09:17", "remaining_time": "5:49:00"} +{"current_steps": 226, "total_steps": 8674, "loss": 1.272273063659668, "lr": 1.0368663594470047e-06, "epoch": 0.05210975328568135, "percentage": 2.61, "elapsed_time": "0:09:20", "remaining_time": "5:48:56"} +{"current_steps": 227, "total_steps": 8674, "loss": 1.5072649717330933, "lr": 1.04147465437788e-06, "epoch": 0.05234032741526401, "percentage": 2.62, "elapsed_time": "0:09:22", "remaining_time": "5:48:50"} +{"current_steps": 228, "total_steps": 8674, "loss": 1.4087142944335938, "lr": 1.0460829493087557e-06, "epoch": 0.05257090154484667, "percentage": 2.63, "elapsed_time": "0:09:24", "remaining_time": "5:48:44"} +{"current_steps": 229, "total_steps": 8674, "loss": 1.4866605997085571, "lr": 1.050691244239631e-06, "epoch": 0.05280147567442933, "percentage": 2.64, "elapsed_time": "0:09:27", "remaining_time": "5:48:38"} +{"current_steps": 230, "total_steps": 8674, "loss": 1.3377184867858887, "lr": 1.0552995391705069e-06, "epoch": 0.05303204980401199, "percentage": 2.65, "elapsed_time": "0:09:29", "remaining_time": "5:48:35"} +{"current_steps": 231, 
"total_steps": 8674, "loss": 1.4009103775024414, "lr": 1.0599078341013825e-06, "epoch": 0.05326262393359465, "percentage": 2.66, "elapsed_time": "0:09:32", "remaining_time": "5:48:29"} +{"current_steps": 232, "total_steps": 8674, "loss": 1.4878556728363037, "lr": 1.0645161290322579e-06, "epoch": 0.05349319806317731, "percentage": 2.67, "elapsed_time": "0:09:34", "remaining_time": "5:48:22"} +{"current_steps": 233, "total_steps": 8674, "loss": 1.4002021551132202, "lr": 1.0691244239631337e-06, "epoch": 0.05372377219275997, "percentage": 2.69, "elapsed_time": "0:09:36", "remaining_time": "5:48:16"} +{"current_steps": 234, "total_steps": 8674, "loss": 1.337146520614624, "lr": 1.073732718894009e-06, "epoch": 0.05395434632234263, "percentage": 2.7, "elapsed_time": "0:09:39", "remaining_time": "5:48:19"} +{"current_steps": 235, "total_steps": 8674, "loss": 1.4084792137145996, "lr": 1.0783410138248847e-06, "epoch": 0.05418492045192529, "percentage": 2.71, "elapsed_time": "0:09:41", "remaining_time": "5:48:13"} +{"current_steps": 236, "total_steps": 8674, "loss": 1.4131449460983276, "lr": 1.0829493087557605e-06, "epoch": 0.054415494581507955, "percentage": 2.72, "elapsed_time": "0:09:44", "remaining_time": "5:48:06"} +{"current_steps": 237, "total_steps": 8674, "loss": 1.1869292259216309, "lr": 1.0875576036866358e-06, "epoch": 0.054646068711090616, "percentage": 2.73, "elapsed_time": "0:09:46", "remaining_time": "5:48:01"} +{"current_steps": 238, "total_steps": 8674, "loss": 1.3970961570739746, "lr": 1.0921658986175114e-06, "epoch": 0.05487664284067328, "percentage": 2.74, "elapsed_time": "0:09:48", "remaining_time": "5:47:54"} +{"current_steps": 239, "total_steps": 8674, "loss": 1.2682442665100098, "lr": 1.096774193548387e-06, "epoch": 0.05510721697025594, "percentage": 2.76, "elapsed_time": "0:09:51", "remaining_time": "5:47:48"} +{"current_steps": 240, "total_steps": 8674, "loss": 1.2983934879302979, "lr": 1.1013824884792626e-06, "epoch": 0.0553377910998386, 
"percentage": 2.77, "elapsed_time": "0:09:53", "remaining_time": "5:47:43"} +{"current_steps": 241, "total_steps": 8674, "loss": 1.3980869054794312, "lr": 1.1059907834101382e-06, "epoch": 0.05556836522942126, "percentage": 2.78, "elapsed_time": "0:09:56", "remaining_time": "5:47:38"} +{"current_steps": 242, "total_steps": 8674, "loss": 1.3068631887435913, "lr": 1.1105990783410138e-06, "epoch": 0.05579893935900392, "percentage": 2.79, "elapsed_time": "0:09:58", "remaining_time": "5:47:42"} +{"current_steps": 243, "total_steps": 8674, "loss": 1.5353353023529053, "lr": 1.1152073732718894e-06, "epoch": 0.056029513488586584, "percentage": 2.8, "elapsed_time": "0:10:01", "remaining_time": "5:47:36"} +{"current_steps": 244, "total_steps": 8674, "loss": 1.290163278579712, "lr": 1.1198156682027648e-06, "epoch": 0.05626008761816924, "percentage": 2.81, "elapsed_time": "0:10:03", "remaining_time": "5:47:31"} +{"current_steps": 245, "total_steps": 8674, "loss": 1.3671848773956299, "lr": 1.1244239631336406e-06, "epoch": 0.0564906617477519, "percentage": 2.82, "elapsed_time": "0:10:05", "remaining_time": "5:47:24"} +{"current_steps": 246, "total_steps": 8674, "loss": 1.3020408153533936, "lr": 1.1290322580645162e-06, "epoch": 0.05672123587733456, "percentage": 2.84, "elapsed_time": "0:10:08", "remaining_time": "5:47:19"} +{"current_steps": 247, "total_steps": 8674, "loss": 1.3159775733947754, "lr": 1.1336405529953916e-06, "epoch": 0.05695181000691722, "percentage": 2.85, "elapsed_time": "0:10:10", "remaining_time": "5:47:14"} +{"current_steps": 248, "total_steps": 8674, "loss": 1.3163995742797852, "lr": 1.1382488479262674e-06, "epoch": 0.057182384136499884, "percentage": 2.86, "elapsed_time": "0:10:13", "remaining_time": "5:47:09"} +{"current_steps": 249, "total_steps": 8674, "loss": 1.5215930938720703, "lr": 1.1428571428571428e-06, "epoch": 0.057412958266082545, "percentage": 2.87, "elapsed_time": "0:10:15", "remaining_time": "5:47:04"} +{"current_steps": 250, "total_steps": 
8674, "loss": 1.2870161533355713, "lr": 1.1474654377880184e-06, "epoch": 0.057643532395665206, "percentage": 2.88, "elapsed_time": "0:10:18", "remaining_time": "5:47:07"} +{"current_steps": 251, "total_steps": 8674, "loss": 1.2624198198318481, "lr": 1.1520737327188938e-06, "epoch": 0.05787410652524787, "percentage": 2.89, "elapsed_time": "0:10:20", "remaining_time": "5:47:04"} +{"current_steps": 252, "total_steps": 8674, "loss": 1.2778981924057007, "lr": 1.1566820276497696e-06, "epoch": 0.05810468065483053, "percentage": 2.91, "elapsed_time": "0:10:22", "remaining_time": "5:46:59"} +{"current_steps": 253, "total_steps": 8674, "loss": 1.1934442520141602, "lr": 1.1612903225806452e-06, "epoch": 0.05833525478441319, "percentage": 2.92, "elapsed_time": "0:10:25", "remaining_time": "5:46:54"} +{"current_steps": 254, "total_steps": 8674, "loss": 1.3840088844299316, "lr": 1.1658986175115205e-06, "epoch": 0.05856582891399585, "percentage": 2.93, "elapsed_time": "0:10:27", "remaining_time": "5:46:50"} +{"current_steps": 255, "total_steps": 8674, "loss": 1.373002290725708, "lr": 1.1705069124423963e-06, "epoch": 0.05879640304357851, "percentage": 2.94, "elapsed_time": "0:10:30", "remaining_time": "5:46:45"} +{"current_steps": 256, "total_steps": 8674, "loss": 1.3614685535430908, "lr": 1.1751152073732717e-06, "epoch": 0.059026977173161174, "percentage": 2.95, "elapsed_time": "0:10:32", "remaining_time": "5:46:40"} +{"current_steps": 257, "total_steps": 8674, "loss": 1.3525335788726807, "lr": 1.1797235023041473e-06, "epoch": 0.059257551302743836, "percentage": 2.96, "elapsed_time": "0:10:34", "remaining_time": "5:46:36"} +{"current_steps": 258, "total_steps": 8674, "loss": 1.3806469440460205, "lr": 1.1843317972350231e-06, "epoch": 0.05948812543232649, "percentage": 2.97, "elapsed_time": "0:10:37", "remaining_time": "5:46:31"} +{"current_steps": 259, "total_steps": 8674, "loss": 1.372736930847168, "lr": 1.1889400921658985e-06, "epoch": 0.05971869956190915, "percentage": 2.99, 
"elapsed_time": "0:10:40", "remaining_time": "5:46:36"} +{"current_steps": 260, "total_steps": 8674, "loss": 1.309061050415039, "lr": 1.1935483870967741e-06, "epoch": 0.05994927369149181, "percentage": 3.0, "elapsed_time": "0:10:42", "remaining_time": "5:46:30"} +{"current_steps": 261, "total_steps": 8674, "loss": 1.3500525951385498, "lr": 1.1981566820276497e-06, "epoch": 0.060179847821074474, "percentage": 3.01, "elapsed_time": "0:10:44", "remaining_time": "5:46:24"} +{"current_steps": 262, "total_steps": 8674, "loss": 1.4197357892990112, "lr": 1.2027649769585253e-06, "epoch": 0.060410421950657135, "percentage": 3.02, "elapsed_time": "0:10:47", "remaining_time": "5:46:18"} +{"current_steps": 263, "total_steps": 8674, "loss": 1.6454131603240967, "lr": 1.207373271889401e-06, "epoch": 0.0606409960802398, "percentage": 3.03, "elapsed_time": "0:10:49", "remaining_time": "5:46:12"} +{"current_steps": 264, "total_steps": 8674, "loss": 1.269604206085205, "lr": 1.2119815668202765e-06, "epoch": 0.06087157020982246, "percentage": 3.04, "elapsed_time": "0:10:51", "remaining_time": "5:46:07"} +{"current_steps": 265, "total_steps": 8674, "loss": 1.2358057498931885, "lr": 1.216589861751152e-06, "epoch": 0.06110214433940512, "percentage": 3.06, "elapsed_time": "0:10:54", "remaining_time": "5:46:01"} +{"current_steps": 266, "total_steps": 8674, "loss": 1.2713422775268555, "lr": 1.2211981566820275e-06, "epoch": 0.06133271846898778, "percentage": 3.07, "elapsed_time": "0:10:56", "remaining_time": "5:45:58"} +{"current_steps": 267, "total_steps": 8674, "loss": 1.225820779800415, "lr": 1.2258064516129033e-06, "epoch": 0.06156329259857044, "percentage": 3.08, "elapsed_time": "0:10:59", "remaining_time": "5:46:00"} +{"current_steps": 268, "total_steps": 8674, "loss": 1.279617190361023, "lr": 1.2304147465437787e-06, "epoch": 0.0617938667281531, "percentage": 3.09, "elapsed_time": "0:11:01", "remaining_time": "5:45:55"} +{"current_steps": 269, "total_steps": 8674, "loss": 
1.2081385850906372, "lr": 1.2350230414746543e-06, "epoch": 0.062024440857735764, "percentage": 3.1, "elapsed_time": "0:11:04", "remaining_time": "5:45:50"} +{"current_steps": 270, "total_steps": 8674, "loss": 1.3121249675750732, "lr": 1.23963133640553e-06, "epoch": 0.062255014987318426, "percentage": 3.11, "elapsed_time": "0:11:06", "remaining_time": "5:45:45"} +{"current_steps": 271, "total_steps": 8674, "loss": 1.28495454788208, "lr": 1.2442396313364054e-06, "epoch": 0.06248558911690109, "percentage": 3.12, "elapsed_time": "0:11:08", "remaining_time": "5:45:39"} +{"current_steps": 272, "total_steps": 8674, "loss": 1.3837053775787354, "lr": 1.248847926267281e-06, "epoch": 0.06271616324648374, "percentage": 3.14, "elapsed_time": "0:11:11", "remaining_time": "5:45:33"} +{"current_steps": 273, "total_steps": 8674, "loss": 1.2119230031967163, "lr": 1.2534562211981564e-06, "epoch": 0.0629467373760664, "percentage": 3.15, "elapsed_time": "0:11:13", "remaining_time": "5:45:28"} +{"current_steps": 274, "total_steps": 8674, "loss": 1.323190450668335, "lr": 1.2580645161290322e-06, "epoch": 0.06317731150564906, "percentage": 3.16, "elapsed_time": "0:11:15", "remaining_time": "5:45:23"} +{"current_steps": 275, "total_steps": 8674, "loss": 1.4300715923309326, "lr": 1.2626728110599078e-06, "epoch": 0.06340788563523173, "percentage": 3.17, "elapsed_time": "0:11:18", "remaining_time": "5:45:17"} +{"current_steps": 276, "total_steps": 8674, "loss": 1.1680996417999268, "lr": 1.2672811059907832e-06, "epoch": 0.06363845976481439, "percentage": 3.18, "elapsed_time": "0:11:21", "remaining_time": "5:45:21"} +{"current_steps": 277, "total_steps": 8674, "loss": 1.3980211019515991, "lr": 1.271889400921659e-06, "epoch": 0.06386903389439705, "percentage": 3.19, "elapsed_time": "0:11:23", "remaining_time": "5:45:15"} +{"current_steps": 278, "total_steps": 8674, "loss": 1.40798020362854, "lr": 1.2764976958525344e-06, "epoch": 0.06409960802397971, "percentage": 3.2, "elapsed_time": "0:11:25", 
"remaining_time": "5:45:10"} +{"current_steps": 279, "total_steps": 8674, "loss": 1.2535033226013184, "lr": 1.28110599078341e-06, "epoch": 0.06433018215356237, "percentage": 3.22, "elapsed_time": "0:11:28", "remaining_time": "5:45:05"} +{"current_steps": 280, "total_steps": 8674, "loss": 1.3866907358169556, "lr": 1.2857142857142858e-06, "epoch": 0.06456075628314503, "percentage": 3.23, "elapsed_time": "0:11:30", "remaining_time": "5:45:00"} +{"current_steps": 281, "total_steps": 8674, "loss": 1.1985647678375244, "lr": 1.2903225806451612e-06, "epoch": 0.0647913304127277, "percentage": 3.24, "elapsed_time": "0:11:32", "remaining_time": "5:44:56"} +{"current_steps": 282, "total_steps": 8674, "loss": 1.3741936683654785, "lr": 1.2949308755760368e-06, "epoch": 0.06502190454231035, "percentage": 3.25, "elapsed_time": "0:11:35", "remaining_time": "5:44:50"} +{"current_steps": 283, "total_steps": 8674, "loss": 1.3684422969818115, "lr": 1.2995391705069124e-06, "epoch": 0.06525247867189302, "percentage": 3.26, "elapsed_time": "0:11:37", "remaining_time": "5:44:49"} +{"current_steps": 284, "total_steps": 8674, "loss": 1.3792086839675903, "lr": 1.304147465437788e-06, "epoch": 0.06548305280147568, "percentage": 3.27, "elapsed_time": "0:11:40", "remaining_time": "5:44:56"} +{"current_steps": 285, "total_steps": 8674, "loss": 1.3252873420715332, "lr": 1.3087557603686636e-06, "epoch": 0.06571362693105834, "percentage": 3.29, "elapsed_time": "0:11:42", "remaining_time": "5:44:50"} +{"current_steps": 286, "total_steps": 8674, "loss": 1.1918525695800781, "lr": 1.3133640552995392e-06, "epoch": 0.065944201060641, "percentage": 3.3, "elapsed_time": "0:11:45", "remaining_time": "5:44:45"} +{"current_steps": 287, "total_steps": 8674, "loss": 1.3760654926300049, "lr": 1.3179723502304148e-06, "epoch": 0.06617477519022366, "percentage": 3.31, "elapsed_time": "0:11:47", "remaining_time": "5:44:40"} +{"current_steps": 288, "total_steps": 8674, "loss": 1.3452839851379395, "lr": 
1.3225806451612901e-06, "epoch": 0.06640534931980632, "percentage": 3.32, "elapsed_time": "0:11:50", "remaining_time": "5:44:35"} +{"current_steps": 289, "total_steps": 8674, "loss": 1.321220874786377, "lr": 1.327188940092166e-06, "epoch": 0.06663592344938898, "percentage": 3.33, "elapsed_time": "0:11:52", "remaining_time": "5:44:33"} +{"current_steps": 290, "total_steps": 8674, "loss": 1.222012996673584, "lr": 1.3317972350230413e-06, "epoch": 0.06686649757897165, "percentage": 3.34, "elapsed_time": "0:11:54", "remaining_time": "5:44:29"} +{"current_steps": 291, "total_steps": 8674, "loss": 1.3209044933319092, "lr": 1.336405529953917e-06, "epoch": 0.0670970717085543, "percentage": 3.35, "elapsed_time": "0:11:57", "remaining_time": "5:44:23"} +{"current_steps": 292, "total_steps": 8674, "loss": 1.3250432014465332, "lr": 1.3410138248847927e-06, "epoch": 0.06732764583813695, "percentage": 3.37, "elapsed_time": "0:11:59", "remaining_time": "5:44:26"} +{"current_steps": 293, "total_steps": 8674, "loss": 1.1738805770874023, "lr": 1.3456221198156681e-06, "epoch": 0.06755821996771962, "percentage": 3.38, "elapsed_time": "0:12:02", "remaining_time": "5:44:22"} +{"current_steps": 294, "total_steps": 8674, "loss": 1.238675832748413, "lr": 1.3502304147465437e-06, "epoch": 0.06778879409730228, "percentage": 3.39, "elapsed_time": "0:12:04", "remaining_time": "5:44:18"} +{"current_steps": 295, "total_steps": 8674, "loss": 1.2162814140319824, "lr": 1.354838709677419e-06, "epoch": 0.06801936822688494, "percentage": 3.4, "elapsed_time": "0:12:07", "remaining_time": "5:44:13"} +{"current_steps": 296, "total_steps": 8674, "loss": 1.2087210416793823, "lr": 1.359447004608295e-06, "epoch": 0.0682499423564676, "percentage": 3.41, "elapsed_time": "0:12:09", "remaining_time": "5:44:08"} +{"current_steps": 297, "total_steps": 8674, "loss": 1.2657420635223389, "lr": 1.3640552995391705e-06, "epoch": 0.06848051648605026, "percentage": 3.42, "elapsed_time": "0:12:11", "remaining_time": 
"5:44:03"} +{"current_steps": 298, "total_steps": 8674, "loss": 1.496249794960022, "lr": 1.3686635944700459e-06, "epoch": 0.06871109061563292, "percentage": 3.44, "elapsed_time": "0:12:14", "remaining_time": "5:43:58"} +{"current_steps": 299, "total_steps": 8674, "loss": 1.2698930501937866, "lr": 1.3732718894009217e-06, "epoch": 0.06894166474521558, "percentage": 3.45, "elapsed_time": "0:12:16", "remaining_time": "5:43:53"} +{"current_steps": 300, "total_steps": 8674, "loss": 1.2088165283203125, "lr": 1.377880184331797e-06, "epoch": 0.06917223887479824, "percentage": 3.46, "elapsed_time": "0:12:18", "remaining_time": "5:43:47"} +{"current_steps": 301, "total_steps": 8674, "loss": 1.392000436782837, "lr": 1.3824884792626727e-06, "epoch": 0.0694028130043809, "percentage": 3.47, "elapsed_time": "0:12:22", "remaining_time": "5:44:26"} +{"current_steps": 302, "total_steps": 8674, "loss": 1.366544485092163, "lr": 1.3870967741935485e-06, "epoch": 0.06963338713396357, "percentage": 3.48, "elapsed_time": "0:12:25", "remaining_time": "5:44:21"} +{"current_steps": 303, "total_steps": 8674, "loss": 1.3276031017303467, "lr": 1.3917050691244239e-06, "epoch": 0.06986396126354623, "percentage": 3.49, "elapsed_time": "0:12:27", "remaining_time": "5:44:15"} +{"current_steps": 304, "total_steps": 8674, "loss": 1.1413768529891968, "lr": 1.3963133640552995e-06, "epoch": 0.07009453539312889, "percentage": 3.5, "elapsed_time": "0:12:30", "remaining_time": "5:44:11"} +{"current_steps": 305, "total_steps": 8674, "loss": 1.2958520650863647, "lr": 1.400921658986175e-06, "epoch": 0.07032510952271155, "percentage": 3.52, "elapsed_time": "0:12:32", "remaining_time": "5:44:06"} +{"current_steps": 306, "total_steps": 8674, "loss": 1.3514549732208252, "lr": 1.4055299539170507e-06, "epoch": 0.07055568365229421, "percentage": 3.53, "elapsed_time": "0:12:34", "remaining_time": "5:44:03"} +{"current_steps": 307, "total_steps": 8674, "loss": 1.332120418548584, "lr": 1.410138248847926e-06, "epoch": 
0.07078625778187687, "percentage": 3.54, "elapsed_time": "0:12:37", "remaining_time": "5:43:59"} +{"current_steps": 308, "total_steps": 8674, "loss": 1.282820463180542, "lr": 1.4147465437788018e-06, "epoch": 0.07101683191145954, "percentage": 3.55, "elapsed_time": "0:12:39", "remaining_time": "5:43:55"} +{"current_steps": 309, "total_steps": 8674, "loss": 1.3927665948867798, "lr": 1.4193548387096774e-06, "epoch": 0.0712474060410422, "percentage": 3.56, "elapsed_time": "0:12:42", "remaining_time": "5:43:58"} +{"current_steps": 310, "total_steps": 8674, "loss": 1.2459386587142944, "lr": 1.4239631336405528e-06, "epoch": 0.07147798017062486, "percentage": 3.57, "elapsed_time": "0:12:44", "remaining_time": "5:43:53"} +{"current_steps": 311, "total_steps": 8674, "loss": 1.1996700763702393, "lr": 1.4285714285714286e-06, "epoch": 0.07170855430020752, "percentage": 3.59, "elapsed_time": "0:12:47", "remaining_time": "5:43:49"} +{"current_steps": 312, "total_steps": 8674, "loss": 1.1007883548736572, "lr": 1.433179723502304e-06, "epoch": 0.07193912842979018, "percentage": 3.6, "elapsed_time": "0:12:49", "remaining_time": "5:43:45"} +{"current_steps": 313, "total_steps": 8674, "loss": 1.211327314376831, "lr": 1.4377880184331796e-06, "epoch": 0.07216970255937284, "percentage": 3.61, "elapsed_time": "0:12:51", "remaining_time": "5:43:41"} +{"current_steps": 314, "total_steps": 8674, "loss": 1.314349889755249, "lr": 1.4423963133640554e-06, "epoch": 0.0724002766889555, "percentage": 3.62, "elapsed_time": "0:12:54", "remaining_time": "5:43:36"} +{"current_steps": 315, "total_steps": 8674, "loss": 1.2270662784576416, "lr": 1.4470046082949308e-06, "epoch": 0.07263085081853816, "percentage": 3.63, "elapsed_time": "0:12:56", "remaining_time": "5:43:33"} +{"current_steps": 316, "total_steps": 8674, "loss": 1.1802537441253662, "lr": 1.4516129032258064e-06, "epoch": 0.07286142494812083, "percentage": 3.64, "elapsed_time": "0:12:59", "remaining_time": "5:43:29"} +{"current_steps": 317, 
"total_steps": 8674, "loss": 1.275806188583374, "lr": 1.4562211981566818e-06, "epoch": 0.07309199907770349, "percentage": 3.65, "elapsed_time": "0:13:01", "remaining_time": "5:43:31"} +{"current_steps": 318, "total_steps": 8674, "loss": 1.2713148593902588, "lr": 1.4608294930875576e-06, "epoch": 0.07332257320728615, "percentage": 3.67, "elapsed_time": "0:13:04", "remaining_time": "5:43:28"} +{"current_steps": 319, "total_steps": 8674, "loss": 1.3091093301773071, "lr": 1.4654377880184332e-06, "epoch": 0.07355314733686881, "percentage": 3.68, "elapsed_time": "0:13:06", "remaining_time": "5:43:24"} +{"current_steps": 320, "total_steps": 8674, "loss": 1.1274672746658325, "lr": 1.4700460829493086e-06, "epoch": 0.07378372146645146, "percentage": 3.69, "elapsed_time": "0:13:09", "remaining_time": "5:43:19"} +{"current_steps": 321, "total_steps": 8674, "loss": 1.236955165863037, "lr": 1.4746543778801844e-06, "epoch": 0.07401429559603412, "percentage": 3.7, "elapsed_time": "0:13:11", "remaining_time": "5:43:14"} +{"current_steps": 322, "total_steps": 8674, "loss": 1.2561366558074951, "lr": 1.4792626728110598e-06, "epoch": 0.07424486972561678, "percentage": 3.71, "elapsed_time": "0:13:13", "remaining_time": "5:43:11"} +{"current_steps": 323, "total_steps": 8674, "loss": 1.1229519844055176, "lr": 1.4838709677419353e-06, "epoch": 0.07447544385519944, "percentage": 3.72, "elapsed_time": "0:13:16", "remaining_time": "5:43:07"} +{"current_steps": 324, "total_steps": 8674, "loss": 1.200115442276001, "lr": 1.4884792626728112e-06, "epoch": 0.0747060179847821, "percentage": 3.74, "elapsed_time": "0:13:18", "remaining_time": "5:43:02"} +{"current_steps": 325, "total_steps": 8674, "loss": 1.1930850744247437, "lr": 1.4930875576036865e-06, "epoch": 0.07493659211436476, "percentage": 3.75, "elapsed_time": "0:13:21", "remaining_time": "5:42:57"} +{"current_steps": 326, "total_steps": 8674, "loss": 1.3204331398010254, "lr": 1.4976958525345621e-06, "epoch": 0.07516716624394743, "percentage": 
3.76, "elapsed_time": "0:13:23", "remaining_time": "5:42:59"} +{"current_steps": 327, "total_steps": 8674, "loss": 1.109247088432312, "lr": 1.5023041474654377e-06, "epoch": 0.07539774037353009, "percentage": 3.77, "elapsed_time": "0:13:25", "remaining_time": "5:42:53"} +{"current_steps": 328, "total_steps": 8674, "loss": 1.1239254474639893, "lr": 1.5069124423963133e-06, "epoch": 0.07562831450311275, "percentage": 3.78, "elapsed_time": "0:13:28", "remaining_time": "5:42:50"} +{"current_steps": 329, "total_steps": 8674, "loss": 1.22686767578125, "lr": 1.5115207373271887e-06, "epoch": 0.07585888863269541, "percentage": 3.79, "elapsed_time": "0:13:30", "remaining_time": "5:42:46"} +{"current_steps": 330, "total_steps": 8674, "loss": 1.2846856117248535, "lr": 1.5161290322580645e-06, "epoch": 0.07608946276227807, "percentage": 3.8, "elapsed_time": "0:13:33", "remaining_time": "5:42:41"} +{"current_steps": 331, "total_steps": 8674, "loss": 1.1720764636993408, "lr": 1.5207373271889401e-06, "epoch": 0.07632003689186073, "percentage": 3.82, "elapsed_time": "0:13:35", "remaining_time": "5:42:36"} +{"current_steps": 332, "total_steps": 8674, "loss": 1.05867338180542, "lr": 1.5253456221198155e-06, "epoch": 0.0765506110214434, "percentage": 3.83, "elapsed_time": "0:13:37", "remaining_time": "5:42:31"} +{"current_steps": 333, "total_steps": 8674, "loss": 1.2652220726013184, "lr": 1.5299539170506913e-06, "epoch": 0.07678118515102605, "percentage": 3.84, "elapsed_time": "0:13:40", "remaining_time": "5:42:28"} +{"current_steps": 334, "total_steps": 8674, "loss": 1.1367218494415283, "lr": 1.5345622119815667e-06, "epoch": 0.07701175928060872, "percentage": 3.85, "elapsed_time": "0:13:42", "remaining_time": "5:42:29"} +{"current_steps": 335, "total_steps": 8674, "loss": 1.169439673423767, "lr": 1.5391705069124423e-06, "epoch": 0.07724233341019138, "percentage": 3.86, "elapsed_time": "0:13:45", "remaining_time": "5:42:25"} +{"current_steps": 336, "total_steps": 8674, "loss": 
1.265104055404663, "lr": 1.543778801843318e-06, "epoch": 0.07747290753977404, "percentage": 3.87, "elapsed_time": "0:13:47", "remaining_time": "5:42:21"} +{"current_steps": 337, "total_steps": 8674, "loss": 1.059098243713379, "lr": 1.5483870967741935e-06, "epoch": 0.0777034816693567, "percentage": 3.89, "elapsed_time": "0:13:50", "remaining_time": "5:42:17"} +{"current_steps": 338, "total_steps": 8674, "loss": 1.0998419523239136, "lr": 1.552995391705069e-06, "epoch": 0.07793405579893936, "percentage": 3.9, "elapsed_time": "0:13:52", "remaining_time": "5:42:12"} +{"current_steps": 339, "total_steps": 8674, "loss": 1.1361349821090698, "lr": 1.5576036866359445e-06, "epoch": 0.07816462992852202, "percentage": 3.91, "elapsed_time": "0:13:54", "remaining_time": "5:42:07"} +{"current_steps": 340, "total_steps": 8674, "loss": 1.1051890850067139, "lr": 1.5622119815668203e-06, "epoch": 0.07839520405810468, "percentage": 3.92, "elapsed_time": "0:13:57", "remaining_time": "5:42:02"} +{"current_steps": 341, "total_steps": 8674, "loss": 1.1675043106079102, "lr": 1.5668202764976959e-06, "epoch": 0.07862577818768735, "percentage": 3.93, "elapsed_time": "0:13:59", "remaining_time": "5:41:58"} +{"current_steps": 342, "total_steps": 8674, "loss": 1.180741786956787, "lr": 1.5714285714285712e-06, "epoch": 0.07885635231727, "percentage": 3.94, "elapsed_time": "0:14:02", "remaining_time": "5:41:54"} +{"current_steps": 343, "total_steps": 8674, "loss": 1.241147518157959, "lr": 1.576036866359447e-06, "epoch": 0.07908692644685267, "percentage": 3.95, "elapsed_time": "0:14:04", "remaining_time": "5:41:55"} +{"current_steps": 344, "total_steps": 8674, "loss": 1.2831401824951172, "lr": 1.5806451612903224e-06, "epoch": 0.07931750057643533, "percentage": 3.97, "elapsed_time": "0:14:07", "remaining_time": "5:41:51"} +{"current_steps": 345, "total_steps": 8674, "loss": 1.2328094244003296, "lr": 1.585253456221198e-06, "epoch": 0.07954807470601799, "percentage": 3.98, "elapsed_time": "0:14:09", 
"remaining_time": "5:41:46"} +{"current_steps": 346, "total_steps": 8674, "loss": 1.296494960784912, "lr": 1.5898617511520738e-06, "epoch": 0.07977864883560065, "percentage": 3.99, "elapsed_time": "0:14:11", "remaining_time": "5:41:44"} +{"current_steps": 347, "total_steps": 8674, "loss": 1.1802153587341309, "lr": 1.5944700460829492e-06, "epoch": 0.08000922296518331, "percentage": 4.0, "elapsed_time": "0:14:14", "remaining_time": "5:41:52"} +{"current_steps": 348, "total_steps": 8674, "loss": 1.2387690544128418, "lr": 1.5990783410138248e-06, "epoch": 0.08023979709476596, "percentage": 4.01, "elapsed_time": "0:14:17", "remaining_time": "5:41:48"} +{"current_steps": 349, "total_steps": 8674, "loss": 1.1307916641235352, "lr": 1.6036866359447004e-06, "epoch": 0.08047037122434862, "percentage": 4.02, "elapsed_time": "0:14:19", "remaining_time": "5:41:43"} +{"current_steps": 350, "total_steps": 8674, "loss": 1.117497444152832, "lr": 1.608294930875576e-06, "epoch": 0.08070094535393128, "percentage": 4.04, "elapsed_time": "0:14:21", "remaining_time": "5:41:38"} +{"current_steps": 351, "total_steps": 8674, "loss": 1.1360805034637451, "lr": 1.6129032258064514e-06, "epoch": 0.08093151948351394, "percentage": 4.05, "elapsed_time": "0:14:24", "remaining_time": "5:41:39"} +{"current_steps": 352, "total_steps": 8674, "loss": 1.1756231784820557, "lr": 1.6175115207373272e-06, "epoch": 0.0811620936130966, "percentage": 4.06, "elapsed_time": "0:14:26", "remaining_time": "5:41:34"} +{"current_steps": 353, "total_steps": 8674, "loss": 1.0260417461395264, "lr": 1.6221198156682028e-06, "epoch": 0.08139266774267927, "percentage": 4.07, "elapsed_time": "0:14:29", "remaining_time": "5:41:30"} +{"current_steps": 354, "total_steps": 8674, "loss": 1.0863536596298218, "lr": 1.6267281105990782e-06, "epoch": 0.08162324187226193, "percentage": 4.08, "elapsed_time": "0:14:31", "remaining_time": "5:41:27"} +{"current_steps": 355, "total_steps": 8674, "loss": 1.0529779195785522, "lr": 
1.631336405529954e-06, "epoch": 0.08185381600184459, "percentage": 4.09, "elapsed_time": "0:14:34", "remaining_time": "5:41:24"} +{"current_steps": 356, "total_steps": 8674, "loss": 1.0374994277954102, "lr": 1.6359447004608294e-06, "epoch": 0.08208439013142725, "percentage": 4.1, "elapsed_time": "0:14:36", "remaining_time": "5:41:20"} +{"current_steps": 357, "total_steps": 8674, "loss": 1.153419017791748, "lr": 1.640552995391705e-06, "epoch": 0.08231496426100991, "percentage": 4.12, "elapsed_time": "0:14:38", "remaining_time": "5:41:15"} +{"current_steps": 358, "total_steps": 8674, "loss": 1.0155376195907593, "lr": 1.6451612903225808e-06, "epoch": 0.08254553839059257, "percentage": 4.13, "elapsed_time": "0:14:41", "remaining_time": "5:41:11"} +{"current_steps": 359, "total_steps": 8674, "loss": 1.1288530826568604, "lr": 1.6497695852534561e-06, "epoch": 0.08277611252017524, "percentage": 4.14, "elapsed_time": "0:14:43", "remaining_time": "5:41:12"} +{"current_steps": 360, "total_steps": 8674, "loss": 1.1493456363677979, "lr": 1.6543778801843317e-06, "epoch": 0.0830066866497579, "percentage": 4.15, "elapsed_time": "0:14:46", "remaining_time": "5:41:07"} +{"current_steps": 361, "total_steps": 8674, "loss": 1.1064895391464233, "lr": 1.6589861751152071e-06, "epoch": 0.08323726077934056, "percentage": 4.16, "elapsed_time": "0:14:48", "remaining_time": "5:41:03"} +{"current_steps": 362, "total_steps": 8674, "loss": 1.0102828741073608, "lr": 1.663594470046083e-06, "epoch": 0.08346783490892322, "percentage": 4.17, "elapsed_time": "0:14:51", "remaining_time": "5:40:59"} +{"current_steps": 363, "total_steps": 8674, "loss": 1.0750138759613037, "lr": 1.6682027649769585e-06, "epoch": 0.08369840903850588, "percentage": 4.18, "elapsed_time": "0:14:53", "remaining_time": "5:40:55"} +{"current_steps": 364, "total_steps": 8674, "loss": 1.1611195802688599, "lr": 1.672811059907834e-06, "epoch": 0.08392898316808854, "percentage": 4.2, "elapsed_time": "0:14:55", "remaining_time": 
"5:40:50"} +{"current_steps": 365, "total_steps": 8674, "loss": 1.2799829244613647, "lr": 1.6774193548387097e-06, "epoch": 0.0841595572976712, "percentage": 4.21, "elapsed_time": "0:14:58", "remaining_time": "5:40:46"} +{"current_steps": 366, "total_steps": 8674, "loss": 1.2427947521209717, "lr": 1.682027649769585e-06, "epoch": 0.08439013142725386, "percentage": 4.22, "elapsed_time": "0:15:00", "remaining_time": "5:40:40"} +{"current_steps": 367, "total_steps": 8674, "loss": 1.0379959344863892, "lr": 1.6866359447004607e-06, "epoch": 0.08462070555683653, "percentage": 4.23, "elapsed_time": "0:15:02", "remaining_time": "5:40:36"} +{"current_steps": 368, "total_steps": 8674, "loss": 0.8439304828643799, "lr": 1.6912442396313363e-06, "epoch": 0.08485127968641919, "percentage": 4.24, "elapsed_time": "0:15:05", "remaining_time": "5:40:37"} +{"current_steps": 369, "total_steps": 8674, "loss": 1.1249288320541382, "lr": 1.6958525345622119e-06, "epoch": 0.08508185381600185, "percentage": 4.25, "elapsed_time": "0:15:07", "remaining_time": "5:40:33"} +{"current_steps": 370, "total_steps": 8674, "loss": 1.186207890510559, "lr": 1.7004608294930875e-06, "epoch": 0.08531242794558451, "percentage": 4.27, "elapsed_time": "0:15:10", "remaining_time": "5:40:28"} +{"current_steps": 371, "total_steps": 8674, "loss": 1.1181306838989258, "lr": 1.705069124423963e-06, "epoch": 0.08554300207516717, "percentage": 4.28, "elapsed_time": "0:15:12", "remaining_time": "5:40:23"} +{"current_steps": 372, "total_steps": 8674, "loss": 0.9828017950057983, "lr": 1.7096774193548387e-06, "epoch": 0.08577357620474983, "percentage": 4.29, "elapsed_time": "0:15:14", "remaining_time": "5:40:18"} +{"current_steps": 373, "total_steps": 8674, "loss": 1.1158804893493652, "lr": 1.714285714285714e-06, "epoch": 0.0860041503343325, "percentage": 4.3, "elapsed_time": "0:15:17", "remaining_time": "5:40:14"} +{"current_steps": 374, "total_steps": 8674, "loss": 1.1771481037139893, "lr": 1.7188940092165899e-06, "epoch": 
0.08623472446391516, "percentage": 4.31, "elapsed_time": "0:15:19", "remaining_time": "5:40:10"} +{"current_steps": 375, "total_steps": 8674, "loss": 1.1378540992736816, "lr": 1.7235023041474655e-06, "epoch": 0.08646529859349782, "percentage": 4.32, "elapsed_time": "0:15:22", "remaining_time": "5:40:06"} +{"current_steps": 376, "total_steps": 8674, "loss": 1.2011152505874634, "lr": 1.7281105990783408e-06, "epoch": 0.08669587272308046, "percentage": 4.33, "elapsed_time": "0:15:24", "remaining_time": "5:40:08"} +{"current_steps": 377, "total_steps": 8674, "loss": 1.0932848453521729, "lr": 1.7327188940092167e-06, "epoch": 0.08692644685266313, "percentage": 4.35, "elapsed_time": "0:15:27", "remaining_time": "5:40:04"} +{"current_steps": 378, "total_steps": 8674, "loss": 1.0530626773834229, "lr": 1.737327188940092e-06, "epoch": 0.08715702098224579, "percentage": 4.36, "elapsed_time": "0:15:29", "remaining_time": "5:40:00"} +{"current_steps": 379, "total_steps": 8674, "loss": 1.09238600730896, "lr": 1.7419354838709676e-06, "epoch": 0.08738759511182845, "percentage": 4.37, "elapsed_time": "0:15:31", "remaining_time": "5:39:56"} +{"current_steps": 380, "total_steps": 8674, "loss": 1.10097336769104, "lr": 1.7465437788018434e-06, "epoch": 0.08761816924141111, "percentage": 4.38, "elapsed_time": "0:15:34", "remaining_time": "5:39:53"} +{"current_steps": 381, "total_steps": 8674, "loss": 1.1483392715454102, "lr": 1.7511520737327188e-06, "epoch": 0.08784874337099377, "percentage": 4.39, "elapsed_time": "0:15:36", "remaining_time": "5:39:50"} +{"current_steps": 382, "total_steps": 8674, "loss": 0.9776606559753418, "lr": 1.7557603686635944e-06, "epoch": 0.08807931750057643, "percentage": 4.4, "elapsed_time": "0:15:39", "remaining_time": "5:39:47"} +{"current_steps": 383, "total_steps": 8674, "loss": 0.9363219738006592, "lr": 1.7603686635944698e-06, "epoch": 0.08830989163015909, "percentage": 4.42, "elapsed_time": "0:15:41", "remaining_time": "5:39:43"} +{"current_steps": 384, 
"total_steps": 8674, "loss": 1.1259841918945312, "lr": 1.7649769585253456e-06, "epoch": 0.08854046575974175, "percentage": 4.43, "elapsed_time": "0:15:44", "remaining_time": "5:39:39"} +{"current_steps": 385, "total_steps": 8674, "loss": 1.0652339458465576, "lr": 1.7695852534562212e-06, "epoch": 0.08877103988932442, "percentage": 4.44, "elapsed_time": "0:15:46", "remaining_time": "5:39:41"} +{"current_steps": 386, "total_steps": 8674, "loss": 1.1088197231292725, "lr": 1.7741935483870966e-06, "epoch": 0.08900161401890708, "percentage": 4.45, "elapsed_time": "0:15:49", "remaining_time": "5:39:36"} +{"current_steps": 387, "total_steps": 8674, "loss": 1.0171717405319214, "lr": 1.7788018433179724e-06, "epoch": 0.08923218814848974, "percentage": 4.46, "elapsed_time": "0:15:51", "remaining_time": "5:39:33"} +{"current_steps": 388, "total_steps": 8674, "loss": 1.0391405820846558, "lr": 1.7834101382488478e-06, "epoch": 0.0894627622780724, "percentage": 4.47, "elapsed_time": "0:15:53", "remaining_time": "5:39:30"} +{"current_steps": 389, "total_steps": 8674, "loss": 0.9970325231552124, "lr": 1.7880184331797234e-06, "epoch": 0.08969333640765506, "percentage": 4.48, "elapsed_time": "0:15:56", "remaining_time": "5:39:26"} +{"current_steps": 390, "total_steps": 8674, "loss": 1.1427147388458252, "lr": 1.792626728110599e-06, "epoch": 0.08992391053723772, "percentage": 4.5, "elapsed_time": "0:15:58", "remaining_time": "5:39:22"} +{"current_steps": 391, "total_steps": 8674, "loss": 0.8830767273902893, "lr": 1.7972350230414746e-06, "epoch": 0.09015448466682038, "percentage": 4.51, "elapsed_time": "0:16:01", "remaining_time": "5:39:20"} +{"current_steps": 392, "total_steps": 8674, "loss": 1.0469788312911987, "lr": 1.8018433179723502e-06, "epoch": 0.09038505879640304, "percentage": 4.52, "elapsed_time": "0:16:03", "remaining_time": "5:39:17"} +{"current_steps": 393, "total_steps": 8674, "loss": 1.022156834602356, "lr": 1.8064516129032258e-06, "epoch": 0.0906156329259857, "percentage": 
4.53, "elapsed_time": "0:16:06", "remaining_time": "5:39:19"} +{"current_steps": 394, "total_steps": 8674, "loss": 1.0723674297332764, "lr": 1.8110599078341013e-06, "epoch": 0.09084620705556837, "percentage": 4.54, "elapsed_time": "0:16:08", "remaining_time": "5:39:14"} +{"current_steps": 395, "total_steps": 8674, "loss": 0.9089772701263428, "lr": 1.8156682027649767e-06, "epoch": 0.09107678118515103, "percentage": 4.55, "elapsed_time": "0:16:11", "remaining_time": "5:39:11"} +{"current_steps": 396, "total_steps": 8674, "loss": 1.1029877662658691, "lr": 1.8202764976958525e-06, "epoch": 0.09130735531473369, "percentage": 4.57, "elapsed_time": "0:16:13", "remaining_time": "5:39:08"} +{"current_steps": 397, "total_steps": 8674, "loss": 0.998812198638916, "lr": 1.8248847926267281e-06, "epoch": 0.09153792944431635, "percentage": 4.58, "elapsed_time": "0:16:15", "remaining_time": "5:39:05"} +{"current_steps": 398, "total_steps": 8674, "loss": 1.116652250289917, "lr": 1.8294930875576035e-06, "epoch": 0.09176850357389901, "percentage": 4.59, "elapsed_time": "0:16:18", "remaining_time": "5:39:00"} +{"current_steps": 399, "total_steps": 8674, "loss": 1.0071923732757568, "lr": 1.8341013824884793e-06, "epoch": 0.09199907770348167, "percentage": 4.6, "elapsed_time": "0:16:20", "remaining_time": "5:38:57"} +{"current_steps": 400, "total_steps": 8674, "loss": 1.0713586807250977, "lr": 1.8387096774193547e-06, "epoch": 0.09222965183306434, "percentage": 4.61, "elapsed_time": "0:16:22", "remaining_time": "5:38:53"} +{"current_steps": 401, "total_steps": 8674, "loss": 1.0897400379180908, "lr": 1.8433179723502303e-06, "epoch": 0.092460225962647, "percentage": 4.62, "elapsed_time": "0:16:26", "remaining_time": "5:39:22"} +{"current_steps": 402, "total_steps": 8674, "loss": 0.9571444392204285, "lr": 1.8479262672811061e-06, "epoch": 0.09269080009222966, "percentage": 4.63, "elapsed_time": "0:16:29", "remaining_time": "5:39:18"} +{"current_steps": 403, "total_steps": 8674, "loss": 
0.9822309017181396, "lr": 1.8525345622119815e-06, "epoch": 0.09292137422181232, "percentage": 4.65, "elapsed_time": "0:16:31", "remaining_time": "5:39:14"} +{"current_steps": 404, "total_steps": 8674, "loss": 1.0010900497436523, "lr": 1.857142857142857e-06, "epoch": 0.09315194835139497, "percentage": 4.66, "elapsed_time": "0:16:34", "remaining_time": "5:39:09"} +{"current_steps": 405, "total_steps": 8674, "loss": 0.8548961877822876, "lr": 1.8617511520737325e-06, "epoch": 0.09338252248097763, "percentage": 4.67, "elapsed_time": "0:16:36", "remaining_time": "5:39:05"} +{"current_steps": 406, "total_steps": 8674, "loss": 1.0856781005859375, "lr": 1.8663594470046083e-06, "epoch": 0.09361309661056029, "percentage": 4.68, "elapsed_time": "0:16:38", "remaining_time": "5:39:01"} +{"current_steps": 407, "total_steps": 8674, "loss": 1.0913856029510498, "lr": 1.8709677419354837e-06, "epoch": 0.09384367074014295, "percentage": 4.69, "elapsed_time": "0:16:41", "remaining_time": "5:38:58"} +{"current_steps": 408, "total_steps": 8674, "loss": 0.9409916400909424, "lr": 1.8755760368663593e-06, "epoch": 0.09407424486972561, "percentage": 4.7, "elapsed_time": "0:16:43", "remaining_time": "5:38:53"} +{"current_steps": 409, "total_steps": 8674, "loss": 0.9950551390647888, "lr": 1.880184331797235e-06, "epoch": 0.09430481899930827, "percentage": 4.72, "elapsed_time": "0:16:46", "remaining_time": "5:38:49"} +{"current_steps": 410, "total_steps": 8674, "loss": 0.9915211200714111, "lr": 1.8847926267281104e-06, "epoch": 0.09453539312889093, "percentage": 4.73, "elapsed_time": "0:16:48", "remaining_time": "5:38:51"} +{"current_steps": 411, "total_steps": 8674, "loss": 1.0381574630737305, "lr": 1.889400921658986e-06, "epoch": 0.0947659672584736, "percentage": 4.74, "elapsed_time": "0:16:51", "remaining_time": "5:38:47"} +{"current_steps": 412, "total_steps": 8674, "loss": 0.8911284804344177, "lr": 1.8940092165898616e-06, "epoch": 0.09499654138805626, "percentage": 4.75, "elapsed_time": 
"0:16:53", "remaining_time": "5:38:42"} +{"current_steps": 413, "total_steps": 8674, "loss": 0.8757172226905823, "lr": 1.8986175115207372e-06, "epoch": 0.09522711551763892, "percentage": 4.76, "elapsed_time": "0:16:55", "remaining_time": "5:38:38"} +{"current_steps": 414, "total_steps": 8674, "loss": 0.8362075090408325, "lr": 1.9032258064516128e-06, "epoch": 0.09545768964722158, "percentage": 4.77, "elapsed_time": "0:16:58", "remaining_time": "5:38:34"} +{"current_steps": 415, "total_steps": 8674, "loss": 0.906524658203125, "lr": 1.9078341013824884e-06, "epoch": 0.09568826377680424, "percentage": 4.78, "elapsed_time": "0:17:00", "remaining_time": "5:38:31"} +{"current_steps": 416, "total_steps": 8674, "loss": 1.100447654724121, "lr": 1.912442396313364e-06, "epoch": 0.0959188379063869, "percentage": 4.8, "elapsed_time": "0:17:02", "remaining_time": "5:38:27"} +{"current_steps": 417, "total_steps": 8674, "loss": 0.9658455848693848, "lr": 1.9170506912442396e-06, "epoch": 0.09614941203596956, "percentage": 4.81, "elapsed_time": "0:17:05", "remaining_time": "5:38:23"} +{"current_steps": 418, "total_steps": 8674, "loss": 0.971304714679718, "lr": 1.921658986175115e-06, "epoch": 0.09637998616555223, "percentage": 4.82, "elapsed_time": "0:17:07", "remaining_time": "5:38:24"} +{"current_steps": 419, "total_steps": 8674, "loss": 0.916153073310852, "lr": 1.926267281105991e-06, "epoch": 0.09661056029513489, "percentage": 4.83, "elapsed_time": "0:17:10", "remaining_time": "5:38:20"} +{"current_steps": 420, "total_steps": 8674, "loss": 0.9166572093963623, "lr": 1.930875576036866e-06, "epoch": 0.09684113442471755, "percentage": 4.84, "elapsed_time": "0:17:12", "remaining_time": "5:38:16"} +{"current_steps": 421, "total_steps": 8674, "loss": 0.8754867315292358, "lr": 1.935483870967742e-06, "epoch": 0.09707170855430021, "percentage": 4.85, "elapsed_time": "0:17:15", "remaining_time": "5:38:13"} +{"current_steps": 422, "total_steps": 8674, "loss": 0.9507668018341064, "lr": 
1.9400921658986174e-06, "epoch": 0.09730228268388287, "percentage": 4.87, "elapsed_time": "0:17:17", "remaining_time": "5:38:09"} +{"current_steps": 423, "total_steps": 8674, "loss": 0.8977904319763184, "lr": 1.944700460829493e-06, "epoch": 0.09753285681346553, "percentage": 4.88, "elapsed_time": "0:17:19", "remaining_time": "5:38:05"} +{"current_steps": 424, "total_steps": 8674, "loss": 0.8359580039978027, "lr": 1.9493087557603686e-06, "epoch": 0.0977634309430482, "percentage": 4.89, "elapsed_time": "0:17:22", "remaining_time": "5:38:01"} +{"current_steps": 425, "total_steps": 8674, "loss": 0.8928875923156738, "lr": 1.953917050691244e-06, "epoch": 0.09799400507263085, "percentage": 4.9, "elapsed_time": "0:17:24", "remaining_time": "5:37:57"} +{"current_steps": 426, "total_steps": 8674, "loss": 0.9031360149383545, "lr": 1.9585253456221198e-06, "epoch": 0.09822457920221352, "percentage": 4.91, "elapsed_time": "0:17:27", "remaining_time": "5:37:55"} +{"current_steps": 427, "total_steps": 8674, "loss": 0.9135938286781311, "lr": 1.963133640552995e-06, "epoch": 0.09845515333179618, "percentage": 4.92, "elapsed_time": "0:17:29", "remaining_time": "5:37:57"} +{"current_steps": 428, "total_steps": 8674, "loss": 0.8978056907653809, "lr": 1.967741935483871e-06, "epoch": 0.09868572746137884, "percentage": 4.93, "elapsed_time": "0:17:32", "remaining_time": "5:37:53"} +{"current_steps": 429, "total_steps": 8674, "loss": 0.8236517906188965, "lr": 1.9723502304147463e-06, "epoch": 0.0989163015909615, "percentage": 4.95, "elapsed_time": "0:17:34", "remaining_time": "5:37:49"} +{"current_steps": 430, "total_steps": 8674, "loss": 0.8279497027397156, "lr": 1.976958525345622e-06, "epoch": 0.09914687572054416, "percentage": 4.96, "elapsed_time": "0:17:37", "remaining_time": "5:37:46"} +{"current_steps": 431, "total_steps": 8674, "loss": 0.9273175001144409, "lr": 1.9815668202764975e-06, "epoch": 0.09937744985012681, "percentage": 4.97, "elapsed_time": "0:17:39", "remaining_time": 
"5:37:43"} +{"current_steps": 432, "total_steps": 8674, "loss": 0.8990100622177124, "lr": 1.9861751152073733e-06, "epoch": 0.09960802397970947, "percentage": 4.98, "elapsed_time": "0:17:41", "remaining_time": "5:37:40"} +{"current_steps": 433, "total_steps": 8674, "loss": 0.9221487045288086, "lr": 1.9907834101382487e-06, "epoch": 0.09983859810929213, "percentage": 4.99, "elapsed_time": "0:17:44", "remaining_time": "5:37:38"} +{"current_steps": 434, "total_steps": 8674, "loss": 0.7376757264137268, "lr": 1.995391705069124e-06, "epoch": 0.10006917223887479, "percentage": 5.0, "elapsed_time": "0:17:46", "remaining_time": "5:37:34"} +{"current_steps": 435, "total_steps": 8674, "loss": 0.8496265411376953, "lr": 2e-06, "epoch": 0.10029974636845745, "percentage": 5.01, "elapsed_time": "0:17:49", "remaining_time": "5:37:35"} +{"current_steps": 436, "total_steps": 8674, "loss": 0.73260897397995, "lr": 1.9999999273199326e-06, "epoch": 0.10053032049804012, "percentage": 5.03, "elapsed_time": "0:17:51", "remaining_time": "5:37:30"} +{"current_steps": 437, "total_steps": 8674, "loss": 0.9583776593208313, "lr": 1.999999709279741e-06, "epoch": 0.10076089462762278, "percentage": 5.04, "elapsed_time": "0:17:54", "remaining_time": "5:37:26"} +{"current_steps": 438, "total_steps": 8674, "loss": 0.810507595539093, "lr": 1.9999993458794573e-06, "epoch": 0.10099146875720544, "percentage": 5.05, "elapsed_time": "0:17:56", "remaining_time": "5:37:22"} +{"current_steps": 439, "total_steps": 8674, "loss": 0.7957329750061035, "lr": 1.9999988371191337e-06, "epoch": 0.1012220428867881, "percentage": 5.06, "elapsed_time": "0:17:58", "remaining_time": "5:37:19"} +{"current_steps": 440, "total_steps": 8674, "loss": 0.8141027688980103, "lr": 1.9999981829988444e-06, "epoch": 0.10145261701637076, "percentage": 5.07, "elapsed_time": "0:18:01", "remaining_time": "5:37:16"} +{"current_steps": 441, "total_steps": 8674, "loss": 0.8454669117927551, "lr": 1.9999973835186847e-06, "epoch": 
0.10168319114595342, "percentage": 5.08, "elapsed_time": "0:18:03", "remaining_time": "5:37:13"} +{"current_steps": 442, "total_steps": 8674, "loss": 0.7966687679290771, "lr": 1.9999964386787706e-06, "epoch": 0.10191376527553608, "percentage": 5.1, "elapsed_time": "0:18:06", "remaining_time": "5:37:09"} +{"current_steps": 443, "total_steps": 8674, "loss": 0.8623852133750916, "lr": 1.9999953484792394e-06, "epoch": 0.10214433940511874, "percentage": 5.11, "elapsed_time": "0:18:08", "remaining_time": "5:37:11"} +{"current_steps": 444, "total_steps": 8674, "loss": 0.9604165554046631, "lr": 1.9999941129202494e-06, "epoch": 0.1023749135347014, "percentage": 5.12, "elapsed_time": "0:18:11", "remaining_time": "5:37:07"} +{"current_steps": 445, "total_steps": 8674, "loss": 0.7461415529251099, "lr": 1.999992732001981e-06, "epoch": 0.10260548766428407, "percentage": 5.13, "elapsed_time": "0:18:13", "remaining_time": "5:37:05"} +{"current_steps": 446, "total_steps": 8674, "loss": 0.7243722677230835, "lr": 1.9999912057246342e-06, "epoch": 0.10283606179386673, "percentage": 5.14, "elapsed_time": "0:18:16", "remaining_time": "5:37:02"} +{"current_steps": 447, "total_steps": 8674, "loss": 0.8466402292251587, "lr": 1.999989534088431e-06, "epoch": 0.10306663592344939, "percentage": 5.15, "elapsed_time": "0:18:18", "remaining_time": "5:36:58"} +{"current_steps": 448, "total_steps": 8674, "loss": 0.8062578439712524, "lr": 1.9999877170936142e-06, "epoch": 0.10329721005303205, "percentage": 5.16, "elapsed_time": "0:18:20", "remaining_time": "5:36:54"} +{"current_steps": 449, "total_steps": 8674, "loss": 0.8979625701904297, "lr": 1.9999857547404484e-06, "epoch": 0.10352778418261471, "percentage": 5.18, "elapsed_time": "0:18:23", "remaining_time": "5:36:51"} +{"current_steps": 450, "total_steps": 8674, "loss": 0.7970046401023865, "lr": 1.999983647029219e-06, "epoch": 0.10375835831219737, "percentage": 5.19, "elapsed_time": "0:18:25", "remaining_time": "5:36:48"} +{"current_steps": 451, 
"total_steps": 8674, "loss": 0.9027936458587646, "lr": 1.999981393960231e-06, "epoch": 0.10398893244178004, "percentage": 5.2, "elapsed_time": "0:18:28", "remaining_time": "5:36:43"} +{"current_steps": 452, "total_steps": 8674, "loss": 0.8347916007041931, "lr": 1.9999789955338133e-06, "epoch": 0.1042195065713627, "percentage": 5.21, "elapsed_time": "0:18:30", "remaining_time": "5:36:43"} +{"current_steps": 453, "total_steps": 8674, "loss": 0.7856979370117188, "lr": 1.9999764517503146e-06, "epoch": 0.10445008070094536, "percentage": 5.22, "elapsed_time": "0:18:33", "remaining_time": "5:36:39"} +{"current_steps": 454, "total_steps": 8674, "loss": 0.8370383381843567, "lr": 1.9999737626101037e-06, "epoch": 0.10468065483052802, "percentage": 5.23, "elapsed_time": "0:18:35", "remaining_time": "5:36:36"} +{"current_steps": 455, "total_steps": 8674, "loss": 0.8629742860794067, "lr": 1.9999709281135718e-06, "epoch": 0.10491122896011068, "percentage": 5.25, "elapsed_time": "0:18:37", "remaining_time": "5:36:32"} +{"current_steps": 456, "total_steps": 8674, "loss": 0.8187414407730103, "lr": 1.9999679482611315e-06, "epoch": 0.10514180308969334, "percentage": 5.26, "elapsed_time": "0:18:40", "remaining_time": "5:36:29"} +{"current_steps": 457, "total_steps": 8674, "loss": 0.8169279098510742, "lr": 1.9999648230532156e-06, "epoch": 0.105372377219276, "percentage": 5.27, "elapsed_time": "0:18:42", "remaining_time": "5:36:25"} +{"current_steps": 458, "total_steps": 8674, "loss": 0.7186012268066406, "lr": 1.999961552490278e-06, "epoch": 0.10560295134885866, "percentage": 5.28, "elapsed_time": "0:18:45", "remaining_time": "5:36:22"} +{"current_steps": 459, "total_steps": 8674, "loss": 0.8088201284408569, "lr": 1.9999581365727947e-06, "epoch": 0.10583352547844131, "percentage": 5.29, "elapsed_time": "0:18:47", "remaining_time": "5:36:19"} +{"current_steps": 460, "total_steps": 8674, "loss": 0.7067796587944031, "lr": 1.999954575301262e-06, "epoch": 0.10606409960802397, "percentage": 
5.3, "elapsed_time": "0:18:50", "remaining_time": "5:36:20"} +{"current_steps": 461, "total_steps": 8674, "loss": 0.8839461803436279, "lr": 1.9999508686761974e-06, "epoch": 0.10629467373760663, "percentage": 5.31, "elapsed_time": "0:18:52", "remaining_time": "5:36:17"} +{"current_steps": 462, "total_steps": 8674, "loss": 0.750046968460083, "lr": 1.99994701669814e-06, "epoch": 0.1065252478671893, "percentage": 5.33, "elapsed_time": "0:18:54", "remaining_time": "5:36:14"} +{"current_steps": 463, "total_steps": 8674, "loss": 0.7954964637756348, "lr": 1.999943019367649e-06, "epoch": 0.10675582199677196, "percentage": 5.34, "elapsed_time": "0:18:57", "remaining_time": "5:36:10"} +{"current_steps": 464, "total_steps": 8674, "loss": 0.7178900241851807, "lr": 1.9999388766853065e-06, "epoch": 0.10698639612635462, "percentage": 5.35, "elapsed_time": "0:18:59", "remaining_time": "5:36:07"} +{"current_steps": 465, "total_steps": 8674, "loss": 0.7583869695663452, "lr": 1.999934588651714e-06, "epoch": 0.10721697025593728, "percentage": 5.36, "elapsed_time": "0:19:02", "remaining_time": "5:36:03"} +{"current_steps": 466, "total_steps": 8674, "loss": 0.8068876266479492, "lr": 1.999930155267495e-06, "epoch": 0.10744754438551994, "percentage": 5.37, "elapsed_time": "0:19:04", "remaining_time": "5:35:59"} +{"current_steps": 467, "total_steps": 8674, "loss": 0.7507776021957397, "lr": 1.9999255765332946e-06, "epoch": 0.1076781185151026, "percentage": 5.38, "elapsed_time": "0:19:06", "remaining_time": "5:35:56"} +{"current_steps": 468, "total_steps": 8674, "loss": 0.7719494104385376, "lr": 1.999920852449777e-06, "epoch": 0.10790869264468526, "percentage": 5.4, "elapsed_time": "0:19:09", "remaining_time": "5:35:57"} +{"current_steps": 469, "total_steps": 8674, "loss": 0.7420990467071533, "lr": 1.99991598301763e-06, "epoch": 0.10813926677426793, "percentage": 5.41, "elapsed_time": "0:19:12", "remaining_time": "5:35:53"} +{"current_steps": 470, "total_steps": 8674, "loss": 
0.7152374386787415, "lr": 1.9999109682375606e-06, "epoch": 0.10836984090385059, "percentage": 5.42, "elapsed_time": "0:19:14", "remaining_time": "5:35:50"} +{"current_steps": 471, "total_steps": 8674, "loss": 0.7971220016479492, "lr": 1.9999058081102985e-06, "epoch": 0.10860041503343325, "percentage": 5.43, "elapsed_time": "0:19:16", "remaining_time": "5:35:48"} +{"current_steps": 472, "total_steps": 8674, "loss": 0.774874746799469, "lr": 1.9999005026365936e-06, "epoch": 0.10883098916301591, "percentage": 5.44, "elapsed_time": "0:19:19", "remaining_time": "5:35:44"} +{"current_steps": 473, "total_steps": 8674, "loss": 0.7567731142044067, "lr": 1.999895051817216e-06, "epoch": 0.10906156329259857, "percentage": 5.45, "elapsed_time": "0:19:21", "remaining_time": "5:35:41"} +{"current_steps": 474, "total_steps": 8674, "loss": 0.7221060991287231, "lr": 1.99988945565296e-06, "epoch": 0.10929213742218123, "percentage": 5.46, "elapsed_time": "0:19:24", "remaining_time": "5:35:38"} +{"current_steps": 475, "total_steps": 8674, "loss": 0.8064852952957153, "lr": 1.9998837141446378e-06, "epoch": 0.1095227115517639, "percentage": 5.48, "elapsed_time": "0:19:26", "remaining_time": "5:35:34"} +{"current_steps": 476, "total_steps": 8674, "loss": 0.7329462766647339, "lr": 1.9998778272930842e-06, "epoch": 0.10975328568134655, "percentage": 5.49, "elapsed_time": "0:19:28", "remaining_time": "5:35:30"} +{"current_steps": 477, "total_steps": 8674, "loss": 0.715752363204956, "lr": 1.999871795099155e-06, "epoch": 0.10998385981092922, "percentage": 5.5, "elapsed_time": "0:19:31", "remaining_time": "5:35:31"} +{"current_steps": 478, "total_steps": 8674, "loss": 0.8702882528305054, "lr": 1.9998656175637265e-06, "epoch": 0.11021443394051188, "percentage": 5.51, "elapsed_time": "0:19:33", "remaining_time": "5:35:27"} +{"current_steps": 479, "total_steps": 8674, "loss": 0.8559622764587402, "lr": 1.9998592946876976e-06, "epoch": 0.11044500807009454, "percentage": 5.52, "elapsed_time": "0:19:36", 
"remaining_time": "5:35:23"} +{"current_steps": 480, "total_steps": 8674, "loss": 0.910442590713501, "lr": 1.999852826471987e-06, "epoch": 0.1106755821996772, "percentage": 5.53, "elapsed_time": "0:19:38", "remaining_time": "5:35:19"} +{"current_steps": 481, "total_steps": 8674, "loss": 0.8159372806549072, "lr": 1.9998462129175347e-06, "epoch": 0.11090615632925986, "percentage": 5.55, "elapsed_time": "0:19:40", "remaining_time": "5:35:16"} +{"current_steps": 482, "total_steps": 8674, "loss": 0.8120635747909546, "lr": 1.9998394540253022e-06, "epoch": 0.11113673045884252, "percentage": 5.56, "elapsed_time": "0:19:43", "remaining_time": "5:35:13"} +{"current_steps": 483, "total_steps": 8674, "loss": 0.7867682576179504, "lr": 1.999832549796272e-06, "epoch": 0.11136730458842518, "percentage": 5.57, "elapsed_time": "0:19:45", "remaining_time": "5:35:09"} +{"current_steps": 484, "total_steps": 8674, "loss": 0.695517897605896, "lr": 1.999825500231448e-06, "epoch": 0.11159787871800785, "percentage": 5.58, "elapsed_time": "0:19:48", "remaining_time": "5:35:06"} +{"current_steps": 485, "total_steps": 8674, "loss": 0.8402971029281616, "lr": 1.999818305331854e-06, "epoch": 0.1118284528475905, "percentage": 5.59, "elapsed_time": "0:19:50", "remaining_time": "5:35:06"} +{"current_steps": 486, "total_steps": 8674, "loss": 0.7987074851989746, "lr": 1.9998109650985372e-06, "epoch": 0.11205902697717317, "percentage": 5.6, "elapsed_time": "0:19:53", "remaining_time": "5:35:02"} +{"current_steps": 487, "total_steps": 8674, "loss": 0.6525362133979797, "lr": 1.9998034795325634e-06, "epoch": 0.11228960110675582, "percentage": 5.61, "elapsed_time": "0:19:55", "remaining_time": "5:34:59"} +{"current_steps": 488, "total_steps": 8674, "loss": 0.6218863725662231, "lr": 1.999795848635021e-06, "epoch": 0.11252017523633848, "percentage": 5.63, "elapsed_time": "0:19:57", "remaining_time": "5:34:55"} +{"current_steps": 489, "total_steps": 8674, "loss": 0.7225729823112488, "lr": 
1.99978807240702e-06, "epoch": 0.11275074936592114, "percentage": 5.64, "elapsed_time": "0:20:00", "remaining_time": "5:34:52"} +{"current_steps": 490, "total_steps": 8674, "loss": 0.7553551197052002, "lr": 1.9997801508496893e-06, "epoch": 0.1129813234955038, "percentage": 5.65, "elapsed_time": "0:20:02", "remaining_time": "5:34:50"} +{"current_steps": 491, "total_steps": 8674, "loss": 0.6695772409439087, "lr": 1.999772083964182e-06, "epoch": 0.11321189762508646, "percentage": 5.66, "elapsed_time": "0:20:05", "remaining_time": "5:34:47"} +{"current_steps": 492, "total_steps": 8674, "loss": 0.7683162689208984, "lr": 1.999763871751669e-06, "epoch": 0.11344247175466912, "percentage": 5.67, "elapsed_time": "0:20:07", "remaining_time": "5:34:48"} +{"current_steps": 493, "total_steps": 8674, "loss": 0.7761441469192505, "lr": 1.9997555142133457e-06, "epoch": 0.11367304588425178, "percentage": 5.68, "elapsed_time": "0:20:10", "remaining_time": "5:34:44"} +{"current_steps": 494, "total_steps": 8674, "loss": 0.7204692959785461, "lr": 1.999747011350426e-06, "epoch": 0.11390362001383444, "percentage": 5.7, "elapsed_time": "0:20:12", "remaining_time": "5:34:44"} +{"current_steps": 495, "total_steps": 8674, "loss": 0.6960519552230835, "lr": 1.999738363164146e-06, "epoch": 0.1141341941434171, "percentage": 5.71, "elapsed_time": "0:20:15", "remaining_time": "5:34:42"} +{"current_steps": 496, "total_steps": 8674, "loss": 0.7502788305282593, "lr": 1.999729569655763e-06, "epoch": 0.11436476827299977, "percentage": 5.72, "elapsed_time": "0:20:17", "remaining_time": "5:34:39"} +{"current_steps": 497, "total_steps": 8674, "loss": 0.7649067640304565, "lr": 1.999720630826555e-06, "epoch": 0.11459534240258243, "percentage": 5.73, "elapsed_time": "0:20:20", "remaining_time": "5:34:37"} +{"current_steps": 498, "total_steps": 8674, "loss": 0.6867918968200684, "lr": 1.9997115466778214e-06, "epoch": 0.11482591653216509, "percentage": 5.74, "elapsed_time": "0:20:22", "remaining_time": "5:34:33"} 
+{"current_steps": 499, "total_steps": 8674, "loss": 0.7324330806732178, "lr": 1.9997023172108828e-06, "epoch": 0.11505649066174775, "percentage": 5.75, "elapsed_time": "0:20:25", "remaining_time": "5:34:30"} +{"current_steps": 500, "total_steps": 8674, "loss": 0.7452527284622192, "lr": 1.999692942427081e-06, "epoch": 0.11528706479133041, "percentage": 5.76, "elapsed_time": "0:20:27", "remaining_time": "5:34:28"} +{"current_steps": 501, "total_steps": 8674, "loss": 0.8311381340026855, "lr": 1.9996834223277775e-06, "epoch": 0.11551763892091307, "percentage": 5.78, "elapsed_time": "0:20:31", "remaining_time": "5:34:45"} +{"current_steps": 502, "total_steps": 8674, "loss": 0.6955340504646301, "lr": 1.999673756914358e-06, "epoch": 0.11574821305049574, "percentage": 5.79, "elapsed_time": "0:20:33", "remaining_time": "5:34:46"} +{"current_steps": 503, "total_steps": 8674, "loss": 0.802892804145813, "lr": 1.999663946188226e-06, "epoch": 0.1159787871800784, "percentage": 5.8, "elapsed_time": "0:20:36", "remaining_time": "5:34:42"} +{"current_steps": 504, "total_steps": 8674, "loss": 0.8307123184204102, "lr": 1.9996539901508086e-06, "epoch": 0.11620936130966106, "percentage": 5.81, "elapsed_time": "0:20:38", "remaining_time": "5:34:38"} +{"current_steps": 505, "total_steps": 8674, "loss": 0.7604272365570068, "lr": 1.9996438888035525e-06, "epoch": 0.11643993543924372, "percentage": 5.82, "elapsed_time": "0:20:41", "remaining_time": "5:34:34"} +{"current_steps": 506, "total_steps": 8674, "loss": 0.798006534576416, "lr": 1.9996336421479256e-06, "epoch": 0.11667050956882638, "percentage": 5.83, "elapsed_time": "0:20:43", "remaining_time": "5:34:31"} +{"current_steps": 507, "total_steps": 8674, "loss": 0.7342728972434998, "lr": 1.999623250185418e-06, "epoch": 0.11690108369840904, "percentage": 5.85, "elapsed_time": "0:20:45", "remaining_time": "5:34:27"} +{"current_steps": 508, "total_steps": 8674, "loss": 0.7659468650817871, "lr": 1.9996127129175402e-06, "epoch": 
0.1171316578279917, "percentage": 5.86, "elapsed_time": "0:20:48", "remaining_time": "5:34:23"} +{"current_steps": 509, "total_steps": 8674, "loss": 0.6467913389205933, "lr": 1.999602030345824e-06, "epoch": 0.11736223195757436, "percentage": 5.87, "elapsed_time": "0:20:50", "remaining_time": "5:34:20"} +{"current_steps": 510, "total_steps": 8674, "loss": 0.8207371234893799, "lr": 1.9995912024718214e-06, "epoch": 0.11759280608715703, "percentage": 5.88, "elapsed_time": "0:20:53", "remaining_time": "5:34:20"} +{"current_steps": 511, "total_steps": 8674, "loss": 0.6865919232368469, "lr": 1.999580229297108e-06, "epoch": 0.11782338021673969, "percentage": 5.89, "elapsed_time": "0:20:55", "remaining_time": "5:34:16"} +{"current_steps": 512, "total_steps": 8674, "loss": 0.7367759346961975, "lr": 1.999569110823277e-06, "epoch": 0.11805395434632235, "percentage": 5.9, "elapsed_time": "0:20:57", "remaining_time": "5:34:12"} +{"current_steps": 513, "total_steps": 8674, "loss": 0.678460955619812, "lr": 1.9995578470519455e-06, "epoch": 0.11828452847590501, "percentage": 5.91, "elapsed_time": "0:21:00", "remaining_time": "5:34:09"} +{"current_steps": 514, "total_steps": 8674, "loss": 0.7442954182624817, "lr": 1.999546437984751e-06, "epoch": 0.11851510260548767, "percentage": 5.93, "elapsed_time": "0:21:02", "remaining_time": "5:34:05"} +{"current_steps": 515, "total_steps": 8674, "loss": 0.6881241798400879, "lr": 1.9995348836233515e-06, "epoch": 0.11874567673507032, "percentage": 5.94, "elapsed_time": "0:21:05", "remaining_time": "5:34:02"} +{"current_steps": 516, "total_steps": 8674, "loss": 0.6957181692123413, "lr": 1.9995231839694267e-06, "epoch": 0.11897625086465298, "percentage": 5.95, "elapsed_time": "0:21:07", "remaining_time": "5:33:58"} +{"current_steps": 517, "total_steps": 8674, "loss": 0.655665934085846, "lr": 1.9995113390246773e-06, "epoch": 0.11920682499423564, "percentage": 5.96, "elapsed_time": "0:21:09", "remaining_time": "5:33:54"} +{"current_steps": 518, 
"total_steps": 8674, "loss": 0.8156173229217529, "lr": 1.9994993487908245e-06, "epoch": 0.1194373991238183, "percentage": 5.97, "elapsed_time": "0:21:12", "remaining_time": "5:33:51"} +{"current_steps": 519, "total_steps": 8674, "loss": 0.7063135504722595, "lr": 1.9994872132696125e-06, "epoch": 0.11966797325340096, "percentage": 5.98, "elapsed_time": "0:21:14", "remaining_time": "5:33:52"} +{"current_steps": 520, "total_steps": 8674, "loss": 0.694409966468811, "lr": 1.9994749324628046e-06, "epoch": 0.11989854738298363, "percentage": 5.99, "elapsed_time": "0:21:17", "remaining_time": "5:33:48"} +{"current_steps": 521, "total_steps": 8674, "loss": 0.8167020082473755, "lr": 1.9994625063721852e-06, "epoch": 0.12012912151256629, "percentage": 6.01, "elapsed_time": "0:21:19", "remaining_time": "5:33:46"} +{"current_steps": 522, "total_steps": 8674, "loss": 0.7214051485061646, "lr": 1.9994499349995615e-06, "epoch": 0.12035969564214895, "percentage": 6.02, "elapsed_time": "0:21:22", "remaining_time": "5:33:42"} +{"current_steps": 523, "total_steps": 8674, "loss": 0.8798317909240723, "lr": 1.999437218346761e-06, "epoch": 0.12059026977173161, "percentage": 6.03, "elapsed_time": "0:21:24", "remaining_time": "5:33:38"} +{"current_steps": 524, "total_steps": 8674, "loss": 0.684230387210846, "lr": 1.9994243564156316e-06, "epoch": 0.12082084390131427, "percentage": 6.04, "elapsed_time": "0:21:26", "remaining_time": "5:33:35"} +{"current_steps": 525, "total_steps": 8674, "loss": 0.7519755363464355, "lr": 1.999411349208043e-06, "epoch": 0.12105141803089693, "percentage": 6.05, "elapsed_time": "0:21:29", "remaining_time": "5:33:31"} +{"current_steps": 526, "total_steps": 8674, "loss": 0.8420398235321045, "lr": 1.9993981967258857e-06, "epoch": 0.1212819921604796, "percentage": 6.06, "elapsed_time": "0:21:31", "remaining_time": "5:33:28"} +{"current_steps": 527, "total_steps": 8674, "loss": 0.8349270820617676, "lr": 1.999384898971073e-06, "epoch": 0.12151256629006225, "percentage": 
6.08, "elapsed_time": "0:21:34", "remaining_time": "5:33:28"} +{"current_steps": 528, "total_steps": 8674, "loss": 0.794980525970459, "lr": 1.999371455945536e-06, "epoch": 0.12174314041964492, "percentage": 6.09, "elapsed_time": "0:21:36", "remaining_time": "5:33:24"} +{"current_steps": 529, "total_steps": 8674, "loss": 0.666529655456543, "lr": 1.9993578676512294e-06, "epoch": 0.12197371454922758, "percentage": 6.1, "elapsed_time": "0:21:38", "remaining_time": "5:33:20"} +{"current_steps": 530, "total_steps": 8674, "loss": 0.7356991767883301, "lr": 1.999344134090129e-06, "epoch": 0.12220428867881024, "percentage": 6.11, "elapsed_time": "0:21:41", "remaining_time": "5:33:17"} +{"current_steps": 531, "total_steps": 8674, "loss": 0.6289858818054199, "lr": 1.9993302552642305e-06, "epoch": 0.1224348628083929, "percentage": 6.12, "elapsed_time": "0:21:43", "remaining_time": "5:33:14"} +{"current_steps": 532, "total_steps": 8674, "loss": 0.706937313079834, "lr": 1.9993162311755516e-06, "epoch": 0.12266543693797556, "percentage": 6.13, "elapsed_time": "0:21:46", "remaining_time": "5:33:10"} +{"current_steps": 533, "total_steps": 8674, "loss": 0.7265158891677856, "lr": 1.99930206182613e-06, "epoch": 0.12289601106755822, "percentage": 6.14, "elapsed_time": "0:21:48", "remaining_time": "5:33:09"} +{"current_steps": 534, "total_steps": 8674, "loss": 0.6575910449028015, "lr": 1.999287747218027e-06, "epoch": 0.12312658519714088, "percentage": 6.16, "elapsed_time": "0:21:51", "remaining_time": "5:33:05"} +{"current_steps": 535, "total_steps": 8674, "loss": 0.6696841716766357, "lr": 1.999273287353322e-06, "epoch": 0.12335715932672355, "percentage": 6.17, "elapsed_time": "0:21:53", "remaining_time": "5:33:05"} +{"current_steps": 536, "total_steps": 8674, "loss": 0.7749101519584656, "lr": 1.9992586822341177e-06, "epoch": 0.1235877334563062, "percentage": 6.18, "elapsed_time": "0:21:56", "remaining_time": "5:33:01"} +{"current_steps": 537, "total_steps": 8674, "loss": 
0.6880518198013306, "lr": 1.9992439318625367e-06, "epoch": 0.12381830758588887, "percentage": 6.19, "elapsed_time": "0:21:58", "remaining_time": "5:32:58"} +{"current_steps": 538, "total_steps": 8674, "loss": 0.6871178150177002, "lr": 1.999229036240723e-06, "epoch": 0.12404888171547153, "percentage": 6.2, "elapsed_time": "0:22:00", "remaining_time": "5:32:55"} +{"current_steps": 539, "total_steps": 8674, "loss": 0.5867285132408142, "lr": 1.999213995370842e-06, "epoch": 0.12427945584505419, "percentage": 6.21, "elapsed_time": "0:22:03", "remaining_time": "5:32:52"} +{"current_steps": 540, "total_steps": 8674, "loss": 0.8276966214179993, "lr": 1.99919880925508e-06, "epoch": 0.12451002997463685, "percentage": 6.23, "elapsed_time": "0:22:05", "remaining_time": "5:32:48"} +{"current_steps": 541, "total_steps": 8674, "loss": 0.7710754871368408, "lr": 1.9991834778956445e-06, "epoch": 0.12474060410421951, "percentage": 6.24, "elapsed_time": "0:22:08", "remaining_time": "5:32:44"} +{"current_steps": 542, "total_steps": 8674, "loss": 0.7753217816352844, "lr": 1.9991680012947642e-06, "epoch": 0.12497117823380217, "percentage": 6.25, "elapsed_time": "0:22:10", "remaining_time": "5:32:40"} +{"current_steps": 543, "total_steps": 8674, "loss": 0.7906090617179871, "lr": 1.9991523794546886e-06, "epoch": 0.12520175236338482, "percentage": 6.26, "elapsed_time": "0:22:12", "remaining_time": "5:32:37"} +{"current_steps": 544, "total_steps": 8674, "loss": 0.7199760675430298, "lr": 1.9991366123776885e-06, "epoch": 0.12543232649296748, "percentage": 6.27, "elapsed_time": "0:22:15", "remaining_time": "5:32:36"} +{"current_steps": 545, "total_steps": 8674, "loss": 0.671667218208313, "lr": 1.9991207000660556e-06, "epoch": 0.12566290062255014, "percentage": 6.28, "elapsed_time": "0:22:17", "remaining_time": "5:32:33"} +{"current_steps": 546, "total_steps": 8674, "loss": 0.7289182543754578, "lr": 1.9991046425221036e-06, "epoch": 0.1258934747521328, "percentage": 6.29, "elapsed_time": 
"0:22:20", "remaining_time": "5:32:30"} +{"current_steps": 547, "total_steps": 8674, "loss": 0.6894270181655884, "lr": 1.999088439748166e-06, "epoch": 0.12612404888171547, "percentage": 6.31, "elapsed_time": "0:22:22", "remaining_time": "5:32:26"} +{"current_steps": 548, "total_steps": 8674, "loss": 0.5861620306968689, "lr": 1.9990720917465983e-06, "epoch": 0.12635462301129813, "percentage": 6.32, "elapsed_time": "0:22:25", "remaining_time": "5:32:24"} +{"current_steps": 549, "total_steps": 8674, "loss": 0.7082245349884033, "lr": 1.999055598519777e-06, "epoch": 0.1265851971408808, "percentage": 6.33, "elapsed_time": "0:22:27", "remaining_time": "5:32:21"} +{"current_steps": 550, "total_steps": 8674, "loss": 0.6746149659156799, "lr": 1.999038960070099e-06, "epoch": 0.12681577127046345, "percentage": 6.34, "elapsed_time": "0:22:29", "remaining_time": "5:32:17"} +{"current_steps": 551, "total_steps": 8674, "loss": 0.7791188955307007, "lr": 1.999022176399983e-06, "epoch": 0.1270463454000461, "percentage": 6.35, "elapsed_time": "0:22:32", "remaining_time": "5:32:14"} +{"current_steps": 552, "total_steps": 8674, "loss": 0.6371017694473267, "lr": 1.999005247511869e-06, "epoch": 0.12727691952962877, "percentage": 6.36, "elapsed_time": "0:22:34", "remaining_time": "5:32:14"} +{"current_steps": 553, "total_steps": 8674, "loss": 0.7006558179855347, "lr": 1.9989881734082182e-06, "epoch": 0.12750749365921143, "percentage": 6.38, "elapsed_time": "0:22:37", "remaining_time": "5:32:09"} +{"current_steps": 554, "total_steps": 8674, "loss": 0.7011476755142212, "lr": 1.9989709540915115e-06, "epoch": 0.1277380677887941, "percentage": 6.39, "elapsed_time": "0:22:39", "remaining_time": "5:32:06"} +{"current_steps": 555, "total_steps": 8674, "loss": 0.6518280506134033, "lr": 1.998953589564252e-06, "epoch": 0.12796864191837676, "percentage": 6.4, "elapsed_time": "0:22:41", "remaining_time": "5:32:03"} +{"current_steps": 556, "total_steps": 8674, "loss": 0.703351616859436, "lr": 
1.9989360798289646e-06, "epoch": 0.12819921604795942, "percentage": 6.41, "elapsed_time": "0:22:44", "remaining_time": "5:32:00"} +{"current_steps": 557, "total_steps": 8674, "loss": 0.7498817443847656, "lr": 1.998918424888194e-06, "epoch": 0.12842979017754208, "percentage": 6.42, "elapsed_time": "0:22:46", "remaining_time": "5:31:56"} +{"current_steps": 558, "total_steps": 8674, "loss": 0.647042989730835, "lr": 1.998900624744507e-06, "epoch": 0.12866036430712474, "percentage": 6.43, "elapsed_time": "0:22:49", "remaining_time": "5:31:53"} +{"current_steps": 559, "total_steps": 8674, "loss": 0.7519131898880005, "lr": 1.99888267940049e-06, "epoch": 0.1288909384367074, "percentage": 6.44, "elapsed_time": "0:22:51", "remaining_time": "5:31:49"} +{"current_steps": 560, "total_steps": 8674, "loss": 0.8416757583618164, "lr": 1.9988645888587524e-06, "epoch": 0.12912151256629006, "percentage": 6.46, "elapsed_time": "0:22:53", "remaining_time": "5:31:45"} +{"current_steps": 561, "total_steps": 8674, "loss": 0.7044156193733215, "lr": 1.9988463531219238e-06, "epoch": 0.12935208669587273, "percentage": 6.47, "elapsed_time": "0:22:56", "remaining_time": "5:31:46"} +{"current_steps": 562, "total_steps": 8674, "loss": 0.5429179668426514, "lr": 1.9988279721926547e-06, "epoch": 0.1295826608254554, "percentage": 6.48, "elapsed_time": "0:22:58", "remaining_time": "5:31:43"} +{"current_steps": 563, "total_steps": 8674, "loss": 0.6146735548973083, "lr": 1.9988094460736173e-06, "epoch": 0.12981323495503805, "percentage": 6.49, "elapsed_time": "0:23:01", "remaining_time": "5:31:41"} +{"current_steps": 564, "total_steps": 8674, "loss": 0.7544587850570679, "lr": 1.9987907747675038e-06, "epoch": 0.1300438090846207, "percentage": 6.5, "elapsed_time": "0:23:03", "remaining_time": "5:31:38"} +{"current_steps": 565, "total_steps": 8674, "loss": 0.7344266772270203, "lr": 1.998771958277029e-06, "epoch": 0.13027438321420337, "percentage": 6.51, "elapsed_time": "0:23:06", "remaining_time": 
"5:31:34"} +{"current_steps": 566, "total_steps": 8674, "loss": 0.6952091455459595, "lr": 1.9987529966049276e-06, "epoch": 0.13050495734378603, "percentage": 6.53, "elapsed_time": "0:23:08", "remaining_time": "5:31:31"} +{"current_steps": 567, "total_steps": 8674, "loss": 0.6164644956588745, "lr": 1.9987338897539563e-06, "epoch": 0.1307355314733687, "percentage": 6.54, "elapsed_time": "0:23:10", "remaining_time": "5:31:27"} +{"current_steps": 568, "total_steps": 8674, "loss": 0.7554208636283875, "lr": 1.998714637726892e-06, "epoch": 0.13096610560295135, "percentage": 6.55, "elapsed_time": "0:23:13", "remaining_time": "5:31:25"} +{"current_steps": 569, "total_steps": 8674, "loss": 0.6640980243682861, "lr": 1.9986952405265336e-06, "epoch": 0.13119667973253402, "percentage": 6.56, "elapsed_time": "0:23:15", "remaining_time": "5:31:24"} +{"current_steps": 570, "total_steps": 8674, "loss": 0.6947968006134033, "lr": 1.9986756981557005e-06, "epoch": 0.13142725386211668, "percentage": 6.57, "elapsed_time": "0:23:18", "remaining_time": "5:31:21"} +{"current_steps": 571, "total_steps": 8674, "loss": 0.5987592935562134, "lr": 1.9986560106172332e-06, "epoch": 0.13165782799169934, "percentage": 6.58, "elapsed_time": "0:23:20", "remaining_time": "5:31:18"} +{"current_steps": 572, "total_steps": 8674, "loss": 0.5830701589584351, "lr": 1.9986361779139944e-06, "epoch": 0.131888402121282, "percentage": 6.59, "elapsed_time": "0:23:23", "remaining_time": "5:31:15"} +{"current_steps": 573, "total_steps": 8674, "loss": 0.6589827537536621, "lr": 1.9986162000488655e-06, "epoch": 0.13211897625086466, "percentage": 6.61, "elapsed_time": "0:23:25", "remaining_time": "5:31:11"} +{"current_steps": 574, "total_steps": 8674, "loss": 0.7761766910552979, "lr": 1.9985960770247514e-06, "epoch": 0.13234955038044732, "percentage": 6.62, "elapsed_time": "0:23:27", "remaining_time": "5:31:08"} +{"current_steps": 575, "total_steps": 8674, "loss": 0.6817613244056702, "lr": 1.998575808844577e-06, "epoch": 
0.13258012451002998, "percentage": 6.63, "elapsed_time": "0:23:30", "remaining_time": "5:31:04"} +{"current_steps": 576, "total_steps": 8674, "loss": 0.553085207939148, "lr": 1.998555395511289e-06, "epoch": 0.13281069863961265, "percentage": 6.64, "elapsed_time": "0:23:32", "remaining_time": "5:31:01"} +{"current_steps": 577, "total_steps": 8674, "loss": 0.6500711441040039, "lr": 1.998534837027854e-06, "epoch": 0.1330412727691953, "percentage": 6.65, "elapsed_time": "0:23:35", "remaining_time": "5:31:02"} +{"current_steps": 578, "total_steps": 8674, "loss": 0.7818950414657593, "lr": 1.9985141333972605e-06, "epoch": 0.13327184689877797, "percentage": 6.66, "elapsed_time": "0:23:37", "remaining_time": "5:30:58"} +{"current_steps": 579, "total_steps": 8674, "loss": 0.7030247449874878, "lr": 1.9984932846225178e-06, "epoch": 0.13350242102836063, "percentage": 6.68, "elapsed_time": "0:23:40", "remaining_time": "5:30:55"} +{"current_steps": 580, "total_steps": 8674, "loss": 0.6336206197738647, "lr": 1.9984722907066572e-06, "epoch": 0.1337329951579433, "percentage": 6.69, "elapsed_time": "0:23:42", "remaining_time": "5:30:52"} +{"current_steps": 581, "total_steps": 8674, "loss": 0.7483044862747192, "lr": 1.9984511516527295e-06, "epoch": 0.13396356928752595, "percentage": 6.7, "elapsed_time": "0:23:45", "remaining_time": "5:30:51"} +{"current_steps": 582, "total_steps": 8674, "loss": 0.7124725580215454, "lr": 1.9984298674638084e-06, "epoch": 0.1341941434171086, "percentage": 6.71, "elapsed_time": "0:23:47", "remaining_time": "5:30:47"} +{"current_steps": 583, "total_steps": 8674, "loss": 0.623436450958252, "lr": 1.998408438142987e-06, "epoch": 0.13442471754669127, "percentage": 6.72, "elapsed_time": "0:23:49", "remaining_time": "5:30:45"} +{"current_steps": 584, "total_steps": 8674, "loss": 0.646303653717041, "lr": 1.9983868636933804e-06, "epoch": 0.1346552916762739, "percentage": 6.73, "elapsed_time": "0:23:52", "remaining_time": "5:30:41"} +{"current_steps": 585, 
"total_steps": 8674, "loss": 0.6349619626998901, "lr": 1.998365144118125e-06, "epoch": 0.13488586580585657, "percentage": 6.74, "elapsed_time": "0:23:54", "remaining_time": "5:30:38"} +{"current_steps": 586, "total_steps": 8674, "loss": 0.5222466588020325, "lr": 1.9983432794203778e-06, "epoch": 0.13511643993543923, "percentage": 6.76, "elapsed_time": "0:23:57", "remaining_time": "5:30:39"} +{"current_steps": 587, "total_steps": 8674, "loss": 0.7210453152656555, "lr": 1.998321269603317e-06, "epoch": 0.1353470140650219, "percentage": 6.77, "elapsed_time": "0:23:59", "remaining_time": "5:30:35"} +{"current_steps": 588, "total_steps": 8674, "loss": 0.6829872131347656, "lr": 1.998299114670142e-06, "epoch": 0.13557758819460455, "percentage": 6.78, "elapsed_time": "0:24:02", "remaining_time": "5:30:31"} +{"current_steps": 589, "total_steps": 8674, "loss": 0.6493744254112244, "lr": 1.998276814624073e-06, "epoch": 0.13580816232418721, "percentage": 6.79, "elapsed_time": "0:24:04", "remaining_time": "5:30:28"} +{"current_steps": 590, "total_steps": 8674, "loss": 0.6885819435119629, "lr": 1.998254369468352e-06, "epoch": 0.13603873645376988, "percentage": 6.8, "elapsed_time": "0:24:06", "remaining_time": "5:30:25"} +{"current_steps": 591, "total_steps": 8674, "loss": 0.6393503546714783, "lr": 1.9982317792062415e-06, "epoch": 0.13626931058335254, "percentage": 6.81, "elapsed_time": "0:24:09", "remaining_time": "5:30:22"} +{"current_steps": 592, "total_steps": 8674, "loss": 0.7243417501449585, "lr": 1.998209043841025e-06, "epoch": 0.1364998847129352, "percentage": 6.82, "elapsed_time": "0:24:11", "remaining_time": "5:30:18"} +{"current_steps": 593, "total_steps": 8674, "loss": 0.5955190658569336, "lr": 1.9981861633760073e-06, "epoch": 0.13673045884251786, "percentage": 6.84, "elapsed_time": "0:24:14", "remaining_time": "5:30:14"} +{"current_steps": 594, "total_steps": 8674, "loss": 0.6907675862312317, "lr": 1.9981631378145147e-06, "epoch": 0.13696103297210052, "percentage": 
6.85, "elapsed_time": "0:24:16", "remaining_time": "5:30:14"} +{"current_steps": 595, "total_steps": 8674, "loss": 0.8540418148040771, "lr": 1.9981399671598938e-06, "epoch": 0.13719160710168318, "percentage": 6.86, "elapsed_time": "0:24:19", "remaining_time": "5:30:11"} +{"current_steps": 596, "total_steps": 8674, "loss": 0.6558555364608765, "lr": 1.9981166514155128e-06, "epoch": 0.13742218123126584, "percentage": 6.87, "elapsed_time": "0:24:21", "remaining_time": "5:30:07"} +{"current_steps": 597, "total_steps": 8674, "loss": 0.6902164220809937, "lr": 1.9980931905847607e-06, "epoch": 0.1376527553608485, "percentage": 6.88, "elapsed_time": "0:24:23", "remaining_time": "5:30:04"} +{"current_steps": 598, "total_steps": 8674, "loss": 0.7090387344360352, "lr": 1.9980695846710485e-06, "epoch": 0.13788332949043117, "percentage": 6.89, "elapsed_time": "0:24:26", "remaining_time": "5:30:00"} +{"current_steps": 599, "total_steps": 8674, "loss": 0.5913621187210083, "lr": 1.9980458336778067e-06, "epoch": 0.13811390362001383, "percentage": 6.91, "elapsed_time": "0:24:28", "remaining_time": "5:29:56"} +{"current_steps": 600, "total_steps": 8674, "loss": 0.6742709279060364, "lr": 1.998021937608488e-06, "epoch": 0.1383444777495965, "percentage": 6.92, "elapsed_time": "0:24:30", "remaining_time": "5:29:52"} +{"current_steps": 601, "total_steps": 8674, "loss": 0.7156273126602173, "lr": 1.997997896466566e-06, "epoch": 0.13857505187917915, "percentage": 6.93, "elapsed_time": "0:24:34", "remaining_time": "5:30:08"} +{"current_steps": 602, "total_steps": 8674, "loss": 0.6039655208587646, "lr": 1.9979737102555358e-06, "epoch": 0.1388056260087618, "percentage": 6.94, "elapsed_time": "0:24:37", "remaining_time": "5:30:05"} +{"current_steps": 603, "total_steps": 8674, "loss": 0.6437175273895264, "lr": 1.9979493789789123e-06, "epoch": 0.13903620013834447, "percentage": 6.95, "elapsed_time": "0:24:39", "remaining_time": "5:30:04"} +{"current_steps": 604, "total_steps": 8674, "loss": 
0.6037663221359253, "lr": 1.9979249026402327e-06, "epoch": 0.13926677426792713, "percentage": 6.96, "elapsed_time": "0:24:41", "remaining_time": "5:30:00"} +{"current_steps": 605, "total_steps": 8674, "loss": 0.6014829874038696, "lr": 1.9979002812430544e-06, "epoch": 0.1394973483975098, "percentage": 6.97, "elapsed_time": "0:24:44", "remaining_time": "5:29:56"} +{"current_steps": 606, "total_steps": 8674, "loss": 0.5644428133964539, "lr": 1.9978755147909575e-06, "epoch": 0.13972792252709246, "percentage": 6.99, "elapsed_time": "0:24:46", "remaining_time": "5:29:53"} +{"current_steps": 607, "total_steps": 8674, "loss": 0.5483256578445435, "lr": 1.997850603287541e-06, "epoch": 0.13995849665667512, "percentage": 7.0, "elapsed_time": "0:24:49", "remaining_time": "5:29:49"} +{"current_steps": 608, "total_steps": 8674, "loss": 0.6323236227035522, "lr": 1.9978255467364264e-06, "epoch": 0.14018907078625778, "percentage": 7.01, "elapsed_time": "0:24:51", "remaining_time": "5:29:45"} +{"current_steps": 609, "total_steps": 8674, "loss": 0.677186131477356, "lr": 1.9978003451412563e-06, "epoch": 0.14041964491584044, "percentage": 7.02, "elapsed_time": "0:24:53", "remaining_time": "5:29:41"} +{"current_steps": 610, "total_steps": 8674, "loss": 0.6768285036087036, "lr": 1.9977749985056934e-06, "epoch": 0.1406502190454231, "percentage": 7.03, "elapsed_time": "0:24:56", "remaining_time": "5:29:38"} +{"current_steps": 611, "total_steps": 8674, "loss": 0.5347047448158264, "lr": 1.997749506833422e-06, "epoch": 0.14088079317500576, "percentage": 7.04, "elapsed_time": "0:24:58", "remaining_time": "5:29:38"} +{"current_steps": 612, "total_steps": 8674, "loss": 0.7459336519241333, "lr": 1.9977238701281484e-06, "epoch": 0.14111136730458843, "percentage": 7.06, "elapsed_time": "0:25:01", "remaining_time": "5:29:34"} +{"current_steps": 613, "total_steps": 8674, "loss": 0.6617337465286255, "lr": 1.9976980883935982e-06, "epoch": 0.1413419414341711, "percentage": 7.07, "elapsed_time": 
"0:25:03", "remaining_time": "5:29:31"} +{"current_steps": 614, "total_steps": 8674, "loss": 0.6214765310287476, "lr": 1.9976721616335197e-06, "epoch": 0.14157251556375375, "percentage": 7.08, "elapsed_time": "0:25:05", "remaining_time": "5:29:27"} +{"current_steps": 615, "total_steps": 8674, "loss": 0.7468793392181396, "lr": 1.9976460898516814e-06, "epoch": 0.1418030896933364, "percentage": 7.09, "elapsed_time": "0:25:08", "remaining_time": "5:29:23"} +{"current_steps": 616, "total_steps": 8674, "loss": 0.676013708114624, "lr": 1.9976198730518733e-06, "epoch": 0.14203366382291907, "percentage": 7.1, "elapsed_time": "0:25:10", "remaining_time": "5:29:19"} +{"current_steps": 617, "total_steps": 8674, "loss": 0.6350057125091553, "lr": 1.9975935112379057e-06, "epoch": 0.14226423795250173, "percentage": 7.11, "elapsed_time": "0:25:12", "remaining_time": "5:29:16"} +{"current_steps": 618, "total_steps": 8674, "loss": 0.6743426322937012, "lr": 1.997567004413611e-06, "epoch": 0.1424948120820844, "percentage": 7.12, "elapsed_time": "0:25:15", "remaining_time": "5:29:12"} +{"current_steps": 619, "total_steps": 8674, "loss": 0.5894836187362671, "lr": 1.9975403525828423e-06, "epoch": 0.14272538621166705, "percentage": 7.14, "elapsed_time": "0:25:17", "remaining_time": "5:29:11"} +{"current_steps": 620, "total_steps": 8674, "loss": 0.7142415046691895, "lr": 1.9975135557494735e-06, "epoch": 0.14295596034124972, "percentage": 7.15, "elapsed_time": "0:25:20", "remaining_time": "5:29:07"} +{"current_steps": 621, "total_steps": 8674, "loss": 0.6402454972267151, "lr": 1.9974866139174e-06, "epoch": 0.14318653447083238, "percentage": 7.16, "elapsed_time": "0:25:22", "remaining_time": "5:29:03"} +{"current_steps": 622, "total_steps": 8674, "loss": 0.6870661973953247, "lr": 1.997459527090538e-06, "epoch": 0.14341710860041504, "percentage": 7.17, "elapsed_time": "0:25:24", "remaining_time": "5:29:00"} +{"current_steps": 623, "total_steps": 8674, "loss": 0.5526704788208008, "lr": 
1.9974322952728247e-06, "epoch": 0.1436476827299977, "percentage": 7.18, "elapsed_time": "0:25:27", "remaining_time": "5:28:56"} +{"current_steps": 624, "total_steps": 8674, "loss": 0.6712762117385864, "lr": 1.9974049184682186e-06, "epoch": 0.14387825685958036, "percentage": 7.19, "elapsed_time": "0:25:29", "remaining_time": "5:28:52"} +{"current_steps": 625, "total_steps": 8674, "loss": 0.6064080595970154, "lr": 1.997377396680699e-06, "epoch": 0.14410883098916302, "percentage": 7.21, "elapsed_time": "0:25:31", "remaining_time": "5:28:49"} +{"current_steps": 626, "total_steps": 8674, "loss": 0.5540767908096313, "lr": 1.997349729914267e-06, "epoch": 0.14433940511874568, "percentage": 7.22, "elapsed_time": "0:25:34", "remaining_time": "5:28:45"} +{"current_steps": 627, "total_steps": 8674, "loss": 0.52143394947052, "lr": 1.997321918172944e-06, "epoch": 0.14456997924832835, "percentage": 7.23, "elapsed_time": "0:25:36", "remaining_time": "5:28:41"} +{"current_steps": 628, "total_steps": 8674, "loss": 0.7708792686462402, "lr": 1.9972939614607723e-06, "epoch": 0.144800553377911, "percentage": 7.24, "elapsed_time": "0:25:39", "remaining_time": "5:28:40"} +{"current_steps": 629, "total_steps": 8674, "loss": 0.706872284412384, "lr": 1.997265859781816e-06, "epoch": 0.14503112750749367, "percentage": 7.25, "elapsed_time": "0:25:41", "remaining_time": "5:28:37"} +{"current_steps": 630, "total_steps": 8674, "loss": 0.6643307209014893, "lr": 1.99723761314016e-06, "epoch": 0.14526170163707633, "percentage": 7.26, "elapsed_time": "0:25:43", "remaining_time": "5:28:33"} +{"current_steps": 631, "total_steps": 8674, "loss": 0.6582880020141602, "lr": 1.9972092215399107e-06, "epoch": 0.145492275766659, "percentage": 7.27, "elapsed_time": "0:25:46", "remaining_time": "5:28:30"} +{"current_steps": 632, "total_steps": 8674, "loss": 0.5704749822616577, "lr": 1.997180684985194e-06, "epoch": 0.14572284989624165, "percentage": 7.29, "elapsed_time": "0:25:48", "remaining_time": "5:28:27"} 
+{"current_steps": 633, "total_steps": 8674, "loss": 0.6021866798400879, "lr": 1.997152003480159e-06, "epoch": 0.1459534240258243, "percentage": 7.3, "elapsed_time": "0:25:51", "remaining_time": "5:28:23"} +{"current_steps": 634, "total_steps": 8674, "loss": 0.6980762481689453, "lr": 1.9971231770289745e-06, "epoch": 0.14618399815540697, "percentage": 7.31, "elapsed_time": "0:25:53", "remaining_time": "5:28:18"} +{"current_steps": 635, "total_steps": 8674, "loss": 0.6252140998840332, "lr": 1.9970942056358307e-06, "epoch": 0.14641457228498964, "percentage": 7.32, "elapsed_time": "0:25:55", "remaining_time": "5:28:15"} +{"current_steps": 636, "total_steps": 8674, "loss": 0.5938589572906494, "lr": 1.9970650893049384e-06, "epoch": 0.1466451464145723, "percentage": 7.33, "elapsed_time": "0:25:58", "remaining_time": "5:28:15"} +{"current_steps": 637, "total_steps": 8674, "loss": 0.48420464992523193, "lr": 1.997035828040531e-06, "epoch": 0.14687572054415496, "percentage": 7.34, "elapsed_time": "0:26:00", "remaining_time": "5:28:12"} +{"current_steps": 638, "total_steps": 8674, "loss": 0.6917499303817749, "lr": 1.997006421846861e-06, "epoch": 0.14710629467373762, "percentage": 7.36, "elapsed_time": "0:26:03", "remaining_time": "5:28:09"} +{"current_steps": 639, "total_steps": 8674, "loss": 0.7040522694587708, "lr": 1.9969768707282034e-06, "epoch": 0.14733686880332028, "percentage": 7.37, "elapsed_time": "0:26:05", "remaining_time": "5:28:06"} +{"current_steps": 640, "total_steps": 8674, "loss": 0.6131860017776489, "lr": 1.9969471746888535e-06, "epoch": 0.14756744293290291, "percentage": 7.38, "elapsed_time": "0:26:07", "remaining_time": "5:28:03"} +{"current_steps": 641, "total_steps": 8674, "loss": 0.7042062282562256, "lr": 1.996917333733128e-06, "epoch": 0.14779801706248558, "percentage": 7.39, "elapsed_time": "0:26:10", "remaining_time": "5:28:00"} +{"current_steps": 642, "total_steps": 8674, "loss": 0.6729326844215393, "lr": 1.9968873478653647e-06, "epoch": 
0.14802859119206824, "percentage": 7.4, "elapsed_time": "0:26:12", "remaining_time": "5:27:56"} +{"current_steps": 643, "total_steps": 8674, "loss": 0.5801228880882263, "lr": 1.996857217089922e-06, "epoch": 0.1482591653216509, "percentage": 7.41, "elapsed_time": "0:26:15", "remaining_time": "5:27:52"} +{"current_steps": 644, "total_steps": 8674, "loss": 0.6657989025115967, "lr": 1.99682694141118e-06, "epoch": 0.14848973945123356, "percentage": 7.42, "elapsed_time": "0:26:17", "remaining_time": "5:27:51"} +{"current_steps": 645, "total_steps": 8674, "loss": 0.5915562510490417, "lr": 1.9967965208335395e-06, "epoch": 0.14872031358081622, "percentage": 7.44, "elapsed_time": "0:26:20", "remaining_time": "5:27:48"} +{"current_steps": 646, "total_steps": 8674, "loss": 0.6651759147644043, "lr": 1.9967659553614225e-06, "epoch": 0.14895088771039888, "percentage": 7.45, "elapsed_time": "0:26:22", "remaining_time": "5:27:44"} +{"current_steps": 647, "total_steps": 8674, "loss": 0.625860333442688, "lr": 1.996735244999272e-06, "epoch": 0.14918146183998154, "percentage": 7.46, "elapsed_time": "0:26:24", "remaining_time": "5:27:40"} +{"current_steps": 648, "total_steps": 8674, "loss": 0.5731238126754761, "lr": 1.996704389751552e-06, "epoch": 0.1494120359695642, "percentage": 7.47, "elapsed_time": "0:26:27", "remaining_time": "5:27:37"} +{"current_steps": 649, "total_steps": 8674, "loss": 0.6233615875244141, "lr": 1.996673389622748e-06, "epoch": 0.14964261009914687, "percentage": 7.48, "elapsed_time": "0:26:29", "remaining_time": "5:27:33"} +{"current_steps": 650, "total_steps": 8674, "loss": 0.5294947028160095, "lr": 1.9966422446173655e-06, "epoch": 0.14987318422872953, "percentage": 7.49, "elapsed_time": "0:26:31", "remaining_time": "5:27:29"} +{"current_steps": 651, "total_steps": 8674, "loss": 0.6234334707260132, "lr": 1.996610954739932e-06, "epoch": 0.1501037583583122, "percentage": 7.51, "elapsed_time": "0:26:34", "remaining_time": "5:27:26"} +{"current_steps": 652, 
"total_steps": 8674, "loss": 0.5800126194953918, "lr": 1.996579519994996e-06, "epoch": 0.15033433248789485, "percentage": 7.52, "elapsed_time": "0:26:36", "remaining_time": "5:27:23"} +{"current_steps": 653, "total_steps": 8674, "loss": 0.7072441577911377, "lr": 1.9965479403871268e-06, "epoch": 0.1505649066174775, "percentage": 7.53, "elapsed_time": "0:26:39", "remaining_time": "5:27:22"} +{"current_steps": 654, "total_steps": 8674, "loss": 0.6350210309028625, "lr": 1.996516215920915e-06, "epoch": 0.15079548074706017, "percentage": 7.54, "elapsed_time": "0:26:41", "remaining_time": "5:27:19"} +{"current_steps": 655, "total_steps": 8674, "loss": 0.6098944544792175, "lr": 1.996484346600971e-06, "epoch": 0.15102605487664283, "percentage": 7.55, "elapsed_time": "0:26:43", "remaining_time": "5:27:16"} +{"current_steps": 656, "total_steps": 8674, "loss": 0.6593213081359863, "lr": 1.996452332431929e-06, "epoch": 0.1512566290062255, "percentage": 7.56, "elapsed_time": "0:26:46", "remaining_time": "5:27:12"} +{"current_steps": 657, "total_steps": 8674, "loss": 0.6997909545898438, "lr": 1.9964201734184413e-06, "epoch": 0.15148720313580816, "percentage": 7.57, "elapsed_time": "0:26:48", "remaining_time": "5:27:09"} +{"current_steps": 658, "total_steps": 8674, "loss": 0.5672277212142944, "lr": 1.996387869565183e-06, "epoch": 0.15171777726539082, "percentage": 7.59, "elapsed_time": "0:26:51", "remaining_time": "5:27:05"} +{"current_steps": 659, "total_steps": 8674, "loss": 0.5835613012313843, "lr": 1.99635542087685e-06, "epoch": 0.15194835139497348, "percentage": 7.6, "elapsed_time": "0:26:53", "remaining_time": "5:27:01"} +{"current_steps": 660, "total_steps": 8674, "loss": 0.6001917123794556, "lr": 1.9963228273581587e-06, "epoch": 0.15217892552455614, "percentage": 7.61, "elapsed_time": "0:26:55", "remaining_time": "5:26:58"} +{"current_steps": 661, "total_steps": 8674, "loss": 0.6421242356300354, "lr": 1.996290089013847e-06, "epoch": 0.1524094996541388, "percentage": 7.62, 
"elapsed_time": "0:26:58", "remaining_time": "5:26:57"} +{"current_steps": 662, "total_steps": 8674, "loss": 0.6888365745544434, "lr": 1.996257205848674e-06, "epoch": 0.15264007378372146, "percentage": 7.63, "elapsed_time": "0:27:00", "remaining_time": "5:26:53"} +{"current_steps": 663, "total_steps": 8674, "loss": 0.6694042682647705, "lr": 1.9962241778674193e-06, "epoch": 0.15287064791330413, "percentage": 7.64, "elapsed_time": "0:27:02", "remaining_time": "5:26:50"} +{"current_steps": 664, "total_steps": 8674, "loss": 0.6754042506217957, "lr": 1.9961910050748836e-06, "epoch": 0.1531012220428868, "percentage": 7.66, "elapsed_time": "0:27:05", "remaining_time": "5:26:46"} +{"current_steps": 665, "total_steps": 8674, "loss": 0.576134979724884, "lr": 1.9961576874758893e-06, "epoch": 0.15333179617246945, "percentage": 7.67, "elapsed_time": "0:27:07", "remaining_time": "5:26:43"} +{"current_steps": 666, "total_steps": 8674, "loss": 0.6548957824707031, "lr": 1.9961242250752796e-06, "epoch": 0.1535623703020521, "percentage": 7.68, "elapsed_time": "0:27:10", "remaining_time": "5:26:40"} +{"current_steps": 667, "total_steps": 8674, "loss": 0.553372859954834, "lr": 1.9960906178779183e-06, "epoch": 0.15379294443163477, "percentage": 7.69, "elapsed_time": "0:27:12", "remaining_time": "5:26:36"} +{"current_steps": 668, "total_steps": 8674, "loss": 0.6749063730239868, "lr": 1.9960568658886904e-06, "epoch": 0.15402351856121743, "percentage": 7.7, "elapsed_time": "0:27:14", "remaining_time": "5:26:33"} +{"current_steps": 669, "total_steps": 8674, "loss": 0.6083666086196899, "lr": 1.9960229691125023e-06, "epoch": 0.1542540926908001, "percentage": 7.71, "elapsed_time": "0:27:17", "remaining_time": "5:26:30"} +{"current_steps": 670, "total_steps": 8674, "loss": 0.6468017101287842, "lr": 1.995988927554281e-06, "epoch": 0.15448466682038275, "percentage": 7.72, "elapsed_time": "0:27:19", "remaining_time": "5:26:29"} +{"current_steps": 671, "total_steps": 8674, "loss": 
0.7095121145248413, "lr": 1.995954741218976e-06, "epoch": 0.15471524094996542, "percentage": 7.74, "elapsed_time": "0:27:22", "remaining_time": "5:26:26"} +{"current_steps": 672, "total_steps": 8674, "loss": 0.7167302966117859, "lr": 1.995920410111555e-06, "epoch": 0.15494581507954808, "percentage": 7.75, "elapsed_time": "0:27:24", "remaining_time": "5:26:23"} +{"current_steps": 673, "total_steps": 8674, "loss": 0.6563462018966675, "lr": 1.995885934237009e-06, "epoch": 0.15517638920913074, "percentage": 7.76, "elapsed_time": "0:27:26", "remaining_time": "5:26:19"} +{"current_steps": 674, "total_steps": 8674, "loss": 0.638554573059082, "lr": 1.9958513136003495e-06, "epoch": 0.1554069633387134, "percentage": 7.77, "elapsed_time": "0:27:29", "remaining_time": "5:26:16"} +{"current_steps": 675, "total_steps": 8674, "loss": 0.7051291465759277, "lr": 1.995816548206609e-06, "epoch": 0.15563753746829606, "percentage": 7.78, "elapsed_time": "0:27:31", "remaining_time": "5:26:12"} +{"current_steps": 676, "total_steps": 8674, "loss": 0.6292394399642944, "lr": 1.995781638060841e-06, "epoch": 0.15586811159787872, "percentage": 7.79, "elapsed_time": "0:27:34", "remaining_time": "5:26:09"} +{"current_steps": 677, "total_steps": 8674, "loss": 0.5266016721725464, "lr": 1.99574658316812e-06, "epoch": 0.15609868572746138, "percentage": 7.8, "elapsed_time": "0:27:36", "remaining_time": "5:26:06"} +{"current_steps": 678, "total_steps": 8674, "loss": 0.6059033870697021, "lr": 1.9957113835335415e-06, "epoch": 0.15632925985704405, "percentage": 7.82, "elapsed_time": "0:27:38", "remaining_time": "5:26:05"} +{"current_steps": 679, "total_steps": 8674, "loss": 0.5252447128295898, "lr": 1.995676039162222e-06, "epoch": 0.1565598339866267, "percentage": 7.83, "elapsed_time": "0:27:41", "remaining_time": "5:26:01"} +{"current_steps": 680, "total_steps": 8674, "loss": 0.5963196754455566, "lr": 1.9956405500593e-06, "epoch": 0.15679040811620937, "percentage": 7.84, "elapsed_time": "0:27:43", 
"remaining_time": "5:25:58"} +{"current_steps": 681, "total_steps": 8674, "loss": 0.7262317538261414, "lr": 1.9956049162299322e-06, "epoch": 0.15702098224579203, "percentage": 7.85, "elapsed_time": "0:27:46", "remaining_time": "5:25:54"} +{"current_steps": 682, "total_steps": 8674, "loss": 0.6701623201370239, "lr": 1.995569137679301e-06, "epoch": 0.1572515563753747, "percentage": 7.86, "elapsed_time": "0:27:48", "remaining_time": "5:25:52"} +{"current_steps": 683, "total_steps": 8674, "loss": 0.6201569437980652, "lr": 1.9955332144126048e-06, "epoch": 0.15748213050495735, "percentage": 7.87, "elapsed_time": "0:27:50", "remaining_time": "5:25:48"} +{"current_steps": 684, "total_steps": 8674, "loss": 0.5338399410247803, "lr": 1.9954971464350673e-06, "epoch": 0.15771270463454, "percentage": 7.89, "elapsed_time": "0:27:53", "remaining_time": "5:25:44"} +{"current_steps": 685, "total_steps": 8674, "loss": 0.6784210205078125, "lr": 1.99546093375193e-06, "epoch": 0.15794327876412267, "percentage": 7.9, "elapsed_time": "0:27:55", "remaining_time": "5:25:41"} +{"current_steps": 686, "total_steps": 8674, "loss": 0.6752813458442688, "lr": 1.9954245763684574e-06, "epoch": 0.15817385289370534, "percentage": 7.91, "elapsed_time": "0:27:58", "remaining_time": "5:25:41"} +{"current_steps": 687, "total_steps": 8674, "loss": 0.6734355688095093, "lr": 1.9953880742899344e-06, "epoch": 0.158404427023288, "percentage": 7.92, "elapsed_time": "0:28:00", "remaining_time": "5:25:37"} +{"current_steps": 688, "total_steps": 8674, "loss": 0.4857062101364136, "lr": 1.995351427521667e-06, "epoch": 0.15863500115287066, "percentage": 7.93, "elapsed_time": "0:28:02", "remaining_time": "5:25:34"} +{"current_steps": 689, "total_steps": 8674, "loss": 0.6014343500137329, "lr": 1.995314636068982e-06, "epoch": 0.15886557528245332, "percentage": 7.94, "elapsed_time": "0:28:05", "remaining_time": "5:25:30"} +{"current_steps": 690, "total_steps": 8674, "loss": 0.571649432182312, "lr": 1.995277699937227e-06, 
"epoch": 0.15909614941203598, "percentage": 7.95, "elapsed_time": "0:28:07", "remaining_time": "5:25:27"} +{"current_steps": 691, "total_steps": 8674, "loss": 0.5195556879043579, "lr": 1.9952406191317717e-06, "epoch": 0.15932672354161864, "percentage": 7.97, "elapsed_time": "0:28:09", "remaining_time": "5:25:23"} +{"current_steps": 692, "total_steps": 8674, "loss": 0.6520895957946777, "lr": 1.995203393658006e-06, "epoch": 0.1595572976712013, "percentage": 7.98, "elapsed_time": "0:28:12", "remaining_time": "5:25:20"} +{"current_steps": 693, "total_steps": 8674, "loss": 0.7223460674285889, "lr": 1.995166023521341e-06, "epoch": 0.15978787180078396, "percentage": 7.99, "elapsed_time": "0:28:14", "remaining_time": "5:25:17"} +{"current_steps": 694, "total_steps": 8674, "loss": 0.5540120005607605, "lr": 1.9951285087272085e-06, "epoch": 0.16001844593036663, "percentage": 8.0, "elapsed_time": "0:28:17", "remaining_time": "5:25:13"} +{"current_steps": 695, "total_steps": 8674, "loss": 0.6539945602416992, "lr": 1.995090849281062e-06, "epoch": 0.1602490200599493, "percentage": 8.01, "elapsed_time": "0:28:19", "remaining_time": "5:25:13"} +{"current_steps": 696, "total_steps": 8674, "loss": 0.595169186592102, "lr": 1.995053045188376e-06, "epoch": 0.16047959418953192, "percentage": 8.02, "elapsed_time": "0:28:22", "remaining_time": "5:25:11"} +{"current_steps": 697, "total_steps": 8674, "loss": 0.564440131187439, "lr": 1.995015096454645e-06, "epoch": 0.16071016831911458, "percentage": 8.04, "elapsed_time": "0:28:24", "remaining_time": "5:25:08"} +{"current_steps": 698, "total_steps": 8674, "loss": 0.5934277772903442, "lr": 1.9949770030853857e-06, "epoch": 0.16094074244869724, "percentage": 8.05, "elapsed_time": "0:28:26", "remaining_time": "5:25:04"} +{"current_steps": 699, "total_steps": 8674, "loss": 0.5645352602005005, "lr": 1.9949387650861353e-06, "epoch": 0.1611713165782799, "percentage": 8.06, "elapsed_time": "0:28:29", "remaining_time": "5:25:00"} +{"current_steps": 700, 
"total_steps": 8674, "loss": 0.6437552571296692, "lr": 1.9949003824624517e-06, "epoch": 0.16140189070786257, "percentage": 8.07, "elapsed_time": "0:28:31", "remaining_time": "5:24:56"} +{"current_steps": 701, "total_steps": 8674, "loss": 0.7052004337310791, "lr": 1.9948618552199147e-06, "epoch": 0.16163246483744523, "percentage": 8.08, "elapsed_time": "0:28:35", "remaining_time": "5:25:08"} +{"current_steps": 702, "total_steps": 8674, "loss": 0.6547686457633972, "lr": 1.994823183364124e-06, "epoch": 0.1618630389670279, "percentage": 8.09, "elapsed_time": "0:28:37", "remaining_time": "5:25:04"} +{"current_steps": 703, "total_steps": 8674, "loss": 0.582744836807251, "lr": 1.994784366900702e-06, "epoch": 0.16209361309661055, "percentage": 8.1, "elapsed_time": "0:28:40", "remaining_time": "5:25:03"} +{"current_steps": 704, "total_steps": 8674, "loss": 0.6668936014175415, "lr": 1.99474540583529e-06, "epoch": 0.1623241872261932, "percentage": 8.12, "elapsed_time": "0:28:42", "remaining_time": "5:24:59"} +{"current_steps": 705, "total_steps": 8674, "loss": 0.6076918840408325, "lr": 1.994706300173552e-06, "epoch": 0.16255476135577587, "percentage": 8.13, "elapsed_time": "0:28:44", "remaining_time": "5:24:56"} +{"current_steps": 706, "total_steps": 8674, "loss": 0.5053621530532837, "lr": 1.994667049921172e-06, "epoch": 0.16278533548535853, "percentage": 8.14, "elapsed_time": "0:28:47", "remaining_time": "5:24:52"} +{"current_steps": 707, "total_steps": 8674, "loss": 0.5480915904045105, "lr": 1.994627655083856e-06, "epoch": 0.1630159096149412, "percentage": 8.15, "elapsed_time": "0:28:49", "remaining_time": "5:24:49"} +{"current_steps": 708, "total_steps": 8674, "loss": 0.5851327776908875, "lr": 1.99458811566733e-06, "epoch": 0.16324648374452386, "percentage": 8.16, "elapsed_time": "0:28:51", "remaining_time": "5:24:45"} +{"current_steps": 709, "total_steps": 8674, "loss": 0.7058213949203491, "lr": 1.9945484316773415e-06, "epoch": 0.16347705787410652, "percentage": 8.17, 
"elapsed_time": "0:28:54", "remaining_time": "5:24:42"} +{"current_steps": 710, "total_steps": 8674, "loss": 0.6900246739387512, "lr": 1.9945086031196588e-06, "epoch": 0.16370763200368918, "percentage": 8.19, "elapsed_time": "0:28:56", "remaining_time": "5:24:38"} +{"current_steps": 711, "total_steps": 8674, "loss": 0.6088757514953613, "lr": 1.994468630000072e-06, "epoch": 0.16393820613327184, "percentage": 8.2, "elapsed_time": "0:28:59", "remaining_time": "5:24:36"} +{"current_steps": 712, "total_steps": 8674, "loss": 0.6167945861816406, "lr": 1.9944285123243908e-06, "epoch": 0.1641687802628545, "percentage": 8.21, "elapsed_time": "0:29:01", "remaining_time": "5:24:35"} +{"current_steps": 713, "total_steps": 8674, "loss": 0.5842427015304565, "lr": 1.994388250098447e-06, "epoch": 0.16439935439243716, "percentage": 8.22, "elapsed_time": "0:29:03", "remaining_time": "5:24:32"} +{"current_steps": 714, "total_steps": 8674, "loss": 0.6709132194519043, "lr": 1.9943478433280937e-06, "epoch": 0.16462992852201982, "percentage": 8.23, "elapsed_time": "0:29:06", "remaining_time": "5:24:28"} +{"current_steps": 715, "total_steps": 8674, "loss": 0.5600479245185852, "lr": 1.994307292019204e-06, "epoch": 0.1648605026516025, "percentage": 8.24, "elapsed_time": "0:29:08", "remaining_time": "5:24:24"} +{"current_steps": 716, "total_steps": 8674, "loss": 0.59420245885849, "lr": 1.994266596177672e-06, "epoch": 0.16509107678118515, "percentage": 8.25, "elapsed_time": "0:29:10", "remaining_time": "5:24:20"} +{"current_steps": 717, "total_steps": 8674, "loss": 0.6098697185516357, "lr": 1.994225755809414e-06, "epoch": 0.1653216509107678, "percentage": 8.27, "elapsed_time": "0:29:13", "remaining_time": "5:24:17"} +{"current_steps": 718, "total_steps": 8674, "loss": 0.5626084804534912, "lr": 1.994184770920366e-06, "epoch": 0.16555222504035047, "percentage": 8.28, "elapsed_time": "0:29:15", "remaining_time": "5:24:13"} +{"current_steps": 719, "total_steps": 8674, "loss": 0.633317232131958, 
"lr": 1.9941436415164854e-06, "epoch": 0.16578279916993313, "percentage": 8.29, "elapsed_time": "0:29:17", "remaining_time": "5:24:10"} +{"current_steps": 720, "total_steps": 8674, "loss": 0.6629287004470825, "lr": 1.994102367603752e-06, "epoch": 0.1660133732995158, "percentage": 8.3, "elapsed_time": "0:29:20", "remaining_time": "5:24:09"} +{"current_steps": 721, "total_steps": 8674, "loss": 0.6281176805496216, "lr": 1.994060949188164e-06, "epoch": 0.16624394742909845, "percentage": 8.31, "elapsed_time": "0:29:22", "remaining_time": "5:24:05"} +{"current_steps": 722, "total_steps": 8674, "loss": 0.49195849895477295, "lr": 1.994019386275743e-06, "epoch": 0.16647452155868112, "percentage": 8.32, "elapsed_time": "0:29:25", "remaining_time": "5:24:02"} +{"current_steps": 723, "total_steps": 8674, "loss": 0.5165697932243347, "lr": 1.9939776788725295e-06, "epoch": 0.16670509568826378, "percentage": 8.34, "elapsed_time": "0:29:27", "remaining_time": "5:23:59"} +{"current_steps": 724, "total_steps": 8674, "loss": 0.6294844150543213, "lr": 1.9939358269845867e-06, "epoch": 0.16693566981784644, "percentage": 8.35, "elapsed_time": "0:29:30", "remaining_time": "5:24:01"} +{"current_steps": 725, "total_steps": 8674, "loss": 0.6117822527885437, "lr": 1.9938938306179986e-06, "epoch": 0.1671662439474291, "percentage": 8.36, "elapsed_time": "0:29:32", "remaining_time": "5:23:58"} +{"current_steps": 726, "total_steps": 8674, "loss": 0.5904515981674194, "lr": 1.9938516897788693e-06, "epoch": 0.16739681807701176, "percentage": 8.37, "elapsed_time": "0:29:35", "remaining_time": "5:23:55"} +{"current_steps": 727, "total_steps": 8674, "loss": 0.5453853011131287, "lr": 1.9938094044733247e-06, "epoch": 0.16762739220659442, "percentage": 8.38, "elapsed_time": "0:29:37", "remaining_time": "5:23:51"} +{"current_steps": 728, "total_steps": 8674, "loss": 0.6724731922149658, "lr": 1.9937669747075107e-06, "epoch": 0.16785796633617708, "percentage": 8.39, "elapsed_time": "0:29:40", 
"remaining_time": "5:23:50"} +{"current_steps": 729, "total_steps": 8674, "loss": 0.4844778776168823, "lr": 1.993724400487596e-06, "epoch": 0.16808854046575974, "percentage": 8.4, "elapsed_time": "0:29:42", "remaining_time": "5:23:47"} +{"current_steps": 730, "total_steps": 8674, "loss": 0.6666063070297241, "lr": 1.9936816818197682e-06, "epoch": 0.1683191145953424, "percentage": 8.42, "elapsed_time": "0:29:44", "remaining_time": "5:23:44"} +{"current_steps": 731, "total_steps": 8674, "loss": 0.49354803562164307, "lr": 1.9936388187102374e-06, "epoch": 0.16854968872492507, "percentage": 8.43, "elapsed_time": "0:29:47", "remaining_time": "5:23:41"} +{"current_steps": 732, "total_steps": 8674, "loss": 0.6587027311325073, "lr": 1.993595811165234e-06, "epoch": 0.16878026285450773, "percentage": 8.44, "elapsed_time": "0:29:49", "remaining_time": "5:23:38"} +{"current_steps": 733, "total_steps": 8674, "loss": 0.5618065595626831, "lr": 1.9935526591910095e-06, "epoch": 0.1690108369840904, "percentage": 8.45, "elapsed_time": "0:29:52", "remaining_time": "5:23:34"} +{"current_steps": 734, "total_steps": 8674, "loss": 0.6332052946090698, "lr": 1.993509362793837e-06, "epoch": 0.16924141111367305, "percentage": 8.46, "elapsed_time": "0:29:54", "remaining_time": "5:23:31"} +{"current_steps": 735, "total_steps": 8674, "loss": 0.5888797044754028, "lr": 1.9934659219800095e-06, "epoch": 0.1694719852432557, "percentage": 8.47, "elapsed_time": "0:29:56", "remaining_time": "5:23:28"} +{"current_steps": 736, "total_steps": 8674, "loss": 0.6995177865028381, "lr": 1.9934223367558418e-06, "epoch": 0.16970255937283837, "percentage": 8.49, "elapsed_time": "0:29:59", "remaining_time": "5:23:25"} +{"current_steps": 737, "total_steps": 8674, "loss": 0.6117641925811768, "lr": 1.9933786071276693e-06, "epoch": 0.16993313350242104, "percentage": 8.5, "elapsed_time": "0:30:01", "remaining_time": "5:23:25"} +{"current_steps": 738, "total_steps": 8674, "loss": 0.7138235569000244, "lr": 
1.9933347331018487e-06, "epoch": 0.1701637076320037, "percentage": 8.51, "elapsed_time": "0:30:04", "remaining_time": "5:23:21"} +{"current_steps": 739, "total_steps": 8674, "loss": 0.6139661073684692, "lr": 1.993290714684758e-06, "epoch": 0.17039428176158636, "percentage": 8.52, "elapsed_time": "0:30:06", "remaining_time": "5:23:18"} +{"current_steps": 740, "total_steps": 8674, "loss": 0.6998997926712036, "lr": 1.9932465518827945e-06, "epoch": 0.17062485589116902, "percentage": 8.53, "elapsed_time": "0:30:08", "remaining_time": "5:23:15"} +{"current_steps": 741, "total_steps": 8674, "loss": 0.5736757516860962, "lr": 1.9932022447023787e-06, "epoch": 0.17085543002075168, "percentage": 8.54, "elapsed_time": "0:30:11", "remaining_time": "5:23:12"} +{"current_steps": 742, "total_steps": 8674, "loss": 0.6069833040237427, "lr": 1.993157793149951e-06, "epoch": 0.17108600415033434, "percentage": 8.55, "elapsed_time": "0:30:13", "remaining_time": "5:23:09"} +{"current_steps": 743, "total_steps": 8674, "loss": 0.618720531463623, "lr": 1.9931131972319726e-06, "epoch": 0.171316578279917, "percentage": 8.57, "elapsed_time": "0:30:16", "remaining_time": "5:23:06"} +{"current_steps": 744, "total_steps": 8674, "loss": 0.6918530464172363, "lr": 1.9930684569549263e-06, "epoch": 0.17154715240949966, "percentage": 8.58, "elapsed_time": "0:30:18", "remaining_time": "5:23:02"} +{"current_steps": 745, "total_steps": 8674, "loss": 0.5303134322166443, "lr": 1.993023572325315e-06, "epoch": 0.17177772653908233, "percentage": 8.59, "elapsed_time": "0:30:21", "remaining_time": "5:23:02"} +{"current_steps": 746, "total_steps": 8674, "loss": 0.5017606019973755, "lr": 1.9929785433496637e-06, "epoch": 0.172008300668665, "percentage": 8.6, "elapsed_time": "0:30:23", "remaining_time": "5:22:58"} +{"current_steps": 747, "total_steps": 8674, "loss": 0.5683910846710205, "lr": 1.9929333700345176e-06, "epoch": 0.17223887479824765, "percentage": 8.61, "elapsed_time": "0:30:25", "remaining_time": 
"5:22:55"} +{"current_steps": 748, "total_steps": 8674, "loss": 0.7594112157821655, "lr": 1.992888052386443e-06, "epoch": 0.1724694489278303, "percentage": 8.62, "elapsed_time": "0:30:28", "remaining_time": "5:22:51"} +{"current_steps": 749, "total_steps": 8674, "loss": 0.5817109942436218, "lr": 1.9928425904120272e-06, "epoch": 0.17270002305741297, "percentage": 8.64, "elapsed_time": "0:30:30", "remaining_time": "5:22:49"} +{"current_steps": 750, "total_steps": 8674, "loss": 0.74810391664505, "lr": 1.9927969841178785e-06, "epoch": 0.17293059718699563, "percentage": 8.65, "elapsed_time": "0:30:32", "remaining_time": "5:22:45"} +{"current_steps": 751, "total_steps": 8674, "loss": 0.5620408654212952, "lr": 1.992751233510627e-06, "epoch": 0.17316117131657827, "percentage": 8.66, "elapsed_time": "0:30:35", "remaining_time": "5:22:42"} +{"current_steps": 752, "total_steps": 8674, "loss": 0.5661174654960632, "lr": 1.9927053385969224e-06, "epoch": 0.17339174544616093, "percentage": 8.67, "elapsed_time": "0:30:37", "remaining_time": "5:22:39"} +{"current_steps": 753, "total_steps": 8674, "loss": 0.6170656681060791, "lr": 1.992659299383436e-06, "epoch": 0.1736223195757436, "percentage": 8.68, "elapsed_time": "0:30:40", "remaining_time": "5:22:38"} +{"current_steps": 754, "total_steps": 8674, "loss": 0.6399837136268616, "lr": 1.99261311587686e-06, "epoch": 0.17385289370532625, "percentage": 8.69, "elapsed_time": "0:30:42", "remaining_time": "5:22:35"} +{"current_steps": 755, "total_steps": 8674, "loss": 0.646568775177002, "lr": 1.992566788083908e-06, "epoch": 0.1740834678349089, "percentage": 8.7, "elapsed_time": "0:30:45", "remaining_time": "5:22:32"} +{"current_steps": 756, "total_steps": 8674, "loss": 0.6836358904838562, "lr": 1.992520316011314e-06, "epoch": 0.17431404196449157, "percentage": 8.72, "elapsed_time": "0:30:47", "remaining_time": "5:22:28"} +{"current_steps": 757, "total_steps": 8674, "loss": 0.7077229619026184, "lr": 1.9924736996658327e-06, "epoch": 
0.17454461609407423, "percentage": 8.73, "elapsed_time": "0:30:49", "remaining_time": "5:22:25"} +{"current_steps": 758, "total_steps": 8674, "loss": 0.5127657651901245, "lr": 1.9924269390542408e-06, "epoch": 0.1747751902236569, "percentage": 8.74, "elapsed_time": "0:30:52", "remaining_time": "5:22:22"} +{"current_steps": 759, "total_steps": 8674, "loss": 0.49244552850723267, "lr": 1.992380034183336e-06, "epoch": 0.17500576435323956, "percentage": 8.75, "elapsed_time": "0:30:54", "remaining_time": "5:22:19"} +{"current_steps": 760, "total_steps": 8674, "loss": 0.6145986318588257, "lr": 1.9923329850599353e-06, "epoch": 0.17523633848282222, "percentage": 8.76, "elapsed_time": "0:30:56", "remaining_time": "5:22:16"} +{"current_steps": 761, "total_steps": 8674, "loss": 0.5233397483825684, "lr": 1.9922857916908784e-06, "epoch": 0.17546691261240488, "percentage": 8.77, "elapsed_time": "0:30:59", "remaining_time": "5:22:15"} +{"current_steps": 762, "total_steps": 8674, "loss": 0.6296844482421875, "lr": 1.992238454083025e-06, "epoch": 0.17569748674198754, "percentage": 8.78, "elapsed_time": "0:31:01", "remaining_time": "5:22:11"} +{"current_steps": 763, "total_steps": 8674, "loss": 0.5274437665939331, "lr": 1.9921909722432565e-06, "epoch": 0.1759280608715702, "percentage": 8.8, "elapsed_time": "0:31:04", "remaining_time": "5:22:08"} +{"current_steps": 764, "total_steps": 8674, "loss": 0.6365554332733154, "lr": 1.9921433461784744e-06, "epoch": 0.17615863500115286, "percentage": 8.81, "elapsed_time": "0:31:06", "remaining_time": "5:22:05"} +{"current_steps": 765, "total_steps": 8674, "loss": 0.6256603002548218, "lr": 1.992095575895602e-06, "epoch": 0.17638920913073552, "percentage": 8.82, "elapsed_time": "0:31:08", "remaining_time": "5:22:02"} +{"current_steps": 766, "total_steps": 8674, "loss": 0.6914918422698975, "lr": 1.9920476614015827e-06, "epoch": 0.17661978326031819, "percentage": 8.83, "elapsed_time": "0:31:11", "remaining_time": "5:21:59"} +{"current_steps": 767, 
"total_steps": 8674, "loss": 0.618436336517334, "lr": 1.9919996027033823e-06, "epoch": 0.17685035738990085, "percentage": 8.84, "elapsed_time": "0:31:13", "remaining_time": "5:21:57"} +{"current_steps": 768, "total_steps": 8674, "loss": 0.7496027946472168, "lr": 1.9919513998079857e-06, "epoch": 0.1770809315194835, "percentage": 8.85, "elapsed_time": "0:31:16", "remaining_time": "5:21:53"} +{"current_steps": 769, "total_steps": 8674, "loss": 0.6188616752624512, "lr": 1.9919030527224e-06, "epoch": 0.17731150564906617, "percentage": 8.87, "elapsed_time": "0:31:18", "remaining_time": "5:21:51"} +{"current_steps": 770, "total_steps": 8674, "loss": 0.6525505185127258, "lr": 1.991854561453653e-06, "epoch": 0.17754207977864883, "percentage": 8.88, "elapsed_time": "0:31:21", "remaining_time": "5:21:50"} +{"current_steps": 771, "total_steps": 8674, "loss": 0.6302521228790283, "lr": 1.9918059260087933e-06, "epoch": 0.1777726539082315, "percentage": 8.89, "elapsed_time": "0:31:23", "remaining_time": "5:21:47"} +{"current_steps": 772, "total_steps": 8674, "loss": 0.48817628622055054, "lr": 1.9917571463948905e-06, "epoch": 0.17800322803781415, "percentage": 8.9, "elapsed_time": "0:31:26", "remaining_time": "5:21:45"} +{"current_steps": 773, "total_steps": 8674, "loss": 0.7571396231651306, "lr": 1.9917082226190357e-06, "epoch": 0.17823380216739682, "percentage": 8.91, "elapsed_time": "0:31:28", "remaining_time": "5:21:41"} +{"current_steps": 774, "total_steps": 8674, "loss": 0.6416890025138855, "lr": 1.99165915468834e-06, "epoch": 0.17846437629697948, "percentage": 8.92, "elapsed_time": "0:31:30", "remaining_time": "5:21:38"} +{"current_steps": 775, "total_steps": 8674, "loss": 0.5668659210205078, "lr": 1.9916099426099357e-06, "epoch": 0.17869495042656214, "percentage": 8.93, "elapsed_time": "0:31:33", "remaining_time": "5:21:35"} +{"current_steps": 776, "total_steps": 8674, "loss": 0.5491495132446289, "lr": 1.991560586390977e-06, "epoch": 0.1789255245561448, "percentage": 8.95, 
"elapsed_time": "0:31:35", "remaining_time": "5:21:32"} +{"current_steps": 777, "total_steps": 8674, "loss": 0.5596655607223511, "lr": 1.991511086038637e-06, "epoch": 0.17915609868572746, "percentage": 8.96, "elapsed_time": "0:31:37", "remaining_time": "5:21:28"} +{"current_steps": 778, "total_steps": 8674, "loss": 0.606618344783783, "lr": 1.991461441560113e-06, "epoch": 0.17938667281531012, "percentage": 8.97, "elapsed_time": "0:31:40", "remaining_time": "5:21:28"} +{"current_steps": 779, "total_steps": 8674, "loss": 0.6534444093704224, "lr": 1.9914116529626195e-06, "epoch": 0.17961724694489278, "percentage": 8.98, "elapsed_time": "0:31:42", "remaining_time": "5:21:25"} +{"current_steps": 780, "total_steps": 8674, "loss": 0.6566994190216064, "lr": 1.9913617202533956e-06, "epoch": 0.17984782107447544, "percentage": 8.99, "elapsed_time": "0:31:45", "remaining_time": "5:21:22"} +{"current_steps": 781, "total_steps": 8674, "loss": 0.6745898723602295, "lr": 1.9913116434396976e-06, "epoch": 0.1800783952040581, "percentage": 9.0, "elapsed_time": "0:31:47", "remaining_time": "5:21:21"} +{"current_steps": 782, "total_steps": 8674, "loss": 0.6260639429092407, "lr": 1.991261422528806e-06, "epoch": 0.18030896933364077, "percentage": 9.02, "elapsed_time": "0:31:50", "remaining_time": "5:21:18"} +{"current_steps": 783, "total_steps": 8674, "loss": 0.6937930583953857, "lr": 1.9912110575280203e-06, "epoch": 0.18053954346322343, "percentage": 9.03, "elapsed_time": "0:31:52", "remaining_time": "5:21:15"} +{"current_steps": 784, "total_steps": 8674, "loss": 0.5220614671707153, "lr": 1.991160548444662e-06, "epoch": 0.1807701175928061, "percentage": 9.04, "elapsed_time": "0:31:55", "remaining_time": "5:21:13"} +{"current_steps": 785, "total_steps": 8674, "loss": 0.630463719367981, "lr": 1.9911098952860725e-06, "epoch": 0.18100069172238875, "percentage": 9.05, "elapsed_time": "0:31:57", "remaining_time": "5:21:09"} +{"current_steps": 786, "total_steps": 8674, "loss": 
0.5476818084716797, "lr": 1.9910590980596154e-06, "epoch": 0.1812312658519714, "percentage": 9.06, "elapsed_time": "0:31:59", "remaining_time": "5:21:06"} +{"current_steps": 787, "total_steps": 8674, "loss": 0.619910478591919, "lr": 1.9910081567726745e-06, "epoch": 0.18146183998155407, "percentage": 9.07, "elapsed_time": "0:32:02", "remaining_time": "5:21:05"} +{"current_steps": 788, "total_steps": 8674, "loss": 0.759405255317688, "lr": 1.990957071432654e-06, "epoch": 0.18169241411113674, "percentage": 9.08, "elapsed_time": "0:32:04", "remaining_time": "5:21:02"} +{"current_steps": 789, "total_steps": 8674, "loss": 0.6093606948852539, "lr": 1.9909058420469808e-06, "epoch": 0.1819229882407194, "percentage": 9.1, "elapsed_time": "0:32:07", "remaining_time": "5:20:58"} +{"current_steps": 790, "total_steps": 8674, "loss": 0.5358198285102844, "lr": 1.9908544686231e-06, "epoch": 0.18215356237030206, "percentage": 9.11, "elapsed_time": "0:32:09", "remaining_time": "5:20:55"} +{"current_steps": 791, "total_steps": 8674, "loss": 0.577926754951477, "lr": 1.9908029511684806e-06, "epoch": 0.18238413649988472, "percentage": 9.12, "elapsed_time": "0:32:11", "remaining_time": "5:20:52"} +{"current_steps": 792, "total_steps": 8674, "loss": 0.6232448816299438, "lr": 1.990751289690611e-06, "epoch": 0.18261471062946738, "percentage": 9.13, "elapsed_time": "0:32:14", "remaining_time": "5:20:49"} +{"current_steps": 793, "total_steps": 8674, "loss": 0.5461868047714233, "lr": 1.9906994841970005e-06, "epoch": 0.18284528475905004, "percentage": 9.14, "elapsed_time": "0:32:16", "remaining_time": "5:20:45"} +{"current_steps": 794, "total_steps": 8674, "loss": 0.6074671745300293, "lr": 1.9906475346951793e-06, "epoch": 0.1830758588886327, "percentage": 9.15, "elapsed_time": "0:32:18", "remaining_time": "5:20:42"} +{"current_steps": 795, "total_steps": 8674, "loss": 0.7101696729660034, "lr": 1.990595441192699e-06, "epoch": 0.18330643301821536, "percentage": 9.17, "elapsed_time": "0:32:21", 
"remaining_time": "5:20:40"} +{"current_steps": 796, "total_steps": 8674, "loss": 0.6507722735404968, "lr": 1.9905432036971318e-06, "epoch": 0.18353700714779803, "percentage": 9.18, "elapsed_time": "0:32:23", "remaining_time": "5:20:37"} +{"current_steps": 797, "total_steps": 8674, "loss": 0.6497524380683899, "lr": 1.9904908222160715e-06, "epoch": 0.1837675812773807, "percentage": 9.19, "elapsed_time": "0:32:26", "remaining_time": "5:20:34"} +{"current_steps": 798, "total_steps": 8674, "loss": 0.6359415054321289, "lr": 1.9904382967571315e-06, "epoch": 0.18399815540696335, "percentage": 9.2, "elapsed_time": "0:32:28", "remaining_time": "5:20:31"} +{"current_steps": 799, "total_steps": 8674, "loss": 0.6062989234924316, "lr": 1.9903856273279475e-06, "epoch": 0.184228729536546, "percentage": 9.21, "elapsed_time": "0:32:30", "remaining_time": "5:20:28"} +{"current_steps": 800, "total_steps": 8674, "loss": 0.5872690677642822, "lr": 1.9903328139361753e-06, "epoch": 0.18445930366612867, "percentage": 9.22, "elapsed_time": "0:32:33", "remaining_time": "5:20:24"} +{"current_steps": 801, "total_steps": 8674, "loss": 0.541993260383606, "lr": 1.9902798565894917e-06, "epoch": 0.18468987779571133, "percentage": 9.23, "elapsed_time": "0:32:36", "remaining_time": "5:20:34"} +{"current_steps": 802, "total_steps": 8674, "loss": 0.6509004235267639, "lr": 1.9902267552955948e-06, "epoch": 0.184920451925294, "percentage": 9.25, "elapsed_time": "0:32:39", "remaining_time": "5:20:31"} +{"current_steps": 803, "total_steps": 8674, "loss": 0.6994458436965942, "lr": 1.9901735100622034e-06, "epoch": 0.18515102605487666, "percentage": 9.26, "elapsed_time": "0:32:41", "remaining_time": "5:20:30"} +{"current_steps": 804, "total_steps": 8674, "loss": 0.5426214933395386, "lr": 1.9901201208970574e-06, "epoch": 0.18538160018445932, "percentage": 9.27, "elapsed_time": "0:32:44", "remaining_time": "5:20:27"} +{"current_steps": 805, "total_steps": 8674, "loss": 0.5889894366264343, "lr": 
1.9900665878079172e-06, "epoch": 0.18561217431404198, "percentage": 9.28, "elapsed_time": "0:32:46", "remaining_time": "5:20:24"} +{"current_steps": 806, "total_steps": 8674, "loss": 0.6455902457237244, "lr": 1.990012910802564e-06, "epoch": 0.18584274844362464, "percentage": 9.29, "elapsed_time": "0:32:49", "remaining_time": "5:20:21"} +{"current_steps": 807, "total_steps": 8674, "loss": 0.6336048245429993, "lr": 1.989959089888801e-06, "epoch": 0.18607332257320727, "percentage": 9.3, "elapsed_time": "0:32:51", "remaining_time": "5:20:18"} +{"current_steps": 808, "total_steps": 8674, "loss": 0.6091762781143188, "lr": 1.9899051250744517e-06, "epoch": 0.18630389670278993, "percentage": 9.32, "elapsed_time": "0:32:53", "remaining_time": "5:20:16"} +{"current_steps": 809, "total_steps": 8674, "loss": 0.5551953315734863, "lr": 1.9898510163673594e-06, "epoch": 0.1865344708323726, "percentage": 9.33, "elapsed_time": "0:32:56", "remaining_time": "5:20:12"} +{"current_steps": 810, "total_steps": 8674, "loss": 0.6441607475280762, "lr": 1.9897967637753907e-06, "epoch": 0.18676504496195526, "percentage": 9.34, "elapsed_time": "0:32:58", "remaining_time": "5:20:09"} +{"current_steps": 811, "total_steps": 8674, "loss": 0.5766205787658691, "lr": 1.989742367306431e-06, "epoch": 0.18699561909153792, "percentage": 9.35, "elapsed_time": "0:33:01", "remaining_time": "5:20:08"} +{"current_steps": 812, "total_steps": 8674, "loss": 0.624677836894989, "lr": 1.9896878269683872e-06, "epoch": 0.18722619322112058, "percentage": 9.36, "elapsed_time": "0:33:03", "remaining_time": "5:20:06"} +{"current_steps": 813, "total_steps": 8674, "loss": 0.5942056775093079, "lr": 1.9896331427691878e-06, "epoch": 0.18745676735070324, "percentage": 9.37, "elapsed_time": "0:33:05", "remaining_time": "5:20:02"} +{"current_steps": 814, "total_steps": 8674, "loss": 0.5194109082221985, "lr": 1.989578314716781e-06, "epoch": 0.1876873414802859, "percentage": 9.38, "elapsed_time": "0:33:08", "remaining_time": 
"5:19:59"} +{"current_steps": 815, "total_steps": 8674, "loss": 0.5851193070411682, "lr": 1.9895233428191375e-06, "epoch": 0.18791791560986856, "percentage": 9.4, "elapsed_time": "0:33:10", "remaining_time": "5:19:56"} +{"current_steps": 816, "total_steps": 8674, "loss": 0.5596088171005249, "lr": 1.989468227084248e-06, "epoch": 0.18814848973945122, "percentage": 9.41, "elapsed_time": "0:33:13", "remaining_time": "5:19:53"} +{"current_steps": 817, "total_steps": 8674, "loss": 0.608109712600708, "lr": 1.989412967520123e-06, "epoch": 0.18837906386903389, "percentage": 9.42, "elapsed_time": "0:33:15", "remaining_time": "5:19:50"} +{"current_steps": 818, "total_steps": 8674, "loss": 0.6488924026489258, "lr": 1.9893575641347957e-06, "epoch": 0.18860963799861655, "percentage": 9.43, "elapsed_time": "0:33:17", "remaining_time": "5:19:46"} +{"current_steps": 819, "total_steps": 8674, "loss": 0.6668595671653748, "lr": 1.9893020169363202e-06, "epoch": 0.1888402121281992, "percentage": 9.44, "elapsed_time": "0:33:20", "remaining_time": "5:19:43"} +{"current_steps": 820, "total_steps": 8674, "loss": 0.6516261696815491, "lr": 1.9892463259327702e-06, "epoch": 0.18907078625778187, "percentage": 9.45, "elapsed_time": "0:33:22", "remaining_time": "5:19:42"} +{"current_steps": 821, "total_steps": 8674, "loss": 0.5960654020309448, "lr": 1.9891904911322408e-06, "epoch": 0.18930136038736453, "percentage": 9.47, "elapsed_time": "0:33:25", "remaining_time": "5:19:39"} +{"current_steps": 822, "total_steps": 8674, "loss": 0.5836078524589539, "lr": 1.989134512542848e-06, "epoch": 0.1895319345169472, "percentage": 9.48, "elapsed_time": "0:33:27", "remaining_time": "5:19:36"} +{"current_steps": 823, "total_steps": 8674, "loss": 0.6233468651771545, "lr": 1.98907839017273e-06, "epoch": 0.18976250864652985, "percentage": 9.49, "elapsed_time": "0:33:29", "remaining_time": "5:19:32"} +{"current_steps": 824, "total_steps": 8674, "loss": 0.6228024363517761, "lr": 1.989022124030043e-06, "epoch": 
0.18999308277611252, "percentage": 9.5, "elapsed_time": "0:33:32", "remaining_time": "5:19:29"} +{"current_steps": 825, "total_steps": 8674, "loss": 0.5549489259719849, "lr": 1.9889657141229674e-06, "epoch": 0.19022365690569518, "percentage": 9.51, "elapsed_time": "0:33:34", "remaining_time": "5:19:26"} +{"current_steps": 826, "total_steps": 8674, "loss": 0.572743833065033, "lr": 1.988909160459703e-06, "epoch": 0.19045423103527784, "percentage": 9.52, "elapsed_time": "0:33:36", "remaining_time": "5:19:23"} +{"current_steps": 827, "total_steps": 8674, "loss": 0.5483371019363403, "lr": 1.988852463048469e-06, "epoch": 0.1906848051648605, "percentage": 9.53, "elapsed_time": "0:33:39", "remaining_time": "5:19:19"} +{"current_steps": 828, "total_steps": 8674, "loss": 0.6489086151123047, "lr": 1.988795621897508e-06, "epoch": 0.19091537929444316, "percentage": 9.55, "elapsed_time": "0:33:41", "remaining_time": "5:19:19"} +{"current_steps": 829, "total_steps": 8674, "loss": 0.5885359644889832, "lr": 1.9887386370150823e-06, "epoch": 0.19114595342402582, "percentage": 9.56, "elapsed_time": "0:33:44", "remaining_time": "5:19:16"} +{"current_steps": 830, "total_steps": 8674, "loss": 0.5725297927856445, "lr": 1.988681508409475e-06, "epoch": 0.19137652755360848, "percentage": 9.57, "elapsed_time": "0:33:46", "remaining_time": "5:19:13"} +{"current_steps": 831, "total_steps": 8674, "loss": 0.5165927410125732, "lr": 1.9886242360889907e-06, "epoch": 0.19160710168319114, "percentage": 9.58, "elapsed_time": "0:33:49", "remaining_time": "5:19:12"} +{"current_steps": 832, "total_steps": 8674, "loss": 0.4909062385559082, "lr": 1.988566820061954e-06, "epoch": 0.1918376758127738, "percentage": 9.59, "elapsed_time": "0:33:51", "remaining_time": "5:19:08"} +{"current_steps": 833, "total_steps": 8674, "loss": 0.6611230373382568, "lr": 1.988509260336711e-06, "epoch": 0.19206824994235647, "percentage": 9.6, "elapsed_time": "0:33:53", "remaining_time": "5:19:05"} +{"current_steps": 834, 
"total_steps": 8674, "loss": 0.5702481269836426, "lr": 1.9884515569216296e-06, "epoch": 0.19229882407193913, "percentage": 9.61, "elapsed_time": "0:33:56", "remaining_time": "5:19:02"} +{"current_steps": 835, "total_steps": 8674, "loss": 0.5923126935958862, "lr": 1.988393709825096e-06, "epoch": 0.1925293982015218, "percentage": 9.63, "elapsed_time": "0:33:58", "remaining_time": "5:18:59"} +{"current_steps": 836, "total_steps": 8674, "loss": 0.6054497957229614, "lr": 1.98833571905552e-06, "epoch": 0.19275997233110445, "percentage": 9.64, "elapsed_time": "0:34:01", "remaining_time": "5:18:56"} +{"current_steps": 837, "total_steps": 8674, "loss": 0.6688513159751892, "lr": 1.9882775846213305e-06, "epoch": 0.1929905464606871, "percentage": 9.65, "elapsed_time": "0:34:03", "remaining_time": "5:18:55"} +{"current_steps": 838, "total_steps": 8674, "loss": 0.5898394584655762, "lr": 1.988219306530978e-06, "epoch": 0.19322112059026977, "percentage": 9.66, "elapsed_time": "0:34:06", "remaining_time": "5:18:52"} +{"current_steps": 839, "total_steps": 8674, "loss": 0.575627326965332, "lr": 1.9881608847929345e-06, "epoch": 0.19345169471985244, "percentage": 9.67, "elapsed_time": "0:34:08", "remaining_time": "5:18:49"} +{"current_steps": 840, "total_steps": 8674, "loss": 0.5392276048660278, "lr": 1.9881023194156913e-06, "epoch": 0.1936822688494351, "percentage": 9.68, "elapsed_time": "0:34:10", "remaining_time": "5:18:46"} +{"current_steps": 841, "total_steps": 8674, "loss": 0.5464376211166382, "lr": 1.9880436104077624e-06, "epoch": 0.19391284297901776, "percentage": 9.7, "elapsed_time": "0:34:13", "remaining_time": "5:18:43"} +{"current_steps": 842, "total_steps": 8674, "loss": 0.5483032464981079, "lr": 1.9879847577776804e-06, "epoch": 0.19414341710860042, "percentage": 9.71, "elapsed_time": "0:34:15", "remaining_time": "5:18:40"} +{"current_steps": 843, "total_steps": 8674, "loss": 0.583878219127655, "lr": 1.9879257615340016e-06, "epoch": 0.19437399123818308, "percentage": 9.72, 
"elapsed_time": "0:34:17", "remaining_time": "5:18:37"} +{"current_steps": 844, "total_steps": 8674, "loss": 0.5646623373031616, "lr": 1.9878666216853005e-06, "epoch": 0.19460456536776574, "percentage": 9.73, "elapsed_time": "0:34:20", "remaining_time": "5:18:34"} +{"current_steps": 845, "total_steps": 8674, "loss": 0.4785343408584595, "lr": 1.9878073382401745e-06, "epoch": 0.1948351394973484, "percentage": 9.74, "elapsed_time": "0:34:22", "remaining_time": "5:18:33"} +{"current_steps": 846, "total_steps": 8674, "loss": 0.6247695684432983, "lr": 1.987747911207241e-06, "epoch": 0.19506571362693106, "percentage": 9.75, "elapsed_time": "0:34:25", "remaining_time": "5:18:30"} +{"current_steps": 847, "total_steps": 8674, "loss": 0.5686244368553162, "lr": 1.9876883405951377e-06, "epoch": 0.19529628775651373, "percentage": 9.76, "elapsed_time": "0:34:27", "remaining_time": "5:18:26"} +{"current_steps": 848, "total_steps": 8674, "loss": 0.5887250900268555, "lr": 1.9876286264125242e-06, "epoch": 0.1955268618860964, "percentage": 9.78, "elapsed_time": "0:34:30", "remaining_time": "5:18:23"} +{"current_steps": 849, "total_steps": 8674, "loss": 0.6225967407226562, "lr": 1.9875687686680808e-06, "epoch": 0.19575743601567905, "percentage": 9.79, "elapsed_time": "0:34:32", "remaining_time": "5:18:20"} +{"current_steps": 850, "total_steps": 8674, "loss": 0.4695369601249695, "lr": 1.987508767370508e-06, "epoch": 0.1959880101452617, "percentage": 9.8, "elapsed_time": "0:34:34", "remaining_time": "5:18:17"} +{"current_steps": 851, "total_steps": 8674, "loss": 0.5248171091079712, "lr": 1.9874486225285276e-06, "epoch": 0.19621858427484437, "percentage": 9.81, "elapsed_time": "0:34:37", "remaining_time": "5:18:13"} +{"current_steps": 852, "total_steps": 8674, "loss": 0.573886513710022, "lr": 1.9873883341508825e-06, "epoch": 0.19644915840442703, "percentage": 9.82, "elapsed_time": "0:34:39", "remaining_time": "5:18:10"} +{"current_steps": 853, "total_steps": 8674, "loss": 
0.5309966802597046, "lr": 1.9873279022463365e-06, "epoch": 0.1966797325340097, "percentage": 9.83, "elapsed_time": "0:34:41", "remaining_time": "5:18:08"} +{"current_steps": 854, "total_steps": 8674, "loss": 0.7115850448608398, "lr": 1.987267326823673e-06, "epoch": 0.19691030666359235, "percentage": 9.85, "elapsed_time": "0:34:44", "remaining_time": "5:18:05"} +{"current_steps": 855, "total_steps": 8674, "loss": 0.6970044374465942, "lr": 1.9872066078916984e-06, "epoch": 0.19714088079317502, "percentage": 9.86, "elapsed_time": "0:34:46", "remaining_time": "5:18:02"} +{"current_steps": 856, "total_steps": 8674, "loss": 0.5956458449363708, "lr": 1.987145745459238e-06, "epoch": 0.19737145492275768, "percentage": 9.87, "elapsed_time": "0:34:49", "remaining_time": "5:17:59"} +{"current_steps": 857, "total_steps": 8674, "loss": 0.6200698614120483, "lr": 1.9870847395351395e-06, "epoch": 0.19760202905234034, "percentage": 9.88, "elapsed_time": "0:34:51", "remaining_time": "5:17:56"} +{"current_steps": 858, "total_steps": 8674, "loss": 0.6552712321281433, "lr": 1.98702359012827e-06, "epoch": 0.197832603181923, "percentage": 9.89, "elapsed_time": "0:34:53", "remaining_time": "5:17:53"} +{"current_steps": 859, "total_steps": 8674, "loss": 0.5995951294898987, "lr": 1.986962297247519e-06, "epoch": 0.19806317731150566, "percentage": 9.9, "elapsed_time": "0:34:56", "remaining_time": "5:17:50"} +{"current_steps": 860, "total_steps": 8674, "loss": 0.5903854966163635, "lr": 1.9869008609017946e-06, "epoch": 0.19829375144108832, "percentage": 9.91, "elapsed_time": "0:34:58", "remaining_time": "5:17:46"} +{"current_steps": 861, "total_steps": 8674, "loss": 0.49756956100463867, "lr": 1.986839281100029e-06, "epoch": 0.19852432557067098, "percentage": 9.93, "elapsed_time": "0:35:01", "remaining_time": "5:17:45"} +{"current_steps": 862, "total_steps": 8674, "loss": 0.6726386547088623, "lr": 1.986777557851172e-06, "epoch": 0.19875489970025362, "percentage": 9.94, "elapsed_time": "0:35:03", 
"remaining_time": "5:17:42"} +{"current_steps": 863, "total_steps": 8674, "loss": 0.5941756963729858, "lr": 1.9867156911641963e-06, "epoch": 0.19898547382983628, "percentage": 9.95, "elapsed_time": "0:35:05", "remaining_time": "5:17:39"} +{"current_steps": 864, "total_steps": 8674, "loss": 0.6148152351379395, "lr": 1.986653681048095e-06, "epoch": 0.19921604795941894, "percentage": 9.96, "elapsed_time": "0:35:08", "remaining_time": "5:17:36"} +{"current_steps": 865, "total_steps": 8674, "loss": 0.5484675765037537, "lr": 1.9865915275118815e-06, "epoch": 0.1994466220890016, "percentage": 9.97, "elapsed_time": "0:35:10", "remaining_time": "5:17:33"} +{"current_steps": 866, "total_steps": 8674, "loss": 0.5835011601448059, "lr": 1.986529230564591e-06, "epoch": 0.19967719621858426, "percentage": 9.98, "elapsed_time": "0:35:12", "remaining_time": "5:17:30"} +{"current_steps": 867, "total_steps": 8674, "loss": 0.5505619049072266, "lr": 1.9864667902152785e-06, "epoch": 0.19990777034816692, "percentage": 10.0, "elapsed_time": "0:35:15", "remaining_time": "5:17:27"} +{"current_steps": 868, "total_steps": 8674, "loss": 0.6170759797096252, "lr": 1.986404206473021e-06, "epoch": 0.20013834447774959, "percentage": 10.01, "elapsed_time": "0:35:17", "remaining_time": "5:17:24"} +{"current_steps": 869, "total_steps": 8674, "loss": 0.6302823424339294, "lr": 1.9863414793469144e-06, "epoch": 0.20036891860733225, "percentage": 10.02, "elapsed_time": "0:35:20", "remaining_time": "5:17:21"} +{"current_steps": 870, "total_steps": 8674, "loss": 0.6265357732772827, "lr": 1.9862786088460778e-06, "epoch": 0.2005994927369149, "percentage": 10.03, "elapsed_time": "0:35:22", "remaining_time": "5:17:20"} +{"current_steps": 871, "total_steps": 8674, "loss": 0.5346760749816895, "lr": 1.9862155949796497e-06, "epoch": 0.20083006686649757, "percentage": 10.04, "elapsed_time": "0:35:25", "remaining_time": "5:17:17"} +{"current_steps": 872, "total_steps": 8674, "loss": 0.5480276346206665, "lr": 
1.98615243775679e-06, "epoch": 0.20106064099608023, "percentage": 10.05, "elapsed_time": "0:35:27", "remaining_time": "5:17:14"} +{"current_steps": 873, "total_steps": 8674, "loss": 0.615007758140564, "lr": 1.986089137186679e-06, "epoch": 0.2012912151256629, "percentage": 10.06, "elapsed_time": "0:35:29", "remaining_time": "5:17:11"} +{"current_steps": 874, "total_steps": 8674, "loss": 0.598671555519104, "lr": 1.986025693278518e-06, "epoch": 0.20152178925524555, "percentage": 10.08, "elapsed_time": "0:35:32", "remaining_time": "5:17:10"} +{"current_steps": 875, "total_steps": 8674, "loss": 0.6029553413391113, "lr": 1.98596210604153e-06, "epoch": 0.20175236338482821, "percentage": 10.09, "elapsed_time": "0:35:34", "remaining_time": "5:17:07"} +{"current_steps": 876, "total_steps": 8674, "loss": 0.6854428052902222, "lr": 1.985898375484957e-06, "epoch": 0.20198293751441088, "percentage": 10.1, "elapsed_time": "0:35:37", "remaining_time": "5:17:03"} +{"current_steps": 877, "total_steps": 8674, "loss": 0.5032496452331543, "lr": 1.9858345016180636e-06, "epoch": 0.20221351164399354, "percentage": 10.11, "elapsed_time": "0:35:39", "remaining_time": "5:17:00"} +{"current_steps": 878, "total_steps": 8674, "loss": 0.5521007776260376, "lr": 1.9857704844501343e-06, "epoch": 0.2024440857735762, "percentage": 10.12, "elapsed_time": "0:35:42", "remaining_time": "5:16:59"} +{"current_steps": 879, "total_steps": 8674, "loss": 0.6473567485809326, "lr": 1.9857063239904742e-06, "epoch": 0.20267465990315886, "percentage": 10.13, "elapsed_time": "0:35:44", "remaining_time": "5:16:56"} +{"current_steps": 880, "total_steps": 8674, "loss": 0.528810977935791, "lr": 1.9856420202484103e-06, "epoch": 0.20290523403274152, "percentage": 10.15, "elapsed_time": "0:35:46", "remaining_time": "5:16:53"} +{"current_steps": 881, "total_steps": 8674, "loss": 0.681857705116272, "lr": 1.9855775732332898e-06, "epoch": 0.20313580816232418, "percentage": 10.16, "elapsed_time": "0:35:49", "remaining_time": 
"5:16:50"} +{"current_steps": 882, "total_steps": 8674, "loss": 0.6510526537895203, "lr": 1.9855129829544805e-06, "epoch": 0.20336638229190684, "percentage": 10.17, "elapsed_time": "0:35:51", "remaining_time": "5:16:47"} +{"current_steps": 883, "total_steps": 8674, "loss": 0.5690885782241821, "lr": 1.985448249421371e-06, "epoch": 0.2035969564214895, "percentage": 10.18, "elapsed_time": "0:35:53", "remaining_time": "5:16:44"} +{"current_steps": 884, "total_steps": 8674, "loss": 0.6451331973075867, "lr": 1.985383372643371e-06, "epoch": 0.20382753055107217, "percentage": 10.19, "elapsed_time": "0:35:56", "remaining_time": "5:16:40"} +{"current_steps": 885, "total_steps": 8674, "loss": 0.493961900472641, "lr": 1.9853183526299117e-06, "epoch": 0.20405810468065483, "percentage": 10.2, "elapsed_time": "0:35:58", "remaining_time": "5:16:37"} +{"current_steps": 886, "total_steps": 8674, "loss": 0.5390207767486572, "lr": 1.9852531893904434e-06, "epoch": 0.2042886788102375, "percentage": 10.21, "elapsed_time": "0:36:00", "remaining_time": "5:16:34"} +{"current_steps": 887, "total_steps": 8674, "loss": 0.5976558923721313, "lr": 1.9851878829344395e-06, "epoch": 0.20451925293982015, "percentage": 10.23, "elapsed_time": "0:36:03", "remaining_time": "5:16:33"} +{"current_steps": 888, "total_steps": 8674, "loss": 0.539776623249054, "lr": 1.9851224332713917e-06, "epoch": 0.2047498270694028, "percentage": 10.24, "elapsed_time": "0:36:05", "remaining_time": "5:16:30"} +{"current_steps": 889, "total_steps": 8674, "loss": 0.6791383624076843, "lr": 1.9850568404108144e-06, "epoch": 0.20498040119898547, "percentage": 10.25, "elapsed_time": "0:36:08", "remaining_time": "5:16:27"} +{"current_steps": 890, "total_steps": 8674, "loss": 0.6195741891860962, "lr": 1.984991104362242e-06, "epoch": 0.20521097532856813, "percentage": 10.26, "elapsed_time": "0:36:10", "remaining_time": "5:16:24"} +{"current_steps": 891, "total_steps": 8674, "loss": 0.5792666673660278, "lr": 1.9849252251352303e-06, 
"epoch": 0.2054415494581508, "percentage": 10.27, "elapsed_time": "0:36:13", "remaining_time": "5:16:21"} +{"current_steps": 892, "total_steps": 8674, "loss": 0.5633316040039062, "lr": 1.984859202739355e-06, "epoch": 0.20567212358773346, "percentage": 10.28, "elapsed_time": "0:36:15", "remaining_time": "5:16:19"} +{"current_steps": 893, "total_steps": 8674, "loss": 0.6152814626693726, "lr": 1.9847930371842137e-06, "epoch": 0.20590269771731612, "percentage": 10.3, "elapsed_time": "0:36:17", "remaining_time": "5:16:16"} +{"current_steps": 894, "total_steps": 8674, "loss": 0.5584526658058167, "lr": 1.9847267284794234e-06, "epoch": 0.20613327184689878, "percentage": 10.31, "elapsed_time": "0:36:20", "remaining_time": "5:16:13"} +{"current_steps": 895, "total_steps": 8674, "loss": 0.5526787042617798, "lr": 1.9846602766346235e-06, "epoch": 0.20636384597648144, "percentage": 10.32, "elapsed_time": "0:36:22", "remaining_time": "5:16:12"} +{"current_steps": 896, "total_steps": 8674, "loss": 0.6851564049720764, "lr": 1.984593681659473e-06, "epoch": 0.2065944201060641, "percentage": 10.33, "elapsed_time": "0:36:25", "remaining_time": "5:16:08"} +{"current_steps": 897, "total_steps": 8674, "loss": 0.6012386083602905, "lr": 1.9845269435636524e-06, "epoch": 0.20682499423564676, "percentage": 10.34, "elapsed_time": "0:36:27", "remaining_time": "5:16:05"} +{"current_steps": 898, "total_steps": 8674, "loss": 0.5515716075897217, "lr": 1.9844600623568626e-06, "epoch": 0.20705556836522943, "percentage": 10.35, "elapsed_time": "0:36:29", "remaining_time": "5:16:02"} +{"current_steps": 899, "total_steps": 8674, "loss": 0.6534323692321777, "lr": 1.9843930380488255e-06, "epoch": 0.2072861424948121, "percentage": 10.36, "elapsed_time": "0:36:32", "remaining_time": "5:15:59"} +{"current_steps": 900, "total_steps": 8674, "loss": 0.726966381072998, "lr": 1.9843258706492836e-06, "epoch": 0.20751671662439475, "percentage": 10.38, "elapsed_time": "0:36:34", "remaining_time": "5:15:56"} 
+{"current_steps": 901, "total_steps": 8674, "loss": 0.6692399978637695, "lr": 1.984258560168001e-06, "epoch": 0.2077472907539774, "percentage": 10.39, "elapsed_time": "0:36:38", "remaining_time": "5:16:04"} +{"current_steps": 902, "total_steps": 8674, "loss": 0.5815941095352173, "lr": 1.9841911066147614e-06, "epoch": 0.20797786488356007, "percentage": 10.4, "elapsed_time": "0:36:40", "remaining_time": "5:16:01"} +{"current_steps": 903, "total_steps": 8674, "loss": 0.4850257933139801, "lr": 1.98412350999937e-06, "epoch": 0.20820843901314273, "percentage": 10.41, "elapsed_time": "0:36:43", "remaining_time": "5:16:00"} +{"current_steps": 904, "total_steps": 8674, "loss": 0.7309345006942749, "lr": 1.9840557703316524e-06, "epoch": 0.2084390131427254, "percentage": 10.42, "elapsed_time": "0:36:45", "remaining_time": "5:15:57"} +{"current_steps": 905, "total_steps": 8674, "loss": 0.6246342658996582, "lr": 1.9839878876214556e-06, "epoch": 0.20866958727230805, "percentage": 10.43, "elapsed_time": "0:36:48", "remaining_time": "5:15:54"} +{"current_steps": 906, "total_steps": 8674, "loss": 0.503870964050293, "lr": 1.983919861878647e-06, "epoch": 0.20890016140189072, "percentage": 10.45, "elapsed_time": "0:36:50", "remaining_time": "5:15:51"} +{"current_steps": 907, "total_steps": 8674, "loss": 0.5316766500473022, "lr": 1.9838516931131147e-06, "epoch": 0.20913073553147338, "percentage": 10.46, "elapsed_time": "0:36:52", "remaining_time": "5:15:48"} +{"current_steps": 908, "total_steps": 8674, "loss": 0.5707069039344788, "lr": 1.983783381334768e-06, "epoch": 0.20936130966105604, "percentage": 10.47, "elapsed_time": "0:36:55", "remaining_time": "5:15:45"} +{"current_steps": 909, "total_steps": 8674, "loss": 0.5482156276702881, "lr": 1.983714926553536e-06, "epoch": 0.2095918837906387, "percentage": 10.48, "elapsed_time": "0:36:57", "remaining_time": "5:15:42"} +{"current_steps": 910, "total_steps": 8674, "loss": 0.45747748017311096, "lr": 1.98364632877937e-06, "epoch": 
0.20982245792022136, "percentage": 10.49, "elapsed_time": "0:36:59", "remaining_time": "5:15:38"} +{"current_steps": 911, "total_steps": 8674, "loss": 0.5599262118339539, "lr": 1.9835775880222414e-06, "epoch": 0.21005303204980402, "percentage": 10.5, "elapsed_time": "0:37:02", "remaining_time": "5:15:37"} +{"current_steps": 912, "total_steps": 8674, "loss": 0.5115377902984619, "lr": 1.9835087042921416e-06, "epoch": 0.21028360617938668, "percentage": 10.51, "elapsed_time": "0:37:04", "remaining_time": "5:15:34"} +{"current_steps": 913, "total_steps": 8674, "loss": 0.6577836275100708, "lr": 1.9834396775990846e-06, "epoch": 0.21051418030896935, "percentage": 10.53, "elapsed_time": "0:37:07", "remaining_time": "5:15:31"} +{"current_steps": 914, "total_steps": 8674, "loss": 0.4979211091995239, "lr": 1.9833705079531033e-06, "epoch": 0.210744754438552, "percentage": 10.54, "elapsed_time": "0:37:09", "remaining_time": "5:15:27"} +{"current_steps": 915, "total_steps": 8674, "loss": 0.5052670240402222, "lr": 1.983301195364252e-06, "epoch": 0.21097532856813467, "percentage": 10.55, "elapsed_time": "0:37:11", "remaining_time": "5:15:24"} +{"current_steps": 916, "total_steps": 8674, "loss": 0.5480808019638062, "lr": 1.9832317398426076e-06, "epoch": 0.21120590269771733, "percentage": 10.56, "elapsed_time": "0:37:14", "remaining_time": "5:15:21"} +{"current_steps": 917, "total_steps": 8674, "loss": 0.5328841209411621, "lr": 1.983162141398264e-06, "epoch": 0.2114364768273, "percentage": 10.57, "elapsed_time": "0:37:16", "remaining_time": "5:15:18"} +{"current_steps": 918, "total_steps": 8674, "loss": 0.5572643280029297, "lr": 1.98309240004134e-06, "epoch": 0.21166705095688262, "percentage": 10.58, "elapsed_time": "0:37:18", "remaining_time": "5:15:14"} +{"current_steps": 919, "total_steps": 8674, "loss": 0.5180699825286865, "lr": 1.983022515781972e-06, "epoch": 0.21189762508646529, "percentage": 10.59, "elapsed_time": "0:37:21", "remaining_time": "5:15:11"} +{"current_steps": 920, 
"total_steps": 8674, "loss": 0.5031566619873047, "lr": 1.9829524886303182e-06, "epoch": 0.21212819921604795, "percentage": 10.61, "elapsed_time": "0:37:23", "remaining_time": "5:15:10"} +{"current_steps": 921, "total_steps": 8674, "loss": 0.6579925417900085, "lr": 1.9828823185965587e-06, "epoch": 0.2123587733456306, "percentage": 10.62, "elapsed_time": "0:37:26", "remaining_time": "5:15:07"} +{"current_steps": 922, "total_steps": 8674, "loss": 0.6107230186462402, "lr": 1.982812005690893e-06, "epoch": 0.21258934747521327, "percentage": 10.63, "elapsed_time": "0:37:28", "remaining_time": "5:15:03"} +{"current_steps": 923, "total_steps": 8674, "loss": 0.5244725942611694, "lr": 1.982741549923542e-06, "epoch": 0.21281992160479593, "percentage": 10.64, "elapsed_time": "0:37:30", "remaining_time": "5:15:00"} +{"current_steps": 924, "total_steps": 8674, "loss": 0.5857048630714417, "lr": 1.9826709513047466e-06, "epoch": 0.2130504957343786, "percentage": 10.65, "elapsed_time": "0:37:33", "remaining_time": "5:14:57"} +{"current_steps": 925, "total_steps": 8674, "loss": 0.6417914628982544, "lr": 1.9826002098447694e-06, "epoch": 0.21328106986396125, "percentage": 10.66, "elapsed_time": "0:37:35", "remaining_time": "5:14:54"} +{"current_steps": 926, "total_steps": 8674, "loss": 0.6062248945236206, "lr": 1.982529325553893e-06, "epoch": 0.21351164399354391, "percentage": 10.68, "elapsed_time": "0:37:37", "remaining_time": "5:14:51"} +{"current_steps": 927, "total_steps": 8674, "loss": 0.4870455265045166, "lr": 1.982458298442422e-06, "epoch": 0.21374221812312658, "percentage": 10.69, "elapsed_time": "0:37:40", "remaining_time": "5:14:49"} +{"current_steps": 928, "total_steps": 8674, "loss": 0.6552037000656128, "lr": 1.9823871285206802e-06, "epoch": 0.21397279225270924, "percentage": 10.7, "elapsed_time": "0:37:42", "remaining_time": "5:14:48"} +{"current_steps": 929, "total_steps": 8674, "loss": 0.531679093837738, "lr": 1.9823158157990133e-06, "epoch": 0.2142033663822919, 
"percentage": 10.71, "elapsed_time": "0:37:45", "remaining_time": "5:14:45"} +{"current_steps": 930, "total_steps": 8674, "loss": 0.516847550868988, "lr": 1.982244360287787e-06, "epoch": 0.21443394051187456, "percentage": 10.72, "elapsed_time": "0:37:47", "remaining_time": "5:14:42"} +{"current_steps": 931, "total_steps": 8674, "loss": 0.47147709131240845, "lr": 1.982172761997388e-06, "epoch": 0.21466451464145722, "percentage": 10.73, "elapsed_time": "0:37:49", "remaining_time": "5:14:39"} +{"current_steps": 932, "total_steps": 8674, "loss": 0.627938985824585, "lr": 1.982101020938224e-06, "epoch": 0.21489508877103988, "percentage": 10.74, "elapsed_time": "0:37:52", "remaining_time": "5:14:36"} +{"current_steps": 933, "total_steps": 8674, "loss": 0.639348030090332, "lr": 1.9820291371207233e-06, "epoch": 0.21512566290062254, "percentage": 10.76, "elapsed_time": "0:37:54", "remaining_time": "5:14:33"} +{"current_steps": 934, "total_steps": 8674, "loss": 0.6480363607406616, "lr": 1.9819571105553354e-06, "epoch": 0.2153562370302052, "percentage": 10.77, "elapsed_time": "0:37:57", "remaining_time": "5:14:30"} +{"current_steps": 935, "total_steps": 8674, "loss": 0.5776711702346802, "lr": 1.9818849412525293e-06, "epoch": 0.21558681115978787, "percentage": 10.78, "elapsed_time": "0:37:59", "remaining_time": "5:14:28"} +{"current_steps": 936, "total_steps": 8674, "loss": 0.5891472101211548, "lr": 1.9818126292227957e-06, "epoch": 0.21581738528937053, "percentage": 10.79, "elapsed_time": "0:38:02", "remaining_time": "5:14:25"} +{"current_steps": 937, "total_steps": 8674, "loss": 0.5977755784988403, "lr": 1.9817401744766465e-06, "epoch": 0.2160479594189532, "percentage": 10.8, "elapsed_time": "0:38:04", "remaining_time": "5:14:24"} +{"current_steps": 938, "total_steps": 8674, "loss": 0.5263733863830566, "lr": 1.981667577024613e-06, "epoch": 0.21627853354853585, "percentage": 10.81, "elapsed_time": "0:38:07", "remaining_time": "5:14:23"} +{"current_steps": 939, "total_steps": 
8674, "loss": 0.5440605878829956, "lr": 1.9815948368772484e-06, "epoch": 0.2165091076781185, "percentage": 10.83, "elapsed_time": "0:38:09", "remaining_time": "5:14:19"} +{"current_steps": 940, "total_steps": 8674, "loss": 0.5140440464019775, "lr": 1.9815219540451263e-06, "epoch": 0.21673968180770117, "percentage": 10.84, "elapsed_time": "0:38:11", "remaining_time": "5:14:16"} +{"current_steps": 941, "total_steps": 8674, "loss": 0.6741353273391724, "lr": 1.9814489285388402e-06, "epoch": 0.21697025593728383, "percentage": 10.85, "elapsed_time": "0:38:14", "remaining_time": "5:14:14"} +{"current_steps": 942, "total_steps": 8674, "loss": 0.6243258714675903, "lr": 1.981375760369006e-06, "epoch": 0.2172008300668665, "percentage": 10.86, "elapsed_time": "0:38:16", "remaining_time": "5:14:11"} +{"current_steps": 943, "total_steps": 8674, "loss": 0.6363699436187744, "lr": 1.981302449546259e-06, "epoch": 0.21743140419644916, "percentage": 10.87, "elapsed_time": "0:38:19", "remaining_time": "5:14:08"} +{"current_steps": 944, "total_steps": 8674, "loss": 0.5849490165710449, "lr": 1.981228996081256e-06, "epoch": 0.21766197832603182, "percentage": 10.88, "elapsed_time": "0:38:21", "remaining_time": "5:14:05"} +{"current_steps": 945, "total_steps": 8674, "loss": 0.43679118156433105, "lr": 1.9811553999846736e-06, "epoch": 0.21789255245561448, "percentage": 10.89, "elapsed_time": "0:38:24", "remaining_time": "5:14:04"} +{"current_steps": 946, "total_steps": 8674, "loss": 0.5575870275497437, "lr": 1.9810816612672104e-06, "epoch": 0.21812312658519714, "percentage": 10.91, "elapsed_time": "0:38:26", "remaining_time": "5:14:01"} +{"current_steps": 947, "total_steps": 8674, "loss": 0.5288122296333313, "lr": 1.9810077799395846e-06, "epoch": 0.2183537007147798, "percentage": 10.92, "elapsed_time": "0:38:28", "remaining_time": "5:13:58"} +{"current_steps": 948, "total_steps": 8674, "loss": 0.5618559718132019, "lr": 1.9809337560125357e-06, "epoch": 0.21858427484436246, "percentage": 10.93, 
"elapsed_time": "0:38:31", "remaining_time": "5:13:55"} +{"current_steps": 949, "total_steps": 8674, "loss": 0.6346654891967773, "lr": 1.980859589496824e-06, "epoch": 0.21881484897394513, "percentage": 10.94, "elapsed_time": "0:38:33", "remaining_time": "5:13:52"} +{"current_steps": 950, "total_steps": 8674, "loss": 0.5456810593605042, "lr": 1.98078528040323e-06, "epoch": 0.2190454231035278, "percentage": 10.95, "elapsed_time": "0:38:35", "remaining_time": "5:13:49"} +{"current_steps": 951, "total_steps": 8674, "loss": 0.6463650465011597, "lr": 1.980710828742556e-06, "epoch": 0.21927599723311045, "percentage": 10.96, "elapsed_time": "0:38:38", "remaining_time": "5:13:46"} +{"current_steps": 952, "total_steps": 8674, "loss": 0.5013638734817505, "lr": 1.980636234525624e-06, "epoch": 0.2195065713626931, "percentage": 10.98, "elapsed_time": "0:38:40", "remaining_time": "5:13:43"} +{"current_steps": 953, "total_steps": 8674, "loss": 0.6522110104560852, "lr": 1.9805614977632763e-06, "epoch": 0.21973714549227577, "percentage": 10.99, "elapsed_time": "0:38:43", "remaining_time": "5:13:42"} +{"current_steps": 954, "total_steps": 8674, "loss": 0.5864803791046143, "lr": 1.9804866184663775e-06, "epoch": 0.21996771962185843, "percentage": 11.0, "elapsed_time": "0:38:45", "remaining_time": "5:13:39"} +{"current_steps": 955, "total_steps": 8674, "loss": 0.5261500477790833, "lr": 1.9804115966458116e-06, "epoch": 0.2201982937514411, "percentage": 11.01, "elapsed_time": "0:38:47", "remaining_time": "5:13:36"} +{"current_steps": 956, "total_steps": 8674, "loss": 0.585462212562561, "lr": 1.980336432312484e-06, "epoch": 0.22042886788102375, "percentage": 11.02, "elapsed_time": "0:38:50", "remaining_time": "5:13:32"} +{"current_steps": 957, "total_steps": 8674, "loss": 0.5889539122581482, "lr": 1.9802611254773207e-06, "epoch": 0.22065944201060642, "percentage": 11.03, "elapsed_time": "0:38:52", "remaining_time": "5:13:29"} +{"current_steps": 958, "total_steps": 8674, "loss": 
0.665162205696106, "lr": 1.980185676151268e-06, "epoch": 0.22089001614018908, "percentage": 11.04, "elapsed_time": "0:38:54", "remaining_time": "5:13:26"} +{"current_steps": 959, "total_steps": 8674, "loss": 0.5344980359077454, "lr": 1.9801100843452935e-06, "epoch": 0.22112059026977174, "percentage": 11.06, "elapsed_time": "0:38:57", "remaining_time": "5:13:23"} +{"current_steps": 960, "total_steps": 8674, "loss": 0.6301499009132385, "lr": 1.980034350070385e-06, "epoch": 0.2213511643993544, "percentage": 11.07, "elapsed_time": "0:38:59", "remaining_time": "5:13:20"} +{"current_steps": 961, "total_steps": 8674, "loss": 0.5114584565162659, "lr": 1.9799584733375512e-06, "epoch": 0.22158173852893706, "percentage": 11.08, "elapsed_time": "0:39:02", "remaining_time": "5:13:18"} +{"current_steps": 962, "total_steps": 8674, "loss": 0.5199861526489258, "lr": 1.979882454157822e-06, "epoch": 0.22181231265851972, "percentage": 11.09, "elapsed_time": "0:39:04", "remaining_time": "5:13:15"} +{"current_steps": 963, "total_steps": 8674, "loss": 0.5336212515830994, "lr": 1.9798062925422472e-06, "epoch": 0.22204288678810238, "percentage": 11.1, "elapsed_time": "0:39:06", "remaining_time": "5:13:12"} +{"current_steps": 964, "total_steps": 8674, "loss": 0.535847544670105, "lr": 1.9797299885018977e-06, "epoch": 0.22227346091768505, "percentage": 11.11, "elapsed_time": "0:39:09", "remaining_time": "5:13:09"} +{"current_steps": 965, "total_steps": 8674, "loss": 0.6234130859375, "lr": 1.979653542047865e-06, "epoch": 0.2225040350472677, "percentage": 11.13, "elapsed_time": "0:39:11", "remaining_time": "5:13:05"} +{"current_steps": 966, "total_steps": 8674, "loss": 0.5017205476760864, "lr": 1.979576953191262e-06, "epoch": 0.22273460917685037, "percentage": 11.14, "elapsed_time": "0:39:13", "remaining_time": "5:13:03"} +{"current_steps": 967, "total_steps": 8674, "loss": 0.4982973337173462, "lr": 1.9795002219432204e-06, "epoch": 0.22296518330643303, "percentage": 11.15, "elapsed_time": 
"0:39:16", "remaining_time": "5:13:00"} +{"current_steps": 968, "total_steps": 8674, "loss": 0.47946417331695557, "lr": 1.979423348314895e-06, "epoch": 0.2231957574360157, "percentage": 11.16, "elapsed_time": "0:39:18", "remaining_time": "5:12:57"} +{"current_steps": 969, "total_steps": 8674, "loss": 0.5431856513023376, "lr": 1.97934633231746e-06, "epoch": 0.22342633156559835, "percentage": 11.17, "elapsed_time": "0:39:21", "remaining_time": "5:12:53"} +{"current_steps": 970, "total_steps": 8674, "loss": 0.5355685949325562, "lr": 1.9792691739621097e-06, "epoch": 0.223656905695181, "percentage": 11.18, "elapsed_time": "0:39:23", "remaining_time": "5:12:52"} +{"current_steps": 971, "total_steps": 8674, "loss": 0.6103906631469727, "lr": 1.979191873260061e-06, "epoch": 0.22388747982476367, "percentage": 11.19, "elapsed_time": "0:39:26", "remaining_time": "5:12:49"} +{"current_steps": 972, "total_steps": 8674, "loss": 0.538421094417572, "lr": 1.9791144302225493e-06, "epoch": 0.22411805395434634, "percentage": 11.21, "elapsed_time": "0:39:28", "remaining_time": "5:12:46"} +{"current_steps": 973, "total_steps": 8674, "loss": 0.6068445444107056, "lr": 1.9790368448608322e-06, "epoch": 0.224348628083929, "percentage": 11.22, "elapsed_time": "0:39:30", "remaining_time": "5:12:43"} +{"current_steps": 974, "total_steps": 8674, "loss": 0.463737815618515, "lr": 1.9789591171861874e-06, "epoch": 0.22457920221351163, "percentage": 11.23, "elapsed_time": "0:39:33", "remaining_time": "5:12:40"} +{"current_steps": 975, "total_steps": 8674, "loss": 0.6588588953018188, "lr": 1.9788812472099135e-06, "epoch": 0.2248097763430943, "percentage": 11.24, "elapsed_time": "0:39:35", "remaining_time": "5:12:37"} +{"current_steps": 976, "total_steps": 8674, "loss": 0.678712010383606, "lr": 1.9788032349433297e-06, "epoch": 0.22504035047267695, "percentage": 11.25, "elapsed_time": "0:39:37", "remaining_time": "5:12:34"} +{"current_steps": 977, "total_steps": 8674, "loss": 0.6397948265075684, "lr": 
1.9787250803977757e-06, "epoch": 0.22527092460225961, "percentage": 11.26, "elapsed_time": "0:39:40", "remaining_time": "5:12:31"} +{"current_steps": 978, "total_steps": 8674, "loss": 0.5422782897949219, "lr": 1.978646783584612e-06, "epoch": 0.22550149873184228, "percentage": 11.28, "elapsed_time": "0:39:42", "remaining_time": "5:12:30"} +{"current_steps": 979, "total_steps": 8674, "loss": 0.5314444303512573, "lr": 1.9785683445152204e-06, "epoch": 0.22573207286142494, "percentage": 11.29, "elapsed_time": "0:39:45", "remaining_time": "5:12:27"} +{"current_steps": 980, "total_steps": 8674, "loss": 0.6260710954666138, "lr": 1.9784897632010026e-06, "epoch": 0.2259626469910076, "percentage": 11.3, "elapsed_time": "0:39:47", "remaining_time": "5:12:24"} +{"current_steps": 981, "total_steps": 8674, "loss": 0.6765384078025818, "lr": 1.9784110396533804e-06, "epoch": 0.22619322112059026, "percentage": 11.31, "elapsed_time": "0:39:49", "remaining_time": "5:12:21"} +{"current_steps": 982, "total_steps": 8674, "loss": 0.6716702580451965, "lr": 1.9783321738837983e-06, "epoch": 0.22642379525017292, "percentage": 11.32, "elapsed_time": "0:39:52", "remaining_time": "5:12:18"} +{"current_steps": 983, "total_steps": 8674, "loss": 0.5537375211715698, "lr": 1.978253165903719e-06, "epoch": 0.22665436937975558, "percentage": 11.33, "elapsed_time": "0:39:54", "remaining_time": "5:12:14"} +{"current_steps": 984, "total_steps": 8674, "loss": 0.525878369808197, "lr": 1.9781740157246285e-06, "epoch": 0.22688494350933824, "percentage": 11.34, "elapsed_time": "0:39:56", "remaining_time": "5:12:10"} +{"current_steps": 985, "total_steps": 8674, "loss": 0.6349027156829834, "lr": 1.978094723358031e-06, "epoch": 0.2271155176389209, "percentage": 11.36, "elapsed_time": "0:39:59", "remaining_time": "5:12:07"} +{"current_steps": 986, "total_steps": 8674, "loss": 0.5777440071105957, "lr": 1.9780152888154525e-06, "epoch": 0.22734609176850357, "percentage": 11.37, "elapsed_time": "0:40:01", 
"remaining_time": "5:12:04"} +{"current_steps": 987, "total_steps": 8674, "loss": 0.6181483268737793, "lr": 1.9779357121084402e-06, "epoch": 0.22757666589808623, "percentage": 11.38, "elapsed_time": "0:40:04", "remaining_time": "5:12:03"} +{"current_steps": 988, "total_steps": 8674, "loss": 0.6364198923110962, "lr": 1.9778559932485606e-06, "epoch": 0.2278072400276689, "percentage": 11.39, "elapsed_time": "0:40:06", "remaining_time": "5:12:00"} +{"current_steps": 989, "total_steps": 8674, "loss": 0.623460054397583, "lr": 1.9777761322474024e-06, "epoch": 0.22803781415725155, "percentage": 11.4, "elapsed_time": "0:40:08", "remaining_time": "5:11:58"} +{"current_steps": 990, "total_steps": 8674, "loss": 0.504749059677124, "lr": 1.977696129116574e-06, "epoch": 0.2282683882868342, "percentage": 11.41, "elapsed_time": "0:40:11", "remaining_time": "5:11:55"} +{"current_steps": 991, "total_steps": 8674, "loss": 0.5228890180587769, "lr": 1.9776159838677048e-06, "epoch": 0.22849896241641687, "percentage": 11.42, "elapsed_time": "0:40:13", "remaining_time": "5:11:52"} +{"current_steps": 992, "total_steps": 8674, "loss": 0.5765929222106934, "lr": 1.977535696512444e-06, "epoch": 0.22872953654599953, "percentage": 11.44, "elapsed_time": "0:40:16", "remaining_time": "5:11:49"} +{"current_steps": 993, "total_steps": 8674, "loss": 0.5165348052978516, "lr": 1.977455267062463e-06, "epoch": 0.2289601106755822, "percentage": 11.45, "elapsed_time": "0:40:18", "remaining_time": "5:11:46"} +{"current_steps": 994, "total_steps": 8674, "loss": 0.6056735515594482, "lr": 1.9773746955294525e-06, "epoch": 0.22919068480516486, "percentage": 11.46, "elapsed_time": "0:40:20", "remaining_time": "5:11:42"} +{"current_steps": 995, "total_steps": 8674, "loss": 0.5430403351783752, "lr": 1.9772939819251245e-06, "epoch": 0.22942125893474752, "percentage": 11.47, "elapsed_time": "0:40:23", "remaining_time": "5:11:41"} +{"current_steps": 996, "total_steps": 8674, "loss": 0.5710945129394531, "lr": 
1.977213126261212e-06, "epoch": 0.22965183306433018, "percentage": 11.48, "elapsed_time": "0:40:25", "remaining_time": "5:11:38"} +{"current_steps": 997, "total_steps": 8674, "loss": 0.5189366936683655, "lr": 1.977132128549468e-06, "epoch": 0.22988240719391284, "percentage": 11.49, "elapsed_time": "0:40:28", "remaining_time": "5:11:35"} +{"current_steps": 998, "total_steps": 8674, "loss": 0.6578037738800049, "lr": 1.977050988801666e-06, "epoch": 0.2301129813234955, "percentage": 11.51, "elapsed_time": "0:40:30", "remaining_time": "5:11:32"} +{"current_steps": 999, "total_steps": 8674, "loss": 0.5787034034729004, "lr": 1.9769697070296006e-06, "epoch": 0.23034355545307816, "percentage": 11.52, "elapsed_time": "0:40:32", "remaining_time": "5:11:29"} +{"current_steps": 1000, "total_steps": 8674, "loss": 0.5169408321380615, "lr": 1.976888283245087e-06, "epoch": 0.23057412958266083, "percentage": 11.53, "elapsed_time": "0:40:35", "remaining_time": "5:11:26"} +{"current_steps": 1001, "total_steps": 8674, "loss": 0.6326704025268555, "lr": 1.976806717459961e-06, "epoch": 0.2308047037122435, "percentage": 11.54, "elapsed_time": "0:40:38", "remaining_time": "5:11:33"} +{"current_steps": 1002, "total_steps": 8674, "loss": 0.5188414454460144, "lr": 1.9767250096860785e-06, "epoch": 0.23103527784182615, "percentage": 11.55, "elapsed_time": "0:40:41", "remaining_time": "5:11:30"} +{"current_steps": 1003, "total_steps": 8674, "loss": 0.5788798928260803, "lr": 1.9766431599353173e-06, "epoch": 0.2312658519714088, "percentage": 11.56, "elapsed_time": "0:40:43", "remaining_time": "5:11:29"} +{"current_steps": 1004, "total_steps": 8674, "loss": 0.5513355731964111, "lr": 1.976561168219575e-06, "epoch": 0.23149642610099147, "percentage": 11.57, "elapsed_time": "0:40:46", "remaining_time": "5:11:26"} +{"current_steps": 1005, "total_steps": 8674, "loss": 0.5810542106628418, "lr": 1.97647903455077e-06, "epoch": 0.23172700023057413, "percentage": 11.59, "elapsed_time": "0:40:48", 
"remaining_time": "5:11:23"} +{"current_steps": 1006, "total_steps": 8674, "loss": 0.6541746854782104, "lr": 1.9763967589408407e-06, "epoch": 0.2319575743601568, "percentage": 11.6, "elapsed_time": "0:40:50", "remaining_time": "5:11:19"} +{"current_steps": 1007, "total_steps": 8674, "loss": 0.48837774991989136, "lr": 1.976314341401747e-06, "epoch": 0.23218814848973945, "percentage": 11.61, "elapsed_time": "0:40:53", "remaining_time": "5:11:16"} +{"current_steps": 1008, "total_steps": 8674, "loss": 0.514664888381958, "lr": 1.976231781945469e-06, "epoch": 0.23241872261932212, "percentage": 11.62, "elapsed_time": "0:40:55", "remaining_time": "5:11:13"} +{"current_steps": 1009, "total_steps": 8674, "loss": 0.48295027017593384, "lr": 1.976149080584008e-06, "epoch": 0.23264929674890478, "percentage": 11.63, "elapsed_time": "0:40:57", "remaining_time": "5:11:10"} +{"current_steps": 1010, "total_steps": 8674, "loss": 0.5975791811943054, "lr": 1.9760662373293847e-06, "epoch": 0.23287987087848744, "percentage": 11.64, "elapsed_time": "0:41:00", "remaining_time": "5:11:07"} +{"current_steps": 1011, "total_steps": 8674, "loss": 0.4810718297958374, "lr": 1.9759832521936424e-06, "epoch": 0.2331104450080701, "percentage": 11.66, "elapsed_time": "0:41:02", "remaining_time": "5:11:06"} +{"current_steps": 1012, "total_steps": 8674, "loss": 0.5984642505645752, "lr": 1.9759001251888425e-06, "epoch": 0.23334101913765276, "percentage": 11.67, "elapsed_time": "0:41:05", "remaining_time": "5:11:03"} +{"current_steps": 1013, "total_steps": 8674, "loss": 0.600128710269928, "lr": 1.975816856327069e-06, "epoch": 0.23357159326723542, "percentage": 11.68, "elapsed_time": "0:41:07", "remaining_time": "5:11:00"} +{"current_steps": 1014, "total_steps": 8674, "loss": 0.5036175847053528, "lr": 1.9757334456204263e-06, "epoch": 0.23380216739681808, "percentage": 11.69, "elapsed_time": "0:41:09", "remaining_time": "5:10:57"} +{"current_steps": 1015, "total_steps": 8674, "loss": 0.49270063638687134, 
"lr": 1.975649893081038e-06, "epoch": 0.23403274152640074, "percentage": 11.7, "elapsed_time": "0:41:12", "remaining_time": "5:10:54"} +{"current_steps": 1016, "total_steps": 8674, "loss": 0.5337218642234802, "lr": 1.97556619872105e-06, "epoch": 0.2342633156559834, "percentage": 11.71, "elapsed_time": "0:41:14", "remaining_time": "5:10:51"} +{"current_steps": 1017, "total_steps": 8674, "loss": 0.5263136625289917, "lr": 1.9754823625526277e-06, "epoch": 0.23449388978556607, "percentage": 11.72, "elapsed_time": "0:41:16", "remaining_time": "5:10:48"} +{"current_steps": 1018, "total_steps": 8674, "loss": 0.6271284818649292, "lr": 1.975398384587958e-06, "epoch": 0.23472446391514873, "percentage": 11.74, "elapsed_time": "0:41:19", "remaining_time": "5:10:45"} +{"current_steps": 1019, "total_steps": 8674, "loss": 0.7009197473526001, "lr": 1.975314264839248e-06, "epoch": 0.2349550380447314, "percentage": 11.75, "elapsed_time": "0:41:21", "remaining_time": "5:10:42"} +{"current_steps": 1020, "total_steps": 8674, "loss": 0.5781605839729309, "lr": 1.9752300033187248e-06, "epoch": 0.23518561217431405, "percentage": 11.76, "elapsed_time": "0:41:24", "remaining_time": "5:10:41"} +{"current_steps": 1021, "total_steps": 8674, "loss": 0.549934446811676, "lr": 1.9751456000386367e-06, "epoch": 0.2354161863038967, "percentage": 11.77, "elapsed_time": "0:41:26", "remaining_time": "5:10:38"} +{"current_steps": 1022, "total_steps": 8674, "loss": 0.5856816172599792, "lr": 1.9750610550112535e-06, "epoch": 0.23564676043347937, "percentage": 11.78, "elapsed_time": "0:41:28", "remaining_time": "5:10:35"} +{"current_steps": 1023, "total_steps": 8674, "loss": 0.6225322484970093, "lr": 1.9749763682488638e-06, "epoch": 0.23587733456306204, "percentage": 11.79, "elapsed_time": "0:41:31", "remaining_time": "5:10:32"} +{"current_steps": 1024, "total_steps": 8674, "loss": 0.5533155202865601, "lr": 1.9748915397637775e-06, "epoch": 0.2361079086926447, "percentage": 11.81, "elapsed_time": "0:41:33", 
"remaining_time": "5:10:29"} +{"current_steps": 1025, "total_steps": 8674, "loss": 0.4960908889770508, "lr": 1.974806569568326e-06, "epoch": 0.23633848282222736, "percentage": 11.82, "elapsed_time": "0:41:36", "remaining_time": "5:10:26"} +{"current_steps": 1026, "total_steps": 8674, "loss": 0.5960450768470764, "lr": 1.97472145767486e-06, "epoch": 0.23656905695181002, "percentage": 11.83, "elapsed_time": "0:41:38", "remaining_time": "5:10:23"} +{"current_steps": 1027, "total_steps": 8674, "loss": 0.5653714537620544, "lr": 1.9746362040957517e-06, "epoch": 0.23679963108139268, "percentage": 11.84, "elapsed_time": "0:41:40", "remaining_time": "5:10:21"} +{"current_steps": 1028, "total_steps": 8674, "loss": 0.6400578022003174, "lr": 1.9745508088433936e-06, "epoch": 0.23703020521097534, "percentage": 11.85, "elapsed_time": "0:41:43", "remaining_time": "5:10:20"} +{"current_steps": 1029, "total_steps": 8674, "loss": 0.5459057092666626, "lr": 1.9744652719301987e-06, "epoch": 0.23726077934055798, "percentage": 11.86, "elapsed_time": "0:41:45", "remaining_time": "5:10:17"} +{"current_steps": 1030, "total_steps": 8674, "loss": 0.46735280752182007, "lr": 1.9743795933686005e-06, "epoch": 0.23749135347014064, "percentage": 11.87, "elapsed_time": "0:41:48", "remaining_time": "5:10:14"} +{"current_steps": 1031, "total_steps": 8674, "loss": 0.526339590549469, "lr": 1.9742937731710533e-06, "epoch": 0.2377219275997233, "percentage": 11.89, "elapsed_time": "0:41:50", "remaining_time": "5:10:11"} +{"current_steps": 1032, "total_steps": 8674, "loss": 0.5976641178131104, "lr": 1.9742078113500323e-06, "epoch": 0.23795250172930596, "percentage": 11.9, "elapsed_time": "0:41:52", "remaining_time": "5:10:08"} +{"current_steps": 1033, "total_steps": 8674, "loss": 0.5331728458404541, "lr": 1.9741217079180325e-06, "epoch": 0.23818307585888862, "percentage": 11.91, "elapsed_time": "0:41:55", "remaining_time": "5:10:05"} +{"current_steps": 1034, "total_steps": 8674, "loss": 0.5743261575698853, 
"lr": 1.9740354628875696e-06, "epoch": 0.23841364998847128, "percentage": 11.92, "elapsed_time": "0:41:57", "remaining_time": "5:10:03"} +{"current_steps": 1035, "total_steps": 8674, "loss": 0.54700767993927, "lr": 1.973949076271181e-06, "epoch": 0.23864422411805394, "percentage": 11.93, "elapsed_time": "0:42:00", "remaining_time": "5:10:00"} +{"current_steps": 1036, "total_steps": 8674, "loss": 0.5483411550521851, "lr": 1.9738625480814235e-06, "epoch": 0.2388747982476366, "percentage": 11.94, "elapsed_time": "0:42:02", "remaining_time": "5:09:57"} +{"current_steps": 1037, "total_steps": 8674, "loss": 0.5677193403244019, "lr": 1.973775878330875e-06, "epoch": 0.23910537237721927, "percentage": 11.96, "elapsed_time": "0:42:05", "remaining_time": "5:09:56"} +{"current_steps": 1038, "total_steps": 8674, "loss": 0.5092767477035522, "lr": 1.973689067032133e-06, "epoch": 0.23933594650680193, "percentage": 11.97, "elapsed_time": "0:42:07", "remaining_time": "5:09:53"} +{"current_steps": 1039, "total_steps": 8674, "loss": 0.5618614554405212, "lr": 1.973602114197818e-06, "epoch": 0.2395665206363846, "percentage": 11.98, "elapsed_time": "0:42:09", "remaining_time": "5:09:50"} +{"current_steps": 1040, "total_steps": 8674, "loss": 0.5601966977119446, "lr": 1.9735150198405677e-06, "epoch": 0.23979709476596725, "percentage": 11.99, "elapsed_time": "0:42:12", "remaining_time": "5:09:48"} +{"current_steps": 1041, "total_steps": 8674, "loss": 0.5945397019386292, "lr": 1.973427783973043e-06, "epoch": 0.2400276688955499, "percentage": 12.0, "elapsed_time": "0:42:14", "remaining_time": "5:09:44"} +{"current_steps": 1042, "total_steps": 8674, "loss": 0.42448002099990845, "lr": 1.9733404066079253e-06, "epoch": 0.24025824302513257, "percentage": 12.01, "elapsed_time": "0:42:16", "remaining_time": "5:09:41"} +{"current_steps": 1043, "total_steps": 8674, "loss": 0.5237313508987427, "lr": 1.9732528877579146e-06, "epoch": 0.24048881715471523, "percentage": 12.02, "elapsed_time": "0:42:19", 
"remaining_time": "5:09:38"} +{"current_steps": 1044, "total_steps": 8674, "loss": 0.6006743907928467, "lr": 1.973165227435733e-06, "epoch": 0.2407193912842979, "percentage": 12.04, "elapsed_time": "0:42:21", "remaining_time": "5:09:35"} +{"current_steps": 1045, "total_steps": 8674, "loss": 0.547584056854248, "lr": 1.973077425654123e-06, "epoch": 0.24094996541388056, "percentage": 12.05, "elapsed_time": "0:42:24", "remaining_time": "5:09:34"} +{"current_steps": 1046, "total_steps": 8674, "loss": 0.5472346544265747, "lr": 1.972989482425847e-06, "epoch": 0.24118053954346322, "percentage": 12.06, "elapsed_time": "0:42:26", "remaining_time": "5:09:31"} +{"current_steps": 1047, "total_steps": 8674, "loss": 0.5962260365486145, "lr": 1.972901397763689e-06, "epoch": 0.24141111367304588, "percentage": 12.07, "elapsed_time": "0:42:29", "remaining_time": "5:09:28"} +{"current_steps": 1048, "total_steps": 8674, "loss": 0.561386227607727, "lr": 1.9728131716804525e-06, "epoch": 0.24164168780262854, "percentage": 12.08, "elapsed_time": "0:42:31", "remaining_time": "5:09:26"} +{"current_steps": 1049, "total_steps": 8674, "loss": 0.46618524193763733, "lr": 1.9727248041889624e-06, "epoch": 0.2418722619322112, "percentage": 12.09, "elapsed_time": "0:42:33", "remaining_time": "5:09:23"} +{"current_steps": 1050, "total_steps": 8674, "loss": 0.4684019088745117, "lr": 1.9726362953020643e-06, "epoch": 0.24210283606179386, "percentage": 12.11, "elapsed_time": "0:42:36", "remaining_time": "5:09:20"} +{"current_steps": 1051, "total_steps": 8674, "loss": 0.5670303106307983, "lr": 1.9725476450326227e-06, "epoch": 0.24233341019137652, "percentage": 12.12, "elapsed_time": "0:42:38", "remaining_time": "5:09:17"} +{"current_steps": 1052, "total_steps": 8674, "loss": 0.5451534986495972, "lr": 1.9724588533935246e-06, "epoch": 0.2425639843209592, "percentage": 12.13, "elapsed_time": "0:42:40", "remaining_time": "5:09:14"} +{"current_steps": 1053, "total_steps": 8674, "loss": 0.578605592250824, "lr": 
1.9723699203976766e-06, "epoch": 0.24279455845054185, "percentage": 12.14, "elapsed_time": "0:42:43", "remaining_time": "5:09:13"} +{"current_steps": 1054, "total_steps": 8674, "loss": 0.5844857692718506, "lr": 1.972280846058006e-06, "epoch": 0.2430251325801245, "percentage": 12.15, "elapsed_time": "0:42:45", "remaining_time": "5:09:10"} +{"current_steps": 1055, "total_steps": 8674, "loss": 0.5152320861816406, "lr": 1.9721916303874603e-06, "epoch": 0.24325570670970717, "percentage": 12.16, "elapsed_time": "0:42:48", "remaining_time": "5:09:07"} +{"current_steps": 1056, "total_steps": 8674, "loss": 0.5108952522277832, "lr": 1.9721022733990087e-06, "epoch": 0.24348628083928983, "percentage": 12.17, "elapsed_time": "0:42:50", "remaining_time": "5:09:04"} +{"current_steps": 1057, "total_steps": 8674, "loss": 0.6345964670181274, "lr": 1.97201277510564e-06, "epoch": 0.2437168549688725, "percentage": 12.19, "elapsed_time": "0:42:52", "remaining_time": "5:09:00"} +{"current_steps": 1058, "total_steps": 8674, "loss": 0.6699639558792114, "lr": 1.9719231355203627e-06, "epoch": 0.24394742909845515, "percentage": 12.2, "elapsed_time": "0:42:55", "remaining_time": "5:08:58"} +{"current_steps": 1059, "total_steps": 8674, "loss": 0.5426750779151917, "lr": 1.971833354656208e-06, "epoch": 0.24417800322803782, "percentage": 12.21, "elapsed_time": "0:42:57", "remaining_time": "5:08:55"} +{"current_steps": 1060, "total_steps": 8674, "loss": 0.45797908306121826, "lr": 1.9717434325262253e-06, "epoch": 0.24440857735762048, "percentage": 12.22, "elapsed_time": "0:43:00", "remaining_time": "5:08:52"} +{"current_steps": 1061, "total_steps": 8674, "loss": 0.46754708886146545, "lr": 1.9716533691434872e-06, "epoch": 0.24463915148720314, "percentage": 12.23, "elapsed_time": "0:43:02", "remaining_time": "5:08:52"} +{"current_steps": 1062, "total_steps": 8674, "loss": 0.6593209505081177, "lr": 1.9715631645210838e-06, "epoch": 0.2448697256167858, "percentage": 12.24, "elapsed_time": "0:43:05", 
"remaining_time": "5:08:50"} +{"current_steps": 1063, "total_steps": 8674, "loss": 0.5634866952896118, "lr": 1.9714728186721287e-06, "epoch": 0.24510029974636846, "percentage": 12.26, "elapsed_time": "0:43:07", "remaining_time": "5:08:48"} +{"current_steps": 1064, "total_steps": 8674, "loss": 0.5066277980804443, "lr": 1.971382331609753e-06, "epoch": 0.24533087387595112, "percentage": 12.27, "elapsed_time": "0:43:10", "remaining_time": "5:08:44"} +{"current_steps": 1065, "total_steps": 8674, "loss": 0.5721756219863892, "lr": 1.9712917033471113e-06, "epoch": 0.24556144800553378, "percentage": 12.28, "elapsed_time": "0:43:12", "remaining_time": "5:08:41"} +{"current_steps": 1066, "total_steps": 8674, "loss": 0.5188664197921753, "lr": 1.9712009338973765e-06, "epoch": 0.24579202213511644, "percentage": 12.29, "elapsed_time": "0:43:14", "remaining_time": "5:08:38"} +{"current_steps": 1067, "total_steps": 8674, "loss": 0.4879762828350067, "lr": 1.9711100232737434e-06, "epoch": 0.2460225962646991, "percentage": 12.3, "elapsed_time": "0:43:17", "remaining_time": "5:08:35"} +{"current_steps": 1068, "total_steps": 8674, "loss": 0.5169111490249634, "lr": 1.971018971489426e-06, "epoch": 0.24625317039428177, "percentage": 12.31, "elapsed_time": "0:43:19", "remaining_time": "5:08:32"} +{"current_steps": 1069, "total_steps": 8674, "loss": 0.7341418862342834, "lr": 1.9709277785576605e-06, "epoch": 0.24648374452386443, "percentage": 12.32, "elapsed_time": "0:43:21", "remaining_time": "5:08:30"} +{"current_steps": 1070, "total_steps": 8674, "loss": 0.48676228523254395, "lr": 1.970836444491702e-06, "epoch": 0.2467143186534471, "percentage": 12.34, "elapsed_time": "0:43:24", "remaining_time": "5:08:29"} +{"current_steps": 1071, "total_steps": 8674, "loss": 0.5594040751457214, "lr": 1.9707449693048277e-06, "epoch": 0.24694489278302975, "percentage": 12.35, "elapsed_time": "0:43:27", "remaining_time": "5:08:27"} +{"current_steps": 1072, "total_steps": 8674, "loss": 0.575579047203064, 
"lr": 1.970653353010334e-06, "epoch": 0.2471754669126124, "percentage": 12.36, "elapsed_time": "0:43:29", "remaining_time": "5:08:24"} +{"current_steps": 1073, "total_steps": 8674, "loss": 0.5212938189506531, "lr": 1.9705615956215375e-06, "epoch": 0.24740604104219507, "percentage": 12.37, "elapsed_time": "0:43:31", "remaining_time": "5:08:21"} +{"current_steps": 1074, "total_steps": 8674, "loss": 0.49838072061538696, "lr": 1.970469697151777e-06, "epoch": 0.24763661517177774, "percentage": 12.38, "elapsed_time": "0:43:34", "remaining_time": "5:08:18"} +{"current_steps": 1075, "total_steps": 8674, "loss": 0.505547285079956, "lr": 1.9703776576144106e-06, "epoch": 0.2478671893013604, "percentage": 12.39, "elapsed_time": "0:43:36", "remaining_time": "5:08:15"} +{"current_steps": 1076, "total_steps": 8674, "loss": 0.5236082077026367, "lr": 1.970285477022817e-06, "epoch": 0.24809776343094306, "percentage": 12.4, "elapsed_time": "0:43:38", "remaining_time": "5:08:13"} +{"current_steps": 1077, "total_steps": 8674, "loss": 0.5417677760124207, "lr": 1.9701931553903963e-06, "epoch": 0.24832833756052572, "percentage": 12.42, "elapsed_time": "0:43:41", "remaining_time": "5:08:10"} +{"current_steps": 1078, "total_steps": 8674, "loss": 0.624547004699707, "lr": 1.9701006927305676e-06, "epoch": 0.24855891169010838, "percentage": 12.43, "elapsed_time": "0:43:43", "remaining_time": "5:08:08"} +{"current_steps": 1079, "total_steps": 8674, "loss": 0.7127759456634521, "lr": 1.9700080890567713e-06, "epoch": 0.24878948581969104, "percentage": 12.44, "elapsed_time": "0:43:46", "remaining_time": "5:08:06"} +{"current_steps": 1080, "total_steps": 8674, "loss": 0.44590264558792114, "lr": 1.9699153443824686e-06, "epoch": 0.2490200599492737, "percentage": 12.45, "elapsed_time": "0:43:48", "remaining_time": "5:08:03"} +{"current_steps": 1081, "total_steps": 8674, "loss": 0.6311746835708618, "lr": 1.9698224587211407e-06, "epoch": 0.24925063407885636, "percentage": 12.46, "elapsed_time": "0:43:51", 
"remaining_time": "5:08:00"} +{"current_steps": 1082, "total_steps": 8674, "loss": 0.4837970733642578, "lr": 1.9697294320862898e-06, "epoch": 0.24948120820843903, "percentage": 12.47, "elapsed_time": "0:43:53", "remaining_time": "5:07:57"} +{"current_steps": 1083, "total_steps": 8674, "loss": 0.5749634504318237, "lr": 1.969636264491438e-06, "epoch": 0.2497117823380217, "percentage": 12.49, "elapsed_time": "0:43:55", "remaining_time": "5:07:54"} +{"current_steps": 1084, "total_steps": 8674, "loss": 0.5002774000167847, "lr": 1.9695429559501283e-06, "epoch": 0.24994235646760435, "percentage": 12.5, "elapsed_time": "0:43:58", "remaining_time": "5:07:52"} +{"current_steps": 1085, "total_steps": 8674, "loss": 0.5407592058181763, "lr": 1.9694495064759236e-06, "epoch": 0.250172930597187, "percentage": 12.51, "elapsed_time": "0:44:00", "remaining_time": "5:07:49"} +{"current_steps": 1086, "total_steps": 8674, "loss": 0.5557315349578857, "lr": 1.969355916082408e-06, "epoch": 0.25040350472676964, "percentage": 12.52, "elapsed_time": "0:44:02", "remaining_time": "5:07:46"} +{"current_steps": 1087, "total_steps": 8674, "loss": 0.4710160493850708, "lr": 1.9692621847831865e-06, "epoch": 0.2506340788563523, "percentage": 12.53, "elapsed_time": "0:44:05", "remaining_time": "5:07:45"} +{"current_steps": 1088, "total_steps": 8674, "loss": 0.5935187339782715, "lr": 1.969168312591883e-06, "epoch": 0.25086465298593497, "percentage": 12.54, "elapsed_time": "0:44:07", "remaining_time": "5:07:42"} +{"current_steps": 1089, "total_steps": 8674, "loss": 0.5358916521072388, "lr": 1.969074299522143e-06, "epoch": 0.2510952271155176, "percentage": 12.55, "elapsed_time": "0:44:10", "remaining_time": "5:07:40"} +{"current_steps": 1090, "total_steps": 8674, "loss": 0.40736621618270874, "lr": 1.968980145587632e-06, "epoch": 0.2513258012451003, "percentage": 12.57, "elapsed_time": "0:44:12", "remaining_time": "5:07:37"} +{"current_steps": 1091, "total_steps": 8674, "loss": 0.4986698627471924, "lr": 
1.968885850802037e-06, "epoch": 0.25155637537468295, "percentage": 12.58, "elapsed_time": "0:44:15", "remaining_time": "5:07:34"} +{"current_steps": 1092, "total_steps": 8674, "loss": 0.5547258853912354, "lr": 1.968791415179064e-06, "epoch": 0.2517869495042656, "percentage": 12.59, "elapsed_time": "0:44:17", "remaining_time": "5:07:31"} +{"current_steps": 1093, "total_steps": 8674, "loss": 0.5187167525291443, "lr": 1.96869683873244e-06, "epoch": 0.2520175236338483, "percentage": 12.6, "elapsed_time": "0:44:19", "remaining_time": "5:07:28"} +{"current_steps": 1094, "total_steps": 8674, "loss": 0.560575008392334, "lr": 1.9686021214759136e-06, "epoch": 0.25224809776343093, "percentage": 12.61, "elapsed_time": "0:44:22", "remaining_time": "5:07:25"} +{"current_steps": 1095, "total_steps": 8674, "loss": 0.6441233158111572, "lr": 1.968507263423252e-06, "epoch": 0.2524786718930136, "percentage": 12.62, "elapsed_time": "0:44:24", "remaining_time": "5:07:24"} +{"current_steps": 1096, "total_steps": 8674, "loss": 0.6693669557571411, "lr": 1.9684122645882446e-06, "epoch": 0.25270924602259626, "percentage": 12.64, "elapsed_time": "0:44:27", "remaining_time": "5:07:21"} +{"current_steps": 1097, "total_steps": 8674, "loss": 0.4713742434978485, "lr": 1.9683171249846992e-06, "epoch": 0.2529398201521789, "percentage": 12.65, "elapsed_time": "0:44:29", "remaining_time": "5:07:18"} +{"current_steps": 1098, "total_steps": 8674, "loss": 0.5393046140670776, "lr": 1.9682218446264466e-06, "epoch": 0.2531703942817616, "percentage": 12.66, "elapsed_time": "0:44:31", "remaining_time": "5:07:15"} +{"current_steps": 1099, "total_steps": 8674, "loss": 0.44416874647140503, "lr": 1.968126423527336e-06, "epoch": 0.25340096841134424, "percentage": 12.67, "elapsed_time": "0:44:34", "remaining_time": "5:07:13"} +{"current_steps": 1100, "total_steps": 8674, "loss": 0.486186683177948, "lr": 1.9680308617012383e-06, "epoch": 0.2536315425409269, "percentage": 12.68, "elapsed_time": "0:44:36", 
"remaining_time": "5:07:10"} +{"current_steps": 1101, "total_steps": 8674, "loss": 0.5523893237113953, "lr": 1.9679351591620446e-06, "epoch": 0.25386211667050956, "percentage": 12.69, "elapsed_time": "0:44:40", "remaining_time": "5:07:17"} +{"current_steps": 1102, "total_steps": 8674, "loss": 0.49889492988586426, "lr": 1.967839315923665e-06, "epoch": 0.2540926908000922, "percentage": 12.7, "elapsed_time": "0:44:42", "remaining_time": "5:07:14"} +{"current_steps": 1103, "total_steps": 8674, "loss": 0.6084630489349365, "lr": 1.9677433320000325e-06, "epoch": 0.2543232649296749, "percentage": 12.72, "elapsed_time": "0:44:45", "remaining_time": "5:07:13"} +{"current_steps": 1104, "total_steps": 8674, "loss": 0.5458555221557617, "lr": 1.967647207405099e-06, "epoch": 0.25455383905925755, "percentage": 12.73, "elapsed_time": "0:44:47", "remaining_time": "5:07:10"} +{"current_steps": 1105, "total_steps": 8674, "loss": 0.5453877449035645, "lr": 1.9675509421528367e-06, "epoch": 0.2547844131888402, "percentage": 12.74, "elapsed_time": "0:44:50", "remaining_time": "5:07:08"} +{"current_steps": 1106, "total_steps": 8674, "loss": 0.5226954221725464, "lr": 1.9674545362572393e-06, "epoch": 0.25501498731842287, "percentage": 12.75, "elapsed_time": "0:44:52", "remaining_time": "5:07:06"} +{"current_steps": 1107, "total_steps": 8674, "loss": 0.5736720561981201, "lr": 1.96735798973232e-06, "epoch": 0.25524556144800553, "percentage": 12.76, "elapsed_time": "0:44:55", "remaining_time": "5:07:03"} +{"current_steps": 1108, "total_steps": 8674, "loss": 0.5474177598953247, "lr": 1.9672613025921135e-06, "epoch": 0.2554761355775882, "percentage": 12.77, "elapsed_time": "0:44:57", "remaining_time": "5:07:00"} +{"current_steps": 1109, "total_steps": 8674, "loss": 0.5146498084068298, "lr": 1.967164474850673e-06, "epoch": 0.25570670970717085, "percentage": 12.79, "elapsed_time": "0:45:00", "remaining_time": "5:06:58"} +{"current_steps": 1110, "total_steps": 8674, "loss": 0.6319057941436768, "lr": 
1.967067506522075e-06, "epoch": 0.2559372838367535, "percentage": 12.8, "elapsed_time": "0:45:02", "remaining_time": "5:06:55"} +{"current_steps": 1111, "total_steps": 8674, "loss": 0.44495588541030884, "lr": 1.9669703976204136e-06, "epoch": 0.2561678579663362, "percentage": 12.81, "elapsed_time": "0:45:05", "remaining_time": "5:06:55"} +{"current_steps": 1112, "total_steps": 8674, "loss": 0.5331558585166931, "lr": 1.9668731481598052e-06, "epoch": 0.25639843209591884, "percentage": 12.82, "elapsed_time": "0:45:07", "remaining_time": "5:06:52"} +{"current_steps": 1113, "total_steps": 8674, "loss": 0.5409468412399292, "lr": 1.9667757581543856e-06, "epoch": 0.2566290062255015, "percentage": 12.83, "elapsed_time": "0:45:09", "remaining_time": "5:06:49"} +{"current_steps": 1114, "total_steps": 8674, "loss": 0.5743308663368225, "lr": 1.9666782276183112e-06, "epoch": 0.25685958035508416, "percentage": 12.84, "elapsed_time": "0:45:12", "remaining_time": "5:06:46"} +{"current_steps": 1115, "total_steps": 8674, "loss": 0.5612793564796448, "lr": 1.96658055656576e-06, "epoch": 0.2570901544846668, "percentage": 12.85, "elapsed_time": "0:45:14", "remaining_time": "5:06:44"} +{"current_steps": 1116, "total_steps": 8674, "loss": 0.554356575012207, "lr": 1.9664827450109285e-06, "epoch": 0.2573207286142495, "percentage": 12.87, "elapsed_time": "0:45:17", "remaining_time": "5:06:41"} +{"current_steps": 1117, "total_steps": 8674, "loss": 0.5999840497970581, "lr": 1.9663847929680352e-06, "epoch": 0.25755130274383214, "percentage": 12.88, "elapsed_time": "0:45:19", "remaining_time": "5:06:39"} +{"current_steps": 1118, "total_steps": 8674, "loss": 0.5152497291564941, "lr": 1.9662867004513184e-06, "epoch": 0.2577818768734148, "percentage": 12.89, "elapsed_time": "0:45:21", "remaining_time": "5:06:36"} +{"current_steps": 1119, "total_steps": 8674, "loss": 0.6333990097045898, "lr": 1.966188467475036e-06, "epoch": 0.25801245100299747, "percentage": 12.9, "elapsed_time": "0:45:24", 
"remaining_time": "5:06:34"} +{"current_steps": 1120, "total_steps": 8674, "loss": 0.5826340913772583, "lr": 1.9660900940534685e-06, "epoch": 0.25824302513258013, "percentage": 12.91, "elapsed_time": "0:45:27", "remaining_time": "5:06:33"} +{"current_steps": 1121, "total_steps": 8674, "loss": 0.5968586206436157, "lr": 1.965991580200915e-06, "epoch": 0.2584735992621628, "percentage": 12.92, "elapsed_time": "0:45:29", "remaining_time": "5:06:30"} +{"current_steps": 1122, "total_steps": 8674, "loss": 0.6164212226867676, "lr": 1.9658929259316945e-06, "epoch": 0.25870417339174545, "percentage": 12.94, "elapsed_time": "0:45:31", "remaining_time": "5:06:27"} +{"current_steps": 1123, "total_steps": 8674, "loss": 0.6115970611572266, "lr": 1.9657941312601487e-06, "epoch": 0.2589347475213281, "percentage": 12.95, "elapsed_time": "0:45:34", "remaining_time": "5:06:24"} +{"current_steps": 1124, "total_steps": 8674, "loss": 0.5490012168884277, "lr": 1.9656951962006376e-06, "epoch": 0.2591653216509108, "percentage": 12.96, "elapsed_time": "0:45:36", "remaining_time": "5:06:22"} +{"current_steps": 1125, "total_steps": 8674, "loss": 0.6350439786911011, "lr": 1.9655961207675425e-06, "epoch": 0.25939589578049344, "percentage": 12.97, "elapsed_time": "0:45:38", "remaining_time": "5:06:19"} +{"current_steps": 1126, "total_steps": 8674, "loss": 0.5667803287506104, "lr": 1.965496904975266e-06, "epoch": 0.2596264699100761, "percentage": 12.98, "elapsed_time": "0:45:41", "remaining_time": "5:06:16"} +{"current_steps": 1127, "total_steps": 8674, "loss": 0.6443949937820435, "lr": 1.9653975488382287e-06, "epoch": 0.25985704403965876, "percentage": 12.99, "elapsed_time": "0:45:43", "remaining_time": "5:06:13"} +{"current_steps": 1128, "total_steps": 8674, "loss": 0.6085849404335022, "lr": 1.965298052370874e-06, "epoch": 0.2600876181692414, "percentage": 13.0, "elapsed_time": "0:45:46", "remaining_time": "5:06:13"} +{"current_steps": 1129, "total_steps": 8674, "loss": 0.6633332967758179, "lr": 
1.9651984155876644e-06, "epoch": 0.2603181922988241, "percentage": 13.02, "elapsed_time": "0:45:48", "remaining_time": "5:06:10"} +{"current_steps": 1130, "total_steps": 8674, "loss": 0.5997219085693359, "lr": 1.965098638503083e-06, "epoch": 0.26054876642840674, "percentage": 13.03, "elapsed_time": "0:45:51", "remaining_time": "5:06:07"} +{"current_steps": 1131, "total_steps": 8674, "loss": 0.5425878167152405, "lr": 1.9649987211316333e-06, "epoch": 0.2607793405579894, "percentage": 13.04, "elapsed_time": "0:45:53", "remaining_time": "5:06:04"} +{"current_steps": 1132, "total_steps": 8674, "loss": 0.5894105434417725, "lr": 1.9648986634878397e-06, "epoch": 0.26100991468757206, "percentage": 13.05, "elapsed_time": "0:45:56", "remaining_time": "5:06:02"} +{"current_steps": 1133, "total_steps": 8674, "loss": 0.5967395901679993, "lr": 1.9647984655862464e-06, "epoch": 0.2612404888171547, "percentage": 13.06, "elapsed_time": "0:45:58", "remaining_time": "5:05:59"} +{"current_steps": 1134, "total_steps": 8674, "loss": 0.5129253268241882, "lr": 1.964698127441418e-06, "epoch": 0.2614710629467374, "percentage": 13.07, "elapsed_time": "0:46:00", "remaining_time": "5:05:56"} +{"current_steps": 1135, "total_steps": 8674, "loss": 0.4503140449523926, "lr": 1.96459764906794e-06, "epoch": 0.26170163707632005, "percentage": 13.09, "elapsed_time": "0:46:03", "remaining_time": "5:05:54"} +{"current_steps": 1136, "total_steps": 8674, "loss": 0.5533326864242554, "lr": 1.964497030480418e-06, "epoch": 0.2619322112059027, "percentage": 13.1, "elapsed_time": "0:46:05", "remaining_time": "5:05:51"} +{"current_steps": 1137, "total_steps": 8674, "loss": 0.695278525352478, "lr": 1.9643962716934776e-06, "epoch": 0.26216278533548537, "percentage": 13.11, "elapsed_time": "0:46:08", "remaining_time": "5:05:50"} +{"current_steps": 1138, "total_steps": 8674, "loss": 0.5198212265968323, "lr": 1.9642953727217654e-06, "epoch": 0.26239335946506803, "percentage": 13.12, "elapsed_time": "0:46:10", 
"remaining_time": "5:05:47"} +{"current_steps": 1139, "total_steps": 8674, "loss": 0.4348503351211548, "lr": 1.9641943335799476e-06, "epoch": 0.2626239335946507, "percentage": 13.13, "elapsed_time": "0:46:13", "remaining_time": "5:05:44"} +{"current_steps": 1140, "total_steps": 8674, "loss": 0.5241343975067139, "lr": 1.9640931542827116e-06, "epoch": 0.26285450772423335, "percentage": 13.14, "elapsed_time": "0:46:15", "remaining_time": "5:05:42"} +{"current_steps": 1141, "total_steps": 8674, "loss": 0.6621984839439392, "lr": 1.9639918348447654e-06, "epoch": 0.263085081853816, "percentage": 13.15, "elapsed_time": "0:46:17", "remaining_time": "5:05:39"} +{"current_steps": 1142, "total_steps": 8674, "loss": 0.6091395020484924, "lr": 1.9638903752808358e-06, "epoch": 0.2633156559833987, "percentage": 13.17, "elapsed_time": "0:46:20", "remaining_time": "5:05:37"} +{"current_steps": 1143, "total_steps": 8674, "loss": 0.4857162833213806, "lr": 1.963788775605671e-06, "epoch": 0.26354623011298134, "percentage": 13.18, "elapsed_time": "0:46:22", "remaining_time": "5:05:34"} +{"current_steps": 1144, "total_steps": 8674, "loss": 0.5912413597106934, "lr": 1.9636870358340408e-06, "epoch": 0.263776804242564, "percentage": 13.19, "elapsed_time": "0:46:25", "remaining_time": "5:05:31"} +{"current_steps": 1145, "total_steps": 8674, "loss": 0.6006268858909607, "lr": 1.9635851559807326e-06, "epoch": 0.26400737837214666, "percentage": 13.2, "elapsed_time": "0:46:27", "remaining_time": "5:05:30"} +{"current_steps": 1146, "total_steps": 8674, "loss": 0.5580735802650452, "lr": 1.9634831360605567e-06, "epoch": 0.2642379525017293, "percentage": 13.21, "elapsed_time": "0:46:30", "remaining_time": "5:05:28"} +{"current_steps": 1147, "total_steps": 8674, "loss": 0.5554602146148682, "lr": 1.9633809760883423e-06, "epoch": 0.264468526631312, "percentage": 13.22, "elapsed_time": "0:46:32", "remaining_time": "5:05:25"} +{"current_steps": 1148, "total_steps": 8674, "loss": 0.5648301839828491, "lr": 
1.9632786760789393e-06, "epoch": 0.26469910076089465, "percentage": 13.23, "elapsed_time": "0:46:34", "remaining_time": "5:05:21"} +{"current_steps": 1149, "total_steps": 8674, "loss": 0.5317412614822388, "lr": 1.9631762360472186e-06, "epoch": 0.2649296748904773, "percentage": 13.25, "elapsed_time": "0:46:37", "remaining_time": "5:05:18"} +{"current_steps": 1150, "total_steps": 8674, "loss": 0.5608310699462891, "lr": 1.96307365600807e-06, "epoch": 0.26516024902005997, "percentage": 13.26, "elapsed_time": "0:46:39", "remaining_time": "5:05:16"} +{"current_steps": 1151, "total_steps": 8674, "loss": 0.49922698736190796, "lr": 1.962970935976405e-06, "epoch": 0.26539082314964263, "percentage": 13.27, "elapsed_time": "0:46:41", "remaining_time": "5:05:13"} +{"current_steps": 1152, "total_steps": 8674, "loss": 0.5840054750442505, "lr": 1.9628680759671556e-06, "epoch": 0.2656213972792253, "percentage": 13.28, "elapsed_time": "0:46:44", "remaining_time": "5:05:11"} +{"current_steps": 1153, "total_steps": 8674, "loss": 0.6038475632667542, "lr": 1.9627650759952727e-06, "epoch": 0.26585197140880795, "percentage": 13.29, "elapsed_time": "0:46:46", "remaining_time": "5:05:09"} +{"current_steps": 1154, "total_steps": 8674, "loss": 0.5923193097114563, "lr": 1.9626619360757284e-06, "epoch": 0.2660825455383906, "percentage": 13.3, "elapsed_time": "0:46:49", "remaining_time": "5:05:07"} +{"current_steps": 1155, "total_steps": 8674, "loss": 0.5278598666191101, "lr": 1.962558656223516e-06, "epoch": 0.2663131196679733, "percentage": 13.32, "elapsed_time": "0:46:51", "remaining_time": "5:05:04"} +{"current_steps": 1156, "total_steps": 8674, "loss": 0.47691023349761963, "lr": 1.9624552364536472e-06, "epoch": 0.26654369379755594, "percentage": 13.33, "elapsed_time": "0:46:54", "remaining_time": "5:05:01"} +{"current_steps": 1157, "total_steps": 8674, "loss": 0.5801899433135986, "lr": 1.962351676781156e-06, "epoch": 0.2667742679271386, "percentage": 13.34, "elapsed_time": "0:46:56", 
"remaining_time": "5:04:58"} +{"current_steps": 1158, "total_steps": 8674, "loss": 0.5170506238937378, "lr": 1.962247977221095e-06, "epoch": 0.26700484205672126, "percentage": 13.35, "elapsed_time": "0:46:58", "remaining_time": "5:04:55"} +{"current_steps": 1159, "total_steps": 8674, "loss": 0.6114981174468994, "lr": 1.9621441377885387e-06, "epoch": 0.2672354161863039, "percentage": 13.36, "elapsed_time": "0:47:01", "remaining_time": "5:04:52"} +{"current_steps": 1160, "total_steps": 8674, "loss": 0.6377004384994507, "lr": 1.9620401584985807e-06, "epoch": 0.2674659903158866, "percentage": 13.37, "elapsed_time": "0:47:03", "remaining_time": "5:04:50"} +{"current_steps": 1161, "total_steps": 8674, "loss": 0.6177431344985962, "lr": 1.9619360393663356e-06, "epoch": 0.26769656444546924, "percentage": 13.38, "elapsed_time": "0:47:06", "remaining_time": "5:04:48"} +{"current_steps": 1162, "total_steps": 8674, "loss": 0.579784095287323, "lr": 1.9618317804069384e-06, "epoch": 0.2679271385750519, "percentage": 13.4, "elapsed_time": "0:47:08", "remaining_time": "5:04:46"} +{"current_steps": 1163, "total_steps": 8674, "loss": 0.6078776121139526, "lr": 1.9617273816355444e-06, "epoch": 0.26815771270463457, "percentage": 13.41, "elapsed_time": "0:47:11", "remaining_time": "5:04:43"} +{"current_steps": 1164, "total_steps": 8674, "loss": 0.5583093166351318, "lr": 1.961622843067328e-06, "epoch": 0.2683882868342172, "percentage": 13.42, "elapsed_time": "0:47:13", "remaining_time": "5:04:41"} +{"current_steps": 1165, "total_steps": 8674, "loss": 0.46033143997192383, "lr": 1.961518164717486e-06, "epoch": 0.2686188609637999, "percentage": 13.43, "elapsed_time": "0:47:15", "remaining_time": "5:04:38"} +{"current_steps": 1166, "total_steps": 8674, "loss": 0.5637123584747314, "lr": 1.961413346601234e-06, "epoch": 0.26884943509338255, "percentage": 13.44, "elapsed_time": "0:47:18", "remaining_time": "5:04:36"} +{"current_steps": 1167, "total_steps": 8674, "loss": 0.5943595170974731, "lr": 
1.9613083887338085e-06, "epoch": 0.2690800092229652, "percentage": 13.45, "elapsed_time": "0:47:20", "remaining_time": "5:04:34"} +{"current_steps": 1168, "total_steps": 8674, "loss": 0.5440319776535034, "lr": 1.961203291130466e-06, "epoch": 0.2693105833525478, "percentage": 13.47, "elapsed_time": "0:47:23", "remaining_time": "5:04:32"} +{"current_steps": 1169, "total_steps": 8674, "loss": 0.5665608048439026, "lr": 1.961098053806484e-06, "epoch": 0.2695411574821305, "percentage": 13.48, "elapsed_time": "0:47:25", "remaining_time": "5:04:29"} +{"current_steps": 1170, "total_steps": 8674, "loss": 0.5707683563232422, "lr": 1.960992676777159e-06, "epoch": 0.26977173161171314, "percentage": 13.49, "elapsed_time": "0:47:28", "remaining_time": "5:04:28"} +{"current_steps": 1171, "total_steps": 8674, "loss": 0.5447777509689331, "lr": 1.9608871600578093e-06, "epoch": 0.2700023057412958, "percentage": 13.5, "elapsed_time": "0:47:30", "remaining_time": "5:04:25"} +{"current_steps": 1172, "total_steps": 8674, "loss": 0.5598857402801514, "lr": 1.9607815036637726e-06, "epoch": 0.27023287987087846, "percentage": 13.51, "elapsed_time": "0:47:33", "remaining_time": "5:04:22"} +{"current_steps": 1173, "total_steps": 8674, "loss": 0.558403491973877, "lr": 1.960675707610407e-06, "epoch": 0.2704634540004611, "percentage": 13.52, "elapsed_time": "0:47:35", "remaining_time": "5:04:19"} +{"current_steps": 1174, "total_steps": 8674, "loss": 0.6696962118148804, "lr": 1.960569771913091e-06, "epoch": 0.2706940281300438, "percentage": 13.53, "elapsed_time": "0:47:37", "remaining_time": "5:04:16"} +{"current_steps": 1175, "total_steps": 8674, "loss": 0.519884467124939, "lr": 1.960463696587224e-06, "epoch": 0.27092460225962645, "percentage": 13.55, "elapsed_time": "0:47:40", "remaining_time": "5:04:13"} +{"current_steps": 1176, "total_steps": 8674, "loss": 0.6440261602401733, "lr": 1.9603574816482243e-06, "epoch": 0.2711551763892091, "percentage": 13.56, "elapsed_time": "0:47:42", 
"remaining_time": "5:04:10"} +{"current_steps": 1177, "total_steps": 8674, "loss": 0.48713982105255127, "lr": 1.9602511271115317e-06, "epoch": 0.27138575051879177, "percentage": 13.57, "elapsed_time": "0:47:44", "remaining_time": "5:04:07"} +{"current_steps": 1178, "total_steps": 8674, "loss": 0.5257129073143005, "lr": 1.960144632992606e-06, "epoch": 0.27161632464837443, "percentage": 13.58, "elapsed_time": "0:47:47", "remaining_time": "5:04:06"} +{"current_steps": 1179, "total_steps": 8674, "loss": 0.5220426917076111, "lr": 1.9600379993069272e-06, "epoch": 0.2718468987779571, "percentage": 13.59, "elapsed_time": "0:47:49", "remaining_time": "5:04:03"} +{"current_steps": 1180, "total_steps": 8674, "loss": 0.569817304611206, "lr": 1.9599312260699955e-06, "epoch": 0.27207747290753975, "percentage": 13.6, "elapsed_time": "0:47:52", "remaining_time": "5:04:00"} +{"current_steps": 1181, "total_steps": 8674, "loss": 0.4370031952857971, "lr": 1.9598243132973317e-06, "epoch": 0.2723080470371224, "percentage": 13.62, "elapsed_time": "0:47:54", "remaining_time": "5:03:58"} +{"current_steps": 1182, "total_steps": 8674, "loss": 0.6060882210731506, "lr": 1.959717261004476e-06, "epoch": 0.2725386211667051, "percentage": 13.63, "elapsed_time": "0:47:56", "remaining_time": "5:03:55"} +{"current_steps": 1183, "total_steps": 8674, "loss": 0.5830891132354736, "lr": 1.9596100692069905e-06, "epoch": 0.27276919529628774, "percentage": 13.64, "elapsed_time": "0:47:59", "remaining_time": "5:03:52"} +{"current_steps": 1184, "total_steps": 8674, "loss": 0.5689493417739868, "lr": 1.9595027379204556e-06, "epoch": 0.2729997694258704, "percentage": 13.65, "elapsed_time": "0:48:01", "remaining_time": "5:03:49"} +{"current_steps": 1185, "total_steps": 8674, "loss": 0.5550887584686279, "lr": 1.9593952671604735e-06, "epoch": 0.27323034355545306, "percentage": 13.66, "elapsed_time": "0:48:04", "remaining_time": "5:03:46"} +{"current_steps": 1186, "total_steps": 8674, "loss": 0.48127567768096924, 
"lr": 1.9592876569426665e-06, "epoch": 0.2734609176850357, "percentage": 13.67, "elapsed_time": "0:48:06", "remaining_time": "5:03:44"} +{"current_steps": 1187, "total_steps": 8674, "loss": 0.640753984451294, "lr": 1.9591799072826764e-06, "epoch": 0.2736914918146184, "percentage": 13.68, "elapsed_time": "0:48:09", "remaining_time": "5:03:43"} +{"current_steps": 1188, "total_steps": 8674, "loss": 0.5266000032424927, "lr": 1.959072018196165e-06, "epoch": 0.27392206594420104, "percentage": 13.7, "elapsed_time": "0:48:11", "remaining_time": "5:03:40"} +{"current_steps": 1189, "total_steps": 8674, "loss": 0.5586614608764648, "lr": 1.958963989698817e-06, "epoch": 0.2741526400737837, "percentage": 13.71, "elapsed_time": "0:48:13", "remaining_time": "5:03:37"} +{"current_steps": 1190, "total_steps": 8674, "loss": 0.5937967896461487, "lr": 1.9588558218063336e-06, "epoch": 0.27438321420336637, "percentage": 13.72, "elapsed_time": "0:48:16", "remaining_time": "5:03:34"} +{"current_steps": 1191, "total_steps": 8674, "loss": 0.5887218713760376, "lr": 1.958747514534439e-06, "epoch": 0.274613788332949, "percentage": 13.73, "elapsed_time": "0:48:18", "remaining_time": "5:03:32"} +{"current_steps": 1192, "total_steps": 8674, "loss": 0.5151614546775818, "lr": 1.9586390678988766e-06, "epoch": 0.2748443624625317, "percentage": 13.74, "elapsed_time": "0:48:21", "remaining_time": "5:03:30"} +{"current_steps": 1193, "total_steps": 8674, "loss": 0.5392748713493347, "lr": 1.95853048191541e-06, "epoch": 0.27507493659211435, "percentage": 13.75, "elapsed_time": "0:48:23", "remaining_time": "5:03:27"} +{"current_steps": 1194, "total_steps": 8674, "loss": 0.5649560689926147, "lr": 1.9584217565998237e-06, "epoch": 0.275305510721697, "percentage": 13.77, "elapsed_time": "0:48:25", "remaining_time": "5:03:24"} +{"current_steps": 1195, "total_steps": 8674, "loss": 0.4888305962085724, "lr": 1.9583128919679213e-06, "epoch": 0.2755360848512797, "percentage": 13.78, "elapsed_time": "0:48:28", 
"remaining_time": "5:03:23"} +{"current_steps": 1196, "total_steps": 8674, "loss": 0.5026978850364685, "lr": 1.9582038880355282e-06, "epoch": 0.27576665898086233, "percentage": 13.79, "elapsed_time": "0:48:30", "remaining_time": "5:03:20"} +{"current_steps": 1197, "total_steps": 8674, "loss": 0.5358047485351562, "lr": 1.9580947448184887e-06, "epoch": 0.275997233110445, "percentage": 13.8, "elapsed_time": "0:48:33", "remaining_time": "5:03:17"} +{"current_steps": 1198, "total_steps": 8674, "loss": 0.6145739555358887, "lr": 1.957985462332668e-06, "epoch": 0.27622780724002766, "percentage": 13.81, "elapsed_time": "0:48:35", "remaining_time": "5:03:14"} +{"current_steps": 1199, "total_steps": 8674, "loss": 0.5155332684516907, "lr": 1.957876040593952e-06, "epoch": 0.2764583813696103, "percentage": 13.82, "elapsed_time": "0:48:37", "remaining_time": "5:03:11"} +{"current_steps": 1200, "total_steps": 8674, "loss": 0.48794522881507874, "lr": 1.957766479618245e-06, "epoch": 0.276688955499193, "percentage": 13.83, "elapsed_time": "0:48:40", "remaining_time": "5:03:08"} +{"current_steps": 1201, "total_steps": 8674, "loss": 0.5851761102676392, "lr": 1.957656779421474e-06, "epoch": 0.27691952962877564, "percentage": 13.85, "elapsed_time": "0:48:43", "remaining_time": "5:03:13"} +{"current_steps": 1202, "total_steps": 8674, "loss": 0.603874683380127, "lr": 1.957546940019584e-06, "epoch": 0.2771501037583583, "percentage": 13.86, "elapsed_time": "0:48:46", "remaining_time": "5:03:10"} +{"current_steps": 1203, "total_steps": 8674, "loss": 0.5022559762001038, "lr": 1.9574369614285426e-06, "epoch": 0.27738067788794096, "percentage": 13.87, "elapsed_time": "0:48:48", "remaining_time": "5:03:09"} +{"current_steps": 1204, "total_steps": 8674, "loss": 0.6469730138778687, "lr": 1.9573268436643347e-06, "epoch": 0.2776112520175236, "percentage": 13.88, "elapsed_time": "0:48:51", "remaining_time": "5:03:07"} +{"current_steps": 1205, "total_steps": 8674, "loss": 0.49918532371520996, "lr": 
1.9572165867429685e-06, "epoch": 0.2778418261471063, "percentage": 13.89, "elapsed_time": "0:48:53", "remaining_time": "5:03:04"} +{"current_steps": 1206, "total_steps": 8674, "loss": 0.48623788356781006, "lr": 1.95710619068047e-06, "epoch": 0.27807240027668895, "percentage": 13.9, "elapsed_time": "0:48:56", "remaining_time": "5:03:02"} +{"current_steps": 1207, "total_steps": 8674, "loss": 0.4868438243865967, "lr": 1.956995655492887e-06, "epoch": 0.2783029744062716, "percentage": 13.92, "elapsed_time": "0:48:58", "remaining_time": "5:02:59"} +{"current_steps": 1208, "total_steps": 8674, "loss": 0.5989904403686523, "lr": 1.9568849811962862e-06, "epoch": 0.27853354853585427, "percentage": 13.93, "elapsed_time": "0:49:00", "remaining_time": "5:02:56"} +{"current_steps": 1209, "total_steps": 8674, "loss": 0.5125104188919067, "lr": 1.956774167806756e-06, "epoch": 0.27876412266543693, "percentage": 13.94, "elapsed_time": "0:49:03", "remaining_time": "5:02:53"} +{"current_steps": 1210, "total_steps": 8674, "loss": 0.5126978158950806, "lr": 1.956663215340404e-06, "epoch": 0.2789946967950196, "percentage": 13.95, "elapsed_time": "0:49:05", "remaining_time": "5:02:50"} +{"current_steps": 1211, "total_steps": 8674, "loss": 0.5009375810623169, "lr": 1.9565521238133576e-06, "epoch": 0.27922527092460225, "percentage": 13.96, "elapsed_time": "0:49:08", "remaining_time": "5:02:49"} +{"current_steps": 1212, "total_steps": 8674, "loss": 0.5601603984832764, "lr": 1.956440893241766e-06, "epoch": 0.2794558450541849, "percentage": 13.97, "elapsed_time": "0:49:10", "remaining_time": "5:02:46"} +{"current_steps": 1213, "total_steps": 8674, "loss": 0.6310690641403198, "lr": 1.956329523641797e-06, "epoch": 0.2796864191837676, "percentage": 13.98, "elapsed_time": "0:49:13", "remaining_time": "5:02:44"} +{"current_steps": 1214, "total_steps": 8674, "loss": 0.498830646276474, "lr": 1.95621801502964e-06, "epoch": 0.27991699331335024, "percentage": 14.0, "elapsed_time": "0:49:15", 
"remaining_time": "5:02:41"} +{"current_steps": 1215, "total_steps": 8674, "loss": 0.6612650156021118, "lr": 1.9561063674215036e-06, "epoch": 0.2801475674429329, "percentage": 14.01, "elapsed_time": "0:49:17", "remaining_time": "5:02:38"} +{"current_steps": 1216, "total_steps": 8674, "loss": 0.5651615858078003, "lr": 1.9559945808336166e-06, "epoch": 0.28037814157251556, "percentage": 14.02, "elapsed_time": "0:49:20", "remaining_time": "5:02:35"} +{"current_steps": 1217, "total_steps": 8674, "loss": 0.5675203800201416, "lr": 1.955882655282229e-06, "epoch": 0.2806087157020982, "percentage": 14.03, "elapsed_time": "0:49:22", "remaining_time": "5:02:32"} +{"current_steps": 1218, "total_steps": 8674, "loss": 0.5691455006599426, "lr": 1.9557705907836095e-06, "epoch": 0.2808392898316809, "percentage": 14.04, "elapsed_time": "0:49:24", "remaining_time": "5:02:30"} +{"current_steps": 1219, "total_steps": 8674, "loss": 0.6018673181533813, "lr": 1.955658387354048e-06, "epoch": 0.28106986396126354, "percentage": 14.05, "elapsed_time": "0:49:27", "remaining_time": "5:02:27"} +{"current_steps": 1220, "total_steps": 8674, "loss": 0.5188831090927124, "lr": 1.955546045009855e-06, "epoch": 0.2813004380908462, "percentage": 14.07, "elapsed_time": "0:49:29", "remaining_time": "5:02:26"} +{"current_steps": 1221, "total_steps": 8674, "loss": 0.5161044597625732, "lr": 1.9554335637673596e-06, "epoch": 0.28153101222042887, "percentage": 14.08, "elapsed_time": "0:49:32", "remaining_time": "5:02:23"} +{"current_steps": 1222, "total_steps": 8674, "loss": 0.5651452541351318, "lr": 1.9553209436429132e-06, "epoch": 0.28176158635001153, "percentage": 14.09, "elapsed_time": "0:49:34", "remaining_time": "5:02:20"} +{"current_steps": 1223, "total_steps": 8674, "loss": 0.5763273239135742, "lr": 1.9552081846528858e-06, "epoch": 0.2819921604795942, "percentage": 14.1, "elapsed_time": "0:49:37", "remaining_time": "5:02:18"} +{"current_steps": 1224, "total_steps": 8674, "loss": 0.6379664540290833, "lr": 
1.9550952868136677e-06, "epoch": 0.28222273460917685, "percentage": 14.11, "elapsed_time": "0:49:39", "remaining_time": "5:02:15"} +{"current_steps": 1225, "total_steps": 8674, "loss": 0.4021342396736145, "lr": 1.95498225014167e-06, "epoch": 0.2824533087387595, "percentage": 14.12, "elapsed_time": "0:49:41", "remaining_time": "5:02:12"} +{"current_steps": 1226, "total_steps": 8674, "loss": 0.49230247735977173, "lr": 1.954869074653324e-06, "epoch": 0.2826838828683422, "percentage": 14.13, "elapsed_time": "0:49:44", "remaining_time": "5:02:09"} +{"current_steps": 1227, "total_steps": 8674, "loss": 0.5921554565429688, "lr": 1.954755760365081e-06, "epoch": 0.28291445699792483, "percentage": 14.15, "elapsed_time": "0:49:46", "remaining_time": "5:02:07"} +{"current_steps": 1228, "total_steps": 8674, "loss": 0.6495868563652039, "lr": 1.954642307293412e-06, "epoch": 0.2831450311275075, "percentage": 14.16, "elapsed_time": "0:49:49", "remaining_time": "5:02:05"} +{"current_steps": 1229, "total_steps": 8674, "loss": 0.5699795484542847, "lr": 1.954528715454808e-06, "epoch": 0.28337560525709016, "percentage": 14.17, "elapsed_time": "0:49:51", "remaining_time": "5:02:03"} +{"current_steps": 1230, "total_steps": 8674, "loss": 0.582231879234314, "lr": 1.9544149848657816e-06, "epoch": 0.2836061793866728, "percentage": 14.18, "elapsed_time": "0:49:54", "remaining_time": "5:02:00"} +{"current_steps": 1231, "total_steps": 8674, "loss": 0.5952359437942505, "lr": 1.9543011155428647e-06, "epoch": 0.2838367535162555, "percentage": 14.19, "elapsed_time": "0:49:56", "remaining_time": "5:01:57"} +{"current_steps": 1232, "total_steps": 8674, "loss": 0.646816611289978, "lr": 1.9541871075026092e-06, "epoch": 0.28406732764583814, "percentage": 14.2, "elapsed_time": "0:49:58", "remaining_time": "5:01:55"} +{"current_steps": 1233, "total_steps": 8674, "loss": 0.5781043767929077, "lr": 1.9540729607615866e-06, "epoch": 0.2842979017754208, "percentage": 14.21, "elapsed_time": "0:50:01", 
"remaining_time": "5:01:52"} +{"current_steps": 1234, "total_steps": 8674, "loss": 0.609764814376831, "lr": 1.95395867533639e-06, "epoch": 0.28452847590500346, "percentage": 14.23, "elapsed_time": "0:50:03", "remaining_time": "5:01:49"} +{"current_steps": 1235, "total_steps": 8674, "loss": 0.4673759341239929, "lr": 1.9538442512436325e-06, "epoch": 0.2847590500345861, "percentage": 14.24, "elapsed_time": "0:50:06", "remaining_time": "5:01:47"} +{"current_steps": 1236, "total_steps": 8674, "loss": 0.6310999393463135, "lr": 1.953729688499946e-06, "epoch": 0.2849896241641688, "percentage": 14.25, "elapsed_time": "0:50:08", "remaining_time": "5:01:44"} +{"current_steps": 1237, "total_steps": 8674, "loss": 0.5103853344917297, "lr": 1.953614987121983e-06, "epoch": 0.28522019829375145, "percentage": 14.26, "elapsed_time": "0:50:11", "remaining_time": "5:01:43"} +{"current_steps": 1238, "total_steps": 8674, "loss": 0.5735328197479248, "lr": 1.9535001471264178e-06, "epoch": 0.2854507724233341, "percentage": 14.27, "elapsed_time": "0:50:13", "remaining_time": "5:01:40"} +{"current_steps": 1239, "total_steps": 8674, "loss": 0.5617454051971436, "lr": 1.953385168529942e-06, "epoch": 0.28568134655291677, "percentage": 14.28, "elapsed_time": "0:50:15", "remaining_time": "5:01:38"} +{"current_steps": 1240, "total_steps": 8674, "loss": 0.49873489141464233, "lr": 1.9532700513492705e-06, "epoch": 0.28591192068249943, "percentage": 14.3, "elapsed_time": "0:50:18", "remaining_time": "5:01:35"} +{"current_steps": 1241, "total_steps": 8674, "loss": 0.49185073375701904, "lr": 1.9531547956011353e-06, "epoch": 0.2861424948120821, "percentage": 14.31, "elapsed_time": "0:50:20", "remaining_time": "5:01:33"} +{"current_steps": 1242, "total_steps": 8674, "loss": 0.6016734838485718, "lr": 1.9530394013022907e-06, "epoch": 0.28637306894166475, "percentage": 14.32, "elapsed_time": "0:50:23", "remaining_time": "5:01:30"} +{"current_steps": 1243, "total_steps": 8674, "loss": 0.5922054052352905, "lr": 
1.9529238684695105e-06, "epoch": 0.2866036430712474, "percentage": 14.33, "elapsed_time": "0:50:25", "remaining_time": "5:01:27"} +{"current_steps": 1244, "total_steps": 8674, "loss": 0.6498355269432068, "lr": 1.952808197119588e-06, "epoch": 0.2868342172008301, "percentage": 14.34, "elapsed_time": "0:50:27", "remaining_time": "5:01:25"} +{"current_steps": 1245, "total_steps": 8674, "loss": 0.5564426183700562, "lr": 1.9526923872693382e-06, "epoch": 0.28706479133041274, "percentage": 14.35, "elapsed_time": "0:50:30", "remaining_time": "5:01:23"} +{"current_steps": 1246, "total_steps": 8674, "loss": 0.6144154071807861, "lr": 1.9525764389355945e-06, "epoch": 0.2872953654599954, "percentage": 14.36, "elapsed_time": "0:50:32", "remaining_time": "5:01:20"} +{"current_steps": 1247, "total_steps": 8674, "loss": 0.5958914756774902, "lr": 1.9524603521352116e-06, "epoch": 0.28752593958957806, "percentage": 14.38, "elapsed_time": "0:50:35", "remaining_time": "5:01:18"} +{"current_steps": 1248, "total_steps": 8674, "loss": 0.5471549034118652, "lr": 1.952344126885063e-06, "epoch": 0.2877565137191607, "percentage": 14.39, "elapsed_time": "0:50:37", "remaining_time": "5:01:15"} +{"current_steps": 1249, "total_steps": 8674, "loss": 0.5512329936027527, "lr": 1.952227763202044e-06, "epoch": 0.2879870878487434, "percentage": 14.4, "elapsed_time": "0:50:40", "remaining_time": "5:01:12"} +{"current_steps": 1250, "total_steps": 8674, "loss": 0.5545130968093872, "lr": 1.9521112611030695e-06, "epoch": 0.28821766197832605, "percentage": 14.41, "elapsed_time": "0:50:42", "remaining_time": "5:01:10"} +{"current_steps": 1251, "total_steps": 8674, "loss": 0.5409479737281799, "lr": 1.9519946206050734e-06, "epoch": 0.2884482361079087, "percentage": 14.42, "elapsed_time": "0:50:45", "remaining_time": "5:01:07"} +{"current_steps": 1252, "total_steps": 8674, "loss": 0.5248778462409973, "lr": 1.9518778417250114e-06, "epoch": 0.28867881023749137, "percentage": 14.43, "elapsed_time": "0:50:47", 
"remaining_time": "5:01:05"} +{"current_steps": 1253, "total_steps": 8674, "loss": 0.4985620975494385, "lr": 1.951760924479858e-06, "epoch": 0.28890938436707403, "percentage": 14.45, "elapsed_time": "0:50:50", "remaining_time": "5:01:03"} +{"current_steps": 1254, "total_steps": 8674, "loss": 0.5470424890518188, "lr": 1.951643868886608e-06, "epoch": 0.2891399584966567, "percentage": 14.46, "elapsed_time": "0:50:52", "remaining_time": "5:01:01"} +{"current_steps": 1255, "total_steps": 8674, "loss": 0.5082905292510986, "lr": 1.9515266749622776e-06, "epoch": 0.28937053262623935, "percentage": 14.47, "elapsed_time": "0:50:54", "remaining_time": "5:00:58"} +{"current_steps": 1256, "total_steps": 8674, "loss": 0.5734596252441406, "lr": 1.9514093427239013e-06, "epoch": 0.289601106755822, "percentage": 14.48, "elapsed_time": "0:50:57", "remaining_time": "5:00:56"} +{"current_steps": 1257, "total_steps": 8674, "loss": 0.4727100431919098, "lr": 1.951291872188535e-06, "epoch": 0.2898316808854047, "percentage": 14.49, "elapsed_time": "0:50:59", "remaining_time": "5:00:54"} +{"current_steps": 1258, "total_steps": 8674, "loss": 0.6727551221847534, "lr": 1.951174263373254e-06, "epoch": 0.29006225501498734, "percentage": 14.5, "elapsed_time": "0:51:02", "remaining_time": "5:00:51"} +{"current_steps": 1259, "total_steps": 8674, "loss": 0.5225725173950195, "lr": 1.9510565162951534e-06, "epoch": 0.29029282914457, "percentage": 14.51, "elapsed_time": "0:51:04", "remaining_time": "5:00:48"} +{"current_steps": 1260, "total_steps": 8674, "loss": 0.46537530422210693, "lr": 1.95093863097135e-06, "epoch": 0.29052340327415266, "percentage": 14.53, "elapsed_time": "0:51:06", "remaining_time": "5:00:45"} +{"current_steps": 1261, "total_steps": 8674, "loss": 0.4729498624801636, "lr": 1.950820607418979e-06, "epoch": 0.2907539774037353, "percentage": 14.54, "elapsed_time": "0:51:09", "remaining_time": "5:00:44"} +{"current_steps": 1262, "total_steps": 8674, "loss": 0.519434928894043, "lr": 
1.950702445655196e-06, "epoch": 0.290984551533318, "percentage": 14.55, "elapsed_time": "0:51:11", "remaining_time": "5:00:41"} +{"current_steps": 1263, "total_steps": 8674, "loss": 0.5487297177314758, "lr": 1.9505841456971784e-06, "epoch": 0.29121512566290064, "percentage": 14.56, "elapsed_time": "0:51:14", "remaining_time": "5:00:39"} +{"current_steps": 1264, "total_steps": 8674, "loss": 0.6228574514389038, "lr": 1.9504657075621207e-06, "epoch": 0.2914456997924833, "percentage": 14.57, "elapsed_time": "0:51:16", "remaining_time": "5:00:36"} +{"current_steps": 1265, "total_steps": 8674, "loss": 0.486205518245697, "lr": 1.95034713126724e-06, "epoch": 0.29167627392206597, "percentage": 14.58, "elapsed_time": "0:51:19", "remaining_time": "5:00:34"} +{"current_steps": 1266, "total_steps": 8674, "loss": 0.6465567350387573, "lr": 1.950228416829772e-06, "epoch": 0.2919068480516486, "percentage": 14.6, "elapsed_time": "0:51:21", "remaining_time": "5:00:31"} +{"current_steps": 1267, "total_steps": 8674, "loss": 0.5160506963729858, "lr": 1.9501095642669735e-06, "epoch": 0.2921374221812313, "percentage": 14.61, "elapsed_time": "0:51:23", "remaining_time": "5:00:28"} +{"current_steps": 1268, "total_steps": 8674, "loss": 0.47334107756614685, "lr": 1.9499905735961206e-06, "epoch": 0.29236799631081395, "percentage": 14.62, "elapsed_time": "0:51:26", "remaining_time": "5:00:25"} +{"current_steps": 1269, "total_steps": 8674, "loss": 0.46453380584716797, "lr": 1.9498714448345103e-06, "epoch": 0.2925985704403966, "percentage": 14.63, "elapsed_time": "0:51:28", "remaining_time": "5:00:22"} +{"current_steps": 1270, "total_steps": 8674, "loss": 0.5617728233337402, "lr": 1.9497521779994582e-06, "epoch": 0.29282914456997927, "percentage": 14.64, "elapsed_time": "0:51:31", "remaining_time": "5:00:21"} +{"current_steps": 1271, "total_steps": 8674, "loss": 0.6129153966903687, "lr": 1.9496327731083026e-06, "epoch": 0.29305971869956193, "percentage": 14.65, "elapsed_time": "0:51:33", 
"remaining_time": "5:00:18"} +{"current_steps": 1272, "total_steps": 8674, "loss": 0.4903183579444885, "lr": 1.9495132301783983e-06, "epoch": 0.2932902928291446, "percentage": 14.66, "elapsed_time": "0:51:35", "remaining_time": "5:00:15"} +{"current_steps": 1273, "total_steps": 8674, "loss": 0.5087980628013611, "lr": 1.9493935492271235e-06, "epoch": 0.29352086695872726, "percentage": 14.68, "elapsed_time": "0:51:38", "remaining_time": "5:00:12"} +{"current_steps": 1274, "total_steps": 8674, "loss": 0.5102910399436951, "lr": 1.949273730271874e-06, "epoch": 0.2937514410883099, "percentage": 14.69, "elapsed_time": "0:51:40", "remaining_time": "5:00:09"} +{"current_steps": 1275, "total_steps": 8674, "loss": 0.5581132769584656, "lr": 1.9491537733300674e-06, "epoch": 0.2939820152178926, "percentage": 14.7, "elapsed_time": "0:51:42", "remaining_time": "5:00:06"} +{"current_steps": 1276, "total_steps": 8674, "loss": 0.5668213367462158, "lr": 1.949033678419141e-06, "epoch": 0.29421258934747524, "percentage": 14.71, "elapsed_time": "0:51:45", "remaining_time": "5:00:03"} +{"current_steps": 1277, "total_steps": 8674, "loss": 0.5352080464363098, "lr": 1.9489134455565503e-06, "epoch": 0.2944431634770579, "percentage": 14.72, "elapsed_time": "0:51:47", "remaining_time": "5:00:00"} +{"current_steps": 1278, "total_steps": 8674, "loss": 0.47343915700912476, "lr": 1.948793074759774e-06, "epoch": 0.29467373760664056, "percentage": 14.73, "elapsed_time": "0:51:50", "remaining_time": "4:59:59"} +{"current_steps": 1279, "total_steps": 8674, "loss": 0.5169435143470764, "lr": 1.9486725660463084e-06, "epoch": 0.29490431173622317, "percentage": 14.75, "elapsed_time": "0:51:52", "remaining_time": "4:59:56"} +{"current_steps": 1280, "total_steps": 8674, "loss": 0.4801402688026428, "lr": 1.9485519194336707e-06, "epoch": 0.29513488586580583, "percentage": 14.76, "elapsed_time": "0:51:54", "remaining_time": "4:59:53"} +{"current_steps": 1281, "total_steps": 8674, "loss": 0.6537381410598755, 
"lr": 1.9484311349393984e-06, "epoch": 0.2953654599953885, "percentage": 14.77, "elapsed_time": "0:51:57", "remaining_time": "4:59:50"} +{"current_steps": 1282, "total_steps": 8674, "loss": 0.5160089135169983, "lr": 1.9483102125810483e-06, "epoch": 0.29559603412497115, "percentage": 14.78, "elapsed_time": "0:51:59", "remaining_time": "4:59:48"} +{"current_steps": 1283, "total_steps": 8674, "loss": 0.5332320332527161, "lr": 1.9481891523761985e-06, "epoch": 0.2958266082545538, "percentage": 14.79, "elapsed_time": "0:52:02", "remaining_time": "4:59:45"} +{"current_steps": 1284, "total_steps": 8674, "loss": 0.5076215267181396, "lr": 1.9480679543424453e-06, "epoch": 0.2960571823841365, "percentage": 14.8, "elapsed_time": "0:52:04", "remaining_time": "4:59:43"} +{"current_steps": 1285, "total_steps": 8674, "loss": 0.607105016708374, "lr": 1.947946618497407e-06, "epoch": 0.29628775651371914, "percentage": 14.81, "elapsed_time": "0:52:07", "remaining_time": "4:59:40"} +{"current_steps": 1286, "total_steps": 8674, "loss": 0.6265846490859985, "lr": 1.9478251448587203e-06, "epoch": 0.2965183306433018, "percentage": 14.83, "elapsed_time": "0:52:09", "remaining_time": "4:59:37"} +{"current_steps": 1287, "total_steps": 8674, "loss": 0.5313390493392944, "lr": 1.9477035334440426e-06, "epoch": 0.29674890477288446, "percentage": 14.84, "elapsed_time": "0:52:11", "remaining_time": "4:59:36"} +{"current_steps": 1288, "total_steps": 8674, "loss": 0.5059833526611328, "lr": 1.947581784271052e-06, "epoch": 0.2969794789024671, "percentage": 14.85, "elapsed_time": "0:52:14", "remaining_time": "4:59:33"} +{"current_steps": 1289, "total_steps": 8674, "loss": 0.5550922155380249, "lr": 1.9474598973574455e-06, "epoch": 0.2972100530320498, "percentage": 14.86, "elapsed_time": "0:52:16", "remaining_time": "4:59:30"} +{"current_steps": 1290, "total_steps": 8674, "loss": 0.5594801306724548, "lr": 1.947337872720941e-06, "epoch": 0.29744062716163244, "percentage": 14.87, "elapsed_time": "0:52:19", 
"remaining_time": "4:59:28"} +{"current_steps": 1291, "total_steps": 8674, "loss": 0.6404933333396912, "lr": 1.9472157103792753e-06, "epoch": 0.2976712012912151, "percentage": 14.88, "elapsed_time": "0:52:21", "remaining_time": "4:59:25"} +{"current_steps": 1292, "total_steps": 8674, "loss": 0.5884830355644226, "lr": 1.947093410350206e-06, "epoch": 0.29790177542079777, "percentage": 14.9, "elapsed_time": "0:52:23", "remaining_time": "4:59:23"} +{"current_steps": 1293, "total_steps": 8674, "loss": 0.5723487138748169, "lr": 1.9469709726515114e-06, "epoch": 0.2981323495503804, "percentage": 14.91, "elapsed_time": "0:52:26", "remaining_time": "4:59:20"} +{"current_steps": 1294, "total_steps": 8674, "loss": 0.5298895239830017, "lr": 1.946848397300989e-06, "epoch": 0.2983629236799631, "percentage": 14.92, "elapsed_time": "0:52:28", "remaining_time": "4:59:17"} +{"current_steps": 1295, "total_steps": 8674, "loss": 0.6118877530097961, "lr": 1.9467256843164557e-06, "epoch": 0.29859349780954575, "percentage": 14.93, "elapsed_time": "0:52:31", "remaining_time": "4:59:16"} +{"current_steps": 1296, "total_steps": 8674, "loss": 0.6014599800109863, "lr": 1.9466028337157498e-06, "epoch": 0.2988240719391284, "percentage": 14.94, "elapsed_time": "0:52:33", "remaining_time": "4:59:14"} +{"current_steps": 1297, "total_steps": 8674, "loss": 0.5861071944236755, "lr": 1.9464798455167278e-06, "epoch": 0.29905464606871107, "percentage": 14.95, "elapsed_time": "0:52:36", "remaining_time": "4:59:11"} +{"current_steps": 1298, "total_steps": 8674, "loss": 0.5863409042358398, "lr": 1.9463567197372684e-06, "epoch": 0.29928522019829373, "percentage": 14.96, "elapsed_time": "0:52:38", "remaining_time": "4:59:08"} +{"current_steps": 1299, "total_steps": 8674, "loss": 0.6576352119445801, "lr": 1.9462334563952687e-06, "epoch": 0.2995157943278764, "percentage": 14.98, "elapsed_time": "0:52:40", "remaining_time": "4:59:05"} +{"current_steps": 1300, "total_steps": 8674, "loss": 0.5458395481109619, "lr": 
1.9461100555086463e-06, "epoch": 0.29974636845745906, "percentage": 14.99, "elapsed_time": "0:52:43", "remaining_time": "4:59:03"} +{"current_steps": 1301, "total_steps": 8674, "loss": 0.48430997133255005, "lr": 1.945986517095339e-06, "epoch": 0.2999769425870417, "percentage": 15.0, "elapsed_time": "0:52:47", "remaining_time": "4:59:08"} +{"current_steps": 1302, "total_steps": 8674, "loss": 0.4212522506713867, "lr": 1.945862841173304e-06, "epoch": 0.3002075167166244, "percentage": 15.01, "elapsed_time": "0:52:49", "remaining_time": "4:59:05"} +{"current_steps": 1303, "total_steps": 8674, "loss": 0.5671685934066772, "lr": 1.9457390277605188e-06, "epoch": 0.30043809084620704, "percentage": 15.02, "elapsed_time": "0:52:52", "remaining_time": "4:59:03"} +{"current_steps": 1304, "total_steps": 8674, "loss": 0.5350982546806335, "lr": 1.945615076874981e-06, "epoch": 0.3006686649757897, "percentage": 15.03, "elapsed_time": "0:52:54", "remaining_time": "4:59:01"} +{"current_steps": 1305, "total_steps": 8674, "loss": 0.45792657136917114, "lr": 1.9454909885347088e-06, "epoch": 0.30089923910537236, "percentage": 15.04, "elapsed_time": "0:52:56", "remaining_time": "4:58:58"} +{"current_steps": 1306, "total_steps": 8674, "loss": 0.5644106864929199, "lr": 1.9453667627577387e-06, "epoch": 0.301129813234955, "percentage": 15.06, "elapsed_time": "0:52:59", "remaining_time": "4:58:55"} +{"current_steps": 1307, "total_steps": 8674, "loss": 0.554198145866394, "lr": 1.945242399562129e-06, "epoch": 0.3013603873645377, "percentage": 15.07, "elapsed_time": "0:53:01", "remaining_time": "4:58:52"} +{"current_steps": 1308, "total_steps": 8674, "loss": 0.5073474049568176, "lr": 1.9451178989659565e-06, "epoch": 0.30159096149412035, "percentage": 15.08, "elapsed_time": "0:53:03", "remaining_time": "4:58:50"} +{"current_steps": 1309, "total_steps": 8674, "loss": 0.569359302520752, "lr": 1.944993260987319e-06, "epoch": 0.301821535623703, "percentage": 15.09, "elapsed_time": "0:53:06", 
"remaining_time": "4:58:47"} +{"current_steps": 1310, "total_steps": 8674, "loss": 0.5011791586875916, "lr": 1.944868485644334e-06, "epoch": 0.30205210975328567, "percentage": 15.1, "elapsed_time": "0:53:08", "remaining_time": "4:58:44"} +{"current_steps": 1311, "total_steps": 8674, "loss": 0.41121986508369446, "lr": 1.9447435729551384e-06, "epoch": 0.30228268388286833, "percentage": 15.11, "elapsed_time": "0:53:11", "remaining_time": "4:58:43"} +{"current_steps": 1312, "total_steps": 8674, "loss": 0.5615876913070679, "lr": 1.9446185229378896e-06, "epoch": 0.302513258012451, "percentage": 15.13, "elapsed_time": "0:53:13", "remaining_time": "4:58:40"} +{"current_steps": 1313, "total_steps": 8674, "loss": 0.5450695157051086, "lr": 1.9444933356107652e-06, "epoch": 0.30274383214203365, "percentage": 15.14, "elapsed_time": "0:53:15", "remaining_time": "4:58:37"} +{"current_steps": 1314, "total_steps": 8674, "loss": 0.522222101688385, "lr": 1.9443680109919626e-06, "epoch": 0.3029744062716163, "percentage": 15.15, "elapsed_time": "0:53:18", "remaining_time": "4:58:34"} +{"current_steps": 1315, "total_steps": 8674, "loss": 0.5081876516342163, "lr": 1.9442425490996984e-06, "epoch": 0.303204980401199, "percentage": 15.16, "elapsed_time": "0:53:20", "remaining_time": "4:58:31"} +{"current_steps": 1316, "total_steps": 8674, "loss": 0.4955870509147644, "lr": 1.9441169499522104e-06, "epoch": 0.30343555453078164, "percentage": 15.17, "elapsed_time": "0:53:23", "remaining_time": "4:58:28"} +{"current_steps": 1317, "total_steps": 8674, "loss": 0.5098991990089417, "lr": 1.9439912135677553e-06, "epoch": 0.3036661286603643, "percentage": 15.18, "elapsed_time": "0:53:25", "remaining_time": "4:58:25"} +{"current_steps": 1318, "total_steps": 8674, "loss": 0.5686191320419312, "lr": 1.94386533996461e-06, "epoch": 0.30389670278994696, "percentage": 15.19, "elapsed_time": "0:53:27", "remaining_time": "4:58:22"} +{"current_steps": 1319, "total_steps": 8674, "loss": 0.606401264667511, "lr": 
1.943739329161072e-06, "epoch": 0.3041272769195296, "percentage": 15.21, "elapsed_time": "0:53:30", "remaining_time": "4:58:19"} +{"current_steps": 1320, "total_steps": 8674, "loss": 0.49249163269996643, "lr": 1.9436131811754576e-06, "epoch": 0.3043578510491123, "percentage": 15.22, "elapsed_time": "0:53:32", "remaining_time": "4:58:18"} +{"current_steps": 1321, "total_steps": 8674, "loss": 0.5373499989509583, "lr": 1.9434868960261047e-06, "epoch": 0.30458842517869494, "percentage": 15.23, "elapsed_time": "0:53:34", "remaining_time": "4:58:15"} +{"current_steps": 1322, "total_steps": 8674, "loss": 0.4568977355957031, "lr": 1.943360473731369e-06, "epoch": 0.3048189993082776, "percentage": 15.24, "elapsed_time": "0:53:37", "remaining_time": "4:58:12"} +{"current_steps": 1323, "total_steps": 8674, "loss": 0.562126636505127, "lr": 1.943233914309628e-06, "epoch": 0.30504957343786027, "percentage": 15.25, "elapsed_time": "0:53:39", "remaining_time": "4:58:09"} +{"current_steps": 1324, "total_steps": 8674, "loss": 0.5795382261276245, "lr": 1.943107217779278e-06, "epoch": 0.3052801475674429, "percentage": 15.26, "elapsed_time": "0:53:42", "remaining_time": "4:58:06"} +{"current_steps": 1325, "total_steps": 8674, "loss": 0.5671530365943909, "lr": 1.942980384158736e-06, "epoch": 0.3055107216970256, "percentage": 15.28, "elapsed_time": "0:53:44", "remaining_time": "4:58:03"} +{"current_steps": 1326, "total_steps": 8674, "loss": 0.5511401891708374, "lr": 1.942853413466438e-06, "epoch": 0.30574129582660825, "percentage": 15.29, "elapsed_time": "0:53:46", "remaining_time": "4:58:01"} +{"current_steps": 1327, "total_steps": 8674, "loss": 0.5712149739265442, "lr": 1.942726305720841e-06, "epoch": 0.3059718699561909, "percentage": 15.3, "elapsed_time": "0:53:49", "remaining_time": "4:57:58"} +{"current_steps": 1328, "total_steps": 8674, "loss": 0.5181496739387512, "lr": 1.9425990609404215e-06, "epoch": 0.3062024440857736, "percentage": 15.31, "elapsed_time": "0:53:51", 
"remaining_time": "4:57:57"} +{"current_steps": 1329, "total_steps": 8674, "loss": 0.5758726596832275, "lr": 1.9424716791436753e-06, "epoch": 0.30643301821535623, "percentage": 15.32, "elapsed_time": "0:53:54", "remaining_time": "4:57:54"} +{"current_steps": 1330, "total_steps": 8674, "loss": 0.5757049322128296, "lr": 1.942344160349119e-06, "epoch": 0.3066635923449389, "percentage": 15.33, "elapsed_time": "0:53:56", "remaining_time": "4:57:51"} +{"current_steps": 1331, "total_steps": 8674, "loss": 0.47352534532546997, "lr": 1.9422165045752886e-06, "epoch": 0.30689416647452156, "percentage": 15.34, "elapsed_time": "0:53:58", "remaining_time": "4:57:48"} +{"current_steps": 1332, "total_steps": 8674, "loss": 0.5940845012664795, "lr": 1.94208871184074e-06, "epoch": 0.3071247406041042, "percentage": 15.36, "elapsed_time": "0:54:01", "remaining_time": "4:57:45"} +{"current_steps": 1333, "total_steps": 8674, "loss": 0.5225652456283569, "lr": 1.9419607821640496e-06, "epoch": 0.3073553147336869, "percentage": 15.37, "elapsed_time": "0:54:03", "remaining_time": "4:57:43"} +{"current_steps": 1334, "total_steps": 8674, "loss": 0.5253404378890991, "lr": 1.9418327155638126e-06, "epoch": 0.30758588886326954, "percentage": 15.38, "elapsed_time": "0:54:05", "remaining_time": "4:57:40"} +{"current_steps": 1335, "total_steps": 8674, "loss": 0.5637744665145874, "lr": 1.941704512058646e-06, "epoch": 0.3078164629928522, "percentage": 15.39, "elapsed_time": "0:54:08", "remaining_time": "4:57:37"} +{"current_steps": 1336, "total_steps": 8674, "loss": 0.48273587226867676, "lr": 1.941576171667184e-06, "epoch": 0.30804703712243486, "percentage": 15.4, "elapsed_time": "0:54:10", "remaining_time": "4:57:34"} +{"current_steps": 1337, "total_steps": 8674, "loss": 0.5989019870758057, "lr": 1.9414476944080833e-06, "epoch": 0.3082776112520175, "percentage": 15.41, "elapsed_time": "0:54:13", "remaining_time": "4:57:33"} +{"current_steps": 1338, "total_steps": 8674, "loss": 0.5231547951698303, "lr": 
1.9413190803000183e-06, "epoch": 0.3085081853816002, "percentage": 15.43, "elapsed_time": "0:54:15", "remaining_time": "4:57:30"} +{"current_steps": 1339, "total_steps": 8674, "loss": 0.5125160217285156, "lr": 1.9411903293616853e-06, "epoch": 0.30873875951118285, "percentage": 15.44, "elapsed_time": "0:54:18", "remaining_time": "4:57:28"} +{"current_steps": 1340, "total_steps": 8674, "loss": 0.50664883852005, "lr": 1.9410614416117993e-06, "epoch": 0.3089693336407655, "percentage": 15.45, "elapsed_time": "0:54:20", "remaining_time": "4:57:25"} +{"current_steps": 1341, "total_steps": 8674, "loss": 0.5555824637413025, "lr": 1.9409324170690955e-06, "epoch": 0.30919990777034817, "percentage": 15.46, "elapsed_time": "0:54:22", "remaining_time": "4:57:22"} +{"current_steps": 1342, "total_steps": 8674, "loss": 0.5182096362113953, "lr": 1.940803255752329e-06, "epoch": 0.30943048189993083, "percentage": 15.47, "elapsed_time": "0:54:25", "remaining_time": "4:57:20"} +{"current_steps": 1343, "total_steps": 8674, "loss": 0.5202751159667969, "lr": 1.940673957680274e-06, "epoch": 0.3096610560295135, "percentage": 15.48, "elapsed_time": "0:54:27", "remaining_time": "4:57:17"} +{"current_steps": 1344, "total_steps": 8674, "loss": 0.49791598320007324, "lr": 1.940544522871726e-06, "epoch": 0.30989163015909615, "percentage": 15.49, "elapsed_time": "0:54:30", "remaining_time": "4:57:14"} +{"current_steps": 1345, "total_steps": 8674, "loss": 0.48691657185554504, "lr": 1.9404149513454995e-06, "epoch": 0.3101222042886788, "percentage": 15.51, "elapsed_time": "0:54:32", "remaining_time": "4:57:13"} +{"current_steps": 1346, "total_steps": 8674, "loss": 0.5726481676101685, "lr": 1.9402852431204293e-06, "epoch": 0.3103527784182615, "percentage": 15.52, "elapsed_time": "0:54:35", "remaining_time": "4:57:10"} +{"current_steps": 1347, "total_steps": 8674, "loss": 0.5443148016929626, "lr": 1.940155398215369e-06, "epoch": 0.31058335254784414, "percentage": 15.53, "elapsed_time": "0:54:37", 
"remaining_time": "4:57:07"} +{"current_steps": 1348, "total_steps": 8674, "loss": 0.5767767429351807, "lr": 1.9400254166491935e-06, "epoch": 0.3108139266774268, "percentage": 15.54, "elapsed_time": "0:54:39", "remaining_time": "4:57:04"} +{"current_steps": 1349, "total_steps": 8674, "loss": 0.5208882689476013, "lr": 1.9398952984407967e-06, "epoch": 0.31104450080700946, "percentage": 15.55, "elapsed_time": "0:54:42", "remaining_time": "4:57:01"} +{"current_steps": 1350, "total_steps": 8674, "loss": 0.5152548551559448, "lr": 1.939765043609093e-06, "epoch": 0.3112750749365921, "percentage": 15.56, "elapsed_time": "0:54:44", "remaining_time": "4:56:58"} +{"current_steps": 1351, "total_steps": 8674, "loss": 0.42542198300361633, "lr": 1.939634652173016e-06, "epoch": 0.3115056490661748, "percentage": 15.58, "elapsed_time": "0:54:46", "remaining_time": "4:56:55"} +{"current_steps": 1352, "total_steps": 8674, "loss": 0.6471734046936035, "lr": 1.9395041241515197e-06, "epoch": 0.31173622319575744, "percentage": 15.59, "elapsed_time": "0:54:49", "remaining_time": "4:56:52"} +{"current_steps": 1353, "total_steps": 8674, "loss": 0.6257486343383789, "lr": 1.9393734595635767e-06, "epoch": 0.3119667973253401, "percentage": 15.6, "elapsed_time": "0:54:51", "remaining_time": "4:56:51"} +{"current_steps": 1354, "total_steps": 8674, "loss": 0.562118649482727, "lr": 1.9392426584281815e-06, "epoch": 0.31219737145492277, "percentage": 15.61, "elapsed_time": "0:54:54", "remaining_time": "4:56:48"} +{"current_steps": 1355, "total_steps": 8674, "loss": 0.5602811574935913, "lr": 1.939111720764347e-06, "epoch": 0.31242794558450543, "percentage": 15.62, "elapsed_time": "0:54:56", "remaining_time": "4:56:45"} +{"current_steps": 1356, "total_steps": 8674, "loss": 0.54469895362854, "lr": 1.9389806465911056e-06, "epoch": 0.3126585197140881, "percentage": 15.63, "elapsed_time": "0:54:58", "remaining_time": "4:56:42"} +{"current_steps": 1357, "total_steps": 8674, "loss": 0.5262914896011353, "lr": 
1.9388494359275115e-06, "epoch": 0.31288909384367075, "percentage": 15.64, "elapsed_time": "0:55:01", "remaining_time": "4:56:40"} +{"current_steps": 1358, "total_steps": 8674, "loss": 0.6137207746505737, "lr": 1.938718088792637e-06, "epoch": 0.3131196679732534, "percentage": 15.66, "elapsed_time": "0:55:03", "remaining_time": "4:56:37"} +{"current_steps": 1359, "total_steps": 8674, "loss": 0.5792986750602722, "lr": 1.9385866052055744e-06, "epoch": 0.3133502421028361, "percentage": 15.67, "elapsed_time": "0:55:05", "remaining_time": "4:56:34"} +{"current_steps": 1360, "total_steps": 8674, "loss": 0.4953799843788147, "lr": 1.938454985185437e-06, "epoch": 0.31358081623241874, "percentage": 15.68, "elapsed_time": "0:55:08", "remaining_time": "4:56:31"} +{"current_steps": 1361, "total_steps": 8674, "loss": 0.5722379684448242, "lr": 1.938323228751356e-06, "epoch": 0.3138113903620014, "percentage": 15.69, "elapsed_time": "0:55:10", "remaining_time": "4:56:30"} +{"current_steps": 1362, "total_steps": 8674, "loss": 0.513651967048645, "lr": 1.938191335922484e-06, "epoch": 0.31404196449158406, "percentage": 15.7, "elapsed_time": "0:55:13", "remaining_time": "4:56:27"} +{"current_steps": 1363, "total_steps": 8674, "loss": 0.4911235272884369, "lr": 1.9380593067179935e-06, "epoch": 0.3142725386211667, "percentage": 15.71, "elapsed_time": "0:55:15", "remaining_time": "4:56:24"} +{"current_steps": 1364, "total_steps": 8674, "loss": 0.5478678941726685, "lr": 1.9379271411570753e-06, "epoch": 0.3145031127507494, "percentage": 15.73, "elapsed_time": "0:55:17", "remaining_time": "4:56:21"} +{"current_steps": 1365, "total_steps": 8674, "loss": 0.46698129177093506, "lr": 1.9377948392589417e-06, "epoch": 0.31473368688033204, "percentage": 15.74, "elapsed_time": "0:55:20", "remaining_time": "4:56:19"} +{"current_steps": 1366, "total_steps": 8674, "loss": 0.5081343650817871, "lr": 1.9376624010428243e-06, "epoch": 0.3149642610099147, "percentage": 15.75, "elapsed_time": "0:55:22", 
"remaining_time": "4:56:16"} +{"current_steps": 1367, "total_steps": 8674, "loss": 0.583903431892395, "lr": 1.9375298265279735e-06, "epoch": 0.31519483513949736, "percentage": 15.76, "elapsed_time": "0:55:25", "remaining_time": "4:56:13"} +{"current_steps": 1368, "total_steps": 8674, "loss": 0.5249435901641846, "lr": 1.937397115733661e-06, "epoch": 0.31542540926908, "percentage": 15.77, "elapsed_time": "0:55:27", "remaining_time": "4:56:10"} +{"current_steps": 1369, "total_steps": 8674, "loss": 0.5463817119598389, "lr": 1.9372642686791777e-06, "epoch": 0.3156559833986627, "percentage": 15.78, "elapsed_time": "0:55:29", "remaining_time": "4:56:07"} +{"current_steps": 1370, "total_steps": 8674, "loss": 0.4634520709514618, "lr": 1.9371312853838338e-06, "epoch": 0.31588655752824535, "percentage": 15.79, "elapsed_time": "0:55:32", "remaining_time": "4:56:06"} +{"current_steps": 1371, "total_steps": 8674, "loss": 0.6018840074539185, "lr": 1.93699816586696e-06, "epoch": 0.316117131657828, "percentage": 15.81, "elapsed_time": "0:55:34", "remaining_time": "4:56:03"} +{"current_steps": 1372, "total_steps": 8674, "loss": 0.5507885813713074, "lr": 1.9368649101479072e-06, "epoch": 0.31634770578741067, "percentage": 15.82, "elapsed_time": "0:55:37", "remaining_time": "4:56:00"} +{"current_steps": 1373, "total_steps": 8674, "loss": 0.5520491600036621, "lr": 1.9367315182460442e-06, "epoch": 0.31657827991699333, "percentage": 15.83, "elapsed_time": "0:55:39", "remaining_time": "4:55:57"} +{"current_steps": 1374, "total_steps": 8674, "loss": 0.5410347580909729, "lr": 1.936597990180762e-06, "epoch": 0.316808854046576, "percentage": 15.84, "elapsed_time": "0:55:41", "remaining_time": "4:55:54"} +{"current_steps": 1375, "total_steps": 8674, "loss": 0.5771749019622803, "lr": 1.9364643259714694e-06, "epoch": 0.31703942817615866, "percentage": 15.85, "elapsed_time": "0:55:44", "remaining_time": "4:55:52"} +{"current_steps": 1376, "total_steps": 8674, "loss": 0.5071828365325928, "lr": 
1.9363305256375965e-06, "epoch": 0.3172700023057413, "percentage": 15.86, "elapsed_time": "0:55:46", "remaining_time": "4:55:49"} +{"current_steps": 1377, "total_steps": 8674, "loss": 0.558908224105835, "lr": 1.936196589198592e-06, "epoch": 0.317500576435324, "percentage": 15.88, "elapsed_time": "0:55:49", "remaining_time": "4:55:47"} +{"current_steps": 1378, "total_steps": 8674, "loss": 0.5509803295135498, "lr": 1.9360625166739256e-06, "epoch": 0.31773115056490664, "percentage": 15.89, "elapsed_time": "0:55:51", "remaining_time": "4:55:45"} +{"current_steps": 1379, "total_steps": 8674, "loss": 0.5333945155143738, "lr": 1.935928308083085e-06, "epoch": 0.3179617246944893, "percentage": 15.9, "elapsed_time": "0:55:53", "remaining_time": "4:55:42"} +{"current_steps": 1380, "total_steps": 8674, "loss": 0.5337819457054138, "lr": 1.93579396344558e-06, "epoch": 0.31819229882407196, "percentage": 15.91, "elapsed_time": "0:55:56", "remaining_time": "4:55:39"} +{"current_steps": 1381, "total_steps": 8674, "loss": 0.5286899209022522, "lr": 1.9356594827809387e-06, "epoch": 0.3184228729536546, "percentage": 15.92, "elapsed_time": "0:55:58", "remaining_time": "4:55:37"} +{"current_steps": 1382, "total_steps": 8674, "loss": 0.5915369987487793, "lr": 1.9355248661087083e-06, "epoch": 0.3186534470832373, "percentage": 15.93, "elapsed_time": "0:56:01", "remaining_time": "4:55:34"} +{"current_steps": 1383, "total_steps": 8674, "loss": 0.5843492746353149, "lr": 1.9353901134484575e-06, "epoch": 0.31888402121281995, "percentage": 15.94, "elapsed_time": "0:56:03", "remaining_time": "4:55:31"} +{"current_steps": 1384, "total_steps": 8674, "loss": 0.5015528202056885, "lr": 1.935255224819774e-06, "epoch": 0.3191145953424026, "percentage": 15.96, "elapsed_time": "0:56:05", "remaining_time": "4:55:28"} +{"current_steps": 1385, "total_steps": 8674, "loss": 0.5650957822799683, "lr": 1.935120200242265e-06, "epoch": 0.31934516947198527, "percentage": 15.97, "elapsed_time": "0:56:08", 
"remaining_time": "4:55:25"} +{"current_steps": 1386, "total_steps": 8674, "loss": 0.5452740788459778, "lr": 1.9349850397355576e-06, "epoch": 0.31957574360156793, "percentage": 15.98, "elapsed_time": "0:56:10", "remaining_time": "4:55:22"} +{"current_steps": 1387, "total_steps": 8674, "loss": 0.5069071054458618, "lr": 1.934849743319299e-06, "epoch": 0.3198063177311506, "percentage": 15.99, "elapsed_time": "0:56:13", "remaining_time": "4:55:21"} +{"current_steps": 1388, "total_steps": 8674, "loss": 0.5350260734558105, "lr": 1.934714311013156e-06, "epoch": 0.32003689186073325, "percentage": 16.0, "elapsed_time": "0:56:15", "remaining_time": "4:55:18"} +{"current_steps": 1389, "total_steps": 8674, "loss": 0.6002014875411987, "lr": 1.9345787428368146e-06, "epoch": 0.3202674659903159, "percentage": 16.01, "elapsed_time": "0:56:17", "remaining_time": "4:55:16"} +{"current_steps": 1390, "total_steps": 8674, "loss": 0.5111383199691772, "lr": 1.9344430388099813e-06, "epoch": 0.3204980401198986, "percentage": 16.02, "elapsed_time": "0:56:20", "remaining_time": "4:55:13"} +{"current_steps": 1391, "total_steps": 8674, "loss": 0.6029741168022156, "lr": 1.934307198952382e-06, "epoch": 0.3207286142494812, "percentage": 16.04, "elapsed_time": "0:56:22", "remaining_time": "4:55:10"} +{"current_steps": 1392, "total_steps": 8674, "loss": 0.48339328169822693, "lr": 1.9341712232837628e-06, "epoch": 0.32095918837906384, "percentage": 16.05, "elapsed_time": "0:56:24", "remaining_time": "4:55:07"} +{"current_steps": 1393, "total_steps": 8674, "loss": 0.6080894470214844, "lr": 1.9340351118238882e-06, "epoch": 0.3211897625086465, "percentage": 16.06, "elapsed_time": "0:56:27", "remaining_time": "4:55:04"} +{"current_steps": 1394, "total_steps": 8674, "loss": 0.46375036239624023, "lr": 1.9338988645925444e-06, "epoch": 0.32142033663822916, "percentage": 16.07, "elapsed_time": "0:56:29", "remaining_time": "4:55:02"} +{"current_steps": 1395, "total_steps": 8674, "loss": 0.5974088907241821, 
"lr": 1.9337624816095357e-06, "epoch": 0.3216509107678118, "percentage": 16.08, "elapsed_time": "0:56:32", "remaining_time": "4:55:00"} +{"current_steps": 1396, "total_steps": 8674, "loss": 0.5759298801422119, "lr": 1.9336259628946865e-06, "epoch": 0.3218814848973945, "percentage": 16.09, "elapsed_time": "0:56:34", "remaining_time": "4:54:57"} +{"current_steps": 1397, "total_steps": 8674, "loss": 0.6050859689712524, "lr": 1.9334893084678417e-06, "epoch": 0.32211205902697715, "percentage": 16.11, "elapsed_time": "0:56:36", "remaining_time": "4:54:54"} +{"current_steps": 1398, "total_steps": 8674, "loss": 0.5879993438720703, "lr": 1.9333525183488657e-06, "epoch": 0.3223426331565598, "percentage": 16.12, "elapsed_time": "0:56:39", "remaining_time": "4:54:51"} +{"current_steps": 1399, "total_steps": 8674, "loss": 0.5496323108673096, "lr": 1.933215592557642e-06, "epoch": 0.32257320728614247, "percentage": 16.13, "elapsed_time": "0:56:41", "remaining_time": "4:54:49"} +{"current_steps": 1400, "total_steps": 8674, "loss": 0.48447534441947937, "lr": 1.9330785311140732e-06, "epoch": 0.32280378141572513, "percentage": 16.14, "elapsed_time": "0:56:44", "remaining_time": "4:54:46"} +{"current_steps": 1401, "total_steps": 8674, "loss": 0.5687322020530701, "lr": 1.932941334038084e-06, "epoch": 0.3230343555453078, "percentage": 16.15, "elapsed_time": "0:56:47", "remaining_time": "4:54:51"} +{"current_steps": 1402, "total_steps": 8674, "loss": 0.4070928990840912, "lr": 1.9328040013496166e-06, "epoch": 0.32326492967489046, "percentage": 16.16, "elapsed_time": "0:56:50", "remaining_time": "4:54:48"} +{"current_steps": 1403, "total_steps": 8674, "loss": 0.5131539106369019, "lr": 1.9326665330686344e-06, "epoch": 0.3234955038044731, "percentage": 16.17, "elapsed_time": "0:56:52", "remaining_time": "4:54:46"} +{"current_steps": 1404, "total_steps": 8674, "loss": 0.47571802139282227, "lr": 1.932528929215119e-06, "epoch": 0.3237260779340558, "percentage": 16.19, "elapsed_time": "0:56:55", 
"remaining_time": "4:54:44"} +{"current_steps": 1405, "total_steps": 8674, "loss": 0.5676391124725342, "lr": 1.9323911898090728e-06, "epoch": 0.32395665206363844, "percentage": 16.2, "elapsed_time": "0:56:57", "remaining_time": "4:54:41"} +{"current_steps": 1406, "total_steps": 8674, "loss": 0.5464721322059631, "lr": 1.9322533148705177e-06, "epoch": 0.3241872261932211, "percentage": 16.21, "elapsed_time": "0:56:59", "remaining_time": "4:54:38"} +{"current_steps": 1407, "total_steps": 8674, "loss": 0.6130954027175903, "lr": 1.9321153044194953e-06, "epoch": 0.32441780032280376, "percentage": 16.22, "elapsed_time": "0:57:02", "remaining_time": "4:54:36"} +{"current_steps": 1408, "total_steps": 8674, "loss": 0.6058028936386108, "lr": 1.9319771584760666e-06, "epoch": 0.3246483744523864, "percentage": 16.23, "elapsed_time": "0:57:04", "remaining_time": "4:54:33"} +{"current_steps": 1409, "total_steps": 8674, "loss": 0.5326286554336548, "lr": 1.9318388770603123e-06, "epoch": 0.3248789485819691, "percentage": 16.24, "elapsed_time": "0:57:07", "remaining_time": "4:54:30"} +{"current_steps": 1410, "total_steps": 8674, "loss": 0.6046053767204285, "lr": 1.9317004601923337e-06, "epoch": 0.32510952271155175, "percentage": 16.26, "elapsed_time": "0:57:09", "remaining_time": "4:54:28"} +{"current_steps": 1411, "total_steps": 8674, "loss": 0.4597975015640259, "lr": 1.931561907892251e-06, "epoch": 0.3253400968411344, "percentage": 16.27, "elapsed_time": "0:57:11", "remaining_time": "4:54:25"} +{"current_steps": 1412, "total_steps": 8674, "loss": 0.6024897694587708, "lr": 1.9314232201802035e-06, "epoch": 0.32557067097071707, "percentage": 16.28, "elapsed_time": "0:57:14", "remaining_time": "4:54:24"} +{"current_steps": 1413, "total_steps": 8674, "loss": 0.45463523268699646, "lr": 1.9312843970763512e-06, "epoch": 0.32580124510029973, "percentage": 16.29, "elapsed_time": "0:57:16", "remaining_time": "4:54:21"} +{"current_steps": 1414, "total_steps": 8674, "loss": 0.512498140335083, 
"lr": 1.9311454386008736e-06, "epoch": 0.3260318192298824, "percentage": 16.3, "elapsed_time": "0:57:19", "remaining_time": "4:54:18"} +{"current_steps": 1415, "total_steps": 8674, "loss": 0.4851795434951782, "lr": 1.9310063447739695e-06, "epoch": 0.32626239335946505, "percentage": 16.31, "elapsed_time": "0:57:21", "remaining_time": "4:54:15"} +{"current_steps": 1416, "total_steps": 8674, "loss": 0.5464169979095459, "lr": 1.930867115615858e-06, "epoch": 0.3264929674890477, "percentage": 16.32, "elapsed_time": "0:57:23", "remaining_time": "4:54:12"} +{"current_steps": 1417, "total_steps": 8674, "loss": 0.5614463090896606, "lr": 1.930727751146777e-06, "epoch": 0.3267235416186304, "percentage": 16.34, "elapsed_time": "0:57:26", "remaining_time": "4:54:10"} +{"current_steps": 1418, "total_steps": 8674, "loss": 0.635399341583252, "lr": 1.930588251386985e-06, "epoch": 0.32695411574821304, "percentage": 16.35, "elapsed_time": "0:57:28", "remaining_time": "4:54:07"} +{"current_steps": 1419, "total_steps": 8674, "loss": 0.4862840175628662, "lr": 1.9304486163567588e-06, "epoch": 0.3271846898777957, "percentage": 16.36, "elapsed_time": "0:57:31", "remaining_time": "4:54:04"} +{"current_steps": 1420, "total_steps": 8674, "loss": 0.6548877954483032, "lr": 1.930308846076397e-06, "epoch": 0.32741526400737836, "percentage": 16.37, "elapsed_time": "0:57:33", "remaining_time": "4:54:02"} +{"current_steps": 1421, "total_steps": 8674, "loss": 0.5781031250953674, "lr": 1.9301689405662154e-06, "epoch": 0.327645838136961, "percentage": 16.38, "elapsed_time": "0:57:36", "remaining_time": "4:54:00"} +{"current_steps": 1422, "total_steps": 8674, "loss": 0.4945180118083954, "lr": 1.930028899846552e-06, "epoch": 0.3278764122665437, "percentage": 16.39, "elapsed_time": "0:57:38", "remaining_time": "4:53:57"} +{"current_steps": 1423, "total_steps": 8674, "loss": 0.548690915107727, "lr": 1.9298887239377623e-06, "epoch": 0.32810698639612634, "percentage": 16.41, "elapsed_time": "0:57:40", 
"remaining_time": "4:53:54"} +{"current_steps": 1424, "total_steps": 8674, "loss": 0.44515126943588257, "lr": 1.929748412860222e-06, "epoch": 0.328337560525709, "percentage": 16.42, "elapsed_time": "0:57:43", "remaining_time": "4:53:51"} +{"current_steps": 1425, "total_steps": 8674, "loss": 0.433849573135376, "lr": 1.9296079666343273e-06, "epoch": 0.32856813465529167, "percentage": 16.43, "elapsed_time": "0:57:45", "remaining_time": "4:53:48"} +{"current_steps": 1426, "total_steps": 8674, "loss": 0.5600666403770447, "lr": 1.9294673852804938e-06, "epoch": 0.3287987087848743, "percentage": 16.44, "elapsed_time": "0:57:47", "remaining_time": "4:53:46"} +{"current_steps": 1427, "total_steps": 8674, "loss": 0.5302737355232239, "lr": 1.9293266688191555e-06, "epoch": 0.329029282914457, "percentage": 16.45, "elapsed_time": "0:57:50", "remaining_time": "4:53:43"} +{"current_steps": 1428, "total_steps": 8674, "loss": 0.5590239763259888, "lr": 1.929185817270768e-06, "epoch": 0.32925985704403965, "percentage": 16.46, "elapsed_time": "0:57:52", "remaining_time": "4:53:42"} +{"current_steps": 1429, "total_steps": 8674, "loss": 0.43225252628326416, "lr": 1.929044830655804e-06, "epoch": 0.3294904311736223, "percentage": 16.47, "elapsed_time": "0:57:55", "remaining_time": "4:53:40"} +{"current_steps": 1430, "total_steps": 8674, "loss": 0.4932950735092163, "lr": 1.9289037089947595e-06, "epoch": 0.329721005303205, "percentage": 16.49, "elapsed_time": "0:57:57", "remaining_time": "4:53:37"} +{"current_steps": 1431, "total_steps": 8674, "loss": 0.48358941078186035, "lr": 1.9287624523081457e-06, "epoch": 0.32995157943278763, "percentage": 16.5, "elapsed_time": "0:58:00", "remaining_time": "4:53:34"} +{"current_steps": 1432, "total_steps": 8674, "loss": 0.48359012603759766, "lr": 1.928621060616497e-06, "epoch": 0.3301821535623703, "percentage": 16.51, "elapsed_time": "0:58:02", "remaining_time": "4:53:32"} +{"current_steps": 1433, "total_steps": 8674, "loss": 0.48462390899658203, "lr": 
1.9284795339403663e-06, "epoch": 0.33041272769195296, "percentage": 16.52, "elapsed_time": "0:58:04", "remaining_time": "4:53:29"} +{"current_steps": 1434, "total_steps": 8674, "loss": 0.5167088508605957, "lr": 1.9283378723003253e-06, "epoch": 0.3306433018215356, "percentage": 16.53, "elapsed_time": "0:58:07", "remaining_time": "4:53:26"} +{"current_steps": 1435, "total_steps": 8674, "loss": 0.47352856397628784, "lr": 1.928196075716966e-06, "epoch": 0.3308738759511183, "percentage": 16.54, "elapsed_time": "0:58:09", "remaining_time": "4:53:23"} +{"current_steps": 1436, "total_steps": 8674, "loss": 0.5013144016265869, "lr": 1.9280541442109e-06, "epoch": 0.33110445008070094, "percentage": 16.56, "elapsed_time": "0:58:11", "remaining_time": "4:53:20"} +{"current_steps": 1437, "total_steps": 8674, "loss": 0.5061586499214172, "lr": 1.927912077802759e-06, "epoch": 0.3313350242102836, "percentage": 16.57, "elapsed_time": "0:58:14", "remaining_time": "4:53:19"} +{"current_steps": 1438, "total_steps": 8674, "loss": 0.5718814134597778, "lr": 1.9277698765131927e-06, "epoch": 0.33156559833986626, "percentage": 16.58, "elapsed_time": "0:58:17", "remaining_time": "4:53:16"} +{"current_steps": 1439, "total_steps": 8674, "loss": 0.47547006607055664, "lr": 1.9276275403628727e-06, "epoch": 0.3317961724694489, "percentage": 16.59, "elapsed_time": "0:58:19", "remaining_time": "4:53:14"} +{"current_steps": 1440, "total_steps": 8674, "loss": 0.5387942790985107, "lr": 1.9274850693724884e-06, "epoch": 0.3320267465990316, "percentage": 16.6, "elapsed_time": "0:58:21", "remaining_time": "4:53:11"} +{"current_steps": 1441, "total_steps": 8674, "loss": 0.524285078048706, "lr": 1.9273424635627494e-06, "epoch": 0.33225732072861425, "percentage": 16.61, "elapsed_time": "0:58:24", "remaining_time": "4:53:08"} +{"current_steps": 1442, "total_steps": 8674, "loss": 0.5073943138122559, "lr": 1.927199722954385e-06, "epoch": 0.3324878948581969, "percentage": 16.62, "elapsed_time": "0:58:26", 
"remaining_time": "4:53:06"} +{"current_steps": 1443, "total_steps": 8674, "loss": 0.4609600007534027, "lr": 1.927056847568144e-06, "epoch": 0.33271846898777957, "percentage": 16.64, "elapsed_time": "0:58:28", "remaining_time": "4:53:03"} +{"current_steps": 1444, "total_steps": 8674, "loss": 0.4861013889312744, "lr": 1.926913837424795e-06, "epoch": 0.33294904311736223, "percentage": 16.65, "elapsed_time": "0:58:31", "remaining_time": "4:53:00"} +{"current_steps": 1445, "total_steps": 8674, "loss": 0.5255436897277832, "lr": 1.9267706925451253e-06, "epoch": 0.3331796172469449, "percentage": 16.66, "elapsed_time": "0:58:33", "remaining_time": "4:52:58"} +{"current_steps": 1446, "total_steps": 8674, "loss": 0.6673840880393982, "lr": 1.9266274129499434e-06, "epoch": 0.33341019137652755, "percentage": 16.67, "elapsed_time": "0:58:36", "remaining_time": "4:52:55"} +{"current_steps": 1447, "total_steps": 8674, "loss": 0.38582634925842285, "lr": 1.9264839986600757e-06, "epoch": 0.3336407655061102, "percentage": 16.68, "elapsed_time": "0:58:38", "remaining_time": "4:52:52"} +{"current_steps": 1448, "total_steps": 8674, "loss": 0.4597562253475189, "lr": 1.926340449696369e-06, "epoch": 0.3338713396356929, "percentage": 16.69, "elapsed_time": "0:58:40", "remaining_time": "4:52:50"} +{"current_steps": 1449, "total_steps": 8674, "loss": 0.5901148319244385, "lr": 1.92619676607969e-06, "epoch": 0.33410191376527554, "percentage": 16.71, "elapsed_time": "0:58:43", "remaining_time": "4:52:47"} +{"current_steps": 1450, "total_steps": 8674, "loss": 0.49872028827667236, "lr": 1.9260529478309242e-06, "epoch": 0.3343324878948582, "percentage": 16.72, "elapsed_time": "0:58:45", "remaining_time": "4:52:44"} +{"current_steps": 1451, "total_steps": 8674, "loss": 0.4611232578754425, "lr": 1.925908994970977e-06, "epoch": 0.33456306202444086, "percentage": 16.73, "elapsed_time": "0:58:48", "remaining_time": "4:52:42"} +{"current_steps": 1452, "total_steps": 8674, "loss": 0.5671408176422119, "lr": 
1.9257649075207738e-06, "epoch": 0.3347936361540235, "percentage": 16.74, "elapsed_time": "0:58:50", "remaining_time": "4:52:39"} +{"current_steps": 1453, "total_steps": 8674, "loss": 0.4892054498195648, "lr": 1.925620685501259e-06, "epoch": 0.3350242102836062, "percentage": 16.75, "elapsed_time": "0:58:52", "remaining_time": "4:52:36"} +{"current_steps": 1454, "total_steps": 8674, "loss": 0.5506503582000732, "lr": 1.9254763289333966e-06, "epoch": 0.33525478441318884, "percentage": 16.76, "elapsed_time": "0:58:55", "remaining_time": "4:52:35"} +{"current_steps": 1455, "total_steps": 8674, "loss": 0.6233078241348267, "lr": 1.9253318378381702e-06, "epoch": 0.3354853585427715, "percentage": 16.77, "elapsed_time": "0:58:57", "remaining_time": "4:52:32"} +{"current_steps": 1456, "total_steps": 8674, "loss": 0.5551373958587646, "lr": 1.9251872122365835e-06, "epoch": 0.33571593267235417, "percentage": 16.79, "elapsed_time": "0:59:00", "remaining_time": "4:52:29"} +{"current_steps": 1457, "total_steps": 8674, "loss": 0.5561612844467163, "lr": 1.925042452149659e-06, "epoch": 0.33594650680193683, "percentage": 16.8, "elapsed_time": "0:59:02", "remaining_time": "4:52:26"} +{"current_steps": 1458, "total_steps": 8674, "loss": 0.613766074180603, "lr": 1.924897557598439e-06, "epoch": 0.3361770809315195, "percentage": 16.81, "elapsed_time": "0:59:05", "remaining_time": "4:52:25"} +{"current_steps": 1459, "total_steps": 8674, "loss": 0.5767652988433838, "lr": 1.9247525286039852e-06, "epoch": 0.33640765506110215, "percentage": 16.82, "elapsed_time": "0:59:07", "remaining_time": "4:52:23"} +{"current_steps": 1460, "total_steps": 8674, "loss": 0.49292564392089844, "lr": 1.9246073651873795e-06, "epoch": 0.3366382291906848, "percentage": 16.83, "elapsed_time": "0:59:09", "remaining_time": "4:52:20"} +{"current_steps": 1461, "total_steps": 8674, "loss": 0.5901867151260376, "lr": 1.9244620673697224e-06, "epoch": 0.3368688033202675, "percentage": 16.84, "elapsed_time": "0:59:12", 
"remaining_time": "4:52:18"} +{"current_steps": 1462, "total_steps": 8674, "loss": 0.5543808937072754, "lr": 1.924316635172135e-06, "epoch": 0.33709937744985013, "percentage": 16.85, "elapsed_time": "0:59:14", "remaining_time": "4:52:16"} +{"current_steps": 1463, "total_steps": 8674, "loss": 0.528805136680603, "lr": 1.9241710686157568e-06, "epoch": 0.3373299515794328, "percentage": 16.87, "elapsed_time": "0:59:17", "remaining_time": "4:52:13"} +{"current_steps": 1464, "total_steps": 8674, "loss": 0.6396733522415161, "lr": 1.924025367721748e-06, "epoch": 0.33756052570901546, "percentage": 16.88, "elapsed_time": "0:59:19", "remaining_time": "4:52:11"} +{"current_steps": 1465, "total_steps": 8674, "loss": 0.5558862686157227, "lr": 1.9238795325112867e-06, "epoch": 0.3377910998385981, "percentage": 16.89, "elapsed_time": "0:59:22", "remaining_time": "4:52:08"} +{"current_steps": 1466, "total_steps": 8674, "loss": 0.5863986015319824, "lr": 1.9237335630055724e-06, "epoch": 0.3380216739681808, "percentage": 16.9, "elapsed_time": "0:59:24", "remaining_time": "4:52:05"} +{"current_steps": 1467, "total_steps": 8674, "loss": 0.5636321306228638, "lr": 1.923587459225823e-06, "epoch": 0.33825224809776344, "percentage": 16.91, "elapsed_time": "0:59:26", "remaining_time": "4:52:02"} +{"current_steps": 1468, "total_steps": 8674, "loss": 0.6065811514854431, "lr": 1.923441221193276e-06, "epoch": 0.3384828222273461, "percentage": 16.92, "elapsed_time": "0:59:29", "remaining_time": "4:52:00"} +{"current_steps": 1469, "total_steps": 8674, "loss": 0.580939769744873, "lr": 1.9232948489291886e-06, "epoch": 0.33871339635692876, "percentage": 16.94, "elapsed_time": "0:59:31", "remaining_time": "4:51:58"} +{"current_steps": 1470, "total_steps": 8674, "loss": 0.5429994463920593, "lr": 1.9231483424548377e-06, "epoch": 0.3389439704865114, "percentage": 16.95, "elapsed_time": "0:59:34", "remaining_time": "4:51:56"} +{"current_steps": 1471, "total_steps": 8674, "loss": 0.5090892910957336, "lr": 
1.92300170179152e-06, "epoch": 0.3391745446160941, "percentage": 16.96, "elapsed_time": "0:59:36", "remaining_time": "4:51:53"} +{"current_steps": 1472, "total_steps": 8674, "loss": 0.5280312299728394, "lr": 1.9228549269605498e-06, "epoch": 0.33940511874567675, "percentage": 16.97, "elapsed_time": "0:59:39", "remaining_time": "4:51:50"} +{"current_steps": 1473, "total_steps": 8674, "loss": 0.5098810195922852, "lr": 1.9227080179832634e-06, "epoch": 0.3396356928752594, "percentage": 16.98, "elapsed_time": "0:59:41", "remaining_time": "4:51:48"} +{"current_steps": 1474, "total_steps": 8674, "loss": 0.4554474353790283, "lr": 1.922560974881015e-06, "epoch": 0.33986626700484207, "percentage": 16.99, "elapsed_time": "0:59:43", "remaining_time": "4:51:45"} +{"current_steps": 1475, "total_steps": 8674, "loss": 0.4492517113685608, "lr": 1.9224137976751793e-06, "epoch": 0.34009684113442473, "percentage": 17.0, "elapsed_time": "0:59:46", "remaining_time": "4:51:42"} +{"current_steps": 1476, "total_steps": 8674, "loss": 0.47606343030929565, "lr": 1.9222664863871495e-06, "epoch": 0.3403274152640074, "percentage": 17.02, "elapsed_time": "0:59:48", "remaining_time": "4:51:39"} +{"current_steps": 1477, "total_steps": 8674, "loss": 0.5939435362815857, "lr": 1.9221190410383394e-06, "epoch": 0.34055798939359005, "percentage": 17.03, "elapsed_time": "0:59:50", "remaining_time": "4:51:36"} +{"current_steps": 1478, "total_steps": 8674, "loss": 0.5418350696563721, "lr": 1.921971461650181e-06, "epoch": 0.3407885635231727, "percentage": 17.04, "elapsed_time": "0:59:53", "remaining_time": "4:51:34"} +{"current_steps": 1479, "total_steps": 8674, "loss": 0.5307733416557312, "lr": 1.9218237482441265e-06, "epoch": 0.3410191376527554, "percentage": 17.05, "elapsed_time": "0:59:55", "remaining_time": "4:51:32"} +{"current_steps": 1480, "total_steps": 8674, "loss": 0.5102016925811768, "lr": 1.9216759008416483e-06, "epoch": 0.34124971178233804, "percentage": 17.06, "elapsed_time": "0:59:58", 
"remaining_time": "4:51:29"} +{"current_steps": 1481, "total_steps": 8674, "loss": 0.5043876767158508, "lr": 1.9215279194642366e-06, "epoch": 0.3414802859119207, "percentage": 17.07, "elapsed_time": "1:00:00", "remaining_time": "4:51:26"} +{"current_steps": 1482, "total_steps": 8674, "loss": 0.5365253686904907, "lr": 1.9213798041334025e-06, "epoch": 0.34171086004150336, "percentage": 17.09, "elapsed_time": "1:00:02", "remaining_time": "4:51:23"} +{"current_steps": 1483, "total_steps": 8674, "loss": 0.4938368797302246, "lr": 1.921231554870676e-06, "epoch": 0.341941434171086, "percentage": 17.1, "elapsed_time": "1:00:05", "remaining_time": "4:51:20"} +{"current_steps": 1484, "total_steps": 8674, "loss": 0.5274159908294678, "lr": 1.921083171697607e-06, "epoch": 0.3421720083006687, "percentage": 17.11, "elapsed_time": "1:00:07", "remaining_time": "4:51:17"} +{"current_steps": 1485, "total_steps": 8674, "loss": 0.4720276892185211, "lr": 1.9209346546357637e-06, "epoch": 0.34240258243025135, "percentage": 17.12, "elapsed_time": "1:00:09", "remaining_time": "4:51:15"} +{"current_steps": 1486, "total_steps": 8674, "loss": 0.42276352643966675, "lr": 1.920786003706735e-06, "epoch": 0.342633156559834, "percentage": 17.13, "elapsed_time": "1:00:12", "remaining_time": "4:51:12"} +{"current_steps": 1487, "total_steps": 8674, "loss": 0.5319294333457947, "lr": 1.920637218932129e-06, "epoch": 0.34286373068941667, "percentage": 17.14, "elapsed_time": "1:00:14", "remaining_time": "4:51:10"} +{"current_steps": 1488, "total_steps": 8674, "loss": 0.5197560787200928, "lr": 1.920488300333572e-06, "epoch": 0.34309430481899933, "percentage": 17.15, "elapsed_time": "1:00:17", "remaining_time": "4:51:08"} +{"current_steps": 1489, "total_steps": 8674, "loss": 0.550025463104248, "lr": 1.9203392479327127e-06, "epoch": 0.343324878948582, "percentage": 17.17, "elapsed_time": "1:00:19", "remaining_time": "4:51:05"} +{"current_steps": 1490, "total_steps": 8674, "loss": 0.50255286693573, "lr": 
1.920190061751216e-06, "epoch": 0.34355545307816465, "percentage": 17.18, "elapsed_time": "1:00:21", "remaining_time": "4:51:02"} +{"current_steps": 1491, "total_steps": 8674, "loss": 0.5952906608581543, "lr": 1.9200407418107678e-06, "epoch": 0.3437860272077473, "percentage": 17.19, "elapsed_time": "1:00:24", "remaining_time": "4:51:00"} +{"current_steps": 1492, "total_steps": 8674, "loss": 0.48161056637763977, "lr": 1.9198912881330737e-06, "epoch": 0.34401660133733, "percentage": 17.2, "elapsed_time": "1:00:26", "remaining_time": "4:50:57"} +{"current_steps": 1493, "total_steps": 8674, "loss": 0.5490972995758057, "lr": 1.919741700739858e-06, "epoch": 0.34424717546691264, "percentage": 17.21, "elapsed_time": "1:00:29", "remaining_time": "4:50:55"} +{"current_steps": 1494, "total_steps": 8674, "loss": 0.45651519298553467, "lr": 1.9195919796528647e-06, "epoch": 0.3444777495964953, "percentage": 17.22, "elapsed_time": "1:00:31", "remaining_time": "4:50:52"} +{"current_steps": 1495, "total_steps": 8674, "loss": 0.5318460464477539, "lr": 1.919442124893857e-06, "epoch": 0.34470832372607796, "percentage": 17.24, "elapsed_time": "1:00:33", "remaining_time": "4:50:49"} +{"current_steps": 1496, "total_steps": 8674, "loss": 0.5052516460418701, "lr": 1.9192921364846187e-06, "epoch": 0.3449388978556606, "percentage": 17.25, "elapsed_time": "1:00:36", "remaining_time": "4:50:48"} +{"current_steps": 1497, "total_steps": 8674, "loss": 0.6653434038162231, "lr": 1.9191420144469515e-06, "epoch": 0.3451694719852433, "percentage": 17.26, "elapsed_time": "1:00:38", "remaining_time": "4:50:45"} +{"current_steps": 1498, "total_steps": 8674, "loss": 0.47182875871658325, "lr": 1.9189917588026774e-06, "epoch": 0.34540004611482594, "percentage": 17.27, "elapsed_time": "1:00:41", "remaining_time": "4:50:42"} +{"current_steps": 1499, "total_steps": 8674, "loss": 0.5257801413536072, "lr": 1.9188413695736376e-06, "epoch": 0.3456306202444086, "percentage": 17.28, "elapsed_time": "1:00:43", 
"remaining_time": "4:50:39"} +{"current_steps": 1500, "total_steps": 8674, "loss": 0.565075695514679, "lr": 1.918690846781692e-06, "epoch": 0.34586119437399127, "percentage": 17.29, "elapsed_time": "1:00:45", "remaining_time": "4:50:37"} +{"current_steps": 1501, "total_steps": 8674, "loss": 0.49737876653671265, "lr": 1.9185401904487214e-06, "epoch": 0.3460917685035739, "percentage": 17.3, "elapsed_time": "1:00:49", "remaining_time": "4:50:42"} +{"current_steps": 1502, "total_steps": 8674, "loss": 0.5136237144470215, "lr": 1.918389400596625e-06, "epoch": 0.34632234263315653, "percentage": 17.32, "elapsed_time": "1:00:52", "remaining_time": "4:50:39"} +{"current_steps": 1503, "total_steps": 8674, "loss": 0.5122819542884827, "lr": 1.9182384772473216e-06, "epoch": 0.3465529167627392, "percentage": 17.33, "elapsed_time": "1:00:54", "remaining_time": "4:50:36"} +{"current_steps": 1504, "total_steps": 8674, "loss": 0.4586041271686554, "lr": 1.91808742042275e-06, "epoch": 0.34678349089232186, "percentage": 17.34, "elapsed_time": "1:00:57", "remaining_time": "4:50:35"} +{"current_steps": 1505, "total_steps": 8674, "loss": 0.49752146005630493, "lr": 1.9179362301448666e-06, "epoch": 0.3470140650219045, "percentage": 17.35, "elapsed_time": "1:00:59", "remaining_time": "4:50:32"} +{"current_steps": 1506, "total_steps": 8674, "loss": 0.4423530101776123, "lr": 1.917784906435649e-06, "epoch": 0.3472446391514872, "percentage": 17.36, "elapsed_time": "1:01:01", "remaining_time": "4:50:29"} +{"current_steps": 1507, "total_steps": 8674, "loss": 0.4979468882083893, "lr": 1.9176334493170946e-06, "epoch": 0.34747521328106984, "percentage": 17.37, "elapsed_time": "1:01:04", "remaining_time": "4:50:26"} +{"current_steps": 1508, "total_steps": 8674, "loss": 0.5229524374008179, "lr": 1.9174818588112178e-06, "epoch": 0.3477057874106525, "percentage": 17.39, "elapsed_time": "1:01:06", "remaining_time": "4:50:24"} +{"current_steps": 1509, "total_steps": 8674, "loss": 0.47884654998779297, "lr": 
1.9173301349400546e-06, "epoch": 0.34793636154023516, "percentage": 17.4, "elapsed_time": "1:01:09", "remaining_time": "4:50:21"} +{"current_steps": 1510, "total_steps": 8674, "loss": 0.5204922556877136, "lr": 1.9171782777256594e-06, "epoch": 0.3481669356698178, "percentage": 17.41, "elapsed_time": "1:01:11", "remaining_time": "4:50:18"} +{"current_steps": 1511, "total_steps": 8674, "loss": 0.5077674984931946, "lr": 1.917026287190106e-06, "epoch": 0.3483975097994005, "percentage": 17.42, "elapsed_time": "1:01:13", "remaining_time": "4:50:15"} +{"current_steps": 1512, "total_steps": 8674, "loss": 0.4171299934387207, "lr": 1.9168741633554885e-06, "epoch": 0.34862808392898315, "percentage": 17.43, "elapsed_time": "1:01:16", "remaining_time": "4:50:14"} +{"current_steps": 1513, "total_steps": 8674, "loss": 0.5228694081306458, "lr": 1.9167219062439187e-06, "epoch": 0.3488586580585658, "percentage": 17.44, "elapsed_time": "1:01:18", "remaining_time": "4:50:11"} +{"current_steps": 1514, "total_steps": 8674, "loss": 0.5496635437011719, "lr": 1.916569515877529e-06, "epoch": 0.34908923218814847, "percentage": 17.45, "elapsed_time": "1:01:21", "remaining_time": "4:50:09"} +{"current_steps": 1515, "total_steps": 8674, "loss": 0.5197573900222778, "lr": 1.9164169922784716e-06, "epoch": 0.34931980631773113, "percentage": 17.47, "elapsed_time": "1:01:23", "remaining_time": "4:50:06"} +{"current_steps": 1516, "total_steps": 8674, "loss": 0.5726813077926636, "lr": 1.9162643354689163e-06, "epoch": 0.3495503804473138, "percentage": 17.48, "elapsed_time": "1:01:25", "remaining_time": "4:50:03"} +{"current_steps": 1517, "total_steps": 8674, "loss": 0.53382408618927, "lr": 1.916111545471054e-06, "epoch": 0.34978095457689645, "percentage": 17.49, "elapsed_time": "1:01:28", "remaining_time": "4:50:01"} +{"current_steps": 1518, "total_steps": 8674, "loss": 0.5535515546798706, "lr": 1.915958622307094e-06, "epoch": 0.3500115287064791, "percentage": 17.5, "elapsed_time": "1:01:30", 
"remaining_time": "4:49:58"} +{"current_steps": 1519, "total_steps": 8674, "loss": 0.5295307040214539, "lr": 1.9158055659992648e-06, "epoch": 0.3502421028360618, "percentage": 17.51, "elapsed_time": "1:01:33", "remaining_time": "4:49:55"} +{"current_steps": 1520, "total_steps": 8674, "loss": 0.5397933125495911, "lr": 1.9156523765698158e-06, "epoch": 0.35047267696564444, "percentage": 17.52, "elapsed_time": "1:01:35", "remaining_time": "4:49:52"} +{"current_steps": 1521, "total_steps": 8674, "loss": 0.5614666938781738, "lr": 1.915499054041014e-06, "epoch": 0.3507032510952271, "percentage": 17.54, "elapsed_time": "1:01:37", "remaining_time": "4:49:50"} +{"current_steps": 1522, "total_steps": 8674, "loss": 0.5321720838546753, "lr": 1.915345598435146e-06, "epoch": 0.35093382522480976, "percentage": 17.55, "elapsed_time": "1:01:40", "remaining_time": "4:49:48"} +{"current_steps": 1523, "total_steps": 8674, "loss": 0.51869797706604, "lr": 1.9151920097745185e-06, "epoch": 0.3511643993543924, "percentage": 17.56, "elapsed_time": "1:01:42", "remaining_time": "4:49:45"} +{"current_steps": 1524, "total_steps": 8674, "loss": 0.58238685131073, "lr": 1.9150382880814577e-06, "epoch": 0.3513949734839751, "percentage": 17.57, "elapsed_time": "1:01:45", "remaining_time": "4:49:42"} +{"current_steps": 1525, "total_steps": 8674, "loss": 0.5617767572402954, "lr": 1.914884433378308e-06, "epoch": 0.35162554761355774, "percentage": 17.58, "elapsed_time": "1:01:47", "remaining_time": "4:49:40"} +{"current_steps": 1526, "total_steps": 8674, "loss": 0.5207428932189941, "lr": 1.9147304456874336e-06, "epoch": 0.3518561217431404, "percentage": 17.59, "elapsed_time": "1:01:49", "remaining_time": "4:49:38"} +{"current_steps": 1527, "total_steps": 8674, "loss": 0.5929840207099915, "lr": 1.914576325031218e-06, "epoch": 0.35208669587272307, "percentage": 17.6, "elapsed_time": "1:01:52", "remaining_time": "4:49:35"} +{"current_steps": 1528, "total_steps": 8674, "loss": 0.510567307472229, "lr": 
1.914422071432065e-06, "epoch": 0.3523172700023057, "percentage": 17.62, "elapsed_time": "1:01:54", "remaining_time": "4:49:32"} +{"current_steps": 1529, "total_steps": 8674, "loss": 0.5524177551269531, "lr": 1.914267684912397e-06, "epoch": 0.3525478441318884, "percentage": 17.63, "elapsed_time": "1:01:57", "remaining_time": "4:49:30"} +{"current_steps": 1530, "total_steps": 8674, "loss": 0.5622289180755615, "lr": 1.9141131654946548e-06, "epoch": 0.35277841826147105, "percentage": 17.64, "elapsed_time": "1:01:59", "remaining_time": "4:49:28"} +{"current_steps": 1531, "total_steps": 8674, "loss": 0.5085979700088501, "lr": 1.9139585132012995e-06, "epoch": 0.3530089923910537, "percentage": 17.65, "elapsed_time": "1:02:02", "remaining_time": "4:49:25"} +{"current_steps": 1532, "total_steps": 8674, "loss": 0.47232770919799805, "lr": 1.9138037280548117e-06, "epoch": 0.35323956652063637, "percentage": 17.66, "elapsed_time": "1:02:04", "remaining_time": "4:49:22"} +{"current_steps": 1533, "total_steps": 8674, "loss": 0.535300612449646, "lr": 1.913648810077691e-06, "epoch": 0.35347014065021903, "percentage": 17.67, "elapsed_time": "1:02:06", "remaining_time": "4:49:20"} +{"current_steps": 1534, "total_steps": 8674, "loss": 0.4351940155029297, "lr": 1.9134937592924562e-06, "epoch": 0.3537007147798017, "percentage": 17.69, "elapsed_time": "1:02:09", "remaining_time": "4:49:17"} +{"current_steps": 1535, "total_steps": 8674, "loss": 0.4691917896270752, "lr": 1.9133385757216456e-06, "epoch": 0.35393128890938436, "percentage": 17.7, "elapsed_time": "1:02:11", "remaining_time": "4:49:14"} +{"current_steps": 1536, "total_steps": 8674, "loss": 0.4911034107208252, "lr": 1.9131832593878167e-06, "epoch": 0.354161863038967, "percentage": 17.71, "elapsed_time": "1:02:13", "remaining_time": "4:49:12"} +{"current_steps": 1537, "total_steps": 8674, "loss": 0.3954068422317505, "lr": 1.9130278103135458e-06, "epoch": 0.3543924371685497, "percentage": 17.72, "elapsed_time": "1:02:16", 
"remaining_time": "4:49:09"} +{"current_steps": 1538, "total_steps": 8674, "loss": 0.5541605949401855, "lr": 1.9128722285214297e-06, "epoch": 0.35462301129813234, "percentage": 17.73, "elapsed_time": "1:02:18", "remaining_time": "4:49:07"} +{"current_steps": 1539, "total_steps": 8674, "loss": 0.5719314217567444, "lr": 1.9127165140340832e-06, "epoch": 0.354853585427715, "percentage": 17.74, "elapsed_time": "1:02:21", "remaining_time": "4:49:04"} +{"current_steps": 1540, "total_steps": 8674, "loss": 0.60889732837677, "lr": 1.9125606668741418e-06, "epoch": 0.35508415955729766, "percentage": 17.75, "elapsed_time": "1:02:23", "remaining_time": "4:49:02"} +{"current_steps": 1541, "total_steps": 8674, "loss": 0.5247465968132019, "lr": 1.9124046870642587e-06, "epoch": 0.3553147336868803, "percentage": 17.77, "elapsed_time": "1:02:25", "remaining_time": "4:48:59"} +{"current_steps": 1542, "total_steps": 8674, "loss": 0.5681591033935547, "lr": 1.912248574627107e-06, "epoch": 0.355545307816463, "percentage": 17.78, "elapsed_time": "1:02:28", "remaining_time": "4:48:56"} +{"current_steps": 1543, "total_steps": 8674, "loss": 0.5995845794677734, "lr": 1.91209232958538e-06, "epoch": 0.35577588194604565, "percentage": 17.79, "elapsed_time": "1:02:30", "remaining_time": "4:48:54"} +{"current_steps": 1544, "total_steps": 8674, "loss": 0.514456033706665, "lr": 1.9119359519617893e-06, "epoch": 0.3560064560756283, "percentage": 17.8, "elapsed_time": "1:02:33", "remaining_time": "4:48:51"} +{"current_steps": 1545, "total_steps": 8674, "loss": 0.45192602276802063, "lr": 1.9117794417790657e-06, "epoch": 0.35623703020521097, "percentage": 17.81, "elapsed_time": "1:02:35", "remaining_time": "4:48:49"} +{"current_steps": 1546, "total_steps": 8674, "loss": 0.5529573559761047, "lr": 1.911622799059959e-06, "epoch": 0.35646760433479363, "percentage": 17.82, "elapsed_time": "1:02:38", "remaining_time": "4:48:49"} +{"current_steps": 1547, "total_steps": 8674, "loss": 0.4544152021408081, "lr": 
1.9114660238272403e-06, "epoch": 0.3566981784643763, "percentage": 17.83, "elapsed_time": "1:02:40", "remaining_time": "4:48:46"} +{"current_steps": 1548, "total_steps": 8674, "loss": 0.5676225423812866, "lr": 1.9113091161036974e-06, "epoch": 0.35692875259395895, "percentage": 17.85, "elapsed_time": "1:02:43", "remaining_time": "4:48:43"} +{"current_steps": 1549, "total_steps": 8674, "loss": 0.5571830868721008, "lr": 1.9111520759121384e-06, "epoch": 0.3571593267235416, "percentage": 17.86, "elapsed_time": "1:02:45", "remaining_time": "4:48:40"} +{"current_steps": 1550, "total_steps": 8674, "loss": 0.5091487765312195, "lr": 1.910994903275391e-06, "epoch": 0.3573899008531243, "percentage": 17.87, "elapsed_time": "1:02:48", "remaining_time": "4:48:38"} +{"current_steps": 1551, "total_steps": 8674, "loss": 0.5484684705734253, "lr": 1.9108375982163015e-06, "epoch": 0.35762047498270694, "percentage": 17.88, "elapsed_time": "1:02:50", "remaining_time": "4:48:35"} +{"current_steps": 1552, "total_steps": 8674, "loss": 0.49742424488067627, "lr": 1.9106801607577364e-06, "epoch": 0.3578510491122896, "percentage": 17.89, "elapsed_time": "1:02:52", "remaining_time": "4:48:32"} +{"current_steps": 1553, "total_steps": 8674, "loss": 0.5871520638465881, "lr": 1.9105225909225804e-06, "epoch": 0.35808162324187226, "percentage": 17.9, "elapsed_time": "1:02:55", "remaining_time": "4:48:30"} +{"current_steps": 1554, "total_steps": 8674, "loss": 0.5096076726913452, "lr": 1.910364888733738e-06, "epoch": 0.3583121973714549, "percentage": 17.92, "elapsed_time": "1:02:57", "remaining_time": "4:48:28"} +{"current_steps": 1555, "total_steps": 8674, "loss": 0.7168693542480469, "lr": 1.910207054214133e-06, "epoch": 0.3585427715010376, "percentage": 17.93, "elapsed_time": "1:03:00", "remaining_time": "4:48:25"} +{"current_steps": 1556, "total_steps": 8674, "loss": 0.5603561997413635, "lr": 1.910049087386707e-06, "epoch": 0.35877334563062024, "percentage": 17.94, "elapsed_time": "1:03:02", 
"remaining_time": "4:48:22"} +{"current_steps": 1557, "total_steps": 8674, "loss": 0.5857734680175781, "lr": 1.909890988274424e-06, "epoch": 0.3590039197602029, "percentage": 17.95, "elapsed_time": "1:03:04", "remaining_time": "4:48:19"} +{"current_steps": 1558, "total_steps": 8674, "loss": 0.5612708926200867, "lr": 1.9097327569002642e-06, "epoch": 0.35923449388978557, "percentage": 17.96, "elapsed_time": "1:03:07", "remaining_time": "4:48:17"} +{"current_steps": 1559, "total_steps": 8674, "loss": 0.5264564752578735, "lr": 1.909574393287228e-06, "epoch": 0.35946506801936823, "percentage": 17.97, "elapsed_time": "1:03:09", "remaining_time": "4:48:14"} +{"current_steps": 1560, "total_steps": 8674, "loss": 0.4163395166397095, "lr": 1.9094158974583357e-06, "epoch": 0.3596956421489509, "percentage": 17.98, "elapsed_time": "1:03:11", "remaining_time": "4:48:11"} +{"current_steps": 1561, "total_steps": 8674, "loss": 0.483236163854599, "lr": 1.909257269436626e-06, "epoch": 0.35992621627853355, "percentage": 18.0, "elapsed_time": "1:03:14", "remaining_time": "4:48:08"} +{"current_steps": 1562, "total_steps": 8674, "loss": 0.48892003297805786, "lr": 1.9090985092451572e-06, "epoch": 0.3601567904081162, "percentage": 18.01, "elapsed_time": "1:03:16", "remaining_time": "4:48:05"} +{"current_steps": 1563, "total_steps": 8674, "loss": 0.45310860872268677, "lr": 1.908939616907007e-06, "epoch": 0.3603873645376989, "percentage": 18.02, "elapsed_time": "1:03:19", "remaining_time": "4:48:04"} +{"current_steps": 1564, "total_steps": 8674, "loss": 0.5242425799369812, "lr": 1.908780592445271e-06, "epoch": 0.36061793866728153, "percentage": 18.03, "elapsed_time": "1:03:21", "remaining_time": "4:48:01"} +{"current_steps": 1565, "total_steps": 8674, "loss": 0.47026845812797546, "lr": 1.9086214358830663e-06, "epoch": 0.3608485127968642, "percentage": 18.04, "elapsed_time": "1:03:23", "remaining_time": "4:47:58"} +{"current_steps": 1566, "total_steps": 8674, "loss": 0.5783924460411072, "lr": 
1.9084621472435267e-06, "epoch": 0.36107908692644686, "percentage": 18.05, "elapsed_time": "1:03:26", "remaining_time": "4:47:55"} +{"current_steps": 1567, "total_steps": 8674, "loss": 0.5534437894821167, "lr": 1.9083027265498073e-06, "epoch": 0.3613096610560295, "percentage": 18.07, "elapsed_time": "1:03:28", "remaining_time": "4:47:53"} +{"current_steps": 1568, "total_steps": 8674, "loss": 0.49131953716278076, "lr": 1.9081431738250815e-06, "epoch": 0.3615402351856122, "percentage": 18.08, "elapsed_time": "1:03:30", "remaining_time": "4:47:50"} +{"current_steps": 1569, "total_steps": 8674, "loss": 0.4798020124435425, "lr": 1.9079834890925412e-06, "epoch": 0.36177080931519484, "percentage": 18.09, "elapsed_time": "1:03:33", "remaining_time": "4:47:47"} +{"current_steps": 1570, "total_steps": 8674, "loss": 0.4928893446922302, "lr": 1.9078236723753987e-06, "epoch": 0.3620013834447775, "percentage": 18.1, "elapsed_time": "1:03:35", "remaining_time": "4:47:44"} +{"current_steps": 1571, "total_steps": 8674, "loss": 0.4483630657196045, "lr": 1.9076637236968847e-06, "epoch": 0.36223195757436016, "percentage": 18.11, "elapsed_time": "1:03:38", "remaining_time": "4:47:42"} +{"current_steps": 1572, "total_steps": 8674, "loss": 0.593490481376648, "lr": 1.90750364308025e-06, "epoch": 0.3624625317039428, "percentage": 18.12, "elapsed_time": "1:03:40", "remaining_time": "4:47:40"} +{"current_steps": 1573, "total_steps": 8674, "loss": 0.5944634675979614, "lr": 1.9073434305487631e-06, "epoch": 0.3626931058335255, "percentage": 18.13, "elapsed_time": "1:03:42", "remaining_time": "4:47:37"} +{"current_steps": 1574, "total_steps": 8674, "loss": 0.5010452270507812, "lr": 1.9071830861257134e-06, "epoch": 0.36292367996310815, "percentage": 18.15, "elapsed_time": "1:03:45", "remaining_time": "4:47:35"} +{"current_steps": 1575, "total_steps": 8674, "loss": 0.5128473043441772, "lr": 1.9070226098344078e-06, "epoch": 0.3631542540926908, "percentage": 18.16, "elapsed_time": "1:03:47", 
"remaining_time": "4:47:32"} +{"current_steps": 1576, "total_steps": 8674, "loss": 0.6256363987922668, "lr": 1.9068620016981733e-06, "epoch": 0.36338482822227347, "percentage": 18.17, "elapsed_time": "1:03:49", "remaining_time": "4:47:29"} +{"current_steps": 1577, "total_steps": 8674, "loss": 0.5502322912216187, "lr": 1.9067012617403565e-06, "epoch": 0.36361540235185613, "percentage": 18.18, "elapsed_time": "1:03:52", "remaining_time": "4:47:26"} +{"current_steps": 1578, "total_steps": 8674, "loss": 0.5756800174713135, "lr": 1.906540389984322e-06, "epoch": 0.3638459764814388, "percentage": 18.19, "elapsed_time": "1:03:54", "remaining_time": "4:47:23"} +{"current_steps": 1579, "total_steps": 8674, "loss": 0.5131359696388245, "lr": 1.9063793864534543e-06, "epoch": 0.36407655061102145, "percentage": 18.2, "elapsed_time": "1:03:56", "remaining_time": "4:47:20"} +{"current_steps": 1580, "total_steps": 8674, "loss": 0.5776810646057129, "lr": 1.9062182511711567e-06, "epoch": 0.3643071247406041, "percentage": 18.22, "elapsed_time": "1:03:59", "remaining_time": "4:47:19"} +{"current_steps": 1581, "total_steps": 8674, "loss": 0.49460822343826294, "lr": 1.9060569841608523e-06, "epoch": 0.3645376988701868, "percentage": 18.23, "elapsed_time": "1:04:01", "remaining_time": "4:47:16"} +{"current_steps": 1582, "total_steps": 8674, "loss": 0.5031022429466248, "lr": 1.9058955854459823e-06, "epoch": 0.36476827299976944, "percentage": 18.24, "elapsed_time": "1:04:04", "remaining_time": "4:47:13"} +{"current_steps": 1583, "total_steps": 8674, "loss": 0.4957816004753113, "lr": 1.9057340550500082e-06, "epoch": 0.3649988471293521, "percentage": 18.25, "elapsed_time": "1:04:06", "remaining_time": "4:47:11"} +{"current_steps": 1584, "total_steps": 8674, "loss": 0.47861093282699585, "lr": 1.9055723929964102e-06, "epoch": 0.36522942125893476, "percentage": 18.26, "elapsed_time": "1:04:09", "remaining_time": "4:47:08"} +{"current_steps": 1585, "total_steps": 8674, "loss": 0.44517919421195984, 
"lr": 1.9054105993086868e-06, "epoch": 0.3654599953885174, "percentage": 18.27, "elapsed_time": "1:04:11", "remaining_time": "4:47:05"} +{"current_steps": 1586, "total_steps": 8674, "loss": 0.46661484241485596, "lr": 1.9052486740103568e-06, "epoch": 0.3656905695181001, "percentage": 18.28, "elapsed_time": "1:04:13", "remaining_time": "4:47:03"} +{"current_steps": 1587, "total_steps": 8674, "loss": 0.517694890499115, "lr": 1.9050866171249575e-06, "epoch": 0.36592114364768275, "percentage": 18.3, "elapsed_time": "1:04:16", "remaining_time": "4:47:00"} +{"current_steps": 1588, "total_steps": 8674, "loss": 0.49465644359588623, "lr": 1.904924428676046e-06, "epoch": 0.3661517177772654, "percentage": 18.31, "elapsed_time": "1:04:18", "remaining_time": "4:46:58"} +{"current_steps": 1589, "total_steps": 8674, "loss": 0.41830652952194214, "lr": 1.9047621086871971e-06, "epoch": 0.36638229190684807, "percentage": 18.32, "elapsed_time": "1:04:21", "remaining_time": "4:46:55"} +{"current_steps": 1590, "total_steps": 8674, "loss": 0.5540663003921509, "lr": 1.9045996571820067e-06, "epoch": 0.36661286603643073, "percentage": 18.33, "elapsed_time": "1:04:23", "remaining_time": "4:46:52"} +{"current_steps": 1591, "total_steps": 8674, "loss": 0.5619527101516724, "lr": 1.9044370741840882e-06, "epoch": 0.3668434401660134, "percentage": 18.34, "elapsed_time": "1:04:25", "remaining_time": "4:46:50"} +{"current_steps": 1592, "total_steps": 8674, "loss": 0.5086055994033813, "lr": 1.9042743597170746e-06, "epoch": 0.36707401429559605, "percentage": 18.35, "elapsed_time": "1:04:28", "remaining_time": "4:46:47"} +{"current_steps": 1593, "total_steps": 8674, "loss": 0.5839927196502686, "lr": 1.9041115138046183e-06, "epoch": 0.3673045884251787, "percentage": 18.37, "elapsed_time": "1:04:30", "remaining_time": "4:46:45"} +{"current_steps": 1594, "total_steps": 8674, "loss": 0.508616030216217, "lr": 1.9039485364703904e-06, "epoch": 0.3675351625547614, "percentage": 18.38, "elapsed_time": "1:04:32", 
"remaining_time": "4:46:42"} +{"current_steps": 1595, "total_steps": 8674, "loss": 0.46514832973480225, "lr": 1.903785427738082e-06, "epoch": 0.36776573668434404, "percentage": 18.39, "elapsed_time": "1:04:35", "remaining_time": "4:46:39"} +{"current_steps": 1596, "total_steps": 8674, "loss": 0.42142176628112793, "lr": 1.9036221876314016e-06, "epoch": 0.3679963108139267, "percentage": 18.4, "elapsed_time": "1:04:37", "remaining_time": "4:46:37"} +{"current_steps": 1597, "total_steps": 8674, "loss": 0.42195791006088257, "lr": 1.9034588161740786e-06, "epoch": 0.36822688494350936, "percentage": 18.41, "elapsed_time": "1:04:40", "remaining_time": "4:46:34"} +{"current_steps": 1598, "total_steps": 8674, "loss": 0.46705931425094604, "lr": 1.9032953133898601e-06, "epoch": 0.368457459073092, "percentage": 18.42, "elapsed_time": "1:04:42", "remaining_time": "4:46:32"} +{"current_steps": 1599, "total_steps": 8674, "loss": 0.4741164743900299, "lr": 1.9031316793025134e-06, "epoch": 0.3686880332026747, "percentage": 18.43, "elapsed_time": "1:04:44", "remaining_time": "4:46:29"} +{"current_steps": 1600, "total_steps": 8674, "loss": 0.49730339646339417, "lr": 1.902967913935824e-06, "epoch": 0.36891860733225734, "percentage": 18.45, "elapsed_time": "1:04:47", "remaining_time": "4:46:26"} +{"current_steps": 1601, "total_steps": 8674, "loss": 0.47678127884864807, "lr": 1.902804017313597e-06, "epoch": 0.36914918146184, "percentage": 18.46, "elapsed_time": "1:04:50", "remaining_time": "4:46:28"} +{"current_steps": 1602, "total_steps": 8674, "loss": 0.4954279661178589, "lr": 1.9026399894596565e-06, "epoch": 0.36937975559142266, "percentage": 18.47, "elapsed_time": "1:04:53", "remaining_time": "4:46:26"} +{"current_steps": 1603, "total_steps": 8674, "loss": 0.5115381479263306, "lr": 1.9024758303978456e-06, "epoch": 0.3696103297210053, "percentage": 18.48, "elapsed_time": "1:04:55", "remaining_time": "4:46:23"} +{"current_steps": 1604, "total_steps": 8674, "loss": 0.6147117614746094, 
"lr": 1.9023115401520264e-06, "epoch": 0.369840903850588, "percentage": 18.49, "elapsed_time": "1:04:57", "remaining_time": "4:46:21"} +{"current_steps": 1605, "total_steps": 8674, "loss": 0.5334371328353882, "lr": 1.9021471187460802e-06, "epoch": 0.37007147798017065, "percentage": 18.5, "elapsed_time": "1:05:00", "remaining_time": "4:46:19"} +{"current_steps": 1606, "total_steps": 8674, "loss": 0.4702361226081848, "lr": 1.9019825662039073e-06, "epoch": 0.3703020521097533, "percentage": 18.52, "elapsed_time": "1:05:02", "remaining_time": "4:46:16"} +{"current_steps": 1607, "total_steps": 8674, "loss": 0.5049586892127991, "lr": 1.901817882549427e-06, "epoch": 0.37053262623933597, "percentage": 18.53, "elapsed_time": "1:05:05", "remaining_time": "4:46:13"} +{"current_steps": 1608, "total_steps": 8674, "loss": 0.5063170194625854, "lr": 1.901653067806578e-06, "epoch": 0.37076320036891863, "percentage": 18.54, "elapsed_time": "1:05:07", "remaining_time": "4:46:11"} +{"current_steps": 1609, "total_steps": 8674, "loss": 0.540824294090271, "lr": 1.9014881219993175e-06, "epoch": 0.3709937744985013, "percentage": 18.55, "elapsed_time": "1:05:10", "remaining_time": "4:46:08"} +{"current_steps": 1610, "total_steps": 8674, "loss": 0.4744170904159546, "lr": 1.901323045151622e-06, "epoch": 0.37122434862808396, "percentage": 18.56, "elapsed_time": "1:05:12", "remaining_time": "4:46:05"} +{"current_steps": 1611, "total_steps": 8674, "loss": 0.5090929269790649, "lr": 1.9011578372874876e-06, "epoch": 0.3714549227576666, "percentage": 18.57, "elapsed_time": "1:05:14", "remaining_time": "4:46:03"} +{"current_steps": 1612, "total_steps": 8674, "loss": 0.3886772394180298, "lr": 1.9009924984309284e-06, "epoch": 0.3716854968872493, "percentage": 18.58, "elapsed_time": "1:05:17", "remaining_time": "4:46:00"} +{"current_steps": 1613, "total_steps": 8674, "loss": 0.4976482391357422, "lr": 1.9008270286059782e-06, "epoch": 0.3719160710168319, "percentage": 18.6, "elapsed_time": "1:05:19", 
"remaining_time": "4:45:58"} +{"current_steps": 1614, "total_steps": 8674, "loss": 0.4629209041595459, "lr": 1.9006614278366898e-06, "epoch": 0.37214664514641455, "percentage": 18.61, "elapsed_time": "1:05:22", "remaining_time": "4:45:56"} +{"current_steps": 1615, "total_steps": 8674, "loss": 0.49334412813186646, "lr": 1.9004956961471352e-06, "epoch": 0.3723772192759972, "percentage": 18.62, "elapsed_time": "1:05:24", "remaining_time": "4:45:53"} +{"current_steps": 1616, "total_steps": 8674, "loss": 0.614592432975769, "lr": 1.9003298335614047e-06, "epoch": 0.37260779340557987, "percentage": 18.63, "elapsed_time": "1:05:26", "remaining_time": "4:45:50"} +{"current_steps": 1617, "total_steps": 8674, "loss": 0.5339843034744263, "lr": 1.9001638401036082e-06, "epoch": 0.37283836753516253, "percentage": 18.64, "elapsed_time": "1:05:29", "remaining_time": "4:45:47"} +{"current_steps": 1618, "total_steps": 8674, "loss": 0.5516937375068665, "lr": 1.8999977157978749e-06, "epoch": 0.3730689416647452, "percentage": 18.65, "elapsed_time": "1:05:31", "remaining_time": "4:45:45"} +{"current_steps": 1619, "total_steps": 8674, "loss": 0.5034124255180359, "lr": 1.8998314606683522e-06, "epoch": 0.37329951579432785, "percentage": 18.66, "elapsed_time": "1:05:33", "remaining_time": "4:45:42"} +{"current_steps": 1620, "total_steps": 8674, "loss": 0.49766790866851807, "lr": 1.8996650747392073e-06, "epoch": 0.3735300899239105, "percentage": 18.68, "elapsed_time": "1:05:36", "remaining_time": "4:45:39"} +{"current_steps": 1621, "total_steps": 8674, "loss": 0.6662446856498718, "lr": 1.899498558034626e-06, "epoch": 0.3737606640534932, "percentage": 18.69, "elapsed_time": "1:05:38", "remaining_time": "4:45:37"} +{"current_steps": 1622, "total_steps": 8674, "loss": 0.5416747331619263, "lr": 1.8993319105788129e-06, "epoch": 0.37399123818307584, "percentage": 18.7, "elapsed_time": "1:05:41", "remaining_time": "4:45:35"} +{"current_steps": 1623, "total_steps": 8674, "loss": 0.5137313604354858, 
"lr": 1.8991651323959922e-06, "epoch": 0.3742218123126585, "percentage": 18.71, "elapsed_time": "1:05:43", "remaining_time": "4:45:32"} +{"current_steps": 1624, "total_steps": 8674, "loss": 0.566002607345581, "lr": 1.8989982235104072e-06, "epoch": 0.37445238644224116, "percentage": 18.72, "elapsed_time": "1:05:45", "remaining_time": "4:45:29"} +{"current_steps": 1625, "total_steps": 8674, "loss": 0.5201380252838135, "lr": 1.8988311839463188e-06, "epoch": 0.3746829605718238, "percentage": 18.73, "elapsed_time": "1:05:48", "remaining_time": "4:45:27"} +{"current_steps": 1626, "total_steps": 8674, "loss": 0.5103918313980103, "lr": 1.8986640137280087e-06, "epoch": 0.3749135347014065, "percentage": 18.75, "elapsed_time": "1:05:50", "remaining_time": "4:45:24"} +{"current_steps": 1627, "total_steps": 8674, "loss": 0.47900843620300293, "lr": 1.8984967128797763e-06, "epoch": 0.37514410883098914, "percentage": 18.76, "elapsed_time": "1:05:52", "remaining_time": "4:45:21"} +{"current_steps": 1628, "total_steps": 8674, "loss": 0.42991960048675537, "lr": 1.898329281425941e-06, "epoch": 0.3753746829605718, "percentage": 18.77, "elapsed_time": "1:05:55", "remaining_time": "4:45:18"} +{"current_steps": 1629, "total_steps": 8674, "loss": 0.5707317590713501, "lr": 1.89816171939084e-06, "epoch": 0.37560525709015447, "percentage": 18.78, "elapsed_time": "1:05:57", "remaining_time": "4:45:15"} +{"current_steps": 1630, "total_steps": 8674, "loss": 0.565521240234375, "lr": 1.8979940267988309e-06, "epoch": 0.3758358312197371, "percentage": 18.79, "elapsed_time": "1:06:00", "remaining_time": "4:45:13"} +{"current_steps": 1631, "total_steps": 8674, "loss": 0.6584400534629822, "lr": 1.8978262036742888e-06, "epoch": 0.3760664053493198, "percentage": 18.8, "elapsed_time": "1:06:02", "remaining_time": "4:45:11"} +{"current_steps": 1632, "total_steps": 8674, "loss": 0.4749317169189453, "lr": 1.897658250041609e-06, "epoch": 0.37629697947890245, "percentage": 18.81, "elapsed_time": "1:06:04", 
"remaining_time": "4:45:08"} +{"current_steps": 1633, "total_steps": 8674, "loss": 0.5495604872703552, "lr": 1.8974901659252048e-06, "epoch": 0.3765275536084851, "percentage": 18.83, "elapsed_time": "1:06:07", "remaining_time": "4:45:05"} +{"current_steps": 1634, "total_steps": 8674, "loss": 0.465708464384079, "lr": 1.8973219513495094e-06, "epoch": 0.37675812773806777, "percentage": 18.84, "elapsed_time": "1:06:09", "remaining_time": "4:45:03"} +{"current_steps": 1635, "total_steps": 8674, "loss": 0.4599069058895111, "lr": 1.8971536063389742e-06, "epoch": 0.37698870186765043, "percentage": 18.85, "elapsed_time": "1:06:12", "remaining_time": "4:45:00"} +{"current_steps": 1636, "total_steps": 8674, "loss": 0.4716145694255829, "lr": 1.89698513091807e-06, "epoch": 0.3772192759972331, "percentage": 18.86, "elapsed_time": "1:06:14", "remaining_time": "4:44:57"} +{"current_steps": 1637, "total_steps": 8674, "loss": 0.594079852104187, "lr": 1.8968165251112863e-06, "epoch": 0.37744985012681576, "percentage": 18.87, "elapsed_time": "1:06:16", "remaining_time": "4:44:54"} +{"current_steps": 1638, "total_steps": 8674, "loss": 0.4588915705680847, "lr": 1.8966477889431317e-06, "epoch": 0.3776804242563984, "percentage": 18.88, "elapsed_time": "1:06:19", "remaining_time": "4:44:52"} +{"current_steps": 1639, "total_steps": 8674, "loss": 0.5236901044845581, "lr": 1.8964789224381337e-06, "epoch": 0.3779109983859811, "percentage": 18.9, "elapsed_time": "1:06:21", "remaining_time": "4:44:49"} +{"current_steps": 1640, "total_steps": 8674, "loss": 0.4954737424850464, "lr": 1.8963099256208388e-06, "epoch": 0.37814157251556374, "percentage": 18.91, "elapsed_time": "1:06:23", "remaining_time": "4:44:47"} +{"current_steps": 1641, "total_steps": 8674, "loss": 0.4194701910018921, "lr": 1.8961407985158125e-06, "epoch": 0.3783721466451464, "percentage": 18.92, "elapsed_time": "1:06:26", "remaining_time": "4:44:44"} +{"current_steps": 1642, "total_steps": 8674, "loss": 0.5368303060531616, "lr": 
1.8959715411476388e-06, "epoch": 0.37860272077472906, "percentage": 18.93, "elapsed_time": "1:06:28", "remaining_time": "4:44:41"} +{"current_steps": 1643, "total_steps": 8674, "loss": 0.5181677341461182, "lr": 1.8958021535409214e-06, "epoch": 0.3788332949043117, "percentage": 18.94, "elapsed_time": "1:06:30", "remaining_time": "4:44:38"} +{"current_steps": 1644, "total_steps": 8674, "loss": 0.4755169749259949, "lr": 1.8956326357202821e-06, "epoch": 0.3790638690338944, "percentage": 18.95, "elapsed_time": "1:06:33", "remaining_time": "4:44:36"} +{"current_steps": 1645, "total_steps": 8674, "loss": 0.5460895299911499, "lr": 1.8954629877103625e-06, "epoch": 0.37929444316347705, "percentage": 18.96, "elapsed_time": "1:06:36", "remaining_time": "4:44:37"} +{"current_steps": 1646, "total_steps": 8674, "loss": 0.47811684012413025, "lr": 1.8952932095358224e-06, "epoch": 0.3795250172930597, "percentage": 18.98, "elapsed_time": "1:06:39", "remaining_time": "4:44:38"} +{"current_steps": 1647, "total_steps": 8674, "loss": 0.5791733860969543, "lr": 1.8951233012213405e-06, "epoch": 0.37975559142264237, "percentage": 18.99, "elapsed_time": "1:06:42", "remaining_time": "4:44:38"} +{"current_steps": 1648, "total_steps": 8674, "loss": 0.4996911585330963, "lr": 1.8949532627916151e-06, "epoch": 0.37998616555222503, "percentage": 19.0, "elapsed_time": "1:06:45", "remaining_time": "4:44:36"} +{"current_steps": 1649, "total_steps": 8674, "loss": 0.6108353137969971, "lr": 1.8947830942713628e-06, "epoch": 0.3802167396818077, "percentage": 19.01, "elapsed_time": "1:06:47", "remaining_time": "4:44:33"} +{"current_steps": 1650, "total_steps": 8674, "loss": 0.5303040742874146, "lr": 1.8946127956853195e-06, "epoch": 0.38044731381139035, "percentage": 19.02, "elapsed_time": "1:06:50", "remaining_time": "4:44:31"} +{"current_steps": 1651, "total_steps": 8674, "loss": 0.4651896357536316, "lr": 1.8944423670582397e-06, "epoch": 0.380677887940973, "percentage": 19.03, "elapsed_time": "1:06:52", 
"remaining_time": "4:44:28"} +{"current_steps": 1652, "total_steps": 8674, "loss": 0.6321637630462646, "lr": 1.8942718084148969e-06, "epoch": 0.3809084620705557, "percentage": 19.05, "elapsed_time": "1:06:55", "remaining_time": "4:44:26"} +{"current_steps": 1653, "total_steps": 8674, "loss": 0.5124787092208862, "lr": 1.8941011197800836e-06, "epoch": 0.38113903620013834, "percentage": 19.06, "elapsed_time": "1:06:57", "remaining_time": "4:44:24"} +{"current_steps": 1654, "total_steps": 8674, "loss": 0.5779180526733398, "lr": 1.893930301178611e-06, "epoch": 0.381369610329721, "percentage": 19.07, "elapsed_time": "1:06:59", "remaining_time": "4:44:21"} +{"current_steps": 1655, "total_steps": 8674, "loss": 0.5723867416381836, "lr": 1.8937593526353096e-06, "epoch": 0.38160018445930366, "percentage": 19.08, "elapsed_time": "1:07:02", "remaining_time": "4:44:20"} +{"current_steps": 1656, "total_steps": 8674, "loss": 0.4312398433685303, "lr": 1.8935882741750281e-06, "epoch": 0.3818307585888863, "percentage": 19.09, "elapsed_time": "1:07:04", "remaining_time": "4:44:17"} +{"current_steps": 1657, "total_steps": 8674, "loss": 0.6503756046295166, "lr": 1.893417065822635e-06, "epoch": 0.382061332718469, "percentage": 19.1, "elapsed_time": "1:07:07", "remaining_time": "4:44:15"} +{"current_steps": 1658, "total_steps": 8674, "loss": 0.508478045463562, "lr": 1.8932457276030166e-06, "epoch": 0.38229190684805164, "percentage": 19.11, "elapsed_time": "1:07:09", "remaining_time": "4:44:12"} +{"current_steps": 1659, "total_steps": 8674, "loss": 0.46552446484565735, "lr": 1.8930742595410792e-06, "epoch": 0.3825224809776343, "percentage": 19.13, "elapsed_time": "1:07:12", "remaining_time": "4:44:10"} +{"current_steps": 1660, "total_steps": 8674, "loss": 0.4739278256893158, "lr": 1.8929026616617467e-06, "epoch": 0.38275305510721697, "percentage": 19.14, "elapsed_time": "1:07:14", "remaining_time": "4:44:07"} +{"current_steps": 1661, "total_steps": 8674, "loss": 0.5584233403205872, "lr": 
1.8927309339899634e-06, "epoch": 0.3829836292367996, "percentage": 19.15, "elapsed_time": "1:07:17", "remaining_time": "4:44:07"} +{"current_steps": 1662, "total_steps": 8674, "loss": 0.6155074238777161, "lr": 1.8925590765506911e-06, "epoch": 0.3832142033663823, "percentage": 19.16, "elapsed_time": "1:07:19", "remaining_time": "4:44:04"} +{"current_steps": 1663, "total_steps": 8674, "loss": 0.5253106951713562, "lr": 1.8923870893689112e-06, "epoch": 0.38344477749596495, "percentage": 19.17, "elapsed_time": "1:07:22", "remaining_time": "4:44:04"} +{"current_steps": 1664, "total_steps": 8674, "loss": 0.4190565347671509, "lr": 1.8922149724696238e-06, "epoch": 0.3836753516255476, "percentage": 19.18, "elapsed_time": "1:07:25", "remaining_time": "4:44:02"} +{"current_steps": 1665, "total_steps": 8674, "loss": 0.5263853073120117, "lr": 1.892042725877848e-06, "epoch": 0.3839059257551303, "percentage": 19.2, "elapsed_time": "1:07:27", "remaining_time": "4:44:00"} +{"current_steps": 1666, "total_steps": 8674, "loss": 0.4492432773113251, "lr": 1.8918703496186214e-06, "epoch": 0.38413649988471293, "percentage": 19.21, "elapsed_time": "1:07:30", "remaining_time": "4:43:57"} +{"current_steps": 1667, "total_steps": 8674, "loss": 0.49745023250579834, "lr": 1.8916978437170004e-06, "epoch": 0.3843670740142956, "percentage": 19.22, "elapsed_time": "1:07:32", "remaining_time": "4:43:54"} +{"current_steps": 1668, "total_steps": 8674, "loss": 0.6003707647323608, "lr": 1.891525208198061e-06, "epoch": 0.38459764814387826, "percentage": 19.23, "elapsed_time": "1:07:35", "remaining_time": "4:43:52"} +{"current_steps": 1669, "total_steps": 8674, "loss": 0.5430049300193787, "lr": 1.8913524430868973e-06, "epoch": 0.3848282222734609, "percentage": 19.24, "elapsed_time": "1:07:37", "remaining_time": "4:43:49"} +{"current_steps": 1670, "total_steps": 8674, "loss": 0.5561289191246033, "lr": 1.8911795484086222e-06, "epoch": 0.3850587964030436, "percentage": 19.25, "elapsed_time": "1:07:39", 
"remaining_time": "4:43:46"} +{"current_steps": 1671, "total_steps": 8674, "loss": 0.5488184690475464, "lr": 1.8910065241883678e-06, "epoch": 0.38528937053262624, "percentage": 19.26, "elapsed_time": "1:07:42", "remaining_time": "4:43:44"} +{"current_steps": 1672, "total_steps": 8674, "loss": 0.46347010135650635, "lr": 1.890833370451285e-06, "epoch": 0.3855199446622089, "percentage": 19.28, "elapsed_time": "1:07:44", "remaining_time": "4:43:42"} +{"current_steps": 1673, "total_steps": 8674, "loss": 0.553687334060669, "lr": 1.8906600872225438e-06, "epoch": 0.38575051879179156, "percentage": 19.29, "elapsed_time": "1:07:47", "remaining_time": "4:43:39"} +{"current_steps": 1674, "total_steps": 8674, "loss": 0.46162208914756775, "lr": 1.8904866745273323e-06, "epoch": 0.3859810929213742, "percentage": 19.3, "elapsed_time": "1:07:49", "remaining_time": "4:43:37"} +{"current_steps": 1675, "total_steps": 8674, "loss": 0.4478996992111206, "lr": 1.8903131323908576e-06, "epoch": 0.3862116670509569, "percentage": 19.31, "elapsed_time": "1:07:51", "remaining_time": "4:43:34"} +{"current_steps": 1676, "total_steps": 8674, "loss": 0.5857031345367432, "lr": 1.8901394608383463e-06, "epoch": 0.38644224118053955, "percentage": 19.32, "elapsed_time": "1:07:54", "remaining_time": "4:43:31"} +{"current_steps": 1677, "total_steps": 8674, "loss": 0.592833399772644, "lr": 1.8899656598950432e-06, "epoch": 0.3866728153101222, "percentage": 19.33, "elapsed_time": "1:07:56", "remaining_time": "4:43:28"} +{"current_steps": 1678, "total_steps": 8674, "loss": 0.6007786989212036, "lr": 1.8897917295862117e-06, "epoch": 0.38690338943970487, "percentage": 19.35, "elapsed_time": "1:07:58", "remaining_time": "4:43:26"} +{"current_steps": 1679, "total_steps": 8674, "loss": 0.5248164534568787, "lr": 1.8896176699371343e-06, "epoch": 0.38713396356928753, "percentage": 19.36, "elapsed_time": "1:08:01", "remaining_time": "4:43:23"} +{"current_steps": 1680, "total_steps": 8674, "loss": 0.43112409114837646, 
"lr": 1.8894434809731128e-06, "epoch": 0.3873645376988702, "percentage": 19.37, "elapsed_time": "1:08:03", "remaining_time": "4:43:21"} +{"current_steps": 1681, "total_steps": 8674, "loss": 0.56545090675354, "lr": 1.8892691627194673e-06, "epoch": 0.38759511182845285, "percentage": 19.38, "elapsed_time": "1:08:06", "remaining_time": "4:43:19"} +{"current_steps": 1682, "total_steps": 8674, "loss": 0.6287904977798462, "lr": 1.8890947152015363e-06, "epoch": 0.3878256859580355, "percentage": 19.39, "elapsed_time": "1:08:08", "remaining_time": "4:43:16"} +{"current_steps": 1683, "total_steps": 8674, "loss": 0.48461633920669556, "lr": 1.8889201384446775e-06, "epoch": 0.3880562600876182, "percentage": 19.4, "elapsed_time": "1:08:10", "remaining_time": "4:43:13"} +{"current_steps": 1684, "total_steps": 8674, "loss": 0.5089331865310669, "lr": 1.888745432474268e-06, "epoch": 0.38828683421720084, "percentage": 19.41, "elapsed_time": "1:08:13", "remaining_time": "4:43:10"} +{"current_steps": 1685, "total_steps": 8674, "loss": 0.4805281162261963, "lr": 1.8885705973157027e-06, "epoch": 0.3885174083467835, "percentage": 19.43, "elapsed_time": "1:08:15", "remaining_time": "4:43:08"} +{"current_steps": 1686, "total_steps": 8674, "loss": 0.5243096947669983, "lr": 1.8883956329943955e-06, "epoch": 0.38874798247636616, "percentage": 19.44, "elapsed_time": "1:08:18", "remaining_time": "4:43:05"} +{"current_steps": 1687, "total_steps": 8674, "loss": 0.5808781981468201, "lr": 1.8882205395357795e-06, "epoch": 0.3889785566059488, "percentage": 19.45, "elapsed_time": "1:08:20", "remaining_time": "4:43:02"} +{"current_steps": 1688, "total_steps": 8674, "loss": 0.5397018194198608, "lr": 1.8880453169653063e-06, "epoch": 0.3892091307355315, "percentage": 19.46, "elapsed_time": "1:08:22", "remaining_time": "4:42:59"} +{"current_steps": 1689, "total_steps": 8674, "loss": 0.4475638270378113, "lr": 1.8878699653084462e-06, "epoch": 0.38943970486511414, "percentage": 19.47, "elapsed_time": "1:08:25", 
"remaining_time": "4:42:58"} +{"current_steps": 1690, "total_steps": 8674, "loss": 0.6212958693504333, "lr": 1.8876944845906884e-06, "epoch": 0.3896702789946968, "percentage": 19.48, "elapsed_time": "1:08:27", "remaining_time": "4:42:55"} +{"current_steps": 1691, "total_steps": 8674, "loss": 0.44465404748916626, "lr": 1.8875188748375407e-06, "epoch": 0.38990085312427947, "percentage": 19.5, "elapsed_time": "1:08:30", "remaining_time": "4:42:52"} +{"current_steps": 1692, "total_steps": 8674, "loss": 0.5711641311645508, "lr": 1.8873431360745297e-06, "epoch": 0.39013142725386213, "percentage": 19.51, "elapsed_time": "1:08:32", "remaining_time": "4:42:50"} +{"current_steps": 1693, "total_steps": 8674, "loss": 0.4527866244316101, "lr": 1.8871672683272012e-06, "epoch": 0.3903620013834448, "percentage": 19.52, "elapsed_time": "1:08:34", "remaining_time": "4:42:47"} +{"current_steps": 1694, "total_steps": 8674, "loss": 0.6242899894714355, "lr": 1.8869912716211188e-06, "epoch": 0.39059257551302745, "percentage": 19.53, "elapsed_time": "1:08:37", "remaining_time": "4:42:44"} +{"current_steps": 1695, "total_steps": 8674, "loss": 0.6294416189193726, "lr": 1.8868151459818656e-06, "epoch": 0.3908231496426101, "percentage": 19.54, "elapsed_time": "1:08:39", "remaining_time": "4:42:41"} +{"current_steps": 1696, "total_steps": 8674, "loss": 0.49869638681411743, "lr": 1.8866388914350435e-06, "epoch": 0.3910537237721928, "percentage": 19.55, "elapsed_time": "1:08:41", "remaining_time": "4:42:39"} +{"current_steps": 1697, "total_steps": 8674, "loss": 0.5456752777099609, "lr": 1.886462508006273e-06, "epoch": 0.39128429790177544, "percentage": 19.56, "elapsed_time": "1:08:44", "remaining_time": "4:42:37"} +{"current_steps": 1698, "total_steps": 8674, "loss": 0.4197172224521637, "lr": 1.8862859957211926e-06, "epoch": 0.3915148720313581, "percentage": 19.58, "elapsed_time": "1:08:46", "remaining_time": "4:42:34"} +{"current_steps": 1699, "total_steps": 8674, "loss": 0.5012276768684387, 
"lr": 1.8861093546054603e-06, "epoch": 0.39174544616094076, "percentage": 19.59, "elapsed_time": "1:08:49", "remaining_time": "4:42:32"} +{"current_steps": 1700, "total_steps": 8674, "loss": 0.48108845949172974, "lr": 1.8859325846847531e-06, "epoch": 0.3919760202905234, "percentage": 19.6, "elapsed_time": "1:08:51", "remaining_time": "4:42:29"} +{"current_steps": 1701, "total_steps": 8674, "loss": 0.48592355847358704, "lr": 1.885755685984766e-06, "epoch": 0.3922065944201061, "percentage": 19.61, "elapsed_time": "1:08:55", "remaining_time": "4:42:31"} +{"current_steps": 1702, "total_steps": 8674, "loss": 0.5744791030883789, "lr": 1.8855786585312132e-06, "epoch": 0.39243716854968874, "percentage": 19.62, "elapsed_time": "1:08:57", "remaining_time": "4:42:29"} +{"current_steps": 1703, "total_steps": 8674, "loss": 0.5378769040107727, "lr": 1.8854015023498273e-06, "epoch": 0.3926677426792714, "percentage": 19.63, "elapsed_time": "1:09:00", "remaining_time": "4:42:26"} +{"current_steps": 1704, "total_steps": 8674, "loss": 0.5630123615264893, "lr": 1.8852242174663594e-06, "epoch": 0.39289831680885406, "percentage": 19.64, "elapsed_time": "1:09:02", "remaining_time": "4:42:24"} +{"current_steps": 1705, "total_steps": 8674, "loss": 0.5247849225997925, "lr": 1.8850468039065806e-06, "epoch": 0.3931288909384367, "percentage": 19.66, "elapsed_time": "1:09:04", "remaining_time": "4:42:21"} +{"current_steps": 1706, "total_steps": 8674, "loss": 0.5679286122322083, "lr": 1.884869261696279e-06, "epoch": 0.3933594650680194, "percentage": 19.67, "elapsed_time": "1:09:07", "remaining_time": "4:42:19"} +{"current_steps": 1707, "total_steps": 8674, "loss": 0.4505179524421692, "lr": 1.8846915908612622e-06, "epoch": 0.39359003919760205, "percentage": 19.68, "elapsed_time": "1:09:09", "remaining_time": "4:42:17"} +{"current_steps": 1708, "total_steps": 8674, "loss": 0.6077077388763428, "lr": 1.8845137914273566e-06, "epoch": 0.3938206133271847, "percentage": 19.69, "elapsed_time": "1:09:12", 
"remaining_time": "4:42:14"} +{"current_steps": 1709, "total_steps": 8674, "loss": 0.4703037738800049, "lr": 1.8843358634204069e-06, "epoch": 0.39405118745676737, "percentage": 19.7, "elapsed_time": "1:09:14", "remaining_time": "4:42:11"} +{"current_steps": 1710, "total_steps": 8674, "loss": 0.6085091829299927, "lr": 1.8841578068662773e-06, "epoch": 0.39428176158635003, "percentage": 19.71, "elapsed_time": "1:09:16", "remaining_time": "4:42:09"} +{"current_steps": 1711, "total_steps": 8674, "loss": 0.6075730919837952, "lr": 1.8839796217908498e-06, "epoch": 0.3945123357159327, "percentage": 19.73, "elapsed_time": "1:09:19", "remaining_time": "4:42:06"} +{"current_steps": 1712, "total_steps": 8674, "loss": 0.581851601600647, "lr": 1.8838013082200252e-06, "epoch": 0.39474290984551536, "percentage": 19.74, "elapsed_time": "1:09:21", "remaining_time": "4:42:03"} +{"current_steps": 1713, "total_steps": 8674, "loss": 0.555284857749939, "lr": 1.8836228661797234e-06, "epoch": 0.394973483975098, "percentage": 19.75, "elapsed_time": "1:09:24", "remaining_time": "4:42:01"} +{"current_steps": 1714, "total_steps": 8674, "loss": 0.5342675447463989, "lr": 1.8834442956958832e-06, "epoch": 0.3952040581046807, "percentage": 19.76, "elapsed_time": "1:09:26", "remaining_time": "4:41:59"} +{"current_steps": 1715, "total_steps": 8674, "loss": 0.47501081228256226, "lr": 1.8832655967944605e-06, "epoch": 0.39543463223426334, "percentage": 19.77, "elapsed_time": "1:09:29", "remaining_time": "4:41:57"} +{"current_steps": 1716, "total_steps": 8674, "loss": 0.592293918132782, "lr": 1.8830867695014323e-06, "epoch": 0.395665206363846, "percentage": 19.78, "elapsed_time": "1:09:31", "remaining_time": "4:41:54"} +{"current_steps": 1717, "total_steps": 8674, "loss": 0.5903242826461792, "lr": 1.8829078138427921e-06, "epoch": 0.39589578049342866, "percentage": 19.79, "elapsed_time": "1:09:33", "remaining_time": "4:41:51"} +{"current_steps": 1718, "total_steps": 8674, "loss": 0.5292568206787109, "lr": 
1.882728729844553e-06, "epoch": 0.3961263546230113, "percentage": 19.81, "elapsed_time": "1:09:36", "remaining_time": "4:41:49"} +{"current_steps": 1719, "total_steps": 8674, "loss": 0.5748786926269531, "lr": 1.8825495175327468e-06, "epoch": 0.396356928752594, "percentage": 19.82, "elapsed_time": "1:09:38", "remaining_time": "4:41:46"} +{"current_steps": 1720, "total_steps": 8674, "loss": 0.6191601753234863, "lr": 1.8823701769334242e-06, "epoch": 0.39658750288217665, "percentage": 19.83, "elapsed_time": "1:09:41", "remaining_time": "4:41:44"} +{"current_steps": 1721, "total_steps": 8674, "loss": 0.5569231510162354, "lr": 1.8821907080726535e-06, "epoch": 0.3968180770117593, "percentage": 19.84, "elapsed_time": "1:09:43", "remaining_time": "4:41:41"} +{"current_steps": 1722, "total_steps": 8674, "loss": 0.5103349089622498, "lr": 1.882011110976523e-06, "epoch": 0.39704865114134197, "percentage": 19.85, "elapsed_time": "1:09:46", "remaining_time": "4:41:40"} +{"current_steps": 1723, "total_steps": 8674, "loss": 0.4981175363063812, "lr": 1.8818313856711382e-06, "epoch": 0.39727922527092463, "percentage": 19.86, "elapsed_time": "1:09:48", "remaining_time": "4:41:37"} +{"current_steps": 1724, "total_steps": 8674, "loss": 0.5429514050483704, "lr": 1.8816515321826248e-06, "epoch": 0.39750979940050724, "percentage": 19.88, "elapsed_time": "1:09:50", "remaining_time": "4:41:34"} +{"current_steps": 1725, "total_steps": 8674, "loss": 0.5318386554718018, "lr": 1.8814715505371254e-06, "epoch": 0.3977403735300899, "percentage": 19.89, "elapsed_time": "1:09:53", "remaining_time": "4:41:32"} +{"current_steps": 1726, "total_steps": 8674, "loss": 0.47451460361480713, "lr": 1.881291440760803e-06, "epoch": 0.39797094765967256, "percentage": 19.9, "elapsed_time": "1:09:55", "remaining_time": "4:41:29"} +{"current_steps": 1727, "total_steps": 8674, "loss": 0.5141372680664062, "lr": 1.8811112028798384e-06, "epoch": 0.3982015217892552, "percentage": 19.91, "elapsed_time": "1:09:58", 
"remaining_time": "4:41:27"} +{"current_steps": 1728, "total_steps": 8674, "loss": 0.4950217008590698, "lr": 1.8809308369204302e-06, "epoch": 0.3984320959188379, "percentage": 19.92, "elapsed_time": "1:10:00", "remaining_time": "4:41:24"} +{"current_steps": 1729, "total_steps": 8674, "loss": 0.4961693286895752, "lr": 1.880750342908797e-06, "epoch": 0.39866267004842054, "percentage": 19.93, "elapsed_time": "1:10:02", "remaining_time": "4:41:22"} +{"current_steps": 1730, "total_steps": 8674, "loss": 0.43443650007247925, "lr": 1.8805697208711752e-06, "epoch": 0.3988932441780032, "percentage": 19.94, "elapsed_time": "1:10:05", "remaining_time": "4:41:19"} +{"current_steps": 1731, "total_steps": 8674, "loss": 0.6116896867752075, "lr": 1.8803889708338203e-06, "epoch": 0.39912381830758586, "percentage": 19.96, "elapsed_time": "1:10:07", "remaining_time": "4:41:17"} +{"current_steps": 1732, "total_steps": 8674, "loss": 0.46244728565216064, "lr": 1.8802080928230062e-06, "epoch": 0.3993543924371685, "percentage": 19.97, "elapsed_time": "1:10:10", "remaining_time": "4:41:15"} +{"current_steps": 1733, "total_steps": 8674, "loss": 0.5728162527084351, "lr": 1.880027086865025e-06, "epoch": 0.3995849665667512, "percentage": 19.98, "elapsed_time": "1:10:12", "remaining_time": "4:41:12"} +{"current_steps": 1734, "total_steps": 8674, "loss": 0.4472135901451111, "lr": 1.8798459529861876e-06, "epoch": 0.39981554069633385, "percentage": 19.99, "elapsed_time": "1:10:15", "remaining_time": "4:41:09"} +{"current_steps": 1735, "total_steps": 8674, "loss": 0.5862090587615967, "lr": 1.8796646912128246e-06, "epoch": 0.4000461148259165, "percentage": 20.0, "elapsed_time": "1:10:17", "remaining_time": "4:41:07"} +{"current_steps": 1736, "total_steps": 8674, "loss": 0.6406301259994507, "lr": 1.8794833015712831e-06, "epoch": 0.40027668895549917, "percentage": 20.01, "elapsed_time": "1:10:19", "remaining_time": "4:41:04"} +{"current_steps": 1737, "total_steps": 8674, "loss": 0.5865743160247803, 
"lr": 1.8793017840879306e-06, "epoch": 0.40050726308508183, "percentage": 20.03, "elapsed_time": "1:10:22", "remaining_time": "4:41:01"} +{"current_steps": 1738, "total_steps": 8674, "loss": 0.5521814823150635, "lr": 1.8791201387891524e-06, "epoch": 0.4007378372146645, "percentage": 20.04, "elapsed_time": "1:10:24", "remaining_time": "4:40:58"} +{"current_steps": 1739, "total_steps": 8674, "loss": 0.40027791261672974, "lr": 1.8789383657013522e-06, "epoch": 0.40096841134424716, "percentage": 20.05, "elapsed_time": "1:10:27", "remaining_time": "4:40:57"} +{"current_steps": 1740, "total_steps": 8674, "loss": 0.5594751238822937, "lr": 1.8787564648509528e-06, "epoch": 0.4011989854738298, "percentage": 20.06, "elapsed_time": "1:10:29", "remaining_time": "4:40:54"} +{"current_steps": 1741, "total_steps": 8674, "loss": 0.5029730796813965, "lr": 1.8785744362643955e-06, "epoch": 0.4014295596034125, "percentage": 20.07, "elapsed_time": "1:10:31", "remaining_time": "4:40:52"} +{"current_steps": 1742, "total_steps": 8674, "loss": 0.6089034676551819, "lr": 1.8783922799681397e-06, "epoch": 0.40166013373299514, "percentage": 20.08, "elapsed_time": "1:10:34", "remaining_time": "4:40:49"} +{"current_steps": 1743, "total_steps": 8674, "loss": 0.5238372683525085, "lr": 1.8782099959886639e-06, "epoch": 0.4018907078625778, "percentage": 20.09, "elapsed_time": "1:10:36", "remaining_time": "4:40:46"} +{"current_steps": 1744, "total_steps": 8674, "loss": 0.47281232476234436, "lr": 1.8780275843524643e-06, "epoch": 0.40212128199216046, "percentage": 20.11, "elapsed_time": "1:10:39", "remaining_time": "4:40:44"} +{"current_steps": 1745, "total_steps": 8674, "loss": 0.44885876774787903, "lr": 1.8778450450860571e-06, "epoch": 0.4023518561217431, "percentage": 20.12, "elapsed_time": "1:10:41", "remaining_time": "4:40:41"} +{"current_steps": 1746, "total_steps": 8674, "loss": 0.5915139317512512, "lr": 1.8776623782159762e-06, "epoch": 0.4025824302513258, "percentage": 20.13, "elapsed_time": 
"1:10:43", "remaining_time": "4:40:39"} +{"current_steps": 1747, "total_steps": 8674, "loss": 0.49341484904289246, "lr": 1.8774795837687736e-06, "epoch": 0.40281300438090845, "percentage": 20.14, "elapsed_time": "1:10:46", "remaining_time": "4:40:36"} +{"current_steps": 1748, "total_steps": 8674, "loss": 0.43253493309020996, "lr": 1.8772966617710205e-06, "epoch": 0.4030435785104911, "percentage": 20.15, "elapsed_time": "1:10:48", "remaining_time": "4:40:34"} +{"current_steps": 1749, "total_steps": 8674, "loss": 0.48660045862197876, "lr": 1.8771136122493064e-06, "epoch": 0.40327415264007377, "percentage": 20.16, "elapsed_time": "1:10:51", "remaining_time": "4:40:32"} +{"current_steps": 1750, "total_steps": 8674, "loss": 0.4493838846683502, "lr": 1.8769304352302396e-06, "epoch": 0.40350472676965643, "percentage": 20.18, "elapsed_time": "1:10:53", "remaining_time": "4:40:29"} +{"current_steps": 1751, "total_steps": 8674, "loss": 0.5656435489654541, "lr": 1.8767471307404464e-06, "epoch": 0.4037353008992391, "percentage": 20.19, "elapsed_time": "1:10:56", "remaining_time": "4:40:27"} +{"current_steps": 1752, "total_steps": 8674, "loss": 0.48047327995300293, "lr": 1.876563698806572e-06, "epoch": 0.40396587502882175, "percentage": 20.2, "elapsed_time": "1:10:58", "remaining_time": "4:40:25"} +{"current_steps": 1753, "total_steps": 8674, "loss": 0.5314204692840576, "lr": 1.8763801394552806e-06, "epoch": 0.4041964491584044, "percentage": 20.21, "elapsed_time": "1:11:01", "remaining_time": "4:40:23"} +{"current_steps": 1754, "total_steps": 8674, "loss": 0.5436627864837646, "lr": 1.876196452713254e-06, "epoch": 0.4044270232879871, "percentage": 20.22, "elapsed_time": "1:11:03", "remaining_time": "4:40:20"} +{"current_steps": 1755, "total_steps": 8674, "loss": 0.5383991599082947, "lr": 1.8760126386071933e-06, "epoch": 0.40465759741756974, "percentage": 20.23, "elapsed_time": "1:11:05", "remaining_time": "4:40:18"} +{"current_steps": 1756, "total_steps": 8674, "loss": 
0.48271507024765015, "lr": 1.8758286971638171e-06, "epoch": 0.4048881715471524, "percentage": 20.24, "elapsed_time": "1:11:08", "remaining_time": "4:40:16"} +{"current_steps": 1757, "total_steps": 8674, "loss": 0.5920745134353638, "lr": 1.8756446284098638e-06, "epoch": 0.40511874567673506, "percentage": 20.26, "elapsed_time": "1:11:10", "remaining_time": "4:40:13"} +{"current_steps": 1758, "total_steps": 8674, "loss": 0.4467526078224182, "lr": 1.875460432372089e-06, "epoch": 0.4053493198063177, "percentage": 20.27, "elapsed_time": "1:11:13", "remaining_time": "4:40:11"} +{"current_steps": 1759, "total_steps": 8674, "loss": 0.425409734249115, "lr": 1.875276109077268e-06, "epoch": 0.4055798939359004, "percentage": 20.28, "elapsed_time": "1:11:15", "remaining_time": "4:40:08"} +{"current_steps": 1760, "total_steps": 8674, "loss": 0.4911944568157196, "lr": 1.8750916585521938e-06, "epoch": 0.40581046806548304, "percentage": 20.29, "elapsed_time": "1:11:18", "remaining_time": "4:40:06"} +{"current_steps": 1761, "total_steps": 8674, "loss": 0.49605780839920044, "lr": 1.8749070808236787e-06, "epoch": 0.4060410421950657, "percentage": 20.3, "elapsed_time": "1:11:20", "remaining_time": "4:40:03"} +{"current_steps": 1762, "total_steps": 8674, "loss": 0.5582889914512634, "lr": 1.874722375918552e-06, "epoch": 0.40627161632464837, "percentage": 20.31, "elapsed_time": "1:11:22", "remaining_time": "4:40:01"} +{"current_steps": 1763, "total_steps": 8674, "loss": 0.4867294132709503, "lr": 1.874537543863663e-06, "epoch": 0.406502190454231, "percentage": 20.33, "elapsed_time": "1:11:25", "remaining_time": "4:39:58"} +{"current_steps": 1764, "total_steps": 8674, "loss": 0.5050587058067322, "lr": 1.8743525846858787e-06, "epoch": 0.4067327645838137, "percentage": 20.34, "elapsed_time": "1:11:27", "remaining_time": "4:39:56"} +{"current_steps": 1765, "total_steps": 8674, "loss": 0.4380977749824524, "lr": 1.8741674984120852e-06, "epoch": 0.40696333871339635, "percentage": 20.35, 
"elapsed_time": "1:11:30", "remaining_time": "4:39:54"} +{"current_steps": 1766, "total_steps": 8674, "loss": 0.5159280300140381, "lr": 1.8739822850691865e-06, "epoch": 0.407193912842979, "percentage": 20.36, "elapsed_time": "1:11:32", "remaining_time": "4:39:51"} +{"current_steps": 1767, "total_steps": 8674, "loss": 0.6999780535697937, "lr": 1.8737969446841046e-06, "epoch": 0.4074244869725617, "percentage": 20.37, "elapsed_time": "1:11:35", "remaining_time": "4:39:49"} +{"current_steps": 1768, "total_steps": 8674, "loss": 0.5844931602478027, "lr": 1.8736114772837816e-06, "epoch": 0.40765506110214433, "percentage": 20.38, "elapsed_time": "1:11:37", "remaining_time": "4:39:46"} +{"current_steps": 1769, "total_steps": 8674, "loss": 0.5078610181808472, "lr": 1.8734258828951764e-06, "epoch": 0.407885635231727, "percentage": 20.39, "elapsed_time": "1:11:39", "remaining_time": "4:39:44"} +{"current_steps": 1770, "total_steps": 8674, "loss": 0.564793586730957, "lr": 1.8732401615452673e-06, "epoch": 0.40811620936130966, "percentage": 20.41, "elapsed_time": "1:11:42", "remaining_time": "4:39:41"} +{"current_steps": 1771, "total_steps": 8674, "loss": 0.6145100593566895, "lr": 1.8730543132610506e-06, "epoch": 0.4083467834908923, "percentage": 20.42, "elapsed_time": "1:11:44", "remaining_time": "4:39:38"} +{"current_steps": 1772, "total_steps": 8674, "loss": 0.45434027910232544, "lr": 1.8728683380695414e-06, "epoch": 0.408577357620475, "percentage": 20.43, "elapsed_time": "1:11:47", "remaining_time": "4:39:36"} +{"current_steps": 1773, "total_steps": 8674, "loss": 0.4917553961277008, "lr": 1.872682235997773e-06, "epoch": 0.40880793175005764, "percentage": 20.44, "elapsed_time": "1:11:49", "remaining_time": "4:39:34"} +{"current_steps": 1774, "total_steps": 8674, "loss": 0.5677252411842346, "lr": 1.872496007072797e-06, "epoch": 0.4090385058796403, "percentage": 20.45, "elapsed_time": "1:11:52", "remaining_time": "4:39:32"} +{"current_steps": 1775, "total_steps": 8674, "loss": 
0.5516688823699951, "lr": 1.872309651321684e-06, "epoch": 0.40926908000922296, "percentage": 20.46, "elapsed_time": "1:11:54", "remaining_time": "4:39:29"} +{"current_steps": 1776, "total_steps": 8674, "loss": 0.46755337715148926, "lr": 1.8721231687715227e-06, "epoch": 0.4094996541388056, "percentage": 20.47, "elapsed_time": "1:11:56", "remaining_time": "4:39:27"} +{"current_steps": 1777, "total_steps": 8674, "loss": 0.6575521230697632, "lr": 1.8719365594494202e-06, "epoch": 0.4097302282683883, "percentage": 20.49, "elapsed_time": "1:11:59", "remaining_time": "4:39:24"} +{"current_steps": 1778, "total_steps": 8674, "loss": 0.6088716983795166, "lr": 1.8717498233825019e-06, "epoch": 0.40996080239797095, "percentage": 20.5, "elapsed_time": "1:12:01", "remaining_time": "4:39:21"} +{"current_steps": 1779, "total_steps": 8674, "loss": 0.39476478099823, "lr": 1.8715629605979118e-06, "epoch": 0.4101913765275536, "percentage": 20.51, "elapsed_time": "1:12:04", "remaining_time": "4:39:19"} +{"current_steps": 1780, "total_steps": 8674, "loss": 0.4893898665904999, "lr": 1.8713759711228123e-06, "epoch": 0.41042195065713627, "percentage": 20.52, "elapsed_time": "1:12:06", "remaining_time": "4:39:16"} +{"current_steps": 1781, "total_steps": 8674, "loss": 0.5077828764915466, "lr": 1.8711888549843842e-06, "epoch": 0.41065252478671893, "percentage": 20.53, "elapsed_time": "1:12:09", "remaining_time": "4:39:15"} +{"current_steps": 1782, "total_steps": 8674, "loss": 0.5212582349777222, "lr": 1.8710016122098269e-06, "epoch": 0.4108830989163016, "percentage": 20.54, "elapsed_time": "1:12:11", "remaining_time": "4:39:13"} +{"current_steps": 1783, "total_steps": 8674, "loss": 0.5135321617126465, "lr": 1.870814242826358e-06, "epoch": 0.41111367304588425, "percentage": 20.56, "elapsed_time": "1:12:14", "remaining_time": "4:39:10"} +{"current_steps": 1784, "total_steps": 8674, "loss": 0.5398930311203003, "lr": 1.8706267468612133e-06, "epoch": 0.4113442471754669, "percentage": 20.57, 
"elapsed_time": "1:12:16", "remaining_time": "4:39:07"} +{"current_steps": 1785, "total_steps": 8674, "loss": 0.49205562472343445, "lr": 1.8704391243416477e-06, "epoch": 0.4115748213050496, "percentage": 20.58, "elapsed_time": "1:12:18", "remaining_time": "4:39:05"} +{"current_steps": 1786, "total_steps": 8674, "loss": 0.5145718455314636, "lr": 1.8702513752949335e-06, "epoch": 0.41180539543463224, "percentage": 20.59, "elapsed_time": "1:12:21", "remaining_time": "4:39:02"} +{"current_steps": 1787, "total_steps": 8674, "loss": 0.4868374466896057, "lr": 1.8700634997483622e-06, "epoch": 0.4120359695642149, "percentage": 20.6, "elapsed_time": "1:12:23", "remaining_time": "4:39:00"} +{"current_steps": 1788, "total_steps": 8674, "loss": 0.5409311652183533, "lr": 1.8698754977292435e-06, "epoch": 0.41226654369379756, "percentage": 20.61, "elapsed_time": "1:12:25", "remaining_time": "4:38:57"} +{"current_steps": 1789, "total_steps": 8674, "loss": 0.5476658344268799, "lr": 1.8696873692649052e-06, "epoch": 0.4124971178233802, "percentage": 20.62, "elapsed_time": "1:12:28", "remaining_time": "4:38:54"} +{"current_steps": 1790, "total_steps": 8674, "loss": 0.5545511245727539, "lr": 1.8694991143826937e-06, "epoch": 0.4127276919529629, "percentage": 20.64, "elapsed_time": "1:12:31", "remaining_time": "4:38:53"} +{"current_steps": 1791, "total_steps": 8674, "loss": 0.5479267835617065, "lr": 1.869310733109974e-06, "epoch": 0.41295826608254554, "percentage": 20.65, "elapsed_time": "1:12:33", "remaining_time": "4:38:50"} +{"current_steps": 1792, "total_steps": 8674, "loss": 0.5261585712432861, "lr": 1.8691222254741289e-06, "epoch": 0.4131888402121282, "percentage": 20.66, "elapsed_time": "1:12:35", "remaining_time": "4:38:48"} +{"current_steps": 1793, "total_steps": 8674, "loss": 0.5478091239929199, "lr": 1.8689335915025599e-06, "epoch": 0.41341941434171087, "percentage": 20.67, "elapsed_time": "1:12:38", "remaining_time": "4:38:45"} +{"current_steps": 1794, "total_steps": 8674, 
"loss": 0.6739054322242737, "lr": 1.8687448312226872e-06, "epoch": 0.41364998847129353, "percentage": 20.68, "elapsed_time": "1:12:40", "remaining_time": "4:38:43"} +{"current_steps": 1795, "total_steps": 8674, "loss": 0.613865315914154, "lr": 1.8685559446619487e-06, "epoch": 0.4138805626008762, "percentage": 20.69, "elapsed_time": "1:12:42", "remaining_time": "4:38:40"} +{"current_steps": 1796, "total_steps": 8674, "loss": 0.3936721384525299, "lr": 1.8683669318478012e-06, "epoch": 0.41411113673045885, "percentage": 20.71, "elapsed_time": "1:12:45", "remaining_time": "4:38:37"} +{"current_steps": 1797, "total_steps": 8674, "loss": 0.5508556365966797, "lr": 1.8681777928077197e-06, "epoch": 0.4143417108600415, "percentage": 20.72, "elapsed_time": "1:12:47", "remaining_time": "4:38:34"} +{"current_steps": 1798, "total_steps": 8674, "loss": 0.47734567523002625, "lr": 1.867988527569197e-06, "epoch": 0.4145722849896242, "percentage": 20.73, "elapsed_time": "1:12:50", "remaining_time": "4:38:33"} +{"current_steps": 1799, "total_steps": 8674, "loss": 0.46847039461135864, "lr": 1.8677991361597449e-06, "epoch": 0.41480285911920683, "percentage": 20.74, "elapsed_time": "1:12:52", "remaining_time": "4:38:30"} +{"current_steps": 1800, "total_steps": 8674, "loss": 0.5202786326408386, "lr": 1.8676096186068937e-06, "epoch": 0.4150334332487895, "percentage": 20.75, "elapsed_time": "1:12:55", "remaining_time": "4:38:27"} +{"current_steps": 1801, "total_steps": 8674, "loss": 0.5144700407981873, "lr": 1.8674199749381914e-06, "epoch": 0.41526400737837216, "percentage": 20.76, "elapsed_time": "1:12:58", "remaining_time": "4:38:30"} +{"current_steps": 1802, "total_steps": 8674, "loss": 0.4394092559814453, "lr": 1.8672302051812048e-06, "epoch": 0.4154945815079548, "percentage": 20.77, "elapsed_time": "1:13:01", "remaining_time": "4:38:28"} +{"current_steps": 1803, "total_steps": 8674, "loss": 0.5017338991165161, "lr": 1.8670403093635185e-06, "epoch": 0.4157251556375375, "percentage": 
20.79, "elapsed_time": "1:13:03", "remaining_time": "4:38:25"} +{"current_steps": 1804, "total_steps": 8674, "loss": 0.409381628036499, "lr": 1.8668502875127366e-06, "epoch": 0.41595572976712014, "percentage": 20.8, "elapsed_time": "1:13:05", "remaining_time": "4:38:22"} +{"current_steps": 1805, "total_steps": 8674, "loss": 0.5193957090377808, "lr": 1.8666601396564795e-06, "epoch": 0.4161863038967028, "percentage": 20.81, "elapsed_time": "1:13:08", "remaining_time": "4:38:20"} +{"current_steps": 1806, "total_steps": 8674, "loss": 0.5933586359024048, "lr": 1.8664698658223882e-06, "epoch": 0.41641687802628546, "percentage": 20.82, "elapsed_time": "1:13:11", "remaining_time": "4:38:19"} +{"current_steps": 1807, "total_steps": 8674, "loss": 0.5283366441726685, "lr": 1.8662794660381204e-06, "epoch": 0.4166474521558681, "percentage": 20.83, "elapsed_time": "1:13:13", "remaining_time": "4:38:16"} +{"current_steps": 1808, "total_steps": 8674, "loss": 0.5063748359680176, "lr": 1.8660889403313526e-06, "epoch": 0.4168780262854508, "percentage": 20.84, "elapsed_time": "1:13:16", "remaining_time": "4:38:14"} +{"current_steps": 1809, "total_steps": 8674, "loss": 0.6386028528213501, "lr": 1.86589828872978e-06, "epoch": 0.41710860041503345, "percentage": 20.86, "elapsed_time": "1:13:18", "remaining_time": "4:38:11"} +{"current_steps": 1810, "total_steps": 8674, "loss": 0.4618440270423889, "lr": 1.8657075112611153e-06, "epoch": 0.4173391745446161, "percentage": 20.87, "elapsed_time": "1:13:20", "remaining_time": "4:38:09"} +{"current_steps": 1811, "total_steps": 8674, "loss": 0.4523535966873169, "lr": 1.8655166079530903e-06, "epoch": 0.41756974867419877, "percentage": 20.88, "elapsed_time": "1:13:23", "remaining_time": "4:38:06"} +{"current_steps": 1812, "total_steps": 8674, "loss": 0.501311719417572, "lr": 1.8653255788334544e-06, "epoch": 0.41780032280378143, "percentage": 20.89, "elapsed_time": "1:13:25", "remaining_time": "4:38:04"} +{"current_steps": 1813, "total_steps": 8674, 
"loss": 0.5504614114761353, "lr": 1.865134423929976e-06, "epoch": 0.4180308969333641, "percentage": 20.9, "elapsed_time": "1:13:28", "remaining_time": "4:38:01"} +{"current_steps": 1814, "total_steps": 8674, "loss": 0.44275063276290894, "lr": 1.864943143270441e-06, "epoch": 0.41826147106294675, "percentage": 20.91, "elapsed_time": "1:13:30", "remaining_time": "4:37:58"} +{"current_steps": 1815, "total_steps": 8674, "loss": 0.5628173351287842, "lr": 1.8647517368826545e-06, "epoch": 0.4184920451925294, "percentage": 20.92, "elapsed_time": "1:13:33", "remaining_time": "4:37:57"} +{"current_steps": 1816, "total_steps": 8674, "loss": 0.489221453666687, "lr": 1.864560204794439e-06, "epoch": 0.4187226193221121, "percentage": 20.94, "elapsed_time": "1:13:35", "remaining_time": "4:37:54"} +{"current_steps": 1817, "total_steps": 8674, "loss": 0.5440137386322021, "lr": 1.8643685470336355e-06, "epoch": 0.41895319345169474, "percentage": 20.95, "elapsed_time": "1:13:37", "remaining_time": "4:37:51"} +{"current_steps": 1818, "total_steps": 8674, "loss": 0.4518952965736389, "lr": 1.8641767636281035e-06, "epoch": 0.4191837675812774, "percentage": 20.96, "elapsed_time": "1:13:40", "remaining_time": "4:37:49"} +{"current_steps": 1819, "total_steps": 8674, "loss": 0.591090977191925, "lr": 1.8639848546057209e-06, "epoch": 0.41941434171086006, "percentage": 20.97, "elapsed_time": "1:13:42", "remaining_time": "4:37:46"} +{"current_steps": 1820, "total_steps": 8674, "loss": 0.5622411966323853, "lr": 1.8637928199943836e-06, "epoch": 0.4196449158404427, "percentage": 20.98, "elapsed_time": "1:13:44", "remaining_time": "4:37:44"} +{"current_steps": 1821, "total_steps": 8674, "loss": 0.5086779594421387, "lr": 1.8636006598220052e-06, "epoch": 0.4198754899700254, "percentage": 20.99, "elapsed_time": "1:13:47", "remaining_time": "4:37:41"} +{"current_steps": 1822, "total_steps": 8674, "loss": 0.5055384635925293, "lr": 1.8634083741165188e-06, "epoch": 0.42010606409960805, "percentage": 21.01, 
"elapsed_time": "1:13:49", "remaining_time": "4:37:38"} +{"current_steps": 1823, "total_steps": 8674, "loss": 0.5076277852058411, "lr": 1.863215962905875e-06, "epoch": 0.4203366382291907, "percentage": 21.02, "elapsed_time": "1:13:52", "remaining_time": "4:37:37"} +{"current_steps": 1824, "total_steps": 8674, "loss": 0.5378403067588806, "lr": 1.8630234262180424e-06, "epoch": 0.42056721235877337, "percentage": 21.03, "elapsed_time": "1:13:54", "remaining_time": "4:37:34"} +{"current_steps": 1825, "total_steps": 8674, "loss": 0.6133165955543518, "lr": 1.8628307640810083e-06, "epoch": 0.42079778648835603, "percentage": 21.04, "elapsed_time": "1:13:57", "remaining_time": "4:37:31"} +{"current_steps": 1826, "total_steps": 8674, "loss": 0.4978156089782715, "lr": 1.8626379765227782e-06, "epoch": 0.4210283606179387, "percentage": 21.05, "elapsed_time": "1:13:59", "remaining_time": "4:37:29"} +{"current_steps": 1827, "total_steps": 8674, "loss": 0.43159037828445435, "lr": 1.8624450635713759e-06, "epoch": 0.42125893474752135, "percentage": 21.06, "elapsed_time": "1:14:01", "remaining_time": "4:37:26"} +{"current_steps": 1828, "total_steps": 8674, "loss": 0.48821642994880676, "lr": 1.8622520252548424e-06, "epoch": 0.421489508877104, "percentage": 21.07, "elapsed_time": "1:14:04", "remaining_time": "4:37:23"} +{"current_steps": 1829, "total_steps": 8674, "loss": 0.4666696786880493, "lr": 1.8620588616012387e-06, "epoch": 0.4217200830066867, "percentage": 21.09, "elapsed_time": "1:14:06", "remaining_time": "4:37:21"} +{"current_steps": 1830, "total_steps": 8674, "loss": 0.5278067588806152, "lr": 1.8618655726386425e-06, "epoch": 0.42195065713626934, "percentage": 21.1, "elapsed_time": "1:14:08", "remaining_time": "4:37:18"} +{"current_steps": 1831, "total_steps": 8674, "loss": 0.4357749819755554, "lr": 1.8616721583951512e-06, "epoch": 0.422181231265852, "percentage": 21.11, "elapsed_time": "1:14:11", "remaining_time": "4:37:15"} +{"current_steps": 1832, "total_steps": 8674, 
"loss": 0.5388439893722534, "lr": 1.8614786188988782e-06, "epoch": 0.42241180539543466, "percentage": 21.12, "elapsed_time": "1:14:13", "remaining_time": "4:37:14"} +{"current_steps": 1833, "total_steps": 8674, "loss": 0.5443956255912781, "lr": 1.8612849541779573e-06, "epoch": 0.4226423795250173, "percentage": 21.13, "elapsed_time": "1:14:16", "remaining_time": "4:37:11"} +{"current_steps": 1834, "total_steps": 8674, "loss": 0.5614160895347595, "lr": 1.86109116426054e-06, "epoch": 0.4228729536546, "percentage": 21.14, "elapsed_time": "1:14:18", "remaining_time": "4:37:09"} +{"current_steps": 1835, "total_steps": 8674, "loss": 0.45780229568481445, "lr": 1.8608972491747943e-06, "epoch": 0.4231035277841826, "percentage": 21.16, "elapsed_time": "1:14:21", "remaining_time": "4:37:06"} +{"current_steps": 1836, "total_steps": 8674, "loss": 0.6354867219924927, "lr": 1.8607032089489088e-06, "epoch": 0.42333410191376525, "percentage": 21.17, "elapsed_time": "1:14:23", "remaining_time": "4:37:03"} +{"current_steps": 1837, "total_steps": 8674, "loss": 0.5172948241233826, "lr": 1.860509043611089e-06, "epoch": 0.4235646760433479, "percentage": 21.18, "elapsed_time": "1:14:25", "remaining_time": "4:37:01"} +{"current_steps": 1838, "total_steps": 8674, "loss": 0.4353157877922058, "lr": 1.8603147531895586e-06, "epoch": 0.42379525017293057, "percentage": 21.19, "elapsed_time": "1:14:28", "remaining_time": "4:36:58"} +{"current_steps": 1839, "total_steps": 8674, "loss": 0.4971036911010742, "lr": 1.8601203377125599e-06, "epoch": 0.42402582430251323, "percentage": 21.2, "elapsed_time": "1:14:30", "remaining_time": "4:36:56"} +{"current_steps": 1840, "total_steps": 8674, "loss": 0.5037736296653748, "lr": 1.859925797208353e-06, "epoch": 0.4242563984320959, "percentage": 21.21, "elapsed_time": "1:14:33", "remaining_time": "4:36:54"} +{"current_steps": 1841, "total_steps": 8674, "loss": 0.4480808675289154, "lr": 1.8597311317052165e-06, "epoch": 0.42448697256167855, "percentage": 21.22, 
"elapsed_time": "1:14:35", "remaining_time": "4:36:51"} +{"current_steps": 1842, "total_steps": 8674, "loss": 0.5102680325508118, "lr": 1.8595363412314468e-06, "epoch": 0.4247175466912612, "percentage": 21.24, "elapsed_time": "1:14:38", "remaining_time": "4:36:49"} +{"current_steps": 1843, "total_steps": 8674, "loss": 0.5979090929031372, "lr": 1.8593414258153585e-06, "epoch": 0.4249481208208439, "percentage": 21.25, "elapsed_time": "1:14:40", "remaining_time": "4:36:46"} +{"current_steps": 1844, "total_steps": 8674, "loss": 0.4616047143936157, "lr": 1.8591463854852854e-06, "epoch": 0.42517869495042654, "percentage": 21.26, "elapsed_time": "1:14:42", "remaining_time": "4:36:44"} +{"current_steps": 1845, "total_steps": 8674, "loss": 0.4893925189971924, "lr": 1.8589512202695773e-06, "epoch": 0.4254092690800092, "percentage": 21.27, "elapsed_time": "1:14:45", "remaining_time": "4:36:41"} +{"current_steps": 1846, "total_steps": 8674, "loss": 0.49619823694229126, "lr": 1.8587559301966045e-06, "epoch": 0.42563984320959186, "percentage": 21.28, "elapsed_time": "1:14:47", "remaining_time": "4:36:38"} +{"current_steps": 1847, "total_steps": 8674, "loss": 0.5205181837081909, "lr": 1.858560515294754e-06, "epoch": 0.4258704173391745, "percentage": 21.29, "elapsed_time": "1:14:49", "remaining_time": "4:36:36"} +{"current_steps": 1848, "total_steps": 8674, "loss": 0.5910394191741943, "lr": 1.8583649755924315e-06, "epoch": 0.4261009914687572, "percentage": 21.31, "elapsed_time": "1:14:52", "remaining_time": "4:36:34"} +{"current_steps": 1849, "total_steps": 8674, "loss": 0.4916709363460541, "lr": 1.8581693111180603e-06, "epoch": 0.42633156559833985, "percentage": 21.32, "elapsed_time": "1:14:54", "remaining_time": "4:36:31"} +{"current_steps": 1850, "total_steps": 8674, "loss": 0.5728994011878967, "lr": 1.8579735219000824e-06, "epoch": 0.4265621397279225, "percentage": 21.33, "elapsed_time": "1:14:57", "remaining_time": "4:36:28"} +{"current_steps": 1851, "total_steps": 8674, 
"loss": 0.49620527029037476, "lr": 1.857777607966958e-06, "epoch": 0.42679271385750517, "percentage": 21.34, "elapsed_time": "1:14:59", "remaining_time": "4:36:26"} +{"current_steps": 1852, "total_steps": 8674, "loss": 0.5100233554840088, "lr": 1.8575815693471649e-06, "epoch": 0.42702328798708783, "percentage": 21.35, "elapsed_time": "1:15:02", "remaining_time": "4:36:23"} +{"current_steps": 1853, "total_steps": 8674, "loss": 0.48981544375419617, "lr": 1.8573854060691994e-06, "epoch": 0.4272538621166705, "percentage": 21.36, "elapsed_time": "1:15:04", "remaining_time": "4:36:20"} +{"current_steps": 1854, "total_steps": 8674, "loss": 0.44190293550491333, "lr": 1.8571891181615755e-06, "epoch": 0.42748443624625315, "percentage": 21.37, "elapsed_time": "1:15:06", "remaining_time": "4:36:18"} +{"current_steps": 1855, "total_steps": 8674, "loss": 0.42867448925971985, "lr": 1.8569927056528264e-06, "epoch": 0.4277150103758358, "percentage": 21.39, "elapsed_time": "1:15:09", "remaining_time": "4:36:16"} +{"current_steps": 1856, "total_steps": 8674, "loss": 0.4873782694339752, "lr": 1.8567961685715016e-06, "epoch": 0.4279455845054185, "percentage": 21.4, "elapsed_time": "1:15:11", "remaining_time": "4:36:13"} +{"current_steps": 1857, "total_steps": 8674, "loss": 0.4985312819480896, "lr": 1.8565995069461706e-06, "epoch": 0.42817615863500114, "percentage": 21.41, "elapsed_time": "1:15:14", "remaining_time": "4:36:12"} +{"current_steps": 1858, "total_steps": 8674, "loss": 0.5525496006011963, "lr": 1.85640272080542e-06, "epoch": 0.4284067327645838, "percentage": 21.42, "elapsed_time": "1:15:16", "remaining_time": "4:36:09"} +{"current_steps": 1859, "total_steps": 8674, "loss": 0.5645877122879028, "lr": 1.8562058101778547e-06, "epoch": 0.42863730689416646, "percentage": 21.43, "elapsed_time": "1:15:19", "remaining_time": "4:36:07"} +{"current_steps": 1860, "total_steps": 8674, "loss": 0.4304332137107849, "lr": 1.856008775092097e-06, "epoch": 0.4288678810237491, "percentage": 
21.44, "elapsed_time": "1:15:21", "remaining_time": "4:36:04"} +{"current_steps": 1861, "total_steps": 8674, "loss": 0.4970170259475708, "lr": 1.8558116155767888e-06, "epoch": 0.4290984551533318, "percentage": 21.45, "elapsed_time": "1:15:23", "remaining_time": "4:36:01"} +{"current_steps": 1862, "total_steps": 8674, "loss": 0.5718003511428833, "lr": 1.8556143316605888e-06, "epoch": 0.42932902928291444, "percentage": 21.47, "elapsed_time": "1:15:26", "remaining_time": "4:35:59"} +{"current_steps": 1863, "total_steps": 8674, "loss": 0.4445415139198303, "lr": 1.8554169233721741e-06, "epoch": 0.4295596034124971, "percentage": 21.48, "elapsed_time": "1:15:28", "remaining_time": "4:35:56"} +{"current_steps": 1864, "total_steps": 8674, "loss": 0.5297178626060486, "lr": 1.8552193907402404e-06, "epoch": 0.42979017754207977, "percentage": 21.49, "elapsed_time": "1:15:31", "remaining_time": "4:35:53"} +{"current_steps": 1865, "total_steps": 8674, "loss": 0.4564483165740967, "lr": 1.8550217337935013e-06, "epoch": 0.4300207516716624, "percentage": 21.5, "elapsed_time": "1:15:33", "remaining_time": "4:35:52"} +{"current_steps": 1866, "total_steps": 8674, "loss": 0.4789202809333801, "lr": 1.8548239525606872e-06, "epoch": 0.4302513258012451, "percentage": 21.51, "elapsed_time": "1:15:36", "remaining_time": "4:35:50"} +{"current_steps": 1867, "total_steps": 8674, "loss": 0.5240263938903809, "lr": 1.8546260470705485e-06, "epoch": 0.43048189993082775, "percentage": 21.52, "elapsed_time": "1:15:38", "remaining_time": "4:35:47"} +{"current_steps": 1868, "total_steps": 8674, "loss": 0.4190866947174072, "lr": 1.8544280173518523e-06, "epoch": 0.4307124740604104, "percentage": 21.54, "elapsed_time": "1:15:40", "remaining_time": "4:35:44"} +{"current_steps": 1869, "total_steps": 8674, "loss": 0.502301812171936, "lr": 1.8542298634333844e-06, "epoch": 0.43094304818999307, "percentage": 21.55, "elapsed_time": "1:15:43", "remaining_time": "4:35:42"} +{"current_steps": 1870, "total_steps": 
8674, "loss": 0.5752545595169067, "lr": 1.8540315853439488e-06, "epoch": 0.43117362231957573, "percentage": 21.56, "elapsed_time": "1:15:45", "remaining_time": "4:35:39"} +{"current_steps": 1871, "total_steps": 8674, "loss": 0.44959962368011475, "lr": 1.8538331831123667e-06, "epoch": 0.4314041964491584, "percentage": 21.57, "elapsed_time": "1:15:48", "remaining_time": "4:35:36"} +{"current_steps": 1872, "total_steps": 8674, "loss": 0.5320106148719788, "lr": 1.8536346567674782e-06, "epoch": 0.43163477057874106, "percentage": 21.58, "elapsed_time": "1:15:50", "remaining_time": "4:35:34"} +{"current_steps": 1873, "total_steps": 8674, "loss": 0.5981979966163635, "lr": 1.8534360063381407e-06, "epoch": 0.4318653447083237, "percentage": 21.59, "elapsed_time": "1:15:52", "remaining_time": "4:35:31"} +{"current_steps": 1874, "total_steps": 8674, "loss": 0.5567579865455627, "lr": 1.8532372318532306e-06, "epoch": 0.4320959188379064, "percentage": 21.6, "elapsed_time": "1:15:55", "remaining_time": "4:35:29"} +{"current_steps": 1875, "total_steps": 8674, "loss": 0.5604764223098755, "lr": 1.8530383333416415e-06, "epoch": 0.43232649296748904, "percentage": 21.62, "elapsed_time": "1:15:57", "remaining_time": "4:35:26"} +{"current_steps": 1876, "total_steps": 8674, "loss": 0.5410721302032471, "lr": 1.8528393108322852e-06, "epoch": 0.4325570670970717, "percentage": 21.63, "elapsed_time": "1:16:00", "remaining_time": "4:35:24"} +{"current_steps": 1877, "total_steps": 8674, "loss": 0.5417271852493286, "lr": 1.852640164354092e-06, "epoch": 0.43278764122665436, "percentage": 21.64, "elapsed_time": "1:16:02", "remaining_time": "4:35:21"} +{"current_steps": 1878, "total_steps": 8674, "loss": 0.5831471681594849, "lr": 1.8524408939360096e-06, "epoch": 0.433018215356237, "percentage": 21.65, "elapsed_time": "1:16:04", "remaining_time": "4:35:19"} +{"current_steps": 1879, "total_steps": 8674, "loss": 0.45030760765075684, "lr": 1.8522414996070045e-06, "epoch": 0.4332487894858197, "percentage": 
21.66, "elapsed_time": "1:16:07", "remaining_time": "4:35:16"} +{"current_steps": 1880, "total_steps": 8674, "loss": 0.44657936692237854, "lr": 1.8520419813960596e-06, "epoch": 0.43347936361540235, "percentage": 21.67, "elapsed_time": "1:16:09", "remaining_time": "4:35:13"} +{"current_steps": 1881, "total_steps": 8674, "loss": 0.5472795963287354, "lr": 1.851842339332178e-06, "epoch": 0.433709937744985, "percentage": 21.69, "elapsed_time": "1:16:12", "remaining_time": "4:35:11"} +{"current_steps": 1882, "total_steps": 8674, "loss": 0.4883359968662262, "lr": 1.8516425734443786e-06, "epoch": 0.43394051187456767, "percentage": 21.7, "elapsed_time": "1:16:14", "remaining_time": "4:35:09"} +{"current_steps": 1883, "total_steps": 8674, "loss": 0.5172675848007202, "lr": 1.8514426837617006e-06, "epoch": 0.43417108600415033, "percentage": 21.71, "elapsed_time": "1:16:17", "remaining_time": "4:35:07"} +{"current_steps": 1884, "total_steps": 8674, "loss": 0.5253418684005737, "lr": 1.851242670313199e-06, "epoch": 0.434401660133733, "percentage": 21.72, "elapsed_time": "1:16:19", "remaining_time": "4:35:04"} +{"current_steps": 1885, "total_steps": 8674, "loss": 0.4684918522834778, "lr": 1.8510425331279485e-06, "epoch": 0.43463223426331565, "percentage": 21.73, "elapsed_time": "1:16:21", "remaining_time": "4:35:01"} +{"current_steps": 1886, "total_steps": 8674, "loss": 0.522485077381134, "lr": 1.8508422722350404e-06, "epoch": 0.4348628083928983, "percentage": 21.74, "elapsed_time": "1:16:24", "remaining_time": "4:34:59"} +{"current_steps": 1887, "total_steps": 8674, "loss": 0.5123787522315979, "lr": 1.8506418876635852e-06, "epoch": 0.435093382522481, "percentage": 21.75, "elapsed_time": "1:16:26", "remaining_time": "4:34:56"} +{"current_steps": 1888, "total_steps": 8674, "loss": 0.5195976495742798, "lr": 1.8504413794427106e-06, "epoch": 0.43532395665206364, "percentage": 21.77, "elapsed_time": "1:16:28", "remaining_time": "4:34:53"} +{"current_steps": 1889, "total_steps": 8674, 
"loss": 0.48394906520843506, "lr": 1.8502407476015626e-06, "epoch": 0.4355545307816463, "percentage": 21.78, "elapsed_time": "1:16:31", "remaining_time": "4:34:51"} +{"current_steps": 1890, "total_steps": 8674, "loss": 0.5083323121070862, "lr": 1.850039992169305e-06, "epoch": 0.43578510491122896, "percentage": 21.79, "elapsed_time": "1:16:33", "remaining_time": "4:34:49"} +{"current_steps": 1891, "total_steps": 8674, "loss": 0.5303651094436646, "lr": 1.8498391131751196e-06, "epoch": 0.4360156790408116, "percentage": 21.8, "elapsed_time": "1:16:36", "remaining_time": "4:34:46"} +{"current_steps": 1892, "total_steps": 8674, "loss": 0.49429047107696533, "lr": 1.8496381106482062e-06, "epoch": 0.4362462531703943, "percentage": 21.81, "elapsed_time": "1:16:38", "remaining_time": "4:34:44"} +{"current_steps": 1893, "total_steps": 8674, "loss": 0.5263347625732422, "lr": 1.8494369846177826e-06, "epoch": 0.43647682729997694, "percentage": 21.82, "elapsed_time": "1:16:41", "remaining_time": "4:34:41"} +{"current_steps": 1894, "total_steps": 8674, "loss": 0.5332654714584351, "lr": 1.8492357351130848e-06, "epoch": 0.4367074014295596, "percentage": 21.84, "elapsed_time": "1:16:43", "remaining_time": "4:34:39"} +{"current_steps": 1895, "total_steps": 8674, "loss": 0.5598278045654297, "lr": 1.8490343621633657e-06, "epoch": 0.43693797555914227, "percentage": 21.85, "elapsed_time": "1:16:45", "remaining_time": "4:34:36"} +{"current_steps": 1896, "total_steps": 8674, "loss": 0.4026976227760315, "lr": 1.8488328657978975e-06, "epoch": 0.43716854968872493, "percentage": 21.86, "elapsed_time": "1:16:48", "remaining_time": "4:34:34"} +{"current_steps": 1897, "total_steps": 8674, "loss": 0.4277791380882263, "lr": 1.8486312460459698e-06, "epoch": 0.4373991238183076, "percentage": 21.87, "elapsed_time": "1:16:50", "remaining_time": "4:34:31"} +{"current_steps": 1898, "total_steps": 8674, "loss": 0.49567973613739014, "lr": 1.8484295029368896e-06, "epoch": 0.43762969794789025, "percentage": 
21.88, "elapsed_time": "1:16:53", "remaining_time": "4:34:29"} +{"current_steps": 1899, "total_steps": 8674, "loss": 0.4659258723258972, "lr": 1.8482276364999828e-06, "epoch": 0.4378602720774729, "percentage": 21.89, "elapsed_time": "1:16:55", "remaining_time": "4:34:27"} +{"current_steps": 1900, "total_steps": 8674, "loss": 0.4950314164161682, "lr": 1.8480256467645923e-06, "epoch": 0.4380908462070556, "percentage": 21.9, "elapsed_time": "1:16:58", "remaining_time": "4:34:24"} +{"current_steps": 1901, "total_steps": 8674, "loss": 0.5584981441497803, "lr": 1.8478235337600796e-06, "epoch": 0.43832142033663823, "percentage": 21.92, "elapsed_time": "1:17:01", "remaining_time": "4:34:26"} +{"current_steps": 1902, "total_steps": 8674, "loss": 0.6322404146194458, "lr": 1.847621297515824e-06, "epoch": 0.4385519944662209, "percentage": 21.93, "elapsed_time": "1:17:04", "remaining_time": "4:34:23"} +{"current_steps": 1903, "total_steps": 8674, "loss": 0.49535471200942993, "lr": 1.8474189380612225e-06, "epoch": 0.43878256859580356, "percentage": 21.94, "elapsed_time": "1:17:06", "remaining_time": "4:34:21"} +{"current_steps": 1904, "total_steps": 8674, "loss": 0.40703707933425903, "lr": 1.8472164554256897e-06, "epoch": 0.4390131427253862, "percentage": 21.95, "elapsed_time": "1:17:08", "remaining_time": "4:34:18"} +{"current_steps": 1905, "total_steps": 8674, "loss": 0.4540821313858032, "lr": 1.8470138496386588e-06, "epoch": 0.4392437168549689, "percentage": 21.96, "elapsed_time": "1:17:11", "remaining_time": "4:34:16"} +{"current_steps": 1906, "total_steps": 8674, "loss": 0.45964252948760986, "lr": 1.846811120729581e-06, "epoch": 0.43947429098455154, "percentage": 21.97, "elapsed_time": "1:17:13", "remaining_time": "4:34:13"} +{"current_steps": 1907, "total_steps": 8674, "loss": 0.4604472517967224, "lr": 1.8466082687279244e-06, "epoch": 0.4397048651141342, "percentage": 21.99, "elapsed_time": "1:17:16", "remaining_time": "4:34:11"} +{"current_steps": 1908, "total_steps": 
8674, "loss": 0.44585052132606506, "lr": 1.8464052936631758e-06, "epoch": 0.43993543924371686, "percentage": 22.0, "elapsed_time": "1:17:18", "remaining_time": "4:34:08"} +{"current_steps": 1909, "total_steps": 8674, "loss": 0.43862414360046387, "lr": 1.8462021955648397e-06, "epoch": 0.4401660133732995, "percentage": 22.01, "elapsed_time": "1:17:20", "remaining_time": "4:34:06"} +{"current_steps": 1910, "total_steps": 8674, "loss": 0.5148224234580994, "lr": 1.8459989744624386e-06, "epoch": 0.4403965875028822, "percentage": 22.02, "elapsed_time": "1:17:23", "remaining_time": "4:34:03"} +{"current_steps": 1911, "total_steps": 8674, "loss": 0.6201390027999878, "lr": 1.8457956303855124e-06, "epoch": 0.44062716163246485, "percentage": 22.03, "elapsed_time": "1:17:25", "remaining_time": "4:34:01"} +{"current_steps": 1912, "total_steps": 8674, "loss": 0.5828813314437866, "lr": 1.8455921633636196e-06, "epoch": 0.4408577357620475, "percentage": 22.04, "elapsed_time": "1:17:28", "remaining_time": "4:33:58"} +{"current_steps": 1913, "total_steps": 8674, "loss": 0.5491579174995422, "lr": 1.845388573426336e-06, "epoch": 0.44108830989163017, "percentage": 22.05, "elapsed_time": "1:17:30", "remaining_time": "4:33:55"} +{"current_steps": 1914, "total_steps": 8674, "loss": 0.4204079508781433, "lr": 1.8451848606032554e-06, "epoch": 0.44131888402121283, "percentage": 22.07, "elapsed_time": "1:17:32", "remaining_time": "4:33:53"} +{"current_steps": 1915, "total_steps": 8674, "loss": 0.5734649300575256, "lr": 1.8449810249239898e-06, "epoch": 0.4415494581507955, "percentage": 22.08, "elapsed_time": "1:17:35", "remaining_time": "4:33:50"} +{"current_steps": 1916, "total_steps": 8674, "loss": 0.48931679129600525, "lr": 1.8447770664181684e-06, "epoch": 0.44178003228037815, "percentage": 22.09, "elapsed_time": "1:17:37", "remaining_time": "4:33:49"} +{"current_steps": 1917, "total_steps": 8674, "loss": 0.5206375122070312, "lr": 1.8445729851154392e-06, "epoch": 0.4420106064099608, 
"percentage": 22.1, "elapsed_time": "1:17:40", "remaining_time": "4:33:46"} +{"current_steps": 1918, "total_steps": 8674, "loss": 0.4916420578956604, "lr": 1.8443687810454666e-06, "epoch": 0.4422411805395435, "percentage": 22.11, "elapsed_time": "1:17:42", "remaining_time": "4:33:43"} +{"current_steps": 1919, "total_steps": 8674, "loss": 0.5021753311157227, "lr": 1.8441644542379348e-06, "epoch": 0.44247175466912614, "percentage": 22.12, "elapsed_time": "1:17:45", "remaining_time": "4:33:41"} +{"current_steps": 1920, "total_steps": 8674, "loss": 0.4615249037742615, "lr": 1.8439600047225441e-06, "epoch": 0.4427023287987088, "percentage": 22.14, "elapsed_time": "1:17:47", "remaining_time": "4:33:38"} +{"current_steps": 1921, "total_steps": 8674, "loss": 0.4849514365196228, "lr": 1.8437554325290133e-06, "epoch": 0.44293290292829146, "percentage": 22.15, "elapsed_time": "1:17:49", "remaining_time": "4:33:36"} +{"current_steps": 1922, "total_steps": 8674, "loss": 0.5872727632522583, "lr": 1.843550737687079e-06, "epoch": 0.4431634770578741, "percentage": 22.16, "elapsed_time": "1:17:52", "remaining_time": "4:33:33"} +{"current_steps": 1923, "total_steps": 8674, "loss": 0.48469966650009155, "lr": 1.843345920226496e-06, "epoch": 0.4433940511874568, "percentage": 22.17, "elapsed_time": "1:17:54", "remaining_time": "4:33:31"} +{"current_steps": 1924, "total_steps": 8674, "loss": 0.45931774377822876, "lr": 1.8431409801770364e-06, "epoch": 0.44362462531703944, "percentage": 22.18, "elapsed_time": "1:17:57", "remaining_time": "4:33:29"} +{"current_steps": 1925, "total_steps": 8674, "loss": 0.5138596296310425, "lr": 1.8429359175684907e-06, "epoch": 0.4438551994466221, "percentage": 22.19, "elapsed_time": "1:17:59", "remaining_time": "4:33:26"} +{"current_steps": 1926, "total_steps": 8674, "loss": 0.5586874485015869, "lr": 1.8427307324306661e-06, "epoch": 0.44408577357620477, "percentage": 22.2, "elapsed_time": "1:18:02", "remaining_time": "4:33:24"} +{"current_steps": 1927, 
"total_steps": 8674, "loss": 0.5373901724815369, "lr": 1.8425254247933887e-06, "epoch": 0.44431634770578743, "percentage": 22.22, "elapsed_time": "1:18:04", "remaining_time": "4:33:21"} +{"current_steps": 1928, "total_steps": 8674, "loss": 0.46104729175567627, "lr": 1.8423199946865022e-06, "epoch": 0.4445469218353701, "percentage": 22.23, "elapsed_time": "1:18:06", "remaining_time": "4:33:19"} +{"current_steps": 1929, "total_steps": 8674, "loss": 0.4837646782398224, "lr": 1.8421144421398678e-06, "epoch": 0.44477749596495275, "percentage": 22.24, "elapsed_time": "1:18:09", "remaining_time": "4:33:16"} +{"current_steps": 1930, "total_steps": 8674, "loss": 0.47685718536376953, "lr": 1.8419087671833647e-06, "epoch": 0.4450080700945354, "percentage": 22.25, "elapsed_time": "1:18:11", "remaining_time": "4:33:14"} +{"current_steps": 1931, "total_steps": 8674, "loss": 0.5904572606086731, "lr": 1.8417029698468897e-06, "epoch": 0.4452386442241181, "percentage": 22.26, "elapsed_time": "1:18:14", "remaining_time": "4:33:11"} +{"current_steps": 1932, "total_steps": 8674, "loss": 0.5434018969535828, "lr": 1.8414970501603577e-06, "epoch": 0.44546921835370074, "percentage": 22.27, "elapsed_time": "1:18:16", "remaining_time": "4:33:09"} +{"current_steps": 1933, "total_steps": 8674, "loss": 0.5532705783843994, "lr": 1.8412910081537012e-06, "epoch": 0.4456997924832834, "percentage": 22.28, "elapsed_time": "1:18:19", "remaining_time": "4:33:07"} +{"current_steps": 1934, "total_steps": 8674, "loss": 0.4900597929954529, "lr": 1.8410848438568704e-06, "epoch": 0.44593036661286606, "percentage": 22.3, "elapsed_time": "1:18:21", "remaining_time": "4:33:04"} +{"current_steps": 1935, "total_steps": 8674, "loss": 0.40426892042160034, "lr": 1.8408785572998334e-06, "epoch": 0.4461609407424487, "percentage": 22.31, "elapsed_time": "1:18:23", "remaining_time": "4:33:01"} +{"current_steps": 1936, "total_steps": 8674, "loss": 0.48805081844329834, "lr": 1.840672148512576e-06, "epoch": 
0.4463915148720314, "percentage": 22.32, "elapsed_time": "1:18:26", "remaining_time": "4:32:59"} +{"current_steps": 1937, "total_steps": 8674, "loss": 0.4997096657752991, "lr": 1.8404656175251019e-06, "epoch": 0.44662208900161404, "percentage": 22.33, "elapsed_time": "1:18:28", "remaining_time": "4:32:56"} +{"current_steps": 1938, "total_steps": 8674, "loss": 0.5113422274589539, "lr": 1.8402589643674325e-06, "epoch": 0.4468526631311967, "percentage": 22.34, "elapsed_time": "1:18:30", "remaining_time": "4:32:53"} +{"current_steps": 1939, "total_steps": 8674, "loss": 0.44080060720443726, "lr": 1.8400521890696065e-06, "epoch": 0.44708323726077936, "percentage": 22.35, "elapsed_time": "1:18:33", "remaining_time": "4:32:51"} +{"current_steps": 1940, "total_steps": 8674, "loss": 0.4477943778038025, "lr": 1.8398452916616816e-06, "epoch": 0.447313811390362, "percentage": 22.37, "elapsed_time": "1:18:35", "remaining_time": "4:32:48"} +{"current_steps": 1941, "total_steps": 8674, "loss": 0.4597470760345459, "lr": 1.8396382721737318e-06, "epoch": 0.4475443855199447, "percentage": 22.38, "elapsed_time": "1:18:38", "remaining_time": "4:32:46"} +{"current_steps": 1942, "total_steps": 8674, "loss": 0.4758293628692627, "lr": 1.8394311306358494e-06, "epoch": 0.44777495964952735, "percentage": 22.39, "elapsed_time": "1:18:40", "remaining_time": "4:32:44"} +{"current_steps": 1943, "total_steps": 8674, "loss": 0.4573550224304199, "lr": 1.8392238670781453e-06, "epoch": 0.44800553377911, "percentage": 22.4, "elapsed_time": "1:18:43", "remaining_time": "4:32:41"} +{"current_steps": 1944, "total_steps": 8674, "loss": 0.504696786403656, "lr": 1.8390164815307465e-06, "epoch": 0.44823610790869267, "percentage": 22.41, "elapsed_time": "1:18:45", "remaining_time": "4:32:38"} +{"current_steps": 1945, "total_steps": 8674, "loss": 0.4936453700065613, "lr": 1.8388089740237991e-06, "epoch": 0.44846668203827533, "percentage": 22.42, "elapsed_time": "1:18:47", "remaining_time": "4:32:36"} 
+{"current_steps": 1946, "total_steps": 8674, "loss": 0.4851078987121582, "lr": 1.8386013445874661e-06, "epoch": 0.448697256167858, "percentage": 22.43, "elapsed_time": "1:18:50", "remaining_time": "4:32:33"} +{"current_steps": 1947, "total_steps": 8674, "loss": 0.4881519377231598, "lr": 1.8383935932519288e-06, "epoch": 0.4489278302974406, "percentage": 22.45, "elapsed_time": "1:18:52", "remaining_time": "4:32:31"} +{"current_steps": 1948, "total_steps": 8674, "loss": 0.5604408979415894, "lr": 1.8381857200473859e-06, "epoch": 0.44915840442702326, "percentage": 22.46, "elapsed_time": "1:18:54", "remaining_time": "4:32:28"} +{"current_steps": 1949, "total_steps": 8674, "loss": 0.5022269487380981, "lr": 1.8379777250040535e-06, "epoch": 0.4493889785566059, "percentage": 22.47, "elapsed_time": "1:18:57", "remaining_time": "4:32:26"} +{"current_steps": 1950, "total_steps": 8674, "loss": 0.6519315242767334, "lr": 1.8377696081521666e-06, "epoch": 0.4496195526861886, "percentage": 22.48, "elapsed_time": "1:18:59", "remaining_time": "4:32:24"} +{"current_steps": 1951, "total_steps": 8674, "loss": 0.3820997476577759, "lr": 1.8375613695219766e-06, "epoch": 0.44985012681577125, "percentage": 22.49, "elapsed_time": "1:19:02", "remaining_time": "4:32:21"} +{"current_steps": 1952, "total_steps": 8674, "loss": 0.5473283529281616, "lr": 1.8373530091437526e-06, "epoch": 0.4500807009453539, "percentage": 22.5, "elapsed_time": "1:19:04", "remaining_time": "4:32:18"} +{"current_steps": 1953, "total_steps": 8674, "loss": 0.5835955142974854, "lr": 1.8371445270477828e-06, "epoch": 0.45031127507493657, "percentage": 22.52, "elapsed_time": "1:19:07", "remaining_time": "4:32:16"} +{"current_steps": 1954, "total_steps": 8674, "loss": 0.5398194789886475, "lr": 1.8369359232643716e-06, "epoch": 0.45054184920451923, "percentage": 22.53, "elapsed_time": "1:19:09", "remaining_time": "4:32:13"} +{"current_steps": 1955, "total_steps": 8674, "loss": 0.36561834812164307, "lr": 1.8367271978238418e-06, 
"epoch": 0.4507724233341019, "percentage": 22.54, "elapsed_time": "1:19:11", "remaining_time": "4:32:11"} +{"current_steps": 1956, "total_steps": 8674, "loss": 0.319802463054657, "lr": 1.8365183507565342e-06, "epoch": 0.45100299746368455, "percentage": 22.55, "elapsed_time": "1:19:14", "remaining_time": "4:32:08"} +{"current_steps": 1957, "total_steps": 8674, "loss": 0.46466606855392456, "lr": 1.8363093820928063e-06, "epoch": 0.4512335715932672, "percentage": 22.56, "elapsed_time": "1:19:16", "remaining_time": "4:32:05"} +{"current_steps": 1958, "total_steps": 8674, "loss": 0.5839806199073792, "lr": 1.8361002918630338e-06, "epoch": 0.4514641457228499, "percentage": 22.57, "elapsed_time": "1:19:19", "remaining_time": "4:32:04"} +{"current_steps": 1959, "total_steps": 8674, "loss": 0.4472346603870392, "lr": 1.8358910800976105e-06, "epoch": 0.45169471985243254, "percentage": 22.58, "elapsed_time": "1:19:21", "remaining_time": "4:32:01"} +{"current_steps": 1960, "total_steps": 8674, "loss": 0.5191199779510498, "lr": 1.835681746826947e-06, "epoch": 0.4519252939820152, "percentage": 22.6, "elapsed_time": "1:19:23", "remaining_time": "4:31:58"} +{"current_steps": 1961, "total_steps": 8674, "loss": 0.5832456350326538, "lr": 1.8354722920814722e-06, "epoch": 0.45215586811159786, "percentage": 22.61, "elapsed_time": "1:19:26", "remaining_time": "4:31:56"} +{"current_steps": 1962, "total_steps": 8674, "loss": 0.604708194732666, "lr": 1.8352627158916326e-06, "epoch": 0.4523864422411805, "percentage": 22.62, "elapsed_time": "1:19:28", "remaining_time": "4:31:53"} +{"current_steps": 1963, "total_steps": 8674, "loss": 0.5640981793403625, "lr": 1.8350530182878924e-06, "epoch": 0.4526170163707632, "percentage": 22.63, "elapsed_time": "1:19:31", "remaining_time": "4:31:50"} +{"current_steps": 1964, "total_steps": 8674, "loss": 0.4816977381706238, "lr": 1.8348431993007326e-06, "epoch": 0.45284759050034584, "percentage": 22.64, "elapsed_time": "1:19:33", "remaining_time": "4:31:48"} 
+{"current_steps": 1965, "total_steps": 8674, "loss": 0.4226726293563843, "lr": 1.8346332589606526e-06, "epoch": 0.4530781646299285, "percentage": 22.65, "elapsed_time": "1:19:35", "remaining_time": "4:31:45"} +{"current_steps": 1966, "total_steps": 8674, "loss": 0.49635130167007446, "lr": 1.8344231972981701e-06, "epoch": 0.45330873875951117, "percentage": 22.67, "elapsed_time": "1:19:38", "remaining_time": "4:31:43"} +{"current_steps": 1967, "total_steps": 8674, "loss": 0.5275523662567139, "lr": 1.8342130143438193e-06, "epoch": 0.4535393128890938, "percentage": 22.68, "elapsed_time": "1:19:40", "remaining_time": "4:31:41"} +{"current_steps": 1968, "total_steps": 8674, "loss": 0.48517313599586487, "lr": 1.834002710128152e-06, "epoch": 0.4537698870186765, "percentage": 22.69, "elapsed_time": "1:19:43", "remaining_time": "4:31:38"} +{"current_steps": 1969, "total_steps": 8674, "loss": 0.4352126717567444, "lr": 1.8337922846817388e-06, "epoch": 0.45400046114825915, "percentage": 22.7, "elapsed_time": "1:19:45", "remaining_time": "4:31:35"} +{"current_steps": 1970, "total_steps": 8674, "loss": 0.48131102323532104, "lr": 1.8335817380351668e-06, "epoch": 0.4542310352778418, "percentage": 22.71, "elapsed_time": "1:19:47", "remaining_time": "4:31:33"} +{"current_steps": 1971, "total_steps": 8674, "loss": 0.48989611864089966, "lr": 1.8333710702190408e-06, "epoch": 0.45446160940742447, "percentage": 22.72, "elapsed_time": "1:19:50", "remaining_time": "4:31:30"} +{"current_steps": 1972, "total_steps": 8674, "loss": 0.4841296076774597, "lr": 1.8331602812639839e-06, "epoch": 0.45469218353700713, "percentage": 22.73, "elapsed_time": "1:19:52", "remaining_time": "4:31:28"} +{"current_steps": 1973, "total_steps": 8674, "loss": 0.5479841232299805, "lr": 1.8329493712006364e-06, "epoch": 0.4549227576665898, "percentage": 22.75, "elapsed_time": "1:19:54", "remaining_time": "4:31:25"} +{"current_steps": 1974, "total_steps": 8674, "loss": 0.4732212424278259, "lr": 1.8327383400596559e-06, 
"epoch": 0.45515333179617246, "percentage": 22.76, "elapsed_time": "1:19:57", "remaining_time": "4:31:23"} +{"current_steps": 1975, "total_steps": 8674, "loss": 0.46675610542297363, "lr": 1.8325271878717183e-06, "epoch": 0.4553839059257551, "percentage": 22.77, "elapsed_time": "1:19:59", "remaining_time": "4:31:20"} +{"current_steps": 1976, "total_steps": 8674, "loss": 0.5464143753051758, "lr": 1.8323159146675163e-06, "epoch": 0.4556144800553378, "percentage": 22.78, "elapsed_time": "1:20:02", "remaining_time": "4:31:18"} +{"current_steps": 1977, "total_steps": 8674, "loss": 0.3888660669326782, "lr": 1.832104520477761e-06, "epoch": 0.45584505418492044, "percentage": 22.79, "elapsed_time": "1:20:04", "remaining_time": "4:31:15"} +{"current_steps": 1978, "total_steps": 8674, "loss": 0.5163271427154541, "lr": 1.8318930053331805e-06, "epoch": 0.4560756283145031, "percentage": 22.8, "elapsed_time": "1:20:07", "remaining_time": "4:31:13"} +{"current_steps": 1979, "total_steps": 8674, "loss": 0.5471124649047852, "lr": 1.8316813692645208e-06, "epoch": 0.45630620244408576, "percentage": 22.82, "elapsed_time": "1:20:09", "remaining_time": "4:31:10"} +{"current_steps": 1980, "total_steps": 8674, "loss": 0.5907406210899353, "lr": 1.8314696123025452e-06, "epoch": 0.4565367765736684, "percentage": 22.83, "elapsed_time": "1:20:11", "remaining_time": "4:31:07"} +{"current_steps": 1981, "total_steps": 8674, "loss": 0.5249447226524353, "lr": 1.8312577344780346e-06, "epoch": 0.4567673507032511, "percentage": 22.84, "elapsed_time": "1:20:14", "remaining_time": "4:31:05"} +{"current_steps": 1982, "total_steps": 8674, "loss": 0.5063247084617615, "lr": 1.8310457358217879e-06, "epoch": 0.45699792483283375, "percentage": 22.85, "elapsed_time": "1:20:16", "remaining_time": "4:31:02"} +{"current_steps": 1983, "total_steps": 8674, "loss": 0.4448107182979584, "lr": 1.830833616364621e-06, "epoch": 0.4572284989624164, "percentage": 22.86, "elapsed_time": "1:20:19", "remaining_time": "4:31:01"} 
+{"current_steps": 1984, "total_steps": 8674, "loss": 0.5699697732925415, "lr": 1.830621376137368e-06, "epoch": 0.45745907309199907, "percentage": 22.87, "elapsed_time": "1:20:21", "remaining_time": "4:30:58"} +{"current_steps": 1985, "total_steps": 8674, "loss": 0.5701720118522644, "lr": 1.8304090151708794e-06, "epoch": 0.45768964722158173, "percentage": 22.88, "elapsed_time": "1:20:24", "remaining_time": "4:30:55"} +{"current_steps": 1986, "total_steps": 8674, "loss": 0.4754391014575958, "lr": 1.830196533496025e-06, "epoch": 0.4579202213511644, "percentage": 22.9, "elapsed_time": "1:20:26", "remaining_time": "4:30:53"} +{"current_steps": 1987, "total_steps": 8674, "loss": 0.47649019956588745, "lr": 1.8299839311436903e-06, "epoch": 0.45815079548074705, "percentage": 22.91, "elapsed_time": "1:20:28", "remaining_time": "4:30:50"} +{"current_steps": 1988, "total_steps": 8674, "loss": 0.5524393320083618, "lr": 1.8297712081447797e-06, "epoch": 0.4583813696103297, "percentage": 22.92, "elapsed_time": "1:20:31", "remaining_time": "4:30:48"} +{"current_steps": 1989, "total_steps": 8674, "loss": 0.45731648802757263, "lr": 1.8295583645302144e-06, "epoch": 0.4586119437399124, "percentage": 22.93, "elapsed_time": "1:20:33", "remaining_time": "4:30:45"} +{"current_steps": 1990, "total_steps": 8674, "loss": 0.4999742805957794, "lr": 1.8293454003309336e-06, "epoch": 0.45884251786949504, "percentage": 22.94, "elapsed_time": "1:20:35", "remaining_time": "4:30:42"} +{"current_steps": 1991, "total_steps": 8674, "loss": 0.49084147810935974, "lr": 1.829132315577894e-06, "epoch": 0.4590730919990777, "percentage": 22.95, "elapsed_time": "1:20:38", "remaining_time": "4:30:41"} +{"current_steps": 1992, "total_steps": 8674, "loss": 0.45332348346710205, "lr": 1.828919110302069e-06, "epoch": 0.45930366612866036, "percentage": 22.97, "elapsed_time": "1:20:40", "remaining_time": "4:30:38"} +{"current_steps": 1993, "total_steps": 8674, "loss": 0.5029363632202148, "lr": 1.8287057845344504e-06, 
"epoch": 0.459534240258243, "percentage": 22.98, "elapsed_time": "1:20:43", "remaining_time": "4:30:35"} +{"current_steps": 1994, "total_steps": 8674, "loss": 0.5373274087905884, "lr": 1.8284923383060475e-06, "epoch": 0.4597648143878257, "percentage": 22.99, "elapsed_time": "1:20:45", "remaining_time": "4:30:33"} +{"current_steps": 1995, "total_steps": 8674, "loss": 0.5022158622741699, "lr": 1.8282787716478867e-06, "epoch": 0.45999538851740834, "percentage": 23.0, "elapsed_time": "1:20:48", "remaining_time": "4:30:31"} +{"current_steps": 1996, "total_steps": 8674, "loss": 0.5093190670013428, "lr": 1.828065084591012e-06, "epoch": 0.460225962646991, "percentage": 23.01, "elapsed_time": "1:20:50", "remaining_time": "4:30:28"} +{"current_steps": 1997, "total_steps": 8674, "loss": 0.5406581163406372, "lr": 1.827851277166485e-06, "epoch": 0.46045653677657367, "percentage": 23.02, "elapsed_time": "1:20:52", "remaining_time": "4:30:25"} +{"current_steps": 1998, "total_steps": 8674, "loss": 0.4403364062309265, "lr": 1.8276373494053852e-06, "epoch": 0.4606871109061563, "percentage": 23.03, "elapsed_time": "1:20:55", "remaining_time": "4:30:23"} +{"current_steps": 1999, "total_steps": 8674, "loss": 0.48383134603500366, "lr": 1.8274233013388085e-06, "epoch": 0.460917685035739, "percentage": 23.05, "elapsed_time": "1:20:57", "remaining_time": "4:30:20"} +{"current_steps": 2000, "total_steps": 8674, "loss": 0.5177836418151855, "lr": 1.8272091329978693e-06, "epoch": 0.46114825916532165, "percentage": 23.06, "elapsed_time": "1:21:00", "remaining_time": "4:30:18"} +{"current_steps": 2001, "total_steps": 8674, "loss": 0.5699004530906677, "lr": 1.8269948444136991e-06, "epoch": 0.4613788332949043, "percentage": 23.07, "elapsed_time": "1:21:04", "remaining_time": "4:30:20"} +{"current_steps": 2002, "total_steps": 8674, "loss": 0.5415153503417969, "lr": 1.826780435617447e-06, "epoch": 0.461609407424487, "percentage": 23.08, "elapsed_time": "1:21:06", "remaining_time": "4:30:18"} 
+{"current_steps": 2003, "total_steps": 8674, "loss": 0.5521166920661926, "lr": 1.8265659066402792e-06, "epoch": 0.46183998155406963, "percentage": 23.09, "elapsed_time": "1:21:08", "remaining_time": "4:30:15"} +{"current_steps": 2004, "total_steps": 8674, "loss": 0.4518507122993469, "lr": 1.8263512575133802e-06, "epoch": 0.4620705556836523, "percentage": 23.1, "elapsed_time": "1:21:11", "remaining_time": "4:30:12"} +{"current_steps": 2005, "total_steps": 8674, "loss": 0.5997140407562256, "lr": 1.8261364882679508e-06, "epoch": 0.46230112981323496, "percentage": 23.12, "elapsed_time": "1:21:13", "remaining_time": "4:30:10"} +{"current_steps": 2006, "total_steps": 8674, "loss": 0.5105265974998474, "lr": 1.8259215989352103e-06, "epoch": 0.4625317039428176, "percentage": 23.13, "elapsed_time": "1:21:15", "remaining_time": "4:30:07"} +{"current_steps": 2007, "total_steps": 8674, "loss": 0.5229371190071106, "lr": 1.825706589546395e-06, "epoch": 0.4627622780724003, "percentage": 23.14, "elapsed_time": "1:21:18", "remaining_time": "4:30:04"} +{"current_steps": 2008, "total_steps": 8674, "loss": 0.4833800792694092, "lr": 1.825491460132759e-06, "epoch": 0.46299285220198294, "percentage": 23.15, "elapsed_time": "1:21:20", "remaining_time": "4:30:03"} +{"current_steps": 2009, "total_steps": 8674, "loss": 0.4323253035545349, "lr": 1.8252762107255727e-06, "epoch": 0.4632234263315656, "percentage": 23.16, "elapsed_time": "1:21:23", "remaining_time": "4:30:00"} +{"current_steps": 2010, "total_steps": 8674, "loss": 0.4563494026660919, "lr": 1.8250608413561253e-06, "epoch": 0.46345400046114826, "percentage": 23.17, "elapsed_time": "1:21:25", "remaining_time": "4:29:57"} +{"current_steps": 2011, "total_steps": 8674, "loss": 0.5656196475028992, "lr": 1.8248453520557228e-06, "epoch": 0.4636845745907309, "percentage": 23.18, "elapsed_time": "1:21:28", "remaining_time": "4:29:55"} +{"current_steps": 2012, "total_steps": 8674, "loss": 0.5448226928710938, "lr": 1.8246297428556887e-06, 
"epoch": 0.4639151487203136, "percentage": 23.2, "elapsed_time": "1:21:30", "remaining_time": "4:29:52"} +{"current_steps": 2013, "total_steps": 8674, "loss": 0.4692860543727875, "lr": 1.8244140137873645e-06, "epoch": 0.46414572284989625, "percentage": 23.21, "elapsed_time": "1:21:32", "remaining_time": "4:29:50"} +{"current_steps": 2014, "total_steps": 8674, "loss": 0.5948643088340759, "lr": 1.8241981648821079e-06, "epoch": 0.4643762969794789, "percentage": 23.22, "elapsed_time": "1:21:35", "remaining_time": "4:29:47"} +{"current_steps": 2015, "total_steps": 8674, "loss": 0.54410719871521, "lr": 1.823982196171295e-06, "epoch": 0.46460687110906157, "percentage": 23.23, "elapsed_time": "1:21:37", "remaining_time": "4:29:45"} +{"current_steps": 2016, "total_steps": 8674, "loss": 0.430447518825531, "lr": 1.8237661076863192e-06, "epoch": 0.46483744523864423, "percentage": 23.24, "elapsed_time": "1:21:40", "remaining_time": "4:29:43"} +{"current_steps": 2017, "total_steps": 8674, "loss": 0.5420910716056824, "lr": 1.8235498994585913e-06, "epoch": 0.4650680193682269, "percentage": 23.25, "elapsed_time": "1:21:42", "remaining_time": "4:29:41"} +{"current_steps": 2018, "total_steps": 8674, "loss": 0.5140334963798523, "lr": 1.823333571519539e-06, "epoch": 0.46529859349780955, "percentage": 23.26, "elapsed_time": "1:21:45", "remaining_time": "4:29:38"} +{"current_steps": 2019, "total_steps": 8674, "loss": 0.5901660323143005, "lr": 1.8231171239006075e-06, "epoch": 0.4655291676273922, "percentage": 23.28, "elapsed_time": "1:21:47", "remaining_time": "4:29:35"} +{"current_steps": 2020, "total_steps": 8674, "loss": 0.5025908350944519, "lr": 1.8229005566332603e-06, "epoch": 0.4657597417569749, "percentage": 23.29, "elapsed_time": "1:21:49", "remaining_time": "4:29:33"} +{"current_steps": 2021, "total_steps": 8674, "loss": 0.4884544909000397, "lr": 1.8226838697489772e-06, "epoch": 0.46599031588655754, "percentage": 23.3, "elapsed_time": "1:21:52", "remaining_time": "4:29:30"} 
+{"current_steps": 2022, "total_steps": 8674, "loss": 0.46449869871139526, "lr": 1.822467063279256e-06, "epoch": 0.4662208900161402, "percentage": 23.31, "elapsed_time": "1:21:54", "remaining_time": "4:29:27"} +{"current_steps": 2023, "total_steps": 8674, "loss": 0.49463552236557007, "lr": 1.8222501372556116e-06, "epoch": 0.46645146414572286, "percentage": 23.32, "elapsed_time": "1:21:56", "remaining_time": "4:29:25"} +{"current_steps": 2024, "total_steps": 8674, "loss": 0.5027149319648743, "lr": 1.8220330917095768e-06, "epoch": 0.4666820382753055, "percentage": 23.33, "elapsed_time": "1:21:59", "remaining_time": "4:29:22"} +{"current_steps": 2025, "total_steps": 8674, "loss": 0.564018726348877, "lr": 1.8218159266727007e-06, "epoch": 0.4669126124048882, "percentage": 23.35, "elapsed_time": "1:22:02", "remaining_time": "4:29:21"} +{"current_steps": 2026, "total_steps": 8674, "loss": 0.4235766530036926, "lr": 1.821598642176551e-06, "epoch": 0.46714318653447084, "percentage": 23.36, "elapsed_time": "1:22:04", "remaining_time": "4:29:18"} +{"current_steps": 2027, "total_steps": 8674, "loss": 0.5696560144424438, "lr": 1.8213812382527118e-06, "epoch": 0.4673737606640535, "percentage": 23.37, "elapsed_time": "1:22:06", "remaining_time": "4:29:15"} +{"current_steps": 2028, "total_steps": 8674, "loss": 0.6101738214492798, "lr": 1.8211637149327856e-06, "epoch": 0.46760433479363617, "percentage": 23.38, "elapsed_time": "1:22:09", "remaining_time": "4:29:13"} +{"current_steps": 2029, "total_steps": 8674, "loss": 0.46749603748321533, "lr": 1.820946072248391e-06, "epoch": 0.46783490892321883, "percentage": 23.39, "elapsed_time": "1:22:11", "remaining_time": "4:29:10"} +{"current_steps": 2030, "total_steps": 8674, "loss": 0.4713476300239563, "lr": 1.8207283102311646e-06, "epoch": 0.4680654830528015, "percentage": 23.4, "elapsed_time": "1:22:13", "remaining_time": "4:29:08"} +{"current_steps": 2031, "total_steps": 8674, "loss": 0.5381859540939331, "lr": 1.8205104289127607e-06, 
"epoch": 0.46829605718238415, "percentage": 23.41, "elapsed_time": "1:22:16", "remaining_time": "4:29:05"} +{"current_steps": 2032, "total_steps": 8674, "loss": 0.4871833324432373, "lr": 1.82029242832485e-06, "epoch": 0.4685266313119668, "percentage": 23.43, "elapsed_time": "1:22:18", "remaining_time": "4:29:03"} +{"current_steps": 2033, "total_steps": 8674, "loss": 0.520627498626709, "lr": 1.8200743084991217e-06, "epoch": 0.4687572054415495, "percentage": 23.44, "elapsed_time": "1:22:21", "remaining_time": "4:29:01"} +{"current_steps": 2034, "total_steps": 8674, "loss": 0.5382364392280579, "lr": 1.8198560694672813e-06, "epoch": 0.46898777957113214, "percentage": 23.45, "elapsed_time": "1:22:23", "remaining_time": "4:28:58"} +{"current_steps": 2035, "total_steps": 8674, "loss": 0.384588360786438, "lr": 1.8196377112610524e-06, "epoch": 0.4692183537007148, "percentage": 23.46, "elapsed_time": "1:22:26", "remaining_time": "4:28:56"} +{"current_steps": 2036, "total_steps": 8674, "loss": 0.5515186786651611, "lr": 1.8194192339121752e-06, "epoch": 0.46944892783029746, "percentage": 23.47, "elapsed_time": "1:22:28", "remaining_time": "4:28:53"} +{"current_steps": 2037, "total_steps": 8674, "loss": 0.5405331254005432, "lr": 1.819200637452408e-06, "epoch": 0.4696795019598801, "percentage": 23.48, "elapsed_time": "1:22:30", "remaining_time": "4:28:51"} +{"current_steps": 2038, "total_steps": 8674, "loss": 0.5565645694732666, "lr": 1.818981921913526e-06, "epoch": 0.4699100760894628, "percentage": 23.5, "elapsed_time": "1:22:33", "remaining_time": "4:28:48"} +{"current_steps": 2039, "total_steps": 8674, "loss": 0.4856358468532562, "lr": 1.818763087327321e-06, "epoch": 0.47014065021904544, "percentage": 23.51, "elapsed_time": "1:22:35", "remaining_time": "4:28:45"} +{"current_steps": 2040, "total_steps": 8674, "loss": 0.5495761632919312, "lr": 1.8185441337256035e-06, "epoch": 0.4703712243486281, "percentage": 23.52, "elapsed_time": "1:22:37", "remaining_time": "4:28:42"} 
+{"current_steps": 2041, "total_steps": 8674, "loss": 0.509435772895813, "lr": 1.8183250611402007e-06, "epoch": 0.47060179847821076, "percentage": 23.53, "elapsed_time": "1:22:40", "remaining_time": "4:28:40"} +{"current_steps": 2042, "total_steps": 8674, "loss": 0.4663920998573303, "lr": 1.8181058696029564e-06, "epoch": 0.4708323726077934, "percentage": 23.54, "elapsed_time": "1:22:42", "remaining_time": "4:28:38"} +{"current_steps": 2043, "total_steps": 8674, "loss": 0.5976128578186035, "lr": 1.817886559145733e-06, "epoch": 0.4710629467373761, "percentage": 23.55, "elapsed_time": "1:22:45", "remaining_time": "4:28:36"} +{"current_steps": 2044, "total_steps": 8674, "loss": 0.49167966842651367, "lr": 1.817667129800409e-06, "epoch": 0.47129352086695875, "percentage": 23.56, "elapsed_time": "1:22:47", "remaining_time": "4:28:33"} +{"current_steps": 2045, "total_steps": 8674, "loss": 0.5889153480529785, "lr": 1.817447581598881e-06, "epoch": 0.4715240949965414, "percentage": 23.58, "elapsed_time": "1:22:50", "remaining_time": "4:28:31"} +{"current_steps": 2046, "total_steps": 8674, "loss": 0.4970330595970154, "lr": 1.8172279145730622e-06, "epoch": 0.47175466912612407, "percentage": 23.59, "elapsed_time": "1:22:52", "remaining_time": "4:28:28"} +{"current_steps": 2047, "total_steps": 8674, "loss": 0.4840531051158905, "lr": 1.817008128754884e-06, "epoch": 0.47198524325570673, "percentage": 23.6, "elapsed_time": "1:22:54", "remaining_time": "4:28:26"} +{"current_steps": 2048, "total_steps": 8674, "loss": 0.48297861218452454, "lr": 1.816788224176294e-06, "epoch": 0.4722158173852894, "percentage": 23.61, "elapsed_time": "1:22:57", "remaining_time": "4:28:23"} +{"current_steps": 2049, "total_steps": 8674, "loss": 0.540350079536438, "lr": 1.8165682008692578e-06, "epoch": 0.47244639151487205, "percentage": 23.62, "elapsed_time": "1:22:59", "remaining_time": "4:28:20"} +{"current_steps": 2050, "total_steps": 8674, "loss": 0.46405351161956787, "lr": 1.8163480588657578e-06, 
"epoch": 0.4726769656444547, "percentage": 23.63, "elapsed_time": "1:23:02", "remaining_time": "4:28:18"} +{"current_steps": 2051, "total_steps": 8674, "loss": 0.5175468921661377, "lr": 1.816127798197794e-06, "epoch": 0.4729075397740374, "percentage": 23.65, "elapsed_time": "1:23:04", "remaining_time": "4:28:16"} +{"current_steps": 2052, "total_steps": 8674, "loss": 0.5923771858215332, "lr": 1.8159074188973836e-06, "epoch": 0.47313811390362004, "percentage": 23.66, "elapsed_time": "1:23:07", "remaining_time": "4:28:13"} +{"current_steps": 2053, "total_steps": 8674, "loss": 0.4999024569988251, "lr": 1.815686920996561e-06, "epoch": 0.4733686880332027, "percentage": 23.67, "elapsed_time": "1:23:09", "remaining_time": "4:28:10"} +{"current_steps": 2054, "total_steps": 8674, "loss": 0.5630939602851868, "lr": 1.8154663045273775e-06, "epoch": 0.47359926216278536, "percentage": 23.68, "elapsed_time": "1:23:11", "remaining_time": "4:28:08"} +{"current_steps": 2055, "total_steps": 8674, "loss": 0.5505836009979248, "lr": 1.8152455695219021e-06, "epoch": 0.473829836292368, "percentage": 23.69, "elapsed_time": "1:23:14", "remaining_time": "4:28:06"} +{"current_steps": 2056, "total_steps": 8674, "loss": 0.44550588726997375, "lr": 1.8150247160122213e-06, "epoch": 0.4740604104219507, "percentage": 23.7, "elapsed_time": "1:23:16", "remaining_time": "4:28:03"} +{"current_steps": 2057, "total_steps": 8674, "loss": 0.5387516021728516, "lr": 1.8148037440304375e-06, "epoch": 0.47429098455153335, "percentage": 23.71, "elapsed_time": "1:23:19", "remaining_time": "4:28:00"} +{"current_steps": 2058, "total_steps": 8674, "loss": 0.5941788554191589, "lr": 1.814582653608672e-06, "epoch": 0.47452155868111595, "percentage": 23.73, "elapsed_time": "1:23:21", "remaining_time": "4:27:59"} +{"current_steps": 2059, "total_steps": 8674, "loss": 0.552179217338562, "lr": 1.8143614447790622e-06, "epoch": 0.4747521328106986, "percentage": 23.74, "elapsed_time": "1:23:24", "remaining_time": "4:27:56"} 
+{"current_steps": 2060, "total_steps": 8674, "loss": 0.4475885033607483, "lr": 1.8141401175737632e-06, "epoch": 0.4749827069402813, "percentage": 23.75, "elapsed_time": "1:23:26", "remaining_time": "4:27:53"} +{"current_steps": 2061, "total_steps": 8674, "loss": 0.5821356773376465, "lr": 1.813918672024947e-06, "epoch": 0.47521328106986394, "percentage": 23.76, "elapsed_time": "1:23:28", "remaining_time": "4:27:51"} +{"current_steps": 2062, "total_steps": 8674, "loss": 0.4673501253128052, "lr": 1.8136971081648027e-06, "epoch": 0.4754438551994466, "percentage": 23.77, "elapsed_time": "1:23:31", "remaining_time": "4:27:48"} +{"current_steps": 2063, "total_steps": 8674, "loss": 0.582427978515625, "lr": 1.8134754260255373e-06, "epoch": 0.47567442932902926, "percentage": 23.78, "elapsed_time": "1:23:33", "remaining_time": "4:27:45"} +{"current_steps": 2064, "total_steps": 8674, "loss": 0.4494328498840332, "lr": 1.8132536256393744e-06, "epoch": 0.4759050034586119, "percentage": 23.8, "elapsed_time": "1:23:35", "remaining_time": "4:27:43"} +{"current_steps": 2065, "total_steps": 8674, "loss": 0.44775205850601196, "lr": 1.8130317070385552e-06, "epoch": 0.4761355775881946, "percentage": 23.81, "elapsed_time": "1:23:38", "remaining_time": "4:27:40"} +{"current_steps": 2066, "total_steps": 8674, "loss": 0.5456822514533997, "lr": 1.8128096702553372e-06, "epoch": 0.47636615171777724, "percentage": 23.82, "elapsed_time": "1:23:40", "remaining_time": "4:27:38"} +{"current_steps": 2067, "total_steps": 8674, "loss": 0.46396178007125854, "lr": 1.8125875153219963e-06, "epoch": 0.4765967258473599, "percentage": 23.83, "elapsed_time": "1:23:43", "remaining_time": "4:27:36"} +{"current_steps": 2068, "total_steps": 8674, "loss": 0.4479365944862366, "lr": 1.8123652422708247e-06, "epoch": 0.47682729997694256, "percentage": 23.84, "elapsed_time": "1:23:45", "remaining_time": "4:27:34"} +{"current_steps": 2069, "total_steps": 8674, "loss": 0.4633978605270386, "lr": 1.8121428511341322e-06, 
"epoch": 0.4770578741065252, "percentage": 23.85, "elapsed_time": "1:23:48", "remaining_time": "4:27:31"} +{"current_steps": 2070, "total_steps": 8674, "loss": 0.5190213918685913, "lr": 1.811920341944245e-06, "epoch": 0.4772884482361079, "percentage": 23.86, "elapsed_time": "1:23:50", "remaining_time": "4:27:29"} +{"current_steps": 2071, "total_steps": 8674, "loss": 0.3900855779647827, "lr": 1.811697714733508e-06, "epoch": 0.47751902236569055, "percentage": 23.88, "elapsed_time": "1:23:53", "remaining_time": "4:27:26"} +{"current_steps": 2072, "total_steps": 8674, "loss": 0.5130020380020142, "lr": 1.8114749695342816e-06, "epoch": 0.4777495964952732, "percentage": 23.89, "elapsed_time": "1:23:55", "remaining_time": "4:27:24"} +{"current_steps": 2073, "total_steps": 8674, "loss": 0.5279096364974976, "lr": 1.8112521063789444e-06, "epoch": 0.47798017062485587, "percentage": 23.9, "elapsed_time": "1:23:57", "remaining_time": "4:27:21"} +{"current_steps": 2074, "total_steps": 8674, "loss": 0.5048732161521912, "lr": 1.8110291252998918e-06, "epoch": 0.47821074475443853, "percentage": 23.91, "elapsed_time": "1:24:00", "remaining_time": "4:27:19"} +{"current_steps": 2075, "total_steps": 8674, "loss": 0.5410048365592957, "lr": 1.8108060263295362e-06, "epoch": 0.4784413188840212, "percentage": 23.92, "elapsed_time": "1:24:02", "remaining_time": "4:27:17"} +{"current_steps": 2076, "total_steps": 8674, "loss": 0.5144593715667725, "lr": 1.8105828095003073e-06, "epoch": 0.47867189301360386, "percentage": 23.93, "elapsed_time": "1:24:05", "remaining_time": "4:27:14"} +{"current_steps": 2077, "total_steps": 8674, "loss": 0.543846845626831, "lr": 1.810359474844652e-06, "epoch": 0.4789024671431865, "percentage": 23.95, "elapsed_time": "1:24:07", "remaining_time": "4:27:12"} +{"current_steps": 2078, "total_steps": 8674, "loss": 0.5628032684326172, "lr": 1.8101360223950346e-06, "epoch": 0.4791330412727692, "percentage": 23.96, "elapsed_time": "1:24:09", "remaining_time": "4:27:09"} 
+{"current_steps": 2079, "total_steps": 8674, "loss": 0.5248516201972961, "lr": 1.8099124521839358e-06, "epoch": 0.47936361540235184, "percentage": 23.97, "elapsed_time": "1:24:12", "remaining_time": "4:27:06"} +{"current_steps": 2080, "total_steps": 8674, "loss": 0.44171589612960815, "lr": 1.8096887642438537e-06, "epoch": 0.4795941895319345, "percentage": 23.98, "elapsed_time": "1:24:14", "remaining_time": "4:27:04"} +{"current_steps": 2081, "total_steps": 8674, "loss": 0.5003859996795654, "lr": 1.809464958607304e-06, "epoch": 0.47982476366151716, "percentage": 23.99, "elapsed_time": "1:24:17", "remaining_time": "4:27:01"} +{"current_steps": 2082, "total_steps": 8674, "loss": 0.5271269679069519, "lr": 1.8092410353068183e-06, "epoch": 0.4800553377910998, "percentage": 24.0, "elapsed_time": "1:24:19", "remaining_time": "4:26:59"} +{"current_steps": 2083, "total_steps": 8674, "loss": 0.5191465616226196, "lr": 1.8090169943749474e-06, "epoch": 0.4802859119206825, "percentage": 24.01, "elapsed_time": "1:24:21", "remaining_time": "4:26:56"} +{"current_steps": 2084, "total_steps": 8674, "loss": 0.4569256007671356, "lr": 1.8087928358442567e-06, "epoch": 0.48051648605026515, "percentage": 24.03, "elapsed_time": "1:24:24", "remaining_time": "4:26:54"} +{"current_steps": 2085, "total_steps": 8674, "loss": 0.521030068397522, "lr": 1.8085685597473307e-06, "epoch": 0.4807470601798478, "percentage": 24.04, "elapsed_time": "1:24:26", "remaining_time": "4:26:52"} +{"current_steps": 2086, "total_steps": 8674, "loss": 0.48959439992904663, "lr": 1.80834416611677e-06, "epoch": 0.48097763430943047, "percentage": 24.05, "elapsed_time": "1:24:29", "remaining_time": "4:26:49"} +{"current_steps": 2087, "total_steps": 8674, "loss": 0.6536514163017273, "lr": 1.8081196549851925e-06, "epoch": 0.48120820843901313, "percentage": 24.06, "elapsed_time": "1:24:31", "remaining_time": "4:26:46"} +{"current_steps": 2088, "total_steps": 8674, "loss": 0.5746080875396729, "lr": 1.8078950263852327e-06, 
"epoch": 0.4814387825685958, "percentage": 24.07, "elapsed_time": "1:24:33", "remaining_time": "4:26:44"} +{"current_steps": 2089, "total_steps": 8674, "loss": 0.5518802404403687, "lr": 1.8076702803495437e-06, "epoch": 0.48166935669817845, "percentage": 24.08, "elapsed_time": "1:24:36", "remaining_time": "4:26:41"} +{"current_steps": 2090, "total_steps": 8674, "loss": 0.49385470151901245, "lr": 1.8074454169107934e-06, "epoch": 0.4818999308277611, "percentage": 24.09, "elapsed_time": "1:24:38", "remaining_time": "4:26:39"} +{"current_steps": 2091, "total_steps": 8674, "loss": 0.4488806426525116, "lr": 1.8072204361016688e-06, "epoch": 0.4821305049573438, "percentage": 24.11, "elapsed_time": "1:24:41", "remaining_time": "4:26:36"} +{"current_steps": 2092, "total_steps": 8674, "loss": 0.4167511761188507, "lr": 1.8069953379548727e-06, "epoch": 0.48236107908692644, "percentage": 24.12, "elapsed_time": "1:24:43", "remaining_time": "4:26:34"} +{"current_steps": 2093, "total_steps": 8674, "loss": 0.4181321859359741, "lr": 1.8067701225031258e-06, "epoch": 0.4825916532165091, "percentage": 24.13, "elapsed_time": "1:24:46", "remaining_time": "4:26:32"} +{"current_steps": 2094, "total_steps": 8674, "loss": 0.5257805585861206, "lr": 1.806544789779165e-06, "epoch": 0.48282222734609176, "percentage": 24.14, "elapsed_time": "1:24:48", "remaining_time": "4:26:29"} +{"current_steps": 2095, "total_steps": 8674, "loss": 0.4687056541442871, "lr": 1.806319339815745e-06, "epoch": 0.4830528014756744, "percentage": 24.15, "elapsed_time": "1:24:50", "remaining_time": "4:26:27"} +{"current_steps": 2096, "total_steps": 8674, "loss": 0.48070380091667175, "lr": 1.8060937726456373e-06, "epoch": 0.4832833756052571, "percentage": 24.16, "elapsed_time": "1:24:53", "remaining_time": "4:26:24"} +{"current_steps": 2097, "total_steps": 8674, "loss": 0.516263484954834, "lr": 1.80586808830163e-06, "epoch": 0.48351394973483974, "percentage": 24.18, "elapsed_time": "1:24:55", "remaining_time": "4:26:21"} 
+{"current_steps": 2098, "total_steps": 8674, "loss": 0.44018858671188354, "lr": 1.805642286816529e-06, "epoch": 0.4837445238644224, "percentage": 24.19, "elapsed_time": "1:24:58", "remaining_time": "4:26:19"} +{"current_steps": 2099, "total_steps": 8674, "loss": 0.469373881816864, "lr": 1.8054163682231565e-06, "epoch": 0.48397509799400507, "percentage": 24.2, "elapsed_time": "1:25:00", "remaining_time": "4:26:16"} +{"current_steps": 2100, "total_steps": 8674, "loss": 0.4759753346443176, "lr": 1.8051903325543525e-06, "epoch": 0.4842056721235877, "percentage": 24.21, "elapsed_time": "1:25:03", "remaining_time": "4:26:15"} +{"current_steps": 2101, "total_steps": 8674, "loss": 0.5002714395523071, "lr": 1.804964179842973e-06, "epoch": 0.4844362462531704, "percentage": 24.22, "elapsed_time": "1:25:07", "remaining_time": "4:26:17"} +{"current_steps": 2102, "total_steps": 8674, "loss": 0.4869537353515625, "lr": 1.804737910121892e-06, "epoch": 0.48466682038275305, "percentage": 24.23, "elapsed_time": "1:25:09", "remaining_time": "4:26:14"} +{"current_steps": 2103, "total_steps": 8674, "loss": 0.4840247929096222, "lr": 1.804511523424e-06, "epoch": 0.4848973945123357, "percentage": 24.24, "elapsed_time": "1:25:11", "remaining_time": "4:26:12"} +{"current_steps": 2104, "total_steps": 8674, "loss": 0.48390740156173706, "lr": 1.8042850197822049e-06, "epoch": 0.4851279686419184, "percentage": 24.26, "elapsed_time": "1:25:14", "remaining_time": "4:26:09"} +{"current_steps": 2105, "total_steps": 8674, "loss": 0.5875431895256042, "lr": 1.8040583992294305e-06, "epoch": 0.48535854277150103, "percentage": 24.27, "elapsed_time": "1:25:16", "remaining_time": "4:26:06"} +{"current_steps": 2106, "total_steps": 8674, "loss": 0.4599287211894989, "lr": 1.803831661798619e-06, "epoch": 0.4855891169010837, "percentage": 24.28, "elapsed_time": "1:25:18", "remaining_time": "4:26:04"} +{"current_steps": 2107, "total_steps": 8674, "loss": 0.5266382694244385, "lr": 1.803604807522729e-06, "epoch": 
0.48581969103066636, "percentage": 24.29, "elapsed_time": "1:25:21", "remaining_time": "4:26:01"} +{"current_steps": 2108, "total_steps": 8674, "loss": 0.5592058897018433, "lr": 1.8033778364347359e-06, "epoch": 0.486050265160249, "percentage": 24.3, "elapsed_time": "1:25:23", "remaining_time": "4:25:59"} +{"current_steps": 2109, "total_steps": 8674, "loss": 0.4385683834552765, "lr": 1.8031507485676324e-06, "epoch": 0.4862808392898317, "percentage": 24.31, "elapsed_time": "1:25:26", "remaining_time": "4:25:57"} +{"current_steps": 2110, "total_steps": 8674, "loss": 0.4205859303474426, "lr": 1.8029235439544277e-06, "epoch": 0.48651141341941434, "percentage": 24.33, "elapsed_time": "1:25:28", "remaining_time": "4:25:54"} +{"current_steps": 2111, "total_steps": 8674, "loss": 0.4179378151893616, "lr": 1.8026962226281484e-06, "epoch": 0.486741987548997, "percentage": 24.34, "elapsed_time": "1:25:30", "remaining_time": "4:25:51"} +{"current_steps": 2112, "total_steps": 8674, "loss": 0.5022565126419067, "lr": 1.8024687846218382e-06, "epoch": 0.48697256167857966, "percentage": 24.35, "elapsed_time": "1:25:33", "remaining_time": "4:25:49"} +{"current_steps": 2113, "total_steps": 8674, "loss": 0.4591484069824219, "lr": 1.8022412299685574e-06, "epoch": 0.4872031358081623, "percentage": 24.36, "elapsed_time": "1:25:35", "remaining_time": "4:25:46"} +{"current_steps": 2114, "total_steps": 8674, "loss": 0.44381004571914673, "lr": 1.8020135587013836e-06, "epoch": 0.487433709937745, "percentage": 24.37, "elapsed_time": "1:25:37", "remaining_time": "4:25:43"} +{"current_steps": 2115, "total_steps": 8674, "loss": 0.5418124198913574, "lr": 1.8017857708534106e-06, "epoch": 0.48766428406732765, "percentage": 24.38, "elapsed_time": "1:25:40", "remaining_time": "4:25:41"} +{"current_steps": 2116, "total_steps": 8674, "loss": 0.45836228132247925, "lr": 1.80155786645775e-06, "epoch": 0.4878948581969103, "percentage": 24.39, "elapsed_time": "1:25:42", "remaining_time": "4:25:38"} 
+{"current_steps": 2117, "total_steps": 8674, "loss": 0.6028016805648804, "lr": 1.80132984554753e-06, "epoch": 0.48812543232649297, "percentage": 24.41, "elapsed_time": "1:25:45", "remaining_time": "4:25:36"} +{"current_steps": 2118, "total_steps": 8674, "loss": 0.461037814617157, "lr": 1.8011017081558956e-06, "epoch": 0.48835600645607563, "percentage": 24.42, "elapsed_time": "1:25:47", "remaining_time": "4:25:34"} +{"current_steps": 2119, "total_steps": 8674, "loss": 0.45145073533058167, "lr": 1.8008734543160092e-06, "epoch": 0.4885865805856583, "percentage": 24.43, "elapsed_time": "1:25:50", "remaining_time": "4:25:31"} +{"current_steps": 2120, "total_steps": 8674, "loss": 0.5074604153633118, "lr": 1.8006450840610495e-06, "epoch": 0.48881715471524095, "percentage": 24.44, "elapsed_time": "1:25:52", "remaining_time": "4:25:28"} +{"current_steps": 2121, "total_steps": 8674, "loss": 0.48518210649490356, "lr": 1.8004165974242124e-06, "epoch": 0.4890477288448236, "percentage": 24.45, "elapsed_time": "1:25:54", "remaining_time": "4:25:26"} +{"current_steps": 2122, "total_steps": 8674, "loss": 0.5427801609039307, "lr": 1.800187994438711e-06, "epoch": 0.4892783029744063, "percentage": 24.46, "elapsed_time": "1:25:57", "remaining_time": "4:25:23"} +{"current_steps": 2123, "total_steps": 8674, "loss": 0.5002918839454651, "lr": 1.799959275137775e-06, "epoch": 0.48950887710398894, "percentage": 24.48, "elapsed_time": "1:25:59", "remaining_time": "4:25:20"} +{"current_steps": 2124, "total_steps": 8674, "loss": 0.4417838454246521, "lr": 1.799730439554651e-06, "epoch": 0.4897394512335716, "percentage": 24.49, "elapsed_time": "1:26:01", "remaining_time": "4:25:18"} +{"current_steps": 2125, "total_steps": 8674, "loss": 0.4260700047016144, "lr": 1.7995014877226024e-06, "epoch": 0.48997002536315426, "percentage": 24.5, "elapsed_time": "1:26:04", "remaining_time": "4:25:16"} +{"current_steps": 2126, "total_steps": 8674, "loss": 0.5480694770812988, "lr": 1.79927241967491e-06, 
"epoch": 0.4902005994927369, "percentage": 24.51, "elapsed_time": "1:26:06", "remaining_time": "4:25:14"} +{"current_steps": 2127, "total_steps": 8674, "loss": 0.3911926746368408, "lr": 1.7990432354448713e-06, "epoch": 0.4904311736223196, "percentage": 24.52, "elapsed_time": "1:26:09", "remaining_time": "4:25:11"} +{"current_steps": 2128, "total_steps": 8674, "loss": 0.5269262194633484, "lr": 1.7988139350657997e-06, "epoch": 0.49066174775190224, "percentage": 24.53, "elapsed_time": "1:26:11", "remaining_time": "4:25:09"} +{"current_steps": 2129, "total_steps": 8674, "loss": 0.47482216358184814, "lr": 1.7985845185710272e-06, "epoch": 0.4908923218814849, "percentage": 24.54, "elapsed_time": "1:26:14", "remaining_time": "4:25:06"} +{"current_steps": 2130, "total_steps": 8674, "loss": 0.5663374662399292, "lr": 1.7983549859939018e-06, "epoch": 0.49112289601106757, "percentage": 24.56, "elapsed_time": "1:26:16", "remaining_time": "4:25:04"} +{"current_steps": 2131, "total_steps": 8674, "loss": 0.5322546362876892, "lr": 1.7981253373677875e-06, "epoch": 0.49135347014065023, "percentage": 24.57, "elapsed_time": "1:26:19", "remaining_time": "4:25:01"} +{"current_steps": 2132, "total_steps": 8674, "loss": 0.4238794445991516, "lr": 1.797895572726067e-06, "epoch": 0.4915840442702329, "percentage": 24.58, "elapsed_time": "1:26:21", "remaining_time": "4:24:59"} +{"current_steps": 2133, "total_steps": 8674, "loss": 0.49363791942596436, "lr": 1.7976656921021384e-06, "epoch": 0.49181461839981555, "percentage": 24.59, "elapsed_time": "1:26:23", "remaining_time": "4:24:56"} +{"current_steps": 2134, "total_steps": 8674, "loss": 0.5079565048217773, "lr": 1.7974356955294178e-06, "epoch": 0.4920451925293982, "percentage": 24.6, "elapsed_time": "1:26:26", "remaining_time": "4:24:54"} +{"current_steps": 2135, "total_steps": 8674, "loss": 0.5259063243865967, "lr": 1.7972055830413369e-06, "epoch": 0.4922757666589809, "percentage": 24.61, "elapsed_time": "1:26:28", "remaining_time": "4:24:51"} 
+{"current_steps": 2136, "total_steps": 8674, "loss": 0.49021831154823303, "lr": 1.7969753546713448e-06, "epoch": 0.49250634078856353, "percentage": 24.63, "elapsed_time": "1:26:31", "remaining_time": "4:24:49"} +{"current_steps": 2137, "total_steps": 8674, "loss": 0.49721387028694153, "lr": 1.7967450104529078e-06, "epoch": 0.4927369149181462, "percentage": 24.64, "elapsed_time": "1:26:33", "remaining_time": "4:24:46"} +{"current_steps": 2138, "total_steps": 8674, "loss": 0.6129348278045654, "lr": 1.796514550419509e-06, "epoch": 0.49296748904772886, "percentage": 24.65, "elapsed_time": "1:26:35", "remaining_time": "4:24:44"} +{"current_steps": 2139, "total_steps": 8674, "loss": 0.5034269094467163, "lr": 1.7962839746046479e-06, "epoch": 0.4931980631773115, "percentage": 24.66, "elapsed_time": "1:26:38", "remaining_time": "4:24:41"} +{"current_steps": 2140, "total_steps": 8674, "loss": 0.490216463804245, "lr": 1.7960532830418408e-06, "epoch": 0.4934286373068942, "percentage": 24.67, "elapsed_time": "1:26:40", "remaining_time": "4:24:38"} +{"current_steps": 2141, "total_steps": 8674, "loss": 0.5609744787216187, "lr": 1.7958224757646212e-06, "epoch": 0.49365921143647684, "percentage": 24.68, "elapsed_time": "1:26:42", "remaining_time": "4:24:36"} +{"current_steps": 2142, "total_steps": 8674, "loss": 0.4438238739967346, "lr": 1.7955915528065395e-06, "epoch": 0.4938897855660595, "percentage": 24.69, "elapsed_time": "1:26:45", "remaining_time": "4:24:33"} +{"current_steps": 2143, "total_steps": 8674, "loss": 0.4704767167568207, "lr": 1.7953605142011626e-06, "epoch": 0.49412035969564216, "percentage": 24.71, "elapsed_time": "1:26:48", "remaining_time": "4:24:31"} +{"current_steps": 2144, "total_steps": 8674, "loss": 0.44819536805152893, "lr": 1.795129359982074e-06, "epoch": 0.4943509338252248, "percentage": 24.72, "elapsed_time": "1:26:50", "remaining_time": "4:24:29"} +{"current_steps": 2145, "total_steps": 8674, "loss": 0.5311752557754517, "lr": 1.7948980901828746e-06, 
"epoch": 0.4945815079548075, "percentage": 24.73, "elapsed_time": "1:26:52", "remaining_time": "4:24:26"} +{"current_steps": 2146, "total_steps": 8674, "loss": 0.46144258975982666, "lr": 1.7946667048371818e-06, "epoch": 0.49481208208439015, "percentage": 24.74, "elapsed_time": "1:26:55", "remaining_time": "4:24:24"} +{"current_steps": 2147, "total_steps": 8674, "loss": 0.5973725914955139, "lr": 1.7944352039786297e-06, "epoch": 0.4950426562139728, "percentage": 24.75, "elapsed_time": "1:26:57", "remaining_time": "4:24:21"} +{"current_steps": 2148, "total_steps": 8674, "loss": 0.4930835962295532, "lr": 1.7942035876408693e-06, "epoch": 0.49527323034355547, "percentage": 24.76, "elapsed_time": "1:26:59", "remaining_time": "4:24:18"} +{"current_steps": 2149, "total_steps": 8674, "loss": 0.39137697219848633, "lr": 1.7939718558575685e-06, "epoch": 0.49550380447313813, "percentage": 24.78, "elapsed_time": "1:27:02", "remaining_time": "4:24:16"} +{"current_steps": 2150, "total_steps": 8674, "loss": 0.47618329524993896, "lr": 1.7937400086624117e-06, "epoch": 0.4957343786027208, "percentage": 24.79, "elapsed_time": "1:27:04", "remaining_time": "4:24:13"} +{"current_steps": 2151, "total_steps": 8674, "loss": 0.4751483201980591, "lr": 1.7935080460891005e-06, "epoch": 0.49596495273230345, "percentage": 24.8, "elapsed_time": "1:27:07", "remaining_time": "4:24:11"} +{"current_steps": 2152, "total_steps": 8674, "loss": 0.4654052257537842, "lr": 1.7932759681713528e-06, "epoch": 0.4961955268618861, "percentage": 24.81, "elapsed_time": "1:27:09", "remaining_time": "4:24:08"} +{"current_steps": 2153, "total_steps": 8674, "loss": 0.551579475402832, "lr": 1.7930437749429035e-06, "epoch": 0.4964261009914688, "percentage": 24.82, "elapsed_time": "1:27:11", "remaining_time": "4:24:05"} +{"current_steps": 2154, "total_steps": 8674, "loss": 0.4967789053916931, "lr": 1.792811466437504e-06, "epoch": 0.49665667512105144, "percentage": 24.83, "elapsed_time": "1:27:14", "remaining_time": 
"4:24:03"} +{"current_steps": 2155, "total_steps": 8674, "loss": 0.5826432108879089, "lr": 1.7925790426889234e-06, "epoch": 0.4968872492506341, "percentage": 24.84, "elapsed_time": "1:27:16", "remaining_time": "4:24:00"} +{"current_steps": 2156, "total_steps": 8674, "loss": 0.4260643720626831, "lr": 1.792346503730946e-06, "epoch": 0.49711782338021676, "percentage": 24.86, "elapsed_time": "1:27:18", "remaining_time": "4:23:58"} +{"current_steps": 2157, "total_steps": 8674, "loss": 0.48679620027542114, "lr": 1.7921138495973741e-06, "epoch": 0.4973483975097994, "percentage": 24.87, "elapsed_time": "1:27:21", "remaining_time": "4:23:55"} +{"current_steps": 2158, "total_steps": 8674, "loss": 0.5048027634620667, "lr": 1.7918810803220266e-06, "epoch": 0.4975789716393821, "percentage": 24.88, "elapsed_time": "1:27:23", "remaining_time": "4:23:52"} +{"current_steps": 2159, "total_steps": 8674, "loss": 0.5073787569999695, "lr": 1.7916481959387384e-06, "epoch": 0.49780954576896475, "percentage": 24.89, "elapsed_time": "1:27:26", "remaining_time": "4:23:51"} +{"current_steps": 2160, "total_steps": 8674, "loss": 0.47361671924591064, "lr": 1.791415196481362e-06, "epoch": 0.4980401198985474, "percentage": 24.9, "elapsed_time": "1:27:28", "remaining_time": "4:23:48"} +{"current_steps": 2161, "total_steps": 8674, "loss": 0.46382519602775574, "lr": 1.7911820819837659e-06, "epoch": 0.49827069402813007, "percentage": 24.91, "elapsed_time": "1:27:31", "remaining_time": "4:23:45"} +{"current_steps": 2162, "total_steps": 8674, "loss": 0.5167688727378845, "lr": 1.7909488524798357e-06, "epoch": 0.49850126815771273, "percentage": 24.93, "elapsed_time": "1:27:33", "remaining_time": "4:23:43"} +{"current_steps": 2163, "total_steps": 8674, "loss": 0.4486730992794037, "lr": 1.7907155080034739e-06, "epoch": 0.4987318422872954, "percentage": 24.94, "elapsed_time": "1:27:35", "remaining_time": "4:23:40"} +{"current_steps": 2164, "total_steps": 8674, "loss": 0.508470356464386, "lr": 
1.7904820485885991e-06, "epoch": 0.49896241641687805, "percentage": 24.95, "elapsed_time": "1:27:38", "remaining_time": "4:23:38"} +{"current_steps": 2165, "total_steps": 8674, "loss": 0.4752856492996216, "lr": 1.790248474269148e-06, "epoch": 0.4991929905464607, "percentage": 24.96, "elapsed_time": "1:27:40", "remaining_time": "4:23:35"} +{"current_steps": 2166, "total_steps": 8674, "loss": 0.47191953659057617, "lr": 1.7900147850790713e-06, "epoch": 0.4994235646760434, "percentage": 24.97, "elapsed_time": "1:27:42", "remaining_time": "4:23:33"} +{"current_steps": 2167, "total_steps": 8674, "loss": 0.48935621976852417, "lr": 1.7897809810523396e-06, "epoch": 0.49965413880562604, "percentage": 24.98, "elapsed_time": "1:27:45", "remaining_time": "4:23:30"} +{"current_steps": 2168, "total_steps": 8674, "loss": 0.5455219149589539, "lr": 1.789547062222938e-06, "epoch": 0.4998847129352087, "percentage": 24.99, "elapsed_time": "1:27:47", "remaining_time": "4:23:28"} +{"current_steps": 2169, "total_steps": 8674, "loss": 0.5068193078041077, "lr": 1.789313028624869e-06, "epoch": 0.5001152870647914, "percentage": 25.01, "elapsed_time": "1:27:50", "remaining_time": "4:23:25"} +{"current_steps": 2170, "total_steps": 8674, "loss": 0.5868322253227234, "lr": 1.789078880292152e-06, "epoch": 0.500345861194374, "percentage": 25.02, "elapsed_time": "1:27:52", "remaining_time": "4:23:23"} +{"current_steps": 2171, "total_steps": 8674, "loss": 0.5132089853286743, "lr": 1.7888446172588222e-06, "epoch": 0.5005764353239567, "percentage": 25.03, "elapsed_time": "1:27:55", "remaining_time": "4:23:20"} +{"current_steps": 2172, "total_steps": 8674, "loss": 0.5673823356628418, "lr": 1.788610239558933e-06, "epoch": 0.5008070094535393, "percentage": 25.04, "elapsed_time": "1:27:57", "remaining_time": "4:23:18"} +{"current_steps": 2173, "total_steps": 8674, "loss": 0.47085779905319214, "lr": 1.7883757472265533e-06, "epoch": 0.501037583583122, "percentage": 25.05, "elapsed_time": "1:27:59", 
"remaining_time": "4:23:15"} +{"current_steps": 2174, "total_steps": 8674, "loss": 0.5286725163459778, "lr": 1.7881411402957685e-06, "epoch": 0.5012681577127046, "percentage": 25.06, "elapsed_time": "1:28:02", "remaining_time": "4:23:13"} +{"current_steps": 2175, "total_steps": 8674, "loss": 0.5044010877609253, "lr": 1.7879064188006817e-06, "epoch": 0.5014987318422873, "percentage": 25.07, "elapsed_time": "1:28:04", "remaining_time": "4:23:10"} +{"current_steps": 2176, "total_steps": 8674, "loss": 0.5329761505126953, "lr": 1.7876715827754113e-06, "epoch": 0.5017293059718699, "percentage": 25.09, "elapsed_time": "1:28:07", "remaining_time": "4:23:08"} +{"current_steps": 2177, "total_steps": 8674, "loss": 0.5025275349617004, "lr": 1.7874366322540937e-06, "epoch": 0.5019598801014526, "percentage": 25.1, "elapsed_time": "1:28:09", "remaining_time": "4:23:05"} +{"current_steps": 2178, "total_steps": 8674, "loss": 0.48466378450393677, "lr": 1.7872015672708814e-06, "epoch": 0.5021904542310353, "percentage": 25.11, "elapsed_time": "1:28:11", "remaining_time": "4:23:03"} +{"current_steps": 2179, "total_steps": 8674, "loss": 0.505358099937439, "lr": 1.7869663878599427e-06, "epoch": 0.502421028360618, "percentage": 25.12, "elapsed_time": "1:28:14", "remaining_time": "4:23:00"} +{"current_steps": 2180, "total_steps": 8674, "loss": 0.4934875965118408, "lr": 1.7867310940554643e-06, "epoch": 0.5026516024902006, "percentage": 25.13, "elapsed_time": "1:28:16", "remaining_time": "4:22:57"} +{"current_steps": 2181, "total_steps": 8674, "loss": 0.4726678133010864, "lr": 1.7864956858916482e-06, "epoch": 0.5028821766197833, "percentage": 25.14, "elapsed_time": "1:28:18", "remaining_time": "4:22:55"} +{"current_steps": 2182, "total_steps": 8674, "loss": 0.4619986414909363, "lr": 1.786260163402713e-06, "epoch": 0.5031127507493659, "percentage": 25.16, "elapsed_time": "1:28:21", "remaining_time": "4:22:52"} +{"current_steps": 2183, "total_steps": 8674, "loss": 0.4483926594257355, "lr": 
1.7860245266228946e-06, "epoch": 0.5033433248789486, "percentage": 25.17, "elapsed_time": "1:28:23", "remaining_time": "4:22:49"} +{"current_steps": 2184, "total_steps": 8674, "loss": 0.4756368100643158, "lr": 1.7857887755864451e-06, "epoch": 0.5035738990085312, "percentage": 25.18, "elapsed_time": "1:28:26", "remaining_time": "4:22:47"} +{"current_steps": 2185, "total_steps": 8674, "loss": 0.5610564351081848, "lr": 1.7855529103276334e-06, "epoch": 0.5038044731381139, "percentage": 25.19, "elapsed_time": "1:28:28", "remaining_time": "4:22:45"} +{"current_steps": 2186, "total_steps": 8674, "loss": 0.49948322772979736, "lr": 1.7853169308807447e-06, "epoch": 0.5040350472676965, "percentage": 25.2, "elapsed_time": "1:28:30", "remaining_time": "4:22:42"} +{"current_steps": 2187, "total_steps": 8674, "loss": 0.5023819208145142, "lr": 1.7850808372800813e-06, "epoch": 0.5042656213972793, "percentage": 25.21, "elapsed_time": "1:28:33", "remaining_time": "4:22:40"} +{"current_steps": 2188, "total_steps": 8674, "loss": 0.45893096923828125, "lr": 1.7848446295599617e-06, "epoch": 0.5044961955268619, "percentage": 25.22, "elapsed_time": "1:28:36", "remaining_time": "4:22:38"} +{"current_steps": 2189, "total_steps": 8674, "loss": 0.39129459857940674, "lr": 1.7846083077547212e-06, "epoch": 0.5047267696564446, "percentage": 25.24, "elapsed_time": "1:28:38", "remaining_time": "4:22:36"} +{"current_steps": 2190, "total_steps": 8674, "loss": 0.42348673939704895, "lr": 1.784371871898711e-06, "epoch": 0.5049573437860272, "percentage": 25.25, "elapsed_time": "1:28:40", "remaining_time": "4:22:33"} +{"current_steps": 2191, "total_steps": 8674, "loss": 0.5760704278945923, "lr": 1.7841353220263e-06, "epoch": 0.5051879179156099, "percentage": 25.26, "elapsed_time": "1:28:43", "remaining_time": "4:22:31"} +{"current_steps": 2192, "total_steps": 8674, "loss": 0.5281997323036194, "lr": 1.7838986581718731e-06, "epoch": 0.5054184920451925, "percentage": 25.27, "elapsed_time": "1:28:45", 
"remaining_time": "4:22:28"} +{"current_steps": 2193, "total_steps": 8674, "loss": 0.543775200843811, "lr": 1.7836618803698315e-06, "epoch": 0.5056490661747752, "percentage": 25.28, "elapsed_time": "1:28:48", "remaining_time": "4:22:26"} +{"current_steps": 2194, "total_steps": 8674, "loss": 0.4148549437522888, "lr": 1.7834249886545934e-06, "epoch": 0.5058796403043578, "percentage": 25.29, "elapsed_time": "1:28:50", "remaining_time": "4:22:23"} +{"current_steps": 2195, "total_steps": 8674, "loss": 0.5165001153945923, "lr": 1.7831879830605936e-06, "epoch": 0.5061102144339406, "percentage": 25.31, "elapsed_time": "1:28:52", "remaining_time": "4:22:20"} +{"current_steps": 2196, "total_steps": 8674, "loss": 0.4183283746242523, "lr": 1.782950863622283e-06, "epoch": 0.5063407885635232, "percentage": 25.32, "elapsed_time": "1:28:55", "remaining_time": "4:22:18"} +{"current_steps": 2197, "total_steps": 8674, "loss": 0.46558016538619995, "lr": 1.7827136303741292e-06, "epoch": 0.5065713626931059, "percentage": 25.33, "elapsed_time": "1:28:57", "remaining_time": "4:22:15"} +{"current_steps": 2198, "total_steps": 8674, "loss": 0.5491806268692017, "lr": 1.782476283350617e-06, "epoch": 0.5068019368226885, "percentage": 25.34, "elapsed_time": "1:28:59", "remaining_time": "4:22:12"} +{"current_steps": 2199, "total_steps": 8674, "loss": 0.42999008297920227, "lr": 1.7822388225862466e-06, "epoch": 0.5070325109522712, "percentage": 25.35, "elapsed_time": "1:29:02", "remaining_time": "4:22:10"} +{"current_steps": 2200, "total_steps": 8674, "loss": 0.42478299140930176, "lr": 1.7820012481155358e-06, "epoch": 0.5072630850818538, "percentage": 25.36, "elapsed_time": "1:29:04", "remaining_time": "4:22:07"} +{"current_steps": 2201, "total_steps": 8674, "loss": 0.4175076186656952, "lr": 1.781763559973018e-06, "epoch": 0.5074936592114365, "percentage": 25.37, "elapsed_time": "1:29:08", "remaining_time": "4:22:10"} +{"current_steps": 2202, "total_steps": 8674, "loss": 0.42197084426879883, "lr": 
1.7815257581932439e-06, "epoch": 0.5077242333410191, "percentage": 25.39, "elapsed_time": "1:29:11", "remaining_time": "4:22:07"} +{"current_steps": 2203, "total_steps": 8674, "loss": 0.39872926473617554, "lr": 1.7812878428107803e-06, "epoch": 0.5079548074706018, "percentage": 25.4, "elapsed_time": "1:29:13", "remaining_time": "4:22:05"} +{"current_steps": 2204, "total_steps": 8674, "loss": 0.4572516977787018, "lr": 1.7810498138602106e-06, "epoch": 0.5081853816001844, "percentage": 25.41, "elapsed_time": "1:29:15", "remaining_time": "4:22:02"} +{"current_steps": 2205, "total_steps": 8674, "loss": 0.5261520147323608, "lr": 1.780811671376135e-06, "epoch": 0.5084159557297672, "percentage": 25.42, "elapsed_time": "1:29:18", "remaining_time": "4:21:59"} +{"current_steps": 2206, "total_steps": 8674, "loss": 0.4714658260345459, "lr": 1.7805734153931696e-06, "epoch": 0.5086465298593498, "percentage": 25.43, "elapsed_time": "1:29:20", "remaining_time": "4:21:57"} +{"current_steps": 2207, "total_steps": 8674, "loss": 0.46184858679771423, "lr": 1.7803350459459472e-06, "epoch": 0.5088771039889325, "percentage": 25.44, "elapsed_time": "1:29:23", "remaining_time": "4:21:55"} +{"current_steps": 2208, "total_steps": 8674, "loss": 0.48189157247543335, "lr": 1.7800965630691173e-06, "epoch": 0.5091076781185151, "percentage": 25.46, "elapsed_time": "1:29:25", "remaining_time": "4:21:52"} +{"current_steps": 2209, "total_steps": 8674, "loss": 0.47865352034568787, "lr": 1.7798579667973463e-06, "epoch": 0.5093382522480978, "percentage": 25.47, "elapsed_time": "1:29:27", "remaining_time": "4:21:49"} +{"current_steps": 2210, "total_steps": 8674, "loss": 0.46073317527770996, "lr": 1.7796192571653162e-06, "epoch": 0.5095688263776804, "percentage": 25.48, "elapsed_time": "1:29:30", "remaining_time": "4:21:47"} +{"current_steps": 2211, "total_steps": 8674, "loss": 0.5099648237228394, "lr": 1.7793804342077253e-06, "epoch": 0.5097994005072631, "percentage": 25.49, "elapsed_time": "1:29:32", 
"remaining_time": "4:21:45"} +{"current_steps": 2212, "total_steps": 8674, "loss": 0.5436147451400757, "lr": 1.7791414979592903e-06, "epoch": 0.5100299746368457, "percentage": 25.5, "elapsed_time": "1:29:35", "remaining_time": "4:21:42"} +{"current_steps": 2213, "total_steps": 8674, "loss": 0.5455893278121948, "lr": 1.7789024484547417e-06, "epoch": 0.5102605487664285, "percentage": 25.51, "elapsed_time": "1:29:37", "remaining_time": "4:21:39"} +{"current_steps": 2214, "total_steps": 8674, "loss": 0.4886546730995178, "lr": 1.7786632857288284e-06, "epoch": 0.5104911228960111, "percentage": 25.52, "elapsed_time": "1:29:39", "remaining_time": "4:21:37"} +{"current_steps": 2215, "total_steps": 8674, "loss": 0.4793723225593567, "lr": 1.778424009816315e-06, "epoch": 0.5107216970255938, "percentage": 25.54, "elapsed_time": "1:29:42", "remaining_time": "4:21:34"} +{"current_steps": 2216, "total_steps": 8674, "loss": 0.5814248323440552, "lr": 1.7781846207519826e-06, "epoch": 0.5109522711551764, "percentage": 25.55, "elapsed_time": "1:29:44", "remaining_time": "4:21:32"} +{"current_steps": 2217, "total_steps": 8674, "loss": 0.5057421326637268, "lr": 1.777945118570629e-06, "epoch": 0.5111828452847591, "percentage": 25.56, "elapsed_time": "1:29:47", "remaining_time": "4:21:29"} +{"current_steps": 2218, "total_steps": 8674, "loss": 0.3913435935974121, "lr": 1.7777055033070682e-06, "epoch": 0.5114134194143417, "percentage": 25.57, "elapsed_time": "1:29:49", "remaining_time": "4:21:28"} +{"current_steps": 2219, "total_steps": 8674, "loss": 0.4450770616531372, "lr": 1.7774657749961305e-06, "epoch": 0.5116439935439244, "percentage": 25.58, "elapsed_time": "1:29:52", "remaining_time": "4:21:25"} +{"current_steps": 2220, "total_steps": 8674, "loss": 0.5164940357208252, "lr": 1.7772259336726636e-06, "epoch": 0.511874567673507, "percentage": 25.59, "elapsed_time": "1:29:54", "remaining_time": "4:21:22"} +{"current_steps": 2221, "total_steps": 8674, "loss": 0.44231802225112915, "lr": 
1.7769859793715298e-06, "epoch": 0.5121051418030897, "percentage": 25.61, "elapsed_time": "1:29:56", "remaining_time": "4:21:20"} +{"current_steps": 2222, "total_steps": 8674, "loss": 0.516791820526123, "lr": 1.7767459121276093e-06, "epoch": 0.5123357159326724, "percentage": 25.62, "elapsed_time": "1:29:59", "remaining_time": "4:21:17"} +{"current_steps": 2223, "total_steps": 8674, "loss": 0.4180450737476349, "lr": 1.7765057319757989e-06, "epoch": 0.5125662900622551, "percentage": 25.63, "elapsed_time": "1:30:01", "remaining_time": "4:21:15"} +{"current_steps": 2224, "total_steps": 8674, "loss": 0.49246734380722046, "lr": 1.77626543895101e-06, "epoch": 0.5127968641918377, "percentage": 25.64, "elapsed_time": "1:30:04", "remaining_time": "4:21:12"} +{"current_steps": 2225, "total_steps": 8674, "loss": 0.5058225393295288, "lr": 1.7760250330881728e-06, "epoch": 0.5130274383214203, "percentage": 25.65, "elapsed_time": "1:30:06", "remaining_time": "4:21:09"} +{"current_steps": 2226, "total_steps": 8674, "loss": 0.4752033054828644, "lr": 1.7757845144222321e-06, "epoch": 0.513258012451003, "percentage": 25.66, "elapsed_time": "1:30:08", "remaining_time": "4:21:07"} +{"current_steps": 2227, "total_steps": 8674, "loss": 0.45163947343826294, "lr": 1.77554388298815e-06, "epoch": 0.5134885865805856, "percentage": 25.67, "elapsed_time": "1:30:11", "remaining_time": "4:21:05"} +{"current_steps": 2228, "total_steps": 8674, "loss": 0.46295779943466187, "lr": 1.7753031388209044e-06, "epoch": 0.5137191607101683, "percentage": 25.69, "elapsed_time": "1:30:13", "remaining_time": "4:21:02"} +{"current_steps": 2229, "total_steps": 8674, "loss": 0.5682947635650635, "lr": 1.7750622819554903e-06, "epoch": 0.5139497348397509, "percentage": 25.7, "elapsed_time": "1:30:15", "remaining_time": "4:20:59"} +{"current_steps": 2230, "total_steps": 8674, "loss": 0.4890878200531006, "lr": 1.7748213124269187e-06, "epoch": 0.5141803089693336, "percentage": 25.71, "elapsed_time": "1:30:18", 
"remaining_time": "4:20:57"} +{"current_steps": 2231, "total_steps": 8674, "loss": 0.5952332615852356, "lr": 1.7745802302702164e-06, "epoch": 0.5144108830989162, "percentage": 25.72, "elapsed_time": "1:30:20", "remaining_time": "4:20:54"} +{"current_steps": 2232, "total_steps": 8674, "loss": 0.43224406242370605, "lr": 1.7743390355204278e-06, "epoch": 0.514641457228499, "percentage": 25.73, "elapsed_time": "1:30:23", "remaining_time": "4:20:51"} +{"current_steps": 2233, "total_steps": 8674, "loss": 0.5010303258895874, "lr": 1.7740977282126122e-06, "epoch": 0.5148720313580816, "percentage": 25.74, "elapsed_time": "1:30:25", "remaining_time": "4:20:49"} +{"current_steps": 2234, "total_steps": 8674, "loss": 0.5166633725166321, "lr": 1.7738563083818469e-06, "epoch": 0.5151026054876643, "percentage": 25.76, "elapsed_time": "1:30:28", "remaining_time": "4:20:47"} +{"current_steps": 2235, "total_steps": 8674, "loss": 0.4748263359069824, "lr": 1.7736147760632245e-06, "epoch": 0.5153331796172469, "percentage": 25.77, "elapsed_time": "1:30:30", "remaining_time": "4:20:44"} +{"current_steps": 2236, "total_steps": 8674, "loss": 0.46462053060531616, "lr": 1.773373131291854e-06, "epoch": 0.5155637537468296, "percentage": 25.78, "elapsed_time": "1:30:32", "remaining_time": "4:20:42"} +{"current_steps": 2237, "total_steps": 8674, "loss": 0.47799748182296753, "lr": 1.7731313741028608e-06, "epoch": 0.5157943278764122, "percentage": 25.79, "elapsed_time": "1:30:35", "remaining_time": "4:20:39"} +{"current_steps": 2238, "total_steps": 8674, "loss": 0.43448662757873535, "lr": 1.772889504531387e-06, "epoch": 0.5160249020059949, "percentage": 25.8, "elapsed_time": "1:30:37", "remaining_time": "4:20:36"} +{"current_steps": 2239, "total_steps": 8674, "loss": 0.4609360098838806, "lr": 1.7726475226125905e-06, "epoch": 0.5162554761355775, "percentage": 25.81, "elapsed_time": "1:30:39", "remaining_time": "4:20:34"} +{"current_steps": 2240, "total_steps": 8674, "loss": 0.505261242389679, "lr": 
1.7724054283816463e-06, "epoch": 0.5164860502651603, "percentage": 25.82, "elapsed_time": "1:30:42", "remaining_time": "4:20:31"} +{"current_steps": 2241, "total_steps": 8674, "loss": 0.3812851905822754, "lr": 1.772163221873745e-06, "epoch": 0.5167166243947429, "percentage": 25.84, "elapsed_time": "1:30:44", "remaining_time": "4:20:28"} +{"current_steps": 2242, "total_steps": 8674, "loss": 0.42545294761657715, "lr": 1.7719209031240938e-06, "epoch": 0.5169471985243256, "percentage": 25.85, "elapsed_time": "1:30:46", "remaining_time": "4:20:25"} +{"current_steps": 2243, "total_steps": 8674, "loss": 0.45135340094566345, "lr": 1.771678472167916e-06, "epoch": 0.5171777726539082, "percentage": 25.86, "elapsed_time": "1:30:49", "remaining_time": "4:20:23"} +{"current_steps": 2244, "total_steps": 8674, "loss": 0.4499250650405884, "lr": 1.7714359290404514e-06, "epoch": 0.5174083467834909, "percentage": 25.87, "elapsed_time": "1:30:51", "remaining_time": "4:20:21"} +{"current_steps": 2245, "total_steps": 8674, "loss": 0.4355557858943939, "lr": 1.7711932737769564e-06, "epoch": 0.5176389209130735, "percentage": 25.88, "elapsed_time": "1:30:54", "remaining_time": "4:20:18"} +{"current_steps": 2246, "total_steps": 8674, "loss": 0.4140744209289551, "lr": 1.7709505064127036e-06, "epoch": 0.5178694950426562, "percentage": 25.89, "elapsed_time": "1:30:56", "remaining_time": "4:20:16"} +{"current_steps": 2247, "total_steps": 8674, "loss": 0.5108504891395569, "lr": 1.7707076269829809e-06, "epoch": 0.5181000691722388, "percentage": 25.91, "elapsed_time": "1:30:58", "remaining_time": "4:20:13"} +{"current_steps": 2248, "total_steps": 8674, "loss": 0.5064615607261658, "lr": 1.7704646355230936e-06, "epoch": 0.5183306433018215, "percentage": 25.92, "elapsed_time": "1:31:01", "remaining_time": "4:20:11"} +{"current_steps": 2249, "total_steps": 8674, "loss": 0.5922794342041016, "lr": 1.7702215320683636e-06, "epoch": 0.5185612174314042, "percentage": 25.93, "elapsed_time": "1:31:03", 
"remaining_time": "4:20:08"} +{"current_steps": 2250, "total_steps": 8674, "loss": 0.3890082836151123, "lr": 1.7699783166541279e-06, "epoch": 0.5187917915609869, "percentage": 25.94, "elapsed_time": "1:31:05", "remaining_time": "4:20:05"} +{"current_steps": 2251, "total_steps": 8674, "loss": 0.5585668087005615, "lr": 1.7697349893157402e-06, "epoch": 0.5190223656905695, "percentage": 25.95, "elapsed_time": "1:31:08", "remaining_time": "4:20:04"} +{"current_steps": 2252, "total_steps": 8674, "loss": 0.3904608488082886, "lr": 1.7694915500885706e-06, "epoch": 0.5192529398201522, "percentage": 25.96, "elapsed_time": "1:31:10", "remaining_time": "4:20:01"} +{"current_steps": 2253, "total_steps": 8674, "loss": 0.4764491617679596, "lr": 1.7692479990080056e-06, "epoch": 0.5194835139497348, "percentage": 25.97, "elapsed_time": "1:31:13", "remaining_time": "4:19:58"} +{"current_steps": 2254, "total_steps": 8674, "loss": 0.49443554878234863, "lr": 1.769004336109448e-06, "epoch": 0.5197140880793175, "percentage": 25.99, "elapsed_time": "1:31:15", "remaining_time": "4:19:56"} +{"current_steps": 2255, "total_steps": 8674, "loss": 0.4679003357887268, "lr": 1.7687605614283165e-06, "epoch": 0.5199446622089001, "percentage": 26.0, "elapsed_time": "1:31:18", "remaining_time": "4:19:53"} +{"current_steps": 2256, "total_steps": 8674, "loss": 0.6968683004379272, "lr": 1.7685166750000465e-06, "epoch": 0.5201752363384828, "percentage": 26.01, "elapsed_time": "1:31:20", "remaining_time": "4:19:50"} +{"current_steps": 2257, "total_steps": 8674, "loss": 0.5688217878341675, "lr": 1.7682726768600888e-06, "epoch": 0.5204058104680654, "percentage": 26.02, "elapsed_time": "1:31:22", "remaining_time": "4:19:48"} +{"current_steps": 2258, "total_steps": 8674, "loss": 0.4688011705875397, "lr": 1.7680285670439115e-06, "epoch": 0.5206363845976482, "percentage": 26.03, "elapsed_time": "1:31:24", "remaining_time": "4:19:45"} +{"current_steps": 2259, "total_steps": 8674, "loss": 0.6447713971138, "lr": 
1.7677843455869984e-06, "epoch": 0.5208669587272308, "percentage": 26.04, "elapsed_time": "1:31:27", "remaining_time": "4:19:42"} +{"current_steps": 2260, "total_steps": 8674, "loss": 0.578650951385498, "lr": 1.767540012524849e-06, "epoch": 0.5210975328568135, "percentage": 26.05, "elapsed_time": "1:31:29", "remaining_time": "4:19:40"} +{"current_steps": 2261, "total_steps": 8674, "loss": 0.5001357197761536, "lr": 1.76729556789298e-06, "epoch": 0.5213281069863961, "percentage": 26.07, "elapsed_time": "1:31:32", "remaining_time": "4:19:38"} +{"current_steps": 2262, "total_steps": 8674, "loss": 0.5336331129074097, "lr": 1.7670510117269242e-06, "epoch": 0.5215586811159788, "percentage": 26.08, "elapsed_time": "1:31:34", "remaining_time": "4:19:35"} +{"current_steps": 2263, "total_steps": 8674, "loss": 0.5628900527954102, "lr": 1.76680634406223e-06, "epoch": 0.5217892552455614, "percentage": 26.09, "elapsed_time": "1:31:36", "remaining_time": "4:19:32"} +{"current_steps": 2264, "total_steps": 8674, "loss": 0.46497443318367004, "lr": 1.766561564934462e-06, "epoch": 0.5220198293751441, "percentage": 26.1, "elapsed_time": "1:31:39", "remaining_time": "4:19:30"} +{"current_steps": 2265, "total_steps": 8674, "loss": 0.617607831954956, "lr": 1.7663166743792019e-06, "epoch": 0.5222504035047267, "percentage": 26.11, "elapsed_time": "1:31:41", "remaining_time": "4:19:27"} +{"current_steps": 2266, "total_steps": 8674, "loss": 0.5236914157867432, "lr": 1.7660716724320468e-06, "epoch": 0.5224809776343095, "percentage": 26.12, "elapsed_time": "1:31:44", "remaining_time": "4:19:24"} +{"current_steps": 2267, "total_steps": 8674, "loss": 0.5527941584587097, "lr": 1.76582655912861e-06, "epoch": 0.5227115517638921, "percentage": 26.14, "elapsed_time": "1:31:46", "remaining_time": "4:19:22"} +{"current_steps": 2268, "total_steps": 8674, "loss": 0.5394654273986816, "lr": 1.7655813345045218e-06, "epoch": 0.5229421258934748, "percentage": 26.15, "elapsed_time": "1:31:48", "remaining_time": 
"4:19:20"} +{"current_steps": 2269, "total_steps": 8674, "loss": 0.47050246596336365, "lr": 1.7653359985954275e-06, "epoch": 0.5231727000230574, "percentage": 26.16, "elapsed_time": "1:31:51", "remaining_time": "4:19:17"} +{"current_steps": 2270, "total_steps": 8674, "loss": 0.49413689970970154, "lr": 1.7650905514369894e-06, "epoch": 0.5234032741526401, "percentage": 26.17, "elapsed_time": "1:31:53", "remaining_time": "4:19:14"} +{"current_steps": 2271, "total_steps": 8674, "loss": 0.5568829774856567, "lr": 1.7648449930648856e-06, "epoch": 0.5236338482822227, "percentage": 26.18, "elapsed_time": "1:31:55", "remaining_time": "4:19:12"} +{"current_steps": 2272, "total_steps": 8674, "loss": 0.49238815903663635, "lr": 1.7645993235148107e-06, "epoch": 0.5238644224118054, "percentage": 26.19, "elapsed_time": "1:31:58", "remaining_time": "4:19:09"} +{"current_steps": 2273, "total_steps": 8674, "loss": 0.5580959320068359, "lr": 1.7643535428224752e-06, "epoch": 0.524094996541388, "percentage": 26.2, "elapsed_time": "1:32:00", "remaining_time": "4:19:06"} +{"current_steps": 2274, "total_steps": 8674, "loss": 0.5853499174118042, "lr": 1.7641076510236052e-06, "epoch": 0.5243255706709707, "percentage": 26.22, "elapsed_time": "1:32:02", "remaining_time": "4:19:04"} +{"current_steps": 2275, "total_steps": 8674, "loss": 0.5638653635978699, "lr": 1.7638616481539448e-06, "epoch": 0.5245561448005533, "percentage": 26.23, "elapsed_time": "1:32:05", "remaining_time": "4:19:01"} +{"current_steps": 2276, "total_steps": 8674, "loss": 0.5197241306304932, "lr": 1.7636155342492521e-06, "epoch": 0.5247867189301361, "percentage": 26.24, "elapsed_time": "1:32:07", "remaining_time": "4:18:59"} +{"current_steps": 2277, "total_steps": 8674, "loss": 0.4137725234031677, "lr": 1.7633693093453026e-06, "epoch": 0.5250172930597187, "percentage": 26.25, "elapsed_time": "1:32:10", "remaining_time": "4:18:56"} +{"current_steps": 2278, "total_steps": 8674, "loss": 0.54244065284729, "lr": 
1.7631229734778872e-06, "epoch": 0.5252478671893014, "percentage": 26.26, "elapsed_time": "1:32:12", "remaining_time": "4:18:54"} +{"current_steps": 2279, "total_steps": 8674, "loss": 0.5215432047843933, "lr": 1.7628765266828137e-06, "epoch": 0.525478441318884, "percentage": 26.27, "elapsed_time": "1:32:15", "remaining_time": "4:18:51"} +{"current_steps": 2280, "total_steps": 8674, "loss": 0.5559565424919128, "lr": 1.7626299689959057e-06, "epoch": 0.5257090154484667, "percentage": 26.29, "elapsed_time": "1:32:17", "remaining_time": "4:18:48"} +{"current_steps": 2281, "total_steps": 8674, "loss": 0.5251328945159912, "lr": 1.7623833004530026e-06, "epoch": 0.5259395895780493, "percentage": 26.3, "elapsed_time": "1:32:19", "remaining_time": "4:18:46"} +{"current_steps": 2282, "total_steps": 8674, "loss": 0.5351072549819946, "lr": 1.7621365210899598e-06, "epoch": 0.526170163707632, "percentage": 26.31, "elapsed_time": "1:32:22", "remaining_time": "4:18:43"} +{"current_steps": 2283, "total_steps": 8674, "loss": 0.46850037574768066, "lr": 1.7618896309426504e-06, "epoch": 0.5264007378372146, "percentage": 26.32, "elapsed_time": "1:32:24", "remaining_time": "4:18:41"} +{"current_steps": 2284, "total_steps": 8674, "loss": 0.5001033544540405, "lr": 1.761642630046961e-06, "epoch": 0.5266313119667974, "percentage": 26.33, "elapsed_time": "1:32:26", "remaining_time": "4:18:38"} +{"current_steps": 2285, "total_steps": 8674, "loss": 0.47946250438690186, "lr": 1.7613955184387968e-06, "epoch": 0.52686188609638, "percentage": 26.34, "elapsed_time": "1:32:29", "remaining_time": "4:18:36"} +{"current_steps": 2286, "total_steps": 8674, "loss": 0.4743049144744873, "lr": 1.761148296154077e-06, "epoch": 0.5270924602259627, "percentage": 26.35, "elapsed_time": "1:32:31", "remaining_time": "4:18:34"} +{"current_steps": 2287, "total_steps": 8674, "loss": 0.4518652558326721, "lr": 1.7609009632287389e-06, "epoch": 0.5273230343555453, "percentage": 26.37, "elapsed_time": "1:32:34", 
"remaining_time": "4:18:33"} +{"current_steps": 2288, "total_steps": 8674, "loss": 0.5021224617958069, "lr": 1.7606535196987338e-06, "epoch": 0.527553608485128, "percentage": 26.38, "elapsed_time": "1:32:37", "remaining_time": "4:18:32"} +{"current_steps": 2289, "total_steps": 8674, "loss": 0.4848078489303589, "lr": 1.760405965600031e-06, "epoch": 0.5277841826147106, "percentage": 26.39, "elapsed_time": "1:32:40", "remaining_time": "4:18:29"} +{"current_steps": 2290, "total_steps": 8674, "loss": 0.49077051877975464, "lr": 1.7601583009686142e-06, "epoch": 0.5280147567442933, "percentage": 26.4, "elapsed_time": "1:32:42", "remaining_time": "4:18:26"} +{"current_steps": 2291, "total_steps": 8674, "loss": 0.4802943468093872, "lr": 1.7599105258404848e-06, "epoch": 0.5282453308738759, "percentage": 26.41, "elapsed_time": "1:32:44", "remaining_time": "4:18:24"} +{"current_steps": 2292, "total_steps": 8674, "loss": 0.5397455096244812, "lr": 1.7596626402516589e-06, "epoch": 0.5284759050034586, "percentage": 26.42, "elapsed_time": "1:32:47", "remaining_time": "4:18:21"} +{"current_steps": 2293, "total_steps": 8674, "loss": 0.478559672832489, "lr": 1.759414644238169e-06, "epoch": 0.5287064791330413, "percentage": 26.44, "elapsed_time": "1:32:49", "remaining_time": "4:18:19"} +{"current_steps": 2294, "total_steps": 8674, "loss": 0.5080797672271729, "lr": 1.7591665378360644e-06, "epoch": 0.528937053262624, "percentage": 26.45, "elapsed_time": "1:32:52", "remaining_time": "4:18:16"} +{"current_steps": 2295, "total_steps": 8674, "loss": 0.4959479868412018, "lr": 1.7589183210814093e-06, "epoch": 0.5291676273922066, "percentage": 26.46, "elapsed_time": "1:32:54", "remaining_time": "4:18:14"} +{"current_steps": 2296, "total_steps": 8674, "loss": 0.512288510799408, "lr": 1.7586699940102853e-06, "epoch": 0.5293982015217893, "percentage": 26.47, "elapsed_time": "1:32:56", "remaining_time": "4:18:11"} +{"current_steps": 2297, "total_steps": 8674, "loss": 0.525113046169281, "lr": 
1.7584215566587886e-06, "epoch": 0.5296287756513719, "percentage": 26.48, "elapsed_time": "1:32:59", "remaining_time": "4:18:09"} +{"current_steps": 2298, "total_steps": 8674, "loss": 0.3715069890022278, "lr": 1.7581730090630322e-06, "epoch": 0.5298593497809546, "percentage": 26.49, "elapsed_time": "1:33:01", "remaining_time": "4:18:06"} +{"current_steps": 2299, "total_steps": 8674, "loss": 0.5833072662353516, "lr": 1.757924351259145e-06, "epoch": 0.5300899239105372, "percentage": 26.5, "elapsed_time": "1:33:04", "remaining_time": "4:18:04"} +{"current_steps": 2300, "total_steps": 8674, "loss": 0.5942450761795044, "lr": 1.7576755832832721e-06, "epoch": 0.5303204980401199, "percentage": 26.52, "elapsed_time": "1:33:06", "remaining_time": "4:18:01"} +{"current_steps": 2301, "total_steps": 8674, "loss": 0.4754432737827301, "lr": 1.7574267051715745e-06, "epoch": 0.5305510721697025, "percentage": 26.53, "elapsed_time": "1:33:10", "remaining_time": "4:18:02"} +{"current_steps": 2302, "total_steps": 8674, "loss": 0.5272700190544128, "lr": 1.7571777169602287e-06, "epoch": 0.5307816462992853, "percentage": 26.54, "elapsed_time": "1:33:12", "remaining_time": "4:18:00"} +{"current_steps": 2303, "total_steps": 8674, "loss": 0.48376554250717163, "lr": 1.7569286186854283e-06, "epoch": 0.5310122204288679, "percentage": 26.55, "elapsed_time": "1:33:15", "remaining_time": "4:17:58"} +{"current_steps": 2304, "total_steps": 8674, "loss": 0.4324077367782593, "lr": 1.7566794103833816e-06, "epoch": 0.5312427945584506, "percentage": 26.56, "elapsed_time": "1:33:17", "remaining_time": "4:17:55"} +{"current_steps": 2305, "total_steps": 8674, "loss": 0.44939202070236206, "lr": 1.7564300920903142e-06, "epoch": 0.5314733686880332, "percentage": 26.57, "elapsed_time": "1:33:19", "remaining_time": "4:17:53"} +{"current_steps": 2306, "total_steps": 8674, "loss": 0.5256277322769165, "lr": 1.7561806638424662e-06, "epoch": 0.5317039428176159, "percentage": 26.59, "elapsed_time": "1:33:22", 
"remaining_time": "4:17:51"} +{"current_steps": 2307, "total_steps": 8674, "loss": 0.43901991844177246, "lr": 1.7559311256760955e-06, "epoch": 0.5319345169471985, "percentage": 26.6, "elapsed_time": "1:33:24", "remaining_time": "4:17:48"} +{"current_steps": 2308, "total_steps": 8674, "loss": 0.5256138443946838, "lr": 1.7556814776274746e-06, "epoch": 0.5321650910767812, "percentage": 26.61, "elapsed_time": "1:33:27", "remaining_time": "4:17:46"} +{"current_steps": 2309, "total_steps": 8674, "loss": 0.4664478600025177, "lr": 1.7554317197328922e-06, "epoch": 0.5323956652063638, "percentage": 26.62, "elapsed_time": "1:33:29", "remaining_time": "4:17:43"} +{"current_steps": 2310, "total_steps": 8674, "loss": 0.5042726397514343, "lr": 1.7551818520286532e-06, "epoch": 0.5326262393359465, "percentage": 26.63, "elapsed_time": "1:33:32", "remaining_time": "4:17:41"} +{"current_steps": 2311, "total_steps": 8674, "loss": 0.5682350397109985, "lr": 1.754931874551079e-06, "epoch": 0.5328568134655292, "percentage": 26.64, "elapsed_time": "1:33:34", "remaining_time": "4:17:38"} +{"current_steps": 2312, "total_steps": 8674, "loss": 0.5082807540893555, "lr": 1.754681787336505e-06, "epoch": 0.5330873875951119, "percentage": 26.65, "elapsed_time": "1:33:36", "remaining_time": "4:17:36"} +{"current_steps": 2313, "total_steps": 8674, "loss": 0.6020215749740601, "lr": 1.754431590421285e-06, "epoch": 0.5333179617246945, "percentage": 26.67, "elapsed_time": "1:33:39", "remaining_time": "4:17:33"} +{"current_steps": 2314, "total_steps": 8674, "loss": 0.5004276633262634, "lr": 1.7541812838417877e-06, "epoch": 0.5335485358542772, "percentage": 26.68, "elapsed_time": "1:33:41", "remaining_time": "4:17:30"} +{"current_steps": 2315, "total_steps": 8674, "loss": 0.4889993667602539, "lr": 1.753930867634397e-06, "epoch": 0.5337791099838598, "percentage": 26.69, "elapsed_time": "1:33:43", "remaining_time": "4:17:28"} +{"current_steps": 2316, "total_steps": 8674, "loss": 0.4179444909095764, "lr": 
1.7536803418355141e-06, "epoch": 0.5340096841134425, "percentage": 26.7, "elapsed_time": "1:33:46", "remaining_time": "4:17:25"} +{"current_steps": 2317, "total_steps": 8674, "loss": 0.46807605028152466, "lr": 1.7534297064815554e-06, "epoch": 0.5342402582430251, "percentage": 26.71, "elapsed_time": "1:33:48", "remaining_time": "4:17:23"} +{"current_steps": 2318, "total_steps": 8674, "loss": 0.39173221588134766, "lr": 1.7531789616089528e-06, "epoch": 0.5344708323726078, "percentage": 26.72, "elapsed_time": "1:33:51", "remaining_time": "4:17:21"} +{"current_steps": 2319, "total_steps": 8674, "loss": 0.4290514886379242, "lr": 1.7529281072541548e-06, "epoch": 0.5347014065021904, "percentage": 26.74, "elapsed_time": "1:33:53", "remaining_time": "4:17:18"} +{"current_steps": 2320, "total_steps": 8674, "loss": 0.6052347421646118, "lr": 1.752677143453626e-06, "epoch": 0.5349319806317732, "percentage": 26.75, "elapsed_time": "1:33:56", "remaining_time": "4:17:15"} +{"current_steps": 2321, "total_steps": 8674, "loss": 0.47622209787368774, "lr": 1.752426070243846e-06, "epoch": 0.5351625547613558, "percentage": 26.76, "elapsed_time": "1:33:58", "remaining_time": "4:17:13"} +{"current_steps": 2322, "total_steps": 8674, "loss": 0.4216923415660858, "lr": 1.7521748876613112e-06, "epoch": 0.5353931288909385, "percentage": 26.77, "elapsed_time": "1:34:00", "remaining_time": "4:17:10"} +{"current_steps": 2323, "total_steps": 8674, "loss": 0.5527430772781372, "lr": 1.751923595742533e-06, "epoch": 0.5356237030205211, "percentage": 26.78, "elapsed_time": "1:34:03", "remaining_time": "4:17:08"} +{"current_steps": 2324, "total_steps": 8674, "loss": 0.5562101602554321, "lr": 1.75167219452404e-06, "epoch": 0.5358542771501038, "percentage": 26.79, "elapsed_time": "1:34:05", "remaining_time": "4:17:05"} +{"current_steps": 2325, "total_steps": 8674, "loss": 0.546181321144104, "lr": 1.7514206840423757e-06, "epoch": 0.5360848512796864, "percentage": 26.8, "elapsed_time": "1:34:08", 
"remaining_time": "4:17:03"} +{"current_steps": 2326, "total_steps": 8674, "loss": 0.5883532762527466, "lr": 1.7511690643340995e-06, "epoch": 0.5363154254092691, "percentage": 26.82, "elapsed_time": "1:34:10", "remaining_time": "4:17:00"} +{"current_steps": 2327, "total_steps": 8674, "loss": 0.5231350660324097, "lr": 1.750917335435787e-06, "epoch": 0.5365459995388517, "percentage": 26.83, "elapsed_time": "1:34:13", "remaining_time": "4:16:59"} +{"current_steps": 2328, "total_steps": 8674, "loss": 0.4846429228782654, "lr": 1.7506654973840292e-06, "epoch": 0.5367765736684345, "percentage": 26.84, "elapsed_time": "1:34:15", "remaining_time": "4:16:56"} +{"current_steps": 2329, "total_steps": 8674, "loss": 0.43692171573638916, "lr": 1.7504135502154335e-06, "epoch": 0.5370071477980171, "percentage": 26.85, "elapsed_time": "1:34:17", "remaining_time": "4:16:54"} +{"current_steps": 2330, "total_steps": 8674, "loss": 0.5076167583465576, "lr": 1.7501614939666234e-06, "epoch": 0.5372377219275998, "percentage": 26.86, "elapsed_time": "1:34:20", "remaining_time": "4:16:51"} +{"current_steps": 2331, "total_steps": 8674, "loss": 0.5302891135215759, "lr": 1.7499093286742373e-06, "epoch": 0.5374682960571824, "percentage": 26.87, "elapsed_time": "1:34:22", "remaining_time": "4:16:48"} +{"current_steps": 2332, "total_steps": 8674, "loss": 0.5827817916870117, "lr": 1.7496570543749303e-06, "epoch": 0.5376988701867651, "percentage": 26.88, "elapsed_time": "1:34:25", "remaining_time": "4:16:46"} +{"current_steps": 2333, "total_steps": 8674, "loss": 0.6765470504760742, "lr": 1.7494046711053726e-06, "epoch": 0.5379294443163477, "percentage": 26.9, "elapsed_time": "1:34:27", "remaining_time": "4:16:43"} +{"current_steps": 2334, "total_steps": 8674, "loss": 0.48666322231292725, "lr": 1.7491521789022513e-06, "epoch": 0.5381600184459304, "percentage": 26.91, "elapsed_time": "1:34:29", "remaining_time": "4:16:41"} +{"current_steps": 2335, "total_steps": 8674, "loss": 0.5163695812225342, "lr": 
1.7488995778022685e-06, "epoch": 0.538390592575513, "percentage": 26.92, "elapsed_time": "1:34:32", "remaining_time": "4:16:39"} +{"current_steps": 2336, "total_steps": 8674, "loss": 0.44487982988357544, "lr": 1.748646867842142e-06, "epoch": 0.5386211667050956, "percentage": 26.93, "elapsed_time": "1:34:34", "remaining_time": "4:16:37"} +{"current_steps": 2337, "total_steps": 8674, "loss": 0.5512663722038269, "lr": 1.7483940490586058e-06, "epoch": 0.5388517408346783, "percentage": 26.94, "elapsed_time": "1:34:37", "remaining_time": "4:16:34"} +{"current_steps": 2338, "total_steps": 8674, "loss": 0.461128294467926, "lr": 1.7481411214884098e-06, "epoch": 0.539082314964261, "percentage": 26.95, "elapsed_time": "1:34:39", "remaining_time": "4:16:32"} +{"current_steps": 2339, "total_steps": 8674, "loss": 0.47291088104248047, "lr": 1.7478880851683197e-06, "epoch": 0.5393128890938437, "percentage": 26.97, "elapsed_time": "1:34:42", "remaining_time": "4:16:29"} +{"current_steps": 2340, "total_steps": 8674, "loss": 0.5900166034698486, "lr": 1.747634940135117e-06, "epoch": 0.5395434632234263, "percentage": 26.98, "elapsed_time": "1:34:44", "remaining_time": "4:16:26"} +{"current_steps": 2341, "total_steps": 8674, "loss": 0.3878340721130371, "lr": 1.7473816864255983e-06, "epoch": 0.539774037353009, "percentage": 26.99, "elapsed_time": "1:34:46", "remaining_time": "4:16:24"} +{"current_steps": 2342, "total_steps": 8674, "loss": 0.5671564340591431, "lr": 1.7471283240765775e-06, "epoch": 0.5400046114825916, "percentage": 27.0, "elapsed_time": "1:34:49", "remaining_time": "4:16:21"} +{"current_steps": 2343, "total_steps": 8674, "loss": 0.5153918266296387, "lr": 1.7468748531248824e-06, "epoch": 0.5402351856121743, "percentage": 27.01, "elapsed_time": "1:34:51", "remaining_time": "4:16:19"} +{"current_steps": 2344, "total_steps": 8674, "loss": 0.49520084261894226, "lr": 1.7466212736073585e-06, "epoch": 0.5404657597417569, "percentage": 27.02, "elapsed_time": "1:34:54", 
"remaining_time": "4:16:17"} +{"current_steps": 2345, "total_steps": 8674, "loss": 0.4884970784187317, "lr": 1.7463675855608654e-06, "epoch": 0.5406963338713396, "percentage": 27.03, "elapsed_time": "1:34:56", "remaining_time": "4:16:14"} +{"current_steps": 2346, "total_steps": 8674, "loss": 0.5233277678489685, "lr": 1.7461137890222798e-06, "epoch": 0.5409269080009222, "percentage": 27.05, "elapsed_time": "1:34:58", "remaining_time": "4:16:12"} +{"current_steps": 2347, "total_steps": 8674, "loss": 0.44011372327804565, "lr": 1.7458598840284928e-06, "epoch": 0.541157482130505, "percentage": 27.06, "elapsed_time": "1:35:01", "remaining_time": "4:16:09"} +{"current_steps": 2348, "total_steps": 8674, "loss": 0.4833263158798218, "lr": 1.745605870616413e-06, "epoch": 0.5413880562600876, "percentage": 27.07, "elapsed_time": "1:35:03", "remaining_time": "4:16:07"} +{"current_steps": 2349, "total_steps": 8674, "loss": 0.4852379262447357, "lr": 1.7453517488229634e-06, "epoch": 0.5416186303896703, "percentage": 27.08, "elapsed_time": "1:35:06", "remaining_time": "4:16:04"} +{"current_steps": 2350, "total_steps": 8674, "loss": 0.4710320830345154, "lr": 1.7450975186850831e-06, "epoch": 0.5418492045192529, "percentage": 27.09, "elapsed_time": "1:35:08", "remaining_time": "4:16:01"} +{"current_steps": 2351, "total_steps": 8674, "loss": 0.5144790410995483, "lr": 1.744843180239727e-06, "epoch": 0.5420797786488356, "percentage": 27.1, "elapsed_time": "1:35:11", "remaining_time": "4:15:59"} +{"current_steps": 2352, "total_steps": 8674, "loss": 0.5815445184707642, "lr": 1.7445887335238663e-06, "epoch": 0.5423103527784182, "percentage": 27.12, "elapsed_time": "1:35:13", "remaining_time": "4:15:57"} +{"current_steps": 2353, "total_steps": 8674, "loss": 0.5101407170295715, "lr": 1.7443341785744864e-06, "epoch": 0.5425409269080009, "percentage": 27.13, "elapsed_time": "1:35:16", "remaining_time": "4:15:55"} +{"current_steps": 2354, "total_steps": 8674, "loss": 0.4584839940071106, "lr": 
1.7440795154285905e-06, "epoch": 0.5427715010375835, "percentage": 27.14, "elapsed_time": "1:35:18", "remaining_time": "4:15:53"} +{"current_steps": 2355, "total_steps": 8674, "loss": 0.482247531414032, "lr": 1.743824744123196e-06, "epoch": 0.5430020751671663, "percentage": 27.15, "elapsed_time": "1:35:20", "remaining_time": "4:15:50"} +{"current_steps": 2356, "total_steps": 8674, "loss": 0.5503325462341309, "lr": 1.7435698646953364e-06, "epoch": 0.5432326492967489, "percentage": 27.16, "elapsed_time": "1:35:23", "remaining_time": "4:15:47"} +{"current_steps": 2357, "total_steps": 8674, "loss": 0.4803489148616791, "lr": 1.7433148771820612e-06, "epoch": 0.5434632234263316, "percentage": 27.17, "elapsed_time": "1:35:25", "remaining_time": "4:15:45"} +{"current_steps": 2358, "total_steps": 8674, "loss": 0.5388872027397156, "lr": 1.7430597816204351e-06, "epoch": 0.5436937975559142, "percentage": 27.18, "elapsed_time": "1:35:28", "remaining_time": "4:15:43"} +{"current_steps": 2359, "total_steps": 8674, "loss": 0.512636125087738, "lr": 1.742804578047539e-06, "epoch": 0.5439243716854969, "percentage": 27.2, "elapsed_time": "1:35:30", "remaining_time": "4:15:40"} +{"current_steps": 2360, "total_steps": 8674, "loss": 0.49154865741729736, "lr": 1.7425492665004699e-06, "epoch": 0.5441549458150795, "percentage": 27.21, "elapsed_time": "1:35:33", "remaining_time": "4:15:38"} +{"current_steps": 2361, "total_steps": 8674, "loss": 0.5185250639915466, "lr": 1.7422938470163389e-06, "epoch": 0.5443855199446622, "percentage": 27.22, "elapsed_time": "1:35:35", "remaining_time": "4:15:36"} +{"current_steps": 2362, "total_steps": 8674, "loss": 0.5474511384963989, "lr": 1.7420383196322747e-06, "epoch": 0.5446160940742448, "percentage": 27.23, "elapsed_time": "1:35:37", "remaining_time": "4:15:33"} +{"current_steps": 2363, "total_steps": 8674, "loss": 0.48212137818336487, "lr": 1.7417826843854202e-06, "epoch": 0.5448466682038275, "percentage": 27.24, "elapsed_time": "1:35:40", 
"remaining_time": "4:15:30"} +{"current_steps": 2364, "total_steps": 8674, "loss": 0.47983086109161377, "lr": 1.7415269413129348e-06, "epoch": 0.5450772423334102, "percentage": 27.25, "elapsed_time": "1:35:42", "remaining_time": "4:15:28"} +{"current_steps": 2365, "total_steps": 8674, "loss": 0.4935225546360016, "lr": 1.7412710904519932e-06, "epoch": 0.5453078164629929, "percentage": 27.27, "elapsed_time": "1:35:45", "remaining_time": "4:15:26"} +{"current_steps": 2366, "total_steps": 8674, "loss": 0.5167664289474487, "lr": 1.7410151318397862e-06, "epoch": 0.5455383905925755, "percentage": 27.28, "elapsed_time": "1:35:47", "remaining_time": "4:15:23"} +{"current_steps": 2367, "total_steps": 8674, "loss": 0.5116056799888611, "lr": 1.74075906551352e-06, "epoch": 0.5457689647221582, "percentage": 27.29, "elapsed_time": "1:35:49", "remaining_time": "4:15:21"} +{"current_steps": 2368, "total_steps": 8674, "loss": 0.4709595739841461, "lr": 1.7405028915104158e-06, "epoch": 0.5459995388517408, "percentage": 27.3, "elapsed_time": "1:35:52", "remaining_time": "4:15:18"} +{"current_steps": 2369, "total_steps": 8674, "loss": 0.3989061117172241, "lr": 1.7402466098677118e-06, "epoch": 0.5462301129813235, "percentage": 27.31, "elapsed_time": "1:35:54", "remaining_time": "4:15:16"} +{"current_steps": 2370, "total_steps": 8674, "loss": 0.45720764994621277, "lr": 1.739990220622661e-06, "epoch": 0.5464606871109061, "percentage": 27.32, "elapsed_time": "1:35:57", "remaining_time": "4:15:13"} +{"current_steps": 2371, "total_steps": 8674, "loss": 0.5865384936332703, "lr": 1.739733723812532e-06, "epoch": 0.5466912612404888, "percentage": 27.33, "elapsed_time": "1:35:59", "remaining_time": "4:15:11"} +{"current_steps": 2372, "total_steps": 8674, "loss": 0.4451501965522766, "lr": 1.7394771194746092e-06, "epoch": 0.5469218353700714, "percentage": 27.35, "elapsed_time": "1:36:02", "remaining_time": "4:15:08"} +{"current_steps": 2373, "total_steps": 8674, "loss": 0.4628486633300781, "lr": 
1.7392204076461928e-06, "epoch": 0.5471524094996542, "percentage": 27.36, "elapsed_time": "1:36:04", "remaining_time": "4:15:06"} +{"current_steps": 2374, "total_steps": 8674, "loss": 0.4797760248184204, "lr": 1.7389635883645984e-06, "epoch": 0.5473829836292368, "percentage": 27.37, "elapsed_time": "1:36:06", "remaining_time": "4:15:03"} +{"current_steps": 2375, "total_steps": 8674, "loss": 0.4716770648956299, "lr": 1.7387066616671571e-06, "epoch": 0.5476135577588195, "percentage": 27.38, "elapsed_time": "1:36:09", "remaining_time": "4:15:01"} +{"current_steps": 2376, "total_steps": 8674, "loss": 0.504901647567749, "lr": 1.738449627591216e-06, "epoch": 0.5478441318884021, "percentage": 27.39, "elapsed_time": "1:36:11", "remaining_time": "4:14:58"} +{"current_steps": 2377, "total_steps": 8674, "loss": 0.5248615145683289, "lr": 1.7381924861741375e-06, "epoch": 0.5480747060179848, "percentage": 27.4, "elapsed_time": "1:36:14", "remaining_time": "4:14:56"} +{"current_steps": 2378, "total_steps": 8674, "loss": 0.41704076528549194, "lr": 1.7379352374532998e-06, "epoch": 0.5483052801475674, "percentage": 27.42, "elapsed_time": "1:36:16", "remaining_time": "4:14:54"} +{"current_steps": 2379, "total_steps": 8674, "loss": 0.42278197407722473, "lr": 1.7376778814660966e-06, "epoch": 0.5485358542771501, "percentage": 27.43, "elapsed_time": "1:36:19", "remaining_time": "4:14:51"} +{"current_steps": 2380, "total_steps": 8674, "loss": 0.4104729890823364, "lr": 1.7374204182499372e-06, "epoch": 0.5487664284067327, "percentage": 27.44, "elapsed_time": "1:36:21", "remaining_time": "4:14:49"} +{"current_steps": 2381, "total_steps": 8674, "loss": 0.5205684304237366, "lr": 1.7371628478422467e-06, "epoch": 0.5489970025363154, "percentage": 27.45, "elapsed_time": "1:36:23", "remaining_time": "4:14:46"} +{"current_steps": 2382, "total_steps": 8674, "loss": 0.4743306040763855, "lr": 1.7369051702804648e-06, "epoch": 0.549227576665898, "percentage": 27.46, "elapsed_time": "1:36:26", 
"remaining_time": "4:14:44"} +{"current_steps": 2383, "total_steps": 8674, "loss": 0.6324253678321838, "lr": 1.7366473856020486e-06, "epoch": 0.5494581507954808, "percentage": 27.47, "elapsed_time": "1:36:28", "remaining_time": "4:14:41"} +{"current_steps": 2384, "total_steps": 8674, "loss": 0.46466588973999023, "lr": 1.736389493844469e-06, "epoch": 0.5496887249250634, "percentage": 27.48, "elapsed_time": "1:36:30", "remaining_time": "4:14:39"} +{"current_steps": 2385, "total_steps": 8674, "loss": 0.4117918014526367, "lr": 1.7361314950452136e-06, "epoch": 0.5499192990546461, "percentage": 27.5, "elapsed_time": "1:36:33", "remaining_time": "4:14:36"} +{"current_steps": 2386, "total_steps": 8674, "loss": 0.40341615676879883, "lr": 1.7358733892417848e-06, "epoch": 0.5501498731842287, "percentage": 27.51, "elapsed_time": "1:36:35", "remaining_time": "4:14:34"} +{"current_steps": 2387, "total_steps": 8674, "loss": 0.642855167388916, "lr": 1.735615176471701e-06, "epoch": 0.5503804473138114, "percentage": 27.52, "elapsed_time": "1:36:38", "remaining_time": "4:14:32"} +{"current_steps": 2388, "total_steps": 8674, "loss": 0.5490958094596863, "lr": 1.7353568567724959e-06, "epoch": 0.550611021443394, "percentage": 27.53, "elapsed_time": "1:36:40", "remaining_time": "4:14:29"} +{"current_steps": 2389, "total_steps": 8674, "loss": 0.5154834985733032, "lr": 1.7350984301817192e-06, "epoch": 0.5508415955729767, "percentage": 27.54, "elapsed_time": "1:36:43", "remaining_time": "4:14:26"} +{"current_steps": 2390, "total_steps": 8674, "loss": 0.49488651752471924, "lr": 1.7348398967369358e-06, "epoch": 0.5510721697025593, "percentage": 27.55, "elapsed_time": "1:36:45", "remaining_time": "4:14:24"} +{"current_steps": 2391, "total_steps": 8674, "loss": 0.4211215674877167, "lr": 1.7345812564757257e-06, "epoch": 0.5513027438321421, "percentage": 27.57, "elapsed_time": "1:36:47", "remaining_time": "4:14:21"} +{"current_steps": 2392, "total_steps": 8674, "loss": 0.41840964555740356, "lr": 
1.7343225094356855e-06, "epoch": 0.5515333179617247, "percentage": 27.58, "elapsed_time": "1:36:50", "remaining_time": "4:14:19"} +{"current_steps": 2393, "total_steps": 8674, "loss": 0.540780782699585, "lr": 1.7340636556544264e-06, "epoch": 0.5517638920913074, "percentage": 27.59, "elapsed_time": "1:36:52", "remaining_time": "4:14:16"} +{"current_steps": 2394, "total_steps": 8674, "loss": 0.4967775046825409, "lr": 1.7338046951695754e-06, "epoch": 0.55199446622089, "percentage": 27.6, "elapsed_time": "1:36:55", "remaining_time": "4:14:15"} +{"current_steps": 2395, "total_steps": 8674, "loss": 0.5155577659606934, "lr": 1.733545628018775e-06, "epoch": 0.5522250403504727, "percentage": 27.61, "elapsed_time": "1:36:57", "remaining_time": "4:14:12"} +{"current_steps": 2396, "total_steps": 8674, "loss": 0.5106005072593689, "lr": 1.7332864542396832e-06, "epoch": 0.5524556144800553, "percentage": 27.62, "elapsed_time": "1:37:00", "remaining_time": "4:14:10"} +{"current_steps": 2397, "total_steps": 8674, "loss": 0.3459712862968445, "lr": 1.7330271738699737e-06, "epoch": 0.552686188609638, "percentage": 27.63, "elapsed_time": "1:37:02", "remaining_time": "4:14:07"} +{"current_steps": 2398, "total_steps": 8674, "loss": 0.4877927303314209, "lr": 1.7327677869473356e-06, "epoch": 0.5529167627392206, "percentage": 27.65, "elapsed_time": "1:37:05", "remaining_time": "4:14:05"} +{"current_steps": 2399, "total_steps": 8674, "loss": 0.5183857679367065, "lr": 1.7325082935094732e-06, "epoch": 0.5531473368688034, "percentage": 27.66, "elapsed_time": "1:37:07", "remaining_time": "4:14:02"} +{"current_steps": 2400, "total_steps": 8674, "loss": 0.4326491057872772, "lr": 1.7322486935941068e-06, "epoch": 0.553377910998386, "percentage": 27.67, "elapsed_time": "1:37:09", "remaining_time": "4:14:00"} +{"current_steps": 2401, "total_steps": 8674, "loss": 0.4688712954521179, "lr": 1.7319889872389716e-06, "epoch": 0.5536084851279687, "percentage": 27.68, "elapsed_time": "1:37:13", 
"remaining_time": "4:14:01"} +{"current_steps": 2402, "total_steps": 8674, "loss": 0.4997788071632385, "lr": 1.7317291744818184e-06, "epoch": 0.5538390592575513, "percentage": 27.69, "elapsed_time": "1:37:16", "remaining_time": "4:14:00"} +{"current_steps": 2403, "total_steps": 8674, "loss": 0.5271172523498535, "lr": 1.731469255360414e-06, "epoch": 0.554069633387134, "percentage": 27.7, "elapsed_time": "1:37:18", "remaining_time": "4:13:57"} +{"current_steps": 2404, "total_steps": 8674, "loss": 0.5339269042015076, "lr": 1.73120922991254e-06, "epoch": 0.5543002075167166, "percentage": 27.72, "elapsed_time": "1:37:21", "remaining_time": "4:13:55"} +{"current_steps": 2405, "total_steps": 8674, "loss": 0.47052568197250366, "lr": 1.7309490981759938e-06, "epoch": 0.5545307816462993, "percentage": 27.73, "elapsed_time": "1:37:23", "remaining_time": "4:13:52"} +{"current_steps": 2406, "total_steps": 8674, "loss": 0.4112280309200287, "lr": 1.7306888601885885e-06, "epoch": 0.5547613557758819, "percentage": 27.74, "elapsed_time": "1:37:26", "remaining_time": "4:13:49"} +{"current_steps": 2407, "total_steps": 8674, "loss": 0.5473710298538208, "lr": 1.730428515988152e-06, "epoch": 0.5549919299054646, "percentage": 27.75, "elapsed_time": "1:37:28", "remaining_time": "4:13:47"} +{"current_steps": 2408, "total_steps": 8674, "loss": 0.5079115629196167, "lr": 1.7301680656125277e-06, "epoch": 0.5552225040350472, "percentage": 27.76, "elapsed_time": "1:37:30", "remaining_time": "4:13:44"} +{"current_steps": 2409, "total_steps": 8674, "loss": 0.4805012345314026, "lr": 1.7299075090995755e-06, "epoch": 0.55545307816463, "percentage": 27.77, "elapsed_time": "1:37:33", "remaining_time": "4:13:42"} +{"current_steps": 2410, "total_steps": 8674, "loss": 0.4657474756240845, "lr": 1.729646846487169e-06, "epoch": 0.5556836522942126, "percentage": 27.78, "elapsed_time": "1:37:35", "remaining_time": "4:13:39"} +{"current_steps": 2411, "total_steps": 8674, "loss": 0.5887978076934814, "lr": 
1.729386077813199e-06, "epoch": 0.5559142264237953, "percentage": 27.8, "elapsed_time": "1:37:38", "remaining_time": "4:13:37"} +{"current_steps": 2412, "total_steps": 8674, "loss": 0.43841421604156494, "lr": 1.7291252031155704e-06, "epoch": 0.5561448005533779, "percentage": 27.81, "elapsed_time": "1:37:40", "remaining_time": "4:13:35"} +{"current_steps": 2413, "total_steps": 8674, "loss": 0.5026551485061646, "lr": 1.728864222432204e-06, "epoch": 0.5563753746829606, "percentage": 27.82, "elapsed_time": "1:37:43", "remaining_time": "4:13:32"} +{"current_steps": 2414, "total_steps": 8674, "loss": 0.4525277614593506, "lr": 1.728603135801036e-06, "epoch": 0.5566059488125432, "percentage": 27.83, "elapsed_time": "1:37:45", "remaining_time": "4:13:30"} +{"current_steps": 2415, "total_steps": 8674, "loss": 0.4095644950866699, "lr": 1.7283419432600182e-06, "epoch": 0.5568365229421259, "percentage": 27.84, "elapsed_time": "1:37:47", "remaining_time": "4:13:27"} +{"current_steps": 2416, "total_steps": 8674, "loss": 0.5098834037780762, "lr": 1.7280806448471173e-06, "epoch": 0.5570670970717085, "percentage": 27.85, "elapsed_time": "1:37:50", "remaining_time": "4:13:25"} +{"current_steps": 2417, "total_steps": 8674, "loss": 0.42802777886390686, "lr": 1.7278192406003159e-06, "epoch": 0.5572976712012913, "percentage": 27.86, "elapsed_time": "1:37:52", "remaining_time": "4:13:22"} +{"current_steps": 2418, "total_steps": 8674, "loss": 0.5036378502845764, "lr": 1.7275577305576113e-06, "epoch": 0.5575282453308739, "percentage": 27.88, "elapsed_time": "1:37:55", "remaining_time": "4:13:20"} +{"current_steps": 2419, "total_steps": 8674, "loss": 0.5324885249137878, "lr": 1.7272961147570175e-06, "epoch": 0.5577588194604566, "percentage": 27.89, "elapsed_time": "1:37:57", "remaining_time": "4:13:18"} +{"current_steps": 2420, "total_steps": 8674, "loss": 0.5763842463493347, "lr": 1.727034393236562e-06, "epoch": 0.5579893935900392, "percentage": 27.9, "elapsed_time": "1:38:00", 
"remaining_time": "4:13:16"} +{"current_steps": 2421, "total_steps": 8674, "loss": 0.49291908740997314, "lr": 1.7267725660342895e-06, "epoch": 0.5582199677196219, "percentage": 27.91, "elapsed_time": "1:38:02", "remaining_time": "4:13:13"} +{"current_steps": 2422, "total_steps": 8674, "loss": 0.5114868879318237, "lr": 1.7265106331882588e-06, "epoch": 0.5584505418492045, "percentage": 27.92, "elapsed_time": "1:38:04", "remaining_time": "4:13:11"} +{"current_steps": 2423, "total_steps": 8674, "loss": 0.42442530393600464, "lr": 1.7262485947365449e-06, "epoch": 0.5586811159787872, "percentage": 27.93, "elapsed_time": "1:38:07", "remaining_time": "4:13:08"} +{"current_steps": 2424, "total_steps": 8674, "loss": 0.3680551052093506, "lr": 1.725986450717237e-06, "epoch": 0.5589116901083698, "percentage": 27.95, "elapsed_time": "1:38:09", "remaining_time": "4:13:06"} +{"current_steps": 2425, "total_steps": 8674, "loss": 0.5849576592445374, "lr": 1.725724201168441e-06, "epoch": 0.5591422642379525, "percentage": 27.96, "elapsed_time": "1:38:12", "remaining_time": "4:13:03"} +{"current_steps": 2426, "total_steps": 8674, "loss": 0.48919233679771423, "lr": 1.7254618461282773e-06, "epoch": 0.5593728383675352, "percentage": 27.97, "elapsed_time": "1:38:14", "remaining_time": "4:13:00"} +{"current_steps": 2427, "total_steps": 8674, "loss": 0.4857720732688904, "lr": 1.7251993856348821e-06, "epoch": 0.5596034124971179, "percentage": 27.98, "elapsed_time": "1:38:17", "remaining_time": "4:12:58"} +{"current_steps": 2428, "total_steps": 8674, "loss": 0.5106808543205261, "lr": 1.7249368197264062e-06, "epoch": 0.5598339866267005, "percentage": 27.99, "elapsed_time": "1:38:19", "remaining_time": "4:12:56"} +{"current_steps": 2429, "total_steps": 8674, "loss": 0.500100314617157, "lr": 1.724674148441017e-06, "epoch": 0.5600645607562832, "percentage": 28.0, "elapsed_time": "1:38:21", "remaining_time": "4:12:53"} +{"current_steps": 2430, "total_steps": 8674, "loss": 0.5389110445976257, "lr": 
1.7244113718168957e-06, "epoch": 0.5602951348858658, "percentage": 28.01, "elapsed_time": "1:38:24", "remaining_time": "4:12:51"} +{"current_steps": 2431, "total_steps": 8674, "loss": 0.42860496044158936, "lr": 1.72414848989224e-06, "epoch": 0.5605257090154485, "percentage": 28.03, "elapsed_time": "1:38:26", "remaining_time": "4:12:48"} +{"current_steps": 2432, "total_steps": 8674, "loss": 0.4867728352546692, "lr": 1.723885502705262e-06, "epoch": 0.5607562831450311, "percentage": 28.04, "elapsed_time": "1:38:28", "remaining_time": "4:12:46"} +{"current_steps": 2433, "total_steps": 8674, "loss": 0.49194633960723877, "lr": 1.7236224102941899e-06, "epoch": 0.5609868572746138, "percentage": 28.05, "elapsed_time": "1:38:31", "remaining_time": "4:12:43"} +{"current_steps": 2434, "total_steps": 8674, "loss": 0.5194358229637146, "lr": 1.7233592126972667e-06, "epoch": 0.5612174314041964, "percentage": 28.06, "elapsed_time": "1:38:33", "remaining_time": "4:12:40"} +{"current_steps": 2435, "total_steps": 8674, "loss": 0.4738645553588867, "lr": 1.723095909952751e-06, "epoch": 0.5614480055337792, "percentage": 28.07, "elapsed_time": "1:38:36", "remaining_time": "4:12:39"} +{"current_steps": 2436, "total_steps": 8674, "loss": 0.48232927918434143, "lr": 1.7228325020989165e-06, "epoch": 0.5616785796633618, "percentage": 28.08, "elapsed_time": "1:38:38", "remaining_time": "4:12:36"} +{"current_steps": 2437, "total_steps": 8674, "loss": 0.5192145109176636, "lr": 1.7225689891740522e-06, "epoch": 0.5619091537929445, "percentage": 28.1, "elapsed_time": "1:38:41", "remaining_time": "4:12:34"} +{"current_steps": 2438, "total_steps": 8674, "loss": 0.4934930205345154, "lr": 1.7223053712164621e-06, "epoch": 0.5621397279225271, "percentage": 28.11, "elapsed_time": "1:38:43", "remaining_time": "4:12:31"} +{"current_steps": 2439, "total_steps": 8674, "loss": 0.5022200345993042, "lr": 1.722041648264466e-06, "epoch": 0.5623703020521098, "percentage": 28.12, "elapsed_time": "1:38:45", 
"remaining_time": "4:12:28"} +{"current_steps": 2440, "total_steps": 8674, "loss": 0.45300528407096863, "lr": 1.7217778203563986e-06, "epoch": 0.5626008761816924, "percentage": 28.13, "elapsed_time": "1:38:48", "remaining_time": "4:12:26"} +{"current_steps": 2441, "total_steps": 8674, "loss": 0.4965200126171112, "lr": 1.7215138875306103e-06, "epoch": 0.5628314503112751, "percentage": 28.14, "elapsed_time": "1:38:50", "remaining_time": "4:12:23"} +{"current_steps": 2442, "total_steps": 8674, "loss": 0.4618280231952667, "lr": 1.721249849825466e-06, "epoch": 0.5630620244408577, "percentage": 28.15, "elapsed_time": "1:38:53", "remaining_time": "4:12:21"} +{"current_steps": 2443, "total_steps": 8674, "loss": 0.42270147800445557, "lr": 1.7209857072793464e-06, "epoch": 0.5632925985704405, "percentage": 28.16, "elapsed_time": "1:38:55", "remaining_time": "4:12:18"} +{"current_steps": 2444, "total_steps": 8674, "loss": 0.5200725793838501, "lr": 1.720721459930647e-06, "epoch": 0.5635231727000231, "percentage": 28.18, "elapsed_time": "1:38:58", "remaining_time": "4:12:16"} +{"current_steps": 2445, "total_steps": 8674, "loss": 0.47475337982177734, "lr": 1.7204571078177792e-06, "epoch": 0.5637537468296058, "percentage": 28.19, "elapsed_time": "1:39:00", "remaining_time": "4:12:14"} +{"current_steps": 2446, "total_steps": 8674, "loss": 0.5493113994598389, "lr": 1.7201926509791693e-06, "epoch": 0.5639843209591884, "percentage": 28.2, "elapsed_time": "1:39:02", "remaining_time": "4:12:11"} +{"current_steps": 2447, "total_steps": 8674, "loss": 0.4743562340736389, "lr": 1.719928089453259e-06, "epoch": 0.564214895088771, "percentage": 28.21, "elapsed_time": "1:39:05", "remaining_time": "4:12:09"} +{"current_steps": 2448, "total_steps": 8674, "loss": 0.5145455598831177, "lr": 1.7196634232785038e-06, "epoch": 0.5644454692183537, "percentage": 28.22, "elapsed_time": "1:39:07", "remaining_time": "4:12:06"} +{"current_steps": 2449, "total_steps": 8674, "loss": 0.45072540640830994, "lr": 
1.719398652493377e-06, "epoch": 0.5646760433479363, "percentage": 28.23, "elapsed_time": "1:39:10", "remaining_time": "4:12:04"} +{"current_steps": 2450, "total_steps": 8674, "loss": 0.5150895714759827, "lr": 1.7191337771363651e-06, "epoch": 0.564906617477519, "percentage": 28.25, "elapsed_time": "1:39:12", "remaining_time": "4:12:01"} +{"current_steps": 2451, "total_steps": 8674, "loss": 0.5025302171707153, "lr": 1.7188687972459705e-06, "epoch": 0.5651371916071016, "percentage": 28.26, "elapsed_time": "1:39:14", "remaining_time": "4:11:59"} +{"current_steps": 2452, "total_steps": 8674, "loss": 0.618930459022522, "lr": 1.7186037128607107e-06, "epoch": 0.5653677657366843, "percentage": 28.27, "elapsed_time": "1:39:17", "remaining_time": "4:11:57"} +{"current_steps": 2453, "total_steps": 8674, "loss": 0.5841591358184814, "lr": 1.7183385240191183e-06, "epoch": 0.565598339866267, "percentage": 28.28, "elapsed_time": "1:39:19", "remaining_time": "4:11:54"} +{"current_steps": 2454, "total_steps": 8674, "loss": 0.4915233850479126, "lr": 1.7180732307597413e-06, "epoch": 0.5658289139958497, "percentage": 28.29, "elapsed_time": "1:39:22", "remaining_time": "4:11:52"} +{"current_steps": 2455, "total_steps": 8674, "loss": 0.46732476353645325, "lr": 1.7178078331211429e-06, "epoch": 0.5660594881254323, "percentage": 28.3, "elapsed_time": "1:39:24", "remaining_time": "4:11:49"} +{"current_steps": 2456, "total_steps": 8674, "loss": 0.4640737771987915, "lr": 1.7175423311419013e-06, "epoch": 0.566290062255015, "percentage": 28.31, "elapsed_time": "1:39:27", "remaining_time": "4:11:47"} +{"current_steps": 2457, "total_steps": 8674, "loss": 0.39535683393478394, "lr": 1.7172767248606095e-06, "epoch": 0.5665206363845976, "percentage": 28.33, "elapsed_time": "1:39:29", "remaining_time": "4:11:44"} +{"current_steps": 2458, "total_steps": 8674, "loss": 0.4782179594039917, "lr": 1.7170110143158766e-06, "epoch": 0.5667512105141803, "percentage": 28.34, "elapsed_time": "1:39:31", 
"remaining_time": "4:11:42"} +{"current_steps": 2459, "total_steps": 8674, "loss": 0.6186003684997559, "lr": 1.7167451995463258e-06, "epoch": 0.5669817846437629, "percentage": 28.35, "elapsed_time": "1:39:34", "remaining_time": "4:11:39"} +{"current_steps": 2460, "total_steps": 8674, "loss": 0.4915347099304199, "lr": 1.7164792805905965e-06, "epoch": 0.5672123587733456, "percentage": 28.36, "elapsed_time": "1:39:36", "remaining_time": "4:11:37"} +{"current_steps": 2461, "total_steps": 8674, "loss": 0.4789005517959595, "lr": 1.7162132574873422e-06, "epoch": 0.5674429329029282, "percentage": 28.37, "elapsed_time": "1:39:39", "remaining_time": "4:11:35"} +{"current_steps": 2462, "total_steps": 8674, "loss": 0.6307233572006226, "lr": 1.7159471302752326e-06, "epoch": 0.567673507032511, "percentage": 28.38, "elapsed_time": "1:39:41", "remaining_time": "4:11:32"} +{"current_steps": 2463, "total_steps": 8674, "loss": 0.5278424024581909, "lr": 1.7156808989929514e-06, "epoch": 0.5679040811620936, "percentage": 28.4, "elapsed_time": "1:39:44", "remaining_time": "4:11:30"} +{"current_steps": 2464, "total_steps": 8674, "loss": 0.48552995920181274, "lr": 1.7154145636791988e-06, "epoch": 0.5681346552916763, "percentage": 28.41, "elapsed_time": "1:39:46", "remaining_time": "4:11:27"} +{"current_steps": 2465, "total_steps": 8674, "loss": 0.5125370621681213, "lr": 1.7151481243726885e-06, "epoch": 0.5683652294212589, "percentage": 28.42, "elapsed_time": "1:39:48", "remaining_time": "4:11:25"} +{"current_steps": 2466, "total_steps": 8674, "loss": 0.44231730699539185, "lr": 1.7148815811121506e-06, "epoch": 0.5685958035508416, "percentage": 28.43, "elapsed_time": "1:39:51", "remaining_time": "4:11:22"} +{"current_steps": 2467, "total_steps": 8674, "loss": 0.5593529939651489, "lr": 1.7146149339363296e-06, "epoch": 0.5688263776804242, "percentage": 28.44, "elapsed_time": "1:39:53", "remaining_time": "4:11:20"} +{"current_steps": 2468, "total_steps": 8674, "loss": 0.4830925464630127, "lr": 
1.714348182883986e-06, "epoch": 0.5690569518100069, "percentage": 28.45, "elapsed_time": "1:39:56", "remaining_time": "4:11:17"} +{"current_steps": 2469, "total_steps": 8674, "loss": 0.5538743734359741, "lr": 1.714081327993894e-06, "epoch": 0.5692875259395895, "percentage": 28.46, "elapsed_time": "1:39:58", "remaining_time": "4:11:15"} +{"current_steps": 2470, "total_steps": 8674, "loss": 0.5145905613899231, "lr": 1.7138143693048441e-06, "epoch": 0.5695181000691723, "percentage": 28.48, "elapsed_time": "1:40:01", "remaining_time": "4:11:13"} +{"current_steps": 2471, "total_steps": 8674, "loss": 0.47706612944602966, "lr": 1.713547306855641e-06, "epoch": 0.5697486741987549, "percentage": 28.49, "elapsed_time": "1:40:03", "remaining_time": "4:11:10"} +{"current_steps": 2472, "total_steps": 8674, "loss": 0.45162689685821533, "lr": 1.7132801406851056e-06, "epoch": 0.5699792483283376, "percentage": 28.5, "elapsed_time": "1:40:06", "remaining_time": "4:11:08"} +{"current_steps": 2473, "total_steps": 8674, "loss": 0.5141111612319946, "lr": 1.7130128708320727e-06, "epoch": 0.5702098224579202, "percentage": 28.51, "elapsed_time": "1:40:08", "remaining_time": "4:11:05"} +{"current_steps": 2474, "total_steps": 8674, "loss": 0.4443173408508301, "lr": 1.7127454973353932e-06, "epoch": 0.5704403965875029, "percentage": 28.52, "elapsed_time": "1:40:10", "remaining_time": "4:11:03"} +{"current_steps": 2475, "total_steps": 8674, "loss": 0.4162046015262604, "lr": 1.7124780202339317e-06, "epoch": 0.5706709707170855, "percentage": 28.53, "elapsed_time": "1:40:13", "remaining_time": "4:11:01"} +{"current_steps": 2476, "total_steps": 8674, "loss": 0.44526439905166626, "lr": 1.7122104395665695e-06, "epoch": 0.5709015448466682, "percentage": 28.55, "elapsed_time": "1:40:15", "remaining_time": "4:10:58"} +{"current_steps": 2477, "total_steps": 8674, "loss": 0.5069452524185181, "lr": 1.7119427553722016e-06, "epoch": 0.5711321189762508, "percentage": 28.56, "elapsed_time": "1:40:18", 
"remaining_time": "4:10:56"} +{"current_steps": 2478, "total_steps": 8674, "loss": 0.46709829568862915, "lr": 1.7116749676897393e-06, "epoch": 0.5713626931058335, "percentage": 28.57, "elapsed_time": "1:40:20", "remaining_time": "4:10:54"} +{"current_steps": 2479, "total_steps": 8674, "loss": 0.5443992614746094, "lr": 1.7114070765581078e-06, "epoch": 0.5715932672354161, "percentage": 28.58, "elapsed_time": "1:40:23", "remaining_time": "4:10:51"} +{"current_steps": 2480, "total_steps": 8674, "loss": 0.4307284653186798, "lr": 1.7111390820162477e-06, "epoch": 0.5718238413649989, "percentage": 28.59, "elapsed_time": "1:40:25", "remaining_time": "4:10:49"} +{"current_steps": 2481, "total_steps": 8674, "loss": 0.4753509759902954, "lr": 1.7108709841031148e-06, "epoch": 0.5720544154945815, "percentage": 28.6, "elapsed_time": "1:40:27", "remaining_time": "4:10:46"} +{"current_steps": 2482, "total_steps": 8674, "loss": 0.5689436197280884, "lr": 1.7106027828576798e-06, "epoch": 0.5722849896241642, "percentage": 28.61, "elapsed_time": "1:40:30", "remaining_time": "4:10:44"} +{"current_steps": 2483, "total_steps": 8674, "loss": 0.47182410955429077, "lr": 1.710334478318929e-06, "epoch": 0.5725155637537468, "percentage": 28.63, "elapsed_time": "1:40:32", "remaining_time": "4:10:41"} +{"current_steps": 2484, "total_steps": 8674, "loss": 0.4418888986110687, "lr": 1.7100660705258623e-06, "epoch": 0.5727461378833295, "percentage": 28.64, "elapsed_time": "1:40:35", "remaining_time": "4:10:40"} +{"current_steps": 2485, "total_steps": 8674, "loss": 0.4315544366836548, "lr": 1.709797559517496e-06, "epoch": 0.5729767120129121, "percentage": 28.65, "elapsed_time": "1:40:37", "remaining_time": "4:10:37"} +{"current_steps": 2486, "total_steps": 8674, "loss": 0.34541741013526917, "lr": 1.709528945332861e-06, "epoch": 0.5732072861424948, "percentage": 28.66, "elapsed_time": "1:40:40", "remaining_time": "4:10:35"} +{"current_steps": 2487, "total_steps": 8674, "loss": 0.5380317568778992, "lr": 
1.709260228011003e-06, "epoch": 0.5734378602720774, "percentage": 28.67, "elapsed_time": "1:40:42", "remaining_time": "4:10:33"} +{"current_steps": 2488, "total_steps": 8674, "loss": 0.5017478466033936, "lr": 1.7089914075909824e-06, "epoch": 0.5736684344016602, "percentage": 28.68, "elapsed_time": "1:40:45", "remaining_time": "4:10:30"} +{"current_steps": 2489, "total_steps": 8674, "loss": 0.5608090162277222, "lr": 1.7087224841118756e-06, "epoch": 0.5738990085312428, "percentage": 28.69, "elapsed_time": "1:40:47", "remaining_time": "4:10:27"} +{"current_steps": 2490, "total_steps": 8674, "loss": 0.5360782146453857, "lr": 1.708453457612773e-06, "epoch": 0.5741295826608255, "percentage": 28.71, "elapsed_time": "1:40:50", "remaining_time": "4:10:25"} +{"current_steps": 2491, "total_steps": 8674, "loss": 0.5638090372085571, "lr": 1.7081843281327802e-06, "epoch": 0.5743601567904081, "percentage": 28.72, "elapsed_time": "1:40:52", "remaining_time": "4:10:22"} +{"current_steps": 2492, "total_steps": 8674, "loss": 0.45777082443237305, "lr": 1.707915095711018e-06, "epoch": 0.5745907309199908, "percentage": 28.73, "elapsed_time": "1:40:54", "remaining_time": "4:10:20"} +{"current_steps": 2493, "total_steps": 8674, "loss": 0.5423707962036133, "lr": 1.7076457603866224e-06, "epoch": 0.5748213050495734, "percentage": 28.74, "elapsed_time": "1:40:57", "remaining_time": "4:10:17"} +{"current_steps": 2494, "total_steps": 8674, "loss": 0.4286508560180664, "lr": 1.7073763221987436e-06, "epoch": 0.5750518791791561, "percentage": 28.75, "elapsed_time": "1:40:59", "remaining_time": "4:10:15"} +{"current_steps": 2495, "total_steps": 8674, "loss": 0.4197548031806946, "lr": 1.7071067811865474e-06, "epoch": 0.5752824533087387, "percentage": 28.76, "elapsed_time": "1:41:02", "remaining_time": "4:10:13"} +{"current_steps": 2496, "total_steps": 8674, "loss": 0.47944843769073486, "lr": 1.7068371373892142e-06, "epoch": 0.5755130274383214, "percentage": 28.78, "elapsed_time": "1:41:04", 
"remaining_time": "4:10:10"} +{"current_steps": 2497, "total_steps": 8674, "loss": 0.49708908796310425, "lr": 1.7065673908459396e-06, "epoch": 0.575743601567904, "percentage": 28.79, "elapsed_time": "1:41:06", "remaining_time": "4:10:07"} +{"current_steps": 2498, "total_steps": 8674, "loss": 0.46402662992477417, "lr": 1.706297541595934e-06, "epoch": 0.5759741756974868, "percentage": 28.8, "elapsed_time": "1:41:09", "remaining_time": "4:10:05"} +{"current_steps": 2499, "total_steps": 8674, "loss": 0.4665846824645996, "lr": 1.7060275896784222e-06, "epoch": 0.5762047498270694, "percentage": 28.81, "elapsed_time": "1:41:11", "remaining_time": "4:10:02"} +{"current_steps": 2500, "total_steps": 8674, "loss": 0.511766791343689, "lr": 1.7057575351326452e-06, "epoch": 0.5764353239566521, "percentage": 28.82, "elapsed_time": "1:41:13", "remaining_time": "4:10:00"} +{"current_steps": 2501, "total_steps": 8674, "loss": 0.5731323957443237, "lr": 1.7054873779978578e-06, "epoch": 0.5766658980862347, "percentage": 28.83, "elapsed_time": "1:41:17", "remaining_time": "4:10:01"} +{"current_steps": 2502, "total_steps": 8674, "loss": 0.43246185779571533, "lr": 1.70521711831333e-06, "epoch": 0.5768964722158174, "percentage": 28.84, "elapsed_time": "1:41:20", "remaining_time": "4:09:58"} +{"current_steps": 2503, "total_steps": 8674, "loss": 0.5062395334243774, "lr": 1.704946756118347e-06, "epoch": 0.5771270463454, "percentage": 28.86, "elapsed_time": "1:41:22", "remaining_time": "4:09:56"} +{"current_steps": 2504, "total_steps": 8674, "loss": 0.5010061264038086, "lr": 1.7046762914522087e-06, "epoch": 0.5773576204749827, "percentage": 28.87, "elapsed_time": "1:41:25", "remaining_time": "4:09:54"} +{"current_steps": 2505, "total_steps": 8674, "loss": 0.5118759870529175, "lr": 1.7044057243542293e-06, "epoch": 0.5775881946045653, "percentage": 28.88, "elapsed_time": "1:41:27", "remaining_time": "4:09:51"} +{"current_steps": 2506, "total_steps": 8674, "loss": 0.5796714425086975, "lr": 
1.7041350548637392e-06, "epoch": 0.5778187687341481, "percentage": 28.89, "elapsed_time": "1:41:29", "remaining_time": "4:09:49"} +{"current_steps": 2507, "total_steps": 8674, "loss": 0.43587976694107056, "lr": 1.7038642830200828e-06, "epoch": 0.5780493428637307, "percentage": 28.9, "elapsed_time": "1:41:32", "remaining_time": "4:09:46"} +{"current_steps": 2508, "total_steps": 8674, "loss": 0.4780135154724121, "lr": 1.7035934088626193e-06, "epoch": 0.5782799169933134, "percentage": 28.91, "elapsed_time": "1:41:34", "remaining_time": "4:09:43"} +{"current_steps": 2509, "total_steps": 8674, "loss": 0.48039600253105164, "lr": 1.7033224324307232e-06, "epoch": 0.578510491122896, "percentage": 28.93, "elapsed_time": "1:41:37", "remaining_time": "4:09:41"} +{"current_steps": 2510, "total_steps": 8674, "loss": 0.48075419664382935, "lr": 1.7030513537637835e-06, "epoch": 0.5787410652524787, "percentage": 28.94, "elapsed_time": "1:41:39", "remaining_time": "4:09:38"} +{"current_steps": 2511, "total_steps": 8674, "loss": 0.5006246566772461, "lr": 1.7027801729012044e-06, "epoch": 0.5789716393820613, "percentage": 28.95, "elapsed_time": "1:41:42", "remaining_time": "4:09:36"} +{"current_steps": 2512, "total_steps": 8674, "loss": 0.550139307975769, "lr": 1.7025088898824046e-06, "epoch": 0.579202213511644, "percentage": 28.96, "elapsed_time": "1:41:44", "remaining_time": "4:09:34"} +{"current_steps": 2513, "total_steps": 8674, "loss": 0.5228495001792908, "lr": 1.7022375047468178e-06, "epoch": 0.5794327876412266, "percentage": 28.97, "elapsed_time": "1:41:46", "remaining_time": "4:09:31"} +{"current_steps": 2514, "total_steps": 8674, "loss": 0.4783739149570465, "lr": 1.701966017533893e-06, "epoch": 0.5796633617708093, "percentage": 28.98, "elapsed_time": "1:41:49", "remaining_time": "4:09:29"} +{"current_steps": 2515, "total_steps": 8674, "loss": 0.47218769788742065, "lr": 1.701694428283093e-06, "epoch": 0.579893935900392, "percentage": 28.99, "elapsed_time": "1:41:51", 
"remaining_time": "4:09:26"} +{"current_steps": 2516, "total_steps": 8674, "loss": 0.5340671539306641, "lr": 1.7014227370338967e-06, "epoch": 0.5801245100299747, "percentage": 29.01, "elapsed_time": "1:41:53", "remaining_time": "4:09:23"} +{"current_steps": 2517, "total_steps": 8674, "loss": 0.4629259407520294, "lr": 1.7011509438257967e-06, "epoch": 0.5803550841595573, "percentage": 29.02, "elapsed_time": "1:41:56", "remaining_time": "4:09:21"} +{"current_steps": 2518, "total_steps": 8674, "loss": 0.6334242820739746, "lr": 1.7008790486983013e-06, "epoch": 0.58058565828914, "percentage": 29.03, "elapsed_time": "1:41:58", "remaining_time": "4:09:18"} +{"current_steps": 2519, "total_steps": 8674, "loss": 0.544147789478302, "lr": 1.7006070516909327e-06, "epoch": 0.5808162324187226, "percentage": 29.04, "elapsed_time": "1:42:01", "remaining_time": "4:09:16"} +{"current_steps": 2520, "total_steps": 8674, "loss": 0.47045618295669556, "lr": 1.700334952843229e-06, "epoch": 0.5810468065483053, "percentage": 29.05, "elapsed_time": "1:42:03", "remaining_time": "4:09:13"} +{"current_steps": 2521, "total_steps": 8674, "loss": 0.4582393169403076, "lr": 1.700062752194742e-06, "epoch": 0.5812773806778879, "percentage": 29.06, "elapsed_time": "1:42:05", "remaining_time": "4:09:11"} +{"current_steps": 2522, "total_steps": 8674, "loss": 0.507327139377594, "lr": 1.699790449785039e-06, "epoch": 0.5815079548074706, "percentage": 29.08, "elapsed_time": "1:42:08", "remaining_time": "4:09:08"} +{"current_steps": 2523, "total_steps": 8674, "loss": 0.5345891714096069, "lr": 1.6995180456537022e-06, "epoch": 0.5817385289370532, "percentage": 29.09, "elapsed_time": "1:42:10", "remaining_time": "4:09:05"} +{"current_steps": 2524, "total_steps": 8674, "loss": 0.4847550094127655, "lr": 1.6992455398403277e-06, "epoch": 0.581969103066636, "percentage": 29.1, "elapsed_time": "1:42:12", "remaining_time": "4:09:03"} +{"current_steps": 2525, "total_steps": 8674, "loss": 0.4472479820251465, "lr": 
1.6989729323845276e-06, "epoch": 0.5821996771962186, "percentage": 29.11, "elapsed_time": "1:42:15", "remaining_time": "4:09:00"} +{"current_steps": 2526, "total_steps": 8674, "loss": 0.4426107108592987, "lr": 1.698700223325928e-06, "epoch": 0.5824302513258013, "percentage": 29.12, "elapsed_time": "1:42:17", "remaining_time": "4:08:58"} +{"current_steps": 2527, "total_steps": 8674, "loss": 0.4814276099205017, "lr": 1.6984274127041696e-06, "epoch": 0.5826608254553839, "percentage": 29.13, "elapsed_time": "1:42:19", "remaining_time": "4:08:55"} +{"current_steps": 2528, "total_steps": 8674, "loss": 0.5286451578140259, "lr": 1.6981545005589084e-06, "epoch": 0.5828913995849666, "percentage": 29.14, "elapsed_time": "1:42:22", "remaining_time": "4:08:53"} +{"current_steps": 2529, "total_steps": 8674, "loss": 0.5291767120361328, "lr": 1.6978814869298152e-06, "epoch": 0.5831219737145492, "percentage": 29.16, "elapsed_time": "1:42:25", "remaining_time": "4:08:51"} +{"current_steps": 2530, "total_steps": 8674, "loss": 0.5807399749755859, "lr": 1.6976083718565748e-06, "epoch": 0.5833525478441319, "percentage": 29.17, "elapsed_time": "1:42:27", "remaining_time": "4:08:48"} +{"current_steps": 2531, "total_steps": 8674, "loss": 0.5489222407341003, "lr": 1.6973351553788878e-06, "epoch": 0.5835831219737145, "percentage": 29.18, "elapsed_time": "1:42:29", "remaining_time": "4:08:45"} +{"current_steps": 2532, "total_steps": 8674, "loss": 0.5295521020889282, "lr": 1.6970618375364683e-06, "epoch": 0.5838136961032973, "percentage": 29.19, "elapsed_time": "1:42:32", "remaining_time": "4:08:43"} +{"current_steps": 2533, "total_steps": 8674, "loss": 0.4979495406150818, "lr": 1.6967884183690467e-06, "epoch": 0.5840442702328799, "percentage": 29.2, "elapsed_time": "1:42:34", "remaining_time": "4:08:40"} +{"current_steps": 2534, "total_steps": 8674, "loss": 0.45667344331741333, "lr": 1.6965148979163661e-06, "epoch": 0.5842748443624626, "percentage": 29.21, "elapsed_time": "1:42:36", 
"remaining_time": "4:08:38"} +{"current_steps": 2535, "total_steps": 8674, "loss": 0.42687737941741943, "lr": 1.6962412762181866e-06, "epoch": 0.5845054184920452, "percentage": 29.23, "elapsed_time": "1:42:39", "remaining_time": "4:08:35"} +{"current_steps": 2536, "total_steps": 8674, "loss": 0.5616278648376465, "lr": 1.6959675533142815e-06, "epoch": 0.5847359926216279, "percentage": 29.24, "elapsed_time": "1:42:41", "remaining_time": "4:08:33"} +{"current_steps": 2537, "total_steps": 8674, "loss": 0.4961121678352356, "lr": 1.6956937292444386e-06, "epoch": 0.5849665667512105, "percentage": 29.25, "elapsed_time": "1:42:44", "remaining_time": "4:08:31"} +{"current_steps": 2538, "total_steps": 8674, "loss": 0.5115770101547241, "lr": 1.6954198040484617e-06, "epoch": 0.5851971408807932, "percentage": 29.26, "elapsed_time": "1:42:46", "remaining_time": "4:08:28"} +{"current_steps": 2539, "total_steps": 8674, "loss": 0.540202260017395, "lr": 1.6951457777661686e-06, "epoch": 0.5854277150103758, "percentage": 29.27, "elapsed_time": "1:42:48", "remaining_time": "4:08:26"} +{"current_steps": 2540, "total_steps": 8674, "loss": 0.5312114357948303, "lr": 1.6948716504373914e-06, "epoch": 0.5856582891399585, "percentage": 29.28, "elapsed_time": "1:42:51", "remaining_time": "4:08:23"} +{"current_steps": 2541, "total_steps": 8674, "loss": 0.49323517084121704, "lr": 1.694597422101978e-06, "epoch": 0.5858888632695411, "percentage": 29.29, "elapsed_time": "1:42:53", "remaining_time": "4:08:20"} +{"current_steps": 2542, "total_steps": 8674, "loss": 0.42929738759994507, "lr": 1.6943230927997894e-06, "epoch": 0.5861194373991239, "percentage": 29.31, "elapsed_time": "1:42:55", "remaining_time": "4:08:18"} +{"current_steps": 2543, "total_steps": 8674, "loss": 0.45236462354660034, "lr": 1.6940486625707021e-06, "epoch": 0.5863500115287065, "percentage": 29.32, "elapsed_time": "1:42:58", "remaining_time": "4:08:15"} +{"current_steps": 2544, "total_steps": 8674, "loss": 0.5129071474075317, 
"lr": 1.6937741314546084e-06, "epoch": 0.5865805856582892, "percentage": 29.33, "elapsed_time": "1:43:00", "remaining_time": "4:08:13"} +{"current_steps": 2545, "total_steps": 8674, "loss": 0.5562577247619629, "lr": 1.693499499491413e-06, "epoch": 0.5868111597878718, "percentage": 29.34, "elapsed_time": "1:43:03", "remaining_time": "4:08:11"} +{"current_steps": 2546, "total_steps": 8674, "loss": 0.5593177080154419, "lr": 1.6932247667210372e-06, "epoch": 0.5870417339174545, "percentage": 29.35, "elapsed_time": "1:43:05", "remaining_time": "4:08:08"} +{"current_steps": 2547, "total_steps": 8674, "loss": 0.5536680221557617, "lr": 1.692949933183416e-06, "epoch": 0.5872723080470371, "percentage": 29.36, "elapsed_time": "1:43:08", "remaining_time": "4:08:05"} +{"current_steps": 2548, "total_steps": 8674, "loss": 0.5523338317871094, "lr": 1.6926749989184993e-06, "epoch": 0.5875028821766198, "percentage": 29.38, "elapsed_time": "1:43:10", "remaining_time": "4:08:03"} +{"current_steps": 2549, "total_steps": 8674, "loss": 0.41815924644470215, "lr": 1.692399963966251e-06, "epoch": 0.5877334563062024, "percentage": 29.39, "elapsed_time": "1:43:12", "remaining_time": "4:08:00"} +{"current_steps": 2550, "total_steps": 8674, "loss": 0.46959248185157776, "lr": 1.6921248283666508e-06, "epoch": 0.5879640304357852, "percentage": 29.4, "elapsed_time": "1:43:15", "remaining_time": "4:07:57"} +{"current_steps": 2551, "total_steps": 8674, "loss": 0.4748489260673523, "lr": 1.6918495921596928e-06, "epoch": 0.5881946045653678, "percentage": 29.41, "elapsed_time": "1:43:17", "remaining_time": "4:07:55"} +{"current_steps": 2552, "total_steps": 8674, "loss": 0.4541524052619934, "lr": 1.6915742553853845e-06, "epoch": 0.5884251786949505, "percentage": 29.42, "elapsed_time": "1:43:20", "remaining_time": "4:07:53"} +{"current_steps": 2553, "total_steps": 8674, "loss": 0.47106000781059265, "lr": 1.691298818083749e-06, "epoch": 0.5886557528245331, "percentage": 29.43, "elapsed_time": "1:43:22", 
"remaining_time": "4:07:50"} +{"current_steps": 2554, "total_steps": 8674, "loss": 0.5364842414855957, "lr": 1.6910232802948246e-06, "epoch": 0.5888863269541158, "percentage": 29.44, "elapsed_time": "1:43:24", "remaining_time": "4:07:48"} +{"current_steps": 2555, "total_steps": 8674, "loss": 0.48388350009918213, "lr": 1.690747642058663e-06, "epoch": 0.5891169010836984, "percentage": 29.46, "elapsed_time": "1:43:27", "remaining_time": "4:07:45"} +{"current_steps": 2556, "total_steps": 8674, "loss": 0.5075609683990479, "lr": 1.690471903415331e-06, "epoch": 0.5893474752132811, "percentage": 29.47, "elapsed_time": "1:43:29", "remaining_time": "4:07:42"} +{"current_steps": 2557, "total_steps": 8674, "loss": 0.45098066329956055, "lr": 1.6901960644049102e-06, "epoch": 0.5895780493428637, "percentage": 29.48, "elapsed_time": "1:43:31", "remaining_time": "4:07:40"} +{"current_steps": 2558, "total_steps": 8674, "loss": 0.5329077243804932, "lr": 1.6899201250674966e-06, "epoch": 0.5898086234724463, "percentage": 29.49, "elapsed_time": "1:43:34", "remaining_time": "4:07:37"} +{"current_steps": 2559, "total_steps": 8674, "loss": 0.4632904529571533, "lr": 1.6896440854432005e-06, "epoch": 0.590039197602029, "percentage": 29.5, "elapsed_time": "1:43:36", "remaining_time": "4:07:35"} +{"current_steps": 2560, "total_steps": 8674, "loss": 0.5302451848983765, "lr": 1.6893679455721474e-06, "epoch": 0.5902697717316117, "percentage": 29.51, "elapsed_time": "1:43:38", "remaining_time": "4:07:32"} +{"current_steps": 2561, "total_steps": 8674, "loss": 0.45363447070121765, "lr": 1.6890917054944768e-06, "epoch": 0.5905003458611944, "percentage": 29.53, "elapsed_time": "1:43:41", "remaining_time": "4:07:30"} +{"current_steps": 2562, "total_steps": 8674, "loss": 0.5103914737701416, "lr": 1.688815365250343e-06, "epoch": 0.590730919990777, "percentage": 29.54, "elapsed_time": "1:43:43", "remaining_time": "4:07:27"} +{"current_steps": 2563, "total_steps": 8674, "loss": 0.45474469661712646, "lr": 
1.6885389248799152e-06, "epoch": 0.5909614941203597, "percentage": 29.55, "elapsed_time": "1:43:46", "remaining_time": "4:07:24"} +{"current_steps": 2564, "total_steps": 8674, "loss": 0.517952024936676, "lr": 1.6882623844233766e-06, "epoch": 0.5911920682499423, "percentage": 29.56, "elapsed_time": "1:43:48", "remaining_time": "4:07:22"} +{"current_steps": 2565, "total_steps": 8674, "loss": 0.4872232973575592, "lr": 1.6879857439209245e-06, "epoch": 0.591422642379525, "percentage": 29.57, "elapsed_time": "1:43:50", "remaining_time": "4:07:19"} +{"current_steps": 2566, "total_steps": 8674, "loss": 0.4938408136367798, "lr": 1.6877090034127726e-06, "epoch": 0.5916532165091076, "percentage": 29.58, "elapsed_time": "1:43:53", "remaining_time": "4:07:17"} +{"current_steps": 2567, "total_steps": 8674, "loss": 0.42687565088272095, "lr": 1.6874321629391469e-06, "epoch": 0.5918837906386903, "percentage": 29.59, "elapsed_time": "1:43:55", "remaining_time": "4:07:14"} +{"current_steps": 2568, "total_steps": 8674, "loss": 0.5272493362426758, "lr": 1.6871552225402896e-06, "epoch": 0.592114364768273, "percentage": 29.61, "elapsed_time": "1:43:57", "remaining_time": "4:07:12"} +{"current_steps": 2569, "total_steps": 8674, "loss": 0.4643193185329437, "lr": 1.6868781822564565e-06, "epoch": 0.5923449388978557, "percentage": 29.62, "elapsed_time": "1:44:00", "remaining_time": "4:07:09"} +{"current_steps": 2570, "total_steps": 8674, "loss": 0.4957782030105591, "lr": 1.6866010421279183e-06, "epoch": 0.5925755130274383, "percentage": 29.63, "elapsed_time": "1:44:02", "remaining_time": "4:07:07"} +{"current_steps": 2571, "total_steps": 8674, "loss": 0.442360520362854, "lr": 1.6863238021949605e-06, "epoch": 0.592806087157021, "percentage": 29.64, "elapsed_time": "1:44:05", "remaining_time": "4:07:04"} +{"current_steps": 2572, "total_steps": 8674, "loss": 0.5108935832977295, "lr": 1.6860464624978824e-06, "epoch": 0.5930366612866036, "percentage": 29.65, "elapsed_time": "1:44:07", 
"remaining_time": "4:07:02"} +{"current_steps": 2573, "total_steps": 8674, "loss": 0.46559715270996094, "lr": 1.6857690230769976e-06, "epoch": 0.5932672354161863, "percentage": 29.66, "elapsed_time": "1:44:10", "remaining_time": "4:07:00"} +{"current_steps": 2574, "total_steps": 8674, "loss": 0.44752076268196106, "lr": 1.6854914839726356e-06, "epoch": 0.5934978095457689, "percentage": 29.67, "elapsed_time": "1:44:12", "remaining_time": "4:06:58"} +{"current_steps": 2575, "total_steps": 8674, "loss": 0.4018149971961975, "lr": 1.6852138452251387e-06, "epoch": 0.5937283836753516, "percentage": 29.69, "elapsed_time": "1:44:15", "remaining_time": "4:06:55"} +{"current_steps": 2576, "total_steps": 8674, "loss": 0.47711417078971863, "lr": 1.6849361068748652e-06, "epoch": 0.5939589578049342, "percentage": 29.7, "elapsed_time": "1:44:17", "remaining_time": "4:06:53"} +{"current_steps": 2577, "total_steps": 8674, "loss": 0.4671875834465027, "lr": 1.684658268962187e-06, "epoch": 0.594189531934517, "percentage": 29.71, "elapsed_time": "1:44:19", "remaining_time": "4:06:50"} +{"current_steps": 2578, "total_steps": 8674, "loss": 0.48041921854019165, "lr": 1.6843803315274906e-06, "epoch": 0.5944201060640996, "percentage": 29.72, "elapsed_time": "1:44:22", "remaining_time": "4:06:48"} +{"current_steps": 2579, "total_steps": 8674, "loss": 0.3444385528564453, "lr": 1.6841022946111772e-06, "epoch": 0.5946506801936823, "percentage": 29.73, "elapsed_time": "1:44:24", "remaining_time": "4:06:45"} +{"current_steps": 2580, "total_steps": 8674, "loss": 0.46800029277801514, "lr": 1.6838241582536619e-06, "epoch": 0.5948812543232649, "percentage": 29.74, "elapsed_time": "1:44:27", "remaining_time": "4:06:43"} +{"current_steps": 2581, "total_steps": 8674, "loss": 0.4362339377403259, "lr": 1.683545922495375e-06, "epoch": 0.5951118284528476, "percentage": 29.76, "elapsed_time": "1:44:29", "remaining_time": "4:06:40"} +{"current_steps": 2582, "total_steps": 8674, "loss": 0.4818536043167114, "lr": 
1.6832675873767606e-06, "epoch": 0.5953424025824302, "percentage": 29.77, "elapsed_time": "1:44:31", "remaining_time": "4:06:37"} +{"current_steps": 2583, "total_steps": 8674, "loss": 0.47899681329727173, "lr": 1.6829891529382775e-06, "epoch": 0.5955729767120129, "percentage": 29.78, "elapsed_time": "1:44:34", "remaining_time": "4:06:35"} +{"current_steps": 2584, "total_steps": 8674, "loss": 0.4239576458930969, "lr": 1.6827106192203995e-06, "epoch": 0.5958035508415955, "percentage": 29.79, "elapsed_time": "1:44:36", "remaining_time": "4:06:32"} +{"current_steps": 2585, "total_steps": 8674, "loss": 0.545168399810791, "lr": 1.6824319862636136e-06, "epoch": 0.5960341249711782, "percentage": 29.8, "elapsed_time": "1:44:39", "remaining_time": "4:06:30"} +{"current_steps": 2586, "total_steps": 8674, "loss": 0.4238642156124115, "lr": 1.6821532541084228e-06, "epoch": 0.5962646991007609, "percentage": 29.81, "elapsed_time": "1:44:41", "remaining_time": "4:06:28"} +{"current_steps": 2587, "total_steps": 8674, "loss": 0.39589810371398926, "lr": 1.6818744227953422e-06, "epoch": 0.5964952732303436, "percentage": 29.82, "elapsed_time": "1:44:43", "remaining_time": "4:06:25"} +{"current_steps": 2588, "total_steps": 8674, "loss": 0.4358367919921875, "lr": 1.6815954923649044e-06, "epoch": 0.5967258473599262, "percentage": 29.84, "elapsed_time": "1:44:46", "remaining_time": "4:06:23"} +{"current_steps": 2589, "total_steps": 8674, "loss": 0.5012080073356628, "lr": 1.6813164628576538e-06, "epoch": 0.5969564214895089, "percentage": 29.85, "elapsed_time": "1:44:48", "remaining_time": "4:06:20"} +{"current_steps": 2590, "total_steps": 8674, "loss": 0.4637286365032196, "lr": 1.6810373343141503e-06, "epoch": 0.5971869956190915, "percentage": 29.86, "elapsed_time": "1:44:51", "remaining_time": "4:06:17"} +{"current_steps": 2591, "total_steps": 8674, "loss": 0.6130828261375427, "lr": 1.6807581067749684e-06, "epoch": 0.5974175697486742, "percentage": 29.87, "elapsed_time": "1:44:53", 
"remaining_time": "4:06:15"} +{"current_steps": 2592, "total_steps": 8674, "loss": 0.5430021286010742, "lr": 1.680478780280696e-06, "epoch": 0.5976481438782568, "percentage": 29.88, "elapsed_time": "1:44:55", "remaining_time": "4:06:12"} +{"current_steps": 2593, "total_steps": 8674, "loss": 0.5195741653442383, "lr": 1.6801993548719368e-06, "epoch": 0.5978787180078395, "percentage": 29.89, "elapsed_time": "1:44:58", "remaining_time": "4:06:09"} +{"current_steps": 2594, "total_steps": 8674, "loss": 0.5452337265014648, "lr": 1.6799198305893077e-06, "epoch": 0.5981092921374221, "percentage": 29.91, "elapsed_time": "1:45:00", "remaining_time": "4:06:07"} +{"current_steps": 2595, "total_steps": 8674, "loss": 0.4802110493183136, "lr": 1.6796402074734402e-06, "epoch": 0.5983398662670049, "percentage": 29.92, "elapsed_time": "1:45:03", "remaining_time": "4:06:05"} +{"current_steps": 2596, "total_steps": 8674, "loss": 0.48554790019989014, "lr": 1.679360485564981e-06, "epoch": 0.5985704403965875, "percentage": 29.93, "elapsed_time": "1:45:05", "remaining_time": "4:06:02"} +{"current_steps": 2597, "total_steps": 8674, "loss": 0.5151324272155762, "lr": 1.6790806649045896e-06, "epoch": 0.5988010145261702, "percentage": 29.94, "elapsed_time": "1:45:07", "remaining_time": "4:06:00"} +{"current_steps": 2598, "total_steps": 8674, "loss": 0.5122699737548828, "lr": 1.6788007455329419e-06, "epoch": 0.5990315886557528, "percentage": 29.95, "elapsed_time": "1:45:10", "remaining_time": "4:05:57"} +{"current_steps": 2599, "total_steps": 8674, "loss": 0.47776496410369873, "lr": 1.6785207274907258e-06, "epoch": 0.5992621627853355, "percentage": 29.96, "elapsed_time": "1:45:12", "remaining_time": "4:05:54"} +{"current_steps": 2600, "total_steps": 8674, "loss": 0.5653492212295532, "lr": 1.6782406108186455e-06, "epoch": 0.5994927369149181, "percentage": 29.97, "elapsed_time": "1:45:14", "remaining_time": "4:05:52"} +{"current_steps": 2601, "total_steps": 8674, "loss": 0.44313424825668335, "lr": 
1.677960395557419e-06, "epoch": 0.5997233110445008, "percentage": 29.99, "elapsed_time": "1:45:18", "remaining_time": "4:05:52"} +{"current_steps": 2602, "total_steps": 8674, "loss": 0.40465259552001953, "lr": 1.677680081747778e-06, "epoch": 0.5999538851740834, "percentage": 30.0, "elapsed_time": "1:45:20", "remaining_time": "4:05:49"} +{"current_steps": 2603, "total_steps": 8674, "loss": 0.5488068461418152, "lr": 1.6773996694304687e-06, "epoch": 0.6001844593036662, "percentage": 30.01, "elapsed_time": "1:45:23", "remaining_time": "4:05:47"} +{"current_steps": 2604, "total_steps": 8674, "loss": 0.5122859477996826, "lr": 1.6771191586462523e-06, "epoch": 0.6004150334332488, "percentage": 30.02, "elapsed_time": "1:45:25", "remaining_time": "4:05:45"} +{"current_steps": 2605, "total_steps": 8674, "loss": 0.47173869609832764, "lr": 1.6768385494359039e-06, "epoch": 0.6006456075628315, "percentage": 30.03, "elapsed_time": "1:45:28", "remaining_time": "4:05:42"} +{"current_steps": 2606, "total_steps": 8674, "loss": 0.527764081954956, "lr": 1.6765578418402129e-06, "epoch": 0.6008761816924141, "percentage": 30.04, "elapsed_time": "1:45:30", "remaining_time": "4:05:40"} +{"current_steps": 2607, "total_steps": 8674, "loss": 0.5399610996246338, "lr": 1.6762770358999826e-06, "epoch": 0.6011067558219968, "percentage": 30.06, "elapsed_time": "1:45:32", "remaining_time": "4:05:37"} +{"current_steps": 2608, "total_steps": 8674, "loss": 0.3441581428050995, "lr": 1.6759961316560314e-06, "epoch": 0.6013373299515794, "percentage": 30.07, "elapsed_time": "1:45:35", "remaining_time": "4:05:35"} +{"current_steps": 2609, "total_steps": 8674, "loss": 0.5027580857276917, "lr": 1.6757151291491916e-06, "epoch": 0.6015679040811621, "percentage": 30.08, "elapsed_time": "1:45:37", "remaining_time": "4:05:32"} +{"current_steps": 2610, "total_steps": 8674, "loss": 0.3898310363292694, "lr": 1.6754340284203095e-06, "epoch": 0.6017984782107447, "percentage": 30.09, "elapsed_time": "1:45:39", 
"remaining_time": "4:05:29"} +{"current_steps": 2611, "total_steps": 8674, "loss": 0.5577199459075928, "lr": 1.675152829510246e-06, "epoch": 0.6020290523403274, "percentage": 30.1, "elapsed_time": "1:45:42", "remaining_time": "4:05:27"} +{"current_steps": 2612, "total_steps": 8674, "loss": 0.47849035263061523, "lr": 1.6748715324598763e-06, "epoch": 0.60225962646991, "percentage": 30.11, "elapsed_time": "1:45:44", "remaining_time": "4:05:25"} +{"current_steps": 2613, "total_steps": 8674, "loss": 0.46845290064811707, "lr": 1.6745901373100896e-06, "epoch": 0.6024902005994928, "percentage": 30.12, "elapsed_time": "1:45:47", "remaining_time": "4:05:22"} +{"current_steps": 2614, "total_steps": 8674, "loss": 0.46008870005607605, "lr": 1.6743086441017899e-06, "epoch": 0.6027207747290754, "percentage": 30.14, "elapsed_time": "1:45:49", "remaining_time": "4:05:19"} +{"current_steps": 2615, "total_steps": 8674, "loss": 0.44386154413223267, "lr": 1.6740270528758948e-06, "epoch": 0.6029513488586581, "percentage": 30.15, "elapsed_time": "1:45:51", "remaining_time": "4:05:17"} +{"current_steps": 2616, "total_steps": 8674, "loss": 0.495368629693985, "lr": 1.6737453636733364e-06, "epoch": 0.6031819229882407, "percentage": 30.16, "elapsed_time": "1:45:54", "remaining_time": "4:05:14"} +{"current_steps": 2617, "total_steps": 8674, "loss": 0.519428551197052, "lr": 1.6734635765350613e-06, "epoch": 0.6034124971178234, "percentage": 30.17, "elapsed_time": "1:45:56", "remaining_time": "4:05:11"} +{"current_steps": 2618, "total_steps": 8674, "loss": 0.49346470832824707, "lr": 1.6731816915020302e-06, "epoch": 0.603643071247406, "percentage": 30.18, "elapsed_time": "1:45:58", "remaining_time": "4:05:09"} +{"current_steps": 2619, "total_steps": 8674, "loss": 0.554854691028595, "lr": 1.6728997086152173e-06, "epoch": 0.6038736453769887, "percentage": 30.19, "elapsed_time": "1:46:01", "remaining_time": "4:05:07"} +{"current_steps": 2620, "total_steps": 8674, "loss": 0.4930881857872009, "lr": 
1.6726176279156125e-06, "epoch": 0.6041042195065713, "percentage": 30.21, "elapsed_time": "1:46:03", "remaining_time": "4:05:04"} +{"current_steps": 2621, "total_steps": 8674, "loss": 0.4082447588443756, "lr": 1.6723354494442186e-06, "epoch": 0.604334793636154, "percentage": 30.22, "elapsed_time": "1:46:06", "remaining_time": "4:05:02"} +{"current_steps": 2622, "total_steps": 8674, "loss": 0.5151821374893188, "lr": 1.6720531732420531e-06, "epoch": 0.6045653677657367, "percentage": 30.23, "elapsed_time": "1:46:08", "remaining_time": "4:04:59"} +{"current_steps": 2623, "total_steps": 8674, "loss": 0.44579264521598816, "lr": 1.671770799350148e-06, "epoch": 0.6047959418953194, "percentage": 30.24, "elapsed_time": "1:46:10", "remaining_time": "4:04:56"} +{"current_steps": 2624, "total_steps": 8674, "loss": 0.4937717020511627, "lr": 1.6714883278095489e-06, "epoch": 0.605026516024902, "percentage": 30.25, "elapsed_time": "1:46:13", "remaining_time": "4:04:54"} +{"current_steps": 2625, "total_steps": 8674, "loss": 0.46298685669898987, "lr": 1.671205758661316e-06, "epoch": 0.6052570901544847, "percentage": 30.26, "elapsed_time": "1:46:15", "remaining_time": "4:04:51"} +{"current_steps": 2626, "total_steps": 8674, "loss": 0.5535221695899963, "lr": 1.6709230919465233e-06, "epoch": 0.6054876642840673, "percentage": 30.27, "elapsed_time": "1:46:17", "remaining_time": "4:04:49"} +{"current_steps": 2627, "total_steps": 8674, "loss": 0.5289112329483032, "lr": 1.6706403277062599e-06, "epoch": 0.60571823841365, "percentage": 30.29, "elapsed_time": "1:46:20", "remaining_time": "4:04:47"} +{"current_steps": 2628, "total_steps": 8674, "loss": 0.506280779838562, "lr": 1.6703574659816285e-06, "epoch": 0.6059488125432326, "percentage": 30.3, "elapsed_time": "1:46:22", "remaining_time": "4:04:44"} +{"current_steps": 2629, "total_steps": 8674, "loss": 0.504257082939148, "lr": 1.6700745068137451e-06, "epoch": 0.6061793866728153, "percentage": 30.31, "elapsed_time": "1:46:25", 
"remaining_time": "4:04:41"} +{"current_steps": 2630, "total_steps": 8674, "loss": 0.624682605266571, "lr": 1.6697914502437411e-06, "epoch": 0.606409960802398, "percentage": 30.32, "elapsed_time": "1:46:27", "remaining_time": "4:04:39"} +{"current_steps": 2631, "total_steps": 8674, "loss": 0.4539645314216614, "lr": 1.6695082963127617e-06, "epoch": 0.6066405349319807, "percentage": 30.33, "elapsed_time": "1:46:29", "remaining_time": "4:04:36"} +{"current_steps": 2632, "total_steps": 8674, "loss": 0.5461890697479248, "lr": 1.6692250450619665e-06, "epoch": 0.6068711090615633, "percentage": 30.34, "elapsed_time": "1:46:32", "remaining_time": "4:04:34"} +{"current_steps": 2633, "total_steps": 8674, "loss": 0.615606427192688, "lr": 1.6689416965325282e-06, "epoch": 0.607101683191146, "percentage": 30.36, "elapsed_time": "1:46:34", "remaining_time": "4:04:31"} +{"current_steps": 2634, "total_steps": 8674, "loss": 0.5355387926101685, "lr": 1.668658250765635e-06, "epoch": 0.6073322573207286, "percentage": 30.37, "elapsed_time": "1:46:37", "remaining_time": "4:04:29"} +{"current_steps": 2635, "total_steps": 8674, "loss": 0.5804985165596008, "lr": 1.6683747078024886e-06, "epoch": 0.6075628314503113, "percentage": 30.38, "elapsed_time": "1:46:39", "remaining_time": "4:04:26"} +{"current_steps": 2636, "total_steps": 8674, "loss": 0.4514031410217285, "lr": 1.6680910676843042e-06, "epoch": 0.6077934055798939, "percentage": 30.39, "elapsed_time": "1:46:42", "remaining_time": "4:04:24"} +{"current_steps": 2637, "total_steps": 8674, "loss": 0.5621001720428467, "lr": 1.6678073304523123e-06, "epoch": 0.6080239797094766, "percentage": 30.4, "elapsed_time": "1:46:44", "remaining_time": "4:04:21"} +{"current_steps": 2638, "total_steps": 8674, "loss": 0.49387669563293457, "lr": 1.667523496147757e-06, "epoch": 0.6082545538390592, "percentage": 30.41, "elapsed_time": "1:46:46", "remaining_time": "4:04:19"} +{"current_steps": 2639, "total_steps": 8674, "loss": 0.5857938528060913, "lr": 
1.6672395648118966e-06, "epoch": 0.608485127968642, "percentage": 30.42, "elapsed_time": "1:46:49", "remaining_time": "4:04:16"} +{"current_steps": 2640, "total_steps": 8674, "loss": 0.46403199434280396, "lr": 1.6669555364860029e-06, "epoch": 0.6087157020982246, "percentage": 30.44, "elapsed_time": "1:46:51", "remaining_time": "4:04:14"} +{"current_steps": 2641, "total_steps": 8674, "loss": 0.4998488128185272, "lr": 1.6666714112113627e-06, "epoch": 0.6089462762278073, "percentage": 30.45, "elapsed_time": "1:46:53", "remaining_time": "4:04:11"} +{"current_steps": 2642, "total_steps": 8674, "loss": 0.6291745901107788, "lr": 1.6663871890292765e-06, "epoch": 0.6091768503573899, "percentage": 30.46, "elapsed_time": "1:46:56", "remaining_time": "4:04:09"} +{"current_steps": 2643, "total_steps": 8674, "loss": 0.6326058506965637, "lr": 1.6661028699810587e-06, "epoch": 0.6094074244869726, "percentage": 30.47, "elapsed_time": "1:46:58", "remaining_time": "4:04:06"} +{"current_steps": 2644, "total_steps": 8674, "loss": 0.5737805366516113, "lr": 1.6658184541080378e-06, "epoch": 0.6096379986165552, "percentage": 30.48, "elapsed_time": "1:47:01", "remaining_time": "4:04:04"} +{"current_steps": 2645, "total_steps": 8674, "loss": 0.565047025680542, "lr": 1.6655339414515568e-06, "epoch": 0.6098685727461379, "percentage": 30.49, "elapsed_time": "1:47:03", "remaining_time": "4:04:02"} +{"current_steps": 2646, "total_steps": 8674, "loss": 0.5157296061515808, "lr": 1.6652493320529724e-06, "epoch": 0.6100991468757205, "percentage": 30.5, "elapsed_time": "1:47:06", "remaining_time": "4:03:59"} +{"current_steps": 2647, "total_steps": 8674, "loss": 0.4475112855434418, "lr": 1.6649646259536554e-06, "epoch": 0.6103297210053032, "percentage": 30.52, "elapsed_time": "1:47:08", "remaining_time": "4:03:57"} +{"current_steps": 2648, "total_steps": 8674, "loss": 0.5072107315063477, "lr": 1.6646798231949911e-06, "epoch": 0.6105602951348859, "percentage": 30.53, "elapsed_time": "1:47:10", 
"remaining_time": "4:03:54"} +{"current_steps": 2649, "total_steps": 8674, "loss": 0.44673952460289, "lr": 1.6643949238183778e-06, "epoch": 0.6107908692644686, "percentage": 30.54, "elapsed_time": "1:47:13", "remaining_time": "4:03:51"} +{"current_steps": 2650, "total_steps": 8674, "loss": 0.47460734844207764, "lr": 1.6641099278652293e-06, "epoch": 0.6110214433940512, "percentage": 30.55, "elapsed_time": "1:47:15", "remaining_time": "4:03:49"} +{"current_steps": 2651, "total_steps": 8674, "loss": 0.4529770612716675, "lr": 1.6638248353769718e-06, "epoch": 0.6112520175236339, "percentage": 30.56, "elapsed_time": "1:47:17", "remaining_time": "4:03:46"} +{"current_steps": 2652, "total_steps": 8674, "loss": 0.5200958251953125, "lr": 1.6635396463950473e-06, "epoch": 0.6114825916532165, "percentage": 30.57, "elapsed_time": "1:47:20", "remaining_time": "4:03:43"} +{"current_steps": 2653, "total_steps": 8674, "loss": 0.465969979763031, "lr": 1.66325436096091e-06, "epoch": 0.6117131657827992, "percentage": 30.59, "elapsed_time": "1:47:22", "remaining_time": "4:03:41"} +{"current_steps": 2654, "total_steps": 8674, "loss": 0.5173276662826538, "lr": 1.6629689791160298e-06, "epoch": 0.6119437399123818, "percentage": 30.6, "elapsed_time": "1:47:25", "remaining_time": "4:03:39"} +{"current_steps": 2655, "total_steps": 8674, "loss": 0.5539907217025757, "lr": 1.6626835009018892e-06, "epoch": 0.6121743140419645, "percentage": 30.61, "elapsed_time": "1:47:27", "remaining_time": "4:03:36"} +{"current_steps": 2656, "total_steps": 8674, "loss": 0.5617278814315796, "lr": 1.6623979263599857e-06, "epoch": 0.6124048881715471, "percentage": 30.62, "elapsed_time": "1:47:30", "remaining_time": "4:03:35"} +{"current_steps": 2657, "total_steps": 8674, "loss": 0.46238285303115845, "lr": 1.6621122555318304e-06, "epoch": 0.6126354623011299, "percentage": 30.63, "elapsed_time": "1:47:32", "remaining_time": "4:03:33"} +{"current_steps": 2658, "total_steps": 8674, "loss": 0.49247878789901733, "lr": 
1.6618264884589484e-06, "epoch": 0.6128660364307125, "percentage": 30.64, "elapsed_time": "1:47:35", "remaining_time": "4:03:30"} +{"current_steps": 2659, "total_steps": 8674, "loss": 0.4844072163105011, "lr": 1.6615406251828793e-06, "epoch": 0.6130966105602952, "percentage": 30.65, "elapsed_time": "1:47:37", "remaining_time": "4:03:27"} +{"current_steps": 2660, "total_steps": 8674, "loss": 0.47372323274612427, "lr": 1.6612546657451754e-06, "epoch": 0.6133271846898778, "percentage": 30.67, "elapsed_time": "1:47:39", "remaining_time": "4:03:25"} +{"current_steps": 2661, "total_steps": 8674, "loss": 0.5287426114082336, "lr": 1.660968610187404e-06, "epoch": 0.6135577588194605, "percentage": 30.68, "elapsed_time": "1:47:42", "remaining_time": "4:03:23"} +{"current_steps": 2662, "total_steps": 8674, "loss": 0.5862994194030762, "lr": 1.6606824585511471e-06, "epoch": 0.6137883329490431, "percentage": 30.69, "elapsed_time": "1:47:45", "remaining_time": "4:03:21"} +{"current_steps": 2663, "total_steps": 8674, "loss": 0.4866197109222412, "lr": 1.6603962108779986e-06, "epoch": 0.6140189070786258, "percentage": 30.7, "elapsed_time": "1:47:47", "remaining_time": "4:03:18"} +{"current_steps": 2664, "total_steps": 8674, "loss": 0.5561861991882324, "lr": 1.660109867209568e-06, "epoch": 0.6142494812082084, "percentage": 30.71, "elapsed_time": "1:47:49", "remaining_time": "4:03:16"} +{"current_steps": 2665, "total_steps": 8674, "loss": 0.4878644645214081, "lr": 1.659823427587478e-06, "epoch": 0.6144800553377912, "percentage": 30.72, "elapsed_time": "1:47:52", "remaining_time": "4:03:13"} +{"current_steps": 2666, "total_steps": 8674, "loss": 0.5371976494789124, "lr": 1.659536892053366e-06, "epoch": 0.6147106294673738, "percentage": 30.74, "elapsed_time": "1:47:54", "remaining_time": "4:03:11"} +{"current_steps": 2667, "total_steps": 8674, "loss": 0.4816581606864929, "lr": 1.6592502606488824e-06, "epoch": 0.6149412035969565, "percentage": 30.75, "elapsed_time": "1:47:56", 
"remaining_time": "4:03:08"} +{"current_steps": 2668, "total_steps": 8674, "loss": 0.5105183124542236, "lr": 1.6589635334156919e-06, "epoch": 0.6151717777265391, "percentage": 30.76, "elapsed_time": "1:47:59", "remaining_time": "4:03:05"} +{"current_steps": 2669, "total_steps": 8674, "loss": 0.5524129271507263, "lr": 1.6586767103954737e-06, "epoch": 0.6154023518561217, "percentage": 30.77, "elapsed_time": "1:48:01", "remaining_time": "4:03:03"} +{"current_steps": 2670, "total_steps": 8674, "loss": 0.42373913526535034, "lr": 1.6583897916299204e-06, "epoch": 0.6156329259857044, "percentage": 30.78, "elapsed_time": "1:48:04", "remaining_time": "4:03:01"} +{"current_steps": 2671, "total_steps": 8674, "loss": 0.5620803833007812, "lr": 1.658102777160738e-06, "epoch": 0.615863500115287, "percentage": 30.79, "elapsed_time": "1:48:06", "remaining_time": "4:02:58"} +{"current_steps": 2672, "total_steps": 8674, "loss": 0.38180166482925415, "lr": 1.6578156670296472e-06, "epoch": 0.6160940742448697, "percentage": 30.8, "elapsed_time": "1:48:09", "remaining_time": "4:02:56"} +{"current_steps": 2673, "total_steps": 8674, "loss": 0.48596519231796265, "lr": 1.6575284612783825e-06, "epoch": 0.6163246483744523, "percentage": 30.82, "elapsed_time": "1:48:11", "remaining_time": "4:02:53"} +{"current_steps": 2674, "total_steps": 8674, "loss": 0.5098127126693726, "lr": 1.657241159948692e-06, "epoch": 0.616555222504035, "percentage": 30.83, "elapsed_time": "1:48:13", "remaining_time": "4:02:50"} +{"current_steps": 2675, "total_steps": 8674, "loss": 0.5650018453598022, "lr": 1.6569537630823382e-06, "epoch": 0.6167857966336177, "percentage": 30.84, "elapsed_time": "1:48:16", "remaining_time": "4:02:48"} +{"current_steps": 2676, "total_steps": 8674, "loss": 0.45061948895454407, "lr": 1.6566662707210967e-06, "epoch": 0.6170163707632004, "percentage": 30.85, "elapsed_time": "1:48:18", "remaining_time": "4:02:45"} +{"current_steps": 2677, "total_steps": 8674, "loss": 0.4292137622833252, "lr": 
1.6563786829067576e-06, "epoch": 0.617246944892783, "percentage": 30.86, "elapsed_time": "1:48:20", "remaining_time": "4:02:43"} +{"current_steps": 2678, "total_steps": 8674, "loss": 0.4837046265602112, "lr": 1.656090999681125e-06, "epoch": 0.6174775190223657, "percentage": 30.87, "elapsed_time": "1:48:23", "remaining_time": "4:02:41"} +{"current_steps": 2679, "total_steps": 8674, "loss": 0.43580353260040283, "lr": 1.6558032210860162e-06, "epoch": 0.6177080931519483, "percentage": 30.89, "elapsed_time": "1:48:26", "remaining_time": "4:02:39"} +{"current_steps": 2680, "total_steps": 8674, "loss": 0.47321656346321106, "lr": 1.6555153471632628e-06, "epoch": 0.617938667281531, "percentage": 30.9, "elapsed_time": "1:48:28", "remaining_time": "4:02:36"} +{"current_steps": 2681, "total_steps": 8674, "loss": 0.47431111335754395, "lr": 1.65522737795471e-06, "epoch": 0.6181692414111136, "percentage": 30.91, "elapsed_time": "1:48:30", "remaining_time": "4:02:33"} +{"current_steps": 2682, "total_steps": 8674, "loss": 0.38062599301338196, "lr": 1.6549393135022181e-06, "epoch": 0.6183998155406963, "percentage": 30.92, "elapsed_time": "1:48:33", "remaining_time": "4:02:31"} +{"current_steps": 2683, "total_steps": 8674, "loss": 0.5941839218139648, "lr": 1.6546511538476584e-06, "epoch": 0.618630389670279, "percentage": 30.93, "elapsed_time": "1:48:35", "remaining_time": "4:02:28"} +{"current_steps": 2684, "total_steps": 8674, "loss": 0.5323158502578735, "lr": 1.6543628990329195e-06, "epoch": 0.6188609637998617, "percentage": 30.94, "elapsed_time": "1:48:37", "remaining_time": "4:02:26"} +{"current_steps": 2685, "total_steps": 8674, "loss": 0.3814772367477417, "lr": 1.654074549099901e-06, "epoch": 0.6190915379294443, "percentage": 30.95, "elapsed_time": "1:48:40", "remaining_time": "4:02:23"} +{"current_steps": 2686, "total_steps": 8674, "loss": 0.5520694255828857, "lr": 1.6537861040905181e-06, "epoch": 0.619322112059027, "percentage": 30.97, "elapsed_time": "1:48:43", 
"remaining_time": "4:02:22"} +{"current_steps": 2687, "total_steps": 8674, "loss": 0.5514999628067017, "lr": 1.653497564046699e-06, "epoch": 0.6195526861886096, "percentage": 30.98, "elapsed_time": "1:48:45", "remaining_time": "4:02:19"} +{"current_steps": 2688, "total_steps": 8674, "loss": 0.39057493209838867, "lr": 1.653208929010386e-06, "epoch": 0.6197832603181923, "percentage": 30.99, "elapsed_time": "1:48:47", "remaining_time": "4:02:16"} +{"current_steps": 2689, "total_steps": 8674, "loss": 0.4941304922103882, "lr": 1.6529201990235352e-06, "epoch": 0.6200138344477749, "percentage": 31.0, "elapsed_time": "1:48:50", "remaining_time": "4:02:14"} +{"current_steps": 2690, "total_steps": 8674, "loss": 0.539762020111084, "lr": 1.6526313741281164e-06, "epoch": 0.6202444085773576, "percentage": 31.01, "elapsed_time": "1:48:52", "remaining_time": "4:02:11"} +{"current_steps": 2691, "total_steps": 8674, "loss": 0.49524787068367004, "lr": 1.6523424543661127e-06, "epoch": 0.6204749827069402, "percentage": 31.02, "elapsed_time": "1:48:54", "remaining_time": "4:02:09"} +{"current_steps": 2692, "total_steps": 8674, "loss": 0.4261528253555298, "lr": 1.6520534397795225e-06, "epoch": 0.620705556836523, "percentage": 31.04, "elapsed_time": "1:48:57", "remaining_time": "4:02:06"} +{"current_steps": 2693, "total_steps": 8674, "loss": 0.578548789024353, "lr": 1.6517643304103563e-06, "epoch": 0.6209361309661056, "percentage": 31.05, "elapsed_time": "1:48:59", "remaining_time": "4:02:04"} +{"current_steps": 2694, "total_steps": 8674, "loss": 0.4766680598258972, "lr": 1.6514751263006393e-06, "epoch": 0.6211667050956883, "percentage": 31.06, "elapsed_time": "1:49:01", "remaining_time": "4:02:01"} +{"current_steps": 2695, "total_steps": 8674, "loss": 0.4146459996700287, "lr": 1.6511858274924098e-06, "epoch": 0.6213972792252709, "percentage": 31.07, "elapsed_time": "1:49:04", "remaining_time": "4:01:59"} +{"current_steps": 2696, "total_steps": 8674, "loss": 0.5148390531539917, "lr": 
1.650896434027721e-06, "epoch": 0.6216278533548536, "percentage": 31.08, "elapsed_time": "1:49:06", "remaining_time": "4:01:56"} +{"current_steps": 2697, "total_steps": 8674, "loss": 0.4830890893936157, "lr": 1.6506069459486388e-06, "epoch": 0.6218584274844362, "percentage": 31.09, "elapsed_time": "1:49:09", "remaining_time": "4:01:54"} +{"current_steps": 2698, "total_steps": 8674, "loss": 0.4550463557243347, "lr": 1.6503173632972434e-06, "epoch": 0.6220890016140189, "percentage": 31.1, "elapsed_time": "1:49:11", "remaining_time": "4:01:51"} +{"current_steps": 2699, "total_steps": 8674, "loss": 0.5811448097229004, "lr": 1.6500276861156284e-06, "epoch": 0.6223195757436015, "percentage": 31.12, "elapsed_time": "1:49:14", "remaining_time": "4:01:49"} +{"current_steps": 2700, "total_steps": 8674, "loss": 0.44733545184135437, "lr": 1.6497379144459014e-06, "epoch": 0.6225501498731842, "percentage": 31.13, "elapsed_time": "1:49:16", "remaining_time": "4:01:46"} +{"current_steps": 2701, "total_steps": 8674, "loss": 0.4379687011241913, "lr": 1.6494480483301835e-06, "epoch": 0.6227807240027669, "percentage": 31.14, "elapsed_time": "1:49:20", "remaining_time": "4:01:47"} +{"current_steps": 2702, "total_steps": 8674, "loss": 0.5163959860801697, "lr": 1.6491580878106102e-06, "epoch": 0.6230112981323496, "percentage": 31.15, "elapsed_time": "1:49:22", "remaining_time": "4:01:45"} +{"current_steps": 2703, "total_steps": 8674, "loss": 0.5636980533599854, "lr": 1.6488680329293297e-06, "epoch": 0.6232418722619322, "percentage": 31.16, "elapsed_time": "1:49:25", "remaining_time": "4:01:43"} +{"current_steps": 2704, "total_steps": 8674, "loss": 0.46942776441574097, "lr": 1.6485778837285044e-06, "epoch": 0.6234724463915149, "percentage": 31.17, "elapsed_time": "1:49:28", "remaining_time": "4:01:41"} +{"current_steps": 2705, "total_steps": 8674, "loss": 0.5104436278343201, "lr": 1.6482876402503103e-06, "epoch": 0.6237030205210975, "percentage": 31.19, "elapsed_time": "1:49:30", 
"remaining_time": "4:01:38"} +{"current_steps": 2706, "total_steps": 8674, "loss": 0.4689507484436035, "lr": 1.6479973025369379e-06, "epoch": 0.6239335946506802, "percentage": 31.2, "elapsed_time": "1:49:32", "remaining_time": "4:01:36"} +{"current_steps": 2707, "total_steps": 8674, "loss": 0.4009973406791687, "lr": 1.64770687063059e-06, "epoch": 0.6241641687802628, "percentage": 31.21, "elapsed_time": "1:49:35", "remaining_time": "4:01:33"} +{"current_steps": 2708, "total_steps": 8674, "loss": 0.4938286542892456, "lr": 1.6474163445734846e-06, "epoch": 0.6243947429098455, "percentage": 31.22, "elapsed_time": "1:49:37", "remaining_time": "4:01:31"} +{"current_steps": 2709, "total_steps": 8674, "loss": 0.4756525754928589, "lr": 1.6471257244078519e-06, "epoch": 0.6246253170394281, "percentage": 31.23, "elapsed_time": "1:49:40", "remaining_time": "4:01:28"} +{"current_steps": 2710, "total_steps": 8674, "loss": 0.4322332739830017, "lr": 1.6468350101759366e-06, "epoch": 0.6248558911690109, "percentage": 31.24, "elapsed_time": "1:49:42", "remaining_time": "4:01:26"} +{"current_steps": 2711, "total_steps": 8674, "loss": 0.4605666995048523, "lr": 1.6465442019199972e-06, "epoch": 0.6250864652985935, "percentage": 31.25, "elapsed_time": "1:49:45", "remaining_time": "4:01:25"} +{"current_steps": 2712, "total_steps": 8674, "loss": 0.4576036334037781, "lr": 1.6462532996823053e-06, "epoch": 0.6253170394281762, "percentage": 31.27, "elapsed_time": "1:49:47", "remaining_time": "4:01:22"} +{"current_steps": 2713, "total_steps": 8674, "loss": 0.4860233664512634, "lr": 1.645962303505147e-06, "epoch": 0.6255476135577588, "percentage": 31.28, "elapsed_time": "1:49:50", "remaining_time": "4:01:20"} +{"current_steps": 2714, "total_steps": 8674, "loss": 0.4717915654182434, "lr": 1.6456712134308213e-06, "epoch": 0.6257781876873415, "percentage": 31.29, "elapsed_time": "1:49:52", "remaining_time": "4:01:17"} +{"current_steps": 2715, "total_steps": 8674, "loss": 0.49637067317962646, "lr": 
1.645380029501641e-06, "epoch": 0.6260087618169241, "percentage": 31.3, "elapsed_time": "1:49:55", "remaining_time": "4:01:15"} +{"current_steps": 2716, "total_steps": 8674, "loss": 0.45388346910476685, "lr": 1.6450887517599326e-06, "epoch": 0.6262393359465068, "percentage": 31.31, "elapsed_time": "1:49:57", "remaining_time": "4:01:12"} +{"current_steps": 2717, "total_steps": 8674, "loss": 0.5549031496047974, "lr": 1.6447973802480362e-06, "epoch": 0.6264699100760894, "percentage": 31.32, "elapsed_time": "1:50:00", "remaining_time": "4:01:10"} +{"current_steps": 2718, "total_steps": 8674, "loss": 0.39759719371795654, "lr": 1.644505915008306e-06, "epoch": 0.6267004842056721, "percentage": 31.34, "elapsed_time": "1:50:02", "remaining_time": "4:01:08"} +{"current_steps": 2719, "total_steps": 8674, "loss": 0.5126739740371704, "lr": 1.644214356083109e-06, "epoch": 0.6269310583352548, "percentage": 31.35, "elapsed_time": "1:50:04", "remaining_time": "4:01:05"} +{"current_steps": 2720, "total_steps": 8674, "loss": 0.41424083709716797, "lr": 1.6439227035148265e-06, "epoch": 0.6271616324648375, "percentage": 31.36, "elapsed_time": "1:50:07", "remaining_time": "4:01:03"} +{"current_steps": 2721, "total_steps": 8674, "loss": 0.5829803943634033, "lr": 1.643630957345853e-06, "epoch": 0.6273922065944201, "percentage": 31.37, "elapsed_time": "1:50:10", "remaining_time": "4:01:01"} +{"current_steps": 2722, "total_steps": 8674, "loss": 0.4736567437648773, "lr": 1.6433391176185972e-06, "epoch": 0.6276227807240028, "percentage": 31.38, "elapsed_time": "1:50:12", "remaining_time": "4:00:58"} +{"current_steps": 2723, "total_steps": 8674, "loss": 0.41305306553840637, "lr": 1.6430471843754804e-06, "epoch": 0.6278533548535854, "percentage": 31.39, "elapsed_time": "1:50:14", "remaining_time": "4:00:56"} +{"current_steps": 2724, "total_steps": 8674, "loss": 0.38422563672065735, "lr": 1.6427551576589383e-06, "epoch": 0.6280839289831681, "percentage": 31.4, "elapsed_time": "1:50:17", 
"remaining_time": "4:00:53"} +{"current_steps": 2725, "total_steps": 8674, "loss": 0.48302626609802246, "lr": 1.6424630375114199e-06, "epoch": 0.6283145031127507, "percentage": 31.42, "elapsed_time": "1:50:19", "remaining_time": "4:00:51"} +{"current_steps": 2726, "total_steps": 8674, "loss": 0.4657328128814697, "lr": 1.6421708239753875e-06, "epoch": 0.6285450772423334, "percentage": 31.43, "elapsed_time": "1:50:21", "remaining_time": "4:00:48"} +{"current_steps": 2727, "total_steps": 8674, "loss": 0.46126431226730347, "lr": 1.641878517093318e-06, "epoch": 0.628775651371916, "percentage": 31.44, "elapsed_time": "1:50:24", "remaining_time": "4:00:46"} +{"current_steps": 2728, "total_steps": 8674, "loss": 0.5196214914321899, "lr": 1.6415861169077007e-06, "epoch": 0.6290062255014988, "percentage": 31.45, "elapsed_time": "1:50:26", "remaining_time": "4:00:44"} +{"current_steps": 2729, "total_steps": 8674, "loss": 0.5007073879241943, "lr": 1.641293623461039e-06, "epoch": 0.6292367996310814, "percentage": 31.46, "elapsed_time": "1:50:29", "remaining_time": "4:00:41"} +{"current_steps": 2730, "total_steps": 8674, "loss": 0.4699769616127014, "lr": 1.64100103679585e-06, "epoch": 0.6294673737606641, "percentage": 31.47, "elapsed_time": "1:50:31", "remaining_time": "4:00:39"} +{"current_steps": 2731, "total_steps": 8674, "loss": 0.5487842559814453, "lr": 1.6407083569546636e-06, "epoch": 0.6296979478902467, "percentage": 31.48, "elapsed_time": "1:50:34", "remaining_time": "4:00:36"} +{"current_steps": 2732, "total_steps": 8674, "loss": 0.42733538150787354, "lr": 1.6404155839800244e-06, "epoch": 0.6299285220198294, "percentage": 31.5, "elapsed_time": "1:50:36", "remaining_time": "4:00:34"} +{"current_steps": 2733, "total_steps": 8674, "loss": 0.5105363726615906, "lr": 1.64012271791449e-06, "epoch": 0.630159096149412, "percentage": 31.51, "elapsed_time": "1:50:38", "remaining_time": "4:00:31"} +{"current_steps": 2734, "total_steps": 8674, "loss": 0.5836968421936035, "lr": 
1.6398297588006305e-06, "epoch": 0.6303896702789947, "percentage": 31.52, "elapsed_time": "1:50:41", "remaining_time": "4:00:28"} +{"current_steps": 2735, "total_steps": 8674, "loss": 0.4350558817386627, "lr": 1.639536706681031e-06, "epoch": 0.6306202444085773, "percentage": 31.53, "elapsed_time": "1:50:43", "remaining_time": "4:00:26"} +{"current_steps": 2736, "total_steps": 8674, "loss": 0.5388341546058655, "lr": 1.63924356159829e-06, "epoch": 0.63085081853816, "percentage": 31.54, "elapsed_time": "1:50:45", "remaining_time": "4:00:23"} +{"current_steps": 2737, "total_steps": 8674, "loss": 0.4576529860496521, "lr": 1.6389503235950186e-06, "epoch": 0.6310813926677427, "percentage": 31.55, "elapsed_time": "1:50:48", "remaining_time": "4:00:21"} +{"current_steps": 2738, "total_steps": 8674, "loss": 0.4525975286960602, "lr": 1.6386569927138422e-06, "epoch": 0.6313119667973254, "percentage": 31.57, "elapsed_time": "1:50:50", "remaining_time": "4:00:19"} +{"current_steps": 2739, "total_steps": 8674, "loss": 0.42143142223358154, "lr": 1.6383635689973993e-06, "epoch": 0.631542540926908, "percentage": 31.58, "elapsed_time": "1:50:53", "remaining_time": "4:00:16"} +{"current_steps": 2740, "total_steps": 8674, "loss": 0.4440336227416992, "lr": 1.6380700524883423e-06, "epoch": 0.6317731150564907, "percentage": 31.59, "elapsed_time": "1:50:55", "remaining_time": "4:00:14"} +{"current_steps": 2741, "total_steps": 8674, "loss": 0.5009843707084656, "lr": 1.637776443229336e-06, "epoch": 0.6320036891860733, "percentage": 31.6, "elapsed_time": "1:50:58", "remaining_time": "4:00:11"} +{"current_steps": 2742, "total_steps": 8674, "loss": 0.538151741027832, "lr": 1.6374827412630604e-06, "epoch": 0.632234263315656, "percentage": 31.61, "elapsed_time": "1:51:00", "remaining_time": "4:00:09"} +{"current_steps": 2743, "total_steps": 8674, "loss": 0.550201416015625, "lr": 1.6371889466322077e-06, "epoch": 0.6324648374452386, "percentage": 31.62, "elapsed_time": "1:51:02", "remaining_time": 
"4:00:06"} +{"current_steps": 2744, "total_steps": 8674, "loss": 0.5707399845123291, "lr": 1.6368950593794836e-06, "epoch": 0.6326954115748213, "percentage": 31.63, "elapsed_time": "1:51:05", "remaining_time": "4:00:04"} +{"current_steps": 2745, "total_steps": 8674, "loss": 0.5196787714958191, "lr": 1.6366010795476082e-06, "epoch": 0.632925985704404, "percentage": 31.65, "elapsed_time": "1:51:07", "remaining_time": "4:00:02"} +{"current_steps": 2746, "total_steps": 8674, "loss": 0.5243285894393921, "lr": 1.636307007179314e-06, "epoch": 0.6331565598339867, "percentage": 31.66, "elapsed_time": "1:51:10", "remaining_time": "3:59:59"} +{"current_steps": 2747, "total_steps": 8674, "loss": 0.4202825427055359, "lr": 1.6360128423173473e-06, "epoch": 0.6333871339635693, "percentage": 31.67, "elapsed_time": "1:51:12", "remaining_time": "3:59:57"} +{"current_steps": 2748, "total_steps": 8674, "loss": 0.49080896377563477, "lr": 1.6357185850044681e-06, "epoch": 0.633617708093152, "percentage": 31.68, "elapsed_time": "1:51:15", "remaining_time": "3:59:54"} +{"current_steps": 2749, "total_steps": 8674, "loss": 0.5537371635437012, "lr": 1.6354242352834502e-06, "epoch": 0.6338482822227346, "percentage": 31.69, "elapsed_time": "1:51:17", "remaining_time": "3:59:52"} +{"current_steps": 2750, "total_steps": 8674, "loss": 0.3744293451309204, "lr": 1.6351297931970796e-06, "epoch": 0.6340788563523173, "percentage": 31.7, "elapsed_time": "1:51:19", "remaining_time": "3:59:49"} +{"current_steps": 2751, "total_steps": 8674, "loss": 0.5176748037338257, "lr": 1.634835258788157e-06, "epoch": 0.6343094304818999, "percentage": 31.72, "elapsed_time": "1:51:22", "remaining_time": "3:59:47"} +{"current_steps": 2752, "total_steps": 8674, "loss": 0.5179395079612732, "lr": 1.6345406320994952e-06, "epoch": 0.6345400046114826, "percentage": 31.73, "elapsed_time": "1:51:24", "remaining_time": "3:59:44"} +{"current_steps": 2753, "total_steps": 8674, "loss": 0.4810818135738373, "lr": 1.634245913173922e-06, 
"epoch": 0.6347705787410652, "percentage": 31.74, "elapsed_time": "1:51:27", "remaining_time": "3:59:42"} +{"current_steps": 2754, "total_steps": 8674, "loss": 0.5188307762145996, "lr": 1.6339511020542775e-06, "epoch": 0.635001152870648, "percentage": 31.75, "elapsed_time": "1:51:29", "remaining_time": "3:59:40"} +{"current_steps": 2755, "total_steps": 8674, "loss": 0.41170865297317505, "lr": 1.6336561987834151e-06, "epoch": 0.6352317270002306, "percentage": 31.76, "elapsed_time": "1:51:32", "remaining_time": "3:59:37"} +{"current_steps": 2756, "total_steps": 8674, "loss": 0.48726415634155273, "lr": 1.6333612034042025e-06, "epoch": 0.6354623011298133, "percentage": 31.77, "elapsed_time": "1:51:34", "remaining_time": "3:59:35"} +{"current_steps": 2757, "total_steps": 8674, "loss": 0.4483524560928345, "lr": 1.63306611595952e-06, "epoch": 0.6356928752593959, "percentage": 31.78, "elapsed_time": "1:51:36", "remaining_time": "3:59:32"} +{"current_steps": 2758, "total_steps": 8674, "loss": 0.3979623019695282, "lr": 1.6327709364922618e-06, "epoch": 0.6359234493889786, "percentage": 31.8, "elapsed_time": "1:51:39", "remaining_time": "3:59:30"} +{"current_steps": 2759, "total_steps": 8674, "loss": 0.461483895778656, "lr": 1.6324756650453346e-06, "epoch": 0.6361540235185612, "percentage": 31.81, "elapsed_time": "1:51:41", "remaining_time": "3:59:27"} +{"current_steps": 2760, "total_steps": 8674, "loss": 0.40054333209991455, "lr": 1.6321803016616598e-06, "epoch": 0.6363845976481439, "percentage": 31.82, "elapsed_time": "1:51:44", "remaining_time": "3:59:25"} +{"current_steps": 2761, "total_steps": 8674, "loss": 0.534996747970581, "lr": 1.6318848463841712e-06, "epoch": 0.6366151717777265, "percentage": 31.83, "elapsed_time": "1:51:46", "remaining_time": "3:59:22"} +{"current_steps": 2762, "total_steps": 8674, "loss": 0.49408137798309326, "lr": 1.631589299255816e-06, "epoch": 0.6368457459073092, "percentage": 31.84, "elapsed_time": "1:51:49", "remaining_time": "3:59:20"} 
+{"current_steps": 2763, "total_steps": 8674, "loss": 0.49098217487335205, "lr": 1.6312936603195557e-06, "epoch": 0.6370763200368919, "percentage": 31.85, "elapsed_time": "1:51:51", "remaining_time": "3:59:18"} +{"current_steps": 2764, "total_steps": 8674, "loss": 0.4990113377571106, "lr": 1.6309979296183636e-06, "epoch": 0.6373068941664746, "percentage": 31.87, "elapsed_time": "1:51:53", "remaining_time": "3:59:15"} +{"current_steps": 2765, "total_steps": 8674, "loss": 0.49399930238723755, "lr": 1.6307021071952276e-06, "epoch": 0.6375374682960572, "percentage": 31.88, "elapsed_time": "1:51:56", "remaining_time": "3:59:13"} +{"current_steps": 2766, "total_steps": 8674, "loss": 0.5029928684234619, "lr": 1.6304061930931478e-06, "epoch": 0.6377680424256399, "percentage": 31.89, "elapsed_time": "1:51:58", "remaining_time": "3:59:10"} +{"current_steps": 2767, "total_steps": 8674, "loss": 0.5732289552688599, "lr": 1.6301101873551396e-06, "epoch": 0.6379986165552225, "percentage": 31.9, "elapsed_time": "1:52:01", "remaining_time": "3:59:08"} +{"current_steps": 2768, "total_steps": 8674, "loss": 0.47334790229797363, "lr": 1.6298140900242293e-06, "epoch": 0.6382291906848052, "percentage": 31.91, "elapsed_time": "1:52:03", "remaining_time": "3:59:05"} +{"current_steps": 2769, "total_steps": 8674, "loss": 0.44271016120910645, "lr": 1.6295179011434578e-06, "epoch": 0.6384597648143878, "percentage": 31.92, "elapsed_time": "1:52:05", "remaining_time": "3:59:03"} +{"current_steps": 2770, "total_steps": 8674, "loss": 0.5768353939056396, "lr": 1.6292216207558798e-06, "epoch": 0.6386903389439705, "percentage": 31.93, "elapsed_time": "1:52:08", "remaining_time": "3:59:01"} +{"current_steps": 2771, "total_steps": 8674, "loss": 0.48315417766571045, "lr": 1.6289252489045625e-06, "epoch": 0.6389209130735531, "percentage": 31.95, "elapsed_time": "1:52:10", "remaining_time": "3:58:58"} +{"current_steps": 2772, "total_steps": 8674, "loss": 0.5745590925216675, "lr": 1.6286287856325855e-06, 
"epoch": 0.6391514872031359, "percentage": 31.96, "elapsed_time": "1:52:13", "remaining_time": "3:58:56"} +{"current_steps": 2773, "total_steps": 8674, "loss": 0.6084291934967041, "lr": 1.6283322309830444e-06, "epoch": 0.6393820613327185, "percentage": 31.97, "elapsed_time": "1:52:15", "remaining_time": "3:58:53"} +{"current_steps": 2774, "total_steps": 8674, "loss": 0.4995007812976837, "lr": 1.6280355849990451e-06, "epoch": 0.6396126354623012, "percentage": 31.98, "elapsed_time": "1:52:18", "remaining_time": "3:58:51"} +{"current_steps": 2775, "total_steps": 8674, "loss": 0.45811381936073303, "lr": 1.6277388477237084e-06, "epoch": 0.6398432095918838, "percentage": 31.99, "elapsed_time": "1:52:20", "remaining_time": "3:58:48"} +{"current_steps": 2776, "total_steps": 8674, "loss": 0.5666211247444153, "lr": 1.6274420192001689e-06, "epoch": 0.6400737837214665, "percentage": 32.0, "elapsed_time": "1:52:23", "remaining_time": "3:58:46"} +{"current_steps": 2777, "total_steps": 8674, "loss": 0.5059396028518677, "lr": 1.6271450994715723e-06, "epoch": 0.6403043578510491, "percentage": 32.02, "elapsed_time": "1:52:25", "remaining_time": "3:58:43"} +{"current_steps": 2778, "total_steps": 8674, "loss": 0.5418530702590942, "lr": 1.6268480885810798e-06, "epoch": 0.6405349319806318, "percentage": 32.03, "elapsed_time": "1:52:27", "remaining_time": "3:58:41"} +{"current_steps": 2779, "total_steps": 8674, "loss": 0.5047061443328857, "lr": 1.6265509865718647e-06, "epoch": 0.6407655061102144, "percentage": 32.04, "elapsed_time": "1:52:30", "remaining_time": "3:58:39"} +{"current_steps": 2780, "total_steps": 8674, "loss": 0.5104432702064514, "lr": 1.6262537934871138e-06, "epoch": 0.6409960802397972, "percentage": 32.05, "elapsed_time": "1:52:32", "remaining_time": "3:58:36"} +{"current_steps": 2781, "total_steps": 8674, "loss": 0.44423484802246094, "lr": 1.625956509370027e-06, "epoch": 0.6412266543693798, "percentage": 32.06, "elapsed_time": "1:52:35", "remaining_time": "3:58:34"} 
+{"current_steps": 2782, "total_steps": 8674, "loss": 0.47618383169174194, "lr": 1.6256591342638179e-06, "epoch": 0.6414572284989624, "percentage": 32.07, "elapsed_time": "1:52:37", "remaining_time": "3:58:31"} +{"current_steps": 2783, "total_steps": 8674, "loss": 0.5423145890235901, "lr": 1.625361668211713e-06, "epoch": 0.6416878026285451, "percentage": 32.08, "elapsed_time": "1:52:39", "remaining_time": "3:58:29"} +{"current_steps": 2784, "total_steps": 8674, "loss": 0.517102837562561, "lr": 1.6250641112569515e-06, "epoch": 0.6419183767581277, "percentage": 32.1, "elapsed_time": "1:52:42", "remaining_time": "3:58:26"} +{"current_steps": 2785, "total_steps": 8674, "loss": 0.39601820707321167, "lr": 1.6247664634427864e-06, "epoch": 0.6421489508877104, "percentage": 32.11, "elapsed_time": "1:52:44", "remaining_time": "3:58:24"} +{"current_steps": 2786, "total_steps": 8674, "loss": 0.5480250120162964, "lr": 1.6244687248124843e-06, "epoch": 0.642379525017293, "percentage": 32.12, "elapsed_time": "1:52:47", "remaining_time": "3:58:22"} +{"current_steps": 2787, "total_steps": 8674, "loss": 0.4743092656135559, "lr": 1.624170895409324e-06, "epoch": 0.6426100991468757, "percentage": 32.13, "elapsed_time": "1:52:49", "remaining_time": "3:58:20"} +{"current_steps": 2788, "total_steps": 8674, "loss": 0.4595726728439331, "lr": 1.6238729752765985e-06, "epoch": 0.6428406732764583, "percentage": 32.14, "elapsed_time": "1:52:52", "remaining_time": "3:58:17"} +{"current_steps": 2789, "total_steps": 8674, "loss": 0.5058779716491699, "lr": 1.6235749644576132e-06, "epoch": 0.643071247406041, "percentage": 32.15, "elapsed_time": "1:52:54", "remaining_time": "3:58:15"} +{"current_steps": 2790, "total_steps": 8674, "loss": 0.5075543522834778, "lr": 1.623276862995687e-06, "epoch": 0.6433018215356237, "percentage": 32.17, "elapsed_time": "1:52:57", "remaining_time": "3:58:12"} +{"current_steps": 2791, "total_steps": 8674, "loss": 0.5623351335525513, "lr": 1.622978670934152e-06, "epoch": 
0.6435323956652064, "percentage": 32.18, "elapsed_time": "1:52:59", "remaining_time": "3:58:10"} +{"current_steps": 2792, "total_steps": 8674, "loss": 0.3645760118961334, "lr": 1.6226803883163536e-06, "epoch": 0.643762969794789, "percentage": 32.19, "elapsed_time": "1:53:01", "remaining_time": "3:58:07"} +{"current_steps": 2793, "total_steps": 8674, "loss": 0.5666004419326782, "lr": 1.6223820151856501e-06, "epoch": 0.6439935439243717, "percentage": 32.2, "elapsed_time": "1:53:04", "remaining_time": "3:58:05"} +{"current_steps": 2794, "total_steps": 8674, "loss": 0.6571217775344849, "lr": 1.6220835515854133e-06, "epoch": 0.6442241180539543, "percentage": 32.21, "elapsed_time": "1:53:06", "remaining_time": "3:58:02"} +{"current_steps": 2795, "total_steps": 8674, "loss": 0.5684333443641663, "lr": 1.6217849975590271e-06, "epoch": 0.644454692183537, "percentage": 32.22, "elapsed_time": "1:53:09", "remaining_time": "3:58:00"} +{"current_steps": 2796, "total_steps": 8674, "loss": 0.43374937772750854, "lr": 1.62148635314989e-06, "epoch": 0.6446852663131196, "percentage": 32.23, "elapsed_time": "1:53:11", "remaining_time": "3:57:58"} +{"current_steps": 2797, "total_steps": 8674, "loss": 0.5102420449256897, "lr": 1.6211876184014134e-06, "epoch": 0.6449158404427023, "percentage": 32.25, "elapsed_time": "1:53:14", "remaining_time": "3:57:55"} +{"current_steps": 2798, "total_steps": 8674, "loss": 0.39345985651016235, "lr": 1.6208887933570203e-06, "epoch": 0.6451464145722849, "percentage": 32.26, "elapsed_time": "1:53:16", "remaining_time": "3:57:53"} +{"current_steps": 2799, "total_steps": 8674, "loss": 0.47554945945739746, "lr": 1.620589878060149e-06, "epoch": 0.6453769887018677, "percentage": 32.27, "elapsed_time": "1:53:18", "remaining_time": "3:57:50"} +{"current_steps": 2800, "total_steps": 8674, "loss": 0.4385503828525543, "lr": 1.6202908725542495e-06, "epoch": 0.6456075628314503, "percentage": 32.28, "elapsed_time": "1:53:21", "remaining_time": "3:57:48"} 
+{"current_steps": 2801, "total_steps": 8674, "loss": 0.5589696168899536, "lr": 1.619991776882785e-06, "epoch": 0.645838136961033, "percentage": 32.29, "elapsed_time": "1:53:25", "remaining_time": "3:57:48"} +{"current_steps": 2802, "total_steps": 8674, "loss": 0.4827546179294586, "lr": 1.619692591089232e-06, "epoch": 0.6460687110906156, "percentage": 32.3, "elapsed_time": "1:53:27", "remaining_time": "3:57:46"} +{"current_steps": 2803, "total_steps": 8674, "loss": 0.491131067276001, "lr": 1.6193933152170809e-06, "epoch": 0.6462992852201983, "percentage": 32.31, "elapsed_time": "1:53:29", "remaining_time": "3:57:43"} +{"current_steps": 2804, "total_steps": 8674, "loss": 0.47185173630714417, "lr": 1.6190939493098341e-06, "epoch": 0.6465298593497809, "percentage": 32.33, "elapsed_time": "1:53:32", "remaining_time": "3:57:41"} +{"current_steps": 2805, "total_steps": 8674, "loss": 0.4411182701587677, "lr": 1.6187944934110072e-06, "epoch": 0.6467604334793636, "percentage": 32.34, "elapsed_time": "1:53:34", "remaining_time": "3:57:38"} +{"current_steps": 2806, "total_steps": 8674, "loss": 0.47243285179138184, "lr": 1.6184949475641295e-06, "epoch": 0.6469910076089462, "percentage": 32.35, "elapsed_time": "1:53:37", "remaining_time": "3:57:36"} +{"current_steps": 2807, "total_steps": 8674, "loss": 0.4449295401573181, "lr": 1.6181953118127428e-06, "epoch": 0.647221581738529, "percentage": 32.36, "elapsed_time": "1:53:39", "remaining_time": "3:57:33"} +{"current_steps": 2808, "total_steps": 8674, "loss": 0.5148872137069702, "lr": 1.6178955862004024e-06, "epoch": 0.6474521558681116, "percentage": 32.37, "elapsed_time": "1:53:42", "remaining_time": "3:57:31"} +{"current_steps": 2809, "total_steps": 8674, "loss": 0.5017277598381042, "lr": 1.6175957707706762e-06, "epoch": 0.6476827299976943, "percentage": 32.38, "elapsed_time": "1:53:44", "remaining_time": "3:57:28"} +{"current_steps": 2810, "total_steps": 8674, "loss": 0.44220247864723206, "lr": 1.6172958655671458e-06, "epoch": 
0.6479133041272769, "percentage": 32.4, "elapsed_time": "1:53:46", "remaining_time": "3:57:26"} +{"current_steps": 2811, "total_steps": 8674, "loss": 0.45421087741851807, "lr": 1.6169958706334053e-06, "epoch": 0.6481438782568596, "percentage": 32.41, "elapsed_time": "1:53:49", "remaining_time": "3:57:24"} +{"current_steps": 2812, "total_steps": 8674, "loss": 0.4772147536277771, "lr": 1.6166957860130618e-06, "epoch": 0.6483744523864422, "percentage": 32.42, "elapsed_time": "1:53:51", "remaining_time": "3:57:22"} +{"current_steps": 2813, "total_steps": 8674, "loss": 0.5319628715515137, "lr": 1.6163956117497357e-06, "epoch": 0.6486050265160249, "percentage": 32.43, "elapsed_time": "1:53:54", "remaining_time": "3:57:19"} +{"current_steps": 2814, "total_steps": 8674, "loss": 0.5109438896179199, "lr": 1.6160953478870608e-06, "epoch": 0.6488356006456075, "percentage": 32.44, "elapsed_time": "1:53:56", "remaining_time": "3:57:17"} +{"current_steps": 2815, "total_steps": 8674, "loss": 0.4417513608932495, "lr": 1.6157949944686827e-06, "epoch": 0.6490661747751902, "percentage": 32.45, "elapsed_time": "1:53:59", "remaining_time": "3:57:14"} +{"current_steps": 2816, "total_steps": 8674, "loss": 0.5013085007667542, "lr": 1.6154945515382616e-06, "epoch": 0.6492967489047728, "percentage": 32.46, "elapsed_time": "1:54:01", "remaining_time": "3:57:12"} +{"current_steps": 2817, "total_steps": 8674, "loss": 0.5197368860244751, "lr": 1.6151940191394693e-06, "epoch": 0.6495273230343556, "percentage": 32.48, "elapsed_time": "1:54:03", "remaining_time": "3:57:09"} +{"current_steps": 2818, "total_steps": 8674, "loss": 0.46540898084640503, "lr": 1.6148933973159914e-06, "epoch": 0.6497578971639382, "percentage": 32.49, "elapsed_time": "1:54:06", "remaining_time": "3:57:07"} +{"current_steps": 2819, "total_steps": 8674, "loss": 0.4867633581161499, "lr": 1.6145926861115268e-06, "epoch": 0.6499884712935209, "percentage": 32.5, "elapsed_time": "1:54:08", "remaining_time": "3:57:04"} 
+{"current_steps": 2820, "total_steps": 8674, "loss": 0.426607221364975, "lr": 1.6142918855697864e-06, "epoch": 0.6502190454231035, "percentage": 32.51, "elapsed_time": "1:54:11", "remaining_time": "3:57:02"} +{"current_steps": 2821, "total_steps": 8674, "loss": 0.5183024406433105, "lr": 1.613990995734495e-06, "epoch": 0.6504496195526862, "percentage": 32.52, "elapsed_time": "1:54:13", "remaining_time": "3:57:00"} +{"current_steps": 2822, "total_steps": 8674, "loss": 0.48635101318359375, "lr": 1.6136900166493893e-06, "epoch": 0.6506801936822688, "percentage": 32.53, "elapsed_time": "1:54:16", "remaining_time": "3:56:57"} +{"current_steps": 2823, "total_steps": 8674, "loss": 0.47468632459640503, "lr": 1.6133889483582204e-06, "epoch": 0.6509107678118515, "percentage": 32.55, "elapsed_time": "1:54:18", "remaining_time": "3:56:55"} +{"current_steps": 2824, "total_steps": 8674, "loss": 0.4665389358997345, "lr": 1.6130877909047515e-06, "epoch": 0.6511413419414341, "percentage": 32.56, "elapsed_time": "1:54:20", "remaining_time": "3:56:52"} +{"current_steps": 2825, "total_steps": 8674, "loss": 0.5069966316223145, "lr": 1.6127865443327585e-06, "epoch": 0.6513719160710169, "percentage": 32.57, "elapsed_time": "1:54:23", "remaining_time": "3:56:50"} +{"current_steps": 2826, "total_steps": 8674, "loss": 0.47820740938186646, "lr": 1.612485208686031e-06, "epoch": 0.6516024902005995, "percentage": 32.58, "elapsed_time": "1:54:25", "remaining_time": "3:56:47"} +{"current_steps": 2827, "total_steps": 8674, "loss": 0.43017104268074036, "lr": 1.612183784008371e-06, "epoch": 0.6518330643301822, "percentage": 32.59, "elapsed_time": "1:54:28", "remaining_time": "3:56:45"} +{"current_steps": 2828, "total_steps": 8674, "loss": 0.45495298504829407, "lr": 1.6118822703435937e-06, "epoch": 0.6520636384597648, "percentage": 32.6, "elapsed_time": "1:54:31", "remaining_time": "3:56:43"} +{"current_steps": 2829, "total_steps": 8674, "loss": 0.4624331593513489, "lr": 1.6115806677355272e-06, 
"epoch": 0.6522942125893475, "percentage": 32.61, "elapsed_time": "1:54:33", "remaining_time": "3:56:41"} +{"current_steps": 2830, "total_steps": 8674, "loss": 0.39458876848220825, "lr": 1.6112789762280125e-06, "epoch": 0.6525247867189301, "percentage": 32.63, "elapsed_time": "1:54:35", "remaining_time": "3:56:38"} +{"current_steps": 2831, "total_steps": 8674, "loss": 0.45552846789360046, "lr": 1.6109771958649035e-06, "epoch": 0.6527553608485128, "percentage": 32.64, "elapsed_time": "1:54:38", "remaining_time": "3:56:36"} +{"current_steps": 2832, "total_steps": 8674, "loss": 0.4579755663871765, "lr": 1.6106753266900671e-06, "epoch": 0.6529859349780954, "percentage": 32.65, "elapsed_time": "1:54:40", "remaining_time": "3:56:33"} +{"current_steps": 2833, "total_steps": 8674, "loss": 0.5164625644683838, "lr": 1.6103733687473823e-06, "epoch": 0.6532165091076781, "percentage": 32.66, "elapsed_time": "1:54:43", "remaining_time": "3:56:31"} +{"current_steps": 2834, "total_steps": 8674, "loss": 0.43071237206459045, "lr": 1.6100713220807432e-06, "epoch": 0.6534470832372608, "percentage": 32.67, "elapsed_time": "1:54:45", "remaining_time": "3:56:28"} +{"current_steps": 2835, "total_steps": 8674, "loss": 0.5174099802970886, "lr": 1.6097691867340543e-06, "epoch": 0.6536776573668435, "percentage": 32.68, "elapsed_time": "1:54:47", "remaining_time": "3:56:26"} +{"current_steps": 2836, "total_steps": 8674, "loss": 0.5944932699203491, "lr": 1.609466962751234e-06, "epoch": 0.6539082314964261, "percentage": 32.7, "elapsed_time": "1:54:50", "remaining_time": "3:56:23"} +{"current_steps": 2837, "total_steps": 8674, "loss": 0.45203912258148193, "lr": 1.6091646501762145e-06, "epoch": 0.6541388056260088, "percentage": 32.71, "elapsed_time": "1:54:52", "remaining_time": "3:56:21"} +{"current_steps": 2838, "total_steps": 8674, "loss": 0.4197826683521271, "lr": 1.6088622490529386e-06, "epoch": 0.6543693797555914, "percentage": 32.72, "elapsed_time": "1:54:55", "remaining_time": "3:56:19"} 
+{"current_steps": 2839, "total_steps": 8674, "loss": 0.4806807339191437, "lr": 1.6085597594253649e-06, "epoch": 0.6545999538851741, "percentage": 32.73, "elapsed_time": "1:54:57", "remaining_time": "3:56:16"} +{"current_steps": 2840, "total_steps": 8674, "loss": 0.4618797302246094, "lr": 1.608257181337462e-06, "epoch": 0.6548305280147567, "percentage": 32.74, "elapsed_time": "1:55:00", "remaining_time": "3:56:14"} +{"current_steps": 2841, "total_steps": 8674, "loss": 0.4901892840862274, "lr": 1.6079545148332137e-06, "epoch": 0.6550611021443394, "percentage": 32.75, "elapsed_time": "1:55:02", "remaining_time": "3:56:11"} +{"current_steps": 2842, "total_steps": 8674, "loss": 0.44869139790534973, "lr": 1.607651759956615e-06, "epoch": 0.655291676273922, "percentage": 32.76, "elapsed_time": "1:55:04", "remaining_time": "3:56:09"} +{"current_steps": 2843, "total_steps": 8674, "loss": 0.41470903158187866, "lr": 1.6073489167516747e-06, "epoch": 0.6555222504035048, "percentage": 32.78, "elapsed_time": "1:55:07", "remaining_time": "3:56:06"} +{"current_steps": 2844, "total_steps": 8674, "loss": 0.5498615503311157, "lr": 1.6070459852624143e-06, "epoch": 0.6557528245330874, "percentage": 32.79, "elapsed_time": "1:55:09", "remaining_time": "3:56:04"} +{"current_steps": 2845, "total_steps": 8674, "loss": 0.5462392568588257, "lr": 1.6067429655328675e-06, "epoch": 0.6559833986626701, "percentage": 32.8, "elapsed_time": "1:55:12", "remaining_time": "3:56:02"} +{"current_steps": 2846, "total_steps": 8674, "loss": 0.3775100111961365, "lr": 1.6064398576070815e-06, "epoch": 0.6562139727922527, "percentage": 32.81, "elapsed_time": "1:55:14", "remaining_time": "3:55:59"} +{"current_steps": 2847, "total_steps": 8674, "loss": 0.4712100028991699, "lr": 1.6061366615291161e-06, "epoch": 0.6564445469218354, "percentage": 32.82, "elapsed_time": "1:55:17", "remaining_time": "3:55:57"} +{"current_steps": 2848, "total_steps": 8674, "loss": 0.5152161121368408, "lr": 1.6058333773430439e-06, 
"epoch": 0.656675121051418, "percentage": 32.83, "elapsed_time": "1:55:19", "remaining_time": "3:55:54"} +{"current_steps": 2849, "total_steps": 8674, "loss": 0.46678972244262695, "lr": 1.6055300050929502e-06, "epoch": 0.6569056951810007, "percentage": 32.85, "elapsed_time": "1:55:21", "remaining_time": "3:55:52"} +{"current_steps": 2850, "total_steps": 8674, "loss": 0.4622490108013153, "lr": 1.6052265448229338e-06, "epoch": 0.6571362693105833, "percentage": 32.86, "elapsed_time": "1:55:24", "remaining_time": "3:55:49"} +{"current_steps": 2851, "total_steps": 8674, "loss": 0.49909311532974243, "lr": 1.6049229965771052e-06, "epoch": 0.657366843440166, "percentage": 32.87, "elapsed_time": "1:55:26", "remaining_time": "3:55:47"} +{"current_steps": 2852, "total_steps": 8674, "loss": 0.4428306221961975, "lr": 1.6046193603995884e-06, "epoch": 0.6575974175697487, "percentage": 32.88, "elapsed_time": "1:55:29", "remaining_time": "3:55:44"} +{"current_steps": 2853, "total_steps": 8674, "loss": 0.5842458009719849, "lr": 1.6043156363345196e-06, "epoch": 0.6578279916993314, "percentage": 32.89, "elapsed_time": "1:55:31", "remaining_time": "3:55:42"} +{"current_steps": 2854, "total_steps": 8674, "loss": 0.47183722257614136, "lr": 1.604011824426049e-06, "epoch": 0.658058565828914, "percentage": 32.9, "elapsed_time": "1:55:34", "remaining_time": "3:55:40"} +{"current_steps": 2855, "total_steps": 8674, "loss": 0.44225364923477173, "lr": 1.6037079247183379e-06, "epoch": 0.6582891399584967, "percentage": 32.91, "elapsed_time": "1:55:36", "remaining_time": "3:55:37"} +{"current_steps": 2856, "total_steps": 8674, "loss": 0.4820272922515869, "lr": 1.6034039372555617e-06, "epoch": 0.6585197140880793, "percentage": 32.93, "elapsed_time": "1:55:38", "remaining_time": "3:55:35"} +{"current_steps": 2857, "total_steps": 8674, "loss": 0.48118168115615845, "lr": 1.6030998620819075e-06, "epoch": 0.658750288217662, "percentage": 32.94, "elapsed_time": "1:55:41", "remaining_time": "3:55:32"} 
+{"current_steps": 2858, "total_steps": 8674, "loss": 0.4386011064052582, "lr": 1.6027956992415764e-06, "epoch": 0.6589808623472446, "percentage": 32.95, "elapsed_time": "1:55:43", "remaining_time": "3:55:30"} +{"current_steps": 2859, "total_steps": 8674, "loss": 0.48740649223327637, "lr": 1.6024914487787814e-06, "epoch": 0.6592114364768273, "percentage": 32.96, "elapsed_time": "1:55:46", "remaining_time": "3:55:27"} +{"current_steps": 2860, "total_steps": 8674, "loss": 0.46782761812210083, "lr": 1.602187110737748e-06, "epoch": 0.65944201060641, "percentage": 32.97, "elapsed_time": "1:55:48", "remaining_time": "3:55:25"} +{"current_steps": 2861, "total_steps": 8674, "loss": 0.5086358189582825, "lr": 1.6018826851627155e-06, "epoch": 0.6596725847359927, "percentage": 32.98, "elapsed_time": "1:55:51", "remaining_time": "3:55:23"} +{"current_steps": 2862, "total_steps": 8674, "loss": 0.5631915330886841, "lr": 1.6015781720979344e-06, "epoch": 0.6599031588655753, "percentage": 33.0, "elapsed_time": "1:55:53", "remaining_time": "3:55:21"} +{"current_steps": 2863, "total_steps": 8674, "loss": 0.5134458541870117, "lr": 1.6012735715876693e-06, "epoch": 0.660133732995158, "percentage": 33.01, "elapsed_time": "1:55:56", "remaining_time": "3:55:18"} +{"current_steps": 2864, "total_steps": 8674, "loss": 0.4308784008026123, "lr": 1.6009688836761969e-06, "epoch": 0.6603643071247406, "percentage": 33.02, "elapsed_time": "1:55:58", "remaining_time": "3:55:16"} +{"current_steps": 2865, "total_steps": 8674, "loss": 0.5149765610694885, "lr": 1.6006641084078068e-06, "epoch": 0.6605948812543233, "percentage": 33.03, "elapsed_time": "1:56:00", "remaining_time": "3:55:13"} +{"current_steps": 2866, "total_steps": 8674, "loss": 0.521892786026001, "lr": 1.6003592458268005e-06, "epoch": 0.6608254553839059, "percentage": 33.04, "elapsed_time": "1:56:03", "remaining_time": "3:55:11"} +{"current_steps": 2867, "total_steps": 8674, "loss": 0.46611008048057556, "lr": 1.6000542959774937e-06, "epoch": 
0.6610560295134886, "percentage": 33.05, "elapsed_time": "1:56:05", "remaining_time": "3:55:09"} +{"current_steps": 2868, "total_steps": 8674, "loss": 0.43080392479896545, "lr": 1.5997492589042135e-06, "epoch": 0.6612866036430712, "percentage": 33.06, "elapsed_time": "1:56:08", "remaining_time": "3:55:06"} +{"current_steps": 2869, "total_steps": 8674, "loss": 0.48026901483535767, "lr": 1.5994441346513003e-06, "epoch": 0.661517177772654, "percentage": 33.08, "elapsed_time": "1:56:10", "remaining_time": "3:55:04"} +{"current_steps": 2870, "total_steps": 8674, "loss": 0.48706555366516113, "lr": 1.5991389232631068e-06, "epoch": 0.6617477519022366, "percentage": 33.09, "elapsed_time": "1:56:13", "remaining_time": "3:55:02"} +{"current_steps": 2871, "total_steps": 8674, "loss": 0.5093512535095215, "lr": 1.598833624783999e-06, "epoch": 0.6619783260318193, "percentage": 33.1, "elapsed_time": "1:56:15", "remaining_time": "3:54:59"} +{"current_steps": 2872, "total_steps": 8674, "loss": 0.5197086930274963, "lr": 1.5985282392583542e-06, "epoch": 0.6622089001614019, "percentage": 33.11, "elapsed_time": "1:56:18", "remaining_time": "3:54:57"} +{"current_steps": 2873, "total_steps": 8674, "loss": 0.497372031211853, "lr": 1.5982227667305646e-06, "epoch": 0.6624394742909846, "percentage": 33.12, "elapsed_time": "1:56:20", "remaining_time": "3:54:54"} +{"current_steps": 2874, "total_steps": 8674, "loss": 0.4746604561805725, "lr": 1.597917207245033e-06, "epoch": 0.6626700484205672, "percentage": 33.13, "elapsed_time": "1:56:22", "remaining_time": "3:54:52"} +{"current_steps": 2875, "total_steps": 8674, "loss": 0.5531996488571167, "lr": 1.5976115608461755e-06, "epoch": 0.6629006225501499, "percentage": 33.15, "elapsed_time": "1:56:25", "remaining_time": "3:54:49"} +{"current_steps": 2876, "total_steps": 8674, "loss": 0.44950544834136963, "lr": 1.5973058275784208e-06, "epoch": 0.6631311966797325, "percentage": 33.16, "elapsed_time": "1:56:27", "remaining_time": "3:54:46"} 
+{"current_steps": 2877, "total_steps": 8674, "loss": 0.45596158504486084, "lr": 1.597000007486211e-06, "epoch": 0.6633617708093152, "percentage": 33.17, "elapsed_time": "1:56:29", "remaining_time": "3:54:44"} +{"current_steps": 2878, "total_steps": 8674, "loss": 0.5243046879768372, "lr": 1.596694100613999e-06, "epoch": 0.6635923449388978, "percentage": 33.18, "elapsed_time": "1:56:32", "remaining_time": "3:54:41"} +{"current_steps": 2879, "total_steps": 8674, "loss": 0.46450644731521606, "lr": 1.5963881070062528e-06, "epoch": 0.6638229190684806, "percentage": 33.19, "elapsed_time": "1:56:35", "remaining_time": "3:54:39"} +{"current_steps": 2880, "total_steps": 8674, "loss": 0.5565767288208008, "lr": 1.5960820267074509e-06, "epoch": 0.6640534931980632, "percentage": 33.2, "elapsed_time": "1:56:37", "remaining_time": "3:54:37"} +{"current_steps": 2881, "total_steps": 8674, "loss": 0.4351605176925659, "lr": 1.595775859762085e-06, "epoch": 0.6642840673276459, "percentage": 33.21, "elapsed_time": "1:56:39", "remaining_time": "3:54:34"} +{"current_steps": 2882, "total_steps": 8674, "loss": 0.5113346576690674, "lr": 1.5954696062146603e-06, "epoch": 0.6645146414572285, "percentage": 33.23, "elapsed_time": "1:56:42", "remaining_time": "3:54:32"} +{"current_steps": 2883, "total_steps": 8674, "loss": 0.5005035996437073, "lr": 1.5951632661096932e-06, "epoch": 0.6647452155868112, "percentage": 33.24, "elapsed_time": "1:56:44", "remaining_time": "3:54:29"} +{"current_steps": 2884, "total_steps": 8674, "loss": 0.4539811611175537, "lr": 1.5948568394917138e-06, "epoch": 0.6649757897163938, "percentage": 33.25, "elapsed_time": "1:56:47", "remaining_time": "3:54:27"} +{"current_steps": 2885, "total_steps": 8674, "loss": 0.4519865810871124, "lr": 1.5945503264052637e-06, "epoch": 0.6652063638459765, "percentage": 33.26, "elapsed_time": "1:56:49", "remaining_time": "3:54:25"} +{"current_steps": 2886, "total_steps": 8674, "loss": 0.5688626766204834, "lr": 1.5942437268948985e-06, 
"epoch": 0.6654369379755591, "percentage": 33.27, "elapsed_time": "1:56:51", "remaining_time": "3:54:22"} +{"current_steps": 2887, "total_steps": 8674, "loss": 0.5038400888442993, "lr": 1.5939370410051846e-06, "epoch": 0.6656675121051419, "percentage": 33.28, "elapsed_time": "1:56:54", "remaining_time": "3:54:20"} +{"current_steps": 2888, "total_steps": 8674, "loss": 0.6332568526268005, "lr": 1.5936302687807028e-06, "epoch": 0.6658980862347245, "percentage": 33.29, "elapsed_time": "1:56:56", "remaining_time": "3:54:18"} +{"current_steps": 2889, "total_steps": 8674, "loss": 0.4994644820690155, "lr": 1.593323410266045e-06, "epoch": 0.6661286603643072, "percentage": 33.31, "elapsed_time": "1:56:59", "remaining_time": "3:54:15"} +{"current_steps": 2890, "total_steps": 8674, "loss": 0.4952617883682251, "lr": 1.5930164655058165e-06, "epoch": 0.6663592344938898, "percentage": 33.32, "elapsed_time": "1:57:01", "remaining_time": "3:54:12"} +{"current_steps": 2891, "total_steps": 8674, "loss": 0.4188910722732544, "lr": 1.5927094345446345e-06, "epoch": 0.6665898086234725, "percentage": 33.33, "elapsed_time": "1:57:04", "remaining_time": "3:54:10"} +{"current_steps": 2892, "total_steps": 8674, "loss": 0.47160637378692627, "lr": 1.5924023174271295e-06, "epoch": 0.6668203827530551, "percentage": 33.34, "elapsed_time": "1:57:06", "remaining_time": "3:54:07"} +{"current_steps": 2893, "total_steps": 8674, "loss": 0.44884049892425537, "lr": 1.592095114197944e-06, "epoch": 0.6670509568826377, "percentage": 33.35, "elapsed_time": "1:57:08", "remaining_time": "3:54:05"} +{"current_steps": 2894, "total_steps": 8674, "loss": 0.4105216860771179, "lr": 1.5917878249017327e-06, "epoch": 0.6672815310122204, "percentage": 33.36, "elapsed_time": "1:57:11", "remaining_time": "3:54:02"} +{"current_steps": 2895, "total_steps": 8674, "loss": 0.5000967383384705, "lr": 1.5914804495831634e-06, "epoch": 0.667512105141803, "percentage": 33.38, "elapsed_time": "1:57:13", "remaining_time": "3:54:00"} 
+{"current_steps": 2896, "total_steps": 8674, "loss": 0.45515477657318115, "lr": 1.5911729882869163e-06, "epoch": 0.6677426792713858, "percentage": 33.39, "elapsed_time": "1:57:16", "remaining_time": "3:53:58"} +{"current_steps": 2897, "total_steps": 8674, "loss": 0.4492835998535156, "lr": 1.590865441057684e-06, "epoch": 0.6679732534009684, "percentage": 33.4, "elapsed_time": "1:57:18", "remaining_time": "3:53:55"} +{"current_steps": 2898, "total_steps": 8674, "loss": 0.553781270980835, "lr": 1.5905578079401716e-06, "epoch": 0.6682038275305511, "percentage": 33.41, "elapsed_time": "1:57:20", "remaining_time": "3:53:52"} +{"current_steps": 2899, "total_steps": 8674, "loss": 0.5085616111755371, "lr": 1.5902500889790967e-06, "epoch": 0.6684344016601337, "percentage": 33.42, "elapsed_time": "1:57:23", "remaining_time": "3:53:50"} +{"current_steps": 2900, "total_steps": 8674, "loss": 0.4651145935058594, "lr": 1.5899422842191891e-06, "epoch": 0.6686649757897164, "percentage": 33.43, "elapsed_time": "1:57:25", "remaining_time": "3:53:48"} +{"current_steps": 2901, "total_steps": 8674, "loss": 0.5503841638565063, "lr": 1.5896343937051921e-06, "epoch": 0.668895549919299, "percentage": 33.44, "elapsed_time": "1:57:29", "remaining_time": "3:53:48"} +{"current_steps": 2902, "total_steps": 8674, "loss": 0.48213839530944824, "lr": 1.5893264174818599e-06, "epoch": 0.6691261240488817, "percentage": 33.46, "elapsed_time": "1:57:31", "remaining_time": "3:53:45"} +{"current_steps": 2903, "total_steps": 8674, "loss": 0.4602949023246765, "lr": 1.5890183555939604e-06, "epoch": 0.6693566981784643, "percentage": 33.47, "elapsed_time": "1:57:34", "remaining_time": "3:53:43"} +{"current_steps": 2904, "total_steps": 8674, "loss": 0.43991196155548096, "lr": 1.5887102080862736e-06, "epoch": 0.669587272308047, "percentage": 33.48, "elapsed_time": "1:57:36", "remaining_time": "3:53:41"} +{"current_steps": 2905, "total_steps": 8674, "loss": 0.48186323046684265, "lr": 1.5884019750035914e-06, 
"epoch": 0.6698178464376296, "percentage": 33.49, "elapsed_time": "1:57:39", "remaining_time": "3:53:38"} +{"current_steps": 2906, "total_steps": 8674, "loss": 0.44907671213150024, "lr": 1.5880936563907189e-06, "epoch": 0.6700484205672124, "percentage": 33.5, "elapsed_time": "1:57:41", "remaining_time": "3:53:35"} +{"current_steps": 2907, "total_steps": 8674, "loss": 0.4475386142730713, "lr": 1.587785252292473e-06, "epoch": 0.670278994696795, "percentage": 33.51, "elapsed_time": "1:57:43", "remaining_time": "3:53:33"} +{"current_steps": 2908, "total_steps": 8674, "loss": 0.4504704475402832, "lr": 1.587476762753684e-06, "epoch": 0.6705095688263777, "percentage": 33.53, "elapsed_time": "1:57:46", "remaining_time": "3:53:30"} +{"current_steps": 2909, "total_steps": 8674, "loss": 0.5090106129646301, "lr": 1.5871681878191937e-06, "epoch": 0.6707401429559603, "percentage": 33.54, "elapsed_time": "1:57:48", "remaining_time": "3:53:28"} +{"current_steps": 2910, "total_steps": 8674, "loss": 0.46150895953178406, "lr": 1.5868595275338561e-06, "epoch": 0.670970717085543, "percentage": 33.55, "elapsed_time": "1:57:50", "remaining_time": "3:53:25"} +{"current_steps": 2911, "total_steps": 8674, "loss": 0.5499979257583618, "lr": 1.586550781942539e-06, "epoch": 0.6712012912151256, "percentage": 33.56, "elapsed_time": "1:57:53", "remaining_time": "3:53:23"} +{"current_steps": 2912, "total_steps": 8674, "loss": 0.46628689765930176, "lr": 1.5862419510901211e-06, "epoch": 0.6714318653447083, "percentage": 33.57, "elapsed_time": "1:57:55", "remaining_time": "3:53:21"} +{"current_steps": 2913, "total_steps": 8674, "loss": 0.4517399072647095, "lr": 1.5859330350214941e-06, "epoch": 0.6716624394742909, "percentage": 33.58, "elapsed_time": "1:57:58", "remaining_time": "3:53:18"} +{"current_steps": 2914, "total_steps": 8674, "loss": 0.4696923792362213, "lr": 1.5856240337815621e-06, "epoch": 0.6718930136038737, "percentage": 33.59, "elapsed_time": "1:58:00", "remaining_time": "3:53:16"} 
+{"current_steps": 2915, "total_steps": 8674, "loss": 0.41357535123825073, "lr": 1.585314947415242e-06, "epoch": 0.6721235877334563, "percentage": 33.61, "elapsed_time": "1:58:03", "remaining_time": "3:53:13"} +{"current_steps": 2916, "total_steps": 8674, "loss": 0.5223745107650757, "lr": 1.5850057759674621e-06, "epoch": 0.672354161863039, "percentage": 33.62, "elapsed_time": "1:58:05", "remaining_time": "3:53:11"} +{"current_steps": 2917, "total_steps": 8674, "loss": 0.48562729358673096, "lr": 1.584696519483164e-06, "epoch": 0.6725847359926216, "percentage": 33.63, "elapsed_time": "1:58:07", "remaining_time": "3:53:08"} +{"current_steps": 2918, "total_steps": 8674, "loss": 0.3675496280193329, "lr": 1.5843871780073009e-06, "epoch": 0.6728153101222043, "percentage": 33.64, "elapsed_time": "1:58:10", "remaining_time": "3:53:06"} +{"current_steps": 2919, "total_steps": 8674, "loss": 0.5782667994499207, "lr": 1.5840777515848389e-06, "epoch": 0.6730458842517869, "percentage": 33.65, "elapsed_time": "1:58:12", "remaining_time": "3:53:03"} +{"current_steps": 2920, "total_steps": 8674, "loss": 0.419716477394104, "lr": 1.583768240260756e-06, "epoch": 0.6732764583813696, "percentage": 33.66, "elapsed_time": "1:58:15", "remaining_time": "3:53:01"} +{"current_steps": 2921, "total_steps": 8674, "loss": 0.4004133939743042, "lr": 1.5834586440800434e-06, "epoch": 0.6735070325109522, "percentage": 33.68, "elapsed_time": "1:58:17", "remaining_time": "3:52:59"} +{"current_steps": 2922, "total_steps": 8674, "loss": 0.4917314350605011, "lr": 1.5831489630877037e-06, "epoch": 0.673737606640535, "percentage": 33.69, "elapsed_time": "1:58:20", "remaining_time": "3:52:56"} +{"current_steps": 2923, "total_steps": 8674, "loss": 0.5488141179084778, "lr": 1.5828391973287522e-06, "epoch": 0.6739681807701176, "percentage": 33.7, "elapsed_time": "1:58:22", "remaining_time": "3:52:53"} +{"current_steps": 2924, "total_steps": 8674, "loss": 0.5047071576118469, "lr": 1.5825293468482163e-06, "epoch": 
0.6741987548997003, "percentage": 33.71, "elapsed_time": "1:58:24", "remaining_time": "3:52:51"} +{"current_steps": 2925, "total_steps": 8674, "loss": 0.4830411672592163, "lr": 1.5822194116911364e-06, "epoch": 0.6744293290292829, "percentage": 33.72, "elapsed_time": "1:58:27", "remaining_time": "3:52:48"} +{"current_steps": 2926, "total_steps": 8674, "loss": 0.47517114877700806, "lr": 1.5819093919025641e-06, "epoch": 0.6746599031588656, "percentage": 33.73, "elapsed_time": "1:58:29", "remaining_time": "3:52:46"} +{"current_steps": 2927, "total_steps": 8674, "loss": 0.5617963075637817, "lr": 1.5815992875275642e-06, "epoch": 0.6748904772884482, "percentage": 33.74, "elapsed_time": "1:58:31", "remaining_time": "3:52:43"} +{"current_steps": 2928, "total_steps": 8674, "loss": 0.4360186457633972, "lr": 1.5812890986112137e-06, "epoch": 0.6751210514180309, "percentage": 33.76, "elapsed_time": "1:58:34", "remaining_time": "3:52:41"} +{"current_steps": 2929, "total_steps": 8674, "loss": 0.49538636207580566, "lr": 1.5809788251986014e-06, "epoch": 0.6753516255476135, "percentage": 33.77, "elapsed_time": "1:58:37", "remaining_time": "3:52:39"} +{"current_steps": 2930, "total_steps": 8674, "loss": 0.538766622543335, "lr": 1.5806684673348288e-06, "epoch": 0.6755821996771962, "percentage": 33.78, "elapsed_time": "1:58:39", "remaining_time": "3:52:36"} +{"current_steps": 2931, "total_steps": 8674, "loss": 0.4113287329673767, "lr": 1.5803580250650094e-06, "epoch": 0.6758127738067788, "percentage": 33.79, "elapsed_time": "1:58:41", "remaining_time": "3:52:34"} +{"current_steps": 2932, "total_steps": 8674, "loss": 0.5298923254013062, "lr": 1.5800474984342698e-06, "epoch": 0.6760433479363616, "percentage": 33.8, "elapsed_time": "1:58:44", "remaining_time": "3:52:32"} +{"current_steps": 2933, "total_steps": 8674, "loss": 0.4891100227832794, "lr": 1.5797368874877472e-06, "epoch": 0.6762739220659442, "percentage": 33.81, "elapsed_time": "1:58:46", "remaining_time": "3:52:29"} 
+{"current_steps": 2934, "total_steps": 8674, "loss": 0.4412326216697693, "lr": 1.579426192270593e-06, "epoch": 0.6765044961955269, "percentage": 33.83, "elapsed_time": "1:58:49", "remaining_time": "3:52:27"} +{"current_steps": 2935, "total_steps": 8674, "loss": 0.5514793395996094, "lr": 1.5791154128279693e-06, "epoch": 0.6767350703251095, "percentage": 33.84, "elapsed_time": "1:58:51", "remaining_time": "3:52:24"} +{"current_steps": 2936, "total_steps": 8674, "loss": 0.44050243496894836, "lr": 1.578804549205051e-06, "epoch": 0.6769656444546922, "percentage": 33.85, "elapsed_time": "1:58:53", "remaining_time": "3:52:22"} +{"current_steps": 2937, "total_steps": 8674, "loss": 0.47503453493118286, "lr": 1.5784936014470256e-06, "epoch": 0.6771962185842748, "percentage": 33.86, "elapsed_time": "1:58:56", "remaining_time": "3:52:19"} +{"current_steps": 2938, "total_steps": 8674, "loss": 0.524544894695282, "lr": 1.5781825695990922e-06, "epoch": 0.6774267927138575, "percentage": 33.87, "elapsed_time": "1:58:58", "remaining_time": "3:52:17"} +{"current_steps": 2939, "total_steps": 8674, "loss": 0.4203689694404602, "lr": 1.5778714537064628e-06, "epoch": 0.6776573668434401, "percentage": 33.88, "elapsed_time": "1:59:01", "remaining_time": "3:52:14"} +{"current_steps": 2940, "total_steps": 8674, "loss": 0.4305247664451599, "lr": 1.577560253814361e-06, "epoch": 0.6778879409730229, "percentage": 33.89, "elapsed_time": "1:59:03", "remaining_time": "3:52:12"} +{"current_steps": 2941, "total_steps": 8674, "loss": 0.6129249930381775, "lr": 1.577248969968023e-06, "epoch": 0.6781185151026055, "percentage": 33.91, "elapsed_time": "1:59:05", "remaining_time": "3:52:09"} +{"current_steps": 2942, "total_steps": 8674, "loss": 0.44431981444358826, "lr": 1.5769376022126969e-06, "epoch": 0.6783490892321882, "percentage": 33.92, "elapsed_time": "1:59:08", "remaining_time": "3:52:07"} +{"current_steps": 2943, "total_steps": 8674, "loss": 0.4394958019256592, "lr": 1.576626150593643e-06, "epoch": 
0.6785796633617708, "percentage": 33.93, "elapsed_time": "1:59:10", "remaining_time": "3:52:04"} +{"current_steps": 2944, "total_steps": 8674, "loss": 0.44481268525123596, "lr": 1.5763146151561345e-06, "epoch": 0.6788102374913535, "percentage": 33.94, "elapsed_time": "1:59:13", "remaining_time": "3:52:02"} +{"current_steps": 2945, "total_steps": 8674, "loss": 0.4251822829246521, "lr": 1.5760029959454556e-06, "epoch": 0.6790408116209361, "percentage": 33.95, "elapsed_time": "1:59:15", "remaining_time": "3:52:00"} +{"current_steps": 2946, "total_steps": 8674, "loss": 0.41041696071624756, "lr": 1.575691293006904e-06, "epoch": 0.6792713857505188, "percentage": 33.96, "elapsed_time": "1:59:18", "remaining_time": "3:51:58"} +{"current_steps": 2947, "total_steps": 8674, "loss": 0.5710239410400391, "lr": 1.5753795063857883e-06, "epoch": 0.6795019598801014, "percentage": 33.98, "elapsed_time": "1:59:20", "remaining_time": "3:51:55"} +{"current_steps": 2948, "total_steps": 8674, "loss": 0.48825210332870483, "lr": 1.57506763612743e-06, "epoch": 0.6797325340096841, "percentage": 33.99, "elapsed_time": "1:59:23", "remaining_time": "3:51:53"} +{"current_steps": 2949, "total_steps": 8674, "loss": 0.37077784538269043, "lr": 1.5747556822771628e-06, "epoch": 0.6799631081392667, "percentage": 34.0, "elapsed_time": "1:59:25", "remaining_time": "3:51:50"} +{"current_steps": 2950, "total_steps": 8674, "loss": 0.4618649482727051, "lr": 1.5744436448803322e-06, "epoch": 0.6801936822688495, "percentage": 34.01, "elapsed_time": "1:59:27", "remaining_time": "3:51:48"} +{"current_steps": 2951, "total_steps": 8674, "loss": 0.4415496289730072, "lr": 1.574131523982296e-06, "epoch": 0.6804242563984321, "percentage": 34.02, "elapsed_time": "1:59:30", "remaining_time": "3:51:45"} +{"current_steps": 2952, "total_steps": 8674, "loss": 0.440029501914978, "lr": 1.5738193196284239e-06, "epoch": 0.6806548305280148, "percentage": 34.03, "elapsed_time": "1:59:32", "remaining_time": "3:51:43"} 
+{"current_steps": 2953, "total_steps": 8674, "loss": 0.5149378776550293, "lr": 1.5735070318640986e-06, "epoch": 0.6808854046575974, "percentage": 34.04, "elapsed_time": "1:59:35", "remaining_time": "3:51:40"} +{"current_steps": 2954, "total_steps": 8674, "loss": 0.4838085174560547, "lr": 1.5731946607347136e-06, "epoch": 0.6811159787871801, "percentage": 34.06, "elapsed_time": "1:59:37", "remaining_time": "3:51:38"} +{"current_steps": 2955, "total_steps": 8674, "loss": 0.48472005128860474, "lr": 1.5728822062856757e-06, "epoch": 0.6813465529167627, "percentage": 34.07, "elapsed_time": "1:59:40", "remaining_time": "3:51:36"} +{"current_steps": 2956, "total_steps": 8674, "loss": 0.5154656767845154, "lr": 1.572569668562403e-06, "epoch": 0.6815771270463454, "percentage": 34.08, "elapsed_time": "1:59:42", "remaining_time": "3:51:33"} +{"current_steps": 2957, "total_steps": 8674, "loss": 0.4094988703727722, "lr": 1.5722570476103263e-06, "epoch": 0.681807701175928, "percentage": 34.09, "elapsed_time": "1:59:44", "remaining_time": "3:51:31"} +{"current_steps": 2958, "total_steps": 8674, "loss": 0.5125937461853027, "lr": 1.5719443434748877e-06, "epoch": 0.6820382753055108, "percentage": 34.1, "elapsed_time": "1:59:47", "remaining_time": "3:51:28"} +{"current_steps": 2959, "total_steps": 8674, "loss": 0.4807034730911255, "lr": 1.5716315562015428e-06, "epoch": 0.6822688494350934, "percentage": 34.11, "elapsed_time": "1:59:49", "remaining_time": "3:51:26"} +{"current_steps": 2960, "total_steps": 8674, "loss": 0.6126741170883179, "lr": 1.5713186858357577e-06, "epoch": 0.6824994235646761, "percentage": 34.12, "elapsed_time": "1:59:52", "remaining_time": "3:51:23"} +{"current_steps": 2961, "total_steps": 8674, "loss": 0.5450708866119385, "lr": 1.5710057324230113e-06, "epoch": 0.6827299976942587, "percentage": 34.14, "elapsed_time": "1:59:54", "remaining_time": "3:51:20"} +{"current_steps": 2962, "total_steps": 8674, "loss": 0.47740328311920166, "lr": 1.5706926960087948e-06, 
"epoch": 0.6829605718238414, "percentage": 34.15, "elapsed_time": "1:59:56", "remaining_time": "3:51:18"} +{"current_steps": 2963, "total_steps": 8674, "loss": 0.4731057584285736, "lr": 1.5703795766386112e-06, "epoch": 0.683191145953424, "percentage": 34.16, "elapsed_time": "1:59:59", "remaining_time": "3:51:16"} +{"current_steps": 2964, "total_steps": 8674, "loss": 0.49735045433044434, "lr": 1.5700663743579754e-06, "epoch": 0.6834217200830067, "percentage": 34.17, "elapsed_time": "2:00:01", "remaining_time": "3:51:13"} +{"current_steps": 2965, "total_steps": 8674, "loss": 0.5257318019866943, "lr": 1.569753089212415e-06, "epoch": 0.6836522942125893, "percentage": 34.18, "elapsed_time": "2:00:04", "remaining_time": "3:51:11"} +{"current_steps": 2966, "total_steps": 8674, "loss": 0.3947733938694, "lr": 1.5694397212474685e-06, "epoch": 0.683882868342172, "percentage": 34.19, "elapsed_time": "2:00:06", "remaining_time": "3:51:08"} +{"current_steps": 2967, "total_steps": 8674, "loss": 0.5078107714653015, "lr": 1.5691262705086875e-06, "epoch": 0.6841134424717547, "percentage": 34.21, "elapsed_time": "2:00:08", "remaining_time": "3:51:06"} +{"current_steps": 2968, "total_steps": 8674, "loss": 0.5921520590782166, "lr": 1.5688127370416351e-06, "epoch": 0.6843440166013374, "percentage": 34.22, "elapsed_time": "2:00:11", "remaining_time": "3:51:03"} +{"current_steps": 2969, "total_steps": 8674, "loss": 0.45995181798934937, "lr": 1.5684991208918866e-06, "epoch": 0.68457459073092, "percentage": 34.23, "elapsed_time": "2:00:13", "remaining_time": "3:51:01"} +{"current_steps": 2970, "total_steps": 8674, "loss": 0.4874386787414551, "lr": 1.5681854221050293e-06, "epoch": 0.6848051648605027, "percentage": 34.24, "elapsed_time": "2:00:16", "remaining_time": "3:50:59"} +{"current_steps": 2971, "total_steps": 8674, "loss": 0.4522739052772522, "lr": 1.5678716407266625e-06, "epoch": 0.6850357389900853, "percentage": 34.25, "elapsed_time": "2:00:18", "remaining_time": "3:50:56"} 
+{"current_steps": 2972, "total_steps": 8674, "loss": 0.4596391022205353, "lr": 1.5675577768023977e-06, "epoch": 0.685266313119668, "percentage": 34.26, "elapsed_time": "2:00:21", "remaining_time": "3:50:54"} +{"current_steps": 2973, "total_steps": 8674, "loss": 0.5391427278518677, "lr": 1.567243830377858e-06, "epoch": 0.6854968872492506, "percentage": 34.27, "elapsed_time": "2:00:23", "remaining_time": "3:50:51"} +{"current_steps": 2974, "total_steps": 8674, "loss": 0.5583066940307617, "lr": 1.5669298014986786e-06, "epoch": 0.6857274613788333, "percentage": 34.29, "elapsed_time": "2:00:25", "remaining_time": "3:50:49"} +{"current_steps": 2975, "total_steps": 8674, "loss": 0.5410330295562744, "lr": 1.566615690210507e-06, "epoch": 0.6859580355084159, "percentage": 34.3, "elapsed_time": "2:00:28", "remaining_time": "3:50:46"} +{"current_steps": 2976, "total_steps": 8674, "loss": 0.5145233273506165, "lr": 1.566301496559002e-06, "epoch": 0.6861886096379987, "percentage": 34.31, "elapsed_time": "2:00:30", "remaining_time": "3:50:44"} +{"current_steps": 2977, "total_steps": 8674, "loss": 0.5021970272064209, "lr": 1.5659872205898356e-06, "epoch": 0.6864191837675813, "percentage": 34.32, "elapsed_time": "2:00:33", "remaining_time": "3:50:41"} +{"current_steps": 2978, "total_steps": 8674, "loss": 0.48251593112945557, "lr": 1.5656728623486903e-06, "epoch": 0.686649757897164, "percentage": 34.33, "elapsed_time": "2:00:35", "remaining_time": "3:50:39"} +{"current_steps": 2979, "total_steps": 8674, "loss": 0.4228450655937195, "lr": 1.5653584218812617e-06, "epoch": 0.6868803320267466, "percentage": 34.34, "elapsed_time": "2:00:38", "remaining_time": "3:50:37"} +{"current_steps": 2980, "total_steps": 8674, "loss": 0.3975197374820709, "lr": 1.5650438992332567e-06, "epoch": 0.6871109061563293, "percentage": 34.36, "elapsed_time": "2:00:40", "remaining_time": "3:50:34"} +{"current_steps": 2981, "total_steps": 8674, "loss": 0.5441234707832336, "lr": 1.5647292944503945e-06, "epoch": 
0.6873414802859119, "percentage": 34.37, "elapsed_time": "2:00:42", "remaining_time": "3:50:32"} +{"current_steps": 2982, "total_steps": 8674, "loss": 0.5357148051261902, "lr": 1.5644146075784057e-06, "epoch": 0.6875720544154946, "percentage": 34.38, "elapsed_time": "2:00:45", "remaining_time": "3:50:29"} +{"current_steps": 2983, "total_steps": 8674, "loss": 0.530154824256897, "lr": 1.5640998386630337e-06, "epoch": 0.6878026285450772, "percentage": 34.39, "elapsed_time": "2:00:47", "remaining_time": "3:50:27"} +{"current_steps": 2984, "total_steps": 8674, "loss": 0.480657696723938, "lr": 1.563784987750033e-06, "epoch": 0.68803320267466, "percentage": 34.4, "elapsed_time": "2:00:50", "remaining_time": "3:50:24"} +{"current_steps": 2985, "total_steps": 8674, "loss": 0.4822859764099121, "lr": 1.5634700548851712e-06, "epoch": 0.6882637768042426, "percentage": 34.41, "elapsed_time": "2:00:52", "remaining_time": "3:50:22"} +{"current_steps": 2986, "total_steps": 8674, "loss": 0.48551490902900696, "lr": 1.5631550401142257e-06, "epoch": 0.6884943509338253, "percentage": 34.42, "elapsed_time": "2:00:54", "remaining_time": "3:50:19"} +{"current_steps": 2987, "total_steps": 8674, "loss": 0.43080294132232666, "lr": 1.562839943482988e-06, "epoch": 0.6887249250634079, "percentage": 34.44, "elapsed_time": "2:00:57", "remaining_time": "3:50:17"} +{"current_steps": 2988, "total_steps": 8674, "loss": 0.42780637741088867, "lr": 1.56252476503726e-06, "epoch": 0.6889554991929906, "percentage": 34.45, "elapsed_time": "2:00:59", "remaining_time": "3:50:14"} +{"current_steps": 2989, "total_steps": 8674, "loss": 0.539027214050293, "lr": 1.5622095048228565e-06, "epoch": 0.6891860733225732, "percentage": 34.46, "elapsed_time": "2:01:02", "remaining_time": "3:50:12"} +{"current_steps": 2990, "total_steps": 8674, "loss": 0.4529460668563843, "lr": 1.5618941628856037e-06, "epoch": 0.6894166474521559, "percentage": 34.47, "elapsed_time": "2:01:04", "remaining_time": "3:50:09"} +{"current_steps": 
2991, "total_steps": 8674, "loss": 0.49724727869033813, "lr": 1.5615787392713395e-06, "epoch": 0.6896472215817385, "percentage": 34.48, "elapsed_time": "2:01:06", "remaining_time": "3:50:07"} +{"current_steps": 2992, "total_steps": 8674, "loss": 0.4711928963661194, "lr": 1.5612632340259144e-06, "epoch": 0.6898777957113212, "percentage": 34.49, "elapsed_time": "2:01:09", "remaining_time": "3:50:04"} +{"current_steps": 2993, "total_steps": 8674, "loss": 0.42258220911026, "lr": 1.56094764719519e-06, "epoch": 0.6901083698409038, "percentage": 34.51, "elapsed_time": "2:01:11", "remaining_time": "3:50:02"} +{"current_steps": 2994, "total_steps": 8674, "loss": 0.47754064202308655, "lr": 1.5606319788250398e-06, "epoch": 0.6903389439704866, "percentage": 34.52, "elapsed_time": "2:01:14", "remaining_time": "3:49:59"} +{"current_steps": 2995, "total_steps": 8674, "loss": 0.47200560569763184, "lr": 1.5603162289613501e-06, "epoch": 0.6905695181000692, "percentage": 34.53, "elapsed_time": "2:01:16", "remaining_time": "3:49:57"} +{"current_steps": 2996, "total_steps": 8674, "loss": 0.5194537043571472, "lr": 1.5600003976500173e-06, "epoch": 0.6908000922296519, "percentage": 34.54, "elapsed_time": "2:01:19", "remaining_time": "3:49:55"} +{"current_steps": 2997, "total_steps": 8674, "loss": 0.4874703586101532, "lr": 1.5596844849369518e-06, "epoch": 0.6910306663592345, "percentage": 34.55, "elapsed_time": "2:01:21", "remaining_time": "3:49:52"} +{"current_steps": 2998, "total_steps": 8674, "loss": 0.5028672218322754, "lr": 1.5593684908680738e-06, "epoch": 0.6912612404888172, "percentage": 34.56, "elapsed_time": "2:01:23", "remaining_time": "3:49:50"} +{"current_steps": 2999, "total_steps": 8674, "loss": 0.44250521063804626, "lr": 1.5590524154893169e-06, "epoch": 0.6914918146183998, "percentage": 34.57, "elapsed_time": "2:01:26", "remaining_time": "3:49:47"} +{"current_steps": 3000, "total_steps": 8674, "loss": 0.536510705947876, "lr": 1.5587362588466253e-06, "epoch": 
0.6917223887479825, "percentage": 34.59, "elapsed_time": "2:01:28", "remaining_time": "3:49:45"} +{"current_steps": 3001, "total_steps": 8674, "loss": 0.4514959752559662, "lr": 1.5584200209859558e-06, "epoch": 0.6919529628775651, "percentage": 34.6, "elapsed_time": "2:01:32", "remaining_time": "3:49:45"} +{"current_steps": 3002, "total_steps": 8674, "loss": 0.4402197301387787, "lr": 1.5581037019532773e-06, "epoch": 0.6921835370071479, "percentage": 34.61, "elapsed_time": "2:01:34", "remaining_time": "3:49:42"} +{"current_steps": 3003, "total_steps": 8674, "loss": 0.508256196975708, "lr": 1.5577873017945691e-06, "epoch": 0.6924141111367305, "percentage": 34.62, "elapsed_time": "2:01:37", "remaining_time": "3:49:40"} +{"current_steps": 3004, "total_steps": 8674, "loss": 0.5123175978660583, "lr": 1.5574708205558236e-06, "epoch": 0.6926446852663131, "percentage": 34.63, "elapsed_time": "2:01:39", "remaining_time": "3:49:38"} +{"current_steps": 3005, "total_steps": 8674, "loss": 0.4874982237815857, "lr": 1.5571542582830447e-06, "epoch": 0.6928752593958958, "percentage": 34.64, "elapsed_time": "2:01:42", "remaining_time": "3:49:35"} +{"current_steps": 3006, "total_steps": 8674, "loss": 0.44554391503334045, "lr": 1.556837615022248e-06, "epoch": 0.6931058335254784, "percentage": 34.66, "elapsed_time": "2:01:44", "remaining_time": "3:49:33"} +{"current_steps": 3007, "total_steps": 8674, "loss": 0.5899895429611206, "lr": 1.5565208908194603e-06, "epoch": 0.6933364076550611, "percentage": 34.67, "elapsed_time": "2:01:47", "remaining_time": "3:49:30"} +{"current_steps": 3008, "total_steps": 8674, "loss": 0.5137951374053955, "lr": 1.5562040857207208e-06, "epoch": 0.6935669817846437, "percentage": 34.68, "elapsed_time": "2:01:49", "remaining_time": "3:49:28"} +{"current_steps": 3009, "total_steps": 8674, "loss": 0.5435892343521118, "lr": 1.5558871997720805e-06, "epoch": 0.6937975559142264, "percentage": 34.69, "elapsed_time": "2:01:51", "remaining_time": "3:49:25"} 
+{"current_steps": 3010, "total_steps": 8674, "loss": 0.45998525619506836, "lr": 1.5555702330196021e-06, "epoch": 0.694028130043809, "percentage": 34.7, "elapsed_time": "2:01:54", "remaining_time": "3:49:23"} +{"current_steps": 3011, "total_steps": 8674, "loss": 0.4676332473754883, "lr": 1.5552531855093597e-06, "epoch": 0.6942587041733917, "percentage": 34.71, "elapsed_time": "2:01:56", "remaining_time": "3:49:20"} +{"current_steps": 3012, "total_steps": 8674, "loss": 0.48250633478164673, "lr": 1.5549360572874397e-06, "epoch": 0.6944892783029744, "percentage": 34.72, "elapsed_time": "2:01:59", "remaining_time": "3:49:18"} +{"current_steps": 3013, "total_steps": 8674, "loss": 0.4841402769088745, "lr": 1.5546188483999396e-06, "epoch": 0.6947198524325571, "percentage": 34.74, "elapsed_time": "2:02:01", "remaining_time": "3:49:16"} +{"current_steps": 3014, "total_steps": 8674, "loss": 0.4717336893081665, "lr": 1.5543015588929688e-06, "epoch": 0.6949504265621397, "percentage": 34.75, "elapsed_time": "2:02:03", "remaining_time": "3:49:13"} +{"current_steps": 3015, "total_steps": 8674, "loss": 0.48844897747039795, "lr": 1.5539841888126488e-06, "epoch": 0.6951810006917224, "percentage": 34.76, "elapsed_time": "2:02:06", "remaining_time": "3:49:11"} +{"current_steps": 3016, "total_steps": 8674, "loss": 0.5244781970977783, "lr": 1.5536667382051127e-06, "epoch": 0.695411574821305, "percentage": 34.77, "elapsed_time": "2:02:08", "remaining_time": "3:49:08"} +{"current_steps": 3017, "total_steps": 8674, "loss": 0.4612278938293457, "lr": 1.5533492071165046e-06, "epoch": 0.6956421489508877, "percentage": 34.78, "elapsed_time": "2:02:10", "remaining_time": "3:49:05"} +{"current_steps": 3018, "total_steps": 8674, "loss": 0.40461257100105286, "lr": 1.5530315955929817e-06, "epoch": 0.6958727230804703, "percentage": 34.79, "elapsed_time": "2:02:13", "remaining_time": "3:49:03"} +{"current_steps": 3019, "total_steps": 8674, "loss": 0.5191174745559692, "lr": 1.5527139036807112e-06, 
"epoch": 0.696103297210053, "percentage": 34.81, "elapsed_time": "2:02:15", "remaining_time": "3:49:00"} +{"current_steps": 3020, "total_steps": 8674, "loss": 0.45882558822631836, "lr": 1.5523961314258731e-06, "epoch": 0.6963338713396356, "percentage": 34.82, "elapsed_time": "2:02:18", "remaining_time": "3:48:58"} +{"current_steps": 3021, "total_steps": 8674, "loss": 0.4766819477081299, "lr": 1.552078278874659e-06, "epoch": 0.6965644454692184, "percentage": 34.83, "elapsed_time": "2:02:20", "remaining_time": "3:48:56"} +{"current_steps": 3022, "total_steps": 8674, "loss": 0.4572867751121521, "lr": 1.5517603460732724e-06, "epoch": 0.696795019598801, "percentage": 34.84, "elapsed_time": "2:02:23", "remaining_time": "3:48:53"} +{"current_steps": 3023, "total_steps": 8674, "loss": 0.4689183235168457, "lr": 1.5514423330679272e-06, "epoch": 0.6970255937283837, "percentage": 34.85, "elapsed_time": "2:02:25", "remaining_time": "3:48:51"} +{"current_steps": 3024, "total_steps": 8674, "loss": 0.45769914984703064, "lr": 1.5511242399048504e-06, "epoch": 0.6972561678579663, "percentage": 34.86, "elapsed_time": "2:02:27", "remaining_time": "3:48:48"} +{"current_steps": 3025, "total_steps": 8674, "loss": 0.47367236018180847, "lr": 1.5508060666302796e-06, "epoch": 0.697486741987549, "percentage": 34.87, "elapsed_time": "2:02:30", "remaining_time": "3:48:46"} +{"current_steps": 3026, "total_steps": 8674, "loss": 0.40873080492019653, "lr": 1.550487813290465e-06, "epoch": 0.6977173161171316, "percentage": 34.89, "elapsed_time": "2:02:32", "remaining_time": "3:48:43"} +{"current_steps": 3027, "total_steps": 8674, "loss": 0.42366844415664673, "lr": 1.5501694799316671e-06, "epoch": 0.6979478902467143, "percentage": 34.9, "elapsed_time": "2:02:34", "remaining_time": "3:48:41"} +{"current_steps": 3028, "total_steps": 8674, "loss": 0.3133828043937683, "lr": 1.5498510666001602e-06, "epoch": 0.6981784643762969, "percentage": 34.91, "elapsed_time": "2:02:37", "remaining_time": "3:48:38"} 
+{"current_steps": 3029, "total_steps": 8674, "loss": 0.5188712477684021, "lr": 1.549532573342228e-06, "epoch": 0.6984090385058797, "percentage": 34.92, "elapsed_time": "2:02:39", "remaining_time": "3:48:36"} +{"current_steps": 3030, "total_steps": 8674, "loss": 0.4374960660934448, "lr": 1.5492140002041668e-06, "epoch": 0.6986396126354623, "percentage": 34.93, "elapsed_time": "2:02:42", "remaining_time": "3:48:33"} +{"current_steps": 3031, "total_steps": 8674, "loss": 0.5285592079162598, "lr": 1.5488953472322845e-06, "epoch": 0.698870186765045, "percentage": 34.94, "elapsed_time": "2:02:44", "remaining_time": "3:48:31"} +{"current_steps": 3032, "total_steps": 8674, "loss": 0.5331767797470093, "lr": 1.5485766144729006e-06, "epoch": 0.6991007608946276, "percentage": 34.96, "elapsed_time": "2:02:47", "remaining_time": "3:48:28"} +{"current_steps": 3033, "total_steps": 8674, "loss": 0.4546147584915161, "lr": 1.5482578019723462e-06, "epoch": 0.6993313350242103, "percentage": 34.97, "elapsed_time": "2:02:49", "remaining_time": "3:48:26"} +{"current_steps": 3034, "total_steps": 8674, "loss": 0.47674182057380676, "lr": 1.5479389097769639e-06, "epoch": 0.6995619091537929, "percentage": 34.98, "elapsed_time": "2:02:51", "remaining_time": "3:48:23"} +{"current_steps": 3035, "total_steps": 8674, "loss": 0.496138334274292, "lr": 1.5476199379331078e-06, "epoch": 0.6997924832833756, "percentage": 34.99, "elapsed_time": "2:02:54", "remaining_time": "3:48:21"} +{"current_steps": 3036, "total_steps": 8674, "loss": 0.4843756854534149, "lr": 1.547300886487144e-06, "epoch": 0.7000230574129582, "percentage": 35.0, "elapsed_time": "2:02:56", "remaining_time": "3:48:18"} +{"current_steps": 3037, "total_steps": 8674, "loss": 0.6028264760971069, "lr": 1.5469817554854494e-06, "epoch": 0.7002536315425409, "percentage": 35.01, "elapsed_time": "2:02:59", "remaining_time": "3:48:16"} +{"current_steps": 3038, "total_steps": 8674, "loss": 0.49528858065605164, "lr": 1.5466625449744134e-06, "epoch": 
0.7004842056721235, "percentage": 35.02, "elapsed_time": "2:03:01", "remaining_time": "3:48:14"} +{"current_steps": 3039, "total_steps": 8674, "loss": 0.466439425945282, "lr": 1.5463432550004358e-06, "epoch": 0.7007147798017063, "percentage": 35.04, "elapsed_time": "2:03:03", "remaining_time": "3:48:11"} +{"current_steps": 3040, "total_steps": 8674, "loss": 0.4196532368659973, "lr": 1.5460238856099292e-06, "epoch": 0.7009453539312889, "percentage": 35.05, "elapsed_time": "2:03:06", "remaining_time": "3:48:09"} +{"current_steps": 3041, "total_steps": 8674, "loss": 0.47679999470710754, "lr": 1.5457044368493173e-06, "epoch": 0.7011759280608716, "percentage": 35.06, "elapsed_time": "2:03:08", "remaining_time": "3:48:06"} +{"current_steps": 3042, "total_steps": 8674, "loss": 0.4368046522140503, "lr": 1.5453849087650346e-06, "epoch": 0.7014065021904542, "percentage": 35.07, "elapsed_time": "2:03:11", "remaining_time": "3:48:04"} +{"current_steps": 3043, "total_steps": 8674, "loss": 0.45165273547172546, "lr": 1.5450653014035285e-06, "epoch": 0.7016370763200369, "percentage": 35.08, "elapsed_time": "2:03:13", "remaining_time": "3:48:01"} +{"current_steps": 3044, "total_steps": 8674, "loss": 0.44813454151153564, "lr": 1.5447456148112563e-06, "epoch": 0.7018676504496195, "percentage": 35.09, "elapsed_time": "2:03:15", "remaining_time": "3:47:59"} +{"current_steps": 3045, "total_steps": 8674, "loss": 0.44681504368782043, "lr": 1.5444258490346882e-06, "epoch": 0.7020982245792022, "percentage": 35.1, "elapsed_time": "2:03:18", "remaining_time": "3:47:56"} +{"current_steps": 3046, "total_steps": 8674, "loss": 0.44788169860839844, "lr": 1.5441060041203057e-06, "epoch": 0.7023287987087848, "percentage": 35.12, "elapsed_time": "2:03:21", "remaining_time": "3:47:54"} +{"current_steps": 3047, "total_steps": 8674, "loss": 0.3754178285598755, "lr": 1.5437860801146013e-06, "epoch": 0.7025593728383676, "percentage": 35.13, "elapsed_time": "2:03:23", "remaining_time": "3:47:52"} 
+{"current_steps": 3048, "total_steps": 8674, "loss": 0.3582305908203125, "lr": 1.5434660770640787e-06, "epoch": 0.7027899469679502, "percentage": 35.14, "elapsed_time": "2:03:25", "remaining_time": "3:47:49"} +{"current_steps": 3049, "total_steps": 8674, "loss": 0.42649000883102417, "lr": 1.543145995015254e-06, "epoch": 0.7030205210975329, "percentage": 35.15, "elapsed_time": "2:03:28", "remaining_time": "3:47:47"} +{"current_steps": 3050, "total_steps": 8674, "loss": 0.5164098143577576, "lr": 1.5428258340146543e-06, "epoch": 0.7032510952271155, "percentage": 35.16, "elapsed_time": "2:03:30", "remaining_time": "3:47:44"} +{"current_steps": 3051, "total_steps": 8674, "loss": 0.4193584620952606, "lr": 1.5425055941088181e-06, "epoch": 0.7034816693566982, "percentage": 35.17, "elapsed_time": "2:03:32", "remaining_time": "3:47:42"} +{"current_steps": 3052, "total_steps": 8674, "loss": 0.5230807662010193, "lr": 1.5421852753442957e-06, "epoch": 0.7037122434862808, "percentage": 35.19, "elapsed_time": "2:03:35", "remaining_time": "3:47:39"} +{"current_steps": 3053, "total_steps": 8674, "loss": 0.4573478102684021, "lr": 1.5418648777676488e-06, "epoch": 0.7039428176158635, "percentage": 35.2, "elapsed_time": "2:03:37", "remaining_time": "3:47:37"} +{"current_steps": 3054, "total_steps": 8674, "loss": 0.47031426429748535, "lr": 1.5415444014254503e-06, "epoch": 0.7041733917454461, "percentage": 35.21, "elapsed_time": "2:03:40", "remaining_time": "3:47:35"} +{"current_steps": 3055, "total_steps": 8674, "loss": 0.4499198794364929, "lr": 1.5412238463642844e-06, "epoch": 0.7044039658750288, "percentage": 35.22, "elapsed_time": "2:03:42", "remaining_time": "3:47:32"} +{"current_steps": 3056, "total_steps": 8674, "loss": 0.4775800406932831, "lr": 1.5409032126307477e-06, "epoch": 0.7046345400046115, "percentage": 35.23, "elapsed_time": "2:03:45", "remaining_time": "3:47:30"} +{"current_steps": 3057, "total_steps": 8674, "loss": 0.535969614982605, "lr": 1.540582500271447e-06, 
"epoch": 0.7048651141341942, "percentage": 35.24, "elapsed_time": "2:03:47", "remaining_time": "3:47:27"} +{"current_steps": 3058, "total_steps": 8674, "loss": 0.5358741283416748, "lr": 1.5402617093330013e-06, "epoch": 0.7050956882637768, "percentage": 35.25, "elapsed_time": "2:03:49", "remaining_time": "3:47:25"} +{"current_steps": 3059, "total_steps": 8674, "loss": 0.5392765998840332, "lr": 1.5399408398620406e-06, "epoch": 0.7053262623933595, "percentage": 35.27, "elapsed_time": "2:03:52", "remaining_time": "3:47:22"} +{"current_steps": 3060, "total_steps": 8674, "loss": 0.47976016998291016, "lr": 1.5396198919052066e-06, "epoch": 0.7055568365229421, "percentage": 35.28, "elapsed_time": "2:03:54", "remaining_time": "3:47:20"} +{"current_steps": 3061, "total_steps": 8674, "loss": 0.39919328689575195, "lr": 1.5392988655091526e-06, "epoch": 0.7057874106525248, "percentage": 35.29, "elapsed_time": "2:03:57", "remaining_time": "3:47:17"} +{"current_steps": 3062, "total_steps": 8674, "loss": 0.4503553509712219, "lr": 1.538977760720543e-06, "epoch": 0.7060179847821074, "percentage": 35.3, "elapsed_time": "2:03:59", "remaining_time": "3:47:15"} +{"current_steps": 3063, "total_steps": 8674, "loss": 0.4570388197898865, "lr": 1.5386565775860531e-06, "epoch": 0.7062485589116901, "percentage": 35.31, "elapsed_time": "2:04:02", "remaining_time": "3:47:13"} +{"current_steps": 3064, "total_steps": 8674, "loss": 0.54588782787323, "lr": 1.5383353161523706e-06, "epoch": 0.7064791330412727, "percentage": 35.32, "elapsed_time": "2:04:04", "remaining_time": "3:47:10"} +{"current_steps": 3065, "total_steps": 8674, "loss": 0.40369170904159546, "lr": 1.5380139764661945e-06, "epoch": 0.7067097071708555, "percentage": 35.34, "elapsed_time": "2:04:07", "remaining_time": "3:47:08"} +{"current_steps": 3066, "total_steps": 8674, "loss": 0.5079206228256226, "lr": 1.5376925585742341e-06, "epoch": 0.7069402813004381, "percentage": 35.35, "elapsed_time": "2:04:09", "remaining_time": "3:47:05"} 
+{"current_steps": 3067, "total_steps": 8674, "loss": 0.41418159008026123, "lr": 1.5373710625232107e-06, "epoch": 0.7071708554300208, "percentage": 35.36, "elapsed_time": "2:04:11", "remaining_time": "3:47:03"} +{"current_steps": 3068, "total_steps": 8674, "loss": 0.4546199142932892, "lr": 1.5370494883598575e-06, "epoch": 0.7074014295596034, "percentage": 35.37, "elapsed_time": "2:04:14", "remaining_time": "3:47:00"} +{"current_steps": 3069, "total_steps": 8674, "loss": 0.48041367530822754, "lr": 1.5367278361309183e-06, "epoch": 0.7076320036891861, "percentage": 35.38, "elapsed_time": "2:04:16", "remaining_time": "3:46:58"} +{"current_steps": 3070, "total_steps": 8674, "loss": 0.47676384449005127, "lr": 1.5364061058831486e-06, "epoch": 0.7078625778187687, "percentage": 35.39, "elapsed_time": "2:04:19", "remaining_time": "3:46:55"} +{"current_steps": 3071, "total_steps": 8674, "loss": 0.47341692447662354, "lr": 1.5360842976633148e-06, "epoch": 0.7080931519483514, "percentage": 35.4, "elapsed_time": "2:04:21", "remaining_time": "3:46:53"} +{"current_steps": 3072, "total_steps": 8674, "loss": 0.38436269760131836, "lr": 1.5357624115181956e-06, "epoch": 0.708323726077934, "percentage": 35.42, "elapsed_time": "2:04:24", "remaining_time": "3:46:51"} +{"current_steps": 3073, "total_steps": 8674, "loss": 0.5369806289672852, "lr": 1.5354404474945798e-06, "epoch": 0.7085543002075168, "percentage": 35.43, "elapsed_time": "2:04:26", "remaining_time": "3:46:48"} +{"current_steps": 3074, "total_steps": 8674, "loss": 0.5314677953720093, "lr": 1.535118405639269e-06, "epoch": 0.7087848743370994, "percentage": 35.44, "elapsed_time": "2:04:28", "remaining_time": "3:46:46"} +{"current_steps": 3075, "total_steps": 8674, "loss": 0.49233007431030273, "lr": 1.5347962859990742e-06, "epoch": 0.7090154484666821, "percentage": 35.45, "elapsed_time": "2:04:31", "remaining_time": "3:46:43"} +{"current_steps": 3076, "total_steps": 8674, "loss": 0.4834766983985901, "lr": 1.5344740886208194e-06, 
"epoch": 0.7092460225962647, "percentage": 35.46, "elapsed_time": "2:04:33", "remaining_time": "3:46:41"} +{"current_steps": 3077, "total_steps": 8674, "loss": 0.505670428276062, "lr": 1.534151813551339e-06, "epoch": 0.7094765967258474, "percentage": 35.47, "elapsed_time": "2:04:36", "remaining_time": "3:46:38"} +{"current_steps": 3078, "total_steps": 8674, "loss": 0.5256010293960571, "lr": 1.533829460837479e-06, "epoch": 0.70970717085543, "percentage": 35.49, "elapsed_time": "2:04:38", "remaining_time": "3:46:36"} +{"current_steps": 3079, "total_steps": 8674, "loss": 0.4186098873615265, "lr": 1.5335070305260967e-06, "epoch": 0.7099377449850127, "percentage": 35.5, "elapsed_time": "2:04:40", "remaining_time": "3:46:33"} +{"current_steps": 3080, "total_steps": 8674, "loss": 0.4034464359283447, "lr": 1.5331845226640607e-06, "epoch": 0.7101683191145953, "percentage": 35.51, "elapsed_time": "2:04:43", "remaining_time": "3:46:31"} +{"current_steps": 3081, "total_steps": 8674, "loss": 0.4521537721157074, "lr": 1.5328619372982505e-06, "epoch": 0.710398893244178, "percentage": 35.52, "elapsed_time": "2:04:45", "remaining_time": "3:46:29"} +{"current_steps": 3082, "total_steps": 8674, "loss": 0.4919602572917938, "lr": 1.5325392744755574e-06, "epoch": 0.7106294673737606, "percentage": 35.53, "elapsed_time": "2:04:48", "remaining_time": "3:46:26"} +{"current_steps": 3083, "total_steps": 8674, "loss": 0.4464415907859802, "lr": 1.5322165342428835e-06, "epoch": 0.7108600415033434, "percentage": 35.54, "elapsed_time": "2:04:50", "remaining_time": "3:46:24"} +{"current_steps": 3084, "total_steps": 8674, "loss": 0.47444385290145874, "lr": 1.5318937166471427e-06, "epoch": 0.711090615632926, "percentage": 35.55, "elapsed_time": "2:04:53", "remaining_time": "3:46:21"} +{"current_steps": 3085, "total_steps": 8674, "loss": 0.4014730453491211, "lr": 1.5315708217352595e-06, "epoch": 0.7113211897625087, "percentage": 35.57, "elapsed_time": "2:04:55", "remaining_time": "3:46:19"} 
+{"current_steps": 3086, "total_steps": 8674, "loss": 0.4528852701187134, "lr": 1.5312478495541703e-06, "epoch": 0.7115517638920913, "percentage": 35.58, "elapsed_time": "2:04:57", "remaining_time": "3:46:16"} +{"current_steps": 3087, "total_steps": 8674, "loss": 0.4919637441635132, "lr": 1.5309248001508216e-06, "epoch": 0.711782338021674, "percentage": 35.59, "elapsed_time": "2:05:00", "remaining_time": "3:46:14"} +{"current_steps": 3088, "total_steps": 8674, "loss": 0.5630985498428345, "lr": 1.530601673572173e-06, "epoch": 0.7120129121512566, "percentage": 35.6, "elapsed_time": "2:05:02", "remaining_time": "3:46:12"} +{"current_steps": 3089, "total_steps": 8674, "loss": 0.3920522630214691, "lr": 1.5302784698651935e-06, "epoch": 0.7122434862808393, "percentage": 35.61, "elapsed_time": "2:05:05", "remaining_time": "3:46:09"} +{"current_steps": 3090, "total_steps": 8674, "loss": 0.5502145290374756, "lr": 1.5299551890768642e-06, "epoch": 0.7124740604104219, "percentage": 35.62, "elapsed_time": "2:05:07", "remaining_time": "3:46:07"} +{"current_steps": 3091, "total_steps": 8674, "loss": 0.4839448928833008, "lr": 1.5296318312541767e-06, "epoch": 0.7127046345400047, "percentage": 35.64, "elapsed_time": "2:05:10", "remaining_time": "3:46:04"} +{"current_steps": 3092, "total_steps": 8674, "loss": 0.5029735565185547, "lr": 1.5293083964441355e-06, "epoch": 0.7129352086695873, "percentage": 35.65, "elapsed_time": "2:05:12", "remaining_time": "3:46:02"} +{"current_steps": 3093, "total_steps": 8674, "loss": 0.4724803566932678, "lr": 1.5289848846937544e-06, "epoch": 0.71316578279917, "percentage": 35.66, "elapsed_time": "2:05:14", "remaining_time": "3:45:59"} +{"current_steps": 3094, "total_steps": 8674, "loss": 0.4609840512275696, "lr": 1.528661296050059e-06, "epoch": 0.7133963569287526, "percentage": 35.67, "elapsed_time": "2:05:17", "remaining_time": "3:45:57"} +{"current_steps": 3095, "total_steps": 8674, "loss": 0.49763959646224976, "lr": 1.5283376305600863e-06, "epoch": 
0.7136269310583353, "percentage": 35.68, "elapsed_time": "2:05:19", "remaining_time": "3:45:54"} +{"current_steps": 3096, "total_steps": 8674, "loss": 0.42384523153305054, "lr": 1.5280138882708847e-06, "epoch": 0.7138575051879179, "percentage": 35.69, "elapsed_time": "2:05:22", "remaining_time": "3:45:52"} +{"current_steps": 3097, "total_steps": 8674, "loss": 0.5034611225128174, "lr": 1.5276900692295134e-06, "epoch": 0.7140880793175006, "percentage": 35.7, "elapsed_time": "2:05:24", "remaining_time": "3:45:49"} +{"current_steps": 3098, "total_steps": 8674, "loss": 0.5617417097091675, "lr": 1.5273661734830423e-06, "epoch": 0.7143186534470832, "percentage": 35.72, "elapsed_time": "2:05:26", "remaining_time": "3:45:47"} +{"current_steps": 3099, "total_steps": 8674, "loss": 0.4562014937400818, "lr": 1.527042201078553e-06, "epoch": 0.714549227576666, "percentage": 35.73, "elapsed_time": "2:05:29", "remaining_time": "3:45:44"} +{"current_steps": 3100, "total_steps": 8674, "loss": 0.5626288056373596, "lr": 1.5267181520631386e-06, "epoch": 0.7147798017062486, "percentage": 35.74, "elapsed_time": "2:05:31", "remaining_time": "3:45:42"} +{"current_steps": 3101, "total_steps": 8674, "loss": 0.4882054924964905, "lr": 1.5263940264839028e-06, "epoch": 0.7150103758358313, "percentage": 35.75, "elapsed_time": "2:05:35", "remaining_time": "3:45:42"} +{"current_steps": 3102, "total_steps": 8674, "loss": 0.5371058583259583, "lr": 1.5260698243879603e-06, "epoch": 0.7152409499654139, "percentage": 35.76, "elapsed_time": "2:05:37", "remaining_time": "3:45:39"} +{"current_steps": 3103, "total_steps": 8674, "loss": 0.4683259129524231, "lr": 1.5257455458224368e-06, "epoch": 0.7154715240949966, "percentage": 35.77, "elapsed_time": "2:05:39", "remaining_time": "3:45:36"} +{"current_steps": 3104, "total_steps": 8674, "loss": 0.4894726872444153, "lr": 1.5254211908344704e-06, "epoch": 0.7157020982245792, "percentage": 35.79, "elapsed_time": "2:05:42", "remaining_time": "3:45:34"} 
+{"current_steps": 3105, "total_steps": 8674, "loss": 0.4517880082130432, "lr": 1.5250967594712089e-06, "epoch": 0.7159326723541619, "percentage": 35.8, "elapsed_time": "2:05:44", "remaining_time": "3:45:32"} +{"current_steps": 3106, "total_steps": 8674, "loss": 0.5062767267227173, "lr": 1.5247722517798118e-06, "epoch": 0.7161632464837445, "percentage": 35.81, "elapsed_time": "2:05:47", "remaining_time": "3:45:29"} +{"current_steps": 3107, "total_steps": 8674, "loss": 0.5029302835464478, "lr": 1.5244476678074494e-06, "epoch": 0.7163938206133272, "percentage": 35.82, "elapsed_time": "2:05:49", "remaining_time": "3:45:26"} +{"current_steps": 3108, "total_steps": 8674, "loss": 0.44112175703048706, "lr": 1.5241230076013035e-06, "epoch": 0.7166243947429098, "percentage": 35.83, "elapsed_time": "2:05:51", "remaining_time": "3:45:24"} +{"current_steps": 3109, "total_steps": 8674, "loss": 0.43693509697914124, "lr": 1.5237982712085665e-06, "epoch": 0.7168549688724926, "percentage": 35.84, "elapsed_time": "2:05:54", "remaining_time": "3:45:21"} +{"current_steps": 3110, "total_steps": 8674, "loss": 0.4544166922569275, "lr": 1.5234734586764422e-06, "epoch": 0.7170855430020752, "percentage": 35.85, "elapsed_time": "2:05:56", "remaining_time": "3:45:19"} +{"current_steps": 3111, "total_steps": 8674, "loss": 0.5470178127288818, "lr": 1.5231485700521451e-06, "epoch": 0.7173161171316579, "percentage": 35.87, "elapsed_time": "2:05:59", "remaining_time": "3:45:16"} +{"current_steps": 3112, "total_steps": 8674, "loss": 0.5215972065925598, "lr": 1.5228236053829017e-06, "epoch": 0.7175466912612405, "percentage": 35.88, "elapsed_time": "2:06:01", "remaining_time": "3:45:14"} +{"current_steps": 3113, "total_steps": 8674, "loss": 0.3922381103038788, "lr": 1.5224985647159488e-06, "epoch": 0.7177772653908232, "percentage": 35.89, "elapsed_time": "2:06:04", "remaining_time": "3:45:12"} +{"current_steps": 3114, "total_steps": 8674, "loss": 0.47455158829689026, "lr": 1.5221734480985341e-06, 
"epoch": 0.7180078395204058, "percentage": 35.9, "elapsed_time": "2:06:06", "remaining_time": "3:45:09"} +{"current_steps": 3115, "total_steps": 8674, "loss": 0.5776175260543823, "lr": 1.5218482555779164e-06, "epoch": 0.7182384136499884, "percentage": 35.91, "elapsed_time": "2:06:08", "remaining_time": "3:45:07"} +{"current_steps": 3116, "total_steps": 8674, "loss": 0.40414175391197205, "lr": 1.521522987201366e-06, "epoch": 0.7184689877795711, "percentage": 35.92, "elapsed_time": "2:06:11", "remaining_time": "3:45:04"} +{"current_steps": 3117, "total_steps": 8674, "loss": 0.44597384333610535, "lr": 1.5211976430161643e-06, "epoch": 0.7186995619091537, "percentage": 35.93, "elapsed_time": "2:06:13", "remaining_time": "3:45:02"} +{"current_steps": 3118, "total_steps": 8674, "loss": 0.50276118516922, "lr": 1.5208722230696024e-06, "epoch": 0.7189301360387365, "percentage": 35.95, "elapsed_time": "2:06:16", "remaining_time": "3:44:59"} +{"current_steps": 3119, "total_steps": 8674, "loss": 0.43281811475753784, "lr": 1.5205467274089844e-06, "epoch": 0.7191607101683191, "percentage": 35.96, "elapsed_time": "2:06:18", "remaining_time": "3:44:57"} +{"current_steps": 3120, "total_steps": 8674, "loss": 0.3796764016151428, "lr": 1.5202211560816243e-06, "epoch": 0.7193912842979018, "percentage": 35.97, "elapsed_time": "2:06:20", "remaining_time": "3:44:54"} +{"current_steps": 3121, "total_steps": 8674, "loss": 0.47820231318473816, "lr": 1.5198955091348463e-06, "epoch": 0.7196218584274844, "percentage": 35.98, "elapsed_time": "2:06:23", "remaining_time": "3:44:52"} +{"current_steps": 3122, "total_steps": 8674, "loss": 0.4737284779548645, "lr": 1.5195697866159875e-06, "epoch": 0.7198524325570671, "percentage": 35.99, "elapsed_time": "2:06:25", "remaining_time": "3:44:50"} +{"current_steps": 3123, "total_steps": 8674, "loss": 0.44652169942855835, "lr": 1.519243988572394e-06, "epoch": 0.7200830066866497, "percentage": 36.0, "elapsed_time": "2:06:28", "remaining_time": "3:44:47"} 
+{"current_steps": 3124, "total_steps": 8674, "loss": 0.42702072858810425, "lr": 1.518918115051425e-06, "epoch": 0.7203135808162324, "percentage": 36.02, "elapsed_time": "2:06:30", "remaining_time": "3:44:45"} +{"current_steps": 3125, "total_steps": 8674, "loss": 0.5003541707992554, "lr": 1.5185921661004483e-06, "epoch": 0.720544154945815, "percentage": 36.03, "elapsed_time": "2:06:32", "remaining_time": "3:44:42"} +{"current_steps": 3126, "total_steps": 8674, "loss": 0.5045102834701538, "lr": 1.518266141766845e-06, "epoch": 0.7207747290753977, "percentage": 36.04, "elapsed_time": "2:06:35", "remaining_time": "3:44:39"} +{"current_steps": 3127, "total_steps": 8674, "loss": 0.46619412302970886, "lr": 1.5179400420980052e-06, "epoch": 0.7210053032049804, "percentage": 36.05, "elapsed_time": "2:06:37", "remaining_time": "3:44:37"} +{"current_steps": 3128, "total_steps": 8674, "loss": 0.5006855726242065, "lr": 1.5176138671413314e-06, "epoch": 0.7212358773345631, "percentage": 36.06, "elapsed_time": "2:06:39", "remaining_time": "3:44:34"} +{"current_steps": 3129, "total_steps": 8674, "loss": 0.4394634962081909, "lr": 1.5172876169442362e-06, "epoch": 0.7214664514641457, "percentage": 36.07, "elapsed_time": "2:06:42", "remaining_time": "3:44:32"} +{"current_steps": 3130, "total_steps": 8674, "loss": 0.49311593174934387, "lr": 1.5169612915541428e-06, "epoch": 0.7216970255937284, "percentage": 36.08, "elapsed_time": "2:06:44", "remaining_time": "3:44:30"} +{"current_steps": 3131, "total_steps": 8674, "loss": 0.38406768441200256, "lr": 1.5166348910184868e-06, "epoch": 0.721927599723311, "percentage": 36.1, "elapsed_time": "2:06:47", "remaining_time": "3:44:27"} +{"current_steps": 3132, "total_steps": 8674, "loss": 0.547613799571991, "lr": 1.5163084153847132e-06, "epoch": 0.7221581738528937, "percentage": 36.11, "elapsed_time": "2:06:49", "remaining_time": "3:44:25"} +{"current_steps": 3133, "total_steps": 8674, "loss": 0.43875589966773987, "lr": 1.515981864700279e-06, 
"epoch": 0.7223887479824763, "percentage": 36.12, "elapsed_time": "2:06:51", "remaining_time": "3:44:22"} +{"current_steps": 3134, "total_steps": 8674, "loss": 0.41515982151031494, "lr": 1.5156552390126516e-06, "epoch": 0.722619322112059, "percentage": 36.13, "elapsed_time": "2:06:54", "remaining_time": "3:44:19"} +{"current_steps": 3135, "total_steps": 8674, "loss": 0.43297481536865234, "lr": 1.5153285383693088e-06, "epoch": 0.7228498962416416, "percentage": 36.14, "elapsed_time": "2:06:56", "remaining_time": "3:44:17"} +{"current_steps": 3136, "total_steps": 8674, "loss": 0.5059916377067566, "lr": 1.5150017628177408e-06, "epoch": 0.7230804703712244, "percentage": 36.15, "elapsed_time": "2:06:58", "remaining_time": "3:44:14"} +{"current_steps": 3137, "total_steps": 8674, "loss": 0.4776325225830078, "lr": 1.514674912405447e-06, "epoch": 0.723311044500807, "percentage": 36.17, "elapsed_time": "2:07:01", "remaining_time": "3:44:12"} +{"current_steps": 3138, "total_steps": 8674, "loss": 0.4925272464752197, "lr": 1.5143479871799381e-06, "epoch": 0.7235416186303897, "percentage": 36.18, "elapsed_time": "2:07:03", "remaining_time": "3:44:09"} +{"current_steps": 3139, "total_steps": 8674, "loss": 0.3825960159301758, "lr": 1.5140209871887368e-06, "epoch": 0.7237721927599723, "percentage": 36.19, "elapsed_time": "2:07:06", "remaining_time": "3:44:07"} +{"current_steps": 3140, "total_steps": 8674, "loss": 0.5582098960876465, "lr": 1.513693912479376e-06, "epoch": 0.724002766889555, "percentage": 36.2, "elapsed_time": "2:07:08", "remaining_time": "3:44:04"} +{"current_steps": 3141, "total_steps": 8674, "loss": 0.4079757630825043, "lr": 1.5133667630993983e-06, "epoch": 0.7242333410191376, "percentage": 36.21, "elapsed_time": "2:07:10", "remaining_time": "3:44:02"} +{"current_steps": 3142, "total_steps": 8674, "loss": 0.4996449947357178, "lr": 1.513039539096359e-06, "epoch": 0.7244639151487203, "percentage": 36.22, "elapsed_time": "2:07:13", "remaining_time": "3:43:59"} 
+{"current_steps": 3143, "total_steps": 8674, "loss": 0.4822157323360443, "lr": 1.5127122405178233e-06, "epoch": 0.7246944892783029, "percentage": 36.23, "elapsed_time": "2:07:15", "remaining_time": "3:43:57"} +{"current_steps": 3144, "total_steps": 8674, "loss": 0.43123728036880493, "lr": 1.512384867411367e-06, "epoch": 0.7249250634078857, "percentage": 36.25, "elapsed_time": "2:07:18", "remaining_time": "3:43:54"} +{"current_steps": 3145, "total_steps": 8674, "loss": 0.4942808151245117, "lr": 1.5120574198245776e-06, "epoch": 0.7251556375374683, "percentage": 36.26, "elapsed_time": "2:07:20", "remaining_time": "3:43:52"} +{"current_steps": 3146, "total_steps": 8674, "loss": 0.49165093898773193, "lr": 1.5117298978050525e-06, "epoch": 0.725386211667051, "percentage": 36.27, "elapsed_time": "2:07:23", "remaining_time": "3:43:50"} +{"current_steps": 3147, "total_steps": 8674, "loss": 0.4700804352760315, "lr": 1.5114023014004008e-06, "epoch": 0.7256167857966336, "percentage": 36.28, "elapsed_time": "2:07:25", "remaining_time": "3:43:47"} +{"current_steps": 3148, "total_steps": 8674, "loss": 0.4703143835067749, "lr": 1.5110746306582413e-06, "epoch": 0.7258473599262163, "percentage": 36.29, "elapsed_time": "2:07:27", "remaining_time": "3:43:45"} +{"current_steps": 3149, "total_steps": 8674, "loss": 0.47312211990356445, "lr": 1.5107468856262048e-06, "epoch": 0.7260779340557989, "percentage": 36.3, "elapsed_time": "2:07:30", "remaining_time": "3:43:42"} +{"current_steps": 3150, "total_steps": 8674, "loss": 0.49607813358306885, "lr": 1.5104190663519323e-06, "epoch": 0.7263085081853816, "percentage": 36.32, "elapsed_time": "2:07:32", "remaining_time": "3:43:40"} +{"current_steps": 3151, "total_steps": 8674, "loss": 0.4401499629020691, "lr": 1.5100911728830754e-06, "epoch": 0.7265390823149642, "percentage": 36.33, "elapsed_time": "2:07:35", "remaining_time": "3:43:37"} +{"current_steps": 3152, "total_steps": 8674, "loss": 0.4979579448699951, "lr": 1.5097632052672973e-06, 
"epoch": 0.7267696564445469, "percentage": 36.34, "elapsed_time": "2:07:37", "remaining_time": "3:43:35"} +{"current_steps": 3153, "total_steps": 8674, "loss": 0.42917048931121826, "lr": 1.5094351635522706e-06, "epoch": 0.7270002305741295, "percentage": 36.35, "elapsed_time": "2:07:39", "remaining_time": "3:43:32"} +{"current_steps": 3154, "total_steps": 8674, "loss": 0.41664260625839233, "lr": 1.50910704778568e-06, "epoch": 0.7272308047037123, "percentage": 36.36, "elapsed_time": "2:07:42", "remaining_time": "3:43:29"} +{"current_steps": 3155, "total_steps": 8674, "loss": 0.5000253915786743, "lr": 1.5087788580152206e-06, "epoch": 0.7274613788332949, "percentage": 36.37, "elapsed_time": "2:07:44", "remaining_time": "3:43:27"} +{"current_steps": 3156, "total_steps": 8674, "loss": 0.5075093507766724, "lr": 1.5084505942885976e-06, "epoch": 0.7276919529628776, "percentage": 36.38, "elapsed_time": "2:07:47", "remaining_time": "3:43:25"} +{"current_steps": 3157, "total_steps": 8674, "loss": 0.44975680112838745, "lr": 1.508122256653528e-06, "epoch": 0.7279225270924602, "percentage": 36.4, "elapsed_time": "2:07:49", "remaining_time": "3:43:22"} +{"current_steps": 3158, "total_steps": 8674, "loss": 0.44494926929473877, "lr": 1.5077938451577383e-06, "epoch": 0.7281531012220429, "percentage": 36.41, "elapsed_time": "2:07:51", "remaining_time": "3:43:20"} +{"current_steps": 3159, "total_steps": 8674, "loss": 0.5664352178573608, "lr": 1.5074653598489673e-06, "epoch": 0.7283836753516255, "percentage": 36.42, "elapsed_time": "2:07:54", "remaining_time": "3:43:17"} +{"current_steps": 3160, "total_steps": 8674, "loss": 0.5694705247879028, "lr": 1.507136800774963e-06, "epoch": 0.7286142494812082, "percentage": 36.43, "elapsed_time": "2:07:56", "remaining_time": "3:43:14"} +{"current_steps": 3161, "total_steps": 8674, "loss": 0.5121151804924011, "lr": 1.506808167983485e-06, "epoch": 0.7288448236107908, "percentage": 36.44, "elapsed_time": "2:07:58", "remaining_time": "3:43:12"} 
+{"current_steps": 3162, "total_steps": 8674, "loss": 0.45935380458831787, "lr": 1.5064794615223034e-06, "epoch": 0.7290753977403736, "percentage": 36.45, "elapsed_time": "2:08:01", "remaining_time": "3:43:09"} +{"current_steps": 3163, "total_steps": 8674, "loss": 0.517521858215332, "lr": 1.506150681439199e-06, "epoch": 0.7293059718699562, "percentage": 36.47, "elapsed_time": "2:08:03", "remaining_time": "3:43:07"} +{"current_steps": 3164, "total_steps": 8674, "loss": 0.5078546404838562, "lr": 1.5058218277819638e-06, "epoch": 0.7295365459995389, "percentage": 36.48, "elapsed_time": "2:08:06", "remaining_time": "3:43:04"} +{"current_steps": 3165, "total_steps": 8674, "loss": 0.47892552614212036, "lr": 1.5054929005983992e-06, "epoch": 0.7297671201291215, "percentage": 36.49, "elapsed_time": "2:08:08", "remaining_time": "3:43:02"} +{"current_steps": 3166, "total_steps": 8674, "loss": 0.48825597763061523, "lr": 1.5051638999363185e-06, "epoch": 0.7299976942587042, "percentage": 36.5, "elapsed_time": "2:08:10", "remaining_time": "3:42:59"} +{"current_steps": 3167, "total_steps": 8674, "loss": 0.488031804561615, "lr": 1.5048348258435457e-06, "epoch": 0.7302282683882868, "percentage": 36.51, "elapsed_time": "2:08:13", "remaining_time": "3:42:57"} +{"current_steps": 3168, "total_steps": 8674, "loss": 0.4669504761695862, "lr": 1.5045056783679143e-06, "epoch": 0.7304588425178695, "percentage": 36.52, "elapsed_time": "2:08:15", "remaining_time": "3:42:54"} +{"current_steps": 3169, "total_steps": 8674, "loss": 0.45620614290237427, "lr": 1.5041764575572695e-06, "epoch": 0.7306894166474521, "percentage": 36.53, "elapsed_time": "2:08:17", "remaining_time": "3:42:52"} +{"current_steps": 3170, "total_steps": 8674, "loss": 0.4271177649497986, "lr": 1.5038471634594667e-06, "epoch": 0.7309199907770348, "percentage": 36.55, "elapsed_time": "2:08:20", "remaining_time": "3:42:49"} +{"current_steps": 3171, "total_steps": 8674, "loss": 0.5170531272888184, "lr": 1.5035177961223726e-06, 
"epoch": 0.7311505649066175, "percentage": 36.56, "elapsed_time": "2:08:22", "remaining_time": "3:42:47"} +{"current_steps": 3172, "total_steps": 8674, "loss": 0.4261493682861328, "lr": 1.5031883555938638e-06, "epoch": 0.7313811390362002, "percentage": 36.57, "elapsed_time": "2:08:25", "remaining_time": "3:42:45"} +{"current_steps": 3173, "total_steps": 8674, "loss": 0.4958994686603546, "lr": 1.502858841921828e-06, "epoch": 0.7316117131657828, "percentage": 36.58, "elapsed_time": "2:08:27", "remaining_time": "3:42:42"} +{"current_steps": 3174, "total_steps": 8674, "loss": 0.49798572063446045, "lr": 1.502529255154163e-06, "epoch": 0.7318422872953655, "percentage": 36.59, "elapsed_time": "2:08:29", "remaining_time": "3:42:39"} +{"current_steps": 3175, "total_steps": 8674, "loss": 0.4067850708961487, "lr": 1.502199595338778e-06, "epoch": 0.7320728614249481, "percentage": 36.6, "elapsed_time": "2:08:32", "remaining_time": "3:42:37"} +{"current_steps": 3176, "total_steps": 8674, "loss": 0.4680994153022766, "lr": 1.5018698625235916e-06, "epoch": 0.7323034355545308, "percentage": 36.62, "elapsed_time": "2:08:34", "remaining_time": "3:42:34"} +{"current_steps": 3177, "total_steps": 8674, "loss": 0.49181580543518066, "lr": 1.501540056756535e-06, "epoch": 0.7325340096841134, "percentage": 36.63, "elapsed_time": "2:08:36", "remaining_time": "3:42:32"} +{"current_steps": 3178, "total_steps": 8674, "loss": 0.5425546169281006, "lr": 1.501210178085548e-06, "epoch": 0.7327645838136961, "percentage": 36.64, "elapsed_time": "2:08:39", "remaining_time": "3:42:29"} +{"current_steps": 3179, "total_steps": 8674, "loss": 0.4869355261325836, "lr": 1.500880226558582e-06, "epoch": 0.7329951579432787, "percentage": 36.65, "elapsed_time": "2:08:41", "remaining_time": "3:42:27"} +{"current_steps": 3180, "total_steps": 8674, "loss": 0.5157885551452637, "lr": 1.500550202223599e-06, "epoch": 0.7332257320728615, "percentage": 36.66, "elapsed_time": "2:08:44", "remaining_time": "3:42:25"} 
+{"current_steps": 3181, "total_steps": 8674, "loss": 0.528350293636322, "lr": 1.5002201051285707e-06, "epoch": 0.7334563062024441, "percentage": 36.67, "elapsed_time": "2:08:46", "remaining_time": "3:42:22"} +{"current_steps": 3182, "total_steps": 8674, "loss": 0.3963279128074646, "lr": 1.499889935321481e-06, "epoch": 0.7336868803320268, "percentage": 36.68, "elapsed_time": "2:08:49", "remaining_time": "3:42:20"} +{"current_steps": 3183, "total_steps": 8674, "loss": 0.36777108907699585, "lr": 1.499559692850323e-06, "epoch": 0.7339174544616094, "percentage": 36.7, "elapsed_time": "2:08:51", "remaining_time": "3:42:17"} +{"current_steps": 3184, "total_steps": 8674, "loss": 0.4592905044555664, "lr": 1.4992293777631004e-06, "epoch": 0.7341480285911921, "percentage": 36.71, "elapsed_time": "2:08:53", "remaining_time": "3:42:14"} +{"current_steps": 3185, "total_steps": 8674, "loss": 0.458257257938385, "lr": 1.4988989901078285e-06, "epoch": 0.7343786027207747, "percentage": 36.72, "elapsed_time": "2:08:56", "remaining_time": "3:42:12"} +{"current_steps": 3186, "total_steps": 8674, "loss": 0.4844989478588104, "lr": 1.4985685299325316e-06, "epoch": 0.7346091768503574, "percentage": 36.73, "elapsed_time": "2:08:58", "remaining_time": "3:42:09"} +{"current_steps": 3187, "total_steps": 8674, "loss": 0.381417453289032, "lr": 1.498237997285247e-06, "epoch": 0.73483975097994, "percentage": 36.74, "elapsed_time": "2:09:00", "remaining_time": "3:42:07"} +{"current_steps": 3188, "total_steps": 8674, "loss": 0.42452555894851685, "lr": 1.4979073922140196e-06, "epoch": 0.7350703251095227, "percentage": 36.75, "elapsed_time": "2:09:03", "remaining_time": "3:42:05"} +{"current_steps": 3189, "total_steps": 8674, "loss": 0.4660685956478119, "lr": 1.4975767147669063e-06, "epoch": 0.7353008992391054, "percentage": 36.77, "elapsed_time": "2:09:05", "remaining_time": "3:42:02"} +{"current_steps": 3190, "total_steps": 8674, "loss": 0.4332653880119324, "lr": 1.4972459649919748e-06, "epoch": 
0.7355314733686881, "percentage": 36.78, "elapsed_time": "2:09:08", "remaining_time": "3:42:00"} +{"current_steps": 3191, "total_steps": 8674, "loss": 0.5580132007598877, "lr": 1.496915142937303e-06, "epoch": 0.7357620474982707, "percentage": 36.79, "elapsed_time": "2:09:10", "remaining_time": "3:41:57"} +{"current_steps": 3192, "total_steps": 8674, "loss": 0.43711793422698975, "lr": 1.4965842486509792e-06, "epoch": 0.7359926216278534, "percentage": 36.8, "elapsed_time": "2:09:12", "remaining_time": "3:41:55"} +{"current_steps": 3193, "total_steps": 8674, "loss": 0.44969767332077026, "lr": 1.496253282181102e-06, "epoch": 0.736223195757436, "percentage": 36.81, "elapsed_time": "2:09:15", "remaining_time": "3:41:52"} +{"current_steps": 3194, "total_steps": 8674, "loss": 0.5288668870925903, "lr": 1.4959222435757809e-06, "epoch": 0.7364537698870187, "percentage": 36.82, "elapsed_time": "2:09:17", "remaining_time": "3:41:50"} +{"current_steps": 3195, "total_steps": 8674, "loss": 0.45993220806121826, "lr": 1.4955911328831353e-06, "epoch": 0.7366843440166013, "percentage": 36.83, "elapsed_time": "2:09:20", "remaining_time": "3:41:47"} +{"current_steps": 3196, "total_steps": 8674, "loss": 0.5360512733459473, "lr": 1.4952599501512963e-06, "epoch": 0.736914918146184, "percentage": 36.85, "elapsed_time": "2:09:22", "remaining_time": "3:41:44"} +{"current_steps": 3197, "total_steps": 8674, "loss": 0.3923282325267792, "lr": 1.4949286954284044e-06, "epoch": 0.7371454922757666, "percentage": 36.86, "elapsed_time": "2:09:25", "remaining_time": "3:41:42"} +{"current_steps": 3198, "total_steps": 8674, "loss": 0.5051449537277222, "lr": 1.4945973687626103e-06, "epoch": 0.7373760664053494, "percentage": 36.87, "elapsed_time": "2:09:27", "remaining_time": "3:41:40"} +{"current_steps": 3199, "total_steps": 8674, "loss": 0.5035187602043152, "lr": 1.4942659702020763e-06, "epoch": 0.737606640534932, "percentage": 36.88, "elapsed_time": "2:09:29", "remaining_time": "3:41:37"} 
+{"current_steps": 3200, "total_steps": 8674, "loss": 0.4922195076942444, "lr": 1.4939344997949742e-06, "epoch": 0.7378372146645147, "percentage": 36.89, "elapsed_time": "2:09:32", "remaining_time": "3:41:35"} +{"current_steps": 3201, "total_steps": 8674, "loss": 0.49664247035980225, "lr": 1.4936029575894865e-06, "epoch": 0.7380677887940973, "percentage": 36.9, "elapsed_time": "2:09:36", "remaining_time": "3:41:35"} +{"current_steps": 3202, "total_steps": 8674, "loss": 0.4240155816078186, "lr": 1.4932713436338065e-06, "epoch": 0.73829836292368, "percentage": 36.91, "elapsed_time": "2:09:38", "remaining_time": "3:41:32"} +{"current_steps": 3203, "total_steps": 8674, "loss": 0.3830781579017639, "lr": 1.4929396579761376e-06, "epoch": 0.7385289370532626, "percentage": 36.93, "elapsed_time": "2:09:40", "remaining_time": "3:41:30"} +{"current_steps": 3204, "total_steps": 8674, "loss": 0.37983447313308716, "lr": 1.4926079006646936e-06, "epoch": 0.7387595111828453, "percentage": 36.94, "elapsed_time": "2:09:43", "remaining_time": "3:41:27"} +{"current_steps": 3205, "total_steps": 8674, "loss": 0.4680769443511963, "lr": 1.4922760717476989e-06, "epoch": 0.7389900853124279, "percentage": 36.95, "elapsed_time": "2:09:45", "remaining_time": "3:41:25"} +{"current_steps": 3206, "total_steps": 8674, "loss": 0.3801664710044861, "lr": 1.4919441712733878e-06, "epoch": 0.7392206594420107, "percentage": 36.96, "elapsed_time": "2:09:48", "remaining_time": "3:41:22"} +{"current_steps": 3207, "total_steps": 8674, "loss": 0.5506627559661865, "lr": 1.4916121992900062e-06, "epoch": 0.7394512335715933, "percentage": 36.97, "elapsed_time": "2:09:50", "remaining_time": "3:41:20"} +{"current_steps": 3208, "total_steps": 8674, "loss": 0.4976215660572052, "lr": 1.4912801558458087e-06, "epoch": 0.739681807701176, "percentage": 36.98, "elapsed_time": "2:09:52", "remaining_time": "3:41:18"} +{"current_steps": 3209, "total_steps": 8674, "loss": 0.42806485295295715, "lr": 1.4909480409890615e-06, 
"epoch": 0.7399123818307586, "percentage": 37.0, "elapsed_time": "2:09:55", "remaining_time": "3:41:15"} +{"current_steps": 3210, "total_steps": 8674, "loss": 0.3850712180137634, "lr": 1.4906158547680413e-06, "epoch": 0.7401429559603413, "percentage": 37.01, "elapsed_time": "2:09:57", "remaining_time": "3:41:12"} +{"current_steps": 3211, "total_steps": 8674, "loss": 0.4356945753097534, "lr": 1.4902835972310342e-06, "epoch": 0.7403735300899239, "percentage": 37.02, "elapsed_time": "2:09:59", "remaining_time": "3:41:10"} +{"current_steps": 3212, "total_steps": 8674, "loss": 0.4806904196739197, "lr": 1.4899512684263373e-06, "epoch": 0.7406041042195066, "percentage": 37.03, "elapsed_time": "2:10:02", "remaining_time": "3:41:07"} +{"current_steps": 3213, "total_steps": 8674, "loss": 0.544597327709198, "lr": 1.489618868402258e-06, "epoch": 0.7408346783490892, "percentage": 37.04, "elapsed_time": "2:10:04", "remaining_time": "3:41:05"} +{"current_steps": 3214, "total_steps": 8674, "loss": 0.39847469329833984, "lr": 1.4892863972071141e-06, "epoch": 0.7410652524786719, "percentage": 37.05, "elapsed_time": "2:10:07", "remaining_time": "3:41:03"} +{"current_steps": 3215, "total_steps": 8674, "loss": 0.4959847331047058, "lr": 1.4889538548892336e-06, "epoch": 0.7412958266082545, "percentage": 37.06, "elapsed_time": "2:10:09", "remaining_time": "3:41:00"} +{"current_steps": 3216, "total_steps": 8674, "loss": 0.3839089870452881, "lr": 1.488621241496955e-06, "epoch": 0.7415264007378373, "percentage": 37.08, "elapsed_time": "2:10:12", "remaining_time": "3:40:58"} +{"current_steps": 3217, "total_steps": 8674, "loss": 0.5187599658966064, "lr": 1.4882885570786266e-06, "epoch": 0.7417569748674199, "percentage": 37.09, "elapsed_time": "2:10:14", "remaining_time": "3:40:55"} +{"current_steps": 3218, "total_steps": 8674, "loss": 0.45735663175582886, "lr": 1.4879558016826082e-06, "epoch": 0.7419875489970026, "percentage": 37.1, "elapsed_time": "2:10:16", "remaining_time": "3:40:53"} 
+{"current_steps": 3219, "total_steps": 8674, "loss": 0.5635267496109009, "lr": 1.4876229753572687e-06, "epoch": 0.7422181231265852, "percentage": 37.11, "elapsed_time": "2:10:19", "remaining_time": "3:40:50"} +{"current_steps": 3220, "total_steps": 8674, "loss": 0.5255833268165588, "lr": 1.4872900781509876e-06, "epoch": 0.7424486972561679, "percentage": 37.12, "elapsed_time": "2:10:21", "remaining_time": "3:40:48"} +{"current_steps": 3221, "total_steps": 8674, "loss": 0.4563497304916382, "lr": 1.486957110112155e-06, "epoch": 0.7426792713857505, "percentage": 37.13, "elapsed_time": "2:10:23", "remaining_time": "3:40:45"} +{"current_steps": 3222, "total_steps": 8674, "loss": 0.3737669885158539, "lr": 1.4866240712891714e-06, "epoch": 0.7429098455153332, "percentage": 37.15, "elapsed_time": "2:10:26", "remaining_time": "3:40:43"} +{"current_steps": 3223, "total_steps": 8674, "loss": 0.48965659737586975, "lr": 1.4862909617304473e-06, "epoch": 0.7431404196449158, "percentage": 37.16, "elapsed_time": "2:10:28", "remaining_time": "3:40:40"} +{"current_steps": 3224, "total_steps": 8674, "loss": 0.40867483615875244, "lr": 1.4859577814844036e-06, "epoch": 0.7433709937744986, "percentage": 37.17, "elapsed_time": "2:10:31", "remaining_time": "3:40:38"} +{"current_steps": 3225, "total_steps": 8674, "loss": 0.5870566368103027, "lr": 1.4856245305994711e-06, "epoch": 0.7436015679040812, "percentage": 37.18, "elapsed_time": "2:10:33", "remaining_time": "3:40:35"} +{"current_steps": 3226, "total_steps": 8674, "loss": 0.5424025654792786, "lr": 1.4852912091240914e-06, "epoch": 0.7438321420336638, "percentage": 37.19, "elapsed_time": "2:10:35", "remaining_time": "3:40:33"} +{"current_steps": 3227, "total_steps": 8674, "loss": 0.5305285453796387, "lr": 1.4849578171067166e-06, "epoch": 0.7440627161632465, "percentage": 37.2, "elapsed_time": "2:10:38", "remaining_time": "3:40:30"} +{"current_steps": 3228, "total_steps": 8674, "loss": 0.4189227819442749, "lr": 1.4846243545958078e-06, 
"epoch": 0.7442932902928291, "percentage": 37.21, "elapsed_time": "2:10:40", "remaining_time": "3:40:28"} +{"current_steps": 3229, "total_steps": 8674, "loss": 0.44568121433258057, "lr": 1.4842908216398379e-06, "epoch": 0.7445238644224118, "percentage": 37.23, "elapsed_time": "2:10:42", "remaining_time": "3:40:25"} +{"current_steps": 3230, "total_steps": 8674, "loss": 0.5177523493766785, "lr": 1.4839572182872883e-06, "epoch": 0.7447544385519944, "percentage": 37.24, "elapsed_time": "2:10:45", "remaining_time": "3:40:23"} +{"current_steps": 3231, "total_steps": 8674, "loss": 0.5100630521774292, "lr": 1.4836235445866528e-06, "epoch": 0.7449850126815771, "percentage": 37.25, "elapsed_time": "2:10:47", "remaining_time": "3:40:20"} +{"current_steps": 3232, "total_steps": 8674, "loss": 0.45731791853904724, "lr": 1.4832898005864336e-06, "epoch": 0.7452155868111597, "percentage": 37.26, "elapsed_time": "2:10:50", "remaining_time": "3:40:18"} +{"current_steps": 3233, "total_steps": 8674, "loss": 0.5161736011505127, "lr": 1.4829559863351437e-06, "epoch": 0.7454461609407425, "percentage": 37.27, "elapsed_time": "2:10:52", "remaining_time": "3:40:15"} +{"current_steps": 3234, "total_steps": 8674, "loss": 0.4778611660003662, "lr": 1.4826221018813067e-06, "epoch": 0.7456767350703251, "percentage": 37.28, "elapsed_time": "2:10:54", "remaining_time": "3:40:13"} +{"current_steps": 3235, "total_steps": 8674, "loss": 0.467506468296051, "lr": 1.482288147273456e-06, "epoch": 0.7459073091999078, "percentage": 37.3, "elapsed_time": "2:10:57", "remaining_time": "3:40:10"} +{"current_steps": 3236, "total_steps": 8674, "loss": 0.5061084032058716, "lr": 1.4819541225601352e-06, "epoch": 0.7461378833294904, "percentage": 37.31, "elapsed_time": "2:10:59", "remaining_time": "3:40:08"} +{"current_steps": 3237, "total_steps": 8674, "loss": 0.5066365599632263, "lr": 1.4816200277898983e-06, "epoch": 0.7463684574590731, "percentage": 37.32, "elapsed_time": "2:11:02", "remaining_time": "3:40:05"} 
+{"current_steps": 3238, "total_steps": 8674, "loss": 0.44285398721694946, "lr": 1.4812858630113093e-06, "epoch": 0.7465990315886557, "percentage": 37.33, "elapsed_time": "2:11:04", "remaining_time": "3:40:03"} +{"current_steps": 3239, "total_steps": 8674, "loss": 0.5325936079025269, "lr": 1.4809516282729426e-06, "epoch": 0.7468296057182384, "percentage": 37.34, "elapsed_time": "2:11:07", "remaining_time": "3:40:00"} +{"current_steps": 3240, "total_steps": 8674, "loss": 0.37296950817108154, "lr": 1.4806173236233818e-06, "epoch": 0.747060179847821, "percentage": 37.35, "elapsed_time": "2:11:09", "remaining_time": "3:39:58"} +{"current_steps": 3241, "total_steps": 8674, "loss": 0.4596887230873108, "lr": 1.4802829491112228e-06, "epoch": 0.7472907539774037, "percentage": 37.36, "elapsed_time": "2:11:11", "remaining_time": "3:39:55"} +{"current_steps": 3242, "total_steps": 8674, "loss": 0.4344385266304016, "lr": 1.4799485047850693e-06, "epoch": 0.7475213281069863, "percentage": 37.38, "elapsed_time": "2:11:14", "remaining_time": "3:39:53"} +{"current_steps": 3243, "total_steps": 8674, "loss": 0.458631306886673, "lr": 1.4796139906935365e-06, "epoch": 0.7477519022365691, "percentage": 37.39, "elapsed_time": "2:11:16", "remaining_time": "3:39:50"} +{"current_steps": 3244, "total_steps": 8674, "loss": 0.5425032377243042, "lr": 1.4792794068852494e-06, "epoch": 0.7479824763661517, "percentage": 37.4, "elapsed_time": "2:11:18", "remaining_time": "3:39:48"} +{"current_steps": 3245, "total_steps": 8674, "loss": 0.4240065813064575, "lr": 1.478944753408843e-06, "epoch": 0.7482130504957344, "percentage": 37.41, "elapsed_time": "2:11:21", "remaining_time": "3:39:45"} +{"current_steps": 3246, "total_steps": 8674, "loss": 0.5533365607261658, "lr": 1.478610030312963e-06, "epoch": 0.748443624625317, "percentage": 37.42, "elapsed_time": "2:11:23", "remaining_time": "3:39:43"} +{"current_steps": 3247, "total_steps": 8674, "loss": 0.4089345335960388, "lr": 1.4782752376462647e-06, "epoch": 
0.7486741987548997, "percentage": 37.43, "elapsed_time": "2:11:26", "remaining_time": "3:39:40"} +{"current_steps": 3248, "total_steps": 8674, "loss": 0.5098259449005127, "lr": 1.4779403754574131e-06, "epoch": 0.7489047728844823, "percentage": 37.45, "elapsed_time": "2:11:28", "remaining_time": "3:39:38"} +{"current_steps": 3249, "total_steps": 8674, "loss": 0.4615677297115326, "lr": 1.4776054437950842e-06, "epoch": 0.749135347014065, "percentage": 37.46, "elapsed_time": "2:11:30", "remaining_time": "3:39:35"} +{"current_steps": 3250, "total_steps": 8674, "loss": 0.460266649723053, "lr": 1.4772704427079639e-06, "epoch": 0.7493659211436476, "percentage": 37.47, "elapsed_time": "2:11:33", "remaining_time": "3:39:33"} +{"current_steps": 3251, "total_steps": 8674, "loss": 0.4727064371109009, "lr": 1.4769353722447476e-06, "epoch": 0.7495964952732304, "percentage": 37.48, "elapsed_time": "2:11:35", "remaining_time": "3:39:30"} +{"current_steps": 3252, "total_steps": 8674, "loss": 0.4733152985572815, "lr": 1.4766002324541411e-06, "epoch": 0.749827069402813, "percentage": 37.49, "elapsed_time": "2:11:38", "remaining_time": "3:39:28"} +{"current_steps": 3253, "total_steps": 8674, "loss": 0.5055218935012817, "lr": 1.4762650233848609e-06, "epoch": 0.7500576435323957, "percentage": 37.5, "elapsed_time": "2:11:40", "remaining_time": "3:39:25"} +{"current_steps": 3254, "total_steps": 8674, "loss": 0.6129124164581299, "lr": 1.4759297450856324e-06, "epoch": 0.7502882176619783, "percentage": 37.51, "elapsed_time": "2:11:42", "remaining_time": "3:39:23"} +{"current_steps": 3255, "total_steps": 8674, "loss": 0.46197545528411865, "lr": 1.4755943976051926e-06, "epoch": 0.750518791791561, "percentage": 37.53, "elapsed_time": "2:11:45", "remaining_time": "3:39:21"} +{"current_steps": 3256, "total_steps": 8674, "loss": 0.5227653980255127, "lr": 1.4752589809922868e-06, "epoch": 0.7507493659211436, "percentage": 37.54, "elapsed_time": "2:11:47", "remaining_time": "3:39:18"} 
+{"current_steps": 3257, "total_steps": 8674, "loss": 0.5189518928527832, "lr": 1.4749234952956715e-06, "epoch": 0.7509799400507263, "percentage": 37.55, "elapsed_time": "2:11:50", "remaining_time": "3:39:16"} +{"current_steps": 3258, "total_steps": 8674, "loss": 0.4850584864616394, "lr": 1.474587940564113e-06, "epoch": 0.7512105141803089, "percentage": 37.56, "elapsed_time": "2:11:52", "remaining_time": "3:39:13"} +{"current_steps": 3259, "total_steps": 8674, "loss": 0.5218943357467651, "lr": 1.4742523168463876e-06, "epoch": 0.7514410883098916, "percentage": 37.57, "elapsed_time": "2:11:54", "remaining_time": "3:39:11"} +{"current_steps": 3260, "total_steps": 8674, "loss": 0.4311223030090332, "lr": 1.4739166241912814e-06, "epoch": 0.7516716624394743, "percentage": 37.58, "elapsed_time": "2:11:57", "remaining_time": "3:39:08"} +{"current_steps": 3261, "total_steps": 8674, "loss": 0.525306224822998, "lr": 1.473580862647591e-06, "epoch": 0.751902236569057, "percentage": 37.6, "elapsed_time": "2:11:59", "remaining_time": "3:39:05"} +{"current_steps": 3262, "total_steps": 8674, "loss": 0.506609320640564, "lr": 1.4732450322641225e-06, "epoch": 0.7521328106986396, "percentage": 37.61, "elapsed_time": "2:12:02", "remaining_time": "3:39:03"} +{"current_steps": 3263, "total_steps": 8674, "loss": 0.5477846264839172, "lr": 1.4729091330896926e-06, "epoch": 0.7523633848282223, "percentage": 37.62, "elapsed_time": "2:12:04", "remaining_time": "3:39:00"} +{"current_steps": 3264, "total_steps": 8674, "loss": 0.48802629113197327, "lr": 1.4725731651731268e-06, "epoch": 0.7525939589578049, "percentage": 37.63, "elapsed_time": "2:12:07", "remaining_time": "3:38:58"} +{"current_steps": 3265, "total_steps": 8674, "loss": 0.4774906635284424, "lr": 1.4722371285632626e-06, "epoch": 0.7528245330873876, "percentage": 37.64, "elapsed_time": "2:12:09", "remaining_time": "3:38:56"} +{"current_steps": 3266, "total_steps": 8674, "loss": 0.4220488667488098, "lr": 1.4719010233089458e-06, "epoch": 
0.7530551072169702, "percentage": 37.65, "elapsed_time": "2:12:11", "remaining_time": "3:38:53"} +{"current_steps": 3267, "total_steps": 8674, "loss": 0.43912187218666077, "lr": 1.4715648494590324e-06, "epoch": 0.7532856813465529, "percentage": 37.66, "elapsed_time": "2:12:14", "remaining_time": "3:38:51"} +{"current_steps": 3268, "total_steps": 8674, "loss": 0.5302494764328003, "lr": 1.4712286070623892e-06, "epoch": 0.7535162554761355, "percentage": 37.68, "elapsed_time": "2:12:16", "remaining_time": "3:38:48"} +{"current_steps": 3269, "total_steps": 8674, "loss": 0.4800306260585785, "lr": 1.4708922961678923e-06, "epoch": 0.7537468296057183, "percentage": 37.69, "elapsed_time": "2:12:18", "remaining_time": "3:38:46"} +{"current_steps": 3270, "total_steps": 8674, "loss": 0.3993161618709564, "lr": 1.4705559168244275e-06, "epoch": 0.7539774037353009, "percentage": 37.7, "elapsed_time": "2:12:21", "remaining_time": "3:38:43"} +{"current_steps": 3271, "total_steps": 8674, "loss": 0.37037837505340576, "lr": 1.4702194690808916e-06, "epoch": 0.7542079778648836, "percentage": 37.71, "elapsed_time": "2:12:23", "remaining_time": "3:38:41"} +{"current_steps": 3272, "total_steps": 8674, "loss": 0.44288602471351624, "lr": 1.4698829529861898e-06, "epoch": 0.7544385519944662, "percentage": 37.72, "elapsed_time": "2:12:26", "remaining_time": "3:38:39"} +{"current_steps": 3273, "total_steps": 8674, "loss": 0.5480727553367615, "lr": 1.469546368589239e-06, "epoch": 0.7546691261240489, "percentage": 37.73, "elapsed_time": "2:12:28", "remaining_time": "3:38:36"} +{"current_steps": 3274, "total_steps": 8674, "loss": 0.4964104890823364, "lr": 1.4692097159389649e-06, "epoch": 0.7548997002536315, "percentage": 37.74, "elapsed_time": "2:12:30", "remaining_time": "3:38:33"} +{"current_steps": 3275, "total_steps": 8674, "loss": 0.4744144082069397, "lr": 1.4688729950843033e-06, "epoch": 0.7551302743832142, "percentage": 37.76, "elapsed_time": "2:12:33", "remaining_time": "3:38:32"} 
+{"current_steps": 3276, "total_steps": 8674, "loss": 0.44675350189208984, "lr": 1.4685362060741997e-06, "epoch": 0.7553608485127968, "percentage": 37.77, "elapsed_time": "2:12:36", "remaining_time": "3:38:29"} +{"current_steps": 3277, "total_steps": 8674, "loss": 0.45261216163635254, "lr": 1.46819934895761e-06, "epoch": 0.7555914226423796, "percentage": 37.78, "elapsed_time": "2:12:38", "remaining_time": "3:38:27"} +{"current_steps": 3278, "total_steps": 8674, "loss": 0.4180977940559387, "lr": 1.4678624237835005e-06, "epoch": 0.7558219967719622, "percentage": 37.79, "elapsed_time": "2:12:40", "remaining_time": "3:38:24"} +{"current_steps": 3279, "total_steps": 8674, "loss": 0.39477843046188354, "lr": 1.4675254306008456e-06, "epoch": 0.7560525709015449, "percentage": 37.8, "elapsed_time": "2:12:43", "remaining_time": "3:38:21"} +{"current_steps": 3280, "total_steps": 8674, "loss": 0.5033801198005676, "lr": 1.467188369458631e-06, "epoch": 0.7562831450311275, "percentage": 37.81, "elapsed_time": "2:12:45", "remaining_time": "3:38:19"} +{"current_steps": 3281, "total_steps": 8674, "loss": 0.5719846487045288, "lr": 1.4668512404058527e-06, "epoch": 0.7565137191607102, "percentage": 37.83, "elapsed_time": "2:12:48", "remaining_time": "3:38:17"} +{"current_steps": 3282, "total_steps": 8674, "loss": 0.5198945999145508, "lr": 1.4665140434915147e-06, "epoch": 0.7567442932902928, "percentage": 37.84, "elapsed_time": "2:12:50", "remaining_time": "3:38:14"} +{"current_steps": 3283, "total_steps": 8674, "loss": 0.4641912579536438, "lr": 1.4661767787646326e-06, "epoch": 0.7569748674198755, "percentage": 37.85, "elapsed_time": "2:12:52", "remaining_time": "3:38:12"} +{"current_steps": 3284, "total_steps": 8674, "loss": 0.44070225954055786, "lr": 1.4658394462742309e-06, "epoch": 0.7572054415494581, "percentage": 37.86, "elapsed_time": "2:12:55", "remaining_time": "3:38:09"} +{"current_steps": 3285, "total_steps": 8674, "loss": 0.4324581027030945, "lr": 1.465502046069345e-06, 
"epoch": 0.7574360156790408, "percentage": 37.87, "elapsed_time": "2:12:57", "remaining_time": "3:38:07"} +{"current_steps": 3286, "total_steps": 8674, "loss": 0.5789060592651367, "lr": 1.4651645781990187e-06, "epoch": 0.7576665898086234, "percentage": 37.88, "elapsed_time": "2:13:00", "remaining_time": "3:38:04"} +{"current_steps": 3287, "total_steps": 8674, "loss": 0.45642149448394775, "lr": 1.4648270427123068e-06, "epoch": 0.7578971639382062, "percentage": 37.89, "elapsed_time": "2:13:02", "remaining_time": "3:38:02"} +{"current_steps": 3288, "total_steps": 8674, "loss": 0.4587763547897339, "lr": 1.4644894396582732e-06, "epoch": 0.7581277380677888, "percentage": 37.91, "elapsed_time": "2:13:04", "remaining_time": "3:37:59"} +{"current_steps": 3289, "total_steps": 8674, "loss": 0.5472866892814636, "lr": 1.4641517690859924e-06, "epoch": 0.7583583121973715, "percentage": 37.92, "elapsed_time": "2:13:07", "remaining_time": "3:37:57"} +{"current_steps": 3290, "total_steps": 8674, "loss": 0.5274207592010498, "lr": 1.4638140310445476e-06, "epoch": 0.7585888863269541, "percentage": 37.93, "elapsed_time": "2:13:09", "remaining_time": "3:37:54"} +{"current_steps": 3291, "total_steps": 8674, "loss": 0.46280741691589355, "lr": 1.4634762255830326e-06, "epoch": 0.7588194604565368, "percentage": 37.94, "elapsed_time": "2:13:12", "remaining_time": "3:37:52"} +{"current_steps": 3292, "total_steps": 8674, "loss": 0.5395090579986572, "lr": 1.4631383527505515e-06, "epoch": 0.7590500345861194, "percentage": 37.95, "elapsed_time": "2:13:14", "remaining_time": "3:37:49"} +{"current_steps": 3293, "total_steps": 8674, "loss": 0.49923229217529297, "lr": 1.4628004125962168e-06, "epoch": 0.7592806087157021, "percentage": 37.96, "elapsed_time": "2:13:16", "remaining_time": "3:37:47"} +{"current_steps": 3294, "total_steps": 8674, "loss": 0.5414037108421326, "lr": 1.462462405169152e-06, "epoch": 0.7595111828452847, "percentage": 37.98, "elapsed_time": "2:13:19", "remaining_time": "3:37:44"} 
+{"current_steps": 3295, "total_steps": 8674, "loss": 0.4246688485145569, "lr": 1.4621243305184895e-06, "epoch": 0.7597417569748675, "percentage": 37.99, "elapsed_time": "2:13:21", "remaining_time": "3:37:42"} +{"current_steps": 3296, "total_steps": 8674, "loss": 0.4997994005680084, "lr": 1.461786188693372e-06, "epoch": 0.7599723311044501, "percentage": 38.0, "elapsed_time": "2:13:23", "remaining_time": "3:37:39"} +{"current_steps": 3297, "total_steps": 8674, "loss": 0.4571123719215393, "lr": 1.4614479797429523e-06, "epoch": 0.7602029052340328, "percentage": 38.01, "elapsed_time": "2:13:26", "remaining_time": "3:37:37"} +{"current_steps": 3298, "total_steps": 8674, "loss": 0.5178083181381226, "lr": 1.4611097037163917e-06, "epoch": 0.7604334793636154, "percentage": 38.02, "elapsed_time": "2:13:28", "remaining_time": "3:37:35"} +{"current_steps": 3299, "total_steps": 8674, "loss": 0.538001298904419, "lr": 1.4607713606628625e-06, "epoch": 0.7606640534931981, "percentage": 38.03, "elapsed_time": "2:13:31", "remaining_time": "3:37:32"} +{"current_steps": 3300, "total_steps": 8674, "loss": 0.45941218733787537, "lr": 1.4604329506315464e-06, "epoch": 0.7608946276227807, "percentage": 38.04, "elapsed_time": "2:13:33", "remaining_time": "3:37:29"} +{"current_steps": 3301, "total_steps": 8674, "loss": 0.619648277759552, "lr": 1.4600944736716344e-06, "epoch": 0.7611252017523634, "percentage": 38.06, "elapsed_time": "2:13:37", "remaining_time": "3:37:29"} +{"current_steps": 3302, "total_steps": 8674, "loss": 0.4035170376300812, "lr": 1.4597559298323281e-06, "epoch": 0.761355775881946, "percentage": 38.07, "elapsed_time": "2:13:39", "remaining_time": "3:37:27"} +{"current_steps": 3303, "total_steps": 8674, "loss": 0.48657041788101196, "lr": 1.4594173191628374e-06, "epoch": 0.7615863500115287, "percentage": 38.08, "elapsed_time": "2:13:42", "remaining_time": "3:37:24"} +{"current_steps": 3304, "total_steps": 8674, "loss": 0.43324801325798035, "lr": 1.4590786417123838e-06, 
"epoch": 0.7618169241411114, "percentage": 38.09, "elapsed_time": "2:13:44", "remaining_time": "3:37:22"} +{"current_steps": 3305, "total_steps": 8674, "loss": 0.5020644664764404, "lr": 1.4587398975301968e-06, "epoch": 0.7620474982706941, "percentage": 38.1, "elapsed_time": "2:13:47", "remaining_time": "3:37:20"} +{"current_steps": 3306, "total_steps": 8674, "loss": 0.4123230576515198, "lr": 1.4584010866655163e-06, "epoch": 0.7622780724002767, "percentage": 38.11, "elapsed_time": "2:13:49", "remaining_time": "3:37:18"} +{"current_steps": 3307, "total_steps": 8674, "loss": 0.5110459327697754, "lr": 1.4580622091675925e-06, "epoch": 0.7625086465298594, "percentage": 38.13, "elapsed_time": "2:13:52", "remaining_time": "3:37:15"} +{"current_steps": 3308, "total_steps": 8674, "loss": 0.3956744074821472, "lr": 1.4577232650856842e-06, "epoch": 0.762739220659442, "percentage": 38.14, "elapsed_time": "2:13:54", "remaining_time": "3:37:12"} +{"current_steps": 3309, "total_steps": 8674, "loss": 0.44418880343437195, "lr": 1.4573842544690602e-06, "epoch": 0.7629697947890247, "percentage": 38.15, "elapsed_time": "2:13:56", "remaining_time": "3:37:10"} +{"current_steps": 3310, "total_steps": 8674, "loss": 0.46690821647644043, "lr": 1.4570451773669993e-06, "epoch": 0.7632003689186073, "percentage": 38.16, "elapsed_time": "2:13:59", "remaining_time": "3:37:07"} +{"current_steps": 3311, "total_steps": 8674, "loss": 0.5631324052810669, "lr": 1.45670603382879e-06, "epoch": 0.76343094304819, "percentage": 38.17, "elapsed_time": "2:14:01", "remaining_time": "3:37:05"} +{"current_steps": 3312, "total_steps": 8674, "loss": 0.42355209589004517, "lr": 1.4563668239037301e-06, "epoch": 0.7636615171777726, "percentage": 38.18, "elapsed_time": "2:14:04", "remaining_time": "3:37:02"} +{"current_steps": 3313, "total_steps": 8674, "loss": 0.4509078860282898, "lr": 1.4560275476411273e-06, "epoch": 0.7638920913073554, "percentage": 38.19, "elapsed_time": "2:14:06", "remaining_time": "3:37:00"} 
+{"current_steps": 3314, "total_steps": 8674, "loss": 0.48707491159439087, "lr": 1.4556882050902986e-06, "epoch": 0.764122665436938, "percentage": 38.21, "elapsed_time": "2:14:09", "remaining_time": "3:36:58"} +{"current_steps": 3315, "total_steps": 8674, "loss": 0.4768955707550049, "lr": 1.455348796300571e-06, "epoch": 0.7643532395665207, "percentage": 38.22, "elapsed_time": "2:14:11", "remaining_time": "3:36:55"} +{"current_steps": 3316, "total_steps": 8674, "loss": 0.44231370091438293, "lr": 1.4550093213212812e-06, "epoch": 0.7645838136961033, "percentage": 38.23, "elapsed_time": "2:14:13", "remaining_time": "3:36:53"} +{"current_steps": 3317, "total_steps": 8674, "loss": 0.41919445991516113, "lr": 1.4546697802017752e-06, "epoch": 0.764814387825686, "percentage": 38.24, "elapsed_time": "2:14:16", "remaining_time": "3:36:50"} +{"current_steps": 3318, "total_steps": 8674, "loss": 0.5004634857177734, "lr": 1.4543301729914086e-06, "epoch": 0.7650449619552686, "percentage": 38.25, "elapsed_time": "2:14:18", "remaining_time": "3:36:48"} +{"current_steps": 3319, "total_steps": 8674, "loss": 0.5327651500701904, "lr": 1.4539904997395467e-06, "epoch": 0.7652755360848513, "percentage": 38.26, "elapsed_time": "2:14:21", "remaining_time": "3:36:46"} +{"current_steps": 3320, "total_steps": 8674, "loss": 0.4571789801120758, "lr": 1.4536507604955647e-06, "epoch": 0.7655061102144339, "percentage": 38.28, "elapsed_time": "2:14:23", "remaining_time": "3:36:43"} +{"current_steps": 3321, "total_steps": 8674, "loss": 0.3989352583885193, "lr": 1.4533109553088474e-06, "epoch": 0.7657366843440166, "percentage": 38.29, "elapsed_time": "2:14:25", "remaining_time": "3:36:41"} +{"current_steps": 3322, "total_steps": 8674, "loss": 0.4661702513694763, "lr": 1.452971084228788e-06, "epoch": 0.7659672584735993, "percentage": 38.3, "elapsed_time": "2:14:28", "remaining_time": "3:36:38"} +{"current_steps": 3323, "total_steps": 8674, "loss": 0.5007051825523376, "lr": 1.4526311473047911e-06, 
"epoch": 0.766197832603182, "percentage": 38.31, "elapsed_time": "2:14:30", "remaining_time": "3:36:36"} +{"current_steps": 3324, "total_steps": 8674, "loss": 0.44391199946403503, "lr": 1.4522911445862697e-06, "epoch": 0.7664284067327646, "percentage": 38.32, "elapsed_time": "2:14:33", "remaining_time": "3:36:34"} +{"current_steps": 3325, "total_steps": 8674, "loss": 0.48606377840042114, "lr": 1.4519510761226466e-06, "epoch": 0.7666589808623473, "percentage": 38.33, "elapsed_time": "2:14:35", "remaining_time": "3:36:31"} +{"current_steps": 3326, "total_steps": 8674, "loss": 0.4831564426422119, "lr": 1.4516109419633543e-06, "epoch": 0.7668895549919299, "percentage": 38.34, "elapsed_time": "2:14:38", "remaining_time": "3:36:29"} +{"current_steps": 3327, "total_steps": 8674, "loss": 0.5033055543899536, "lr": 1.4512707421578344e-06, "epoch": 0.7671201291215126, "percentage": 38.36, "elapsed_time": "2:14:40", "remaining_time": "3:36:26"} +{"current_steps": 3328, "total_steps": 8674, "loss": 0.40440869331359863, "lr": 1.4509304767555385e-06, "epoch": 0.7673507032510952, "percentage": 38.37, "elapsed_time": "2:14:42", "remaining_time": "3:36:23"} +{"current_steps": 3329, "total_steps": 8674, "loss": 0.4281578063964844, "lr": 1.4505901458059282e-06, "epoch": 0.7675812773806779, "percentage": 38.38, "elapsed_time": "2:14:45", "remaining_time": "3:36:21"} +{"current_steps": 3330, "total_steps": 8674, "loss": 0.45301395654678345, "lr": 1.4502497493584735e-06, "epoch": 0.7678118515102605, "percentage": 38.39, "elapsed_time": "2:14:47", "remaining_time": "3:36:18"} +{"current_steps": 3331, "total_steps": 8674, "loss": 0.3971232771873474, "lr": 1.4499092874626545e-06, "epoch": 0.7680424256398433, "percentage": 38.4, "elapsed_time": "2:14:50", "remaining_time": "3:36:16"} +{"current_steps": 3332, "total_steps": 8674, "loss": 0.45382559299468994, "lr": 1.4495687601679607e-06, "epoch": 0.7682729997694259, "percentage": 38.41, "elapsed_time": "2:14:52", "remaining_time": "3:36:14"} 
+{"current_steps": 3333, "total_steps": 8674, "loss": 0.4101349711418152, "lr": 1.4492281675238916e-06, "epoch": 0.7685035738990086, "percentage": 38.43, "elapsed_time": "2:14:54", "remaining_time": "3:36:11"} +{"current_steps": 3334, "total_steps": 8674, "loss": 0.5322436690330505, "lr": 1.4488875095799555e-06, "epoch": 0.7687341480285912, "percentage": 38.44, "elapsed_time": "2:14:57", "remaining_time": "3:36:09"} +{"current_steps": 3335, "total_steps": 8674, "loss": 0.5497866272926331, "lr": 1.4485467863856703e-06, "epoch": 0.7689647221581739, "percentage": 38.45, "elapsed_time": "2:14:59", "remaining_time": "3:36:06"} +{"current_steps": 3336, "total_steps": 8674, "loss": 0.5088074207305908, "lr": 1.4482059979905642e-06, "epoch": 0.7691952962877565, "percentage": 38.46, "elapsed_time": "2:15:02", "remaining_time": "3:36:04"} +{"current_steps": 3337, "total_steps": 8674, "loss": 0.4444946050643921, "lr": 1.4478651444441736e-06, "epoch": 0.7694258704173391, "percentage": 38.47, "elapsed_time": "2:15:04", "remaining_time": "3:36:01"} +{"current_steps": 3338, "total_steps": 8674, "loss": 0.41257357597351074, "lr": 1.4475242257960454e-06, "epoch": 0.7696564445469218, "percentage": 38.48, "elapsed_time": "2:15:06", "remaining_time": "3:35:59"} +{"current_steps": 3339, "total_steps": 8674, "loss": 0.47933512926101685, "lr": 1.4471832420957356e-06, "epoch": 0.7698870186765044, "percentage": 38.49, "elapsed_time": "2:15:09", "remaining_time": "3:35:57"} +{"current_steps": 3340, "total_steps": 8674, "loss": 0.41331803798675537, "lr": 1.4468421933928093e-06, "epoch": 0.7701175928060872, "percentage": 38.51, "elapsed_time": "2:15:11", "remaining_time": "3:35:54"} +{"current_steps": 3341, "total_steps": 8674, "loss": 0.5047392845153809, "lr": 1.4465010797368416e-06, "epoch": 0.7703481669356698, "percentage": 38.52, "elapsed_time": "2:15:14", "remaining_time": "3:35:52"} +{"current_steps": 3342, "total_steps": 8674, "loss": 0.5265953540802002, "lr": 1.446159901177417e-06, 
"epoch": 0.7705787410652525, "percentage": 38.53, "elapsed_time": "2:15:16", "remaining_time": "3:35:49"} +{"current_steps": 3343, "total_steps": 8674, "loss": 0.48366689682006836, "lr": 1.4458186577641285e-06, "epoch": 0.7708093151948351, "percentage": 38.54, "elapsed_time": "2:15:18", "remaining_time": "3:35:47"} +{"current_steps": 3344, "total_steps": 8674, "loss": 0.4303058087825775, "lr": 1.4454773495465805e-06, "epoch": 0.7710398893244178, "percentage": 38.55, "elapsed_time": "2:15:21", "remaining_time": "3:35:44"} +{"current_steps": 3345, "total_steps": 8674, "loss": 0.44936758279800415, "lr": 1.4451359765743845e-06, "epoch": 0.7712704634540004, "percentage": 38.56, "elapsed_time": "2:15:23", "remaining_time": "3:35:41"} +{"current_steps": 3346, "total_steps": 8674, "loss": 0.37891095876693726, "lr": 1.4447945388971631e-06, "epoch": 0.7715010375835831, "percentage": 38.58, "elapsed_time": "2:15:26", "remaining_time": "3:35:39"} +{"current_steps": 3347, "total_steps": 8674, "loss": 0.4958759546279907, "lr": 1.4444530365645477e-06, "epoch": 0.7717316117131657, "percentage": 38.59, "elapsed_time": "2:15:28", "remaining_time": "3:35:37"} +{"current_steps": 3348, "total_steps": 8674, "loss": 0.5180525183677673, "lr": 1.4441114696261791e-06, "epoch": 0.7719621858427484, "percentage": 38.6, "elapsed_time": "2:15:31", "remaining_time": "3:35:35"} +{"current_steps": 3349, "total_steps": 8674, "loss": 0.4760133624076843, "lr": 1.4437698381317076e-06, "epoch": 0.772192759972331, "percentage": 38.61, "elapsed_time": "2:15:33", "remaining_time": "3:35:32"} +{"current_steps": 3350, "total_steps": 8674, "loss": 0.5095269680023193, "lr": 1.4434281421307923e-06, "epoch": 0.7724233341019138, "percentage": 38.62, "elapsed_time": "2:15:35", "remaining_time": "3:35:29"} +{"current_steps": 3351, "total_steps": 8674, "loss": 0.41132962703704834, "lr": 1.443086381673103e-06, "epoch": 0.7726539082314964, "percentage": 38.63, "elapsed_time": "2:15:38", "remaining_time": "3:35:27"} 
+{"current_steps": 3352, "total_steps": 8674, "loss": 0.5617398023605347, "lr": 1.442744556808317e-06, "epoch": 0.7728844823610791, "percentage": 38.64, "elapsed_time": "2:15:40", "remaining_time": "3:35:24"} +{"current_steps": 3353, "total_steps": 8674, "loss": 0.4421590566635132, "lr": 1.4424026675861229e-06, "epoch": 0.7731150564906617, "percentage": 38.66, "elapsed_time": "2:15:43", "remaining_time": "3:35:22"} +{"current_steps": 3354, "total_steps": 8674, "loss": 0.5533363223075867, "lr": 1.4420607140562175e-06, "epoch": 0.7733456306202444, "percentage": 38.67, "elapsed_time": "2:15:45", "remaining_time": "3:35:20"} +{"current_steps": 3355, "total_steps": 8674, "loss": 0.3703731298446655, "lr": 1.441718696268307e-06, "epoch": 0.773576204749827, "percentage": 38.68, "elapsed_time": "2:15:47", "remaining_time": "3:35:17"} +{"current_steps": 3356, "total_steps": 8674, "loss": 0.4078833758831024, "lr": 1.4413766142721074e-06, "epoch": 0.7738067788794097, "percentage": 38.69, "elapsed_time": "2:15:50", "remaining_time": "3:35:15"} +{"current_steps": 3357, "total_steps": 8674, "loss": 0.47297823429107666, "lr": 1.4410344681173436e-06, "epoch": 0.7740373530089923, "percentage": 38.7, "elapsed_time": "2:15:52", "remaining_time": "3:35:13"} +{"current_steps": 3358, "total_steps": 8674, "loss": 0.4586789309978485, "lr": 1.4406922578537501e-06, "epoch": 0.7742679271385751, "percentage": 38.71, "elapsed_time": "2:15:55", "remaining_time": "3:35:10"} +{"current_steps": 3359, "total_steps": 8674, "loss": 0.5284359455108643, "lr": 1.440349983531071e-06, "epoch": 0.7744985012681577, "percentage": 38.72, "elapsed_time": "2:15:57", "remaining_time": "3:35:08"} +{"current_steps": 3360, "total_steps": 8674, "loss": 0.47153323888778687, "lr": 1.4400076451990585e-06, "epoch": 0.7747290753977404, "percentage": 38.74, "elapsed_time": "2:16:00", "remaining_time": "3:35:05"} +{"current_steps": 3361, "total_steps": 8674, "loss": 0.3862396478652954, "lr": 1.4396652429074758e-06, "epoch": 
0.774959649527323, "percentage": 38.75, "elapsed_time": "2:16:02", "remaining_time": "3:35:03"} +{"current_steps": 3362, "total_steps": 8674, "loss": 0.48918354511260986, "lr": 1.4393227767060938e-06, "epoch": 0.7751902236569057, "percentage": 38.76, "elapsed_time": "2:16:04", "remaining_time": "3:35:00"} +{"current_steps": 3363, "total_steps": 8674, "loss": 0.5541480779647827, "lr": 1.4389802466446942e-06, "epoch": 0.7754207977864883, "percentage": 38.77, "elapsed_time": "2:16:07", "remaining_time": "3:34:58"} +{"current_steps": 3364, "total_steps": 8674, "loss": 0.48972445726394653, "lr": 1.4386376527730665e-06, "epoch": 0.775651371916071, "percentage": 38.78, "elapsed_time": "2:16:09", "remaining_time": "3:34:56"} +{"current_steps": 3365, "total_steps": 8674, "loss": 0.5016083717346191, "lr": 1.4382949951410109e-06, "epoch": 0.7758819460456536, "percentage": 38.79, "elapsed_time": "2:16:12", "remaining_time": "3:34:53"} +{"current_steps": 3366, "total_steps": 8674, "loss": 0.40227651596069336, "lr": 1.4379522737983351e-06, "epoch": 0.7761125201752364, "percentage": 38.81, "elapsed_time": "2:16:14", "remaining_time": "3:34:51"} +{"current_steps": 3367, "total_steps": 8674, "loss": 0.42994722723960876, "lr": 1.4376094887948584e-06, "epoch": 0.776343094304819, "percentage": 38.82, "elapsed_time": "2:16:17", "remaining_time": "3:34:48"} +{"current_steps": 3368, "total_steps": 8674, "loss": 0.5087350010871887, "lr": 1.4372666401804073e-06, "epoch": 0.7765736684344017, "percentage": 38.83, "elapsed_time": "2:16:19", "remaining_time": "3:34:46"} +{"current_steps": 3369, "total_steps": 8674, "loss": 0.39419132471084595, "lr": 1.4369237280048186e-06, "epoch": 0.7768042425639843, "percentage": 38.84, "elapsed_time": "2:16:21", "remaining_time": "3:34:43"} +{"current_steps": 3370, "total_steps": 8674, "loss": 0.500682532787323, "lr": 1.4365807523179376e-06, "epoch": 0.777034816693567, "percentage": 38.85, "elapsed_time": "2:16:24", "remaining_time": "3:34:41"} 
+{"current_steps": 3371, "total_steps": 8674, "loss": 0.49243754148483276, "lr": 1.4362377131696198e-06, "epoch": 0.7772653908231496, "percentage": 38.86, "elapsed_time": "2:16:26", "remaining_time": "3:34:38"} +{"current_steps": 3372, "total_steps": 8674, "loss": 0.5479283332824707, "lr": 1.4358946106097295e-06, "epoch": 0.7774959649527323, "percentage": 38.87, "elapsed_time": "2:16:29", "remaining_time": "3:34:36"} +{"current_steps": 3373, "total_steps": 8674, "loss": 0.43217700719833374, "lr": 1.4355514446881396e-06, "epoch": 0.7777265390823149, "percentage": 38.89, "elapsed_time": "2:16:31", "remaining_time": "3:34:34"} +{"current_steps": 3374, "total_steps": 8674, "loss": 0.5351289510726929, "lr": 1.435208215454733e-06, "epoch": 0.7779571132118976, "percentage": 38.9, "elapsed_time": "2:16:34", "remaining_time": "3:34:31"} +{"current_steps": 3375, "total_steps": 8674, "loss": 0.45523375272750854, "lr": 1.4348649229594016e-06, "epoch": 0.7781876873414802, "percentage": 38.91, "elapsed_time": "2:16:36", "remaining_time": "3:34:29"} +{"current_steps": 3376, "total_steps": 8674, "loss": 0.49811118841171265, "lr": 1.4345215672520465e-06, "epoch": 0.778418261471063, "percentage": 38.92, "elapsed_time": "2:16:38", "remaining_time": "3:34:26"} +{"current_steps": 3377, "total_steps": 8674, "loss": 0.40621131658554077, "lr": 1.434178148382578e-06, "epoch": 0.7786488356006456, "percentage": 38.93, "elapsed_time": "2:16:41", "remaining_time": "3:34:24"} +{"current_steps": 3378, "total_steps": 8674, "loss": 0.43339842557907104, "lr": 1.4338346664009152e-06, "epoch": 0.7788794097302283, "percentage": 38.94, "elapsed_time": "2:16:43", "remaining_time": "3:34:21"} +{"current_steps": 3379, "total_steps": 8674, "loss": 0.4397253096103668, "lr": 1.433491121356987e-06, "epoch": 0.7791099838598109, "percentage": 38.96, "elapsed_time": "2:16:46", "remaining_time": "3:34:19"} +{"current_steps": 3380, "total_steps": 8674, "loss": 0.5146217942237854, "lr": 1.433147513300731e-06, 
"epoch": 0.7793405579893936, "percentage": 38.97, "elapsed_time": "2:16:48", "remaining_time": "3:34:16"} +{"current_steps": 3381, "total_steps": 8674, "loss": 0.46328768134117126, "lr": 1.432803842282094e-06, "epoch": 0.7795711321189762, "percentage": 38.98, "elapsed_time": "2:16:51", "remaining_time": "3:34:14"} +{"current_steps": 3382, "total_steps": 8674, "loss": 0.47743386030197144, "lr": 1.432460108351032e-06, "epoch": 0.7798017062485589, "percentage": 38.99, "elapsed_time": "2:16:53", "remaining_time": "3:34:12"} +{"current_steps": 3383, "total_steps": 8674, "loss": 0.467747300863266, "lr": 1.4321163115575105e-06, "epoch": 0.7800322803781415, "percentage": 39.0, "elapsed_time": "2:16:55", "remaining_time": "3:34:09"} +{"current_steps": 3384, "total_steps": 8674, "loss": 0.4269976019859314, "lr": 1.431772451951504e-06, "epoch": 0.7802628545077243, "percentage": 39.01, "elapsed_time": "2:16:58", "remaining_time": "3:34:07"} +{"current_steps": 3385, "total_steps": 8674, "loss": 0.5440881252288818, "lr": 1.4314285295829956e-06, "epoch": 0.7804934286373069, "percentage": 39.02, "elapsed_time": "2:17:00", "remaining_time": "3:34:04"} +{"current_steps": 3386, "total_steps": 8674, "loss": 0.42413994669914246, "lr": 1.431084544501978e-06, "epoch": 0.7807240027668896, "percentage": 39.04, "elapsed_time": "2:17:03", "remaining_time": "3:34:02"} +{"current_steps": 3387, "total_steps": 8674, "loss": 0.5563687086105347, "lr": 1.4307404967584528e-06, "epoch": 0.7809545768964722, "percentage": 39.05, "elapsed_time": "2:17:05", "remaining_time": "3:33:59"} +{"current_steps": 3388, "total_steps": 8674, "loss": 0.4822027087211609, "lr": 1.4303963864024314e-06, "epoch": 0.7811851510260549, "percentage": 39.06, "elapsed_time": "2:17:07", "remaining_time": "3:33:57"} +{"current_steps": 3389, "total_steps": 8674, "loss": 0.5267205834388733, "lr": 1.430052213483933e-06, "epoch": 0.7814157251556375, "percentage": 39.07, "elapsed_time": "2:17:10", "remaining_time": "3:33:55"} 
+{"current_steps": 3390, "total_steps": 8674, "loss": 0.49257054924964905, "lr": 1.4297079780529868e-06, "epoch": 0.7816462992852202, "percentage": 39.08, "elapsed_time": "2:17:12", "remaining_time": "3:33:52"} +{"current_steps": 3391, "total_steps": 8674, "loss": 0.45225608348846436, "lr": 1.4293636801596314e-06, "epoch": 0.7818768734148028, "percentage": 39.09, "elapsed_time": "2:17:15", "remaining_time": "3:33:50"} +{"current_steps": 3392, "total_steps": 8674, "loss": 0.4891412854194641, "lr": 1.4290193198539133e-06, "epoch": 0.7821074475443855, "percentage": 39.11, "elapsed_time": "2:17:17", "remaining_time": "3:33:47"} +{"current_steps": 3393, "total_steps": 8674, "loss": 0.4411062002182007, "lr": 1.4286748971858893e-06, "epoch": 0.7823380216739682, "percentage": 39.12, "elapsed_time": "2:17:20", "remaining_time": "3:33:45"} +{"current_steps": 3394, "total_steps": 8674, "loss": 0.4584164619445801, "lr": 1.4283304122056242e-06, "epoch": 0.7825685958035509, "percentage": 39.13, "elapsed_time": "2:17:22", "remaining_time": "3:33:42"} +{"current_steps": 3395, "total_steps": 8674, "loss": 0.46913737058639526, "lr": 1.4279858649631928e-06, "epoch": 0.7827991699331335, "percentage": 39.14, "elapsed_time": "2:17:24", "remaining_time": "3:33:40"} +{"current_steps": 3396, "total_steps": 8674, "loss": 0.40582767128944397, "lr": 1.4276412555086786e-06, "epoch": 0.7830297440627162, "percentage": 39.15, "elapsed_time": "2:17:27", "remaining_time": "3:33:37"} +{"current_steps": 3397, "total_steps": 8674, "loss": 0.5089453458786011, "lr": 1.4272965838921737e-06, "epoch": 0.7832603181922988, "percentage": 39.16, "elapsed_time": "2:17:29", "remaining_time": "3:33:35"} +{"current_steps": 3398, "total_steps": 8674, "loss": 0.4744444489479065, "lr": 1.4269518501637798e-06, "epoch": 0.7834908923218815, "percentage": 39.17, "elapsed_time": "2:17:32", "remaining_time": "3:33:33"} +{"current_steps": 3399, "total_steps": 8674, "loss": 0.49168163537979126, "lr": 1.426607054373608e-06, 
"epoch": 0.7837214664514641, "percentage": 39.19, "elapsed_time": "2:17:34", "remaining_time": "3:33:30"} +{"current_steps": 3400, "total_steps": 8674, "loss": 0.4423940181732178, "lr": 1.4262621965717768e-06, "epoch": 0.7839520405810468, "percentage": 39.2, "elapsed_time": "2:17:37", "remaining_time": "3:33:28"} +{"current_steps": 3401, "total_steps": 8674, "loss": 0.5138403177261353, "lr": 1.4259172768084152e-06, "epoch": 0.7841826147106294, "percentage": 39.21, "elapsed_time": "2:17:41", "remaining_time": "3:33:28"} +{"current_steps": 3402, "total_steps": 8674, "loss": 0.5248140096664429, "lr": 1.425572295133661e-06, "epoch": 0.7844131888402122, "percentage": 39.22, "elapsed_time": "2:17:43", "remaining_time": "3:33:25"} +{"current_steps": 3403, "total_steps": 8674, "loss": 0.39161059260368347, "lr": 1.4252272515976607e-06, "epoch": 0.7846437629697948, "percentage": 39.23, "elapsed_time": "2:17:45", "remaining_time": "3:33:23"} +{"current_steps": 3404, "total_steps": 8674, "loss": 0.46826744079589844, "lr": 1.4248821462505699e-06, "epoch": 0.7848743370993775, "percentage": 39.24, "elapsed_time": "2:17:48", "remaining_time": "3:33:21"} +{"current_steps": 3405, "total_steps": 8674, "loss": 0.4329161047935486, "lr": 1.424536979142553e-06, "epoch": 0.7851049112289601, "percentage": 39.26, "elapsed_time": "2:17:50", "remaining_time": "3:33:18"} +{"current_steps": 3406, "total_steps": 8674, "loss": 0.4691393971443176, "lr": 1.4241917503237834e-06, "epoch": 0.7853354853585428, "percentage": 39.27, "elapsed_time": "2:17:53", "remaining_time": "3:33:16"} +{"current_steps": 3407, "total_steps": 8674, "loss": 0.5130072236061096, "lr": 1.423846459844444e-06, "epoch": 0.7855660594881254, "percentage": 39.28, "elapsed_time": "2:17:55", "remaining_time": "3:33:14"} +{"current_steps": 3408, "total_steps": 8674, "loss": 0.37478166818618774, "lr": 1.4235011077547264e-06, "epoch": 0.7857966336177081, "percentage": 39.29, "elapsed_time": "2:17:58", "remaining_time": "3:33:11"} 
+{"current_steps": 3409, "total_steps": 8674, "loss": 0.46112769842147827, "lr": 1.4231556941048307e-06, "epoch": 0.7860272077472907, "percentage": 39.3, "elapsed_time": "2:18:00", "remaining_time": "3:33:09"} +{"current_steps": 3410, "total_steps": 8674, "loss": 0.5095282793045044, "lr": 1.422810218944966e-06, "epoch": 0.7862577818768735, "percentage": 39.31, "elapsed_time": "2:18:03", "remaining_time": "3:33:06"} +{"current_steps": 3411, "total_steps": 8674, "loss": 0.4182342290878296, "lr": 1.422464682325351e-06, "epoch": 0.7864883560064561, "percentage": 39.32, "elapsed_time": "2:18:05", "remaining_time": "3:33:04"} +{"current_steps": 3412, "total_steps": 8674, "loss": 0.3892830014228821, "lr": 1.422119084296213e-06, "epoch": 0.7867189301360388, "percentage": 39.34, "elapsed_time": "2:18:07", "remaining_time": "3:33:01"} +{"current_steps": 3413, "total_steps": 8674, "loss": 0.5294528603553772, "lr": 1.4217734249077877e-06, "epoch": 0.7869495042656214, "percentage": 39.35, "elapsed_time": "2:18:10", "remaining_time": "3:32:59"} +{"current_steps": 3414, "total_steps": 8674, "loss": 0.471803218126297, "lr": 1.4214277042103208e-06, "epoch": 0.7871800783952041, "percentage": 39.36, "elapsed_time": "2:18:12", "remaining_time": "3:32:56"} +{"current_steps": 3415, "total_steps": 8674, "loss": 0.4363842010498047, "lr": 1.4210819222540662e-06, "epoch": 0.7874106525247867, "percentage": 39.37, "elapsed_time": "2:18:15", "remaining_time": "3:32:54"} +{"current_steps": 3416, "total_steps": 8674, "loss": 0.3834928870201111, "lr": 1.4207360790892867e-06, "epoch": 0.7876412266543694, "percentage": 39.38, "elapsed_time": "2:18:17", "remaining_time": "3:32:52"} +{"current_steps": 3417, "total_steps": 8674, "loss": 0.4639194905757904, "lr": 1.4203901747662539e-06, "epoch": 0.787871800783952, "percentage": 39.39, "elapsed_time": "2:18:20", "remaining_time": "3:32:49"} +{"current_steps": 3418, "total_steps": 8674, "loss": 0.47130632400512695, "lr": 1.4200442093352486e-06, "epoch": 
0.7881023749135347, "percentage": 39.41, "elapsed_time": "2:18:22", "remaining_time": "3:32:46"} +{"current_steps": 3419, "total_steps": 8674, "loss": 0.4848192632198334, "lr": 1.4196981828465606e-06, "epoch": 0.7883329490431173, "percentage": 39.42, "elapsed_time": "2:18:24", "remaining_time": "3:32:44"} +{"current_steps": 3420, "total_steps": 8674, "loss": 0.5137286186218262, "lr": 1.4193520953504884e-06, "epoch": 0.7885635231727001, "percentage": 39.43, "elapsed_time": "2:18:27", "remaining_time": "3:32:41"} +{"current_steps": 3421, "total_steps": 8674, "loss": 0.47639960050582886, "lr": 1.4190059468973385e-06, "epoch": 0.7887940973022827, "percentage": 39.44, "elapsed_time": "2:18:29", "remaining_time": "3:32:39"} +{"current_steps": 3422, "total_steps": 8674, "loss": 0.4300975799560547, "lr": 1.418659737537428e-06, "epoch": 0.7890246714318654, "percentage": 39.45, "elapsed_time": "2:18:31", "remaining_time": "3:32:36"} +{"current_steps": 3423, "total_steps": 8674, "loss": 0.5669160485267639, "lr": 1.4183134673210817e-06, "epoch": 0.789255245561448, "percentage": 39.46, "elapsed_time": "2:18:34", "remaining_time": "3:32:34"} +{"current_steps": 3424, "total_steps": 8674, "loss": 0.4113837480545044, "lr": 1.4179671362986336e-06, "epoch": 0.7894858196910307, "percentage": 39.47, "elapsed_time": "2:18:36", "remaining_time": "3:32:32"} +{"current_steps": 3425, "total_steps": 8674, "loss": 0.4992315173149109, "lr": 1.417620744520426e-06, "epoch": 0.7897163938206133, "percentage": 39.49, "elapsed_time": "2:18:39", "remaining_time": "3:32:29"} +{"current_steps": 3426, "total_steps": 8674, "loss": 0.5556696653366089, "lr": 1.417274292036811e-06, "epoch": 0.789946967950196, "percentage": 39.5, "elapsed_time": "2:18:41", "remaining_time": "3:32:27"} +{"current_steps": 3427, "total_steps": 8674, "loss": 0.47911009192466736, "lr": 1.4169277788981485e-06, "epoch": 0.7901775420797786, "percentage": 39.51, "elapsed_time": "2:18:44", "remaining_time": "3:32:24"} 
+{"current_steps": 3428, "total_steps": 8674, "loss": 0.45395466685295105, "lr": 1.416581205154808e-06, "epoch": 0.7904081162093614, "percentage": 39.52, "elapsed_time": "2:18:46", "remaining_time": "3:32:22"} +{"current_steps": 3429, "total_steps": 8674, "loss": 0.4404561519622803, "lr": 1.4162345708571674e-06, "epoch": 0.790638690338944, "percentage": 39.53, "elapsed_time": "2:18:48", "remaining_time": "3:32:19"} +{"current_steps": 3430, "total_steps": 8674, "loss": 0.5541578531265259, "lr": 1.4158878760556136e-06, "epoch": 0.7908692644685267, "percentage": 39.54, "elapsed_time": "2:18:51", "remaining_time": "3:32:17"} +{"current_steps": 3431, "total_steps": 8674, "loss": 0.5517834424972534, "lr": 1.4155411208005422e-06, "epoch": 0.7910998385981093, "percentage": 39.55, "elapsed_time": "2:18:53", "remaining_time": "3:32:15"} +{"current_steps": 3432, "total_steps": 8674, "loss": 0.42650169134140015, "lr": 1.4151943051423574e-06, "epoch": 0.791330412727692, "percentage": 39.57, "elapsed_time": "2:18:56", "remaining_time": "3:32:12"} +{"current_steps": 3433, "total_steps": 8674, "loss": 0.42724043130874634, "lr": 1.414847429131472e-06, "epoch": 0.7915609868572746, "percentage": 39.58, "elapsed_time": "2:18:58", "remaining_time": "3:32:10"} +{"current_steps": 3434, "total_steps": 8674, "loss": 0.41757941246032715, "lr": 1.414500492818309e-06, "epoch": 0.7917915609868573, "percentage": 39.59, "elapsed_time": "2:19:01", "remaining_time": "3:32:07"} +{"current_steps": 3435, "total_steps": 8674, "loss": 0.47318267822265625, "lr": 1.4141534962532984e-06, "epoch": 0.7920221351164399, "percentage": 39.6, "elapsed_time": "2:19:03", "remaining_time": "3:32:05"} +{"current_steps": 3436, "total_steps": 8674, "loss": 0.5540967583656311, "lr": 1.41380643948688e-06, "epoch": 0.7922527092460226, "percentage": 39.61, "elapsed_time": "2:19:05", "remaining_time": "3:32:02"} +{"current_steps": 3437, "total_steps": 8674, "loss": 0.4459697902202606, "lr": 1.4134593225695013e-06, "epoch": 
0.7924832833756053, "percentage": 39.62, "elapsed_time": "2:19:08", "remaining_time": "3:32:00"} +{"current_steps": 3438, "total_steps": 8674, "loss": 0.5263698101043701, "lr": 1.41311214555162e-06, "epoch": 0.792713857505188, "percentage": 39.64, "elapsed_time": "2:19:10", "remaining_time": "3:31:57"} +{"current_steps": 3439, "total_steps": 8674, "loss": 0.40453940629959106, "lr": 1.4127649084837016e-06, "epoch": 0.7929444316347706, "percentage": 39.65, "elapsed_time": "2:19:12", "remaining_time": "3:31:55"} +{"current_steps": 3440, "total_steps": 8674, "loss": 0.3859207034111023, "lr": 1.412417611416221e-06, "epoch": 0.7931750057643533, "percentage": 39.66, "elapsed_time": "2:19:15", "remaining_time": "3:31:52"} +{"current_steps": 3441, "total_steps": 8674, "loss": 0.4604511260986328, "lr": 1.4120702543996603e-06, "epoch": 0.7934055798939359, "percentage": 39.67, "elapsed_time": "2:19:17", "remaining_time": "3:31:50"} +{"current_steps": 3442, "total_steps": 8674, "loss": 0.40292084217071533, "lr": 1.411722837484512e-06, "epoch": 0.7936361540235186, "percentage": 39.68, "elapsed_time": "2:19:20", "remaining_time": "3:31:47"} +{"current_steps": 3443, "total_steps": 8674, "loss": 0.40447625517845154, "lr": 1.4113753607212766e-06, "epoch": 0.7938667281531012, "percentage": 39.69, "elapsed_time": "2:19:22", "remaining_time": "3:31:45"} +{"current_steps": 3444, "total_steps": 8674, "loss": 0.48472997546195984, "lr": 1.4110278241604635e-06, "epoch": 0.7940973022826839, "percentage": 39.7, "elapsed_time": "2:19:25", "remaining_time": "3:31:43"} +{"current_steps": 3445, "total_steps": 8674, "loss": 0.5404670238494873, "lr": 1.4106802278525902e-06, "epoch": 0.7943278764122665, "percentage": 39.72, "elapsed_time": "2:19:27", "remaining_time": "3:31:40"} +{"current_steps": 3446, "total_steps": 8674, "loss": 0.5885064005851746, "lr": 1.4103325718481838e-06, "epoch": 0.7945584505418493, "percentage": 39.73, "elapsed_time": "2:19:29", "remaining_time": "3:31:38"} 
+{"current_steps": 3447, "total_steps": 8674, "loss": 0.47806939482688904, "lr": 1.4099848561977794e-06, "epoch": 0.7947890246714319, "percentage": 39.74, "elapsed_time": "2:19:32", "remaining_time": "3:31:35"} +{"current_steps": 3448, "total_steps": 8674, "loss": 0.4247834086418152, "lr": 1.4096370809519213e-06, "epoch": 0.7950195988010145, "percentage": 39.75, "elapsed_time": "2:19:34", "remaining_time": "3:31:33"} +{"current_steps": 3449, "total_steps": 8674, "loss": 0.508902370929718, "lr": 1.409289246161162e-06, "epoch": 0.7952501729305972, "percentage": 39.76, "elapsed_time": "2:19:37", "remaining_time": "3:31:31"} +{"current_steps": 3450, "total_steps": 8674, "loss": 0.4866124987602234, "lr": 1.4089413518760626e-06, "epoch": 0.7954807470601798, "percentage": 39.77, "elapsed_time": "2:19:39", "remaining_time": "3:31:28"} +{"current_steps": 3451, "total_steps": 8674, "loss": 0.5168731212615967, "lr": 1.408593398147193e-06, "epoch": 0.7957113211897625, "percentage": 39.79, "elapsed_time": "2:19:42", "remaining_time": "3:31:26"} +{"current_steps": 3452, "total_steps": 8674, "loss": 0.5039271712303162, "lr": 1.4082453850251326e-06, "epoch": 0.7959418953193451, "percentage": 39.8, "elapsed_time": "2:19:44", "remaining_time": "3:31:23"} +{"current_steps": 3453, "total_steps": 8674, "loss": 0.3660929799079895, "lr": 1.4078973125604674e-06, "epoch": 0.7961724694489278, "percentage": 39.81, "elapsed_time": "2:19:46", "remaining_time": "3:31:21"} +{"current_steps": 3454, "total_steps": 8674, "loss": 0.514503538608551, "lr": 1.407549180803794e-06, "epoch": 0.7964030435785104, "percentage": 39.82, "elapsed_time": "2:19:49", "remaining_time": "3:31:18"} +{"current_steps": 3455, "total_steps": 8674, "loss": 0.4803028702735901, "lr": 1.4072009898057172e-06, "epoch": 0.7966336177080932, "percentage": 39.83, "elapsed_time": "2:19:51", "remaining_time": "3:31:15"} +{"current_steps": 3456, "total_steps": 8674, "loss": 0.43116262555122375, "lr": 1.4068527396168492e-06, "epoch": 
0.7968641918376758, "percentage": 39.84, "elapsed_time": "2:19:54", "remaining_time": "3:31:13"} +{"current_steps": 3457, "total_steps": 8674, "loss": 0.5009680986404419, "lr": 1.4065044302878125e-06, "epoch": 0.7970947659672585, "percentage": 39.85, "elapsed_time": "2:19:56", "remaining_time": "3:31:11"} +{"current_steps": 3458, "total_steps": 8674, "loss": 0.4047713875770569, "lr": 1.406156061869237e-06, "epoch": 0.7973253400968411, "percentage": 39.87, "elapsed_time": "2:19:58", "remaining_time": "3:31:08"} +{"current_steps": 3459, "total_steps": 8674, "loss": 0.5287230014801025, "lr": 1.4058076344117615e-06, "epoch": 0.7975559142264238, "percentage": 39.88, "elapsed_time": "2:20:01", "remaining_time": "3:31:06"} +{"current_steps": 3460, "total_steps": 8674, "loss": 0.5602750778198242, "lr": 1.4054591479660335e-06, "epoch": 0.7977864883560064, "percentage": 39.89, "elapsed_time": "2:20:03", "remaining_time": "3:31:03"} +{"current_steps": 3461, "total_steps": 8674, "loss": 0.4178144335746765, "lr": 1.4051106025827096e-06, "epoch": 0.7980170624855891, "percentage": 39.9, "elapsed_time": "2:20:06", "remaining_time": "3:31:01"} +{"current_steps": 3462, "total_steps": 8674, "loss": 0.5061960220336914, "lr": 1.4047619983124536e-06, "epoch": 0.7982476366151717, "percentage": 39.91, "elapsed_time": "2:20:08", "remaining_time": "3:30:58"} +{"current_steps": 3463, "total_steps": 8674, "loss": 0.5091691017150879, "lr": 1.4044133352059392e-06, "epoch": 0.7984782107447544, "percentage": 39.92, "elapsed_time": "2:20:10", "remaining_time": "3:30:56"} +{"current_steps": 3464, "total_steps": 8674, "loss": 0.5100894570350647, "lr": 1.4040646133138478e-06, "epoch": 0.798708784874337, "percentage": 39.94, "elapsed_time": "2:20:13", "remaining_time": "3:30:53"} +{"current_steps": 3465, "total_steps": 8674, "loss": 0.47493505477905273, "lr": 1.4037158326868697e-06, "epoch": 0.7989393590039198, "percentage": 39.95, "elapsed_time": "2:20:15", "remaining_time": "3:30:51"} 
+{"current_steps": 3466, "total_steps": 8674, "loss": 0.5561350584030151, "lr": 1.4033669933757038e-06, "epoch": 0.7991699331335024, "percentage": 39.96, "elapsed_time": "2:20:18", "remaining_time": "3:30:49"} +{"current_steps": 3467, "total_steps": 8674, "loss": 0.44552814960479736, "lr": 1.4030180954310574e-06, "epoch": 0.7994005072630851, "percentage": 39.97, "elapsed_time": "2:20:20", "remaining_time": "3:30:46"} +{"current_steps": 3468, "total_steps": 8674, "loss": 0.4624238908290863, "lr": 1.4026691389036465e-06, "epoch": 0.7996310813926677, "percentage": 39.98, "elapsed_time": "2:20:22", "remaining_time": "3:30:44"} +{"current_steps": 3469, "total_steps": 8674, "loss": 0.5424448251724243, "lr": 1.4023201238441951e-06, "epoch": 0.7998616555222504, "percentage": 39.99, "elapsed_time": "2:20:25", "remaining_time": "3:30:41"} +{"current_steps": 3470, "total_steps": 8674, "loss": 0.4629395008087158, "lr": 1.4019710503034367e-06, "epoch": 0.800092229651833, "percentage": 40.0, "elapsed_time": "2:20:27", "remaining_time": "3:30:39"} +{"current_steps": 3471, "total_steps": 8674, "loss": 0.4375717043876648, "lr": 1.401621918332112e-06, "epoch": 0.8003228037814157, "percentage": 40.02, "elapsed_time": "2:20:30", "remaining_time": "3:30:36"} +{"current_steps": 3472, "total_steps": 8674, "loss": 0.4419640302658081, "lr": 1.401272727980971e-06, "epoch": 0.8005533779109983, "percentage": 40.03, "elapsed_time": "2:20:32", "remaining_time": "3:30:34"} +{"current_steps": 3473, "total_steps": 8674, "loss": 0.42077577114105225, "lr": 1.4009234793007724e-06, "epoch": 0.8007839520405811, "percentage": 40.04, "elapsed_time": "2:20:35", "remaining_time": "3:30:32"} +{"current_steps": 3474, "total_steps": 8674, "loss": 0.3735182583332062, "lr": 1.400574172342283e-06, "epoch": 0.8010145261701637, "percentage": 40.05, "elapsed_time": "2:20:37", "remaining_time": "3:30:29"} +{"current_steps": 3475, "total_steps": 8674, "loss": 0.4263458251953125, "lr": 1.4002248071562778e-06, "epoch": 
0.8012451002997464, "percentage": 40.06, "elapsed_time": "2:20:40", "remaining_time": "3:30:27"} +{"current_steps": 3476, "total_steps": 8674, "loss": 0.42377904057502747, "lr": 1.3998753837935406e-06, "epoch": 0.801475674429329, "percentage": 40.07, "elapsed_time": "2:20:42", "remaining_time": "3:30:24"} +{"current_steps": 3477, "total_steps": 8674, "loss": 0.5017589330673218, "lr": 1.399525902304864e-06, "epoch": 0.8017062485589117, "percentage": 40.09, "elapsed_time": "2:20:44", "remaining_time": "3:30:22"} +{"current_steps": 3478, "total_steps": 8674, "loss": 0.41022592782974243, "lr": 1.3991763627410485e-06, "epoch": 0.8019368226884943, "percentage": 40.1, "elapsed_time": "2:20:47", "remaining_time": "3:30:19"} +{"current_steps": 3479, "total_steps": 8674, "loss": 0.49957793951034546, "lr": 1.3988267651529028e-06, "epoch": 0.802167396818077, "percentage": 40.11, "elapsed_time": "2:20:49", "remaining_time": "3:30:17"} +{"current_steps": 3480, "total_steps": 8674, "loss": 0.5065722465515137, "lr": 1.398477109591245e-06, "epoch": 0.8023979709476596, "percentage": 40.12, "elapsed_time": "2:20:51", "remaining_time": "3:30:14"} +{"current_steps": 3481, "total_steps": 8674, "loss": 0.4353798031806946, "lr": 1.398127396106901e-06, "epoch": 0.8026285450772424, "percentage": 40.13, "elapsed_time": "2:20:54", "remaining_time": "3:30:12"} +{"current_steps": 3482, "total_steps": 8674, "loss": 0.41438236832618713, "lr": 1.3977776247507049e-06, "epoch": 0.802859119206825, "percentage": 40.14, "elapsed_time": "2:20:56", "remaining_time": "3:30:10"} +{"current_steps": 3483, "total_steps": 8674, "loss": 0.4348248839378357, "lr": 1.3974277955734996e-06, "epoch": 0.8030896933364077, "percentage": 40.15, "elapsed_time": "2:20:59", "remaining_time": "3:30:07"} +{"current_steps": 3484, "total_steps": 8674, "loss": 0.49369150400161743, "lr": 1.3970779086261363e-06, "epoch": 0.8033202674659903, "percentage": 40.17, "elapsed_time": "2:21:01", "remaining_time": "3:30:05"} 
+{"current_steps": 3485, "total_steps": 8674, "loss": 0.5694580078125, "lr": 1.396727963959475e-06, "epoch": 0.803550841595573, "percentage": 40.18, "elapsed_time": "2:21:04", "remaining_time": "3:30:02"} +{"current_steps": 3486, "total_steps": 8674, "loss": 0.5357070565223694, "lr": 1.3963779616243834e-06, "epoch": 0.8037814157251556, "percentage": 40.19, "elapsed_time": "2:21:06", "remaining_time": "3:30:00"} +{"current_steps": 3487, "total_steps": 8674, "loss": 0.41300907731056213, "lr": 1.3960279016717377e-06, "epoch": 0.8040119898547383, "percentage": 40.2, "elapsed_time": "2:21:09", "remaining_time": "3:29:57"} +{"current_steps": 3488, "total_steps": 8674, "loss": 0.5058030486106873, "lr": 1.395677784152423e-06, "epoch": 0.8042425639843209, "percentage": 40.21, "elapsed_time": "2:21:11", "remaining_time": "3:29:55"} +{"current_steps": 3489, "total_steps": 8674, "loss": 0.5225522518157959, "lr": 1.3953276091173326e-06, "epoch": 0.8044731381139036, "percentage": 40.22, "elapsed_time": "2:21:13", "remaining_time": "3:29:53"} +{"current_steps": 3490, "total_steps": 8674, "loss": 0.43893736600875854, "lr": 1.3949773766173675e-06, "epoch": 0.8047037122434862, "percentage": 40.24, "elapsed_time": "2:21:16", "remaining_time": "3:29:50"} +{"current_steps": 3491, "total_steps": 8674, "loss": 0.4583659768104553, "lr": 1.3946270867034375e-06, "epoch": 0.804934286373069, "percentage": 40.25, "elapsed_time": "2:21:18", "remaining_time": "3:29:48"} +{"current_steps": 3492, "total_steps": 8674, "loss": 0.49550747871398926, "lr": 1.394276739426461e-06, "epoch": 0.8051648605026516, "percentage": 40.26, "elapsed_time": "2:21:21", "remaining_time": "3:29:45"} +{"current_steps": 3493, "total_steps": 8674, "loss": 0.5637674331665039, "lr": 1.3939263348373648e-06, "epoch": 0.8053954346322343, "percentage": 40.27, "elapsed_time": "2:21:23", "remaining_time": "3:29:43"} +{"current_steps": 3494, "total_steps": 8674, "loss": 0.4853670299053192, "lr": 1.3935758729870835e-06, "epoch": 
0.8056260087618169, "percentage": 40.28, "elapsed_time": "2:21:26", "remaining_time": "3:29:41"} +{"current_steps": 3495, "total_steps": 8674, "loss": 0.4535500407218933, "lr": 1.3932253539265603e-06, "epoch": 0.8058565828913996, "percentage": 40.29, "elapsed_time": "2:21:28", "remaining_time": "3:29:38"} +{"current_steps": 3496, "total_steps": 8674, "loss": 0.4198870062828064, "lr": 1.3928747777067464e-06, "epoch": 0.8060871570209822, "percentage": 40.3, "elapsed_time": "2:21:30", "remaining_time": "3:29:35"} +{"current_steps": 3497, "total_steps": 8674, "loss": 0.45773670077323914, "lr": 1.392524144378602e-06, "epoch": 0.8063177311505649, "percentage": 40.32, "elapsed_time": "2:21:33", "remaining_time": "3:29:33"} +{"current_steps": 3498, "total_steps": 8674, "loss": 0.45263248682022095, "lr": 1.3921734539930952e-06, "epoch": 0.8065483052801475, "percentage": 40.33, "elapsed_time": "2:21:35", "remaining_time": "3:29:31"} +{"current_steps": 3499, "total_steps": 8674, "loss": 0.473066508769989, "lr": 1.3918227066012025e-06, "epoch": 0.8067788794097303, "percentage": 40.34, "elapsed_time": "2:21:38", "remaining_time": "3:29:28"} +{"current_steps": 3500, "total_steps": 8674, "loss": 0.35737159848213196, "lr": 1.3914719022539082e-06, "epoch": 0.8070094535393129, "percentage": 40.35, "elapsed_time": "2:21:40", "remaining_time": "3:29:26"} +{"current_steps": 3501, "total_steps": 8674, "loss": 0.5162703394889832, "lr": 1.3911210410022054e-06, "epoch": 0.8072400276688956, "percentage": 40.36, "elapsed_time": "2:21:44", "remaining_time": "3:29:25"} +{"current_steps": 3502, "total_steps": 8674, "loss": 0.5347551703453064, "lr": 1.3907701228970955e-06, "epoch": 0.8074706017984782, "percentage": 40.37, "elapsed_time": "2:21:46", "remaining_time": "3:29:23"} +{"current_steps": 3503, "total_steps": 8674, "loss": 0.4889448881149292, "lr": 1.390419147989588e-06, "epoch": 0.8077011759280609, "percentage": 40.39, "elapsed_time": "2:21:49", "remaining_time": "3:29:20"} 
+{"current_steps": 3504, "total_steps": 8674, "loss": 0.47468650341033936, "lr": 1.3900681163306999e-06, "epoch": 0.8079317500576435, "percentage": 40.4, "elapsed_time": "2:21:51", "remaining_time": "3:29:18"} +{"current_steps": 3505, "total_steps": 8674, "loss": 0.43236857652664185, "lr": 1.3897170279714585e-06, "epoch": 0.8081623241872262, "percentage": 40.41, "elapsed_time": "2:21:53", "remaining_time": "3:29:16"} +{"current_steps": 3506, "total_steps": 8674, "loss": 0.46778976917266846, "lr": 1.3893658829628974e-06, "epoch": 0.8083928983168088, "percentage": 40.42, "elapsed_time": "2:21:56", "remaining_time": "3:29:13"} +{"current_steps": 3507, "total_steps": 8674, "loss": 0.49447667598724365, "lr": 1.389014681356059e-06, "epoch": 0.8086234724463915, "percentage": 40.43, "elapsed_time": "2:21:59", "remaining_time": "3:29:11"} +{"current_steps": 3508, "total_steps": 8674, "loss": 0.5221220254898071, "lr": 1.388663423201994e-06, "epoch": 0.8088540465759742, "percentage": 40.44, "elapsed_time": "2:22:01", "remaining_time": "3:29:09"} +{"current_steps": 3509, "total_steps": 8674, "loss": 0.5037325620651245, "lr": 1.3883121085517615e-06, "epoch": 0.8090846207055569, "percentage": 40.45, "elapsed_time": "2:22:03", "remaining_time": "3:29:06"} +{"current_steps": 3510, "total_steps": 8674, "loss": 0.46879589557647705, "lr": 1.387960737456429e-06, "epoch": 0.8093151948351395, "percentage": 40.47, "elapsed_time": "2:22:06", "remaining_time": "3:29:04"} +{"current_steps": 3511, "total_steps": 8674, "loss": 0.44216716289520264, "lr": 1.387609309967071e-06, "epoch": 0.8095457689647222, "percentage": 40.48, "elapsed_time": "2:22:08", "remaining_time": "3:29:01"} +{"current_steps": 3512, "total_steps": 8674, "loss": 0.4525749981403351, "lr": 1.3872578261347716e-06, "epoch": 0.8097763430943048, "percentage": 40.49, "elapsed_time": "2:22:11", "remaining_time": "3:28:59"} +{"current_steps": 3513, "total_steps": 8674, "loss": 0.44681644439697266, "lr": 1.3869062860106224e-06, 
"epoch": 0.8100069172238875, "percentage": 40.5, "elapsed_time": "2:22:13", "remaining_time": "3:28:56"} +{"current_steps": 3514, "total_steps": 8674, "loss": 0.4162617325782776, "lr": 1.3865546896457233e-06, "epoch": 0.8102374913534701, "percentage": 40.51, "elapsed_time": "2:22:15", "remaining_time": "3:28:54"} +{"current_steps": 3515, "total_steps": 8674, "loss": 0.5262776613235474, "lr": 1.3862030370911827e-06, "epoch": 0.8104680654830528, "percentage": 40.52, "elapsed_time": "2:22:18", "remaining_time": "3:28:52"} +{"current_steps": 3516, "total_steps": 8674, "loss": 0.48102372884750366, "lr": 1.3858513283981163e-06, "epoch": 0.8106986396126354, "percentage": 40.53, "elapsed_time": "2:22:20", "remaining_time": "3:28:49"} +{"current_steps": 3517, "total_steps": 8674, "loss": 0.46166497468948364, "lr": 1.385499563617649e-06, "epoch": 0.8109292137422182, "percentage": 40.55, "elapsed_time": "2:22:23", "remaining_time": "3:28:47"} +{"current_steps": 3518, "total_steps": 8674, "loss": 0.43523284792900085, "lr": 1.3851477428009133e-06, "epoch": 0.8111597878718008, "percentage": 40.56, "elapsed_time": "2:22:25", "remaining_time": "3:28:44"} +{"current_steps": 3519, "total_steps": 8674, "loss": 0.5413048267364502, "lr": 1.3847958659990497e-06, "epoch": 0.8113903620013835, "percentage": 40.57, "elapsed_time": "2:22:28", "remaining_time": "3:28:42"} +{"current_steps": 3520, "total_steps": 8674, "loss": 0.4257383346557617, "lr": 1.3844439332632073e-06, "epoch": 0.8116209361309661, "percentage": 40.58, "elapsed_time": "2:22:30", "remaining_time": "3:28:40"} +{"current_steps": 3521, "total_steps": 8674, "loss": 0.4812018871307373, "lr": 1.3840919446445427e-06, "epoch": 0.8118515102605488, "percentage": 40.59, "elapsed_time": "2:22:33", "remaining_time": "3:28:37"} +{"current_steps": 3522, "total_steps": 8674, "loss": 0.4890254735946655, "lr": 1.3837399001942216e-06, "epoch": 0.8120820843901314, "percentage": 40.6, "elapsed_time": "2:22:35", "remaining_time": "3:28:35"} 
+{"current_steps": 3523, "total_steps": 8674, "loss": 0.5079991817474365, "lr": 1.3833877999634166e-06, "epoch": 0.8123126585197141, "percentage": 40.62, "elapsed_time": "2:22:38", "remaining_time": "3:28:32"} +{"current_steps": 3524, "total_steps": 8674, "loss": 0.44703438878059387, "lr": 1.3830356440033096e-06, "epoch": 0.8125432326492967, "percentage": 40.63, "elapsed_time": "2:22:40", "remaining_time": "3:28:30"} +{"current_steps": 3525, "total_steps": 8674, "loss": 0.4218645989894867, "lr": 1.3826834323650898e-06, "epoch": 0.8127738067788794, "percentage": 40.64, "elapsed_time": "2:22:42", "remaining_time": "3:28:27"} +{"current_steps": 3526, "total_steps": 8674, "loss": 0.4544546902179718, "lr": 1.3823311650999547e-06, "epoch": 0.813004380908462, "percentage": 40.65, "elapsed_time": "2:22:45", "remaining_time": "3:28:25"} +{"current_steps": 3527, "total_steps": 8674, "loss": 0.4978422224521637, "lr": 1.3819788422591099e-06, "epoch": 0.8132349550380448, "percentage": 40.66, "elapsed_time": "2:22:47", "remaining_time": "3:28:23"} +{"current_steps": 3528, "total_steps": 8674, "loss": 0.42122140526771545, "lr": 1.3816264638937688e-06, "epoch": 0.8134655291676274, "percentage": 40.67, "elapsed_time": "2:22:50", "remaining_time": "3:28:20"} +{"current_steps": 3529, "total_steps": 8674, "loss": 0.45674729347229004, "lr": 1.381274030055154e-06, "epoch": 0.8136961032972101, "percentage": 40.68, "elapsed_time": "2:22:52", "remaining_time": "3:28:18"} +{"current_steps": 3530, "total_steps": 8674, "loss": 0.5075385570526123, "lr": 1.3809215407944947e-06, "epoch": 0.8139266774267927, "percentage": 40.7, "elapsed_time": "2:22:55", "remaining_time": "3:28:16"} +{"current_steps": 3531, "total_steps": 8674, "loss": 0.45952552556991577, "lr": 1.380568996163029e-06, "epoch": 0.8141572515563754, "percentage": 40.71, "elapsed_time": "2:22:57", "remaining_time": "3:28:13"} +{"current_steps": 3532, "total_steps": 8674, "loss": 0.5062624216079712, "lr": 1.3802163962120025e-06, 
"epoch": 0.814387825685958, "percentage": 40.72, "elapsed_time": "2:23:00", "remaining_time": "3:28:12"} +{"current_steps": 3533, "total_steps": 8674, "loss": 0.49294552206993103, "lr": 1.3798637409926698e-06, "epoch": 0.8146183998155407, "percentage": 40.73, "elapsed_time": "2:23:04", "remaining_time": "3:28:11"} +{"current_steps": 3534, "total_steps": 8674, "loss": 0.4389861822128296, "lr": 1.3795110305562926e-06, "epoch": 0.8148489739451233, "percentage": 40.74, "elapsed_time": "2:23:07", "remaining_time": "3:28:10"} +{"current_steps": 3535, "total_steps": 8674, "loss": 0.47733181715011597, "lr": 1.3791582649541401e-06, "epoch": 0.8150795480747061, "percentage": 40.75, "elapsed_time": "2:23:11", "remaining_time": "3:28:09"} +{"current_steps": 3536, "total_steps": 8674, "loss": 0.5007725358009338, "lr": 1.3788054442374918e-06, "epoch": 0.8153101222042887, "percentage": 40.77, "elapsed_time": "2:23:14", "remaining_time": "3:28:08"} +{"current_steps": 3537, "total_steps": 8674, "loss": 0.4857913553714752, "lr": 1.378452568457633e-06, "epoch": 0.8155406963338714, "percentage": 40.78, "elapsed_time": "2:23:17", "remaining_time": "3:28:07"} +{"current_steps": 3538, "total_steps": 8674, "loss": 0.5330549478530884, "lr": 1.3780996376658577e-06, "epoch": 0.815771270463454, "percentage": 40.79, "elapsed_time": "2:23:21", "remaining_time": "3:28:06"} +{"current_steps": 3539, "total_steps": 8674, "loss": 0.45034217834472656, "lr": 1.3777466519134684e-06, "epoch": 0.8160018445930367, "percentage": 40.8, "elapsed_time": "2:23:25", "remaining_time": "3:28:06"} +{"current_steps": 3540, "total_steps": 8674, "loss": 0.4442213773727417, "lr": 1.3773936112517746e-06, "epoch": 0.8162324187226193, "percentage": 40.81, "elapsed_time": "2:23:29", "remaining_time": "3:28:06"} +{"current_steps": 3541, "total_steps": 8674, "loss": 0.5000369548797607, "lr": 1.377040515732095e-06, "epoch": 0.816462992852202, "percentage": 40.82, "elapsed_time": "2:23:33", "remaining_time": "3:28:05"} 
+{"current_steps": 3542, "total_steps": 8674, "loss": 0.5117775797843933, "lr": 1.3766873654057551e-06, "epoch": 0.8166935669817846, "percentage": 40.83, "elapsed_time": "2:23:37", "remaining_time": "3:28:05"} +{"current_steps": 3543, "total_steps": 8674, "loss": 0.431648850440979, "lr": 1.3763341603240889e-06, "epoch": 0.8169241411113674, "percentage": 40.85, "elapsed_time": "2:23:41", "remaining_time": "3:28:05"} +{"current_steps": 3544, "total_steps": 8674, "loss": 0.39463019371032715, "lr": 1.3759809005384387e-06, "epoch": 0.81715471524095, "percentage": 40.86, "elapsed_time": "2:23:45", "remaining_time": "3:28:04"} +{"current_steps": 3545, "total_steps": 8674, "loss": 0.38739651441574097, "lr": 1.375627586100154e-06, "epoch": 0.8173852893705327, "percentage": 40.87, "elapsed_time": "2:23:48", "remaining_time": "3:28:04"} +{"current_steps": 3546, "total_steps": 8674, "loss": 0.3973360061645508, "lr": 1.3752742170605927e-06, "epoch": 0.8176158635001153, "percentage": 40.88, "elapsed_time": "2:23:52", "remaining_time": "3:28:04"} +{"current_steps": 3547, "total_steps": 8674, "loss": 0.4791724383831024, "lr": 1.3749207934711207e-06, "epoch": 0.817846437629698, "percentage": 40.89, "elapsed_time": "2:23:56", "remaining_time": "3:28:03"} +{"current_steps": 3548, "total_steps": 8674, "loss": 0.5245905518531799, "lr": 1.3745673153831114e-06, "epoch": 0.8180770117592806, "percentage": 40.9, "elapsed_time": "2:24:00", "remaining_time": "3:28:03"} +{"current_steps": 3549, "total_steps": 8674, "loss": 0.5507007241249084, "lr": 1.3742137828479472e-06, "epoch": 0.8183075858888633, "percentage": 40.92, "elapsed_time": "2:24:04", "remaining_time": "3:28:03"} +{"current_steps": 3550, "total_steps": 8674, "loss": 0.4555748701095581, "lr": 1.373860195917017e-06, "epoch": 0.8185381600184459, "percentage": 40.93, "elapsed_time": "2:24:08", "remaining_time": "3:28:03"} +{"current_steps": 3551, "total_steps": 8674, "loss": 0.39309239387512207, "lr": 1.3735065546417182e-06, "epoch": 
0.8187687341480286, "percentage": 40.94, "elapsed_time": "2:24:12", "remaining_time": "3:28:03"} +{"current_steps": 3552, "total_steps": 8674, "loss": 0.4984157681465149, "lr": 1.3731528590734564e-06, "epoch": 0.8189993082776112, "percentage": 40.95, "elapsed_time": "2:24:16", "remaining_time": "3:28:02"} +{"current_steps": 3553, "total_steps": 8674, "loss": 0.45853057503700256, "lr": 1.3727991092636448e-06, "epoch": 0.819229882407194, "percentage": 40.96, "elapsed_time": "2:24:20", "remaining_time": "3:28:02"} +{"current_steps": 3554, "total_steps": 8674, "loss": 0.47412237524986267, "lr": 1.3724453052637043e-06, "epoch": 0.8194604565367766, "percentage": 40.97, "elapsed_time": "2:24:24", "remaining_time": "3:28:02"} +{"current_steps": 3555, "total_steps": 8674, "loss": 0.46433544158935547, "lr": 1.3720914471250642e-06, "epoch": 0.8196910306663593, "percentage": 40.98, "elapsed_time": "2:24:28", "remaining_time": "3:28:01"} +{"current_steps": 3556, "total_steps": 8674, "loss": 0.5773437023162842, "lr": 1.3717375348991612e-06, "epoch": 0.8199216047959419, "percentage": 41.0, "elapsed_time": "2:24:32", "remaining_time": "3:28:01"} +{"current_steps": 3557, "total_steps": 8674, "loss": 0.5943500995635986, "lr": 1.37138356863744e-06, "epoch": 0.8201521789255246, "percentage": 41.01, "elapsed_time": "2:24:36", "remaining_time": "3:28:01"} +{"current_steps": 3558, "total_steps": 8674, "loss": 0.4970731735229492, "lr": 1.3710295483913533e-06, "epoch": 0.8203827530551072, "percentage": 41.02, "elapsed_time": "2:24:40", "remaining_time": "3:28:01"} +{"current_steps": 3559, "total_steps": 8674, "loss": 0.44726189970970154, "lr": 1.3706754742123611e-06, "epoch": 0.8206133271846898, "percentage": 41.03, "elapsed_time": "2:24:44", "remaining_time": "3:28:00"} +{"current_steps": 3560, "total_steps": 8674, "loss": 0.3980759382247925, "lr": 1.3703213461519325e-06, "epoch": 0.8208439013142725, "percentage": 41.04, "elapsed_time": "2:24:47", "remaining_time": "3:28:00"} 
+{"current_steps": 3561, "total_steps": 8674, "loss": 0.5521829724311829, "lr": 1.3699671642615434e-06, "epoch": 0.8210744754438551, "percentage": 41.05, "elapsed_time": "2:24:51", "remaining_time": "3:27:59"} +{"current_steps": 3562, "total_steps": 8674, "loss": 0.42630624771118164, "lr": 1.3696129285926769e-06, "epoch": 0.8213050495734379, "percentage": 41.07, "elapsed_time": "2:24:55", "remaining_time": "3:27:59"} +{"current_steps": 3563, "total_steps": 8674, "loss": 0.5060243606567383, "lr": 1.3692586391968254e-06, "epoch": 0.8215356237030205, "percentage": 41.08, "elapsed_time": "2:24:59", "remaining_time": "3:27:59"} +{"current_steps": 3564, "total_steps": 8674, "loss": 0.5803407430648804, "lr": 1.3689042961254884e-06, "epoch": 0.8217661978326032, "percentage": 41.09, "elapsed_time": "2:25:03", "remaining_time": "3:27:58"} +{"current_steps": 3565, "total_steps": 8674, "loss": 0.4510403871536255, "lr": 1.3685498994301735e-06, "epoch": 0.8219967719621858, "percentage": 41.1, "elapsed_time": "2:25:07", "remaining_time": "3:27:58"} +{"current_steps": 3566, "total_steps": 8674, "loss": 0.5350467562675476, "lr": 1.3681954491623953e-06, "epoch": 0.8222273460917685, "percentage": 41.11, "elapsed_time": "2:25:11", "remaining_time": "3:27:58"} +{"current_steps": 3567, "total_steps": 8674, "loss": 0.5194679498672485, "lr": 1.367840945373677e-06, "epoch": 0.8224579202213511, "percentage": 41.12, "elapsed_time": "2:25:15", "remaining_time": "3:27:58"} +{"current_steps": 3568, "total_steps": 8674, "loss": 0.43574345111846924, "lr": 1.3674863881155495e-06, "epoch": 0.8226884943509338, "percentage": 41.13, "elapsed_time": "2:25:18", "remaining_time": "3:27:56"} +{"current_steps": 3569, "total_steps": 8674, "loss": 0.43051451444625854, "lr": 1.367131777439551e-06, "epoch": 0.8229190684805164, "percentage": 41.15, "elapsed_time": "2:25:21", "remaining_time": "3:27:54"} +{"current_steps": 3570, "total_steps": 8674, "loss": 0.44449925422668457, "lr": 1.3667771133972278e-06, 
"epoch": 0.8231496426100992, "percentage": 41.16, "elapsed_time": "2:25:24", "remaining_time": "3:27:53"} +{"current_steps": 3571, "total_steps": 8674, "loss": 0.4466608464717865, "lr": 1.3664223960401342e-06, "epoch": 0.8233802167396818, "percentage": 41.17, "elapsed_time": "2:25:27", "remaining_time": "3:27:52"} +{"current_steps": 3572, "total_steps": 8674, "loss": 0.6172389984130859, "lr": 1.3660676254198318e-06, "epoch": 0.8236107908692645, "percentage": 41.18, "elapsed_time": "2:25:31", "remaining_time": "3:27:51"} +{"current_steps": 3573, "total_steps": 8674, "loss": 0.3789742588996887, "lr": 1.36571280158789e-06, "epoch": 0.8238413649988471, "percentage": 41.19, "elapsed_time": "2:25:34", "remaining_time": "3:27:49"} +{"current_steps": 3574, "total_steps": 8674, "loss": 0.3871726095676422, "lr": 1.365357924595886e-06, "epoch": 0.8240719391284298, "percentage": 41.2, "elapsed_time": "2:25:38", "remaining_time": "3:27:49"} +{"current_steps": 3575, "total_steps": 8674, "loss": 0.5464534759521484, "lr": 1.3650029944954047e-06, "epoch": 0.8243025132580124, "percentage": 41.22, "elapsed_time": "2:25:41", "remaining_time": "3:27:48"} +{"current_steps": 3576, "total_steps": 8674, "loss": 0.4924513101577759, "lr": 1.3646480113380392e-06, "epoch": 0.8245330873875951, "percentage": 41.23, "elapsed_time": "2:25:45", "remaining_time": "3:27:48"} +{"current_steps": 3577, "total_steps": 8674, "loss": 0.39648669958114624, "lr": 1.3642929751753896e-06, "epoch": 0.8247636615171777, "percentage": 41.24, "elapsed_time": "2:25:49", "remaining_time": "3:27:47"} +{"current_steps": 3578, "total_steps": 8674, "loss": 0.44139498472213745, "lr": 1.3639378860590642e-06, "epoch": 0.8249942356467604, "percentage": 41.25, "elapsed_time": "2:25:52", "remaining_time": "3:27:46"} +{"current_steps": 3579, "total_steps": 8674, "loss": 0.4477856159210205, "lr": 1.3635827440406784e-06, "epoch": 0.825224809776343, "percentage": 41.26, "elapsed_time": "2:25:56", "remaining_time": "3:27:46"} 
+{"current_steps": 3580, "total_steps": 8674, "loss": 0.48722583055496216, "lr": 1.363227549171856e-06, "epoch": 0.8254553839059258, "percentage": 41.27, "elapsed_time": "2:26:00", "remaining_time": "3:27:45"} +{"current_steps": 3581, "total_steps": 8674, "loss": 0.44485795497894287, "lr": 1.3628723015042285e-06, "epoch": 0.8256859580355084, "percentage": 41.28, "elapsed_time": "2:26:03", "remaining_time": "3:27:43"} +{"current_steps": 3582, "total_steps": 8674, "loss": 0.510918140411377, "lr": 1.362517001089434e-06, "epoch": 0.8259165321650911, "percentage": 41.3, "elapsed_time": "2:26:07", "remaining_time": "3:27:43"} +{"current_steps": 3583, "total_steps": 8674, "loss": 0.5157535076141357, "lr": 1.3621616479791196e-06, "epoch": 0.8261471062946737, "percentage": 41.31, "elapsed_time": "2:26:10", "remaining_time": "3:27:41"} +{"current_steps": 3584, "total_steps": 8674, "loss": 0.6120826005935669, "lr": 1.361806242224939e-06, "epoch": 0.8263776804242564, "percentage": 41.32, "elapsed_time": "2:26:14", "remaining_time": "3:27:41"} +{"current_steps": 3585, "total_steps": 8674, "loss": 0.47521674633026123, "lr": 1.3614507838785545e-06, "epoch": 0.826608254553839, "percentage": 41.33, "elapsed_time": "2:26:18", "remaining_time": "3:27:40"} +{"current_steps": 3586, "total_steps": 8674, "loss": 0.431441068649292, "lr": 1.3610952729916352e-06, "epoch": 0.8268388286834217, "percentage": 41.34, "elapsed_time": "2:26:22", "remaining_time": "3:27:40"} +{"current_steps": 3587, "total_steps": 8674, "loss": 0.5168293118476868, "lr": 1.3607397096158587e-06, "epoch": 0.8270694028130043, "percentage": 41.35, "elapsed_time": "2:26:25", "remaining_time": "3:27:40"} +{"current_steps": 3588, "total_steps": 8674, "loss": 0.47669821977615356, "lr": 1.3603840938029092e-06, "epoch": 0.8272999769425871, "percentage": 41.36, "elapsed_time": "2:26:29", "remaining_time": "3:27:39"} +{"current_steps": 3589, "total_steps": 8674, "loss": 0.5170806050300598, "lr": 1.3600284256044791e-06, "epoch": 
0.8275305510721697, "percentage": 41.38, "elapsed_time": "2:26:33", "remaining_time": "3:27:39"} +{"current_steps": 3590, "total_steps": 8674, "loss": 0.5578932762145996, "lr": 1.359672705072269e-06, "epoch": 0.8277611252017524, "percentage": 41.39, "elapsed_time": "2:26:37", "remaining_time": "3:27:39"} +{"current_steps": 3591, "total_steps": 8674, "loss": 0.45000678300857544, "lr": 1.3593169322579855e-06, "epoch": 0.827991699331335, "percentage": 41.4, "elapsed_time": "2:26:41", "remaining_time": "3:27:38"} +{"current_steps": 3592, "total_steps": 8674, "loss": 0.47859635949134827, "lr": 1.3589611072133448e-06, "epoch": 0.8282222734609177, "percentage": 41.41, "elapsed_time": "2:26:45", "remaining_time": "3:27:38"} +{"current_steps": 3593, "total_steps": 8674, "loss": 0.5373919606208801, "lr": 1.3586052299900693e-06, "epoch": 0.8284528475905003, "percentage": 41.42, "elapsed_time": "2:26:49", "remaining_time": "3:27:37"} +{"current_steps": 3594, "total_steps": 8674, "loss": 0.5461571216583252, "lr": 1.3582493006398888e-06, "epoch": 0.828683421720083, "percentage": 41.43, "elapsed_time": "2:26:53", "remaining_time": "3:27:37"} +{"current_steps": 3595, "total_steps": 8674, "loss": 0.522891640663147, "lr": 1.357893319214542e-06, "epoch": 0.8289139958496656, "percentage": 41.45, "elapsed_time": "2:26:57", "remaining_time": "3:27:36"} +{"current_steps": 3596, "total_steps": 8674, "loss": 0.503441572189331, "lr": 1.3575372857657739e-06, "epoch": 0.8291445699792483, "percentage": 41.46, "elapsed_time": "2:27:01", "remaining_time": "3:27:36"} +{"current_steps": 3597, "total_steps": 8674, "loss": 0.45475268363952637, "lr": 1.357181200345338e-06, "epoch": 0.829375144108831, "percentage": 41.47, "elapsed_time": "2:27:05", "remaining_time": "3:27:36"} +{"current_steps": 3598, "total_steps": 8674, "loss": 0.4626728296279907, "lr": 1.3568250630049944e-06, "epoch": 0.8296057182384137, "percentage": 41.48, "elapsed_time": "2:27:09", "remaining_time": "3:27:35"} +{"current_steps": 
3599, "total_steps": 8674, "loss": 0.590618371963501, "lr": 1.3564688737965118e-06, "epoch": 0.8298362923679963, "percentage": 41.49, "elapsed_time": "2:27:12", "remaining_time": "3:27:35"} +{"current_steps": 3600, "total_steps": 8674, "loss": 0.4252029061317444, "lr": 1.3561126327716658e-06, "epoch": 0.830066866497579, "percentage": 41.5, "elapsed_time": "2:27:15", "remaining_time": "3:27:32"} +{"current_steps": 3601, "total_steps": 8674, "loss": 0.5741503238677979, "lr": 1.3557563399822396e-06, "epoch": 0.8302974406271616, "percentage": 41.51, "elapsed_time": "2:27:19", "remaining_time": "3:27:32"} +{"current_steps": 3602, "total_steps": 8674, "loss": 0.4591038227081299, "lr": 1.3553999954800236e-06, "epoch": 0.8305280147567443, "percentage": 41.53, "elapsed_time": "2:27:21", "remaining_time": "3:27:29"} +{"current_steps": 3603, "total_steps": 8674, "loss": 0.5761657953262329, "lr": 1.3550435993168164e-06, "epoch": 0.8307585888863269, "percentage": 41.54, "elapsed_time": "2:27:23", "remaining_time": "3:27:27"} +{"current_steps": 3604, "total_steps": 8674, "loss": 0.4835323691368103, "lr": 1.3546871515444239e-06, "epoch": 0.8309891630159096, "percentage": 41.55, "elapsed_time": "2:27:26", "remaining_time": "3:27:24"} +{"current_steps": 3605, "total_steps": 8674, "loss": 0.6152533292770386, "lr": 1.3543306522146594e-06, "epoch": 0.8312197371454922, "percentage": 41.56, "elapsed_time": "2:27:28", "remaining_time": "3:27:22"} +{"current_steps": 3606, "total_steps": 8674, "loss": 0.48106616735458374, "lr": 1.3539741013793431e-06, "epoch": 0.831450311275075, "percentage": 41.57, "elapsed_time": "2:27:31", "remaining_time": "3:27:19"} +{"current_steps": 3607, "total_steps": 8674, "loss": 0.48128771781921387, "lr": 1.3536174990903042e-06, "epoch": 0.8316808854046576, "percentage": 41.58, "elapsed_time": "2:27:33", "remaining_time": "3:27:17"} +{"current_steps": 3608, "total_steps": 8674, "loss": 0.4395609498023987, "lr": 1.353260845399378e-06, "epoch": 
0.8319114595342403, "percentage": 41.6, "elapsed_time": "2:27:36", "remaining_time": "3:27:15"} +{"current_steps": 3609, "total_steps": 8674, "loss": 0.5298231840133667, "lr": 1.3529041403584076e-06, "epoch": 0.8321420336638229, "percentage": 41.61, "elapsed_time": "2:27:38", "remaining_time": "3:27:12"} +{"current_steps": 3610, "total_steps": 8674, "loss": 0.4694434404373169, "lr": 1.3525473840192436e-06, "epoch": 0.8323726077934056, "percentage": 41.62, "elapsed_time": "2:27:41", "remaining_time": "3:27:10"} +{"current_steps": 3611, "total_steps": 8674, "loss": 0.4264890253543854, "lr": 1.3521905764337449e-06, "epoch": 0.8326031819229882, "percentage": 41.63, "elapsed_time": "2:27:43", "remaining_time": "3:27:07"} +{"current_steps": 3612, "total_steps": 8674, "loss": 0.3266828656196594, "lr": 1.3518337176537762e-06, "epoch": 0.8328337560525709, "percentage": 41.64, "elapsed_time": "2:27:45", "remaining_time": "3:27:05"} +{"current_steps": 3613, "total_steps": 8674, "loss": 0.5554935336112976, "lr": 1.351476807731211e-06, "epoch": 0.8330643301821535, "percentage": 41.65, "elapsed_time": "2:27:48", "remaining_time": "3:27:02"} +{"current_steps": 3614, "total_steps": 8674, "loss": 0.4375999867916107, "lr": 1.3511198467179295e-06, "epoch": 0.8332949043117363, "percentage": 41.66, "elapsed_time": "2:27:50", "remaining_time": "3:27:00"} +{"current_steps": 3615, "total_steps": 8674, "loss": 0.564457893371582, "lr": 1.35076283466582e-06, "epoch": 0.8335254784413189, "percentage": 41.68, "elapsed_time": "2:27:53", "remaining_time": "3:26:57"} +{"current_steps": 3616, "total_steps": 8674, "loss": 0.5141148567199707, "lr": 1.3504057716267776e-06, "epoch": 0.8337560525709016, "percentage": 41.69, "elapsed_time": "2:27:55", "remaining_time": "3:26:55"} +{"current_steps": 3617, "total_steps": 8674, "loss": 0.45514535903930664, "lr": 1.350048657652705e-06, "epoch": 0.8339866267004842, "percentage": 41.7, "elapsed_time": "2:27:58", "remaining_time": "3:26:52"} +{"current_steps": 
3618, "total_steps": 8674, "loss": 0.5224772691726685, "lr": 1.3496914927955122e-06, "epoch": 0.8342172008300669, "percentage": 41.71, "elapsed_time": "2:28:00", "remaining_time": "3:26:50"} +{"current_steps": 3619, "total_steps": 8674, "loss": 0.45185205340385437, "lr": 1.349334277107117e-06, "epoch": 0.8344477749596495, "percentage": 41.72, "elapsed_time": "2:28:03", "remaining_time": "3:26:47"} +{"current_steps": 3620, "total_steps": 8674, "loss": 0.47232794761657715, "lr": 1.3489770106394444e-06, "epoch": 0.8346783490892322, "percentage": 41.73, "elapsed_time": "2:28:05", "remaining_time": "3:26:45"} +{"current_steps": 3621, "total_steps": 8674, "loss": 0.44031190872192383, "lr": 1.3486196934444264e-06, "epoch": 0.8349089232188148, "percentage": 41.75, "elapsed_time": "2:28:07", "remaining_time": "3:26:42"} +{"current_steps": 3622, "total_steps": 8674, "loss": 0.4594510793685913, "lr": 1.3482623255740028e-06, "epoch": 0.8351394973483975, "percentage": 41.76, "elapsed_time": "2:28:10", "remaining_time": "3:26:40"} +{"current_steps": 3623, "total_steps": 8674, "loss": 0.38726723194122314, "lr": 1.347904907080121e-06, "epoch": 0.8353700714779801, "percentage": 41.77, "elapsed_time": "2:28:12", "remaining_time": "3:26:37"} +{"current_steps": 3624, "total_steps": 8674, "loss": 0.544617772102356, "lr": 1.3475474380147347e-06, "epoch": 0.8356006456075629, "percentage": 41.78, "elapsed_time": "2:28:15", "remaining_time": "3:26:35"} +{"current_steps": 3625, "total_steps": 8674, "loss": 0.503423810005188, "lr": 1.347189918429806e-06, "epoch": 0.8358312197371455, "percentage": 41.79, "elapsed_time": "2:28:17", "remaining_time": "3:26:33"} +{"current_steps": 3626, "total_steps": 8674, "loss": 0.4395143985748291, "lr": 1.3468323483773038e-06, "epoch": 0.8360617938667282, "percentage": 41.8, "elapsed_time": "2:28:20", "remaining_time": "3:26:30"} +{"current_steps": 3627, "total_steps": 8674, "loss": 0.41464856266975403, "lr": 1.346474727909205e-06, "epoch": 
0.8362923679963108, "percentage": 41.81, "elapsed_time": "2:28:22", "remaining_time": "3:26:28"} +{"current_steps": 3628, "total_steps": 8674, "loss": 0.4782845079898834, "lr": 1.346117057077493e-06, "epoch": 0.8365229421258935, "percentage": 41.83, "elapsed_time": "2:28:25", "remaining_time": "3:26:25"} +{"current_steps": 3629, "total_steps": 8674, "loss": 0.48308104276657104, "lr": 1.345759335934159e-06, "epoch": 0.8367535162554761, "percentage": 41.84, "elapsed_time": "2:28:27", "remaining_time": "3:26:23"} +{"current_steps": 3630, "total_steps": 8674, "loss": 0.5759967565536499, "lr": 1.345401564531201e-06, "epoch": 0.8369840903850588, "percentage": 41.85, "elapsed_time": "2:28:29", "remaining_time": "3:26:20"} +{"current_steps": 3631, "total_steps": 8674, "loss": 0.5900512337684631, "lr": 1.3450437429206256e-06, "epoch": 0.8372146645146414, "percentage": 41.86, "elapsed_time": "2:28:32", "remaining_time": "3:26:17"} +{"current_steps": 3632, "total_steps": 8674, "loss": 0.4776286482810974, "lr": 1.3446858711544451e-06, "epoch": 0.8374452386442242, "percentage": 41.87, "elapsed_time": "2:28:34", "remaining_time": "3:26:15"} +{"current_steps": 3633, "total_steps": 8674, "loss": 0.5123563408851624, "lr": 1.34432794928468e-06, "epoch": 0.8376758127738068, "percentage": 41.88, "elapsed_time": "2:28:37", "remaining_time": "3:26:13"} +{"current_steps": 3634, "total_steps": 8674, "loss": 0.5505821108818054, "lr": 1.3439699773633574e-06, "epoch": 0.8379063869033895, "percentage": 41.9, "elapsed_time": "2:28:39", "remaining_time": "3:26:10"} +{"current_steps": 3635, "total_steps": 8674, "loss": 0.5525364875793457, "lr": 1.343611955442513e-06, "epoch": 0.8381369610329721, "percentage": 41.91, "elapsed_time": "2:28:42", "remaining_time": "3:26:08"} +{"current_steps": 3636, "total_steps": 8674, "loss": 0.44074952602386475, "lr": 1.3432538835741884e-06, "epoch": 0.8383675351625548, "percentage": 41.92, "elapsed_time": "2:28:44", "remaining_time": "3:26:05"} 
+{"current_steps": 3637, "total_steps": 8674, "loss": 0.5488649606704712, "lr": 1.3428957618104331e-06, "epoch": 0.8385981092921374, "percentage": 41.93, "elapsed_time": "2:28:47", "remaining_time": "3:26:03"} +{"current_steps": 3638, "total_steps": 8674, "loss": 0.4427725672721863, "lr": 1.3425375902033034e-06, "epoch": 0.8388286834217201, "percentage": 41.94, "elapsed_time": "2:28:49", "remaining_time": "3:26:00"} +{"current_steps": 3639, "total_steps": 8674, "loss": 0.5244250297546387, "lr": 1.3421793688048636e-06, "epoch": 0.8390592575513027, "percentage": 41.95, "elapsed_time": "2:28:51", "remaining_time": "3:25:58"} +{"current_steps": 3640, "total_steps": 8674, "loss": 0.4684640169143677, "lr": 1.3418210976671845e-06, "epoch": 0.8392898316808854, "percentage": 41.96, "elapsed_time": "2:28:54", "remaining_time": "3:25:55"} +{"current_steps": 3641, "total_steps": 8674, "loss": 0.4518035054206848, "lr": 1.3414627768423449e-06, "epoch": 0.839520405810468, "percentage": 41.98, "elapsed_time": "2:28:56", "remaining_time": "3:25:53"} +{"current_steps": 3642, "total_steps": 8674, "loss": 0.47504323720932007, "lr": 1.34110440638243e-06, "epoch": 0.8397509799400508, "percentage": 41.99, "elapsed_time": "2:28:59", "remaining_time": "3:25:51"} +{"current_steps": 3643, "total_steps": 8674, "loss": 0.3835057020187378, "lr": 1.3407459863395326e-06, "epoch": 0.8399815540696334, "percentage": 42.0, "elapsed_time": "2:29:01", "remaining_time": "3:25:48"} +{"current_steps": 3644, "total_steps": 8674, "loss": 0.4103546738624573, "lr": 1.3403875167657529e-06, "epoch": 0.8402121281992161, "percentage": 42.01, "elapsed_time": "2:29:04", "remaining_time": "3:25:45"} +{"current_steps": 3645, "total_steps": 8674, "loss": 0.48064136505126953, "lr": 1.3400289977131974e-06, "epoch": 0.8404427023287987, "percentage": 42.02, "elapsed_time": "2:29:06", "remaining_time": "3:25:43"} +{"current_steps": 3646, "total_steps": 8674, "loss": 0.49655234813690186, "lr": 1.3396704292339813e-06, 
"epoch": 0.8406732764583814, "percentage": 42.03, "elapsed_time": "2:29:08", "remaining_time": "3:25:40"} +{"current_steps": 3647, "total_steps": 8674, "loss": 0.5559303760528564, "lr": 1.3393118113802259e-06, "epoch": 0.840903850587964, "percentage": 42.05, "elapsed_time": "2:29:11", "remaining_time": "3:25:38"} +{"current_steps": 3648, "total_steps": 8674, "loss": 0.5173505544662476, "lr": 1.3389531442040599e-06, "epoch": 0.8411344247175467, "percentage": 42.06, "elapsed_time": "2:29:13", "remaining_time": "3:25:35"} +{"current_steps": 3649, "total_steps": 8674, "loss": 0.500524640083313, "lr": 1.338594427757619e-06, "epoch": 0.8413649988471293, "percentage": 42.07, "elapsed_time": "2:29:16", "remaining_time": "3:25:33"} +{"current_steps": 3650, "total_steps": 8674, "loss": 0.5167285203933716, "lr": 1.3382356620930467e-06, "epoch": 0.8415955729767121, "percentage": 42.08, "elapsed_time": "2:29:18", "remaining_time": "3:25:30"} +{"current_steps": 3651, "total_steps": 8674, "loss": 0.5006825923919678, "lr": 1.3378768472624929e-06, "epoch": 0.8418261471062947, "percentage": 42.09, "elapsed_time": "2:29:21", "remaining_time": "3:25:28"} +{"current_steps": 3652, "total_steps": 8674, "loss": 0.5421864986419678, "lr": 1.3375179833181153e-06, "epoch": 0.8420567212358774, "percentage": 42.1, "elapsed_time": "2:29:23", "remaining_time": "3:25:25"} +{"current_steps": 3653, "total_steps": 8674, "loss": 0.4964475929737091, "lr": 1.337159070312078e-06, "epoch": 0.84228729536546, "percentage": 42.11, "elapsed_time": "2:29:25", "remaining_time": "3:25:23"} +{"current_steps": 3654, "total_steps": 8674, "loss": 0.4020928144454956, "lr": 1.3368001082965528e-06, "epoch": 0.8425178694950427, "percentage": 42.13, "elapsed_time": "2:29:28", "remaining_time": "3:25:20"} +{"current_steps": 3655, "total_steps": 8674, "loss": 0.43009278178215027, "lr": 1.3364410973237183e-06, "epoch": 0.8427484436246253, "percentage": 42.14, "elapsed_time": "2:29:30", "remaining_time": "3:25:18"} 
+{"current_steps": 3656, "total_steps": 8674, "loss": 0.5939761400222778, "lr": 1.3360820374457608e-06, "epoch": 0.842979017754208, "percentage": 42.15, "elapsed_time": "2:29:32", "remaining_time": "3:25:15"} +{"current_steps": 3657, "total_steps": 8674, "loss": 0.43889346718788147, "lr": 1.335722928714873e-06, "epoch": 0.8432095918837906, "percentage": 42.16, "elapsed_time": "2:29:35", "remaining_time": "3:25:13"} +{"current_steps": 3658, "total_steps": 8674, "loss": 0.5125945806503296, "lr": 1.335363771183255e-06, "epoch": 0.8434401660133733, "percentage": 42.17, "elapsed_time": "2:29:37", "remaining_time": "3:25:11"} +{"current_steps": 3659, "total_steps": 8674, "loss": 0.516818642616272, "lr": 1.3350045649031143e-06, "epoch": 0.843670740142956, "percentage": 42.18, "elapsed_time": "2:29:40", "remaining_time": "3:25:08"} +{"current_steps": 3660, "total_steps": 8674, "loss": 0.5098299980163574, "lr": 1.3346453099266649e-06, "epoch": 0.8439013142725387, "percentage": 42.2, "elapsed_time": "2:29:42", "remaining_time": "3:25:06"} +{"current_steps": 3661, "total_steps": 8674, "loss": 0.46228134632110596, "lr": 1.334286006306128e-06, "epoch": 0.8441318884021213, "percentage": 42.21, "elapsed_time": "2:29:45", "remaining_time": "3:25:03"} +{"current_steps": 3662, "total_steps": 8674, "loss": 0.38364481925964355, "lr": 1.3339266540937324e-06, "epoch": 0.844362462531704, "percentage": 42.22, "elapsed_time": "2:29:47", "remaining_time": "3:25:00"} +{"current_steps": 3663, "total_steps": 8674, "loss": 0.4363073706626892, "lr": 1.3335672533417134e-06, "epoch": 0.8445930366612866, "percentage": 42.23, "elapsed_time": "2:29:50", "remaining_time": "3:24:58"} +{"current_steps": 3664, "total_steps": 8674, "loss": 0.463603675365448, "lr": 1.3332078041023133e-06, "epoch": 0.8448236107908693, "percentage": 42.24, "elapsed_time": "2:29:52", "remaining_time": "3:24:55"} +{"current_steps": 3665, "total_steps": 8674, "loss": 0.4173084795475006, "lr": 1.3328483064277816e-06, "epoch": 
0.8450541849204519, "percentage": 42.25, "elapsed_time": "2:29:54", "remaining_time": "3:24:53"} +{"current_steps": 3666, "total_steps": 8674, "loss": 0.41451913118362427, "lr": 1.3324887603703756e-06, "epoch": 0.8452847590500346, "percentage": 42.26, "elapsed_time": "2:29:57", "remaining_time": "3:24:51"} +{"current_steps": 3667, "total_steps": 8674, "loss": 0.49418264627456665, "lr": 1.3321291659823587e-06, "epoch": 0.8455153331796172, "percentage": 42.28, "elapsed_time": "2:30:00", "remaining_time": "3:24:49"} +{"current_steps": 3668, "total_steps": 8674, "loss": 0.48787444829940796, "lr": 1.3317695233160015e-06, "epoch": 0.8457459073092, "percentage": 42.29, "elapsed_time": "2:30:02", "remaining_time": "3:24:46"} +{"current_steps": 3669, "total_steps": 8674, "loss": 0.484865665435791, "lr": 1.3314098324235814e-06, "epoch": 0.8459764814387826, "percentage": 42.3, "elapsed_time": "2:30:05", "remaining_time": "3:24:44"} +{"current_steps": 3670, "total_steps": 8674, "loss": 0.44162076711654663, "lr": 1.3310500933573837e-06, "epoch": 0.8462070555683652, "percentage": 42.31, "elapsed_time": "2:30:07", "remaining_time": "3:24:41"} +{"current_steps": 3671, "total_steps": 8674, "loss": 0.39880990982055664, "lr": 1.3306903061696999e-06, "epoch": 0.8464376296979479, "percentage": 42.32, "elapsed_time": "2:30:09", "remaining_time": "3:24:38"} +{"current_steps": 3672, "total_steps": 8674, "loss": 0.4405972957611084, "lr": 1.3303304709128288e-06, "epoch": 0.8466682038275305, "percentage": 42.33, "elapsed_time": "2:30:12", "remaining_time": "3:24:36"} +{"current_steps": 3673, "total_steps": 8674, "loss": 0.4228917956352234, "lr": 1.3299705876390755e-06, "epoch": 0.8468987779571132, "percentage": 42.34, "elapsed_time": "2:30:14", "remaining_time": "3:24:33"} +{"current_steps": 3674, "total_steps": 8674, "loss": 0.44533059000968933, "lr": 1.3296106564007532e-06, "epoch": 0.8471293520866958, "percentage": 42.36, "elapsed_time": "2:30:17", "remaining_time": "3:24:31"} 
+{"current_steps": 3675, "total_steps": 8674, "loss": 0.4672505855560303, "lr": 1.3292506772501816e-06, "epoch": 0.8473599262162785, "percentage": 42.37, "elapsed_time": "2:30:19", "remaining_time": "3:24:29"} +{"current_steps": 3676, "total_steps": 8674, "loss": 0.5651025772094727, "lr": 1.3288906502396873e-06, "epoch": 0.8475905003458611, "percentage": 42.38, "elapsed_time": "2:30:21", "remaining_time": "3:24:26"} +{"current_steps": 3677, "total_steps": 8674, "loss": 0.4877372086048126, "lr": 1.3285305754216034e-06, "epoch": 0.8478210744754439, "percentage": 42.39, "elapsed_time": "2:30:24", "remaining_time": "3:24:24"} +{"current_steps": 3678, "total_steps": 8674, "loss": 0.43767499923706055, "lr": 1.3281704528482713e-06, "epoch": 0.8480516486050265, "percentage": 42.4, "elapsed_time": "2:30:26", "remaining_time": "3:24:21"} +{"current_steps": 3679, "total_steps": 8674, "loss": 0.5077182650566101, "lr": 1.3278102825720376e-06, "epoch": 0.8482822227346092, "percentage": 42.41, "elapsed_time": "2:30:29", "remaining_time": "3:24:19"} +{"current_steps": 3680, "total_steps": 8674, "loss": 0.4814456105232239, "lr": 1.3274500646452573e-06, "epoch": 0.8485127968641918, "percentage": 42.43, "elapsed_time": "2:30:31", "remaining_time": "3:24:16"} +{"current_steps": 3681, "total_steps": 8674, "loss": 0.4454193115234375, "lr": 1.3270897991202913e-06, "epoch": 0.8487433709937745, "percentage": 42.44, "elapsed_time": "2:30:33", "remaining_time": "3:24:13"} +{"current_steps": 3682, "total_steps": 8674, "loss": 0.3973482549190521, "lr": 1.3267294860495084e-06, "epoch": 0.8489739451233571, "percentage": 42.45, "elapsed_time": "2:30:36", "remaining_time": "3:24:11"} +{"current_steps": 3683, "total_steps": 8674, "loss": 0.5115909576416016, "lr": 1.3263691254852834e-06, "epoch": 0.8492045192529398, "percentage": 42.46, "elapsed_time": "2:30:39", "remaining_time": "3:24:09"} +{"current_steps": 3684, "total_steps": 8674, "loss": 0.4217768907546997, "lr": 1.3260087174799982e-06, 
"epoch": 0.8494350933825224, "percentage": 42.47, "elapsed_time": "2:30:41", "remaining_time": "3:24:06"} +{"current_steps": 3685, "total_steps": 8674, "loss": 0.4462714195251465, "lr": 1.3256482620860414e-06, "epoch": 0.8496656675121051, "percentage": 42.48, "elapsed_time": "2:30:43", "remaining_time": "3:24:04"} +{"current_steps": 3686, "total_steps": 8674, "loss": 0.4617312550544739, "lr": 1.32528775935581e-06, "epoch": 0.8498962416416878, "percentage": 42.49, "elapsed_time": "2:30:46", "remaining_time": "3:24:01"} +{"current_steps": 3687, "total_steps": 8674, "loss": 0.4774616062641144, "lr": 1.324927209341706e-06, "epoch": 0.8501268157712705, "percentage": 42.51, "elapsed_time": "2:30:48", "remaining_time": "3:23:59"} +{"current_steps": 3688, "total_steps": 8674, "loss": 0.38730189204216003, "lr": 1.3245666120961389e-06, "epoch": 0.8503573899008531, "percentage": 42.52, "elapsed_time": "2:30:50", "remaining_time": "3:23:56"} +{"current_steps": 3689, "total_steps": 8674, "loss": 0.45189517736434937, "lr": 1.324205967671525e-06, "epoch": 0.8505879640304358, "percentage": 42.53, "elapsed_time": "2:30:53", "remaining_time": "3:23:53"} +{"current_steps": 3690, "total_steps": 8674, "loss": 0.4965584874153137, "lr": 1.3238452761202887e-06, "epoch": 0.8508185381600184, "percentage": 42.54, "elapsed_time": "2:30:55", "remaining_time": "3:23:51"} +{"current_steps": 3691, "total_steps": 8674, "loss": 0.4409075975418091, "lr": 1.3234845374948591e-06, "epoch": 0.8510491122896011, "percentage": 42.55, "elapsed_time": "2:30:58", "remaining_time": "3:23:49"} +{"current_steps": 3692, "total_steps": 8674, "loss": 0.4457218647003174, "lr": 1.3231237518476737e-06, "epoch": 0.8512796864191837, "percentage": 42.56, "elapsed_time": "2:31:00", "remaining_time": "3:23:46"} +{"current_steps": 3693, "total_steps": 8674, "loss": 0.42810603976249695, "lr": 1.3227629192311762e-06, "epoch": 0.8515102605487664, "percentage": 42.58, "elapsed_time": "2:31:03", "remaining_time": "3:23:44"} 
+{"current_steps": 3694, "total_steps": 8674, "loss": 0.40753173828125, "lr": 1.3224020396978172e-06, "epoch": 0.851740834678349, "percentage": 42.59, "elapsed_time": "2:31:05", "remaining_time": "3:23:41"} +{"current_steps": 3695, "total_steps": 8674, "loss": 0.5057830810546875, "lr": 1.3220411133000542e-06, "epoch": 0.8519714088079318, "percentage": 42.6, "elapsed_time": "2:31:08", "remaining_time": "3:23:39"} +{"current_steps": 3696, "total_steps": 8674, "loss": 0.42498981952667236, "lr": 1.3216801400903515e-06, "epoch": 0.8522019829375144, "percentage": 42.61, "elapsed_time": "2:31:10", "remaining_time": "3:23:36"} +{"current_steps": 3697, "total_steps": 8674, "loss": 0.44985881447792053, "lr": 1.3213191201211806e-06, "epoch": 0.8524325570670971, "percentage": 42.62, "elapsed_time": "2:31:13", "remaining_time": "3:23:34"} +{"current_steps": 3698, "total_steps": 8674, "loss": 0.39984816312789917, "lr": 1.3209580534450192e-06, "epoch": 0.8526631311966797, "percentage": 42.63, "elapsed_time": "2:31:15", "remaining_time": "3:23:31"} +{"current_steps": 3699, "total_steps": 8674, "loss": 0.4773896038532257, "lr": 1.3205969401143516e-06, "epoch": 0.8528937053262624, "percentage": 42.64, "elapsed_time": "2:31:17", "remaining_time": "3:23:29"} +{"current_steps": 3700, "total_steps": 8674, "loss": 0.5699855089187622, "lr": 1.3202357801816698e-06, "epoch": 0.853124279455845, "percentage": 42.66, "elapsed_time": "2:31:20", "remaining_time": "3:23:26"} +{"current_steps": 3701, "total_steps": 8674, "loss": 0.4486675262451172, "lr": 1.3198745736994714e-06, "epoch": 0.8533548535854277, "percentage": 42.67, "elapsed_time": "2:31:23", "remaining_time": "3:23:25"} +{"current_steps": 3702, "total_steps": 8674, "loss": 0.47909995913505554, "lr": 1.3195133207202625e-06, "epoch": 0.8535854277150103, "percentage": 42.68, "elapsed_time": "2:31:26", "remaining_time": "3:23:23"} +{"current_steps": 3703, "total_steps": 8674, "loss": 0.4356222450733185, "lr": 1.3191520212965542e-06, 
"epoch": 0.853816001844593, "percentage": 42.69, "elapsed_time": "2:31:28", "remaining_time": "3:23:20"} +{"current_steps": 3704, "total_steps": 8674, "loss": 0.4734821319580078, "lr": 1.3187906754808646e-06, "epoch": 0.8540465759741757, "percentage": 42.7, "elapsed_time": "2:31:30", "remaining_time": "3:23:18"} +{"current_steps": 3705, "total_steps": 8674, "loss": 0.4164031744003296, "lr": 1.3184292833257197e-06, "epoch": 0.8542771501037584, "percentage": 42.71, "elapsed_time": "2:31:33", "remaining_time": "3:23:15"} +{"current_steps": 3706, "total_steps": 8674, "loss": 0.505548357963562, "lr": 1.3180678448836516e-06, "epoch": 0.854507724233341, "percentage": 42.73, "elapsed_time": "2:31:35", "remaining_time": "3:23:13"} +{"current_steps": 3707, "total_steps": 8674, "loss": 0.4443202316761017, "lr": 1.3177063602071985e-06, "epoch": 0.8547382983629237, "percentage": 42.74, "elapsed_time": "2:31:38", "remaining_time": "3:23:10"} +{"current_steps": 3708, "total_steps": 8674, "loss": 0.4594070017337799, "lr": 1.317344829348906e-06, "epoch": 0.8549688724925063, "percentage": 42.75, "elapsed_time": "2:31:40", "remaining_time": "3:23:08"} +{"current_steps": 3709, "total_steps": 8674, "loss": 0.5346768498420715, "lr": 1.3169832523613265e-06, "epoch": 0.855199446622089, "percentage": 42.76, "elapsed_time": "2:31:43", "remaining_time": "3:23:05"} +{"current_steps": 3710, "total_steps": 8674, "loss": 0.44471168518066406, "lr": 1.3166216292970185e-06, "epoch": 0.8554300207516716, "percentage": 42.77, "elapsed_time": "2:31:45", "remaining_time": "3:23:03"} +{"current_steps": 3711, "total_steps": 8674, "loss": 0.4414154589176178, "lr": 1.3162599602085482e-06, "epoch": 0.8556605948812543, "percentage": 42.78, "elapsed_time": "2:31:48", "remaining_time": "3:23:00"} +{"current_steps": 3712, "total_steps": 8674, "loss": 0.4267842769622803, "lr": 1.3158982451484873e-06, "epoch": 0.855891169010837, "percentage": 42.79, "elapsed_time": "2:31:50", "remaining_time": "3:22:58"} 
+{"current_steps": 3713, "total_steps": 8674, "loss": 0.5282812118530273, "lr": 1.315536484169415e-06, "epoch": 0.8561217431404197, "percentage": 42.81, "elapsed_time": "2:31:52", "remaining_time": "3:22:55"} +{"current_steps": 3714, "total_steps": 8674, "loss": 0.3831692934036255, "lr": 1.3151746773239167e-06, "epoch": 0.8563523172700023, "percentage": 42.82, "elapsed_time": "2:31:55", "remaining_time": "3:22:53"} +{"current_steps": 3715, "total_steps": 8674, "loss": 0.4714779853820801, "lr": 1.3148128246645848e-06, "epoch": 0.856582891399585, "percentage": 42.83, "elapsed_time": "2:31:58", "remaining_time": "3:22:51"} +{"current_steps": 3716, "total_steps": 8674, "loss": 0.515029788017273, "lr": 1.3144509262440185e-06, "epoch": 0.8568134655291676, "percentage": 42.84, "elapsed_time": "2:32:00", "remaining_time": "3:22:49"} +{"current_steps": 3717, "total_steps": 8674, "loss": 0.48407065868377686, "lr": 1.314088982114823e-06, "epoch": 0.8570440396587503, "percentage": 42.85, "elapsed_time": "2:32:03", "remaining_time": "3:22:46"} +{"current_steps": 3718, "total_steps": 8674, "loss": 0.4756847620010376, "lr": 1.3137269923296111e-06, "epoch": 0.8572746137883329, "percentage": 42.86, "elapsed_time": "2:32:05", "remaining_time": "3:22:44"} +{"current_steps": 3719, "total_steps": 8674, "loss": 0.47744277119636536, "lr": 1.313364956941001e-06, "epoch": 0.8575051879179156, "percentage": 42.88, "elapsed_time": "2:32:07", "remaining_time": "3:22:41"} +{"current_steps": 3720, "total_steps": 8674, "loss": 0.4967440366744995, "lr": 1.3130028760016187e-06, "epoch": 0.8577357620474982, "percentage": 42.89, "elapsed_time": "2:32:10", "remaining_time": "3:22:39"} +{"current_steps": 3721, "total_steps": 8674, "loss": 0.44999921321868896, "lr": 1.312640749564096e-06, "epoch": 0.857966336177081, "percentage": 42.9, "elapsed_time": "2:32:12", "remaining_time": "3:22:36"} +{"current_steps": 3722, "total_steps": 8674, "loss": 0.4454652667045593, "lr": 1.3122785776810723e-06, "epoch": 
0.8581969103066636, "percentage": 42.91, "elapsed_time": "2:32:15", "remaining_time": "3:22:34"} +{"current_steps": 3723, "total_steps": 8674, "loss": 0.37483078241348267, "lr": 1.3119163604051923e-06, "epoch": 0.8584274844362463, "percentage": 42.92, "elapsed_time": "2:32:17", "remaining_time": "3:22:31"} +{"current_steps": 3724, "total_steps": 8674, "loss": 0.3732140064239502, "lr": 1.3115540977891076e-06, "epoch": 0.8586580585658289, "percentage": 42.93, "elapsed_time": "2:32:20", "remaining_time": "3:22:30"} +{"current_steps": 3725, "total_steps": 8674, "loss": 0.5709421634674072, "lr": 1.3111917898854779e-06, "epoch": 0.8588886326954116, "percentage": 42.94, "elapsed_time": "2:32:23", "remaining_time": "3:22:27"} +{"current_steps": 3726, "total_steps": 8674, "loss": 0.5301297307014465, "lr": 1.3108294367469677e-06, "epoch": 0.8591192068249942, "percentage": 42.96, "elapsed_time": "2:32:25", "remaining_time": "3:22:24"} +{"current_steps": 3727, "total_steps": 8674, "loss": 0.45979735255241394, "lr": 1.3104670384262484e-06, "epoch": 0.8593497809545769, "percentage": 42.97, "elapsed_time": "2:32:27", "remaining_time": "3:22:22"} +{"current_steps": 3728, "total_steps": 8674, "loss": 0.5051921606063843, "lr": 1.3101045949759985e-06, "epoch": 0.8595803550841595, "percentage": 42.98, "elapsed_time": "2:32:30", "remaining_time": "3:22:19"} +{"current_steps": 3729, "total_steps": 8674, "loss": 0.5057204365730286, "lr": 1.309742106448903e-06, "epoch": 0.8598109292137422, "percentage": 42.99, "elapsed_time": "2:32:32", "remaining_time": "3:22:17"} +{"current_steps": 3730, "total_steps": 8674, "loss": 0.4265059530735016, "lr": 1.3093795728976535e-06, "epoch": 0.8600415033433249, "percentage": 43.0, "elapsed_time": "2:32:35", "remaining_time": "3:22:14"} +{"current_steps": 3731, "total_steps": 8674, "loss": 0.39166492223739624, "lr": 1.3090169943749473e-06, "epoch": 0.8602720774729076, "percentage": 43.01, "elapsed_time": "2:32:37", "remaining_time": "3:22:12"} 
+{"current_steps": 3732, "total_steps": 8674, "loss": 0.4321832060813904, "lr": 1.308654370933489e-06, "epoch": 0.8605026516024902, "percentage": 43.03, "elapsed_time": "2:32:39", "remaining_time": "3:22:09"} +{"current_steps": 3733, "total_steps": 8674, "loss": 0.5028939247131348, "lr": 1.3082917026259906e-06, "epoch": 0.8607332257320729, "percentage": 43.04, "elapsed_time": "2:32:42", "remaining_time": "3:22:07"} +{"current_steps": 3734, "total_steps": 8674, "loss": 0.4642373323440552, "lr": 1.3079289895051681e-06, "epoch": 0.8609637998616555, "percentage": 43.05, "elapsed_time": "2:32:44", "remaining_time": "3:22:05"} +{"current_steps": 3735, "total_steps": 8674, "loss": 0.416348397731781, "lr": 1.3075662316237464e-06, "epoch": 0.8611943739912382, "percentage": 43.06, "elapsed_time": "2:32:47", "remaining_time": "3:22:02"} +{"current_steps": 3736, "total_steps": 8674, "loss": 0.48442524671554565, "lr": 1.3072034290344556e-06, "epoch": 0.8614249481208208, "percentage": 43.07, "elapsed_time": "2:32:49", "remaining_time": "3:21:59"} +{"current_steps": 3737, "total_steps": 8674, "loss": 0.46903935074806213, "lr": 1.3068405817900332e-06, "epoch": 0.8616555222504035, "percentage": 43.08, "elapsed_time": "2:32:52", "remaining_time": "3:21:57"} +{"current_steps": 3738, "total_steps": 8674, "loss": 0.48172008991241455, "lr": 1.3064776899432224e-06, "epoch": 0.8618860963799861, "percentage": 43.09, "elapsed_time": "2:32:54", "remaining_time": "3:21:54"} +{"current_steps": 3739, "total_steps": 8674, "loss": 0.44460922479629517, "lr": 1.3061147535467734e-06, "epoch": 0.8621166705095689, "percentage": 43.11, "elapsed_time": "2:32:57", "remaining_time": "3:21:52"} +{"current_steps": 3740, "total_steps": 8674, "loss": 0.4728608727455139, "lr": 1.3057517726534423e-06, "epoch": 0.8623472446391515, "percentage": 43.12, "elapsed_time": "2:32:59", "remaining_time": "3:21:49"} +{"current_steps": 3741, "total_steps": 8674, "loss": 0.36457544565200806, "lr": 1.3053887473159928e-06, 
"epoch": 0.8625778187687342, "percentage": 43.13, "elapsed_time": "2:33:01", "remaining_time": "3:21:47"} +{"current_steps": 3742, "total_steps": 8674, "loss": 0.3753359317779541, "lr": 1.3050256775871936e-06, "epoch": 0.8628083928983168, "percentage": 43.14, "elapsed_time": "2:33:04", "remaining_time": "3:21:45"} +{"current_steps": 3743, "total_steps": 8674, "loss": 0.38679057359695435, "lr": 1.304662563519821e-06, "epoch": 0.8630389670278995, "percentage": 43.15, "elapsed_time": "2:33:06", "remaining_time": "3:21:42"} +{"current_steps": 3744, "total_steps": 8674, "loss": 0.5008635520935059, "lr": 1.304299405166657e-06, "epoch": 0.8632695411574821, "percentage": 43.16, "elapsed_time": "2:33:09", "remaining_time": "3:21:40"} +{"current_steps": 3745, "total_steps": 8674, "loss": 0.3723052740097046, "lr": 1.3039362025804903e-06, "epoch": 0.8635001152870648, "percentage": 43.18, "elapsed_time": "2:33:11", "remaining_time": "3:21:37"} +{"current_steps": 3746, "total_steps": 8674, "loss": 0.4227592945098877, "lr": 1.3035729558141166e-06, "epoch": 0.8637306894166474, "percentage": 43.19, "elapsed_time": "2:33:13", "remaining_time": "3:21:34"} +{"current_steps": 3747, "total_steps": 8674, "loss": 0.44072139263153076, "lr": 1.3032096649203369e-06, "epoch": 0.8639612635462302, "percentage": 43.2, "elapsed_time": "2:33:16", "remaining_time": "3:21:32"} +{"current_steps": 3748, "total_steps": 8674, "loss": 0.49321871995925903, "lr": 1.3028463299519594e-06, "epoch": 0.8641918376758128, "percentage": 43.21, "elapsed_time": "2:33:18", "remaining_time": "3:21:30"} +{"current_steps": 3749, "total_steps": 8674, "loss": 0.3751382827758789, "lr": 1.3024829509617987e-06, "epoch": 0.8644224118053955, "percentage": 43.22, "elapsed_time": "2:33:21", "remaining_time": "3:21:27"} +{"current_steps": 3750, "total_steps": 8674, "loss": 0.43967729806900024, "lr": 1.3021195280026755e-06, "epoch": 0.8646529859349781, "percentage": 43.23, "elapsed_time": "2:33:23", "remaining_time": "3:21:25"} 
+{"current_steps": 3751, "total_steps": 8674, "loss": 0.4102880358695984, "lr": 1.3017560611274172e-06, "epoch": 0.8648835600645608, "percentage": 43.24, "elapsed_time": "2:33:26", "remaining_time": "3:21:22"} +{"current_steps": 3752, "total_steps": 8674, "loss": 0.5225233435630798, "lr": 1.301392550388857e-06, "epoch": 0.8651141341941434, "percentage": 43.26, "elapsed_time": "2:33:28", "remaining_time": "3:21:20"} +{"current_steps": 3753, "total_steps": 8674, "loss": 0.6021677255630493, "lr": 1.3010289958398352e-06, "epoch": 0.8653447083237261, "percentage": 43.27, "elapsed_time": "2:33:31", "remaining_time": "3:21:17"} +{"current_steps": 3754, "total_steps": 8674, "loss": 0.5031560063362122, "lr": 1.300665397533198e-06, "epoch": 0.8655752824533087, "percentage": 43.28, "elapsed_time": "2:33:33", "remaining_time": "3:21:15"} +{"current_steps": 3755, "total_steps": 8674, "loss": 0.5406110286712646, "lr": 1.300301755521798e-06, "epoch": 0.8658058565828914, "percentage": 43.29, "elapsed_time": "2:33:36", "remaining_time": "3:21:13"} +{"current_steps": 3756, "total_steps": 8674, "loss": 0.5359587669372559, "lr": 1.2999380698584945e-06, "epoch": 0.866036430712474, "percentage": 43.3, "elapsed_time": "2:33:38", "remaining_time": "3:21:10"} +{"current_steps": 3757, "total_steps": 8674, "loss": 0.46089720726013184, "lr": 1.2995743405961525e-06, "epoch": 0.8662670048420568, "percentage": 43.31, "elapsed_time": "2:33:40", "remaining_time": "3:21:08"} +{"current_steps": 3758, "total_steps": 8674, "loss": 0.4611746668815613, "lr": 1.2992105677876444e-06, "epoch": 0.8664975789716394, "percentage": 43.32, "elapsed_time": "2:33:43", "remaining_time": "3:21:05"} +{"current_steps": 3759, "total_steps": 8674, "loss": 0.47040778398513794, "lr": 1.2988467514858478e-06, "epoch": 0.8667281531012221, "percentage": 43.34, "elapsed_time": "2:33:45", "remaining_time": "3:21:03"} +{"current_steps": 3760, "total_steps": 8674, "loss": 0.5118452310562134, "lr": 1.2984828917436469e-06, "epoch": 
0.8669587272308047, "percentage": 43.35, "elapsed_time": "2:33:48", "remaining_time": "3:21:00"} +{"current_steps": 3761, "total_steps": 8674, "loss": 0.42349302768707275, "lr": 1.2981189886139326e-06, "epoch": 0.8671893013603874, "percentage": 43.36, "elapsed_time": "2:33:50", "remaining_time": "3:20:58"} +{"current_steps": 3762, "total_steps": 8674, "loss": 0.4888027310371399, "lr": 1.2977550421496022e-06, "epoch": 0.86741987548997, "percentage": 43.37, "elapsed_time": "2:33:53", "remaining_time": "3:20:55"} +{"current_steps": 3763, "total_steps": 8674, "loss": 0.5637897849082947, "lr": 1.2973910524035587e-06, "epoch": 0.8676504496195527, "percentage": 43.38, "elapsed_time": "2:33:55", "remaining_time": "3:20:53"} +{"current_steps": 3764, "total_steps": 8674, "loss": 0.4159572124481201, "lr": 1.2970270194287119e-06, "epoch": 0.8678810237491353, "percentage": 43.39, "elapsed_time": "2:33:58", "remaining_time": "3:20:50"} +{"current_steps": 3765, "total_steps": 8674, "loss": 0.4558612108230591, "lr": 1.2966629432779775e-06, "epoch": 0.868111597878718, "percentage": 43.41, "elapsed_time": "2:34:00", "remaining_time": "3:20:48"} +{"current_steps": 3766, "total_steps": 8674, "loss": 0.4235115647315979, "lr": 1.2962988240042775e-06, "epoch": 0.8683421720083007, "percentage": 43.42, "elapsed_time": "2:34:03", "remaining_time": "3:20:46"} +{"current_steps": 3767, "total_steps": 8674, "loss": 0.5096476078033447, "lr": 1.2959346616605404e-06, "epoch": 0.8685727461378834, "percentage": 43.43, "elapsed_time": "2:34:05", "remaining_time": "3:20:43"} +{"current_steps": 3768, "total_steps": 8674, "loss": 0.47097906470298767, "lr": 1.2955704562997013e-06, "epoch": 0.868803320267466, "percentage": 43.44, "elapsed_time": "2:34:08", "remaining_time": "3:20:41"} +{"current_steps": 3769, "total_steps": 8674, "loss": 0.4508157968521118, "lr": 1.2952062079747008e-06, "epoch": 0.8690338943970487, "percentage": 43.45, "elapsed_time": "2:34:10", "remaining_time": "3:20:38"} 
+{"current_steps": 3770, "total_steps": 8674, "loss": 0.43800675868988037, "lr": 1.2948419167384864e-06, "epoch": 0.8692644685266313, "percentage": 43.46, "elapsed_time": "2:34:12", "remaining_time": "3:20:36"} +{"current_steps": 3771, "total_steps": 8674, "loss": 0.5512480735778809, "lr": 1.2944775826440108e-06, "epoch": 0.869495042656214, "percentage": 43.47, "elapsed_time": "2:34:15", "remaining_time": "3:20:33"} +{"current_steps": 3772, "total_steps": 8674, "loss": 0.4654430150985718, "lr": 1.2941132057442342e-06, "epoch": 0.8697256167857966, "percentage": 43.49, "elapsed_time": "2:34:17", "remaining_time": "3:20:31"} +{"current_steps": 3773, "total_steps": 8674, "loss": 0.5429458618164062, "lr": 1.293748786092123e-06, "epoch": 0.8699561909153793, "percentage": 43.5, "elapsed_time": "2:34:20", "remaining_time": "3:20:28"} +{"current_steps": 3774, "total_steps": 8674, "loss": 0.415671169757843, "lr": 1.2933843237406481e-06, "epoch": 0.870186765044962, "percentage": 43.51, "elapsed_time": "2:34:22", "remaining_time": "3:20:26"} +{"current_steps": 3775, "total_steps": 8674, "loss": 0.4347325563430786, "lr": 1.2930198187427884e-06, "epoch": 0.8704173391745447, "percentage": 43.52, "elapsed_time": "2:34:25", "remaining_time": "3:20:24"} +{"current_steps": 3776, "total_steps": 8674, "loss": 0.41997528076171875, "lr": 1.2926552711515287e-06, "epoch": 0.8706479133041273, "percentage": 43.53, "elapsed_time": "2:34:27", "remaining_time": "3:20:21"} +{"current_steps": 3777, "total_steps": 8674, "loss": 0.45956090092658997, "lr": 1.292290681019859e-06, "epoch": 0.87087848743371, "percentage": 43.54, "elapsed_time": "2:34:30", "remaining_time": "3:20:18"} +{"current_steps": 3778, "total_steps": 8674, "loss": 0.4615165889263153, "lr": 1.2919260484007767e-06, "epoch": 0.8711090615632926, "percentage": 43.56, "elapsed_time": "2:34:32", "remaining_time": "3:20:16"} +{"current_steps": 3779, "total_steps": 8674, "loss": 0.3919866681098938, "lr": 1.2915613733472848e-06, "epoch": 
0.8713396356928753, "percentage": 43.57, "elapsed_time": "2:34:34", "remaining_time": "3:20:13"} +{"current_steps": 3780, "total_steps": 8674, "loss": 0.5324772000312805, "lr": 1.2911966559123922e-06, "epoch": 0.8715702098224579, "percentage": 43.58, "elapsed_time": "2:34:37", "remaining_time": "3:20:11"} +{"current_steps": 3781, "total_steps": 8674, "loss": 0.4813354015350342, "lr": 1.2908318961491147e-06, "epoch": 0.8718007839520405, "percentage": 43.59, "elapsed_time": "2:34:39", "remaining_time": "3:20:08"} +{"current_steps": 3782, "total_steps": 8674, "loss": 0.5617851614952087, "lr": 1.2904670941104735e-06, "epoch": 0.8720313580816232, "percentage": 43.6, "elapsed_time": "2:34:42", "remaining_time": "3:20:06"} +{"current_steps": 3783, "total_steps": 8674, "loss": 0.5369905233383179, "lr": 1.2901022498494963e-06, "epoch": 0.8722619322112058, "percentage": 43.61, "elapsed_time": "2:34:45", "remaining_time": "3:20:04"} +{"current_steps": 3784, "total_steps": 8674, "loss": 0.469723641872406, "lr": 1.289737363419217e-06, "epoch": 0.8724925063407886, "percentage": 43.62, "elapsed_time": "2:34:47", "remaining_time": "3:20:02"} +{"current_steps": 3785, "total_steps": 8674, "loss": 0.5100580453872681, "lr": 1.2893724348726757e-06, "epoch": 0.8727230804703712, "percentage": 43.64, "elapsed_time": "2:34:50", "remaining_time": "3:19:59"} +{"current_steps": 3786, "total_steps": 8674, "loss": 0.3959219455718994, "lr": 1.289007464262918e-06, "epoch": 0.8729536545999539, "percentage": 43.65, "elapsed_time": "2:34:52", "remaining_time": "3:19:57"} +{"current_steps": 3787, "total_steps": 8674, "loss": 0.4237936735153198, "lr": 1.2886424516429967e-06, "epoch": 0.8731842287295365, "percentage": 43.66, "elapsed_time": "2:34:55", "remaining_time": "3:19:54"} +{"current_steps": 3788, "total_steps": 8674, "loss": 0.4604552984237671, "lr": 1.2882773970659693e-06, "epoch": 0.8734148028591192, "percentage": 43.67, "elapsed_time": "2:34:57", "remaining_time": "3:19:52"} 
+{"current_steps": 3789, "total_steps": 8674, "loss": 0.4265769124031067, "lr": 1.287912300584901e-06, "epoch": 0.8736453769887018, "percentage": 43.68, "elapsed_time": "2:34:59", "remaining_time": "3:19:49"} +{"current_steps": 3790, "total_steps": 8674, "loss": 0.4644312262535095, "lr": 1.2875471622528617e-06, "epoch": 0.8738759511182845, "percentage": 43.69, "elapsed_time": "2:35:02", "remaining_time": "3:19:47"} +{"current_steps": 3791, "total_steps": 8674, "loss": 0.5520300269126892, "lr": 1.2871819821229282e-06, "epoch": 0.8741065252478671, "percentage": 43.71, "elapsed_time": "2:35:05", "remaining_time": "3:19:45"} +{"current_steps": 3792, "total_steps": 8674, "loss": 0.42350637912750244, "lr": 1.2868167602481831e-06, "epoch": 0.8743370993774499, "percentage": 43.72, "elapsed_time": "2:35:07", "remaining_time": "3:19:43"} +{"current_steps": 3793, "total_steps": 8674, "loss": 0.5148683786392212, "lr": 1.2864514966817155e-06, "epoch": 0.8745676735070325, "percentage": 43.73, "elapsed_time": "2:35:10", "remaining_time": "3:19:40"} +{"current_steps": 3794, "total_steps": 8674, "loss": 0.4506865441799164, "lr": 1.2860861914766191e-06, "epoch": 0.8747982476366152, "percentage": 43.74, "elapsed_time": "2:35:12", "remaining_time": "3:19:37"} +{"current_steps": 3795, "total_steps": 8674, "loss": 0.4042026996612549, "lr": 1.2857208446859957e-06, "epoch": 0.8750288217661978, "percentage": 43.75, "elapsed_time": "2:35:14", "remaining_time": "3:19:35"} +{"current_steps": 3796, "total_steps": 8674, "loss": 0.4601382613182068, "lr": 1.2853554563629521e-06, "epoch": 0.8752593958957805, "percentage": 43.76, "elapsed_time": "2:35:17", "remaining_time": "3:19:32"} +{"current_steps": 3797, "total_steps": 8674, "loss": 0.3387809097766876, "lr": 1.2849900265606007e-06, "epoch": 0.8754899700253631, "percentage": 43.77, "elapsed_time": "2:35:19", "remaining_time": "3:19:30"} +{"current_steps": 3798, "total_steps": 8674, "loss": 0.5295180082321167, "lr": 1.2846245553320604e-06, 
"epoch": 0.8757205441549458, "percentage": 43.79, "elapsed_time": "2:35:21", "remaining_time": "3:19:27"} +{"current_steps": 3799, "total_steps": 8674, "loss": 0.47733891010284424, "lr": 1.2842590427304564e-06, "epoch": 0.8759511182845284, "percentage": 43.8, "elapsed_time": "2:35:24", "remaining_time": "3:19:25"} +{"current_steps": 3800, "total_steps": 8674, "loss": 0.46294957399368286, "lr": 1.2838934888089198e-06, "epoch": 0.8761816924141111, "percentage": 43.81, "elapsed_time": "2:35:26", "remaining_time": "3:19:22"} +{"current_steps": 3801, "total_steps": 8674, "loss": 0.4638972580432892, "lr": 1.2835278936205877e-06, "epoch": 0.8764122665436938, "percentage": 43.82, "elapsed_time": "2:35:30", "remaining_time": "3:19:22"} +{"current_steps": 3802, "total_steps": 8674, "loss": 0.5078087449073792, "lr": 1.2831622572186027e-06, "epoch": 0.8766428406732765, "percentage": 43.83, "elapsed_time": "2:35:32", "remaining_time": "3:19:19"} +{"current_steps": 3803, "total_steps": 8674, "loss": 0.49626827239990234, "lr": 1.2827965796561138e-06, "epoch": 0.8768734148028591, "percentage": 43.84, "elapsed_time": "2:35:35", "remaining_time": "3:19:17"} +{"current_steps": 3804, "total_steps": 8674, "loss": 0.4857192635536194, "lr": 1.2824308609862758e-06, "epoch": 0.8771039889324418, "percentage": 43.86, "elapsed_time": "2:35:37", "remaining_time": "3:19:14"} +{"current_steps": 3805, "total_steps": 8674, "loss": 0.5403131246566772, "lr": 1.2820651012622498e-06, "epoch": 0.8773345630620244, "percentage": 43.87, "elapsed_time": "2:35:40", "remaining_time": "3:19:12"} +{"current_steps": 3806, "total_steps": 8674, "loss": 0.519463837146759, "lr": 1.2816993005372029e-06, "epoch": 0.8775651371916071, "percentage": 43.88, "elapsed_time": "2:35:42", "remaining_time": "3:19:09"} +{"current_steps": 3807, "total_steps": 8674, "loss": 0.6038607954978943, "lr": 1.2813334588643077e-06, "epoch": 0.8777957113211897, "percentage": 43.89, "elapsed_time": "2:35:44", "remaining_time": "3:19:06"} 
+{"current_steps": 3808, "total_steps": 8674, "loss": 0.4892663359642029, "lr": 1.280967576296743e-06, "epoch": 0.8780262854507724, "percentage": 43.9, "elapsed_time": "2:35:47", "remaining_time": "3:19:04"} +{"current_steps": 3809, "total_steps": 8674, "loss": 0.47872501611709595, "lr": 1.2806016528876934e-06, "epoch": 0.878256859580355, "percentage": 43.91, "elapsed_time": "2:35:50", "remaining_time": "3:19:02"} +{"current_steps": 3810, "total_steps": 8674, "loss": 0.4863993227481842, "lr": 1.28023568869035e-06, "epoch": 0.8784874337099378, "percentage": 43.92, "elapsed_time": "2:35:52", "remaining_time": "3:19:00"} +{"current_steps": 3811, "total_steps": 8674, "loss": 0.45241546630859375, "lr": 1.2798696837579088e-06, "epoch": 0.8787180078395204, "percentage": 43.94, "elapsed_time": "2:35:55", "remaining_time": "3:18:57"} +{"current_steps": 3812, "total_steps": 8674, "loss": 0.48720863461494446, "lr": 1.2795036381435728e-06, "epoch": 0.8789485819691031, "percentage": 43.95, "elapsed_time": "2:35:57", "remaining_time": "3:18:55"} +{"current_steps": 3813, "total_steps": 8674, "loss": 0.49139827489852905, "lr": 1.2791375519005507e-06, "epoch": 0.8791791560986857, "percentage": 43.96, "elapsed_time": "2:36:00", "remaining_time": "3:18:52"} +{"current_steps": 3814, "total_steps": 8674, "loss": 0.41915225982666016, "lr": 1.278771425082056e-06, "epoch": 0.8794097302282684, "percentage": 43.97, "elapsed_time": "2:36:02", "remaining_time": "3:18:50"} +{"current_steps": 3815, "total_steps": 8674, "loss": 0.41831016540527344, "lr": 1.2784052577413095e-06, "epoch": 0.879640304357851, "percentage": 43.98, "elapsed_time": "2:36:04", "remaining_time": "3:18:47"} +{"current_steps": 3816, "total_steps": 8674, "loss": 0.49456197023391724, "lr": 1.2780390499315374e-06, "epoch": 0.8798708784874337, "percentage": 43.99, "elapsed_time": "2:36:07", "remaining_time": "3:18:45"} +{"current_steps": 3817, "total_steps": 8674, "loss": 0.4656866192817688, "lr": 1.2776728017059714e-06, 
"epoch": 0.8801014526170163, "percentage": 44.01, "elapsed_time": "2:36:10", "remaining_time": "3:18:43"} +{"current_steps": 3818, "total_steps": 8674, "loss": 0.449514776468277, "lr": 1.2773065131178494e-06, "epoch": 0.880332026746599, "percentage": 44.02, "elapsed_time": "2:36:12", "remaining_time": "3:18:40"} +{"current_steps": 3819, "total_steps": 8674, "loss": 0.3762073516845703, "lr": 1.2769401842204156e-06, "epoch": 0.8805626008761817, "percentage": 44.03, "elapsed_time": "2:36:14", "remaining_time": "3:18:38"} +{"current_steps": 3820, "total_steps": 8674, "loss": 0.5680521130561829, "lr": 1.2765738150669192e-06, "epoch": 0.8807931750057644, "percentage": 44.04, "elapsed_time": "2:36:17", "remaining_time": "3:18:35"} +{"current_steps": 3821, "total_steps": 8674, "loss": 0.35371482372283936, "lr": 1.276207405710616e-06, "epoch": 0.881023749135347, "percentage": 44.05, "elapsed_time": "2:36:19", "remaining_time": "3:18:32"} +{"current_steps": 3822, "total_steps": 8674, "loss": 0.5145018100738525, "lr": 1.2758409562047669e-06, "epoch": 0.8812543232649297, "percentage": 44.06, "elapsed_time": "2:36:22", "remaining_time": "3:18:30"} +{"current_steps": 3823, "total_steps": 8674, "loss": 0.5425234436988831, "lr": 1.2754744666026392e-06, "epoch": 0.8814848973945123, "percentage": 44.07, "elapsed_time": "2:36:24", "remaining_time": "3:18:28"} +{"current_steps": 3824, "total_steps": 8674, "loss": 0.48439931869506836, "lr": 1.275107936957506e-06, "epoch": 0.881715471524095, "percentage": 44.09, "elapsed_time": "2:36:27", "remaining_time": "3:18:25"} +{"current_steps": 3825, "total_steps": 8674, "loss": 0.5177323818206787, "lr": 1.2747413673226462e-06, "epoch": 0.8819460456536776, "percentage": 44.1, "elapsed_time": "2:36:29", "remaining_time": "3:18:23"} +{"current_steps": 3826, "total_steps": 8674, "loss": 0.4718499779701233, "lr": 1.2743747577513437e-06, "epoch": 0.8821766197832603, "percentage": 44.11, "elapsed_time": "2:36:31", "remaining_time": "3:18:20"} 
+{"current_steps": 3827, "total_steps": 8674, "loss": 0.5140804648399353, "lr": 1.27400810829689e-06, "epoch": 0.882407193912843, "percentage": 44.12, "elapsed_time": "2:36:34", "remaining_time": "3:18:18"} +{"current_steps": 3828, "total_steps": 8674, "loss": 0.4611731767654419, "lr": 1.2736414190125805e-06, "epoch": 0.8826377680424257, "percentage": 44.13, "elapsed_time": "2:36:36", "remaining_time": "3:18:15"} +{"current_steps": 3829, "total_steps": 8674, "loss": 0.526127815246582, "lr": 1.2732746899517175e-06, "epoch": 0.8828683421720083, "percentage": 44.14, "elapsed_time": "2:36:39", "remaining_time": "3:18:13"} +{"current_steps": 3830, "total_steps": 8674, "loss": 0.4039766192436218, "lr": 1.2729079211676085e-06, "epoch": 0.883098916301591, "percentage": 44.15, "elapsed_time": "2:36:41", "remaining_time": "3:18:10"} +{"current_steps": 3831, "total_steps": 8674, "loss": 0.4232807159423828, "lr": 1.2725411127135676e-06, "epoch": 0.8833294904311736, "percentage": 44.17, "elapsed_time": "2:36:44", "remaining_time": "3:18:08"} +{"current_steps": 3832, "total_steps": 8674, "loss": 0.48490262031555176, "lr": 1.2721742646429142e-06, "epoch": 0.8835600645607563, "percentage": 44.18, "elapsed_time": "2:36:46", "remaining_time": "3:18:05"} +{"current_steps": 3833, "total_steps": 8674, "loss": 0.4664677083492279, "lr": 1.2718073770089729e-06, "epoch": 0.8837906386903389, "percentage": 44.19, "elapsed_time": "2:36:49", "remaining_time": "3:18:03"} +{"current_steps": 3834, "total_steps": 8674, "loss": 0.4402846097946167, "lr": 1.2714404498650742e-06, "epoch": 0.8840212128199216, "percentage": 44.2, "elapsed_time": "2:36:51", "remaining_time": "3:18:01"} +{"current_steps": 3835, "total_steps": 8674, "loss": 0.45942988991737366, "lr": 1.2710734832645555e-06, "epoch": 0.8842517869495042, "percentage": 44.21, "elapsed_time": "2:36:53", "remaining_time": "3:17:58"} +{"current_steps": 3836, "total_steps": 8674, "loss": 0.45924365520477295, "lr": 1.2707064772607587e-06, "epoch": 
0.884482361079087, "percentage": 44.22, "elapsed_time": "2:36:56", "remaining_time": "3:17:56"} +{"current_steps": 3837, "total_steps": 8674, "loss": 0.3877851963043213, "lr": 1.270339431907032e-06, "epoch": 0.8847129352086696, "percentage": 44.24, "elapsed_time": "2:36:58", "remaining_time": "3:17:53"} +{"current_steps": 3838, "total_steps": 8674, "loss": 0.45364105701446533, "lr": 1.2699723472567288e-06, "epoch": 0.8849435093382523, "percentage": 44.25, "elapsed_time": "2:37:01", "remaining_time": "3:17:51"} +{"current_steps": 3839, "total_steps": 8674, "loss": 0.3527877926826477, "lr": 1.2696052233632089e-06, "epoch": 0.8851740834678349, "percentage": 44.26, "elapsed_time": "2:37:03", "remaining_time": "3:17:48"} +{"current_steps": 3840, "total_steps": 8674, "loss": 0.499268501996994, "lr": 1.2692380602798375e-06, "epoch": 0.8854046575974176, "percentage": 44.27, "elapsed_time": "2:37:06", "remaining_time": "3:17:46"} +{"current_steps": 3841, "total_steps": 8674, "loss": 0.39443689584732056, "lr": 1.2688708580599854e-06, "epoch": 0.8856352317270002, "percentage": 44.28, "elapsed_time": "2:37:08", "remaining_time": "3:17:44"} +{"current_steps": 3842, "total_steps": 8674, "loss": 0.5262328386306763, "lr": 1.268503616757029e-06, "epoch": 0.8858658058565829, "percentage": 44.29, "elapsed_time": "2:37:11", "remaining_time": "3:17:41"} +{"current_steps": 3843, "total_steps": 8674, "loss": 0.4761236608028412, "lr": 1.2681363364243509e-06, "epoch": 0.8860963799861655, "percentage": 44.3, "elapsed_time": "2:37:13", "remaining_time": "3:17:39"} +{"current_steps": 3844, "total_steps": 8674, "loss": 0.5173169374465942, "lr": 1.2677690171153391e-06, "epoch": 0.8863269541157482, "percentage": 44.32, "elapsed_time": "2:37:16", "remaining_time": "3:17:36"} +{"current_steps": 3845, "total_steps": 8674, "loss": 0.5304574966430664, "lr": 1.2674016588833866e-06, "epoch": 0.8865575282453309, "percentage": 44.33, "elapsed_time": "2:37:18", "remaining_time": "3:17:33"} 
+{"current_steps": 3846, "total_steps": 8674, "loss": 0.44707632064819336, "lr": 1.2670342617818925e-06, "epoch": 0.8867881023749136, "percentage": 44.34, "elapsed_time": "2:37:20", "remaining_time": "3:17:31"} +{"current_steps": 3847, "total_steps": 8674, "loss": 0.44395360350608826, "lr": 1.2666668258642628e-06, "epoch": 0.8870186765044962, "percentage": 44.35, "elapsed_time": "2:37:23", "remaining_time": "3:17:28"} +{"current_steps": 3848, "total_steps": 8674, "loss": 0.4993078112602234, "lr": 1.266299351183907e-06, "epoch": 0.8872492506340789, "percentage": 44.36, "elapsed_time": "2:37:25", "remaining_time": "3:17:26"} +{"current_steps": 3849, "total_steps": 8674, "loss": 0.4836229681968689, "lr": 1.2659318377942418e-06, "epoch": 0.8874798247636615, "percentage": 44.37, "elapsed_time": "2:37:28", "remaining_time": "3:17:23"} +{"current_steps": 3850, "total_steps": 8674, "loss": 0.4898098111152649, "lr": 1.2655642857486885e-06, "epoch": 0.8877103988932442, "percentage": 44.39, "elapsed_time": "2:37:30", "remaining_time": "3:17:21"} +{"current_steps": 3851, "total_steps": 8674, "loss": 0.5117218494415283, "lr": 1.2651966951006753e-06, "epoch": 0.8879409730228268, "percentage": 44.4, "elapsed_time": "2:37:33", "remaining_time": "3:17:19"} +{"current_steps": 3852, "total_steps": 8674, "loss": 0.3920857906341553, "lr": 1.2648290659036347e-06, "epoch": 0.8881715471524095, "percentage": 44.41, "elapsed_time": "2:37:35", "remaining_time": "3:17:16"} +{"current_steps": 3853, "total_steps": 8674, "loss": 0.42527467012405396, "lr": 1.2644613982110055e-06, "epoch": 0.8884021212819921, "percentage": 44.42, "elapsed_time": "2:37:38", "remaining_time": "3:17:14"} +{"current_steps": 3854, "total_steps": 8674, "loss": 0.5283650159835815, "lr": 1.2640936920762318e-06, "epoch": 0.8886326954115749, "percentage": 44.43, "elapsed_time": "2:37:40", "remaining_time": "3:17:11"} +{"current_steps": 3855, "total_steps": 8674, "loss": 0.3976718783378601, "lr": 1.2637259475527634e-06, 
"epoch": 0.8888632695411575, "percentage": 44.44, "elapsed_time": "2:37:42", "remaining_time": "3:17:09"} +{"current_steps": 3856, "total_steps": 8674, "loss": 0.3767106533050537, "lr": 1.2633581646940555e-06, "epoch": 0.8890938436707402, "percentage": 44.45, "elapsed_time": "2:37:45", "remaining_time": "3:17:06"} +{"current_steps": 3857, "total_steps": 8674, "loss": 0.4002486765384674, "lr": 1.2629903435535695e-06, "epoch": 0.8893244178003228, "percentage": 44.47, "elapsed_time": "2:37:47", "remaining_time": "3:17:04"} +{"current_steps": 3858, "total_steps": 8674, "loss": 0.3829443156719208, "lr": 1.2626224841847718e-06, "epoch": 0.8895549919299055, "percentage": 44.48, "elapsed_time": "2:37:50", "remaining_time": "3:17:01"} +{"current_steps": 3859, "total_steps": 8674, "loss": 0.5338312983512878, "lr": 1.2622545866411342e-06, "epoch": 0.8897855660594881, "percentage": 44.49, "elapsed_time": "2:37:52", "remaining_time": "3:16:59"} +{"current_steps": 3860, "total_steps": 8674, "loss": 0.49615299701690674, "lr": 1.2618866509761347e-06, "epoch": 0.8900161401890708, "percentage": 44.5, "elapsed_time": "2:37:55", "remaining_time": "3:16:56"} +{"current_steps": 3861, "total_steps": 8674, "loss": 0.5080281496047974, "lr": 1.2615186772432562e-06, "epoch": 0.8902467143186534, "percentage": 44.51, "elapsed_time": "2:37:57", "remaining_time": "3:16:54"} +{"current_steps": 3862, "total_steps": 8674, "loss": 0.4631335139274597, "lr": 1.2611506654959877e-06, "epoch": 0.8904772884482361, "percentage": 44.52, "elapsed_time": "2:37:59", "remaining_time": "3:16:51"} +{"current_steps": 3863, "total_steps": 8674, "loss": 0.5179207921028137, "lr": 1.2607826157878232e-06, "epoch": 0.8907078625778188, "percentage": 44.54, "elapsed_time": "2:38:02", "remaining_time": "3:16:49"} +{"current_steps": 3864, "total_steps": 8674, "loss": 0.5107406973838806, "lr": 1.260414528172263e-06, "epoch": 0.8909384367074015, "percentage": 44.55, "elapsed_time": "2:38:04", "remaining_time": "3:16:46"} 
+{"current_steps": 3865, "total_steps": 8674, "loss": 0.3719855844974518, "lr": 1.2600464027028112e-06, "epoch": 0.8911690108369841, "percentage": 44.56, "elapsed_time": "2:38:07", "remaining_time": "3:16:44"} +{"current_steps": 3866, "total_steps": 8674, "loss": 0.4703129231929779, "lr": 1.2596782394329797e-06, "epoch": 0.8913995849665668, "percentage": 44.57, "elapsed_time": "2:38:09", "remaining_time": "3:16:42"} +{"current_steps": 3867, "total_steps": 8674, "loss": 0.49239644408226013, "lr": 1.2593100384162842e-06, "epoch": 0.8916301590961494, "percentage": 44.58, "elapsed_time": "2:38:12", "remaining_time": "3:16:39"} +{"current_steps": 3868, "total_steps": 8674, "loss": 0.5194324851036072, "lr": 1.2589417997062468e-06, "epoch": 0.8918607332257321, "percentage": 44.59, "elapsed_time": "2:38:14", "remaining_time": "3:16:37"} +{"current_steps": 3869, "total_steps": 8674, "loss": 0.4224633574485779, "lr": 1.2585735233563943e-06, "epoch": 0.8920913073553147, "percentage": 44.6, "elapsed_time": "2:38:17", "remaining_time": "3:16:34"} +{"current_steps": 3870, "total_steps": 8674, "loss": 0.4377749562263489, "lr": 1.2582052094202594e-06, "epoch": 0.8923218814848974, "percentage": 44.62, "elapsed_time": "2:38:19", "remaining_time": "3:16:32"} +{"current_steps": 3871, "total_steps": 8674, "loss": 0.42847269773483276, "lr": 1.2578368579513809e-06, "epoch": 0.89255245561448, "percentage": 44.63, "elapsed_time": "2:38:21", "remaining_time": "3:16:29"} +{"current_steps": 3872, "total_steps": 8674, "loss": 0.5194802284240723, "lr": 1.2574684690033018e-06, "epoch": 0.8927830297440628, "percentage": 44.64, "elapsed_time": "2:38:24", "remaining_time": "3:16:27"} +{"current_steps": 3873, "total_steps": 8674, "loss": 0.4401082396507263, "lr": 1.2571000426295716e-06, "epoch": 0.8930136038736454, "percentage": 44.65, "elapsed_time": "2:38:26", "remaining_time": "3:16:24"} +{"current_steps": 3874, "total_steps": 8674, "loss": 0.38890570402145386, "lr": 1.2567315788837442e-06, 
"epoch": 0.8932441780032281, "percentage": 44.66, "elapsed_time": "2:38:29", "remaining_time": "3:16:22"} +{"current_steps": 3875, "total_steps": 8674, "loss": 0.522612452507019, "lr": 1.2563630778193802e-06, "epoch": 0.8934747521328107, "percentage": 44.67, "elapsed_time": "2:38:31", "remaining_time": "3:16:19"} +{"current_steps": 3876, "total_steps": 8674, "loss": 0.516444981098175, "lr": 1.2559945394900447e-06, "epoch": 0.8937053262623934, "percentage": 44.69, "elapsed_time": "2:38:34", "remaining_time": "3:16:17"} +{"current_steps": 3877, "total_steps": 8674, "loss": 0.4084436297416687, "lr": 1.255625963949308e-06, "epoch": 0.893935900391976, "percentage": 44.7, "elapsed_time": "2:38:36", "remaining_time": "3:16:14"} +{"current_steps": 3878, "total_steps": 8674, "loss": 0.4561755657196045, "lr": 1.2552573512507474e-06, "epoch": 0.8941664745215587, "percentage": 44.71, "elapsed_time": "2:38:39", "remaining_time": "3:16:12"} +{"current_steps": 3879, "total_steps": 8674, "loss": 0.44372665882110596, "lr": 1.2548887014479435e-06, "epoch": 0.8943970486511413, "percentage": 44.72, "elapsed_time": "2:38:41", "remaining_time": "3:16:09"} +{"current_steps": 3880, "total_steps": 8674, "loss": 0.4714791774749756, "lr": 1.2545200145944837e-06, "epoch": 0.894627622780724, "percentage": 44.73, "elapsed_time": "2:38:43", "remaining_time": "3:16:07"} +{"current_steps": 3881, "total_steps": 8674, "loss": 0.48050814867019653, "lr": 1.25415129074396e-06, "epoch": 0.8948581969103067, "percentage": 44.74, "elapsed_time": "2:38:46", "remaining_time": "3:16:04"} +{"current_steps": 3882, "total_steps": 8674, "loss": 0.4078127145767212, "lr": 1.2537825299499708e-06, "epoch": 0.8950887710398894, "percentage": 44.75, "elapsed_time": "2:38:48", "remaining_time": "3:16:02"} +{"current_steps": 3883, "total_steps": 8674, "loss": 0.41556763648986816, "lr": 1.2534137322661187e-06, "epoch": 0.895319345169472, "percentage": 44.77, "elapsed_time": "2:38:51", "remaining_time": "3:16:00"} 
+{"current_steps": 3884, "total_steps": 8674, "loss": 0.3862306475639343, "lr": 1.2530448977460127e-06, "epoch": 0.8955499192990547, "percentage": 44.78, "elapsed_time": "2:38:53", "remaining_time": "3:15:57"} +{"current_steps": 3885, "total_steps": 8674, "loss": 0.4071112871170044, "lr": 1.2526760264432656e-06, "epoch": 0.8957804934286373, "percentage": 44.79, "elapsed_time": "2:38:56", "remaining_time": "3:15:55"} +{"current_steps": 3886, "total_steps": 8674, "loss": 0.36956706643104553, "lr": 1.2523071184114978e-06, "epoch": 0.89601106755822, "percentage": 44.8, "elapsed_time": "2:38:58", "remaining_time": "3:15:52"} +{"current_steps": 3887, "total_steps": 8674, "loss": 0.5087941884994507, "lr": 1.251938173704333e-06, "epoch": 0.8962416416878026, "percentage": 44.81, "elapsed_time": "2:39:00", "remaining_time": "3:15:50"} +{"current_steps": 3888, "total_steps": 8674, "loss": 0.5636804103851318, "lr": 1.2515691923754017e-06, "epoch": 0.8964722158173853, "percentage": 44.82, "elapsed_time": "2:39:03", "remaining_time": "3:15:47"} +{"current_steps": 3889, "total_steps": 8674, "loss": 0.40899237990379333, "lr": 1.2512001744783383e-06, "epoch": 0.896702789946968, "percentage": 44.84, "elapsed_time": "2:39:05", "remaining_time": "3:15:45"} +{"current_steps": 3890, "total_steps": 8674, "loss": 0.3964187800884247, "lr": 1.2508311200667839e-06, "epoch": 0.8969333640765507, "percentage": 44.85, "elapsed_time": "2:39:08", "remaining_time": "3:15:42"} +{"current_steps": 3891, "total_steps": 8674, "loss": 0.43190568685531616, "lr": 1.2504620291943838e-06, "epoch": 0.8971639382061333, "percentage": 44.86, "elapsed_time": "2:39:10", "remaining_time": "3:15:40"} +{"current_steps": 3892, "total_steps": 8674, "loss": 0.5640079379081726, "lr": 1.25009290191479e-06, "epoch": 0.897394512335716, "percentage": 44.87, "elapsed_time": "2:39:13", "remaining_time": "3:15:37"} +{"current_steps": 3893, "total_steps": 8674, "loss": 0.4969727396965027, "lr": 1.2497237382816577e-06, "epoch": 
0.8976250864652986, "percentage": 44.88, "elapsed_time": "2:39:15", "remaining_time": "3:15:35"} +{"current_steps": 3894, "total_steps": 8674, "loss": 0.43710076808929443, "lr": 1.2493545383486497e-06, "epoch": 0.8978556605948812, "percentage": 44.89, "elapsed_time": "2:39:18", "remaining_time": "3:15:32"} +{"current_steps": 3895, "total_steps": 8674, "loss": 0.4246212840080261, "lr": 1.248985302169432e-06, "epoch": 0.8980862347244639, "percentage": 44.9, "elapsed_time": "2:39:20", "remaining_time": "3:15:30"} +{"current_steps": 3896, "total_steps": 8674, "loss": 0.3812369108200073, "lr": 1.2486160297976776e-06, "epoch": 0.8983168088540465, "percentage": 44.92, "elapsed_time": "2:39:22", "remaining_time": "3:15:27"} +{"current_steps": 3897, "total_steps": 8674, "loss": 0.6407653093338013, "lr": 1.248246721287063e-06, "epoch": 0.8985473829836292, "percentage": 44.93, "elapsed_time": "2:39:25", "remaining_time": "3:15:25"} +{"current_steps": 3898, "total_steps": 8674, "loss": 0.47748661041259766, "lr": 1.247877376691272e-06, "epoch": 0.8987779571132118, "percentage": 44.94, "elapsed_time": "2:39:27", "remaining_time": "3:15:22"} +{"current_steps": 3899, "total_steps": 8674, "loss": 0.5047964453697205, "lr": 1.2475079960639922e-06, "epoch": 0.8990085312427946, "percentage": 44.95, "elapsed_time": "2:39:30", "remaining_time": "3:15:20"} +{"current_steps": 3900, "total_steps": 8674, "loss": 0.37989485263824463, "lr": 1.2471385794589167e-06, "epoch": 0.8992391053723772, "percentage": 44.96, "elapsed_time": "2:39:32", "remaining_time": "3:15:18"} +{"current_steps": 3901, "total_steps": 8674, "loss": 0.38857924938201904, "lr": 1.2467691269297437e-06, "epoch": 0.8994696795019599, "percentage": 44.97, "elapsed_time": "2:39:36", "remaining_time": "3:15:17"} +{"current_steps": 3902, "total_steps": 8674, "loss": 0.45452386140823364, "lr": 1.2463996385301776e-06, "epoch": 0.8997002536315425, "percentage": 44.99, "elapsed_time": "2:39:38", "remaining_time": "3:15:14"} 
+{"current_steps": 3903, "total_steps": 8674, "loss": 0.41920900344848633, "lr": 1.2460301143139267e-06, "epoch": 0.8999308277611252, "percentage": 45.0, "elapsed_time": "2:39:41", "remaining_time": "3:15:12"} +{"current_steps": 3904, "total_steps": 8674, "loss": 0.5979125499725342, "lr": 1.2456605543347051e-06, "epoch": 0.9001614018907078, "percentage": 45.01, "elapsed_time": "2:39:43", "remaining_time": "3:15:09"} +{"current_steps": 3905, "total_steps": 8674, "loss": 0.5517082214355469, "lr": 1.2452909586462323e-06, "epoch": 0.9003919760202905, "percentage": 45.02, "elapsed_time": "2:39:46", "remaining_time": "3:15:07"} +{"current_steps": 3906, "total_steps": 8674, "loss": 0.4558248519897461, "lr": 1.244921327302233e-06, "epoch": 0.9006225501498731, "percentage": 45.03, "elapsed_time": "2:39:48", "remaining_time": "3:15:04"} +{"current_steps": 3907, "total_steps": 8674, "loss": 0.5637399554252625, "lr": 1.2445516603564362e-06, "epoch": 0.9008531242794559, "percentage": 45.04, "elapsed_time": "2:39:50", "remaining_time": "3:15:02"} +{"current_steps": 3908, "total_steps": 8674, "loss": 0.5208043456077576, "lr": 1.2441819578625775e-06, "epoch": 0.9010836984090385, "percentage": 45.05, "elapsed_time": "2:39:53", "remaining_time": "3:14:59"} +{"current_steps": 3909, "total_steps": 8674, "loss": 0.3901744484901428, "lr": 1.243812219874396e-06, "epoch": 0.9013142725386212, "percentage": 45.07, "elapsed_time": "2:39:55", "remaining_time": "3:14:57"} +{"current_steps": 3910, "total_steps": 8674, "loss": 0.5770972967147827, "lr": 1.2434424464456376e-06, "epoch": 0.9015448466682038, "percentage": 45.08, "elapsed_time": "2:39:58", "remaining_time": "3:14:54"} +{"current_steps": 3911, "total_steps": 8674, "loss": 0.3457295894622803, "lr": 1.2430726376300525e-06, "epoch": 0.9017754207977865, "percentage": 45.09, "elapsed_time": "2:40:00", "remaining_time": "3:14:52"} +{"current_steps": 3912, "total_steps": 8674, "loss": 0.4487595558166504, "lr": 1.242702793481396e-06, "epoch": 
0.9020059949273691, "percentage": 45.1, "elapsed_time": "2:40:03", "remaining_time": "3:14:49"} +{"current_steps": 3913, "total_steps": 8674, "loss": 0.4369876980781555, "lr": 1.2423329140534286e-06, "epoch": 0.9022365690569518, "percentage": 45.11, "elapsed_time": "2:40:05", "remaining_time": "3:14:47"} +{"current_steps": 3914, "total_steps": 8674, "loss": 0.43154388666152954, "lr": 1.2419629993999165e-06, "epoch": 0.9024671431865344, "percentage": 45.12, "elapsed_time": "2:40:07", "remaining_time": "3:14:44"} +{"current_steps": 3915, "total_steps": 8674, "loss": 0.4528294801712036, "lr": 1.24159304957463e-06, "epoch": 0.9026977173161171, "percentage": 45.13, "elapsed_time": "2:40:10", "remaining_time": "3:14:42"} +{"current_steps": 3916, "total_steps": 8674, "loss": 0.4204830527305603, "lr": 1.2412230646313452e-06, "epoch": 0.9029282914456997, "percentage": 45.15, "elapsed_time": "2:40:12", "remaining_time": "3:14:39"} +{"current_steps": 3917, "total_steps": 8674, "loss": 0.46544623374938965, "lr": 1.2408530446238433e-06, "epoch": 0.9031588655752825, "percentage": 45.16, "elapsed_time": "2:40:15", "remaining_time": "3:14:37"} +{"current_steps": 3918, "total_steps": 8674, "loss": 0.39419203996658325, "lr": 1.2404829896059107e-06, "epoch": 0.9033894397048651, "percentage": 45.17, "elapsed_time": "2:40:17", "remaining_time": "3:14:34"} +{"current_steps": 3919, "total_steps": 8674, "loss": 0.4214451014995575, "lr": 1.240112899631338e-06, "epoch": 0.9036200138344478, "percentage": 45.18, "elapsed_time": "2:40:20", "remaining_time": "3:14:32"} +{"current_steps": 3920, "total_steps": 8674, "loss": 0.42385220527648926, "lr": 1.239742774753922e-06, "epoch": 0.9038505879640304, "percentage": 45.19, "elapsed_time": "2:40:22", "remaining_time": "3:14:29"} +{"current_steps": 3921, "total_steps": 8674, "loss": 0.5206592082977295, "lr": 1.2393726150274636e-06, "epoch": 0.9040811620936131, "percentage": 45.2, "elapsed_time": "2:40:25", "remaining_time": "3:14:27"} 
+{"current_steps": 3922, "total_steps": 8674, "loss": 0.4358803629875183, "lr": 1.23900242050577e-06, "epoch": 0.9043117362231957, "percentage": 45.22, "elapsed_time": "2:40:27", "remaining_time": "3:14:25"} +{"current_steps": 3923, "total_steps": 8674, "loss": 0.4525173306465149, "lr": 1.2386321912426524e-06, "epoch": 0.9045423103527784, "percentage": 45.23, "elapsed_time": "2:40:30", "remaining_time": "3:14:22"} +{"current_steps": 3924, "total_steps": 8674, "loss": 0.48877185583114624, "lr": 1.2382619272919273e-06, "epoch": 0.904772884482361, "percentage": 45.24, "elapsed_time": "2:40:32", "remaining_time": "3:14:20"} +{"current_steps": 3925, "total_steps": 8674, "loss": 0.4401814341545105, "lr": 1.2378916287074162e-06, "epoch": 0.9050034586119438, "percentage": 45.25, "elapsed_time": "2:40:35", "remaining_time": "3:14:17"} +{"current_steps": 3926, "total_steps": 8674, "loss": 0.37818846106529236, "lr": 1.2375212955429459e-06, "epoch": 0.9052340327415264, "percentage": 45.26, "elapsed_time": "2:40:37", "remaining_time": "3:14:15"} +{"current_steps": 3927, "total_steps": 8674, "loss": 0.36472904682159424, "lr": 1.2371509278523482e-06, "epoch": 0.9054646068711091, "percentage": 45.27, "elapsed_time": "2:40:40", "remaining_time": "3:14:12"} +{"current_steps": 3928, "total_steps": 8674, "loss": 0.5113309025764465, "lr": 1.2367805256894596e-06, "epoch": 0.9056951810006917, "percentage": 45.28, "elapsed_time": "2:40:42", "remaining_time": "3:14:10"} +{"current_steps": 3929, "total_steps": 8674, "loss": 0.36074432730674744, "lr": 1.2364100891081218e-06, "epoch": 0.9059257551302744, "percentage": 45.3, "elapsed_time": "2:40:44", "remaining_time": "3:14:07"} +{"current_steps": 3930, "total_steps": 8674, "loss": 0.39177048206329346, "lr": 1.2360396181621819e-06, "epoch": 0.906156329259857, "percentage": 45.31, "elapsed_time": "2:40:47", "remaining_time": "3:14:05"} +{"current_steps": 3931, "total_steps": 8674, "loss": 0.4758113622665405, "lr": 1.2356691129054912e-06, 
"epoch": 0.9063869033894397, "percentage": 45.32, "elapsed_time": "2:40:49", "remaining_time": "3:14:02"} +{"current_steps": 3932, "total_steps": 8674, "loss": 0.3840598464012146, "lr": 1.2352985733919065e-06, "epoch": 0.9066174775190223, "percentage": 45.33, "elapsed_time": "2:40:51", "remaining_time": "3:14:00"} +{"current_steps": 3933, "total_steps": 8674, "loss": 0.5103816986083984, "lr": 1.2349279996752892e-06, "epoch": 0.906848051648605, "percentage": 45.34, "elapsed_time": "2:40:54", "remaining_time": "3:13:58"} +{"current_steps": 3934, "total_steps": 8674, "loss": 0.4175255298614502, "lr": 1.234557391809507e-06, "epoch": 0.9070786257781877, "percentage": 45.35, "elapsed_time": "2:40:57", "remaining_time": "3:13:55"} +{"current_steps": 3935, "total_steps": 8674, "loss": 0.4504377245903015, "lr": 1.2341867498484302e-06, "epoch": 0.9073091999077704, "percentage": 45.37, "elapsed_time": "2:40:59", "remaining_time": "3:13:53"} +{"current_steps": 3936, "total_steps": 8674, "loss": 0.45868122577667236, "lr": 1.2338160738459355e-06, "epoch": 0.907539774037353, "percentage": 45.38, "elapsed_time": "2:41:01", "remaining_time": "3:13:50"} +{"current_steps": 3937, "total_steps": 8674, "loss": 0.5161639451980591, "lr": 1.2334453638559054e-06, "epoch": 0.9077703481669357, "percentage": 45.39, "elapsed_time": "2:41:04", "remaining_time": "3:13:48"} +{"current_steps": 3938, "total_steps": 8674, "loss": 0.44561630487442017, "lr": 1.2330746199322257e-06, "epoch": 0.9080009222965183, "percentage": 45.4, "elapsed_time": "2:41:06", "remaining_time": "3:13:45"} +{"current_steps": 3939, "total_steps": 8674, "loss": 0.4780126214027405, "lr": 1.2327038421287876e-06, "epoch": 0.908231496426101, "percentage": 45.41, "elapsed_time": "2:41:09", "remaining_time": "3:13:43"} +{"current_steps": 3940, "total_steps": 8674, "loss": 0.505066990852356, "lr": 1.2323330304994877e-06, "epoch": 0.9084620705556836, "percentage": 45.42, "elapsed_time": "2:41:11", "remaining_time": "3:13:40"} 
+{"current_steps": 3941, "total_steps": 8674, "loss": 0.5053813457489014, "lr": 1.2319621850982274e-06, "epoch": 0.9086926446852663, "percentage": 45.43, "elapsed_time": "2:41:14", "remaining_time": "3:13:38"} +{"current_steps": 3942, "total_steps": 8674, "loss": 0.3579134941101074, "lr": 1.2315913059789125e-06, "epoch": 0.9089232188148489, "percentage": 45.45, "elapsed_time": "2:41:16", "remaining_time": "3:13:35"} +{"current_steps": 3943, "total_steps": 8674, "loss": 0.5703507661819458, "lr": 1.2312203931954543e-06, "epoch": 0.9091537929444317, "percentage": 45.46, "elapsed_time": "2:41:19", "remaining_time": "3:13:33"} +{"current_steps": 3944, "total_steps": 8674, "loss": 0.4972035884857178, "lr": 1.2308494468017685e-06, "epoch": 0.9093843670740143, "percentage": 45.47, "elapsed_time": "2:41:21", "remaining_time": "3:13:30"} +{"current_steps": 3945, "total_steps": 8674, "loss": 0.5528955459594727, "lr": 1.230478466851776e-06, "epoch": 0.909614941203597, "percentage": 45.48, "elapsed_time": "2:41:23", "remaining_time": "3:13:28"} +{"current_steps": 3946, "total_steps": 8674, "loss": 0.4099786877632141, "lr": 1.2301074533994024e-06, "epoch": 0.9098455153331796, "percentage": 45.49, "elapsed_time": "2:41:26", "remaining_time": "3:13:25"} +{"current_steps": 3947, "total_steps": 8674, "loss": 0.41020166873931885, "lr": 1.2297364064985786e-06, "epoch": 0.9100760894627623, "percentage": 45.5, "elapsed_time": "2:41:28", "remaining_time": "3:13:23"} +{"current_steps": 3948, "total_steps": 8674, "loss": 0.4340355694293976, "lr": 1.2293653262032395e-06, "epoch": 0.9103066635923449, "percentage": 45.52, "elapsed_time": "2:41:31", "remaining_time": "3:13:20"} +{"current_steps": 3949, "total_steps": 8674, "loss": 0.4369847774505615, "lr": 1.2289942125673261e-06, "epoch": 0.9105372377219276, "percentage": 45.53, "elapsed_time": "2:41:33", "remaining_time": "3:13:18"} +{"current_steps": 3950, "total_steps": 8674, "loss": 0.406423956155777, "lr": 1.228623065644783e-06, "epoch": 
0.9107678118515102, "percentage": 45.54, "elapsed_time": "2:41:36", "remaining_time": "3:13:16"} +{"current_steps": 3951, "total_steps": 8674, "loss": 0.5081946849822998, "lr": 1.22825188548956e-06, "epoch": 0.910998385981093, "percentage": 45.55, "elapsed_time": "2:41:38", "remaining_time": "3:13:13"} +{"current_steps": 3952, "total_steps": 8674, "loss": 0.43494492769241333, "lr": 1.2278806721556124e-06, "epoch": 0.9112289601106756, "percentage": 45.56, "elapsed_time": "2:41:40", "remaining_time": "3:13:11"} +{"current_steps": 3953, "total_steps": 8674, "loss": 0.35356831550598145, "lr": 1.2275094256968996e-06, "epoch": 0.9114595342402583, "percentage": 45.57, "elapsed_time": "2:41:43", "remaining_time": "3:13:08"} +{"current_steps": 3954, "total_steps": 8674, "loss": 0.36741551756858826, "lr": 1.227138146167386e-06, "epoch": 0.9116901083698409, "percentage": 45.58, "elapsed_time": "2:41:45", "remaining_time": "3:13:06"} +{"current_steps": 3955, "total_steps": 8674, "loss": 0.491504430770874, "lr": 1.226766833621041e-06, "epoch": 0.9119206824994236, "percentage": 45.6, "elapsed_time": "2:41:48", "remaining_time": "3:13:03"} +{"current_steps": 3956, "total_steps": 8674, "loss": 0.4558037519454956, "lr": 1.2263954881118384e-06, "epoch": 0.9121512566290062, "percentage": 45.61, "elapsed_time": "2:41:50", "remaining_time": "3:13:01"} +{"current_steps": 3957, "total_steps": 8674, "loss": 0.3941671848297119, "lr": 1.2260241096937571e-06, "epoch": 0.9123818307585889, "percentage": 45.62, "elapsed_time": "2:41:52", "remaining_time": "3:12:58"} +{"current_steps": 3958, "total_steps": 8674, "loss": 0.40505191683769226, "lr": 1.2256526984207809e-06, "epoch": 0.9126124048881715, "percentage": 45.63, "elapsed_time": "2:41:55", "remaining_time": "3:12:56"} +{"current_steps": 3959, "total_steps": 8674, "loss": 0.4669588804244995, "lr": 1.2252812543468982e-06, "epoch": 0.9128429790177542, "percentage": 45.64, "elapsed_time": "2:41:58", "remaining_time": "3:12:53"} 
+{"current_steps": 3960, "total_steps": 8674, "loss": 0.535057544708252, "lr": 1.2249097775261014e-06, "epoch": 0.9130735531473368, "percentage": 45.65, "elapsed_time": "2:42:00", "remaining_time": "3:12:51"} +{"current_steps": 3961, "total_steps": 8674, "loss": 0.5127478837966919, "lr": 1.2245382680123898e-06, "epoch": 0.9133041272769196, "percentage": 45.67, "elapsed_time": "2:42:02", "remaining_time": "3:12:48"} +{"current_steps": 3962, "total_steps": 8674, "loss": 0.5004767179489136, "lr": 1.224166725859765e-06, "epoch": 0.9135347014065022, "percentage": 45.68, "elapsed_time": "2:42:05", "remaining_time": "3:12:46"} +{"current_steps": 3963, "total_steps": 8674, "loss": 0.47929924726486206, "lr": 1.2237951511222346e-06, "epoch": 0.9137652755360849, "percentage": 45.69, "elapsed_time": "2:42:07", "remaining_time": "3:12:43"} +{"current_steps": 3964, "total_steps": 8674, "loss": 0.5619359016418457, "lr": 1.2234235438538109e-06, "epoch": 0.9139958496656675, "percentage": 45.7, "elapsed_time": "2:42:10", "remaining_time": "3:12:41"} +{"current_steps": 3965, "total_steps": 8674, "loss": 0.44648507237434387, "lr": 1.223051904108511e-06, "epoch": 0.9142264237952502, "percentage": 45.71, "elapsed_time": "2:42:12", "remaining_time": "3:12:38"} +{"current_steps": 3966, "total_steps": 8674, "loss": 0.4451872706413269, "lr": 1.2226802319403562e-06, "epoch": 0.9144569979248328, "percentage": 45.72, "elapsed_time": "2:42:14", "remaining_time": "3:12:36"} +{"current_steps": 3967, "total_steps": 8674, "loss": 0.44295474886894226, "lr": 1.222308527403373e-06, "epoch": 0.9146875720544155, "percentage": 45.73, "elapsed_time": "2:42:17", "remaining_time": "3:12:34"} +{"current_steps": 3968, "total_steps": 8674, "loss": 0.517430305480957, "lr": 1.221936790551592e-06, "epoch": 0.9149181461839981, "percentage": 45.75, "elapsed_time": "2:42:19", "remaining_time": "3:12:31"} +{"current_steps": 3969, "total_steps": 8674, "loss": 0.4819454252719879, "lr": 1.2215650214390493e-06, "epoch": 
0.9151487203135809, "percentage": 45.76, "elapsed_time": "2:42:22", "remaining_time": "3:12:28"} +{"current_steps": 3970, "total_steps": 8674, "loss": 0.41739264130592346, "lr": 1.2211932201197855e-06, "epoch": 0.9153792944431635, "percentage": 45.77, "elapsed_time": "2:42:24", "remaining_time": "3:12:26"} +{"current_steps": 3971, "total_steps": 8674, "loss": 0.38833269476890564, "lr": 1.2208213866478452e-06, "epoch": 0.9156098685727462, "percentage": 45.78, "elapsed_time": "2:42:27", "remaining_time": "3:12:23"} +{"current_steps": 3972, "total_steps": 8674, "loss": 0.48899054527282715, "lr": 1.2204495210772784e-06, "epoch": 0.9158404427023288, "percentage": 45.79, "elapsed_time": "2:42:29", "remaining_time": "3:12:21"} +{"current_steps": 3973, "total_steps": 8674, "loss": 0.5201622247695923, "lr": 1.2200776234621395e-06, "epoch": 0.9160710168319115, "percentage": 45.8, "elapsed_time": "2:42:32", "remaining_time": "3:12:18"} +{"current_steps": 3974, "total_steps": 8674, "loss": 0.4105098843574524, "lr": 1.219705693856488e-06, "epoch": 0.9163015909614941, "percentage": 45.82, "elapsed_time": "2:42:34", "remaining_time": "3:12:16"} +{"current_steps": 3975, "total_steps": 8674, "loss": 0.45458245277404785, "lr": 1.2193337323143865e-06, "epoch": 0.9165321650910768, "percentage": 45.83, "elapsed_time": "2:42:37", "remaining_time": "3:12:14"} +{"current_steps": 3976, "total_steps": 8674, "loss": 0.5013390779495239, "lr": 1.2189617388899049e-06, "epoch": 0.9167627392206594, "percentage": 45.84, "elapsed_time": "2:42:39", "remaining_time": "3:12:11"} +{"current_steps": 3977, "total_steps": 8674, "loss": 0.37065303325653076, "lr": 1.218589713637115e-06, "epoch": 0.9169933133502421, "percentage": 45.85, "elapsed_time": "2:42:41", "remaining_time": "3:12:09"} +{"current_steps": 3978, "total_steps": 8674, "loss": 0.45158177614212036, "lr": 1.218217656610095e-06, "epoch": 0.9172238874798248, "percentage": 45.86, "elapsed_time": "2:42:44", "remaining_time": "3:12:06"} 
+{"current_steps": 3979, "total_steps": 8674, "loss": 0.4439426064491272, "lr": 1.2178455678629271e-06, "epoch": 0.9174544616094075, "percentage": 45.87, "elapsed_time": "2:42:46", "remaining_time": "3:12:04"} +{"current_steps": 3980, "total_steps": 8674, "loss": 0.42215704917907715, "lr": 1.217473447449698e-06, "epoch": 0.9176850357389901, "percentage": 45.88, "elapsed_time": "2:42:49", "remaining_time": "3:12:01"} +{"current_steps": 3981, "total_steps": 8674, "loss": 0.42273545265197754, "lr": 1.2171012954244991e-06, "epoch": 0.9179156098685728, "percentage": 45.9, "elapsed_time": "2:42:51", "remaining_time": "3:11:59"} +{"current_steps": 3982, "total_steps": 8674, "loss": 0.6045219898223877, "lr": 1.216729111841427e-06, "epoch": 0.9181461839981554, "percentage": 45.91, "elapsed_time": "2:42:53", "remaining_time": "3:11:56"} +{"current_steps": 3983, "total_steps": 8674, "loss": 0.49316874146461487, "lr": 1.216356896754582e-06, "epoch": 0.9183767581277381, "percentage": 45.92, "elapsed_time": "2:42:56", "remaining_time": "3:11:54"} +{"current_steps": 3984, "total_steps": 8674, "loss": 0.5222599506378174, "lr": 1.2159846502180692e-06, "epoch": 0.9186073322573207, "percentage": 45.93, "elapsed_time": "2:42:59", "remaining_time": "3:11:51"} +{"current_steps": 3985, "total_steps": 8674, "loss": 0.4513903856277466, "lr": 1.2156123722859988e-06, "epoch": 0.9188379063869034, "percentage": 45.94, "elapsed_time": "2:43:01", "remaining_time": "3:11:49"} +{"current_steps": 3986, "total_steps": 8674, "loss": 0.4946150779724121, "lr": 1.2152400630124846e-06, "epoch": 0.919068480516486, "percentage": 45.95, "elapsed_time": "2:43:03", "remaining_time": "3:11:46"} +{"current_steps": 3987, "total_steps": 8674, "loss": 0.5482569336891174, "lr": 1.2148677224516458e-06, "epoch": 0.9192990546460688, "percentage": 45.96, "elapsed_time": "2:43:06", "remaining_time": "3:11:44"} +{"current_steps": 3988, "total_steps": 8674, "loss": 0.457091361284256, "lr": 1.2144953506576061e-06, "epoch": 
0.9195296287756514, "percentage": 45.98, "elapsed_time": "2:43:08", "remaining_time": "3:11:41"} +{"current_steps": 3989, "total_steps": 8674, "loss": 0.4262084364891052, "lr": 1.2141229476844933e-06, "epoch": 0.9197602029052341, "percentage": 45.99, "elapsed_time": "2:43:10", "remaining_time": "3:11:39"} +{"current_steps": 3990, "total_steps": 8674, "loss": 0.4905529022216797, "lr": 1.2137505135864402e-06, "epoch": 0.9199907770348167, "percentage": 46.0, "elapsed_time": "2:43:13", "remaining_time": "3:11:36"} +{"current_steps": 3991, "total_steps": 8674, "loss": 0.5001873970031738, "lr": 1.2133780484175833e-06, "epoch": 0.9202213511643994, "percentage": 46.01, "elapsed_time": "2:43:15", "remaining_time": "3:11:34"} +{"current_steps": 3992, "total_steps": 8674, "loss": 0.396418035030365, "lr": 1.2130055522320647e-06, "epoch": 0.920451925293982, "percentage": 46.02, "elapsed_time": "2:43:18", "remaining_time": "3:11:31"} +{"current_steps": 3993, "total_steps": 8674, "loss": 0.5743722915649414, "lr": 1.2126330250840302e-06, "epoch": 0.9206824994235647, "percentage": 46.03, "elapsed_time": "2:43:20", "remaining_time": "3:11:29"} +{"current_steps": 3994, "total_steps": 8674, "loss": 0.5134707689285278, "lr": 1.212260467027631e-06, "epoch": 0.9209130735531473, "percentage": 46.05, "elapsed_time": "2:43:23", "remaining_time": "3:11:26"} +{"current_steps": 3995, "total_steps": 8674, "loss": 0.4191853404045105, "lr": 1.2118878781170213e-06, "epoch": 0.92114364768273, "percentage": 46.06, "elapsed_time": "2:43:25", "remaining_time": "3:11:24"} +{"current_steps": 3996, "total_steps": 8674, "loss": 0.3430103063583374, "lr": 1.2115152584063613e-06, "epoch": 0.9213742218123127, "percentage": 46.07, "elapsed_time": "2:43:27", "remaining_time": "3:11:21"} +{"current_steps": 3997, "total_steps": 8674, "loss": 0.5229896903038025, "lr": 1.2111426079498147e-06, "epoch": 0.9216047959418954, "percentage": 46.08, "elapsed_time": "2:43:30", "remaining_time": "3:11:19"} +{"current_steps": 
3998, "total_steps": 8674, "loss": 0.5028181076049805, "lr": 1.2107699268015501e-06, "epoch": 0.921835370071478, "percentage": 46.09, "elapsed_time": "2:43:32", "remaining_time": "3:11:16"} +{"current_steps": 3999, "total_steps": 8674, "loss": 0.4662501811981201, "lr": 1.2103972150157407e-06, "epoch": 0.9220659442010607, "percentage": 46.1, "elapsed_time": "2:43:35", "remaining_time": "3:11:14"} +{"current_steps": 4000, "total_steps": 8674, "loss": 0.5581385493278503, "lr": 1.2100244726465636e-06, "epoch": 0.9222965183306433, "percentage": 46.11, "elapsed_time": "2:43:38", "remaining_time": "3:11:12"} +{"current_steps": 4001, "total_steps": 8674, "loss": 0.3925841450691223, "lr": 1.2096516997482012e-06, "epoch": 0.922527092460226, "percentage": 46.13, "elapsed_time": "2:43:41", "remaining_time": "3:11:11"} +{"current_steps": 4002, "total_steps": 8674, "loss": 0.4021197557449341, "lr": 1.2092788963748393e-06, "epoch": 0.9227576665898086, "percentage": 46.14, "elapsed_time": "2:43:44", "remaining_time": "3:11:09"} +{"current_steps": 4003, "total_steps": 8674, "loss": 0.5519800186157227, "lr": 1.2089060625806683e-06, "epoch": 0.9229882407193913, "percentage": 46.15, "elapsed_time": "2:43:46", "remaining_time": "3:11:06"} +{"current_steps": 4004, "total_steps": 8674, "loss": 0.4426038861274719, "lr": 1.2085331984198847e-06, "epoch": 0.923218814848974, "percentage": 46.16, "elapsed_time": "2:43:49", "remaining_time": "3:11:04"} +{"current_steps": 4005, "total_steps": 8674, "loss": 0.4370608925819397, "lr": 1.2081603039466872e-06, "epoch": 0.9234493889785566, "percentage": 46.17, "elapsed_time": "2:43:51", "remaining_time": "3:11:01"} +{"current_steps": 4006, "total_steps": 8674, "loss": 0.5535042881965637, "lr": 1.2077873792152797e-06, "epoch": 0.9236799631081393, "percentage": 46.18, "elapsed_time": "2:43:53", "remaining_time": "3:10:58"} +{"current_steps": 4007, "total_steps": 8674, "loss": 0.45786774158477783, "lr": 1.2074144242798708e-06, "epoch": 
0.9239105372377219, "percentage": 46.2, "elapsed_time": "2:43:56", "remaining_time": "3:10:56"} +{"current_steps": 4008, "total_steps": 8674, "loss": 0.38189244270324707, "lr": 1.207041439194673e-06, "epoch": 0.9241411113673046, "percentage": 46.21, "elapsed_time": "2:43:58", "remaining_time": "3:10:54"} +{"current_steps": 4009, "total_steps": 8674, "loss": 0.48782190680503845, "lr": 1.206668424013904e-06, "epoch": 0.9243716854968872, "percentage": 46.22, "elapsed_time": "2:44:01", "remaining_time": "3:10:51"} +{"current_steps": 4010, "total_steps": 8674, "loss": 0.46295344829559326, "lr": 1.2062953787917852e-06, "epoch": 0.9246022596264699, "percentage": 46.23, "elapsed_time": "2:44:03", "remaining_time": "3:10:49"} +{"current_steps": 4011, "total_steps": 8674, "loss": 0.5205795764923096, "lr": 1.205922303582542e-06, "epoch": 0.9248328337560525, "percentage": 46.24, "elapsed_time": "2:44:05", "remaining_time": "3:10:46"} +{"current_steps": 4012, "total_steps": 8674, "loss": 0.47622987627983093, "lr": 1.205549198440405e-06, "epoch": 0.9250634078856352, "percentage": 46.25, "elapsed_time": "2:44:08", "remaining_time": "3:10:43"} +{"current_steps": 4013, "total_steps": 8674, "loss": 0.4826146960258484, "lr": 1.2051760634196091e-06, "epoch": 0.9252939820152178, "percentage": 46.26, "elapsed_time": "2:44:10", "remaining_time": "3:10:41"} +{"current_steps": 4014, "total_steps": 8674, "loss": 0.46193474531173706, "lr": 1.2048028985743928e-06, "epoch": 0.9255245561448006, "percentage": 46.28, "elapsed_time": "2:44:13", "remaining_time": "3:10:38"} +{"current_steps": 4015, "total_steps": 8674, "loss": 0.523394763469696, "lr": 1.2044297039589996e-06, "epoch": 0.9257551302743832, "percentage": 46.29, "elapsed_time": "2:44:15", "remaining_time": "3:10:36"} +{"current_steps": 4016, "total_steps": 8674, "loss": 0.3963446617126465, "lr": 1.2040564796276773e-06, "epoch": 0.9259857044039659, "percentage": 46.3, "elapsed_time": "2:44:17", "remaining_time": "3:10:33"} 
+{"current_steps": 4017, "total_steps": 8674, "loss": 0.5016456842422485, "lr": 1.2036832256346774e-06, "epoch": 0.9262162785335485, "percentage": 46.31, "elapsed_time": "2:44:20", "remaining_time": "3:10:31"} +{"current_steps": 4018, "total_steps": 8674, "loss": 0.47298160195350647, "lr": 1.2033099420342566e-06, "epoch": 0.9264468526631312, "percentage": 46.32, "elapsed_time": "2:44:22", "remaining_time": "3:10:28"} +{"current_steps": 4019, "total_steps": 8674, "loss": 0.387129545211792, "lr": 1.2029366288806748e-06, "epoch": 0.9266774267927138, "percentage": 46.33, "elapsed_time": "2:44:25", "remaining_time": "3:10:26"} +{"current_steps": 4020, "total_steps": 8674, "loss": 0.46101367473602295, "lr": 1.2025632862281976e-06, "epoch": 0.9269080009222965, "percentage": 46.35, "elapsed_time": "2:44:27", "remaining_time": "3:10:23"} +{"current_steps": 4021, "total_steps": 8674, "loss": 0.4242950677871704, "lr": 1.2021899141310938e-06, "epoch": 0.9271385750518791, "percentage": 46.36, "elapsed_time": "2:44:29", "remaining_time": "3:10:21"} +{"current_steps": 4022, "total_steps": 8674, "loss": 0.45983830094337463, "lr": 1.201816512643637e-06, "epoch": 0.9273691491814618, "percentage": 46.37, "elapsed_time": "2:44:32", "remaining_time": "3:10:18"} +{"current_steps": 4023, "total_steps": 8674, "loss": 0.39785802364349365, "lr": 1.2014430818201044e-06, "epoch": 0.9275997233110445, "percentage": 46.38, "elapsed_time": "2:44:34", "remaining_time": "3:10:16"} +{"current_steps": 4024, "total_steps": 8674, "loss": 0.39265739917755127, "lr": 1.2010696217147783e-06, "epoch": 0.9278302974406272, "percentage": 46.39, "elapsed_time": "2:44:37", "remaining_time": "3:10:13"} +{"current_steps": 4025, "total_steps": 8674, "loss": 0.49783703684806824, "lr": 1.2006961323819455e-06, "epoch": 0.9280608715702098, "percentage": 46.4, "elapsed_time": "2:44:39", "remaining_time": "3:10:11"} +{"current_steps": 4026, "total_steps": 8674, "loss": 0.4479181170463562, "lr": 1.2003226138758953e-06, 
"epoch": 0.9282914456997925, "percentage": 46.41, "elapsed_time": "2:44:42", "remaining_time": "3:10:08"} +{"current_steps": 4027, "total_steps": 8674, "loss": 0.5205901265144348, "lr": 1.199949066250923e-06, "epoch": 0.9285220198293751, "percentage": 46.43, "elapsed_time": "2:44:44", "remaining_time": "3:10:06"} +{"current_steps": 4028, "total_steps": 8674, "loss": 0.5163009762763977, "lr": 1.1995754895613277e-06, "epoch": 0.9287525939589578, "percentage": 46.44, "elapsed_time": "2:44:46", "remaining_time": "3:10:03"} +{"current_steps": 4029, "total_steps": 8674, "loss": 0.5746268033981323, "lr": 1.1992018838614124e-06, "epoch": 0.9289831680885404, "percentage": 46.45, "elapsed_time": "2:44:49", "remaining_time": "3:10:01"} +{"current_steps": 4030, "total_steps": 8674, "loss": 0.5306442975997925, "lr": 1.1988282492054844e-06, "epoch": 0.9292137422181231, "percentage": 46.46, "elapsed_time": "2:44:51", "remaining_time": "3:09:58"} +{"current_steps": 4031, "total_steps": 8674, "loss": 0.4219534993171692, "lr": 1.198454585647855e-06, "epoch": 0.9294443163477057, "percentage": 46.47, "elapsed_time": "2:44:54", "remaining_time": "3:09:56"} +{"current_steps": 4032, "total_steps": 8674, "loss": 0.4167936444282532, "lr": 1.1980808932428406e-06, "epoch": 0.9296748904772885, "percentage": 46.48, "elapsed_time": "2:44:56", "remaining_time": "3:09:53"} +{"current_steps": 4033, "total_steps": 8674, "loss": 0.42376089096069336, "lr": 1.197707172044761e-06, "epoch": 0.9299054646068711, "percentage": 46.5, "elapsed_time": "2:44:59", "remaining_time": "3:09:51"} +{"current_steps": 4034, "total_steps": 8674, "loss": 0.48729848861694336, "lr": 1.1973334221079398e-06, "epoch": 0.9301360387364538, "percentage": 46.51, "elapsed_time": "2:45:01", "remaining_time": "3:09:49"} +{"current_steps": 4035, "total_steps": 8674, "loss": 0.45877987146377563, "lr": 1.1969596434867062e-06, "epoch": 0.9303666128660364, "percentage": 46.52, "elapsed_time": "2:45:04", "remaining_time": "3:09:47"} 
+{"current_steps": 4036, "total_steps": 8674, "loss": 0.504621684551239, "lr": 1.196585836235392e-06, "epoch": 0.9305971869956191, "percentage": 46.53, "elapsed_time": "2:45:06", "remaining_time": "3:09:44"} +{"current_steps": 4037, "total_steps": 8674, "loss": 0.45170748233795166, "lr": 1.1962120004083342e-06, "epoch": 0.9308277611252017, "percentage": 46.54, "elapsed_time": "2:45:09", "remaining_time": "3:09:42"} +{"current_steps": 4038, "total_steps": 8674, "loss": 0.3969152569770813, "lr": 1.1958381360598737e-06, "epoch": 0.9310583352547844, "percentage": 46.55, "elapsed_time": "2:45:11", "remaining_time": "3:09:39"} +{"current_steps": 4039, "total_steps": 8674, "loss": 0.4286048412322998, "lr": 1.1954642432443553e-06, "epoch": 0.931288909384367, "percentage": 46.56, "elapsed_time": "2:45:14", "remaining_time": "3:09:37"} +{"current_steps": 4040, "total_steps": 8674, "loss": 0.3755400776863098, "lr": 1.1950903220161284e-06, "epoch": 0.9315194835139498, "percentage": 46.58, "elapsed_time": "2:45:16", "remaining_time": "3:09:34"} +{"current_steps": 4041, "total_steps": 8674, "loss": 0.553135871887207, "lr": 1.1947163724295457e-06, "epoch": 0.9317500576435324, "percentage": 46.59, "elapsed_time": "2:45:19", "remaining_time": "3:09:32"} +{"current_steps": 4042, "total_steps": 8674, "loss": 0.53995281457901, "lr": 1.194342394538965e-06, "epoch": 0.9319806317731151, "percentage": 46.6, "elapsed_time": "2:45:21", "remaining_time": "3:09:30"} +{"current_steps": 4043, "total_steps": 8674, "loss": 0.4405739903450012, "lr": 1.1939683883987476e-06, "epoch": 0.9322112059026977, "percentage": 46.61, "elapsed_time": "2:45:24", "remaining_time": "3:09:27"} +{"current_steps": 4044, "total_steps": 8674, "loss": 0.5046489238739014, "lr": 1.1935943540632591e-06, "epoch": 0.9324417800322804, "percentage": 46.62, "elapsed_time": "2:45:26", "remaining_time": "3:09:25"} +{"current_steps": 4045, "total_steps": 8674, "loss": 0.4699453115463257, "lr": 1.1932202915868694e-06, "epoch": 
0.932672354161863, "percentage": 46.63, "elapsed_time": "2:45:29", "remaining_time": "3:09:22"} +{"current_steps": 4046, "total_steps": 8674, "loss": 0.5643539428710938, "lr": 1.192846201023952e-06, "epoch": 0.9329029282914457, "percentage": 46.65, "elapsed_time": "2:45:31", "remaining_time": "3:09:20"} +{"current_steps": 4047, "total_steps": 8674, "loss": 0.4423527121543884, "lr": 1.192472082428885e-06, "epoch": 0.9331335024210283, "percentage": 46.66, "elapsed_time": "2:45:34", "remaining_time": "3:09:17"} +{"current_steps": 4048, "total_steps": 8674, "loss": 0.4446362257003784, "lr": 1.1920979358560498e-06, "epoch": 0.933364076550611, "percentage": 46.67, "elapsed_time": "2:45:36", "remaining_time": "3:09:15"} +{"current_steps": 4049, "total_steps": 8674, "loss": 0.48347601294517517, "lr": 1.1917237613598332e-06, "epoch": 0.9335946506801936, "percentage": 46.68, "elapsed_time": "2:45:38", "remaining_time": "3:09:12"} +{"current_steps": 4050, "total_steps": 8674, "loss": 0.4736206531524658, "lr": 1.1913495589946243e-06, "epoch": 0.9338252248097764, "percentage": 46.69, "elapsed_time": "2:45:41", "remaining_time": "3:09:10"} +{"current_steps": 4051, "total_steps": 8674, "loss": 0.4896177053451538, "lr": 1.1909753288148181e-06, "epoch": 0.934055798939359, "percentage": 46.7, "elapsed_time": "2:45:43", "remaining_time": "3:09:07"} +{"current_steps": 4052, "total_steps": 8674, "loss": 0.3953405022621155, "lr": 1.1906010708748124e-06, "epoch": 0.9342863730689417, "percentage": 46.71, "elapsed_time": "2:45:46", "remaining_time": "3:09:05"} +{"current_steps": 4053, "total_steps": 8674, "loss": 0.30871689319610596, "lr": 1.1902267852290092e-06, "epoch": 0.9345169471985243, "percentage": 46.73, "elapsed_time": "2:45:48", "remaining_time": "3:09:02"} +{"current_steps": 4054, "total_steps": 8674, "loss": 0.44187474250793457, "lr": 1.1898524719318151e-06, "epoch": 0.934747521328107, "percentage": 46.74, "elapsed_time": "2:45:51", "remaining_time": "3:09:00"} 
+{"current_steps": 4055, "total_steps": 8674, "loss": 0.4069768488407135, "lr": 1.1894781310376396e-06, "epoch": 0.9349780954576896, "percentage": 46.75, "elapsed_time": "2:45:53", "remaining_time": "3:08:57"} +{"current_steps": 4056, "total_steps": 8674, "loss": 0.36307692527770996, "lr": 1.1891037626008982e-06, "epoch": 0.9352086695872723, "percentage": 46.76, "elapsed_time": "2:45:55", "remaining_time": "3:08:55"} +{"current_steps": 4057, "total_steps": 8674, "loss": 0.38535594940185547, "lr": 1.188729366676008e-06, "epoch": 0.9354392437168549, "percentage": 46.77, "elapsed_time": "2:45:58", "remaining_time": "3:08:52"} +{"current_steps": 4058, "total_steps": 8674, "loss": 0.46454256772994995, "lr": 1.1883549433173916e-06, "epoch": 0.9356698178464377, "percentage": 46.78, "elapsed_time": "2:46:01", "remaining_time": "3:08:50"} +{"current_steps": 4059, "total_steps": 8674, "loss": 0.48537465929985046, "lr": 1.1879804925794752e-06, "epoch": 0.9359003919760203, "percentage": 46.8, "elapsed_time": "2:46:03", "remaining_time": "3:08:48"} +{"current_steps": 4060, "total_steps": 8674, "loss": 0.4355062246322632, "lr": 1.1876060145166893e-06, "epoch": 0.936130966105603, "percentage": 46.81, "elapsed_time": "2:46:05", "remaining_time": "3:08:45"} +{"current_steps": 4061, "total_steps": 8674, "loss": 0.47248804569244385, "lr": 1.1872315091834676e-06, "epoch": 0.9363615402351856, "percentage": 46.82, "elapsed_time": "2:46:08", "remaining_time": "3:08:43"} +{"current_steps": 4062, "total_steps": 8674, "loss": 0.4896939992904663, "lr": 1.1868569766342488e-06, "epoch": 0.9365921143647683, "percentage": 46.83, "elapsed_time": "2:46:10", "remaining_time": "3:08:40"} +{"current_steps": 4063, "total_steps": 8674, "loss": 0.4259600043296814, "lr": 1.1864824169234744e-06, "epoch": 0.9368226884943509, "percentage": 46.84, "elapsed_time": "2:46:13", "remaining_time": "3:08:38"} +{"current_steps": 4064, "total_steps": 8674, "loss": 0.4228817820549011, "lr": 1.186107830105591e-06, 
"epoch": 0.9370532626239336, "percentage": 46.85, "elapsed_time": "2:46:15", "remaining_time": "3:08:35"} +{"current_steps": 4065, "total_steps": 8674, "loss": 0.44750750064849854, "lr": 1.1857332162350484e-06, "epoch": 0.9372838367535162, "percentage": 46.86, "elapsed_time": "2:46:17", "remaining_time": "3:08:33"} +{"current_steps": 4066, "total_steps": 8674, "loss": 0.49125558137893677, "lr": 1.1853585753663003e-06, "epoch": 0.937514410883099, "percentage": 46.88, "elapsed_time": "2:46:20", "remaining_time": "3:08:30"} +{"current_steps": 4067, "total_steps": 8674, "loss": 0.446805477142334, "lr": 1.1849839075538048e-06, "epoch": 0.9377449850126816, "percentage": 46.89, "elapsed_time": "2:46:22", "remaining_time": "3:08:28"} +{"current_steps": 4068, "total_steps": 8674, "loss": 0.4516616463661194, "lr": 1.1846092128520235e-06, "epoch": 0.9379755591422643, "percentage": 46.9, "elapsed_time": "2:46:25", "remaining_time": "3:08:25"} +{"current_steps": 4069, "total_steps": 8674, "loss": 0.5271207690238953, "lr": 1.1842344913154223e-06, "epoch": 0.9382061332718469, "percentage": 46.91, "elapsed_time": "2:46:27", "remaining_time": "3:08:23"} +{"current_steps": 4070, "total_steps": 8674, "loss": 0.46718811988830566, "lr": 1.1838597429984702e-06, "epoch": 0.9384367074014296, "percentage": 46.92, "elapsed_time": "2:46:30", "remaining_time": "3:08:20"} +{"current_steps": 4071, "total_steps": 8674, "loss": 0.4948880672454834, "lr": 1.1834849679556416e-06, "epoch": 0.9386672815310122, "percentage": 46.93, "elapsed_time": "2:46:32", "remaining_time": "3:08:18"} +{"current_steps": 4072, "total_steps": 8674, "loss": 0.5141744613647461, "lr": 1.183110166241413e-06, "epoch": 0.9388978556605949, "percentage": 46.94, "elapsed_time": "2:46:34", "remaining_time": "3:08:15"} +{"current_steps": 4073, "total_steps": 8674, "loss": 0.43921130895614624, "lr": 1.1827353379102662e-06, "epoch": 0.9391284297901775, "percentage": 46.96, "elapsed_time": "2:46:37", "remaining_time": "3:08:13"} 
+{"current_steps": 4074, "total_steps": 8674, "loss": 0.35931193828582764, "lr": 1.182360483016686e-06, "epoch": 0.9393590039197602, "percentage": 46.97, "elapsed_time": "2:46:39", "remaining_time": "3:08:10"} +{"current_steps": 4075, "total_steps": 8674, "loss": 0.4376310408115387, "lr": 1.1819856016151615e-06, "epoch": 0.9395895780493428, "percentage": 46.98, "elapsed_time": "2:46:42", "remaining_time": "3:08:08"} +{"current_steps": 4076, "total_steps": 8674, "loss": 0.45419907569885254, "lr": 1.1816106937601856e-06, "epoch": 0.9398201521789256, "percentage": 46.99, "elapsed_time": "2:46:44", "remaining_time": "3:08:06"} +{"current_steps": 4077, "total_steps": 8674, "loss": 0.4077754616737366, "lr": 1.1812357595062545e-06, "epoch": 0.9400507263085082, "percentage": 47.0, "elapsed_time": "2:46:47", "remaining_time": "3:08:03"} +{"current_steps": 4078, "total_steps": 8674, "loss": 0.5555585622787476, "lr": 1.1808607989078686e-06, "epoch": 0.9402813004380909, "percentage": 47.01, "elapsed_time": "2:46:49", "remaining_time": "3:08:01"} +{"current_steps": 4079, "total_steps": 8674, "loss": 0.4566183090209961, "lr": 1.1804858120195334e-06, "epoch": 0.9405118745676735, "percentage": 47.03, "elapsed_time": "2:46:52", "remaining_time": "3:07:58"} +{"current_steps": 4080, "total_steps": 8674, "loss": 0.39149847626686096, "lr": 1.180110798895756e-06, "epoch": 0.9407424486972562, "percentage": 47.04, "elapsed_time": "2:46:54", "remaining_time": "3:07:56"} +{"current_steps": 4081, "total_steps": 8674, "loss": 0.42695966362953186, "lr": 1.1797357595910485e-06, "epoch": 0.9409730228268388, "percentage": 47.05, "elapsed_time": "2:46:57", "remaining_time": "3:07:53"} +{"current_steps": 4082, "total_steps": 8674, "loss": 0.49673956632614136, "lr": 1.1793606941599266e-06, "epoch": 0.9412035969564215, "percentage": 47.06, "elapsed_time": "2:46:59", "remaining_time": "3:07:51"} +{"current_steps": 4083, "total_steps": 8674, "loss": 0.44765836000442505, "lr": 1.17898560265691e-06, 
"epoch": 0.9414341710860041, "percentage": 47.07, "elapsed_time": "2:47:01", "remaining_time": "3:07:48"} +{"current_steps": 4084, "total_steps": 8674, "loss": 0.40580642223358154, "lr": 1.1786104851365227e-06, "epoch": 0.9416647452155869, "percentage": 47.08, "elapsed_time": "2:47:04", "remaining_time": "3:07:46"} +{"current_steps": 4085, "total_steps": 8674, "loss": 0.5389235019683838, "lr": 1.1782353416532907e-06, "epoch": 0.9418953193451695, "percentage": 47.09, "elapsed_time": "2:47:07", "remaining_time": "3:07:44"} +{"current_steps": 4086, "total_steps": 8674, "loss": 0.5130764245986938, "lr": 1.1778601722617456e-06, "epoch": 0.9421258934747522, "percentage": 47.11, "elapsed_time": "2:47:09", "remaining_time": "3:07:41"} +{"current_steps": 4087, "total_steps": 8674, "loss": 0.5031291842460632, "lr": 1.1774849770164218e-06, "epoch": 0.9423564676043348, "percentage": 47.12, "elapsed_time": "2:47:11", "remaining_time": "3:07:39"} +{"current_steps": 4088, "total_steps": 8674, "loss": 0.463434636592865, "lr": 1.1771097559718581e-06, "epoch": 0.9425870417339175, "percentage": 47.13, "elapsed_time": "2:47:14", "remaining_time": "3:07:36"} +{"current_steps": 4089, "total_steps": 8674, "loss": 0.4249681234359741, "lr": 1.1767345091825962e-06, "epoch": 0.9428176158635001, "percentage": 47.14, "elapsed_time": "2:47:16", "remaining_time": "3:07:34"} +{"current_steps": 4090, "total_steps": 8674, "loss": 0.39353805780410767, "lr": 1.176359236703182e-06, "epoch": 0.9430481899930828, "percentage": 47.15, "elapsed_time": "2:47:19", "remaining_time": "3:07:31"} +{"current_steps": 4091, "total_steps": 8674, "loss": 0.4554273188114166, "lr": 1.1759839385881657e-06, "epoch": 0.9432787641226654, "percentage": 47.16, "elapsed_time": "2:47:21", "remaining_time": "3:07:29"} +{"current_steps": 4092, "total_steps": 8674, "loss": 0.6275606155395508, "lr": 1.1756086148921005e-06, "epoch": 0.9435093382522481, "percentage": 47.18, "elapsed_time": "2:47:24", "remaining_time": "3:07:26"} 
+{"current_steps": 4093, "total_steps": 8674, "loss": 0.5058892965316772, "lr": 1.1752332656695432e-06, "epoch": 0.9437399123818307, "percentage": 47.19, "elapsed_time": "2:47:26", "remaining_time": "3:07:24"} +{"current_steps": 4094, "total_steps": 8674, "loss": 0.4318118095397949, "lr": 1.1748578909750547e-06, "epoch": 0.9439704865114135, "percentage": 47.2, "elapsed_time": "2:47:29", "remaining_time": "3:07:22"} +{"current_steps": 4095, "total_steps": 8674, "loss": 0.4873964190483093, "lr": 1.1744824908631996e-06, "epoch": 0.9442010606409961, "percentage": 47.21, "elapsed_time": "2:47:31", "remaining_time": "3:07:19"} +{"current_steps": 4096, "total_steps": 8674, "loss": 0.5026696920394897, "lr": 1.1741070653885467e-06, "epoch": 0.9444316347705788, "percentage": 47.22, "elapsed_time": "2:47:33", "remaining_time": "3:07:17"} +{"current_steps": 4097, "total_steps": 8674, "loss": 0.4337490200996399, "lr": 1.1737316146056667e-06, "epoch": 0.9446622089001614, "percentage": 47.23, "elapsed_time": "2:47:36", "remaining_time": "3:07:14"} +{"current_steps": 4098, "total_steps": 8674, "loss": 0.4552634358406067, "lr": 1.173356138569136e-06, "epoch": 0.9448927830297441, "percentage": 47.24, "elapsed_time": "2:47:38", "remaining_time": "3:07:12"} +{"current_steps": 4099, "total_steps": 8674, "loss": 0.4631303548812866, "lr": 1.1729806373335336e-06, "epoch": 0.9451233571593267, "percentage": 47.26, "elapsed_time": "2:47:41", "remaining_time": "3:07:09"} +{"current_steps": 4100, "total_steps": 8674, "loss": 0.5004513263702393, "lr": 1.1726051109534424e-06, "epoch": 0.9453539312889094, "percentage": 47.27, "elapsed_time": "2:47:44", "remaining_time": "3:07:07"} +{"current_steps": 4101, "total_steps": 8674, "loss": 0.4634668827056885, "lr": 1.172229559483449e-06, "epoch": 0.945584505418492, "percentage": 47.28, "elapsed_time": "2:47:47", "remaining_time": "3:07:06"} +{"current_steps": 4102, "total_steps": 8674, "loss": 0.4034295678138733, "lr": 1.171853982978144e-06, "epoch": 
0.9458150795480748, "percentage": 47.29, "elapsed_time": "2:47:50", "remaining_time": "3:07:04"} +{"current_steps": 4103, "total_steps": 8674, "loss": 0.4981224536895752, "lr": 1.1714783814921206e-06, "epoch": 0.9460456536776574, "percentage": 47.3, "elapsed_time": "2:47:52", "remaining_time": "3:07:01"} +{"current_steps": 4104, "total_steps": 8674, "loss": 0.460249125957489, "lr": 1.1711027550799767e-06, "epoch": 0.9462762278072401, "percentage": 47.31, "elapsed_time": "2:47:55", "remaining_time": "3:06:59"} +{"current_steps": 4105, "total_steps": 8674, "loss": 0.4794936180114746, "lr": 1.170727103796313e-06, "epoch": 0.9465068019368227, "percentage": 47.33, "elapsed_time": "2:47:57", "remaining_time": "3:06:56"} +{"current_steps": 4106, "total_steps": 8674, "loss": 0.42724454402923584, "lr": 1.170351427695735e-06, "epoch": 0.9467373760664054, "percentage": 47.34, "elapsed_time": "2:47:59", "remaining_time": "3:06:54"} +{"current_steps": 4107, "total_steps": 8674, "loss": 0.4612593948841095, "lr": 1.16997572683285e-06, "epoch": 0.946967950195988, "percentage": 47.35, "elapsed_time": "2:48:02", "remaining_time": "3:06:51"} +{"current_steps": 4108, "total_steps": 8674, "loss": 0.49512046575546265, "lr": 1.169600001262271e-06, "epoch": 0.9471985243255707, "percentage": 47.36, "elapsed_time": "2:48:04", "remaining_time": "3:06:49"} +{"current_steps": 4109, "total_steps": 8674, "loss": 0.49438196420669556, "lr": 1.1692242510386124e-06, "epoch": 0.9474290984551533, "percentage": 47.37, "elapsed_time": "2:48:07", "remaining_time": "3:06:46"} +{"current_steps": 4110, "total_steps": 8674, "loss": 0.4833865165710449, "lr": 1.1688484762164938e-06, "epoch": 0.947659672584736, "percentage": 47.38, "elapsed_time": "2:48:09", "remaining_time": "3:06:44"} +{"current_steps": 4111, "total_steps": 8674, "loss": 0.49647942185401917, "lr": 1.1684726768505385e-06, "epoch": 0.9478902467143187, "percentage": 47.39, "elapsed_time": "2:48:12", "remaining_time": "3:06:41"} 
+{"current_steps": 4112, "total_steps": 8674, "loss": 0.4299147129058838, "lr": 1.1680968529953718e-06, "epoch": 0.9481208208439014, "percentage": 47.41, "elapsed_time": "2:48:14", "remaining_time": "3:06:39"} +{"current_steps": 4113, "total_steps": 8674, "loss": 0.42613041400909424, "lr": 1.167721004705624e-06, "epoch": 0.948351394973484, "percentage": 47.42, "elapsed_time": "2:48:16", "remaining_time": "3:06:36"} +{"current_steps": 4114, "total_steps": 8674, "loss": 0.3989883065223694, "lr": 1.1673451320359284e-06, "epoch": 0.9485819691030667, "percentage": 47.43, "elapsed_time": "2:48:19", "remaining_time": "3:06:34"} +{"current_steps": 4115, "total_steps": 8674, "loss": 0.41362684965133667, "lr": 1.1669692350409222e-06, "epoch": 0.9488125432326493, "percentage": 47.44, "elapsed_time": "2:48:21", "remaining_time": "3:06:31"} +{"current_steps": 4116, "total_steps": 8674, "loss": 0.3807048201560974, "lr": 1.1665933137752452e-06, "epoch": 0.9490431173622319, "percentage": 47.45, "elapsed_time": "2:48:24", "remaining_time": "3:06:29"} +{"current_steps": 4117, "total_steps": 8674, "loss": 0.3440876007080078, "lr": 1.1662173682935414e-06, "epoch": 0.9492736914918146, "percentage": 47.46, "elapsed_time": "2:48:26", "remaining_time": "3:06:26"} +{"current_steps": 4118, "total_steps": 8674, "loss": 0.43534499406814575, "lr": 1.165841398650459e-06, "epoch": 0.9495042656213972, "percentage": 47.48, "elapsed_time": "2:48:29", "remaining_time": "3:06:24"} +{"current_steps": 4119, "total_steps": 8674, "loss": 0.4900544285774231, "lr": 1.1654654049006484e-06, "epoch": 0.9497348397509799, "percentage": 47.49, "elapsed_time": "2:48:31", "remaining_time": "3:06:21"} +{"current_steps": 4120, "total_steps": 8674, "loss": 0.5189288854598999, "lr": 1.1650893870987643e-06, "epoch": 0.9499654138805625, "percentage": 47.5, "elapsed_time": "2:48:33", "remaining_time": "3:06:19"} +{"current_steps": 4121, "total_steps": 8674, "loss": 0.587873101234436, "lr": 1.1647133452994643e-06, 
"epoch": 0.9501959880101453, "percentage": 47.51, "elapsed_time": "2:48:36", "remaining_time": "3:06:16"} +{"current_steps": 4122, "total_steps": 8674, "loss": 0.4367108941078186, "lr": 1.1643372795574106e-06, "epoch": 0.9504265621397279, "percentage": 47.52, "elapsed_time": "2:48:38", "remaining_time": "3:06:14"} +{"current_steps": 4123, "total_steps": 8674, "loss": 0.4121246635913849, "lr": 1.1639611899272679e-06, "epoch": 0.9506571362693106, "percentage": 47.53, "elapsed_time": "2:48:40", "remaining_time": "3:06:11"} +{"current_steps": 4124, "total_steps": 8674, "loss": 0.4993973672389984, "lr": 1.1635850764637042e-06, "epoch": 0.9508877103988932, "percentage": 47.54, "elapsed_time": "2:48:43", "remaining_time": "3:06:08"} +{"current_steps": 4125, "total_steps": 8674, "loss": 0.39145413041114807, "lr": 1.163208939221392e-06, "epoch": 0.9511182845284759, "percentage": 47.56, "elapsed_time": "2:48:45", "remaining_time": "3:06:06"} +{"current_steps": 4126, "total_steps": 8674, "loss": 0.45954760909080505, "lr": 1.1628327782550065e-06, "epoch": 0.9513488586580585, "percentage": 47.57, "elapsed_time": "2:48:48", "remaining_time": "3:06:04"} +{"current_steps": 4127, "total_steps": 8674, "loss": 0.5159680843353271, "lr": 1.1624565936192263e-06, "epoch": 0.9515794327876412, "percentage": 47.58, "elapsed_time": "2:48:50", "remaining_time": "3:06:01"} +{"current_steps": 4128, "total_steps": 8674, "loss": 0.4441346228122711, "lr": 1.1620803853687337e-06, "epoch": 0.9518100069172238, "percentage": 47.59, "elapsed_time": "2:48:53", "remaining_time": "3:05:59"} +{"current_steps": 4129, "total_steps": 8674, "loss": 0.3842248320579529, "lr": 1.1617041535582144e-06, "epoch": 0.9520405810468066, "percentage": 47.6, "elapsed_time": "2:48:55", "remaining_time": "3:05:56"} +{"current_steps": 4130, "total_steps": 8674, "loss": 0.5332437753677368, "lr": 1.1613278982423577e-06, "epoch": 0.9522711551763892, "percentage": 47.61, "elapsed_time": "2:48:57", "remaining_time": "3:05:54"} 
+{"current_steps": 4131, "total_steps": 8674, "loss": 0.4265931248664856, "lr": 1.160951619475856e-06, "epoch": 0.9525017293059719, "percentage": 47.63, "elapsed_time": "2:49:00", "remaining_time": "3:05:51"} +{"current_steps": 4132, "total_steps": 8674, "loss": 0.47442418336868286, "lr": 1.1605753173134052e-06, "epoch": 0.9527323034355545, "percentage": 47.64, "elapsed_time": "2:49:02", "remaining_time": "3:05:49"} +{"current_steps": 4133, "total_steps": 8674, "loss": 0.6128898859024048, "lr": 1.1601989918097044e-06, "epoch": 0.9529628775651372, "percentage": 47.65, "elapsed_time": "2:49:05", "remaining_time": "3:05:46"} +{"current_steps": 4134, "total_steps": 8674, "loss": 0.5347775220870972, "lr": 1.159822643019457e-06, "epoch": 0.9531934516947198, "percentage": 47.66, "elapsed_time": "2:49:07", "remaining_time": "3:05:44"} +{"current_steps": 4135, "total_steps": 8674, "loss": 0.39984625577926636, "lr": 1.1594462709973682e-06, "epoch": 0.9534240258243025, "percentage": 47.67, "elapsed_time": "2:49:10", "remaining_time": "3:05:41"} +{"current_steps": 4136, "total_steps": 8674, "loss": 0.5146951675415039, "lr": 1.1590698757981483e-06, "epoch": 0.9536545999538851, "percentage": 47.68, "elapsed_time": "2:49:12", "remaining_time": "3:05:39"} +{"current_steps": 4137, "total_steps": 8674, "loss": 0.3589641749858856, "lr": 1.1586934574765097e-06, "epoch": 0.9538851740834678, "percentage": 47.69, "elapsed_time": "2:49:15", "remaining_time": "3:05:36"} +{"current_steps": 4138, "total_steps": 8674, "loss": 0.428930401802063, "lr": 1.1583170160871689e-06, "epoch": 0.9541157482130505, "percentage": 47.71, "elapsed_time": "2:49:17", "remaining_time": "3:05:34"} +{"current_steps": 4139, "total_steps": 8674, "loss": 0.46921080350875854, "lr": 1.1579405516848452e-06, "epoch": 0.9543463223426332, "percentage": 47.72, "elapsed_time": "2:49:19", "remaining_time": "3:05:31"} +{"current_steps": 4140, "total_steps": 8674, "loss": 0.39079514145851135, "lr": 1.1575640643242616e-06, 
"epoch": 0.9545768964722158, "percentage": 47.73, "elapsed_time": "2:49:22", "remaining_time": "3:05:29"} +{"current_steps": 4141, "total_steps": 8674, "loss": 0.4475102424621582, "lr": 1.1571875540601443e-06, "epoch": 0.9548074706017985, "percentage": 47.74, "elapsed_time": "2:49:24", "remaining_time": "3:05:26"} +{"current_steps": 4142, "total_steps": 8674, "loss": 0.43881016969680786, "lr": 1.1568110209472232e-06, "epoch": 0.9550380447313811, "percentage": 47.75, "elapsed_time": "2:49:27", "remaining_time": "3:05:24"} +{"current_steps": 4143, "total_steps": 8674, "loss": 0.4382214844226837, "lr": 1.156434465040231e-06, "epoch": 0.9552686188609638, "percentage": 47.76, "elapsed_time": "2:49:29", "remaining_time": "3:05:22"} +{"current_steps": 4144, "total_steps": 8674, "loss": 0.5390958786010742, "lr": 1.1560578863939037e-06, "epoch": 0.9554991929905464, "percentage": 47.77, "elapsed_time": "2:49:32", "remaining_time": "3:05:19"} +{"current_steps": 4145, "total_steps": 8674, "loss": 0.4276137948036194, "lr": 1.155681285062981e-06, "epoch": 0.9557297671201291, "percentage": 47.79, "elapsed_time": "2:49:34", "remaining_time": "3:05:17"} +{"current_steps": 4146, "total_steps": 8674, "loss": 0.4541968107223511, "lr": 1.1553046611022058e-06, "epoch": 0.9559603412497117, "percentage": 47.8, "elapsed_time": "2:49:36", "remaining_time": "3:05:14"} +{"current_steps": 4147, "total_steps": 8674, "loss": 0.43287473917007446, "lr": 1.1549280145663242e-06, "epoch": 0.9561909153792945, "percentage": 47.81, "elapsed_time": "2:49:39", "remaining_time": "3:05:12"} +{"current_steps": 4148, "total_steps": 8674, "loss": 0.432822585105896, "lr": 1.1545513455100855e-06, "epoch": 0.9564214895088771, "percentage": 47.82, "elapsed_time": "2:49:41", "remaining_time": "3:05:09"} +{"current_steps": 4149, "total_steps": 8674, "loss": 0.519271969795227, "lr": 1.1541746539882424e-06, "epoch": 0.9566520636384598, "percentage": 47.83, "elapsed_time": "2:49:44", "remaining_time": "3:05:07"} 
+{"current_steps": 4150, "total_steps": 8674, "loss": 0.4158627390861511, "lr": 1.1537979400555506e-06, "epoch": 0.9568826377680424, "percentage": 47.84, "elapsed_time": "2:49:46", "remaining_time": "3:05:04"} +{"current_steps": 4151, "total_steps": 8674, "loss": 0.42122989892959595, "lr": 1.1534212037667698e-06, "epoch": 0.9571132118976251, "percentage": 47.86, "elapsed_time": "2:49:49", "remaining_time": "3:05:02"} +{"current_steps": 4152, "total_steps": 8674, "loss": 0.4141794443130493, "lr": 1.1530444451766623e-06, "epoch": 0.9573437860272077, "percentage": 47.87, "elapsed_time": "2:49:51", "remaining_time": "3:04:59"} +{"current_steps": 4153, "total_steps": 8674, "loss": 0.4935780167579651, "lr": 1.1526676643399933e-06, "epoch": 0.9575743601567904, "percentage": 47.88, "elapsed_time": "2:49:54", "remaining_time": "3:04:57"} +{"current_steps": 4154, "total_steps": 8674, "loss": 0.5075733661651611, "lr": 1.152290861311532e-06, "epoch": 0.957804934286373, "percentage": 47.89, "elapsed_time": "2:49:56", "remaining_time": "3:04:54"} +{"current_steps": 4155, "total_steps": 8674, "loss": 0.4852841794490814, "lr": 1.151914036146051e-06, "epoch": 0.9580355084159557, "percentage": 47.9, "elapsed_time": "2:49:58", "remaining_time": "3:04:52"} +{"current_steps": 4156, "total_steps": 8674, "loss": 0.46114620566368103, "lr": 1.151537188898325e-06, "epoch": 0.9582660825455384, "percentage": 47.91, "elapsed_time": "2:50:01", "remaining_time": "3:04:49"} +{"current_steps": 4157, "total_steps": 8674, "loss": 0.519254207611084, "lr": 1.1511603196231327e-06, "epoch": 0.9584966566751211, "percentage": 47.92, "elapsed_time": "2:50:03", "remaining_time": "3:04:47"} +{"current_steps": 4158, "total_steps": 8674, "loss": 0.43635690212249756, "lr": 1.1507834283752562e-06, "epoch": 0.9587272308047037, "percentage": 47.94, "elapsed_time": "2:50:06", "remaining_time": "3:04:44"} +{"current_steps": 4159, "total_steps": 8674, "loss": 0.48448023200035095, "lr": 1.1504065152094802e-06, 
"epoch": 0.9589578049342864, "percentage": 47.95, "elapsed_time": "2:50:08", "remaining_time": "3:04:42"} +{"current_steps": 4160, "total_steps": 8674, "loss": 0.4461054801940918, "lr": 1.1500295801805927e-06, "epoch": 0.959188379063869, "percentage": 47.96, "elapsed_time": "2:50:11", "remaining_time": "3:04:40"} +{"current_steps": 4161, "total_steps": 8674, "loss": 0.44869595766067505, "lr": 1.1496526233433852e-06, "epoch": 0.9594189531934517, "percentage": 47.97, "elapsed_time": "2:50:13", "remaining_time": "3:04:37"} +{"current_steps": 4162, "total_steps": 8674, "loss": 0.4592103660106659, "lr": 1.1492756447526524e-06, "epoch": 0.9596495273230343, "percentage": 47.98, "elapsed_time": "2:50:16", "remaining_time": "3:04:35"} +{"current_steps": 4163, "total_steps": 8674, "loss": 0.48352301120758057, "lr": 1.1488986444631918e-06, "epoch": 0.959880101452617, "percentage": 47.99, "elapsed_time": "2:50:18", "remaining_time": "3:04:32"} +{"current_steps": 4164, "total_steps": 8674, "loss": 0.44718503952026367, "lr": 1.1485216225298043e-06, "epoch": 0.9601106755821996, "percentage": 48.01, "elapsed_time": "2:50:21", "remaining_time": "3:04:30"} +{"current_steps": 4165, "total_steps": 8674, "loss": 0.44659486413002014, "lr": 1.1481445790072933e-06, "epoch": 0.9603412497117824, "percentage": 48.02, "elapsed_time": "2:50:23", "remaining_time": "3:04:27"} +{"current_steps": 4166, "total_steps": 8674, "loss": 0.5143063068389893, "lr": 1.1477675139504665e-06, "epoch": 0.960571823841365, "percentage": 48.03, "elapsed_time": "2:50:26", "remaining_time": "3:04:25"} +{"current_steps": 4167, "total_steps": 8674, "loss": 0.6708887815475464, "lr": 1.1473904274141344e-06, "epoch": 0.9608023979709477, "percentage": 48.04, "elapsed_time": "2:50:28", "remaining_time": "3:04:23"} +{"current_steps": 4168, "total_steps": 8674, "loss": 0.3889666199684143, "lr": 1.1470133194531094e-06, "epoch": 0.9610329721005303, "percentage": 48.05, "elapsed_time": "2:50:31", "remaining_time": "3:04:20"} 
+{"current_steps": 4169, "total_steps": 8674, "loss": 0.4610622227191925, "lr": 1.1466361901222086e-06, "epoch": 0.961263546230113, "percentage": 48.06, "elapsed_time": "2:50:33", "remaining_time": "3:04:18"} +{"current_steps": 4170, "total_steps": 8674, "loss": 0.46372538805007935, "lr": 1.1462590394762514e-06, "epoch": 0.9614941203596956, "percentage": 48.07, "elapsed_time": "2:50:36", "remaining_time": "3:04:15"} +{"current_steps": 4171, "total_steps": 8674, "loss": 0.5197097063064575, "lr": 1.1458818675700607e-06, "epoch": 0.9617246944892783, "percentage": 48.09, "elapsed_time": "2:50:38", "remaining_time": "3:04:13"} +{"current_steps": 4172, "total_steps": 8674, "loss": 0.3849745988845825, "lr": 1.145504674458462e-06, "epoch": 0.9619552686188609, "percentage": 48.1, "elapsed_time": "2:50:40", "remaining_time": "3:04:11"} +{"current_steps": 4173, "total_steps": 8674, "loss": 0.4572817385196686, "lr": 1.1451274601962841e-06, "epoch": 0.9621858427484437, "percentage": 48.11, "elapsed_time": "2:50:43", "remaining_time": "3:04:08"} +{"current_steps": 4174, "total_steps": 8674, "loss": 0.4383746385574341, "lr": 1.1447502248383594e-06, "epoch": 0.9624164168780263, "percentage": 48.12, "elapsed_time": "2:50:45", "remaining_time": "3:04:06"} +{"current_steps": 4175, "total_steps": 8674, "loss": 0.5319672226905823, "lr": 1.1443729684395222e-06, "epoch": 0.962646991007609, "percentage": 48.13, "elapsed_time": "2:50:48", "remaining_time": "3:04:03"} +{"current_steps": 4176, "total_steps": 8674, "loss": 0.4351249933242798, "lr": 1.143995691054611e-06, "epoch": 0.9628775651371916, "percentage": 48.14, "elapsed_time": "2:50:51", "remaining_time": "3:04:01"} +{"current_steps": 4177, "total_steps": 8674, "loss": 0.5453774929046631, "lr": 1.1436183927384668e-06, "epoch": 0.9631081392667743, "percentage": 48.16, "elapsed_time": "2:50:53", "remaining_time": "3:03:59"} +{"current_steps": 4178, "total_steps": 8674, "loss": 0.5605905055999756, "lr": 1.1432410735459336e-06, "epoch": 
0.9633387133963569, "percentage": 48.17, "elapsed_time": "2:50:55", "remaining_time": "3:03:56"} +{"current_steps": 4179, "total_steps": 8674, "loss": 0.4556693434715271, "lr": 1.1428637335318587e-06, "epoch": 0.9635692875259396, "percentage": 48.18, "elapsed_time": "2:50:58", "remaining_time": "3:03:54"} +{"current_steps": 4180, "total_steps": 8674, "loss": 0.45428892970085144, "lr": 1.142486372751092e-06, "epoch": 0.9637998616555222, "percentage": 48.19, "elapsed_time": "2:51:00", "remaining_time": "3:03:51"} +{"current_steps": 4181, "total_steps": 8674, "loss": 0.4897412657737732, "lr": 1.142108991258487e-06, "epoch": 0.9640304357851049, "percentage": 48.2, "elapsed_time": "2:51:03", "remaining_time": "3:03:49"} +{"current_steps": 4182, "total_steps": 8674, "loss": 0.5478836894035339, "lr": 1.1417315891089004e-06, "epoch": 0.9642610099146876, "percentage": 48.21, "elapsed_time": "2:51:05", "remaining_time": "3:03:46"} +{"current_steps": 4183, "total_steps": 8674, "loss": 0.42394131422042847, "lr": 1.1413541663571904e-06, "epoch": 0.9644915840442703, "percentage": 48.22, "elapsed_time": "2:51:08", "remaining_time": "3:03:44"} +{"current_steps": 4184, "total_steps": 8674, "loss": 0.5047104954719543, "lr": 1.1409767230582199e-06, "epoch": 0.9647221581738529, "percentage": 48.24, "elapsed_time": "2:51:10", "remaining_time": "3:03:41"} +{"current_steps": 4185, "total_steps": 8674, "loss": 0.43985825777053833, "lr": 1.1405992592668538e-06, "epoch": 0.9649527323034356, "percentage": 48.25, "elapsed_time": "2:51:13", "remaining_time": "3:03:39"} +{"current_steps": 4186, "total_steps": 8674, "loss": 0.4338407516479492, "lr": 1.1402217750379608e-06, "epoch": 0.9651833064330182, "percentage": 48.26, "elapsed_time": "2:51:15", "remaining_time": "3:03:37"} +{"current_steps": 4187, "total_steps": 8674, "loss": 0.4532614052295685, "lr": 1.1398442704264118e-06, "epoch": 0.9654138805626009, "percentage": 48.27, "elapsed_time": "2:51:18", "remaining_time": "3:03:34"} 
+{"current_steps": 4188, "total_steps": 8674, "loss": 0.4546123445034027, "lr": 1.1394667454870802e-06, "epoch": 0.9656444546921835, "percentage": 48.28, "elapsed_time": "2:51:20", "remaining_time": "3:03:32"} +{"current_steps": 4189, "total_steps": 8674, "loss": 0.44743451476097107, "lr": 1.139089200274844e-06, "epoch": 0.9658750288217662, "percentage": 48.29, "elapsed_time": "2:51:23", "remaining_time": "3:03:29"} +{"current_steps": 4190, "total_steps": 8674, "loss": 0.4566968083381653, "lr": 1.138711634844583e-06, "epoch": 0.9661056029513488, "percentage": 48.31, "elapsed_time": "2:51:25", "remaining_time": "3:03:27"} +{"current_steps": 4191, "total_steps": 8674, "loss": 0.46385467052459717, "lr": 1.13833404925118e-06, "epoch": 0.9663361770809316, "percentage": 48.32, "elapsed_time": "2:51:27", "remaining_time": "3:03:24"} +{"current_steps": 4192, "total_steps": 8674, "loss": 0.4614461660385132, "lr": 1.137956443549521e-06, "epoch": 0.9665667512105142, "percentage": 48.33, "elapsed_time": "2:51:30", "remaining_time": "3:03:22"} +{"current_steps": 4193, "total_steps": 8674, "loss": 0.4351955056190491, "lr": 1.1375788177944945e-06, "epoch": 0.9667973253400969, "percentage": 48.34, "elapsed_time": "2:51:33", "remaining_time": "3:03:20"} +{"current_steps": 4194, "total_steps": 8674, "loss": 0.4172135591506958, "lr": 1.1372011720409927e-06, "epoch": 0.9670278994696795, "percentage": 48.35, "elapsed_time": "2:51:35", "remaining_time": "3:03:17"} +{"current_steps": 4195, "total_steps": 8674, "loss": 0.5482916831970215, "lr": 1.1368235063439102e-06, "epoch": 0.9672584735992622, "percentage": 48.36, "elapsed_time": "2:51:37", "remaining_time": "3:03:15"} +{"current_steps": 4196, "total_steps": 8674, "loss": 0.4336891770362854, "lr": 1.136445820758144e-06, "epoch": 0.9674890477288448, "percentage": 48.37, "elapsed_time": "2:51:40", "remaining_time": "3:03:12"} +{"current_steps": 4197, "total_steps": 8674, "loss": 0.42612385749816895, "lr": 1.1360681153385956e-06, "epoch": 
0.9677196218584275, "percentage": 48.39, "elapsed_time": "2:51:42", "remaining_time": "3:03:10"} +{"current_steps": 4198, "total_steps": 8674, "loss": 0.513736367225647, "lr": 1.135690390140167e-06, "epoch": 0.9679501959880101, "percentage": 48.4, "elapsed_time": "2:51:45", "remaining_time": "3:03:07"} +{"current_steps": 4199, "total_steps": 8674, "loss": 0.45551058650016785, "lr": 1.1353126452177656e-06, "epoch": 0.9681807701175928, "percentage": 48.41, "elapsed_time": "2:51:47", "remaining_time": "3:03:05"} +{"current_steps": 4200, "total_steps": 8674, "loss": 0.45450061559677124, "lr": 1.1349348806262994e-06, "epoch": 0.9684113442471755, "percentage": 48.42, "elapsed_time": "2:51:50", "remaining_time": "3:03:02"} +{"current_steps": 4201, "total_steps": 8674, "loss": 0.43962353467941284, "lr": 1.1345570964206807e-06, "epoch": 0.9686419183767582, "percentage": 48.43, "elapsed_time": "2:51:54", "remaining_time": "3:03:01"} +{"current_steps": 4202, "total_steps": 8674, "loss": 0.5304821729660034, "lr": 1.1341792926558245e-06, "epoch": 0.9688724925063408, "percentage": 48.44, "elapsed_time": "2:51:56", "remaining_time": "3:02:59"} +{"current_steps": 4203, "total_steps": 8674, "loss": 0.6079045534133911, "lr": 1.1338014693866483e-06, "epoch": 0.9691030666359235, "percentage": 48.46, "elapsed_time": "2:51:58", "remaining_time": "3:02:56"} +{"current_steps": 4204, "total_steps": 8674, "loss": 0.39895182847976685, "lr": 1.1334236266680724e-06, "epoch": 0.9693336407655061, "percentage": 48.47, "elapsed_time": "2:52:01", "remaining_time": "3:02:54"} +{"current_steps": 4205, "total_steps": 8674, "loss": 0.5264945030212402, "lr": 1.1330457645550202e-06, "epoch": 0.9695642148950888, "percentage": 48.48, "elapsed_time": "2:52:03", "remaining_time": "3:02:51"} +{"current_steps": 4206, "total_steps": 8674, "loss": 0.4794533848762512, "lr": 1.1326678831024178e-06, "epoch": 0.9697947890246714, "percentage": 48.49, "elapsed_time": "2:52:06", "remaining_time": "3:02:49"} 
+{"current_steps": 4207, "total_steps": 8674, "loss": 0.42917680740356445, "lr": 1.1322899823651938e-06, "epoch": 0.9700253631542541, "percentage": 48.5, "elapsed_time": "2:52:08", "remaining_time": "3:02:47"} +{"current_steps": 4208, "total_steps": 8674, "loss": 0.42155951261520386, "lr": 1.1319120623982804e-06, "epoch": 0.9702559372838367, "percentage": 48.51, "elapsed_time": "2:52:11", "remaining_time": "3:02:44"} +{"current_steps": 4209, "total_steps": 8674, "loss": 0.5119719505310059, "lr": 1.1315341232566121e-06, "epoch": 0.9704865114134195, "percentage": 48.52, "elapsed_time": "2:52:13", "remaining_time": "3:02:42"} +{"current_steps": 4210, "total_steps": 8674, "loss": 0.5261529684066772, "lr": 1.1311561649951255e-06, "epoch": 0.9707170855430021, "percentage": 48.54, "elapsed_time": "2:52:16", "remaining_time": "3:02:39"} +{"current_steps": 4211, "total_steps": 8674, "loss": 0.5133010149002075, "lr": 1.1307781876687609e-06, "epoch": 0.9709476596725848, "percentage": 48.55, "elapsed_time": "2:52:18", "remaining_time": "3:02:37"} +{"current_steps": 4212, "total_steps": 8674, "loss": 0.5214196443557739, "lr": 1.1304001913324617e-06, "epoch": 0.9711782338021674, "percentage": 48.56, "elapsed_time": "2:52:21", "remaining_time": "3:02:34"} +{"current_steps": 4213, "total_steps": 8674, "loss": 0.4665095806121826, "lr": 1.1300221760411732e-06, "epoch": 0.9714088079317501, "percentage": 48.57, "elapsed_time": "2:52:23", "remaining_time": "3:02:32"} +{"current_steps": 4214, "total_steps": 8674, "loss": 0.44912537932395935, "lr": 1.1296441418498435e-06, "epoch": 0.9716393820613327, "percentage": 48.58, "elapsed_time": "2:52:25", "remaining_time": "3:02:29"} +{"current_steps": 4215, "total_steps": 8674, "loss": 0.48622840642929077, "lr": 1.1292660888134241e-06, "epoch": 0.9718699561909154, "percentage": 48.59, "elapsed_time": "2:52:28", "remaining_time": "3:02:27"} +{"current_steps": 4216, "total_steps": 8674, "loss": 0.40099745988845825, "lr": 1.128888016986868e-06, 
"epoch": 0.972100530320498, "percentage": 48.61, "elapsed_time": "2:52:30", "remaining_time": "3:02:24"} +{"current_steps": 4217, "total_steps": 8674, "loss": 0.4981631934642792, "lr": 1.1285099264251331e-06, "epoch": 0.9723311044500808, "percentage": 48.62, "elapsed_time": "2:52:33", "remaining_time": "3:02:22"} +{"current_steps": 4218, "total_steps": 8674, "loss": 0.3902980387210846, "lr": 1.1281318171831778e-06, "epoch": 0.9725616785796634, "percentage": 48.63, "elapsed_time": "2:52:35", "remaining_time": "3:02:20"} +{"current_steps": 4219, "total_steps": 8674, "loss": 0.5120723843574524, "lr": 1.1277536893159641e-06, "epoch": 0.9727922527092461, "percentage": 48.64, "elapsed_time": "2:52:38", "remaining_time": "3:02:17"} +{"current_steps": 4220, "total_steps": 8674, "loss": 0.47908157110214233, "lr": 1.1273755428784568e-06, "epoch": 0.9730228268388287, "percentage": 48.65, "elapsed_time": "2:52:40", "remaining_time": "3:02:15"} +{"current_steps": 4221, "total_steps": 8674, "loss": 0.44935697317123413, "lr": 1.126997377925624e-06, "epoch": 0.9732534009684114, "percentage": 48.66, "elapsed_time": "2:52:42", "remaining_time": "3:02:12"} +{"current_steps": 4222, "total_steps": 8674, "loss": 0.46883124113082886, "lr": 1.1266191945124345e-06, "epoch": 0.973483975097994, "percentage": 48.67, "elapsed_time": "2:52:45", "remaining_time": "3:02:09"} +{"current_steps": 4223, "total_steps": 8674, "loss": 0.41385799646377563, "lr": 1.1262409926938622e-06, "epoch": 0.9737145492275767, "percentage": 48.69, "elapsed_time": "2:52:47", "remaining_time": "3:02:07"} +{"current_steps": 4224, "total_steps": 8674, "loss": 0.5450118780136108, "lr": 1.1258627725248821e-06, "epoch": 0.9739451233571593, "percentage": 48.7, "elapsed_time": "2:52:50", "remaining_time": "3:02:04"} +{"current_steps": 4225, "total_steps": 8674, "loss": 0.4728820323944092, "lr": 1.1254845340604725e-06, "epoch": 0.974175697486742, "percentage": 48.71, "elapsed_time": "2:52:52", "remaining_time": "3:02:02"} 
+{"current_steps": 4226, "total_steps": 8674, "loss": 0.5111296772956848, "lr": 1.1251062773556143e-06, "epoch": 0.9744062716163246, "percentage": 48.72, "elapsed_time": "2:52:55", "remaining_time": "3:02:00"} +{"current_steps": 4227, "total_steps": 8674, "loss": 0.4538743793964386, "lr": 1.1247280024652908e-06, "epoch": 0.9746368457459073, "percentage": 48.73, "elapsed_time": "2:52:57", "remaining_time": "3:01:57"} +{"current_steps": 4228, "total_steps": 8674, "loss": 0.4917091131210327, "lr": 1.1243497094444877e-06, "epoch": 0.97486741987549, "percentage": 48.74, "elapsed_time": "2:52:59", "remaining_time": "3:01:55"} +{"current_steps": 4229, "total_steps": 8674, "loss": 0.40376198291778564, "lr": 1.1239713983481945e-06, "epoch": 0.9750979940050726, "percentage": 48.75, "elapsed_time": "2:53:02", "remaining_time": "3:01:52"} +{"current_steps": 4230, "total_steps": 8674, "loss": 0.5356566905975342, "lr": 1.1235930692314019e-06, "epoch": 0.9753285681346553, "percentage": 48.77, "elapsed_time": "2:53:04", "remaining_time": "3:01:50"} +{"current_steps": 4231, "total_steps": 8674, "loss": 0.4374624490737915, "lr": 1.123214722149104e-06, "epoch": 0.9755591422642379, "percentage": 48.78, "elapsed_time": "2:53:07", "remaining_time": "3:01:47"} +{"current_steps": 4232, "total_steps": 8674, "loss": 0.4225429594516754, "lr": 1.1228363571562976e-06, "epoch": 0.9757897163938206, "percentage": 48.79, "elapsed_time": "2:53:09", "remaining_time": "3:01:45"} +{"current_steps": 4233, "total_steps": 8674, "loss": 0.5389699935913086, "lr": 1.1224579743079819e-06, "epoch": 0.9760202905234032, "percentage": 48.8, "elapsed_time": "2:53:11", "remaining_time": "3:01:42"} +{"current_steps": 4234, "total_steps": 8674, "loss": 0.4925463795661926, "lr": 1.1220795736591584e-06, "epoch": 0.9762508646529859, "percentage": 48.81, "elapsed_time": "2:53:14", "remaining_time": "3:01:40"} +{"current_steps": 4235, "total_steps": 8674, "loss": 0.4694328308105469, "lr": 1.1217011552648315e-06, "epoch": 
0.9764814387825685, "percentage": 48.82, "elapsed_time": "2:53:17", "remaining_time": "3:01:37"} +{"current_steps": 4236, "total_steps": 8674, "loss": 0.39887624979019165, "lr": 1.1213227191800086e-06, "epoch": 0.9767120129121513, "percentage": 48.84, "elapsed_time": "2:53:19", "remaining_time": "3:01:35"} +{"current_steps": 4237, "total_steps": 8674, "loss": 0.4930388927459717, "lr": 1.120944265459699e-06, "epoch": 0.9769425870417339, "percentage": 48.85, "elapsed_time": "2:53:21", "remaining_time": "3:01:32"} +{"current_steps": 4238, "total_steps": 8674, "loss": 0.4595404863357544, "lr": 1.1205657941589143e-06, "epoch": 0.9771731611713166, "percentage": 48.86, "elapsed_time": "2:53:24", "remaining_time": "3:01:30"} +{"current_steps": 4239, "total_steps": 8674, "loss": 0.44177496433258057, "lr": 1.1201873053326695e-06, "epoch": 0.9774037353008992, "percentage": 48.87, "elapsed_time": "2:53:26", "remaining_time": "3:01:28"} +{"current_steps": 4240, "total_steps": 8674, "loss": 0.47095373272895813, "lr": 1.119808799035982e-06, "epoch": 0.9776343094304819, "percentage": 48.88, "elapsed_time": "2:53:29", "remaining_time": "3:01:25"} +{"current_steps": 4241, "total_steps": 8674, "loss": 0.4649583697319031, "lr": 1.1194302753238716e-06, "epoch": 0.9778648835600645, "percentage": 48.89, "elapsed_time": "2:53:31", "remaining_time": "3:01:23"} +{"current_steps": 4242, "total_steps": 8674, "loss": 0.44815266132354736, "lr": 1.1190517342513598e-06, "epoch": 0.9780954576896472, "percentage": 48.9, "elapsed_time": "2:53:34", "remaining_time": "3:01:20"} +{"current_steps": 4243, "total_steps": 8674, "loss": 0.4861665368080139, "lr": 1.118673175873472e-06, "epoch": 0.9783260318192298, "percentage": 48.92, "elapsed_time": "2:53:36", "remaining_time": "3:01:18"} +{"current_steps": 4244, "total_steps": 8674, "loss": 0.5196468830108643, "lr": 1.1182946002452354e-06, "epoch": 0.9785566059488126, "percentage": 48.93, "elapsed_time": "2:53:39", "remaining_time": "3:01:15"} 
+{"current_steps": 4245, "total_steps": 8674, "loss": 0.49746841192245483, "lr": 1.11791600742168e-06, "epoch": 0.9787871800783952, "percentage": 48.94, "elapsed_time": "2:53:41", "remaining_time": "3:01:13"} +{"current_steps": 4246, "total_steps": 8674, "loss": 0.4637739956378937, "lr": 1.1175373974578377e-06, "epoch": 0.9790177542079779, "percentage": 48.95, "elapsed_time": "2:53:44", "remaining_time": "3:01:10"} +{"current_steps": 4247, "total_steps": 8674, "loss": 0.46009692549705505, "lr": 1.1171587704087434e-06, "epoch": 0.9792483283375605, "percentage": 48.96, "elapsed_time": "2:53:46", "remaining_time": "3:01:08"} +{"current_steps": 4248, "total_steps": 8674, "loss": 0.49036258459091187, "lr": 1.1167801263294346e-06, "epoch": 0.9794789024671432, "percentage": 48.97, "elapsed_time": "2:53:49", "remaining_time": "3:01:06"} +{"current_steps": 4249, "total_steps": 8674, "loss": 0.4730580449104309, "lr": 1.1164014652749509e-06, "epoch": 0.9797094765967258, "percentage": 48.99, "elapsed_time": "2:53:51", "remaining_time": "3:01:03"} +{"current_steps": 4250, "total_steps": 8674, "loss": 0.5029968023300171, "lr": 1.1160227873003345e-06, "epoch": 0.9799400507263085, "percentage": 49.0, "elapsed_time": "2:53:53", "remaining_time": "3:01:00"} +{"current_steps": 4251, "total_steps": 8674, "loss": 0.5149805545806885, "lr": 1.1156440924606299e-06, "epoch": 0.9801706248558911, "percentage": 49.01, "elapsed_time": "2:53:56", "remaining_time": "3:00:58"} +{"current_steps": 4252, "total_steps": 8674, "loss": 0.5017384886741638, "lr": 1.1152653808108845e-06, "epoch": 0.9804011989854738, "percentage": 49.02, "elapsed_time": "2:53:58", "remaining_time": "3:00:56"} +{"current_steps": 4253, "total_steps": 8674, "loss": 0.47569048404693604, "lr": 1.114886652406148e-06, "epoch": 0.9806317731150564, "percentage": 49.03, "elapsed_time": "2:54:01", "remaining_time": "3:00:53"} +{"current_steps": 4254, "total_steps": 8674, "loss": 0.5127655863761902, "lr": 1.1145079073014722e-06, 
"epoch": 0.9808623472446392, "percentage": 49.04, "elapsed_time": "2:54:03", "remaining_time": "3:00:51"} +{"current_steps": 4255, "total_steps": 8674, "loss": 0.4014360308647156, "lr": 1.1141291455519114e-06, "epoch": 0.9810929213742218, "percentage": 49.05, "elapsed_time": "2:54:06", "remaining_time": "3:00:48"} +{"current_steps": 4256, "total_steps": 8674, "loss": 0.43737465143203735, "lr": 1.1137503672125228e-06, "epoch": 0.9813234955038045, "percentage": 49.07, "elapsed_time": "2:54:08", "remaining_time": "3:00:46"} +{"current_steps": 4257, "total_steps": 8674, "loss": 0.4389764070510864, "lr": 1.1133715723383655e-06, "epoch": 0.9815540696333871, "percentage": 49.08, "elapsed_time": "2:54:10", "remaining_time": "3:00:43"} +{"current_steps": 4258, "total_steps": 8674, "loss": 0.5105381608009338, "lr": 1.112992760984501e-06, "epoch": 0.9817846437629698, "percentage": 49.09, "elapsed_time": "2:54:13", "remaining_time": "3:00:41"} +{"current_steps": 4259, "total_steps": 8674, "loss": 0.4393002688884735, "lr": 1.1126139332059937e-06, "epoch": 0.9820152178925524, "percentage": 49.1, "elapsed_time": "2:54:15", "remaining_time": "3:00:38"} +{"current_steps": 4260, "total_steps": 8674, "loss": 0.541419267654419, "lr": 1.1122350890579102e-06, "epoch": 0.9822457920221351, "percentage": 49.11, "elapsed_time": "2:54:18", "remaining_time": "3:00:36"} +{"current_steps": 4261, "total_steps": 8674, "loss": 0.4153546094894409, "lr": 1.1118562285953186e-06, "epoch": 0.9824763661517177, "percentage": 49.12, "elapsed_time": "2:54:20", "remaining_time": "3:00:33"} +{"current_steps": 4262, "total_steps": 8674, "loss": 0.5060696601867676, "lr": 1.1114773518732907e-06, "epoch": 0.9827069402813005, "percentage": 49.14, "elapsed_time": "2:54:23", "remaining_time": "3:00:31"} +{"current_steps": 4263, "total_steps": 8674, "loss": 0.5975456237792969, "lr": 1.1110984589468998e-06, "epoch": 0.9829375144108831, "percentage": 49.15, "elapsed_time": "2:54:25", "remaining_time": "3:00:28"} 
+{"current_steps": 4264, "total_steps": 8674, "loss": 0.5729621648788452, "lr": 1.110719549871222e-06, "epoch": 0.9831680885404658, "percentage": 49.16, "elapsed_time": "2:54:28", "remaining_time": "3:00:26"} +{"current_steps": 4265, "total_steps": 8674, "loss": 0.3948165476322174, "lr": 1.1103406247013356e-06, "epoch": 0.9833986626700484, "percentage": 49.17, "elapsed_time": "2:54:30", "remaining_time": "3:00:23"} +{"current_steps": 4266, "total_steps": 8674, "loss": 0.41744932532310486, "lr": 1.1099616834923212e-06, "epoch": 0.9836292367996311, "percentage": 49.18, "elapsed_time": "2:54:32", "remaining_time": "3:00:21"} +{"current_steps": 4267, "total_steps": 8674, "loss": 0.4701330065727234, "lr": 1.1095827262992611e-06, "epoch": 0.9838598109292137, "percentage": 49.19, "elapsed_time": "2:54:35", "remaining_time": "3:00:19"} +{"current_steps": 4268, "total_steps": 8674, "loss": 0.4841681718826294, "lr": 1.109203753177242e-06, "epoch": 0.9840903850587964, "percentage": 49.2, "elapsed_time": "2:54:38", "remaining_time": "3:00:16"} +{"current_steps": 4269, "total_steps": 8674, "loss": 0.4180435538291931, "lr": 1.10882476418135e-06, "epoch": 0.984320959188379, "percentage": 49.22, "elapsed_time": "2:54:40", "remaining_time": "3:00:14"} +{"current_steps": 4270, "total_steps": 8674, "loss": 0.39362633228302, "lr": 1.1084457593666758e-06, "epoch": 0.9845515333179617, "percentage": 49.23, "elapsed_time": "2:54:42", "remaining_time": "3:00:11"} +{"current_steps": 4271, "total_steps": 8674, "loss": 0.5192993879318237, "lr": 1.1080667387883116e-06, "epoch": 0.9847821074475444, "percentage": 49.24, "elapsed_time": "2:54:45", "remaining_time": "3:00:09"} +{"current_steps": 4272, "total_steps": 8674, "loss": 0.48835504055023193, "lr": 1.1076877025013517e-06, "epoch": 0.9850126815771271, "percentage": 49.25, "elapsed_time": "2:54:47", "remaining_time": "3:00:06"} +{"current_steps": 4273, "total_steps": 8674, "loss": 0.44442474842071533, "lr": 1.1073086505608925e-06, "epoch": 
0.9852432557067097, "percentage": 49.26, "elapsed_time": "2:54:50", "remaining_time": "3:00:04"} +{"current_steps": 4274, "total_steps": 8674, "loss": 0.4544455409049988, "lr": 1.1069295830220339e-06, "epoch": 0.9854738298362924, "percentage": 49.27, "elapsed_time": "2:54:52", "remaining_time": "3:00:01"} +{"current_steps": 4275, "total_steps": 8674, "loss": 0.482341468334198, "lr": 1.106550499939876e-06, "epoch": 0.985704403965875, "percentage": 49.29, "elapsed_time": "2:54:54", "remaining_time": "2:59:59"} +{"current_steps": 4276, "total_steps": 8674, "loss": 0.5251357555389404, "lr": 1.1061714013695236e-06, "epoch": 0.9859349780954577, "percentage": 49.3, "elapsed_time": "2:54:57", "remaining_time": "2:59:56"} +{"current_steps": 4277, "total_steps": 8674, "loss": 0.4538683295249939, "lr": 1.1057922873660819e-06, "epoch": 0.9861655522250403, "percentage": 49.31, "elapsed_time": "2:55:00", "remaining_time": "2:59:54"} +{"current_steps": 4278, "total_steps": 8674, "loss": 0.5112448930740356, "lr": 1.105413157984659e-06, "epoch": 0.986396126354623, "percentage": 49.32, "elapsed_time": "2:55:02", "remaining_time": "2:59:52"} +{"current_steps": 4279, "total_steps": 8674, "loss": 0.48863890767097473, "lr": 1.1050340132803654e-06, "epoch": 0.9866267004842056, "percentage": 49.33, "elapsed_time": "2:55:04", "remaining_time": "2:59:49"} +{"current_steps": 4280, "total_steps": 8674, "loss": 0.43637439608573914, "lr": 1.1046548533083134e-06, "epoch": 0.9868572746137884, "percentage": 49.34, "elapsed_time": "2:55:07", "remaining_time": "2:59:47"} +{"current_steps": 4281, "total_steps": 8674, "loss": 0.5231983065605164, "lr": 1.104275678123618e-06, "epoch": 0.987087848743371, "percentage": 49.35, "elapsed_time": "2:55:09", "remaining_time": "2:59:44"} +{"current_steps": 4282, "total_steps": 8674, "loss": 0.46838122606277466, "lr": 1.1038964877813955e-06, "epoch": 0.9873184228729537, "percentage": 49.37, "elapsed_time": "2:55:12", "remaining_time": "2:59:42"} 
+{"current_steps": 4283, "total_steps": 8674, "loss": 0.4330589473247528, "lr": 1.1035172823367658e-06, "epoch": 0.9875489970025363, "percentage": 49.38, "elapsed_time": "2:55:14", "remaining_time": "2:59:39"} +{"current_steps": 4284, "total_steps": 8674, "loss": 0.44962531328201294, "lr": 1.1031380618448501e-06, "epoch": 0.987779571132119, "percentage": 49.39, "elapsed_time": "2:55:16", "remaining_time": "2:59:37"} +{"current_steps": 4285, "total_steps": 8674, "loss": 0.44549795985221863, "lr": 1.1027588263607719e-06, "epoch": 0.9880101452617016, "percentage": 49.4, "elapsed_time": "2:55:19", "remaining_time": "2:59:34"} +{"current_steps": 4286, "total_steps": 8674, "loss": 0.43510758876800537, "lr": 1.1023795759396568e-06, "epoch": 0.9882407193912843, "percentage": 49.41, "elapsed_time": "2:55:22", "remaining_time": "2:59:32"} +{"current_steps": 4287, "total_steps": 8674, "loss": 0.4369906187057495, "lr": 1.1020003106366324e-06, "epoch": 0.9884712935208669, "percentage": 49.42, "elapsed_time": "2:55:24", "remaining_time": "2:59:29"} +{"current_steps": 4288, "total_steps": 8674, "loss": 0.42049574851989746, "lr": 1.1016210305068296e-06, "epoch": 0.9887018676504497, "percentage": 49.44, "elapsed_time": "2:55:26", "remaining_time": "2:59:27"} +{"current_steps": 4289, "total_steps": 8674, "loss": 0.449156790971756, "lr": 1.10124173560538e-06, "epoch": 0.9889324417800323, "percentage": 49.45, "elapsed_time": "2:55:29", "remaining_time": "2:59:24"} +{"current_steps": 4290, "total_steps": 8674, "loss": 0.4736451506614685, "lr": 1.1008624259874177e-06, "epoch": 0.989163015909615, "percentage": 49.46, "elapsed_time": "2:55:31", "remaining_time": "2:59:22"} +{"current_steps": 4291, "total_steps": 8674, "loss": 0.3988722860813141, "lr": 1.10048310170808e-06, "epoch": 0.9893935900391976, "percentage": 49.47, "elapsed_time": "2:55:34", "remaining_time": "2:59:19"} +{"current_steps": 4292, "total_steps": 8674, "loss": 0.44330862164497375, "lr": 1.100103762822505e-06, "epoch": 
0.9896241641687803, "percentage": 49.48, "elapsed_time": "2:55:36", "remaining_time": "2:59:17"} +{"current_steps": 4293, "total_steps": 8674, "loss": 0.5294286608695984, "lr": 1.0997244093858336e-06, "epoch": 0.9898547382983629, "percentage": 49.49, "elapsed_time": "2:55:39", "remaining_time": "2:59:15"} +{"current_steps": 4294, "total_steps": 8674, "loss": 0.463120698928833, "lr": 1.0993450414532082e-06, "epoch": 0.9900853124279456, "percentage": 49.5, "elapsed_time": "2:55:41", "remaining_time": "2:59:12"} +{"current_steps": 4295, "total_steps": 8674, "loss": 0.4481865167617798, "lr": 1.0989656590797747e-06, "epoch": 0.9903158865575282, "percentage": 49.52, "elapsed_time": "2:55:44", "remaining_time": "2:59:10"} +{"current_steps": 4296, "total_steps": 8674, "loss": 0.4467630386352539, "lr": 1.0985862623206794e-06, "epoch": 0.9905464606871109, "percentage": 49.53, "elapsed_time": "2:55:46", "remaining_time": "2:59:07"} +{"current_steps": 4297, "total_steps": 8674, "loss": 0.43485027551651, "lr": 1.0982068512310717e-06, "epoch": 0.9907770348166935, "percentage": 49.54, "elapsed_time": "2:55:48", "remaining_time": "2:59:05"} +{"current_steps": 4298, "total_steps": 8674, "loss": 0.4556450843811035, "lr": 1.0978274258661032e-06, "epoch": 0.9910076089462763, "percentage": 49.55, "elapsed_time": "2:55:51", "remaining_time": "2:59:02"} +{"current_steps": 4299, "total_steps": 8674, "loss": 0.48326122760772705, "lr": 1.0974479862809268e-06, "epoch": 0.9912381830758589, "percentage": 49.56, "elapsed_time": "2:55:53", "remaining_time": "2:59:00"} +{"current_steps": 4300, "total_steps": 8674, "loss": 0.42254534363746643, "lr": 1.097068532530698e-06, "epoch": 0.9914687572054416, "percentage": 49.57, "elapsed_time": "2:55:56", "remaining_time": "2:58:57"} +{"current_steps": 4301, "total_steps": 8674, "loss": 0.4076887369155884, "lr": 1.096689064670574e-06, "epoch": 0.9916993313350242, "percentage": 49.58, "elapsed_time": "2:56:00", "remaining_time": "2:58:56"} 
+{"current_steps": 4302, "total_steps": 8674, "loss": 0.40615612268447876, "lr": 1.0963095827557146e-06, "epoch": 0.9919299054646069, "percentage": 49.6, "elapsed_time": "2:56:02", "remaining_time": "2:58:54"} +{"current_steps": 4303, "total_steps": 8674, "loss": 0.47794467210769653, "lr": 1.095930086841281e-06, "epoch": 0.9921604795941895, "percentage": 49.61, "elapsed_time": "2:56:05", "remaining_time": "2:58:52"} +{"current_steps": 4304, "total_steps": 8674, "loss": 0.4927758574485779, "lr": 1.0955505769824375e-06, "epoch": 0.9923910537237722, "percentage": 49.62, "elapsed_time": "2:56:07", "remaining_time": "2:58:49"} +{"current_steps": 4305, "total_steps": 8674, "loss": 0.40777790546417236, "lr": 1.0951710532343493e-06, "epoch": 0.9926216278533548, "percentage": 49.63, "elapsed_time": "2:56:10", "remaining_time": "2:58:47"} +{"current_steps": 4306, "total_steps": 8674, "loss": 0.41996532678604126, "lr": 1.0947915156521837e-06, "epoch": 0.9928522019829376, "percentage": 49.64, "elapsed_time": "2:56:12", "remaining_time": "2:58:44"} +{"current_steps": 4307, "total_steps": 8674, "loss": 0.4366680383682251, "lr": 1.0944119642911107e-06, "epoch": 0.9930827761125202, "percentage": 49.65, "elapsed_time": "2:56:15", "remaining_time": "2:58:42"} +{"current_steps": 4308, "total_steps": 8674, "loss": 0.5350530743598938, "lr": 1.094032399206302e-06, "epoch": 0.9933133502421029, "percentage": 49.67, "elapsed_time": "2:56:17", "remaining_time": "2:58:39"} +{"current_steps": 4309, "total_steps": 8674, "loss": 0.5166209936141968, "lr": 1.093652820452931e-06, "epoch": 0.9935439243716855, "percentage": 49.68, "elapsed_time": "2:56:19", "remaining_time": "2:58:37"} +{"current_steps": 4310, "total_steps": 8674, "loss": 0.5104992389678955, "lr": 1.0932732280861734e-06, "epoch": 0.9937744985012682, "percentage": 49.69, "elapsed_time": "2:56:22", "remaining_time": "2:58:35"} +{"current_steps": 4311, "total_steps": 8674, "loss": 0.38249820470809937, "lr": 1.0928936221612068e-06, 
"epoch": 0.9940050726308508, "percentage": 49.7, "elapsed_time": "2:56:25", "remaining_time": "2:58:32"} +{"current_steps": 4312, "total_steps": 8674, "loss": 0.4930746555328369, "lr": 1.0925140027332107e-06, "epoch": 0.9942356467604335, "percentage": 49.71, "elapsed_time": "2:56:27", "remaining_time": "2:58:30"} +{"current_steps": 4313, "total_steps": 8674, "loss": 0.46536654233932495, "lr": 1.092134369857367e-06, "epoch": 0.9944662208900161, "percentage": 49.72, "elapsed_time": "2:56:29", "remaining_time": "2:58:27"} +{"current_steps": 4314, "total_steps": 8674, "loss": 0.4591559171676636, "lr": 1.0917547235888582e-06, "epoch": 0.9946967950195988, "percentage": 49.73, "elapsed_time": "2:56:32", "remaining_time": "2:58:25"} +{"current_steps": 4315, "total_steps": 8674, "loss": 0.5034719705581665, "lr": 1.0913750639828709e-06, "epoch": 0.9949273691491815, "percentage": 49.75, "elapsed_time": "2:56:34", "remaining_time": "2:58:22"} +{"current_steps": 4316, "total_steps": 8674, "loss": 0.5289135575294495, "lr": 1.0909953910945921e-06, "epoch": 0.9951579432787642, "percentage": 49.76, "elapsed_time": "2:56:37", "remaining_time": "2:58:20"} +{"current_steps": 4317, "total_steps": 8674, "loss": 0.48736900091171265, "lr": 1.090615704979211e-06, "epoch": 0.9953885174083468, "percentage": 49.77, "elapsed_time": "2:56:39", "remaining_time": "2:58:17"} +{"current_steps": 4318, "total_steps": 8674, "loss": 0.44812899827957153, "lr": 1.0902360056919186e-06, "epoch": 0.9956190915379295, "percentage": 49.78, "elapsed_time": "2:56:42", "remaining_time": "2:58:15"} +{"current_steps": 4319, "total_steps": 8674, "loss": 0.42837953567504883, "lr": 1.0898562932879083e-06, "epoch": 0.9958496656675121, "percentage": 49.79, "elapsed_time": "2:56:44", "remaining_time": "2:58:12"} +{"current_steps": 4320, "total_steps": 8674, "loss": 0.4946538805961609, "lr": 1.089476567822375e-06, "epoch": 0.9960802397970948, "percentage": 49.8, "elapsed_time": "2:56:46", "remaining_time": "2:58:10"} 
+{"current_steps": 4321, "total_steps": 8674, "loss": 0.472694993019104, "lr": 1.089096829350516e-06, "epoch": 0.9963108139266774, "percentage": 49.82, "elapsed_time": "2:56:49", "remaining_time": "2:58:07"} +{"current_steps": 4322, "total_steps": 8674, "loss": 0.546560525894165, "lr": 1.0887170779275297e-06, "epoch": 0.9965413880562601, "percentage": 49.83, "elapsed_time": "2:56:51", "remaining_time": "2:58:05"} +{"current_steps": 4323, "total_steps": 8674, "loss": 0.5098580718040466, "lr": 1.088337313608617e-06, "epoch": 0.9967719621858427, "percentage": 49.84, "elapsed_time": "2:56:54", "remaining_time": "2:58:02"} +{"current_steps": 4324, "total_steps": 8674, "loss": 0.4127371907234192, "lr": 1.0879575364489807e-06, "epoch": 0.9970025363154255, "percentage": 49.85, "elapsed_time": "2:56:56", "remaining_time": "2:58:00"} +{"current_steps": 4325, "total_steps": 8674, "loss": 0.41234201192855835, "lr": 1.0875777465038249e-06, "epoch": 0.9972331104450081, "percentage": 49.86, "elapsed_time": "2:56:58", "remaining_time": "2:57:57"} +{"current_steps": 4326, "total_steps": 8674, "loss": 0.42657697200775146, "lr": 1.087197943828356e-06, "epoch": 0.9974636845745908, "percentage": 49.87, "elapsed_time": "2:57:01", "remaining_time": "2:57:55"} +{"current_steps": 4327, "total_steps": 8674, "loss": 0.5168975591659546, "lr": 1.0868181284777825e-06, "epoch": 0.9976942587041734, "percentage": 49.88, "elapsed_time": "2:57:03", "remaining_time": "2:57:53"} +{"current_steps": 4328, "total_steps": 8674, "loss": 0.4712294340133667, "lr": 1.0864383005073142e-06, "epoch": 0.9979248328337561, "percentage": 49.9, "elapsed_time": "2:57:06", "remaining_time": "2:57:50"} +{"current_steps": 4329, "total_steps": 8674, "loss": 0.4685649871826172, "lr": 1.0860584599721624e-06, "epoch": 0.9981554069633387, "percentage": 49.91, "elapsed_time": "2:57:08", "remaining_time": "2:57:48"} +{"current_steps": 4330, "total_steps": 8674, "loss": 0.4699268937110901, "lr": 1.0856786069275417e-06, "epoch": 
0.9983859810929214, "percentage": 49.92, "elapsed_time": "2:57:11", "remaining_time": "2:57:45"} +{"current_steps": 4331, "total_steps": 8674, "loss": 0.44216299057006836, "lr": 1.0852987414286669e-06, "epoch": 0.998616555222504, "percentage": 49.93, "elapsed_time": "2:57:13", "remaining_time": "2:57:43"} +{"current_steps": 4332, "total_steps": 8674, "loss": 0.4374035894870758, "lr": 1.0849188635307558e-06, "epoch": 0.9988471293520867, "percentage": 49.94, "elapsed_time": "2:57:16", "remaining_time": "2:57:40"} +{"current_steps": 4333, "total_steps": 8674, "loss": 0.4538502097129822, "lr": 1.0845389732890269e-06, "epoch": 0.9990777034816694, "percentage": 49.95, "elapsed_time": "2:57:18", "remaining_time": "2:57:38"} +{"current_steps": 4334, "total_steps": 8674, "loss": 0.4432523250579834, "lr": 1.0841590707587017e-06, "epoch": 0.9993082776112521, "percentage": 49.97, "elapsed_time": "2:57:21", "remaining_time": "2:57:35"} +{"current_steps": 4335, "total_steps": 8674, "loss": 0.3614054322242737, "lr": 1.0837791559950026e-06, "epoch": 0.9995388517408347, "percentage": 49.98, "elapsed_time": "2:57:23", "remaining_time": "2:57:33"} +{"current_steps": 4336, "total_steps": 8674, "loss": 0.5412651300430298, "lr": 1.0833992290531542e-06, "epoch": 0.9997694258704174, "percentage": 49.99, "elapsed_time": "2:57:26", "remaining_time": "2:57:30"} +{"current_steps": 4337, "total_steps": 8674, "loss": 0.43333327770233154, "lr": 1.0830192899883825e-06, "epoch": 1.0, "percentage": 50.0, "elapsed_time": "2:57:28", "remaining_time": "2:57:28"} +{"current_steps": 4338, "total_steps": 8674, "loss": 0.40433377027511597, "lr": 1.0826393388559156e-06, "epoch": 1.0002305741295827, "percentage": 50.01, "elapsed_time": "2:57:30", "remaining_time": "2:57:26"} +{"current_steps": 4339, "total_steps": 8674, "loss": 0.49699902534484863, "lr": 1.0822593757109835e-06, "epoch": 1.0004611482591652, "percentage": 50.02, "elapsed_time": "2:57:33", "remaining_time": "2:57:23"} +{"current_steps": 4340, 
"total_steps": 8674, "loss": 0.4992629289627075, "lr": 1.0818794006088174e-06, "epoch": 1.000691722388748, "percentage": 50.03, "elapsed_time": "2:57:35", "remaining_time": "2:57:21"} +{"current_steps": 4341, "total_steps": 8674, "loss": 0.39532744884490967, "lr": 1.0814994136046503e-06, "epoch": 1.0009222965183306, "percentage": 50.05, "elapsed_time": "2:57:38", "remaining_time": "2:57:18"} +{"current_steps": 4342, "total_steps": 8674, "loss": 0.48260024189949036, "lr": 1.0811194147537177e-06, "epoch": 1.0011528706479134, "percentage": 50.06, "elapsed_time": "2:57:40", "remaining_time": "2:57:15"} +{"current_steps": 4343, "total_steps": 8674, "loss": 0.40896737575531006, "lr": 1.0807394041112562e-06, "epoch": 1.0013834447774959, "percentage": 50.07, "elapsed_time": "2:57:43", "remaining_time": "2:57:13"} +{"current_steps": 4344, "total_steps": 8674, "loss": 0.361757755279541, "lr": 1.0803593817325037e-06, "epoch": 1.0016140189070786, "percentage": 50.08, "elapsed_time": "2:57:45", "remaining_time": "2:57:11"} +{"current_steps": 4345, "total_steps": 8674, "loss": 0.5524640083312988, "lr": 1.0799793476727006e-06, "epoch": 1.0018445930366613, "percentage": 50.09, "elapsed_time": "2:57:48", "remaining_time": "2:57:08"} +{"current_steps": 4346, "total_steps": 8674, "loss": 0.4798622727394104, "lr": 1.0795993019870891e-06, "epoch": 1.002075167166244, "percentage": 50.1, "elapsed_time": "2:57:50", "remaining_time": "2:57:06"} +{"current_steps": 4347, "total_steps": 8674, "loss": 0.3408532440662384, "lr": 1.079219244730912e-06, "epoch": 1.0023057412958265, "percentage": 50.12, "elapsed_time": "2:57:52", "remaining_time": "2:57:03"} +{"current_steps": 4348, "total_steps": 8674, "loss": 0.4185452461242676, "lr": 1.0788391759594152e-06, "epoch": 1.0025363154254092, "percentage": 50.13, "elapsed_time": "2:57:55", "remaining_time": "2:57:01"} +{"current_steps": 4349, "total_steps": 8674, "loss": 0.4656596779823303, "lr": 1.078459095727845e-06, "epoch": 1.002766889554992, 
"percentage": 50.14, "elapsed_time": "2:57:57", "remaining_time": "2:56:58"} +{"current_steps": 4350, "total_steps": 8674, "loss": 0.45649081468582153, "lr": 1.07807900409145e-06, "epoch": 1.0029974636845747, "percentage": 50.15, "elapsed_time": "2:58:00", "remaining_time": "2:56:56"} +{"current_steps": 4351, "total_steps": 8674, "loss": 0.4732903242111206, "lr": 1.0776989011054806e-06, "epoch": 1.0032280378141571, "percentage": 50.16, "elapsed_time": "2:58:02", "remaining_time": "2:56:53"} +{"current_steps": 4352, "total_steps": 8674, "loss": 0.5313757658004761, "lr": 1.0773187868251882e-06, "epoch": 1.0034586119437399, "percentage": 50.17, "elapsed_time": "2:58:05", "remaining_time": "2:56:51"} +{"current_steps": 4353, "total_steps": 8674, "loss": 0.5373719334602356, "lr": 1.0769386613058267e-06, "epoch": 1.0036891860733226, "percentage": 50.18, "elapsed_time": "2:58:07", "remaining_time": "2:56:49"} +{"current_steps": 4354, "total_steps": 8674, "loss": 0.4530528783798218, "lr": 1.076558524602651e-06, "epoch": 1.0039197602029053, "percentage": 50.2, "elapsed_time": "2:58:10", "remaining_time": "2:56:46"} +{"current_steps": 4355, "total_steps": 8674, "loss": 0.361511766910553, "lr": 1.076178376770918e-06, "epoch": 1.0041503343324878, "percentage": 50.21, "elapsed_time": "2:58:12", "remaining_time": "2:56:44"} +{"current_steps": 4356, "total_steps": 8674, "loss": 0.4260486364364624, "lr": 1.0757982178658857e-06, "epoch": 1.0043809084620705, "percentage": 50.22, "elapsed_time": "2:58:14", "remaining_time": "2:56:41"} +{"current_steps": 4357, "total_steps": 8674, "loss": 0.4765712320804596, "lr": 1.0754180479428142e-06, "epoch": 1.0046114825916532, "percentage": 50.23, "elapsed_time": "2:58:17", "remaining_time": "2:56:39"} +{"current_steps": 4358, "total_steps": 8674, "loss": 0.485443115234375, "lr": 1.0750378670569652e-06, "epoch": 1.004842056721236, "percentage": 50.24, "elapsed_time": "2:58:19", "remaining_time": "2:56:36"} +{"current_steps": 4359, "total_steps": 
8674, "loss": 0.5010418891906738, "lr": 1.074657675263602e-06, "epoch": 1.0050726308508184, "percentage": 50.25, "elapsed_time": "2:58:22", "remaining_time": "2:56:34"} +{"current_steps": 4360, "total_steps": 8674, "loss": 0.42195719480514526, "lr": 1.074277472617989e-06, "epoch": 1.0053032049804012, "percentage": 50.27, "elapsed_time": "2:58:24", "remaining_time": "2:56:31"} +{"current_steps": 4361, "total_steps": 8674, "loss": 0.48555606603622437, "lr": 1.073897259175392e-06, "epoch": 1.0055337791099839, "percentage": 50.28, "elapsed_time": "2:58:27", "remaining_time": "2:56:29"} +{"current_steps": 4362, "total_steps": 8674, "loss": 0.4991112947463989, "lr": 1.07351703499108e-06, "epoch": 1.0057643532395666, "percentage": 50.29, "elapsed_time": "2:58:29", "remaining_time": "2:56:26"} +{"current_steps": 4363, "total_steps": 8674, "loss": 0.43016430735588074, "lr": 1.0731368001203217e-06, "epoch": 1.005994927369149, "percentage": 50.3, "elapsed_time": "2:58:32", "remaining_time": "2:56:24"} +{"current_steps": 4364, "total_steps": 8674, "loss": 0.47147876024246216, "lr": 1.0727565546183883e-06, "epoch": 1.0062255014987318, "percentage": 50.31, "elapsed_time": "2:58:34", "remaining_time": "2:56:21"} +{"current_steps": 4365, "total_steps": 8674, "loss": 0.4695407748222351, "lr": 1.0723762985405522e-06, "epoch": 1.0064560756283145, "percentage": 50.32, "elapsed_time": "2:58:36", "remaining_time": "2:56:19"} +{"current_steps": 4366, "total_steps": 8674, "loss": 0.42666512727737427, "lr": 1.0719960319420878e-06, "epoch": 1.0066866497578972, "percentage": 50.33, "elapsed_time": "2:58:39", "remaining_time": "2:56:16"} +{"current_steps": 4367, "total_steps": 8674, "loss": 0.5685237050056458, "lr": 1.0716157548782705e-06, "epoch": 1.0069172238874797, "percentage": 50.35, "elapsed_time": "2:58:41", "remaining_time": "2:56:14"} +{"current_steps": 4368, "total_steps": 8674, "loss": 0.45181894302368164, "lr": 1.0712354674043774e-06, "epoch": 1.0071477980170624, "percentage": 
50.36, "elapsed_time": "2:58:44", "remaining_time": "2:56:12"} +{"current_steps": 4369, "total_steps": 8674, "loss": 0.4079795479774475, "lr": 1.070855169575687e-06, "epoch": 1.0073783721466452, "percentage": 50.37, "elapsed_time": "2:58:46", "remaining_time": "2:56:09"} +{"current_steps": 4370, "total_steps": 8674, "loss": 0.4011094570159912, "lr": 1.0704748614474798e-06, "epoch": 1.0076089462762279, "percentage": 50.38, "elapsed_time": "2:58:49", "remaining_time": "2:56:07"} +{"current_steps": 4371, "total_steps": 8674, "loss": 0.48842671513557434, "lr": 1.0700945430750373e-06, "epoch": 1.0078395204058104, "percentage": 50.39, "elapsed_time": "2:58:51", "remaining_time": "2:56:04"} +{"current_steps": 4372, "total_steps": 8674, "loss": 0.5183907151222229, "lr": 1.0697142145136425e-06, "epoch": 1.008070094535393, "percentage": 50.4, "elapsed_time": "2:58:54", "remaining_time": "2:56:02"} +{"current_steps": 4373, "total_steps": 8674, "loss": 0.5022784471511841, "lr": 1.0693338758185797e-06, "epoch": 1.0083006686649758, "percentage": 50.42, "elapsed_time": "2:58:56", "remaining_time": "2:55:59"} +{"current_steps": 4374, "total_steps": 8674, "loss": 0.500054121017456, "lr": 1.0689535270451358e-06, "epoch": 1.0085312427945585, "percentage": 50.43, "elapsed_time": "2:58:59", "remaining_time": "2:55:57"} +{"current_steps": 4375, "total_steps": 8674, "loss": 0.43674880266189575, "lr": 1.068573168248598e-06, "epoch": 1.008761816924141, "percentage": 50.44, "elapsed_time": "2:59:01", "remaining_time": "2:55:54"} +{"current_steps": 4376, "total_steps": 8674, "loss": 0.4272059202194214, "lr": 1.068192799484255e-06, "epoch": 1.0089923910537237, "percentage": 50.45, "elapsed_time": "2:59:03", "remaining_time": "2:55:52"} +{"current_steps": 4377, "total_steps": 8674, "loss": 0.41053932905197144, "lr": 1.0678124208073972e-06, "epoch": 1.0092229651833065, "percentage": 50.46, "elapsed_time": "2:59:06", "remaining_time": "2:55:50"} +{"current_steps": 4378, "total_steps": 8674, 
"loss": 0.4571593701839447, "lr": 1.0674320322733173e-06, "epoch": 1.0094535393128892, "percentage": 50.47, "elapsed_time": "2:59:09", "remaining_time": "2:55:47"} +{"current_steps": 4379, "total_steps": 8674, "loss": 0.464965283870697, "lr": 1.0670516339373081e-06, "epoch": 1.0096841134424717, "percentage": 50.48, "elapsed_time": "2:59:11", "remaining_time": "2:55:45"} +{"current_steps": 4380, "total_steps": 8674, "loss": 0.4086726903915405, "lr": 1.0666712258546639e-06, "epoch": 1.0099146875720544, "percentage": 50.5, "elapsed_time": "2:59:13", "remaining_time": "2:55:42"} +{"current_steps": 4381, "total_steps": 8674, "loss": 0.49988412857055664, "lr": 1.0662908080806815e-06, "epoch": 1.010145261701637, "percentage": 50.51, "elapsed_time": "2:59:16", "remaining_time": "2:55:40"} +{"current_steps": 4382, "total_steps": 8674, "loss": 0.3976360559463501, "lr": 1.0659103806706587e-06, "epoch": 1.0103758358312198, "percentage": 50.52, "elapsed_time": "2:59:19", "remaining_time": "2:55:38"} +{"current_steps": 4383, "total_steps": 8674, "loss": 0.4500683546066284, "lr": 1.065529943679894e-06, "epoch": 1.0106064099608023, "percentage": 50.53, "elapsed_time": "2:59:21", "remaining_time": "2:55:35"} +{"current_steps": 4384, "total_steps": 8674, "loss": 0.5617754459381104, "lr": 1.0651494971636875e-06, "epoch": 1.010836984090385, "percentage": 50.54, "elapsed_time": "2:59:23", "remaining_time": "2:55:33"} +{"current_steps": 4385, "total_steps": 8674, "loss": 0.4180886745452881, "lr": 1.0647690411773414e-06, "epoch": 1.0110675582199677, "percentage": 50.55, "elapsed_time": "2:59:26", "remaining_time": "2:55:30"} +{"current_steps": 4386, "total_steps": 8674, "loss": 0.406663179397583, "lr": 1.0643885757761588e-06, "epoch": 1.0112981323495505, "percentage": 50.56, "elapsed_time": "2:59:29", "remaining_time": "2:55:28"} +{"current_steps": 4387, "total_steps": 8674, "loss": 0.4698946475982666, "lr": 1.0640081010154443e-06, "epoch": 1.011528706479133, "percentage": 50.58, 
"elapsed_time": "2:59:31", "remaining_time": "2:55:25"} +{"current_steps": 4388, "total_steps": 8674, "loss": 0.4845995306968689, "lr": 1.0636276169505034e-06, "epoch": 1.0117592806087157, "percentage": 50.59, "elapsed_time": "2:59:33", "remaining_time": "2:55:23"} +{"current_steps": 4389, "total_steps": 8674, "loss": 0.5065066814422607, "lr": 1.0632471236366435e-06, "epoch": 1.0119898547382984, "percentage": 50.6, "elapsed_time": "2:59:36", "remaining_time": "2:55:21"} +{"current_steps": 4390, "total_steps": 8674, "loss": 0.4302946925163269, "lr": 1.0628666211291735e-06, "epoch": 1.012220428867881, "percentage": 50.61, "elapsed_time": "2:59:38", "remaining_time": "2:55:18"} +{"current_steps": 4391, "total_steps": 8674, "loss": 0.5772345066070557, "lr": 1.0624861094834029e-06, "epoch": 1.0124510029974636, "percentage": 50.62, "elapsed_time": "2:59:41", "remaining_time": "2:55:16"} +{"current_steps": 4392, "total_steps": 8674, "loss": 0.5294336080551147, "lr": 1.0621055887546425e-06, "epoch": 1.0126815771270463, "percentage": 50.63, "elapsed_time": "2:59:43", "remaining_time": "2:55:13"} +{"current_steps": 4393, "total_steps": 8674, "loss": 0.5028249621391296, "lr": 1.0617250589982059e-06, "epoch": 1.012912151256629, "percentage": 50.65, "elapsed_time": "2:59:46", "remaining_time": "2:55:11"} +{"current_steps": 4394, "total_steps": 8674, "loss": 0.5072348713874817, "lr": 1.0613445202694065e-06, "epoch": 1.0131427253862118, "percentage": 50.66, "elapsed_time": "2:59:48", "remaining_time": "2:55:08"} +{"current_steps": 4395, "total_steps": 8674, "loss": 0.3632262945175171, "lr": 1.060963972623559e-06, "epoch": 1.0133732995157942, "percentage": 50.67, "elapsed_time": "2:59:51", "remaining_time": "2:55:06"} +{"current_steps": 4396, "total_steps": 8674, "loss": 0.419277161359787, "lr": 1.06058341611598e-06, "epoch": 1.013603873645377, "percentage": 50.68, "elapsed_time": "2:59:53", "remaining_time": "2:55:03"} +{"current_steps": 4397, "total_steps": 8674, "loss": 
0.4056069850921631, "lr": 1.060202850801988e-06, "epoch": 1.0138344477749597, "percentage": 50.69, "elapsed_time": "2:59:55", "remaining_time": "2:55:01"} +{"current_steps": 4398, "total_steps": 8674, "loss": 0.5591505765914917, "lr": 1.0598222767369014e-06, "epoch": 1.0140650219045424, "percentage": 50.7, "elapsed_time": "2:59:58", "remaining_time": "2:54:58"} +{"current_steps": 4399, "total_steps": 8674, "loss": 0.38529443740844727, "lr": 1.0594416939760408e-06, "epoch": 1.014295596034125, "percentage": 50.71, "elapsed_time": "3:00:00", "remaining_time": "2:54:56"} +{"current_steps": 4400, "total_steps": 8674, "loss": 0.3609437644481659, "lr": 1.0590611025747272e-06, "epoch": 1.0145261701637076, "percentage": 50.73, "elapsed_time": "3:00:03", "remaining_time": "2:54:53"} +{"current_steps": 4401, "total_steps": 8674, "loss": 0.4849050045013428, "lr": 1.058680502588284e-06, "epoch": 1.0147567442932903, "percentage": 50.74, "elapsed_time": "3:00:06", "remaining_time": "2:54:52"} +{"current_steps": 4402, "total_steps": 8674, "loss": 0.39454251527786255, "lr": 1.058299894072035e-06, "epoch": 1.014987318422873, "percentage": 50.75, "elapsed_time": "3:00:09", "remaining_time": "2:54:50"} +{"current_steps": 4403, "total_steps": 8674, "loss": 0.39726459980010986, "lr": 1.0579192770813052e-06, "epoch": 1.0152178925524555, "percentage": 50.76, "elapsed_time": "3:00:12", "remaining_time": "2:54:47"} +{"current_steps": 4404, "total_steps": 8674, "loss": 0.4730626940727234, "lr": 1.0575386516714218e-06, "epoch": 1.0154484666820383, "percentage": 50.77, "elapsed_time": "3:00:14", "remaining_time": "2:54:45"} +{"current_steps": 4405, "total_steps": 8674, "loss": 0.5436214804649353, "lr": 1.0571580178977123e-06, "epoch": 1.015679040811621, "percentage": 50.78, "elapsed_time": "3:00:16", "remaining_time": "2:54:42"} +{"current_steps": 4406, "total_steps": 8674, "loss": 0.4197273850440979, "lr": 1.0567773758155055e-06, "epoch": 1.0159096149412037, "percentage": 50.8, 
"elapsed_time": "3:00:19", "remaining_time": "2:54:40"} +{"current_steps": 4407, "total_steps": 8674, "loss": 0.46460944414138794, "lr": 1.0563967254801316e-06, "epoch": 1.0161401890707862, "percentage": 50.81, "elapsed_time": "3:00:21", "remaining_time": "2:54:37"} +{"current_steps": 4408, "total_steps": 8674, "loss": 0.3504630923271179, "lr": 1.056016066946922e-06, "epoch": 1.016370763200369, "percentage": 50.82, "elapsed_time": "3:00:24", "remaining_time": "2:54:35"} +{"current_steps": 4409, "total_steps": 8674, "loss": 0.4620180130004883, "lr": 1.0556354002712098e-06, "epoch": 1.0166013373299516, "percentage": 50.83, "elapsed_time": "3:00:26", "remaining_time": "2:54:32"} +{"current_steps": 4410, "total_steps": 8674, "loss": 0.5642764568328857, "lr": 1.0552547255083283e-06, "epoch": 1.0168319114595343, "percentage": 50.84, "elapsed_time": "3:00:28", "remaining_time": "2:54:30"} +{"current_steps": 4411, "total_steps": 8674, "loss": 0.48283201456069946, "lr": 1.054874042713612e-06, "epoch": 1.0170624855891168, "percentage": 50.85, "elapsed_time": "3:00:31", "remaining_time": "2:54:28"} +{"current_steps": 4412, "total_steps": 8674, "loss": 0.5346091985702515, "lr": 1.0544933519423976e-06, "epoch": 1.0172930597186995, "percentage": 50.86, "elapsed_time": "3:00:34", "remaining_time": "2:54:25"} +{"current_steps": 4413, "total_steps": 8674, "loss": 0.4710259437561035, "lr": 1.0541126532500224e-06, "epoch": 1.0175236338482823, "percentage": 50.88, "elapsed_time": "3:00:36", "remaining_time": "2:54:23"} +{"current_steps": 4414, "total_steps": 8674, "loss": 0.535955548286438, "lr": 1.0537319466918243e-06, "epoch": 1.017754207977865, "percentage": 50.89, "elapsed_time": "3:00:39", "remaining_time": "2:54:20"} +{"current_steps": 4415, "total_steps": 8674, "loss": 0.4127902388572693, "lr": 1.0533512323231438e-06, "epoch": 1.0179847821074475, "percentage": 50.9, "elapsed_time": "3:00:41", "remaining_time": "2:54:18"} +{"current_steps": 4416, "total_steps": 8674, "loss": 
0.5209894180297852, "lr": 1.0529705101993203e-06, "epoch": 1.0182153562370302, "percentage": 50.91, "elapsed_time": "3:00:43", "remaining_time": "2:54:15"} +{"current_steps": 4417, "total_steps": 8674, "loss": 0.45600390434265137, "lr": 1.0525897803756967e-06, "epoch": 1.018445930366613, "percentage": 50.92, "elapsed_time": "3:00:46", "remaining_time": "2:54:13"} +{"current_steps": 4418, "total_steps": 8674, "loss": 0.5043426156044006, "lr": 1.0522090429076155e-06, "epoch": 1.0186765044961956, "percentage": 50.93, "elapsed_time": "3:00:48", "remaining_time": "2:54:10"} +{"current_steps": 4419, "total_steps": 8674, "loss": 0.43386173248291016, "lr": 1.0518282978504207e-06, "epoch": 1.0189070786257781, "percentage": 50.95, "elapsed_time": "3:00:51", "remaining_time": "2:54:08"} +{"current_steps": 4420, "total_steps": 8674, "loss": 0.44956767559051514, "lr": 1.0514475452594578e-06, "epoch": 1.0191376527553608, "percentage": 50.96, "elapsed_time": "3:00:53", "remaining_time": "2:54:06"} +{"current_steps": 4421, "total_steps": 8674, "loss": 0.47164878249168396, "lr": 1.0510667851900726e-06, "epoch": 1.0193682268849436, "percentage": 50.97, "elapsed_time": "3:00:56", "remaining_time": "2:54:03"} +{"current_steps": 4422, "total_steps": 8674, "loss": 0.4977136552333832, "lr": 1.0506860176976127e-06, "epoch": 1.0195988010145263, "percentage": 50.98, "elapsed_time": "3:00:58", "remaining_time": "2:54:01"} +{"current_steps": 4423, "total_steps": 8674, "loss": 0.4344305396080017, "lr": 1.0503052428374264e-06, "epoch": 1.0198293751441088, "percentage": 50.99, "elapsed_time": "3:01:00", "remaining_time": "2:53:58"} +{"current_steps": 4424, "total_steps": 8674, "loss": 0.46536487340927124, "lr": 1.049924460664863e-06, "epoch": 1.0200599492736915, "percentage": 51.0, "elapsed_time": "3:01:03", "remaining_time": "2:53:56"} +{"current_steps": 4425, "total_steps": 8674, "loss": 0.4583844840526581, "lr": 1.0495436712352733e-06, "epoch": 1.0202905234032742, "percentage": 51.01, 
"elapsed_time": "3:01:05", "remaining_time": "2:53:53"} +{"current_steps": 4426, "total_steps": 8674, "loss": 0.4098002314567566, "lr": 1.049162874604009e-06, "epoch": 1.020521097532857, "percentage": 51.03, "elapsed_time": "3:01:08", "remaining_time": "2:53:51"} +{"current_steps": 4427, "total_steps": 8674, "loss": 0.48168665170669556, "lr": 1.0487820708264227e-06, "epoch": 1.0207516716624394, "percentage": 51.04, "elapsed_time": "3:01:10", "remaining_time": "2:53:48"} +{"current_steps": 4428, "total_steps": 8674, "loss": 0.5517562627792358, "lr": 1.048401259957868e-06, "epoch": 1.0209822457920221, "percentage": 51.05, "elapsed_time": "3:01:13", "remaining_time": "2:53:46"} +{"current_steps": 4429, "total_steps": 8674, "loss": 0.5131476521492004, "lr": 1.0480204420536998e-06, "epoch": 1.0212128199216048, "percentage": 51.06, "elapsed_time": "3:01:15", "remaining_time": "2:53:43"} +{"current_steps": 4430, "total_steps": 8674, "loss": 0.4590519666671753, "lr": 1.0476396171692734e-06, "epoch": 1.0214433940511876, "percentage": 51.07, "elapsed_time": "3:01:17", "remaining_time": "2:53:41"} +{"current_steps": 4431, "total_steps": 8674, "loss": 0.5581461191177368, "lr": 1.0472587853599458e-06, "epoch": 1.02167396818077, "percentage": 51.08, "elapsed_time": "3:01:20", "remaining_time": "2:53:38"} +{"current_steps": 4432, "total_steps": 8674, "loss": 0.4169657826423645, "lr": 1.046877946681075e-06, "epoch": 1.0219045423103528, "percentage": 51.1, "elapsed_time": "3:01:22", "remaining_time": "2:53:36"} +{"current_steps": 4433, "total_steps": 8674, "loss": 0.48135459423065186, "lr": 1.0464971011880195e-06, "epoch": 1.0221351164399355, "percentage": 51.11, "elapsed_time": "3:01:25", "remaining_time": "2:53:33"} +{"current_steps": 4434, "total_steps": 8674, "loss": 0.5116040706634521, "lr": 1.046116248936139e-06, "epoch": 1.0223656905695182, "percentage": 51.12, "elapsed_time": "3:01:27", "remaining_time": "2:53:31"} +{"current_steps": 4435, "total_steps": 8674, "loss": 
0.4369809329509735, "lr": 1.0457353899807946e-06, "epoch": 1.0225962646991007, "percentage": 51.13, "elapsed_time": "3:01:29", "remaining_time": "2:53:28"} +{"current_steps": 4436, "total_steps": 8674, "loss": 0.42936772108078003, "lr": 1.0453545243773474e-06, "epoch": 1.0228268388286834, "percentage": 51.14, "elapsed_time": "3:01:32", "remaining_time": "2:53:26"} +{"current_steps": 4437, "total_steps": 8674, "loss": 0.3614712357521057, "lr": 1.0449736521811605e-06, "epoch": 1.0230574129582661, "percentage": 51.15, "elapsed_time": "3:01:34", "remaining_time": "2:53:23"} +{"current_steps": 4438, "total_steps": 8674, "loss": 0.40728119015693665, "lr": 1.0445927734475977e-06, "epoch": 1.0232879870878488, "percentage": 51.16, "elapsed_time": "3:01:37", "remaining_time": "2:53:21"} +{"current_steps": 4439, "total_steps": 8674, "loss": 0.4940561056137085, "lr": 1.0442118882320233e-06, "epoch": 1.0235185612174313, "percentage": 51.18, "elapsed_time": "3:01:39", "remaining_time": "2:53:18"} +{"current_steps": 4440, "total_steps": 8674, "loss": 0.49529674649238586, "lr": 1.0438309965898027e-06, "epoch": 1.023749135347014, "percentage": 51.19, "elapsed_time": "3:01:41", "remaining_time": "2:53:16"} +{"current_steps": 4441, "total_steps": 8674, "loss": 0.4849408268928528, "lr": 1.0434500985763027e-06, "epoch": 1.0239797094765968, "percentage": 51.2, "elapsed_time": "3:01:44", "remaining_time": "2:53:13"} +{"current_steps": 4442, "total_steps": 8674, "loss": 0.4121132791042328, "lr": 1.0430691942468903e-06, "epoch": 1.0242102836061795, "percentage": 51.21, "elapsed_time": "3:01:46", "remaining_time": "2:53:11"} +{"current_steps": 4443, "total_steps": 8674, "loss": 0.4348478317260742, "lr": 1.042688283656934e-06, "epoch": 1.024440857735762, "percentage": 51.22, "elapsed_time": "3:01:49", "remaining_time": "2:53:08"} +{"current_steps": 4444, "total_steps": 8674, "loss": 0.46817919611930847, "lr": 1.0423073668618033e-06, "epoch": 1.0246714318653447, "percentage": 51.23, 
"elapsed_time": "3:01:51", "remaining_time": "2:53:06"} +{"current_steps": 4445, "total_steps": 8674, "loss": 0.4422008991241455, "lr": 1.041926443916868e-06, "epoch": 1.0249020059949274, "percentage": 51.25, "elapsed_time": "3:01:54", "remaining_time": "2:53:03"} +{"current_steps": 4446, "total_steps": 8674, "loss": 0.5108183026313782, "lr": 1.041545514877499e-06, "epoch": 1.02513258012451, "percentage": 51.26, "elapsed_time": "3:01:56", "remaining_time": "2:53:01"} +{"current_steps": 4447, "total_steps": 8674, "loss": 0.4759529232978821, "lr": 1.0411645797990685e-06, "epoch": 1.0253631542540926, "percentage": 51.27, "elapsed_time": "3:01:59", "remaining_time": "2:52:58"} +{"current_steps": 4448, "total_steps": 8674, "loss": 0.44447648525238037, "lr": 1.040783638736949e-06, "epoch": 1.0255937283836754, "percentage": 51.28, "elapsed_time": "3:02:01", "remaining_time": "2:52:56"} +{"current_steps": 4449, "total_steps": 8674, "loss": 0.4558752477169037, "lr": 1.0404026917465144e-06, "epoch": 1.025824302513258, "percentage": 51.29, "elapsed_time": "3:02:03", "remaining_time": "2:52:53"} +{"current_steps": 4450, "total_steps": 8674, "loss": 0.4728459417819977, "lr": 1.0400217388831393e-06, "epoch": 1.0260548766428408, "percentage": 51.3, "elapsed_time": "3:02:06", "remaining_time": "2:52:51"} +{"current_steps": 4451, "total_steps": 8674, "loss": 0.4815519452095032, "lr": 1.0396407802021985e-06, "epoch": 1.0262854507724233, "percentage": 51.31, "elapsed_time": "3:02:08", "remaining_time": "2:52:48"} +{"current_steps": 4452, "total_steps": 8674, "loss": 0.5173656344413757, "lr": 1.0392598157590685e-06, "epoch": 1.026516024902006, "percentage": 51.33, "elapsed_time": "3:02:11", "remaining_time": "2:52:46"} +{"current_steps": 4453, "total_steps": 8674, "loss": 0.5280762910842896, "lr": 1.0388788456091267e-06, "epoch": 1.0267465990315887, "percentage": 51.34, "elapsed_time": "3:02:13", "remaining_time": "2:52:44"} +{"current_steps": 4454, "total_steps": 8674, "loss": 
0.4524118900299072, "lr": 1.0384978698077506e-06, "epoch": 1.0269771731611712, "percentage": 51.35, "elapsed_time": "3:02:16", "remaining_time": "2:52:41"} +{"current_steps": 4455, "total_steps": 8674, "loss": 0.4011715054512024, "lr": 1.0381168884103186e-06, "epoch": 1.027207747290754, "percentage": 51.36, "elapsed_time": "3:02:18", "remaining_time": "2:52:39"} +{"current_steps": 4456, "total_steps": 8674, "loss": 0.518020749092102, "lr": 1.0377359014722108e-06, "epoch": 1.0274383214203366, "percentage": 51.37, "elapsed_time": "3:02:21", "remaining_time": "2:52:36"} +{"current_steps": 4457, "total_steps": 8674, "loss": 0.44726112484931946, "lr": 1.0373549090488073e-06, "epoch": 1.0276688955499194, "percentage": 51.38, "elapsed_time": "3:02:23", "remaining_time": "2:52:34"} +{"current_steps": 4458, "total_steps": 8674, "loss": 0.5344264507293701, "lr": 1.0369739111954894e-06, "epoch": 1.0278994696795019, "percentage": 51.39, "elapsed_time": "3:02:26", "remaining_time": "2:52:32"} +{"current_steps": 4459, "total_steps": 8674, "loss": 0.4902813732624054, "lr": 1.0365929079676387e-06, "epoch": 1.0281300438090846, "percentage": 51.41, "elapsed_time": "3:02:28", "remaining_time": "2:52:29"} +{"current_steps": 4460, "total_steps": 8674, "loss": 0.38346555829048157, "lr": 1.0362118994206378e-06, "epoch": 1.0283606179386673, "percentage": 51.42, "elapsed_time": "3:02:31", "remaining_time": "2:52:27"} +{"current_steps": 4461, "total_steps": 8674, "loss": 0.4232872724533081, "lr": 1.0358308856098705e-06, "epoch": 1.02859119206825, "percentage": 51.43, "elapsed_time": "3:02:33", "remaining_time": "2:52:24"} +{"current_steps": 4462, "total_steps": 8674, "loss": 0.5184470415115356, "lr": 1.0354498665907207e-06, "epoch": 1.0288217661978325, "percentage": 51.44, "elapsed_time": "3:02:36", "remaining_time": "2:52:22"} +{"current_steps": 4463, "total_steps": 8674, "loss": 0.4989054203033447, "lr": 1.0350688424185733e-06, "epoch": 1.0290523403274152, "percentage": 51.45, 
"elapsed_time": "3:02:38", "remaining_time": "2:52:19"} +{"current_steps": 4464, "total_steps": 8674, "loss": 0.5204064249992371, "lr": 1.0346878131488145e-06, "epoch": 1.029282914456998, "percentage": 51.46, "elapsed_time": "3:02:40", "remaining_time": "2:52:17"} +{"current_steps": 4465, "total_steps": 8674, "loss": 0.47872811555862427, "lr": 1.0343067788368307e-06, "epoch": 1.0295134885865806, "percentage": 51.48, "elapsed_time": "3:02:43", "remaining_time": "2:52:14"} +{"current_steps": 4466, "total_steps": 8674, "loss": 0.4104915261268616, "lr": 1.0339257395380087e-06, "epoch": 1.0297440627161631, "percentage": 51.49, "elapsed_time": "3:02:45", "remaining_time": "2:52:12"} +{"current_steps": 4467, "total_steps": 8674, "loss": 0.39327263832092285, "lr": 1.0335446953077366e-06, "epoch": 1.0299746368457459, "percentage": 51.5, "elapsed_time": "3:02:48", "remaining_time": "2:52:09"} +{"current_steps": 4468, "total_steps": 8674, "loss": 0.4395657777786255, "lr": 1.033163646201403e-06, "epoch": 1.0302052109753286, "percentage": 51.51, "elapsed_time": "3:02:50", "remaining_time": "2:52:07"} +{"current_steps": 4469, "total_steps": 8674, "loss": 0.462537944316864, "lr": 1.0327825922743976e-06, "epoch": 1.0304357851049113, "percentage": 51.52, "elapsed_time": "3:02:53", "remaining_time": "2:52:05"} +{"current_steps": 4470, "total_steps": 8674, "loss": 0.4399976134300232, "lr": 1.03240153358211e-06, "epoch": 1.0306663592344938, "percentage": 51.53, "elapsed_time": "3:02:55", "remaining_time": "2:52:02"} +{"current_steps": 4471, "total_steps": 8674, "loss": 0.4289684593677521, "lr": 1.0320204701799311e-06, "epoch": 1.0308969333640765, "percentage": 51.54, "elapsed_time": "3:02:58", "remaining_time": "2:52:00"} +{"current_steps": 4472, "total_steps": 8674, "loss": 0.4771305322647095, "lr": 1.0316394021232524e-06, "epoch": 1.0311275074936592, "percentage": 51.56, "elapsed_time": "3:03:00", "remaining_time": "2:51:57"} +{"current_steps": 4473, "total_steps": 8674, "loss": 
0.4544849395751953, "lr": 1.031258329467466e-06, "epoch": 1.031358081623242, "percentage": 51.57, "elapsed_time": "3:03:03", "remaining_time": "2:51:55"} +{"current_steps": 4474, "total_steps": 8674, "loss": 0.5362099409103394, "lr": 1.0308772522679646e-06, "epoch": 1.0315886557528244, "percentage": 51.58, "elapsed_time": "3:03:05", "remaining_time": "2:51:52"} +{"current_steps": 4475, "total_steps": 8674, "loss": 0.48966753482818604, "lr": 1.0304961705801413e-06, "epoch": 1.0318192298824072, "percentage": 51.59, "elapsed_time": "3:03:07", "remaining_time": "2:51:50"} +{"current_steps": 4476, "total_steps": 8674, "loss": 0.3750344216823578, "lr": 1.0301150844593908e-06, "epoch": 1.0320498040119899, "percentage": 51.6, "elapsed_time": "3:03:10", "remaining_time": "2:51:47"} +{"current_steps": 4477, "total_steps": 8674, "loss": 0.453983873128891, "lr": 1.0297339939611076e-06, "epoch": 1.0322803781415726, "percentage": 51.61, "elapsed_time": "3:03:12", "remaining_time": "2:51:45"} +{"current_steps": 4478, "total_steps": 8674, "loss": 0.5096027255058289, "lr": 1.029352899140687e-06, "epoch": 1.032510952271155, "percentage": 51.63, "elapsed_time": "3:03:15", "remaining_time": "2:51:42"} +{"current_steps": 4479, "total_steps": 8674, "loss": 0.4387558698654175, "lr": 1.028971800053525e-06, "epoch": 1.0327415264007378, "percentage": 51.64, "elapsed_time": "3:03:17", "remaining_time": "2:51:40"} +{"current_steps": 4480, "total_steps": 8674, "loss": 0.45710843801498413, "lr": 1.0285906967550184e-06, "epoch": 1.0329721005303205, "percentage": 51.65, "elapsed_time": "3:03:20", "remaining_time": "2:51:38"} +{"current_steps": 4481, "total_steps": 8674, "loss": 0.5258994102478027, "lr": 1.0282095893005643e-06, "epoch": 1.0332026746599032, "percentage": 51.66, "elapsed_time": "3:03:22", "remaining_time": "2:51:35"} +{"current_steps": 4482, "total_steps": 8674, "loss": 0.5037236213684082, "lr": 1.0278284777455603e-06, "epoch": 1.0334332487894857, "percentage": 51.67, 
"elapsed_time": "3:03:25", "remaining_time": "2:51:33"} +{"current_steps": 4483, "total_steps": 8674, "loss": 0.4730300307273865, "lr": 1.027447362145405e-06, "epoch": 1.0336638229190684, "percentage": 51.68, "elapsed_time": "3:03:27", "remaining_time": "2:51:30"} +{"current_steps": 4484, "total_steps": 8674, "loss": 0.4373326301574707, "lr": 1.0270662425554974e-06, "epoch": 1.0338943970486512, "percentage": 51.69, "elapsed_time": "3:03:29", "remaining_time": "2:51:28"} +{"current_steps": 4485, "total_steps": 8674, "loss": 0.3915579319000244, "lr": 1.0266851190312373e-06, "epoch": 1.0341249711782339, "percentage": 51.71, "elapsed_time": "3:03:32", "remaining_time": "2:51:25"} +{"current_steps": 4486, "total_steps": 8674, "loss": 0.36588191986083984, "lr": 1.0263039916280247e-06, "epoch": 1.0343555453078164, "percentage": 51.72, "elapsed_time": "3:03:35", "remaining_time": "2:51:23"} +{"current_steps": 4487, "total_steps": 8674, "loss": 0.4287286400794983, "lr": 1.0259228604012602e-06, "epoch": 1.034586119437399, "percentage": 51.73, "elapsed_time": "3:03:37", "remaining_time": "2:51:20"} +{"current_steps": 4488, "total_steps": 8674, "loss": 0.4405861496925354, "lr": 1.0255417254063454e-06, "epoch": 1.0348166935669818, "percentage": 51.74, "elapsed_time": "3:03:39", "remaining_time": "2:51:18"} +{"current_steps": 4489, "total_steps": 8674, "loss": 0.4859738349914551, "lr": 1.0251605866986818e-06, "epoch": 1.0350472676965645, "percentage": 51.75, "elapsed_time": "3:03:42", "remaining_time": "2:51:15"} +{"current_steps": 4490, "total_steps": 8674, "loss": 0.40879446268081665, "lr": 1.0247794443336722e-06, "epoch": 1.035277841826147, "percentage": 51.76, "elapsed_time": "3:03:44", "remaining_time": "2:51:13"} +{"current_steps": 4491, "total_steps": 8674, "loss": 0.44872337579727173, "lr": 1.024398298366719e-06, "epoch": 1.0355084159557297, "percentage": 51.78, "elapsed_time": "3:03:47", "remaining_time": "2:51:10"} +{"current_steps": 4492, "total_steps": 8674, "loss": 
0.41155117750167847, "lr": 1.0240171488532258e-06, "epoch": 1.0357389900853124, "percentage": 51.79, "elapsed_time": "3:03:49", "remaining_time": "2:51:08"} +{"current_steps": 4493, "total_steps": 8674, "loss": 0.48941487073898315, "lr": 1.0236359958485966e-06, "epoch": 1.0359695642148952, "percentage": 51.8, "elapsed_time": "3:03:51", "remaining_time": "2:51:05"} +{"current_steps": 4494, "total_steps": 8674, "loss": 0.4462544322013855, "lr": 1.0232548394082362e-06, "epoch": 1.0362001383444777, "percentage": 51.81, "elapsed_time": "3:03:54", "remaining_time": "2:51:03"} +{"current_steps": 4495, "total_steps": 8674, "loss": 0.3791837692260742, "lr": 1.0228736795875487e-06, "epoch": 1.0364307124740604, "percentage": 51.82, "elapsed_time": "3:03:56", "remaining_time": "2:51:01"} +{"current_steps": 4496, "total_steps": 8674, "loss": 0.5037285685539246, "lr": 1.0224925164419404e-06, "epoch": 1.036661286603643, "percentage": 51.83, "elapsed_time": "3:03:59", "remaining_time": "2:50:58"} +{"current_steps": 4497, "total_steps": 8674, "loss": 0.4762890636920929, "lr": 1.0221113500268169e-06, "epoch": 1.0368918607332258, "percentage": 51.84, "elapsed_time": "3:04:01", "remaining_time": "2:50:56"} +{"current_steps": 4498, "total_steps": 8674, "loss": 0.4673793315887451, "lr": 1.0217301803975844e-06, "epoch": 1.0371224348628083, "percentage": 51.86, "elapsed_time": "3:04:04", "remaining_time": "2:50:53"} +{"current_steps": 4499, "total_steps": 8674, "loss": 0.37522250413894653, "lr": 1.0213490076096501e-06, "epoch": 1.037353008992391, "percentage": 51.87, "elapsed_time": "3:04:06", "remaining_time": "2:50:50"} +{"current_steps": 4500, "total_steps": 8674, "loss": 0.4986375570297241, "lr": 1.020967831718421e-06, "epoch": 1.0375835831219737, "percentage": 51.88, "elapsed_time": "3:04:08", "remaining_time": "2:50:48"} +{"current_steps": 4501, "total_steps": 8674, "loss": 0.488337904214859, "lr": 1.0205866527793053e-06, "epoch": 1.0378141572515565, "percentage": 51.89, 
"elapsed_time": "3:04:17", "remaining_time": "2:50:51"} +{"current_steps": 4502, "total_steps": 8674, "loss": 0.37420767545700073, "lr": 1.0202054708477107e-06, "epoch": 1.038044731381139, "percentage": 51.9, "elapsed_time": "3:04:20", "remaining_time": "2:50:49"} +{"current_steps": 4503, "total_steps": 8674, "loss": 0.42453843355178833, "lr": 1.0198242859790465e-06, "epoch": 1.0382753055107217, "percentage": 51.91, "elapsed_time": "3:04:22", "remaining_time": "2:50:47"} +{"current_steps": 4504, "total_steps": 8674, "loss": 0.4431978166103363, "lr": 1.0194430982287211e-06, "epoch": 1.0385058796403044, "percentage": 51.93, "elapsed_time": "3:04:25", "remaining_time": "2:50:44"} +{"current_steps": 4505, "total_steps": 8674, "loss": 0.5079195499420166, "lr": 1.0190619076521445e-06, "epoch": 1.038736453769887, "percentage": 51.94, "elapsed_time": "3:04:27", "remaining_time": "2:50:42"} +{"current_steps": 4506, "total_steps": 8674, "loss": 0.442915678024292, "lr": 1.0186807143047263e-06, "epoch": 1.0389670278994696, "percentage": 51.95, "elapsed_time": "3:04:29", "remaining_time": "2:50:39"} +{"current_steps": 4507, "total_steps": 8674, "loss": 0.4720972180366516, "lr": 1.018299518241877e-06, "epoch": 1.0391976020290523, "percentage": 51.96, "elapsed_time": "3:04:32", "remaining_time": "2:50:37"} +{"current_steps": 4508, "total_steps": 8674, "loss": 0.4637352526187897, "lr": 1.0179183195190073e-06, "epoch": 1.039428176158635, "percentage": 51.97, "elapsed_time": "3:04:34", "remaining_time": "2:50:34"} +{"current_steps": 4509, "total_steps": 8674, "loss": 0.4207759499549866, "lr": 1.0175371181915283e-06, "epoch": 1.0396587502882177, "percentage": 51.98, "elapsed_time": "3:04:37", "remaining_time": "2:50:32"} +{"current_steps": 4510, "total_steps": 8674, "loss": 0.49227845668792725, "lr": 1.0171559143148514e-06, "epoch": 1.0398893244178002, "percentage": 51.99, "elapsed_time": "3:04:39", "remaining_time": "2:50:29"} +{"current_steps": 4511, "total_steps": 8674, "loss": 
0.5006893873214722, "lr": 1.0167747079443884e-06, "epoch": 1.040119898547383, "percentage": 52.01, "elapsed_time": "3:04:42", "remaining_time": "2:50:27"} +{"current_steps": 4512, "total_steps": 8674, "loss": 0.42048192024230957, "lr": 1.016393499135552e-06, "epoch": 1.0403504726769657, "percentage": 52.02, "elapsed_time": "3:04:44", "remaining_time": "2:50:24"} +{"current_steps": 4513, "total_steps": 8674, "loss": 0.5275895595550537, "lr": 1.0160122879437538e-06, "epoch": 1.0405810468065484, "percentage": 52.03, "elapsed_time": "3:04:46", "remaining_time": "2:50:22"} +{"current_steps": 4514, "total_steps": 8674, "loss": 0.4677985906600952, "lr": 1.0156310744244073e-06, "epoch": 1.0408116209361309, "percentage": 52.04, "elapsed_time": "3:04:49", "remaining_time": "2:50:19"} +{"current_steps": 4515, "total_steps": 8674, "loss": 0.5214150547981262, "lr": 1.015249858632926e-06, "epoch": 1.0410421950657136, "percentage": 52.05, "elapsed_time": "3:04:51", "remaining_time": "2:50:17"} +{"current_steps": 4516, "total_steps": 8674, "loss": 0.40790024399757385, "lr": 1.0148686406247232e-06, "epoch": 1.0412727691952963, "percentage": 52.06, "elapsed_time": "3:04:54", "remaining_time": "2:50:14"} +{"current_steps": 4517, "total_steps": 8674, "loss": 0.5943785309791565, "lr": 1.0144874204552125e-06, "epoch": 1.041503343324879, "percentage": 52.08, "elapsed_time": "3:04:56", "remaining_time": "2:50:12"} +{"current_steps": 4518, "total_steps": 8674, "loss": 0.4590263366699219, "lr": 1.0141061981798086e-06, "epoch": 1.0417339174544615, "percentage": 52.09, "elapsed_time": "3:04:58", "remaining_time": "2:50:09"} +{"current_steps": 4519, "total_steps": 8674, "loss": 0.4106098413467407, "lr": 1.0137249738539257e-06, "epoch": 1.0419644915840443, "percentage": 52.1, "elapsed_time": "3:05:01", "remaining_time": "2:50:07"} +{"current_steps": 4520, "total_steps": 8674, "loss": 0.4730203151702881, "lr": 1.013343747532979e-06, "epoch": 1.042195065713627, "percentage": 52.11, 
"elapsed_time": "3:05:04", "remaining_time": "2:50:04"} +{"current_steps": 4521, "total_steps": 8674, "loss": 0.43245944380760193, "lr": 1.0129625192723833e-06, "epoch": 1.0424256398432097, "percentage": 52.12, "elapsed_time": "3:05:06", "remaining_time": "2:50:02"} +{"current_steps": 4522, "total_steps": 8674, "loss": 0.40828272700309753, "lr": 1.012581289127554e-06, "epoch": 1.0426562139727922, "percentage": 52.13, "elapsed_time": "3:05:08", "remaining_time": "2:49:59"} +{"current_steps": 4523, "total_steps": 8674, "loss": 0.4232874810695648, "lr": 1.0122000571539069e-06, "epoch": 1.042886788102375, "percentage": 52.14, "elapsed_time": "3:05:11", "remaining_time": "2:49:57"} +{"current_steps": 4524, "total_steps": 8674, "loss": 0.43044984340667725, "lr": 1.0118188234068579e-06, "epoch": 1.0431173622319576, "percentage": 52.16, "elapsed_time": "3:05:13", "remaining_time": "2:49:54"} +{"current_steps": 4525, "total_steps": 8674, "loss": 0.4502897262573242, "lr": 1.011437587941823e-06, "epoch": 1.0433479363615403, "percentage": 52.17, "elapsed_time": "3:05:16", "remaining_time": "2:49:52"} +{"current_steps": 4526, "total_steps": 8674, "loss": 0.5505340099334717, "lr": 1.0110563508142185e-06, "epoch": 1.0435785104911228, "percentage": 52.18, "elapsed_time": "3:05:18", "remaining_time": "2:49:49"} +{"current_steps": 4527, "total_steps": 8674, "loss": 0.4026086628437042, "lr": 1.0106751120794617e-06, "epoch": 1.0438090846207055, "percentage": 52.19, "elapsed_time": "3:05:21", "remaining_time": "2:49:47"} +{"current_steps": 4528, "total_steps": 8674, "loss": 0.3910222053527832, "lr": 1.0102938717929692e-06, "epoch": 1.0440396587502883, "percentage": 52.2, "elapsed_time": "3:05:23", "remaining_time": "2:49:45"} +{"current_steps": 4529, "total_steps": 8674, "loss": 0.4134068191051483, "lr": 1.009912630010158e-06, "epoch": 1.044270232879871, "percentage": 52.21, "elapsed_time": "3:05:25", "remaining_time": "2:49:42"} +{"current_steps": 4530, "total_steps": 8674, "loss": 
0.4801563024520874, "lr": 1.0095313867864457e-06, "epoch": 1.0445008070094535, "percentage": 52.23, "elapsed_time": "3:05:28", "remaining_time": "2:49:39"} +{"current_steps": 4531, "total_steps": 8674, "loss": 0.4269358515739441, "lr": 1.0091501421772495e-06, "epoch": 1.0447313811390362, "percentage": 52.24, "elapsed_time": "3:05:30", "remaining_time": "2:49:37"} +{"current_steps": 4532, "total_steps": 8674, "loss": 0.5300281047821045, "lr": 1.0087688962379877e-06, "epoch": 1.044961955268619, "percentage": 52.25, "elapsed_time": "3:05:33", "remaining_time": "2:49:35"} +{"current_steps": 4533, "total_steps": 8674, "loss": 0.4634189009666443, "lr": 1.0083876490240777e-06, "epoch": 1.0451925293982016, "percentage": 52.26, "elapsed_time": "3:05:35", "remaining_time": "2:49:32"} +{"current_steps": 4534, "total_steps": 8674, "loss": 0.37037551403045654, "lr": 1.0080064005909379e-06, "epoch": 1.0454231035277841, "percentage": 52.27, "elapsed_time": "3:05:37", "remaining_time": "2:49:30"} +{"current_steps": 4535, "total_steps": 8674, "loss": 0.4740016460418701, "lr": 1.0076251509939867e-06, "epoch": 1.0456536776573668, "percentage": 52.28, "elapsed_time": "3:05:40", "remaining_time": "2:49:27"} +{"current_steps": 4536, "total_steps": 8674, "loss": 0.4824775159358978, "lr": 1.0072439002886426e-06, "epoch": 1.0458842517869495, "percentage": 52.29, "elapsed_time": "3:05:42", "remaining_time": "2:49:25"} +{"current_steps": 4537, "total_steps": 8674, "loss": 0.4891430735588074, "lr": 1.0068626485303242e-06, "epoch": 1.0461148259165323, "percentage": 52.31, "elapsed_time": "3:05:45", "remaining_time": "2:49:22"} +{"current_steps": 4538, "total_steps": 8674, "loss": 0.48089975118637085, "lr": 1.00648139577445e-06, "epoch": 1.0463454000461148, "percentage": 52.32, "elapsed_time": "3:05:47", "remaining_time": "2:49:20"} +{"current_steps": 4539, "total_steps": 8674, "loss": 0.4353799521923065, "lr": 1.0061001420764395e-06, "epoch": 1.0465759741756975, "percentage": 52.33, 
"elapsed_time": "3:05:50", "remaining_time": "2:49:17"} +{"current_steps": 4540, "total_steps": 8674, "loss": 0.4259982705116272, "lr": 1.0057188874917117e-06, "epoch": 1.0468065483052802, "percentage": 52.34, "elapsed_time": "3:05:52", "remaining_time": "2:49:15"} +{"current_steps": 4541, "total_steps": 8674, "loss": 0.4400532841682434, "lr": 1.0053376320756852e-06, "epoch": 1.047037122434863, "percentage": 52.35, "elapsed_time": "3:05:54", "remaining_time": "2:49:12"} +{"current_steps": 4542, "total_steps": 8674, "loss": 0.48598533868789673, "lr": 1.00495637588378e-06, "epoch": 1.0472676965644454, "percentage": 52.36, "elapsed_time": "3:05:57", "remaining_time": "2:49:10"} +{"current_steps": 4543, "total_steps": 8674, "loss": 0.6310586929321289, "lr": 1.0045751189714153e-06, "epoch": 1.0474982706940281, "percentage": 52.37, "elapsed_time": "3:05:59", "remaining_time": "2:49:07"} +{"current_steps": 4544, "total_steps": 8674, "loss": 0.49084293842315674, "lr": 1.0041938613940108e-06, "epoch": 1.0477288448236108, "percentage": 52.39, "elapsed_time": "3:06:02", "remaining_time": "2:49:05"} +{"current_steps": 4545, "total_steps": 8674, "loss": 0.5144428014755249, "lr": 1.003812603206986e-06, "epoch": 1.0479594189531936, "percentage": 52.4, "elapsed_time": "3:06:04", "remaining_time": "2:49:02"} +{"current_steps": 4546, "total_steps": 8674, "loss": 0.4480917155742645, "lr": 1.0034313444657605e-06, "epoch": 1.048189993082776, "percentage": 52.41, "elapsed_time": "3:06:07", "remaining_time": "2:49:00"} +{"current_steps": 4547, "total_steps": 8674, "loss": 0.4505491852760315, "lr": 1.0030500852257545e-06, "epoch": 1.0484205672123588, "percentage": 52.42, "elapsed_time": "3:06:09", "remaining_time": "2:48:57"} +{"current_steps": 4548, "total_steps": 8674, "loss": 0.3344930410385132, "lr": 1.0026688255423876e-06, "epoch": 1.0486511413419415, "percentage": 52.43, "elapsed_time": "3:06:11", "remaining_time": "2:48:55"} +{"current_steps": 4549, "total_steps": 8674, "loss": 
0.4006739854812622, "lr": 1.0022875654710801e-06, "epoch": 1.0488817154715242, "percentage": 52.44, "elapsed_time": "3:06:14", "remaining_time": "2:48:52"} +{"current_steps": 4550, "total_steps": 8674, "loss": 0.4815717935562134, "lr": 1.0019063050672517e-06, "epoch": 1.0491122896011067, "percentage": 52.46, "elapsed_time": "3:06:16", "remaining_time": "2:48:50"} +{"current_steps": 4551, "total_steps": 8674, "loss": 0.4660469889640808, "lr": 1.0015250443863223e-06, "epoch": 1.0493428637306894, "percentage": 52.47, "elapsed_time": "3:06:19", "remaining_time": "2:48:47"} +{"current_steps": 4552, "total_steps": 8674, "loss": 0.5233521461486816, "lr": 1.0011437834837125e-06, "epoch": 1.0495734378602721, "percentage": 52.48, "elapsed_time": "3:06:21", "remaining_time": "2:48:45"} +{"current_steps": 4553, "total_steps": 8674, "loss": 0.6037864685058594, "lr": 1.0007625224148418e-06, "epoch": 1.0498040119898548, "percentage": 52.49, "elapsed_time": "3:06:24", "remaining_time": "2:48:43"} +{"current_steps": 4554, "total_steps": 8674, "loss": 0.469952255487442, "lr": 1.000381261235131e-06, "epoch": 1.0500345861194373, "percentage": 52.5, "elapsed_time": "3:06:26", "remaining_time": "2:48:40"} +{"current_steps": 4555, "total_steps": 8674, "loss": 0.4577752649784088, "lr": 1e-06, "epoch": 1.05026516024902, "percentage": 52.51, "elapsed_time": "3:06:29", "remaining_time": "2:48:38"} +{"current_steps": 4556, "total_steps": 8674, "loss": 0.46796074509620667, "lr": 9.996187387648692e-07, "epoch": 1.0504957343786028, "percentage": 52.52, "elapsed_time": "3:06:31", "remaining_time": "2:48:35"} +{"current_steps": 4557, "total_steps": 8674, "loss": 0.40709036588668823, "lr": 9.992374775851583e-07, "epoch": 1.0507263085081853, "percentage": 52.54, "elapsed_time": "3:06:33", "remaining_time": "2:48:33"} +{"current_steps": 4558, "total_steps": 8674, "loss": 0.3997795879840851, "lr": 9.988562165162878e-07, "epoch": 1.050956882637768, "percentage": 52.55, "elapsed_time": "3:06:36", 
"remaining_time": "2:48:30"} +{"current_steps": 4559, "total_steps": 8674, "loss": 0.4677845239639282, "lr": 9.984749556136779e-07, "epoch": 1.0511874567673507, "percentage": 52.56, "elapsed_time": "3:06:38", "remaining_time": "2:48:28"} +{"current_steps": 4560, "total_steps": 8674, "loss": 0.40411800146102905, "lr": 9.980936949327487e-07, "epoch": 1.0514180308969334, "percentage": 52.57, "elapsed_time": "3:06:41", "remaining_time": "2:48:25"} +{"current_steps": 4561, "total_steps": 8674, "loss": 0.50546795129776, "lr": 9.9771243452892e-07, "epoch": 1.0516486050265161, "percentage": 52.58, "elapsed_time": "3:06:44", "remaining_time": "2:48:23"} +{"current_steps": 4562, "total_steps": 8674, "loss": 0.4116637110710144, "lr": 9.973311744576125e-07, "epoch": 1.0518791791560986, "percentage": 52.59, "elapsed_time": "3:06:46", "remaining_time": "2:48:21"} +{"current_steps": 4563, "total_steps": 8674, "loss": 0.4271109700202942, "lr": 9.969499147742454e-07, "epoch": 1.0521097532856813, "percentage": 52.61, "elapsed_time": "3:06:48", "remaining_time": "2:48:18"} +{"current_steps": 4564, "total_steps": 8674, "loss": 0.37195074558258057, "lr": 9.965686555342396e-07, "epoch": 1.052340327415264, "percentage": 52.62, "elapsed_time": "3:06:51", "remaining_time": "2:48:16"} +{"current_steps": 4565, "total_steps": 8674, "loss": 0.4020707607269287, "lr": 9.96187396793014e-07, "epoch": 1.0525709015448466, "percentage": 52.63, "elapsed_time": "3:06:53", "remaining_time": "2:48:13"} +{"current_steps": 4566, "total_steps": 8674, "loss": 0.4980151951313019, "lr": 9.95806138605989e-07, "epoch": 1.0528014756744293, "percentage": 52.64, "elapsed_time": "3:06:56", "remaining_time": "2:48:11"} +{"current_steps": 4567, "total_steps": 8674, "loss": 0.39553767442703247, "lr": 9.95424881028585e-07, "epoch": 1.053032049804012, "percentage": 52.65, "elapsed_time": "3:06:58", "remaining_time": "2:48:08"} +{"current_steps": 4568, "total_steps": 8674, "loss": 0.47618645429611206, "lr": 
9.9504362411622e-07, "epoch": 1.0532626239335947, "percentage": 52.66, "elapsed_time": "3:07:00", "remaining_time": "2:48:06"} +{"current_steps": 4569, "total_steps": 8674, "loss": 0.4613817036151886, "lr": 9.94662367924315e-07, "epoch": 1.0534931980631772, "percentage": 52.67, "elapsed_time": "3:07:03", "remaining_time": "2:48:03"} +{"current_steps": 4570, "total_steps": 8674, "loss": 0.35888034105300903, "lr": 9.942811125082884e-07, "epoch": 1.05372377219276, "percentage": 52.69, "elapsed_time": "3:07:06", "remaining_time": "2:48:01"} +{"current_steps": 4571, "total_steps": 8674, "loss": 0.45810097455978394, "lr": 9.938998579235606e-07, "epoch": 1.0539543463223426, "percentage": 52.7, "elapsed_time": "3:07:08", "remaining_time": "2:47:59"} +{"current_steps": 4572, "total_steps": 8674, "loss": 0.5351384878158569, "lr": 9.935186042255499e-07, "epoch": 1.0541849204519254, "percentage": 52.71, "elapsed_time": "3:07:11", "remaining_time": "2:47:56"} +{"current_steps": 4573, "total_steps": 8674, "loss": 0.5261274576187134, "lr": 9.931373514696759e-07, "epoch": 1.0544154945815079, "percentage": 52.72, "elapsed_time": "3:07:13", "remaining_time": "2:47:54"} +{"current_steps": 4574, "total_steps": 8674, "loss": 0.483295202255249, "lr": 9.927560997113573e-07, "epoch": 1.0546460687110906, "percentage": 52.73, "elapsed_time": "3:07:15", "remaining_time": "2:47:51"} +{"current_steps": 4575, "total_steps": 8674, "loss": 0.5371580719947815, "lr": 9.923748490060132e-07, "epoch": 1.0548766428406733, "percentage": 52.74, "elapsed_time": "3:07:18", "remaining_time": "2:47:48"} +{"current_steps": 4576, "total_steps": 8674, "loss": 0.4863673746585846, "lr": 9.919935994090622e-07, "epoch": 1.055107216970256, "percentage": 52.76, "elapsed_time": "3:07:20", "remaining_time": "2:47:46"} +{"current_steps": 4577, "total_steps": 8674, "loss": 0.47929099202156067, "lr": 9.916123509759224e-07, "epoch": 1.0553377910998385, "percentage": 52.77, "elapsed_time": "3:07:23", "remaining_time": 
"2:47:43"} +{"current_steps": 4578, "total_steps": 8674, "loss": 0.4687851667404175, "lr": 9.912311037620126e-07, "epoch": 1.0555683652294212, "percentage": 52.78, "elapsed_time": "3:07:25", "remaining_time": "2:47:41"} +{"current_steps": 4579, "total_steps": 8674, "loss": 0.5308720469474792, "lr": 9.908498578227504e-07, "epoch": 1.055798939359004, "percentage": 52.79, "elapsed_time": "3:07:27", "remaining_time": "2:47:39"} +{"current_steps": 4580, "total_steps": 8674, "loss": 0.45900580286979675, "lr": 9.904686132135546e-07, "epoch": 1.0560295134885866, "percentage": 52.8, "elapsed_time": "3:07:30", "remaining_time": "2:47:36"} +{"current_steps": 4581, "total_steps": 8674, "loss": 0.49392157793045044, "lr": 9.900873699898422e-07, "epoch": 1.0562600876181691, "percentage": 52.81, "elapsed_time": "3:07:32", "remaining_time": "2:47:34"} +{"current_steps": 4582, "total_steps": 8674, "loss": 0.47190070152282715, "lr": 9.89706128207031e-07, "epoch": 1.0564906617477519, "percentage": 52.82, "elapsed_time": "3:07:35", "remaining_time": "2:47:31"} +{"current_steps": 4583, "total_steps": 8674, "loss": 0.4431575834751129, "lr": 9.893248879205382e-07, "epoch": 1.0567212358773346, "percentage": 52.84, "elapsed_time": "3:07:37", "remaining_time": "2:47:29"} +{"current_steps": 4584, "total_steps": 8674, "loss": 0.49873441457748413, "lr": 9.889436491857814e-07, "epoch": 1.0569518100069173, "percentage": 52.85, "elapsed_time": "3:07:40", "remaining_time": "2:47:26"} +{"current_steps": 4585, "total_steps": 8674, "loss": 0.41190844774246216, "lr": 9.885624120581772e-07, "epoch": 1.0571823841364998, "percentage": 52.86, "elapsed_time": "3:07:42", "remaining_time": "2:47:24"} +{"current_steps": 4586, "total_steps": 8674, "loss": 0.5164123773574829, "lr": 9.881811765931423e-07, "epoch": 1.0574129582660825, "percentage": 52.87, "elapsed_time": "3:07:45", "remaining_time": "2:47:21"} +{"current_steps": 4587, "total_steps": 8674, "loss": 0.4141567349433899, "lr": 9.877999428460933e-07, 
"epoch": 1.0576435323956652, "percentage": 52.88, "elapsed_time": "3:07:47", "remaining_time": "2:47:19"} +{"current_steps": 4588, "total_steps": 8674, "loss": 0.457628458738327, "lr": 9.87418710872446e-07, "epoch": 1.057874106525248, "percentage": 52.89, "elapsed_time": "3:07:49", "remaining_time": "2:47:16"} +{"current_steps": 4589, "total_steps": 8674, "loss": 0.41788995265960693, "lr": 9.870374807276168e-07, "epoch": 1.0581046806548304, "percentage": 52.91, "elapsed_time": "3:07:52", "remaining_time": "2:47:14"} +{"current_steps": 4590, "total_steps": 8674, "loss": 0.5124667882919312, "lr": 9.866562524670209e-07, "epoch": 1.0583352547844131, "percentage": 52.92, "elapsed_time": "3:07:54", "remaining_time": "2:47:11"} +{"current_steps": 4591, "total_steps": 8674, "loss": 0.4192196726799011, "lr": 9.862750261460742e-07, "epoch": 1.0585658289139959, "percentage": 52.93, "elapsed_time": "3:07:57", "remaining_time": "2:47:09"} +{"current_steps": 4592, "total_steps": 8674, "loss": 0.4345153868198395, "lr": 9.858938018201913e-07, "epoch": 1.0587964030435786, "percentage": 52.94, "elapsed_time": "3:07:59", "remaining_time": "2:47:06"} +{"current_steps": 4593, "total_steps": 8674, "loss": 0.391804963350296, "lr": 9.855125795447874e-07, "epoch": 1.059026977173161, "percentage": 52.95, "elapsed_time": "3:08:02", "remaining_time": "2:47:04"} +{"current_steps": 4594, "total_steps": 8674, "loss": 0.3904710114002228, "lr": 9.851313593752767e-07, "epoch": 1.0592575513027438, "percentage": 52.96, "elapsed_time": "3:08:04", "remaining_time": "2:47:02"} +{"current_steps": 4595, "total_steps": 8674, "loss": 0.37314411997795105, "lr": 9.847501413670742e-07, "epoch": 1.0594881254323265, "percentage": 52.97, "elapsed_time": "3:08:07", "remaining_time": "2:46:59"} +{"current_steps": 4596, "total_steps": 8674, "loss": 0.5402779579162598, "lr": 9.843689255755926e-07, "epoch": 1.0597186995619092, "percentage": 52.99, "elapsed_time": "3:08:09", "remaining_time": "2:46:57"} 
+{"current_steps": 4597, "total_steps": 8674, "loss": 0.4243565797805786, "lr": 9.839877120562463e-07, "epoch": 1.0599492736914917, "percentage": 53.0, "elapsed_time": "3:08:11", "remaining_time": "2:46:54"} +{"current_steps": 4598, "total_steps": 8674, "loss": 0.4504585564136505, "lr": 9.836065008644484e-07, "epoch": 1.0601798478210744, "percentage": 53.01, "elapsed_time": "3:08:14", "remaining_time": "2:46:51"} +{"current_steps": 4599, "total_steps": 8674, "loss": 0.46487870812416077, "lr": 9.832252920556115e-07, "epoch": 1.0604104219506572, "percentage": 53.02, "elapsed_time": "3:08:16", "remaining_time": "2:46:49"} +{"current_steps": 4600, "total_steps": 8674, "loss": 0.470059871673584, "lr": 9.828440856851487e-07, "epoch": 1.0606409960802399, "percentage": 53.03, "elapsed_time": "3:08:19", "remaining_time": "2:46:46"} +{"current_steps": 4601, "total_steps": 8674, "loss": 0.4307391047477722, "lr": 9.824628818084716e-07, "epoch": 1.0608715702098224, "percentage": 53.04, "elapsed_time": "3:08:22", "remaining_time": "2:46:45"} +{"current_steps": 4602, "total_steps": 8674, "loss": 0.49449142813682556, "lr": 9.820816804809927e-07, "epoch": 1.061102144339405, "percentage": 53.06, "elapsed_time": "3:08:25", "remaining_time": "2:46:43"} +{"current_steps": 4603, "total_steps": 8674, "loss": 0.4932701885700226, "lr": 9.817004817581229e-07, "epoch": 1.0613327184689878, "percentage": 53.07, "elapsed_time": "3:08:27", "remaining_time": "2:46:40"} +{"current_steps": 4604, "total_steps": 8674, "loss": 0.49543553590774536, "lr": 9.813192856952739e-07, "epoch": 1.0615632925985705, "percentage": 53.08, "elapsed_time": "3:08:30", "remaining_time": "2:46:38"} +{"current_steps": 4605, "total_steps": 8674, "loss": 0.3906818926334381, "lr": 9.809380923478554e-07, "epoch": 1.061793866728153, "percentage": 53.09, "elapsed_time": "3:08:32", "remaining_time": "2:46:36"} +{"current_steps": 4606, "total_steps": 8674, "loss": 0.41667112708091736, "lr": 9.80556901771279e-07, "epoch": 
1.0620244408577357, "percentage": 53.1, "elapsed_time": "3:08:35", "remaining_time": "2:46:33"} +{"current_steps": 4607, "total_steps": 8674, "loss": 0.36195361614227295, "lr": 9.801757140209538e-07, "epoch": 1.0622550149873184, "percentage": 53.11, "elapsed_time": "3:08:37", "remaining_time": "2:46:30"} +{"current_steps": 4608, "total_steps": 8674, "loss": 0.4056081175804138, "lr": 9.797945291522892e-07, "epoch": 1.0624855891169012, "percentage": 53.12, "elapsed_time": "3:08:39", "remaining_time": "2:46:28"} +{"current_steps": 4609, "total_steps": 8674, "loss": 0.5048736929893494, "lr": 9.794133472206948e-07, "epoch": 1.0627161632464837, "percentage": 53.14, "elapsed_time": "3:08:42", "remaining_time": "2:46:25"} +{"current_steps": 4610, "total_steps": 8674, "loss": 0.4846169352531433, "lr": 9.790321682815788e-07, "epoch": 1.0629467373760664, "percentage": 53.15, "elapsed_time": "3:08:45", "remaining_time": "2:46:23"} +{"current_steps": 4611, "total_steps": 8674, "loss": 0.5149316787719727, "lr": 9.7865099239035e-07, "epoch": 1.063177311505649, "percentage": 53.16, "elapsed_time": "3:08:47", "remaining_time": "2:46:21"} +{"current_steps": 4612, "total_steps": 8674, "loss": 0.3816874623298645, "lr": 9.782698196024155e-07, "epoch": 1.0634078856352318, "percentage": 53.17, "elapsed_time": "3:08:50", "remaining_time": "2:46:19"} +{"current_steps": 4613, "total_steps": 8674, "loss": 0.5469645261764526, "lr": 9.77888649973183e-07, "epoch": 1.0636384597648143, "percentage": 53.18, "elapsed_time": "3:08:52", "remaining_time": "2:46:16"} +{"current_steps": 4614, "total_steps": 8674, "loss": 0.42796647548675537, "lr": 9.775074835580593e-07, "epoch": 1.063869033894397, "percentage": 53.19, "elapsed_time": "3:08:55", "remaining_time": "2:46:14"} +{"current_steps": 4615, "total_steps": 8674, "loss": 0.4931715726852417, "lr": 9.771263204124512e-07, "epoch": 1.0640996080239797, "percentage": 53.2, "elapsed_time": "3:08:57", "remaining_time": "2:46:11"} +{"current_steps": 4616, 
"total_steps": 8674, "loss": 0.5435268878936768, "lr": 9.767451605917641e-07, "epoch": 1.0643301821535625, "percentage": 53.22, "elapsed_time": "3:08:59", "remaining_time": "2:46:09"} +{"current_steps": 4617, "total_steps": 8674, "loss": 0.46361953020095825, "lr": 9.763640041514033e-07, "epoch": 1.064560756283145, "percentage": 53.23, "elapsed_time": "3:09:02", "remaining_time": "2:46:06"} +{"current_steps": 4618, "total_steps": 8674, "loss": 0.3742775619029999, "lr": 9.759828511467743e-07, "epoch": 1.0647913304127277, "percentage": 53.24, "elapsed_time": "3:09:04", "remaining_time": "2:46:04"} +{"current_steps": 4619, "total_steps": 8674, "loss": 0.4060659408569336, "lr": 9.75601701633281e-07, "epoch": 1.0650219045423104, "percentage": 53.25, "elapsed_time": "3:09:07", "remaining_time": "2:46:01"} +{"current_steps": 4620, "total_steps": 8674, "loss": 0.45316505432128906, "lr": 9.75220555666328e-07, "epoch": 1.065252478671893, "percentage": 53.26, "elapsed_time": "3:09:09", "remaining_time": "2:45:59"} +{"current_steps": 4621, "total_steps": 8674, "loss": 0.4548850655555725, "lr": 9.748394133013179e-07, "epoch": 1.0654830528014756, "percentage": 53.27, "elapsed_time": "3:09:12", "remaining_time": "2:45:56"} +{"current_steps": 4622, "total_steps": 8674, "loss": 0.5065705180168152, "lr": 9.744582745936547e-07, "epoch": 1.0657136269310583, "percentage": 53.29, "elapsed_time": "3:09:14", "remaining_time": "2:45:54"} +{"current_steps": 4623, "total_steps": 8674, "loss": 0.4114503860473633, "lr": 9.740771395987395e-07, "epoch": 1.065944201060641, "percentage": 53.3, "elapsed_time": "3:09:16", "remaining_time": "2:45:51"} +{"current_steps": 4624, "total_steps": 8674, "loss": 0.4568501114845276, "lr": 9.736960083719752e-07, "epoch": 1.0661747751902237, "percentage": 53.31, "elapsed_time": "3:09:19", "remaining_time": "2:45:49"} +{"current_steps": 4625, "total_steps": 8674, "loss": 0.49967026710510254, "lr": 9.733148809687624e-07, "epoch": 1.0664053493198062, "percentage": 
53.32, "elapsed_time": "3:09:21", "remaining_time": "2:45:46"} +{"current_steps": 4626, "total_steps": 8674, "loss": 0.529681384563446, "lr": 9.729337574445025e-07, "epoch": 1.066635923449389, "percentage": 53.33, "elapsed_time": "3:09:24", "remaining_time": "2:45:44"} +{"current_steps": 4627, "total_steps": 8674, "loss": 0.4819791316986084, "lr": 9.72552637854595e-07, "epoch": 1.0668664975789717, "percentage": 53.34, "elapsed_time": "3:09:26", "remaining_time": "2:45:41"} +{"current_steps": 4628, "total_steps": 8674, "loss": 0.4186001718044281, "lr": 9.721715222544396e-07, "epoch": 1.0670970717085544, "percentage": 53.35, "elapsed_time": "3:09:29", "remaining_time": "2:45:39"} +{"current_steps": 4629, "total_steps": 8674, "loss": 0.4442529082298279, "lr": 9.717904106994359e-07, "epoch": 1.0673276458381369, "percentage": 53.37, "elapsed_time": "3:09:31", "remaining_time": "2:45:36"} +{"current_steps": 4630, "total_steps": 8674, "loss": 0.4655953049659729, "lr": 9.714093032449815e-07, "epoch": 1.0675582199677196, "percentage": 53.38, "elapsed_time": "3:09:33", "remaining_time": "2:45:34"} +{"current_steps": 4631, "total_steps": 8674, "loss": 0.45248714089393616, "lr": 9.71028199946475e-07, "epoch": 1.0677887940973023, "percentage": 53.39, "elapsed_time": "3:09:36", "remaining_time": "2:45:31"} +{"current_steps": 4632, "total_steps": 8674, "loss": 0.4244336485862732, "lr": 9.706471008593128e-07, "epoch": 1.068019368226885, "percentage": 53.4, "elapsed_time": "3:09:38", "remaining_time": "2:45:29"} +{"current_steps": 4633, "total_steps": 8674, "loss": 0.4396495819091797, "lr": 9.702660060388923e-07, "epoch": 1.0682499423564675, "percentage": 53.41, "elapsed_time": "3:09:41", "remaining_time": "2:45:26"} +{"current_steps": 4634, "total_steps": 8674, "loss": 0.4504232406616211, "lr": 9.698849155406089e-07, "epoch": 1.0684805164860502, "percentage": 53.42, "elapsed_time": "3:09:43", "remaining_time": "2:45:24"} +{"current_steps": 4635, "total_steps": 8674, "loss": 
0.40112000703811646, "lr": 9.695038294198588e-07, "epoch": 1.068711090615633, "percentage": 53.44, "elapsed_time": "3:09:45", "remaining_time": "2:45:21"} +{"current_steps": 4636, "total_steps": 8674, "loss": 0.4511067271232605, "lr": 9.691227477320357e-07, "epoch": 1.0689416647452157, "percentage": 53.45, "elapsed_time": "3:09:48", "remaining_time": "2:45:19"} +{"current_steps": 4637, "total_steps": 8674, "loss": 0.44541406631469727, "lr": 9.687416705325342e-07, "epoch": 1.0691722388747982, "percentage": 53.46, "elapsed_time": "3:09:50", "remaining_time": "2:45:16"} +{"current_steps": 4638, "total_steps": 8674, "loss": 0.5038847327232361, "lr": 9.68360597876748e-07, "epoch": 1.069402813004381, "percentage": 53.47, "elapsed_time": "3:09:53", "remaining_time": "2:45:14"} +{"current_steps": 4639, "total_steps": 8674, "loss": 0.41960060596466064, "lr": 9.67979529820069e-07, "epoch": 1.0696333871339636, "percentage": 53.48, "elapsed_time": "3:09:55", "remaining_time": "2:45:11"} +{"current_steps": 4640, "total_steps": 8674, "loss": 0.49760064482688904, "lr": 9.6759846641789e-07, "epoch": 1.0698639612635463, "percentage": 53.49, "elapsed_time": "3:09:58", "remaining_time": "2:45:09"} +{"current_steps": 4641, "total_steps": 8674, "loss": 0.46513333916664124, "lr": 9.672174077256023e-07, "epoch": 1.0700945353931288, "percentage": 53.5, "elapsed_time": "3:10:00", "remaining_time": "2:45:06"} +{"current_steps": 4642, "total_steps": 8674, "loss": 0.41129356622695923, "lr": 9.66836353798597e-07, "epoch": 1.0703251095227115, "percentage": 53.52, "elapsed_time": "3:10:02", "remaining_time": "2:45:04"} +{"current_steps": 4643, "total_steps": 8674, "loss": 0.5021853446960449, "lr": 9.664553046922634e-07, "epoch": 1.0705556836522943, "percentage": 53.53, "elapsed_time": "3:10:05", "remaining_time": "2:45:01"} +{"current_steps": 4644, "total_steps": 8674, "loss": 0.5184302926063538, "lr": 9.660742604619912e-07, "epoch": 1.070786257781877, "percentage": 53.54, "elapsed_time": 
"3:10:07", "remaining_time": "2:44:59"} +{"current_steps": 4645, "total_steps": 8674, "loss": 0.4793940484523773, "lr": 9.65693221163169e-07, "epoch": 1.0710168319114595, "percentage": 53.55, "elapsed_time": "3:10:10", "remaining_time": "2:44:56"} +{"current_steps": 4646, "total_steps": 8674, "loss": 0.43454456329345703, "lr": 9.653121868511854e-07, "epoch": 1.0712474060410422, "percentage": 53.56, "elapsed_time": "3:10:12", "remaining_time": "2:44:54"} +{"current_steps": 4647, "total_steps": 8674, "loss": 0.49123185873031616, "lr": 9.649311575814266e-07, "epoch": 1.071477980170625, "percentage": 53.57, "elapsed_time": "3:10:14", "remaining_time": "2:44:51"} +{"current_steps": 4648, "total_steps": 8674, "loss": 0.37020617723464966, "lr": 9.645501334092792e-07, "epoch": 1.0717085543002076, "percentage": 53.59, "elapsed_time": "3:10:17", "remaining_time": "2:44:49"} +{"current_steps": 4649, "total_steps": 8674, "loss": 0.461778849363327, "lr": 9.641691143901296e-07, "epoch": 1.0719391284297901, "percentage": 53.6, "elapsed_time": "3:10:19", "remaining_time": "2:44:46"} +{"current_steps": 4650, "total_steps": 8674, "loss": 0.46640273928642273, "lr": 9.63788100579362e-07, "epoch": 1.0721697025593728, "percentage": 53.61, "elapsed_time": "3:10:22", "remaining_time": "2:44:44"} +{"current_steps": 4651, "total_steps": 8674, "loss": 0.44978517293930054, "lr": 9.634070920323614e-07, "epoch": 1.0724002766889555, "percentage": 53.62, "elapsed_time": "3:10:24", "remaining_time": "2:44:41"} +{"current_steps": 4652, "total_steps": 8674, "loss": 0.5070945024490356, "lr": 9.630260888045103e-07, "epoch": 1.0726308508185383, "percentage": 53.63, "elapsed_time": "3:10:27", "remaining_time": "2:44:39"} +{"current_steps": 4653, "total_steps": 8674, "loss": 0.4513545334339142, "lr": 9.626450909511926e-07, "epoch": 1.0728614249481208, "percentage": 53.64, "elapsed_time": "3:10:29", "remaining_time": "2:44:37"} +{"current_steps": 4654, "total_steps": 8674, "loss": 0.4430030584335327, 
"lr": 9.622640985277889e-07, "epoch": 1.0730919990777035, "percentage": 53.65, "elapsed_time": "3:10:32", "remaining_time": "2:44:34"} +{"current_steps": 4655, "total_steps": 8674, "loss": 0.45619165897369385, "lr": 9.618831115896814e-07, "epoch": 1.0733225732072862, "percentage": 53.67, "elapsed_time": "3:10:34", "remaining_time": "2:44:32"} +{"current_steps": 4656, "total_steps": 8674, "loss": 0.411594033241272, "lr": 9.615021301922497e-07, "epoch": 1.073553147336869, "percentage": 53.68, "elapsed_time": "3:10:36", "remaining_time": "2:44:29"} +{"current_steps": 4657, "total_steps": 8674, "loss": 0.5230164527893066, "lr": 9.611211543908732e-07, "epoch": 1.0737837214664514, "percentage": 53.69, "elapsed_time": "3:10:39", "remaining_time": "2:44:27"} +{"current_steps": 4658, "total_steps": 8674, "loss": 0.45379406213760376, "lr": 9.607401842409316e-07, "epoch": 1.0740142955960341, "percentage": 53.7, "elapsed_time": "3:10:41", "remaining_time": "2:44:24"} +{"current_steps": 4659, "total_steps": 8674, "loss": 0.47254839539527893, "lr": 9.603592197978016e-07, "epoch": 1.0742448697256168, "percentage": 53.71, "elapsed_time": "3:10:44", "remaining_time": "2:44:22"} +{"current_steps": 4660, "total_steps": 8674, "loss": 0.3990492820739746, "lr": 9.59978261116861e-07, "epoch": 1.0744754438551993, "percentage": 53.72, "elapsed_time": "3:10:46", "remaining_time": "2:44:19"} +{"current_steps": 4661, "total_steps": 8674, "loss": 0.41671720147132874, "lr": 9.595973082534855e-07, "epoch": 1.074706017984782, "percentage": 53.74, "elapsed_time": "3:10:49", "remaining_time": "2:44:17"} +{"current_steps": 4662, "total_steps": 8674, "loss": 0.4269324839115143, "lr": 9.59216361263051e-07, "epoch": 1.0749365921143648, "percentage": 53.75, "elapsed_time": "3:10:51", "remaining_time": "2:44:15"} +{"current_steps": 4663, "total_steps": 8674, "loss": 0.42989516258239746, "lr": 9.588354202009314e-07, "epoch": 1.0751671662439475, "percentage": 53.76, "elapsed_time": "3:10:54", 
"remaining_time": "2:44:12"} +{"current_steps": 4664, "total_steps": 8674, "loss": 0.5224605798721313, "lr": 9.584544851225008e-07, "epoch": 1.0753977403735302, "percentage": 53.77, "elapsed_time": "3:10:56", "remaining_time": "2:44:10"} +{"current_steps": 4665, "total_steps": 8674, "loss": 0.3853871524333954, "lr": 9.580735560831318e-07, "epoch": 1.0756283145031127, "percentage": 53.78, "elapsed_time": "3:10:59", "remaining_time": "2:44:07"} +{"current_steps": 4666, "total_steps": 8674, "loss": 0.4460698366165161, "lr": 9.576926331381968e-07, "epoch": 1.0758588886326954, "percentage": 53.79, "elapsed_time": "3:11:01", "remaining_time": "2:44:05"} +{"current_steps": 4667, "total_steps": 8674, "loss": 0.45617812871932983, "lr": 9.57311716343066e-07, "epoch": 1.0760894627622781, "percentage": 53.8, "elapsed_time": "3:11:04", "remaining_time": "2:44:02"} +{"current_steps": 4668, "total_steps": 8674, "loss": 0.5631355047225952, "lr": 9.569308057531096e-07, "epoch": 1.0763200368918606, "percentage": 53.82, "elapsed_time": "3:11:06", "remaining_time": "2:44:00"} +{"current_steps": 4669, "total_steps": 8674, "loss": 0.4197179973125458, "lr": 9.565499014236977e-07, "epoch": 1.0765506110214433, "percentage": 53.83, "elapsed_time": "3:11:09", "remaining_time": "2:43:57"} +{"current_steps": 4670, "total_steps": 8674, "loss": 0.4262646436691284, "lr": 9.561690034101973e-07, "epoch": 1.076781185151026, "percentage": 53.84, "elapsed_time": "3:11:11", "remaining_time": "2:43:55"} +{"current_steps": 4671, "total_steps": 8674, "loss": 0.42719966173171997, "lr": 9.557881117679768e-07, "epoch": 1.0770117592806088, "percentage": 53.85, "elapsed_time": "3:11:14", "remaining_time": "2:43:53"} +{"current_steps": 4672, "total_steps": 8674, "loss": 0.4278491735458374, "lr": 9.554072265524022e-07, "epoch": 1.0772423334101915, "percentage": 53.86, "elapsed_time": "3:11:17", "remaining_time": "2:43:51"} +{"current_steps": 4673, "total_steps": 8674, "loss": 0.3915478587150574, "lr": 
9.550263478188396e-07, "epoch": 1.077472907539774, "percentage": 53.87, "elapsed_time": "3:11:19", "remaining_time": "2:43:48"} +{"current_steps": 4674, "total_steps": 8674, "loss": 0.4391477704048157, "lr": 9.546454756226525e-07, "epoch": 1.0777034816693567, "percentage": 53.89, "elapsed_time": "3:11:22", "remaining_time": "2:43:46"} +{"current_steps": 4675, "total_steps": 8674, "loss": 0.47325795888900757, "lr": 9.542646100192055e-07, "epoch": 1.0779340557989394, "percentage": 53.9, "elapsed_time": "3:11:24", "remaining_time": "2:43:43"} +{"current_steps": 4676, "total_steps": 8674, "loss": 0.4698373079299927, "lr": 9.538837510638607e-07, "epoch": 1.078164629928522, "percentage": 53.91, "elapsed_time": "3:11:26", "remaining_time": "2:43:41"} +{"current_steps": 4677, "total_steps": 8674, "loss": 0.4252272844314575, "lr": 9.535028988119805e-07, "epoch": 1.0783952040581046, "percentage": 53.92, "elapsed_time": "3:11:29", "remaining_time": "2:43:38"} +{"current_steps": 4678, "total_steps": 8674, "loss": 0.46726179122924805, "lr": 9.531220533189253e-07, "epoch": 1.0786257781876873, "percentage": 53.93, "elapsed_time": "3:11:31", "remaining_time": "2:43:36"} +{"current_steps": 4679, "total_steps": 8674, "loss": 0.46616411209106445, "lr": 9.527412146400542e-07, "epoch": 1.07885635231727, "percentage": 53.94, "elapsed_time": "3:11:34", "remaining_time": "2:43:34"} +{"current_steps": 4680, "total_steps": 8674, "loss": 0.5607181787490845, "lr": 9.523603828307268e-07, "epoch": 1.0790869264468528, "percentage": 53.95, "elapsed_time": "3:11:36", "remaining_time": "2:43:31"} +{"current_steps": 4681, "total_steps": 8674, "loss": 0.5039520859718323, "lr": 9.519795579463002e-07, "epoch": 1.0793175005764353, "percentage": 53.97, "elapsed_time": "3:11:39", "remaining_time": "2:43:29"} +{"current_steps": 4682, "total_steps": 8674, "loss": 0.45532113313674927, "lr": 9.515987400421322e-07, "epoch": 1.079548074706018, "percentage": 53.98, "elapsed_time": "3:11:41", "remaining_time": 
"2:43:26"} +{"current_steps": 4683, "total_steps": 8674, "loss": 0.4198398292064667, "lr": 9.512179291735772e-07, "epoch": 1.0797786488356007, "percentage": 53.99, "elapsed_time": "3:11:44", "remaining_time": "2:43:24"} +{"current_steps": 4684, "total_steps": 8674, "loss": 0.371380090713501, "lr": 9.508371253959909e-07, "epoch": 1.0800092229651832, "percentage": 54.0, "elapsed_time": "3:11:46", "remaining_time": "2:43:21"} +{"current_steps": 4685, "total_steps": 8674, "loss": 0.44341978430747986, "lr": 9.504563287647265e-07, "epoch": 1.080239797094766, "percentage": 54.01, "elapsed_time": "3:11:48", "remaining_time": "2:43:19"} +{"current_steps": 4686, "total_steps": 8674, "loss": 0.4184574484825134, "lr": 9.500755393351372e-07, "epoch": 1.0804703712243486, "percentage": 54.02, "elapsed_time": "3:11:51", "remaining_time": "2:43:16"} +{"current_steps": 4687, "total_steps": 8674, "loss": 0.5584033727645874, "lr": 9.496947571625739e-07, "epoch": 1.0807009453539314, "percentage": 54.04, "elapsed_time": "3:11:54", "remaining_time": "2:43:14"} +{"current_steps": 4688, "total_steps": 8674, "loss": 0.44405317306518555, "lr": 9.493139823023874e-07, "epoch": 1.0809315194835138, "percentage": 54.05, "elapsed_time": "3:11:56", "remaining_time": "2:43:11"} +{"current_steps": 4689, "total_steps": 8674, "loss": 0.41137009859085083, "lr": 9.489332148099277e-07, "epoch": 1.0811620936130966, "percentage": 54.06, "elapsed_time": "3:11:58", "remaining_time": "2:43:09"} +{"current_steps": 4690, "total_steps": 8674, "loss": 0.4831092357635498, "lr": 9.485524547405424e-07, "epoch": 1.0813926677426793, "percentage": 54.07, "elapsed_time": "3:12:01", "remaining_time": "2:43:06"} +{"current_steps": 4691, "total_steps": 8674, "loss": 0.41243845224380493, "lr": 9.481717021495793e-07, "epoch": 1.081623241872262, "percentage": 54.08, "elapsed_time": "3:12:03", "remaining_time": "2:43:04"} +{"current_steps": 4692, "total_steps": 8674, "loss": 0.33649003505706787, "lr": 9.477909570923844e-07, 
"epoch": 1.0818538160018445, "percentage": 54.09, "elapsed_time": "3:12:06", "remaining_time": "2:43:02"} +{"current_steps": 4693, "total_steps": 8674, "loss": 0.4959014654159546, "lr": 9.474102196243033e-07, "epoch": 1.0820843901314272, "percentage": 54.1, "elapsed_time": "3:12:08", "remaining_time": "2:42:59"} +{"current_steps": 4694, "total_steps": 8674, "loss": 0.43924248218536377, "lr": 9.470294898006795e-07, "epoch": 1.08231496426101, "percentage": 54.12, "elapsed_time": "3:12:11", "remaining_time": "2:42:57"} +{"current_steps": 4695, "total_steps": 8674, "loss": 0.4777243137359619, "lr": 9.466487676768563e-07, "epoch": 1.0825455383905926, "percentage": 54.13, "elapsed_time": "3:12:13", "remaining_time": "2:42:54"} +{"current_steps": 4696, "total_steps": 8674, "loss": 0.4488077759742737, "lr": 9.462680533081752e-07, "epoch": 1.0827761125201751, "percentage": 54.14, "elapsed_time": "3:12:16", "remaining_time": "2:42:52"} +{"current_steps": 4697, "total_steps": 8674, "loss": 0.5058270692825317, "lr": 9.458873467499778e-07, "epoch": 1.0830066866497579, "percentage": 54.15, "elapsed_time": "3:12:18", "remaining_time": "2:42:49"} +{"current_steps": 4698, "total_steps": 8674, "loss": 0.4537619650363922, "lr": 9.455066480576025e-07, "epoch": 1.0832372607793406, "percentage": 54.16, "elapsed_time": "3:12:21", "remaining_time": "2:42:47"} +{"current_steps": 4699, "total_steps": 8674, "loss": 0.4725874960422516, "lr": 9.45125957286388e-07, "epoch": 1.0834678349089233, "percentage": 54.17, "elapsed_time": "3:12:23", "remaining_time": "2:42:44"} +{"current_steps": 4700, "total_steps": 8674, "loss": 0.4967196583747864, "lr": 9.447452744916722e-07, "epoch": 1.0836984090385058, "percentage": 54.18, "elapsed_time": "3:12:26", "remaining_time": "2:42:42"} +{"current_steps": 4701, "total_steps": 8674, "loss": 0.43682345747947693, "lr": 9.443645997287902e-07, "epoch": 1.0839289831680885, "percentage": 54.2, "elapsed_time": "3:12:29", "remaining_time": "2:42:41"} 
+{"current_steps": 4702, "total_steps": 8674, "loss": 0.48844271898269653, "lr": 9.439839330530781e-07, "epoch": 1.0841595572976712, "percentage": 54.21, "elapsed_time": "3:12:32", "remaining_time": "2:42:38"} +{"current_steps": 4703, "total_steps": 8674, "loss": 0.43654918670654297, "lr": 9.436032745198682e-07, "epoch": 1.084390131427254, "percentage": 54.22, "elapsed_time": "3:12:34", "remaining_time": "2:42:36"} +{"current_steps": 4704, "total_steps": 8674, "loss": 0.5034382939338684, "lr": 9.432226241844947e-07, "epoch": 1.0846207055568364, "percentage": 54.23, "elapsed_time": "3:12:37", "remaining_time": "2:42:33"} +{"current_steps": 4705, "total_steps": 8674, "loss": 0.5407527089118958, "lr": 9.428419821022877e-07, "epoch": 1.0848512796864191, "percentage": 54.24, "elapsed_time": "3:12:39", "remaining_time": "2:42:31"} +{"current_steps": 4706, "total_steps": 8674, "loss": 0.4372078478336334, "lr": 9.424613483285783e-07, "epoch": 1.0850818538160019, "percentage": 54.25, "elapsed_time": "3:12:42", "remaining_time": "2:42:28"} +{"current_steps": 4707, "total_steps": 8674, "loss": 0.5264855623245239, "lr": 9.420807229186949e-07, "epoch": 1.0853124279455846, "percentage": 54.27, "elapsed_time": "3:12:44", "remaining_time": "2:42:26"} +{"current_steps": 4708, "total_steps": 8674, "loss": 0.3810223937034607, "lr": 9.417001059279652e-07, "epoch": 1.085543002075167, "percentage": 54.28, "elapsed_time": "3:12:46", "remaining_time": "2:42:23"} +{"current_steps": 4709, "total_steps": 8674, "loss": 0.368865430355072, "lr": 9.413194974117163e-07, "epoch": 1.0857735762047498, "percentage": 54.29, "elapsed_time": "3:12:49", "remaining_time": "2:42:21"} +{"current_steps": 4710, "total_steps": 8674, "loss": 0.41845810413360596, "lr": 9.409388974252729e-07, "epoch": 1.0860041503343325, "percentage": 54.3, "elapsed_time": "3:12:51", "remaining_time": "2:42:18"} +{"current_steps": 4711, "total_steps": 8674, "loss": 0.5185590982437134, "lr": 9.405583060239594e-07, "epoch": 
1.0862347244639152, "percentage": 54.31, "elapsed_time": "3:12:54", "remaining_time": "2:42:16"} +{"current_steps": 4712, "total_steps": 8674, "loss": 0.4848501682281494, "lr": 9.401777232630983e-07, "epoch": 1.0864652985934977, "percentage": 54.32, "elapsed_time": "3:12:56", "remaining_time": "2:42:14"} +{"current_steps": 4713, "total_steps": 8674, "loss": 0.5581566691398621, "lr": 9.397971491980119e-07, "epoch": 1.0866958727230804, "percentage": 54.33, "elapsed_time": "3:12:59", "remaining_time": "2:42:11"} +{"current_steps": 4714, "total_steps": 8674, "loss": 0.42043447494506836, "lr": 9.394165838840196e-07, "epoch": 1.0869264468526632, "percentage": 54.35, "elapsed_time": "3:13:01", "remaining_time": "2:42:09"} +{"current_steps": 4715, "total_steps": 8674, "loss": 0.45076289772987366, "lr": 9.39036027376441e-07, "epoch": 1.0871570209822459, "percentage": 54.36, "elapsed_time": "3:13:04", "remaining_time": "2:42:06"} +{"current_steps": 4716, "total_steps": 8674, "loss": 0.3650796413421631, "lr": 9.386554797305934e-07, "epoch": 1.0873875951118284, "percentage": 54.37, "elapsed_time": "3:13:06", "remaining_time": "2:42:04"} +{"current_steps": 4717, "total_steps": 8674, "loss": 0.4837912321090698, "lr": 9.38274941001794e-07, "epoch": 1.087618169241411, "percentage": 54.38, "elapsed_time": "3:13:08", "remaining_time": "2:42:01"} +{"current_steps": 4718, "total_steps": 8674, "loss": 0.41277679800987244, "lr": 9.378944112453574e-07, "epoch": 1.0878487433709938, "percentage": 54.39, "elapsed_time": "3:13:11", "remaining_time": "2:41:59"} +{"current_steps": 4719, "total_steps": 8674, "loss": 0.48409390449523926, "lr": 9.375138905165973e-07, "epoch": 1.0880793175005765, "percentage": 54.4, "elapsed_time": "3:13:13", "remaining_time": "2:41:56"} +{"current_steps": 4720, "total_steps": 8674, "loss": 0.3952450752258301, "lr": 9.371333788708268e-07, "epoch": 1.088309891630159, "percentage": 54.42, "elapsed_time": "3:13:16", "remaining_time": "2:41:54"} +{"current_steps": 
4721, "total_steps": 8674, "loss": 0.42314866185188293, "lr": 9.367528763633563e-07, "epoch": 1.0885404657597417, "percentage": 54.43, "elapsed_time": "3:13:18", "remaining_time": "2:41:51"} +{"current_steps": 4722, "total_steps": 8674, "loss": 0.5322449207305908, "lr": 9.363723830494966e-07, "epoch": 1.0887710398893244, "percentage": 54.44, "elapsed_time": "3:13:21", "remaining_time": "2:41:49"} +{"current_steps": 4723, "total_steps": 8674, "loss": 0.42307883501052856, "lr": 9.359918989845557e-07, "epoch": 1.0890016140189072, "percentage": 54.45, "elapsed_time": "3:13:23", "remaining_time": "2:41:46"} +{"current_steps": 4724, "total_steps": 8674, "loss": 0.39321061968803406, "lr": 9.356114242238413e-07, "epoch": 1.0892321881484897, "percentage": 54.46, "elapsed_time": "3:13:25", "remaining_time": "2:41:44"} +{"current_steps": 4725, "total_steps": 8674, "loss": 0.5064421892166138, "lr": 9.352309588226585e-07, "epoch": 1.0894627622780724, "percentage": 54.47, "elapsed_time": "3:13:28", "remaining_time": "2:41:41"} +{"current_steps": 4726, "total_steps": 8674, "loss": 0.44825220108032227, "lr": 9.348505028363125e-07, "epoch": 1.089693336407655, "percentage": 54.48, "elapsed_time": "3:13:30", "remaining_time": "2:41:39"} +{"current_steps": 4727, "total_steps": 8674, "loss": 0.4323306679725647, "lr": 9.344700563201065e-07, "epoch": 1.0899239105372378, "percentage": 54.5, "elapsed_time": "3:13:33", "remaining_time": "2:41:36"} +{"current_steps": 4728, "total_steps": 8674, "loss": 0.44907987117767334, "lr": 9.340896193293414e-07, "epoch": 1.0901544846668203, "percentage": 54.51, "elapsed_time": "3:13:35", "remaining_time": "2:41:34"} +{"current_steps": 4729, "total_steps": 8674, "loss": 0.416559636592865, "lr": 9.337091919193185e-07, "epoch": 1.090385058796403, "percentage": 54.52, "elapsed_time": "3:13:38", "remaining_time": "2:41:32"} +{"current_steps": 4730, "total_steps": 8674, "loss": 0.5361836552619934, "lr": 9.33328774145336e-07, "epoch": 1.0906156329259857, 
"percentage": 54.53, "elapsed_time": "3:13:40", "remaining_time": "2:41:29"} +{"current_steps": 4731, "total_steps": 8674, "loss": 0.4815465211868286, "lr": 9.329483660626922e-07, "epoch": 1.0908462070555685, "percentage": 54.54, "elapsed_time": "3:13:43", "remaining_time": "2:41:27"} +{"current_steps": 4732, "total_steps": 8674, "loss": 0.5205050110816956, "lr": 9.325679677266826e-07, "epoch": 1.091076781185151, "percentage": 54.55, "elapsed_time": "3:13:45", "remaining_time": "2:41:24"} +{"current_steps": 4733, "total_steps": 8674, "loss": 0.4830896258354187, "lr": 9.321875791926028e-07, "epoch": 1.0913073553147337, "percentage": 54.57, "elapsed_time": "3:13:48", "remaining_time": "2:41:22"} +{"current_steps": 4734, "total_steps": 8674, "loss": 0.4394579827785492, "lr": 9.318072005157451e-07, "epoch": 1.0915379294443164, "percentage": 54.58, "elapsed_time": "3:13:50", "remaining_time": "2:41:19"} +{"current_steps": 4735, "total_steps": 8674, "loss": 0.4614049792289734, "lr": 9.314268317514022e-07, "epoch": 1.091768503573899, "percentage": 54.59, "elapsed_time": "3:13:52", "remaining_time": "2:41:17"} +{"current_steps": 4736, "total_steps": 8674, "loss": 0.5123867988586426, "lr": 9.31046472954864e-07, "epoch": 1.0919990777034816, "percentage": 54.6, "elapsed_time": "3:13:55", "remaining_time": "2:41:14"} +{"current_steps": 4737, "total_steps": 8674, "loss": 0.43548035621643066, "lr": 9.306661241814204e-07, "epoch": 1.0922296518330643, "percentage": 54.61, "elapsed_time": "3:13:57", "remaining_time": "2:41:12"} +{"current_steps": 4738, "total_steps": 8674, "loss": 0.4102709889411926, "lr": 9.302857854863579e-07, "epoch": 1.092460225962647, "percentage": 54.62, "elapsed_time": "3:14:00", "remaining_time": "2:41:09"} +{"current_steps": 4739, "total_steps": 8674, "loss": 0.46276605129241943, "lr": 9.299054569249628e-07, "epoch": 1.0926908000922297, "percentage": 54.63, "elapsed_time": "3:14:02", "remaining_time": "2:41:07"} +{"current_steps": 4740, "total_steps": 
8674, "loss": 0.47700244188308716, "lr": 9.295251385525204e-07, "epoch": 1.0929213742218122, "percentage": 54.65, "elapsed_time": "3:14:05", "remaining_time": "2:41:04"} +{"current_steps": 4741, "total_steps": 8674, "loss": 0.5492758750915527, "lr": 9.29144830424313e-07, "epoch": 1.093151948351395, "percentage": 54.66, "elapsed_time": "3:14:07", "remaining_time": "2:41:02"} +{"current_steps": 4742, "total_steps": 8674, "loss": 0.3846803307533264, "lr": 9.287645325956228e-07, "epoch": 1.0933825224809777, "percentage": 54.67, "elapsed_time": "3:14:09", "remaining_time": "2:40:59"} +{"current_steps": 4743, "total_steps": 8674, "loss": 0.47237372398376465, "lr": 9.283842451217294e-07, "epoch": 1.0936130966105604, "percentage": 54.68, "elapsed_time": "3:14:12", "remaining_time": "2:40:57"} +{"current_steps": 4744, "total_steps": 8674, "loss": 0.4651675820350647, "lr": 9.280039680579122e-07, "epoch": 1.0938436707401429, "percentage": 54.69, "elapsed_time": "3:14:14", "remaining_time": "2:40:54"} +{"current_steps": 4745, "total_steps": 8674, "loss": 0.5472640991210938, "lr": 9.276237014594476e-07, "epoch": 1.0940742448697256, "percentage": 54.7, "elapsed_time": "3:14:17", "remaining_time": "2:40:52"} +{"current_steps": 4746, "total_steps": 8674, "loss": 0.45672351121902466, "lr": 9.272434453816117e-07, "epoch": 1.0943048189993083, "percentage": 54.72, "elapsed_time": "3:14:19", "remaining_time": "2:40:49"} +{"current_steps": 4747, "total_steps": 8674, "loss": 0.4589729905128479, "lr": 9.268631998796785e-07, "epoch": 1.094535393128891, "percentage": 54.73, "elapsed_time": "3:14:21", "remaining_time": "2:40:47"} +{"current_steps": 4748, "total_steps": 8674, "loss": 0.45882588624954224, "lr": 9.264829650089201e-07, "epoch": 1.0947659672584735, "percentage": 54.74, "elapsed_time": "3:14:24", "remaining_time": "2:40:44"} +{"current_steps": 4749, "total_steps": 8674, "loss": 0.6183863282203674, "lr": 9.26102740824608e-07, "epoch": 1.0949965413880562, "percentage": 54.75, 
"elapsed_time": "3:14:26", "remaining_time": "2:40:42"} +{"current_steps": 4750, "total_steps": 8674, "loss": 0.4512014389038086, "lr": 9.257225273820112e-07, "epoch": 1.095227115517639, "percentage": 54.76, "elapsed_time": "3:14:28", "remaining_time": "2:40:39"} +{"current_steps": 4751, "total_steps": 8674, "loss": 0.5006139874458313, "lr": 9.253423247363983e-07, "epoch": 1.0954576896472217, "percentage": 54.77, "elapsed_time": "3:14:31", "remaining_time": "2:40:37"} +{"current_steps": 4752, "total_steps": 8674, "loss": 0.5394018888473511, "lr": 9.249621329430346e-07, "epoch": 1.0956882637768042, "percentage": 54.78, "elapsed_time": "3:14:33", "remaining_time": "2:40:34"} +{"current_steps": 4753, "total_steps": 8674, "loss": 0.35523056983947754, "lr": 9.245819520571858e-07, "epoch": 1.095918837906387, "percentage": 54.8, "elapsed_time": "3:14:36", "remaining_time": "2:40:32"} +{"current_steps": 4754, "total_steps": 8674, "loss": 0.44379743933677673, "lr": 9.242017821341143e-07, "epoch": 1.0961494120359696, "percentage": 54.81, "elapsed_time": "3:14:38", "remaining_time": "2:40:29"} +{"current_steps": 4755, "total_steps": 8674, "loss": 0.4190908968448639, "lr": 9.238216232290821e-07, "epoch": 1.0963799861655523, "percentage": 54.82, "elapsed_time": "3:14:40", "remaining_time": "2:40:27"} +{"current_steps": 4756, "total_steps": 8674, "loss": 0.44818970561027527, "lr": 9.234414753973488e-07, "epoch": 1.0966105602951348, "percentage": 54.83, "elapsed_time": "3:14:43", "remaining_time": "2:40:24"} +{"current_steps": 4757, "total_steps": 8674, "loss": 0.4134204685688019, "lr": 9.230613386941734e-07, "epoch": 1.0968411344247175, "percentage": 54.84, "elapsed_time": "3:14:45", "remaining_time": "2:40:22"} +{"current_steps": 4758, "total_steps": 8674, "loss": 0.3554952144622803, "lr": 9.226812131748118e-07, "epoch": 1.0970717085543003, "percentage": 54.85, "elapsed_time": "3:14:48", "remaining_time": "2:40:19"} +{"current_steps": 4759, "total_steps": 8674, "loss": 
0.522594690322876, "lr": 9.223010988945194e-07, "epoch": 1.097302282683883, "percentage": 54.87, "elapsed_time": "3:14:50", "remaining_time": "2:40:17"} +{"current_steps": 4760, "total_steps": 8674, "loss": 0.44814133644104004, "lr": 9.219209959085502e-07, "epoch": 1.0975328568134655, "percentage": 54.88, "elapsed_time": "3:14:52", "remaining_time": "2:40:14"} +{"current_steps": 4761, "total_steps": 8674, "loss": 0.42479634284973145, "lr": 9.215409042721551e-07, "epoch": 1.0977634309430482, "percentage": 54.89, "elapsed_time": "3:14:55", "remaining_time": "2:40:12"} +{"current_steps": 4762, "total_steps": 8674, "loss": 0.4384934902191162, "lr": 9.211608240405849e-07, "epoch": 1.097994005072631, "percentage": 54.9, "elapsed_time": "3:14:57", "remaining_time": "2:40:09"} +{"current_steps": 4763, "total_steps": 8674, "loss": 0.5378658771514893, "lr": 9.207807552690878e-07, "epoch": 1.0982245792022136, "percentage": 54.91, "elapsed_time": "3:15:00", "remaining_time": "2:40:07"} +{"current_steps": 4764, "total_steps": 8674, "loss": 0.5071386694908142, "lr": 9.204006980129111e-07, "epoch": 1.098455153331796, "percentage": 54.92, "elapsed_time": "3:15:02", "remaining_time": "2:40:04"} +{"current_steps": 4765, "total_steps": 8674, "loss": 0.46085822582244873, "lr": 9.200206523272992e-07, "epoch": 1.0986857274613788, "percentage": 54.93, "elapsed_time": "3:15:05", "remaining_time": "2:40:02"} +{"current_steps": 4766, "total_steps": 8674, "loss": 0.5083057880401611, "lr": 9.196406182674964e-07, "epoch": 1.0989163015909615, "percentage": 54.95, "elapsed_time": "3:15:07", "remaining_time": "2:40:00"} +{"current_steps": 4767, "total_steps": 8674, "loss": 0.48307740688323975, "lr": 9.192605958887438e-07, "epoch": 1.0991468757205443, "percentage": 54.96, "elapsed_time": "3:15:10", "remaining_time": "2:39:57"} +{"current_steps": 4768, "total_steps": 8674, "loss": 0.5195509791374207, "lr": 9.188805852462824e-07, "epoch": 1.0993774498501268, "percentage": 54.97, "elapsed_time": 
"3:15:12", "remaining_time": "2:39:54"} +{"current_steps": 4769, "total_steps": 8674, "loss": 0.5161266326904297, "lr": 9.185005863953498e-07, "epoch": 1.0996080239797095, "percentage": 54.98, "elapsed_time": "3:15:14", "remaining_time": "2:39:52"} +{"current_steps": 4770, "total_steps": 8674, "loss": 0.4757764935493469, "lr": 9.181205993911827e-07, "epoch": 1.0998385981092922, "percentage": 54.99, "elapsed_time": "3:15:17", "remaining_time": "2:39:50"} +{"current_steps": 4771, "total_steps": 8674, "loss": 0.4071381688117981, "lr": 9.177406242890167e-07, "epoch": 1.1000691722388747, "percentage": 55.0, "elapsed_time": "3:15:19", "remaining_time": "2:39:47"} +{"current_steps": 4772, "total_steps": 8674, "loss": 0.4794449210166931, "lr": 9.173606611440842e-07, "epoch": 1.1002997463684574, "percentage": 55.01, "elapsed_time": "3:15:22", "remaining_time": "2:39:45"} +{"current_steps": 4773, "total_steps": 8674, "loss": 0.4678712487220764, "lr": 9.169807100116175e-07, "epoch": 1.1005303204980401, "percentage": 55.03, "elapsed_time": "3:15:24", "remaining_time": "2:39:42"} +{"current_steps": 4774, "total_steps": 8674, "loss": 0.43200960755348206, "lr": 9.166007709468456e-07, "epoch": 1.1007608946276228, "percentage": 55.04, "elapsed_time": "3:15:27", "remaining_time": "2:39:40"} +{"current_steps": 4775, "total_steps": 8674, "loss": 0.49283260107040405, "lr": 9.162208440049974e-07, "epoch": 1.1009914687572055, "percentage": 55.05, "elapsed_time": "3:15:29", "remaining_time": "2:39:37"} +{"current_steps": 4776, "total_steps": 8674, "loss": 0.4430215358734131, "lr": 9.158409292412982e-07, "epoch": 1.101222042886788, "percentage": 55.06, "elapsed_time": "3:15:32", "remaining_time": "2:39:35"} +{"current_steps": 4777, "total_steps": 8674, "loss": 0.4529581069946289, "lr": 9.154610267109731e-07, "epoch": 1.1014526170163708, "percentage": 55.07, "elapsed_time": "3:15:34", "remaining_time": "2:39:32"} +{"current_steps": 4778, "total_steps": 8674, "loss": 0.3872554302215576, 
"lr": 9.150811364692446e-07, "epoch": 1.1016831911459535, "percentage": 55.08, "elapsed_time": "3:15:37", "remaining_time": "2:39:30"} +{"current_steps": 4779, "total_steps": 8674, "loss": 0.466983437538147, "lr": 9.147012585713331e-07, "epoch": 1.101913765275536, "percentage": 55.1, "elapsed_time": "3:15:39", "remaining_time": "2:39:28"} +{"current_steps": 4780, "total_steps": 8674, "loss": 0.4841456115245819, "lr": 9.143213930724587e-07, "epoch": 1.1021443394051187, "percentage": 55.11, "elapsed_time": "3:15:42", "remaining_time": "2:39:25"} +{"current_steps": 4781, "total_steps": 8674, "loss": 0.4506613612174988, "lr": 9.139415400278376e-07, "epoch": 1.1023749135347014, "percentage": 55.12, "elapsed_time": "3:15:44", "remaining_time": "2:39:23"} +{"current_steps": 4782, "total_steps": 8674, "loss": 0.428241491317749, "lr": 9.135616994926861e-07, "epoch": 1.1026054876642841, "percentage": 55.13, "elapsed_time": "3:15:46", "remaining_time": "2:39:20"} +{"current_steps": 4783, "total_steps": 8674, "loss": 0.46940821409225464, "lr": 9.131818715222175e-07, "epoch": 1.1028360617938668, "percentage": 55.14, "elapsed_time": "3:15:49", "remaining_time": "2:39:18"} +{"current_steps": 4784, "total_steps": 8674, "loss": 0.4527658224105835, "lr": 9.12802056171644e-07, "epoch": 1.1030666359234493, "percentage": 55.15, "elapsed_time": "3:15:51", "remaining_time": "2:39:15"} +{"current_steps": 4785, "total_steps": 8674, "loss": 0.3284989893436432, "lr": 9.124222534961749e-07, "epoch": 1.103297210053032, "percentage": 55.16, "elapsed_time": "3:15:54", "remaining_time": "2:39:13"} +{"current_steps": 4786, "total_steps": 8674, "loss": 0.448346883058548, "lr": 9.120424635510193e-07, "epoch": 1.1035277841826148, "percentage": 55.18, "elapsed_time": "3:15:56", "remaining_time": "2:39:10"} +{"current_steps": 4787, "total_steps": 8674, "loss": 0.4625587463378906, "lr": 9.116626863913826e-07, "epoch": 1.1037583583121973, "percentage": 55.19, "elapsed_time": "3:15:59", "remaining_time": 
"2:39:08"} +{"current_steps": 4788, "total_steps": 8674, "loss": 0.37891942262649536, "lr": 9.112829220724703e-07, "epoch": 1.10398893244178, "percentage": 55.2, "elapsed_time": "3:16:01", "remaining_time": "2:39:06"} +{"current_steps": 4789, "total_steps": 8674, "loss": 0.48719239234924316, "lr": 9.109031706494841e-07, "epoch": 1.1042195065713627, "percentage": 55.21, "elapsed_time": "3:16:04", "remaining_time": "2:39:03"} +{"current_steps": 4790, "total_steps": 8674, "loss": 0.5341615676879883, "lr": 9.105234321776247e-07, "epoch": 1.1044500807009454, "percentage": 55.22, "elapsed_time": "3:16:06", "remaining_time": "2:39:01"} +{"current_steps": 4791, "total_steps": 8674, "loss": 0.36677777767181396, "lr": 9.101437067120918e-07, "epoch": 1.1046806548305281, "percentage": 55.23, "elapsed_time": "3:16:09", "remaining_time": "2:38:58"} +{"current_steps": 4792, "total_steps": 8674, "loss": 0.4348159432411194, "lr": 9.097639943080813e-07, "epoch": 1.1049112289601106, "percentage": 55.25, "elapsed_time": "3:16:11", "remaining_time": "2:38:56"} +{"current_steps": 4793, "total_steps": 8674, "loss": 0.44912683963775635, "lr": 9.093842950207891e-07, "epoch": 1.1051418030896933, "percentage": 55.26, "elapsed_time": "3:16:13", "remaining_time": "2:38:53"} +{"current_steps": 4794, "total_steps": 8674, "loss": 0.5576057434082031, "lr": 9.090046089054077e-07, "epoch": 1.105372377219276, "percentage": 55.27, "elapsed_time": "3:16:16", "remaining_time": "2:38:51"} +{"current_steps": 4795, "total_steps": 8674, "loss": 0.43964770436286926, "lr": 9.08624936017129e-07, "epoch": 1.1056029513488586, "percentage": 55.28, "elapsed_time": "3:16:19", "remaining_time": "2:38:48"} +{"current_steps": 4796, "total_steps": 8674, "loss": 0.4285386800765991, "lr": 9.082452764111415e-07, "epoch": 1.1058335254784413, "percentage": 55.29, "elapsed_time": "3:16:21", "remaining_time": "2:38:46"} +{"current_steps": 4797, "total_steps": 8674, "loss": 0.4257868230342865, "lr": 9.078656301426332e-07, 
"epoch": 1.106064099608024, "percentage": 55.3, "elapsed_time": "3:16:23", "remaining_time": "2:38:43"} +{"current_steps": 4798, "total_steps": 8674, "loss": 0.4540346562862396, "lr": 9.074859972667895e-07, "epoch": 1.1062946737376067, "percentage": 55.31, "elapsed_time": "3:16:26", "remaining_time": "2:38:41"} +{"current_steps": 4799, "total_steps": 8674, "loss": 0.5273457765579224, "lr": 9.071063778387933e-07, "epoch": 1.1065252478671892, "percentage": 55.33, "elapsed_time": "3:16:28", "remaining_time": "2:38:38"} +{"current_steps": 4800, "total_steps": 8674, "loss": 0.391310453414917, "lr": 9.067267719138268e-07, "epoch": 1.106755821996772, "percentage": 55.34, "elapsed_time": "3:16:31", "remaining_time": "2:38:36"} +{"current_steps": 4801, "total_steps": 8674, "loss": 0.47945383191108704, "lr": 9.063471795470691e-07, "epoch": 1.1069863961263546, "percentage": 55.35, "elapsed_time": "3:16:35", "remaining_time": "2:38:35"} +{"current_steps": 4802, "total_steps": 8674, "loss": 0.49561476707458496, "lr": 9.05967600793698e-07, "epoch": 1.1072169702559373, "percentage": 55.36, "elapsed_time": "3:16:37", "remaining_time": "2:38:32"} +{"current_steps": 4803, "total_steps": 8674, "loss": 0.4505256414413452, "lr": 9.05588035708889e-07, "epoch": 1.1074475443855198, "percentage": 55.37, "elapsed_time": "3:16:40", "remaining_time": "2:38:30"} +{"current_steps": 4804, "total_steps": 8674, "loss": 0.37591490149497986, "lr": 9.052084843478164e-07, "epoch": 1.1076781185151026, "percentage": 55.38, "elapsed_time": "3:16:42", "remaining_time": "2:38:27"} +{"current_steps": 4805, "total_steps": 8674, "loss": 0.478586345911026, "lr": 9.048289467656508e-07, "epoch": 1.1079086926446853, "percentage": 55.4, "elapsed_time": "3:16:45", "remaining_time": "2:38:25"} +{"current_steps": 4806, "total_steps": 8674, "loss": 0.4373725354671478, "lr": 9.044494230175625e-07, "epoch": 1.108139266774268, "percentage": 55.41, "elapsed_time": "3:16:47", "remaining_time": "2:38:23"} +{"current_steps": 
4807, "total_steps": 8674, "loss": 0.3976345360279083, "lr": 9.040699131587186e-07, "epoch": 1.1083698409038505, "percentage": 55.42, "elapsed_time": "3:16:49", "remaining_time": "2:38:20"} +{"current_steps": 4808, "total_steps": 8674, "loss": 0.44611310958862305, "lr": 9.036904172442857e-07, "epoch": 1.1086004150334332, "percentage": 55.43, "elapsed_time": "3:16:52", "remaining_time": "2:38:18"} +{"current_steps": 4809, "total_steps": 8674, "loss": 0.40816667675971985, "lr": 9.033109353294262e-07, "epoch": 1.108830989163016, "percentage": 55.44, "elapsed_time": "3:16:54", "remaining_time": "2:38:15"} +{"current_steps": 4810, "total_steps": 8674, "loss": 0.37462317943573, "lr": 9.029314674693023e-07, "epoch": 1.1090615632925986, "percentage": 55.45, "elapsed_time": "3:16:57", "remaining_time": "2:38:13"} +{"current_steps": 4811, "total_steps": 8674, "loss": 0.3856509327888489, "lr": 9.025520137190735e-07, "epoch": 1.1092921374221811, "percentage": 55.46, "elapsed_time": "3:16:59", "remaining_time": "2:38:10"} +{"current_steps": 4812, "total_steps": 8674, "loss": 0.4728443920612335, "lr": 9.021725741338969e-07, "epoch": 1.1095227115517639, "percentage": 55.48, "elapsed_time": "3:17:02", "remaining_time": "2:38:08"} +{"current_steps": 4813, "total_steps": 8674, "loss": 0.4614938795566559, "lr": 9.017931487689282e-07, "epoch": 1.1097532856813466, "percentage": 55.49, "elapsed_time": "3:17:04", "remaining_time": "2:38:06"} +{"current_steps": 4814, "total_steps": 8674, "loss": 0.4137331247329712, "lr": 9.014137376793203e-07, "epoch": 1.1099838598109293, "percentage": 55.5, "elapsed_time": "3:17:07", "remaining_time": "2:38:03"} +{"current_steps": 4815, "total_steps": 8674, "loss": 0.42436620593070984, "lr": 9.010343409202255e-07, "epoch": 1.1102144339405118, "percentage": 55.51, "elapsed_time": "3:17:09", "remaining_time": "2:38:01"} +{"current_steps": 4816, "total_steps": 8674, "loss": 0.43592822551727295, "lr": 9.006549585467916e-07, "epoch": 1.1104450080700945, 
"percentage": 55.52, "elapsed_time": "3:17:12", "remaining_time": "2:37:58"} +{"current_steps": 4817, "total_steps": 8674, "loss": 0.45627349615097046, "lr": 9.002755906141666e-07, "epoch": 1.1106755821996772, "percentage": 55.53, "elapsed_time": "3:17:14", "remaining_time": "2:37:56"} +{"current_steps": 4818, "total_steps": 8674, "loss": 0.5103771686553955, "lr": 8.998962371774953e-07, "epoch": 1.11090615632926, "percentage": 55.55, "elapsed_time": "3:17:17", "remaining_time": "2:37:53"} +{"current_steps": 4819, "total_steps": 8674, "loss": 0.470276802778244, "lr": 8.995168982919201e-07, "epoch": 1.1111367304588424, "percentage": 55.56, "elapsed_time": "3:17:19", "remaining_time": "2:37:51"} +{"current_steps": 4820, "total_steps": 8674, "loss": 0.49486416578292847, "lr": 8.991375740125823e-07, "epoch": 1.1113673045884251, "percentage": 55.57, "elapsed_time": "3:17:22", "remaining_time": "2:37:48"} +{"current_steps": 4821, "total_steps": 8674, "loss": 0.338329017162323, "lr": 8.987582643946201e-07, "epoch": 1.1115978787180079, "percentage": 55.58, "elapsed_time": "3:17:24", "remaining_time": "2:37:46"} +{"current_steps": 4822, "total_steps": 8674, "loss": 0.38252198696136475, "lr": 8.983789694931706e-07, "epoch": 1.1118284528475906, "percentage": 55.59, "elapsed_time": "3:17:27", "remaining_time": "2:37:43"} +{"current_steps": 4823, "total_steps": 8674, "loss": 0.47691571712493896, "lr": 8.979996893633675e-07, "epoch": 1.112059026977173, "percentage": 55.6, "elapsed_time": "3:17:29", "remaining_time": "2:37:41"} +{"current_steps": 4824, "total_steps": 8674, "loss": 0.40156808495521545, "lr": 8.976204240603433e-07, "epoch": 1.1122896011067558, "percentage": 55.61, "elapsed_time": "3:17:32", "remaining_time": "2:37:39"} +{"current_steps": 4825, "total_steps": 8674, "loss": 0.3837090730667114, "lr": 8.97241173639228e-07, "epoch": 1.1125201752363385, "percentage": 55.63, "elapsed_time": "3:17:34", "remaining_time": "2:37:36"} +{"current_steps": 4826, "total_steps": 
8674, "loss": 0.5094380378723145, "lr": 8.968619381551499e-07, "epoch": 1.1127507493659212, "percentage": 55.64, "elapsed_time": "3:17:36", "remaining_time": "2:37:34"} +{"current_steps": 4827, "total_steps": 8674, "loss": 0.48674100637435913, "lr": 8.964827176632339e-07, "epoch": 1.1129813234955037, "percentage": 55.65, "elapsed_time": "3:17:39", "remaining_time": "2:37:31"} +{"current_steps": 4828, "total_steps": 8674, "loss": 0.49288761615753174, "lr": 8.961035122186045e-07, "epoch": 1.1132118976250864, "percentage": 55.66, "elapsed_time": "3:17:41", "remaining_time": "2:37:29"} +{"current_steps": 4829, "total_steps": 8674, "loss": 0.42933952808380127, "lr": 8.957243218763824e-07, "epoch": 1.1134424717546691, "percentage": 55.67, "elapsed_time": "3:17:44", "remaining_time": "2:37:26"} +{"current_steps": 4830, "total_steps": 8674, "loss": 0.39244914054870605, "lr": 8.953451466916866e-07, "epoch": 1.1136730458842519, "percentage": 55.68, "elapsed_time": "3:17:46", "remaining_time": "2:37:24"} +{"current_steps": 4831, "total_steps": 8674, "loss": 0.44688090682029724, "lr": 8.949659867196348e-07, "epoch": 1.1139036200138344, "percentage": 55.7, "elapsed_time": "3:17:49", "remaining_time": "2:37:21"} +{"current_steps": 4832, "total_steps": 8674, "loss": 0.5388743877410889, "lr": 8.945868420153409e-07, "epoch": 1.114134194143417, "percentage": 55.71, "elapsed_time": "3:17:51", "remaining_time": "2:37:19"} +{"current_steps": 4833, "total_steps": 8674, "loss": 0.4320666193962097, "lr": 8.942077126339182e-07, "epoch": 1.1143647682729998, "percentage": 55.72, "elapsed_time": "3:17:54", "remaining_time": "2:37:16"} +{"current_steps": 4834, "total_steps": 8674, "loss": 0.37623411417007446, "lr": 8.938285986304762e-07, "epoch": 1.1145953424025825, "percentage": 55.73, "elapsed_time": "3:17:56", "remaining_time": "2:37:14"} +{"current_steps": 4835, "total_steps": 8674, "loss": 0.4743962287902832, "lr": 8.93449500060124e-07, "epoch": 1.114825916532165, "percentage": 55.74, 
"elapsed_time": "3:17:58", "remaining_time": "2:37:11"} +{"current_steps": 4836, "total_steps": 8674, "loss": 0.4833221435546875, "lr": 8.930704169779663e-07, "epoch": 1.1150564906617477, "percentage": 55.75, "elapsed_time": "3:18:01", "remaining_time": "2:37:09"} +{"current_steps": 4837, "total_steps": 8674, "loss": 0.48811084032058716, "lr": 8.926913494391074e-07, "epoch": 1.1152870647913304, "percentage": 55.76, "elapsed_time": "3:18:04", "remaining_time": "2:37:07"} +{"current_steps": 4838, "total_steps": 8674, "loss": 0.42525774240493774, "lr": 8.923122974986487e-07, "epoch": 1.1155176389209132, "percentage": 55.78, "elapsed_time": "3:18:06", "remaining_time": "2:37:04"} +{"current_steps": 4839, "total_steps": 8674, "loss": 0.4347909688949585, "lr": 8.919332612116884e-07, "epoch": 1.1157482130504957, "percentage": 55.79, "elapsed_time": "3:18:08", "remaining_time": "2:37:02"} +{"current_steps": 4840, "total_steps": 8674, "loss": 0.5085601806640625, "lr": 8.915542406333241e-07, "epoch": 1.1159787871800784, "percentage": 55.8, "elapsed_time": "3:18:11", "remaining_time": "2:37:00"} +{"current_steps": 4841, "total_steps": 8674, "loss": 0.4620482325553894, "lr": 8.911752358186497e-07, "epoch": 1.116209361309661, "percentage": 55.81, "elapsed_time": "3:18:14", "remaining_time": "2:36:57"} +{"current_steps": 4842, "total_steps": 8674, "loss": 0.44923216104507446, "lr": 8.907962468227582e-07, "epoch": 1.1164399354392438, "percentage": 55.82, "elapsed_time": "3:18:16", "remaining_time": "2:36:55"} +{"current_steps": 4843, "total_steps": 8674, "loss": 0.547439694404602, "lr": 8.904172737007386e-07, "epoch": 1.1166705095688263, "percentage": 55.83, "elapsed_time": "3:18:19", "remaining_time": "2:36:52"} +{"current_steps": 4844, "total_steps": 8674, "loss": 0.4609268307685852, "lr": 8.900383165076789e-07, "epoch": 1.116901083698409, "percentage": 55.85, "elapsed_time": "3:18:21", "remaining_time": "2:36:50"} +{"current_steps": 4845, "total_steps": 8674, "loss": 
0.41780030727386475, "lr": 8.896593752986642e-07, "epoch": 1.1171316578279917, "percentage": 55.86, "elapsed_time": "3:18:24", "remaining_time": "2:36:47"} +{"current_steps": 4846, "total_steps": 8674, "loss": 0.506212592124939, "lr": 8.89280450128778e-07, "epoch": 1.1173622319575744, "percentage": 55.87, "elapsed_time": "3:18:26", "remaining_time": "2:36:45"} +{"current_steps": 4847, "total_steps": 8674, "loss": 0.4436545968055725, "lr": 8.889015410531001e-07, "epoch": 1.117592806087157, "percentage": 55.88, "elapsed_time": "3:18:29", "remaining_time": "2:36:42"} +{"current_steps": 4848, "total_steps": 8674, "loss": 0.4473826289176941, "lr": 8.885226481267093e-07, "epoch": 1.1178233802167397, "percentage": 55.89, "elapsed_time": "3:18:31", "remaining_time": "2:36:40"} +{"current_steps": 4849, "total_steps": 8674, "loss": 0.43499836325645447, "lr": 8.881437714046815e-07, "epoch": 1.1180539543463224, "percentage": 55.9, "elapsed_time": "3:18:33", "remaining_time": "2:36:37"} +{"current_steps": 4850, "total_steps": 8674, "loss": 0.522705078125, "lr": 8.877649109420899e-07, "epoch": 1.118284528475905, "percentage": 55.91, "elapsed_time": "3:18:36", "remaining_time": "2:36:35"} +{"current_steps": 4851, "total_steps": 8674, "loss": 0.42146036028862, "lr": 8.873860667940064e-07, "epoch": 1.1185151026054876, "percentage": 55.93, "elapsed_time": "3:18:38", "remaining_time": "2:36:32"} +{"current_steps": 4852, "total_steps": 8674, "loss": 0.5875130891799927, "lr": 8.870072390154989e-07, "epoch": 1.1187456767350703, "percentage": 55.94, "elapsed_time": "3:18:40", "remaining_time": "2:36:30"} +{"current_steps": 4853, "total_steps": 8674, "loss": 0.5187985301017761, "lr": 8.866284276616345e-07, "epoch": 1.118976250864653, "percentage": 55.95, "elapsed_time": "3:18:43", "remaining_time": "2:36:27"} +{"current_steps": 4854, "total_steps": 8674, "loss": 0.46115952730178833, "lr": 8.86249632787477e-07, "epoch": 1.1192068249942357, "percentage": 55.96, "elapsed_time": "3:18:45", 
"remaining_time": "2:36:25"} +{"current_steps": 4855, "total_steps": 8674, "loss": 0.4926493167877197, "lr": 8.858708544480886e-07, "epoch": 1.1194373991238182, "percentage": 55.97, "elapsed_time": "3:18:48", "remaining_time": "2:36:22"} +{"current_steps": 4856, "total_steps": 8674, "loss": 0.44512006640434265, "lr": 8.854920926985278e-07, "epoch": 1.119667973253401, "percentage": 55.98, "elapsed_time": "3:18:50", "remaining_time": "2:36:20"} +{"current_steps": 4857, "total_steps": 8674, "loss": 0.45973241329193115, "lr": 8.85113347593852e-07, "epoch": 1.1198985473829837, "percentage": 55.99, "elapsed_time": "3:18:53", "remaining_time": "2:36:17"} +{"current_steps": 4858, "total_steps": 8674, "loss": 0.4915385842323303, "lr": 8.847346191891157e-07, "epoch": 1.1201291215125664, "percentage": 56.01, "elapsed_time": "3:18:55", "remaining_time": "2:36:15"} +{"current_steps": 4859, "total_steps": 8674, "loss": 0.4457864463329315, "lr": 8.843559075393701e-07, "epoch": 1.1203596956421489, "percentage": 56.02, "elapsed_time": "3:18:57", "remaining_time": "2:36:12"} +{"current_steps": 4860, "total_steps": 8674, "loss": 0.4782453775405884, "lr": 8.839772126996658e-07, "epoch": 1.1205902697717316, "percentage": 56.03, "elapsed_time": "3:19:00", "remaining_time": "2:36:10"} +{"current_steps": 4861, "total_steps": 8674, "loss": 0.42789584398269653, "lr": 8.835985347250492e-07, "epoch": 1.1208208439013143, "percentage": 56.04, "elapsed_time": "3:19:03", "remaining_time": "2:36:08"} +{"current_steps": 4862, "total_steps": 8674, "loss": 0.49990910291671753, "lr": 8.832198736705657e-07, "epoch": 1.121051418030897, "percentage": 56.05, "elapsed_time": "3:19:05", "remaining_time": "2:36:05"} +{"current_steps": 4863, "total_steps": 8674, "loss": 0.3735005855560303, "lr": 8.828412295912566e-07, "epoch": 1.1212819921604795, "percentage": 56.06, "elapsed_time": "3:19:08", "remaining_time": "2:36:03"} +{"current_steps": 4864, "total_steps": 8674, "loss": 0.402673602104187, "lr": 
8.824626025421624e-07, "epoch": 1.1215125662900622, "percentage": 56.08, "elapsed_time": "3:19:10", "remaining_time": "2:36:00"} +{"current_steps": 4865, "total_steps": 8674, "loss": 0.4675491452217102, "lr": 8.820839925783198e-07, "epoch": 1.121743140419645, "percentage": 56.09, "elapsed_time": "3:19:12", "remaining_time": "2:35:58"} +{"current_steps": 4866, "total_steps": 8674, "loss": 0.5098662376403809, "lr": 8.817053997547645e-07, "epoch": 1.1219737145492277, "percentage": 56.1, "elapsed_time": "3:19:15", "remaining_time": "2:35:55"} +{"current_steps": 4867, "total_steps": 8674, "loss": 0.44478029012680054, "lr": 8.813268241265278e-07, "epoch": 1.1222042886788102, "percentage": 56.11, "elapsed_time": "3:19:17", "remaining_time": "2:35:53"} +{"current_steps": 4868, "total_steps": 8674, "loss": 0.410754919052124, "lr": 8.809482657486401e-07, "epoch": 1.1224348628083929, "percentage": 56.12, "elapsed_time": "3:19:20", "remaining_time": "2:35:50"} +{"current_steps": 4869, "total_steps": 8674, "loss": 0.4198191165924072, "lr": 8.805697246761288e-07, "epoch": 1.1226654369379756, "percentage": 56.13, "elapsed_time": "3:19:22", "remaining_time": "2:35:48"} +{"current_steps": 4870, "total_steps": 8674, "loss": 0.5399911403656006, "lr": 8.801912009640178e-07, "epoch": 1.1228960110675583, "percentage": 56.14, "elapsed_time": "3:19:25", "remaining_time": "2:35:46"} +{"current_steps": 4871, "total_steps": 8674, "loss": 0.3879680633544922, "lr": 8.798126946673305e-07, "epoch": 1.1231265851971408, "percentage": 56.16, "elapsed_time": "3:19:27", "remaining_time": "2:35:43"} +{"current_steps": 4872, "total_steps": 8674, "loss": 0.4629073739051819, "lr": 8.794342058410856e-07, "epoch": 1.1233571593267235, "percentage": 56.17, "elapsed_time": "3:19:30", "remaining_time": "2:35:41"} +{"current_steps": 4873, "total_steps": 8674, "loss": 0.42299884557724, "lr": 8.790557345403013e-07, "epoch": 1.1235877334563062, "percentage": 56.18, "elapsed_time": "3:19:32", "remaining_time": 
"2:35:38"} +{"current_steps": 4874, "total_steps": 8674, "loss": 0.509437620639801, "lr": 8.786772808199912e-07, "epoch": 1.123818307585889, "percentage": 56.19, "elapsed_time": "3:19:34", "remaining_time": "2:35:36"} +{"current_steps": 4875, "total_steps": 8674, "loss": 0.4682687222957611, "lr": 8.782988447351684e-07, "epoch": 1.1240488817154715, "percentage": 56.2, "elapsed_time": "3:19:37", "remaining_time": "2:35:33"} +{"current_steps": 4876, "total_steps": 8674, "loss": 0.41155606508255005, "lr": 8.779204263408416e-07, "epoch": 1.1242794558450542, "percentage": 56.21, "elapsed_time": "3:19:39", "remaining_time": "2:35:31"} +{"current_steps": 4877, "total_steps": 8674, "loss": 0.4705810844898224, "lr": 8.775420256920182e-07, "epoch": 1.124510029974637, "percentage": 56.23, "elapsed_time": "3:19:42", "remaining_time": "2:35:28"} +{"current_steps": 4878, "total_steps": 8674, "loss": 0.36571264266967773, "lr": 8.771636428437022e-07, "epoch": 1.1247406041042196, "percentage": 56.24, "elapsed_time": "3:19:44", "remaining_time": "2:35:26"} +{"current_steps": 4879, "total_steps": 8674, "loss": 0.36618396639823914, "lr": 8.76785277850896e-07, "epoch": 1.124971178233802, "percentage": 56.25, "elapsed_time": "3:19:47", "remaining_time": "2:35:24"} +{"current_steps": 4880, "total_steps": 8674, "loss": 0.4861210584640503, "lr": 8.764069307685983e-07, "epoch": 1.1252017523633848, "percentage": 56.26, "elapsed_time": "3:19:49", "remaining_time": "2:35:21"} +{"current_steps": 4881, "total_steps": 8674, "loss": 0.43346846103668213, "lr": 8.760286016518056e-07, "epoch": 1.1254323264929675, "percentage": 56.27, "elapsed_time": "3:19:52", "remaining_time": "2:35:19"} +{"current_steps": 4882, "total_steps": 8674, "loss": 0.40088707208633423, "lr": 8.756502905555123e-07, "epoch": 1.12566290062255, "percentage": 56.28, "elapsed_time": "3:19:54", "remaining_time": "2:35:16"} +{"current_steps": 4883, "total_steps": 8674, "loss": 0.4088619649410248, "lr": 8.752719975347092e-07, 
"epoch": 1.1258934747521328, "percentage": 56.29, "elapsed_time": "3:19:57", "remaining_time": "2:35:14"} +{"current_steps": 4884, "total_steps": 8674, "loss": 0.4988909661769867, "lr": 8.748937226443857e-07, "epoch": 1.1261240488817155, "percentage": 56.31, "elapsed_time": "3:19:59", "remaining_time": "2:35:11"} +{"current_steps": 4885, "total_steps": 8674, "loss": 0.47022196650505066, "lr": 8.745154659395271e-07, "epoch": 1.1263546230112982, "percentage": 56.32, "elapsed_time": "3:20:01", "remaining_time": "2:35:09"} +{"current_steps": 4886, "total_steps": 8674, "loss": 0.45005398988723755, "lr": 8.741372274751178e-07, "epoch": 1.126585197140881, "percentage": 56.33, "elapsed_time": "3:20:04", "remaining_time": "2:35:06"} +{"current_steps": 4887, "total_steps": 8674, "loss": 0.4632537364959717, "lr": 8.737590073061376e-07, "epoch": 1.1268157712704634, "percentage": 56.34, "elapsed_time": "3:20:07", "remaining_time": "2:35:04"} +{"current_steps": 4888, "total_steps": 8674, "loss": 0.41034963726997375, "lr": 8.733808054875653e-07, "epoch": 1.1270463454000461, "percentage": 56.35, "elapsed_time": "3:20:09", "remaining_time": "2:35:01"} +{"current_steps": 4889, "total_steps": 8674, "loss": 0.5169668793678284, "lr": 8.730026220743765e-07, "epoch": 1.1272769195296288, "percentage": 56.36, "elapsed_time": "3:20:11", "remaining_time": "2:34:59"} +{"current_steps": 4890, "total_steps": 8674, "loss": 0.44972485303878784, "lr": 8.726244571215431e-07, "epoch": 1.1275074936592113, "percentage": 56.38, "elapsed_time": "3:20:14", "remaining_time": "2:34:57"} +{"current_steps": 4891, "total_steps": 8674, "loss": 0.4854368567466736, "lr": 8.722463106840361e-07, "epoch": 1.127738067788794, "percentage": 56.39, "elapsed_time": "3:20:16", "remaining_time": "2:34:54"} +{"current_steps": 4892, "total_steps": 8674, "loss": 0.3029147982597351, "lr": 8.718681828168223e-07, "epoch": 1.1279686419183768, "percentage": 56.4, "elapsed_time": "3:20:19", "remaining_time": "2:34:52"} 
+{"current_steps": 4893, "total_steps": 8674, "loss": 0.4770504832267761, "lr": 8.714900735748671e-07, "epoch": 1.1281992160479595, "percentage": 56.41, "elapsed_time": "3:20:21", "remaining_time": "2:34:49"} +{"current_steps": 4894, "total_steps": 8674, "loss": 0.48508110642433167, "lr": 8.711119830131317e-07, "epoch": 1.1284297901775422, "percentage": 56.42, "elapsed_time": "3:20:24", "remaining_time": "2:34:47"} +{"current_steps": 4895, "total_steps": 8674, "loss": 0.43302488327026367, "lr": 8.707339111865761e-07, "epoch": 1.1286603643071247, "percentage": 56.43, "elapsed_time": "3:20:26", "remaining_time": "2:34:44"} +{"current_steps": 4896, "total_steps": 8674, "loss": 0.5720575451850891, "lr": 8.703558581501563e-07, "epoch": 1.1288909384367074, "percentage": 56.44, "elapsed_time": "3:20:29", "remaining_time": "2:34:42"} +{"current_steps": 4897, "total_steps": 8674, "loss": 0.48236098885536194, "lr": 8.69977823958827e-07, "epoch": 1.1291215125662901, "percentage": 56.46, "elapsed_time": "3:20:32", "remaining_time": "2:34:40"} +{"current_steps": 4898, "total_steps": 8674, "loss": 0.48531901836395264, "lr": 8.69599808667538e-07, "epoch": 1.1293520866958726, "percentage": 56.47, "elapsed_time": "3:20:34", "remaining_time": "2:34:37"} +{"current_steps": 4899, "total_steps": 8674, "loss": 0.4150174856185913, "lr": 8.69221812331239e-07, "epoch": 1.1295826608254553, "percentage": 56.48, "elapsed_time": "3:20:36", "remaining_time": "2:34:35"} +{"current_steps": 4900, "total_steps": 8674, "loss": 0.4729560911655426, "lr": 8.688438350048748e-07, "epoch": 1.129813234955038, "percentage": 56.49, "elapsed_time": "3:20:39", "remaining_time": "2:34:32"} +{"current_steps": 4901, "total_steps": 8674, "loss": 0.5081748962402344, "lr": 8.684658767433881e-07, "epoch": 1.1300438090846208, "percentage": 56.5, "elapsed_time": "3:20:43", "remaining_time": "2:34:31"} +{"current_steps": 4902, "total_steps": 8674, "loss": 0.4552333354949951, "lr": 8.680879376017197e-07, "epoch": 
1.1302743832142035, "percentage": 56.51, "elapsed_time": "3:20:45", "remaining_time": "2:34:29"} +{"current_steps": 4903, "total_steps": 8674, "loss": 0.4784387946128845, "lr": 8.67710017634806e-07, "epoch": 1.130504957343786, "percentage": 56.53, "elapsed_time": "3:20:48", "remaining_time": "2:34:26"} +{"current_steps": 4904, "total_steps": 8674, "loss": 0.46922338008880615, "lr": 8.673321168975823e-07, "epoch": 1.1307355314733687, "percentage": 56.54, "elapsed_time": "3:20:51", "remaining_time": "2:34:24"} +{"current_steps": 4905, "total_steps": 8674, "loss": 0.38181525468826294, "lr": 8.669542354449797e-07, "epoch": 1.1309661056029514, "percentage": 56.55, "elapsed_time": "3:20:53", "remaining_time": "2:34:21"} +{"current_steps": 4906, "total_steps": 8674, "loss": 0.4729689359664917, "lr": 8.665763733319278e-07, "epoch": 1.131196679732534, "percentage": 56.56, "elapsed_time": "3:20:55", "remaining_time": "2:34:19"} +{"current_steps": 4907, "total_steps": 8674, "loss": 0.3934294581413269, "lr": 8.661985306133517e-07, "epoch": 1.1314272538621166, "percentage": 56.57, "elapsed_time": "3:20:58", "remaining_time": "2:34:16"} +{"current_steps": 4908, "total_steps": 8674, "loss": 0.40270352363586426, "lr": 8.658207073441754e-07, "epoch": 1.1316578279916993, "percentage": 56.58, "elapsed_time": "3:21:00", "remaining_time": "2:34:14"} +{"current_steps": 4909, "total_steps": 8674, "loss": 0.43291163444519043, "lr": 8.654429035793196e-07, "epoch": 1.131888402121282, "percentage": 56.59, "elapsed_time": "3:21:03", "remaining_time": "2:34:12"} +{"current_steps": 4910, "total_steps": 8674, "loss": 0.5054877996444702, "lr": 8.650651193737009e-07, "epoch": 1.1321189762508648, "percentage": 56.61, "elapsed_time": "3:21:05", "remaining_time": "2:34:09"} +{"current_steps": 4911, "total_steps": 8674, "loss": 0.5043776035308838, "lr": 8.646873547822347e-07, "epoch": 1.1323495503804473, "percentage": 56.62, "elapsed_time": "3:21:08", "remaining_time": "2:34:07"} +{"current_steps": 
4912, "total_steps": 8674, "loss": 0.4246225953102112, "lr": 8.643096098598328e-07, "epoch": 1.13258012451003, "percentage": 56.63, "elapsed_time": "3:21:10", "remaining_time": "2:34:04"} +{"current_steps": 4913, "total_steps": 8674, "loss": 0.4514849781990051, "lr": 8.639318846614048e-07, "epoch": 1.1328106986396127, "percentage": 56.64, "elapsed_time": "3:21:13", "remaining_time": "2:34:02"} +{"current_steps": 4914, "total_steps": 8674, "loss": 0.4780477285385132, "lr": 8.635541792418557e-07, "epoch": 1.1330412727691952, "percentage": 56.65, "elapsed_time": "3:21:15", "remaining_time": "2:33:59"} +{"current_steps": 4915, "total_steps": 8674, "loss": 0.47164270281791687, "lr": 8.631764936560899e-07, "epoch": 1.133271846898778, "percentage": 56.66, "elapsed_time": "3:21:18", "remaining_time": "2:33:57"} +{"current_steps": 4916, "total_steps": 8674, "loss": 0.5462276339530945, "lr": 8.62798827959007e-07, "epoch": 1.1335024210283606, "percentage": 56.68, "elapsed_time": "3:21:20", "remaining_time": "2:33:54"} +{"current_steps": 4917, "total_steps": 8674, "loss": 0.37229591608047485, "lr": 8.624211822055055e-07, "epoch": 1.1337329951579433, "percentage": 56.69, "elapsed_time": "3:21:23", "remaining_time": "2:33:52"} +{"current_steps": 4918, "total_steps": 8674, "loss": 0.46595901250839233, "lr": 8.620435564504791e-07, "epoch": 1.133963569287526, "percentage": 56.7, "elapsed_time": "3:21:25", "remaining_time": "2:33:50"} +{"current_steps": 4919, "total_steps": 8674, "loss": 0.4645708203315735, "lr": 8.616659507488201e-07, "epoch": 1.1341941434171086, "percentage": 56.71, "elapsed_time": "3:21:27", "remaining_time": "2:33:47"} +{"current_steps": 4920, "total_steps": 8674, "loss": 0.4309888482093811, "lr": 8.612883651554173e-07, "epoch": 1.1344247175466913, "percentage": 56.72, "elapsed_time": "3:21:30", "remaining_time": "2:33:45"} +{"current_steps": 4921, "total_steps": 8674, "loss": 0.4000548720359802, "lr": 8.60910799725156e-07, "epoch": 1.134655291676274, 
"percentage": 56.73, "elapsed_time": "3:21:32", "remaining_time": "2:33:42"} +{"current_steps": 4922, "total_steps": 8674, "loss": 0.41321274638175964, "lr": 8.6053325451292e-07, "epoch": 1.1348858658058565, "percentage": 56.74, "elapsed_time": "3:21:35", "remaining_time": "2:33:40"} +{"current_steps": 4923, "total_steps": 8674, "loss": 0.38982951641082764, "lr": 8.601557295735884e-07, "epoch": 1.1351164399354392, "percentage": 56.76, "elapsed_time": "3:21:37", "remaining_time": "2:33:37"} +{"current_steps": 4924, "total_steps": 8674, "loss": 0.44623300433158875, "lr": 8.597782249620394e-07, "epoch": 1.135347014065022, "percentage": 56.77, "elapsed_time": "3:21:40", "remaining_time": "2:33:35"} +{"current_steps": 4925, "total_steps": 8674, "loss": 0.46876993775367737, "lr": 8.594007407331458e-07, "epoch": 1.1355775881946046, "percentage": 56.78, "elapsed_time": "3:21:42", "remaining_time": "2:33:32"} +{"current_steps": 4926, "total_steps": 8674, "loss": 0.41345149278640747, "lr": 8.590232769417803e-07, "epoch": 1.1358081623241871, "percentage": 56.79, "elapsed_time": "3:21:45", "remaining_time": "2:33:30"} +{"current_steps": 4927, "total_steps": 8674, "loss": 0.4199402332305908, "lr": 8.586458336428095e-07, "epoch": 1.1360387364537698, "percentage": 56.8, "elapsed_time": "3:21:47", "remaining_time": "2:33:27"} +{"current_steps": 4928, "total_steps": 8674, "loss": 0.4424753785133362, "lr": 8.582684108910998e-07, "epoch": 1.1362693105833526, "percentage": 56.81, "elapsed_time": "3:21:49", "remaining_time": "2:33:25"} +{"current_steps": 4929, "total_steps": 8674, "loss": 0.5066598057746887, "lr": 8.57891008741513e-07, "epoch": 1.1364998847129353, "percentage": 56.82, "elapsed_time": "3:21:52", "remaining_time": "2:33:22"} +{"current_steps": 4930, "total_steps": 8674, "loss": 0.45959407091140747, "lr": 8.575136272489081e-07, "epoch": 1.1367304588425178, "percentage": 56.84, "elapsed_time": "3:21:55", "remaining_time": "2:33:20"} +{"current_steps": 4931, "total_steps": 
8674, "loss": 0.4579051733016968, "lr": 8.571362664681415e-07, "epoch": 1.1369610329721005, "percentage": 56.85, "elapsed_time": "3:21:57", "remaining_time": "2:33:18"} +{"current_steps": 4932, "total_steps": 8674, "loss": 0.5125559568405151, "lr": 8.567589264540665e-07, "epoch": 1.1371916071016832, "percentage": 56.86, "elapsed_time": "3:21:59", "remaining_time": "2:33:15"} +{"current_steps": 4933, "total_steps": 8674, "loss": 0.4236595630645752, "lr": 8.563816072615335e-07, "epoch": 1.137422181231266, "percentage": 56.87, "elapsed_time": "3:22:02", "remaining_time": "2:33:13"} +{"current_steps": 4934, "total_steps": 8674, "loss": 0.40187013149261475, "lr": 8.56004308945389e-07, "epoch": 1.1376527553608484, "percentage": 56.88, "elapsed_time": "3:22:04", "remaining_time": "2:33:10"} +{"current_steps": 4935, "total_steps": 8674, "loss": 0.5069487690925598, "lr": 8.556270315604778e-07, "epoch": 1.1378833294904311, "percentage": 56.89, "elapsed_time": "3:22:07", "remaining_time": "2:33:08"} +{"current_steps": 4936, "total_steps": 8674, "loss": 0.4032680094242096, "lr": 8.552497751616406e-07, "epoch": 1.1381139036200139, "percentage": 56.91, "elapsed_time": "3:22:09", "remaining_time": "2:33:05"} +{"current_steps": 4937, "total_steps": 8674, "loss": 0.4745323061943054, "lr": 8.548725398037158e-07, "epoch": 1.1383444777495966, "percentage": 56.92, "elapsed_time": "3:22:12", "remaining_time": "2:33:03"} +{"current_steps": 4938, "total_steps": 8674, "loss": 0.5203470587730408, "lr": 8.544953255415379e-07, "epoch": 1.138575051879179, "percentage": 56.93, "elapsed_time": "3:22:14", "remaining_time": "2:33:00"} +{"current_steps": 4939, "total_steps": 8674, "loss": 0.3780457079410553, "lr": 8.541181324299392e-07, "epoch": 1.1388056260087618, "percentage": 56.94, "elapsed_time": "3:22:17", "remaining_time": "2:32:58"} +{"current_steps": 4940, "total_steps": 8674, "loss": 0.4544069766998291, "lr": 8.537409605237486e-07, "epoch": 1.1390362001383445, "percentage": 56.95, 
"elapsed_time": "3:22:19", "remaining_time": "2:32:55"} +{"current_steps": 4941, "total_steps": 8674, "loss": 0.3692469000816345, "lr": 8.533638098777914e-07, "epoch": 1.1392667742679272, "percentage": 56.96, "elapsed_time": "3:22:21", "remaining_time": "2:32:53"} +{"current_steps": 4942, "total_steps": 8674, "loss": 0.4733508825302124, "lr": 8.529866805468907e-07, "epoch": 1.1394973483975097, "percentage": 56.97, "elapsed_time": "3:22:24", "remaining_time": "2:32:50"} +{"current_steps": 4943, "total_steps": 8674, "loss": 0.5165152549743652, "lr": 8.526095725858658e-07, "epoch": 1.1397279225270924, "percentage": 56.99, "elapsed_time": "3:22:26", "remaining_time": "2:32:48"} +{"current_steps": 4944, "total_steps": 8674, "loss": 0.40220290422439575, "lr": 8.522324860495336e-07, "epoch": 1.1399584966566751, "percentage": 57.0, "elapsed_time": "3:22:29", "remaining_time": "2:32:45"} +{"current_steps": 4945, "total_steps": 8674, "loss": 0.511976957321167, "lr": 8.518554209927066e-07, "epoch": 1.1401890707862579, "percentage": 57.01, "elapsed_time": "3:22:31", "remaining_time": "2:32:43"} +{"current_steps": 4946, "total_steps": 8674, "loss": 0.4472247362136841, "lr": 8.514783774701959e-07, "epoch": 1.1404196449158404, "percentage": 57.02, "elapsed_time": "3:22:34", "remaining_time": "2:32:41"} +{"current_steps": 4947, "total_steps": 8674, "loss": 0.4368797242641449, "lr": 8.51101355536808e-07, "epoch": 1.140650219045423, "percentage": 57.03, "elapsed_time": "3:22:36", "remaining_time": "2:32:38"} +{"current_steps": 4948, "total_steps": 8674, "loss": 0.3794320225715637, "lr": 8.507243552473476e-07, "epoch": 1.1408807931750058, "percentage": 57.04, "elapsed_time": "3:22:38", "remaining_time": "2:32:36"} +{"current_steps": 4949, "total_steps": 8674, "loss": 0.5229817628860474, "lr": 8.50347376656615e-07, "epoch": 1.1411113673045885, "percentage": 57.06, "elapsed_time": "3:22:41", "remaining_time": "2:32:33"} +{"current_steps": 4950, "total_steps": 8674, "loss": 
0.4771326780319214, "lr": 8.499704198194075e-07, "epoch": 1.141341941434171, "percentage": 57.07, "elapsed_time": "3:22:43", "remaining_time": "2:32:30"} +{"current_steps": 4951, "total_steps": 8674, "loss": 0.45151978731155396, "lr": 8.495934847905201e-07, "epoch": 1.1415725155637537, "percentage": 57.08, "elapsed_time": "3:22:46", "remaining_time": "2:32:28"} +{"current_steps": 4952, "total_steps": 8674, "loss": 0.3963208496570587, "lr": 8.492165716247439e-07, "epoch": 1.1418030896933364, "percentage": 57.09, "elapsed_time": "3:22:48", "remaining_time": "2:32:25"} +{"current_steps": 4953, "total_steps": 8674, "loss": 0.37465882301330566, "lr": 8.488396803768675e-07, "epoch": 1.1420336638229192, "percentage": 57.1, "elapsed_time": "3:22:50", "remaining_time": "2:32:23"} +{"current_steps": 4954, "total_steps": 8674, "loss": 0.437372088432312, "lr": 8.484628111016752e-07, "epoch": 1.1422642379525016, "percentage": 57.11, "elapsed_time": "3:22:53", "remaining_time": "2:32:21"} +{"current_steps": 4955, "total_steps": 8674, "loss": 0.40495651960372925, "lr": 8.480859638539492e-07, "epoch": 1.1424948120820844, "percentage": 57.12, "elapsed_time": "3:22:55", "remaining_time": "2:32:18"} +{"current_steps": 4956, "total_steps": 8674, "loss": 0.5346927642822266, "lr": 8.477091386884677e-07, "epoch": 1.142725386211667, "percentage": 57.14, "elapsed_time": "3:22:58", "remaining_time": "2:32:16"} +{"current_steps": 4957, "total_steps": 8674, "loss": 0.42448925971984863, "lr": 8.473323356600068e-07, "epoch": 1.1429559603412498, "percentage": 57.15, "elapsed_time": "3:23:00", "remaining_time": "2:32:13"} +{"current_steps": 4958, "total_steps": 8674, "loss": 0.4715193808078766, "lr": 8.469555548233378e-07, "epoch": 1.1431865344708323, "percentage": 57.16, "elapsed_time": "3:23:02", "remaining_time": "2:32:11"} +{"current_steps": 4959, "total_steps": 8674, "loss": 0.4721440076828003, "lr": 8.465787962332301e-07, "epoch": 1.143417108600415, "percentage": 57.17, "elapsed_time": 
"3:23:05", "remaining_time": "2:32:08"} +{"current_steps": 4960, "total_steps": 8674, "loss": 0.5478333234786987, "lr": 8.462020599444495e-07, "epoch": 1.1436476827299977, "percentage": 57.18, "elapsed_time": "3:23:07", "remaining_time": "2:32:06"} +{"current_steps": 4961, "total_steps": 8674, "loss": 0.4005582928657532, "lr": 8.458253460117577e-07, "epoch": 1.1438782568595804, "percentage": 57.19, "elapsed_time": "3:23:10", "remaining_time": "2:32:03"} +{"current_steps": 4962, "total_steps": 8674, "loss": 0.43886178731918335, "lr": 8.454486544899146e-07, "epoch": 1.144108830989163, "percentage": 57.21, "elapsed_time": "3:23:12", "remaining_time": "2:32:01"} +{"current_steps": 4963, "total_steps": 8674, "loss": 0.4404095709323883, "lr": 8.450719854336758e-07, "epoch": 1.1443394051187457, "percentage": 57.22, "elapsed_time": "3:23:15", "remaining_time": "2:31:58"} +{"current_steps": 4964, "total_steps": 8674, "loss": 0.5386335849761963, "lr": 8.446953388977943e-07, "epoch": 1.1445699792483284, "percentage": 57.23, "elapsed_time": "3:23:17", "remaining_time": "2:31:56"} +{"current_steps": 4965, "total_steps": 8674, "loss": 0.4576258659362793, "lr": 8.44318714937019e-07, "epoch": 1.144800553377911, "percentage": 57.24, "elapsed_time": "3:23:20", "remaining_time": "2:31:53"} +{"current_steps": 4966, "total_steps": 8674, "loss": 0.4619024991989136, "lr": 8.439421136060964e-07, "epoch": 1.1450311275074936, "percentage": 57.25, "elapsed_time": "3:23:22", "remaining_time": "2:31:51"} +{"current_steps": 4967, "total_steps": 8674, "loss": 0.4071081876754761, "lr": 8.435655349597689e-07, "epoch": 1.1452617016370763, "percentage": 57.26, "elapsed_time": "3:23:25", "remaining_time": "2:31:49"} +{"current_steps": 4968, "total_steps": 8674, "loss": 0.4605948328971863, "lr": 8.431889790527769e-07, "epoch": 1.145492275766659, "percentage": 57.27, "elapsed_time": "3:23:27", "remaining_time": "2:31:46"} +{"current_steps": 4969, "total_steps": 8674, "loss": 0.46706438064575195, "lr": 
8.428124459398554e-07, "epoch": 1.1457228498962417, "percentage": 57.29, "elapsed_time": "3:23:30", "remaining_time": "2:31:44"} +{"current_steps": 4970, "total_steps": 8674, "loss": 0.39674657583236694, "lr": 8.424359356757383e-07, "epoch": 1.1459534240258242, "percentage": 57.3, "elapsed_time": "3:23:32", "remaining_time": "2:31:41"} +{"current_steps": 4971, "total_steps": 8674, "loss": 0.4421246647834778, "lr": 8.42059448315155e-07, "epoch": 1.146183998155407, "percentage": 57.31, "elapsed_time": "3:23:35", "remaining_time": "2:31:39"} +{"current_steps": 4972, "total_steps": 8674, "loss": 0.5220682621002197, "lr": 8.416829839128312e-07, "epoch": 1.1464145722849897, "percentage": 57.32, "elapsed_time": "3:23:37", "remaining_time": "2:31:36"} +{"current_steps": 4973, "total_steps": 8674, "loss": 0.40189129114151, "lr": 8.413065425234904e-07, "epoch": 1.1466451464145724, "percentage": 57.33, "elapsed_time": "3:23:39", "remaining_time": "2:31:34"} +{"current_steps": 4974, "total_steps": 8674, "loss": 0.448421835899353, "lr": 8.409301242018517e-07, "epoch": 1.1468757205441549, "percentage": 57.34, "elapsed_time": "3:23:42", "remaining_time": "2:31:31"} +{"current_steps": 4975, "total_steps": 8674, "loss": 0.49476757645606995, "lr": 8.405537290026318e-07, "epoch": 1.1471062946737376, "percentage": 57.36, "elapsed_time": "3:23:44", "remaining_time": "2:31:29"} +{"current_steps": 4976, "total_steps": 8674, "loss": 0.3888528347015381, "lr": 8.401773569805431e-07, "epoch": 1.1473368688033203, "percentage": 57.37, "elapsed_time": "3:23:47", "remaining_time": "2:31:26"} +{"current_steps": 4977, "total_steps": 8674, "loss": 0.49057653546333313, "lr": 8.398010081902956e-07, "epoch": 1.1475674429329028, "percentage": 57.38, "elapsed_time": "3:23:49", "remaining_time": "2:31:24"} +{"current_steps": 4978, "total_steps": 8674, "loss": 0.41700610518455505, "lr": 8.39424682686595e-07, "epoch": 1.1477980170624855, "percentage": 57.39, "elapsed_time": "3:23:52", "remaining_time": 
"2:31:21"} +{"current_steps": 4979, "total_steps": 8674, "loss": 0.4801902770996094, "lr": 8.390483805241441e-07, "epoch": 1.1480285911920682, "percentage": 57.4, "elapsed_time": "3:23:54", "remaining_time": "2:31:19"} +{"current_steps": 4980, "total_steps": 8674, "loss": 0.5438926219940186, "lr": 8.386721017576426e-07, "epoch": 1.148259165321651, "percentage": 57.41, "elapsed_time": "3:23:57", "remaining_time": "2:31:17"} +{"current_steps": 4981, "total_steps": 8674, "loss": 0.3991735577583313, "lr": 8.382958464417857e-07, "epoch": 1.1484897394512337, "percentage": 57.42, "elapsed_time": "3:23:59", "remaining_time": "2:31:14"} +{"current_steps": 4982, "total_steps": 8674, "loss": 0.4918370246887207, "lr": 8.379196146312664e-07, "epoch": 1.1487203135808162, "percentage": 57.44, "elapsed_time": "3:24:01", "remaining_time": "2:31:12"} +{"current_steps": 4983, "total_steps": 8674, "loss": 0.5280467867851257, "lr": 8.375434063807737e-07, "epoch": 1.1489508877103989, "percentage": 57.45, "elapsed_time": "3:24:04", "remaining_time": "2:31:09"} +{"current_steps": 4984, "total_steps": 8674, "loss": 0.4186179041862488, "lr": 8.371672217449936e-07, "epoch": 1.1491814618399816, "percentage": 57.46, "elapsed_time": "3:24:06", "remaining_time": "2:31:07"} +{"current_steps": 4985, "total_steps": 8674, "loss": 0.3698224723339081, "lr": 8.367910607786079e-07, "epoch": 1.149412035969564, "percentage": 57.47, "elapsed_time": "3:24:09", "remaining_time": "2:31:04"} +{"current_steps": 4986, "total_steps": 8674, "loss": 0.45402267575263977, "lr": 8.364149235362956e-07, "epoch": 1.1496426100991468, "percentage": 57.48, "elapsed_time": "3:24:11", "remaining_time": "2:31:02"} +{"current_steps": 4987, "total_steps": 8674, "loss": 0.5145484209060669, "lr": 8.36038810072732e-07, "epoch": 1.1498731842287295, "percentage": 57.49, "elapsed_time": "3:24:14", "remaining_time": "2:30:59"} +{"current_steps": 4988, "total_steps": 8674, "loss": 0.4293951392173767, "lr": 8.356627204425893e-07, 
"epoch": 1.1501037583583122, "percentage": 57.51, "elapsed_time": "3:24:16", "remaining_time": "2:30:57"} +{"current_steps": 4989, "total_steps": 8674, "loss": 0.3916272521018982, "lr": 8.352866547005354e-07, "epoch": 1.150334332487895, "percentage": 57.52, "elapsed_time": "3:24:19", "remaining_time": "2:30:54"} +{"current_steps": 4990, "total_steps": 8674, "loss": 0.40171611309051514, "lr": 8.349106129012357e-07, "epoch": 1.1505649066174775, "percentage": 57.53, "elapsed_time": "3:24:21", "remaining_time": "2:30:52"} +{"current_steps": 4991, "total_steps": 8674, "loss": 0.49580252170562744, "lr": 8.345345950993518e-07, "epoch": 1.1507954807470602, "percentage": 57.54, "elapsed_time": "3:24:24", "remaining_time": "2:30:49"} +{"current_steps": 4992, "total_steps": 8674, "loss": 0.4521256685256958, "lr": 8.34158601349541e-07, "epoch": 1.151026054876643, "percentage": 57.55, "elapsed_time": "3:24:26", "remaining_time": "2:30:47"} +{"current_steps": 4993, "total_steps": 8674, "loss": 0.3920813798904419, "lr": 8.337826317064585e-07, "epoch": 1.1512566290062254, "percentage": 57.56, "elapsed_time": "3:24:28", "remaining_time": "2:30:44"} +{"current_steps": 4994, "total_steps": 8674, "loss": 0.4263145923614502, "lr": 8.334066862247547e-07, "epoch": 1.151487203135808, "percentage": 57.57, "elapsed_time": "3:24:31", "remaining_time": "2:30:42"} +{"current_steps": 4995, "total_steps": 8674, "loss": 0.4746140241622925, "lr": 8.330307649590779e-07, "epoch": 1.1517177772653908, "percentage": 57.59, "elapsed_time": "3:24:33", "remaining_time": "2:30:39"} +{"current_steps": 4996, "total_steps": 8674, "loss": 0.37520158290863037, "lr": 8.326548679640713e-07, "epoch": 1.1519483513949735, "percentage": 57.6, "elapsed_time": "3:24:36", "remaining_time": "2:30:37"} +{"current_steps": 4997, "total_steps": 8674, "loss": 0.4481951892375946, "lr": 8.322789952943759e-07, "epoch": 1.1521789255245563, "percentage": 57.61, "elapsed_time": "3:24:38", "remaining_time": "2:30:35"} 
+{"current_steps": 4998, "total_steps": 8674, "loss": 0.40319859981536865, "lr": 8.319031470046281e-07, "epoch": 1.1524094996541387, "percentage": 57.62, "elapsed_time": "3:24:41", "remaining_time": "2:30:32"} +{"current_steps": 4999, "total_steps": 8674, "loss": 0.47720152139663696, "lr": 8.315273231494615e-07, "epoch": 1.1526400737837215, "percentage": 57.63, "elapsed_time": "3:24:43", "remaining_time": "2:30:30"} +{"current_steps": 5000, "total_steps": 8674, "loss": 0.4027557969093323, "lr": 8.311515237835063e-07, "epoch": 1.1528706479133042, "percentage": 57.64, "elapsed_time": "3:24:46", "remaining_time": "2:30:28"} +{"current_steps": 5001, "total_steps": 8674, "loss": 0.3939552307128906, "lr": 8.307757489613878e-07, "epoch": 1.1531012220428867, "percentage": 57.66, "elapsed_time": "3:24:50", "remaining_time": "2:30:26"} +{"current_steps": 5002, "total_steps": 8674, "loss": 0.379425585269928, "lr": 8.303999987377295e-07, "epoch": 1.1533317961724694, "percentage": 57.67, "elapsed_time": "3:24:52", "remaining_time": "2:30:24"} +{"current_steps": 5003, "total_steps": 8674, "loss": 0.46231499314308167, "lr": 8.300242731671499e-07, "epoch": 1.153562370302052, "percentage": 57.68, "elapsed_time": "3:24:55", "remaining_time": "2:30:21"} +{"current_steps": 5004, "total_steps": 8674, "loss": 0.4639621675014496, "lr": 8.296485723042654e-07, "epoch": 1.1537929444316348, "percentage": 57.69, "elapsed_time": "3:24:57", "remaining_time": "2:30:19"} +{"current_steps": 5005, "total_steps": 8674, "loss": 0.49264025688171387, "lr": 8.29272896203687e-07, "epoch": 1.1540235185612175, "percentage": 57.7, "elapsed_time": "3:25:00", "remaining_time": "2:30:16"} +{"current_steps": 5006, "total_steps": 8674, "loss": 0.4145156145095825, "lr": 8.288972449200233e-07, "epoch": 1.1542540926908, "percentage": 57.71, "elapsed_time": "3:25:02", "remaining_time": "2:30:14"} +{"current_steps": 5007, "total_steps": 8674, "loss": 0.39693811535835266, "lr": 8.285216185078792e-07, "epoch": 
1.1544846668203828, "percentage": 57.72, "elapsed_time": "3:25:05", "remaining_time": "2:30:12"} +{"current_steps": 5008, "total_steps": 8674, "loss": 0.46224820613861084, "lr": 8.281460170218561e-07, "epoch": 1.1547152409499655, "percentage": 57.74, "elapsed_time": "3:25:07", "remaining_time": "2:30:09"} +{"current_steps": 5009, "total_steps": 8674, "loss": 0.48868128657341003, "lr": 8.277704405165506e-07, "epoch": 1.154945815079548, "percentage": 57.75, "elapsed_time": "3:25:09", "remaining_time": "2:30:07"} +{"current_steps": 5010, "total_steps": 8674, "loss": 0.5127776265144348, "lr": 8.273948890465574e-07, "epoch": 1.1551763892091307, "percentage": 57.76, "elapsed_time": "3:25:12", "remaining_time": "2:30:04"} +{"current_steps": 5011, "total_steps": 8674, "loss": 0.4039389491081238, "lr": 8.270193626664665e-07, "epoch": 1.1554069633387134, "percentage": 57.77, "elapsed_time": "3:25:14", "remaining_time": "2:30:02"} +{"current_steps": 5012, "total_steps": 8674, "loss": 0.4224502444267273, "lr": 8.266438614308641e-07, "epoch": 1.1556375374682961, "percentage": 57.78, "elapsed_time": "3:25:17", "remaining_time": "2:29:59"} +{"current_steps": 5013, "total_steps": 8674, "loss": 0.4392918050289154, "lr": 8.262683853943335e-07, "epoch": 1.1558681115978788, "percentage": 57.79, "elapsed_time": "3:25:20", "remaining_time": "2:29:57"} +{"current_steps": 5014, "total_steps": 8674, "loss": 0.5055289268493652, "lr": 8.258929346114534e-07, "epoch": 1.1560986857274613, "percentage": 57.8, "elapsed_time": "3:25:22", "remaining_time": "2:29:54"} +{"current_steps": 5015, "total_steps": 8674, "loss": 0.43851351737976074, "lr": 8.255175091368003e-07, "epoch": 1.156329259857044, "percentage": 57.82, "elapsed_time": "3:25:24", "remaining_time": "2:29:52"} +{"current_steps": 5016, "total_steps": 8674, "loss": 0.4557814598083496, "lr": 8.251421090249451e-07, "epoch": 1.1565598339866268, "percentage": 57.83, "elapsed_time": "3:25:27", "remaining_time": "2:29:49"} +{"current_steps": 
5017, "total_steps": 8674, "loss": 0.4288882613182068, "lr": 8.247667343304568e-07, "epoch": 1.1567904081162093, "percentage": 57.84, "elapsed_time": "3:25:29", "remaining_time": "2:29:47"} +{"current_steps": 5018, "total_steps": 8674, "loss": 0.42711886763572693, "lr": 8.243913851078994e-07, "epoch": 1.157020982245792, "percentage": 57.85, "elapsed_time": "3:25:32", "remaining_time": "2:29:44"} +{"current_steps": 5019, "total_steps": 8674, "loss": 0.515809953212738, "lr": 8.240160614118342e-07, "epoch": 1.1572515563753747, "percentage": 57.86, "elapsed_time": "3:25:34", "remaining_time": "2:29:42"} +{"current_steps": 5020, "total_steps": 8674, "loss": 0.5754632949829102, "lr": 8.236407632968182e-07, "epoch": 1.1574821305049574, "percentage": 57.87, "elapsed_time": "3:25:36", "remaining_time": "2:29:39"} +{"current_steps": 5021, "total_steps": 8674, "loss": 0.4601830244064331, "lr": 8.232654908174038e-07, "epoch": 1.1577127046345401, "percentage": 57.89, "elapsed_time": "3:25:39", "remaining_time": "2:29:37"} +{"current_steps": 5022, "total_steps": 8674, "loss": 0.4740797281265259, "lr": 8.228902440281422e-07, "epoch": 1.1579432787641226, "percentage": 57.9, "elapsed_time": "3:25:41", "remaining_time": "2:29:35"} +{"current_steps": 5023, "total_steps": 8674, "loss": 0.4066367745399475, "lr": 8.225150229835781e-07, "epoch": 1.1581738528937053, "percentage": 57.91, "elapsed_time": "3:25:44", "remaining_time": "2:29:32"} +{"current_steps": 5024, "total_steps": 8674, "loss": 0.4664362668991089, "lr": 8.221398277382546e-07, "epoch": 1.158404427023288, "percentage": 57.92, "elapsed_time": "3:25:46", "remaining_time": "2:29:30"} +{"current_steps": 5025, "total_steps": 8674, "loss": 0.5204637050628662, "lr": 8.217646583467093e-07, "epoch": 1.1586350011528705, "percentage": 57.93, "elapsed_time": "3:25:49", "remaining_time": "2:29:27"} +{"current_steps": 5026, "total_steps": 8674, "loss": 0.4991419017314911, "lr": 8.213895148634775e-07, "epoch": 1.1588655752824533, 
"percentage": 57.94, "elapsed_time": "3:25:51", "remaining_time": "2:29:25"} +{"current_steps": 5027, "total_steps": 8674, "loss": 0.40420424938201904, "lr": 8.210143973430896e-07, "epoch": 1.159096149412036, "percentage": 57.95, "elapsed_time": "3:25:54", "remaining_time": "2:29:22"} +{"current_steps": 5028, "total_steps": 8674, "loss": 0.523331880569458, "lr": 8.206393058400736e-07, "epoch": 1.1593267235416187, "percentage": 57.97, "elapsed_time": "3:25:56", "remaining_time": "2:29:20"} +{"current_steps": 5029, "total_steps": 8674, "loss": 0.5019216537475586, "lr": 8.202642404089516e-07, "epoch": 1.1595572976712014, "percentage": 57.98, "elapsed_time": "3:25:59", "remaining_time": "2:29:18"} +{"current_steps": 5030, "total_steps": 8674, "loss": 0.522672712802887, "lr": 8.198892011042442e-07, "epoch": 1.159787871800784, "percentage": 57.99, "elapsed_time": "3:26:01", "remaining_time": "2:29:15"} +{"current_steps": 5031, "total_steps": 8674, "loss": 0.418377548456192, "lr": 8.195141879804668e-07, "epoch": 1.1600184459303666, "percentage": 58.0, "elapsed_time": "3:26:04", "remaining_time": "2:29:13"} +{"current_steps": 5032, "total_steps": 8674, "loss": 0.4914432764053345, "lr": 8.191392010921312e-07, "epoch": 1.1602490200599493, "percentage": 58.01, "elapsed_time": "3:26:06", "remaining_time": "2:29:10"} +{"current_steps": 5033, "total_steps": 8674, "loss": 0.42149683833122253, "lr": 8.187642404937459e-07, "epoch": 1.1604795941895318, "percentage": 58.02, "elapsed_time": "3:26:09", "remaining_time": "2:29:08"} +{"current_steps": 5034, "total_steps": 8674, "loss": 0.5637058019638062, "lr": 8.183893062398145e-07, "epoch": 1.1607101683191146, "percentage": 58.04, "elapsed_time": "3:26:11", "remaining_time": "2:29:05"} +{"current_steps": 5035, "total_steps": 8674, "loss": 0.49930211901664734, "lr": 8.180143983848387e-07, "epoch": 1.1609407424486973, "percentage": 58.05, "elapsed_time": "3:26:14", "remaining_time": "2:29:03"} +{"current_steps": 5036, "total_steps": 
8674, "loss": 0.4217071235179901, "lr": 8.176395169833139e-07, "epoch": 1.16117131657828, "percentage": 58.06, "elapsed_time": "3:26:16", "remaining_time": "2:29:01"} +{"current_steps": 5037, "total_steps": 8674, "loss": 0.4208733141422272, "lr": 8.172646620897336e-07, "epoch": 1.1614018907078625, "percentage": 58.07, "elapsed_time": "3:26:19", "remaining_time": "2:28:58"} +{"current_steps": 5038, "total_steps": 8674, "loss": 0.42970529198646545, "lr": 8.168898337585866e-07, "epoch": 1.1616324648374452, "percentage": 58.08, "elapsed_time": "3:26:21", "remaining_time": "2:28:56"} +{"current_steps": 5039, "total_steps": 8674, "loss": 0.49482622742652893, "lr": 8.165150320443584e-07, "epoch": 1.161863038967028, "percentage": 58.09, "elapsed_time": "3:26:24", "remaining_time": "2:28:53"} +{"current_steps": 5040, "total_steps": 8674, "loss": 0.4106384217739105, "lr": 8.161402570015297e-07, "epoch": 1.1620936130966106, "percentage": 58.1, "elapsed_time": "3:26:26", "remaining_time": "2:28:51"} +{"current_steps": 5041, "total_steps": 8674, "loss": 0.4550397992134094, "lr": 8.157655086845778e-07, "epoch": 1.1623241872261931, "percentage": 58.12, "elapsed_time": "3:26:29", "remaining_time": "2:28:48"} +{"current_steps": 5042, "total_steps": 8674, "loss": 0.5144504308700562, "lr": 8.153907871479768e-07, "epoch": 1.1625547613557758, "percentage": 58.13, "elapsed_time": "3:26:31", "remaining_time": "2:28:46"} +{"current_steps": 5043, "total_steps": 8674, "loss": 0.3970009684562683, "lr": 8.150160924461953e-07, "epoch": 1.1627853354853586, "percentage": 58.14, "elapsed_time": "3:26:33", "remaining_time": "2:28:43"} +{"current_steps": 5044, "total_steps": 8674, "loss": 0.45825856924057007, "lr": 8.146414246336998e-07, "epoch": 1.1630159096149413, "percentage": 58.15, "elapsed_time": "3:26:36", "remaining_time": "2:28:41"} +{"current_steps": 5045, "total_steps": 8674, "loss": 0.4515247344970703, "lr": 8.142667837649515e-07, "epoch": 1.1632464837445238, "percentage": 58.16, 
"elapsed_time": "3:26:38", "remaining_time": "2:28:38"} +{"current_steps": 5046, "total_steps": 8674, "loss": 0.41265833377838135, "lr": 8.13892169894409e-07, "epoch": 1.1634770578741065, "percentage": 58.17, "elapsed_time": "3:26:41", "remaining_time": "2:28:36"} +{"current_steps": 5047, "total_steps": 8674, "loss": 0.39820557832717896, "lr": 8.135175830765254e-07, "epoch": 1.1637076320036892, "percentage": 58.19, "elapsed_time": "3:26:43", "remaining_time": "2:28:33"} +{"current_steps": 5048, "total_steps": 8674, "loss": 0.41528987884521484, "lr": 8.131430233657514e-07, "epoch": 1.163938206133272, "percentage": 58.2, "elapsed_time": "3:26:46", "remaining_time": "2:28:31"} +{"current_steps": 5049, "total_steps": 8674, "loss": 0.4453636407852173, "lr": 8.127684908165323e-07, "epoch": 1.1641687802628544, "percentage": 58.21, "elapsed_time": "3:26:48", "remaining_time": "2:28:28"} +{"current_steps": 5050, "total_steps": 8674, "loss": 0.45008519291877747, "lr": 8.123939854833107e-07, "epoch": 1.1643993543924371, "percentage": 58.22, "elapsed_time": "3:26:50", "remaining_time": "2:28:26"} +{"current_steps": 5051, "total_steps": 8674, "loss": 0.456550657749176, "lr": 8.120195074205249e-07, "epoch": 1.1646299285220199, "percentage": 58.23, "elapsed_time": "3:26:53", "remaining_time": "2:28:23"} +{"current_steps": 5052, "total_steps": 8674, "loss": 0.44465887546539307, "lr": 8.116450566826086e-07, "epoch": 1.1648605026516026, "percentage": 58.24, "elapsed_time": "3:26:55", "remaining_time": "2:28:21"} +{"current_steps": 5053, "total_steps": 8674, "loss": 0.4769172668457031, "lr": 8.112706333239923e-07, "epoch": 1.165091076781185, "percentage": 58.25, "elapsed_time": "3:26:58", "remaining_time": "2:28:18"} +{"current_steps": 5054, "total_steps": 8674, "loss": 0.42662739753723145, "lr": 8.108962373991019e-07, "epoch": 1.1653216509107678, "percentage": 58.27, "elapsed_time": "3:27:00", "remaining_time": "2:28:16"} +{"current_steps": 5055, "total_steps": 8674, "loss": 
0.4923250079154968, "lr": 8.105218689623603e-07, "epoch": 1.1655522250403505, "percentage": 58.28, "elapsed_time": "3:27:02", "remaining_time": "2:28:13"} +{"current_steps": 5056, "total_steps": 8674, "loss": 0.42462587356567383, "lr": 8.10147528068185e-07, "epoch": 1.1657827991699332, "percentage": 58.29, "elapsed_time": "3:27:05", "remaining_time": "2:28:11"} +{"current_steps": 5057, "total_steps": 8674, "loss": 0.47610223293304443, "lr": 8.097732147709908e-07, "epoch": 1.1660133732995157, "percentage": 58.3, "elapsed_time": "3:27:07", "remaining_time": "2:28:08"} +{"current_steps": 5058, "total_steps": 8674, "loss": 0.47519630193710327, "lr": 8.093989291251875e-07, "epoch": 1.1662439474290984, "percentage": 58.31, "elapsed_time": "3:27:10", "remaining_time": "2:28:06"} +{"current_steps": 5059, "total_steps": 8674, "loss": 0.38865840435028076, "lr": 8.090246711851819e-07, "epoch": 1.1664745215586811, "percentage": 58.32, "elapsed_time": "3:27:12", "remaining_time": "2:28:03"} +{"current_steps": 5060, "total_steps": 8674, "loss": 0.39990776777267456, "lr": 8.086504410053757e-07, "epoch": 1.1667050956882639, "percentage": 58.34, "elapsed_time": "3:27:14", "remaining_time": "2:28:01"} +{"current_steps": 5061, "total_steps": 8674, "loss": 0.4330836534500122, "lr": 8.082762386401669e-07, "epoch": 1.1669356698178464, "percentage": 58.35, "elapsed_time": "3:27:17", "remaining_time": "2:27:58"} +{"current_steps": 5062, "total_steps": 8674, "loss": 0.4285934865474701, "lr": 8.079020641439504e-07, "epoch": 1.167166243947429, "percentage": 58.36, "elapsed_time": "3:27:19", "remaining_time": "2:27:56"} +{"current_steps": 5063, "total_steps": 8674, "loss": 0.3900645077228546, "lr": 8.075279175711152e-07, "epoch": 1.1673968180770118, "percentage": 58.37, "elapsed_time": "3:27:22", "remaining_time": "2:27:54"} +{"current_steps": 5064, "total_steps": 8674, "loss": 0.48145759105682373, "lr": 8.07153798976048e-07, "epoch": 1.1676273922065945, "percentage": 58.38, "elapsed_time": 
"3:27:24", "remaining_time": "2:27:51"} +{"current_steps": 5065, "total_steps": 8674, "loss": 0.4239045977592468, "lr": 8.067797084131305e-07, "epoch": 1.167857966336177, "percentage": 58.39, "elapsed_time": "3:27:27", "remaining_time": "2:27:49"} +{"current_steps": 5066, "total_steps": 8674, "loss": 0.55517578125, "lr": 8.064056459367409e-07, "epoch": 1.1680885404657597, "percentage": 58.4, "elapsed_time": "3:27:29", "remaining_time": "2:27:46"} +{"current_steps": 5067, "total_steps": 8674, "loss": 0.4956046938896179, "lr": 8.060316116012524e-07, "epoch": 1.1683191145953424, "percentage": 58.42, "elapsed_time": "3:27:32", "remaining_time": "2:27:44"} +{"current_steps": 5068, "total_steps": 8674, "loss": 0.4051878750324249, "lr": 8.05657605461035e-07, "epoch": 1.1685496887249252, "percentage": 58.43, "elapsed_time": "3:27:34", "remaining_time": "2:27:41"} +{"current_steps": 5069, "total_steps": 8674, "loss": 0.47389912605285645, "lr": 8.052836275704541e-07, "epoch": 1.1687802628545076, "percentage": 58.44, "elapsed_time": "3:27:37", "remaining_time": "2:27:39"} +{"current_steps": 5070, "total_steps": 8674, "loss": 0.5023842453956604, "lr": 8.049096779838717e-07, "epoch": 1.1690108369840904, "percentage": 58.45, "elapsed_time": "3:27:39", "remaining_time": "2:27:36"} +{"current_steps": 5071, "total_steps": 8674, "loss": 0.4895137548446655, "lr": 8.045357567556449e-07, "epoch": 1.169241411113673, "percentage": 58.46, "elapsed_time": "3:27:42", "remaining_time": "2:27:34"} +{"current_steps": 5072, "total_steps": 8674, "loss": 0.47874224185943604, "lr": 8.041618639401264e-07, "epoch": 1.1694719852432558, "percentage": 58.47, "elapsed_time": "3:27:44", "remaining_time": "2:27:32"} +{"current_steps": 5073, "total_steps": 8674, "loss": 0.4784395694732666, "lr": 8.037879995916659e-07, "epoch": 1.1697025593728383, "percentage": 58.49, "elapsed_time": "3:27:47", "remaining_time": "2:27:29"} +{"current_steps": 5074, "total_steps": 8674, "loss": 0.45289772748947144, "lr": 
8.034141637646079e-07, "epoch": 1.169933133502421, "percentage": 58.5, "elapsed_time": "3:27:49", "remaining_time": "2:27:27"} +{"current_steps": 5075, "total_steps": 8674, "loss": 0.5375204682350159, "lr": 8.030403565132942e-07, "epoch": 1.1701637076320037, "percentage": 58.51, "elapsed_time": "3:27:51", "remaining_time": "2:27:24"} +{"current_steps": 5076, "total_steps": 8674, "loss": 0.45003899931907654, "lr": 8.026665778920602e-07, "epoch": 1.1703942817615864, "percentage": 58.52, "elapsed_time": "3:27:54", "remaining_time": "2:27:22"} +{"current_steps": 5077, "total_steps": 8674, "loss": 0.4236389994621277, "lr": 8.022928279552392e-07, "epoch": 1.170624855891169, "percentage": 58.53, "elapsed_time": "3:27:56", "remaining_time": "2:27:19"} +{"current_steps": 5078, "total_steps": 8674, "loss": 0.43182557821273804, "lr": 8.019191067571592e-07, "epoch": 1.1708554300207517, "percentage": 58.54, "elapsed_time": "3:27:59", "remaining_time": "2:27:17"} +{"current_steps": 5079, "total_steps": 8674, "loss": 0.5171953439712524, "lr": 8.01545414352145e-07, "epoch": 1.1710860041503344, "percentage": 58.55, "elapsed_time": "3:28:01", "remaining_time": "2:27:14"} +{"current_steps": 5080, "total_steps": 8674, "loss": 0.5084770321846008, "lr": 8.011717507945157e-07, "epoch": 1.171316578279917, "percentage": 58.57, "elapsed_time": "3:28:04", "remaining_time": "2:27:12"} +{"current_steps": 5081, "total_steps": 8674, "loss": 0.4685532748699188, "lr": 8.007981161385876e-07, "epoch": 1.1715471524094996, "percentage": 58.58, "elapsed_time": "3:28:06", "remaining_time": "2:27:09"} +{"current_steps": 5082, "total_steps": 8674, "loss": 0.4647448658943176, "lr": 8.004245104386724e-07, "epoch": 1.1717777265390823, "percentage": 58.59, "elapsed_time": "3:28:09", "remaining_time": "2:27:07"} +{"current_steps": 5083, "total_steps": 8674, "loss": 0.4038098454475403, "lr": 8.000509337490768e-07, "epoch": 1.172008300668665, "percentage": 58.6, "elapsed_time": "3:28:11", "remaining_time": 
"2:27:04"} +{"current_steps": 5084, "total_steps": 8674, "loss": 0.4153759479522705, "lr": 7.996773861241047e-07, "epoch": 1.1722388747982477, "percentage": 58.61, "elapsed_time": "3:28:14", "remaining_time": "2:27:02"} +{"current_steps": 5085, "total_steps": 8674, "loss": 0.4569447636604309, "lr": 7.993038676180545e-07, "epoch": 1.1724694489278302, "percentage": 58.62, "elapsed_time": "3:28:16", "remaining_time": "2:26:59"} +{"current_steps": 5086, "total_steps": 8674, "loss": 0.4419426918029785, "lr": 7.989303782852215e-07, "epoch": 1.172700023057413, "percentage": 58.64, "elapsed_time": "3:28:18", "remaining_time": "2:26:57"} +{"current_steps": 5087, "total_steps": 8674, "loss": 0.3902894854545593, "lr": 7.985569181798955e-07, "epoch": 1.1729305971869957, "percentage": 58.65, "elapsed_time": "3:28:21", "remaining_time": "2:26:55"} +{"current_steps": 5088, "total_steps": 8674, "loss": 0.4066358208656311, "lr": 7.981834873563631e-07, "epoch": 1.1731611713165782, "percentage": 58.66, "elapsed_time": "3:28:23", "remaining_time": "2:26:52"} +{"current_steps": 5089, "total_steps": 8674, "loss": 0.4589639902114868, "lr": 7.978100858689059e-07, "epoch": 1.1733917454461609, "percentage": 58.67, "elapsed_time": "3:28:26", "remaining_time": "2:26:50"} +{"current_steps": 5090, "total_steps": 8674, "loss": 0.4431188106536865, "lr": 7.974367137718024e-07, "epoch": 1.1736223195757436, "percentage": 58.68, "elapsed_time": "3:28:28", "remaining_time": "2:26:47"} +{"current_steps": 5091, "total_steps": 8674, "loss": 0.43412742018699646, "lr": 7.970633711193252e-07, "epoch": 1.1738528937053263, "percentage": 58.69, "elapsed_time": "3:28:31", "remaining_time": "2:26:45"} +{"current_steps": 5092, "total_steps": 8674, "loss": 0.40296387672424316, "lr": 7.966900579657435e-07, "epoch": 1.174083467834909, "percentage": 58.7, "elapsed_time": "3:28:33", "remaining_time": "2:26:42"} +{"current_steps": 5093, "total_steps": 8674, "loss": 0.4814741611480713, "lr": 7.963167743653228e-07, 
"epoch": 1.1743140419644915, "percentage": 58.72, "elapsed_time": "3:28:36", "remaining_time": "2:26:40"} +{"current_steps": 5094, "total_steps": 8674, "loss": 0.4412423372268677, "lr": 7.959435203723228e-07, "epoch": 1.1745446160940742, "percentage": 58.73, "elapsed_time": "3:28:38", "remaining_time": "2:26:37"} +{"current_steps": 5095, "total_steps": 8674, "loss": 0.49773266911506653, "lr": 7.955702960410006e-07, "epoch": 1.174775190223657, "percentage": 58.74, "elapsed_time": "3:28:40", "remaining_time": "2:26:35"} +{"current_steps": 5096, "total_steps": 8674, "loss": 0.4657529592514038, "lr": 7.951971014256073e-07, "epoch": 1.1750057643532394, "percentage": 58.75, "elapsed_time": "3:28:43", "remaining_time": "2:26:32"} +{"current_steps": 5097, "total_steps": 8674, "loss": 0.4062782824039459, "lr": 7.94823936580391e-07, "epoch": 1.1752363384828222, "percentage": 58.76, "elapsed_time": "3:28:46", "remaining_time": "2:26:30"} +{"current_steps": 5098, "total_steps": 8674, "loss": 0.4154980182647705, "lr": 7.944508015595948e-07, "epoch": 1.1754669126124049, "percentage": 58.77, "elapsed_time": "3:28:48", "remaining_time": "2:26:28"} +{"current_steps": 5099, "total_steps": 8674, "loss": 0.4724680185317993, "lr": 7.940776964174582e-07, "epoch": 1.1756974867419876, "percentage": 58.78, "elapsed_time": "3:28:50", "remaining_time": "2:26:25"} +{"current_steps": 5100, "total_steps": 8674, "loss": 0.48808538913726807, "lr": 7.937046212082149e-07, "epoch": 1.1759280608715703, "percentage": 58.8, "elapsed_time": "3:28:53", "remaining_time": "2:26:23"} +{"current_steps": 5101, "total_steps": 8674, "loss": 0.4985845983028412, "lr": 7.933315759860959e-07, "epoch": 1.1761586350011528, "percentage": 58.81, "elapsed_time": "3:28:57", "remaining_time": "2:26:21"} +{"current_steps": 5102, "total_steps": 8674, "loss": 0.3735587000846863, "lr": 7.92958560805327e-07, "epoch": 1.1763892091307355, "percentage": 58.82, "elapsed_time": "3:28:59", "remaining_time": "2:26:19"} 
+{"current_steps": 5103, "total_steps": 8674, "loss": 0.4198414385318756, "lr": 7.925855757201294e-07, "epoch": 1.1766197832603182, "percentage": 58.83, "elapsed_time": "3:29:02", "remaining_time": "2:26:16"} +{"current_steps": 5104, "total_steps": 8674, "loss": 0.41973787546157837, "lr": 7.922126207847204e-07, "epoch": 1.1768503573899007, "percentage": 58.84, "elapsed_time": "3:29:04", "remaining_time": "2:26:14"} +{"current_steps": 5105, "total_steps": 8674, "loss": 0.5179545283317566, "lr": 7.918396960533128e-07, "epoch": 1.1770809315194835, "percentage": 58.85, "elapsed_time": "3:29:07", "remaining_time": "2:26:11"} +{"current_steps": 5106, "total_steps": 8674, "loss": 0.4917227625846863, "lr": 7.914668015801153e-07, "epoch": 1.1773115056490662, "percentage": 58.87, "elapsed_time": "3:29:09", "remaining_time": "2:26:09"} +{"current_steps": 5107, "total_steps": 8674, "loss": 0.41775548458099365, "lr": 7.910939374193312e-07, "epoch": 1.1775420797786489, "percentage": 58.88, "elapsed_time": "3:29:12", "remaining_time": "2:26:07"} +{"current_steps": 5108, "total_steps": 8674, "loss": 0.45468997955322266, "lr": 7.907211036251608e-07, "epoch": 1.1777726539082316, "percentage": 58.89, "elapsed_time": "3:29:14", "remaining_time": "2:26:04"} +{"current_steps": 5109, "total_steps": 8674, "loss": 0.3749620318412781, "lr": 7.903483002517988e-07, "epoch": 1.178003228037814, "percentage": 58.9, "elapsed_time": "3:29:16", "remaining_time": "2:26:02"} +{"current_steps": 5110, "total_steps": 8674, "loss": 0.48940956592559814, "lr": 7.899755273534365e-07, "epoch": 1.1782338021673968, "percentage": 58.91, "elapsed_time": "3:29:19", "remaining_time": "2:25:59"} +{"current_steps": 5111, "total_steps": 8674, "loss": 0.4561386704444885, "lr": 7.896027849842594e-07, "epoch": 1.1784643762969795, "percentage": 58.92, "elapsed_time": "3:29:21", "remaining_time": "2:25:57"} +{"current_steps": 5112, "total_steps": 8674, "loss": 0.441898375749588, "lr": 7.892300731984498e-07, "epoch": 
1.178694950426562, "percentage": 58.93, "elapsed_time": "3:29:24", "remaining_time": "2:25:54"} +{"current_steps": 5113, "total_steps": 8674, "loss": 0.43445056676864624, "lr": 7.888573920501856e-07, "epoch": 1.1789255245561447, "percentage": 58.95, "elapsed_time": "3:29:26", "remaining_time": "2:25:52"} +{"current_steps": 5114, "total_steps": 8674, "loss": 0.42653167247772217, "lr": 7.884847415936389e-07, "epoch": 1.1791560986857275, "percentage": 58.96, "elapsed_time": "3:29:29", "remaining_time": "2:25:49"} +{"current_steps": 5115, "total_steps": 8674, "loss": 0.42003321647644043, "lr": 7.881121218829787e-07, "epoch": 1.1793866728153102, "percentage": 58.97, "elapsed_time": "3:29:31", "remaining_time": "2:25:47"} +{"current_steps": 5116, "total_steps": 8674, "loss": 0.4920128881931305, "lr": 7.87739532972369e-07, "epoch": 1.179617246944893, "percentage": 58.98, "elapsed_time": "3:29:34", "remaining_time": "2:25:45"} +{"current_steps": 5117, "total_steps": 8674, "loss": 0.49529707431793213, "lr": 7.873669749159697e-07, "epoch": 1.1798478210744754, "percentage": 58.99, "elapsed_time": "3:29:36", "remaining_time": "2:25:42"} +{"current_steps": 5118, "total_steps": 8674, "loss": 0.4813005328178406, "lr": 7.869944477679351e-07, "epoch": 1.180078395204058, "percentage": 59.0, "elapsed_time": "3:29:39", "remaining_time": "2:25:40"} +{"current_steps": 5119, "total_steps": 8674, "loss": 0.47239556908607483, "lr": 7.866219515824168e-07, "epoch": 1.1803089693336408, "percentage": 59.02, "elapsed_time": "3:29:41", "remaining_time": "2:25:37"} +{"current_steps": 5120, "total_steps": 8674, "loss": 0.4808405935764313, "lr": 7.862494864135596e-07, "epoch": 1.1805395434632233, "percentage": 59.03, "elapsed_time": "3:29:44", "remaining_time": "2:25:35"} +{"current_steps": 5121, "total_steps": 8674, "loss": 0.44946521520614624, "lr": 7.858770523155066e-07, "epoch": 1.180770117592806, "percentage": 59.04, "elapsed_time": "3:29:46", "remaining_time": "2:25:32"} +{"current_steps": 
5122, "total_steps": 8674, "loss": 0.5344874858856201, "lr": 7.85504649342394e-07, "epoch": 1.1810006917223888, "percentage": 59.05, "elapsed_time": "3:29:49", "remaining_time": "2:25:30"} +{"current_steps": 5123, "total_steps": 8674, "loss": 0.49354079365730286, "lr": 7.851322775483542e-07, "epoch": 1.1812312658519715, "percentage": 59.06, "elapsed_time": "3:29:51", "remaining_time": "2:25:27"} +{"current_steps": 5124, "total_steps": 8674, "loss": 0.414085328578949, "lr": 7.847599369875155e-07, "epoch": 1.1814618399815542, "percentage": 59.07, "elapsed_time": "3:29:54", "remaining_time": "2:25:25"} +{"current_steps": 5125, "total_steps": 8674, "loss": 0.4638150632381439, "lr": 7.843876277140013e-07, "epoch": 1.1816924141111367, "percentage": 59.08, "elapsed_time": "3:29:56", "remaining_time": "2:25:23"} +{"current_steps": 5126, "total_steps": 8674, "loss": 0.39239877462387085, "lr": 7.84015349781931e-07, "epoch": 1.1819229882407194, "percentage": 59.1, "elapsed_time": "3:29:59", "remaining_time": "2:25:20"} +{"current_steps": 5127, "total_steps": 8674, "loss": 0.46846455335617065, "lr": 7.83643103245418e-07, "epoch": 1.1821535623703021, "percentage": 59.11, "elapsed_time": "3:30:01", "remaining_time": "2:25:18"} +{"current_steps": 5128, "total_steps": 8674, "loss": 0.5257229804992676, "lr": 7.832708881585729e-07, "epoch": 1.1823841364998846, "percentage": 59.12, "elapsed_time": "3:30:04", "remaining_time": "2:25:15"} +{"current_steps": 5129, "total_steps": 8674, "loss": 0.3858698904514313, "lr": 7.828987045755006e-07, "epoch": 1.1826147106294673, "percentage": 59.13, "elapsed_time": "3:30:06", "remaining_time": "2:25:13"} +{"current_steps": 5130, "total_steps": 8674, "loss": 0.48664575815200806, "lr": 7.82526552550302e-07, "epoch": 1.18284528475905, "percentage": 59.14, "elapsed_time": "3:30:09", "remaining_time": "2:25:10"} +{"current_steps": 5131, "total_steps": 8674, "loss": 0.5246836543083191, "lr": 7.821544321370731e-07, "epoch": 1.1830758588886328, 
"percentage": 59.15, "elapsed_time": "3:30:11", "remaining_time": "2:25:08"} +{"current_steps": 5132, "total_steps": 8674, "loss": 0.5538516640663147, "lr": 7.817823433899049e-07, "epoch": 1.1833064330182155, "percentage": 59.17, "elapsed_time": "3:30:14", "remaining_time": "2:25:06"} +{"current_steps": 5133, "total_steps": 8674, "loss": 0.4563618302345276, "lr": 7.814102863628852e-07, "epoch": 1.183537007147798, "percentage": 59.18, "elapsed_time": "3:30:16", "remaining_time": "2:25:03"} +{"current_steps": 5134, "total_steps": 8674, "loss": 0.48093757033348083, "lr": 7.810382611100952e-07, "epoch": 1.1837675812773807, "percentage": 59.19, "elapsed_time": "3:30:19", "remaining_time": "2:25:01"} +{"current_steps": 5135, "total_steps": 8674, "loss": 0.41152772307395935, "lr": 7.806662676856133e-07, "epoch": 1.1839981554069634, "percentage": 59.2, "elapsed_time": "3:30:21", "remaining_time": "2:24:58"} +{"current_steps": 5136, "total_steps": 8674, "loss": 0.4429926574230194, "lr": 7.802943061435121e-07, "epoch": 1.184228729536546, "percentage": 59.21, "elapsed_time": "3:30:23", "remaining_time": "2:24:56"} +{"current_steps": 5137, "total_steps": 8674, "loss": 0.5795058012008667, "lr": 7.799223765378604e-07, "epoch": 1.1844593036661286, "percentage": 59.22, "elapsed_time": "3:30:26", "remaining_time": "2:24:53"} +{"current_steps": 5138, "total_steps": 8674, "loss": 0.43219637870788574, "lr": 7.795504789227214e-07, "epoch": 1.1846898777957113, "percentage": 59.23, "elapsed_time": "3:30:28", "remaining_time": "2:24:51"} +{"current_steps": 5139, "total_steps": 8674, "loss": 0.472915917634964, "lr": 7.791786133521547e-07, "epoch": 1.184920451925294, "percentage": 59.25, "elapsed_time": "3:30:31", "remaining_time": "2:24:48"} +{"current_steps": 5140, "total_steps": 8674, "loss": 0.609251081943512, "lr": 7.788067798802144e-07, "epoch": 1.1851510260548768, "percentage": 59.26, "elapsed_time": "3:30:33", "remaining_time": "2:24:46"} +{"current_steps": 5141, "total_steps": 
8674, "loss": 0.5051882266998291, "lr": 7.784349785609506e-07, "epoch": 1.1853816001844593, "percentage": 59.27, "elapsed_time": "3:30:36", "remaining_time": "2:24:44"} +{"current_steps": 5142, "total_steps": 8674, "loss": 0.5062044858932495, "lr": 7.780632094484081e-07, "epoch": 1.185612174314042, "percentage": 59.28, "elapsed_time": "3:30:38", "remaining_time": "2:24:41"} +{"current_steps": 5143, "total_steps": 8674, "loss": 0.48717936873435974, "lr": 7.77691472596627e-07, "epoch": 1.1858427484436247, "percentage": 59.29, "elapsed_time": "3:30:41", "remaining_time": "2:24:39"} +{"current_steps": 5144, "total_steps": 8674, "loss": 0.4755759537220001, "lr": 7.773197680596439e-07, "epoch": 1.1860733225732072, "percentage": 59.3, "elapsed_time": "3:30:43", "remaining_time": "2:24:36"} +{"current_steps": 5145, "total_steps": 8674, "loss": 0.4549487829208374, "lr": 7.769480958914889e-07, "epoch": 1.18630389670279, "percentage": 59.32, "elapsed_time": "3:30:46", "remaining_time": "2:24:34"} +{"current_steps": 5146, "total_steps": 8674, "loss": 0.39759546518325806, "lr": 7.765764561461891e-07, "epoch": 1.1865344708323726, "percentage": 59.33, "elapsed_time": "3:30:48", "remaining_time": "2:24:31"} +{"current_steps": 5147, "total_steps": 8674, "loss": 0.5151915550231934, "lr": 7.762048488777654e-07, "epoch": 1.1867650449619553, "percentage": 59.34, "elapsed_time": "3:30:51", "remaining_time": "2:24:29"} +{"current_steps": 5148, "total_steps": 8674, "loss": 0.4555166959762573, "lr": 7.758332741402351e-07, "epoch": 1.1869956190915378, "percentage": 59.35, "elapsed_time": "3:30:53", "remaining_time": "2:24:26"} +{"current_steps": 5149, "total_steps": 8674, "loss": 0.3639993667602539, "lr": 7.754617319876102e-07, "epoch": 1.1872261932211206, "percentage": 59.36, "elapsed_time": "3:30:56", "remaining_time": "2:24:24"} +{"current_steps": 5150, "total_steps": 8674, "loss": 0.4158916473388672, "lr": 7.750902224738984e-07, "epoch": 1.1874567673507033, "percentage": 59.37, 
"elapsed_time": "3:30:58", "remaining_time": "2:24:21"} +{"current_steps": 5151, "total_steps": 8674, "loss": 0.44933754205703735, "lr": 7.747187456531021e-07, "epoch": 1.187687341480286, "percentage": 59.38, "elapsed_time": "3:31:00", "remaining_time": "2:24:19"} +{"current_steps": 5152, "total_steps": 8674, "loss": 0.35436397790908813, "lr": 7.74347301579219e-07, "epoch": 1.1879179156098685, "percentage": 59.4, "elapsed_time": "3:31:03", "remaining_time": "2:24:16"} +{"current_steps": 5153, "total_steps": 8674, "loss": 0.40650928020477295, "lr": 7.73975890306243e-07, "epoch": 1.1881484897394512, "percentage": 59.41, "elapsed_time": "3:31:05", "remaining_time": "2:24:14"} +{"current_steps": 5154, "total_steps": 8674, "loss": 0.424211710691452, "lr": 7.736045118881615e-07, "epoch": 1.188379063869034, "percentage": 59.42, "elapsed_time": "3:31:08", "remaining_time": "2:24:12"} +{"current_steps": 5155, "total_steps": 8674, "loss": 0.38909512758255005, "lr": 7.73233166378959e-07, "epoch": 1.1886096379986166, "percentage": 59.43, "elapsed_time": "3:31:10", "remaining_time": "2:24:09"} +{"current_steps": 5156, "total_steps": 8674, "loss": 0.4452083110809326, "lr": 7.728618538326139e-07, "epoch": 1.1888402121281991, "percentage": 59.44, "elapsed_time": "3:31:13", "remaining_time": "2:24:07"} +{"current_steps": 5157, "total_steps": 8674, "loss": 0.45061540603637695, "lr": 7.724905743031005e-07, "epoch": 1.1890707862577818, "percentage": 59.45, "elapsed_time": "3:31:16", "remaining_time": "2:24:05"} +{"current_steps": 5158, "total_steps": 8674, "loss": 0.5301374197006226, "lr": 7.721193278443875e-07, "epoch": 1.1893013603873646, "percentage": 59.47, "elapsed_time": "3:31:18", "remaining_time": "2:24:02"} +{"current_steps": 5159, "total_steps": 8674, "loss": 0.4386521577835083, "lr": 7.717481145104398e-07, "epoch": 1.1895319345169473, "percentage": 59.48, "elapsed_time": "3:31:21", "remaining_time": "2:24:00"} +{"current_steps": 5160, "total_steps": 8674, "loss": 
0.447623074054718, "lr": 7.713769343552169e-07, "epoch": 1.1897625086465298, "percentage": 59.49, "elapsed_time": "3:31:23", "remaining_time": "2:23:57"} +{"current_steps": 5161, "total_steps": 8674, "loss": 0.44326454401016235, "lr": 7.71005787432674e-07, "epoch": 1.1899930827761125, "percentage": 59.5, "elapsed_time": "3:31:25", "remaining_time": "2:23:54"} +{"current_steps": 5162, "total_steps": 8674, "loss": 0.564007043838501, "lr": 7.706346737967603e-07, "epoch": 1.1902236569056952, "percentage": 59.51, "elapsed_time": "3:31:28", "remaining_time": "2:23:52"} +{"current_steps": 5163, "total_steps": 8674, "loss": 0.5338540077209473, "lr": 7.702635935014213e-07, "epoch": 1.190454231035278, "percentage": 59.52, "elapsed_time": "3:31:30", "remaining_time": "2:23:49"} +{"current_steps": 5164, "total_steps": 8674, "loss": 0.45307862758636475, "lr": 7.698925466005977e-07, "epoch": 1.1906848051648604, "percentage": 59.53, "elapsed_time": "3:31:33", "remaining_time": "2:23:47"} +{"current_steps": 5165, "total_steps": 8674, "loss": 0.5383142232894897, "lr": 7.69521533148224e-07, "epoch": 1.1909153792944431, "percentage": 59.55, "elapsed_time": "3:31:35", "remaining_time": "2:23:45"} +{"current_steps": 5166, "total_steps": 8674, "loss": 0.3794770836830139, "lr": 7.691505531982316e-07, "epoch": 1.1911459534240258, "percentage": 59.56, "elapsed_time": "3:31:38", "remaining_time": "2:23:42"} +{"current_steps": 5167, "total_steps": 8674, "loss": 0.4633198082447052, "lr": 7.687796068045455e-07, "epoch": 1.1913765275536086, "percentage": 59.57, "elapsed_time": "3:31:40", "remaining_time": "2:23:40"} +{"current_steps": 5168, "total_steps": 8674, "loss": 0.5080294609069824, "lr": 7.684086940210875e-07, "epoch": 1.191607101683191, "percentage": 59.58, "elapsed_time": "3:31:43", "remaining_time": "2:23:37"} +{"current_steps": 5169, "total_steps": 8674, "loss": 0.3952289819717407, "lr": 7.680378149017724e-07, "epoch": 1.1918376758127738, "percentage": 59.59, "elapsed_time": 
"3:31:46", "remaining_time": "2:23:35"} +{"current_steps": 5170, "total_steps": 8674, "loss": 0.4518551528453827, "lr": 7.676669695005122e-07, "epoch": 1.1920682499423565, "percentage": 59.6, "elapsed_time": "3:31:49", "remaining_time": "2:23:33"} +{"current_steps": 5171, "total_steps": 8674, "loss": 0.4752943515777588, "lr": 7.672961578712125e-07, "epoch": 1.1922988240719392, "percentage": 59.61, "elapsed_time": "3:31:52", "remaining_time": "2:23:31"} +{"current_steps": 5172, "total_steps": 8674, "loss": 0.5059680342674255, "lr": 7.669253800677744e-07, "epoch": 1.1925293982015217, "percentage": 59.63, "elapsed_time": "3:31:55", "remaining_time": "2:23:29"} +{"current_steps": 5173, "total_steps": 8674, "loss": 0.47073960304260254, "lr": 7.665546361440949e-07, "epoch": 1.1927599723311044, "percentage": 59.64, "elapsed_time": "3:31:57", "remaining_time": "2:23:27"} +{"current_steps": 5174, "total_steps": 8674, "loss": 0.5851496458053589, "lr": 7.661839261540644e-07, "epoch": 1.1929905464606871, "percentage": 59.65, "elapsed_time": "3:32:00", "remaining_time": "2:23:24"} +{"current_steps": 5175, "total_steps": 8674, "loss": 0.44255387783050537, "lr": 7.658132501515701e-07, "epoch": 1.1932211205902699, "percentage": 59.66, "elapsed_time": "3:32:02", "remaining_time": "2:23:22"} +{"current_steps": 5176, "total_steps": 8674, "loss": 0.543785810470581, "lr": 7.654426081904931e-07, "epoch": 1.1934516947198524, "percentage": 59.67, "elapsed_time": "3:32:05", "remaining_time": "2:23:19"} +{"current_steps": 5177, "total_steps": 8674, "loss": 0.503501296043396, "lr": 7.650720003247107e-07, "epoch": 1.193682268849435, "percentage": 59.68, "elapsed_time": "3:32:07", "remaining_time": "2:23:17"} +{"current_steps": 5178, "total_steps": 8674, "loss": 0.43894368410110474, "lr": 7.647014266080935e-07, "epoch": 1.1939128429790178, "percentage": 59.7, "elapsed_time": "3:32:10", "remaining_time": "2:23:14"} +{"current_steps": 5179, "total_steps": 8674, "loss": 0.5014036297798157, "lr": 
7.643308870945088e-07, "epoch": 1.1941434171086005, "percentage": 59.71, "elapsed_time": "3:32:12", "remaining_time": "2:23:12"} +{"current_steps": 5180, "total_steps": 8674, "loss": 0.4859309196472168, "lr": 7.639603818378178e-07, "epoch": 1.194373991238183, "percentage": 59.72, "elapsed_time": "3:32:14", "remaining_time": "2:23:09"} +{"current_steps": 5181, "total_steps": 8674, "loss": 0.40631920099258423, "lr": 7.635899108918781e-07, "epoch": 1.1946045653677657, "percentage": 59.73, "elapsed_time": "3:32:17", "remaining_time": "2:23:07"} +{"current_steps": 5182, "total_steps": 8674, "loss": 0.5206565856933594, "lr": 7.632194743105405e-07, "epoch": 1.1948351394973484, "percentage": 59.74, "elapsed_time": "3:32:19", "remaining_time": "2:23:05"} +{"current_steps": 5183, "total_steps": 8674, "loss": 0.5052351355552673, "lr": 7.628490721476517e-07, "epoch": 1.1950657136269311, "percentage": 59.75, "elapsed_time": "3:32:22", "remaining_time": "2:23:02"} +{"current_steps": 5184, "total_steps": 8674, "loss": 0.4921465516090393, "lr": 7.624787044570543e-07, "epoch": 1.1952962877565136, "percentage": 59.76, "elapsed_time": "3:32:24", "remaining_time": "2:23:00"} +{"current_steps": 5185, "total_steps": 8674, "loss": 0.3307859003543854, "lr": 7.621083712925839e-07, "epoch": 1.1955268618860964, "percentage": 59.78, "elapsed_time": "3:32:27", "remaining_time": "2:22:57"} +{"current_steps": 5186, "total_steps": 8674, "loss": 0.4276743531227112, "lr": 7.617380727080728e-07, "epoch": 1.195757436015679, "percentage": 59.79, "elapsed_time": "3:32:29", "remaining_time": "2:22:55"} +{"current_steps": 5187, "total_steps": 8674, "loss": 0.5065702795982361, "lr": 7.613678087573475e-07, "epoch": 1.1959880101452618, "percentage": 59.8, "elapsed_time": "3:32:32", "remaining_time": "2:22:52"} +{"current_steps": 5188, "total_steps": 8674, "loss": 0.3588709533214569, "lr": 7.609975794942301e-07, "epoch": 1.1962185842748443, "percentage": 59.81, "elapsed_time": "3:32:34", "remaining_time": 
"2:22:50"} +{"current_steps": 5189, "total_steps": 8674, "loss": 0.4296506941318512, "lr": 7.606273849725362e-07, "epoch": 1.196449158404427, "percentage": 59.82, "elapsed_time": "3:32:37", "remaining_time": "2:22:47"} +{"current_steps": 5190, "total_steps": 8674, "loss": 0.517792820930481, "lr": 7.602572252460782e-07, "epoch": 1.1966797325340097, "percentage": 59.83, "elapsed_time": "3:32:39", "remaining_time": "2:22:45"} +{"current_steps": 5191, "total_steps": 8674, "loss": 0.38939881324768066, "lr": 7.598871003686619e-07, "epoch": 1.1969103066635924, "percentage": 59.85, "elapsed_time": "3:32:42", "remaining_time": "2:22:42"} +{"current_steps": 5192, "total_steps": 8674, "loss": 0.5759290456771851, "lr": 7.595170103940896e-07, "epoch": 1.197140880793175, "percentage": 59.86, "elapsed_time": "3:32:44", "remaining_time": "2:22:40"} +{"current_steps": 5193, "total_steps": 8674, "loss": 0.4705851078033447, "lr": 7.591469553761569e-07, "epoch": 1.1973714549227576, "percentage": 59.87, "elapsed_time": "3:32:46", "remaining_time": "2:22:37"} +{"current_steps": 5194, "total_steps": 8674, "loss": 0.5137619972229004, "lr": 7.587769353686548e-07, "epoch": 1.1976020290523404, "percentage": 59.88, "elapsed_time": "3:32:49", "remaining_time": "2:22:35"} +{"current_steps": 5195, "total_steps": 8674, "loss": 0.43207496404647827, "lr": 7.584069504253701e-07, "epoch": 1.197832603181923, "percentage": 59.89, "elapsed_time": "3:32:51", "remaining_time": "2:22:32"} +{"current_steps": 5196, "total_steps": 8674, "loss": 0.3976139426231384, "lr": 7.580370006000835e-07, "epoch": 1.1980631773115056, "percentage": 59.9, "elapsed_time": "3:32:54", "remaining_time": "2:22:30"} +{"current_steps": 5197, "total_steps": 8674, "loss": 0.41323673725128174, "lr": 7.576670859465715e-07, "epoch": 1.1982937514410883, "percentage": 59.91, "elapsed_time": "3:32:56", "remaining_time": "2:22:28"} +{"current_steps": 5198, "total_steps": 8674, "loss": 0.404024600982666, "lr": 7.57297206518604e-07, "epoch": 
1.198524325570671, "percentage": 59.93, "elapsed_time": "3:32:59", "remaining_time": "2:22:25"} +{"current_steps": 5199, "total_steps": 8674, "loss": 0.4010540843009949, "lr": 7.569273623699475e-07, "epoch": 1.1987548997002535, "percentage": 59.94, "elapsed_time": "3:33:01", "remaining_time": "2:22:23"} +{"current_steps": 5200, "total_steps": 8674, "loss": 0.44299256801605225, "lr": 7.565575535543623e-07, "epoch": 1.1989854738298362, "percentage": 59.95, "elapsed_time": "3:33:04", "remaining_time": "2:22:20"} +{"current_steps": 5201, "total_steps": 8674, "loss": 0.5217546820640564, "lr": 7.561877801256041e-07, "epoch": 1.199216047959419, "percentage": 59.96, "elapsed_time": "3:33:08", "remaining_time": "2:22:19"} +{"current_steps": 5202, "total_steps": 8674, "loss": 0.5192688703536987, "lr": 7.558180421374229e-07, "epoch": 1.1994466220890017, "percentage": 59.97, "elapsed_time": "3:33:10", "remaining_time": "2:22:16"} +{"current_steps": 5203, "total_steps": 8674, "loss": 0.38272884488105774, "lr": 7.554483396435637e-07, "epoch": 1.1996771962185844, "percentage": 59.98, "elapsed_time": "3:33:12", "remaining_time": "2:22:14"} +{"current_steps": 5204, "total_steps": 8674, "loss": 0.474464476108551, "lr": 7.550786726977673e-07, "epoch": 1.1999077703481669, "percentage": 60.0, "elapsed_time": "3:33:15", "remaining_time": "2:22:11"} +{"current_steps": 5205, "total_steps": 8674, "loss": 0.540134072303772, "lr": 7.547090413537676e-07, "epoch": 1.2001383444777496, "percentage": 60.01, "elapsed_time": "3:33:17", "remaining_time": "2:22:09"} +{"current_steps": 5206, "total_steps": 8674, "loss": 0.4662882089614868, "lr": 7.543394456652948e-07, "epoch": 1.2003689186073323, "percentage": 60.02, "elapsed_time": "3:33:20", "remaining_time": "2:22:07"} +{"current_steps": 5207, "total_steps": 8674, "loss": 0.440970778465271, "lr": 7.539698856860732e-07, "epoch": 1.2005994927369148, "percentage": 60.03, "elapsed_time": "3:33:22", "remaining_time": "2:22:04"} +{"current_steps": 5208, 
"total_steps": 8674, "loss": 0.41787397861480713, "lr": 7.536003614698225e-07, "epoch": 1.2008300668664975, "percentage": 60.04, "elapsed_time": "3:33:25", "remaining_time": "2:22:02"} +{"current_steps": 5209, "total_steps": 8674, "loss": 0.5503408908843994, "lr": 7.532308730702561e-07, "epoch": 1.2010606409960802, "percentage": 60.05, "elapsed_time": "3:33:27", "remaining_time": "2:21:59"} +{"current_steps": 5210, "total_steps": 8674, "loss": 0.43713903427124023, "lr": 7.528614205410833e-07, "epoch": 1.201291215125663, "percentage": 60.06, "elapsed_time": "3:33:30", "remaining_time": "2:21:57"} +{"current_steps": 5211, "total_steps": 8674, "loss": 0.4145667552947998, "lr": 7.524920039360076e-07, "epoch": 1.2015217892552457, "percentage": 60.08, "elapsed_time": "3:33:32", "remaining_time": "2:21:54"} +{"current_steps": 5212, "total_steps": 8674, "loss": 0.4307587146759033, "lr": 7.521226233087279e-07, "epoch": 1.2017523633848282, "percentage": 60.09, "elapsed_time": "3:33:35", "remaining_time": "2:21:52"} +{"current_steps": 5213, "total_steps": 8674, "loss": 0.43784570693969727, "lr": 7.517532787129369e-07, "epoch": 1.2019829375144109, "percentage": 60.1, "elapsed_time": "3:33:37", "remaining_time": "2:21:49"} +{"current_steps": 5214, "total_steps": 8674, "loss": 0.40003830194473267, "lr": 7.513839702023226e-07, "epoch": 1.2022135116439936, "percentage": 60.11, "elapsed_time": "3:33:40", "remaining_time": "2:21:47"} +{"current_steps": 5215, "total_steps": 8674, "loss": 0.4880738854408264, "lr": 7.510146978305682e-07, "epoch": 1.202444085773576, "percentage": 60.12, "elapsed_time": "3:33:42", "remaining_time": "2:21:45"} +{"current_steps": 5216, "total_steps": 8674, "loss": 0.39548349380493164, "lr": 7.506454616513505e-07, "epoch": 1.2026746599031588, "percentage": 60.13, "elapsed_time": "3:33:45", "remaining_time": "2:21:42"} +{"current_steps": 5217, "total_steps": 8674, "loss": 0.4060090184211731, "lr": 7.502762617183425e-07, "epoch": 1.2029052340327415, 
"percentage": 60.15, "elapsed_time": "3:33:47", "remaining_time": "2:21:40"} +{"current_steps": 5218, "total_steps": 8674, "loss": 0.44657808542251587, "lr": 7.499070980852101e-07, "epoch": 1.2031358081623242, "percentage": 60.16, "elapsed_time": "3:33:50", "remaining_time": "2:21:37"} +{"current_steps": 5219, "total_steps": 8674, "loss": 0.5283595323562622, "lr": 7.495379708056161e-07, "epoch": 1.203366382291907, "percentage": 60.17, "elapsed_time": "3:33:52", "remaining_time": "2:21:35"} +{"current_steps": 5220, "total_steps": 8674, "loss": 0.4424205422401428, "lr": 7.49168879933216e-07, "epoch": 1.2035969564214895, "percentage": 60.18, "elapsed_time": "3:33:54", "remaining_time": "2:21:32"} +{"current_steps": 5221, "total_steps": 8674, "loss": 0.4998319745063782, "lr": 7.487998255216619e-07, "epoch": 1.2038275305510722, "percentage": 60.19, "elapsed_time": "3:33:57", "remaining_time": "2:21:30"} +{"current_steps": 5222, "total_steps": 8674, "loss": 0.3821876645088196, "lr": 7.484308076245987e-07, "epoch": 1.2040581046806549, "percentage": 60.2, "elapsed_time": "3:34:00", "remaining_time": "2:21:27"} +{"current_steps": 5223, "total_steps": 8674, "loss": 0.4567919373512268, "lr": 7.480618262956669e-07, "epoch": 1.2042886788102374, "percentage": 60.21, "elapsed_time": "3:34:02", "remaining_time": "2:21:25"} +{"current_steps": 5224, "total_steps": 8674, "loss": 0.4561428427696228, "lr": 7.476928815885026e-07, "epoch": 1.20451925293982, "percentage": 60.23, "elapsed_time": "3:34:05", "remaining_time": "2:21:23"} +{"current_steps": 5225, "total_steps": 8674, "loss": 0.4384823739528656, "lr": 7.473239735567344e-07, "epoch": 1.2047498270694028, "percentage": 60.24, "elapsed_time": "3:34:07", "remaining_time": "2:21:20"} +{"current_steps": 5226, "total_steps": 8674, "loss": 0.42840123176574707, "lr": 7.469551022539877e-07, "epoch": 1.2049804011989855, "percentage": 60.25, "elapsed_time": "3:34:10", "remaining_time": "2:21:18"} +{"current_steps": 5227, "total_steps": 
8674, "loss": 0.39553213119506836, "lr": 7.465862677338812e-07, "epoch": 1.2052109753285682, "percentage": 60.26, "elapsed_time": "3:34:12", "remaining_time": "2:21:15"} +{"current_steps": 5228, "total_steps": 8674, "loss": 0.4325043559074402, "lr": 7.462174700500295e-07, "epoch": 1.2054415494581507, "percentage": 60.27, "elapsed_time": "3:34:14", "remaining_time": "2:21:13"} +{"current_steps": 5229, "total_steps": 8674, "loss": 0.5004623532295227, "lr": 7.4584870925604e-07, "epoch": 1.2056721235877335, "percentage": 60.28, "elapsed_time": "3:34:17", "remaining_time": "2:21:10"} +{"current_steps": 5230, "total_steps": 8674, "loss": 0.42296791076660156, "lr": 7.454799854055165e-07, "epoch": 1.2059026977173162, "percentage": 60.3, "elapsed_time": "3:34:19", "remaining_time": "2:21:08"} +{"current_steps": 5231, "total_steps": 8674, "loss": 0.45638370513916016, "lr": 7.451112985520565e-07, "epoch": 1.2061332718468987, "percentage": 60.31, "elapsed_time": "3:34:22", "remaining_time": "2:21:05"} +{"current_steps": 5232, "total_steps": 8674, "loss": 0.5134493112564087, "lr": 7.447426487492528e-07, "epoch": 1.2063638459764814, "percentage": 60.32, "elapsed_time": "3:34:24", "remaining_time": "2:21:03"} +{"current_steps": 5233, "total_steps": 8674, "loss": 0.4132578372955322, "lr": 7.443740360506918e-07, "epoch": 1.206594420106064, "percentage": 60.33, "elapsed_time": "3:34:27", "remaining_time": "2:21:01"} +{"current_steps": 5234, "total_steps": 8674, "loss": 0.4363224506378174, "lr": 7.440054605099552e-07, "epoch": 1.2068249942356468, "percentage": 60.34, "elapsed_time": "3:34:29", "remaining_time": "2:20:58"} +{"current_steps": 5235, "total_steps": 8674, "loss": 0.44970041513442993, "lr": 7.4363692218062e-07, "epoch": 1.2070555683652295, "percentage": 60.35, "elapsed_time": "3:34:32", "remaining_time": "2:20:56"} +{"current_steps": 5236, "total_steps": 8674, "loss": 0.39787235856056213, "lr": 7.432684211162556e-07, "epoch": 1.207286142494812, "percentage": 60.36, 
"elapsed_time": "3:34:34", "remaining_time": "2:20:53"} +{"current_steps": 5237, "total_steps": 8674, "loss": 0.46057572960853577, "lr": 7.428999573704284e-07, "epoch": 1.2075167166243947, "percentage": 60.38, "elapsed_time": "3:34:37", "remaining_time": "2:20:51"} +{"current_steps": 5238, "total_steps": 8674, "loss": 0.46754559874534607, "lr": 7.42531530996698e-07, "epoch": 1.2077472907539775, "percentage": 60.39, "elapsed_time": "3:34:39", "remaining_time": "2:20:48"} +{"current_steps": 5239, "total_steps": 8674, "loss": 0.5072697401046753, "lr": 7.42163142048619e-07, "epoch": 1.20797786488356, "percentage": 60.4, "elapsed_time": "3:34:42", "remaining_time": "2:20:46"} +{"current_steps": 5240, "total_steps": 8674, "loss": 0.4691959023475647, "lr": 7.417947905797403e-07, "epoch": 1.2082084390131427, "percentage": 60.41, "elapsed_time": "3:34:44", "remaining_time": "2:20:43"} +{"current_steps": 5241, "total_steps": 8674, "loss": 0.43248072266578674, "lr": 7.414264766436056e-07, "epoch": 1.2084390131427254, "percentage": 60.42, "elapsed_time": "3:34:47", "remaining_time": "2:20:41"} +{"current_steps": 5242, "total_steps": 8674, "loss": 0.4748457968235016, "lr": 7.410582002937534e-07, "epoch": 1.208669587272308, "percentage": 60.43, "elapsed_time": "3:34:49", "remaining_time": "2:20:38"} +{"current_steps": 5243, "total_steps": 8674, "loss": 0.4682820439338684, "lr": 7.406899615837157e-07, "epoch": 1.2089001614018908, "percentage": 60.45, "elapsed_time": "3:34:52", "remaining_time": "2:20:36"} +{"current_steps": 5244, "total_steps": 8674, "loss": 0.41747021675109863, "lr": 7.403217605670205e-07, "epoch": 1.2091307355314733, "percentage": 60.46, "elapsed_time": "3:34:54", "remaining_time": "2:20:34"} +{"current_steps": 5245, "total_steps": 8674, "loss": 0.4968727231025696, "lr": 7.399535972971886e-07, "epoch": 1.209361309661056, "percentage": 60.47, "elapsed_time": "3:34:57", "remaining_time": "2:20:31"} +{"current_steps": 5246, "total_steps": 8674, "loss": 
0.486778199672699, "lr": 7.395854718277372e-07, "epoch": 1.2095918837906388, "percentage": 60.48, "elapsed_time": "3:34:59", "remaining_time": "2:20:29"} +{"current_steps": 5247, "total_steps": 8674, "loss": 0.5153725147247314, "lr": 7.392173842121765e-07, "epoch": 1.2098224579202213, "percentage": 60.49, "elapsed_time": "3:35:02", "remaining_time": "2:20:26"} +{"current_steps": 5248, "total_steps": 8674, "loss": 0.42352354526519775, "lr": 7.388493345040123e-07, "epoch": 1.210053032049804, "percentage": 60.5, "elapsed_time": "3:35:04", "remaining_time": "2:20:24"} +{"current_steps": 5249, "total_steps": 8674, "loss": 0.363994300365448, "lr": 7.384813227567437e-07, "epoch": 1.2102836061793867, "percentage": 60.51, "elapsed_time": "3:35:06", "remaining_time": "2:20:21"} +{"current_steps": 5250, "total_steps": 8674, "loss": 0.44195863604545593, "lr": 7.381133490238654e-07, "epoch": 1.2105141803089694, "percentage": 60.53, "elapsed_time": "3:35:09", "remaining_time": "2:20:19"} +{"current_steps": 5251, "total_steps": 8674, "loss": 0.5031026601791382, "lr": 7.377454133588657e-07, "epoch": 1.2107447544385521, "percentage": 60.54, "elapsed_time": "3:35:11", "remaining_time": "2:20:16"} +{"current_steps": 5252, "total_steps": 8674, "loss": 0.3900304436683655, "lr": 7.373775158152284e-07, "epoch": 1.2109753285681346, "percentage": 60.55, "elapsed_time": "3:35:14", "remaining_time": "2:20:14"} +{"current_steps": 5253, "total_steps": 8674, "loss": 0.406912624835968, "lr": 7.370096564464308e-07, "epoch": 1.2112059026977173, "percentage": 60.56, "elapsed_time": "3:35:16", "remaining_time": "2:20:11"} +{"current_steps": 5254, "total_steps": 8674, "loss": 0.407238632440567, "lr": 7.366418353059445e-07, "epoch": 1.2114364768273, "percentage": 60.57, "elapsed_time": "3:35:18", "remaining_time": "2:20:09"} +{"current_steps": 5255, "total_steps": 8674, "loss": 0.5605549216270447, "lr": 7.36274052447237e-07, "epoch": 1.2116670509568825, "percentage": 60.58, "elapsed_time": "3:35:21", 
"remaining_time": "2:20:06"} +{"current_steps": 5256, "total_steps": 8674, "loss": 0.5016111731529236, "lr": 7.359063079237684e-07, "epoch": 1.2118976250864653, "percentage": 60.59, "elapsed_time": "3:35:23", "remaining_time": "2:20:04"} +{"current_steps": 5257, "total_steps": 8674, "loss": 0.38812315464019775, "lr": 7.355386017889946e-07, "epoch": 1.212128199216048, "percentage": 60.61, "elapsed_time": "3:35:26", "remaining_time": "2:20:01"} +{"current_steps": 5258, "total_steps": 8674, "loss": 0.46022963523864746, "lr": 7.35170934096365e-07, "epoch": 1.2123587733456307, "percentage": 60.62, "elapsed_time": "3:35:28", "remaining_time": "2:19:59"} +{"current_steps": 5259, "total_steps": 8674, "loss": 0.40029624104499817, "lr": 7.348033048993246e-07, "epoch": 1.2125893474752132, "percentage": 60.63, "elapsed_time": "3:35:31", "remaining_time": "2:19:56"} +{"current_steps": 5260, "total_steps": 8674, "loss": 0.4331943392753601, "lr": 7.344357142513111e-07, "epoch": 1.212819921604796, "percentage": 60.64, "elapsed_time": "3:35:33", "remaining_time": "2:19:54"} +{"current_steps": 5261, "total_steps": 8674, "loss": 0.43757596611976624, "lr": 7.340681622057582e-07, "epoch": 1.2130504957343786, "percentage": 60.65, "elapsed_time": "3:35:35", "remaining_time": "2:19:51"} +{"current_steps": 5262, "total_steps": 8674, "loss": 0.49733203649520874, "lr": 7.337006488160931e-07, "epoch": 1.2132810698639613, "percentage": 60.66, "elapsed_time": "3:35:38", "remaining_time": "2:19:49"} +{"current_steps": 5263, "total_steps": 8674, "loss": 0.35552018880844116, "lr": 7.333331741357373e-07, "epoch": 1.2135116439935438, "percentage": 60.68, "elapsed_time": "3:35:40", "remaining_time": "2:19:46"} +{"current_steps": 5264, "total_steps": 8674, "loss": 0.4102798104286194, "lr": 7.329657382181074e-07, "epoch": 1.2137422181231265, "percentage": 60.69, "elapsed_time": "3:35:43", "remaining_time": "2:19:44"} +{"current_steps": 5265, "total_steps": 8674, "loss": 0.4517349600791931, "lr": 
7.325983411166136e-07, "epoch": 1.2139727922527093, "percentage": 60.7, "elapsed_time": "3:35:45", "remaining_time": "2:19:42"} +{"current_steps": 5266, "total_steps": 8674, "loss": 0.48924458026885986, "lr": 7.322309828846613e-07, "epoch": 1.214203366382292, "percentage": 60.71, "elapsed_time": "3:35:48", "remaining_time": "2:19:39"} +{"current_steps": 5267, "total_steps": 8674, "loss": 0.38971561193466187, "lr": 7.31863663575649e-07, "epoch": 1.2144339405118745, "percentage": 60.72, "elapsed_time": "3:35:50", "remaining_time": "2:19:37"} +{"current_steps": 5268, "total_steps": 8674, "loss": 0.6503559350967407, "lr": 7.31496383242971e-07, "epoch": 1.2146645146414572, "percentage": 60.73, "elapsed_time": "3:35:53", "remaining_time": "2:19:34"} +{"current_steps": 5269, "total_steps": 8674, "loss": 0.4615272879600525, "lr": 7.311291419400146e-07, "epoch": 1.21489508877104, "percentage": 60.74, "elapsed_time": "3:35:55", "remaining_time": "2:19:32"} +{"current_steps": 5270, "total_steps": 8674, "loss": 0.3793429732322693, "lr": 7.307619397201625e-07, "epoch": 1.2151256629006226, "percentage": 60.76, "elapsed_time": "3:35:57", "remaining_time": "2:19:29"} +{"current_steps": 5271, "total_steps": 8674, "loss": 0.48186585307121277, "lr": 7.303947766367909e-07, "epoch": 1.2153562370302051, "percentage": 60.77, "elapsed_time": "3:36:00", "remaining_time": "2:19:27"} +{"current_steps": 5272, "total_steps": 8674, "loss": 0.4051778018474579, "lr": 7.300276527432713e-07, "epoch": 1.2155868111597878, "percentage": 60.78, "elapsed_time": "3:36:03", "remaining_time": "2:19:24"} +{"current_steps": 5273, "total_steps": 8674, "loss": 0.43364250659942627, "lr": 7.296605680929684e-07, "epoch": 1.2158173852893706, "percentage": 60.79, "elapsed_time": "3:36:05", "remaining_time": "2:19:22"} +{"current_steps": 5274, "total_steps": 8674, "loss": 0.4893898367881775, "lr": 7.292935227392414e-07, "epoch": 1.2160479594189533, "percentage": 60.8, "elapsed_time": "3:36:08", "remaining_time": 
"2:19:20"} +{"current_steps": 5275, "total_steps": 8674, "loss": 0.43125462532043457, "lr": 7.289265167354448e-07, "epoch": 1.2162785335485358, "percentage": 60.81, "elapsed_time": "3:36:10", "remaining_time": "2:19:17"} +{"current_steps": 5276, "total_steps": 8674, "loss": 0.4086509943008423, "lr": 7.285595501349258e-07, "epoch": 1.2165091076781185, "percentage": 60.83, "elapsed_time": "3:36:12", "remaining_time": "2:19:15"} +{"current_steps": 5277, "total_steps": 8674, "loss": 0.5176471471786499, "lr": 7.281926229910274e-07, "epoch": 1.2167396818077012, "percentage": 60.84, "elapsed_time": "3:36:15", "remaining_time": "2:19:12"} +{"current_steps": 5278, "total_steps": 8674, "loss": 0.4783210754394531, "lr": 7.278257353570857e-07, "epoch": 1.216970255937284, "percentage": 60.85, "elapsed_time": "3:36:17", "remaining_time": "2:19:10"} +{"current_steps": 5279, "total_steps": 8674, "loss": 0.4847145080566406, "lr": 7.274588872864322e-07, "epoch": 1.2172008300668664, "percentage": 60.86, "elapsed_time": "3:36:20", "remaining_time": "2:19:07"} +{"current_steps": 5280, "total_steps": 8674, "loss": 0.4691849946975708, "lr": 7.270920788323911e-07, "epoch": 1.2174314041964491, "percentage": 60.87, "elapsed_time": "3:36:22", "remaining_time": "2:19:05"} +{"current_steps": 5281, "total_steps": 8674, "loss": 0.5755687952041626, "lr": 7.267253100482824e-07, "epoch": 1.2176619783260318, "percentage": 60.88, "elapsed_time": "3:36:25", "remaining_time": "2:19:02"} +{"current_steps": 5282, "total_steps": 8674, "loss": 0.42995721101760864, "lr": 7.263585809874193e-07, "epoch": 1.2178925524556146, "percentage": 60.89, "elapsed_time": "3:36:27", "remaining_time": "2:19:00"} +{"current_steps": 5283, "total_steps": 8674, "loss": 0.501590371131897, "lr": 7.259918917031101e-07, "epoch": 1.218123126585197, "percentage": 60.91, "elapsed_time": "3:36:30", "remaining_time": "2:18:57"} +{"current_steps": 5284, "total_steps": 8674, "loss": 0.5499469041824341, "lr": 7.256252422486563e-07, 
"epoch": 1.2183537007147798, "percentage": 60.92, "elapsed_time": "3:36:32", "remaining_time": "2:18:55"} +{"current_steps": 5285, "total_steps": 8674, "loss": 0.4567297399044037, "lr": 7.25258632677354e-07, "epoch": 1.2185842748443625, "percentage": 60.93, "elapsed_time": "3:36:34", "remaining_time": "2:18:52"} +{"current_steps": 5286, "total_steps": 8674, "loss": 0.4046020805835724, "lr": 7.248920630424942e-07, "epoch": 1.2188148489739452, "percentage": 60.94, "elapsed_time": "3:36:37", "remaining_time": "2:18:50"} +{"current_steps": 5287, "total_steps": 8674, "loss": 0.3534840941429138, "lr": 7.245255333973608e-07, "epoch": 1.2190454231035277, "percentage": 60.95, "elapsed_time": "3:36:39", "remaining_time": "2:18:47"} +{"current_steps": 5288, "total_steps": 8674, "loss": 0.45795637369155884, "lr": 7.241590437952331e-07, "epoch": 1.2192759972331104, "percentage": 60.96, "elapsed_time": "3:36:42", "remaining_time": "2:18:45"} +{"current_steps": 5289, "total_steps": 8674, "loss": 0.3984150290489197, "lr": 7.237925942893839e-07, "epoch": 1.2195065713626931, "percentage": 60.98, "elapsed_time": "3:36:44", "remaining_time": "2:18:43"} +{"current_steps": 5290, "total_steps": 8674, "loss": 0.46833336353302, "lr": 7.234261849330807e-07, "epoch": 1.2197371454922759, "percentage": 60.99, "elapsed_time": "3:36:47", "remaining_time": "2:18:40"} +{"current_steps": 5291, "total_steps": 8674, "loss": 0.5395709276199341, "lr": 7.230598157795842e-07, "epoch": 1.2199677196218583, "percentage": 61.0, "elapsed_time": "3:36:49", "remaining_time": "2:18:38"} +{"current_steps": 5292, "total_steps": 8674, "loss": 0.4556152820587158, "lr": 7.226934868821505e-07, "epoch": 1.220198293751441, "percentage": 61.01, "elapsed_time": "3:36:52", "remaining_time": "2:18:35"} +{"current_steps": 5293, "total_steps": 8674, "loss": 0.49564266204833984, "lr": 7.223271982940287e-07, "epoch": 1.2204288678810238, "percentage": 61.02, "elapsed_time": "3:36:54", "remaining_time": "2:18:33"} 
+{"current_steps": 5294, "total_steps": 8674, "loss": 0.5389127731323242, "lr": 7.219609500684625e-07, "epoch": 1.2206594420106065, "percentage": 61.03, "elapsed_time": "3:36:57", "remaining_time": "2:18:30"} +{"current_steps": 5295, "total_steps": 8674, "loss": 0.48815661668777466, "lr": 7.215947422586905e-07, "epoch": 1.220890016140189, "percentage": 61.04, "elapsed_time": "3:36:59", "remaining_time": "2:18:28"} +{"current_steps": 5296, "total_steps": 8674, "loss": 0.4204339385032654, "lr": 7.21228574917944e-07, "epoch": 1.2211205902697717, "percentage": 61.06, "elapsed_time": "3:37:01", "remaining_time": "2:18:25"} +{"current_steps": 5297, "total_steps": 8674, "loss": 0.39993199706077576, "lr": 7.208624480994494e-07, "epoch": 1.2213511643993544, "percentage": 61.07, "elapsed_time": "3:37:04", "remaining_time": "2:18:23"} +{"current_steps": 5298, "total_steps": 8674, "loss": 0.5679433941841125, "lr": 7.204963618564268e-07, "epoch": 1.2215817385289371, "percentage": 61.08, "elapsed_time": "3:37:07", "remaining_time": "2:18:21"} +{"current_steps": 5299, "total_steps": 8674, "loss": 0.46620815992355347, "lr": 7.201303162420913e-07, "epoch": 1.2218123126585196, "percentage": 61.09, "elapsed_time": "3:37:09", "remaining_time": "2:18:18"} +{"current_steps": 5300, "total_steps": 8674, "loss": 0.44684547185897827, "lr": 7.1976431130965e-07, "epoch": 1.2220428867881024, "percentage": 61.1, "elapsed_time": "3:37:11", "remaining_time": "2:18:16"} +{"current_steps": 5301, "total_steps": 8674, "loss": 0.4518858790397644, "lr": 7.193983471123066e-07, "epoch": 1.222273460917685, "percentage": 61.11, "elapsed_time": "3:37:15", "remaining_time": "2:18:14"} +{"current_steps": 5302, "total_steps": 8674, "loss": 0.3966304659843445, "lr": 7.190324237032569e-07, "epoch": 1.2225040350472678, "percentage": 61.13, "elapsed_time": "3:37:17", "remaining_time": "2:18:11"} +{"current_steps": 5303, "total_steps": 8674, "loss": 0.5541782379150391, "lr": 7.186665411356925e-07, "epoch": 
1.2227346091768503, "percentage": 61.14, "elapsed_time": "3:37:20", "remaining_time": "2:18:09"} +{"current_steps": 5304, "total_steps": 8674, "loss": 0.3986799120903015, "lr": 7.183006994627972e-07, "epoch": 1.222965183306433, "percentage": 61.15, "elapsed_time": "3:37:22", "remaining_time": "2:18:06"} +{"current_steps": 5305, "total_steps": 8674, "loss": 0.485867977142334, "lr": 7.1793489873775e-07, "epoch": 1.2231957574360157, "percentage": 61.16, "elapsed_time": "3:37:25", "remaining_time": "2:18:04"} +{"current_steps": 5306, "total_steps": 8674, "loss": 0.40187692642211914, "lr": 7.175691390137244e-07, "epoch": 1.2234263315655984, "percentage": 61.17, "elapsed_time": "3:37:27", "remaining_time": "2:18:02"} +{"current_steps": 5307, "total_steps": 8674, "loss": 0.4679393172264099, "lr": 7.172034203438864e-07, "epoch": 1.223656905695181, "percentage": 61.18, "elapsed_time": "3:37:30", "remaining_time": "2:17:59"} +{"current_steps": 5308, "total_steps": 8674, "loss": 0.512301504611969, "lr": 7.168377427813974e-07, "epoch": 1.2238874798247636, "percentage": 61.19, "elapsed_time": "3:37:32", "remaining_time": "2:17:57"} +{"current_steps": 5309, "total_steps": 8674, "loss": 0.5340646505355835, "lr": 7.164721063794122e-07, "epoch": 1.2241180539543464, "percentage": 61.21, "elapsed_time": "3:37:35", "remaining_time": "2:17:54"} +{"current_steps": 5310, "total_steps": 8674, "loss": 0.4757506847381592, "lr": 7.1610651119108e-07, "epoch": 1.224348628083929, "percentage": 61.22, "elapsed_time": "3:37:37", "remaining_time": "2:17:52"} +{"current_steps": 5311, "total_steps": 8674, "loss": 0.5697519779205322, "lr": 7.157409572695434e-07, "epoch": 1.2245792022135116, "percentage": 61.23, "elapsed_time": "3:37:39", "remaining_time": "2:17:49"} +{"current_steps": 5312, "total_steps": 8674, "loss": 0.47521811723709106, "lr": 7.153754446679395e-07, "epoch": 1.2248097763430943, "percentage": 61.24, "elapsed_time": "3:37:42", "remaining_time": "2:17:47"} +{"current_steps": 5313, 
"total_steps": 8674, "loss": 0.40484973788261414, "lr": 7.150099734393997e-07, "epoch": 1.225040350472677, "percentage": 61.25, "elapsed_time": "3:37:44", "remaining_time": "2:17:44"} +{"current_steps": 5314, "total_steps": 8674, "loss": 0.4465969204902649, "lr": 7.146445436370481e-07, "epoch": 1.2252709246022597, "percentage": 61.26, "elapsed_time": "3:37:47", "remaining_time": "2:17:42"} +{"current_steps": 5315, "total_steps": 8674, "loss": 0.44878089427948, "lr": 7.142791553140044e-07, "epoch": 1.2255014987318422, "percentage": 61.28, "elapsed_time": "3:37:49", "remaining_time": "2:17:39"} +{"current_steps": 5316, "total_steps": 8674, "loss": 0.5049536228179932, "lr": 7.139138085233809e-07, "epoch": 1.225732072861425, "percentage": 61.29, "elapsed_time": "3:37:52", "remaining_time": "2:17:37"} +{"current_steps": 5317, "total_steps": 8674, "loss": 0.42945951223373413, "lr": 7.135485033182847e-07, "epoch": 1.2259626469910077, "percentage": 61.3, "elapsed_time": "3:37:54", "remaining_time": "2:17:34"} +{"current_steps": 5318, "total_steps": 8674, "loss": 0.4668564200401306, "lr": 7.131832397518167e-07, "epoch": 1.2261932211205901, "percentage": 61.31, "elapsed_time": "3:37:56", "remaining_time": "2:17:32"} +{"current_steps": 5319, "total_steps": 8674, "loss": 0.4691551625728607, "lr": 7.128180178770718e-07, "epoch": 1.2264237952501729, "percentage": 61.32, "elapsed_time": "3:37:59", "remaining_time": "2:17:29"} +{"current_steps": 5320, "total_steps": 8674, "loss": 0.4306211769580841, "lr": 7.124528377471382e-07, "epoch": 1.2266543693797556, "percentage": 61.33, "elapsed_time": "3:38:01", "remaining_time": "2:17:27"} +{"current_steps": 5321, "total_steps": 8674, "loss": 0.4986322522163391, "lr": 7.120876994150991e-07, "epoch": 1.2268849435093383, "percentage": 61.34, "elapsed_time": "3:38:04", "remaining_time": "2:17:24"} +{"current_steps": 5322, "total_steps": 8674, "loss": 0.4058566093444824, "lr": 7.117226029340304e-07, "epoch": 1.227115517638921, "percentage": 
61.36, "elapsed_time": "3:38:06", "remaining_time": "2:17:22"} +{"current_steps": 5323, "total_steps": 8674, "loss": 0.390174925327301, "lr": 7.113575483570036e-07, "epoch": 1.2273460917685035, "percentage": 61.37, "elapsed_time": "3:38:09", "remaining_time": "2:17:20"} +{"current_steps": 5324, "total_steps": 8674, "loss": 0.38822996616363525, "lr": 7.109925357370821e-07, "epoch": 1.2275766658980862, "percentage": 61.38, "elapsed_time": "3:38:11", "remaining_time": "2:17:17"} +{"current_steps": 5325, "total_steps": 8674, "loss": 0.47792741656303406, "lr": 7.106275651273244e-07, "epoch": 1.227807240027669, "percentage": 61.39, "elapsed_time": "3:38:14", "remaining_time": "2:17:15"} +{"current_steps": 5326, "total_steps": 8674, "loss": 0.5332789421081543, "lr": 7.102626365807833e-07, "epoch": 1.2280378141572514, "percentage": 61.4, "elapsed_time": "3:38:16", "remaining_time": "2:17:12"} +{"current_steps": 5327, "total_steps": 8674, "loss": 0.5325096845626831, "lr": 7.098977501505036e-07, "epoch": 1.2282683882868342, "percentage": 61.41, "elapsed_time": "3:38:18", "remaining_time": "2:17:10"} +{"current_steps": 5328, "total_steps": 8674, "loss": 0.4184231162071228, "lr": 7.095329058895267e-07, "epoch": 1.2284989624164169, "percentage": 61.42, "elapsed_time": "3:38:21", "remaining_time": "2:17:07"} +{"current_steps": 5329, "total_steps": 8674, "loss": 0.43037641048431396, "lr": 7.091681038508852e-07, "epoch": 1.2287295365459996, "percentage": 61.44, "elapsed_time": "3:38:23", "remaining_time": "2:17:05"} +{"current_steps": 5330, "total_steps": 8674, "loss": 0.4466821551322937, "lr": 7.088033440876078e-07, "epoch": 1.2289601106755823, "percentage": 61.45, "elapsed_time": "3:38:26", "remaining_time": "2:17:02"} +{"current_steps": 5331, "total_steps": 8674, "loss": 0.35853004455566406, "lr": 7.084386266527151e-07, "epoch": 1.2291906848051648, "percentage": 61.46, "elapsed_time": "3:38:28", "remaining_time": "2:17:00"} +{"current_steps": 5332, "total_steps": 8674, "loss": 
0.44986268877983093, "lr": 7.080739515992231e-07, "epoch": 1.2294212589347475, "percentage": 61.47, "elapsed_time": "3:38:31", "remaining_time": "2:16:57"} +{"current_steps": 5333, "total_steps": 8674, "loss": 0.3563602566719055, "lr": 7.07709318980141e-07, "epoch": 1.2296518330643302, "percentage": 61.48, "elapsed_time": "3:38:33", "remaining_time": "2:16:55"} +{"current_steps": 5334, "total_steps": 8674, "loss": 0.4505435824394226, "lr": 7.073447288484715e-07, "epoch": 1.2298824071939127, "percentage": 61.49, "elapsed_time": "3:38:36", "remaining_time": "2:16:52"} +{"current_steps": 5335, "total_steps": 8674, "loss": 0.4477807283401489, "lr": 7.069801812572116e-07, "epoch": 1.2301129813234954, "percentage": 61.51, "elapsed_time": "3:38:38", "remaining_time": "2:16:50"} +{"current_steps": 5336, "total_steps": 8674, "loss": 0.4470565915107727, "lr": 7.066156762593518e-07, "epoch": 1.2303435554530782, "percentage": 61.52, "elapsed_time": "3:38:41", "remaining_time": "2:16:48"} +{"current_steps": 5337, "total_steps": 8674, "loss": 0.4236464500427246, "lr": 7.062512139078773e-07, "epoch": 1.2305741295826609, "percentage": 61.53, "elapsed_time": "3:38:43", "remaining_time": "2:16:45"} +{"current_steps": 5338, "total_steps": 8674, "loss": 0.3221476376056671, "lr": 7.058867942557655e-07, "epoch": 1.2308047037122436, "percentage": 61.54, "elapsed_time": "3:38:45", "remaining_time": "2:16:43"} +{"current_steps": 5339, "total_steps": 8674, "loss": 0.502305269241333, "lr": 7.055224173559891e-07, "epoch": 1.231035277841826, "percentage": 61.55, "elapsed_time": "3:38:48", "remaining_time": "2:16:40"} +{"current_steps": 5340, "total_steps": 8674, "loss": 0.5121853351593018, "lr": 7.051580832615136e-07, "epoch": 1.2312658519714088, "percentage": 61.56, "elapsed_time": "3:38:50", "remaining_time": "2:16:38"} +{"current_steps": 5341, "total_steps": 8674, "loss": 0.5468438863754272, "lr": 7.047937920252991e-07, "epoch": 1.2314964261009915, "percentage": 61.57, "elapsed_time": 
"3:38:53", "remaining_time": "2:16:35"} +{"current_steps": 5342, "total_steps": 8674, "loss": 0.5026402473449707, "lr": 7.044295437002985e-07, "epoch": 1.231727000230574, "percentage": 61.59, "elapsed_time": "3:38:55", "remaining_time": "2:16:33"} +{"current_steps": 5343, "total_steps": 8674, "loss": 0.5205342173576355, "lr": 7.040653383394596e-07, "epoch": 1.2319575743601567, "percentage": 61.6, "elapsed_time": "3:38:58", "remaining_time": "2:16:30"} +{"current_steps": 5344, "total_steps": 8674, "loss": 0.5184727311134338, "lr": 7.037011759957228e-07, "epoch": 1.2321881484897395, "percentage": 61.61, "elapsed_time": "3:39:00", "remaining_time": "2:16:28"} +{"current_steps": 5345, "total_steps": 8674, "loss": 0.414316862821579, "lr": 7.033370567220227e-07, "epoch": 1.2324187226193222, "percentage": 61.62, "elapsed_time": "3:39:03", "remaining_time": "2:16:25"} +{"current_steps": 5346, "total_steps": 8674, "loss": 0.42133980989456177, "lr": 7.029729805712885e-07, "epoch": 1.2326492967489049, "percentage": 61.63, "elapsed_time": "3:39:05", "remaining_time": "2:16:23"} +{"current_steps": 5347, "total_steps": 8674, "loss": 0.4888553321361542, "lr": 7.026089475964414e-07, "epoch": 1.2328798708784874, "percentage": 61.64, "elapsed_time": "3:39:07", "remaining_time": "2:16:20"} +{"current_steps": 5348, "total_steps": 8674, "loss": 0.4702431857585907, "lr": 7.022449578503979e-07, "epoch": 1.23311044500807, "percentage": 61.66, "elapsed_time": "3:39:10", "remaining_time": "2:16:18"} +{"current_steps": 5349, "total_steps": 8674, "loss": 0.5312628746032715, "lr": 7.018810113860672e-07, "epoch": 1.2333410191376528, "percentage": 61.67, "elapsed_time": "3:39:12", "remaining_time": "2:16:16"} +{"current_steps": 5350, "total_steps": 8674, "loss": 0.5297777056694031, "lr": 7.015171082563533e-07, "epoch": 1.2335715932672353, "percentage": 61.68, "elapsed_time": "3:39:15", "remaining_time": "2:16:13"} +{"current_steps": 5351, "total_steps": 8674, "loss": 0.5172504782676697, "lr": 
7.011532485141524e-07, "epoch": 1.233802167396818, "percentage": 61.69, "elapsed_time": "3:39:17", "remaining_time": "2:16:11"} +{"current_steps": 5352, "total_steps": 8674, "loss": 0.4288995862007141, "lr": 7.007894322123556e-07, "epoch": 1.2340327415264007, "percentage": 61.7, "elapsed_time": "3:39:20", "remaining_time": "2:16:08"} +{"current_steps": 5353, "total_steps": 8674, "loss": 0.4194108247756958, "lr": 7.004256594038475e-07, "epoch": 1.2342633156559835, "percentage": 61.71, "elapsed_time": "3:39:22", "remaining_time": "2:16:06"} +{"current_steps": 5354, "total_steps": 8674, "loss": 0.48825979232788086, "lr": 7.000619301415056e-07, "epoch": 1.2344938897855662, "percentage": 61.72, "elapsed_time": "3:39:24", "remaining_time": "2:16:03"} +{"current_steps": 5355, "total_steps": 8674, "loss": 0.4721163213253021, "lr": 6.99698244478202e-07, "epoch": 1.2347244639151487, "percentage": 61.74, "elapsed_time": "3:39:27", "remaining_time": "2:16:01"} +{"current_steps": 5356, "total_steps": 8674, "loss": 0.5104520916938782, "lr": 6.993346024668019e-07, "epoch": 1.2349550380447314, "percentage": 61.75, "elapsed_time": "3:39:29", "remaining_time": "2:15:58"} +{"current_steps": 5357, "total_steps": 8674, "loss": 0.5257378816604614, "lr": 6.98971004160165e-07, "epoch": 1.235185612174314, "percentage": 61.76, "elapsed_time": "3:39:32", "remaining_time": "2:15:56"} +{"current_steps": 5358, "total_steps": 8674, "loss": 0.5624911785125732, "lr": 6.986074496111429e-07, "epoch": 1.2354161863038966, "percentage": 61.77, "elapsed_time": "3:39:34", "remaining_time": "2:15:53"} +{"current_steps": 5359, "total_steps": 8674, "loss": 0.5186502933502197, "lr": 6.982439388725828e-07, "epoch": 1.2356467604334793, "percentage": 61.78, "elapsed_time": "3:39:37", "remaining_time": "2:15:51"} +{"current_steps": 5360, "total_steps": 8674, "loss": 0.42711856961250305, "lr": 6.978804719973241e-07, "epoch": 1.235877334563062, "percentage": 61.79, "elapsed_time": "3:39:39", "remaining_time": 
"2:15:48"} +{"current_steps": 5361, "total_steps": 8674, "loss": 0.525848388671875, "lr": 6.975170490382013e-07, "epoch": 1.2361079086926448, "percentage": 61.81, "elapsed_time": "3:39:41", "remaining_time": "2:15:46"} +{"current_steps": 5362, "total_steps": 8674, "loss": 0.41279107332229614, "lr": 6.971536700480405e-07, "epoch": 1.2363384828222275, "percentage": 61.82, "elapsed_time": "3:39:44", "remaining_time": "2:15:43"} +{"current_steps": 5363, "total_steps": 8674, "loss": 0.38868075609207153, "lr": 6.967903350796632e-07, "epoch": 1.23656905695181, "percentage": 61.83, "elapsed_time": "3:39:46", "remaining_time": "2:15:41"} +{"current_steps": 5364, "total_steps": 8674, "loss": 0.41875284910202026, "lr": 6.964270441858837e-07, "epoch": 1.2367996310813927, "percentage": 61.84, "elapsed_time": "3:39:48", "remaining_time": "2:15:38"} +{"current_steps": 5365, "total_steps": 8674, "loss": 0.4754808843135834, "lr": 6.960637974195096e-07, "epoch": 1.2370302052109754, "percentage": 61.85, "elapsed_time": "3:39:51", "remaining_time": "2:15:36"} +{"current_steps": 5366, "total_steps": 8674, "loss": 0.5073249340057373, "lr": 6.957005948333434e-07, "epoch": 1.237260779340558, "percentage": 61.86, "elapsed_time": "3:39:53", "remaining_time": "2:15:33"} +{"current_steps": 5367, "total_steps": 8674, "loss": 0.4545915126800537, "lr": 6.953374364801792e-07, "epoch": 1.2374913534701406, "percentage": 61.87, "elapsed_time": "3:39:56", "remaining_time": "2:15:31"} +{"current_steps": 5368, "total_steps": 8674, "loss": 0.42797422409057617, "lr": 6.949743224128064e-07, "epoch": 1.2377219275997233, "percentage": 61.89, "elapsed_time": "3:39:58", "remaining_time": "2:15:28"} +{"current_steps": 5369, "total_steps": 8674, "loss": 0.570556104183197, "lr": 6.946112526840071e-07, "epoch": 1.237952501729306, "percentage": 61.9, "elapsed_time": "3:40:01", "remaining_time": "2:15:26"} +{"current_steps": 5370, "total_steps": 8674, "loss": 0.3866136074066162, "lr": 6.942482273465577e-07, 
"epoch": 1.2381830758588885, "percentage": 61.91, "elapsed_time": "3:40:03", "remaining_time": "2:15:23"} +{"current_steps": 5371, "total_steps": 8674, "loss": 0.3716529309749603, "lr": 6.938852464532267e-07, "epoch": 1.2384136499884713, "percentage": 61.92, "elapsed_time": "3:40:06", "remaining_time": "2:15:21"} +{"current_steps": 5372, "total_steps": 8674, "loss": 0.4781096577644348, "lr": 6.935223100567776e-07, "epoch": 1.238644224118054, "percentage": 61.93, "elapsed_time": "3:40:08", "remaining_time": "2:15:19"} +{"current_steps": 5373, "total_steps": 8674, "loss": 0.4262877106666565, "lr": 6.931594182099671e-07, "epoch": 1.2388747982476367, "percentage": 61.94, "elapsed_time": "3:40:11", "remaining_time": "2:15:16"} +{"current_steps": 5374, "total_steps": 8674, "loss": 0.49859267473220825, "lr": 6.927965709655444e-07, "epoch": 1.2391053723772192, "percentage": 61.96, "elapsed_time": "3:40:13", "remaining_time": "2:15:14"} +{"current_steps": 5375, "total_steps": 8674, "loss": 0.4710119664669037, "lr": 6.924337683762539e-07, "epoch": 1.239335946506802, "percentage": 61.97, "elapsed_time": "3:40:16", "remaining_time": "2:15:11"} +{"current_steps": 5376, "total_steps": 8674, "loss": 0.4974974989891052, "lr": 6.92071010494832e-07, "epoch": 1.2395665206363846, "percentage": 61.98, "elapsed_time": "3:40:18", "remaining_time": "2:15:09"} +{"current_steps": 5377, "total_steps": 8674, "loss": 0.4118514657020569, "lr": 6.917082973740098e-07, "epoch": 1.2397970947659673, "percentage": 61.99, "elapsed_time": "3:40:21", "remaining_time": "2:15:06"} +{"current_steps": 5378, "total_steps": 8674, "loss": 0.4223165214061737, "lr": 6.913456290665106e-07, "epoch": 1.2400276688955498, "percentage": 62.0, "elapsed_time": "3:40:23", "remaining_time": "2:15:04"} +{"current_steps": 5379, "total_steps": 8674, "loss": 0.4896865487098694, "lr": 6.909830056250526e-07, "epoch": 1.2402582430251325, "percentage": 62.01, "elapsed_time": "3:40:25", "remaining_time": "2:15:01"} 
+{"current_steps": 5380, "total_steps": 8674, "loss": 0.36112266778945923, "lr": 6.906204271023463e-07, "epoch": 1.2404888171547153, "percentage": 62.02, "elapsed_time": "3:40:28", "remaining_time": "2:14:59"} +{"current_steps": 5381, "total_steps": 8674, "loss": 0.4665502905845642, "lr": 6.902578935510969e-07, "epoch": 1.240719391284298, "percentage": 62.04, "elapsed_time": "3:40:31", "remaining_time": "2:14:57"} +{"current_steps": 5382, "total_steps": 8674, "loss": 0.46059858798980713, "lr": 6.898954050240013e-07, "epoch": 1.2409499654138805, "percentage": 62.05, "elapsed_time": "3:40:33", "remaining_time": "2:14:54"} +{"current_steps": 5383, "total_steps": 8674, "loss": 0.46149420738220215, "lr": 6.895329615737515e-07, "epoch": 1.2411805395434632, "percentage": 62.06, "elapsed_time": "3:40:36", "remaining_time": "2:14:52"} +{"current_steps": 5384, "total_steps": 8674, "loss": 0.42226743698120117, "lr": 6.891705632530327e-07, "epoch": 1.241411113673046, "percentage": 62.07, "elapsed_time": "3:40:38", "remaining_time": "2:14:49"} +{"current_steps": 5385, "total_steps": 8674, "loss": 0.45789939165115356, "lr": 6.88808210114522e-07, "epoch": 1.2416416878026286, "percentage": 62.08, "elapsed_time": "3:40:41", "remaining_time": "2:14:47"} +{"current_steps": 5386, "total_steps": 8674, "loss": 0.44569891691207886, "lr": 6.884459022108922e-07, "epoch": 1.2418722619322111, "percentage": 62.09, "elapsed_time": "3:40:43", "remaining_time": "2:14:44"} +{"current_steps": 5387, "total_steps": 8674, "loss": 0.3971112370491028, "lr": 6.880836395948078e-07, "epoch": 1.2421028360617938, "percentage": 62.11, "elapsed_time": "3:40:45", "remaining_time": "2:14:42"} +{"current_steps": 5388, "total_steps": 8674, "loss": 0.46052566170692444, "lr": 6.877214223189278e-07, "epoch": 1.2423334101913766, "percentage": 62.12, "elapsed_time": "3:40:48", "remaining_time": "2:14:39"} +{"current_steps": 5389, "total_steps": 8674, "loss": 0.42730599641799927, "lr": 6.873592504359037e-07, "epoch": 
1.2425639843209593, "percentage": 62.13, "elapsed_time": "3:40:50", "remaining_time": "2:14:37"} +{"current_steps": 5390, "total_steps": 8674, "loss": 0.4391734004020691, "lr": 6.869971239983814e-07, "epoch": 1.2427945584505418, "percentage": 62.14, "elapsed_time": "3:40:53", "remaining_time": "2:14:34"} +{"current_steps": 5391, "total_steps": 8674, "loss": 0.4523593485355377, "lr": 6.866350430589989e-07, "epoch": 1.2430251325801245, "percentage": 62.15, "elapsed_time": "3:40:55", "remaining_time": "2:14:32"} +{"current_steps": 5392, "total_steps": 8674, "loss": 0.5398315787315369, "lr": 6.86273007670389e-07, "epoch": 1.2432557067097072, "percentage": 62.16, "elapsed_time": "3:40:58", "remaining_time": "2:14:29"} +{"current_steps": 5393, "total_steps": 8674, "loss": 0.40480807423591614, "lr": 6.859110178851767e-07, "epoch": 1.24348628083929, "percentage": 62.17, "elapsed_time": "3:41:00", "remaining_time": "2:14:27"} +{"current_steps": 5394, "total_steps": 8674, "loss": 0.42483675479888916, "lr": 6.855490737559816e-07, "epoch": 1.2437168549688724, "percentage": 62.19, "elapsed_time": "3:41:03", "remaining_time": "2:14:25"} +{"current_steps": 5395, "total_steps": 8674, "loss": 0.39951619505882263, "lr": 6.851871753354153e-07, "epoch": 1.2439474290984551, "percentage": 62.2, "elapsed_time": "3:41:05", "remaining_time": "2:14:22"} +{"current_steps": 5396, "total_steps": 8674, "loss": 0.48650771379470825, "lr": 6.848253226760833e-07, "epoch": 1.2441780032280378, "percentage": 62.21, "elapsed_time": "3:41:07", "remaining_time": "2:14:20"} +{"current_steps": 5397, "total_steps": 8674, "loss": 0.5377830266952515, "lr": 6.844635158305853e-07, "epoch": 1.2444085773576206, "percentage": 62.22, "elapsed_time": "3:41:10", "remaining_time": "2:14:17"} +{"current_steps": 5398, "total_steps": 8674, "loss": 0.4365614950656891, "lr": 6.841017548515127e-07, "epoch": 1.244639151487203, "percentage": 62.23, "elapsed_time": "3:41:13", "remaining_time": "2:14:15"} +{"current_steps": 
5399, "total_steps": 8674, "loss": 0.39739400148391724, "lr": 6.837400397914519e-07, "epoch": 1.2448697256167858, "percentage": 62.24, "elapsed_time": "3:41:15", "remaining_time": "2:14:12"} +{"current_steps": 5400, "total_steps": 8674, "loss": 0.4005683660507202, "lr": 6.833783707029812e-07, "epoch": 1.2451002997463685, "percentage": 62.26, "elapsed_time": "3:41:18", "remaining_time": "2:14:10"} +{"current_steps": 5401, "total_steps": 8674, "loss": 0.5635108351707458, "lr": 6.830167476386737e-07, "epoch": 1.2453308738759512, "percentage": 62.27, "elapsed_time": "3:41:22", "remaining_time": "2:14:08"} +{"current_steps": 5402, "total_steps": 8674, "loss": 0.4332388639450073, "lr": 6.82655170651094e-07, "epoch": 1.2455614480055337, "percentage": 62.28, "elapsed_time": "3:41:24", "remaining_time": "2:14:06"} +{"current_steps": 5403, "total_steps": 8674, "loss": 0.47506433725357056, "lr": 6.822936397928015e-07, "epoch": 1.2457920221351164, "percentage": 62.29, "elapsed_time": "3:41:26", "remaining_time": "2:14:03"} +{"current_steps": 5404, "total_steps": 8674, "loss": 0.5081777572631836, "lr": 6.819321551163486e-07, "epoch": 1.2460225962646991, "percentage": 62.3, "elapsed_time": "3:41:29", "remaining_time": "2:14:01"} +{"current_steps": 5405, "total_steps": 8674, "loss": 0.4038957953453064, "lr": 6.815707166742801e-07, "epoch": 1.2462531703942819, "percentage": 62.31, "elapsed_time": "3:41:31", "remaining_time": "2:13:58"} +{"current_steps": 5406, "total_steps": 8674, "loss": 0.4665706753730774, "lr": 6.812093245191354e-07, "epoch": 1.2464837445238643, "percentage": 62.32, "elapsed_time": "3:41:34", "remaining_time": "2:13:56"} +{"current_steps": 5407, "total_steps": 8674, "loss": 0.45610785484313965, "lr": 6.808479787034459e-07, "epoch": 1.246714318653447, "percentage": 62.34, "elapsed_time": "3:41:36", "remaining_time": "2:13:54"} +{"current_steps": 5408, "total_steps": 8674, "loss": 0.4334958493709564, "lr": 6.804866792797377e-07, "epoch": 1.2469448927830298, 
"percentage": 62.35, "elapsed_time": "3:41:39", "remaining_time": "2:13:51"} +{"current_steps": 5409, "total_steps": 8674, "loss": 0.5505996942520142, "lr": 6.801254263005283e-07, "epoch": 1.2471754669126125, "percentage": 62.36, "elapsed_time": "3:41:41", "remaining_time": "2:13:49"} +{"current_steps": 5410, "total_steps": 8674, "loss": 0.5589424967765808, "lr": 6.797642198183303e-07, "epoch": 1.247406041042195, "percentage": 62.37, "elapsed_time": "3:41:44", "remaining_time": "2:13:46"} +{"current_steps": 5411, "total_steps": 8674, "loss": 0.48142847418785095, "lr": 6.794030598856483e-07, "epoch": 1.2476366151717777, "percentage": 62.38, "elapsed_time": "3:41:46", "remaining_time": "2:13:44"} +{"current_steps": 5412, "total_steps": 8674, "loss": 0.5549830198287964, "lr": 6.790419465549811e-07, "epoch": 1.2478671893013604, "percentage": 62.39, "elapsed_time": "3:41:48", "remaining_time": "2:13:41"} +{"current_steps": 5413, "total_steps": 8674, "loss": 0.5974072217941284, "lr": 6.786808798788193e-07, "epoch": 1.2480977634309431, "percentage": 62.4, "elapsed_time": "3:41:51", "remaining_time": "2:13:39"} +{"current_steps": 5414, "total_steps": 8674, "loss": 0.38189029693603516, "lr": 6.783198599096484e-07, "epoch": 1.2483283375605256, "percentage": 62.42, "elapsed_time": "3:41:53", "remaining_time": "2:13:36"} +{"current_steps": 5415, "total_steps": 8674, "loss": 0.41150039434432983, "lr": 6.779588866999459e-07, "epoch": 1.2485589116901084, "percentage": 62.43, "elapsed_time": "3:41:56", "remaining_time": "2:13:34"} +{"current_steps": 5416, "total_steps": 8674, "loss": 0.4291636645793915, "lr": 6.775979603021828e-07, "epoch": 1.248789485819691, "percentage": 62.44, "elapsed_time": "3:41:58", "remaining_time": "2:13:31"} +{"current_steps": 5417, "total_steps": 8674, "loss": 0.45324140787124634, "lr": 6.772370807688242e-07, "epoch": 1.2490200599492738, "percentage": 62.45, "elapsed_time": "3:42:01", "remaining_time": "2:13:29"} +{"current_steps": 5418, "total_steps": 
8674, "loss": 0.4748731851577759, "lr": 6.768762481523262e-07, "epoch": 1.2492506340788563, "percentage": 62.46, "elapsed_time": "3:42:03", "remaining_time": "2:13:26"} +{"current_steps": 5419, "total_steps": 8674, "loss": 0.43602505326271057, "lr": 6.765154625051408e-07, "epoch": 1.249481208208439, "percentage": 62.47, "elapsed_time": "3:42:06", "remaining_time": "2:13:24"} +{"current_steps": 5420, "total_steps": 8674, "loss": 0.49135684967041016, "lr": 6.761547238797112e-07, "epoch": 1.2497117823380217, "percentage": 62.49, "elapsed_time": "3:42:08", "remaining_time": "2:13:22"} +{"current_steps": 5421, "total_steps": 8674, "loss": 0.47508272528648376, "lr": 6.757940323284747e-07, "epoch": 1.2499423564676044, "percentage": 62.5, "elapsed_time": "3:42:10", "remaining_time": "2:13:19"} +{"current_steps": 5422, "total_steps": 8674, "loss": 0.399259090423584, "lr": 6.754333879038611e-07, "epoch": 1.250172930597187, "percentage": 62.51, "elapsed_time": "3:42:13", "remaining_time": "2:13:17"} +{"current_steps": 5423, "total_steps": 8674, "loss": 0.426364004611969, "lr": 6.750727906582941e-07, "epoch": 1.2504035047267696, "percentage": 62.52, "elapsed_time": "3:42:16", "remaining_time": "2:13:14"} +{"current_steps": 5424, "total_steps": 8674, "loss": 0.4641951322555542, "lr": 6.747122406441903e-07, "epoch": 1.2506340788563524, "percentage": 62.53, "elapsed_time": "3:42:18", "remaining_time": "2:13:12"} +{"current_steps": 5425, "total_steps": 8674, "loss": 0.35008323192596436, "lr": 6.743517379139585e-07, "epoch": 1.250864652985935, "percentage": 62.54, "elapsed_time": "3:42:20", "remaining_time": "2:13:09"} +{"current_steps": 5426, "total_steps": 8674, "loss": 0.49627771973609924, "lr": 6.739912825200022e-07, "epoch": 1.2510952271155176, "percentage": 62.55, "elapsed_time": "3:42:23", "remaining_time": "2:13:07"} +{"current_steps": 5427, "total_steps": 8674, "loss": 0.4926851987838745, "lr": 6.736308745147168e-07, "epoch": 1.2513258012451003, "percentage": 62.57, 
"elapsed_time": "3:42:25", "remaining_time": "2:13:04"} +{"current_steps": 5428, "total_steps": 8674, "loss": 0.44777536392211914, "lr": 6.732705139504917e-07, "epoch": 1.251556375374683, "percentage": 62.58, "elapsed_time": "3:42:28", "remaining_time": "2:13:02"} +{"current_steps": 5429, "total_steps": 8674, "loss": 0.39160430431365967, "lr": 6.729102008797085e-07, "epoch": 1.2517869495042655, "percentage": 62.59, "elapsed_time": "3:42:30", "remaining_time": "2:12:59"} +{"current_steps": 5430, "total_steps": 8674, "loss": 0.4585273861885071, "lr": 6.725499353547426e-07, "epoch": 1.2520175236338482, "percentage": 62.6, "elapsed_time": "3:42:32", "remaining_time": "2:12:57"} +{"current_steps": 5431, "total_steps": 8674, "loss": 0.5245224237442017, "lr": 6.721897174279621e-07, "epoch": 1.252248097763431, "percentage": 62.61, "elapsed_time": "3:42:35", "remaining_time": "2:12:54"} +{"current_steps": 5432, "total_steps": 8674, "loss": 0.4217349886894226, "lr": 6.718295471517288e-07, "epoch": 1.2524786718930137, "percentage": 62.62, "elapsed_time": "3:42:38", "remaining_time": "2:12:52"} +{"current_steps": 5433, "total_steps": 8674, "loss": 0.4944193661212921, "lr": 6.714694245783963e-07, "epoch": 1.2527092460225964, "percentage": 62.64, "elapsed_time": "3:42:40", "remaining_time": "2:12:50"} +{"current_steps": 5434, "total_steps": 8674, "loss": 0.5058057904243469, "lr": 6.711093497603127e-07, "epoch": 1.2529398201521789, "percentage": 62.65, "elapsed_time": "3:42:42", "remaining_time": "2:12:47"} +{"current_steps": 5435, "total_steps": 8674, "loss": 0.45669037103652954, "lr": 6.707493227498186e-07, "epoch": 1.2531703942817616, "percentage": 62.66, "elapsed_time": "3:42:45", "remaining_time": "2:12:45"} +{"current_steps": 5436, "total_steps": 8674, "loss": 0.4248945116996765, "lr": 6.703893435992469e-07, "epoch": 1.2534009684113443, "percentage": 62.67, "elapsed_time": "3:42:47", "remaining_time": "2:12:42"} +{"current_steps": 5437, "total_steps": 8674, "loss": 
0.3984343707561493, "lr": 6.700294123609249e-07, "epoch": 1.2536315425409268, "percentage": 62.68, "elapsed_time": "3:42:50", "remaining_time": "2:12:40"} +{"current_steps": 5438, "total_steps": 8674, "loss": 0.435299813747406, "lr": 6.696695290871715e-07, "epoch": 1.2538621166705095, "percentage": 62.69, "elapsed_time": "3:42:52", "remaining_time": "2:12:37"} +{"current_steps": 5439, "total_steps": 8674, "loss": 0.4225304126739502, "lr": 6.693096938303002e-07, "epoch": 1.2540926908000922, "percentage": 62.7, "elapsed_time": "3:42:55", "remaining_time": "2:12:35"} +{"current_steps": 5440, "total_steps": 8674, "loss": 0.4686669111251831, "lr": 6.689499066426161e-07, "epoch": 1.254323264929675, "percentage": 62.72, "elapsed_time": "3:42:58", "remaining_time": "2:12:33"} +{"current_steps": 5441, "total_steps": 8674, "loss": 0.45163553953170776, "lr": 6.685901675764186e-07, "epoch": 1.2545538390592577, "percentage": 62.73, "elapsed_time": "3:43:00", "remaining_time": "2:12:30"} +{"current_steps": 5442, "total_steps": 8674, "loss": 0.44223567843437195, "lr": 6.682304766839986e-07, "epoch": 1.2547844131888402, "percentage": 62.74, "elapsed_time": "3:43:03", "remaining_time": "2:12:28"} +{"current_steps": 5443, "total_steps": 8674, "loss": 0.4008648991584778, "lr": 6.678708340176413e-07, "epoch": 1.2550149873184229, "percentage": 62.75, "elapsed_time": "3:43:05", "remaining_time": "2:12:25"} +{"current_steps": 5444, "total_steps": 8674, "loss": 0.4500792324542999, "lr": 6.675112396296245e-07, "epoch": 1.2552455614480056, "percentage": 62.76, "elapsed_time": "3:43:07", "remaining_time": "2:12:23"} +{"current_steps": 5445, "total_steps": 8674, "loss": 0.42558690905570984, "lr": 6.671516935722183e-07, "epoch": 1.255476135577588, "percentage": 62.77, "elapsed_time": "3:43:10", "remaining_time": "2:12:20"} +{"current_steps": 5446, "total_steps": 8674, "loss": 0.4676043391227722, "lr": 6.667921958976871e-07, "epoch": 1.2557067097071708, "percentage": 62.79, "elapsed_time": 
"3:43:12", "remaining_time": "2:12:18"} +{"current_steps": 5447, "total_steps": 8674, "loss": 0.44114184379577637, "lr": 6.664327466582869e-07, "epoch": 1.2559372838367535, "percentage": 62.8, "elapsed_time": "3:43:15", "remaining_time": "2:12:15"} +{"current_steps": 5448, "total_steps": 8674, "loss": 0.33865463733673096, "lr": 6.660733459062679e-07, "epoch": 1.2561678579663362, "percentage": 62.81, "elapsed_time": "3:43:17", "remaining_time": "2:12:13"} +{"current_steps": 5449, "total_steps": 8674, "loss": 0.5397414565086365, "lr": 6.65713993693872e-07, "epoch": 1.256398432095919, "percentage": 62.82, "elapsed_time": "3:43:20", "remaining_time": "2:12:10"} +{"current_steps": 5450, "total_steps": 8674, "loss": 0.49249517917633057, "lr": 6.653546900733352e-07, "epoch": 1.2566290062255014, "percentage": 62.83, "elapsed_time": "3:43:22", "remaining_time": "2:12:08"} +{"current_steps": 5451, "total_steps": 8674, "loss": 0.5438433885574341, "lr": 6.649954350968855e-07, "epoch": 1.2568595803550842, "percentage": 62.84, "elapsed_time": "3:43:24", "remaining_time": "2:12:05"} +{"current_steps": 5452, "total_steps": 8674, "loss": 0.43725037574768066, "lr": 6.646362288167448e-07, "epoch": 1.2570901544846669, "percentage": 62.85, "elapsed_time": "3:43:27", "remaining_time": "2:12:03"} +{"current_steps": 5453, "total_steps": 8674, "loss": 0.5369226336479187, "lr": 6.642770712851269e-07, "epoch": 1.2573207286142494, "percentage": 62.87, "elapsed_time": "3:43:29", "remaining_time": "2:12:00"} +{"current_steps": 5454, "total_steps": 8674, "loss": 0.45022842288017273, "lr": 6.63917962554239e-07, "epoch": 1.257551302743832, "percentage": 62.88, "elapsed_time": "3:43:32", "remaining_time": "2:11:58"} +{"current_steps": 5455, "total_steps": 8674, "loss": 0.42483362555503845, "lr": 6.635589026762818e-07, "epoch": 1.2577818768734148, "percentage": 62.89, "elapsed_time": "3:43:34", "remaining_time": "2:11:55"} +{"current_steps": 5456, "total_steps": 8674, "loss": 0.4909497797489166, 
"lr": 6.631998917034474e-07, "epoch": 1.2580124510029975, "percentage": 62.9, "elapsed_time": "3:43:36", "remaining_time": "2:11:53"} +{"current_steps": 5457, "total_steps": 8674, "loss": 0.4927433431148529, "lr": 6.628409296879223e-07, "epoch": 1.2582430251325802, "percentage": 62.91, "elapsed_time": "3:43:39", "remaining_time": "2:11:51"} +{"current_steps": 5458, "total_steps": 8674, "loss": 0.4452761113643646, "lr": 6.624820166818847e-07, "epoch": 1.2584735992621627, "percentage": 62.92, "elapsed_time": "3:43:42", "remaining_time": "2:11:48"} +{"current_steps": 5459, "total_steps": 8674, "loss": 0.4637982249259949, "lr": 6.62123152737507e-07, "epoch": 1.2587041733917455, "percentage": 62.94, "elapsed_time": "3:43:44", "remaining_time": "2:11:46"} +{"current_steps": 5460, "total_steps": 8674, "loss": 0.3189438581466675, "lr": 6.617643379069532e-07, "epoch": 1.2589347475213282, "percentage": 62.95, "elapsed_time": "3:43:46", "remaining_time": "2:11:43"} +{"current_steps": 5461, "total_steps": 8674, "loss": 0.420698881149292, "lr": 6.614055722423808e-07, "epoch": 1.2591653216509107, "percentage": 62.96, "elapsed_time": "3:43:49", "remaining_time": "2:11:41"} +{"current_steps": 5462, "total_steps": 8674, "loss": 0.5187642574310303, "lr": 6.610468557959398e-07, "epoch": 1.2593958957804934, "percentage": 62.97, "elapsed_time": "3:43:51", "remaining_time": "2:11:38"} +{"current_steps": 5463, "total_steps": 8674, "loss": 0.48519381880760193, "lr": 6.606881886197741e-07, "epoch": 1.259626469910076, "percentage": 62.98, "elapsed_time": "3:43:53", "remaining_time": "2:11:36"} +{"current_steps": 5464, "total_steps": 8674, "loss": 0.3930806815624237, "lr": 6.60329570766019e-07, "epoch": 1.2598570440396588, "percentage": 62.99, "elapsed_time": "3:43:56", "remaining_time": "2:11:33"} +{"current_steps": 5465, "total_steps": 8674, "loss": 0.4890612065792084, "lr": 6.599710022868027e-07, "epoch": 1.2600876181692415, "percentage": 63.0, "elapsed_time": "3:43:58", "remaining_time": 
"2:11:31"} +{"current_steps": 5466, "total_steps": 8674, "loss": 0.4202774465084076, "lr": 6.596124832342476e-07, "epoch": 1.260318192298824, "percentage": 63.02, "elapsed_time": "3:44:01", "remaining_time": "2:11:28"} +{"current_steps": 5467, "total_steps": 8674, "loss": 0.5053761005401611, "lr": 6.592540136604674e-07, "epoch": 1.2605487664284067, "percentage": 63.03, "elapsed_time": "3:44:03", "remaining_time": "2:11:26"} +{"current_steps": 5468, "total_steps": 8674, "loss": 0.4827175736427307, "lr": 6.588955936175702e-07, "epoch": 1.2607793405579895, "percentage": 63.04, "elapsed_time": "3:44:06", "remaining_time": "2:11:23"} +{"current_steps": 5469, "total_steps": 8674, "loss": 0.45179229974746704, "lr": 6.585372231576551e-07, "epoch": 1.261009914687572, "percentage": 63.05, "elapsed_time": "3:44:08", "remaining_time": "2:11:21"} +{"current_steps": 5470, "total_steps": 8674, "loss": 0.4024949073791504, "lr": 6.581789023328155e-07, "epoch": 1.2612404888171547, "percentage": 63.06, "elapsed_time": "3:44:11", "remaining_time": "2:11:19"} +{"current_steps": 5471, "total_steps": 8674, "loss": 0.48839491605758667, "lr": 6.578206311951363e-07, "epoch": 1.2614710629467374, "percentage": 63.07, "elapsed_time": "3:44:13", "remaining_time": "2:11:16"} +{"current_steps": 5472, "total_steps": 8674, "loss": 0.45897620916366577, "lr": 6.574624097966968e-07, "epoch": 1.26170163707632, "percentage": 63.09, "elapsed_time": "3:44:16", "remaining_time": "2:11:14"} +{"current_steps": 5473, "total_steps": 8674, "loss": 0.48471882939338684, "lr": 6.571042381895671e-07, "epoch": 1.2619322112059028, "percentage": 63.1, "elapsed_time": "3:44:19", "remaining_time": "2:11:11"} +{"current_steps": 5474, "total_steps": 8674, "loss": 0.44159913063049316, "lr": 6.567461164258117e-07, "epoch": 1.2621627853354853, "percentage": 63.11, "elapsed_time": "3:44:21", "remaining_time": "2:11:09"} +{"current_steps": 5475, "total_steps": 8674, "loss": 0.39186012744903564, "lr": 6.563880445574872e-07, 
"epoch": 1.262393359465068, "percentage": 63.12, "elapsed_time": "3:44:23", "remaining_time": "2:11:06"} +{"current_steps": 5476, "total_steps": 8674, "loss": 0.5332233905792236, "lr": 6.560300226366425e-07, "epoch": 1.2626239335946507, "percentage": 63.13, "elapsed_time": "3:44:26", "remaining_time": "2:11:04"} +{"current_steps": 5477, "total_steps": 8674, "loss": 0.4252084195613861, "lr": 6.556720507153201e-07, "epoch": 1.2628545077242332, "percentage": 63.14, "elapsed_time": "3:44:28", "remaining_time": "2:11:01"} +{"current_steps": 5478, "total_steps": 8674, "loss": 0.36927711963653564, "lr": 6.553141288455548e-07, "epoch": 1.263085081853816, "percentage": 63.15, "elapsed_time": "3:44:31", "remaining_time": "2:10:59"} +{"current_steps": 5479, "total_steps": 8674, "loss": 0.4405602216720581, "lr": 6.549562570793745e-07, "epoch": 1.2633156559833987, "percentage": 63.17, "elapsed_time": "3:44:33", "remaining_time": "2:10:56"} +{"current_steps": 5480, "total_steps": 8674, "loss": 0.5691590309143066, "lr": 6.545984354687986e-07, "epoch": 1.2635462301129814, "percentage": 63.18, "elapsed_time": "3:44:35", "remaining_time": "2:10:54"} +{"current_steps": 5481, "total_steps": 8674, "loss": 0.3750354051589966, "lr": 6.542406640658411e-07, "epoch": 1.2637768042425641, "percentage": 63.19, "elapsed_time": "3:44:38", "remaining_time": "2:10:51"} +{"current_steps": 5482, "total_steps": 8674, "loss": 0.47816041111946106, "lr": 6.538829429225068e-07, "epoch": 1.2640073783721466, "percentage": 63.2, "elapsed_time": "3:44:41", "remaining_time": "2:10:49"} +{"current_steps": 5483, "total_steps": 8674, "loss": 0.42470186948776245, "lr": 6.535252720907951e-07, "epoch": 1.2642379525017293, "percentage": 63.21, "elapsed_time": "3:44:43", "remaining_time": "2:10:47"} +{"current_steps": 5484, "total_steps": 8674, "loss": 0.37356555461883545, "lr": 6.531676516226961e-07, "epoch": 1.264468526631312, "percentage": 63.22, "elapsed_time": "3:44:45", "remaining_time": "2:10:44"} 
+{"current_steps": 5485, "total_steps": 8674, "loss": 0.4895293116569519, "lr": 6.528100815701942e-07, "epoch": 1.2646991007608945, "percentage": 63.23, "elapsed_time": "3:44:48", "remaining_time": "2:10:42"} +{"current_steps": 5486, "total_steps": 8674, "loss": 0.4963725805282593, "lr": 6.524525619852656e-07, "epoch": 1.2649296748904773, "percentage": 63.25, "elapsed_time": "3:44:50", "remaining_time": "2:10:39"} +{"current_steps": 5487, "total_steps": 8674, "loss": 0.5443764925003052, "lr": 6.520950929198792e-07, "epoch": 1.26516024902006, "percentage": 63.26, "elapsed_time": "3:44:53", "remaining_time": "2:10:37"} +{"current_steps": 5488, "total_steps": 8674, "loss": 0.400549054145813, "lr": 6.517376744259972e-07, "epoch": 1.2653908231496427, "percentage": 63.27, "elapsed_time": "3:44:55", "remaining_time": "2:10:34"} +{"current_steps": 5489, "total_steps": 8674, "loss": 0.46384042501449585, "lr": 6.513803065555736e-07, "epoch": 1.2656213972792254, "percentage": 63.28, "elapsed_time": "3:44:58", "remaining_time": "2:10:32"} +{"current_steps": 5490, "total_steps": 8674, "loss": 0.5044240951538086, "lr": 6.510229893605556e-07, "epoch": 1.265851971408808, "percentage": 63.29, "elapsed_time": "3:45:00", "remaining_time": "2:10:29"} +{"current_steps": 5491, "total_steps": 8674, "loss": 0.4544214904308319, "lr": 6.506657228928827e-07, "epoch": 1.2660825455383906, "percentage": 63.3, "elapsed_time": "3:45:03", "remaining_time": "2:10:27"} +{"current_steps": 5492, "total_steps": 8674, "loss": 0.36688071489334106, "lr": 6.503085072044878e-07, "epoch": 1.2663131196679733, "percentage": 63.32, "elapsed_time": "3:45:05", "remaining_time": "2:10:24"} +{"current_steps": 5493, "total_steps": 8674, "loss": 0.4058225154876709, "lr": 6.499513423472951e-07, "epoch": 1.2665436937975558, "percentage": 63.33, "elapsed_time": "3:45:07", "remaining_time": "2:10:22"} +{"current_steps": 5494, "total_steps": 8674, "loss": 0.36429229378700256, "lr": 6.495942283732225e-07, "epoch": 
1.2667742679271385, "percentage": 63.34, "elapsed_time": "3:45:10", "remaining_time": "2:10:19"} +{"current_steps": 5495, "total_steps": 8674, "loss": 0.47116899490356445, "lr": 6.492371653341802e-07, "epoch": 1.2670048420567213, "percentage": 63.35, "elapsed_time": "3:45:12", "remaining_time": "2:10:17"} +{"current_steps": 5496, "total_steps": 8674, "loss": 0.4437965750694275, "lr": 6.488801532820706e-07, "epoch": 1.267235416186304, "percentage": 63.36, "elapsed_time": "3:45:15", "remaining_time": "2:10:15"} +{"current_steps": 5497, "total_steps": 8674, "loss": 0.4810328483581543, "lr": 6.485231922687893e-07, "epoch": 1.2674659903158867, "percentage": 63.37, "elapsed_time": "3:45:17", "remaining_time": "2:10:12"} +{"current_steps": 5498, "total_steps": 8674, "loss": 0.362907350063324, "lr": 6.481662823462238e-07, "epoch": 1.2676965644454692, "percentage": 63.38, "elapsed_time": "3:45:20", "remaining_time": "2:10:10"} +{"current_steps": 5499, "total_steps": 8674, "loss": 0.43647170066833496, "lr": 6.478094235662554e-07, "epoch": 1.267927138575052, "percentage": 63.4, "elapsed_time": "3:45:22", "remaining_time": "2:10:07"} +{"current_steps": 5500, "total_steps": 8674, "loss": 0.4566631317138672, "lr": 6.474526159807563e-07, "epoch": 1.2681577127046346, "percentage": 63.41, "elapsed_time": "3:45:25", "remaining_time": "2:10:05"} +{"current_steps": 5501, "total_steps": 8674, "loss": 0.3940081298351288, "lr": 6.470958596415925e-07, "epoch": 1.2683882868342171, "percentage": 63.42, "elapsed_time": "3:45:28", "remaining_time": "2:10:03"} +{"current_steps": 5502, "total_steps": 8674, "loss": 0.5275603532791138, "lr": 6.46739154600622e-07, "epoch": 1.2686188609637998, "percentage": 63.43, "elapsed_time": "3:45:31", "remaining_time": "2:10:01"} +{"current_steps": 5503, "total_steps": 8674, "loss": 0.42546436190605164, "lr": 6.463825009096959e-07, "epoch": 1.2688494350933825, "percentage": 63.44, "elapsed_time": "3:45:33", "remaining_time": "2:09:58"} +{"current_steps": 
5504, "total_steps": 8674, "loss": 0.3833821713924408, "lr": 6.460258986206566e-07, "epoch": 1.2690800092229653, "percentage": 63.45, "elapsed_time": "3:45:36", "remaining_time": "2:09:56"} +{"current_steps": 5505, "total_steps": 8674, "loss": 0.5056046843528748, "lr": 6.456693477853408e-07, "epoch": 1.2693105833525478, "percentage": 63.47, "elapsed_time": "3:45:38", "remaining_time": "2:09:53"} +{"current_steps": 5506, "total_steps": 8674, "loss": 0.3544192910194397, "lr": 6.453128484555764e-07, "epoch": 1.2695411574821305, "percentage": 63.48, "elapsed_time": "3:45:40", "remaining_time": "2:09:51"} +{"current_steps": 5507, "total_steps": 8674, "loss": 0.47164130210876465, "lr": 6.449564006831836e-07, "epoch": 1.2697717316117132, "percentage": 63.49, "elapsed_time": "3:45:43", "remaining_time": "2:09:48"} +{"current_steps": 5508, "total_steps": 8674, "loss": 0.4580638110637665, "lr": 6.446000045199765e-07, "epoch": 1.2700023057412957, "percentage": 63.5, "elapsed_time": "3:45:46", "remaining_time": "2:09:46"} +{"current_steps": 5509, "total_steps": 8674, "loss": 0.45945844054222107, "lr": 6.442436600177606e-07, "epoch": 1.2702328798708784, "percentage": 63.51, "elapsed_time": "3:45:48", "remaining_time": "2:09:43"} +{"current_steps": 5510, "total_steps": 8674, "loss": 0.5539910793304443, "lr": 6.438873672283343e-07, "epoch": 1.2704634540004611, "percentage": 63.52, "elapsed_time": "3:45:50", "remaining_time": "2:09:41"} +{"current_steps": 5511, "total_steps": 8674, "loss": 0.4661790132522583, "lr": 6.43531126203488e-07, "epoch": 1.2706940281300438, "percentage": 63.53, "elapsed_time": "3:45:53", "remaining_time": "2:09:38"} +{"current_steps": 5512, "total_steps": 8674, "loss": 0.3781178891658783, "lr": 6.431749369950057e-07, "epoch": 1.2709246022596266, "percentage": 63.55, "elapsed_time": "3:45:55", "remaining_time": "2:09:36"} +{"current_steps": 5513, "total_steps": 8674, "loss": 0.4858461618423462, "lr": 6.428187996546621e-07, "epoch": 1.271155176389209, 
"percentage": 63.56, "elapsed_time": "3:45:58", "remaining_time": "2:09:33"} +{"current_steps": 5514, "total_steps": 8674, "loss": 0.5003963708877563, "lr": 6.424627142342262e-07, "epoch": 1.2713857505187918, "percentage": 63.57, "elapsed_time": "3:46:00", "remaining_time": "2:09:31"} +{"current_steps": 5515, "total_steps": 8674, "loss": 0.4620795249938965, "lr": 6.421066807854584e-07, "epoch": 1.2716163246483745, "percentage": 63.58, "elapsed_time": "3:46:03", "remaining_time": "2:09:29"} +{"current_steps": 5516, "total_steps": 8674, "loss": 0.43998581171035767, "lr": 6.417506993601114e-07, "epoch": 1.271846898777957, "percentage": 63.59, "elapsed_time": "3:46:05", "remaining_time": "2:09:26"} +{"current_steps": 5517, "total_steps": 8674, "loss": 0.5204107165336609, "lr": 6.413947700099311e-07, "epoch": 1.2720774729075397, "percentage": 63.6, "elapsed_time": "3:46:08", "remaining_time": "2:09:24"} +{"current_steps": 5518, "total_steps": 8674, "loss": 0.46675950288772583, "lr": 6.410388927866551e-07, "epoch": 1.2723080470371224, "percentage": 63.62, "elapsed_time": "3:46:10", "remaining_time": "2:09:21"} +{"current_steps": 5519, "total_steps": 8674, "loss": 0.4002436101436615, "lr": 6.406830677420146e-07, "epoch": 1.2725386211667051, "percentage": 63.63, "elapsed_time": "3:46:13", "remaining_time": "2:09:19"} +{"current_steps": 5520, "total_steps": 8674, "loss": 0.4051012396812439, "lr": 6.403272949277312e-07, "epoch": 1.2727691952962878, "percentage": 63.64, "elapsed_time": "3:46:15", "remaining_time": "2:09:16"} +{"current_steps": 5521, "total_steps": 8674, "loss": 0.4847797751426697, "lr": 6.399715743955209e-07, "epoch": 1.2729997694258703, "percentage": 63.65, "elapsed_time": "3:46:17", "remaining_time": "2:09:14"} +{"current_steps": 5522, "total_steps": 8674, "loss": 0.4742053151130676, "lr": 6.396159061970907e-07, "epoch": 1.273230343555453, "percentage": 63.66, "elapsed_time": "3:46:20", "remaining_time": "2:09:11"} +{"current_steps": 5523, "total_steps": 
8674, "loss": 0.44291001558303833, "lr": 6.392602903841415e-07, "epoch": 1.2734609176850358, "percentage": 63.67, "elapsed_time": "3:46:22", "remaining_time": "2:09:09"} +{"current_steps": 5524, "total_steps": 8674, "loss": 0.38993996381759644, "lr": 6.389047270083646e-07, "epoch": 1.2736914918146183, "percentage": 63.68, "elapsed_time": "3:46:25", "remaining_time": "2:09:06"} +{"current_steps": 5525, "total_steps": 8674, "loss": 0.5045995116233826, "lr": 6.385492161214454e-07, "epoch": 1.273922065944201, "percentage": 63.7, "elapsed_time": "3:46:27", "remaining_time": "2:09:04"} +{"current_steps": 5526, "total_steps": 8674, "loss": 0.4377788305282593, "lr": 6.381937577750611e-07, "epoch": 1.2741526400737837, "percentage": 63.71, "elapsed_time": "3:46:30", "remaining_time": "2:09:01"} +{"current_steps": 5527, "total_steps": 8674, "loss": 0.5363353490829468, "lr": 6.378383520208806e-07, "epoch": 1.2743832142033664, "percentage": 63.72, "elapsed_time": "3:46:32", "remaining_time": "2:08:59"} +{"current_steps": 5528, "total_steps": 8674, "loss": 0.42230546474456787, "lr": 6.374829989105661e-07, "epoch": 1.2746137883329491, "percentage": 63.73, "elapsed_time": "3:46:35", "remaining_time": "2:08:56"} +{"current_steps": 5529, "total_steps": 8674, "loss": 0.39565908908843994, "lr": 6.371276984957715e-07, "epoch": 1.2748443624625316, "percentage": 63.74, "elapsed_time": "3:46:37", "remaining_time": "2:08:54"} +{"current_steps": 5530, "total_steps": 8674, "loss": 0.4375323951244354, "lr": 6.36772450828144e-07, "epoch": 1.2750749365921143, "percentage": 63.75, "elapsed_time": "3:46:39", "remaining_time": "2:08:52"} +{"current_steps": 5531, "total_steps": 8674, "loss": 0.4901241660118103, "lr": 6.364172559593215e-07, "epoch": 1.275305510721697, "percentage": 63.77, "elapsed_time": "3:46:42", "remaining_time": "2:08:49"} +{"current_steps": 5532, "total_steps": 8674, "loss": 0.4108780026435852, "lr": 6.360621139409359e-07, "epoch": 1.2755360848512796, "percentage": 63.78, 
"elapsed_time": "3:46:45", "remaining_time": "2:08:47"} +{"current_steps": 5533, "total_steps": 8674, "loss": 0.43631279468536377, "lr": 6.357070248246102e-07, "epoch": 1.2757666589808623, "percentage": 63.79, "elapsed_time": "3:46:47", "remaining_time": "2:08:44"} +{"current_steps": 5534, "total_steps": 8674, "loss": 0.4623757004737854, "lr": 6.353519886619607e-07, "epoch": 1.275997233110445, "percentage": 63.8, "elapsed_time": "3:46:49", "remaining_time": "2:08:42"} +{"current_steps": 5535, "total_steps": 8674, "loss": 0.41303062438964844, "lr": 6.349970055045954e-07, "epoch": 1.2762278072400277, "percentage": 63.81, "elapsed_time": "3:46:52", "remaining_time": "2:08:39"} +{"current_steps": 5536, "total_steps": 8674, "loss": 0.5157878994941711, "lr": 6.34642075404114e-07, "epoch": 1.2764583813696104, "percentage": 63.82, "elapsed_time": "3:46:54", "remaining_time": "2:08:37"} +{"current_steps": 5537, "total_steps": 8674, "loss": 0.41295093297958374, "lr": 6.342871984121103e-07, "epoch": 1.276688955499193, "percentage": 63.83, "elapsed_time": "3:46:57", "remaining_time": "2:08:34"} +{"current_steps": 5538, "total_steps": 8674, "loss": 0.4636460542678833, "lr": 6.339323745801682e-07, "epoch": 1.2769195296287756, "percentage": 63.85, "elapsed_time": "3:46:59", "remaining_time": "2:08:32"} +{"current_steps": 5539, "total_steps": 8674, "loss": 0.45273804664611816, "lr": 6.335776039598659e-07, "epoch": 1.2771501037583584, "percentage": 63.86, "elapsed_time": "3:47:02", "remaining_time": "2:08:29"} +{"current_steps": 5540, "total_steps": 8674, "loss": 0.4562758803367615, "lr": 6.332228866027721e-07, "epoch": 1.2773806778879409, "percentage": 63.87, "elapsed_time": "3:47:04", "remaining_time": "2:08:27"} +{"current_steps": 5541, "total_steps": 8674, "loss": 0.3162837326526642, "lr": 6.328682225604491e-07, "epoch": 1.2776112520175236, "percentage": 63.88, "elapsed_time": "3:47:07", "remaining_time": "2:08:25"} +{"current_steps": 5542, "total_steps": 8674, "loss": 
0.48594871163368225, "lr": 6.325136118844504e-07, "epoch": 1.2778418261471063, "percentage": 63.89, "elapsed_time": "3:47:09", "remaining_time": "2:08:22"} +{"current_steps": 5543, "total_steps": 8674, "loss": 0.4346798360347748, "lr": 6.321590546263231e-07, "epoch": 1.278072400276689, "percentage": 63.9, "elapsed_time": "3:47:12", "remaining_time": "2:08:20"} +{"current_steps": 5544, "total_steps": 8674, "loss": 0.5133204460144043, "lr": 6.318045508376046e-07, "epoch": 1.2783029744062717, "percentage": 63.92, "elapsed_time": "3:47:14", "remaining_time": "2:08:17"} +{"current_steps": 5545, "total_steps": 8674, "loss": 0.40679338574409485, "lr": 6.314501005698266e-07, "epoch": 1.2785335485358542, "percentage": 63.93, "elapsed_time": "3:47:16", "remaining_time": "2:08:15"} +{"current_steps": 5546, "total_steps": 8674, "loss": 0.363874614238739, "lr": 6.310957038745117e-07, "epoch": 1.278764122665437, "percentage": 63.94, "elapsed_time": "3:47:19", "remaining_time": "2:08:12"} +{"current_steps": 5547, "total_steps": 8674, "loss": 0.43020665645599365, "lr": 6.307413608031746e-07, "epoch": 1.2789946967950196, "percentage": 63.95, "elapsed_time": "3:47:21", "remaining_time": "2:08:10"} +{"current_steps": 5548, "total_steps": 8674, "loss": 0.5280083417892456, "lr": 6.303870714073233e-07, "epoch": 1.2792252709246021, "percentage": 63.96, "elapsed_time": "3:47:24", "remaining_time": "2:08:07"} +{"current_steps": 5549, "total_steps": 8674, "loss": 0.4584185481071472, "lr": 6.300328357384568e-07, "epoch": 1.2794558450541849, "percentage": 63.97, "elapsed_time": "3:47:26", "remaining_time": "2:08:05"} +{"current_steps": 5550, "total_steps": 8674, "loss": 0.4068162441253662, "lr": 6.296786538480675e-07, "epoch": 1.2796864191837676, "percentage": 63.98, "elapsed_time": "3:47:29", "remaining_time": "2:08:02"} +{"current_steps": 5551, "total_steps": 8674, "loss": 0.4336085915565491, "lr": 6.293245257876387e-07, "epoch": 1.2799169933133503, "percentage": 64.0, "elapsed_time": 
"3:47:31", "remaining_time": "2:08:00"} +{"current_steps": 5552, "total_steps": 8674, "loss": 0.4932886064052582, "lr": 6.289704516086468e-07, "epoch": 1.280147567442933, "percentage": 64.01, "elapsed_time": "3:47:34", "remaining_time": "2:07:57"} +{"current_steps": 5553, "total_steps": 8674, "loss": 0.437292218208313, "lr": 6.2861643136256e-07, "epoch": 1.2803781415725155, "percentage": 64.02, "elapsed_time": "3:47:36", "remaining_time": "2:07:55"} +{"current_steps": 5554, "total_steps": 8674, "loss": 0.4131085276603699, "lr": 6.28262465100839e-07, "epoch": 1.2806087157020982, "percentage": 64.03, "elapsed_time": "3:47:38", "remaining_time": "2:07:53"} +{"current_steps": 5555, "total_steps": 8674, "loss": 0.4146266579627991, "lr": 6.27908552874936e-07, "epoch": 1.280839289831681, "percentage": 64.04, "elapsed_time": "3:47:41", "remaining_time": "2:07:50"} +{"current_steps": 5556, "total_steps": 8674, "loss": 0.4778539538383484, "lr": 6.275546947362957e-07, "epoch": 1.2810698639612634, "percentage": 64.05, "elapsed_time": "3:47:43", "remaining_time": "2:07:48"} +{"current_steps": 5557, "total_steps": 8674, "loss": 0.3989019989967346, "lr": 6.272008907363555e-07, "epoch": 1.2813004380908461, "percentage": 64.07, "elapsed_time": "3:47:46", "remaining_time": "2:07:45"} +{"current_steps": 5558, "total_steps": 8674, "loss": 0.4433528184890747, "lr": 6.268471409265436e-07, "epoch": 1.2815310122204289, "percentage": 64.08, "elapsed_time": "3:47:48", "remaining_time": "2:07:43"} +{"current_steps": 5559, "total_steps": 8674, "loss": 0.46929931640625, "lr": 6.264934453582817e-07, "epoch": 1.2817615863500116, "percentage": 64.09, "elapsed_time": "3:47:51", "remaining_time": "2:07:40"} +{"current_steps": 5560, "total_steps": 8674, "loss": 0.4908202886581421, "lr": 6.261398040829829e-07, "epoch": 1.2819921604795943, "percentage": 64.1, "elapsed_time": "3:47:53", "remaining_time": "2:07:38"} +{"current_steps": 5561, "total_steps": 8674, "loss": 0.44195377826690674, "lr": 
6.257862171520528e-07, "epoch": 1.2822227346091768, "percentage": 64.11, "elapsed_time": "3:47:56", "remaining_time": "2:07:35"} +{"current_steps": 5562, "total_steps": 8674, "loss": 0.548696756362915, "lr": 6.254326846168882e-07, "epoch": 1.2824533087387595, "percentage": 64.12, "elapsed_time": "3:47:58", "remaining_time": "2:07:33"} +{"current_steps": 5563, "total_steps": 8674, "loss": 0.4015994668006897, "lr": 6.250792065288794e-07, "epoch": 1.2826838828683422, "percentage": 64.13, "elapsed_time": "3:48:01", "remaining_time": "2:07:30"} +{"current_steps": 5564, "total_steps": 8674, "loss": 0.4281688928604126, "lr": 6.247257829394074e-07, "epoch": 1.2829144569979247, "percentage": 64.15, "elapsed_time": "3:48:03", "remaining_time": "2:07:28"} +{"current_steps": 5565, "total_steps": 8674, "loss": 0.37623634934425354, "lr": 6.243724138998462e-07, "epoch": 1.2831450311275074, "percentage": 64.16, "elapsed_time": "3:48:06", "remaining_time": "2:07:25"} +{"current_steps": 5566, "total_steps": 8674, "loss": 0.4753819704055786, "lr": 6.240190994615617e-07, "epoch": 1.2833756052570902, "percentage": 64.17, "elapsed_time": "3:48:08", "remaining_time": "2:07:23"} +{"current_steps": 5567, "total_steps": 8674, "loss": 0.4584893584251404, "lr": 6.236658396759111e-07, "epoch": 1.2836061793866729, "percentage": 64.18, "elapsed_time": "3:48:10", "remaining_time": "2:07:20"} +{"current_steps": 5568, "total_steps": 8674, "loss": 0.445067435503006, "lr": 6.23312634594245e-07, "epoch": 1.2838367535162556, "percentage": 64.19, "elapsed_time": "3:48:13", "remaining_time": "2:07:18"} +{"current_steps": 5569, "total_steps": 8674, "loss": 0.4209640920162201, "lr": 6.229594842679049e-07, "epoch": 1.284067327645838, "percentage": 64.2, "elapsed_time": "3:48:15", "remaining_time": "2:07:15"} +{"current_steps": 5570, "total_steps": 8674, "loss": 0.34620141983032227, "lr": 6.226063887482254e-07, "epoch": 1.2842979017754208, "percentage": 64.21, "elapsed_time": "3:48:17", "remaining_time": 
"2:07:13"} +{"current_steps": 5571, "total_steps": 8674, "loss": 0.43683767318725586, "lr": 6.222533480865315e-07, "epoch": 1.2845284759050035, "percentage": 64.23, "elapsed_time": "3:48:20", "remaining_time": "2:07:10"} +{"current_steps": 5572, "total_steps": 8674, "loss": 0.45881450176239014, "lr": 6.219003623341421e-07, "epoch": 1.284759050034586, "percentage": 64.24, "elapsed_time": "3:48:22", "remaining_time": "2:07:08"} +{"current_steps": 5573, "total_steps": 8674, "loss": 0.40115928649902344, "lr": 6.215474315423667e-07, "epoch": 1.2849896241641687, "percentage": 64.25, "elapsed_time": "3:48:25", "remaining_time": "2:07:05"} +{"current_steps": 5574, "total_steps": 8674, "loss": 0.4181373119354248, "lr": 6.211945557625082e-07, "epoch": 1.2852201982937514, "percentage": 64.26, "elapsed_time": "3:48:27", "remaining_time": "2:07:03"} +{"current_steps": 5575, "total_steps": 8674, "loss": 0.4743300676345825, "lr": 6.208417350458598e-07, "epoch": 1.2854507724233342, "percentage": 64.27, "elapsed_time": "3:48:30", "remaining_time": "2:07:01"} +{"current_steps": 5576, "total_steps": 8674, "loss": 0.4236707091331482, "lr": 6.204889694437077e-07, "epoch": 1.2856813465529169, "percentage": 64.28, "elapsed_time": "3:48:32", "remaining_time": "2:06:58"} +{"current_steps": 5577, "total_steps": 8674, "loss": 0.4105497896671295, "lr": 6.201362590073305e-07, "epoch": 1.2859119206824994, "percentage": 64.3, "elapsed_time": "3:48:34", "remaining_time": "2:06:56"} +{"current_steps": 5578, "total_steps": 8674, "loss": 0.4164474606513977, "lr": 6.197836037879973e-07, "epoch": 1.286142494812082, "percentage": 64.31, "elapsed_time": "3:48:37", "remaining_time": "2:06:53"} +{"current_steps": 5579, "total_steps": 8674, "loss": 0.49809616804122925, "lr": 6.19431003836971e-07, "epoch": 1.2863730689416648, "percentage": 64.32, "elapsed_time": "3:48:39", "remaining_time": "2:06:51"} +{"current_steps": 5580, "total_steps": 8674, "loss": 0.4902994632720947, "lr": 6.19078459205505e-07, 
"epoch": 1.2866036430712473, "percentage": 64.33, "elapsed_time": "3:48:42", "remaining_time": "2:06:48"} +{"current_steps": 5581, "total_steps": 8674, "loss": 0.3697085380554199, "lr": 6.18725969944846e-07, "epoch": 1.28683421720083, "percentage": 64.34, "elapsed_time": "3:48:44", "remaining_time": "2:06:46"} +{"current_steps": 5582, "total_steps": 8674, "loss": 0.446627140045166, "lr": 6.183735361062309e-07, "epoch": 1.2870647913304127, "percentage": 64.35, "elapsed_time": "3:48:47", "remaining_time": "2:06:43"} +{"current_steps": 5583, "total_steps": 8674, "loss": 0.39521220326423645, "lr": 6.180211577408901e-07, "epoch": 1.2872953654599955, "percentage": 64.36, "elapsed_time": "3:48:49", "remaining_time": "2:06:41"} +{"current_steps": 5584, "total_steps": 8674, "loss": 0.6308573484420776, "lr": 6.176688349000452e-07, "epoch": 1.2875259395895782, "percentage": 64.38, "elapsed_time": "3:48:52", "remaining_time": "2:06:38"} +{"current_steps": 5585, "total_steps": 8674, "loss": 0.4558343291282654, "lr": 6.173165676349102e-07, "epoch": 1.2877565137191607, "percentage": 64.39, "elapsed_time": "3:48:54", "remaining_time": "2:06:36"} +{"current_steps": 5586, "total_steps": 8674, "loss": 0.5487015247344971, "lr": 6.169643559966906e-07, "epoch": 1.2879870878487434, "percentage": 64.4, "elapsed_time": "3:48:57", "remaining_time": "2:06:34"} +{"current_steps": 5587, "total_steps": 8674, "loss": 0.39074039459228516, "lr": 6.166122000365834e-07, "epoch": 1.288217661978326, "percentage": 64.41, "elapsed_time": "3:48:59", "remaining_time": "2:06:31"} +{"current_steps": 5588, "total_steps": 8674, "loss": 0.5136120915412903, "lr": 6.162600998057787e-07, "epoch": 1.2884482361079086, "percentage": 64.42, "elapsed_time": "3:49:01", "remaining_time": "2:06:29"} +{"current_steps": 5589, "total_steps": 8674, "loss": 0.5344336628913879, "lr": 6.159080553554572e-07, "epoch": 1.2886788102374913, "percentage": 64.43, "elapsed_time": "3:49:04", "remaining_time": "2:06:26"} 
+{"current_steps": 5590, "total_steps": 8674, "loss": 0.5204205513000488, "lr": 6.15556066736793e-07, "epoch": 1.288909384367074, "percentage": 64.45, "elapsed_time": "3:49:06", "remaining_time": "2:06:24"} +{"current_steps": 5591, "total_steps": 8674, "loss": 0.4768211245536804, "lr": 6.152041340009504e-07, "epoch": 1.2891399584966567, "percentage": 64.46, "elapsed_time": "3:49:09", "remaining_time": "2:06:21"} +{"current_steps": 5592, "total_steps": 8674, "loss": 0.44098299741744995, "lr": 6.148522571990868e-07, "epoch": 1.2893705326262395, "percentage": 64.47, "elapsed_time": "3:49:11", "remaining_time": "2:06:19"} +{"current_steps": 5593, "total_steps": 8674, "loss": 0.5038055181503296, "lr": 6.145004363823509e-07, "epoch": 1.289601106755822, "percentage": 64.48, "elapsed_time": "3:49:14", "remaining_time": "2:06:16"} +{"current_steps": 5594, "total_steps": 8674, "loss": 0.417998343706131, "lr": 6.141486716018837e-07, "epoch": 1.2898316808854047, "percentage": 64.49, "elapsed_time": "3:49:16", "remaining_time": "2:06:14"} +{"current_steps": 5595, "total_steps": 8674, "loss": 0.48858124017715454, "lr": 6.137969629088174e-07, "epoch": 1.2900622550149874, "percentage": 64.5, "elapsed_time": "3:49:19", "remaining_time": "2:06:11"} +{"current_steps": 5596, "total_steps": 8674, "loss": 0.46988582611083984, "lr": 6.134453103542765e-07, "epoch": 1.2902928291445699, "percentage": 64.51, "elapsed_time": "3:49:21", "remaining_time": "2:06:09"} +{"current_steps": 5597, "total_steps": 8674, "loss": 0.5100589394569397, "lr": 6.130937139893779e-07, "epoch": 1.2905234032741526, "percentage": 64.53, "elapsed_time": "3:49:23", "remaining_time": "2:06:06"} +{"current_steps": 5598, "total_steps": 8674, "loss": 0.490558922290802, "lr": 6.127421738652286e-07, "epoch": 1.2907539774037353, "percentage": 64.54, "elapsed_time": "3:49:26", "remaining_time": "2:06:04"} +{"current_steps": 5599, "total_steps": 8674, "loss": 0.4749597907066345, "lr": 6.123906900329291e-07, "epoch": 
1.290984551533318, "percentage": 64.55, "elapsed_time": "3:49:28", "remaining_time": "2:06:01"} +{"current_steps": 5600, "total_steps": 8674, "loss": 0.5006792545318604, "lr": 6.12039262543571e-07, "epoch": 1.2912151256629008, "percentage": 64.56, "elapsed_time": "3:49:31", "remaining_time": "2:05:59"} +{"current_steps": 5601, "total_steps": 8674, "loss": 0.46902909874916077, "lr": 6.116878914482384e-07, "epoch": 1.2914456997924832, "percentage": 64.57, "elapsed_time": "3:49:35", "remaining_time": "2:05:57"} +{"current_steps": 5602, "total_steps": 8674, "loss": 0.46765559911727905, "lr": 6.113365767980059e-07, "epoch": 1.291676273922066, "percentage": 64.58, "elapsed_time": "3:49:37", "remaining_time": "2:05:55"} +{"current_steps": 5603, "total_steps": 8674, "loss": 0.45960646867752075, "lr": 6.10985318643941e-07, "epoch": 1.2919068480516487, "percentage": 64.6, "elapsed_time": "3:49:39", "remaining_time": "2:05:52"} +{"current_steps": 5604, "total_steps": 8674, "loss": 0.4067912697792053, "lr": 6.106341170371024e-07, "epoch": 1.2921374221812312, "percentage": 64.61, "elapsed_time": "3:49:42", "remaining_time": "2:05:50"} +{"current_steps": 5605, "total_steps": 8674, "loss": 0.45004114508628845, "lr": 6.102829720285414e-07, "epoch": 1.292367996310814, "percentage": 64.62, "elapsed_time": "3:49:44", "remaining_time": "2:05:47"} +{"current_steps": 5606, "total_steps": 8674, "loss": 0.5086014270782471, "lr": 6.099318836692999e-07, "epoch": 1.2925985704403966, "percentage": 64.63, "elapsed_time": "3:49:47", "remaining_time": "2:05:45"} +{"current_steps": 5607, "total_steps": 8674, "loss": 0.49985191226005554, "lr": 6.095808520104122e-07, "epoch": 1.2928291445699793, "percentage": 64.64, "elapsed_time": "3:49:49", "remaining_time": "2:05:43"} +{"current_steps": 5608, "total_steps": 8674, "loss": 0.5066381096839905, "lr": 6.092298771029047e-07, "epoch": 1.293059718699562, "percentage": 64.65, "elapsed_time": "3:49:52", "remaining_time": "2:05:40"} +{"current_steps": 
5609, "total_steps": 8674, "loss": 0.49626559019088745, "lr": 6.088789589977947e-07, "epoch": 1.2932902928291445, "percentage": 64.66, "elapsed_time": "3:49:54", "remaining_time": "2:05:38"} +{"current_steps": 5610, "total_steps": 8674, "loss": 0.4837498962879181, "lr": 6.085280977460921e-07, "epoch": 1.2935208669587273, "percentage": 64.68, "elapsed_time": "3:49:57", "remaining_time": "2:05:35"} +{"current_steps": 5611, "total_steps": 8674, "loss": 0.41308102011680603, "lr": 6.081772933987977e-07, "epoch": 1.29375144108831, "percentage": 64.69, "elapsed_time": "3:49:59", "remaining_time": "2:05:33"} +{"current_steps": 5612, "total_steps": 8674, "loss": 0.4453086853027344, "lr": 6.078265460069048e-07, "epoch": 1.2939820152178925, "percentage": 64.7, "elapsed_time": "3:50:02", "remaining_time": "2:05:30"} +{"current_steps": 5613, "total_steps": 8674, "loss": 0.4700174927711487, "lr": 6.074758556213976e-07, "epoch": 1.2942125893474752, "percentage": 64.71, "elapsed_time": "3:50:04", "remaining_time": "2:05:28"} +{"current_steps": 5614, "total_steps": 8674, "loss": 0.578227162361145, "lr": 6.071252222932537e-07, "epoch": 1.294443163477058, "percentage": 64.72, "elapsed_time": "3:50:07", "remaining_time": "2:05:25"} +{"current_steps": 5615, "total_steps": 8674, "loss": 0.36468571424484253, "lr": 6.067746460734398e-07, "epoch": 1.2946737376066406, "percentage": 64.73, "elapsed_time": "3:50:09", "remaining_time": "2:05:23"} +{"current_steps": 5616, "total_steps": 8674, "loss": 0.4793199896812439, "lr": 6.064241270129166e-07, "epoch": 1.2949043117362231, "percentage": 64.75, "elapsed_time": "3:50:12", "remaining_time": "2:05:20"} +{"current_steps": 5617, "total_steps": 8674, "loss": 0.40342214703559875, "lr": 6.060736651626355e-07, "epoch": 1.2951348858658058, "percentage": 64.76, "elapsed_time": "3:50:14", "remaining_time": "2:05:18"} +{"current_steps": 5618, "total_steps": 8674, "loss": 0.4212435185909271, "lr": 6.05723260573539e-07, "epoch": 1.2953654599953885, 
"percentage": 64.77, "elapsed_time": "3:50:17", "remaining_time": "2:05:16"} +{"current_steps": 5619, "total_steps": 8674, "loss": 0.44668713212013245, "lr": 6.053729132965626e-07, "epoch": 1.295596034124971, "percentage": 64.78, "elapsed_time": "3:50:19", "remaining_time": "2:05:13"} +{"current_steps": 5620, "total_steps": 8674, "loss": 0.5159831643104553, "lr": 6.050226233826326e-07, "epoch": 1.2958266082545538, "percentage": 64.79, "elapsed_time": "3:50:21", "remaining_time": "2:05:11"} +{"current_steps": 5621, "total_steps": 8674, "loss": 0.5091866850852966, "lr": 6.046723908826676e-07, "epoch": 1.2960571823841365, "percentage": 64.8, "elapsed_time": "3:50:24", "remaining_time": "2:05:08"} +{"current_steps": 5622, "total_steps": 8674, "loss": 0.34838563203811646, "lr": 6.043222158475767e-07, "epoch": 1.2962877565137192, "percentage": 64.81, "elapsed_time": "3:50:26", "remaining_time": "2:05:06"} +{"current_steps": 5623, "total_steps": 8674, "loss": 0.46576952934265137, "lr": 6.039720983282621e-07, "epoch": 1.296518330643302, "percentage": 64.83, "elapsed_time": "3:50:29", "remaining_time": "2:05:03"} +{"current_steps": 5624, "total_steps": 8674, "loss": 0.4971234202384949, "lr": 6.036220383756163e-07, "epoch": 1.2967489047728844, "percentage": 64.84, "elapsed_time": "3:50:32", "remaining_time": "2:05:01"} +{"current_steps": 5625, "total_steps": 8674, "loss": 0.4792482256889343, "lr": 6.03272036040525e-07, "epoch": 1.2969794789024671, "percentage": 64.85, "elapsed_time": "3:50:34", "remaining_time": "2:04:58"} +{"current_steps": 5626, "total_steps": 8674, "loss": 0.45584213733673096, "lr": 6.029220913738636e-07, "epoch": 1.2972100530320498, "percentage": 64.86, "elapsed_time": "3:50:36", "remaining_time": "2:04:56"} +{"current_steps": 5627, "total_steps": 8674, "loss": 0.5094096064567566, "lr": 6.025722044265004e-07, "epoch": 1.2974406271616323, "percentage": 64.87, "elapsed_time": "3:50:39", "remaining_time": "2:04:53"} +{"current_steps": 5628, "total_steps": 
8674, "loss": 0.33178865909576416, "lr": 6.022223752492954e-07, "epoch": 1.297671201291215, "percentage": 64.88, "elapsed_time": "3:50:41", "remaining_time": "2:04:51"} +{"current_steps": 5629, "total_steps": 8674, "loss": 0.4955121874809265, "lr": 6.018726038930991e-07, "epoch": 1.2979017754207978, "percentage": 64.9, "elapsed_time": "3:50:44", "remaining_time": "2:04:49"} +{"current_steps": 5630, "total_steps": 8674, "loss": 0.46253639459609985, "lr": 6.01522890408755e-07, "epoch": 1.2981323495503805, "percentage": 64.91, "elapsed_time": "3:50:46", "remaining_time": "2:04:46"} +{"current_steps": 5631, "total_steps": 8674, "loss": 0.4760236442089081, "lr": 6.011732348470971e-07, "epoch": 1.2983629236799632, "percentage": 64.92, "elapsed_time": "3:50:49", "remaining_time": "2:04:44"} +{"current_steps": 5632, "total_steps": 8674, "loss": 0.44413092732429504, "lr": 6.008236372589516e-07, "epoch": 1.2985934978095457, "percentage": 64.93, "elapsed_time": "3:50:51", "remaining_time": "2:04:41"} +{"current_steps": 5633, "total_steps": 8674, "loss": 0.5431559681892395, "lr": 6.004740976951358e-07, "epoch": 1.2988240719391284, "percentage": 64.94, "elapsed_time": "3:50:54", "remaining_time": "2:04:39"} +{"current_steps": 5634, "total_steps": 8674, "loss": 0.41276806592941284, "lr": 6.001246162064592e-07, "epoch": 1.2990546460687111, "percentage": 64.95, "elapsed_time": "3:50:56", "remaining_time": "2:04:36"} +{"current_steps": 5635, "total_steps": 8674, "loss": 0.3998986482620239, "lr": 5.997751928437219e-07, "epoch": 1.2992852201982936, "percentage": 64.96, "elapsed_time": "3:50:59", "remaining_time": "2:04:34"} +{"current_steps": 5636, "total_steps": 8674, "loss": 0.47741782665252686, "lr": 5.994258276577169e-07, "epoch": 1.2995157943278763, "percentage": 64.98, "elapsed_time": "3:51:01", "remaining_time": "2:04:31"} +{"current_steps": 5637, "total_steps": 8674, "loss": 0.4294115900993347, "lr": 5.990765206992277e-07, "epoch": 1.299746368457459, "percentage": 64.99, 
"elapsed_time": "3:51:03", "remaining_time": "2:04:29"} +{"current_steps": 5638, "total_steps": 8674, "loss": 0.4717773199081421, "lr": 5.987272720190288e-07, "epoch": 1.2999769425870418, "percentage": 65.0, "elapsed_time": "3:51:06", "remaining_time": "2:04:26"} +{"current_steps": 5639, "total_steps": 8674, "loss": 0.5169499516487122, "lr": 5.983780816678881e-07, "epoch": 1.3002075167166245, "percentage": 65.01, "elapsed_time": "3:51:08", "remaining_time": "2:04:24"} +{"current_steps": 5640, "total_steps": 8674, "loss": 0.3796359598636627, "lr": 5.980289496965634e-07, "epoch": 1.300438090846207, "percentage": 65.02, "elapsed_time": "3:51:11", "remaining_time": "2:04:21"} +{"current_steps": 5641, "total_steps": 8674, "loss": 0.44377613067626953, "lr": 5.976798761558048e-07, "epoch": 1.3006686649757897, "percentage": 65.03, "elapsed_time": "3:51:13", "remaining_time": "2:04:19"} +{"current_steps": 5642, "total_steps": 8674, "loss": 0.46863383054733276, "lr": 5.973308610963534e-07, "epoch": 1.3008992391053724, "percentage": 65.04, "elapsed_time": "3:51:16", "remaining_time": "2:04:17"} +{"current_steps": 5643, "total_steps": 8674, "loss": 0.5437184572219849, "lr": 5.969819045689426e-07, "epoch": 1.301129813234955, "percentage": 65.06, "elapsed_time": "3:51:18", "remaining_time": "2:04:14"} +{"current_steps": 5644, "total_steps": 8674, "loss": 0.4487720727920532, "lr": 5.96633006624296e-07, "epoch": 1.3013603873645376, "percentage": 65.07, "elapsed_time": "3:51:21", "remaining_time": "2:04:12"} +{"current_steps": 5645, "total_steps": 8674, "loss": 0.42834270000457764, "lr": 5.962841673131305e-07, "epoch": 1.3015909614941203, "percentage": 65.08, "elapsed_time": "3:51:23", "remaining_time": "2:04:09"} +{"current_steps": 5646, "total_steps": 8674, "loss": 0.5242533087730408, "lr": 5.959353866861525e-07, "epoch": 1.301821535623703, "percentage": 65.09, "elapsed_time": "3:51:26", "remaining_time": "2:04:07"} +{"current_steps": 5647, "total_steps": 8674, "loss": 
0.4529950022697449, "lr": 5.955866647940609e-07, "epoch": 1.3020521097532858, "percentage": 65.1, "elapsed_time": "3:51:28", "remaining_time": "2:04:04"} +{"current_steps": 5648, "total_steps": 8674, "loss": 0.41109561920166016, "lr": 5.952380016875465e-07, "epoch": 1.3022826838828683, "percentage": 65.11, "elapsed_time": "3:51:30", "remaining_time": "2:04:02"} +{"current_steps": 5649, "total_steps": 8674, "loss": 0.5468418598175049, "lr": 5.948893974172904e-07, "epoch": 1.302513258012451, "percentage": 65.13, "elapsed_time": "3:51:33", "remaining_time": "2:03:59"} +{"current_steps": 5650, "total_steps": 8674, "loss": 0.4594927430152893, "lr": 5.945408520339663e-07, "epoch": 1.3027438321420337, "percentage": 65.14, "elapsed_time": "3:51:35", "remaining_time": "2:03:57"} +{"current_steps": 5651, "total_steps": 8674, "loss": 0.5011999011039734, "lr": 5.941923655882383e-07, "epoch": 1.3029744062716162, "percentage": 65.15, "elapsed_time": "3:51:38", "remaining_time": "2:03:54"} +{"current_steps": 5652, "total_steps": 8674, "loss": 0.519101083278656, "lr": 5.938439381307632e-07, "epoch": 1.303204980401199, "percentage": 65.16, "elapsed_time": "3:51:40", "remaining_time": "2:03:52"} +{"current_steps": 5653, "total_steps": 8674, "loss": 0.521979570388794, "lr": 5.934955697121875e-07, "epoch": 1.3034355545307816, "percentage": 65.17, "elapsed_time": "3:51:43", "remaining_time": "2:03:50"} +{"current_steps": 5654, "total_steps": 8674, "loss": 0.5969122648239136, "lr": 5.931472603831507e-07, "epoch": 1.3036661286603644, "percentage": 65.18, "elapsed_time": "3:51:45", "remaining_time": "2:03:47"} +{"current_steps": 5655, "total_steps": 8674, "loss": 0.47013232111930847, "lr": 5.927990101942826e-07, "epoch": 1.303896702789947, "percentage": 65.19, "elapsed_time": "3:51:48", "remaining_time": "2:03:45"} +{"current_steps": 5656, "total_steps": 8674, "loss": 0.4135271906852722, "lr": 5.924508191962059e-07, "epoch": 1.3041272769195296, "percentage": 65.21, "elapsed_time": 
"3:51:50", "remaining_time": "2:03:42"} +{"current_steps": 5657, "total_steps": 8674, "loss": 0.45639151334762573, "lr": 5.921026874395327e-07, "epoch": 1.3043578510491123, "percentage": 65.22, "elapsed_time": "3:51:52", "remaining_time": "2:03:40"} +{"current_steps": 5658, "total_steps": 8674, "loss": 0.4047633409500122, "lr": 5.917546149748676e-07, "epoch": 1.304588425178695, "percentage": 65.23, "elapsed_time": "3:51:55", "remaining_time": "2:03:37"} +{"current_steps": 5659, "total_steps": 8674, "loss": 0.4352290630340576, "lr": 5.91406601852807e-07, "epoch": 1.3048189993082775, "percentage": 65.24, "elapsed_time": "3:51:57", "remaining_time": "2:03:35"} +{"current_steps": 5660, "total_steps": 8674, "loss": 0.4912130534648895, "lr": 5.910586481239375e-07, "epoch": 1.3050495734378602, "percentage": 65.25, "elapsed_time": "3:52:00", "remaining_time": "2:03:32"} +{"current_steps": 5661, "total_steps": 8674, "loss": 0.4114433526992798, "lr": 5.907107538388383e-07, "epoch": 1.305280147567443, "percentage": 65.26, "elapsed_time": "3:52:02", "remaining_time": "2:03:30"} +{"current_steps": 5662, "total_steps": 8674, "loss": 0.4230955243110657, "lr": 5.903629190480786e-07, "epoch": 1.3055107216970256, "percentage": 65.28, "elapsed_time": "3:52:05", "remaining_time": "2:03:27"} +{"current_steps": 5663, "total_steps": 8674, "loss": 0.5020648241043091, "lr": 5.900151438022205e-07, "epoch": 1.3057412958266084, "percentage": 65.29, "elapsed_time": "3:52:07", "remaining_time": "2:03:25"} +{"current_steps": 5664, "total_steps": 8674, "loss": 0.48636388778686523, "lr": 5.89667428151816e-07, "epoch": 1.3059718699561909, "percentage": 65.3, "elapsed_time": "3:52:09", "remaining_time": "2:03:22"} +{"current_steps": 5665, "total_steps": 8674, "loss": 0.412000447511673, "lr": 5.893197721474099e-07, "epoch": 1.3062024440857736, "percentage": 65.31, "elapsed_time": "3:52:12", "remaining_time": "2:03:20"} +{"current_steps": 5666, "total_steps": 8674, "loss": 0.3584952652454376, "lr": 
5.889721758395369e-07, "epoch": 1.3064330182153563, "percentage": 65.32, "elapsed_time": "3:52:14", "remaining_time": "2:03:17"} +{"current_steps": 5667, "total_steps": 8674, "loss": 0.4538918733596802, "lr": 5.886246392787234e-07, "epoch": 1.3066635923449388, "percentage": 65.33, "elapsed_time": "3:52:17", "remaining_time": "2:03:15"} +{"current_steps": 5668, "total_steps": 8674, "loss": 0.478498637676239, "lr": 5.882771625154883e-07, "epoch": 1.3068941664745215, "percentage": 65.34, "elapsed_time": "3:52:19", "remaining_time": "2:03:12"} +{"current_steps": 5669, "total_steps": 8674, "loss": 0.49535906314849854, "lr": 5.879297456003398e-07, "epoch": 1.3071247406041042, "percentage": 65.36, "elapsed_time": "3:52:22", "remaining_time": "2:03:10"} +{"current_steps": 5670, "total_steps": 8674, "loss": 0.48975661396980286, "lr": 5.875823885837793e-07, "epoch": 1.307355314733687, "percentage": 65.37, "elapsed_time": "3:52:24", "remaining_time": "2:03:07"} +{"current_steps": 5671, "total_steps": 8674, "loss": 0.4870087802410126, "lr": 5.87235091516298e-07, "epoch": 1.3075858888632697, "percentage": 65.38, "elapsed_time": "3:52:26", "remaining_time": "2:03:05"} +{"current_steps": 5672, "total_steps": 8674, "loss": 0.43411481380462646, "lr": 5.8688785444838e-07, "epoch": 1.3078164629928521, "percentage": 65.39, "elapsed_time": "3:52:29", "remaining_time": "2:03:02"} +{"current_steps": 5673, "total_steps": 8674, "loss": 0.5108835697174072, "lr": 5.865406774304986e-07, "epoch": 1.3080470371224349, "percentage": 65.4, "elapsed_time": "3:52:31", "remaining_time": "2:03:00"} +{"current_steps": 5674, "total_steps": 8674, "loss": 0.47449198365211487, "lr": 5.861935605131202e-07, "epoch": 1.3082776112520176, "percentage": 65.41, "elapsed_time": "3:52:34", "remaining_time": "2:02:58"} +{"current_steps": 5675, "total_steps": 8674, "loss": 0.5550234913825989, "lr": 5.858465037467014e-07, "epoch": 1.3085081853816, "percentage": 65.43, "elapsed_time": "3:52:36", "remaining_time": 
"2:02:55"} +{"current_steps": 5676, "total_steps": 8674, "loss": 0.4548208713531494, "lr": 5.854995071816911e-07, "epoch": 1.3087387595111828, "percentage": 65.44, "elapsed_time": "3:52:39", "remaining_time": "2:02:53"} +{"current_steps": 5677, "total_steps": 8674, "loss": 0.5176935195922852, "lr": 5.851525708685279e-07, "epoch": 1.3089693336407655, "percentage": 65.45, "elapsed_time": "3:52:41", "remaining_time": "2:02:50"} +{"current_steps": 5678, "total_steps": 8674, "loss": 0.4460016191005707, "lr": 5.848056948576428e-07, "epoch": 1.3091999077703482, "percentage": 65.46, "elapsed_time": "3:52:43", "remaining_time": "2:02:48"} +{"current_steps": 5679, "total_steps": 8674, "loss": 0.5344464182853699, "lr": 5.84458879199458e-07, "epoch": 1.309430481899931, "percentage": 65.47, "elapsed_time": "3:52:46", "remaining_time": "2:02:45"} +{"current_steps": 5680, "total_steps": 8674, "loss": 0.48601672053337097, "lr": 5.841121239443863e-07, "epoch": 1.3096610560295134, "percentage": 65.48, "elapsed_time": "3:52:48", "remaining_time": "2:02:43"} +{"current_steps": 5681, "total_steps": 8674, "loss": 0.46849286556243896, "lr": 5.837654291428327e-07, "epoch": 1.3098916301590962, "percentage": 65.49, "elapsed_time": "3:52:51", "remaining_time": "2:02:40"} +{"current_steps": 5682, "total_steps": 8674, "loss": 0.4353019893169403, "lr": 5.834187948451918e-07, "epoch": 1.3101222042886789, "percentage": 65.51, "elapsed_time": "3:52:53", "remaining_time": "2:02:38"} +{"current_steps": 5683, "total_steps": 8674, "loss": 0.5345665812492371, "lr": 5.830722211018516e-07, "epoch": 1.3103527784182614, "percentage": 65.52, "elapsed_time": "3:52:56", "remaining_time": "2:02:35"} +{"current_steps": 5684, "total_steps": 8674, "loss": 0.4060036540031433, "lr": 5.827257079631886e-07, "epoch": 1.310583352547844, "percentage": 65.53, "elapsed_time": "3:52:58", "remaining_time": "2:02:33"} +{"current_steps": 5685, "total_steps": 8674, "loss": 0.43724536895751953, "lr": 5.823792554795738e-07, 
"epoch": 1.3108139266774268, "percentage": 65.54, "elapsed_time": "3:53:01", "remaining_time": "2:02:30"} +{"current_steps": 5686, "total_steps": 8674, "loss": 0.4600690007209778, "lr": 5.820328637013665e-07, "epoch": 1.3110445008070095, "percentage": 65.55, "elapsed_time": "3:53:03", "remaining_time": "2:02:28"} +{"current_steps": 5687, "total_steps": 8674, "loss": 0.4352531433105469, "lr": 5.816865326789182e-07, "epoch": 1.3112750749365922, "percentage": 65.56, "elapsed_time": "3:53:05", "remaining_time": "2:02:25"} +{"current_steps": 5688, "total_steps": 8674, "loss": 0.39384984970092773, "lr": 5.813402624625722e-07, "epoch": 1.3115056490661747, "percentage": 65.58, "elapsed_time": "3:53:08", "remaining_time": "2:02:23"} +{"current_steps": 5689, "total_steps": 8674, "loss": 0.44367098808288574, "lr": 5.809940531026616e-07, "epoch": 1.3117362231957574, "percentage": 65.59, "elapsed_time": "3:53:10", "remaining_time": "2:02:20"} +{"current_steps": 5690, "total_steps": 8674, "loss": 0.4757416546344757, "lr": 5.806479046495123e-07, "epoch": 1.3119667973253402, "percentage": 65.6, "elapsed_time": "3:53:13", "remaining_time": "2:02:18"} +{"current_steps": 5691, "total_steps": 8674, "loss": 0.521708607673645, "lr": 5.803018171534396e-07, "epoch": 1.3121973714549227, "percentage": 65.61, "elapsed_time": "3:53:15", "remaining_time": "2:02:16"} +{"current_steps": 5692, "total_steps": 8674, "loss": 0.4127439260482788, "lr": 5.799557906647514e-07, "epoch": 1.3124279455845054, "percentage": 65.62, "elapsed_time": "3:53:18", "remaining_time": "2:02:13"} +{"current_steps": 5693, "total_steps": 8674, "loss": 0.4809693396091461, "lr": 5.79609825233746e-07, "epoch": 1.312658519714088, "percentage": 65.63, "elapsed_time": "3:53:20", "remaining_time": "2:02:11"} +{"current_steps": 5694, "total_steps": 8674, "loss": 0.5075684189796448, "lr": 5.792639209107134e-07, "epoch": 1.3128890938436708, "percentage": 65.64, "elapsed_time": "3:53:23", "remaining_time": "2:02:08"} 
+{"current_steps": 5695, "total_steps": 8674, "loss": 0.416393518447876, "lr": 5.789180777459336e-07, "epoch": 1.3131196679732535, "percentage": 65.66, "elapsed_time": "3:53:25", "remaining_time": "2:02:06"} +{"current_steps": 5696, "total_steps": 8674, "loss": 0.4456642270088196, "lr": 5.78572295789679e-07, "epoch": 1.313350242102836, "percentage": 65.67, "elapsed_time": "3:53:28", "remaining_time": "2:02:03"} +{"current_steps": 5697, "total_steps": 8674, "loss": 0.4757812023162842, "lr": 5.782265750922124e-07, "epoch": 1.3135808162324187, "percentage": 65.68, "elapsed_time": "3:53:30", "remaining_time": "2:02:01"} +{"current_steps": 5698, "total_steps": 8674, "loss": 0.5081768035888672, "lr": 5.778809157037872e-07, "epoch": 1.3138113903620015, "percentage": 65.69, "elapsed_time": "3:53:33", "remaining_time": "2:01:58"} +{"current_steps": 5699, "total_steps": 8674, "loss": 0.4604584872722626, "lr": 5.775353176746489e-07, "epoch": 1.314041964491584, "percentage": 65.7, "elapsed_time": "3:53:35", "remaining_time": "2:01:56"} +{"current_steps": 5700, "total_steps": 8674, "loss": 0.4153773784637451, "lr": 5.771897810550339e-07, "epoch": 1.3142725386211667, "percentage": 65.71, "elapsed_time": "3:53:38", "remaining_time": "2:01:54"} +{"current_steps": 5701, "total_steps": 8674, "loss": 0.5194085836410522, "lr": 5.768443058951695e-07, "epoch": 1.3145031127507494, "percentage": 65.73, "elapsed_time": "3:53:41", "remaining_time": "2:01:52"} +{"current_steps": 5702, "total_steps": 8674, "loss": 0.4398482143878937, "lr": 5.764988922452733e-07, "epoch": 1.314733686880332, "percentage": 65.74, "elapsed_time": "3:53:44", "remaining_time": "2:01:49"} +{"current_steps": 5703, "total_steps": 8674, "loss": 0.5148836374282837, "lr": 5.761535401555558e-07, "epoch": 1.3149642610099148, "percentage": 65.75, "elapsed_time": "3:53:46", "remaining_time": "2:01:47"} +{"current_steps": 5704, "total_steps": 8674, "loss": 0.533142626285553, "lr": 5.758082496762163e-07, "epoch": 
1.3151948351394973, "percentage": 65.76, "elapsed_time": "3:53:49", "remaining_time": "2:01:44"} +{"current_steps": 5705, "total_steps": 8674, "loss": 0.4059423804283142, "lr": 5.754630208574473e-07, "epoch": 1.31542540926908, "percentage": 65.77, "elapsed_time": "3:53:51", "remaining_time": "2:01:42"} +{"current_steps": 5706, "total_steps": 8674, "loss": 0.4685533940792084, "lr": 5.751178537494302e-07, "epoch": 1.3156559833986627, "percentage": 65.78, "elapsed_time": "3:53:54", "remaining_time": "2:01:39"} +{"current_steps": 5707, "total_steps": 8674, "loss": 0.4454694986343384, "lr": 5.747727484023392e-07, "epoch": 1.3158865575282452, "percentage": 65.79, "elapsed_time": "3:53:56", "remaining_time": "2:01:37"} +{"current_steps": 5708, "total_steps": 8674, "loss": 0.4058796167373657, "lr": 5.74427704866339e-07, "epoch": 1.316117131657828, "percentage": 65.81, "elapsed_time": "3:53:59", "remaining_time": "2:01:35"} +{"current_steps": 5709, "total_steps": 8674, "loss": 0.3891766369342804, "lr": 5.740827231915847e-07, "epoch": 1.3163477057874107, "percentage": 65.82, "elapsed_time": "3:54:01", "remaining_time": "2:01:32"} +{"current_steps": 5710, "total_steps": 8674, "loss": 0.47912657260894775, "lr": 5.737378034282235e-07, "epoch": 1.3165782799169934, "percentage": 65.83, "elapsed_time": "3:54:04", "remaining_time": "2:01:30"} +{"current_steps": 5711, "total_steps": 8674, "loss": 0.4221952557563782, "lr": 5.733929456263922e-07, "epoch": 1.316808854046576, "percentage": 65.84, "elapsed_time": "3:54:06", "remaining_time": "2:01:27"} +{"current_steps": 5712, "total_steps": 8674, "loss": 0.39018404483795166, "lr": 5.730481498362202e-07, "epoch": 1.3170394281761586, "percentage": 65.85, "elapsed_time": "3:54:09", "remaining_time": "2:01:25"} +{"current_steps": 5713, "total_steps": 8674, "loss": 0.5388307571411133, "lr": 5.727034161078262e-07, "epoch": 1.3172700023057413, "percentage": 65.86, "elapsed_time": "3:54:11", "remaining_time": "2:01:22"} +{"current_steps": 5714, 
"total_steps": 8674, "loss": 0.3243408501148224, "lr": 5.723587444913216e-07, "epoch": 1.317500576435324, "percentage": 65.88, "elapsed_time": "3:54:14", "remaining_time": "2:01:20"} +{"current_steps": 5715, "total_steps": 8674, "loss": 0.46480363607406616, "lr": 5.720141350368072e-07, "epoch": 1.3177311505649065, "percentage": 65.89, "elapsed_time": "3:54:16", "remaining_time": "2:01:17"} +{"current_steps": 5716, "total_steps": 8674, "loss": 0.5286417603492737, "lr": 5.716695877943757e-07, "epoch": 1.3179617246944892, "percentage": 65.9, "elapsed_time": "3:54:19", "remaining_time": "2:01:15"} +{"current_steps": 5717, "total_steps": 8674, "loss": 0.4170069694519043, "lr": 5.71325102814111e-07, "epoch": 1.318192298824072, "percentage": 65.91, "elapsed_time": "3:54:21", "remaining_time": "2:01:13"} +{"current_steps": 5718, "total_steps": 8674, "loss": 0.5738973617553711, "lr": 5.709806801460867e-07, "epoch": 1.3184228729536547, "percentage": 65.92, "elapsed_time": "3:54:24", "remaining_time": "2:01:10"} +{"current_steps": 5719, "total_steps": 8674, "loss": 0.5309658050537109, "lr": 5.706363198403689e-07, "epoch": 1.3186534470832374, "percentage": 65.93, "elapsed_time": "3:54:26", "remaining_time": "2:01:08"} +{"current_steps": 5720, "total_steps": 8674, "loss": 0.4569379389286041, "lr": 5.70292021947013e-07, "epoch": 1.31888402121282, "percentage": 65.94, "elapsed_time": "3:54:28", "remaining_time": "2:01:05"} +{"current_steps": 5721, "total_steps": 8674, "loss": 0.46686258912086487, "lr": 5.699477865160674e-07, "epoch": 1.3191145953424026, "percentage": 65.96, "elapsed_time": "3:54:31", "remaining_time": "2:01:03"} +{"current_steps": 5722, "total_steps": 8674, "loss": 0.5333213806152344, "lr": 5.696036135975688e-07, "epoch": 1.3193451694719853, "percentage": 65.97, "elapsed_time": "3:54:33", "remaining_time": "2:01:00"} +{"current_steps": 5723, "total_steps": 8674, "loss": 0.3519536256790161, "lr": 5.69259503241547e-07, "epoch": 1.3195757436015678, "percentage": 
65.98, "elapsed_time": "3:54:36", "remaining_time": "2:00:58"} +{"current_steps": 5724, "total_steps": 8674, "loss": 0.4763161242008209, "lr": 5.689154554980218e-07, "epoch": 1.3198063177311505, "percentage": 65.99, "elapsed_time": "3:54:38", "remaining_time": "2:00:55"} +{"current_steps": 5725, "total_steps": 8674, "loss": 0.43600207567214966, "lr": 5.685714704170044e-07, "epoch": 1.3200368918607333, "percentage": 66.0, "elapsed_time": "3:54:41", "remaining_time": "2:00:53"} +{"current_steps": 5726, "total_steps": 8674, "loss": 0.41991305351257324, "lr": 5.682275480484958e-07, "epoch": 1.320267465990316, "percentage": 66.01, "elapsed_time": "3:54:43", "remaining_time": "2:00:50"} +{"current_steps": 5727, "total_steps": 8674, "loss": 0.44275131821632385, "lr": 5.678836884424894e-07, "epoch": 1.3204980401198987, "percentage": 66.02, "elapsed_time": "3:54:46", "remaining_time": "2:00:48"} +{"current_steps": 5728, "total_steps": 8674, "loss": 0.4339372515678406, "lr": 5.675398916489682e-07, "epoch": 1.3207286142494812, "percentage": 66.04, "elapsed_time": "3:54:48", "remaining_time": "2:00:46"} +{"current_steps": 5729, "total_steps": 8674, "loss": 0.4462248384952545, "lr": 5.671961577179062e-07, "epoch": 1.320959188379064, "percentage": 66.05, "elapsed_time": "3:54:51", "remaining_time": "2:00:43"} +{"current_steps": 5730, "total_steps": 8674, "loss": 0.36548441648483276, "lr": 5.668524866992693e-07, "epoch": 1.3211897625086464, "percentage": 66.06, "elapsed_time": "3:54:53", "remaining_time": "2:00:41"} +{"current_steps": 5731, "total_steps": 8674, "loss": 0.4709678888320923, "lr": 5.665088786430129e-07, "epoch": 1.321420336638229, "percentage": 66.07, "elapsed_time": "3:54:56", "remaining_time": "2:00:38"} +{"current_steps": 5732, "total_steps": 8674, "loss": 0.40125030279159546, "lr": 5.661653335990848e-07, "epoch": 1.3216509107678118, "percentage": 66.08, "elapsed_time": "3:54:58", "remaining_time": "2:00:36"} +{"current_steps": 5733, "total_steps": 8674, "loss": 
0.5288605690002441, "lr": 5.658218516174218e-07, "epoch": 1.3218814848973945, "percentage": 66.09, "elapsed_time": "3:55:01", "remaining_time": "2:00:33"} +{"current_steps": 5734, "total_steps": 8674, "loss": 0.41306072473526, "lr": 5.654784327479534e-07, "epoch": 1.3221120590269773, "percentage": 66.11, "elapsed_time": "3:55:03", "remaining_time": "2:00:31"} +{"current_steps": 5735, "total_steps": 8674, "loss": 0.34327009320259094, "lr": 5.651350770405983e-07, "epoch": 1.3223426331565598, "percentage": 66.12, "elapsed_time": "3:55:06", "remaining_time": "2:00:28"} +{"current_steps": 5736, "total_steps": 8674, "loss": 0.5055800080299377, "lr": 5.647917845452671e-07, "epoch": 1.3225732072861425, "percentage": 66.13, "elapsed_time": "3:55:08", "remaining_time": "2:00:26"} +{"current_steps": 5737, "total_steps": 8674, "loss": 0.45496249198913574, "lr": 5.644485553118609e-07, "epoch": 1.3228037814157252, "percentage": 66.14, "elapsed_time": "3:55:10", "remaining_time": "2:00:23"} +{"current_steps": 5738, "total_steps": 8674, "loss": 0.4626169502735138, "lr": 5.641053893902708e-07, "epoch": 1.3230343555453077, "percentage": 66.15, "elapsed_time": "3:55:13", "remaining_time": "2:00:21"} +{"current_steps": 5739, "total_steps": 8674, "loss": 0.46621328592300415, "lr": 5.637622868303802e-07, "epoch": 1.3232649296748904, "percentage": 66.16, "elapsed_time": "3:55:15", "remaining_time": "2:00:18"} +{"current_steps": 5740, "total_steps": 8674, "loss": 0.47793662548065186, "lr": 5.634192476820623e-07, "epoch": 1.3234955038044731, "percentage": 66.17, "elapsed_time": "3:55:18", "remaining_time": "2:00:16"} +{"current_steps": 5741, "total_steps": 8674, "loss": 0.42578715085983276, "lr": 5.630762719951816e-07, "epoch": 1.3237260779340558, "percentage": 66.19, "elapsed_time": "3:55:20", "remaining_time": "2:00:14"} +{"current_steps": 5742, "total_steps": 8674, "loss": 0.3146113157272339, "lr": 5.627333598195927e-07, "epoch": 1.3239566520636386, "percentage": 66.2, "elapsed_time": 
"3:55:23", "remaining_time": "2:00:11"} +{"current_steps": 5743, "total_steps": 8674, "loss": 0.39731544256210327, "lr": 5.623905112051417e-07, "epoch": 1.324187226193221, "percentage": 66.21, "elapsed_time": "3:55:25", "remaining_time": "2:00:09"} +{"current_steps": 5744, "total_steps": 8674, "loss": 0.3755846619606018, "lr": 5.620477262016647e-07, "epoch": 1.3244178003228038, "percentage": 66.22, "elapsed_time": "3:55:28", "remaining_time": "2:00:06"} +{"current_steps": 5745, "total_steps": 8674, "loss": 0.43060415983200073, "lr": 5.617050048589896e-07, "epoch": 1.3246483744523865, "percentage": 66.23, "elapsed_time": "3:55:30", "remaining_time": "2:00:04"} +{"current_steps": 5746, "total_steps": 8674, "loss": 0.4213481545448303, "lr": 5.613623472269334e-07, "epoch": 1.324878948581969, "percentage": 66.24, "elapsed_time": "3:55:32", "remaining_time": "2:00:01"} +{"current_steps": 5747, "total_steps": 8674, "loss": 0.3923456072807312, "lr": 5.610197533553057e-07, "epoch": 1.3251095227115517, "percentage": 66.26, "elapsed_time": "3:55:35", "remaining_time": "1:59:59"} +{"current_steps": 5748, "total_steps": 8674, "loss": 0.42293328046798706, "lr": 5.606772232939061e-07, "epoch": 1.3253400968411344, "percentage": 66.27, "elapsed_time": "3:55:37", "remaining_time": "1:59:56"} +{"current_steps": 5749, "total_steps": 8674, "loss": 0.4545479118824005, "lr": 5.603347570925242e-07, "epoch": 1.3255706709707171, "percentage": 66.28, "elapsed_time": "3:55:39", "remaining_time": "1:59:54"} +{"current_steps": 5750, "total_steps": 8674, "loss": 0.3969312310218811, "lr": 5.599923548009416e-07, "epoch": 1.3258012451002998, "percentage": 66.29, "elapsed_time": "3:55:42", "remaining_time": "1:59:51"} +{"current_steps": 5751, "total_steps": 8674, "loss": 0.4296644330024719, "lr": 5.59650016468929e-07, "epoch": 1.3260318192298823, "percentage": 66.3, "elapsed_time": "3:55:45", "remaining_time": "1:59:49"} +{"current_steps": 5752, "total_steps": 8674, "loss": 0.43291348218917847, 
"lr": 5.5930774214625e-07, "epoch": 1.326262393359465, "percentage": 66.31, "elapsed_time": "3:55:47", "remaining_time": "1:59:46"} +{"current_steps": 5753, "total_steps": 8674, "loss": 0.47684454917907715, "lr": 5.589655318826564e-07, "epoch": 1.3264929674890478, "percentage": 66.32, "elapsed_time": "3:55:50", "remaining_time": "1:59:44"} +{"current_steps": 5754, "total_steps": 8674, "loss": 0.48520004749298096, "lr": 5.586233857278924e-07, "epoch": 1.3267235416186303, "percentage": 66.34, "elapsed_time": "3:55:53", "remaining_time": "1:59:42"} +{"current_steps": 5755, "total_steps": 8674, "loss": 0.4434587359428406, "lr": 5.582813037316926e-07, "epoch": 1.326954115748213, "percentage": 66.35, "elapsed_time": "3:55:55", "remaining_time": "1:59:39"} +{"current_steps": 5756, "total_steps": 8674, "loss": 0.47306808829307556, "lr": 5.579392859437825e-07, "epoch": 1.3271846898777957, "percentage": 66.36, "elapsed_time": "3:55:57", "remaining_time": "1:59:37"} +{"current_steps": 5757, "total_steps": 8674, "loss": 0.4349653720855713, "lr": 5.575973324138772e-07, "epoch": 1.3274152640073784, "percentage": 66.37, "elapsed_time": "3:56:00", "remaining_time": "1:59:34"} +{"current_steps": 5758, "total_steps": 8674, "loss": 0.31277602910995483, "lr": 5.572554431916829e-07, "epoch": 1.3276458381369611, "percentage": 66.38, "elapsed_time": "3:56:02", "remaining_time": "1:59:32"} +{"current_steps": 5759, "total_steps": 8674, "loss": 0.4281114637851715, "lr": 5.569136183268974e-07, "epoch": 1.3278764122665436, "percentage": 66.39, "elapsed_time": "3:56:05", "remaining_time": "1:59:29"} +{"current_steps": 5760, "total_steps": 8674, "loss": 0.45071113109588623, "lr": 5.565718578692076e-07, "epoch": 1.3281069863961263, "percentage": 66.41, "elapsed_time": "3:56:07", "remaining_time": "1:59:27"} +{"current_steps": 5761, "total_steps": 8674, "loss": 0.426133394241333, "lr": 5.562301618682927e-07, "epoch": 1.328337560525709, "percentage": 66.42, "elapsed_time": "3:56:10", 
"remaining_time": "1:59:25"} +{"current_steps": 5762, "total_steps": 8674, "loss": 0.3882424235343933, "lr": 5.558885303738209e-07, "epoch": 1.3285681346552916, "percentage": 66.43, "elapsed_time": "3:56:12", "remaining_time": "1:59:22"} +{"current_steps": 5763, "total_steps": 8674, "loss": 0.4706958532333374, "lr": 5.55546963435452e-07, "epoch": 1.3287987087848743, "percentage": 66.44, "elapsed_time": "3:56:15", "remaining_time": "1:59:20"} +{"current_steps": 5764, "total_steps": 8674, "loss": 0.4868433475494385, "lr": 5.552054611028365e-07, "epoch": 1.329029282914457, "percentage": 66.45, "elapsed_time": "3:56:17", "remaining_time": "1:59:17"} +{"current_steps": 5765, "total_steps": 8674, "loss": 0.41839566826820374, "lr": 5.548640234256154e-07, "epoch": 1.3292598570440397, "percentage": 66.46, "elapsed_time": "3:56:19", "remaining_time": "1:59:15"} +{"current_steps": 5766, "total_steps": 8674, "loss": 0.4088629484176636, "lr": 5.545226504534195e-07, "epoch": 1.3294904311736224, "percentage": 66.47, "elapsed_time": "3:56:22", "remaining_time": "1:59:12"} +{"current_steps": 5767, "total_steps": 8674, "loss": 0.34617769718170166, "lr": 5.541813422358715e-07, "epoch": 1.329721005303205, "percentage": 66.49, "elapsed_time": "3:56:24", "remaining_time": "1:59:10"} +{"current_steps": 5768, "total_steps": 8674, "loss": 0.5098900198936462, "lr": 5.538400988225835e-07, "epoch": 1.3299515794327876, "percentage": 66.5, "elapsed_time": "3:56:27", "remaining_time": "1:59:07"} +{"current_steps": 5769, "total_steps": 8674, "loss": 0.4294108748435974, "lr": 5.534989202631586e-07, "epoch": 1.3301821535623704, "percentage": 66.51, "elapsed_time": "3:56:29", "remaining_time": "1:59:05"} +{"current_steps": 5770, "total_steps": 8674, "loss": 0.42205139994621277, "lr": 5.531578066071907e-07, "epoch": 1.3304127276919528, "percentage": 66.52, "elapsed_time": "3:56:31", "remaining_time": "1:59:02"} +{"current_steps": 5771, "total_steps": 8674, "loss": 0.5009530186653137, "lr": 
5.528167579042645e-07, "epoch": 1.3306433018215356, "percentage": 66.53, "elapsed_time": "3:56:34", "remaining_time": "1:59:00"} +{"current_steps": 5772, "total_steps": 8674, "loss": 0.554497241973877, "lr": 5.524757742039545e-07, "epoch": 1.3308738759511183, "percentage": 66.54, "elapsed_time": "3:56:36", "remaining_time": "1:58:57"} +{"current_steps": 5773, "total_steps": 8674, "loss": 0.3514432907104492, "lr": 5.521348555558263e-07, "epoch": 1.331104450080701, "percentage": 66.56, "elapsed_time": "3:56:39", "remaining_time": "1:58:55"} +{"current_steps": 5774, "total_steps": 8674, "loss": 0.4712038040161133, "lr": 5.51794002009436e-07, "epoch": 1.3313350242102837, "percentage": 66.57, "elapsed_time": "3:56:41", "remaining_time": "1:58:52"} +{"current_steps": 5775, "total_steps": 8674, "loss": 0.48556071519851685, "lr": 5.514532136143295e-07, "epoch": 1.3315655983398662, "percentage": 66.58, "elapsed_time": "3:56:44", "remaining_time": "1:58:50"} +{"current_steps": 5776, "total_steps": 8674, "loss": 0.43158456683158875, "lr": 5.511124904200448e-07, "epoch": 1.331796172469449, "percentage": 66.59, "elapsed_time": "3:56:46", "remaining_time": "1:58:47"} +{"current_steps": 5777, "total_steps": 8674, "loss": 0.5376255512237549, "lr": 5.507718324761085e-07, "epoch": 1.3320267465990316, "percentage": 66.6, "elapsed_time": "3:56:48", "remaining_time": "1:58:45"} +{"current_steps": 5778, "total_steps": 8674, "loss": 0.3800685405731201, "lr": 5.504312398320392e-07, "epoch": 1.3322573207286141, "percentage": 66.61, "elapsed_time": "3:56:51", "remaining_time": "1:58:42"} +{"current_steps": 5779, "total_steps": 8674, "loss": 0.4015260338783264, "lr": 5.500907125373458e-07, "epoch": 1.3324878948581969, "percentage": 66.62, "elapsed_time": "3:56:53", "remaining_time": "1:58:40"} +{"current_steps": 5780, "total_steps": 8674, "loss": 0.42762285470962524, "lr": 5.497502506415266e-07, "epoch": 1.3327184689877796, "percentage": 66.64, "elapsed_time": "3:56:56", "remaining_time": 
"1:58:37"} +{"current_steps": 5781, "total_steps": 8674, "loss": 0.4467644691467285, "lr": 5.494098541940719e-07, "epoch": 1.3329490431173623, "percentage": 66.65, "elapsed_time": "3:56:58", "remaining_time": "1:58:35"} +{"current_steps": 5782, "total_steps": 8674, "loss": 0.42699599266052246, "lr": 5.490695232444613e-07, "epoch": 1.333179617246945, "percentage": 66.66, "elapsed_time": "3:57:00", "remaining_time": "1:58:32"} +{"current_steps": 5783, "total_steps": 8674, "loss": 0.586537778377533, "lr": 5.487292578421659e-07, "epoch": 1.3334101913765275, "percentage": 66.67, "elapsed_time": "3:57:04", "remaining_time": "1:58:30"} +{"current_steps": 5784, "total_steps": 8674, "loss": 0.4525066018104553, "lr": 5.48389058036646e-07, "epoch": 1.3336407655061102, "percentage": 66.68, "elapsed_time": "3:57:06", "remaining_time": "1:58:28"} +{"current_steps": 5785, "total_steps": 8674, "loss": 0.40520548820495605, "lr": 5.480489238773535e-07, "epoch": 1.333871339635693, "percentage": 66.69, "elapsed_time": "3:57:08", "remaining_time": "1:58:25"} +{"current_steps": 5786, "total_steps": 8674, "loss": 0.3910450339317322, "lr": 5.477088554137304e-07, "epoch": 1.3341019137652754, "percentage": 66.71, "elapsed_time": "3:57:11", "remaining_time": "1:58:23"} +{"current_steps": 5787, "total_steps": 8674, "loss": 0.45285511016845703, "lr": 5.473688526952087e-07, "epoch": 1.3343324878948581, "percentage": 66.72, "elapsed_time": "3:57:13", "remaining_time": "1:58:20"} +{"current_steps": 5788, "total_steps": 8674, "loss": 0.39207279682159424, "lr": 5.47028915771212e-07, "epoch": 1.3345630620244409, "percentage": 66.73, "elapsed_time": "3:57:16", "remaining_time": "1:58:18"} +{"current_steps": 5789, "total_steps": 8674, "loss": 0.40281063318252563, "lr": 5.466890446911527e-07, "epoch": 1.3347936361540236, "percentage": 66.74, "elapsed_time": "3:57:18", "remaining_time": "1:58:15"} +{"current_steps": 5790, "total_steps": 8674, "loss": 0.5087814927101135, "lr": 5.463492395044354e-07, 
"epoch": 1.3350242102836063, "percentage": 66.75, "elapsed_time": "3:57:20", "remaining_time": "1:58:13"} +{"current_steps": 5791, "total_steps": 8674, "loss": 0.47597891092300415, "lr": 5.460095002604532e-07, "epoch": 1.3352547844131888, "percentage": 66.76, "elapsed_time": "3:57:23", "remaining_time": "1:58:10"} +{"current_steps": 5792, "total_steps": 8674, "loss": 0.5722953677177429, "lr": 5.456698270085917e-07, "epoch": 1.3354853585427715, "percentage": 66.77, "elapsed_time": "3:57:25", "remaining_time": "1:58:08"} +{"current_steps": 5793, "total_steps": 8674, "loss": 0.5133349299430847, "lr": 5.45330219798225e-07, "epoch": 1.3357159326723542, "percentage": 66.79, "elapsed_time": "3:57:28", "remaining_time": "1:58:06"} +{"current_steps": 5794, "total_steps": 8674, "loss": 0.46230804920196533, "lr": 5.449906786787187e-07, "epoch": 1.3359465068019367, "percentage": 66.8, "elapsed_time": "3:57:31", "remaining_time": "1:58:03"} +{"current_steps": 5795, "total_steps": 8674, "loss": 0.42002394795417786, "lr": 5.446512036994286e-07, "epoch": 1.3361770809315194, "percentage": 66.81, "elapsed_time": "3:57:33", "remaining_time": "1:58:01"} +{"current_steps": 5796, "total_steps": 8674, "loss": 0.42281097173690796, "lr": 5.443117949097013e-07, "epoch": 1.3364076550611022, "percentage": 66.82, "elapsed_time": "3:57:35", "remaining_time": "1:57:58"} +{"current_steps": 5797, "total_steps": 8674, "loss": 0.511898398399353, "lr": 5.439724523588726e-07, "epoch": 1.3366382291906849, "percentage": 66.83, "elapsed_time": "3:57:38", "remaining_time": "1:57:56"} +{"current_steps": 5798, "total_steps": 8674, "loss": 0.4475559592247009, "lr": 5.4363317609627e-07, "epoch": 1.3368688033202676, "percentage": 66.84, "elapsed_time": "3:57:40", "remaining_time": "1:57:53"} +{"current_steps": 5799, "total_steps": 8674, "loss": 0.4872414469718933, "lr": 5.432939661712103e-07, "epoch": 1.33709937744985, "percentage": 66.85, "elapsed_time": "3:57:43", "remaining_time": "1:57:51"} 
+{"current_steps": 5800, "total_steps": 8674, "loss": 0.40401679277420044, "lr": 5.429548226330009e-07, "epoch": 1.3373299515794328, "percentage": 66.87, "elapsed_time": "3:57:45", "remaining_time": "1:57:48"} +{"current_steps": 5801, "total_steps": 8674, "loss": 0.43559926748275757, "lr": 5.426157455309399e-07, "epoch": 1.3375605257090155, "percentage": 66.88, "elapsed_time": "3:57:49", "remaining_time": "1:57:47"} +{"current_steps": 5802, "total_steps": 8674, "loss": 0.44283759593963623, "lr": 5.422767349143158e-07, "epoch": 1.337791099838598, "percentage": 66.89, "elapsed_time": "3:57:51", "remaining_time": "1:57:44"} +{"current_steps": 5803, "total_steps": 8674, "loss": 0.3770032525062561, "lr": 5.419377908324077e-07, "epoch": 1.3380216739681807, "percentage": 66.9, "elapsed_time": "3:57:54", "remaining_time": "1:57:42"} +{"current_steps": 5804, "total_steps": 8674, "loss": 0.4497501850128174, "lr": 5.415989133344834e-07, "epoch": 1.3382522480977634, "percentage": 66.91, "elapsed_time": "3:57:56", "remaining_time": "1:57:39"} +{"current_steps": 5805, "total_steps": 8674, "loss": 0.5008253455162048, "lr": 5.412601024698033e-07, "epoch": 1.3384828222273462, "percentage": 66.92, "elapsed_time": "3:57:59", "remaining_time": "1:57:37"} +{"current_steps": 5806, "total_steps": 8674, "loss": 0.46178537607192993, "lr": 5.409213582876162e-07, "epoch": 1.3387133963569289, "percentage": 66.94, "elapsed_time": "3:58:01", "remaining_time": "1:57:34"} +{"current_steps": 5807, "total_steps": 8674, "loss": 0.39843931794166565, "lr": 5.405826808371625e-07, "epoch": 1.3389439704865114, "percentage": 66.95, "elapsed_time": "3:58:04", "remaining_time": "1:57:32"} +{"current_steps": 5808, "total_steps": 8674, "loss": 0.4829174280166626, "lr": 5.402440701676724e-07, "epoch": 1.339174544616094, "percentage": 66.96, "elapsed_time": "3:58:06", "remaining_time": "1:57:29"} +{"current_steps": 5809, "total_steps": 8674, "loss": 0.36173316836357117, "lr": 5.399055263283656e-07, "epoch": 
1.3394051187456768, "percentage": 66.97, "elapsed_time": "3:58:09", "remaining_time": "1:57:27"} +{"current_steps": 5810, "total_steps": 8674, "loss": 0.400304913520813, "lr": 5.395670493684536e-07, "epoch": 1.3396356928752593, "percentage": 66.98, "elapsed_time": "3:58:11", "remaining_time": "1:57:24"} +{"current_steps": 5811, "total_steps": 8674, "loss": 0.4536975622177124, "lr": 5.392286393371372e-07, "epoch": 1.339866267004842, "percentage": 66.99, "elapsed_time": "3:58:13", "remaining_time": "1:57:22"} +{"current_steps": 5812, "total_steps": 8674, "loss": 0.6474577188491821, "lr": 5.388902962836084e-07, "epoch": 1.3400968411344247, "percentage": 67.0, "elapsed_time": "3:58:16", "remaining_time": "1:57:19"} +{"current_steps": 5813, "total_steps": 8674, "loss": 0.48008009791374207, "lr": 5.385520202570477e-07, "epoch": 1.3403274152640074, "percentage": 67.02, "elapsed_time": "3:58:18", "remaining_time": "1:57:17"} +{"current_steps": 5814, "total_steps": 8674, "loss": 0.4518657326698303, "lr": 5.38213811306628e-07, "epoch": 1.3405579893935902, "percentage": 67.03, "elapsed_time": "3:58:21", "remaining_time": "1:57:14"} +{"current_steps": 5815, "total_steps": 8674, "loss": 0.449008584022522, "lr": 5.378756694815105e-07, "epoch": 1.3407885635231727, "percentage": 67.04, "elapsed_time": "3:58:23", "remaining_time": "1:57:12"} +{"current_steps": 5816, "total_steps": 8674, "loss": 0.5448319315910339, "lr": 5.375375948308483e-07, "epoch": 1.3410191376527554, "percentage": 67.05, "elapsed_time": "3:58:26", "remaining_time": "1:57:10"} +{"current_steps": 5817, "total_steps": 8674, "loss": 0.5078369379043579, "lr": 5.371995874037832e-07, "epoch": 1.341249711782338, "percentage": 67.06, "elapsed_time": "3:58:28", "remaining_time": "1:57:07"} +{"current_steps": 5818, "total_steps": 8674, "loss": 0.508685290813446, "lr": 5.368616472494482e-07, "epoch": 1.3414802859119206, "percentage": 67.07, "elapsed_time": "3:58:31", "remaining_time": "1:57:05"} +{"current_steps": 5819, 
"total_steps": 8674, "loss": 0.4166705012321472, "lr": 5.365237744169672e-07, "epoch": 1.3417108600415033, "percentage": 67.09, "elapsed_time": "3:58:33", "remaining_time": "1:57:02"} +{"current_steps": 5820, "total_steps": 8674, "loss": 0.4741361737251282, "lr": 5.361859689554524e-07, "epoch": 1.341941434171086, "percentage": 67.1, "elapsed_time": "3:58:35", "remaining_time": "1:57:00"} +{"current_steps": 5821, "total_steps": 8674, "loss": 0.36658185720443726, "lr": 5.358482309140079e-07, "epoch": 1.3421720083006687, "percentage": 67.11, "elapsed_time": "3:58:38", "remaining_time": "1:56:57"} +{"current_steps": 5822, "total_steps": 8674, "loss": 0.38921263813972473, "lr": 5.355105603417267e-07, "epoch": 1.3424025824302515, "percentage": 67.12, "elapsed_time": "3:58:40", "remaining_time": "1:56:55"} +{"current_steps": 5823, "total_steps": 8674, "loss": 0.5553977489471436, "lr": 5.351729572876935e-07, "epoch": 1.342633156559834, "percentage": 67.13, "elapsed_time": "3:58:43", "remaining_time": "1:56:52"} +{"current_steps": 5824, "total_steps": 8674, "loss": 0.3968391418457031, "lr": 5.348354218009813e-07, "epoch": 1.3428637306894167, "percentage": 67.14, "elapsed_time": "3:58:45", "remaining_time": "1:56:50"} +{"current_steps": 5825, "total_steps": 8674, "loss": 0.4289783239364624, "lr": 5.344979539306549e-07, "epoch": 1.3430943048189994, "percentage": 67.15, "elapsed_time": "3:58:48", "remaining_time": "1:56:47"} +{"current_steps": 5826, "total_steps": 8674, "loss": 0.45359861850738525, "lr": 5.341605537257686e-07, "epoch": 1.3433248789485819, "percentage": 67.17, "elapsed_time": "3:58:50", "remaining_time": "1:56:45"} +{"current_steps": 5827, "total_steps": 8674, "loss": 0.3571642339229584, "lr": 5.338232212353675e-07, "epoch": 1.3435554530781646, "percentage": 67.18, "elapsed_time": "3:58:53", "remaining_time": "1:56:43"} +{"current_steps": 5828, "total_steps": 8674, "loss": 0.3784096837043762, "lr": 5.334859565084855e-07, "epoch": 1.3437860272077473, 
"percentage": 67.19, "elapsed_time": "3:58:55", "remaining_time": "1:56:40"} +{"current_steps": 5829, "total_steps": 8674, "loss": 0.44996407628059387, "lr": 5.331487595941475e-07, "epoch": 1.34401660133733, "percentage": 67.2, "elapsed_time": "3:58:58", "remaining_time": "1:56:38"} +{"current_steps": 5830, "total_steps": 8674, "loss": 0.4466405510902405, "lr": 5.32811630541369e-07, "epoch": 1.3442471754669127, "percentage": 67.21, "elapsed_time": "3:59:00", "remaining_time": "1:56:35"} +{"current_steps": 5831, "total_steps": 8674, "loss": 0.34488850831985474, "lr": 5.324745693991545e-07, "epoch": 1.3444777495964952, "percentage": 67.22, "elapsed_time": "3:59:02", "remaining_time": "1:56:33"} +{"current_steps": 5832, "total_steps": 8674, "loss": 0.5530165433883667, "lr": 5.321375762164999e-07, "epoch": 1.344708323726078, "percentage": 67.24, "elapsed_time": "3:59:05", "remaining_time": "1:56:30"} +{"current_steps": 5833, "total_steps": 8674, "loss": 0.40732342004776, "lr": 5.318006510423898e-07, "epoch": 1.3449388978556607, "percentage": 67.25, "elapsed_time": "3:59:08", "remaining_time": "1:56:28"} +{"current_steps": 5834, "total_steps": 8674, "loss": 0.3364611566066742, "lr": 5.314637939258002e-07, "epoch": 1.3451694719852432, "percentage": 67.26, "elapsed_time": "3:59:10", "remaining_time": "1:56:26"} +{"current_steps": 5835, "total_steps": 8674, "loss": 0.43964290618896484, "lr": 5.311270049156966e-07, "epoch": 1.3454000461148259, "percentage": 67.27, "elapsed_time": "3:59:13", "remaining_time": "1:56:23"} +{"current_steps": 5836, "total_steps": 8674, "loss": 0.5203431844711304, "lr": 5.30790284061035e-07, "epoch": 1.3456306202444086, "percentage": 67.28, "elapsed_time": "3:59:15", "remaining_time": "1:56:21"} +{"current_steps": 5837, "total_steps": 8674, "loss": 0.4779793620109558, "lr": 5.304536314107607e-07, "epoch": 1.3458611943739913, "percentage": 67.29, "elapsed_time": "3:59:18", "remaining_time": "1:56:18"} +{"current_steps": 5838, "total_steps": 8674, 
"loss": 0.4769410490989685, "lr": 5.301170470138102e-07, "epoch": 1.346091768503574, "percentage": 67.3, "elapsed_time": "3:59:20", "remaining_time": "1:56:16"} +{"current_steps": 5839, "total_steps": 8674, "loss": 0.42390304803848267, "lr": 5.297805309191089e-07, "epoch": 1.3463223426331565, "percentage": 67.32, "elapsed_time": "3:59:23", "remaining_time": "1:56:13"} +{"current_steps": 5840, "total_steps": 8674, "loss": 0.5550302863121033, "lr": 5.294440831755727e-07, "epoch": 1.3465529167627392, "percentage": 67.33, "elapsed_time": "3:59:26", "remaining_time": "1:56:11"} +{"current_steps": 5841, "total_steps": 8674, "loss": 0.4897978901863098, "lr": 5.291077038321078e-07, "epoch": 1.3467834908923217, "percentage": 67.34, "elapsed_time": "3:59:28", "remaining_time": "1:56:09"} +{"current_steps": 5842, "total_steps": 8674, "loss": 0.4014284610748291, "lr": 5.287713929376105e-07, "epoch": 1.3470140650219045, "percentage": 67.35, "elapsed_time": "3:59:31", "remaining_time": "1:56:06"} +{"current_steps": 5843, "total_steps": 8674, "loss": 0.4299513101577759, "lr": 5.284351505409675e-07, "epoch": 1.3472446391514872, "percentage": 67.36, "elapsed_time": "3:59:33", "remaining_time": "1:56:04"} +{"current_steps": 5844, "total_steps": 8674, "loss": 0.44863104820251465, "lr": 5.280989766910541e-07, "epoch": 1.34747521328107, "percentage": 67.37, "elapsed_time": "3:59:35", "remaining_time": "1:56:01"} +{"current_steps": 5845, "total_steps": 8674, "loss": 0.41933274269104004, "lr": 5.277628714367374e-07, "epoch": 1.3477057874106526, "percentage": 67.39, "elapsed_time": "3:59:38", "remaining_time": "1:55:59"} +{"current_steps": 5846, "total_steps": 8674, "loss": 0.48257556557655334, "lr": 5.274268348268729e-07, "epoch": 1.347936361540235, "percentage": 67.4, "elapsed_time": "3:59:40", "remaining_time": "1:55:56"} +{"current_steps": 5847, "total_steps": 8674, "loss": 0.435384064912796, "lr": 5.270908669103078e-07, "epoch": 1.3481669356698178, "percentage": 67.41, 
"elapsed_time": "3:59:43", "remaining_time": "1:55:54"} +{"current_steps": 5848, "total_steps": 8674, "loss": 0.43291670083999634, "lr": 5.267549677358775e-07, "epoch": 1.3483975097994005, "percentage": 67.42, "elapsed_time": "3:59:45", "remaining_time": "1:55:51"} +{"current_steps": 5849, "total_steps": 8674, "loss": 0.4584086537361145, "lr": 5.264191373524089e-07, "epoch": 1.348628083928983, "percentage": 67.43, "elapsed_time": "3:59:47", "remaining_time": "1:55:49"} +{"current_steps": 5850, "total_steps": 8674, "loss": 0.44879037141799927, "lr": 5.260833758087187e-07, "epoch": 1.3488586580585658, "percentage": 67.44, "elapsed_time": "3:59:50", "remaining_time": "1:55:46"} +{"current_steps": 5851, "total_steps": 8674, "loss": 0.48467326164245605, "lr": 5.257476831536124e-07, "epoch": 1.3490892321881485, "percentage": 67.45, "elapsed_time": "3:59:53", "remaining_time": "1:55:44"} +{"current_steps": 5852, "total_steps": 8674, "loss": 0.4126189947128296, "lr": 5.254120594358871e-07, "epoch": 1.3493198063177312, "percentage": 67.47, "elapsed_time": "3:59:55", "remaining_time": "1:55:41"} +{"current_steps": 5853, "total_steps": 8674, "loss": 0.5592546463012695, "lr": 5.250765047043284e-07, "epoch": 1.349550380447314, "percentage": 67.48, "elapsed_time": "3:59:58", "remaining_time": "1:55:39"} +{"current_steps": 5854, "total_steps": 8674, "loss": 0.3269529342651367, "lr": 5.247410190077134e-07, "epoch": 1.3497809545768964, "percentage": 67.49, "elapsed_time": "4:00:00", "remaining_time": "1:55:37"} +{"current_steps": 5855, "total_steps": 8674, "loss": 0.42812949419021606, "lr": 5.244056023948075e-07, "epoch": 1.3500115287064791, "percentage": 67.5, "elapsed_time": "4:00:02", "remaining_time": "1:55:34"} +{"current_steps": 5856, "total_steps": 8674, "loss": 0.4266297221183777, "lr": 5.240702549143676e-07, "epoch": 1.3502421028360618, "percentage": 67.51, "elapsed_time": "4:00:05", "remaining_time": "1:55:32"} +{"current_steps": 5857, "total_steps": 8674, "loss": 
0.43848085403442383, "lr": 5.237349766151392e-07, "epoch": 1.3504726769656443, "percentage": 67.52, "elapsed_time": "4:00:07", "remaining_time": "1:55:29"} +{"current_steps": 5858, "total_steps": 8674, "loss": 0.47512906789779663, "lr": 5.233997675458588e-07, "epoch": 1.350703251095227, "percentage": 67.54, "elapsed_time": "4:00:10", "remaining_time": "1:55:27"} +{"current_steps": 5859, "total_steps": 8674, "loss": 0.3484492897987366, "lr": 5.230646277552527e-07, "epoch": 1.3509338252248098, "percentage": 67.55, "elapsed_time": "4:00:12", "remaining_time": "1:55:24"} +{"current_steps": 5860, "total_steps": 8674, "loss": 0.48915669322013855, "lr": 5.227295572920363e-07, "epoch": 1.3511643993543925, "percentage": 67.56, "elapsed_time": "4:00:15", "remaining_time": "1:55:22"} +{"current_steps": 5861, "total_steps": 8674, "loss": 0.415932834148407, "lr": 5.223945562049159e-07, "epoch": 1.3513949734839752, "percentage": 67.57, "elapsed_time": "4:00:17", "remaining_time": "1:55:19"} +{"current_steps": 5862, "total_steps": 8674, "loss": 0.47945982217788696, "lr": 5.220596245425869e-07, "epoch": 1.3516255476135577, "percentage": 67.58, "elapsed_time": "4:00:20", "remaining_time": "1:55:17"} +{"current_steps": 5863, "total_steps": 8674, "loss": 0.4322330951690674, "lr": 5.217247623537356e-07, "epoch": 1.3518561217431404, "percentage": 67.59, "elapsed_time": "4:00:22", "remaining_time": "1:55:14"} +{"current_steps": 5864, "total_steps": 8674, "loss": 0.4608469605445862, "lr": 5.213899696870369e-07, "epoch": 1.3520866958727231, "percentage": 67.6, "elapsed_time": "4:00:24", "remaining_time": "1:55:12"} +{"current_steps": 5865, "total_steps": 8674, "loss": 0.5108528137207031, "lr": 5.210552465911566e-07, "epoch": 1.3523172700023056, "percentage": 67.62, "elapsed_time": "4:00:27", "remaining_time": "1:55:09"} +{"current_steps": 5866, "total_steps": 8674, "loss": 0.37947285175323486, "lr": 5.207205931147502e-07, "epoch": 1.3525478441318883, "percentage": 67.63, "elapsed_time": 
"4:00:29", "remaining_time": "1:55:07"} +{"current_steps": 5867, "total_steps": 8674, "loss": 0.49094486236572266, "lr": 5.203860093064635e-07, "epoch": 1.352778418261471, "percentage": 67.64, "elapsed_time": "4:00:32", "remaining_time": "1:55:04"} +{"current_steps": 5868, "total_steps": 8674, "loss": 0.34238702058792114, "lr": 5.200514952149308e-07, "epoch": 1.3530089923910538, "percentage": 67.65, "elapsed_time": "4:00:34", "remaining_time": "1:55:02"} +{"current_steps": 5869, "total_steps": 8674, "loss": 0.46390393376350403, "lr": 5.197170508887774e-07, "epoch": 1.3532395665206365, "percentage": 67.66, "elapsed_time": "4:00:36", "remaining_time": "1:54:59"} +{"current_steps": 5870, "total_steps": 8674, "loss": 0.44219160079956055, "lr": 5.193826763766183e-07, "epoch": 1.353470140650219, "percentage": 67.67, "elapsed_time": "4:00:39", "remaining_time": "1:54:57"} +{"current_steps": 5871, "total_steps": 8674, "loss": 0.42801350355148315, "lr": 5.190483717270578e-07, "epoch": 1.3537007147798017, "percentage": 67.69, "elapsed_time": "4:00:41", "remaining_time": "1:54:54"} +{"current_steps": 5872, "total_steps": 8674, "loss": 0.43861454725265503, "lr": 5.187141369886906e-07, "epoch": 1.3539312889093844, "percentage": 67.7, "elapsed_time": "4:00:44", "remaining_time": "1:54:52"} +{"current_steps": 5873, "total_steps": 8674, "loss": 0.4381449222564697, "lr": 5.183799722101014e-07, "epoch": 1.354161863038967, "percentage": 67.71, "elapsed_time": "4:00:46", "remaining_time": "1:54:49"} +{"current_steps": 5874, "total_steps": 8674, "loss": 0.4341619610786438, "lr": 5.180458774398646e-07, "epoch": 1.3543924371685496, "percentage": 67.72, "elapsed_time": "4:00:48", "remaining_time": "1:54:47"} +{"current_steps": 5875, "total_steps": 8674, "loss": 0.4376588463783264, "lr": 5.177118527265437e-07, "epoch": 1.3546230112981323, "percentage": 67.73, "elapsed_time": "4:00:51", "remaining_time": "1:54:44"} +{"current_steps": 5876, "total_steps": 8674, "loss": 0.38568538427352905, 
"lr": 5.173778981186932e-07, "epoch": 1.354853585427715, "percentage": 67.74, "elapsed_time": "4:00:53", "remaining_time": "1:54:42"} +{"current_steps": 5877, "total_steps": 8674, "loss": 0.44178056716918945, "lr": 5.170440136648561e-07, "epoch": 1.3550841595572978, "percentage": 67.75, "elapsed_time": "4:00:56", "remaining_time": "1:54:39"} +{"current_steps": 5878, "total_steps": 8674, "loss": 0.49847882986068726, "lr": 5.167101994135665e-07, "epoch": 1.3553147336868803, "percentage": 67.77, "elapsed_time": "4:00:58", "remaining_time": "1:54:37"} +{"current_steps": 5879, "total_steps": 8674, "loss": 0.33697545528411865, "lr": 5.163764554133476e-07, "epoch": 1.355545307816463, "percentage": 67.78, "elapsed_time": "4:01:00", "remaining_time": "1:54:34"} +{"current_steps": 5880, "total_steps": 8674, "loss": 0.5216578841209412, "lr": 5.160427817127117e-07, "epoch": 1.3557758819460457, "percentage": 67.79, "elapsed_time": "4:01:03", "remaining_time": "1:54:32"} +{"current_steps": 5881, "total_steps": 8674, "loss": 0.5101301670074463, "lr": 5.157091783601624e-07, "epoch": 1.3560064560756282, "percentage": 67.8, "elapsed_time": "4:01:05", "remaining_time": "1:54:29"} +{"current_steps": 5882, "total_steps": 8674, "loss": 0.47876495122909546, "lr": 5.15375645404192e-07, "epoch": 1.356237030205211, "percentage": 67.81, "elapsed_time": "4:01:07", "remaining_time": "1:54:27"} +{"current_steps": 5883, "total_steps": 8674, "loss": 0.4656233787536621, "lr": 5.150421828932837e-07, "epoch": 1.3564676043347936, "percentage": 67.82, "elapsed_time": "4:01:10", "remaining_time": "1:54:25"} +{"current_steps": 5884, "total_steps": 8674, "loss": 0.4392930269241333, "lr": 5.147087908759082e-07, "epoch": 1.3566981784643763, "percentage": 67.83, "elapsed_time": "4:01:13", "remaining_time": "1:54:22"} +{"current_steps": 5885, "total_steps": 8674, "loss": 0.5044047832489014, "lr": 5.143754694005289e-07, "epoch": 1.356928752593959, "percentage": 67.85, "elapsed_time": "4:01:15", 
"remaining_time": "1:54:20"} +{"current_steps": 5886, "total_steps": 8674, "loss": 0.4345476031303406, "lr": 5.140422185155964e-07, "epoch": 1.3571593267235416, "percentage": 67.86, "elapsed_time": "4:01:17", "remaining_time": "1:54:17"} +{"current_steps": 5887, "total_steps": 8674, "loss": 0.49207669496536255, "lr": 5.137090382695528e-07, "epoch": 1.3573899008531243, "percentage": 67.87, "elapsed_time": "4:01:20", "remaining_time": "1:54:15"} +{"current_steps": 5888, "total_steps": 8674, "loss": 0.4054356813430786, "lr": 5.133759287108286e-07, "epoch": 1.357620474982707, "percentage": 67.88, "elapsed_time": "4:01:22", "remaining_time": "1:54:12"} +{"current_steps": 5889, "total_steps": 8674, "loss": 0.5436004400253296, "lr": 5.130428898878449e-07, "epoch": 1.3578510491122895, "percentage": 67.89, "elapsed_time": "4:01:25", "remaining_time": "1:54:10"} +{"current_steps": 5890, "total_steps": 8674, "loss": 0.4832550287246704, "lr": 5.127099218490127e-07, "epoch": 1.3580816232418722, "percentage": 67.9, "elapsed_time": "4:01:27", "remaining_time": "1:54:07"} +{"current_steps": 5891, "total_steps": 8674, "loss": 0.38890475034713745, "lr": 5.123770246427315e-07, "epoch": 1.358312197371455, "percentage": 67.92, "elapsed_time": "4:01:29", "remaining_time": "1:54:05"} +{"current_steps": 5892, "total_steps": 8674, "loss": 0.49784210324287415, "lr": 5.12044198317392e-07, "epoch": 1.3585427715010376, "percentage": 67.93, "elapsed_time": "4:01:32", "remaining_time": "1:54:02"} +{"current_steps": 5893, "total_steps": 8674, "loss": 0.5033924579620361, "lr": 5.117114429213732e-07, "epoch": 1.3587733456306204, "percentage": 67.94, "elapsed_time": "4:01:35", "remaining_time": "1:54:00"} +{"current_steps": 5894, "total_steps": 8674, "loss": 0.4857698678970337, "lr": 5.113787585030454e-07, "epoch": 1.3590039197602028, "percentage": 67.95, "elapsed_time": "4:01:37", "remaining_time": "1:53:58"} +{"current_steps": 5895, "total_steps": 8674, "loss": 0.4269944429397583, "lr": 
5.110461451107663e-07, "epoch": 1.3592344938897856, "percentage": 67.96, "elapsed_time": "4:01:40", "remaining_time": "1:53:55"} +{"current_steps": 5896, "total_steps": 8674, "loss": 0.44045162200927734, "lr": 5.107136027928858e-07, "epoch": 1.3594650680193683, "percentage": 67.97, "elapsed_time": "4:01:42", "remaining_time": "1:53:53"} +{"current_steps": 5897, "total_steps": 8674, "loss": 0.5223391056060791, "lr": 5.103811315977418e-07, "epoch": 1.3596956421489508, "percentage": 67.98, "elapsed_time": "4:01:45", "remaining_time": "1:53:50"} +{"current_steps": 5898, "total_steps": 8674, "loss": 0.45988473296165466, "lr": 5.100487315736627e-07, "epoch": 1.3599262162785335, "percentage": 68.0, "elapsed_time": "4:01:47", "remaining_time": "1:53:48"} +{"current_steps": 5899, "total_steps": 8674, "loss": 0.46342164278030396, "lr": 5.097164027689661e-07, "epoch": 1.3601567904081162, "percentage": 68.01, "elapsed_time": "4:01:49", "remaining_time": "1:53:45"} +{"current_steps": 5900, "total_steps": 8674, "loss": 0.48150479793548584, "lr": 5.093841452319588e-07, "epoch": 1.360387364537699, "percentage": 68.02, "elapsed_time": "4:01:52", "remaining_time": "1:53:43"} +{"current_steps": 5901, "total_steps": 8674, "loss": 0.3971351981163025, "lr": 5.090519590109386e-07, "epoch": 1.3606179386672816, "percentage": 68.03, "elapsed_time": "4:01:56", "remaining_time": "1:53:41"} +{"current_steps": 5902, "total_steps": 8674, "loss": 0.44869956374168396, "lr": 5.087198441541914e-07, "epoch": 1.3608485127968641, "percentage": 68.04, "elapsed_time": "4:01:58", "remaining_time": "1:53:39"} +{"current_steps": 5903, "total_steps": 8674, "loss": 0.3402775526046753, "lr": 5.083878007099943e-07, "epoch": 1.3610790869264469, "percentage": 68.05, "elapsed_time": "4:02:01", "remaining_time": "1:53:36"} +{"current_steps": 5904, "total_steps": 8674, "loss": 0.4031033515930176, "lr": 5.080558287266119e-07, "epoch": 1.3613096610560296, "percentage": 68.07, "elapsed_time": "4:02:03", 
"remaining_time": "1:53:34"} +{"current_steps": 5905, "total_steps": 8674, "loss": 0.493259459733963, "lr": 5.077239282523012e-07, "epoch": 1.361540235185612, "percentage": 68.08, "elapsed_time": "4:02:05", "remaining_time": "1:53:31"} +{"current_steps": 5906, "total_steps": 8674, "loss": 0.39178919792175293, "lr": 5.073920993353063e-07, "epoch": 1.3617708093151948, "percentage": 68.09, "elapsed_time": "4:02:08", "remaining_time": "1:53:29"} +{"current_steps": 5907, "total_steps": 8674, "loss": 0.5091253519058228, "lr": 5.070603420238624e-07, "epoch": 1.3620013834447775, "percentage": 68.1, "elapsed_time": "4:02:10", "remaining_time": "1:53:26"} +{"current_steps": 5908, "total_steps": 8674, "loss": 0.416462779045105, "lr": 5.067286563661934e-07, "epoch": 1.3622319575743602, "percentage": 68.11, "elapsed_time": "4:02:13", "remaining_time": "1:53:24"} +{"current_steps": 5909, "total_steps": 8674, "loss": 0.5018768310546875, "lr": 5.063970424105137e-07, "epoch": 1.362462531703943, "percentage": 68.12, "elapsed_time": "4:02:15", "remaining_time": "1:53:21"} +{"current_steps": 5910, "total_steps": 8674, "loss": 0.5512624979019165, "lr": 5.060655002050262e-07, "epoch": 1.3626931058335254, "percentage": 68.13, "elapsed_time": "4:02:18", "remaining_time": "1:53:19"} +{"current_steps": 5911, "total_steps": 8674, "loss": 0.3953768014907837, "lr": 5.057340297979241e-07, "epoch": 1.3629236799631081, "percentage": 68.15, "elapsed_time": "4:02:20", "remaining_time": "1:53:16"} +{"current_steps": 5912, "total_steps": 8674, "loss": 0.4355456233024597, "lr": 5.054026312373896e-07, "epoch": 1.3631542540926909, "percentage": 68.16, "elapsed_time": "4:02:23", "remaining_time": "1:53:14"} +{"current_steps": 5913, "total_steps": 8674, "loss": 0.4826827645301819, "lr": 5.050713045715955e-07, "epoch": 1.3633848282222734, "percentage": 68.17, "elapsed_time": "4:02:25", "remaining_time": "1:53:11"} +{"current_steps": 5914, "total_steps": 8674, "loss": 0.47084230184555054, "lr": 
5.047400498487035e-07, "epoch": 1.363615402351856, "percentage": 68.18, "elapsed_time": "4:02:27", "remaining_time": "1:53:09"} +{"current_steps": 5915, "total_steps": 8674, "loss": 0.5273452997207642, "lr": 5.044088671168644e-07, "epoch": 1.3638459764814388, "percentage": 68.19, "elapsed_time": "4:02:30", "remaining_time": "1:53:06"} +{"current_steps": 5916, "total_steps": 8674, "loss": 0.44878947734832764, "lr": 5.040777564242194e-07, "epoch": 1.3640765506110215, "percentage": 68.2, "elapsed_time": "4:02:32", "remaining_time": "1:53:04"} +{"current_steps": 5917, "total_steps": 8674, "loss": 0.47986388206481934, "lr": 5.03746717818898e-07, "epoch": 1.3643071247406042, "percentage": 68.22, "elapsed_time": "4:02:35", "remaining_time": "1:53:02"} +{"current_steps": 5918, "total_steps": 8674, "loss": 0.4807628393173218, "lr": 5.034157513490211e-07, "epoch": 1.3645376988701867, "percentage": 68.23, "elapsed_time": "4:02:37", "remaining_time": "1:52:59"} +{"current_steps": 5919, "total_steps": 8674, "loss": 0.46027708053588867, "lr": 5.030848570626969e-07, "epoch": 1.3647682729997694, "percentage": 68.24, "elapsed_time": "4:02:40", "remaining_time": "1:52:57"} +{"current_steps": 5920, "total_steps": 8674, "loss": 0.3803088963031769, "lr": 5.027540350080249e-07, "epoch": 1.3649988471293522, "percentage": 68.25, "elapsed_time": "4:02:42", "remaining_time": "1:52:54"} +{"current_steps": 5921, "total_steps": 8674, "loss": 0.5530920028686523, "lr": 5.024232852330939e-07, "epoch": 1.3652294212589347, "percentage": 68.26, "elapsed_time": "4:02:45", "remaining_time": "1:52:52"} +{"current_steps": 5922, "total_steps": 8674, "loss": 0.45984846353530884, "lr": 5.020926077859805e-07, "epoch": 1.3654599953885174, "percentage": 68.27, "elapsed_time": "4:02:47", "remaining_time": "1:52:49"} +{"current_steps": 5923, "total_steps": 8674, "loss": 0.4448089301586151, "lr": 5.017620027147533e-07, "epoch": 1.3656905695181, "percentage": 68.28, "elapsed_time": "4:02:49", "remaining_time": 
"1:52:47"} +{"current_steps": 5924, "total_steps": 8674, "loss": 0.4226706326007843, "lr": 5.01431470067468e-07, "epoch": 1.3659211436476828, "percentage": 68.3, "elapsed_time": "4:02:52", "remaining_time": "1:52:44"} +{"current_steps": 5925, "total_steps": 8674, "loss": 0.5243814587593079, "lr": 5.011010098921718e-07, "epoch": 1.3661517177772655, "percentage": 68.31, "elapsed_time": "4:02:55", "remaining_time": "1:52:42"} +{"current_steps": 5926, "total_steps": 8674, "loss": 0.5733383893966675, "lr": 5.007706222368995e-07, "epoch": 1.366382291906848, "percentage": 68.32, "elapsed_time": "4:02:57", "remaining_time": "1:52:39"} +{"current_steps": 5927, "total_steps": 8674, "loss": 0.4583539366722107, "lr": 5.00440307149677e-07, "epoch": 1.3666128660364307, "percentage": 68.33, "elapsed_time": "4:02:59", "remaining_time": "1:52:37"} +{"current_steps": 5928, "total_steps": 8674, "loss": 0.474712610244751, "lr": 5.001100646785186e-07, "epoch": 1.3668434401660134, "percentage": 68.34, "elapsed_time": "4:03:02", "remaining_time": "1:52:34"} +{"current_steps": 5929, "total_steps": 8674, "loss": 0.3995950222015381, "lr": 4.997798948714291e-07, "epoch": 1.367074014295596, "percentage": 68.35, "elapsed_time": "4:03:04", "remaining_time": "1:52:32"} +{"current_steps": 5930, "total_steps": 8674, "loss": 0.4236767888069153, "lr": 4.994497977764011e-07, "epoch": 1.3673045884251787, "percentage": 68.37, "elapsed_time": "4:03:07", "remaining_time": "1:52:29"} +{"current_steps": 5931, "total_steps": 8674, "loss": 0.4972396492958069, "lr": 4.991197734414178e-07, "epoch": 1.3675351625547614, "percentage": 68.38, "elapsed_time": "4:03:09", "remaining_time": "1:52:27"} +{"current_steps": 5932, "total_steps": 8674, "loss": 0.444613516330719, "lr": 4.98789821914452e-07, "epoch": 1.367765736684344, "percentage": 68.39, "elapsed_time": "4:03:11", "remaining_time": "1:52:24"} +{"current_steps": 5933, "total_steps": 8674, "loss": 0.4955690801143646, "lr": 4.984599432434649e-07, "epoch": 
1.3679963108139268, "percentage": 68.4, "elapsed_time": "4:03:14", "remaining_time": "1:52:22"} +{"current_steps": 5934, "total_steps": 8674, "loss": 0.4983398914337158, "lr": 4.981301374764084e-07, "epoch": 1.3682268849435093, "percentage": 68.41, "elapsed_time": "4:03:16", "remaining_time": "1:52:20"} +{"current_steps": 5935, "total_steps": 8674, "loss": 0.45190921425819397, "lr": 4.978004046612223e-07, "epoch": 1.368457459073092, "percentage": 68.42, "elapsed_time": "4:03:19", "remaining_time": "1:52:17"} +{"current_steps": 5936, "total_steps": 8674, "loss": 0.5014151334762573, "lr": 4.974707448458369e-07, "epoch": 1.3686880332026747, "percentage": 68.43, "elapsed_time": "4:03:21", "remaining_time": "1:52:15"} +{"current_steps": 5937, "total_steps": 8674, "loss": 0.3868405818939209, "lr": 4.971411580781719e-07, "epoch": 1.3689186073322572, "percentage": 68.45, "elapsed_time": "4:03:24", "remaining_time": "1:52:12"} +{"current_steps": 5938, "total_steps": 8674, "loss": 0.4093654155731201, "lr": 4.968116444061363e-07, "epoch": 1.36914918146184, "percentage": 68.46, "elapsed_time": "4:03:26", "remaining_time": "1:52:10"} +{"current_steps": 5939, "total_steps": 8674, "loss": 0.3945506513118744, "lr": 4.964822038776276e-07, "epoch": 1.3693797555914227, "percentage": 68.47, "elapsed_time": "4:03:29", "remaining_time": "1:52:07"} +{"current_steps": 5940, "total_steps": 8674, "loss": 0.3645547330379486, "lr": 4.961528365405333e-07, "epoch": 1.3696103297210054, "percentage": 68.48, "elapsed_time": "4:03:31", "remaining_time": "1:52:05"} +{"current_steps": 5941, "total_steps": 8674, "loss": 0.36679786443710327, "lr": 4.958235424427309e-07, "epoch": 1.369840903850588, "percentage": 68.49, "elapsed_time": "4:03:33", "remaining_time": "1:52:02"} +{"current_steps": 5942, "total_steps": 8674, "loss": 0.4892774820327759, "lr": 4.954943216320861e-07, "epoch": 1.3700714779801706, "percentage": 68.5, "elapsed_time": "4:03:36", "remaining_time": "1:52:00"} +{"current_steps": 5943, 
"total_steps": 8674, "loss": 0.40406349301338196, "lr": 4.951651741564544e-07, "epoch": 1.3703020521097533, "percentage": 68.52, "elapsed_time": "4:03:38", "remaining_time": "1:51:57"} +{"current_steps": 5944, "total_steps": 8674, "loss": 0.4219849407672882, "lr": 4.948361000636812e-07, "epoch": 1.370532626239336, "percentage": 68.53, "elapsed_time": "4:03:41", "remaining_time": "1:51:55"} +{"current_steps": 5945, "total_steps": 8674, "loss": 0.5329363346099854, "lr": 4.945070994016008e-07, "epoch": 1.3707632003689185, "percentage": 68.54, "elapsed_time": "4:03:43", "remaining_time": "1:51:52"} +{"current_steps": 5946, "total_steps": 8674, "loss": 0.42577850818634033, "lr": 4.941781722180361e-07, "epoch": 1.3709937744985012, "percentage": 68.55, "elapsed_time": "4:03:46", "remaining_time": "1:51:50"} +{"current_steps": 5947, "total_steps": 8674, "loss": 0.4157155156135559, "lr": 4.938493185608008e-07, "epoch": 1.371224348628084, "percentage": 68.56, "elapsed_time": "4:03:48", "remaining_time": "1:51:48"} +{"current_steps": 5948, "total_steps": 8674, "loss": 0.46491485834121704, "lr": 4.935205384776965e-07, "epoch": 1.3714549227576667, "percentage": 68.57, "elapsed_time": "4:03:51", "remaining_time": "1:51:45"} +{"current_steps": 5949, "total_steps": 8674, "loss": 0.39582759141921997, "lr": 4.931918320165151e-07, "epoch": 1.3716854968872494, "percentage": 68.58, "elapsed_time": "4:03:53", "remaining_time": "1:51:43"} +{"current_steps": 5950, "total_steps": 8674, "loss": 0.4380473792552948, "lr": 4.928631992250371e-07, "epoch": 1.3719160710168319, "percentage": 68.6, "elapsed_time": "4:03:56", "remaining_time": "1:51:40"} +{"current_steps": 5951, "total_steps": 8674, "loss": 0.5044572949409485, "lr": 4.925346401510327e-07, "epoch": 1.3721466451464146, "percentage": 68.61, "elapsed_time": "4:03:58", "remaining_time": "1:51:38"} +{"current_steps": 5952, "total_steps": 8674, "loss": 0.4808889627456665, "lr": 4.922061548422617e-07, "epoch": 1.372377219275997, 
"percentage": 68.62, "elapsed_time": "4:04:01", "remaining_time": "1:51:35"} +{"current_steps": 5953, "total_steps": 8674, "loss": 0.4215632677078247, "lr": 4.91877743346472e-07, "epoch": 1.3726077934055798, "percentage": 68.63, "elapsed_time": "4:04:03", "remaining_time": "1:51:33"} +{"current_steps": 5954, "total_steps": 8674, "loss": 0.4999268651008606, "lr": 4.915494057114025e-07, "epoch": 1.3728383675351625, "percentage": 68.64, "elapsed_time": "4:04:05", "remaining_time": "1:51:30"} +{"current_steps": 5955, "total_steps": 8674, "loss": 0.476152241230011, "lr": 4.912211419847793e-07, "epoch": 1.3730689416647452, "percentage": 68.65, "elapsed_time": "4:04:08", "remaining_time": "1:51:28"} +{"current_steps": 5956, "total_steps": 8674, "loss": 0.4253045320510864, "lr": 4.908929522143201e-07, "epoch": 1.373299515794328, "percentage": 68.66, "elapsed_time": "4:04:10", "remaining_time": "1:51:25"} +{"current_steps": 5957, "total_steps": 8674, "loss": 0.4251098036766052, "lr": 4.905648364477293e-07, "epoch": 1.3735300899239105, "percentage": 68.68, "elapsed_time": "4:04:13", "remaining_time": "1:51:23"} +{"current_steps": 5958, "total_steps": 8674, "loss": 0.3820844888687134, "lr": 4.902367947327029e-07, "epoch": 1.3737606640534932, "percentage": 68.69, "elapsed_time": "4:04:15", "remaining_time": "1:51:20"} +{"current_steps": 5959, "total_steps": 8674, "loss": 0.4725508689880371, "lr": 4.899088271169245e-07, "epoch": 1.373991238183076, "percentage": 68.7, "elapsed_time": "4:04:18", "remaining_time": "1:51:18"} +{"current_steps": 5960, "total_steps": 8674, "loss": 0.48313626646995544, "lr": 4.895809336480675e-07, "epoch": 1.3742218123126584, "percentage": 68.71, "elapsed_time": "4:04:20", "remaining_time": "1:51:16"} +{"current_steps": 5961, "total_steps": 8674, "loss": 0.5344939231872559, "lr": 4.892531143737952e-07, "epoch": 1.374452386442241, "percentage": 68.72, "elapsed_time": "4:04:23", "remaining_time": "1:51:13"} +{"current_steps": 5962, "total_steps": 8674, 
"loss": 0.4305552840232849, "lr": 4.889253693417585e-07, "epoch": 1.3746829605718238, "percentage": 68.73, "elapsed_time": "4:04:25", "remaining_time": "1:51:11"} +{"current_steps": 5963, "total_steps": 8674, "loss": 0.3564034700393677, "lr": 4.885976985995996e-07, "epoch": 1.3749135347014065, "percentage": 68.75, "elapsed_time": "4:04:28", "remaining_time": "1:51:08"} +{"current_steps": 5964, "total_steps": 8674, "loss": 0.5498751997947693, "lr": 4.882701021949475e-07, "epoch": 1.3751441088309893, "percentage": 68.76, "elapsed_time": "4:04:30", "remaining_time": "1:51:06"} +{"current_steps": 5965, "total_steps": 8674, "loss": 0.4489964246749878, "lr": 4.879425801754226e-07, "epoch": 1.3753746829605717, "percentage": 68.77, "elapsed_time": "4:04:33", "remaining_time": "1:51:03"} +{"current_steps": 5966, "total_steps": 8674, "loss": 0.4142688810825348, "lr": 4.87615132588633e-07, "epoch": 1.3756052570901545, "percentage": 68.78, "elapsed_time": "4:04:35", "remaining_time": "1:51:01"} +{"current_steps": 5967, "total_steps": 8674, "loss": 0.3823632597923279, "lr": 4.872877594821767e-07, "epoch": 1.3758358312197372, "percentage": 68.79, "elapsed_time": "4:04:38", "remaining_time": "1:50:58"} +{"current_steps": 5968, "total_steps": 8674, "loss": 0.39014697074890137, "lr": 4.869604609036408e-07, "epoch": 1.3760664053493197, "percentage": 68.8, "elapsed_time": "4:04:40", "remaining_time": "1:50:56"} +{"current_steps": 5969, "total_steps": 8674, "loss": 0.3907933235168457, "lr": 4.866332369006016e-07, "epoch": 1.3762969794789024, "percentage": 68.81, "elapsed_time": "4:04:42", "remaining_time": "1:50:53"} +{"current_steps": 5970, "total_steps": 8674, "loss": 0.3872087001800537, "lr": 4.863060875206244e-07, "epoch": 1.376527553608485, "percentage": 68.83, "elapsed_time": "4:04:45", "remaining_time": "1:50:51"} +{"current_steps": 5971, "total_steps": 8674, "loss": 0.40380537509918213, "lr": 4.85979012811263e-07, "epoch": 1.3767581277380678, "percentage": 68.84, 
"elapsed_time": "4:04:47", "remaining_time": "1:50:48"} +{"current_steps": 5972, "total_steps": 8674, "loss": 0.39867663383483887, "lr": 4.856520128200621e-07, "epoch": 1.3769887018676505, "percentage": 68.85, "elapsed_time": "4:04:49", "remaining_time": "1:50:46"} +{"current_steps": 5973, "total_steps": 8674, "loss": 0.5337423086166382, "lr": 4.853250875945534e-07, "epoch": 1.377219275997233, "percentage": 68.86, "elapsed_time": "4:04:52", "remaining_time": "1:50:43"} +{"current_steps": 5974, "total_steps": 8674, "loss": 0.3824300765991211, "lr": 4.849982371822593e-07, "epoch": 1.3774498501268158, "percentage": 68.87, "elapsed_time": "4:04:54", "remaining_time": "1:50:41"} +{"current_steps": 5975, "total_steps": 8674, "loss": 0.3613823652267456, "lr": 4.846714616306907e-07, "epoch": 1.3776804242563985, "percentage": 68.88, "elapsed_time": "4:04:57", "remaining_time": "1:50:39"} +{"current_steps": 5976, "total_steps": 8674, "loss": 0.5040241479873657, "lr": 4.843447609873484e-07, "epoch": 1.377910998385981, "percentage": 68.9, "elapsed_time": "4:04:59", "remaining_time": "1:50:36"} +{"current_steps": 5977, "total_steps": 8674, "loss": 0.4639400243759155, "lr": 4.840181352997207e-07, "epoch": 1.3781415725155637, "percentage": 68.91, "elapsed_time": "4:05:02", "remaining_time": "1:50:34"} +{"current_steps": 5978, "total_steps": 8674, "loss": 0.503246009349823, "lr": 4.836915846152867e-07, "epoch": 1.3783721466451464, "percentage": 68.92, "elapsed_time": "4:05:04", "remaining_time": "1:50:31"} +{"current_steps": 5979, "total_steps": 8674, "loss": 0.3974607586860657, "lr": 4.833651089815135e-07, "epoch": 1.3786027207747291, "percentage": 68.93, "elapsed_time": "4:05:06", "remaining_time": "1:50:29"} +{"current_steps": 5980, "total_steps": 8674, "loss": 0.43200844526290894, "lr": 4.830387084458573e-07, "epoch": 1.3788332949043118, "percentage": 68.94, "elapsed_time": "4:05:09", "remaining_time": "1:50:26"} +{"current_steps": 5981, "total_steps": 8674, "loss": 
0.547272801399231, "lr": 4.827123830557644e-07, "epoch": 1.3790638690338943, "percentage": 68.95, "elapsed_time": "4:05:11", "remaining_time": "1:50:24"} +{"current_steps": 5982, "total_steps": 8674, "loss": 0.4509696960449219, "lr": 4.823861328586688e-07, "epoch": 1.379294443163477, "percentage": 68.96, "elapsed_time": "4:05:14", "remaining_time": "1:50:21"} +{"current_steps": 5983, "total_steps": 8674, "loss": 0.46022483706474304, "lr": 4.820599579019946e-07, "epoch": 1.3795250172930598, "percentage": 68.98, "elapsed_time": "4:05:16", "remaining_time": "1:50:19"} +{"current_steps": 5984, "total_steps": 8674, "loss": 0.40973198413848877, "lr": 4.817338582331548e-07, "epoch": 1.3797555914226423, "percentage": 68.99, "elapsed_time": "4:05:19", "remaining_time": "1:50:16"} +{"current_steps": 5985, "total_steps": 8674, "loss": 0.39012736082077026, "lr": 4.814078338995515e-07, "epoch": 1.379986165552225, "percentage": 69.0, "elapsed_time": "4:05:21", "remaining_time": "1:50:14"} +{"current_steps": 5986, "total_steps": 8674, "loss": 0.40657323598861694, "lr": 4.810818849485749e-07, "epoch": 1.3802167396818077, "percentage": 69.01, "elapsed_time": "4:05:23", "remaining_time": "1:50:11"} +{"current_steps": 5987, "total_steps": 8674, "loss": 0.38662189245224, "lr": 4.80756011427606e-07, "epoch": 1.3804473138113904, "percentage": 69.02, "elapsed_time": "4:05:26", "remaining_time": "1:50:09"} +{"current_steps": 5988, "total_steps": 8674, "loss": 0.4888705015182495, "lr": 4.804302133840126e-07, "epoch": 1.3806778879409731, "percentage": 69.03, "elapsed_time": "4:05:28", "remaining_time": "1:50:06"} +{"current_steps": 5989, "total_steps": 8674, "loss": 0.4559556245803833, "lr": 4.801044908651537e-07, "epoch": 1.3809084620705556, "percentage": 69.05, "elapsed_time": "4:05:31", "remaining_time": "1:50:04"} +{"current_steps": 5990, "total_steps": 8674, "loss": 0.40912386775016785, "lr": 4.797788439183757e-07, "epoch": 1.3811390362001383, "percentage": 69.06, "elapsed_time": 
"4:05:33", "remaining_time": "1:50:01"} +{"current_steps": 5991, "total_steps": 8674, "loss": 0.3848627209663391, "lr": 4.794532725910152e-07, "epoch": 1.381369610329721, "percentage": 69.07, "elapsed_time": "4:05:36", "remaining_time": "1:49:59"} +{"current_steps": 5992, "total_steps": 8674, "loss": 0.4995359778404236, "lr": 4.791277769303975e-07, "epoch": 1.3816001844593035, "percentage": 69.08, "elapsed_time": "4:05:38", "remaining_time": "1:49:57"} +{"current_steps": 5993, "total_steps": 8674, "loss": 0.38717859983444214, "lr": 4.788023569838356e-07, "epoch": 1.3818307585888863, "percentage": 69.09, "elapsed_time": "4:05:41", "remaining_time": "1:49:54"} +{"current_steps": 5994, "total_steps": 8674, "loss": 0.39855217933654785, "lr": 4.784770127986339e-07, "epoch": 1.382061332718469, "percentage": 69.1, "elapsed_time": "4:05:43", "remaining_time": "1:49:52"} +{"current_steps": 5995, "total_steps": 8674, "loss": 0.38494858145713806, "lr": 4.781517444220835e-07, "epoch": 1.3822919068480517, "percentage": 69.11, "elapsed_time": "4:05:46", "remaining_time": "1:49:49"} +{"current_steps": 5996, "total_steps": 8674, "loss": 0.44064784049987793, "lr": 4.778265519014661e-07, "epoch": 1.3825224809776344, "percentage": 69.13, "elapsed_time": "4:05:49", "remaining_time": "1:49:47"} +{"current_steps": 5997, "total_steps": 8674, "loss": 0.39377373456954956, "lr": 4.775014352840512e-07, "epoch": 1.382753055107217, "percentage": 69.14, "elapsed_time": "4:05:51", "remaining_time": "1:49:44"} +{"current_steps": 5998, "total_steps": 8674, "loss": 0.45127296447753906, "lr": 4.771763946170979e-07, "epoch": 1.3829836292367996, "percentage": 69.15, "elapsed_time": "4:05:53", "remaining_time": "1:49:42"} +{"current_steps": 5999, "total_steps": 8674, "loss": 0.4999358654022217, "lr": 4.768514299478545e-07, "epoch": 1.3832142033663823, "percentage": 69.16, "elapsed_time": "4:05:56", "remaining_time": "1:49:39"} +{"current_steps": 6000, "total_steps": 8674, "loss": 0.49552851915359497, 
"lr": 4.7652654132355784e-07, "epoch": 1.3834447774959648, "percentage": 69.17, "elapsed_time": "4:05:58", "remaining_time": "1:49:37"} +{"current_steps": 6001, "total_steps": 8674, "loss": 0.49196135997772217, "lr": 4.762017287914338e-07, "epoch": 1.3836753516255476, "percentage": 69.18, "elapsed_time": "4:06:02", "remaining_time": "1:49:35"} +{"current_steps": 6002, "total_steps": 8674, "loss": 0.3870600461959839, "lr": 4.758769923986966e-07, "epoch": 1.3839059257551303, "percentage": 69.2, "elapsed_time": "4:06:05", "remaining_time": "1:49:33"} +{"current_steps": 6003, "total_steps": 8674, "loss": 0.4585425853729248, "lr": 4.7555233219255074e-07, "epoch": 1.384136499884713, "percentage": 69.21, "elapsed_time": "4:06:07", "remaining_time": "1:49:30"} +{"current_steps": 6004, "total_steps": 8674, "loss": 0.4332588315010071, "lr": 4.752277482201882e-07, "epoch": 1.3843670740142957, "percentage": 69.22, "elapsed_time": "4:06:10", "remaining_time": "1:49:28"} +{"current_steps": 6005, "total_steps": 8674, "loss": 0.4386274814605713, "lr": 4.749032405287913e-07, "epoch": 1.3845976481438782, "percentage": 69.23, "elapsed_time": "4:06:12", "remaining_time": "1:49:25"} +{"current_steps": 6006, "total_steps": 8674, "loss": 0.5064895749092102, "lr": 4.745788091655295e-07, "epoch": 1.384828222273461, "percentage": 69.24, "elapsed_time": "4:06:14", "remaining_time": "1:49:23"} +{"current_steps": 6007, "total_steps": 8674, "loss": 0.4441327452659607, "lr": 4.7425445417756295e-07, "epoch": 1.3850587964030436, "percentage": 69.25, "elapsed_time": "4:06:17", "remaining_time": "1:49:20"} +{"current_steps": 6008, "total_steps": 8674, "loss": 0.4415687918663025, "lr": 4.7393017561203965e-07, "epoch": 1.3852893705326261, "percentage": 69.26, "elapsed_time": "4:06:19", "remaining_time": "1:49:18"} +{"current_steps": 6009, "total_steps": 8674, "loss": 0.4668382704257965, "lr": 4.736059735160973e-07, "epoch": 1.3855199446622088, "percentage": 69.28, "elapsed_time": "4:06:22", 
"remaining_time": "1:49:16"} +{"current_steps": 6010, "total_steps": 8674, "loss": 0.3981805443763733, "lr": 4.732818479368615e-07, "epoch": 1.3857505187917916, "percentage": 69.29, "elapsed_time": "4:06:24", "remaining_time": "1:49:13"} +{"current_steps": 6011, "total_steps": 8674, "loss": 0.4465348720550537, "lr": 4.7295779892144694e-07, "epoch": 1.3859810929213743, "percentage": 69.3, "elapsed_time": "4:06:27", "remaining_time": "1:49:11"} +{"current_steps": 6012, "total_steps": 8674, "loss": 0.4844682812690735, "lr": 4.7263382651695805e-07, "epoch": 1.386211667050957, "percentage": 69.31, "elapsed_time": "4:06:29", "remaining_time": "1:49:08"} +{"current_steps": 6013, "total_steps": 8674, "loss": 0.4261378347873688, "lr": 4.723099307704868e-07, "epoch": 1.3864422411805395, "percentage": 69.32, "elapsed_time": "4:06:32", "remaining_time": "1:49:06"} +{"current_steps": 6014, "total_steps": 8674, "loss": 0.457815945148468, "lr": 4.7198611172911506e-07, "epoch": 1.3866728153101222, "percentage": 69.33, "elapsed_time": "4:06:34", "remaining_time": "1:49:03"} +{"current_steps": 6015, "total_steps": 8674, "loss": 0.46352216601371765, "lr": 4.7166236943991333e-07, "epoch": 1.386903389439705, "percentage": 69.35, "elapsed_time": "4:06:37", "remaining_time": "1:49:01"} +{"current_steps": 6016, "total_steps": 8674, "loss": 0.4166485667228699, "lr": 4.7133870394994104e-07, "epoch": 1.3871339635692874, "percentage": 69.36, "elapsed_time": "4:06:39", "remaining_time": "1:48:58"} +{"current_steps": 6017, "total_steps": 8674, "loss": 0.405789852142334, "lr": 4.710151153062456e-07, "epoch": 1.3873645376988701, "percentage": 69.37, "elapsed_time": "4:06:42", "remaining_time": "1:48:56"} +{"current_steps": 6018, "total_steps": 8674, "loss": 0.47718119621276855, "lr": 4.7069160355586456e-07, "epoch": 1.3875951118284529, "percentage": 69.38, "elapsed_time": "4:06:44", "remaining_time": "1:48:53"} +{"current_steps": 6019, "total_steps": 8674, "loss": 0.5040356516838074, "lr": 
4.7036816874582307e-07, "epoch": 1.3878256859580356, "percentage": 69.39, "elapsed_time": "4:06:46", "remaining_time": "1:48:51"} +{"current_steps": 6020, "total_steps": 8674, "loss": 0.45093637704849243, "lr": 4.700448109231362e-07, "epoch": 1.3880562600876183, "percentage": 69.4, "elapsed_time": "4:06:49", "remaining_time": "1:48:48"} +{"current_steps": 6021, "total_steps": 8674, "loss": 0.5363638997077942, "lr": 4.6972153013480666e-07, "epoch": 1.3882868342172008, "percentage": 69.41, "elapsed_time": "4:06:51", "remaining_time": "1:48:46"} +{"current_steps": 6022, "total_steps": 8674, "loss": 0.4917050004005432, "lr": 4.6939832642782684e-07, "epoch": 1.3885174083467835, "percentage": 69.43, "elapsed_time": "4:06:54", "remaining_time": "1:48:43"} +{"current_steps": 6023, "total_steps": 8674, "loss": 0.43033331632614136, "lr": 4.690751998491782e-07, "epoch": 1.3887479824763662, "percentage": 69.44, "elapsed_time": "4:06:56", "remaining_time": "1:48:41"} +{"current_steps": 6024, "total_steps": 8674, "loss": 0.36168330907821655, "lr": 4.6875215044582973e-07, "epoch": 1.3889785566059487, "percentage": 69.45, "elapsed_time": "4:06:58", "remaining_time": "1:48:38"} +{"current_steps": 6025, "total_steps": 8674, "loss": 0.48347967863082886, "lr": 4.6842917826474047e-07, "epoch": 1.3892091307355314, "percentage": 69.46, "elapsed_time": "4:07:01", "remaining_time": "1:48:36"} +{"current_steps": 6026, "total_steps": 8674, "loss": 0.4493439495563507, "lr": 4.681062833528572e-07, "epoch": 1.3894397048651141, "percentage": 69.47, "elapsed_time": "4:07:03", "remaining_time": "1:48:34"} +{"current_steps": 6027, "total_steps": 8674, "loss": 0.385773628950119, "lr": 4.677834657571165e-07, "epoch": 1.3896702789946969, "percentage": 69.48, "elapsed_time": "4:07:06", "remaining_time": "1:48:31"} +{"current_steps": 6028, "total_steps": 8674, "loss": 0.4254469573497772, "lr": 4.674607255244426e-07, "epoch": 1.3899008531242796, "percentage": 69.5, "elapsed_time": "4:07:08", 
"remaining_time": "1:48:29"} +{"current_steps": 6029, "total_steps": 8674, "loss": 0.5070454478263855, "lr": 4.671380627017497e-07, "epoch": 1.390131427253862, "percentage": 69.51, "elapsed_time": "4:07:11", "remaining_time": "1:48:26"} +{"current_steps": 6030, "total_steps": 8674, "loss": 0.44099801778793335, "lr": 4.668154773359394e-07, "epoch": 1.3903620013834448, "percentage": 69.52, "elapsed_time": "4:07:13", "remaining_time": "1:48:24"} +{"current_steps": 6031, "total_steps": 8674, "loss": 0.4965481162071228, "lr": 4.6649296947390314e-07, "epoch": 1.3905925755130275, "percentage": 69.53, "elapsed_time": "4:07:15", "remaining_time": "1:48:21"} +{"current_steps": 6032, "total_steps": 8674, "loss": 0.4085753262042999, "lr": 4.6617053916252116e-07, "epoch": 1.39082314964261, "percentage": 69.54, "elapsed_time": "4:07:18", "remaining_time": "1:48:19"} +{"current_steps": 6033, "total_steps": 8674, "loss": 0.3768424391746521, "lr": 4.6584818644866106e-07, "epoch": 1.3910537237721927, "percentage": 69.55, "elapsed_time": "4:07:20", "remaining_time": "1:48:16"} +{"current_steps": 6034, "total_steps": 8674, "loss": 0.4330044388771057, "lr": 4.6552591137918087e-07, "epoch": 1.3912842979017754, "percentage": 69.56, "elapsed_time": "4:07:23", "remaining_time": "1:48:14"} +{"current_steps": 6035, "total_steps": 8674, "loss": 0.4669216275215149, "lr": 4.6520371400092584e-07, "epoch": 1.3915148720313582, "percentage": 69.58, "elapsed_time": "4:07:25", "remaining_time": "1:48:11"} +{"current_steps": 6036, "total_steps": 8674, "loss": 0.5491182208061218, "lr": 4.648815943607314e-07, "epoch": 1.3917454461609409, "percentage": 69.59, "elapsed_time": "4:07:28", "remaining_time": "1:48:09"} +{"current_steps": 6037, "total_steps": 8674, "loss": 0.4842255413532257, "lr": 4.6455955250542e-07, "epoch": 1.3919760202905234, "percentage": 69.6, "elapsed_time": "4:07:30", "remaining_time": "1:48:06"} +{"current_steps": 6038, "total_steps": 8674, "loss": 0.45479631423950195, "lr": 
4.6423758848180427e-07, "epoch": 1.392206594420106, "percentage": 69.61, "elapsed_time": "4:07:33", "remaining_time": "1:48:04"} +{"current_steps": 6039, "total_steps": 8674, "loss": 0.4209587574005127, "lr": 4.6391570233668486e-07, "epoch": 1.3924371685496888, "percentage": 69.62, "elapsed_time": "4:07:35", "remaining_time": "1:48:01"} +{"current_steps": 6040, "total_steps": 8674, "loss": 0.5061464905738831, "lr": 4.6359389411685145e-07, "epoch": 1.3926677426792713, "percentage": 69.63, "elapsed_time": "4:07:37", "remaining_time": "1:47:59"} +{"current_steps": 6041, "total_steps": 8674, "loss": 0.39443570375442505, "lr": 4.6327216386908196e-07, "epoch": 1.392898316808854, "percentage": 69.64, "elapsed_time": "4:07:40", "remaining_time": "1:47:56"} +{"current_steps": 6042, "total_steps": 8674, "loss": 0.4784463942050934, "lr": 4.6295051164014256e-07, "epoch": 1.3931288909384367, "percentage": 69.66, "elapsed_time": "4:07:42", "remaining_time": "1:47:54"} +{"current_steps": 6043, "total_steps": 8674, "loss": 0.41256606578826904, "lr": 4.6262893747678957e-07, "epoch": 1.3933594650680194, "percentage": 69.67, "elapsed_time": "4:07:45", "remaining_time": "1:47:52"} +{"current_steps": 6044, "total_steps": 8674, "loss": 0.4507666230201721, "lr": 4.623074414257662e-07, "epoch": 1.3935900391976022, "percentage": 69.68, "elapsed_time": "4:07:47", "remaining_time": "1:47:49"} +{"current_steps": 6045, "total_steps": 8674, "loss": 0.3783376216888428, "lr": 4.6198602353380545e-07, "epoch": 1.3938206133271847, "percentage": 69.69, "elapsed_time": "4:07:50", "remaining_time": "1:47:47"} +{"current_steps": 6046, "total_steps": 8674, "loss": 0.47854840755462646, "lr": 4.616646838476289e-07, "epoch": 1.3940511874567674, "percentage": 69.7, "elapsed_time": "4:07:52", "remaining_time": "1:47:44"} +{"current_steps": 6047, "total_steps": 8674, "loss": 0.47121208906173706, "lr": 4.6134342241394685e-07, "epoch": 1.39428176158635, "percentage": 69.71, "elapsed_time": "4:07:55", 
"remaining_time": "1:47:42"} +{"current_steps": 6048, "total_steps": 8674, "loss": 0.5211559534072876, "lr": 4.610222392794569e-07, "epoch": 1.3945123357159326, "percentage": 69.73, "elapsed_time": "4:07:57", "remaining_time": "1:47:39"} +{"current_steps": 6049, "total_steps": 8674, "loss": 0.5846370458602905, "lr": 4.6070113449084747e-07, "epoch": 1.3947429098455153, "percentage": 69.74, "elapsed_time": "4:07:59", "remaining_time": "1:47:37"} +{"current_steps": 6050, "total_steps": 8674, "loss": 0.4787401854991913, "lr": 4.6038010809479365e-07, "epoch": 1.394973483975098, "percentage": 69.75, "elapsed_time": "4:08:02", "remaining_time": "1:47:34"} +{"current_steps": 6051, "total_steps": 8674, "loss": 0.36429738998413086, "lr": 4.600591601379596e-07, "epoch": 1.3952040581046807, "percentage": 69.76, "elapsed_time": "4:08:05", "remaining_time": "1:47:32"} +{"current_steps": 6052, "total_steps": 8674, "loss": 0.49923771619796753, "lr": 4.597382906669992e-07, "epoch": 1.3954346322342635, "percentage": 69.77, "elapsed_time": "4:08:07", "remaining_time": "1:47:29"} +{"current_steps": 6053, "total_steps": 8674, "loss": 0.408005028963089, "lr": 4.5941749972855326e-07, "epoch": 1.395665206363846, "percentage": 69.78, "elapsed_time": "4:08:09", "remaining_time": "1:47:27"} +{"current_steps": 6054, "total_steps": 8674, "loss": 0.4524402618408203, "lr": 4.590967873692523e-07, "epoch": 1.3958957804934287, "percentage": 69.79, "elapsed_time": "4:08:12", "remaining_time": "1:47:24"} +{"current_steps": 6055, "total_steps": 8674, "loss": 0.5264980792999268, "lr": 4.587761536357152e-07, "epoch": 1.3961263546230114, "percentage": 69.81, "elapsed_time": "4:08:14", "remaining_time": "1:47:22"} +{"current_steps": 6056, "total_steps": 8674, "loss": 0.5324279069900513, "lr": 4.5845559857454976e-07, "epoch": 1.3963569287525939, "percentage": 69.82, "elapsed_time": "4:08:16", "remaining_time": "1:47:19"} +{"current_steps": 6057, "total_steps": 8674, "loss": 0.5197574496269226, "lr": 
4.581351222323511e-07, "epoch": 1.3965875028821766, "percentage": 69.83, "elapsed_time": "4:08:19", "remaining_time": "1:47:17"} +{"current_steps": 6058, "total_steps": 8674, "loss": 0.4549001157283783, "lr": 4.578147246557043e-07, "epoch": 1.3968180770117593, "percentage": 69.84, "elapsed_time": "4:08:21", "remaining_time": "1:47:15"} +{"current_steps": 6059, "total_steps": 8674, "loss": 0.38597673177719116, "lr": 4.5749440589118183e-07, "epoch": 1.397048651141342, "percentage": 69.85, "elapsed_time": "4:08:24", "remaining_time": "1:47:12"} +{"current_steps": 6060, "total_steps": 8674, "loss": 0.4104316532611847, "lr": 4.57174165985346e-07, "epoch": 1.3972792252709247, "percentage": 69.86, "elapsed_time": "4:08:26", "remaining_time": "1:47:10"} +{"current_steps": 6061, "total_steps": 8674, "loss": 0.5241787433624268, "lr": 4.5685400498474614e-07, "epoch": 1.3975097994005072, "percentage": 69.88, "elapsed_time": "4:08:29", "remaining_time": "1:47:07"} +{"current_steps": 6062, "total_steps": 8674, "loss": 0.5033289790153503, "lr": 4.565339229359213e-07, "epoch": 1.39774037353009, "percentage": 69.89, "elapsed_time": "4:08:31", "remaining_time": "1:47:05"} +{"current_steps": 6063, "total_steps": 8674, "loss": 0.436188280582428, "lr": 4.5621391988539894e-07, "epoch": 1.3979709476596724, "percentage": 69.9, "elapsed_time": "4:08:34", "remaining_time": "1:47:02"} +{"current_steps": 6064, "total_steps": 8674, "loss": 0.3885838985443115, "lr": 4.5589399587969414e-07, "epoch": 1.3982015217892552, "percentage": 69.91, "elapsed_time": "4:08:36", "remaining_time": "1:47:00"} +{"current_steps": 6065, "total_steps": 8674, "loss": 0.5140193104743958, "lr": 4.555741509653116e-07, "epoch": 1.3984320959188379, "percentage": 69.92, "elapsed_time": "4:08:39", "remaining_time": "1:46:57"} +{"current_steps": 6066, "total_steps": 8674, "loss": 0.41084468364715576, "lr": 4.552543851887436e-07, "epoch": 1.3986626700484206, "percentage": 69.93, "elapsed_time": "4:08:41", "remaining_time": 
"1:46:55"} +{"current_steps": 6067, "total_steps": 8674, "loss": 0.3606417179107666, "lr": 4.549346985964718e-07, "epoch": 1.3988932441780033, "percentage": 69.94, "elapsed_time": "4:08:44", "remaining_time": "1:46:52"} +{"current_steps": 6068, "total_steps": 8674, "loss": 0.48518556356430054, "lr": 4.546150912349653e-07, "epoch": 1.3991238183075858, "percentage": 69.96, "elapsed_time": "4:08:46", "remaining_time": "1:46:50"} +{"current_steps": 6069, "total_steps": 8674, "loss": 0.5394424200057983, "lr": 4.5429556315068264e-07, "epoch": 1.3993543924371685, "percentage": 69.97, "elapsed_time": "4:08:49", "remaining_time": "1:46:47"} +{"current_steps": 6070, "total_steps": 8674, "loss": 0.40272367000579834, "lr": 4.539761143900708e-07, "epoch": 1.3995849665667512, "percentage": 69.98, "elapsed_time": "4:08:51", "remaining_time": "1:46:45"} +{"current_steps": 6071, "total_steps": 8674, "loss": 0.4279879331588745, "lr": 4.536567449995641e-07, "epoch": 1.3998155406963337, "percentage": 69.99, "elapsed_time": "4:08:53", "remaining_time": "1:46:42"} +{"current_steps": 6072, "total_steps": 8674, "loss": 0.48560982942581177, "lr": 4.5333745502558695e-07, "epoch": 1.4000461148259165, "percentage": 70.0, "elapsed_time": "4:08:56", "remaining_time": "1:46:40"} +{"current_steps": 6073, "total_steps": 8674, "loss": 0.49256429076194763, "lr": 4.530182445145506e-07, "epoch": 1.4002766889554992, "percentage": 70.01, "elapsed_time": "4:08:58", "remaining_time": "1:46:37"} +{"current_steps": 6074, "total_steps": 8674, "loss": 0.5015553832054138, "lr": 4.5269911351285614e-07, "epoch": 1.4005072630850819, "percentage": 70.03, "elapsed_time": "4:09:00", "remaining_time": "1:46:35"} +{"current_steps": 6075, "total_steps": 8674, "loss": 0.4313800632953644, "lr": 4.5238006206689204e-07, "epoch": 1.4007378372146646, "percentage": 70.04, "elapsed_time": "4:09:03", "remaining_time": "1:46:32"} +{"current_steps": 6076, "total_steps": 8674, "loss": 0.3440586030483246, "lr": 
4.520610902230363e-07, "epoch": 1.400968411344247, "percentage": 70.05, "elapsed_time": "4:09:05", "remaining_time": "1:46:30"} +{"current_steps": 6077, "total_steps": 8674, "loss": 0.43868017196655273, "lr": 4.517421980276538e-07, "epoch": 1.4011989854738298, "percentage": 70.06, "elapsed_time": "4:09:08", "remaining_time": "1:46:28"} +{"current_steps": 6078, "total_steps": 8674, "loss": 0.5581029057502747, "lr": 4.5142338552709923e-07, "epoch": 1.4014295596034125, "percentage": 70.07, "elapsed_time": "4:09:10", "remaining_time": "1:46:25"} +{"current_steps": 6079, "total_steps": 8674, "loss": 0.4543154835700989, "lr": 4.5110465276771524e-07, "epoch": 1.401660133732995, "percentage": 70.08, "elapsed_time": "4:09:12", "remaining_time": "1:46:23"} +{"current_steps": 6080, "total_steps": 8674, "loss": 0.5229466557502747, "lr": 4.507859997958333e-07, "epoch": 1.4018907078625777, "percentage": 70.09, "elapsed_time": "4:09:15", "remaining_time": "1:46:20"} +{"current_steps": 6081, "total_steps": 8674, "loss": 0.46781739592552185, "lr": 4.504674266577724e-07, "epoch": 1.4021212819921605, "percentage": 70.11, "elapsed_time": "4:09:17", "remaining_time": "1:46:18"} +{"current_steps": 6082, "total_steps": 8674, "loss": 0.48040711879730225, "lr": 4.5014893339983993e-07, "epoch": 1.4023518561217432, "percentage": 70.12, "elapsed_time": "4:09:20", "remaining_time": "1:46:15"} +{"current_steps": 6083, "total_steps": 8674, "loss": 0.5039708018302917, "lr": 4.49830520068333e-07, "epoch": 1.402582430251326, "percentage": 70.13, "elapsed_time": "4:09:22", "remaining_time": "1:46:13"} +{"current_steps": 6084, "total_steps": 8674, "loss": 0.43496155738830566, "lr": 4.495121867095354e-07, "epoch": 1.4028130043809084, "percentage": 70.14, "elapsed_time": "4:09:25", "remaining_time": "1:46:10"} +{"current_steps": 6085, "total_steps": 8674, "loss": 0.4603109061717987, "lr": 4.4919393336972045e-07, "epoch": 1.403043578510491, "percentage": 70.15, "elapsed_time": "4:09:27", 
"remaining_time": "1:46:08"} +{"current_steps": 6086, "total_steps": 8674, "loss": 0.4571962356567383, "lr": 4.488757600951496e-07, "epoch": 1.4032741526400738, "percentage": 70.16, "elapsed_time": "4:09:29", "remaining_time": "1:46:05"} +{"current_steps": 6087, "total_steps": 8674, "loss": 0.46302443742752075, "lr": 4.485576669320729e-07, "epoch": 1.4035047267696563, "percentage": 70.18, "elapsed_time": "4:09:32", "remaining_time": "1:46:03"} +{"current_steps": 6088, "total_steps": 8674, "loss": 0.39066869020462036, "lr": 4.482396539267275e-07, "epoch": 1.403735300899239, "percentage": 70.19, "elapsed_time": "4:09:34", "remaining_time": "1:46:00"} +{"current_steps": 6089, "total_steps": 8674, "loss": 0.4797130823135376, "lr": 4.4792172112534076e-07, "epoch": 1.4039658750288218, "percentage": 70.2, "elapsed_time": "4:09:36", "remaining_time": "1:45:58"} +{"current_steps": 6090, "total_steps": 8674, "loss": 0.4578198492527008, "lr": 4.4760386857412704e-07, "epoch": 1.4041964491584045, "percentage": 70.21, "elapsed_time": "4:09:39", "remaining_time": "1:45:55"} +{"current_steps": 6091, "total_steps": 8674, "loss": 0.40768736600875854, "lr": 4.472860963192889e-07, "epoch": 1.4044270232879872, "percentage": 70.22, "elapsed_time": "4:09:41", "remaining_time": "1:45:53"} +{"current_steps": 6092, "total_steps": 8674, "loss": 0.4201413094997406, "lr": 4.4696840440701846e-07, "epoch": 1.4046575974175697, "percentage": 70.23, "elapsed_time": "4:09:44", "remaining_time": "1:45:50"} +{"current_steps": 6093, "total_steps": 8674, "loss": 0.45901796221733093, "lr": 4.466507928834951e-07, "epoch": 1.4048881715471524, "percentage": 70.24, "elapsed_time": "4:09:46", "remaining_time": "1:45:48"} +{"current_steps": 6094, "total_steps": 8674, "loss": 0.4699435830116272, "lr": 4.463332617948874e-07, "epoch": 1.4051187456767351, "percentage": 70.26, "elapsed_time": "4:09:49", "remaining_time": "1:45:46"} +{"current_steps": 6095, "total_steps": 8674, "loss": 0.4526669383049011, "lr": 
4.46015811187351e-07, "epoch": 1.4053493198063176, "percentage": 70.27, "elapsed_time": "4:09:51", "remaining_time": "1:45:43"} +{"current_steps": 6096, "total_steps": 8674, "loss": 0.46754884719848633, "lr": 4.456984411070313e-07, "epoch": 1.4055798939359003, "percentage": 70.28, "elapsed_time": "4:09:54", "remaining_time": "1:45:41"} +{"current_steps": 6097, "total_steps": 8674, "loss": 0.5119268894195557, "lr": 4.453811516000604e-07, "epoch": 1.405810468065483, "percentage": 70.29, "elapsed_time": "4:09:56", "remaining_time": "1:45:38"} +{"current_steps": 6098, "total_steps": 8674, "loss": 0.42980802059173584, "lr": 4.4506394271256043e-07, "epoch": 1.4060410421950658, "percentage": 70.3, "elapsed_time": "4:09:59", "remaining_time": "1:45:36"} +{"current_steps": 6099, "total_steps": 8674, "loss": 0.5895063281059265, "lr": 4.447468144906401e-07, "epoch": 1.4062716163246485, "percentage": 70.31, "elapsed_time": "4:10:01", "remaining_time": "1:45:33"} +{"current_steps": 6100, "total_steps": 8674, "loss": 0.42768803238868713, "lr": 4.4442976698039803e-07, "epoch": 1.406502190454231, "percentage": 70.33, "elapsed_time": "4:10:03", "remaining_time": "1:45:31"} +{"current_steps": 6101, "total_steps": 8674, "loss": 0.44234544038772583, "lr": 4.4411280022791943e-07, "epoch": 1.4067327645838137, "percentage": 70.34, "elapsed_time": "4:10:08", "remaining_time": "1:45:29"} +{"current_steps": 6102, "total_steps": 8674, "loss": 0.4382736086845398, "lr": 4.437959142792791e-07, "epoch": 1.4069633387133964, "percentage": 70.35, "elapsed_time": "4:10:10", "remaining_time": "1:45:26"} +{"current_steps": 6103, "total_steps": 8674, "loss": 0.47603681683540344, "lr": 4.4347910918054e-07, "epoch": 1.407193912842979, "percentage": 70.36, "elapsed_time": "4:10:12", "remaining_time": "1:45:24"} +{"current_steps": 6104, "total_steps": 8674, "loss": 0.5562035441398621, "lr": 4.431623849777522e-07, "epoch": 1.4074244869725616, "percentage": 70.37, "elapsed_time": "4:10:15", "remaining_time": 
"1:45:21"} +{"current_steps": 6105, "total_steps": 8674, "loss": 0.4153141677379608, "lr": 4.4284574171695535e-07, "epoch": 1.4076550611021443, "percentage": 70.38, "elapsed_time": "4:10:17", "remaining_time": "1:45:19"} +{"current_steps": 6106, "total_steps": 8674, "loss": 0.4825887680053711, "lr": 4.425291794441762e-07, "epoch": 1.407885635231727, "percentage": 70.39, "elapsed_time": "4:10:20", "remaining_time": "1:45:17"} +{"current_steps": 6107, "total_steps": 8674, "loss": 0.4853668808937073, "lr": 4.4221269820543104e-07, "epoch": 1.4081162093613098, "percentage": 70.41, "elapsed_time": "4:10:22", "remaining_time": "1:45:14"} +{"current_steps": 6108, "total_steps": 8674, "loss": 0.5615251064300537, "lr": 4.418962980467229e-07, "epoch": 1.4083467834908923, "percentage": 70.42, "elapsed_time": "4:10:24", "remaining_time": "1:45:12"} +{"current_steps": 6109, "total_steps": 8674, "loss": 0.38605546951293945, "lr": 4.4157997901404396e-07, "epoch": 1.408577357620475, "percentage": 70.43, "elapsed_time": "4:10:27", "remaining_time": "1:45:09"} +{"current_steps": 6110, "total_steps": 8674, "loss": 0.41582173109054565, "lr": 4.412637411533745e-07, "epoch": 1.4088079317500577, "percentage": 70.44, "elapsed_time": "4:10:30", "remaining_time": "1:45:07"} +{"current_steps": 6111, "total_steps": 8674, "loss": 0.38091376423835754, "lr": 4.4094758451068327e-07, "epoch": 1.4090385058796402, "percentage": 70.45, "elapsed_time": "4:10:32", "remaining_time": "1:45:04"} +{"current_steps": 6112, "total_steps": 8674, "loss": 0.43319058418273926, "lr": 4.4063150913192635e-07, "epoch": 1.409269080009223, "percentage": 70.46, "elapsed_time": "4:10:34", "remaining_time": "1:45:02"} +{"current_steps": 6113, "total_steps": 8674, "loss": 0.43207013607025146, "lr": 4.403155150630484e-07, "epoch": 1.4094996541388056, "percentage": 70.47, "elapsed_time": "4:10:37", "remaining_time": "1:44:59"} +{"current_steps": 6114, "total_steps": 8674, "loss": 0.43750250339508057, "lr": 
4.399996023499829e-07, "epoch": 1.4097302282683883, "percentage": 70.49, "elapsed_time": "4:10:39", "remaining_time": "1:44:57"} +{"current_steps": 6115, "total_steps": 8674, "loss": 0.44084444642066956, "lr": 4.3968377103865016e-07, "epoch": 1.409960802397971, "percentage": 70.5, "elapsed_time": "4:10:42", "remaining_time": "1:44:54"} +{"current_steps": 6116, "total_steps": 8674, "loss": 0.4752010405063629, "lr": 4.3936802117495997e-07, "epoch": 1.4101913765275536, "percentage": 70.51, "elapsed_time": "4:10:44", "remaining_time": "1:44:52"} +{"current_steps": 6117, "total_steps": 8674, "loss": 0.39239025115966797, "lr": 4.390523528048098e-07, "epoch": 1.4104219506571363, "percentage": 70.52, "elapsed_time": "4:10:47", "remaining_time": "1:44:49"} +{"current_steps": 6118, "total_steps": 8674, "loss": 0.46021080017089844, "lr": 4.387367659740856e-07, "epoch": 1.410652524786719, "percentage": 70.53, "elapsed_time": "4:10:49", "remaining_time": "1:44:47"} +{"current_steps": 6119, "total_steps": 8674, "loss": 0.4079766571521759, "lr": 4.3842126072866014e-07, "epoch": 1.4108830989163015, "percentage": 70.54, "elapsed_time": "4:10:52", "remaining_time": "1:44:45"} +{"current_steps": 6120, "total_steps": 8674, "loss": 0.4922672510147095, "lr": 4.381058371143964e-07, "epoch": 1.4111136730458842, "percentage": 70.56, "elapsed_time": "4:10:54", "remaining_time": "1:44:42"} +{"current_steps": 6121, "total_steps": 8674, "loss": 0.3950929045677185, "lr": 4.377904951771438e-07, "epoch": 1.411344247175467, "percentage": 70.57, "elapsed_time": "4:10:56", "remaining_time": "1:44:40"} +{"current_steps": 6122, "total_steps": 8674, "loss": 0.503406286239624, "lr": 4.374752349627402e-07, "epoch": 1.4115748213050496, "percentage": 70.58, "elapsed_time": "4:10:59", "remaining_time": "1:44:37"} +{"current_steps": 6123, "total_steps": 8674, "loss": 0.49198317527770996, "lr": 4.3716005651701215e-07, "epoch": 1.4118053954346323, "percentage": 70.59, "elapsed_time": "4:11:01", 
"remaining_time": "1:44:35"} +{"current_steps": 6124, "total_steps": 8674, "loss": 0.47396305203437805, "lr": 4.368449598857742e-07, "epoch": 1.4120359695642148, "percentage": 70.6, "elapsed_time": "4:11:04", "remaining_time": "1:44:32"} +{"current_steps": 6125, "total_steps": 8674, "loss": 0.5248152017593384, "lr": 4.365299451148291e-07, "epoch": 1.4122665436937976, "percentage": 70.61, "elapsed_time": "4:11:06", "remaining_time": "1:44:30"} +{"current_steps": 6126, "total_steps": 8674, "loss": 0.44327419996261597, "lr": 4.362150122499666e-07, "epoch": 1.4124971178233803, "percentage": 70.62, "elapsed_time": "4:11:09", "remaining_time": "1:44:27"} +{"current_steps": 6127, "total_steps": 8674, "loss": 0.4628877639770508, "lr": 4.3590016133696626e-07, "epoch": 1.4127276919529628, "percentage": 70.64, "elapsed_time": "4:11:11", "remaining_time": "1:44:25"} +{"current_steps": 6128, "total_steps": 8674, "loss": 0.5277193188667297, "lr": 4.355853924215942e-07, "epoch": 1.4129582660825455, "percentage": 70.65, "elapsed_time": "4:11:14", "remaining_time": "1:44:22"} +{"current_steps": 6129, "total_steps": 8674, "loss": 0.4675426781177521, "lr": 4.3527070554960577e-07, "epoch": 1.4131888402121282, "percentage": 70.66, "elapsed_time": "4:11:17", "remaining_time": "1:44:20"} +{"current_steps": 6130, "total_steps": 8674, "loss": 0.3762160539627075, "lr": 4.349561007667433e-07, "epoch": 1.413419414341711, "percentage": 70.67, "elapsed_time": "4:11:19", "remaining_time": "1:44:18"} +{"current_steps": 6131, "total_steps": 8674, "loss": 0.4797256588935852, "lr": 4.346415781187385e-07, "epoch": 1.4136499884712936, "percentage": 70.68, "elapsed_time": "4:11:22", "remaining_time": "1:44:15"} +{"current_steps": 6132, "total_steps": 8674, "loss": 0.4348931312561035, "lr": 4.3432713765130967e-07, "epoch": 1.4138805626008761, "percentage": 70.69, "elapsed_time": "4:11:24", "remaining_time": "1:44:13"} +{"current_steps": 6133, "total_steps": 8674, "loss": 0.5080585479736328, "lr": 
4.3401277941016435e-07, "epoch": 1.4141111367304589, "percentage": 70.71, "elapsed_time": "4:11:27", "remaining_time": "1:44:10"} +{"current_steps": 6134, "total_steps": 8674, "loss": 0.40223604440689087, "lr": 4.33698503440998e-07, "epoch": 1.4143417108600416, "percentage": 70.72, "elapsed_time": "4:11:29", "remaining_time": "1:44:08"} +{"current_steps": 6135, "total_steps": 8674, "loss": 0.4460202753543854, "lr": 4.3338430978949315e-07, "epoch": 1.414572284989624, "percentage": 70.73, "elapsed_time": "4:11:32", "remaining_time": "1:44:05"} +{"current_steps": 6136, "total_steps": 8674, "loss": 0.5814889669418335, "lr": 4.3307019850132167e-07, "epoch": 1.4148028591192068, "percentage": 70.74, "elapsed_time": "4:11:34", "remaining_time": "1:44:03"} +{"current_steps": 6137, "total_steps": 8674, "loss": 0.39237886667251587, "lr": 4.3275616962214214e-07, "epoch": 1.4150334332487895, "percentage": 70.75, "elapsed_time": "4:11:37", "remaining_time": "1:44:01"} +{"current_steps": 6138, "total_steps": 8674, "loss": 0.4621772766113281, "lr": 4.324422231976025e-07, "epoch": 1.4152640073783722, "percentage": 70.76, "elapsed_time": "4:11:39", "remaining_time": "1:43:58"} +{"current_steps": 6139, "total_steps": 8674, "loss": 0.3722139596939087, "lr": 4.3212835927333745e-07, "epoch": 1.415494581507955, "percentage": 70.77, "elapsed_time": "4:11:41", "remaining_time": "1:43:56"} +{"current_steps": 6140, "total_steps": 8674, "loss": 0.5007534623146057, "lr": 4.3181457789497055e-07, "epoch": 1.4157251556375374, "percentage": 70.79, "elapsed_time": "4:11:44", "remaining_time": "1:43:53"} +{"current_steps": 6141, "total_steps": 8674, "loss": 0.470672607421875, "lr": 4.315008791081135e-07, "epoch": 1.4159557297671201, "percentage": 70.8, "elapsed_time": "4:11:46", "remaining_time": "1:43:51"} +{"current_steps": 6142, "total_steps": 8674, "loss": 0.5196114778518677, "lr": 4.3118726295836495e-07, "epoch": 1.4161863038967029, "percentage": 70.81, "elapsed_time": "4:11:49", 
"remaining_time": "1:43:48"} +{"current_steps": 6143, "total_steps": 8674, "loss": 0.4606804847717285, "lr": 4.3087372949131275e-07, "epoch": 1.4164168780262854, "percentage": 70.82, "elapsed_time": "4:11:51", "remaining_time": "1:43:46"} +{"current_steps": 6144, "total_steps": 8674, "loss": 0.3926661014556885, "lr": 4.3056027875253156e-07, "epoch": 1.416647452155868, "percentage": 70.83, "elapsed_time": "4:11:54", "remaining_time": "1:43:43"} +{"current_steps": 6145, "total_steps": 8674, "loss": 0.4570828080177307, "lr": 4.3024691078758536e-07, "epoch": 1.4168780262854508, "percentage": 70.84, "elapsed_time": "4:11:56", "remaining_time": "1:43:41"} +{"current_steps": 6146, "total_steps": 8674, "loss": 0.398615300655365, "lr": 4.299336256420245e-07, "epoch": 1.4171086004150335, "percentage": 70.86, "elapsed_time": "4:11:59", "remaining_time": "1:43:38"} +{"current_steps": 6147, "total_steps": 8674, "loss": 0.47571802139282227, "lr": 4.2962042336138873e-07, "epoch": 1.4173391745446162, "percentage": 70.87, "elapsed_time": "4:12:01", "remaining_time": "1:43:36"} +{"current_steps": 6148, "total_steps": 8674, "loss": 0.4266431927680969, "lr": 4.2930730399120487e-07, "epoch": 1.4175697486741987, "percentage": 70.88, "elapsed_time": "4:12:03", "remaining_time": "1:43:33"} +{"current_steps": 6149, "total_steps": 8674, "loss": 0.47870057821273804, "lr": 4.289942675769886e-07, "epoch": 1.4178003228037814, "percentage": 70.89, "elapsed_time": "4:12:06", "remaining_time": "1:43:31"} +{"current_steps": 6150, "total_steps": 8674, "loss": 0.3947669267654419, "lr": 4.2868131416424223e-07, "epoch": 1.4180308969333641, "percentage": 70.9, "elapsed_time": "4:12:08", "remaining_time": "1:43:28"} +{"current_steps": 6151, "total_steps": 8674, "loss": 0.49074164032936096, "lr": 4.283684437984573e-07, "epoch": 1.4182614710629466, "percentage": 70.91, "elapsed_time": "4:12:11", "remaining_time": "1:43:26"} +{"current_steps": 6152, "total_steps": 8674, "loss": 0.5540445446968079, "lr": 
4.280556565251123e-07, "epoch": 1.4184920451925294, "percentage": 70.92, "elapsed_time": "4:12:13", "remaining_time": "1:43:24"} +{"current_steps": 6153, "total_steps": 8674, "loss": 0.4898286461830139, "lr": 4.2774295238967386e-07, "epoch": 1.418722619322112, "percentage": 70.94, "elapsed_time": "4:12:16", "remaining_time": "1:43:21"} +{"current_steps": 6154, "total_steps": 8674, "loss": 0.5432708859443665, "lr": 4.2743033143759733e-07, "epoch": 1.4189531934516948, "percentage": 70.95, "elapsed_time": "4:12:18", "remaining_time": "1:43:19"} +{"current_steps": 6155, "total_steps": 8674, "loss": 0.4438853859901428, "lr": 4.2711779371432445e-07, "epoch": 1.4191837675812775, "percentage": 70.96, "elapsed_time": "4:12:21", "remaining_time": "1:43:16"} +{"current_steps": 6156, "total_steps": 8674, "loss": 0.4885905385017395, "lr": 4.268053392652863e-07, "epoch": 1.41941434171086, "percentage": 70.97, "elapsed_time": "4:12:23", "remaining_time": "1:43:14"} +{"current_steps": 6157, "total_steps": 8674, "loss": 0.4465547204017639, "lr": 4.264929681359013e-07, "epoch": 1.4196449158404427, "percentage": 70.98, "elapsed_time": "4:12:25", "remaining_time": "1:43:11"} +{"current_steps": 6158, "total_steps": 8674, "loss": 0.4392780661582947, "lr": 4.2618068037157594e-07, "epoch": 1.4198754899700254, "percentage": 70.99, "elapsed_time": "4:12:28", "remaining_time": "1:43:09"} +{"current_steps": 6159, "total_steps": 8674, "loss": 0.4501269459724426, "lr": 4.258684760177039e-07, "epoch": 1.420106064099608, "percentage": 71.01, "elapsed_time": "4:12:31", "remaining_time": "1:43:06"} +{"current_steps": 6160, "total_steps": 8674, "loss": 0.38439738750457764, "lr": 4.2555635511966783e-07, "epoch": 1.4203366382291907, "percentage": 71.02, "elapsed_time": "4:12:33", "remaining_time": "1:43:04"} +{"current_steps": 6161, "total_steps": 8674, "loss": 0.4679202437400818, "lr": 4.2524431772283743e-07, "epoch": 1.4205672123587734, "percentage": 71.03, "elapsed_time": "4:12:35", 
"remaining_time": "1:43:01"} +{"current_steps": 6162, "total_steps": 8674, "loss": 0.33505773544311523, "lr": 4.2493236387257e-07, "epoch": 1.420797786488356, "percentage": 71.04, "elapsed_time": "4:12:38", "remaining_time": "1:42:59"} +{"current_steps": 6163, "total_steps": 8674, "loss": 0.39141514897346497, "lr": 4.246204936142116e-07, "epoch": 1.4210283606179388, "percentage": 71.05, "elapsed_time": "4:12:40", "remaining_time": "1:42:56"} +{"current_steps": 6164, "total_steps": 8674, "loss": 0.49278295040130615, "lr": 4.243087069930958e-07, "epoch": 1.4212589347475213, "percentage": 71.06, "elapsed_time": "4:12:43", "remaining_time": "1:42:54"} +{"current_steps": 6165, "total_steps": 8674, "loss": 0.44093143939971924, "lr": 4.239970040545442e-07, "epoch": 1.421489508877104, "percentage": 71.07, "elapsed_time": "4:12:45", "remaining_time": "1:42:51"} +{"current_steps": 6166, "total_steps": 8674, "loss": 0.3840683102607727, "lr": 4.236853848438654e-07, "epoch": 1.4217200830066867, "percentage": 71.09, "elapsed_time": "4:12:47", "remaining_time": "1:42:49"} +{"current_steps": 6167, "total_steps": 8674, "loss": 0.49814748764038086, "lr": 4.23373849406357e-07, "epoch": 1.4219506571362692, "percentage": 71.1, "elapsed_time": "4:12:50", "remaining_time": "1:42:47"} +{"current_steps": 6168, "total_steps": 8674, "loss": 0.37481504678726196, "lr": 4.2306239778730314e-07, "epoch": 1.422181231265852, "percentage": 71.11, "elapsed_time": "4:12:52", "remaining_time": "1:42:44"} +{"current_steps": 6169, "total_steps": 8674, "loss": 0.3936859965324402, "lr": 4.227510300319772e-07, "epoch": 1.4224118053954347, "percentage": 71.12, "elapsed_time": "4:12:55", "remaining_time": "1:42:42"} +{"current_steps": 6170, "total_steps": 8674, "loss": 0.4448816478252411, "lr": 4.224397461856389e-07, "epoch": 1.4226423795250174, "percentage": 71.13, "elapsed_time": "4:12:57", "remaining_time": "1:42:39"} +{"current_steps": 6171, "total_steps": 8674, "loss": 0.5494886040687561, "lr": 
4.22128546293537e-07, "epoch": 1.4228729536546, "percentage": 71.14, "elapsed_time": "4:13:00", "remaining_time": "1:42:37"} +{"current_steps": 6172, "total_steps": 8674, "loss": 0.4532161355018616, "lr": 4.218174304009078e-07, "epoch": 1.4231035277841826, "percentage": 71.16, "elapsed_time": "4:13:02", "remaining_time": "1:42:34"} +{"current_steps": 6173, "total_steps": 8674, "loss": 0.4771450161933899, "lr": 4.215063985529743e-07, "epoch": 1.4233341019137653, "percentage": 71.17, "elapsed_time": "4:13:04", "remaining_time": "1:42:32"} +{"current_steps": 6174, "total_steps": 8674, "loss": 0.40784329175949097, "lr": 4.211954507949491e-07, "epoch": 1.4235646760433478, "percentage": 71.18, "elapsed_time": "4:13:07", "remaining_time": "1:42:29"} +{"current_steps": 6175, "total_steps": 8674, "loss": 0.5336268544197083, "lr": 4.208845871720308e-07, "epoch": 1.4237952501729305, "percentage": 71.19, "elapsed_time": "4:13:09", "remaining_time": "1:42:27"} +{"current_steps": 6176, "total_steps": 8674, "loss": 0.44641751050949097, "lr": 4.205738077294072e-07, "epoch": 1.4240258243025132, "percentage": 71.2, "elapsed_time": "4:13:12", "remaining_time": "1:42:24"} +{"current_steps": 6177, "total_steps": 8674, "loss": 0.4370793104171753, "lr": 4.2026311251225264e-07, "epoch": 1.424256398432096, "percentage": 71.21, "elapsed_time": "4:13:14", "remaining_time": "1:42:22"} +{"current_steps": 6178, "total_steps": 8674, "loss": 0.4290730953216553, "lr": 4.1995250156573046e-07, "epoch": 1.4244869725616787, "percentage": 71.22, "elapsed_time": "4:13:17", "remaining_time": "1:42:19"} +{"current_steps": 6179, "total_steps": 8674, "loss": 0.5021491646766663, "lr": 4.196419749349904e-07, "epoch": 1.4247175466912612, "percentage": 71.24, "elapsed_time": "4:13:19", "remaining_time": "1:42:17"} +{"current_steps": 6180, "total_steps": 8674, "loss": 0.3880186080932617, "lr": 4.193315326651711e-07, "epoch": 1.4249481208208439, "percentage": 71.25, "elapsed_time": "4:13:21", "remaining_time": 
"1:42:14"} +{"current_steps": 6181, "total_steps": 8674, "loss": 0.46498721837997437, "lr": 4.1902117480139876e-07, "epoch": 1.4251786949504266, "percentage": 71.26, "elapsed_time": "4:13:24", "remaining_time": "1:42:12"} +{"current_steps": 6182, "total_steps": 8674, "loss": 0.45799821615219116, "lr": 4.187109013887863e-07, "epoch": 1.425409269080009, "percentage": 71.27, "elapsed_time": "4:13:26", "remaining_time": "1:42:09"} +{"current_steps": 6183, "total_steps": 8674, "loss": 0.47459733486175537, "lr": 4.1840071247243594e-07, "epoch": 1.4256398432095918, "percentage": 71.28, "elapsed_time": "4:13:28", "remaining_time": "1:42:07"} +{"current_steps": 6184, "total_steps": 8674, "loss": 0.47636276483535767, "lr": 4.18090608097436e-07, "epoch": 1.4258704173391745, "percentage": 71.29, "elapsed_time": "4:13:31", "remaining_time": "1:42:04"} +{"current_steps": 6185, "total_steps": 8674, "loss": 0.4710165858268738, "lr": 4.17780588308864e-07, "epoch": 1.4261009914687572, "percentage": 71.31, "elapsed_time": "4:13:33", "remaining_time": "1:42:02"} +{"current_steps": 6186, "total_steps": 8674, "loss": 0.4222904443740845, "lr": 4.174706531517836e-07, "epoch": 1.42633156559834, "percentage": 71.32, "elapsed_time": "4:13:36", "remaining_time": "1:42:00"} +{"current_steps": 6187, "total_steps": 8674, "loss": 0.43496620655059814, "lr": 4.171608026712476e-07, "epoch": 1.4265621397279225, "percentage": 71.33, "elapsed_time": "4:13:38", "remaining_time": "1:41:57"} +{"current_steps": 6188, "total_steps": 8674, "loss": 0.5178344249725342, "lr": 4.1685103691229597e-07, "epoch": 1.4267927138575052, "percentage": 71.34, "elapsed_time": "4:13:41", "remaining_time": "1:41:55"} +{"current_steps": 6189, "total_steps": 8674, "loss": 0.4033231735229492, "lr": 4.1654135591995644e-07, "epoch": 1.4270232879870879, "percentage": 71.35, "elapsed_time": "4:13:43", "remaining_time": "1:41:52"} +{"current_steps": 6190, "total_steps": 8674, "loss": 0.3368793725967407, "lr": 4.162317597392436e-07, 
"epoch": 1.4272538621166704, "percentage": 71.36, "elapsed_time": "4:13:45", "remaining_time": "1:41:50"} +{"current_steps": 6191, "total_steps": 8674, "loss": 0.44133609533309937, "lr": 4.159222484151612e-07, "epoch": 1.427484436246253, "percentage": 71.37, "elapsed_time": "4:13:48", "remaining_time": "1:41:47"} +{"current_steps": 6192, "total_steps": 8674, "loss": 0.431888222694397, "lr": 4.1561282199269944e-07, "epoch": 1.4277150103758358, "percentage": 71.39, "elapsed_time": "4:13:50", "remaining_time": "1:41:45"} +{"current_steps": 6193, "total_steps": 8674, "loss": 0.4319697618484497, "lr": 4.1530348051683615e-07, "epoch": 1.4279455845054185, "percentage": 71.4, "elapsed_time": "4:13:53", "remaining_time": "1:41:42"} +{"current_steps": 6194, "total_steps": 8674, "loss": 0.5468018054962158, "lr": 4.1499422403253783e-07, "epoch": 1.4281761586350012, "percentage": 71.41, "elapsed_time": "4:13:55", "remaining_time": "1:41:40"} +{"current_steps": 6195, "total_steps": 8674, "loss": 0.5083246231079102, "lr": 4.1468505258475784e-07, "epoch": 1.4284067327645837, "percentage": 71.42, "elapsed_time": "4:13:58", "remaining_time": "1:41:37"} +{"current_steps": 6196, "total_steps": 8674, "loss": 0.3767821788787842, "lr": 4.1437596621843774e-07, "epoch": 1.4286373068941665, "percentage": 71.43, "elapsed_time": "4:14:00", "remaining_time": "1:41:35"} +{"current_steps": 6197, "total_steps": 8674, "loss": 0.5210238099098206, "lr": 4.140669649785058e-07, "epoch": 1.4288678810237492, "percentage": 71.44, "elapsed_time": "4:14:03", "remaining_time": "1:41:32"} +{"current_steps": 6198, "total_steps": 8674, "loss": 0.4498119354248047, "lr": 4.1375804890987907e-07, "epoch": 1.4290984551533317, "percentage": 71.45, "elapsed_time": "4:14:05", "remaining_time": "1:41:30"} +{"current_steps": 6199, "total_steps": 8674, "loss": 0.5093557238578796, "lr": 4.134492180574609e-07, "epoch": 1.4293290292829144, "percentage": 71.47, "elapsed_time": "4:14:07", "remaining_time": "1:41:27"} 
+{"current_steps": 6200, "total_steps": 8674, "loss": 0.4745742082595825, "lr": 4.131404724661438e-07, "epoch": 1.429559603412497, "percentage": 71.48, "elapsed_time": "4:14:10", "remaining_time": "1:41:25"} +{"current_steps": 6201, "total_steps": 8674, "loss": 0.45697301626205444, "lr": 4.128318121808068e-07, "epoch": 1.4297901775420798, "percentage": 71.49, "elapsed_time": "4:14:14", "remaining_time": "1:41:23"} +{"current_steps": 6202, "total_steps": 8674, "loss": 0.4690994918346405, "lr": 4.125232372463161e-07, "epoch": 1.4300207516716625, "percentage": 71.5, "elapsed_time": "4:14:16", "remaining_time": "1:41:21"} +{"current_steps": 6203, "total_steps": 8674, "loss": 0.49369046092033386, "lr": 4.1221474770752696e-07, "epoch": 1.430251325801245, "percentage": 71.51, "elapsed_time": "4:14:19", "remaining_time": "1:41:18"} +{"current_steps": 6204, "total_steps": 8674, "loss": 0.5137126445770264, "lr": 4.1190634360928113e-07, "epoch": 1.4304818999308277, "percentage": 71.52, "elapsed_time": "4:14:21", "remaining_time": "1:41:16"} +{"current_steps": 6205, "total_steps": 8674, "loss": 0.43663549423217773, "lr": 4.1159802499640883e-07, "epoch": 1.4307124740604105, "percentage": 71.54, "elapsed_time": "4:14:24", "remaining_time": "1:41:13"} +{"current_steps": 6206, "total_steps": 8674, "loss": 0.40197718143463135, "lr": 4.112897919137265e-07, "epoch": 1.430943048189993, "percentage": 71.55, "elapsed_time": "4:14:26", "remaining_time": "1:41:11"} +{"current_steps": 6207, "total_steps": 8674, "loss": 0.5537480115890503, "lr": 4.1098164440603967e-07, "epoch": 1.4311736223195757, "percentage": 71.56, "elapsed_time": "4:14:28", "remaining_time": "1:41:08"} +{"current_steps": 6208, "total_steps": 8674, "loss": 0.36077365279197693, "lr": 4.1067358251814e-07, "epoch": 1.4314041964491584, "percentage": 71.57, "elapsed_time": "4:14:31", "remaining_time": "1:41:06"} +{"current_steps": 6209, "total_steps": 8674, "loss": 0.5421038866043091, "lr": 4.103656062948081e-07, "epoch": 
1.4316347705787411, "percentage": 71.58, "elapsed_time": "4:14:33", "remaining_time": "1:41:03"} +{"current_steps": 6210, "total_steps": 8674, "loss": 0.4330317974090576, "lr": 4.100577157808107e-07, "epoch": 1.4318653447083238, "percentage": 71.59, "elapsed_time": "4:14:36", "remaining_time": "1:41:01"} +{"current_steps": 6211, "total_steps": 8674, "loss": 0.4734618067741394, "lr": 4.0974991102090315e-07, "epoch": 1.4320959188379063, "percentage": 71.6, "elapsed_time": "4:14:38", "remaining_time": "1:40:58"} +{"current_steps": 6212, "total_steps": 8674, "loss": 0.4664125442504883, "lr": 4.0944219205982853e-07, "epoch": 1.432326492967489, "percentage": 71.62, "elapsed_time": "4:14:41", "remaining_time": "1:40:56"} +{"current_steps": 6213, "total_steps": 8674, "loss": 0.5214053988456726, "lr": 4.09134558942316e-07, "epoch": 1.4325570670970718, "percentage": 71.63, "elapsed_time": "4:14:43", "remaining_time": "1:40:53"} +{"current_steps": 6214, "total_steps": 8674, "loss": 0.4694370627403259, "lr": 4.08827011713084e-07, "epoch": 1.4327876412266543, "percentage": 71.64, "elapsed_time": "4:14:46", "remaining_time": "1:40:51"} +{"current_steps": 6215, "total_steps": 8674, "loss": 0.46517378091812134, "lr": 4.0851955041683674e-07, "epoch": 1.433018215356237, "percentage": 71.65, "elapsed_time": "4:14:48", "remaining_time": "1:40:49"} +{"current_steps": 6216, "total_steps": 8674, "loss": 0.49152523279190063, "lr": 4.0821217509826766e-07, "epoch": 1.4332487894858197, "percentage": 71.66, "elapsed_time": "4:14:51", "remaining_time": "1:40:46"} +{"current_steps": 6217, "total_steps": 8674, "loss": 0.4272884726524353, "lr": 4.0790488580205616e-07, "epoch": 1.4334793636154024, "percentage": 71.67, "elapsed_time": "4:14:53", "remaining_time": "1:40:44"} +{"current_steps": 6218, "total_steps": 8674, "loss": 0.4585829973220825, "lr": 4.075976825728703e-07, "epoch": 1.4337099377449851, "percentage": 71.69, "elapsed_time": "4:14:56", "remaining_time": "1:40:41"} +{"current_steps": 
6219, "total_steps": 8674, "loss": 0.33463186025619507, "lr": 4.07290565455365e-07, "epoch": 1.4339405118745676, "percentage": 71.7, "elapsed_time": "4:14:59", "remaining_time": "1:40:39"} +{"current_steps": 6220, "total_steps": 8674, "loss": 0.4228953719139099, "lr": 4.0698353449418344e-07, "epoch": 1.4341710860041503, "percentage": 71.71, "elapsed_time": "4:15:01", "remaining_time": "1:40:36"} +{"current_steps": 6221, "total_steps": 8674, "loss": 0.5336583256721497, "lr": 4.066765897339547e-07, "epoch": 1.434401660133733, "percentage": 71.72, "elapsed_time": "4:15:03", "remaining_time": "1:40:34"} +{"current_steps": 6222, "total_steps": 8674, "loss": 0.4779771864414215, "lr": 4.063697312192972e-07, "epoch": 1.4346322342633155, "percentage": 71.73, "elapsed_time": "4:15:06", "remaining_time": "1:40:31"} +{"current_steps": 6223, "total_steps": 8674, "loss": 0.35226666927337646, "lr": 4.060629589948155e-07, "epoch": 1.4348628083928983, "percentage": 71.74, "elapsed_time": "4:15:08", "remaining_time": "1:40:29"} +{"current_steps": 6224, "total_steps": 8674, "loss": 0.5006309747695923, "lr": 4.0575627310510174e-07, "epoch": 1.435093382522481, "percentage": 71.75, "elapsed_time": "4:15:11", "remaining_time": "1:40:27"} +{"current_steps": 6225, "total_steps": 8674, "loss": 0.3925382196903229, "lr": 4.0544967359473645e-07, "epoch": 1.4353239566520637, "percentage": 71.77, "elapsed_time": "4:15:13", "remaining_time": "1:40:24"} +{"current_steps": 6226, "total_steps": 8674, "loss": 0.3443659543991089, "lr": 4.0514316050828643e-07, "epoch": 1.4355545307816464, "percentage": 71.78, "elapsed_time": "4:15:16", "remaining_time": "1:40:22"} +{"current_steps": 6227, "total_steps": 8674, "loss": 0.35585030913352966, "lr": 4.048367338903067e-07, "epoch": 1.435785104911229, "percentage": 71.79, "elapsed_time": "4:15:18", "remaining_time": "1:40:19"} +{"current_steps": 6228, "total_steps": 8674, "loss": 0.4147206246852875, "lr": 4.045303937853395e-07, "epoch": 1.4360156790408116, 
"percentage": 71.8, "elapsed_time": "4:15:21", "remaining_time": "1:40:17"} +{"current_steps": 6229, "total_steps": 8674, "loss": 0.4475427567958832, "lr": 4.0422414023791486e-07, "epoch": 1.4362462531703943, "percentage": 71.81, "elapsed_time": "4:15:23", "remaining_time": "1:40:14"} +{"current_steps": 6230, "total_steps": 8674, "loss": 0.5235386490821838, "lr": 4.0391797329254897e-07, "epoch": 1.4364768272999768, "percentage": 71.82, "elapsed_time": "4:15:25", "remaining_time": "1:40:12"} +{"current_steps": 6231, "total_steps": 8674, "loss": 0.3543087840080261, "lr": 4.036118929937472e-07, "epoch": 1.4367074014295595, "percentage": 71.84, "elapsed_time": "4:15:28", "remaining_time": "1:40:09"} +{"current_steps": 6232, "total_steps": 8674, "loss": 0.4718255400657654, "lr": 4.03305899386001e-07, "epoch": 1.4369379755591423, "percentage": 71.85, "elapsed_time": "4:15:30", "remaining_time": "1:40:07"} +{"current_steps": 6233, "total_steps": 8674, "loss": 0.41239792108535767, "lr": 4.0299999251378924e-07, "epoch": 1.437168549688725, "percentage": 71.86, "elapsed_time": "4:15:33", "remaining_time": "1:40:04"} +{"current_steps": 6234, "total_steps": 8674, "loss": 0.4241238236427307, "lr": 4.026941724215791e-07, "epoch": 1.4373991238183077, "percentage": 71.87, "elapsed_time": "4:15:35", "remaining_time": "1:40:02"} +{"current_steps": 6235, "total_steps": 8674, "loss": 0.43678992986679077, "lr": 4.0238843915382435e-07, "epoch": 1.4376296979478902, "percentage": 71.88, "elapsed_time": "4:15:38", "remaining_time": "1:40:00"} +{"current_steps": 6236, "total_steps": 8674, "loss": 0.4304202198982239, "lr": 4.0208279275496706e-07, "epoch": 1.437860272077473, "percentage": 71.89, "elapsed_time": "4:15:40", "remaining_time": "1:39:57"} +{"current_steps": 6237, "total_steps": 8674, "loss": 0.4297143816947937, "lr": 4.0177723326943516e-07, "epoch": 1.4380908462070556, "percentage": 71.9, "elapsed_time": "4:15:43", "remaining_time": "1:39:55"} +{"current_steps": 6238, 
"total_steps": 8674, "loss": 0.4823951721191406, "lr": 4.0147176074164557e-07, "epoch": 1.4383214203366381, "percentage": 71.92, "elapsed_time": "4:15:45", "remaining_time": "1:39:52"} +{"current_steps": 6239, "total_steps": 8674, "loss": 0.41384291648864746, "lr": 4.0116637521600104e-07, "epoch": 1.4385519944662208, "percentage": 71.93, "elapsed_time": "4:15:48", "remaining_time": "1:39:50"} +{"current_steps": 6240, "total_steps": 8674, "loss": 0.5725995898246765, "lr": 4.008610767368933e-07, "epoch": 1.4387825685958036, "percentage": 71.94, "elapsed_time": "4:15:50", "remaining_time": "1:39:47"} +{"current_steps": 6241, "total_steps": 8674, "loss": 0.5222553014755249, "lr": 4.0055586534869976e-07, "epoch": 1.4390131427253863, "percentage": 71.95, "elapsed_time": "4:15:52", "remaining_time": "1:39:45"} +{"current_steps": 6242, "total_steps": 8674, "loss": 0.33871912956237793, "lr": 4.002507410957864e-07, "epoch": 1.439243716854969, "percentage": 71.96, "elapsed_time": "4:15:55", "remaining_time": "1:39:42"} +{"current_steps": 6243, "total_steps": 8674, "loss": 0.423028826713562, "lr": 3.9994570402250647e-07, "epoch": 1.4394742909845515, "percentage": 71.97, "elapsed_time": "4:15:58", "remaining_time": "1:39:40"} +{"current_steps": 6244, "total_steps": 8674, "loss": 0.4235682785511017, "lr": 3.996407541731994e-07, "epoch": 1.4397048651141342, "percentage": 71.99, "elapsed_time": "4:16:00", "remaining_time": "1:39:37"} +{"current_steps": 6245, "total_steps": 8674, "loss": 0.43758147954940796, "lr": 3.993358915921936e-07, "epoch": 1.439935439243717, "percentage": 72.0, "elapsed_time": "4:16:02", "remaining_time": "1:39:35"} +{"current_steps": 6246, "total_steps": 8674, "loss": 0.4462485611438751, "lr": 3.9903111632380314e-07, "epoch": 1.4401660133732994, "percentage": 72.01, "elapsed_time": "4:16:05", "remaining_time": "1:39:32"} +{"current_steps": 6247, "total_steps": 8674, "loss": 0.4650310277938843, "lr": 3.9872642841233086e-07, "epoch": 1.4403965875028821, 
"percentage": 72.02, "elapsed_time": "4:16:07", "remaining_time": "1:39:30"} +{"current_steps": 6248, "total_steps": 8674, "loss": 0.36653342843055725, "lr": 3.984218279020656e-07, "epoch": 1.4406271616324648, "percentage": 72.03, "elapsed_time": "4:16:10", "remaining_time": "1:39:27"} +{"current_steps": 6249, "total_steps": 8674, "loss": 0.4102433919906616, "lr": 3.9811731483728483e-07, "epoch": 1.4408577357620476, "percentage": 72.04, "elapsed_time": "4:16:12", "remaining_time": "1:39:25"} +{"current_steps": 6250, "total_steps": 8674, "loss": 0.46611371636390686, "lr": 3.9781288926225187e-07, "epoch": 1.4410883098916303, "percentage": 72.05, "elapsed_time": "4:16:14", "remaining_time": "1:39:23"} +{"current_steps": 6251, "total_steps": 8674, "loss": 0.39757978916168213, "lr": 3.9750855122121854e-07, "epoch": 1.4413188840212128, "percentage": 72.07, "elapsed_time": "4:16:17", "remaining_time": "1:39:20"} +{"current_steps": 6252, "total_steps": 8674, "loss": 0.3736093044281006, "lr": 3.972043007584236e-07, "epoch": 1.4415494581507955, "percentage": 72.08, "elapsed_time": "4:16:20", "remaining_time": "1:39:18"} +{"current_steps": 6253, "total_steps": 8674, "loss": 0.4907599091529846, "lr": 3.9690013791809243e-07, "epoch": 1.4417800322803782, "percentage": 72.09, "elapsed_time": "4:16:22", "remaining_time": "1:39:15"} +{"current_steps": 6254, "total_steps": 8674, "loss": 0.4852679967880249, "lr": 3.965960627444387e-07, "epoch": 1.4420106064099607, "percentage": 72.1, "elapsed_time": "4:16:24", "remaining_time": "1:39:13"} +{"current_steps": 6255, "total_steps": 8674, "loss": 0.3681846261024475, "lr": 3.962920752816622e-07, "epoch": 1.4422411805395434, "percentage": 72.11, "elapsed_time": "4:16:27", "remaining_time": "1:39:10"} +{"current_steps": 6256, "total_steps": 8674, "loss": 0.36029407382011414, "lr": 3.9598817557395136e-07, "epoch": 1.4424717546691261, "percentage": 72.12, "elapsed_time": "4:16:29", "remaining_time": "1:39:08"} +{"current_steps": 6257, 
"total_steps": 8674, "loss": 0.4156547486782074, "lr": 3.9568436366548044e-07, "epoch": 1.4427023287987089, "percentage": 72.14, "elapsed_time": "4:16:32", "remaining_time": "1:39:05"} +{"current_steps": 6258, "total_steps": 8674, "loss": 0.417999804019928, "lr": 3.9538063960041155e-07, "epoch": 1.4429329029282916, "percentage": 72.15, "elapsed_time": "4:16:34", "remaining_time": "1:39:03"} +{"current_steps": 6259, "total_steps": 8674, "loss": 0.34347790479660034, "lr": 3.9507700342289454e-07, "epoch": 1.443163477057874, "percentage": 72.16, "elapsed_time": "4:16:37", "remaining_time": "1:39:00"} +{"current_steps": 6260, "total_steps": 8674, "loss": 0.5093958973884583, "lr": 3.9477345517706606e-07, "epoch": 1.4433940511874568, "percentage": 72.17, "elapsed_time": "4:16:39", "remaining_time": "1:38:58"} +{"current_steps": 6261, "total_steps": 8674, "loss": 0.45406264066696167, "lr": 3.9446999490704935e-07, "epoch": 1.4436246253170395, "percentage": 72.18, "elapsed_time": "4:16:42", "remaining_time": "1:38:56"} +{"current_steps": 6262, "total_steps": 8674, "loss": 0.35074740648269653, "lr": 3.941666226569561e-07, "epoch": 1.443855199446622, "percentage": 72.19, "elapsed_time": "4:16:44", "remaining_time": "1:38:53"} +{"current_steps": 6263, "total_steps": 8674, "loss": 0.4588093161582947, "lr": 3.9386333847088414e-07, "epoch": 1.4440857735762047, "percentage": 72.2, "elapsed_time": "4:16:47", "remaining_time": "1:38:51"} +{"current_steps": 6264, "total_steps": 8674, "loss": 0.5431508421897888, "lr": 3.935601423929187e-07, "epoch": 1.4443163477057874, "percentage": 72.22, "elapsed_time": "4:16:49", "remaining_time": "1:38:48"} +{"current_steps": 6265, "total_steps": 8674, "loss": 0.5942284464836121, "lr": 3.9325703446713253e-07, "epoch": 1.4445469218353701, "percentage": 72.23, "elapsed_time": "4:16:51", "remaining_time": "1:38:46"} +{"current_steps": 6266, "total_steps": 8674, "loss": 0.45533287525177, "lr": 3.929540147375856e-07, "epoch": 1.4447774959649529, 
"percentage": 72.24, "elapsed_time": "4:16:54", "remaining_time": "1:38:43"} +{"current_steps": 6267, "total_steps": 8674, "loss": 0.41154634952545166, "lr": 3.926510832483252e-07, "epoch": 1.4450080700945354, "percentage": 72.25, "elapsed_time": "4:16:56", "remaining_time": "1:38:41"} +{"current_steps": 6268, "total_steps": 8674, "loss": 0.548882246017456, "lr": 3.923482400433847e-07, "epoch": 1.445238644224118, "percentage": 72.26, "elapsed_time": "4:16:59", "remaining_time": "1:38:38"} +{"current_steps": 6269, "total_steps": 8674, "loss": 0.4062466621398926, "lr": 3.9204548516678635e-07, "epoch": 1.4454692183537008, "percentage": 72.27, "elapsed_time": "4:17:02", "remaining_time": "1:38:36"} +{"current_steps": 6270, "total_steps": 8674, "loss": 0.39035165309906006, "lr": 3.917428186625378e-07, "epoch": 1.4456997924832833, "percentage": 72.28, "elapsed_time": "4:17:04", "remaining_time": "1:38:34"} +{"current_steps": 6271, "total_steps": 8674, "loss": 0.44899889826774597, "lr": 3.9144024057463545e-07, "epoch": 1.445930366612866, "percentage": 72.3, "elapsed_time": "4:17:07", "remaining_time": "1:38:31"} +{"current_steps": 6272, "total_steps": 8674, "loss": 0.5676968097686768, "lr": 3.911377509470616e-07, "epoch": 1.4461609407424487, "percentage": 72.31, "elapsed_time": "4:17:09", "remaining_time": "1:38:29"} +{"current_steps": 6273, "total_steps": 8674, "loss": 0.5157150626182556, "lr": 3.9083534982378596e-07, "epoch": 1.4463915148720314, "percentage": 72.32, "elapsed_time": "4:17:11", "remaining_time": "1:38:26"} +{"current_steps": 6274, "total_steps": 8674, "loss": 0.4405839443206787, "lr": 3.9053303724876595e-07, "epoch": 1.4466220890016142, "percentage": 72.33, "elapsed_time": "4:17:14", "remaining_time": "1:38:24"} +{"current_steps": 6275, "total_steps": 8674, "loss": 0.4184240400791168, "lr": 3.9023081326594564e-07, "epoch": 1.4468526631311966, "percentage": 72.34, "elapsed_time": "4:17:16", "remaining_time": "1:38:21"} +{"current_steps": 6276, 
"total_steps": 8674, "loss": 0.46825113892555237, "lr": 3.8992867791925687e-07, "epoch": 1.4470832372607794, "percentage": 72.35, "elapsed_time": "4:17:19", "remaining_time": "1:38:19"} +{"current_steps": 6277, "total_steps": 8674, "loss": 0.39870697259902954, "lr": 3.896266312526174e-07, "epoch": 1.447313811390362, "percentage": 72.37, "elapsed_time": "4:17:21", "remaining_time": "1:38:16"} +{"current_steps": 6278, "total_steps": 8674, "loss": 0.5021681785583496, "lr": 3.893246733099332e-07, "epoch": 1.4475443855199446, "percentage": 72.38, "elapsed_time": "4:17:24", "remaining_time": "1:38:14"} +{"current_steps": 6279, "total_steps": 8674, "loss": 0.5453378558158875, "lr": 3.890228041350966e-07, "epoch": 1.4477749596495273, "percentage": 72.39, "elapsed_time": "4:17:26", "remaining_time": "1:38:11"} +{"current_steps": 6280, "total_steps": 8674, "loss": 0.4488704800605774, "lr": 3.887210237719877e-07, "epoch": 1.44800553377911, "percentage": 72.4, "elapsed_time": "4:17:28", "remaining_time": "1:38:09"} +{"current_steps": 6281, "total_steps": 8674, "loss": 0.45669007301330566, "lr": 3.8841933226447274e-07, "epoch": 1.4482361079086927, "percentage": 72.41, "elapsed_time": "4:17:31", "remaining_time": "1:38:06"} +{"current_steps": 6282, "total_steps": 8674, "loss": 0.43954944610595703, "lr": 3.881177296564061e-07, "epoch": 1.4484666820382754, "percentage": 72.42, "elapsed_time": "4:17:33", "remaining_time": "1:38:04"} +{"current_steps": 6283, "total_steps": 8674, "loss": 0.39490729570388794, "lr": 3.8781621599162896e-07, "epoch": 1.448697256167858, "percentage": 72.43, "elapsed_time": "4:17:36", "remaining_time": "1:38:01"} +{"current_steps": 6284, "total_steps": 8674, "loss": 0.44206392765045166, "lr": 3.875147913139688e-07, "epoch": 1.4489278302974407, "percentage": 72.45, "elapsed_time": "4:17:38", "remaining_time": "1:37:59"} +{"current_steps": 6285, "total_steps": 8674, "loss": 0.3874932527542114, "lr": 3.872134556672415e-07, "epoch": 1.4491584044270232, 
"percentage": 72.46, "elapsed_time": "4:17:41", "remaining_time": "1:37:57"} +{"current_steps": 6286, "total_steps": 8674, "loss": 0.4762042760848999, "lr": 3.8691220909524847e-07, "epoch": 1.4493889785566059, "percentage": 72.47, "elapsed_time": "4:17:43", "remaining_time": "1:37:54"} +{"current_steps": 6287, "total_steps": 8674, "loss": 0.45220378041267395, "lr": 3.8661105164177955e-07, "epoch": 1.4496195526861886, "percentage": 72.48, "elapsed_time": "4:17:46", "remaining_time": "1:37:52"} +{"current_steps": 6288, "total_steps": 8674, "loss": 0.48711973428726196, "lr": 3.863099833506105e-07, "epoch": 1.4498501268157713, "percentage": 72.49, "elapsed_time": "4:17:48", "remaining_time": "1:37:49"} +{"current_steps": 6289, "total_steps": 8674, "loss": 0.3985457420349121, "lr": 3.8600900426550495e-07, "epoch": 1.450080700945354, "percentage": 72.5, "elapsed_time": "4:17:50", "remaining_time": "1:37:47"} +{"current_steps": 6290, "total_steps": 8674, "loss": 0.4626576006412506, "lr": 3.8570811443021324e-07, "epoch": 1.4503112750749365, "percentage": 72.52, "elapsed_time": "4:17:53", "remaining_time": "1:37:44"} +{"current_steps": 6291, "total_steps": 8674, "loss": 0.49909156560897827, "lr": 3.8540731388847303e-07, "epoch": 1.4505418492045192, "percentage": 72.53, "elapsed_time": "4:17:55", "remaining_time": "1:37:42"} +{"current_steps": 6292, "total_steps": 8674, "loss": 0.47779160737991333, "lr": 3.8510660268400853e-07, "epoch": 1.450772423334102, "percentage": 72.54, "elapsed_time": "4:17:58", "remaining_time": "1:37:39"} +{"current_steps": 6293, "total_steps": 8674, "loss": 0.41273951530456543, "lr": 3.8480598086053073e-07, "epoch": 1.4510029974636844, "percentage": 72.55, "elapsed_time": "4:18:00", "remaining_time": "1:37:37"} +{"current_steps": 6294, "total_steps": 8674, "loss": 0.49659836292266846, "lr": 3.8450544846173873e-07, "epoch": 1.4512335715932672, "percentage": 72.56, "elapsed_time": "4:18:03", "remaining_time": "1:37:34"} +{"current_steps": 6295, 
"total_steps": 8674, "loss": 0.48864418268203735, "lr": 3.842050055313174e-07, "epoch": 1.4514641457228499, "percentage": 72.57, "elapsed_time": "4:18:05", "remaining_time": "1:37:32"} +{"current_steps": 6296, "total_steps": 8674, "loss": 0.4437263011932373, "lr": 3.8390465211293964e-07, "epoch": 1.4516947198524326, "percentage": 72.58, "elapsed_time": "4:18:07", "remaining_time": "1:37:29"} +{"current_steps": 6297, "total_steps": 8674, "loss": 0.4785847067832947, "lr": 3.83604388250264e-07, "epoch": 1.4519252939820153, "percentage": 72.6, "elapsed_time": "4:18:10", "remaining_time": "1:37:27"} +{"current_steps": 6298, "total_steps": 8674, "loss": 0.4376726746559143, "lr": 3.8330421398693815e-07, "epoch": 1.4521558681115978, "percentage": 72.61, "elapsed_time": "4:18:12", "remaining_time": "1:37:24"} +{"current_steps": 6299, "total_steps": 8674, "loss": 0.39121049642562866, "lr": 3.8300412936659456e-07, "epoch": 1.4523864422411805, "percentage": 72.62, "elapsed_time": "4:18:15", "remaining_time": "1:37:22"} +{"current_steps": 6300, "total_steps": 8674, "loss": 0.4635738730430603, "lr": 3.827041344328541e-07, "epoch": 1.4526170163707632, "percentage": 72.63, "elapsed_time": "4:18:17", "remaining_time": "1:37:19"} +{"current_steps": 6301, "total_steps": 8674, "loss": 0.502306342124939, "lr": 3.8240422922932345e-07, "epoch": 1.4528475905003457, "percentage": 72.64, "elapsed_time": "4:18:21", "remaining_time": "1:37:17"} +{"current_steps": 6302, "total_steps": 8674, "loss": 0.4401247799396515, "lr": 3.8210441379959765e-07, "epoch": 1.4530781646299284, "percentage": 72.65, "elapsed_time": "4:18:24", "remaining_time": "1:37:15"} +{"current_steps": 6303, "total_steps": 8674, "loss": 0.5291532874107361, "lr": 3.8180468818725744e-07, "epoch": 1.4533087387595112, "percentage": 72.67, "elapsed_time": "4:18:26", "remaining_time": "1:37:13"} +{"current_steps": 6304, "total_steps": 8674, "loss": 0.44658181071281433, "lr": 3.8150505243587074e-07, "epoch": 1.4535393128890939, 
"percentage": 72.68, "elapsed_time": "4:18:28", "remaining_time": "1:37:10"} +{"current_steps": 6305, "total_steps": 8674, "loss": 0.45127803087234497, "lr": 3.8120550658899284e-07, "epoch": 1.4537698870186766, "percentage": 72.69, "elapsed_time": "4:18:31", "remaining_time": "1:37:08"} +{"current_steps": 6306, "total_steps": 8674, "loss": 0.42187097668647766, "lr": 3.809060506901659e-07, "epoch": 1.454000461148259, "percentage": 72.7, "elapsed_time": "4:18:33", "remaining_time": "1:37:05"} +{"current_steps": 6307, "total_steps": 8674, "loss": 0.3573130667209625, "lr": 3.806066847829191e-07, "epoch": 1.4542310352778418, "percentage": 72.71, "elapsed_time": "4:18:36", "remaining_time": "1:37:03"} +{"current_steps": 6308, "total_steps": 8674, "loss": 0.4350733757019043, "lr": 3.8030740891076775e-07, "epoch": 1.4544616094074245, "percentage": 72.72, "elapsed_time": "4:18:38", "remaining_time": "1:37:00"} +{"current_steps": 6309, "total_steps": 8674, "loss": 0.48514148592948914, "lr": 3.8000822311721526e-07, "epoch": 1.454692183537007, "percentage": 72.73, "elapsed_time": "4:18:41", "remaining_time": "1:36:58"} +{"current_steps": 6310, "total_steps": 8674, "loss": 0.41036373376846313, "lr": 3.797091274457507e-07, "epoch": 1.4549227576665897, "percentage": 72.75, "elapsed_time": "4:18:43", "remaining_time": "1:36:55"} +{"current_steps": 6311, "total_steps": 8674, "loss": 0.4141424298286438, "lr": 3.7941012193985113e-07, "epoch": 1.4551533317961725, "percentage": 72.76, "elapsed_time": "4:18:46", "remaining_time": "1:36:53"} +{"current_steps": 6312, "total_steps": 8674, "loss": 0.4465962052345276, "lr": 3.7911120664297947e-07, "epoch": 1.4553839059257552, "percentage": 72.77, "elapsed_time": "4:18:48", "remaining_time": "1:36:50"} +{"current_steps": 6313, "total_steps": 8674, "loss": 0.42370718717575073, "lr": 3.7881238159858653e-07, "epoch": 1.455614480055338, "percentage": 72.78, "elapsed_time": "4:18:51", "remaining_time": "1:36:48"} +{"current_steps": 6314, 
"total_steps": 8674, "loss": 0.5199419260025024, "lr": 3.785136468501098e-07, "epoch": 1.4558450541849204, "percentage": 72.79, "elapsed_time": "4:18:53", "remaining_time": "1:36:45"} +{"current_steps": 6315, "total_steps": 8674, "loss": 0.4802842140197754, "lr": 3.782150024409727e-07, "epoch": 1.456075628314503, "percentage": 72.8, "elapsed_time": "4:18:55", "remaining_time": "1:36:43"} +{"current_steps": 6316, "total_steps": 8674, "loss": 0.4640405476093292, "lr": 3.77916448414587e-07, "epoch": 1.4563062024440858, "percentage": 72.82, "elapsed_time": "4:18:58", "remaining_time": "1:36:41"} +{"current_steps": 6317, "total_steps": 8674, "loss": 0.4338728189468384, "lr": 3.776179848143497e-07, "epoch": 1.4565367765736683, "percentage": 72.83, "elapsed_time": "4:19:00", "remaining_time": "1:36:38"} +{"current_steps": 6318, "total_steps": 8674, "loss": 0.42709267139434814, "lr": 3.7731961168364644e-07, "epoch": 1.456767350703251, "percentage": 72.84, "elapsed_time": "4:19:03", "remaining_time": "1:36:36"} +{"current_steps": 6319, "total_steps": 8674, "loss": 0.4985729455947876, "lr": 3.7702132906584784e-07, "epoch": 1.4569979248328337, "percentage": 72.85, "elapsed_time": "4:19:05", "remaining_time": "1:36:33"} +{"current_steps": 6320, "total_steps": 8674, "loss": 0.46335911750793457, "lr": 3.7672313700431277e-07, "epoch": 1.4572284989624165, "percentage": 72.86, "elapsed_time": "4:19:08", "remaining_time": "1:36:31"} +{"current_steps": 6321, "total_steps": 8674, "loss": 0.39897364377975464, "lr": 3.7642503554238657e-07, "epoch": 1.4574590730919992, "percentage": 72.87, "elapsed_time": "4:19:10", "remaining_time": "1:36:28"} +{"current_steps": 6322, "total_steps": 8674, "loss": 0.4338347017765045, "lr": 3.761270247234014e-07, "epoch": 1.4576896472215817, "percentage": 72.88, "elapsed_time": "4:19:12", "remaining_time": "1:36:26"} +{"current_steps": 6323, "total_steps": 8674, "loss": 0.4619752764701843, "lr": 3.7582910459067607e-07, "epoch": 1.4579202213511644, 
"percentage": 72.9, "elapsed_time": "4:19:15", "remaining_time": "1:36:23"} +{"current_steps": 6324, "total_steps": 8674, "loss": 0.4676104784011841, "lr": 3.7553127518751583e-07, "epoch": 1.458150795480747, "percentage": 72.91, "elapsed_time": "4:19:17", "remaining_time": "1:36:21"} +{"current_steps": 6325, "total_steps": 8674, "loss": 0.37536361813545227, "lr": 3.752335365572138e-07, "epoch": 1.4583813696103296, "percentage": 72.92, "elapsed_time": "4:19:20", "remaining_time": "1:36:18"} +{"current_steps": 6326, "total_steps": 8674, "loss": 0.4389209449291229, "lr": 3.749358887430487e-07, "epoch": 1.4586119437399123, "percentage": 72.93, "elapsed_time": "4:19:22", "remaining_time": "1:36:16"} +{"current_steps": 6327, "total_steps": 8674, "loss": 0.44722115993499756, "lr": 3.746383317882874e-07, "epoch": 1.458842517869495, "percentage": 72.94, "elapsed_time": "4:19:25", "remaining_time": "1:36:13"} +{"current_steps": 6328, "total_steps": 8674, "loss": 0.39179277420043945, "lr": 3.743408657361821e-07, "epoch": 1.4590730919990778, "percentage": 72.95, "elapsed_time": "4:19:27", "remaining_time": "1:36:11"} +{"current_steps": 6329, "total_steps": 8674, "loss": 0.4704967737197876, "lr": 3.7404349062997275e-07, "epoch": 1.4593036661286605, "percentage": 72.97, "elapsed_time": "4:19:30", "remaining_time": "1:36:08"} +{"current_steps": 6330, "total_steps": 8674, "loss": 0.4294360876083374, "lr": 3.737462065128859e-07, "epoch": 1.459534240258243, "percentage": 72.98, "elapsed_time": "4:19:32", "remaining_time": "1:36:06"} +{"current_steps": 6331, "total_steps": 8674, "loss": 0.5070170760154724, "lr": 3.734490134281353e-07, "epoch": 1.4597648143878257, "percentage": 72.99, "elapsed_time": "4:19:34", "remaining_time": "1:36:04"} +{"current_steps": 6332, "total_steps": 8674, "loss": 0.3670409023761749, "lr": 3.7315191141892013e-07, "epoch": 1.4599953885174084, "percentage": 73.0, "elapsed_time": "4:19:37", "remaining_time": "1:36:01"} +{"current_steps": 6333, "total_steps": 
8674, "loss": 0.5043025016784668, "lr": 3.7285490052842785e-07, "epoch": 1.460225962646991, "percentage": 73.01, "elapsed_time": "4:19:39", "remaining_time": "1:35:59"} +{"current_steps": 6334, "total_steps": 8674, "loss": 0.43942689895629883, "lr": 3.725579807998316e-07, "epoch": 1.4604565367765736, "percentage": 73.02, "elapsed_time": "4:19:42", "remaining_time": "1:35:56"} +{"current_steps": 6335, "total_steps": 8674, "loss": 0.3444882035255432, "lr": 3.7226115227629164e-07, "epoch": 1.4606871109061563, "percentage": 73.03, "elapsed_time": "4:19:44", "remaining_time": "1:35:54"} +{"current_steps": 6336, "total_steps": 8674, "loss": 0.3994483947753906, "lr": 3.71964415000955e-07, "epoch": 1.460917685035739, "percentage": 73.05, "elapsed_time": "4:19:47", "remaining_time": "1:35:51"} +{"current_steps": 6337, "total_steps": 8674, "loss": 0.3581928014755249, "lr": 3.7166776901695564e-07, "epoch": 1.4611482591653218, "percentage": 73.06, "elapsed_time": "4:19:49", "remaining_time": "1:35:49"} +{"current_steps": 6338, "total_steps": 8674, "loss": 0.4068276286125183, "lr": 3.7137121436741423e-07, "epoch": 1.4613788332949043, "percentage": 73.07, "elapsed_time": "4:19:52", "remaining_time": "1:35:46"} +{"current_steps": 6339, "total_steps": 8674, "loss": 0.4140080213546753, "lr": 3.710747510954376e-07, "epoch": 1.461609407424487, "percentage": 73.08, "elapsed_time": "4:19:54", "remaining_time": "1:35:44"} +{"current_steps": 6340, "total_steps": 8674, "loss": 0.4328460097312927, "lr": 3.707783792441201e-07, "epoch": 1.4618399815540697, "percentage": 73.09, "elapsed_time": "4:19:56", "remaining_time": "1:35:41"} +{"current_steps": 6341, "total_steps": 8674, "loss": 0.49252209067344666, "lr": 3.704820988565419e-07, "epoch": 1.4620705556836522, "percentage": 73.1, "elapsed_time": "4:19:59", "remaining_time": "1:35:39"} +{"current_steps": 6342, "total_steps": 8674, "loss": 0.43051671981811523, "lr": 3.7018590997577093e-07, "epoch": 1.462301129813235, "percentage": 73.12, 
"elapsed_time": "4:20:01", "remaining_time": "1:35:36"} +{"current_steps": 6343, "total_steps": 8674, "loss": 0.5131059288978577, "lr": 3.698898126448605e-07, "epoch": 1.4625317039428176, "percentage": 73.13, "elapsed_time": "4:20:04", "remaining_time": "1:35:34"} +{"current_steps": 6344, "total_steps": 8674, "loss": 0.4633597731590271, "lr": 3.6959380690685185e-07, "epoch": 1.4627622780724003, "percentage": 73.14, "elapsed_time": "4:20:06", "remaining_time": "1:35:32"} +{"current_steps": 6345, "total_steps": 8674, "loss": 0.3603428602218628, "lr": 3.6929789280477265e-07, "epoch": 1.462992852201983, "percentage": 73.15, "elapsed_time": "4:20:09", "remaining_time": "1:35:29"} +{"current_steps": 6346, "total_steps": 8674, "loss": 0.5337490439414978, "lr": 3.6900207038163633e-07, "epoch": 1.4632234263315655, "percentage": 73.16, "elapsed_time": "4:20:11", "remaining_time": "1:35:27"} +{"current_steps": 6347, "total_steps": 8674, "loss": 0.4940665066242218, "lr": 3.687063396804444e-07, "epoch": 1.4634540004611483, "percentage": 73.17, "elapsed_time": "4:20:14", "remaining_time": "1:35:24"} +{"current_steps": 6348, "total_steps": 8674, "loss": 0.45664387941360474, "lr": 3.6841070074418367e-07, "epoch": 1.463684574590731, "percentage": 73.18, "elapsed_time": "4:20:16", "remaining_time": "1:35:22"} +{"current_steps": 6349, "total_steps": 8674, "loss": 0.4546254277229309, "lr": 3.681151536158289e-07, "epoch": 1.4639151487203135, "percentage": 73.2, "elapsed_time": "4:20:19", "remaining_time": "1:35:19"} +{"current_steps": 6350, "total_steps": 8674, "loss": 0.37474149465560913, "lr": 3.6781969833834015e-07, "epoch": 1.4641457228498962, "percentage": 73.21, "elapsed_time": "4:20:21", "remaining_time": "1:35:17"} +{"current_steps": 6351, "total_steps": 8674, "loss": 0.38016337156295776, "lr": 3.675243349546655e-07, "epoch": 1.464376296979479, "percentage": 73.22, "elapsed_time": "4:20:23", "remaining_time": "1:35:14"} +{"current_steps": 6352, "total_steps": 8674, "loss": 
0.46079233288764954, "lr": 3.672290635077384e-07, "epoch": 1.4646068711090616, "percentage": 73.23, "elapsed_time": "4:20:26", "remaining_time": "1:35:12"} +{"current_steps": 6353, "total_steps": 8674, "loss": 0.39382117986679077, "lr": 3.669338840404799e-07, "epoch": 1.4648374452386443, "percentage": 73.24, "elapsed_time": "4:20:29", "remaining_time": "1:35:09"} +{"current_steps": 6354, "total_steps": 8674, "loss": 0.4502074718475342, "lr": 3.6663879659579766e-07, "epoch": 1.4650680193682268, "percentage": 73.25, "elapsed_time": "4:20:31", "remaining_time": "1:35:07"} +{"current_steps": 6355, "total_steps": 8674, "loss": 0.38199833035469055, "lr": 3.663438012165848e-07, "epoch": 1.4652985934978096, "percentage": 73.26, "elapsed_time": "4:20:34", "remaining_time": "1:35:05"} +{"current_steps": 6356, "total_steps": 8674, "loss": 0.4340086579322815, "lr": 3.660488979457228e-07, "epoch": 1.4655291676273923, "percentage": 73.28, "elapsed_time": "4:20:36", "remaining_time": "1:35:02"} +{"current_steps": 6357, "total_steps": 8674, "loss": 0.5425105094909668, "lr": 3.65754086826078e-07, "epoch": 1.4657597417569748, "percentage": 73.29, "elapsed_time": "4:20:38", "remaining_time": "1:35:00"} +{"current_steps": 6358, "total_steps": 8674, "loss": 0.4671604633331299, "lr": 3.654593679005048e-07, "epoch": 1.4659903158865575, "percentage": 73.3, "elapsed_time": "4:20:41", "remaining_time": "1:34:57"} +{"current_steps": 6359, "total_steps": 8674, "loss": 0.4608290195465088, "lr": 3.6516474121184317e-07, "epoch": 1.4662208900161402, "percentage": 73.31, "elapsed_time": "4:20:43", "remaining_time": "1:34:55"} +{"current_steps": 6360, "total_steps": 8674, "loss": 0.5272650122642517, "lr": 3.6487020680292023e-07, "epoch": 1.466451464145723, "percentage": 73.32, "elapsed_time": "4:20:46", "remaining_time": "1:34:52"} +{"current_steps": 6361, "total_steps": 8674, "loss": 0.40990152955055237, "lr": 3.645757647165495e-07, "epoch": 1.4666820382753056, "percentage": 73.33, "elapsed_time": 
"4:20:48", "remaining_time": "1:34:50"} +{"current_steps": 6362, "total_steps": 8674, "loss": 0.4723639488220215, "lr": 3.6428141499553166e-07, "epoch": 1.4669126124048881, "percentage": 73.35, "elapsed_time": "4:20:51", "remaining_time": "1:34:47"} +{"current_steps": 6363, "total_steps": 8674, "loss": 0.5115963220596313, "lr": 3.639871576826529e-07, "epoch": 1.4671431865344708, "percentage": 73.36, "elapsed_time": "4:20:53", "remaining_time": "1:34:45"} +{"current_steps": 6364, "total_steps": 8674, "loss": 0.44548431038856506, "lr": 3.636929928206862e-07, "epoch": 1.4673737606640536, "percentage": 73.37, "elapsed_time": "4:20:56", "remaining_time": "1:34:42"} +{"current_steps": 6365, "total_steps": 8674, "loss": 0.48599356412887573, "lr": 3.633989204523922e-07, "epoch": 1.467604334793636, "percentage": 73.38, "elapsed_time": "4:20:58", "remaining_time": "1:34:40"} +{"current_steps": 6366, "total_steps": 8674, "loss": 0.463236004114151, "lr": 3.631049406205164e-07, "epoch": 1.4678349089232188, "percentage": 73.39, "elapsed_time": "4:21:00", "remaining_time": "1:34:37"} +{"current_steps": 6367, "total_steps": 8674, "loss": 0.4840255379676819, "lr": 3.6281105336779225e-07, "epoch": 1.4680654830528015, "percentage": 73.4, "elapsed_time": "4:21:03", "remaining_time": "1:34:35"} +{"current_steps": 6368, "total_steps": 8674, "loss": 0.39191675186157227, "lr": 3.6251725873693926e-07, "epoch": 1.4682960571823842, "percentage": 73.41, "elapsed_time": "4:21:05", "remaining_time": "1:34:32"} +{"current_steps": 6369, "total_steps": 8674, "loss": 0.5161769986152649, "lr": 3.622235567706637e-07, "epoch": 1.468526631311967, "percentage": 73.43, "elapsed_time": "4:21:08", "remaining_time": "1:34:30"} +{"current_steps": 6370, "total_steps": 8674, "loss": 0.4579160213470459, "lr": 3.6192994751165764e-07, "epoch": 1.4687572054415494, "percentage": 73.44, "elapsed_time": "4:21:11", "remaining_time": "1:34:28"} +{"current_steps": 6371, "total_steps": 8674, "loss": 0.4254727363586426, 
"lr": 3.616364310026006e-07, "epoch": 1.4689877795711321, "percentage": 73.45, "elapsed_time": "4:21:13", "remaining_time": "1:34:25"} +{"current_steps": 6372, "total_steps": 8674, "loss": 0.3614911139011383, "lr": 3.613430072861575e-07, "epoch": 1.4692183537007149, "percentage": 73.46, "elapsed_time": "4:21:15", "remaining_time": "1:34:23"} +{"current_steps": 6373, "total_steps": 8674, "loss": 0.4501386284828186, "lr": 3.610496764049814e-07, "epoch": 1.4694489278302973, "percentage": 73.47, "elapsed_time": "4:21:18", "remaining_time": "1:34:20"} +{"current_steps": 6374, "total_steps": 8674, "loss": 0.4988802671432495, "lr": 3.607564384017102e-07, "epoch": 1.46967950195988, "percentage": 73.48, "elapsed_time": "4:21:20", "remaining_time": "1:34:18"} +{"current_steps": 6375, "total_steps": 8674, "loss": 0.4277713894844055, "lr": 3.6046329331896907e-07, "epoch": 1.4699100760894628, "percentage": 73.5, "elapsed_time": "4:21:22", "remaining_time": "1:34:15"} +{"current_steps": 6376, "total_steps": 8674, "loss": 0.5007919073104858, "lr": 3.601702411993697e-07, "epoch": 1.4701406502190455, "percentage": 73.51, "elapsed_time": "4:21:25", "remaining_time": "1:34:13"} +{"current_steps": 6377, "total_steps": 8674, "loss": 0.4857282042503357, "lr": 3.5987728208551015e-07, "epoch": 1.4703712243486282, "percentage": 73.52, "elapsed_time": "4:21:27", "remaining_time": "1:34:10"} +{"current_steps": 6378, "total_steps": 8674, "loss": 0.45752188563346863, "lr": 3.595844160199756e-07, "epoch": 1.4706017984782107, "percentage": 73.53, "elapsed_time": "4:21:30", "remaining_time": "1:34:08"} +{"current_steps": 6379, "total_steps": 8674, "loss": 0.4364059269428253, "lr": 3.592916430453361e-07, "epoch": 1.4708323726077934, "percentage": 73.54, "elapsed_time": "4:21:32", "remaining_time": "1:34:05"} +{"current_steps": 6380, "total_steps": 8674, "loss": 0.48765695095062256, "lr": 3.589989632041501e-07, "epoch": 1.4710629467373761, "percentage": 73.55, "elapsed_time": "4:21:35", 
"remaining_time": "1:34:03"} +{"current_steps": 6381, "total_steps": 8674, "loss": 0.5505347847938538, "lr": 3.5870637653896087e-07, "epoch": 1.4712935208669586, "percentage": 73.56, "elapsed_time": "4:21:37", "remaining_time": "1:34:00"} +{"current_steps": 6382, "total_steps": 8674, "loss": 0.4468069076538086, "lr": 3.584138830922994e-07, "epoch": 1.4715240949965414, "percentage": 73.58, "elapsed_time": "4:21:39", "remaining_time": "1:33:58"} +{"current_steps": 6383, "total_steps": 8674, "loss": 0.4050968289375305, "lr": 3.5812148290668186e-07, "epoch": 1.471754669126124, "percentage": 73.59, "elapsed_time": "4:21:42", "remaining_time": "1:33:55"} +{"current_steps": 6384, "total_steps": 8674, "loss": 0.47324883937835693, "lr": 3.578291760246122e-07, "epoch": 1.4719852432557068, "percentage": 73.6, "elapsed_time": "4:21:44", "remaining_time": "1:33:53"} +{"current_steps": 6385, "total_steps": 8674, "loss": 0.4431450366973877, "lr": 3.5753696248858025e-07, "epoch": 1.4722158173852895, "percentage": 73.61, "elapsed_time": "4:21:47", "remaining_time": "1:33:50"} +{"current_steps": 6386, "total_steps": 8674, "loss": 0.4599822163581848, "lr": 3.5724484234106166e-07, "epoch": 1.472446391514872, "percentage": 73.62, "elapsed_time": "4:21:49", "remaining_time": "1:33:48"} +{"current_steps": 6387, "total_steps": 8674, "loss": 0.3655046224594116, "lr": 3.5695281562451964e-07, "epoch": 1.4726769656444547, "percentage": 73.63, "elapsed_time": "4:21:51", "remaining_time": "1:33:45"} +{"current_steps": 6388, "total_steps": 8674, "loss": 0.4543811082839966, "lr": 3.5666088238140267e-07, "epoch": 1.4729075397740374, "percentage": 73.65, "elapsed_time": "4:21:54", "remaining_time": "1:33:43"} +{"current_steps": 6389, "total_steps": 8674, "loss": 0.45380568504333496, "lr": 3.563690426541469e-07, "epoch": 1.47313811390362, "percentage": 73.66, "elapsed_time": "4:21:56", "remaining_time": "1:33:40"} +{"current_steps": 6390, "total_steps": 8674, "loss": 0.3640294373035431, "lr": 
3.5607729648517336e-07, "epoch": 1.4733686880332026, "percentage": 73.67, "elapsed_time": "4:21:58", "remaining_time": "1:33:38"} +{"current_steps": 6391, "total_steps": 8674, "loss": 0.39890235662460327, "lr": 3.557856439168907e-07, "epoch": 1.4735992621627854, "percentage": 73.68, "elapsed_time": "4:22:01", "remaining_time": "1:33:36"} +{"current_steps": 6392, "total_steps": 8674, "loss": 0.47551727294921875, "lr": 3.5549408499169374e-07, "epoch": 1.473829836292368, "percentage": 73.69, "elapsed_time": "4:22:03", "remaining_time": "1:33:33"} +{"current_steps": 6393, "total_steps": 8674, "loss": 0.43851834535598755, "lr": 3.5520261975196364e-07, "epoch": 1.4740604104219508, "percentage": 73.7, "elapsed_time": "4:22:06", "remaining_time": "1:33:31"} +{"current_steps": 6394, "total_steps": 8674, "loss": 0.45289307832717896, "lr": 3.549112482400676e-07, "epoch": 1.4742909845515333, "percentage": 73.71, "elapsed_time": "4:22:08", "remaining_time": "1:33:28"} +{"current_steps": 6395, "total_steps": 8674, "loss": 0.5229180455207825, "lr": 3.546199704983591e-07, "epoch": 1.474521558681116, "percentage": 73.73, "elapsed_time": "4:22:11", "remaining_time": "1:33:26"} +{"current_steps": 6396, "total_steps": 8674, "loss": 0.47332310676574707, "lr": 3.5432878656917884e-07, "epoch": 1.4747521328106985, "percentage": 73.74, "elapsed_time": "4:22:13", "remaining_time": "1:33:23"} +{"current_steps": 6397, "total_steps": 8674, "loss": 0.4079092741012573, "lr": 3.540376964948529e-07, "epoch": 1.4749827069402812, "percentage": 73.75, "elapsed_time": "4:22:16", "remaining_time": "1:33:21"} +{"current_steps": 6398, "total_steps": 8674, "loss": 0.43366020917892456, "lr": 3.5374670031769484e-07, "epoch": 1.475213281069864, "percentage": 73.76, "elapsed_time": "4:22:18", "remaining_time": "1:33:18"} +{"current_steps": 6399, "total_steps": 8674, "loss": 0.45040106773376465, "lr": 3.5345579808000294e-07, "epoch": 1.4754438551994467, "percentage": 73.77, "elapsed_time": "4:22:21", 
"remaining_time": "1:33:16"} +{"current_steps": 6400, "total_steps": 8674, "loss": 0.4409756064414978, "lr": 3.531649898240634e-07, "epoch": 1.4756744293290294, "percentage": 73.78, "elapsed_time": "4:22:23", "remaining_time": "1:33:13"} +{"current_steps": 6401, "total_steps": 8674, "loss": 0.4141521751880646, "lr": 3.528742755921481e-07, "epoch": 1.4759050034586119, "percentage": 73.8, "elapsed_time": "4:22:27", "remaining_time": "1:33:11"} +{"current_steps": 6402, "total_steps": 8674, "loss": 0.4697296619415283, "lr": 3.525836554265156e-07, "epoch": 1.4761355775881946, "percentage": 73.81, "elapsed_time": "4:22:30", "remaining_time": "1:33:09"} +{"current_steps": 6403, "total_steps": 8674, "loss": 0.4369434714317322, "lr": 3.5229312936941013e-07, "epoch": 1.4763661517177773, "percentage": 73.82, "elapsed_time": "4:22:32", "remaining_time": "1:33:07"} +{"current_steps": 6404, "total_steps": 8674, "loss": 0.4197359085083008, "lr": 3.5200269746306224e-07, "epoch": 1.4765967258473598, "percentage": 73.83, "elapsed_time": "4:22:34", "remaining_time": "1:33:04"} +{"current_steps": 6405, "total_steps": 8674, "loss": 0.495933473110199, "lr": 3.5171235974968996e-07, "epoch": 1.4768272999769425, "percentage": 73.84, "elapsed_time": "4:22:37", "remaining_time": "1:33:02"} +{"current_steps": 6406, "total_steps": 8674, "loss": 0.4177231192588806, "lr": 3.51422116271496e-07, "epoch": 1.4770578741065252, "percentage": 73.85, "elapsed_time": "4:22:39", "remaining_time": "1:32:59"} +{"current_steps": 6407, "total_steps": 8674, "loss": 0.5366500020027161, "lr": 3.511319670706705e-07, "epoch": 1.477288448236108, "percentage": 73.86, "elapsed_time": "4:22:42", "remaining_time": "1:32:57"} +{"current_steps": 6408, "total_steps": 8674, "loss": 0.3900446891784668, "lr": 3.508419121893897e-07, "epoch": 1.4775190223656907, "percentage": 73.88, "elapsed_time": "4:22:44", "remaining_time": "1:32:54"} +{"current_steps": 6409, "total_steps": 8674, "loss": 0.40877431631088257, "lr": 
3.5055195166981646e-07, "epoch": 1.4777495964952732, "percentage": 73.89, "elapsed_time": "4:22:46", "remaining_time": "1:32:52"} +{"current_steps": 6410, "total_steps": 8674, "loss": 0.4381163716316223, "lr": 3.502620855540985e-07, "epoch": 1.4779801706248559, "percentage": 73.9, "elapsed_time": "4:22:49", "remaining_time": "1:32:49"} +{"current_steps": 6411, "total_steps": 8674, "loss": 0.3449817895889282, "lr": 3.4997231388437167e-07, "epoch": 1.4782107447544386, "percentage": 73.91, "elapsed_time": "4:22:52", "remaining_time": "1:32:47"} +{"current_steps": 6412, "total_steps": 8674, "loss": 0.4879523515701294, "lr": 3.4968263670275653e-07, "epoch": 1.478441318884021, "percentage": 73.92, "elapsed_time": "4:22:54", "remaining_time": "1:32:44"} +{"current_steps": 6413, "total_steps": 8674, "loss": 0.3781365156173706, "lr": 3.493930540513613e-07, "epoch": 1.4786718930136038, "percentage": 73.93, "elapsed_time": "4:22:56", "remaining_time": "1:32:42"} +{"current_steps": 6414, "total_steps": 8674, "loss": 0.4505656361579895, "lr": 3.49103565972279e-07, "epoch": 1.4789024671431865, "percentage": 73.95, "elapsed_time": "4:22:59", "remaining_time": "1:32:39"} +{"current_steps": 6415, "total_steps": 8674, "loss": 0.4285612106323242, "lr": 3.4881417250759006e-07, "epoch": 1.4791330412727692, "percentage": 73.96, "elapsed_time": "4:23:01", "remaining_time": "1:32:37"} +{"current_steps": 6416, "total_steps": 8674, "loss": 0.5285177826881409, "lr": 3.48524873699361e-07, "epoch": 1.479363615402352, "percentage": 73.97, "elapsed_time": "4:23:04", "remaining_time": "1:32:35"} +{"current_steps": 6417, "total_steps": 8674, "loss": 0.4504782259464264, "lr": 3.482356695896437e-07, "epoch": 1.4795941895319344, "percentage": 73.98, "elapsed_time": "4:23:06", "remaining_time": "1:32:32"} +{"current_steps": 6418, "total_steps": 8674, "loss": 0.45295125246047974, "lr": 3.4794656022047765e-07, "epoch": 1.4798247636615172, "percentage": 73.99, "elapsed_time": "4:23:09", "remaining_time": 
"1:32:30"} +{"current_steps": 6419, "total_steps": 8674, "loss": 0.35889285802841187, "lr": 3.47657545633887e-07, "epoch": 1.4800553377910999, "percentage": 74.0, "elapsed_time": "4:23:11", "remaining_time": "1:32:27"} +{"current_steps": 6420, "total_steps": 8674, "loss": 0.49129703640937805, "lr": 3.4736862587188384e-07, "epoch": 1.4802859119206824, "percentage": 74.01, "elapsed_time": "4:23:14", "remaining_time": "1:32:25"} +{"current_steps": 6421, "total_steps": 8674, "loss": 0.5018036365509033, "lr": 3.4707980097646474e-07, "epoch": 1.480516486050265, "percentage": 74.03, "elapsed_time": "4:23:16", "remaining_time": "1:32:22"} +{"current_steps": 6422, "total_steps": 8674, "loss": 0.48743095993995667, "lr": 3.46791070989614e-07, "epoch": 1.4807470601798478, "percentage": 74.04, "elapsed_time": "4:23:19", "remaining_time": "1:32:20"} +{"current_steps": 6423, "total_steps": 8674, "loss": 0.4876127243041992, "lr": 3.46502435953301e-07, "epoch": 1.4809776343094305, "percentage": 74.05, "elapsed_time": "4:23:21", "remaining_time": "1:32:17"} +{"current_steps": 6424, "total_steps": 8674, "loss": 0.517420768737793, "lr": 3.462138959094818e-07, "epoch": 1.4812082084390132, "percentage": 74.06, "elapsed_time": "4:23:24", "remaining_time": "1:32:15"} +{"current_steps": 6425, "total_steps": 8674, "loss": 0.49587076902389526, "lr": 3.4592545090009907e-07, "epoch": 1.4814387825685957, "percentage": 74.07, "elapsed_time": "4:23:26", "remaining_time": "1:32:12"} +{"current_steps": 6426, "total_steps": 8674, "loss": 0.43007123470306396, "lr": 3.4563710096708063e-07, "epoch": 1.4816693566981785, "percentage": 74.08, "elapsed_time": "4:23:29", "remaining_time": "1:32:10"} +{"current_steps": 6427, "total_steps": 8674, "loss": 0.41231095790863037, "lr": 3.4534884615234163e-07, "epoch": 1.4818999308277612, "percentage": 74.09, "elapsed_time": "4:23:31", "remaining_time": "1:32:08"} +{"current_steps": 6428, "total_steps": 8674, "loss": 0.4454977512359619, "lr": 3.450606864977822e-07, 
"epoch": 1.4821305049573437, "percentage": 74.11, "elapsed_time": "4:23:34", "remaining_time": "1:32:05"} +{"current_steps": 6429, "total_steps": 8674, "loss": 0.4432292878627777, "lr": 3.447726220452899e-07, "epoch": 1.4823610790869264, "percentage": 74.12, "elapsed_time": "4:23:36", "remaining_time": "1:32:03"} +{"current_steps": 6430, "total_steps": 8674, "loss": 0.47547852993011475, "lr": 3.444846528367372e-07, "epoch": 1.482591653216509, "percentage": 74.13, "elapsed_time": "4:23:39", "remaining_time": "1:32:00"} +{"current_steps": 6431, "total_steps": 8674, "loss": 0.45712774991989136, "lr": 3.441967789139837e-07, "epoch": 1.4828222273460918, "percentage": 74.14, "elapsed_time": "4:23:41", "remaining_time": "1:31:58"} +{"current_steps": 6432, "total_steps": 8674, "loss": 0.4485551714897156, "lr": 3.439090003188748e-07, "epoch": 1.4830528014756745, "percentage": 74.15, "elapsed_time": "4:23:43", "remaining_time": "1:31:55"} +{"current_steps": 6433, "total_steps": 8674, "loss": 0.5157139301300049, "lr": 3.4362131709324225e-07, "epoch": 1.483283375605257, "percentage": 74.16, "elapsed_time": "4:23:46", "remaining_time": "1:31:53"} +{"current_steps": 6434, "total_steps": 8674, "loss": 0.3786337375640869, "lr": 3.4333372927890346e-07, "epoch": 1.4835139497348397, "percentage": 74.18, "elapsed_time": "4:23:48", "remaining_time": "1:31:50"} +{"current_steps": 6435, "total_steps": 8674, "loss": 0.444644033908844, "lr": 3.430462369176619e-07, "epoch": 1.4837445238644225, "percentage": 74.19, "elapsed_time": "4:23:51", "remaining_time": "1:31:48"} +{"current_steps": 6436, "total_steps": 8674, "loss": 0.450777530670166, "lr": 3.427588400513082e-07, "epoch": 1.483975097994005, "percentage": 74.2, "elapsed_time": "4:23:53", "remaining_time": "1:31:45"} +{"current_steps": 6437, "total_steps": 8674, "loss": 0.4547499418258667, "lr": 3.424715387216176e-07, "epoch": 1.4842056721235877, "percentage": 74.21, "elapsed_time": "4:23:56", "remaining_time": "1:31:43"} 
+{"current_steps": 6438, "total_steps": 8674, "loss": 0.41394394636154175, "lr": 3.4218433297035274e-07, "epoch": 1.4844362462531704, "percentage": 74.22, "elapsed_time": "4:23:58", "remaining_time": "1:31:40"} +{"current_steps": 6439, "total_steps": 8674, "loss": 0.46392822265625, "lr": 3.4189722283926194e-07, "epoch": 1.484666820382753, "percentage": 74.23, "elapsed_time": "4:24:01", "remaining_time": "1:31:38"} +{"current_steps": 6440, "total_steps": 8674, "loss": 0.443311870098114, "lr": 3.416102083700797e-07, "epoch": 1.4848973945123358, "percentage": 74.24, "elapsed_time": "4:24:03", "remaining_time": "1:31:36"} +{"current_steps": 6441, "total_steps": 8674, "loss": 0.49744826555252075, "lr": 3.4132328960452594e-07, "epoch": 1.4851279686419183, "percentage": 74.26, "elapsed_time": "4:24:06", "remaining_time": "1:31:33"} +{"current_steps": 6442, "total_steps": 8674, "loss": 0.3906005620956421, "lr": 3.4103646658430787e-07, "epoch": 1.485358542771501, "percentage": 74.27, "elapsed_time": "4:24:08", "remaining_time": "1:31:31"} +{"current_steps": 6443, "total_steps": 8674, "loss": 0.4236280918121338, "lr": 3.407497393511175e-07, "epoch": 1.4855891169010838, "percentage": 74.28, "elapsed_time": "4:24:10", "remaining_time": "1:31:28"} +{"current_steps": 6444, "total_steps": 8674, "loss": 0.5457645654678345, "lr": 3.4046310794663403e-07, "epoch": 1.4858196910306662, "percentage": 74.29, "elapsed_time": "4:24:13", "remaining_time": "1:31:26"} +{"current_steps": 6445, "total_steps": 8674, "loss": 0.541573703289032, "lr": 3.4017657241252217e-07, "epoch": 1.486050265160249, "percentage": 74.3, "elapsed_time": "4:24:16", "remaining_time": "1:31:23"} +{"current_steps": 6446, "total_steps": 8674, "loss": 0.496945858001709, "lr": 3.398901327904322e-07, "epoch": 1.4862808392898317, "percentage": 74.31, "elapsed_time": "4:24:18", "remaining_time": "1:31:21"} +{"current_steps": 6447, "total_steps": 8674, "loss": 0.46119701862335205, "lr": 3.3960378912200136e-07, "epoch": 
1.4865114134194144, "percentage": 74.33, "elapsed_time": "4:24:20", "remaining_time": "1:31:18"} +{"current_steps": 6448, "total_steps": 8674, "loss": 0.5169441103935242, "lr": 3.3931754144885284e-07, "epoch": 1.4867419875489971, "percentage": 74.34, "elapsed_time": "4:24:23", "remaining_time": "1:31:16"} +{"current_steps": 6449, "total_steps": 8674, "loss": 0.525173544883728, "lr": 3.390313898125957e-07, "epoch": 1.4869725616785796, "percentage": 74.35, "elapsed_time": "4:24:25", "remaining_time": "1:31:13"} +{"current_steps": 6450, "total_steps": 8674, "loss": 0.46877139806747437, "lr": 3.3874533425482457e-07, "epoch": 1.4872031358081623, "percentage": 74.36, "elapsed_time": "4:24:28", "remaining_time": "1:31:11"} +{"current_steps": 6451, "total_steps": 8674, "loss": 0.49436479806900024, "lr": 3.3845937481712096e-07, "epoch": 1.487433709937745, "percentage": 74.37, "elapsed_time": "4:24:30", "remaining_time": "1:31:08"} +{"current_steps": 6452, "total_steps": 8674, "loss": 0.40879231691360474, "lr": 3.3817351154105145e-07, "epoch": 1.4876642840673275, "percentage": 74.38, "elapsed_time": "4:24:33", "remaining_time": "1:31:06"} +{"current_steps": 6453, "total_steps": 8674, "loss": 0.5060825347900391, "lr": 3.378877444681697e-07, "epoch": 1.4878948581969103, "percentage": 74.39, "elapsed_time": "4:24:35", "remaining_time": "1:31:04"} +{"current_steps": 6454, "total_steps": 8674, "loss": 0.4875546097755432, "lr": 3.3760207364001434e-07, "epoch": 1.488125432326493, "percentage": 74.41, "elapsed_time": "4:24:38", "remaining_time": "1:31:01"} +{"current_steps": 6455, "total_steps": 8674, "loss": 0.3791916072368622, "lr": 3.373164990981108e-07, "epoch": 1.4883560064560757, "percentage": 74.42, "elapsed_time": "4:24:40", "remaining_time": "1:30:59"} +{"current_steps": 6456, "total_steps": 8674, "loss": 0.46757322549819946, "lr": 3.370310208839704e-07, "epoch": 1.4885865805856584, "percentage": 74.43, "elapsed_time": "4:24:43", "remaining_time": "1:30:56"} 
+{"current_steps": 6457, "total_steps": 8674, "loss": 0.4334050416946411, "lr": 3.3674563903908994e-07, "epoch": 1.488817154715241, "percentage": 74.44, "elapsed_time": "4:24:45", "remaining_time": "1:30:54"} +{"current_steps": 6458, "total_steps": 8674, "loss": 0.4408720135688782, "lr": 3.3646035360495294e-07, "epoch": 1.4890477288448236, "percentage": 74.45, "elapsed_time": "4:24:48", "remaining_time": "1:30:51"} +{"current_steps": 6459, "total_steps": 8674, "loss": 0.46556228399276733, "lr": 3.3617516462302795e-07, "epoch": 1.4892783029744063, "percentage": 74.46, "elapsed_time": "4:24:50", "remaining_time": "1:30:49"} +{"current_steps": 6460, "total_steps": 8674, "loss": 0.5212184190750122, "lr": 3.3589007213477096e-07, "epoch": 1.4895088771039888, "percentage": 74.48, "elapsed_time": "4:24:53", "remaining_time": "1:30:46"} +{"current_steps": 6461, "total_steps": 8674, "loss": 0.5340084433555603, "lr": 3.35605076181622e-07, "epoch": 1.4897394512335715, "percentage": 74.49, "elapsed_time": "4:24:55", "remaining_time": "1:30:44"} +{"current_steps": 6462, "total_steps": 8674, "loss": 0.38049495220184326, "lr": 3.353201768050088e-07, "epoch": 1.4899700253631543, "percentage": 74.5, "elapsed_time": "4:24:58", "remaining_time": "1:30:42"} +{"current_steps": 6463, "total_steps": 8674, "loss": 0.5480734705924988, "lr": 3.350353740463442e-07, "epoch": 1.490200599492737, "percentage": 74.51, "elapsed_time": "4:25:00", "remaining_time": "1:30:39"} +{"current_steps": 6464, "total_steps": 8674, "loss": 0.4179231524467468, "lr": 3.3475066794702756e-07, "epoch": 1.4904311736223197, "percentage": 74.52, "elapsed_time": "4:25:03", "remaining_time": "1:30:37"} +{"current_steps": 6465, "total_steps": 8674, "loss": 0.5380987524986267, "lr": 3.3446605854844335e-07, "epoch": 1.4906617477519022, "percentage": 74.53, "elapsed_time": "4:25:05", "remaining_time": "1:30:34"} +{"current_steps": 6466, "total_steps": 8674, "loss": 0.41146454215049744, "lr": 3.3418154589196226e-07, "epoch": 
1.490892321881485, "percentage": 74.54, "elapsed_time": "4:25:08", "remaining_time": "1:30:32"} +{"current_steps": 6467, "total_steps": 8674, "loss": 0.4586387276649475, "lr": 3.3389713001894157e-07, "epoch": 1.4911228960110676, "percentage": 74.56, "elapsed_time": "4:25:10", "remaining_time": "1:30:29"} +{"current_steps": 6468, "total_steps": 8674, "loss": 0.4023931920528412, "lr": 3.336128109707236e-07, "epoch": 1.4913534701406501, "percentage": 74.57, "elapsed_time": "4:25:13", "remaining_time": "1:30:27"} +{"current_steps": 6469, "total_steps": 8674, "loss": 0.5373448133468628, "lr": 3.333285887886373e-07, "epoch": 1.4915840442702328, "percentage": 74.58, "elapsed_time": "4:25:15", "remaining_time": "1:30:24"} +{"current_steps": 6470, "total_steps": 8674, "loss": 0.4413643479347229, "lr": 3.330444635139971e-07, "epoch": 1.4918146183998156, "percentage": 74.59, "elapsed_time": "4:25:18", "remaining_time": "1:30:22"} +{"current_steps": 6471, "total_steps": 8674, "loss": 0.399494469165802, "lr": 3.3276043518810327e-07, "epoch": 1.4920451925293983, "percentage": 74.6, "elapsed_time": "4:25:20", "remaining_time": "1:30:20"} +{"current_steps": 6472, "total_steps": 8674, "loss": 0.4353644847869873, "lr": 3.3247650385224256e-07, "epoch": 1.492275766658981, "percentage": 74.61, "elapsed_time": "4:25:23", "remaining_time": "1:30:17"} +{"current_steps": 6473, "total_steps": 8674, "loss": 0.5231607556343079, "lr": 3.3219266954768743e-07, "epoch": 1.4925063407885635, "percentage": 74.63, "elapsed_time": "4:25:25", "remaining_time": "1:30:15"} +{"current_steps": 6474, "total_steps": 8674, "loss": 0.414408802986145, "lr": 3.3190893231569596e-07, "epoch": 1.4927369149181462, "percentage": 74.64, "elapsed_time": "4:25:27", "remaining_time": "1:30:12"} +{"current_steps": 6475, "total_steps": 8674, "loss": 0.3921009302139282, "lr": 3.3162529219751155e-07, "epoch": 1.492967489047729, "percentage": 74.65, "elapsed_time": "4:25:30", "remaining_time": "1:30:10"} +{"current_steps": 
6476, "total_steps": 8674, "loss": 0.4317164421081543, "lr": 3.3134174923436506e-07, "epoch": 1.4931980631773114, "percentage": 74.66, "elapsed_time": "4:25:32", "remaining_time": "1:30:07"} +{"current_steps": 6477, "total_steps": 8674, "loss": 0.46302181482315063, "lr": 3.3105830346747175e-07, "epoch": 1.4934286373068941, "percentage": 74.67, "elapsed_time": "4:25:35", "remaining_time": "1:30:05"} +{"current_steps": 6478, "total_steps": 8674, "loss": 0.45704615116119385, "lr": 3.307749549380335e-07, "epoch": 1.4936592114364768, "percentage": 74.68, "elapsed_time": "4:25:37", "remaining_time": "1:30:02"} +{"current_steps": 6479, "total_steps": 8674, "loss": 0.45455485582351685, "lr": 3.304917036872379e-07, "epoch": 1.4938897855660596, "percentage": 74.69, "elapsed_time": "4:25:39", "remaining_time": "1:30:00"} +{"current_steps": 6480, "total_steps": 8674, "loss": 0.41939157247543335, "lr": 3.302085497562588e-07, "epoch": 1.4941203596956423, "percentage": 74.71, "elapsed_time": "4:25:42", "remaining_time": "1:29:57"} +{"current_steps": 6481, "total_steps": 8674, "loss": 0.4109286367893219, "lr": 3.2992549318625487e-07, "epoch": 1.4943509338252248, "percentage": 74.72, "elapsed_time": "4:25:44", "remaining_time": "1:29:55"} +{"current_steps": 6482, "total_steps": 8674, "loss": 0.44710463285446167, "lr": 3.2964253401837173e-07, "epoch": 1.4945815079548075, "percentage": 74.73, "elapsed_time": "4:25:47", "remaining_time": "1:29:52"} +{"current_steps": 6483, "total_steps": 8674, "loss": 0.4330691695213318, "lr": 3.2935967229373986e-07, "epoch": 1.4948120820843902, "percentage": 74.74, "elapsed_time": "4:25:49", "remaining_time": "1:29:50"} +{"current_steps": 6484, "total_steps": 8674, "loss": 0.41174834966659546, "lr": 3.2907690805347667e-07, "epoch": 1.4950426562139727, "percentage": 74.75, "elapsed_time": "4:25:51", "remaining_time": "1:29:47"} +{"current_steps": 6485, "total_steps": 8674, "loss": 0.4368870258331299, "lr": 3.2879424133868406e-07, "epoch": 
1.4952732303435554, "percentage": 74.76, "elapsed_time": "4:25:54", "remaining_time": "1:29:45"} +{"current_steps": 6486, "total_steps": 8674, "loss": 0.5155518651008606, "lr": 3.2851167219045107e-07, "epoch": 1.4955038044731381, "percentage": 74.78, "elapsed_time": "4:25:56", "remaining_time": "1:29:42"} +{"current_steps": 6487, "total_steps": 8674, "loss": 0.47015419602394104, "lr": 3.282292006498522e-07, "epoch": 1.4957343786027208, "percentage": 74.79, "elapsed_time": "4:25:59", "remaining_time": "1:29:40"} +{"current_steps": 6488, "total_steps": 8674, "loss": 0.41059884428977966, "lr": 3.2794682675794684e-07, "epoch": 1.4959649527323036, "percentage": 74.8, "elapsed_time": "4:26:01", "remaining_time": "1:29:37"} +{"current_steps": 6489, "total_steps": 8674, "loss": 0.4864136278629303, "lr": 3.2766455055578157e-07, "epoch": 1.496195526861886, "percentage": 74.81, "elapsed_time": "4:26:04", "remaining_time": "1:29:35"} +{"current_steps": 6490, "total_steps": 8674, "loss": 0.3599165976047516, "lr": 3.2738237208438744e-07, "epoch": 1.4964261009914688, "percentage": 74.82, "elapsed_time": "4:26:06", "remaining_time": "1:29:33"} +{"current_steps": 6491, "total_steps": 8674, "loss": 0.4734029769897461, "lr": 3.2710029138478267e-07, "epoch": 1.4966566751210515, "percentage": 74.83, "elapsed_time": "4:26:09", "remaining_time": "1:29:30"} +{"current_steps": 6492, "total_steps": 8674, "loss": 0.46739861369132996, "lr": 3.268183084979699e-07, "epoch": 1.496887249250634, "percentage": 74.84, "elapsed_time": "4:26:11", "remaining_time": "1:29:28"} +{"current_steps": 6493, "total_steps": 8674, "loss": 0.46794670820236206, "lr": 3.265364234649387e-07, "epoch": 1.4971178233802167, "percentage": 74.86, "elapsed_time": "4:26:13", "remaining_time": "1:29:25"} +{"current_steps": 6494, "total_steps": 8674, "loss": 0.463203489780426, "lr": 3.262546363266635e-07, "epoch": 1.4973483975097994, "percentage": 74.87, "elapsed_time": "4:26:16", "remaining_time": "1:29:23"} 
+{"current_steps": 6495, "total_steps": 8674, "loss": 0.4495059847831726, "lr": 3.2597294712410504e-07, "epoch": 1.4975789716393821, "percentage": 74.88, "elapsed_time": "4:26:19", "remaining_time": "1:29:20"} +{"current_steps": 6496, "total_steps": 8674, "loss": 0.43549245595932007, "lr": 3.256913558982101e-07, "epoch": 1.4978095457689649, "percentage": 74.89, "elapsed_time": "4:26:21", "remaining_time": "1:29:18"} +{"current_steps": 6497, "total_steps": 8674, "loss": 0.40582704544067383, "lr": 3.254098626899102e-07, "epoch": 1.4980401198985474, "percentage": 74.9, "elapsed_time": "4:26:23", "remaining_time": "1:29:15"} +{"current_steps": 6498, "total_steps": 8674, "loss": 0.3720378279685974, "lr": 3.251284675401238e-07, "epoch": 1.49827069402813, "percentage": 74.91, "elapsed_time": "4:26:26", "remaining_time": "1:29:13"} +{"current_steps": 6499, "total_steps": 8674, "loss": 0.42694520950317383, "lr": 3.24847170489754e-07, "epoch": 1.4985012681577128, "percentage": 74.93, "elapsed_time": "4:26:28", "remaining_time": "1:29:10"} +{"current_steps": 6500, "total_steps": 8674, "loss": 0.442158043384552, "lr": 3.2456597157969066e-07, "epoch": 1.4987318422872953, "percentage": 74.94, "elapsed_time": "4:26:31", "remaining_time": "1:29:08"} +{"current_steps": 6501, "total_steps": 8674, "loss": 0.44245558977127075, "lr": 3.2428487085080846e-07, "epoch": 1.498962416416878, "percentage": 74.95, "elapsed_time": "4:26:34", "remaining_time": "1:29:06"} +{"current_steps": 6502, "total_steps": 8674, "loss": 0.4127236008644104, "lr": 3.240038683439684e-07, "epoch": 1.4991929905464607, "percentage": 74.96, "elapsed_time": "4:26:37", "remaining_time": "1:29:03"} +{"current_steps": 6503, "total_steps": 8674, "loss": 0.4262787103652954, "lr": 3.237229641000171e-07, "epoch": 1.4994235646760434, "percentage": 74.97, "elapsed_time": "4:26:40", "remaining_time": "1:29:01"} +{"current_steps": 6504, "total_steps": 8674, "loss": 0.4181264042854309, "lr": 3.2344215815978714e-07, "epoch": 
1.4996541388056261, "percentage": 74.98, "elapsed_time": "4:26:42", "remaining_time": "1:28:59"} +{"current_steps": 6505, "total_steps": 8674, "loss": 0.4416937530040741, "lr": 3.2316145056409616e-07, "epoch": 1.4998847129352086, "percentage": 74.99, "elapsed_time": "4:26:45", "remaining_time": "1:28:56"} +{"current_steps": 6506, "total_steps": 8674, "loss": 0.4901489019393921, "lr": 3.228808413537476e-07, "epoch": 1.5001152870647914, "percentage": 75.01, "elapsed_time": "4:26:47", "remaining_time": "1:28:54"} +{"current_steps": 6507, "total_steps": 8674, "loss": 0.37932026386260986, "lr": 3.2260033056953153e-07, "epoch": 1.5003458611943739, "percentage": 75.02, "elapsed_time": "4:26:49", "remaining_time": "1:28:51"} +{"current_steps": 6508, "total_steps": 8674, "loss": 0.4680899381637573, "lr": 3.223199182522223e-07, "epoch": 1.5005764353239566, "percentage": 75.03, "elapsed_time": "4:26:52", "remaining_time": "1:28:49"} +{"current_steps": 6509, "total_steps": 8674, "loss": 0.508334219455719, "lr": 3.2203960444258105e-07, "epoch": 1.5008070094535393, "percentage": 75.04, "elapsed_time": "4:26:55", "remaining_time": "1:28:46"} +{"current_steps": 6510, "total_steps": 8674, "loss": 0.3386784791946411, "lr": 3.2175938918135415e-07, "epoch": 1.501037583583122, "percentage": 75.05, "elapsed_time": "4:26:57", "remaining_time": "1:28:44"} +{"current_steps": 6511, "total_steps": 8674, "loss": 0.4315892457962036, "lr": 3.214792725092741e-07, "epoch": 1.5012681577127047, "percentage": 75.06, "elapsed_time": "4:27:00", "remaining_time": "1:28:41"} +{"current_steps": 6512, "total_steps": 8674, "loss": 0.3709627389907837, "lr": 3.211992544670582e-07, "epoch": 1.5014987318422874, "percentage": 75.07, "elapsed_time": "4:27:02", "remaining_time": "1:28:39"} +{"current_steps": 6513, "total_steps": 8674, "loss": 0.5260987877845764, "lr": 3.2091933509541023e-07, "epoch": 1.50172930597187, "percentage": 75.09, "elapsed_time": "4:27:04", "remaining_time": "1:28:37"} +{"current_steps": 
6514, "total_steps": 8674, "loss": 0.5379073619842529, "lr": 3.20639514435019e-07, "epoch": 1.5019598801014526, "percentage": 75.1, "elapsed_time": "4:27:07", "remaining_time": "1:28:34"} +{"current_steps": 6515, "total_steps": 8674, "loss": 0.47530391812324524, "lr": 3.2035979252655976e-07, "epoch": 1.5021904542310351, "percentage": 75.11, "elapsed_time": "4:27:09", "remaining_time": "1:28:32"} +{"current_steps": 6516, "total_steps": 8674, "loss": 0.459227979183197, "lr": 3.200801694106926e-07, "epoch": 1.5024210283606179, "percentage": 75.12, "elapsed_time": "4:27:12", "remaining_time": "1:28:29"} +{"current_steps": 6517, "total_steps": 8674, "loss": 0.4867238998413086, "lr": 3.19800645128063e-07, "epoch": 1.5026516024902006, "percentage": 75.13, "elapsed_time": "4:27:14", "remaining_time": "1:28:27"} +{"current_steps": 6518, "total_steps": 8674, "loss": 0.38478928804397583, "lr": 3.195212197193039e-07, "epoch": 1.5028821766197833, "percentage": 75.14, "elapsed_time": "4:27:17", "remaining_time": "1:28:24"} +{"current_steps": 6519, "total_steps": 8674, "loss": 0.3938423991203308, "lr": 3.192418932250316e-07, "epoch": 1.503112750749366, "percentage": 75.16, "elapsed_time": "4:27:19", "remaining_time": "1:28:22"} +{"current_steps": 6520, "total_steps": 8674, "loss": 0.457303911447525, "lr": 3.1896266568584975e-07, "epoch": 1.5033433248789487, "percentage": 75.17, "elapsed_time": "4:27:22", "remaining_time": "1:28:19"} +{"current_steps": 6521, "total_steps": 8674, "loss": 0.5007269382476807, "lr": 3.1868353714234607e-07, "epoch": 1.5035738990085312, "percentage": 75.18, "elapsed_time": "4:27:24", "remaining_time": "1:28:17"} +{"current_steps": 6522, "total_steps": 8674, "loss": 0.3878381848335266, "lr": 3.1840450763509576e-07, "epoch": 1.503804473138114, "percentage": 75.19, "elapsed_time": "4:27:27", "remaining_time": "1:28:14"} +{"current_steps": 6523, "total_steps": 8674, "loss": 0.488269567489624, "lr": 3.181255772046575e-07, "epoch": 1.5040350472676964, 
"percentage": 75.2, "elapsed_time": "4:27:29", "remaining_time": "1:28:12"} +{"current_steps": 6524, "total_steps": 8674, "loss": 0.41664889454841614, "lr": 3.1784674589157767e-07, "epoch": 1.5042656213972792, "percentage": 75.21, "elapsed_time": "4:27:32", "remaining_time": "1:28:09"} +{"current_steps": 6525, "total_steps": 8674, "loss": 0.4862533509731293, "lr": 3.175680137363863e-07, "epoch": 1.5044961955268619, "percentage": 75.22, "elapsed_time": "4:27:34", "remaining_time": "1:28:07"} +{"current_steps": 6526, "total_steps": 8674, "loss": 0.4629037380218506, "lr": 3.172893807796004e-07, "epoch": 1.5047267696564446, "percentage": 75.24, "elapsed_time": "4:27:36", "remaining_time": "1:28:05"} +{"current_steps": 6527, "total_steps": 8674, "loss": 0.46300196647644043, "lr": 3.1701084706172245e-07, "epoch": 1.5049573437860273, "percentage": 75.25, "elapsed_time": "4:27:39", "remaining_time": "1:28:02"} +{"current_steps": 6528, "total_steps": 8674, "loss": 0.40698888897895813, "lr": 3.1673241262323934e-07, "epoch": 1.50518791791561, "percentage": 75.26, "elapsed_time": "4:27:41", "remaining_time": "1:28:00"} +{"current_steps": 6529, "total_steps": 8674, "loss": 0.4344380497932434, "lr": 3.1645407750462514e-07, "epoch": 1.5054184920451925, "percentage": 75.27, "elapsed_time": "4:27:44", "remaining_time": "1:27:57"} +{"current_steps": 6530, "total_steps": 8674, "loss": 0.49757128953933716, "lr": 3.1617584174633806e-07, "epoch": 1.5056490661747752, "percentage": 75.28, "elapsed_time": "4:27:46", "remaining_time": "1:27:55"} +{"current_steps": 6531, "total_steps": 8674, "loss": 0.4506916105747223, "lr": 3.15897705388823e-07, "epoch": 1.5058796403043577, "percentage": 75.29, "elapsed_time": "4:27:49", "remaining_time": "1:27:52"} +{"current_steps": 6532, "total_steps": 8674, "loss": 0.3941146731376648, "lr": 3.156196684725093e-07, "epoch": 1.5061102144339404, "percentage": 75.31, "elapsed_time": "4:27:51", "remaining_time": "1:27:50"} +{"current_steps": 6533, 
"total_steps": 8674, "loss": 0.5400820374488831, "lr": 3.153417310378127e-07, "epoch": 1.5063407885635232, "percentage": 75.32, "elapsed_time": "4:27:54", "remaining_time": "1:27:47"} +{"current_steps": 6534, "total_steps": 8674, "loss": 0.4418470859527588, "lr": 3.1506389312513435e-07, "epoch": 1.5065713626931059, "percentage": 75.33, "elapsed_time": "4:27:56", "remaining_time": "1:27:45"} +{"current_steps": 6535, "total_steps": 8674, "loss": 0.3897334933280945, "lr": 3.1478615477486113e-07, "epoch": 1.5068019368226886, "percentage": 75.34, "elapsed_time": "4:27:59", "remaining_time": "1:27:43"} +{"current_steps": 6536, "total_steps": 8674, "loss": 0.4923437833786011, "lr": 3.145085160273647e-07, "epoch": 1.5070325109522713, "percentage": 75.35, "elapsed_time": "4:28:01", "remaining_time": "1:27:40"} +{"current_steps": 6537, "total_steps": 8674, "loss": 0.41996920108795166, "lr": 3.142309769230025e-07, "epoch": 1.5072630850818538, "percentage": 75.36, "elapsed_time": "4:28:04", "remaining_time": "1:27:38"} +{"current_steps": 6538, "total_steps": 8674, "loss": 0.38584667444229126, "lr": 3.1395353750211806e-07, "epoch": 1.5074936592114365, "percentage": 75.37, "elapsed_time": "4:28:06", "remaining_time": "1:27:35"} +{"current_steps": 6539, "total_steps": 8674, "loss": 0.5093455910682678, "lr": 3.136761978050395e-07, "epoch": 1.507724233341019, "percentage": 75.39, "elapsed_time": "4:28:09", "remaining_time": "1:27:33"} +{"current_steps": 6540, "total_steps": 8674, "loss": 0.5592935681343079, "lr": 3.1339895787208126e-07, "epoch": 1.5079548074706017, "percentage": 75.4, "elapsed_time": "4:28:11", "remaining_time": "1:27:30"} +{"current_steps": 6541, "total_steps": 8674, "loss": 0.38311779499053955, "lr": 3.1312181774354306e-07, "epoch": 1.5081853816001844, "percentage": 75.41, "elapsed_time": "4:28:13", "remaining_time": "1:27:28"} +{"current_steps": 6542, "total_steps": 8674, "loss": 0.4422299265861511, "lr": 3.1284477745971025e-07, "epoch": 1.5084159557297672, 
"percentage": 75.42, "elapsed_time": "4:28:16", "remaining_time": "1:27:25"} +{"current_steps": 6543, "total_steps": 8674, "loss": 0.5097527503967285, "lr": 3.125678370608528e-07, "epoch": 1.5086465298593499, "percentage": 75.43, "elapsed_time": "4:28:18", "remaining_time": "1:27:23"} +{"current_steps": 6544, "total_steps": 8674, "loss": 0.42586642503738403, "lr": 3.1229099658722747e-07, "epoch": 1.5088771039889326, "percentage": 75.44, "elapsed_time": "4:28:21", "remaining_time": "1:27:20"} +{"current_steps": 6545, "total_steps": 8674, "loss": 0.5006861686706543, "lr": 3.120142560790755e-07, "epoch": 1.509107678118515, "percentage": 75.46, "elapsed_time": "4:28:24", "remaining_time": "1:27:18"} +{"current_steps": 6546, "total_steps": 8674, "loss": 0.4361686706542969, "lr": 3.117376155766237e-07, "epoch": 1.5093382522480978, "percentage": 75.47, "elapsed_time": "4:28:26", "remaining_time": "1:27:15"} +{"current_steps": 6547, "total_steps": 8674, "loss": 0.45466339588165283, "lr": 3.11461075120085e-07, "epoch": 1.5095688263776803, "percentage": 75.48, "elapsed_time": "4:28:28", "remaining_time": "1:27:13"} +{"current_steps": 6548, "total_steps": 8674, "loss": 0.39591068029403687, "lr": 3.1118463474965697e-07, "epoch": 1.509799400507263, "percentage": 75.49, "elapsed_time": "4:28:31", "remaining_time": "1:27:11"} +{"current_steps": 6549, "total_steps": 8674, "loss": 0.4672427475452423, "lr": 3.1090829450552316e-07, "epoch": 1.5100299746368457, "percentage": 75.5, "elapsed_time": "4:28:33", "remaining_time": "1:27:08"} +{"current_steps": 6550, "total_steps": 8674, "loss": 0.4785880148410797, "lr": 3.1063205442785234e-07, "epoch": 1.5102605487664285, "percentage": 75.51, "elapsed_time": "4:28:36", "remaining_time": "1:27:06"} +{"current_steps": 6551, "total_steps": 8674, "loss": 0.441936731338501, "lr": 3.103559145567994e-07, "epoch": 1.5104911228960112, "percentage": 75.52, "elapsed_time": "4:28:38", "remaining_time": "1:27:03"} +{"current_steps": 6552, "total_steps": 
8674, "loss": 0.49719512462615967, "lr": 3.1007987493250334e-07, "epoch": 1.510721697025594, "percentage": 75.54, "elapsed_time": "4:28:41", "remaining_time": "1:27:01"} +{"current_steps": 6553, "total_steps": 8674, "loss": 0.40702491998672485, "lr": 3.098039355950899e-07, "epoch": 1.5109522711551764, "percentage": 75.55, "elapsed_time": "4:28:43", "remaining_time": "1:26:58"} +{"current_steps": 6554, "total_steps": 8674, "loss": 0.44754648208618164, "lr": 3.0952809658466896e-07, "epoch": 1.511182845284759, "percentage": 75.56, "elapsed_time": "4:28:46", "remaining_time": "1:26:56"} +{"current_steps": 6555, "total_steps": 8674, "loss": 0.5370102524757385, "lr": 3.0925235794133717e-07, "epoch": 1.5114134194143416, "percentage": 75.57, "elapsed_time": "4:28:48", "remaining_time": "1:26:53"} +{"current_steps": 6556, "total_steps": 8674, "loss": 0.46693646907806396, "lr": 3.089767197051755e-07, "epoch": 1.5116439935439243, "percentage": 75.58, "elapsed_time": "4:28:51", "remaining_time": "1:26:51"} +{"current_steps": 6557, "total_steps": 8674, "loss": 0.3887597322463989, "lr": 3.0870118191625084e-07, "epoch": 1.511874567673507, "percentage": 75.59, "elapsed_time": "4:28:53", "remaining_time": "1:26:48"} +{"current_steps": 6558, "total_steps": 8674, "loss": 0.4783397912979126, "lr": 3.0842574461461577e-07, "epoch": 1.5121051418030897, "percentage": 75.61, "elapsed_time": "4:28:55", "remaining_time": "1:26:46"} +{"current_steps": 6559, "total_steps": 8674, "loss": 0.5305588245391846, "lr": 3.081504078403073e-07, "epoch": 1.5123357159326725, "percentage": 75.62, "elapsed_time": "4:28:58", "remaining_time": "1:26:43"} +{"current_steps": 6560, "total_steps": 8674, "loss": 0.45315784215927124, "lr": 3.078751716333492e-07, "epoch": 1.5125662900622552, "percentage": 75.63, "elapsed_time": "4:29:00", "remaining_time": "1:26:41"} +{"current_steps": 6561, "total_steps": 8674, "loss": 0.4805132746696472, "lr": 3.0760003603374897e-07, "epoch": 1.5127968641918377, "percentage": 
75.64, "elapsed_time": "4:29:03", "remaining_time": "1:26:39"} +{"current_steps": 6562, "total_steps": 8674, "loss": 0.4956076145172119, "lr": 3.0732500108150104e-07, "epoch": 1.5130274383214202, "percentage": 75.65, "elapsed_time": "4:29:05", "remaining_time": "1:26:36"} +{"current_steps": 6563, "total_steps": 8674, "loss": 0.3629196882247925, "lr": 3.07050066816584e-07, "epoch": 1.5132580124510029, "percentage": 75.66, "elapsed_time": "4:29:08", "remaining_time": "1:26:34"} +{"current_steps": 6564, "total_steps": 8674, "loss": 0.43240371346473694, "lr": 3.067752332789626e-07, "epoch": 1.5134885865805856, "percentage": 75.67, "elapsed_time": "4:29:10", "remaining_time": "1:26:31"} +{"current_steps": 6565, "total_steps": 8674, "loss": 0.4933302402496338, "lr": 3.065005005085869e-07, "epoch": 1.5137191607101683, "percentage": 75.69, "elapsed_time": "4:29:13", "remaining_time": "1:26:29"} +{"current_steps": 6566, "total_steps": 8674, "loss": 0.47905197739601135, "lr": 3.0622586854539155e-07, "epoch": 1.513949734839751, "percentage": 75.7, "elapsed_time": "4:29:15", "remaining_time": "1:26:26"} +{"current_steps": 6567, "total_steps": 8674, "loss": 0.4245232343673706, "lr": 3.059513374292978e-07, "epoch": 1.5141803089693338, "percentage": 75.71, "elapsed_time": "4:29:17", "remaining_time": "1:26:24"} +{"current_steps": 6568, "total_steps": 8674, "loss": 0.40526312589645386, "lr": 3.0567690720021077e-07, "epoch": 1.5144108830989162, "percentage": 75.72, "elapsed_time": "4:29:20", "remaining_time": "1:26:21"} +{"current_steps": 6569, "total_steps": 8674, "loss": 0.5808804631233215, "lr": 3.0540257789802227e-07, "epoch": 1.514641457228499, "percentage": 75.73, "elapsed_time": "4:29:22", "remaining_time": "1:26:19"} +{"current_steps": 6570, "total_steps": 8674, "loss": 0.44997286796569824, "lr": 3.0512834956260836e-07, "epoch": 1.5148720313580815, "percentage": 75.74, "elapsed_time": "4:29:25", "remaining_time": "1:26:16"} +{"current_steps": 6571, "total_steps": 8674, 
"loss": 0.44051581621170044, "lr": 3.048542222338315e-07, "epoch": 1.5151026054876642, "percentage": 75.76, "elapsed_time": "4:29:27", "remaining_time": "1:26:14"} +{"current_steps": 6572, "total_steps": 8674, "loss": 0.5113236308097839, "lr": 3.045801959515382e-07, "epoch": 1.515333179617247, "percentage": 75.77, "elapsed_time": "4:29:29", "remaining_time": "1:26:11"} +{"current_steps": 6573, "total_steps": 8674, "loss": 0.554703950881958, "lr": 3.0430627075556125e-07, "epoch": 1.5155637537468296, "percentage": 75.78, "elapsed_time": "4:29:32", "remaining_time": "1:26:09"} +{"current_steps": 6574, "total_steps": 8674, "loss": 0.3819808065891266, "lr": 3.0403244668571847e-07, "epoch": 1.5157943278764123, "percentage": 75.79, "elapsed_time": "4:29:34", "remaining_time": "1:26:06"} +{"current_steps": 6575, "total_steps": 8674, "loss": 0.47970864176750183, "lr": 3.037587237818133e-07, "epoch": 1.516024902005995, "percentage": 75.8, "elapsed_time": "4:29:37", "remaining_time": "1:26:04"} +{"current_steps": 6576, "total_steps": 8674, "loss": 0.4296469986438751, "lr": 3.0348510208363386e-07, "epoch": 1.5162554761355775, "percentage": 75.81, "elapsed_time": "4:29:39", "remaining_time": "1:26:01"} +{"current_steps": 6577, "total_steps": 8674, "loss": 0.4372752904891968, "lr": 3.032115816309535e-07, "epoch": 1.5164860502651603, "percentage": 75.82, "elapsed_time": "4:29:42", "remaining_time": "1:25:59"} +{"current_steps": 6578, "total_steps": 8674, "loss": 0.4711950719356537, "lr": 3.029381624635318e-07, "epoch": 1.5167166243947428, "percentage": 75.84, "elapsed_time": "4:29:44", "remaining_time": "1:25:57"} +{"current_steps": 6579, "total_steps": 8674, "loss": 0.4448170065879822, "lr": 3.026648446211124e-07, "epoch": 1.5169471985243255, "percentage": 75.85, "elapsed_time": "4:29:47", "remaining_time": "1:25:54"} +{"current_steps": 6580, "total_steps": 8674, "loss": 0.4527873992919922, "lr": 3.02391628143425e-07, "epoch": 1.5171777726539082, "percentage": 75.86, 
"elapsed_time": "4:29:49", "remaining_time": "1:25:52"} +{"current_steps": 6581, "total_steps": 8674, "loss": 0.453765332698822, "lr": 3.0211851307018463e-07, "epoch": 1.517408346783491, "percentage": 75.87, "elapsed_time": "4:29:52", "remaining_time": "1:25:49"} +{"current_steps": 6582, "total_steps": 8674, "loss": 0.46818265318870544, "lr": 3.018454994410915e-07, "epoch": 1.5176389209130736, "percentage": 75.88, "elapsed_time": "4:29:54", "remaining_time": "1:25:47"} +{"current_steps": 6583, "total_steps": 8674, "loss": 0.38551369309425354, "lr": 3.0157258729583026e-07, "epoch": 1.5178694950426563, "percentage": 75.89, "elapsed_time": "4:29:56", "remaining_time": "1:25:44"} +{"current_steps": 6584, "total_steps": 8674, "loss": 0.3651260733604431, "lr": 3.012997766740721e-07, "epoch": 1.5181000691722388, "percentage": 75.91, "elapsed_time": "4:29:59", "remaining_time": "1:25:42"} +{"current_steps": 6585, "total_steps": 8674, "loss": 0.36894726753234863, "lr": 3.010270676154726e-07, "epoch": 1.5183306433018215, "percentage": 75.92, "elapsed_time": "4:30:01", "remaining_time": "1:25:39"} +{"current_steps": 6586, "total_steps": 8674, "loss": 0.42595791816711426, "lr": 3.007544601596722e-07, "epoch": 1.518561217431404, "percentage": 75.93, "elapsed_time": "4:30:04", "remaining_time": "1:25:37"} +{"current_steps": 6587, "total_steps": 8674, "loss": 0.4916795492172241, "lr": 3.004819543462979e-07, "epoch": 1.5187917915609868, "percentage": 75.94, "elapsed_time": "4:30:06", "remaining_time": "1:25:34"} +{"current_steps": 6588, "total_steps": 8674, "loss": 0.5098932385444641, "lr": 3.0020955021496073e-07, "epoch": 1.5190223656905695, "percentage": 75.95, "elapsed_time": "4:30:08", "remaining_time": "1:25:32"} +{"current_steps": 6589, "total_steps": 8674, "loss": 0.6336305737495422, "lr": 2.9993724780525796e-07, "epoch": 1.5192529398201522, "percentage": 75.96, "elapsed_time": "4:30:11", "remaining_time": "1:25:29"} +{"current_steps": 6590, "total_steps": 8674, "loss": 
0.4911893606185913, "lr": 2.996650471567709e-07, "epoch": 1.519483513949735, "percentage": 75.97, "elapsed_time": "4:30:13", "remaining_time": "1:25:27"} +{"current_steps": 6591, "total_steps": 8674, "loss": 0.4388008117675781, "lr": 2.9939294830906727e-07, "epoch": 1.5197140880793176, "percentage": 75.99, "elapsed_time": "4:30:16", "remaining_time": "1:25:24"} +{"current_steps": 6592, "total_steps": 8674, "loss": 0.392263799905777, "lr": 2.991209513016986e-07, "epoch": 1.5199446622089001, "percentage": 76.0, "elapsed_time": "4:30:18", "remaining_time": "1:25:22"} +{"current_steps": 6593, "total_steps": 8674, "loss": 0.36495402455329895, "lr": 2.988490561742032e-07, "epoch": 1.5201752363384828, "percentage": 76.01, "elapsed_time": "4:30:20", "remaining_time": "1:25:19"} +{"current_steps": 6594, "total_steps": 8674, "loss": 0.5280855298042297, "lr": 2.985772629661032e-07, "epoch": 1.5204058104680653, "percentage": 76.02, "elapsed_time": "4:30:23", "remaining_time": "1:25:17"} +{"current_steps": 6595, "total_steps": 8674, "loss": 0.43953752517700195, "lr": 2.9830557171690693e-07, "epoch": 1.520636384597648, "percentage": 76.03, "elapsed_time": "4:30:25", "remaining_time": "1:25:15"} +{"current_steps": 6596, "total_steps": 8674, "loss": 0.41361862421035767, "lr": 2.980339824661071e-07, "epoch": 1.5208669587272308, "percentage": 76.04, "elapsed_time": "4:30:28", "remaining_time": "1:25:12"} +{"current_steps": 6597, "total_steps": 8674, "loss": 0.39955854415893555, "lr": 2.977624952531821e-07, "epoch": 1.5210975328568135, "percentage": 76.05, "elapsed_time": "4:30:30", "remaining_time": "1:25:10"} +{"current_steps": 6598, "total_steps": 8674, "loss": 0.505165696144104, "lr": 2.9749111011759565e-07, "epoch": 1.5213281069863962, "percentage": 76.07, "elapsed_time": "4:30:33", "remaining_time": "1:25:07"} +{"current_steps": 6599, "total_steps": 8674, "loss": 0.4388153851032257, "lr": 2.9721982709879566e-07, "epoch": 1.521558681115979, "percentage": 76.08, "elapsed_time": 
"4:30:36", "remaining_time": "1:25:05"} +{"current_steps": 6600, "total_steps": 8674, "loss": 0.4479100704193115, "lr": 2.969486462362167e-07, "epoch": 1.5217892552455614, "percentage": 76.09, "elapsed_time": "4:30:38", "remaining_time": "1:25:02"} +{"current_steps": 6601, "total_steps": 8674, "loss": 0.4005380868911743, "lr": 2.9667756756927686e-07, "epoch": 1.5220198293751441, "percentage": 76.1, "elapsed_time": "4:30:42", "remaining_time": "1:25:00"} +{"current_steps": 6602, "total_steps": 8674, "loss": 0.43774881958961487, "lr": 2.9640659113738087e-07, "epoch": 1.5222504035047266, "percentage": 76.11, "elapsed_time": "4:30:44", "remaining_time": "1:24:58"} +{"current_steps": 6603, "total_steps": 8674, "loss": 0.4449707865715027, "lr": 2.9613571697991725e-07, "epoch": 1.5224809776343093, "percentage": 76.12, "elapsed_time": "4:30:47", "remaining_time": "1:24:55"} +{"current_steps": 6604, "total_steps": 8674, "loss": 0.454499751329422, "lr": 2.958649451362606e-07, "epoch": 1.522711551763892, "percentage": 76.14, "elapsed_time": "4:30:49", "remaining_time": "1:24:53"} +{"current_steps": 6605, "total_steps": 8674, "loss": 0.35601305961608887, "lr": 2.955942756457707e-07, "epoch": 1.5229421258934748, "percentage": 76.15, "elapsed_time": "4:30:51", "remaining_time": "1:24:50"} +{"current_steps": 6606, "total_steps": 8674, "loss": 0.5252523422241211, "lr": 2.9532370854779143e-07, "epoch": 1.5231727000230575, "percentage": 76.16, "elapsed_time": "4:30:54", "remaining_time": "1:24:48"} +{"current_steps": 6607, "total_steps": 8674, "loss": 0.4311884939670563, "lr": 2.950532438816531e-07, "epoch": 1.5234032741526402, "percentage": 76.17, "elapsed_time": "4:30:56", "remaining_time": "1:24:45"} +{"current_steps": 6608, "total_steps": 8674, "loss": 0.43956485390663147, "lr": 2.9478288168667e-07, "epoch": 1.5236338482822227, "percentage": 76.18, "elapsed_time": "4:30:59", "remaining_time": "1:24:43"} +{"current_steps": 6609, "total_steps": 8674, "loss": 0.400115430355072, 
"lr": 2.9451262200214235e-07, "epoch": 1.5238644224118054, "percentage": 76.19, "elapsed_time": "4:31:01", "remaining_time": "1:24:41"} +{"current_steps": 6610, "total_steps": 8674, "loss": 0.41738802194595337, "lr": 2.942424648673548e-07, "epoch": 1.524094996541388, "percentage": 76.2, "elapsed_time": "4:31:04", "remaining_time": "1:24:38"} +{"current_steps": 6611, "total_steps": 8674, "loss": 0.412765771150589, "lr": 2.939724103215776e-07, "epoch": 1.5243255706709706, "percentage": 76.22, "elapsed_time": "4:31:06", "remaining_time": "1:24:36"} +{"current_steps": 6612, "total_steps": 8674, "loss": 0.44869422912597656, "lr": 2.937024584040659e-07, "epoch": 1.5245561448005533, "percentage": 76.23, "elapsed_time": "4:31:09", "remaining_time": "1:24:33"} +{"current_steps": 6613, "total_steps": 8674, "loss": 0.39191997051239014, "lr": 2.934326091540603e-07, "epoch": 1.524786718930136, "percentage": 76.24, "elapsed_time": "4:31:11", "remaining_time": "1:24:31"} +{"current_steps": 6614, "total_steps": 8674, "loss": 0.36575692892074585, "lr": 2.9316286261078547e-07, "epoch": 1.5250172930597188, "percentage": 76.25, "elapsed_time": "4:31:14", "remaining_time": "1:24:28"} +{"current_steps": 6615, "total_steps": 8674, "loss": 0.49928778409957886, "lr": 2.9289321881345254e-07, "epoch": 1.5252478671893015, "percentage": 76.26, "elapsed_time": "4:31:16", "remaining_time": "1:24:26"} +{"current_steps": 6616, "total_steps": 8674, "loss": 0.49619296193122864, "lr": 2.926236778012565e-07, "epoch": 1.525478441318884, "percentage": 76.27, "elapsed_time": "4:31:18", "remaining_time": "1:24:23"} +{"current_steps": 6617, "total_steps": 8674, "loss": 0.4614447355270386, "lr": 2.923542396133777e-07, "epoch": 1.5257090154484667, "percentage": 76.29, "elapsed_time": "4:31:21", "remaining_time": "1:24:21"} +{"current_steps": 6618, "total_steps": 8674, "loss": 0.43820804357528687, "lr": 2.9208490428898213e-07, "epoch": 1.5259395895780492, "percentage": 76.3, "elapsed_time": "4:31:23", 
"remaining_time": "1:24:18"} +{"current_steps": 6619, "total_steps": 8674, "loss": 0.46856528520584106, "lr": 2.9181567186722e-07, "epoch": 1.526170163707632, "percentage": 76.31, "elapsed_time": "4:31:26", "remaining_time": "1:24:16"} +{"current_steps": 6620, "total_steps": 8674, "loss": 0.45428818464279175, "lr": 2.915465423872272e-07, "epoch": 1.5264007378372146, "percentage": 76.32, "elapsed_time": "4:31:28", "remaining_time": "1:24:13"} +{"current_steps": 6621, "total_steps": 8674, "loss": 0.44715386629104614, "lr": 2.912775158881243e-07, "epoch": 1.5266313119667974, "percentage": 76.33, "elapsed_time": "4:31:31", "remaining_time": "1:24:11"} +{"current_steps": 6622, "total_steps": 8674, "loss": 0.537441611289978, "lr": 2.9100859240901764e-07, "epoch": 1.52686188609638, "percentage": 76.34, "elapsed_time": "4:31:33", "remaining_time": "1:24:08"} +{"current_steps": 6623, "total_steps": 8674, "loss": 0.4430112838745117, "lr": 2.9073977198899714e-07, "epoch": 1.5270924602259628, "percentage": 76.35, "elapsed_time": "4:31:35", "remaining_time": "1:24:06"} +{"current_steps": 6624, "total_steps": 8674, "loss": 0.41713255643844604, "lr": 2.904710546671392e-07, "epoch": 1.5273230343555453, "percentage": 76.37, "elapsed_time": "4:31:38", "remaining_time": "1:24:04"} +{"current_steps": 6625, "total_steps": 8674, "loss": 0.4313931465148926, "lr": 2.9020244048250396e-07, "epoch": 1.527553608485128, "percentage": 76.38, "elapsed_time": "4:31:40", "remaining_time": "1:24:01"} +{"current_steps": 6626, "total_steps": 8674, "loss": 0.5038034319877625, "lr": 2.899339294741379e-07, "epoch": 1.5277841826147105, "percentage": 76.39, "elapsed_time": "4:31:43", "remaining_time": "1:23:59"} +{"current_steps": 6627, "total_steps": 8674, "loss": 0.45088762044906616, "lr": 2.8966552168107127e-07, "epoch": 1.5280147567442932, "percentage": 76.4, "elapsed_time": "4:31:45", "remaining_time": "1:23:56"} +{"current_steps": 6628, "total_steps": 8674, "loss": 0.40857064723968506, "lr": 
2.8939721714232e-07, "epoch": 1.528245330873876, "percentage": 76.41, "elapsed_time": "4:31:48", "remaining_time": "1:23:54"} +{"current_steps": 6629, "total_steps": 8674, "loss": 0.43766242265701294, "lr": 2.891290158968853e-07, "epoch": 1.5284759050034586, "percentage": 76.42, "elapsed_time": "4:31:50", "remaining_time": "1:23:51"} +{"current_steps": 6630, "total_steps": 8674, "loss": 0.45986247062683105, "lr": 2.888609179837523e-07, "epoch": 1.5287064791330414, "percentage": 76.44, "elapsed_time": "4:31:53", "remaining_time": "1:23:49"} +{"current_steps": 6631, "total_steps": 8674, "loss": 0.4681728482246399, "lr": 2.8859292344189236e-07, "epoch": 1.528937053262624, "percentage": 76.45, "elapsed_time": "4:31:55", "remaining_time": "1:23:46"} +{"current_steps": 6632, "total_steps": 8674, "loss": 0.36730295419692993, "lr": 2.883250323102605e-07, "epoch": 1.5291676273922066, "percentage": 76.46, "elapsed_time": "4:31:57", "remaining_time": "1:23:44"} +{"current_steps": 6633, "total_steps": 8674, "loss": 0.43494418263435364, "lr": 2.880572446277982e-07, "epoch": 1.5293982015217893, "percentage": 76.47, "elapsed_time": "4:32:00", "remaining_time": "1:23:41"} +{"current_steps": 6634, "total_steps": 8674, "loss": 0.49145790934562683, "lr": 2.877895604334305e-07, "epoch": 1.5296287756513718, "percentage": 76.48, "elapsed_time": "4:32:02", "remaining_time": "1:23:39"} +{"current_steps": 6635, "total_steps": 8674, "loss": 0.4166264832019806, "lr": 2.875219797660681e-07, "epoch": 1.5298593497809545, "percentage": 76.49, "elapsed_time": "4:32:05", "remaining_time": "1:23:36"} +{"current_steps": 6636, "total_steps": 8674, "loss": 0.4336514472961426, "lr": 2.8725450266460704e-07, "epoch": 1.5300899239105372, "percentage": 76.5, "elapsed_time": "4:32:07", "remaining_time": "1:23:34"} +{"current_steps": 6637, "total_steps": 8674, "loss": 0.44186240434646606, "lr": 2.869871291679271e-07, "epoch": 1.53032049804012, "percentage": 76.52, "elapsed_time": "4:32:10", "remaining_time": 
"1:23:32"} +{"current_steps": 6638, "total_steps": 8674, "loss": 0.40619733929634094, "lr": 2.867198593148945e-07, "epoch": 1.5305510721697027, "percentage": 76.53, "elapsed_time": "4:32:12", "remaining_time": "1:23:29"} +{"current_steps": 6639, "total_steps": 8674, "loss": 0.45552101731300354, "lr": 2.864526931443588e-07, "epoch": 1.5307816462992854, "percentage": 76.54, "elapsed_time": "4:32:15", "remaining_time": "1:23:27"} +{"current_steps": 6640, "total_steps": 8674, "loss": 0.45153865218162537, "lr": 2.861856306951562e-07, "epoch": 1.5310122204288679, "percentage": 76.55, "elapsed_time": "4:32:17", "remaining_time": "1:23:24"} +{"current_steps": 6641, "total_steps": 8674, "loss": 0.5146148204803467, "lr": 2.859186720061061e-07, "epoch": 1.5312427945584506, "percentage": 76.56, "elapsed_time": "4:32:20", "remaining_time": "1:23:22"} +{"current_steps": 6642, "total_steps": 8674, "loss": 0.4566080868244171, "lr": 2.856518171160143e-07, "epoch": 1.531473368688033, "percentage": 76.57, "elapsed_time": "4:32:22", "remaining_time": "1:23:19"} +{"current_steps": 6643, "total_steps": 8674, "loss": 0.4390585124492645, "lr": 2.853850660636703e-07, "epoch": 1.5317039428176158, "percentage": 76.59, "elapsed_time": "4:32:24", "remaining_time": "1:23:17"} +{"current_steps": 6644, "total_steps": 8674, "loss": 0.5508195757865906, "lr": 2.851184188878493e-07, "epoch": 1.5319345169471985, "percentage": 76.6, "elapsed_time": "4:32:27", "remaining_time": "1:23:14"} +{"current_steps": 6645, "total_steps": 8674, "loss": 0.47640183568000793, "lr": 2.8485187562731126e-07, "epoch": 1.5321650910767812, "percentage": 76.61, "elapsed_time": "4:32:29", "remaining_time": "1:23:12"} +{"current_steps": 6646, "total_steps": 8674, "loss": 0.4511566758155823, "lr": 2.8458543632080123e-07, "epoch": 1.532395665206364, "percentage": 76.62, "elapsed_time": "4:32:32", "remaining_time": "1:23:09"} +{"current_steps": 6647, "total_steps": 8674, "loss": 0.414367139339447, "lr": 2.843191010070486e-07, 
"epoch": 1.5326262393359467, "percentage": 76.63, "elapsed_time": "4:32:34", "remaining_time": "1:23:07"} +{"current_steps": 6648, "total_steps": 8674, "loss": 0.4611589312553406, "lr": 2.840528697247674e-07, "epoch": 1.5328568134655292, "percentage": 76.64, "elapsed_time": "4:32:37", "remaining_time": "1:23:04"} +{"current_steps": 6649, "total_steps": 8674, "loss": 0.4675883948802948, "lr": 2.8378674251265787e-07, "epoch": 1.5330873875951119, "percentage": 76.65, "elapsed_time": "4:32:39", "remaining_time": "1:23:02"} +{"current_steps": 6650, "total_steps": 8674, "loss": 0.49039095640182495, "lr": 2.835207194094036e-07, "epoch": 1.5333179617246944, "percentage": 76.67, "elapsed_time": "4:32:41", "remaining_time": "1:22:59"} +{"current_steps": 6651, "total_steps": 8674, "loss": 0.45641693472862244, "lr": 2.832548004536741e-07, "epoch": 1.533548535854277, "percentage": 76.68, "elapsed_time": "4:32:44", "remaining_time": "1:22:57"} +{"current_steps": 6652, "total_steps": 8674, "loss": 0.4858587682247162, "lr": 2.829889856841233e-07, "epoch": 1.5337791099838598, "percentage": 76.69, "elapsed_time": "4:32:46", "remaining_time": "1:22:54"} +{"current_steps": 6653, "total_steps": 8674, "loss": 0.3640017807483673, "lr": 2.8272327513939055e-07, "epoch": 1.5340096841134425, "percentage": 76.7, "elapsed_time": "4:32:49", "remaining_time": "1:22:52"} +{"current_steps": 6654, "total_steps": 8674, "loss": 0.42915207147598267, "lr": 2.8245766885809865e-07, "epoch": 1.5342402582430252, "percentage": 76.71, "elapsed_time": "4:32:51", "remaining_time": "1:22:50"} +{"current_steps": 6655, "total_steps": 8674, "loss": 0.5041407346725464, "lr": 2.8219216687885707e-07, "epoch": 1.534470832372608, "percentage": 76.72, "elapsed_time": "4:32:54", "remaining_time": "1:22:47"} +{"current_steps": 6656, "total_steps": 8674, "loss": 0.4748334288597107, "lr": 2.8192676924025885e-07, "epoch": 1.5347014065021904, "percentage": 76.74, "elapsed_time": "4:32:57", "remaining_time": "1:22:45"} 
+{"current_steps": 6657, "total_steps": 8674, "loss": 0.4745975136756897, "lr": 2.8166147598088173e-07, "epoch": 1.5349319806317732, "percentage": 76.75, "elapsed_time": "4:32:59", "remaining_time": "1:22:42"} +{"current_steps": 6658, "total_steps": 8674, "loss": 0.49246084690093994, "lr": 2.813962871392893e-07, "epoch": 1.5351625547613557, "percentage": 76.76, "elapsed_time": "4:33:02", "remaining_time": "1:22:40"} +{"current_steps": 6659, "total_steps": 8674, "loss": 0.47876033186912537, "lr": 2.8113120275402936e-07, "epoch": 1.5353931288909384, "percentage": 76.77, "elapsed_time": "4:33:04", "remaining_time": "1:22:37"} +{"current_steps": 6660, "total_steps": 8674, "loss": 0.5244987607002258, "lr": 2.808662228636348e-07, "epoch": 1.535623703020521, "percentage": 76.78, "elapsed_time": "4:33:06", "remaining_time": "1:22:35"} +{"current_steps": 6661, "total_steps": 8674, "loss": 0.44661569595336914, "lr": 2.8060134750662277e-07, "epoch": 1.5358542771501038, "percentage": 76.79, "elapsed_time": "4:33:09", "remaining_time": "1:22:33"} +{"current_steps": 6662, "total_steps": 8674, "loss": 0.4508060812950134, "lr": 2.8033657672149615e-07, "epoch": 1.5360848512796865, "percentage": 76.8, "elapsed_time": "4:33:11", "remaining_time": "1:22:30"} +{"current_steps": 6663, "total_steps": 8674, "loss": 0.4657326340675354, "lr": 2.8007191054674117e-07, "epoch": 1.5363154254092692, "percentage": 76.82, "elapsed_time": "4:33:14", "remaining_time": "1:22:28"} +{"current_steps": 6664, "total_steps": 8674, "loss": 0.495077520608902, "lr": 2.798073490208307e-07, "epoch": 1.5365459995388517, "percentage": 76.83, "elapsed_time": "4:33:16", "remaining_time": "1:22:25"} +{"current_steps": 6665, "total_steps": 8674, "loss": 0.40721309185028076, "lr": 2.795428921822206e-07, "epoch": 1.5367765736684345, "percentage": 76.84, "elapsed_time": "4:33:19", "remaining_time": "1:22:23"} +{"current_steps": 6666, "total_steps": 8674, "loss": 0.3279367685317993, "lr": 2.7927854006935315e-07, "epoch": 
1.537007147798017, "percentage": 76.85, "elapsed_time": "4:33:21", "remaining_time": "1:22:20"} +{"current_steps": 6667, "total_steps": 8674, "loss": 0.4849242866039276, "lr": 2.790142927206538e-07, "epoch": 1.5372377219275997, "percentage": 76.86, "elapsed_time": "4:33:24", "remaining_time": "1:22:18"} +{"current_steps": 6668, "total_steps": 8674, "loss": 0.45151397585868835, "lr": 2.7875015017453394e-07, "epoch": 1.5374682960571824, "percentage": 76.87, "elapsed_time": "4:33:26", "remaining_time": "1:22:15"} +{"current_steps": 6669, "total_steps": 8674, "loss": 0.43480992317199707, "lr": 2.784861124693898e-07, "epoch": 1.537698870186765, "percentage": 76.88, "elapsed_time": "4:33:29", "remaining_time": "1:22:13"} +{"current_steps": 6670, "total_steps": 8674, "loss": 0.48764440417289734, "lr": 2.782221796436012e-07, "epoch": 1.5379294443163478, "percentage": 76.9, "elapsed_time": "4:33:31", "remaining_time": "1:22:10"} +{"current_steps": 6671, "total_steps": 8674, "loss": 0.4164161682128906, "lr": 2.7795835173553407e-07, "epoch": 1.5381600184459305, "percentage": 76.91, "elapsed_time": "4:33:33", "remaining_time": "1:22:08"} +{"current_steps": 6672, "total_steps": 8674, "loss": 0.49934858083724976, "lr": 2.7769462878353777e-07, "epoch": 1.538390592575513, "percentage": 76.92, "elapsed_time": "4:33:36", "remaining_time": "1:22:05"} +{"current_steps": 6673, "total_steps": 8674, "loss": 0.4877321124076843, "lr": 2.77431010825948e-07, "epoch": 1.5386211667050955, "percentage": 76.93, "elapsed_time": "4:33:38", "remaining_time": "1:22:03"} +{"current_steps": 6674, "total_steps": 8674, "loss": 0.44518858194351196, "lr": 2.771674979010834e-07, "epoch": 1.5388517408346782, "percentage": 76.94, "elapsed_time": "4:33:41", "remaining_time": "1:22:00"} +{"current_steps": 6675, "total_steps": 8674, "loss": 0.4237474203109741, "lr": 2.769040900472488e-07, "epoch": 1.539082314964261, "percentage": 76.95, "elapsed_time": "4:33:43", "remaining_time": "1:21:58"} +{"current_steps": 
6676, "total_steps": 8674, "loss": 0.45270341634750366, "lr": 2.7664078730273335e-07, "epoch": 1.5393128890938437, "percentage": 76.97, "elapsed_time": "4:33:46", "remaining_time": "1:21:56"} +{"current_steps": 6677, "total_steps": 8674, "loss": 0.3866819739341736, "lr": 2.7637758970581004e-07, "epoch": 1.5395434632234264, "percentage": 76.98, "elapsed_time": "4:33:48", "remaining_time": "1:21:53"} +{"current_steps": 6678, "total_steps": 8674, "loss": 0.4384220838546753, "lr": 2.7611449729473825e-07, "epoch": 1.539774037353009, "percentage": 76.99, "elapsed_time": "4:33:51", "remaining_time": "1:21:51"} +{"current_steps": 6679, "total_steps": 8674, "loss": 0.4462182819843292, "lr": 2.758515101077602e-07, "epoch": 1.5400046114825916, "percentage": 77.0, "elapsed_time": "4:33:53", "remaining_time": "1:21:48"} +{"current_steps": 6680, "total_steps": 8674, "loss": 0.3927033245563507, "lr": 2.755886281831046e-07, "epoch": 1.5402351856121743, "percentage": 77.01, "elapsed_time": "4:33:56", "remaining_time": "1:21:46"} +{"current_steps": 6681, "total_steps": 8674, "loss": 0.4678634703159332, "lr": 2.7532585155898314e-07, "epoch": 1.5404657597417568, "percentage": 77.02, "elapsed_time": "4:33:58", "remaining_time": "1:21:43"} +{"current_steps": 6682, "total_steps": 8674, "loss": 0.4165131151676178, "lr": 2.750631802735935e-07, "epoch": 1.5406963338713395, "percentage": 77.03, "elapsed_time": "4:34:00", "remaining_time": "1:21:41"} +{"current_steps": 6683, "total_steps": 8674, "loss": 0.3705793023109436, "lr": 2.748006143651178e-07, "epoch": 1.5409269080009222, "percentage": 77.05, "elapsed_time": "4:34:03", "remaining_time": "1:21:38"} +{"current_steps": 6684, "total_steps": 8674, "loss": 0.5428882837295532, "lr": 2.745381538717226e-07, "epoch": 1.541157482130505, "percentage": 77.06, "elapsed_time": "4:34:05", "remaining_time": "1:21:36"} +{"current_steps": 6685, "total_steps": 8674, "loss": 0.4116673171520233, "lr": 2.742757988315589e-07, "epoch": 1.5413880562600877, 
"percentage": 77.07, "elapsed_time": "4:34:08", "remaining_time": "1:21:33"} +{"current_steps": 6686, "total_steps": 8674, "loss": 0.4617515802383423, "lr": 2.740135492827631e-07, "epoch": 1.5416186303896704, "percentage": 77.08, "elapsed_time": "4:34:10", "remaining_time": "1:21:31"} +{"current_steps": 6687, "total_steps": 8674, "loss": 0.5002453923225403, "lr": 2.737514052634555e-07, "epoch": 1.541849204519253, "percentage": 77.09, "elapsed_time": "4:34:13", "remaining_time": "1:21:28"} +{"current_steps": 6688, "total_steps": 8674, "loss": 0.46029362082481384, "lr": 2.734893668117412e-07, "epoch": 1.5420797786488356, "percentage": 77.1, "elapsed_time": "4:34:15", "remaining_time": "1:21:26"} +{"current_steps": 6689, "total_steps": 8674, "loss": 0.40502026677131653, "lr": 2.732274339657107e-07, "epoch": 1.542310352778418, "percentage": 77.12, "elapsed_time": "4:34:17", "remaining_time": "1:21:23"} +{"current_steps": 6690, "total_steps": 8674, "loss": 0.5267831087112427, "lr": 2.7296560676343803e-07, "epoch": 1.5425409269080008, "percentage": 77.13, "elapsed_time": "4:34:20", "remaining_time": "1:21:21"} +{"current_steps": 6691, "total_steps": 8674, "loss": 0.3464335799217224, "lr": 2.727038852429826e-07, "epoch": 1.5427715010375835, "percentage": 77.14, "elapsed_time": "4:34:22", "remaining_time": "1:21:18"} +{"current_steps": 6692, "total_steps": 8674, "loss": 0.36635881662368774, "lr": 2.7244226944238847e-07, "epoch": 1.5430020751671663, "percentage": 77.15, "elapsed_time": "4:34:24", "remaining_time": "1:21:16"} +{"current_steps": 6693, "total_steps": 8674, "loss": 0.4567757844924927, "lr": 2.7218075939968435e-07, "epoch": 1.543232649296749, "percentage": 77.16, "elapsed_time": "4:34:27", "remaining_time": "1:21:13"} +{"current_steps": 6694, "total_steps": 8674, "loss": 0.539220929145813, "lr": 2.719193551528827e-07, "epoch": 1.5434632234263317, "percentage": 77.17, "elapsed_time": "4:34:29", "remaining_time": "1:21:11"} +{"current_steps": 6695, "total_steps": 
8674, "loss": 0.48553818464279175, "lr": 2.71658056739982e-07, "epoch": 1.5436937975559142, "percentage": 77.18, "elapsed_time": "4:34:32", "remaining_time": "1:21:09"} +{"current_steps": 6696, "total_steps": 8674, "loss": 0.48564499616622925, "lr": 2.7139686419896424e-07, "epoch": 1.543924371685497, "percentage": 77.2, "elapsed_time": "4:34:34", "remaining_time": "1:21:06"} +{"current_steps": 6697, "total_steps": 8674, "loss": 0.4163014590740204, "lr": 2.7113577756779616e-07, "epoch": 1.5441549458150794, "percentage": 77.21, "elapsed_time": "4:34:37", "remaining_time": "1:21:04"} +{"current_steps": 6698, "total_steps": 8674, "loss": 0.5686431527137756, "lr": 2.708747968844296e-07, "epoch": 1.544385519944662, "percentage": 77.22, "elapsed_time": "4:34:39", "remaining_time": "1:21:01"} +{"current_steps": 6699, "total_steps": 8674, "loss": 0.5365211963653564, "lr": 2.706139221868008e-07, "epoch": 1.5446160940742448, "percentage": 77.23, "elapsed_time": "4:34:41", "remaining_time": "1:20:59"} +{"current_steps": 6700, "total_steps": 8674, "loss": 0.4147397577762604, "lr": 2.7035315351283084e-07, "epoch": 1.5448466682038275, "percentage": 77.24, "elapsed_time": "4:34:44", "remaining_time": "1:20:56"} +{"current_steps": 6701, "total_steps": 8674, "loss": 0.3938590884208679, "lr": 2.7009249090042454e-07, "epoch": 1.5450772423334103, "percentage": 77.25, "elapsed_time": "4:34:48", "remaining_time": "1:20:54"} +{"current_steps": 6702, "total_steps": 8674, "loss": 0.3521370589733124, "lr": 2.698319343874722e-07, "epoch": 1.545307816462993, "percentage": 77.27, "elapsed_time": "4:34:50", "remaining_time": "1:20:52"} +{"current_steps": 6703, "total_steps": 8674, "loss": 0.430014967918396, "lr": 2.69571484011848e-07, "epoch": 1.5455383905925755, "percentage": 77.28, "elapsed_time": "4:34:53", "remaining_time": "1:20:49"} +{"current_steps": 6704, "total_steps": 8674, "loss": 0.4697108864784241, "lr": 2.6931113981141164e-07, "epoch": 1.5457689647221582, "percentage": 77.29, 
"elapsed_time": "4:34:55", "remaining_time": "1:20:47"} +{"current_steps": 6705, "total_steps": 8674, "loss": 0.46567851305007935, "lr": 2.69050901824006e-07, "epoch": 1.5459995388517407, "percentage": 77.3, "elapsed_time": "4:34:58", "remaining_time": "1:20:44"} +{"current_steps": 6706, "total_steps": 8674, "loss": 0.46061819791793823, "lr": 2.6879077008745986e-07, "epoch": 1.5462301129813234, "percentage": 77.31, "elapsed_time": "4:35:00", "remaining_time": "1:20:42"} +{"current_steps": 6707, "total_steps": 8674, "loss": 0.568658709526062, "lr": 2.6853074463958614e-07, "epoch": 1.5464606871109061, "percentage": 77.32, "elapsed_time": "4:35:02", "remaining_time": "1:20:39"} +{"current_steps": 6708, "total_steps": 8674, "loss": 0.42816412448883057, "lr": 2.682708255181815e-07, "epoch": 1.5466912612404888, "percentage": 77.33, "elapsed_time": "4:35:05", "remaining_time": "1:20:37"} +{"current_steps": 6709, "total_steps": 8674, "loss": 0.42515552043914795, "lr": 2.6801101276102866e-07, "epoch": 1.5469218353700716, "percentage": 77.35, "elapsed_time": "4:35:07", "remaining_time": "1:20:34"} +{"current_steps": 6710, "total_steps": 8674, "loss": 0.46513399481773376, "lr": 2.677513064058932e-07, "epoch": 1.5471524094996543, "percentage": 77.36, "elapsed_time": "4:35:10", "remaining_time": "1:20:32"} +{"current_steps": 6711, "total_steps": 8674, "loss": 0.4194756746292114, "lr": 2.6749170649052675e-07, "epoch": 1.5473829836292368, "percentage": 77.37, "elapsed_time": "4:35:12", "remaining_time": "1:20:30"} +{"current_steps": 6712, "total_steps": 8674, "loss": 0.4456541836261749, "lr": 2.672322130526643e-07, "epoch": 1.5476135577588195, "percentage": 77.38, "elapsed_time": "4:35:15", "remaining_time": "1:20:27"} +{"current_steps": 6713, "total_steps": 8674, "loss": 0.493444561958313, "lr": 2.669728261300264e-07, "epoch": 1.547844131888402, "percentage": 77.39, "elapsed_time": "4:35:17", "remaining_time": "1:20:25"} +{"current_steps": 6714, "total_steps": 8674, "loss": 
0.47202616930007935, "lr": 2.6671354576031645e-07, "epoch": 1.5480747060179847, "percentage": 77.4, "elapsed_time": "4:35:20", "remaining_time": "1:20:22"} +{"current_steps": 6715, "total_steps": 8674, "loss": 0.4584811329841614, "lr": 2.66454371981225e-07, "epoch": 1.5483052801475674, "percentage": 77.42, "elapsed_time": "4:35:22", "remaining_time": "1:20:20"} +{"current_steps": 6716, "total_steps": 8674, "loss": 0.4072091579437256, "lr": 2.6619530483042485e-07, "epoch": 1.5485358542771501, "percentage": 77.43, "elapsed_time": "4:35:24", "remaining_time": "1:20:17"} +{"current_steps": 6717, "total_steps": 8674, "loss": 0.49742361903190613, "lr": 2.6593634434557365e-07, "epoch": 1.5487664284067328, "percentage": 77.44, "elapsed_time": "4:35:27", "remaining_time": "1:20:15"} +{"current_steps": 6718, "total_steps": 8674, "loss": 0.49291643500328064, "lr": 2.6567749056431467e-07, "epoch": 1.5489970025363156, "percentage": 77.45, "elapsed_time": "4:35:29", "remaining_time": "1:20:12"} +{"current_steps": 6719, "total_steps": 8674, "loss": 0.5210362076759338, "lr": 2.6541874352427427e-07, "epoch": 1.549227576665898, "percentage": 77.46, "elapsed_time": "4:35:32", "remaining_time": "1:20:10"} +{"current_steps": 6720, "total_steps": 8674, "loss": 0.4489557147026062, "lr": 2.651601032630645e-07, "epoch": 1.5494581507954808, "percentage": 77.47, "elapsed_time": "4:35:34", "remaining_time": "1:20:07"} +{"current_steps": 6721, "total_steps": 8674, "loss": 0.4417908191680908, "lr": 2.649015698182808e-07, "epoch": 1.5496887249250633, "percentage": 77.48, "elapsed_time": "4:35:37", "remaining_time": "1:20:05"} +{"current_steps": 6722, "total_steps": 8674, "loss": 0.45177266001701355, "lr": 2.6464314322750404e-07, "epoch": 1.549919299054646, "percentage": 77.5, "elapsed_time": "4:35:39", "remaining_time": "1:20:02"} +{"current_steps": 6723, "total_steps": 8674, "loss": 0.37720638513565063, "lr": 2.6438482352829896e-07, "epoch": 1.5501498731842287, "percentage": 77.51, 
"elapsed_time": "4:35:42", "remaining_time": "1:20:00"} +{"current_steps": 6724, "total_steps": 8674, "loss": 0.5108897089958191, "lr": 2.641266107582153e-07, "epoch": 1.5503804473138114, "percentage": 77.52, "elapsed_time": "4:35:44", "remaining_time": "1:19:58"} +{"current_steps": 6725, "total_steps": 8674, "loss": 0.449248731136322, "lr": 2.638685049547863e-07, "epoch": 1.5506110214433941, "percentage": 77.53, "elapsed_time": "4:35:47", "remaining_time": "1:19:55"} +{"current_steps": 6726, "total_steps": 8674, "loss": 0.4692652225494385, "lr": 2.636105061555309e-07, "epoch": 1.5508415955729768, "percentage": 77.54, "elapsed_time": "4:35:49", "remaining_time": "1:19:53"} +{"current_steps": 6727, "total_steps": 8674, "loss": 0.49128347635269165, "lr": 2.6335261439795153e-07, "epoch": 1.5510721697025593, "percentage": 77.55, "elapsed_time": "4:35:51", "remaining_time": "1:19:50"} +{"current_steps": 6728, "total_steps": 8674, "loss": 0.4618053436279297, "lr": 2.630948297195351e-07, "epoch": 1.551302743832142, "percentage": 77.57, "elapsed_time": "4:35:54", "remaining_time": "1:19:48"} +{"current_steps": 6729, "total_steps": 8674, "loss": 0.4342828094959259, "lr": 2.6283715215775336e-07, "epoch": 1.5515333179617246, "percentage": 77.58, "elapsed_time": "4:35:57", "remaining_time": "1:19:45"} +{"current_steps": 6730, "total_steps": 8674, "loss": 0.5214434862136841, "lr": 2.625795817500626e-07, "epoch": 1.5517638920913073, "percentage": 77.59, "elapsed_time": "4:35:59", "remaining_time": "1:19:43"} +{"current_steps": 6731, "total_steps": 8674, "loss": 0.4873029589653015, "lr": 2.623221185339034e-07, "epoch": 1.55199446622089, "percentage": 77.6, "elapsed_time": "4:36:02", "remaining_time": "1:19:40"} +{"current_steps": 6732, "total_steps": 8674, "loss": 0.4510548412799835, "lr": 2.6206476254670007e-07, "epoch": 1.5522250403504727, "percentage": 77.61, "elapsed_time": "4:36:04", "remaining_time": "1:19:38"} +{"current_steps": 6733, "total_steps": 8674, "loss": 
0.4832548499107361, "lr": 2.6180751382586265e-07, "epoch": 1.5524556144800554, "percentage": 77.62, "elapsed_time": "4:36:07", "remaining_time": "1:19:36"} +{"current_steps": 6734, "total_steps": 8674, "loss": 0.5438823699951172, "lr": 2.6155037240878406e-07, "epoch": 1.5526861886096381, "percentage": 77.63, "elapsed_time": "4:36:09", "remaining_time": "1:19:33"} +{"current_steps": 6735, "total_steps": 8674, "loss": 0.4967566728591919, "lr": 2.6129333833284315e-07, "epoch": 1.5529167627392206, "percentage": 77.65, "elapsed_time": "4:36:12", "remaining_time": "1:19:31"} +{"current_steps": 6736, "total_steps": 8674, "loss": 0.5187437534332275, "lr": 2.610364116354018e-07, "epoch": 1.5531473368688034, "percentage": 77.66, "elapsed_time": "4:36:14", "remaining_time": "1:19:28"} +{"current_steps": 6737, "total_steps": 8674, "loss": 0.4199862480163574, "lr": 2.607795923538072e-07, "epoch": 1.5533779109983858, "percentage": 77.67, "elapsed_time": "4:36:17", "remaining_time": "1:19:26"} +{"current_steps": 6738, "total_steps": 8674, "loss": 0.5009325742721558, "lr": 2.6052288052539084e-07, "epoch": 1.5536084851279686, "percentage": 77.68, "elapsed_time": "4:36:19", "remaining_time": "1:19:23"} +{"current_steps": 6739, "total_steps": 8674, "loss": 0.48698678612709045, "lr": 2.602662761874679e-07, "epoch": 1.5538390592575513, "percentage": 77.69, "elapsed_time": "4:36:21", "remaining_time": "1:19:21"} +{"current_steps": 6740, "total_steps": 8674, "loss": 0.4845883846282959, "lr": 2.6000977937733905e-07, "epoch": 1.554069633387134, "percentage": 77.7, "elapsed_time": "4:36:24", "remaining_time": "1:19:18"} +{"current_steps": 6741, "total_steps": 8674, "loss": 0.512161135673523, "lr": 2.59753390132288e-07, "epoch": 1.5543002075167167, "percentage": 77.72, "elapsed_time": "4:36:26", "remaining_time": "1:19:16"} +{"current_steps": 6742, "total_steps": 8674, "loss": 0.42334964871406555, "lr": 2.5949710848958415e-07, "epoch": 1.5545307816462994, "percentage": 77.73, "elapsed_time": 
"4:36:29", "remaining_time": "1:19:13"} +{"current_steps": 6743, "total_steps": 8674, "loss": 0.3781980276107788, "lr": 2.592409344864801e-07, "epoch": 1.554761355775882, "percentage": 77.74, "elapsed_time": "4:36:31", "remaining_time": "1:19:11"} +{"current_steps": 6744, "total_steps": 8674, "loss": 0.4989853501319885, "lr": 2.5898486816021394e-07, "epoch": 1.5549919299054646, "percentage": 77.75, "elapsed_time": "4:36:34", "remaining_time": "1:19:08"} +{"current_steps": 6745, "total_steps": 8674, "loss": 0.45715585350990295, "lr": 2.5872890954800676e-07, "epoch": 1.5552225040350471, "percentage": 77.76, "elapsed_time": "4:36:36", "remaining_time": "1:19:06"} +{"current_steps": 6746, "total_steps": 8674, "loss": 0.5025684833526611, "lr": 2.5847305868706515e-07, "epoch": 1.5554530781646299, "percentage": 77.77, "elapsed_time": "4:36:39", "remaining_time": "1:19:04"} +{"current_steps": 6747, "total_steps": 8674, "loss": 0.47298115491867065, "lr": 2.5821731561457994e-07, "epoch": 1.5556836522942126, "percentage": 77.78, "elapsed_time": "4:36:41", "remaining_time": "1:19:01"} +{"current_steps": 6748, "total_steps": 8674, "loss": 0.45412957668304443, "lr": 2.5796168036772524e-07, "epoch": 1.5559142264237953, "percentage": 77.8, "elapsed_time": "4:36:44", "remaining_time": "1:18:59"} +{"current_steps": 6749, "total_steps": 8674, "loss": 0.3958669602870941, "lr": 2.5770615298366107e-07, "epoch": 1.556144800553378, "percentage": 77.81, "elapsed_time": "4:36:46", "remaining_time": "1:18:56"} +{"current_steps": 6750, "total_steps": 8674, "loss": 0.4748396873474121, "lr": 2.574507334995302e-07, "epoch": 1.5563753746829607, "percentage": 77.82, "elapsed_time": "4:36:48", "remaining_time": "1:18:54"} +{"current_steps": 6751, "total_steps": 8674, "loss": 0.4741169810295105, "lr": 2.5719542195246093e-07, "epoch": 1.5566059488125432, "percentage": 77.83, "elapsed_time": "4:36:51", "remaining_time": "1:18:51"} +{"current_steps": 6752, "total_steps": 8674, "loss": 
0.4362972378730774, "lr": 2.569402183795648e-07, "epoch": 1.556836522942126, "percentage": 77.84, "elapsed_time": "4:36:53", "remaining_time": "1:18:49"} +{"current_steps": 6753, "total_steps": 8674, "loss": 0.48013412952423096, "lr": 2.5668512281793873e-07, "epoch": 1.5570670970717084, "percentage": 77.85, "elapsed_time": "4:36:56", "remaining_time": "1:18:46"} +{"current_steps": 6754, "total_steps": 8674, "loss": 0.4728567600250244, "lr": 2.564301353046634e-07, "epoch": 1.5572976712012911, "percentage": 77.86, "elapsed_time": "4:36:58", "remaining_time": "1:18:44"} +{"current_steps": 6755, "total_steps": 8674, "loss": 0.4304337501525879, "lr": 2.56175255876804e-07, "epoch": 1.5575282453308739, "percentage": 77.88, "elapsed_time": "4:37:01", "remaining_time": "1:18:41"} +{"current_steps": 6756, "total_steps": 8674, "loss": 0.43467870354652405, "lr": 2.5592048457140926e-07, "epoch": 1.5577588194604566, "percentage": 77.89, "elapsed_time": "4:37:03", "remaining_time": "1:18:39"} +{"current_steps": 6757, "total_steps": 8674, "loss": 0.3912844657897949, "lr": 2.556658214255134e-07, "epoch": 1.5579893935900393, "percentage": 77.9, "elapsed_time": "4:37:06", "remaining_time": "1:18:36"} +{"current_steps": 6758, "total_steps": 8674, "loss": 0.4462862014770508, "lr": 2.5541126647613397e-07, "epoch": 1.558219967719622, "percentage": 77.91, "elapsed_time": "4:37:08", "remaining_time": "1:18:34"} +{"current_steps": 6759, "total_steps": 8674, "loss": 0.43929487466812134, "lr": 2.551568197602729e-07, "epoch": 1.5584505418492045, "percentage": 77.92, "elapsed_time": "4:37:11", "remaining_time": "1:18:32"} +{"current_steps": 6760, "total_steps": 8674, "loss": 0.44473958015441895, "lr": 2.549024813149169e-07, "epoch": 1.5586811159787872, "percentage": 77.93, "elapsed_time": "4:37:13", "remaining_time": "1:18:29"} +{"current_steps": 6761, "total_steps": 8674, "loss": 0.5159727931022644, "lr": 2.546482511770365e-07, "epoch": 1.5589116901083697, "percentage": 77.95, "elapsed_time": 
"4:37:16", "remaining_time": "1:18:27"} +{"current_steps": 6762, "total_steps": 8674, "loss": 0.3975204825401306, "lr": 2.5439412938358696e-07, "epoch": 1.5591422642379524, "percentage": 77.96, "elapsed_time": "4:37:18", "remaining_time": "1:18:24"} +{"current_steps": 6763, "total_steps": 8674, "loss": 0.5198286175727844, "lr": 2.54140115971507e-07, "epoch": 1.5593728383675352, "percentage": 77.97, "elapsed_time": "4:37:21", "remaining_time": "1:18:22"} +{"current_steps": 6764, "total_steps": 8674, "loss": 0.4815763831138611, "lr": 2.5388621097772046e-07, "epoch": 1.5596034124971179, "percentage": 77.98, "elapsed_time": "4:37:23", "remaining_time": "1:18:19"} +{"current_steps": 6765, "total_steps": 8674, "loss": 0.365215539932251, "lr": 2.5363241443913454e-07, "epoch": 1.5598339866267006, "percentage": 77.99, "elapsed_time": "4:37:25", "remaining_time": "1:18:17"} +{"current_steps": 6766, "total_steps": 8674, "loss": 0.486020028591156, "lr": 2.533787263926417e-07, "epoch": 1.5600645607562833, "percentage": 78.0, "elapsed_time": "4:37:28", "remaining_time": "1:18:14"} +{"current_steps": 6767, "total_steps": 8674, "loss": 0.38536715507507324, "lr": 2.5312514687511766e-07, "epoch": 1.5602951348858658, "percentage": 78.01, "elapsed_time": "4:37:30", "remaining_time": "1:18:12"} +{"current_steps": 6768, "total_steps": 8674, "loss": 0.44713371992111206, "lr": 2.528716759234227e-07, "epoch": 1.5605257090154485, "percentage": 78.03, "elapsed_time": "4:37:33", "remaining_time": "1:18:09"} +{"current_steps": 6769, "total_steps": 8674, "loss": 0.4122806489467621, "lr": 2.5261831357440154e-07, "epoch": 1.560756283145031, "percentage": 78.04, "elapsed_time": "4:37:35", "remaining_time": "1:18:07"} +{"current_steps": 6770, "total_steps": 8674, "loss": 0.40514320135116577, "lr": 2.523650598648829e-07, "epoch": 1.5609868572746137, "percentage": 78.05, "elapsed_time": "4:37:38", "remaining_time": "1:18:04"} +{"current_steps": 6771, "total_steps": 8674, "loss": 0.4273102283477783, 
"lr": 2.5211191483168027e-07, "epoch": 1.5612174314041964, "percentage": 78.06, "elapsed_time": "4:37:40", "remaining_time": "1:18:02"} +{"current_steps": 6772, "total_steps": 8674, "loss": 0.4774209260940552, "lr": 2.5185887851159005e-07, "epoch": 1.5614480055337792, "percentage": 78.07, "elapsed_time": "4:37:43", "remaining_time": "1:18:00"} +{"current_steps": 6773, "total_steps": 8674, "loss": 0.3928600549697876, "lr": 2.5160595094139436e-07, "epoch": 1.5616785796633619, "percentage": 78.08, "elapsed_time": "4:37:45", "remaining_time": "1:17:57"} +{"current_steps": 6774, "total_steps": 8674, "loss": 0.4414944052696228, "lr": 2.5135313215785816e-07, "epoch": 1.5619091537929446, "percentage": 78.1, "elapsed_time": "4:37:48", "remaining_time": "1:17:55"} +{"current_steps": 6775, "total_steps": 8674, "loss": 0.36133646965026855, "lr": 2.5110042219773176e-07, "epoch": 1.562139727922527, "percentage": 78.11, "elapsed_time": "4:37:50", "remaining_time": "1:17:52"} +{"current_steps": 6776, "total_steps": 8674, "loss": 0.44824904203414917, "lr": 2.508478210977486e-07, "epoch": 1.5623703020521098, "percentage": 78.12, "elapsed_time": "4:37:53", "remaining_time": "1:17:50"} +{"current_steps": 6777, "total_steps": 8674, "loss": 0.3699820637702942, "lr": 2.5059532889462707e-07, "epoch": 1.5626008761816923, "percentage": 78.13, "elapsed_time": "4:37:55", "remaining_time": "1:17:47"} +{"current_steps": 6778, "total_steps": 8674, "loss": 0.4809808135032654, "lr": 2.5034294562506976e-07, "epoch": 1.562831450311275, "percentage": 78.14, "elapsed_time": "4:37:58", "remaining_time": "1:17:45"} +{"current_steps": 6779, "total_steps": 8674, "loss": 0.487751841545105, "lr": 2.5009067132576256e-07, "epoch": 1.5630620244408577, "percentage": 78.15, "elapsed_time": "4:38:00", "remaining_time": "1:17:42"} +{"current_steps": 6780, "total_steps": 8674, "loss": 0.47932374477386475, "lr": 2.4983850603337675e-07, "epoch": 1.5632925985704405, "percentage": 78.16, "elapsed_time": "4:38:03", 
"remaining_time": "1:17:40"} +{"current_steps": 6781, "total_steps": 8674, "loss": 0.42852234840393066, "lr": 2.495864497845663e-07, "epoch": 1.5635231727000232, "percentage": 78.18, "elapsed_time": "4:38:05", "remaining_time": "1:17:37"} +{"current_steps": 6782, "total_steps": 8674, "loss": 0.4392131567001343, "lr": 2.49334502615971e-07, "epoch": 1.5637537468296059, "percentage": 78.19, "elapsed_time": "4:38:07", "remaining_time": "1:17:35"} +{"current_steps": 6783, "total_steps": 8674, "loss": 0.45050233602523804, "lr": 2.4908266456421323e-07, "epoch": 1.5639843209591884, "percentage": 78.2, "elapsed_time": "4:38:10", "remaining_time": "1:17:33"} +{"current_steps": 6784, "total_steps": 8674, "loss": 0.45328110456466675, "lr": 2.488309356659004e-07, "epoch": 1.5642148950887709, "percentage": 78.21, "elapsed_time": "4:38:12", "remaining_time": "1:17:30"} +{"current_steps": 6785, "total_steps": 8674, "loss": 0.3851325511932373, "lr": 2.4857931595762403e-07, "epoch": 1.5644454692183536, "percentage": 78.22, "elapsed_time": "4:38:15", "remaining_time": "1:17:28"} +{"current_steps": 6786, "total_steps": 8674, "loss": 0.4096960127353668, "lr": 2.4832780547595976e-07, "epoch": 1.5646760433479363, "percentage": 78.23, "elapsed_time": "4:38:18", "remaining_time": "1:17:25"} +{"current_steps": 6787, "total_steps": 8674, "loss": 0.4439825117588043, "lr": 2.480764042574669e-07, "epoch": 1.564906617477519, "percentage": 78.25, "elapsed_time": "4:38:20", "remaining_time": "1:17:23"} +{"current_steps": 6788, "total_steps": 8674, "loss": 0.4259459972381592, "lr": 2.4782511233868895e-07, "epoch": 1.5651371916071017, "percentage": 78.26, "elapsed_time": "4:38:22", "remaining_time": "1:17:20"} +{"current_steps": 6789, "total_steps": 8674, "loss": 0.4701216220855713, "lr": 2.475739297561542e-07, "epoch": 1.5653677657366845, "percentage": 78.27, "elapsed_time": "4:38:25", "remaining_time": "1:17:18"} +{"current_steps": 6790, "total_steps": 8674, "loss": 0.4435737133026123, "lr": 
2.473228565463742e-07, "epoch": 1.565598339866267, "percentage": 78.28, "elapsed_time": "4:38:27", "remaining_time": "1:17:15"} +{"current_steps": 6791, "total_steps": 8674, "loss": 0.4476662278175354, "lr": 2.4707189274584537e-07, "epoch": 1.5658289139958497, "percentage": 78.29, "elapsed_time": "4:38:29", "remaining_time": "1:17:13"} +{"current_steps": 6792, "total_steps": 8674, "loss": 0.4399911165237427, "lr": 2.468210383910474e-07, "epoch": 1.5660594881254322, "percentage": 78.3, "elapsed_time": "4:38:32", "remaining_time": "1:17:10"} +{"current_steps": 6793, "total_steps": 8674, "loss": 0.4206039309501648, "lr": 2.465702935184446e-07, "epoch": 1.5662900622550149, "percentage": 78.31, "elapsed_time": "4:38:34", "remaining_time": "1:17:08"} +{"current_steps": 6794, "total_steps": 8674, "loss": 0.44936686754226685, "lr": 2.463196581644855e-07, "epoch": 1.5665206363845976, "percentage": 78.33, "elapsed_time": "4:38:37", "remaining_time": "1:17:05"} +{"current_steps": 6795, "total_steps": 8674, "loss": 0.39926016330718994, "lr": 2.4606913236560277e-07, "epoch": 1.5667512105141803, "percentage": 78.34, "elapsed_time": "4:38:39", "remaining_time": "1:17:03"} +{"current_steps": 6796, "total_steps": 8674, "loss": 0.4338487982749939, "lr": 2.4581871615821216e-07, "epoch": 1.566981784643763, "percentage": 78.35, "elapsed_time": "4:38:42", "remaining_time": "1:17:01"} +{"current_steps": 6797, "total_steps": 8674, "loss": 0.5047430992126465, "lr": 2.455684095787148e-07, "epoch": 1.5672123587733457, "percentage": 78.36, "elapsed_time": "4:38:44", "remaining_time": "1:16:58"} +{"current_steps": 6798, "total_steps": 8674, "loss": 0.46082550287246704, "lr": 2.4531821266349504e-07, "epoch": 1.5674429329029282, "percentage": 78.37, "elapsed_time": "4:38:47", "remaining_time": "1:16:56"} +{"current_steps": 6799, "total_steps": 8674, "loss": 0.44586509466171265, "lr": 2.450681254489214e-07, "epoch": 1.567673507032511, "percentage": 78.38, "elapsed_time": "4:38:49", 
"remaining_time": "1:16:53"} +{"current_steps": 6800, "total_steps": 8674, "loss": 0.5167746543884277, "lr": 2.4481814797134657e-07, "epoch": 1.5679040811620935, "percentage": 78.4, "elapsed_time": "4:38:52", "remaining_time": "1:16:51"} +{"current_steps": 6801, "total_steps": 8674, "loss": 0.44062116742134094, "lr": 2.4456828026710753e-07, "epoch": 1.5681346552916762, "percentage": 78.41, "elapsed_time": "4:38:55", "remaining_time": "1:16:49"} +{"current_steps": 6802, "total_steps": 8674, "loss": 0.5096040368080139, "lr": 2.4431852237252524e-07, "epoch": 1.5683652294212589, "percentage": 78.42, "elapsed_time": "4:38:58", "remaining_time": "1:16:46"} +{"current_steps": 6803, "total_steps": 8674, "loss": 0.44234153628349304, "lr": 2.440688743239042e-07, "epoch": 1.5685958035508416, "percentage": 78.43, "elapsed_time": "4:39:01", "remaining_time": "1:16:44"} +{"current_steps": 6804, "total_steps": 8674, "loss": 0.431011825799942, "lr": 2.4381933615753357e-07, "epoch": 1.5688263776804243, "percentage": 78.44, "elapsed_time": "4:39:03", "remaining_time": "1:16:41"} +{"current_steps": 6805, "total_steps": 8674, "loss": 0.4903266131877899, "lr": 2.435699079096858e-07, "epoch": 1.569056951810007, "percentage": 78.45, "elapsed_time": "4:39:05", "remaining_time": "1:16:39"} +{"current_steps": 6806, "total_steps": 8674, "loss": 0.4698626399040222, "lr": 2.433205896166185e-07, "epoch": 1.5692875259395895, "percentage": 78.46, "elapsed_time": "4:39:08", "remaining_time": "1:16:36"} +{"current_steps": 6807, "total_steps": 8674, "loss": 0.37576574087142944, "lr": 2.4307138131457184e-07, "epoch": 1.5695181000691723, "percentage": 78.48, "elapsed_time": "4:39:10", "remaining_time": "1:16:34"} +{"current_steps": 6808, "total_steps": 8674, "loss": 0.47068172693252563, "lr": 2.4282228303977113e-07, "epoch": 1.5697486741987547, "percentage": 78.49, "elapsed_time": "4:39:13", "remaining_time": "1:16:31"} +{"current_steps": 6809, "total_steps": 8674, "loss": 0.45246315002441406, "lr": 
2.425732948284257e-07, "epoch": 1.5699792483283375, "percentage": 78.5, "elapsed_time": "4:39:15", "remaining_time": "1:16:29"} +{"current_steps": 6810, "total_steps": 8674, "loss": 0.4746376574039459, "lr": 2.423244167167278e-07, "epoch": 1.5702098224579202, "percentage": 78.51, "elapsed_time": "4:39:18", "remaining_time": "1:16:26"} +{"current_steps": 6811, "total_steps": 8674, "loss": 0.413469135761261, "lr": 2.420756487408551e-07, "epoch": 1.570440396587503, "percentage": 78.52, "elapsed_time": "4:39:20", "remaining_time": "1:16:24"} +{"current_steps": 6812, "total_steps": 8674, "loss": 0.3567890226840973, "lr": 2.418269909369678e-07, "epoch": 1.5706709707170856, "percentage": 78.53, "elapsed_time": "4:39:23", "remaining_time": "1:16:22"} +{"current_steps": 6813, "total_steps": 8674, "loss": 0.4676034450531006, "lr": 2.415784433412116e-07, "epoch": 1.5709015448466683, "percentage": 78.55, "elapsed_time": "4:39:25", "remaining_time": "1:16:19"} +{"current_steps": 6814, "total_steps": 8674, "loss": 0.429337739944458, "lr": 2.4133000598971477e-07, "epoch": 1.5711321189762508, "percentage": 78.56, "elapsed_time": "4:39:28", "remaining_time": "1:16:17"} +{"current_steps": 6815, "total_steps": 8674, "loss": 0.35861289501190186, "lr": 2.4108167891859065e-07, "epoch": 1.5713626931058335, "percentage": 78.57, "elapsed_time": "4:39:30", "remaining_time": "1:16:14"} +{"current_steps": 6816, "total_steps": 8674, "loss": 0.43728363513946533, "lr": 2.4083346216393564e-07, "epoch": 1.571593267235416, "percentage": 78.58, "elapsed_time": "4:39:33", "remaining_time": "1:16:12"} +{"current_steps": 6817, "total_steps": 8674, "loss": 0.44594380259513855, "lr": 2.405853557618308e-07, "epoch": 1.5718238413649988, "percentage": 78.59, "elapsed_time": "4:39:35", "remaining_time": "1:16:09"} +{"current_steps": 6818, "total_steps": 8674, "loss": 0.36871337890625, "lr": 2.403373597483414e-07, "epoch": 1.5720544154945815, "percentage": 78.6, "elapsed_time": "4:39:37", "remaining_time": 
"1:16:07"} +{"current_steps": 6819, "total_steps": 8674, "loss": 0.3769477307796478, "lr": 2.400894741595152e-07, "epoch": 1.5722849896241642, "percentage": 78.61, "elapsed_time": "4:39:40", "remaining_time": "1:16:04"} +{"current_steps": 6820, "total_steps": 8674, "loss": 0.503145694732666, "lr": 2.3984169903138583e-07, "epoch": 1.572515563753747, "percentage": 78.63, "elapsed_time": "4:39:42", "remaining_time": "1:16:02"} +{"current_steps": 6821, "total_steps": 8674, "loss": 0.4082655906677246, "lr": 2.395940343999691e-07, "epoch": 1.5727461378833296, "percentage": 78.64, "elapsed_time": "4:39:45", "remaining_time": "1:15:59"} +{"current_steps": 6822, "total_steps": 8674, "loss": 0.4106418192386627, "lr": 2.3934648030126625e-07, "epoch": 1.5729767120129121, "percentage": 78.65, "elapsed_time": "4:39:47", "remaining_time": "1:15:57"} +{"current_steps": 6823, "total_steps": 8674, "loss": 0.45363783836364746, "lr": 2.390990367712613e-07, "epoch": 1.5732072861424948, "percentage": 78.66, "elapsed_time": "4:39:50", "remaining_time": "1:15:54"} +{"current_steps": 6824, "total_steps": 8674, "loss": 0.4416825473308563, "lr": 2.388517038459227e-07, "epoch": 1.5734378602720773, "percentage": 78.67, "elapsed_time": "4:39:52", "remaining_time": "1:15:52"} +{"current_steps": 6825, "total_steps": 8674, "loss": 0.5106863379478455, "lr": 2.3860448156120304e-07, "epoch": 1.57366843440166, "percentage": 78.68, "elapsed_time": "4:39:55", "remaining_time": "1:15:50"} +{"current_steps": 6826, "total_steps": 8674, "loss": 0.4618466794490814, "lr": 2.3835736995303879e-07, "epoch": 1.5738990085312428, "percentage": 78.69, "elapsed_time": "4:39:57", "remaining_time": "1:15:47"} +{"current_steps": 6827, "total_steps": 8674, "loss": 0.414678692817688, "lr": 2.381103690573495e-07, "epoch": 1.5741295826608255, "percentage": 78.71, "elapsed_time": "4:39:59", "remaining_time": "1:15:45"} +{"current_steps": 6828, "total_steps": 8674, "loss": 0.39774662256240845, "lr": 2.3786347891004e-07, 
"epoch": 1.5743601567904082, "percentage": 78.72, "elapsed_time": "4:40:02", "remaining_time": "1:15:42"} +{"current_steps": 6829, "total_steps": 8674, "loss": 0.4513537287712097, "lr": 2.376166995469977e-07, "epoch": 1.574590730919991, "percentage": 78.73, "elapsed_time": "4:40:04", "remaining_time": "1:15:40"} +{"current_steps": 6830, "total_steps": 8674, "loss": 0.44062697887420654, "lr": 2.3737003100409447e-07, "epoch": 1.5748213050495734, "percentage": 78.74, "elapsed_time": "4:40:07", "remaining_time": "1:15:37"} +{"current_steps": 6831, "total_steps": 8674, "loss": 0.42305582761764526, "lr": 2.3712347331718617e-07, "epoch": 1.5750518791791561, "percentage": 78.75, "elapsed_time": "4:40:09", "remaining_time": "1:15:35"} +{"current_steps": 6832, "total_steps": 8674, "loss": 0.46731626987457275, "lr": 2.3687702652211262e-07, "epoch": 1.5752824533087386, "percentage": 78.76, "elapsed_time": "4:40:12", "remaining_time": "1:15:32"} +{"current_steps": 6833, "total_steps": 8674, "loss": 0.4926149845123291, "lr": 2.3663069065469753e-07, "epoch": 1.5755130274383213, "percentage": 78.78, "elapsed_time": "4:40:14", "remaining_time": "1:15:30"} +{"current_steps": 6834, "total_steps": 8674, "loss": 0.49002933502197266, "lr": 2.3638446575074777e-07, "epoch": 1.575743601567904, "percentage": 78.79, "elapsed_time": "4:40:17", "remaining_time": "1:15:27"} +{"current_steps": 6835, "total_steps": 8674, "loss": 0.47110694646835327, "lr": 2.3613835184605523e-07, "epoch": 1.5759741756974868, "percentage": 78.8, "elapsed_time": "4:40:19", "remaining_time": "1:15:25"} +{"current_steps": 6836, "total_steps": 8674, "loss": 0.4257816672325134, "lr": 2.3589234897639444e-07, "epoch": 1.5762047498270695, "percentage": 78.81, "elapsed_time": "4:40:22", "remaining_time": "1:15:22"} +{"current_steps": 6837, "total_steps": 8674, "loss": 0.4031051695346832, "lr": 2.3564645717752506e-07, "epoch": 1.5764353239566522, "percentage": 78.82, "elapsed_time": "4:40:24", "remaining_time": "1:15:20"} 
+{"current_steps": 6838, "total_steps": 8674, "loss": 0.5077808499336243, "lr": 2.3540067648518957e-07, "epoch": 1.5766658980862347, "percentage": 78.83, "elapsed_time": "4:40:26", "remaining_time": "1:15:18"} +{"current_steps": 6839, "total_steps": 8674, "loss": 0.3877585232257843, "lr": 2.3515500693511449e-07, "epoch": 1.5768964722158174, "percentage": 78.84, "elapsed_time": "4:40:29", "remaining_time": "1:15:15"} +{"current_steps": 6840, "total_steps": 8674, "loss": 0.4356805682182312, "lr": 2.3490944856301064e-07, "epoch": 1.5771270463454, "percentage": 78.86, "elapsed_time": "4:40:31", "remaining_time": "1:15:13"} +{"current_steps": 6841, "total_steps": 8674, "loss": 0.46679362654685974, "lr": 2.346640014045723e-07, "epoch": 1.5773576204749826, "percentage": 78.87, "elapsed_time": "4:40:34", "remaining_time": "1:15:10"} +{"current_steps": 6842, "total_steps": 8674, "loss": 0.4837648272514343, "lr": 2.3441866549547817e-07, "epoch": 1.5775881946045653, "percentage": 78.88, "elapsed_time": "4:40:36", "remaining_time": "1:15:08"} +{"current_steps": 6843, "total_steps": 8674, "loss": 0.42723533511161804, "lr": 2.341734408713897e-07, "epoch": 1.577818768734148, "percentage": 78.89, "elapsed_time": "4:40:38", "remaining_time": "1:15:05"} +{"current_steps": 6844, "total_steps": 8674, "loss": 0.3680928647518158, "lr": 2.3392832756795322e-07, "epoch": 1.5780493428637308, "percentage": 78.9, "elapsed_time": "4:40:41", "remaining_time": "1:15:03"} +{"current_steps": 6845, "total_steps": 8674, "loss": 0.434980571269989, "lr": 2.3368332562079797e-07, "epoch": 1.5782799169933135, "percentage": 78.91, "elapsed_time": "4:40:44", "remaining_time": "1:15:00"} +{"current_steps": 6846, "total_steps": 8674, "loss": 0.45552271604537964, "lr": 2.3343843506553805e-07, "epoch": 1.578510491122896, "percentage": 78.93, "elapsed_time": "4:40:46", "remaining_time": "1:14:58"} +{"current_steps": 6847, "total_steps": 8674, "loss": 0.4292616844177246, "lr": 2.331936559377702e-07, "epoch": 
1.5787410652524787, "percentage": 78.94, "elapsed_time": "4:40:49", "remaining_time": "1:14:55"} +{"current_steps": 6848, "total_steps": 8674, "loss": 0.5025339126586914, "lr": 2.3294898827307573e-07, "epoch": 1.5789716393820612, "percentage": 78.95, "elapsed_time": "4:40:51", "remaining_time": "1:14:53"} +{"current_steps": 6849, "total_steps": 8674, "loss": 0.47567370533943176, "lr": 2.3270443210701996e-07, "epoch": 1.579202213511644, "percentage": 78.96, "elapsed_time": "4:40:53", "remaining_time": "1:14:50"} +{"current_steps": 6850, "total_steps": 8674, "loss": 0.5435467958450317, "lr": 2.3245998747515095e-07, "epoch": 1.5794327876412266, "percentage": 78.97, "elapsed_time": "4:40:56", "remaining_time": "1:14:48"} +{"current_steps": 6851, "total_steps": 8674, "loss": 0.4409145712852478, "lr": 2.3221565441300194e-07, "epoch": 1.5796633617708093, "percentage": 78.98, "elapsed_time": "4:40:58", "remaining_time": "1:14:45"} +{"current_steps": 6852, "total_steps": 8674, "loss": 0.40482181310653687, "lr": 2.3197143295608845e-07, "epoch": 1.579893935900392, "percentage": 78.99, "elapsed_time": "4:41:01", "remaining_time": "1:14:43"} +{"current_steps": 6853, "total_steps": 8674, "loss": 0.40231794118881226, "lr": 2.317273231399113e-07, "epoch": 1.5801245100299748, "percentage": 79.01, "elapsed_time": "4:41:03", "remaining_time": "1:14:41"} +{"current_steps": 6854, "total_steps": 8674, "loss": 0.43245166540145874, "lr": 2.314833249999535e-07, "epoch": 1.5803550841595573, "percentage": 79.02, "elapsed_time": "4:41:06", "remaining_time": "1:14:38"} +{"current_steps": 6855, "total_steps": 8674, "loss": 0.40237659215927124, "lr": 2.3123943857168315e-07, "epoch": 1.58058565828914, "percentage": 79.03, "elapsed_time": "4:41:08", "remaining_time": "1:14:36"} +{"current_steps": 6856, "total_steps": 8674, "loss": 0.48900318145751953, "lr": 2.309956638905517e-07, "epoch": 1.5808162324187225, "percentage": 79.04, "elapsed_time": "4:41:11", "remaining_time": "1:14:33"} 
+{"current_steps": 6857, "total_steps": 8674, "loss": 0.42364567518234253, "lr": 2.3075200099199422e-07, "epoch": 1.5810468065483052, "percentage": 79.05, "elapsed_time": "4:41:13", "remaining_time": "1:14:31"} +{"current_steps": 6858, "total_steps": 8674, "loss": 0.4658735990524292, "lr": 2.3050844991142958e-07, "epoch": 1.581277380677888, "percentage": 79.06, "elapsed_time": "4:41:15", "remaining_time": "1:14:28"} +{"current_steps": 6859, "total_steps": 8674, "loss": 0.42268991470336914, "lr": 2.3026501068426007e-07, "epoch": 1.5815079548074706, "percentage": 79.08, "elapsed_time": "4:41:18", "remaining_time": "1:14:26"} +{"current_steps": 6860, "total_steps": 8674, "loss": 0.44876742362976074, "lr": 2.3002168334587247e-07, "epoch": 1.5817385289370534, "percentage": 79.09, "elapsed_time": "4:41:20", "remaining_time": "1:14:23"} +{"current_steps": 6861, "total_steps": 8674, "loss": 0.42540132999420166, "lr": 2.2977846793163646e-07, "epoch": 1.581969103066636, "percentage": 79.1, "elapsed_time": "4:41:23", "remaining_time": "1:14:21"} +{"current_steps": 6862, "total_steps": 8674, "loss": 0.48768138885498047, "lr": 2.2953536447690636e-07, "epoch": 1.5821996771962186, "percentage": 79.11, "elapsed_time": "4:41:25", "remaining_time": "1:14:18"} +{"current_steps": 6863, "total_steps": 8674, "loss": 0.42905953526496887, "lr": 2.292923730170192e-07, "epoch": 1.5824302513258013, "percentage": 79.12, "elapsed_time": "4:41:28", "remaining_time": "1:14:16"} +{"current_steps": 6864, "total_steps": 8674, "loss": 0.4103778004646301, "lr": 2.2904949358729653e-07, "epoch": 1.5826608254553838, "percentage": 79.13, "elapsed_time": "4:41:30", "remaining_time": "1:14:13"} +{"current_steps": 6865, "total_steps": 8674, "loss": 0.39303290843963623, "lr": 2.2880672622304331e-07, "epoch": 1.5828913995849665, "percentage": 79.14, "elapsed_time": "4:41:33", "remaining_time": "1:14:11"} +{"current_steps": 6866, "total_steps": 8674, "loss": 0.5087130069732666, "lr": 2.2856407095954843e-07, 
"epoch": 1.5831219737145492, "percentage": 79.16, "elapsed_time": "4:41:35", "remaining_time": "1:14:09"} +{"current_steps": 6867, "total_steps": 8674, "loss": 0.33117055892944336, "lr": 2.283215278320839e-07, "epoch": 1.583352547844132, "percentage": 79.17, "elapsed_time": "4:41:37", "remaining_time": "1:14:06"} +{"current_steps": 6868, "total_steps": 8674, "loss": 0.41781488060951233, "lr": 2.280790968759063e-07, "epoch": 1.5835831219737146, "percentage": 79.18, "elapsed_time": "4:41:40", "remaining_time": "1:14:04"} +{"current_steps": 6869, "total_steps": 8674, "loss": 0.5104382634162903, "lr": 2.2783677812625523e-07, "epoch": 1.5838136961032974, "percentage": 79.19, "elapsed_time": "4:41:42", "remaining_time": "1:14:01"} +{"current_steps": 6870, "total_steps": 8674, "loss": 0.3987969160079956, "lr": 2.2759457161835372e-07, "epoch": 1.5840442702328799, "percentage": 79.2, "elapsed_time": "4:41:45", "remaining_time": "1:13:59"} +{"current_steps": 6871, "total_steps": 8674, "loss": 0.4723064601421356, "lr": 2.2735247738740936e-07, "epoch": 1.5842748443624626, "percentage": 79.21, "elapsed_time": "4:41:47", "remaining_time": "1:13:56"} +{"current_steps": 6872, "total_steps": 8674, "loss": 0.3942141830921173, "lr": 2.2711049546861293e-07, "epoch": 1.584505418492045, "percentage": 79.23, "elapsed_time": "4:41:49", "remaining_time": "1:13:54"} +{"current_steps": 6873, "total_steps": 8674, "loss": 0.38271787762641907, "lr": 2.268686258971393e-07, "epoch": 1.5847359926216278, "percentage": 79.24, "elapsed_time": "4:41:52", "remaining_time": "1:13:51"} +{"current_steps": 6874, "total_steps": 8674, "loss": 0.4944665729999542, "lr": 2.2662686870814607e-07, "epoch": 1.5849665667512105, "percentage": 79.25, "elapsed_time": "4:41:54", "remaining_time": "1:13:49"} +{"current_steps": 6875, "total_steps": 8674, "loss": 0.46695005893707275, "lr": 2.2638522393677562e-07, "epoch": 1.5851971408807932, "percentage": 79.26, "elapsed_time": "4:41:57", "remaining_time": "1:13:46"} 
+{"current_steps": 6876, "total_steps": 8674, "loss": 0.4620080888271332, "lr": 2.2614369161815295e-07, "epoch": 1.585427715010376, "percentage": 79.27, "elapsed_time": "4:41:59", "remaining_time": "1:13:44"} +{"current_steps": 6877, "total_steps": 8674, "loss": 0.5650279521942139, "lr": 2.2590227178738776e-07, "epoch": 1.5856582891399587, "percentage": 79.28, "elapsed_time": "4:42:01", "remaining_time": "1:13:41"} +{"current_steps": 6878, "total_steps": 8674, "loss": 0.3556622564792633, "lr": 2.2566096447957227e-07, "epoch": 1.5858888632695411, "percentage": 79.29, "elapsed_time": "4:42:04", "remaining_time": "1:13:39"} +{"current_steps": 6879, "total_steps": 8674, "loss": 0.4978718161582947, "lr": 2.254197697297834e-07, "epoch": 1.5861194373991239, "percentage": 79.31, "elapsed_time": "4:42:07", "remaining_time": "1:13:36"} +{"current_steps": 6880, "total_steps": 8674, "loss": 0.4759003520011902, "lr": 2.2517868757308146e-07, "epoch": 1.5863500115287064, "percentage": 79.32, "elapsed_time": "4:42:09", "remaining_time": "1:13:34"} +{"current_steps": 6881, "total_steps": 8674, "loss": 0.5078370571136475, "lr": 2.2493771804450945e-07, "epoch": 1.586580585658289, "percentage": 79.33, "elapsed_time": "4:42:11", "remaining_time": "1:13:31"} +{"current_steps": 6882, "total_steps": 8674, "loss": 0.4188239276409149, "lr": 2.2469686117909547e-07, "epoch": 1.5868111597878718, "percentage": 79.34, "elapsed_time": "4:42:14", "remaining_time": "1:13:29"} +{"current_steps": 6883, "total_steps": 8674, "loss": 0.4075232744216919, "lr": 2.2445611701184997e-07, "epoch": 1.5870417339174545, "percentage": 79.35, "elapsed_time": "4:42:16", "remaining_time": "1:13:27"} +{"current_steps": 6884, "total_steps": 8674, "loss": 0.3643442988395691, "lr": 2.2421548557776794e-07, "epoch": 1.5872723080470372, "percentage": 79.36, "elapsed_time": "4:42:19", "remaining_time": "1:13:24"} +{"current_steps": 6885, "total_steps": 8674, "loss": 0.38767147064208984, "lr": 2.2397496691182716e-07, 
"epoch": 1.58750288217662, "percentage": 79.38, "elapsed_time": "4:42:21", "remaining_time": "1:13:22"} +{"current_steps": 6886, "total_steps": 8674, "loss": 0.4874354600906372, "lr": 2.2373456104899e-07, "epoch": 1.5877334563062024, "percentage": 79.39, "elapsed_time": "4:42:24", "remaining_time": "1:13:19"} +{"current_steps": 6887, "total_steps": 8674, "loss": 0.46412762999534607, "lr": 2.2349426802420134e-07, "epoch": 1.5879640304357852, "percentage": 79.4, "elapsed_time": "4:42:26", "remaining_time": "1:13:17"} +{"current_steps": 6888, "total_steps": 8674, "loss": 0.4299372434616089, "lr": 2.2325408787239054e-07, "epoch": 1.5881946045653677, "percentage": 79.41, "elapsed_time": "4:42:29", "remaining_time": "1:13:14"} +{"current_steps": 6889, "total_steps": 8674, "loss": 0.3962220549583435, "lr": 2.230140206284703e-07, "epoch": 1.5884251786949504, "percentage": 79.42, "elapsed_time": "4:42:31", "remaining_time": "1:13:12"} +{"current_steps": 6890, "total_steps": 8674, "loss": 0.5048998594284058, "lr": 2.2277406632733653e-07, "epoch": 1.588655752824533, "percentage": 79.43, "elapsed_time": "4:42:33", "remaining_time": "1:13:09"} +{"current_steps": 6891, "total_steps": 8674, "loss": 0.35463857650756836, "lr": 2.2253422500386932e-07, "epoch": 1.5888863269541158, "percentage": 79.44, "elapsed_time": "4:42:36", "remaining_time": "1:13:07"} +{"current_steps": 6892, "total_steps": 8674, "loss": 0.3969672620296478, "lr": 2.2229449669293165e-07, "epoch": 1.5891169010836985, "percentage": 79.46, "elapsed_time": "4:42:38", "remaining_time": "1:13:04"} +{"current_steps": 6893, "total_steps": 8674, "loss": 0.36300575733184814, "lr": 2.22054881429371e-07, "epoch": 1.5893474752132812, "percentage": 79.47, "elapsed_time": "4:42:40", "remaining_time": "1:13:02"} +{"current_steps": 6894, "total_steps": 8674, "loss": 0.45796507596969604, "lr": 2.2181537924801729e-07, "epoch": 1.5895780493428637, "percentage": 79.48, "elapsed_time": "4:42:43", "remaining_time": "1:12:59"} 
+{"current_steps": 6895, "total_steps": 8674, "loss": 0.42725688219070435, "lr": 2.2157599018368488e-07, "epoch": 1.5898086234724462, "percentage": 79.49, "elapsed_time": "4:42:46", "remaining_time": "1:12:57"} +{"current_steps": 6896, "total_steps": 8674, "loss": 0.4959419369697571, "lr": 2.213367142711714e-07, "epoch": 1.590039197602029, "percentage": 79.5, "elapsed_time": "4:42:48", "remaining_time": "1:12:54"} +{"current_steps": 6897, "total_steps": 8674, "loss": 0.3707115948200226, "lr": 2.2109755154525821e-07, "epoch": 1.5902697717316117, "percentage": 79.51, "elapsed_time": "4:42:50", "remaining_time": "1:12:52"} +{"current_steps": 6898, "total_steps": 8674, "loss": 0.3647577166557312, "lr": 2.2085850204070989e-07, "epoch": 1.5905003458611944, "percentage": 79.53, "elapsed_time": "4:42:53", "remaining_time": "1:12:50"} +{"current_steps": 6899, "total_steps": 8674, "loss": 0.42227697372436523, "lr": 2.2061956579227447e-07, "epoch": 1.590730919990777, "percentage": 79.54, "elapsed_time": "4:42:55", "remaining_time": "1:12:47"} +{"current_steps": 6900, "total_steps": 8674, "loss": 0.41736292839050293, "lr": 2.2038074283468412e-07, "epoch": 1.5909614941203598, "percentage": 79.55, "elapsed_time": "4:42:58", "remaining_time": "1:12:45"} +{"current_steps": 6901, "total_steps": 8674, "loss": 0.46005967259407043, "lr": 2.201420332026538e-07, "epoch": 1.5911920682499423, "percentage": 79.56, "elapsed_time": "4:43:01", "remaining_time": "1:12:42"} +{"current_steps": 6902, "total_steps": 8674, "loss": 0.3572643995285034, "lr": 2.1990343693088243e-07, "epoch": 1.591422642379525, "percentage": 79.57, "elapsed_time": "4:43:04", "remaining_time": "1:12:40"} +{"current_steps": 6903, "total_steps": 8674, "loss": 0.5321012735366821, "lr": 2.196649540540527e-07, "epoch": 1.5916532165091075, "percentage": 79.58, "elapsed_time": "4:43:06", "remaining_time": "1:12:38"} +{"current_steps": 6904, "total_steps": 8674, "loss": 0.4913836419582367, "lr": 2.194265846068305e-07, "epoch": 
1.5918837906386902, "percentage": 79.59, "elapsed_time": "4:43:09", "remaining_time": "1:12:35"} +{"current_steps": 6905, "total_steps": 8674, "loss": 0.37674903869628906, "lr": 2.1918832862386493e-07, "epoch": 1.592114364768273, "percentage": 79.61, "elapsed_time": "4:43:11", "remaining_time": "1:12:33"} +{"current_steps": 6906, "total_steps": 8674, "loss": 0.4385930001735687, "lr": 2.1895018613978934e-07, "epoch": 1.5923449388978557, "percentage": 79.62, "elapsed_time": "4:43:14", "remaining_time": "1:12:30"} +{"current_steps": 6907, "total_steps": 8674, "loss": 0.5219674706459045, "lr": 2.1871215718921964e-07, "epoch": 1.5925755130274384, "percentage": 79.63, "elapsed_time": "4:43:16", "remaining_time": "1:12:28"} +{"current_steps": 6908, "total_steps": 8674, "loss": 0.4241113066673279, "lr": 2.1847424180675622e-07, "epoch": 1.592806087157021, "percentage": 79.64, "elapsed_time": "4:43:18", "remaining_time": "1:12:25"} +{"current_steps": 6909, "total_steps": 8674, "loss": 0.4008786082267761, "lr": 2.1823644002698237e-07, "epoch": 1.5930366612866036, "percentage": 79.65, "elapsed_time": "4:43:21", "remaining_time": "1:12:23"} +{"current_steps": 6910, "total_steps": 8674, "loss": 0.3333933651447296, "lr": 2.179987518844645e-07, "epoch": 1.5932672354161863, "percentage": 79.66, "elapsed_time": "4:43:23", "remaining_time": "1:12:20"} +{"current_steps": 6911, "total_steps": 8674, "loss": 0.48857730627059937, "lr": 2.1776117741375343e-07, "epoch": 1.5934978095457688, "percentage": 79.67, "elapsed_time": "4:43:26", "remaining_time": "1:12:18"} +{"current_steps": 6912, "total_steps": 8674, "loss": 0.37393617630004883, "lr": 2.1752371664938306e-07, "epoch": 1.5937283836753515, "percentage": 79.69, "elapsed_time": "4:43:28", "remaining_time": "1:12:15"} +{"current_steps": 6913, "total_steps": 8674, "loss": 0.5365080833435059, "lr": 2.172863696258709e-07, "epoch": 1.5939589578049342, "percentage": 79.7, "elapsed_time": "4:43:31", "remaining_time": "1:12:13"} 
+{"current_steps": 6914, "total_steps": 8674, "loss": 0.49318936467170715, "lr": 2.1704913637771705e-07, "epoch": 1.594189531934517, "percentage": 79.71, "elapsed_time": "4:43:33", "remaining_time": "1:12:10"} +{"current_steps": 6915, "total_steps": 8674, "loss": 0.37682920694351196, "lr": 2.1681201693940666e-07, "epoch": 1.5944201060640997, "percentage": 79.72, "elapsed_time": "4:43:35", "remaining_time": "1:12:08"} +{"current_steps": 6916, "total_steps": 8674, "loss": 0.4894877076148987, "lr": 2.1657501134540657e-07, "epoch": 1.5946506801936824, "percentage": 79.73, "elapsed_time": "4:43:38", "remaining_time": "1:12:05"} +{"current_steps": 6917, "total_steps": 8674, "loss": 0.4200783967971802, "lr": 2.1633811963016869e-07, "epoch": 1.5948812543232649, "percentage": 79.74, "elapsed_time": "4:43:40", "remaining_time": "1:12:03"} +{"current_steps": 6918, "total_steps": 8674, "loss": 0.3953052759170532, "lr": 2.1610134182812702e-07, "epoch": 1.5951118284528476, "percentage": 79.76, "elapsed_time": "4:43:43", "remaining_time": "1:12:00"} +{"current_steps": 6919, "total_steps": 8674, "loss": 0.4006558656692505, "lr": 2.158646779736999e-07, "epoch": 1.59534240258243, "percentage": 79.77, "elapsed_time": "4:43:45", "remaining_time": "1:11:58"} +{"current_steps": 6920, "total_steps": 8674, "loss": 0.3749210238456726, "lr": 2.1562812810128906e-07, "epoch": 1.5955729767120128, "percentage": 79.78, "elapsed_time": "4:43:47", "remaining_time": "1:11:56"} +{"current_steps": 6921, "total_steps": 8674, "loss": 0.4688538610935211, "lr": 2.1539169224527887e-07, "epoch": 1.5958035508415955, "percentage": 79.79, "elapsed_time": "4:43:50", "remaining_time": "1:11:53"} +{"current_steps": 6922, "total_steps": 8674, "loss": 0.4483727216720581, "lr": 2.151553704400383e-07, "epoch": 1.5960341249711782, "percentage": 79.8, "elapsed_time": "4:43:52", "remaining_time": "1:11:51"} +{"current_steps": 6923, "total_steps": 8674, "loss": 0.5118253827095032, "lr": 2.149191627199185e-07, "epoch": 
1.596264699100761, "percentage": 79.81, "elapsed_time": "4:43:55", "remaining_time": "1:11:48"} +{"current_steps": 6924, "total_steps": 8674, "loss": 0.43641170859336853, "lr": 2.1468306911925525e-07, "epoch": 1.5964952732303437, "percentage": 79.82, "elapsed_time": "4:43:57", "remaining_time": "1:11:46"} +{"current_steps": 6925, "total_steps": 8674, "loss": 0.38253384828567505, "lr": 2.1444708967236657e-07, "epoch": 1.5967258473599262, "percentage": 79.84, "elapsed_time": "4:43:59", "remaining_time": "1:11:43"} +{"current_steps": 6926, "total_steps": 8674, "loss": 0.43674635887145996, "lr": 2.1421122441355476e-07, "epoch": 1.596956421489509, "percentage": 79.85, "elapsed_time": "4:44:02", "remaining_time": "1:11:41"} +{"current_steps": 6927, "total_steps": 8674, "loss": 0.37392908334732056, "lr": 2.1397547337710519e-07, "epoch": 1.5971869956190914, "percentage": 79.86, "elapsed_time": "4:44:04", "remaining_time": "1:11:38"} +{"current_steps": 6928, "total_steps": 8674, "loss": 0.4531250298023224, "lr": 2.13739836597287e-07, "epoch": 1.597417569748674, "percentage": 79.87, "elapsed_time": "4:44:07", "remaining_time": "1:11:36"} +{"current_steps": 6929, "total_steps": 8674, "loss": 0.38579899072647095, "lr": 2.13504314108352e-07, "epoch": 1.5976481438782568, "percentage": 79.88, "elapsed_time": "4:44:09", "remaining_time": "1:11:33"} +{"current_steps": 6930, "total_steps": 8674, "loss": 0.5215288400650024, "lr": 2.1326890594453563e-07, "epoch": 1.5978787180078395, "percentage": 79.89, "elapsed_time": "4:44:12", "remaining_time": "1:11:31"} +{"current_steps": 6931, "total_steps": 8674, "loss": 0.4396743178367615, "lr": 2.130336121400572e-07, "epoch": 1.5981092921374223, "percentage": 79.91, "elapsed_time": "4:44:14", "remaining_time": "1:11:28"} +{"current_steps": 6932, "total_steps": 8674, "loss": 0.5068432688713074, "lr": 2.127984327291188e-07, "epoch": 1.598339866267005, "percentage": 79.92, "elapsed_time": "4:44:17", "remaining_time": "1:11:26"} 
+{"current_steps": 6933, "total_steps": 8674, "loss": 0.48809194564819336, "lr": 2.1256336774590643e-07, "epoch": 1.5985704403965875, "percentage": 79.93, "elapsed_time": "4:44:19", "remaining_time": "1:11:23"} +{"current_steps": 6934, "total_steps": 8674, "loss": 0.4191613793373108, "lr": 2.123284172245885e-07, "epoch": 1.5988010145261702, "percentage": 79.94, "elapsed_time": "4:44:21", "remaining_time": "1:11:21"} +{"current_steps": 6935, "total_steps": 8674, "loss": 0.41901010274887085, "lr": 2.1209358119931843e-07, "epoch": 1.5990315886557527, "percentage": 79.95, "elapsed_time": "4:44:24", "remaining_time": "1:11:18"} +{"current_steps": 6936, "total_steps": 8674, "loss": 0.5046913623809814, "lr": 2.1185885970423133e-07, "epoch": 1.5992621627853354, "percentage": 79.96, "elapsed_time": "4:44:26", "remaining_time": "1:11:16"} +{"current_steps": 6937, "total_steps": 8674, "loss": 0.5113730430603027, "lr": 2.1162425277344675e-07, "epoch": 1.5994927369149181, "percentage": 79.97, "elapsed_time": "4:44:29", "remaining_time": "1:11:14"} +{"current_steps": 6938, "total_steps": 8674, "loss": 0.34129637479782104, "lr": 2.1138976044106672e-07, "epoch": 1.5997233110445008, "percentage": 79.99, "elapsed_time": "4:44:31", "remaining_time": "1:11:11"} +{"current_steps": 6939, "total_steps": 8674, "loss": 0.4492289423942566, "lr": 2.1115538274117762e-07, "epoch": 1.5999538851740835, "percentage": 80.0, "elapsed_time": "4:44:33", "remaining_time": "1:11:09"} +{"current_steps": 6940, "total_steps": 8674, "loss": 0.41002708673477173, "lr": 2.1092111970784833e-07, "epoch": 1.6001844593036663, "percentage": 80.01, "elapsed_time": "4:44:36", "remaining_time": "1:11:06"} +{"current_steps": 6941, "total_steps": 8674, "loss": 0.5444740056991577, "lr": 2.1068697137513113e-07, "epoch": 1.6004150334332488, "percentage": 80.02, "elapsed_time": "4:44:38", "remaining_time": "1:11:04"} +{"current_steps": 6942, "total_steps": 8674, "loss": 0.3489699959754944, "lr": 2.1045293777706196e-07, 
"epoch": 1.6006456075628315, "percentage": 80.03, "elapsed_time": "4:44:41", "remaining_time": "1:11:01"} +{"current_steps": 6943, "total_steps": 8674, "loss": 0.41807419061660767, "lr": 2.1021901894766025e-07, "epoch": 1.600876181692414, "percentage": 80.04, "elapsed_time": "4:44:43", "remaining_time": "1:10:59"} +{"current_steps": 6944, "total_steps": 8674, "loss": 0.41074657440185547, "lr": 2.0998521492092857e-07, "epoch": 1.6011067558219967, "percentage": 80.06, "elapsed_time": "4:44:45", "remaining_time": "1:10:56"} +{"current_steps": 6945, "total_steps": 8674, "loss": 0.4085312485694885, "lr": 2.097515257308521e-07, "epoch": 1.6013373299515794, "percentage": 80.07, "elapsed_time": "4:44:48", "remaining_time": "1:10:54"} +{"current_steps": 6946, "total_steps": 8674, "loss": 0.42699170112609863, "lr": 2.095179514114006e-07, "epoch": 1.6015679040811621, "percentage": 80.08, "elapsed_time": "4:44:50", "remaining_time": "1:10:51"} +{"current_steps": 6947, "total_steps": 8674, "loss": 0.40041583776474, "lr": 2.0928449199652597e-07, "epoch": 1.6017984782107448, "percentage": 80.09, "elapsed_time": "4:44:53", "remaining_time": "1:10:49"} +{"current_steps": 6948, "total_steps": 8674, "loss": 0.47465208172798157, "lr": 2.090511475201643e-07, "epoch": 1.6020290523403276, "percentage": 80.1, "elapsed_time": "4:44:55", "remaining_time": "1:10:46"} +{"current_steps": 6949, "total_steps": 8674, "loss": 0.4338058829307556, "lr": 2.0881791801623405e-07, "epoch": 1.60225962646991, "percentage": 80.11, "elapsed_time": "4:44:58", "remaining_time": "1:10:44"} +{"current_steps": 6950, "total_steps": 8674, "loss": 0.5398772954940796, "lr": 2.0858480351863794e-07, "epoch": 1.6024902005994928, "percentage": 80.12, "elapsed_time": "4:45:00", "remaining_time": "1:10:41"} +{"current_steps": 6951, "total_steps": 8674, "loss": 0.40750259160995483, "lr": 2.0835180406126151e-07, "epoch": 1.6027207747290753, "percentage": 80.14, "elapsed_time": "4:45:02", "remaining_time": "1:10:39"} 
+{"current_steps": 6952, "total_steps": 8674, "loss": 0.4365716278553009, "lr": 2.0811891967797336e-07, "epoch": 1.602951348858658, "percentage": 80.15, "elapsed_time": "4:45:05", "remaining_time": "1:10:36"} +{"current_steps": 6953, "total_steps": 8674, "loss": 0.41537174582481384, "lr": 2.078861504026258e-07, "epoch": 1.6031819229882407, "percentage": 80.16, "elapsed_time": "4:45:07", "remaining_time": "1:10:34"} +{"current_steps": 6954, "total_steps": 8674, "loss": 0.3687853217124939, "lr": 2.0765349626905394e-07, "epoch": 1.6034124971178234, "percentage": 80.17, "elapsed_time": "4:45:10", "remaining_time": "1:10:32"} +{"current_steps": 6955, "total_steps": 8674, "loss": 0.48866790533065796, "lr": 2.074209573110769e-07, "epoch": 1.6036430712474061, "percentage": 80.18, "elapsed_time": "4:45:12", "remaining_time": "1:10:29"} +{"current_steps": 6956, "total_steps": 8674, "loss": 0.4618760347366333, "lr": 2.0718853356249588e-07, "epoch": 1.6038736453769888, "percentage": 80.19, "elapsed_time": "4:45:15", "remaining_time": "1:10:27"} +{"current_steps": 6957, "total_steps": 8674, "loss": 0.365873247385025, "lr": 2.0695622505709654e-07, "epoch": 1.6041042195065713, "percentage": 80.21, "elapsed_time": "4:45:17", "remaining_time": "1:10:24"} +{"current_steps": 6958, "total_steps": 8674, "loss": 0.4346495270729065, "lr": 2.0672403182864706e-07, "epoch": 1.604334793636154, "percentage": 80.22, "elapsed_time": "4:45:19", "remaining_time": "1:10:22"} +{"current_steps": 6959, "total_steps": 8674, "loss": 0.3995724618434906, "lr": 2.0649195391089935e-07, "epoch": 1.6045653677657365, "percentage": 80.23, "elapsed_time": "4:45:22", "remaining_time": "1:10:19"} +{"current_steps": 6960, "total_steps": 8674, "loss": 0.4628515839576721, "lr": 2.062599913375882e-07, "epoch": 1.6047959418953193, "percentage": 80.24, "elapsed_time": "4:45:24", "remaining_time": "1:10:17"} +{"current_steps": 6961, "total_steps": 8674, "loss": 0.39776262640953064, "lr": 2.060281441424314e-07, "epoch": 
1.605026516024902, "percentage": 80.25, "elapsed_time": "4:45:26", "remaining_time": "1:10:14"} +{"current_steps": 6962, "total_steps": 8674, "loss": 0.4622994065284729, "lr": 2.057964123591307e-07, "epoch": 1.6052570901544847, "percentage": 80.26, "elapsed_time": "4:45:29", "remaining_time": "1:10:12"} +{"current_steps": 6963, "total_steps": 8674, "loss": 0.4028933048248291, "lr": 2.0556479602137033e-07, "epoch": 1.6054876642840674, "percentage": 80.27, "elapsed_time": "4:45:31", "remaining_time": "1:10:09"} +{"current_steps": 6964, "total_steps": 8674, "loss": 0.46639660000801086, "lr": 2.0533329516281838e-07, "epoch": 1.6057182384136501, "percentage": 80.29, "elapsed_time": "4:45:34", "remaining_time": "1:10:07"} +{"current_steps": 6965, "total_steps": 8674, "loss": 0.4063863158226013, "lr": 2.0510190981712537e-07, "epoch": 1.6059488125432326, "percentage": 80.3, "elapsed_time": "4:45:36", "remaining_time": "1:10:04"} +{"current_steps": 6966, "total_steps": 8674, "loss": 0.471376895904541, "lr": 2.0487064001792586e-07, "epoch": 1.6061793866728153, "percentage": 80.31, "elapsed_time": "4:45:38", "remaining_time": "1:10:02"} +{"current_steps": 6967, "total_steps": 8674, "loss": 0.5094102025032043, "lr": 2.0463948579883727e-07, "epoch": 1.6064099608023978, "percentage": 80.32, "elapsed_time": "4:45:41", "remaining_time": "1:09:59"} +{"current_steps": 6968, "total_steps": 8674, "loss": 0.3922441005706787, "lr": 2.0440844719346039e-07, "epoch": 1.6066405349319806, "percentage": 80.33, "elapsed_time": "4:45:43", "remaining_time": "1:09:57"} +{"current_steps": 6969, "total_steps": 8674, "loss": 0.47777149081230164, "lr": 2.0417752423537882e-07, "epoch": 1.6068711090615633, "percentage": 80.34, "elapsed_time": "4:45:46", "remaining_time": "1:09:54"} +{"current_steps": 6970, "total_steps": 8674, "loss": 0.5780138969421387, "lr": 2.0394671695815924e-07, "epoch": 1.607101683191146, "percentage": 80.36, "elapsed_time": "4:45:48", "remaining_time": "1:09:52"} 
+{"current_steps": 6971, "total_steps": 8674, "loss": 0.43968862295150757, "lr": 2.0371602539535237e-07, "epoch": 1.6073322573207287, "percentage": 80.37, "elapsed_time": "4:45:51", "remaining_time": "1:09:49"} +{"current_steps": 6972, "total_steps": 8674, "loss": 0.5204722881317139, "lr": 2.0348544958049096e-07, "epoch": 1.6075628314503114, "percentage": 80.38, "elapsed_time": "4:45:53", "remaining_time": "1:09:47"} +{"current_steps": 6973, "total_steps": 8674, "loss": 0.3944805860519409, "lr": 2.0325498954709198e-07, "epoch": 1.607793405579894, "percentage": 80.39, "elapsed_time": "4:45:55", "remaining_time": "1:09:45"} +{"current_steps": 6974, "total_steps": 8674, "loss": 0.42686349153518677, "lr": 2.0302464532865505e-07, "epoch": 1.6080239797094766, "percentage": 80.4, "elapsed_time": "4:45:58", "remaining_time": "1:09:42"} +{"current_steps": 6975, "total_steps": 8674, "loss": 0.3860762119293213, "lr": 2.027944169586633e-07, "epoch": 1.6082545538390591, "percentage": 80.41, "elapsed_time": "4:46:00", "remaining_time": "1:09:40"} +{"current_steps": 6976, "total_steps": 8674, "loss": 0.5570458769798279, "lr": 2.0256430447058215e-07, "epoch": 1.6084851279686418, "percentage": 80.42, "elapsed_time": "4:46:02", "remaining_time": "1:09:37"} +{"current_steps": 6977, "total_steps": 8674, "loss": 0.4556728005409241, "lr": 2.0233430789786132e-07, "epoch": 1.6087157020982246, "percentage": 80.44, "elapsed_time": "4:46:05", "remaining_time": "1:09:35"} +{"current_steps": 6978, "total_steps": 8674, "loss": 0.48365700244903564, "lr": 2.0210442727393285e-07, "epoch": 1.6089462762278073, "percentage": 80.45, "elapsed_time": "4:46:07", "remaining_time": "1:09:32"} +{"current_steps": 6979, "total_steps": 8674, "loss": 0.4456971287727356, "lr": 2.018746626322124e-07, "epoch": 1.60917685035739, "percentage": 80.46, "elapsed_time": "4:46:10", "remaining_time": "1:09:30"} +{"current_steps": 6980, "total_steps": 8674, "loss": 0.41877123713493347, "lr": 2.0164501400609835e-07, 
"epoch": 1.6094074244869727, "percentage": 80.47, "elapsed_time": "4:46:12", "remaining_time": "1:09:27"} +{"current_steps": 6981, "total_steps": 8674, "loss": 0.4073547124862671, "lr": 2.0141548142897246e-07, "epoch": 1.6096379986165552, "percentage": 80.48, "elapsed_time": "4:46:15", "remaining_time": "1:09:25"} +{"current_steps": 6982, "total_steps": 8674, "loss": 0.4987693727016449, "lr": 2.0118606493420021e-07, "epoch": 1.609868572746138, "percentage": 80.49, "elapsed_time": "4:46:17", "remaining_time": "1:09:22"} +{"current_steps": 6983, "total_steps": 8674, "loss": 0.4391751289367676, "lr": 2.0095676455512878e-07, "epoch": 1.6100991468757204, "percentage": 80.5, "elapsed_time": "4:46:20", "remaining_time": "1:09:20"} +{"current_steps": 6984, "total_steps": 8674, "loss": 0.409262478351593, "lr": 2.0072758032508996e-07, "epoch": 1.6103297210053031, "percentage": 80.52, "elapsed_time": "4:46:22", "remaining_time": "1:09:17"} +{"current_steps": 6985, "total_steps": 8674, "loss": 0.38653457164764404, "lr": 2.0049851227739744e-07, "epoch": 1.6105602951348859, "percentage": 80.53, "elapsed_time": "4:46:24", "remaining_time": "1:09:15"} +{"current_steps": 6986, "total_steps": 8674, "loss": 0.4824348986148834, "lr": 2.0026956044534914e-07, "epoch": 1.6107908692644686, "percentage": 80.54, "elapsed_time": "4:46:27", "remaining_time": "1:09:12"} +{"current_steps": 6987, "total_steps": 8674, "loss": 0.45774850249290466, "lr": 2.00040724862225e-07, "epoch": 1.6110214433940513, "percentage": 80.55, "elapsed_time": "4:46:29", "remaining_time": "1:09:10"} +{"current_steps": 6988, "total_steps": 8674, "loss": 0.45437830686569214, "lr": 1.9981200556128906e-07, "epoch": 1.611252017523634, "percentage": 80.56, "elapsed_time": "4:46:32", "remaining_time": "1:09:08"} +{"current_steps": 6989, "total_steps": 8674, "loss": 0.4563155770301819, "lr": 1.9958340257578753e-07, "epoch": 1.6114825916532165, "percentage": 80.57, "elapsed_time": "4:46:34", "remaining_time": "1:09:05"} 
+{"current_steps": 6990, "total_steps": 8674, "loss": 0.5786794424057007, "lr": 1.9935491593895048e-07, "epoch": 1.6117131657827992, "percentage": 80.59, "elapsed_time": "4:46:37", "remaining_time": "1:09:03"} +{"current_steps": 6991, "total_steps": 8674, "loss": 0.5290218591690063, "lr": 1.991265456839909e-07, "epoch": 1.6119437399123817, "percentage": 80.6, "elapsed_time": "4:46:39", "remaining_time": "1:09:00"} +{"current_steps": 6992, "total_steps": 8674, "loss": 0.3456650376319885, "lr": 1.9889829184410434e-07, "epoch": 1.6121743140419644, "percentage": 80.61, "elapsed_time": "4:46:42", "remaining_time": "1:08:58"} +{"current_steps": 6993, "total_steps": 8674, "loss": 0.40869832038879395, "lr": 1.9867015445247015e-07, "epoch": 1.6124048881715471, "percentage": 80.62, "elapsed_time": "4:46:44", "remaining_time": "1:08:55"} +{"current_steps": 6994, "total_steps": 8674, "loss": 0.49926644563674927, "lr": 1.9844213354225004e-07, "epoch": 1.6126354623011299, "percentage": 80.63, "elapsed_time": "4:46:46", "remaining_time": "1:08:53"} +{"current_steps": 6995, "total_steps": 8674, "loss": 0.4874018132686615, "lr": 1.9821422914658957e-07, "epoch": 1.6128660364307126, "percentage": 80.64, "elapsed_time": "4:46:49", "remaining_time": "1:08:50"} +{"current_steps": 6996, "total_steps": 8674, "loss": 0.4228810667991638, "lr": 1.9798644129861654e-07, "epoch": 1.6130966105602953, "percentage": 80.65, "elapsed_time": "4:46:51", "remaining_time": "1:08:48"} +{"current_steps": 6997, "total_steps": 8674, "loss": 0.4309043884277344, "lr": 1.9775877003144237e-07, "epoch": 1.6133271846898778, "percentage": 80.67, "elapsed_time": "4:46:54", "remaining_time": "1:08:45"} +{"current_steps": 6998, "total_steps": 8674, "loss": 0.3917756676673889, "lr": 1.9753121537816142e-07, "epoch": 1.6135577588194605, "percentage": 80.68, "elapsed_time": "4:46:56", "remaining_time": "1:08:43"} +{"current_steps": 6999, "total_steps": 8674, "loss": 0.4074435830116272, "lr": 1.9730377737185145e-07, 
"epoch": 1.613788332949043, "percentage": 80.69, "elapsed_time": "4:46:58", "remaining_time": "1:08:40"} +{"current_steps": 7000, "total_steps": 8674, "loss": 0.4581322968006134, "lr": 1.9707645604557243e-07, "epoch": 1.6140189070786257, "percentage": 80.7, "elapsed_time": "4:47:01", "remaining_time": "1:08:38"} +{"current_steps": 7001, "total_steps": 8674, "loss": 0.4479151666164398, "lr": 1.9684925143236776e-07, "epoch": 1.6142494812082084, "percentage": 80.71, "elapsed_time": "4:47:05", "remaining_time": "1:08:36"} +{"current_steps": 7002, "total_steps": 8674, "loss": 0.3378838300704956, "lr": 1.966221635652643e-07, "epoch": 1.6144800553377912, "percentage": 80.72, "elapsed_time": "4:47:07", "remaining_time": "1:08:33"} +{"current_steps": 7003, "total_steps": 8674, "loss": 0.3383278250694275, "lr": 1.96395192477271e-07, "epoch": 1.6147106294673739, "percentage": 80.74, "elapsed_time": "4:47:10", "remaining_time": "1:08:31"} +{"current_steps": 7004, "total_steps": 8674, "loss": 0.5164717435836792, "lr": 1.9616833820138091e-07, "epoch": 1.6149412035969566, "percentage": 80.75, "elapsed_time": "4:47:12", "remaining_time": "1:08:28"} +{"current_steps": 7005, "total_steps": 8674, "loss": 0.4548792243003845, "lr": 1.9594160077056932e-07, "epoch": 1.615171777726539, "percentage": 80.76, "elapsed_time": "4:47:15", "remaining_time": "1:08:26"} +{"current_steps": 7006, "total_steps": 8674, "loss": 0.41074928641319275, "lr": 1.9571498021779531e-07, "epoch": 1.6154023518561216, "percentage": 80.77, "elapsed_time": "4:47:17", "remaining_time": "1:08:23"} +{"current_steps": 7007, "total_steps": 8674, "loss": 0.4156193137168884, "lr": 1.9548847657599976e-07, "epoch": 1.6156329259857043, "percentage": 80.78, "elapsed_time": "4:47:19", "remaining_time": "1:08:21"} +{"current_steps": 7008, "total_steps": 8674, "loss": 0.4602770209312439, "lr": 1.95262089878108e-07, "epoch": 1.615863500115287, "percentage": 80.79, "elapsed_time": "4:47:22", "remaining_time": "1:08:19"} 
+{"current_steps": 7009, "total_steps": 8674, "loss": 0.4911346733570099, "lr": 1.9503582015702713e-07, "epoch": 1.6160940742448697, "percentage": 80.8, "elapsed_time": "4:47:24", "remaining_time": "1:08:16"} +{"current_steps": 7010, "total_steps": 8674, "loss": 0.394087553024292, "lr": 1.9480966744564764e-07, "epoch": 1.6163246483744524, "percentage": 80.82, "elapsed_time": "4:47:27", "remaining_time": "1:08:14"} +{"current_steps": 7011, "total_steps": 8674, "loss": 0.4845706820487976, "lr": 1.9458363177684367e-07, "epoch": 1.6165552225040352, "percentage": 80.83, "elapsed_time": "4:47:29", "remaining_time": "1:08:11"} +{"current_steps": 7012, "total_steps": 8674, "loss": 0.49142736196517944, "lr": 1.9435771318347116e-07, "epoch": 1.6167857966336177, "percentage": 80.84, "elapsed_time": "4:47:32", "remaining_time": "1:08:09"} +{"current_steps": 7013, "total_steps": 8674, "loss": 0.4408283829689026, "lr": 1.9413191169836996e-07, "epoch": 1.6170163707632004, "percentage": 80.85, "elapsed_time": "4:47:34", "remaining_time": "1:08:06"} +{"current_steps": 7014, "total_steps": 8674, "loss": 0.6088640689849854, "lr": 1.9390622735436268e-07, "epoch": 1.6172469448927829, "percentage": 80.86, "elapsed_time": "4:47:37", "remaining_time": "1:08:04"} +{"current_steps": 7015, "total_steps": 8674, "loss": 0.5208842158317566, "lr": 1.93680660184255e-07, "epoch": 1.6174775190223656, "percentage": 80.87, "elapsed_time": "4:47:39", "remaining_time": "1:08:01"} +{"current_steps": 7016, "total_steps": 8674, "loss": 0.5652821660041809, "lr": 1.9345521022083488e-07, "epoch": 1.6177080931519483, "percentage": 80.89, "elapsed_time": "4:47:42", "remaining_time": "1:07:59"} +{"current_steps": 7017, "total_steps": 8674, "loss": 0.4861832857131958, "lr": 1.9322987749687437e-07, "epoch": 1.617938667281531, "percentage": 80.9, "elapsed_time": "4:47:44", "remaining_time": "1:07:56"} +{"current_steps": 7018, "total_steps": 8674, "loss": 0.39583832025527954, "lr": 1.930046620451272e-07, "epoch": 
1.6181692414111137, "percentage": 80.91, "elapsed_time": "4:47:47", "remaining_time": "1:07:54"} +{"current_steps": 7019, "total_steps": 8674, "loss": 0.5638653039932251, "lr": 1.927795638983313e-07, "epoch": 1.6183998155406965, "percentage": 80.92, "elapsed_time": "4:47:49", "remaining_time": "1:07:51"} +{"current_steps": 7020, "total_steps": 8674, "loss": 0.4737275242805481, "lr": 1.9255458308920648e-07, "epoch": 1.618630389670279, "percentage": 80.93, "elapsed_time": "4:47:51", "remaining_time": "1:07:49"} +{"current_steps": 7021, "total_steps": 8674, "loss": 0.4526802897453308, "lr": 1.923297196504563e-07, "epoch": 1.6188609637998617, "percentage": 80.94, "elapsed_time": "4:47:54", "remaining_time": "1:07:47"} +{"current_steps": 7022, "total_steps": 8674, "loss": 0.40800565481185913, "lr": 1.9210497361476708e-07, "epoch": 1.6190915379294442, "percentage": 80.95, "elapsed_time": "4:47:57", "remaining_time": "1:07:44"} +{"current_steps": 7023, "total_steps": 8674, "loss": 0.39532414078712463, "lr": 1.9188034501480744e-07, "epoch": 1.6193221120590269, "percentage": 80.97, "elapsed_time": "4:47:59", "remaining_time": "1:07:42"} +{"current_steps": 7024, "total_steps": 8674, "loss": 0.40236538648605347, "lr": 1.9165583388322993e-07, "epoch": 1.6195526861886096, "percentage": 80.98, "elapsed_time": "4:48:01", "remaining_time": "1:07:39"} +{"current_steps": 7025, "total_steps": 8674, "loss": 0.4421047866344452, "lr": 1.91431440252669e-07, "epoch": 1.6197832603181923, "percentage": 80.99, "elapsed_time": "4:48:04", "remaining_time": "1:07:37"} +{"current_steps": 7026, "total_steps": 8674, "loss": 0.4149084687232971, "lr": 1.9120716415574322e-07, "epoch": 1.620013834447775, "percentage": 81.0, "elapsed_time": "4:48:06", "remaining_time": "1:07:34"} +{"current_steps": 7027, "total_steps": 8674, "loss": 0.4186127185821533, "lr": 1.9098300562505264e-07, "epoch": 1.6202444085773577, "percentage": 81.01, "elapsed_time": "4:48:09", "remaining_time": "1:07:32"} 
+{"current_steps": 7028, "total_steps": 8674, "loss": 0.4649406671524048, "lr": 1.9075896469318132e-07, "epoch": 1.6204749827069402, "percentage": 81.02, "elapsed_time": "4:48:11", "remaining_time": "1:07:29"} +{"current_steps": 7029, "total_steps": 8674, "loss": 0.43240052461624146, "lr": 1.9053504139269593e-07, "epoch": 1.620705556836523, "percentage": 81.04, "elapsed_time": "4:48:14", "remaining_time": "1:07:27"} +{"current_steps": 7030, "total_steps": 8674, "loss": 0.4874862730503082, "lr": 1.9031123575614628e-07, "epoch": 1.6209361309661054, "percentage": 81.05, "elapsed_time": "4:48:16", "remaining_time": "1:07:24"} +{"current_steps": 7031, "total_steps": 8674, "loss": 0.3771815896034241, "lr": 1.900875478160644e-07, "epoch": 1.6211667050956882, "percentage": 81.06, "elapsed_time": "4:48:18", "remaining_time": "1:07:22"} +{"current_steps": 7032, "total_steps": 8674, "loss": 0.49882376194000244, "lr": 1.898639776049653e-07, "epoch": 1.6213972792252709, "percentage": 81.07, "elapsed_time": "4:48:21", "remaining_time": "1:07:19"} +{"current_steps": 7033, "total_steps": 8674, "loss": 0.3813830614089966, "lr": 1.896405251553479e-07, "epoch": 1.6216278533548536, "percentage": 81.08, "elapsed_time": "4:48:23", "remaining_time": "1:07:17"} +{"current_steps": 7034, "total_steps": 8674, "loss": 0.41883599758148193, "lr": 1.8941719049969272e-07, "epoch": 1.6218584274844363, "percentage": 81.09, "elapsed_time": "4:48:26", "remaining_time": "1:07:14"} +{"current_steps": 7035, "total_steps": 8674, "loss": 0.42194586992263794, "lr": 1.8919397367046409e-07, "epoch": 1.622089001614019, "percentage": 81.1, "elapsed_time": "4:48:28", "remaining_time": "1:07:12"} +{"current_steps": 7036, "total_steps": 8674, "loss": 0.36967700719833374, "lr": 1.889708747001084e-07, "epoch": 1.6223195757436015, "percentage": 81.12, "elapsed_time": "4:48:30", "remaining_time": "1:07:10"} +{"current_steps": 7037, "total_steps": 8674, "loss": 0.4493946433067322, "lr": 1.887478936210556e-07, "epoch": 
1.6225501498731842, "percentage": 81.13, "elapsed_time": "4:48:33", "remaining_time": "1:07:07"} +{"current_steps": 7038, "total_steps": 8674, "loss": 0.42121458053588867, "lr": 1.8852503046571833e-07, "epoch": 1.6227807240027667, "percentage": 81.14, "elapsed_time": "4:48:35", "remaining_time": "1:07:05"} +{"current_steps": 7039, "total_steps": 8674, "loss": 0.4529588222503662, "lr": 1.8830228526649207e-07, "epoch": 1.6230112981323495, "percentage": 81.15, "elapsed_time": "4:48:38", "remaining_time": "1:07:02"} +{"current_steps": 7040, "total_steps": 8674, "loss": 0.387844443321228, "lr": 1.88079658055755e-07, "epoch": 1.6232418722619322, "percentage": 81.16, "elapsed_time": "4:48:40", "remaining_time": "1:07:00"} +{"current_steps": 7041, "total_steps": 8674, "loss": 0.49954158067703247, "lr": 1.8785714886586802e-07, "epoch": 1.623472446391515, "percentage": 81.17, "elapsed_time": "4:48:43", "remaining_time": "1:06:57"} +{"current_steps": 7042, "total_steps": 8674, "loss": 0.4016296863555908, "lr": 1.8763475772917548e-07, "epoch": 1.6237030205210976, "percentage": 81.19, "elapsed_time": "4:48:45", "remaining_time": "1:06:55"} +{"current_steps": 7043, "total_steps": 8674, "loss": 0.358657568693161, "lr": 1.8741248467800362e-07, "epoch": 1.6239335946506803, "percentage": 81.2, "elapsed_time": "4:48:48", "remaining_time": "1:06:52"} +{"current_steps": 7044, "total_steps": 8674, "loss": 0.434385746717453, "lr": 1.8719032974466264e-07, "epoch": 1.6241641687802628, "percentage": 81.21, "elapsed_time": "4:48:50", "remaining_time": "1:06:50"} +{"current_steps": 7045, "total_steps": 8674, "loss": 0.4658992886543274, "lr": 1.8696829296144466e-07, "epoch": 1.6243947429098455, "percentage": 81.22, "elapsed_time": "4:48:52", "remaining_time": "1:06:47"} +{"current_steps": 7046, "total_steps": 8674, "loss": 0.5438188910484314, "lr": 1.8674637436062545e-07, "epoch": 1.624625317039428, "percentage": 81.23, "elapsed_time": "4:48:55", "remaining_time": "1:06:45"} +{"current_steps": 
7047, "total_steps": 8674, "loss": 0.47364577651023865, "lr": 1.8652457397446254e-07, "epoch": 1.6248558911690107, "percentage": 81.24, "elapsed_time": "4:48:57", "remaining_time": "1:06:42"} +{"current_steps": 7048, "total_steps": 8674, "loss": 0.3664509654045105, "lr": 1.8630289183519733e-07, "epoch": 1.6250864652985935, "percentage": 81.25, "elapsed_time": "4:49:00", "remaining_time": "1:06:40"} +{"current_steps": 7049, "total_steps": 8674, "loss": 0.4226282835006714, "lr": 1.8608132797505317e-07, "epoch": 1.6253170394281762, "percentage": 81.27, "elapsed_time": "4:49:02", "remaining_time": "1:06:37"} +{"current_steps": 7050, "total_steps": 8674, "loss": 0.47477972507476807, "lr": 1.8585988242623706e-07, "epoch": 1.625547613557759, "percentage": 81.28, "elapsed_time": "4:49:05", "remaining_time": "1:06:35"} +{"current_steps": 7051, "total_steps": 8674, "loss": 0.5372269749641418, "lr": 1.8563855522093786e-07, "epoch": 1.6257781876873416, "percentage": 81.29, "elapsed_time": "4:49:07", "remaining_time": "1:06:33"} +{"current_steps": 7052, "total_steps": 8674, "loss": 0.37929385900497437, "lr": 1.8541734639132788e-07, "epoch": 1.626008761816924, "percentage": 81.3, "elapsed_time": "4:49:09", "remaining_time": "1:06:30"} +{"current_steps": 7053, "total_steps": 8674, "loss": 0.4029538631439209, "lr": 1.8519625596956244e-07, "epoch": 1.6262393359465068, "percentage": 81.31, "elapsed_time": "4:49:12", "remaining_time": "1:06:28"} +{"current_steps": 7054, "total_steps": 8674, "loss": 0.3932439982891083, "lr": 1.8497528398777874e-07, "epoch": 1.6264699100760893, "percentage": 81.32, "elapsed_time": "4:49:14", "remaining_time": "1:06:25"} +{"current_steps": 7055, "total_steps": 8674, "loss": 0.45190152525901794, "lr": 1.847544304780978e-07, "epoch": 1.626700484205672, "percentage": 81.34, "elapsed_time": "4:49:17", "remaining_time": "1:06:23"} +{"current_steps": 7056, "total_steps": 8674, "loss": 0.4852195382118225, "lr": 1.8453369547262242e-07, "epoch": 
1.6269310583352548, "percentage": 81.35, "elapsed_time": "4:49:19", "remaining_time": "1:06:20"} +{"current_steps": 7057, "total_steps": 8674, "loss": 0.41676801443099976, "lr": 1.8431307900343918e-07, "epoch": 1.6271616324648375, "percentage": 81.36, "elapsed_time": "4:49:22", "remaining_time": "1:06:18"} +{"current_steps": 7058, "total_steps": 8674, "loss": 0.44374561309814453, "lr": 1.8409258110261626e-07, "epoch": 1.6273922065944202, "percentage": 81.37, "elapsed_time": "4:49:24", "remaining_time": "1:06:15"} +{"current_steps": 7059, "total_steps": 8674, "loss": 0.4348192811012268, "lr": 1.838722018022061e-07, "epoch": 1.627622780724003, "percentage": 81.38, "elapsed_time": "4:49:26", "remaining_time": "1:06:13"} +{"current_steps": 7060, "total_steps": 8674, "loss": 0.46572640538215637, "lr": 1.836519411342422e-07, "epoch": 1.6278533548535854, "percentage": 81.39, "elapsed_time": "4:49:29", "remaining_time": "1:06:10"} +{"current_steps": 7061, "total_steps": 8674, "loss": 0.4633631408214569, "lr": 1.8343179913074214e-07, "epoch": 1.6280839289831681, "percentage": 81.4, "elapsed_time": "4:49:31", "remaining_time": "1:06:08"} +{"current_steps": 7062, "total_steps": 8674, "loss": 0.44420552253723145, "lr": 1.8321177582370605e-07, "epoch": 1.6283145031127506, "percentage": 81.42, "elapsed_time": "4:49:34", "remaining_time": "1:06:05"} +{"current_steps": 7063, "total_steps": 8674, "loss": 0.5628370046615601, "lr": 1.8299187124511594e-07, "epoch": 1.6285450772423333, "percentage": 81.43, "elapsed_time": "4:49:36", "remaining_time": "1:06:03"} +{"current_steps": 7064, "total_steps": 8674, "loss": 0.5342314839363098, "lr": 1.8277208542693778e-07, "epoch": 1.628775651371916, "percentage": 81.44, "elapsed_time": "4:49:39", "remaining_time": "1:06:00"} +{"current_steps": 7065, "total_steps": 8674, "loss": 0.440934419631958, "lr": 1.82552418401119e-07, "epoch": 1.6290062255014988, "percentage": 81.45, "elapsed_time": "4:49:41", "remaining_time": "1:05:58"} 
+{"current_steps": 7066, "total_steps": 8674, "loss": 0.45218637585639954, "lr": 1.823328701995912e-07, "epoch": 1.6292367996310815, "percentage": 81.46, "elapsed_time": "4:49:43", "remaining_time": "1:05:56"} +{"current_steps": 7067, "total_steps": 8674, "loss": 0.4059211015701294, "lr": 1.8211344085426716e-07, "epoch": 1.6294673737606642, "percentage": 81.47, "elapsed_time": "4:49:46", "remaining_time": "1:05:53"} +{"current_steps": 7068, "total_steps": 8674, "loss": 0.5036444067955017, "lr": 1.818941303970435e-07, "epoch": 1.6296979478902467, "percentage": 81.48, "elapsed_time": "4:49:48", "remaining_time": "1:05:51"} +{"current_steps": 7069, "total_steps": 8674, "loss": 0.5034196972846985, "lr": 1.8167493885979935e-07, "epoch": 1.6299285220198294, "percentage": 81.5, "elapsed_time": "4:49:51", "remaining_time": "1:05:48"} +{"current_steps": 7070, "total_steps": 8674, "loss": 0.4199259281158447, "lr": 1.8145586627439645e-07, "epoch": 1.630159096149412, "percentage": 81.51, "elapsed_time": "4:49:53", "remaining_time": "1:05:46"} +{"current_steps": 7071, "total_steps": 8674, "loss": 0.5439015626907349, "lr": 1.8123691267267915e-07, "epoch": 1.6303896702789946, "percentage": 81.52, "elapsed_time": "4:49:56", "remaining_time": "1:05:43"} +{"current_steps": 7072, "total_steps": 8674, "loss": 0.4349868893623352, "lr": 1.810180780864743e-07, "epoch": 1.6306202444085773, "percentage": 81.53, "elapsed_time": "4:49:58", "remaining_time": "1:05:41"} +{"current_steps": 7073, "total_steps": 8674, "loss": 0.39939552545547485, "lr": 1.807993625475921e-07, "epoch": 1.63085081853816, "percentage": 81.54, "elapsed_time": "4:50:00", "remaining_time": "1:05:38"} +{"current_steps": 7074, "total_steps": 8674, "loss": 0.43073540925979614, "lr": 1.8058076608782468e-07, "epoch": 1.6310813926677428, "percentage": 81.55, "elapsed_time": "4:50:03", "remaining_time": "1:05:36"} +{"current_steps": 7075, "total_steps": 8674, "loss": 0.4735824465751648, "lr": 1.8036228873894744e-07, "epoch": 
1.6313119667973255, "percentage": 81.57, "elapsed_time": "4:50:05", "remaining_time": "1:05:33"} +{"current_steps": 7076, "total_steps": 8674, "loss": 0.42971551418304443, "lr": 1.8014393053271836e-07, "epoch": 1.631542540926908, "percentage": 81.58, "elapsed_time": "4:50:08", "remaining_time": "1:05:31"} +{"current_steps": 7077, "total_steps": 8674, "loss": 0.48593759536743164, "lr": 1.7992569150087823e-07, "epoch": 1.6317731150564907, "percentage": 81.59, "elapsed_time": "4:50:10", "remaining_time": "1:05:28"} +{"current_steps": 7078, "total_steps": 8674, "loss": 0.530194878578186, "lr": 1.7970757167514973e-07, "epoch": 1.6320036891860732, "percentage": 81.6, "elapsed_time": "4:50:12", "remaining_time": "1:05:26"} +{"current_steps": 7079, "total_steps": 8674, "loss": 0.43393629789352417, "lr": 1.794895710872394e-07, "epoch": 1.632234263315656, "percentage": 81.61, "elapsed_time": "4:50:15", "remaining_time": "1:05:23"} +{"current_steps": 7080, "total_steps": 8674, "loss": 0.4211798906326294, "lr": 1.7927168976883556e-07, "epoch": 1.6324648374452386, "percentage": 81.62, "elapsed_time": "4:50:18", "remaining_time": "1:05:21"} +{"current_steps": 7081, "total_steps": 8674, "loss": 0.39001476764678955, "lr": 1.790539277516091e-07, "epoch": 1.6326954115748213, "percentage": 81.63, "elapsed_time": "4:50:20", "remaining_time": "1:05:19"} +{"current_steps": 7082, "total_steps": 8674, "loss": 0.4360283613204956, "lr": 1.788362850672146e-07, "epoch": 1.632925985704404, "percentage": 81.65, "elapsed_time": "4:50:22", "remaining_time": "1:05:16"} +{"current_steps": 7083, "total_steps": 8674, "loss": 0.47754842042922974, "lr": 1.7861876174728807e-07, "epoch": 1.6331565598339868, "percentage": 81.66, "elapsed_time": "4:50:25", "remaining_time": "1:05:14"} +{"current_steps": 7084, "total_steps": 8674, "loss": 0.35193490982055664, "lr": 1.7840135782344888e-07, "epoch": 1.6333871339635693, "percentage": 81.67, "elapsed_time": "4:50:27", "remaining_time": "1:05:11"} 
+{"current_steps": 7085, "total_steps": 8674, "loss": 0.39997392892837524, "lr": 1.7818407332729912e-07, "epoch": 1.633617708093152, "percentage": 81.68, "elapsed_time": "4:50:29", "remaining_time": "1:05:09"} +{"current_steps": 7086, "total_steps": 8674, "loss": 0.3255331218242645, "lr": 1.7796690829042328e-07, "epoch": 1.6338482822227345, "percentage": 81.69, "elapsed_time": "4:50:32", "remaining_time": "1:05:06"} +{"current_steps": 7087, "total_steps": 8674, "loss": 0.47072282433509827, "lr": 1.777498627443882e-07, "epoch": 1.6340788563523172, "percentage": 81.7, "elapsed_time": "4:50:34", "remaining_time": "1:05:04"} +{"current_steps": 7088, "total_steps": 8674, "loss": 0.4231484830379486, "lr": 1.775329367207441e-07, "epoch": 1.6343094304819, "percentage": 81.72, "elapsed_time": "4:50:37", "remaining_time": "1:05:01"} +{"current_steps": 7089, "total_steps": 8674, "loss": 0.37112197279930115, "lr": 1.7731613025102276e-07, "epoch": 1.6345400046114826, "percentage": 81.73, "elapsed_time": "4:50:39", "remaining_time": "1:04:59"} +{"current_steps": 7090, "total_steps": 8674, "loss": 0.5772623419761658, "lr": 1.7709944336673986e-07, "epoch": 1.6347705787410653, "percentage": 81.74, "elapsed_time": "4:50:42", "remaining_time": "1:04:56"} +{"current_steps": 7091, "total_steps": 8674, "loss": 0.45922917127609253, "lr": 1.7688287609939244e-07, "epoch": 1.635001152870648, "percentage": 81.75, "elapsed_time": "4:50:44", "remaining_time": "1:04:54"} +{"current_steps": 7092, "total_steps": 8674, "loss": 0.42784950137138367, "lr": 1.7666642848046098e-07, "epoch": 1.6352317270002306, "percentage": 81.76, "elapsed_time": "4:50:46", "remaining_time": "1:04:51"} +{"current_steps": 7093, "total_steps": 8674, "loss": 0.4676967263221741, "lr": 1.7645010054140873e-07, "epoch": 1.6354623011298133, "percentage": 81.77, "elapsed_time": "4:50:49", "remaining_time": "1:04:49"} +{"current_steps": 7094, "total_steps": 8674, "loss": 0.434337317943573, "lr": 1.7623389231368046e-07, "epoch": 
1.6356928752593958, "percentage": 81.78, "elapsed_time": "4:50:51", "remaining_time": "1:04:46"} +{"current_steps": 7095, "total_steps": 8674, "loss": 0.4667350947856903, "lr": 1.760178038287048e-07, "epoch": 1.6359234493889785, "percentage": 81.8, "elapsed_time": "4:50:54", "remaining_time": "1:04:44"} +{"current_steps": 7096, "total_steps": 8674, "loss": 0.42233705520629883, "lr": 1.7580183511789204e-07, "epoch": 1.6361540235185612, "percentage": 81.81, "elapsed_time": "4:50:56", "remaining_time": "1:04:42"} +{"current_steps": 7097, "total_steps": 8674, "loss": 0.4488460421562195, "lr": 1.7558598621263565e-07, "epoch": 1.636384597648144, "percentage": 81.82, "elapsed_time": "4:50:59", "remaining_time": "1:04:39"} +{"current_steps": 7098, "total_steps": 8674, "loss": 0.4264194667339325, "lr": 1.753702571443112e-07, "epoch": 1.6366151717777266, "percentage": 81.83, "elapsed_time": "4:51:01", "remaining_time": "1:04:37"} +{"current_steps": 7099, "total_steps": 8674, "loss": 0.32695144414901733, "lr": 1.7515464794427715e-07, "epoch": 1.6368457459073094, "percentage": 81.84, "elapsed_time": "4:51:03", "remaining_time": "1:04:34"} +{"current_steps": 7100, "total_steps": 8674, "loss": 0.3573018014431, "lr": 1.7493915864387487e-07, "epoch": 1.6370763200368919, "percentage": 81.85, "elapsed_time": "4:51:06", "remaining_time": "1:04:32"} +{"current_steps": 7101, "total_steps": 8674, "loss": 0.4545198082923889, "lr": 1.7472378927442732e-07, "epoch": 1.6373068941664746, "percentage": 81.87, "elapsed_time": "4:51:10", "remaining_time": "1:04:29"} +{"current_steps": 7102, "total_steps": 8674, "loss": 0.42589202523231506, "lr": 1.7450853986724123e-07, "epoch": 1.637537468296057, "percentage": 81.88, "elapsed_time": "4:51:12", "remaining_time": "1:04:27"} +{"current_steps": 7103, "total_steps": 8674, "loss": 0.4403502345085144, "lr": 1.742934104536048e-07, "epoch": 1.6377680424256398, "percentage": 81.89, "elapsed_time": "4:51:14", "remaining_time": "1:04:25"} +{"current_steps": 
7104, "total_steps": 8674, "loss": 0.4262208938598633, "lr": 1.7407840106478955e-07, "epoch": 1.6379986165552225, "percentage": 81.9, "elapsed_time": "4:51:17", "remaining_time": "1:04:22"} +{"current_steps": 7105, "total_steps": 8674, "loss": 0.4706578254699707, "lr": 1.7386351173204905e-07, "epoch": 1.6382291906848052, "percentage": 81.91, "elapsed_time": "4:51:19", "remaining_time": "1:04:20"} +{"current_steps": 7106, "total_steps": 8674, "loss": 0.4526079297065735, "lr": 1.7364874248661986e-07, "epoch": 1.638459764814388, "percentage": 81.92, "elapsed_time": "4:51:22", "remaining_time": "1:04:17"} +{"current_steps": 7107, "total_steps": 8674, "loss": 0.42836326360702515, "lr": 1.734340933597207e-07, "epoch": 1.6386903389439706, "percentage": 81.93, "elapsed_time": "4:51:24", "remaining_time": "1:04:15"} +{"current_steps": 7108, "total_steps": 8674, "loss": 0.42680823802948, "lr": 1.7321956438255292e-07, "epoch": 1.6389209130735531, "percentage": 81.95, "elapsed_time": "4:51:27", "remaining_time": "1:04:12"} +{"current_steps": 7109, "total_steps": 8674, "loss": 0.38365036249160767, "lr": 1.7300515558630068e-07, "epoch": 1.6391514872031359, "percentage": 81.96, "elapsed_time": "4:51:29", "remaining_time": "1:04:10"} +{"current_steps": 7110, "total_steps": 8674, "loss": 0.4153991937637329, "lr": 1.7279086700213063e-07, "epoch": 1.6393820613327184, "percentage": 81.97, "elapsed_time": "4:51:31", "remaining_time": "1:04:07"} +{"current_steps": 7111, "total_steps": 8674, "loss": 0.42257291078567505, "lr": 1.7257669866119163e-07, "epoch": 1.639612635462301, "percentage": 81.98, "elapsed_time": "4:51:34", "remaining_time": "1:04:05"} +{"current_steps": 7112, "total_steps": 8674, "loss": 0.34990063309669495, "lr": 1.7236265059461498e-07, "epoch": 1.6398432095918838, "percentage": 81.99, "elapsed_time": "4:51:36", "remaining_time": "1:04:02"} +{"current_steps": 7113, "total_steps": 8674, "loss": 0.44848760962486267, "lr": 1.72148722833515e-07, "epoch": 
1.6400737837214665, "percentage": 82.0, "elapsed_time": "4:51:39", "remaining_time": "1:04:00"} +{"current_steps": 7114, "total_steps": 8674, "loss": 0.4649186134338379, "lr": 1.7193491540898808e-07, "epoch": 1.6403043578510492, "percentage": 82.02, "elapsed_time": "4:51:41", "remaining_time": "1:03:57"} +{"current_steps": 7115, "total_steps": 8674, "loss": 0.480952650308609, "lr": 1.7172122835211333e-07, "epoch": 1.640534931980632, "percentage": 82.03, "elapsed_time": "4:51:44", "remaining_time": "1:03:55"} +{"current_steps": 7116, "total_steps": 8674, "loss": 0.4669501483440399, "lr": 1.7150766169395235e-07, "epoch": 1.6407655061102144, "percentage": 82.04, "elapsed_time": "4:51:46", "remaining_time": "1:03:52"} +{"current_steps": 7117, "total_steps": 8674, "loss": 0.4273250102996826, "lr": 1.7129421546554957e-07, "epoch": 1.6409960802397972, "percentage": 82.05, "elapsed_time": "4:51:48", "remaining_time": "1:03:50"} +{"current_steps": 7118, "total_steps": 8674, "loss": 0.47923076152801514, "lr": 1.71080889697931e-07, "epoch": 1.6412266543693796, "percentage": 82.06, "elapsed_time": "4:51:51", "remaining_time": "1:03:47"} +{"current_steps": 7119, "total_steps": 8674, "loss": 0.42801159620285034, "lr": 1.708676844221061e-07, "epoch": 1.6414572284989624, "percentage": 82.07, "elapsed_time": "4:51:53", "remaining_time": "1:03:45"} +{"current_steps": 7120, "total_steps": 8674, "loss": 0.39929044246673584, "lr": 1.7065459966906636e-07, "epoch": 1.641687802628545, "percentage": 82.08, "elapsed_time": "4:51:56", "remaining_time": "1:03:43"} +{"current_steps": 7121, "total_steps": 8674, "loss": 0.4919764995574951, "lr": 1.7044163546978553e-07, "epoch": 1.6419183767581278, "percentage": 82.1, "elapsed_time": "4:51:58", "remaining_time": "1:03:40"} +{"current_steps": 7122, "total_steps": 8674, "loss": 0.45943617820739746, "lr": 1.702287918552202e-07, "epoch": 1.6421489508877105, "percentage": 82.11, "elapsed_time": "4:52:01", "remaining_time": "1:03:38"} 
+{"current_steps": 7123, "total_steps": 8674, "loss": 0.48078954219818115, "lr": 1.7001606885630948e-07, "epoch": 1.642379525017293, "percentage": 82.12, "elapsed_time": "4:52:03", "remaining_time": "1:03:35"} +{"current_steps": 7124, "total_steps": 8674, "loss": 0.4217113256454468, "lr": 1.6980346650397505e-07, "epoch": 1.6426100991468757, "percentage": 82.13, "elapsed_time": "4:52:05", "remaining_time": "1:03:33"} +{"current_steps": 7125, "total_steps": 8674, "loss": 0.4643937051296234, "lr": 1.6959098482912037e-07, "epoch": 1.6428406732764582, "percentage": 82.14, "elapsed_time": "4:52:08", "remaining_time": "1:03:30"} +{"current_steps": 7126, "total_steps": 8674, "loss": 0.43977001309394836, "lr": 1.6937862386263212e-07, "epoch": 1.643071247406041, "percentage": 82.15, "elapsed_time": "4:52:10", "remaining_time": "1:03:28"} +{"current_steps": 7127, "total_steps": 8674, "loss": 0.3872392177581787, "lr": 1.6916638363537882e-07, "epoch": 1.6433018215356237, "percentage": 82.17, "elapsed_time": "4:52:13", "remaining_time": "1:03:25"} +{"current_steps": 7128, "total_steps": 8674, "loss": 0.44625502824783325, "lr": 1.6895426417821213e-07, "epoch": 1.6435323956652064, "percentage": 82.18, "elapsed_time": "4:52:15", "remaining_time": "1:03:23"} +{"current_steps": 7129, "total_steps": 8674, "loss": 0.36836186051368713, "lr": 1.6874226552196523e-07, "epoch": 1.643762969794789, "percentage": 82.19, "elapsed_time": "4:52:18", "remaining_time": "1:03:20"} +{"current_steps": 7130, "total_steps": 8674, "loss": 0.35491907596588135, "lr": 1.6853038769745465e-07, "epoch": 1.6439935439243718, "percentage": 82.2, "elapsed_time": "4:52:20", "remaining_time": "1:03:18"} +{"current_steps": 7131, "total_steps": 8674, "loss": 0.5210527181625366, "lr": 1.6831863073547913e-07, "epoch": 1.6442241180539543, "percentage": 82.21, "elapsed_time": "4:52:22", "remaining_time": "1:03:15"} +{"current_steps": 7132, "total_steps": 8674, "loss": 0.3805693984031677, "lr": 1.6810699466681932e-07, 
"epoch": 1.644454692183537, "percentage": 82.22, "elapsed_time": "4:52:25", "remaining_time": "1:03:13"} +{"current_steps": 7133, "total_steps": 8674, "loss": 0.5768346786499023, "lr": 1.6789547952223893e-07, "epoch": 1.6446852663131195, "percentage": 82.23, "elapsed_time": "4:52:27", "remaining_time": "1:03:10"} +{"current_steps": 7134, "total_steps": 8674, "loss": 0.46465635299682617, "lr": 1.6768408533248356e-07, "epoch": 1.6449158404427022, "percentage": 82.25, "elapsed_time": "4:52:30", "remaining_time": "1:03:08"} +{"current_steps": 7135, "total_steps": 8674, "loss": 0.43119215965270996, "lr": 1.674728121282819e-07, "epoch": 1.645146414572285, "percentage": 82.26, "elapsed_time": "4:52:32", "remaining_time": "1:03:06"} +{"current_steps": 7136, "total_steps": 8674, "loss": 0.42814093828201294, "lr": 1.6726165994034402e-07, "epoch": 1.6453769887018677, "percentage": 82.27, "elapsed_time": "4:52:34", "remaining_time": "1:03:03"} +{"current_steps": 7137, "total_steps": 8674, "loss": 0.41762328147888184, "lr": 1.6705062879936382e-07, "epoch": 1.6456075628314504, "percentage": 82.28, "elapsed_time": "4:52:37", "remaining_time": "1:03:01"} +{"current_steps": 7138, "total_steps": 8674, "loss": 0.42717012763023376, "lr": 1.668397187360161e-07, "epoch": 1.645838136961033, "percentage": 82.29, "elapsed_time": "4:52:39", "remaining_time": "1:02:58"} +{"current_steps": 7139, "total_steps": 8674, "loss": 0.37660926580429077, "lr": 1.666289297809591e-07, "epoch": 1.6460687110906156, "percentage": 82.3, "elapsed_time": "4:52:42", "remaining_time": "1:02:56"} +{"current_steps": 7140, "total_steps": 8674, "loss": 0.3905887007713318, "lr": 1.664182619648331e-07, "epoch": 1.6462992852201983, "percentage": 82.31, "elapsed_time": "4:52:44", "remaining_time": "1:02:53"} +{"current_steps": 7141, "total_steps": 8674, "loss": 0.4848547577857971, "lr": 1.6620771531826117e-07, "epoch": 1.6465298593497808, "percentage": 82.33, "elapsed_time": "4:52:46", "remaining_time": "1:02:51"} 
+{"current_steps": 7142, "total_steps": 8674, "loss": 0.37895438075065613, "lr": 1.659972898718479e-07, "epoch": 1.6467604334793635, "percentage": 82.34, "elapsed_time": "4:52:49", "remaining_time": "1:02:48"} +{"current_steps": 7143, "total_steps": 8674, "loss": 0.46770527958869934, "lr": 1.6578698565618075e-07, "epoch": 1.6469910076089462, "percentage": 82.35, "elapsed_time": "4:52:51", "remaining_time": "1:02:46"} +{"current_steps": 7144, "total_steps": 8674, "loss": 0.44138044118881226, "lr": 1.6557680270182995e-07, "epoch": 1.647221581738529, "percentage": 82.36, "elapsed_time": "4:52:54", "remaining_time": "1:02:43"} +{"current_steps": 7145, "total_steps": 8674, "loss": 0.3681126832962036, "lr": 1.6536674103934734e-07, "epoch": 1.6474521558681117, "percentage": 82.37, "elapsed_time": "4:52:56", "remaining_time": "1:02:41"} +{"current_steps": 7146, "total_steps": 8674, "loss": 0.4410884380340576, "lr": 1.651568006992675e-07, "epoch": 1.6476827299976944, "percentage": 82.38, "elapsed_time": "4:52:59", "remaining_time": "1:02:38"} +{"current_steps": 7147, "total_steps": 8674, "loss": 0.4161960482597351, "lr": 1.6494698171210776e-07, "epoch": 1.6479133041272769, "percentage": 82.4, "elapsed_time": "4:53:01", "remaining_time": "1:02:36"} +{"current_steps": 7148, "total_steps": 8674, "loss": 0.4912784695625305, "lr": 1.647372841083674e-07, "epoch": 1.6481438782568596, "percentage": 82.41, "elapsed_time": "4:53:03", "remaining_time": "1:02:33"} +{"current_steps": 7149, "total_steps": 8674, "loss": 0.5137985944747925, "lr": 1.6452770791852766e-07, "epoch": 1.648374452386442, "percentage": 82.42, "elapsed_time": "4:53:06", "remaining_time": "1:02:31"} +{"current_steps": 7150, "total_steps": 8674, "loss": 0.43644070625305176, "lr": 1.6431825317305303e-07, "epoch": 1.6486050265160248, "percentage": 82.43, "elapsed_time": "4:53:08", "remaining_time": "1:02:28"} +{"current_steps": 7151, "total_steps": 8674, "loss": 0.4319378733634949, "lr": 1.6410891990238973e-07, 
"epoch": 1.6488356006456075, "percentage": 82.44, "elapsed_time": "4:53:11", "remaining_time": "1:02:26"} +{"current_steps": 7152, "total_steps": 8674, "loss": 0.474090039730072, "lr": 1.6389970813696619e-07, "epoch": 1.6490661747751902, "percentage": 82.45, "elapsed_time": "4:53:13", "remaining_time": "1:02:24"} +{"current_steps": 7153, "total_steps": 8674, "loss": 0.40291503071784973, "lr": 1.6369061790719375e-07, "epoch": 1.649296748904773, "percentage": 82.46, "elapsed_time": "4:53:15", "remaining_time": "1:02:21"} +{"current_steps": 7154, "total_steps": 8674, "loss": 0.51482754945755, "lr": 1.6348164924346562e-07, "epoch": 1.6495273230343557, "percentage": 82.48, "elapsed_time": "4:53:18", "remaining_time": "1:02:19"} +{"current_steps": 7155, "total_steps": 8674, "loss": 0.35308974981307983, "lr": 1.632728021761579e-07, "epoch": 1.6497578971639382, "percentage": 82.49, "elapsed_time": "4:53:20", "remaining_time": "1:02:16"} +{"current_steps": 7156, "total_steps": 8674, "loss": 0.5269055366516113, "lr": 1.6306407673562815e-07, "epoch": 1.6499884712935209, "percentage": 82.5, "elapsed_time": "4:53:23", "remaining_time": "1:02:14"} +{"current_steps": 7157, "total_steps": 8674, "loss": 0.41290512681007385, "lr": 1.6285547295221724e-07, "epoch": 1.6502190454231034, "percentage": 82.51, "elapsed_time": "4:53:25", "remaining_time": "1:02:11"} +{"current_steps": 7158, "total_steps": 8674, "loss": 0.39930522441864014, "lr": 1.6264699085624721e-07, "epoch": 1.650449619552686, "percentage": 82.52, "elapsed_time": "4:53:27", "remaining_time": "1:02:09"} +{"current_steps": 7159, "total_steps": 8674, "loss": 0.4617648422718048, "lr": 1.6243863047802365e-07, "epoch": 1.6506801936822688, "percentage": 82.53, "elapsed_time": "4:53:30", "remaining_time": "1:02:06"} +{"current_steps": 7160, "total_steps": 8674, "loss": 0.4618498980998993, "lr": 1.6223039184783337e-07, "epoch": 1.6509107678118515, "percentage": 82.55, "elapsed_time": "4:53:32", "remaining_time": "1:02:04"} 
+{"current_steps": 7161, "total_steps": 8674, "loss": 0.43138834834098816, "lr": 1.6202227499594635e-07, "epoch": 1.6511413419414342, "percentage": 82.56, "elapsed_time": "4:53:35", "remaining_time": "1:02:01"} +{"current_steps": 7162, "total_steps": 8674, "loss": 0.5330632925033569, "lr": 1.618142799526141e-07, "epoch": 1.651371916071017, "percentage": 82.57, "elapsed_time": "4:53:37", "remaining_time": "1:01:59"} +{"current_steps": 7163, "total_steps": 8674, "loss": 0.45410698652267456, "lr": 1.6160640674807103e-07, "epoch": 1.6516024902005995, "percentage": 82.58, "elapsed_time": "4:53:40", "remaining_time": "1:01:56"} +{"current_steps": 7164, "total_steps": 8674, "loss": 0.4216715693473816, "lr": 1.6139865541253384e-07, "epoch": 1.6518330643301822, "percentage": 82.59, "elapsed_time": "4:53:42", "remaining_time": "1:01:54"} +{"current_steps": 7165, "total_steps": 8674, "loss": 0.3738868832588196, "lr": 1.6119102597620083e-07, "epoch": 1.6520636384597647, "percentage": 82.6, "elapsed_time": "4:53:44", "remaining_time": "1:01:51"} +{"current_steps": 7166, "total_steps": 8674, "loss": 0.44595998525619507, "lr": 1.609835184692535e-07, "epoch": 1.6522942125893474, "percentage": 82.61, "elapsed_time": "4:53:47", "remaining_time": "1:01:49"} +{"current_steps": 7167, "total_steps": 8674, "loss": 0.5446096062660217, "lr": 1.6077613292185466e-07, "epoch": 1.65252478671893, "percentage": 82.63, "elapsed_time": "4:53:49", "remaining_time": "1:01:46"} +{"current_steps": 7168, "total_steps": 8674, "loss": 0.47280746698379517, "lr": 1.605688693641505e-07, "epoch": 1.6527553608485128, "percentage": 82.64, "elapsed_time": "4:53:51", "remaining_time": "1:01:44"} +{"current_steps": 7169, "total_steps": 8674, "loss": 0.5280133485794067, "lr": 1.6036172782626823e-07, "epoch": 1.6529859349780955, "percentage": 82.65, "elapsed_time": "4:53:54", "remaining_time": "1:01:41"} +{"current_steps": 7170, "total_steps": 8674, "loss": 0.4659959375858307, "lr": 1.6015470833831835e-07, "epoch": 
1.6532165091076783, "percentage": 82.66, "elapsed_time": "4:53:56", "remaining_time": "1:01:39"} +{"current_steps": 7171, "total_steps": 8674, "loss": 0.5196797251701355, "lr": 1.5994781093039335e-07, "epoch": 1.6534470832372608, "percentage": 82.67, "elapsed_time": "4:53:59", "remaining_time": "1:01:37"} +{"current_steps": 7172, "total_steps": 8674, "loss": 0.41855669021606445, "lr": 1.597410356325676e-07, "epoch": 1.6536776573668435, "percentage": 82.68, "elapsed_time": "4:54:01", "remaining_time": "1:01:34"} +{"current_steps": 7173, "total_steps": 8674, "loss": 0.43063706159591675, "lr": 1.5953438247489814e-07, "epoch": 1.653908231496426, "percentage": 82.7, "elapsed_time": "4:54:04", "remaining_time": "1:01:32"} +{"current_steps": 7174, "total_steps": 8674, "loss": 0.3954850435256958, "lr": 1.59327851487424e-07, "epoch": 1.6541388056260087, "percentage": 82.71, "elapsed_time": "4:54:06", "remaining_time": "1:01:29"} +{"current_steps": 7175, "total_steps": 8674, "loss": 0.4497464895248413, "lr": 1.591214427001667e-07, "epoch": 1.6543693797555914, "percentage": 82.72, "elapsed_time": "4:54:09", "remaining_time": "1:01:27"} +{"current_steps": 7176, "total_steps": 8674, "loss": 0.47012704610824585, "lr": 1.5891515614312967e-07, "epoch": 1.6545999538851741, "percentage": 82.73, "elapsed_time": "4:54:11", "remaining_time": "1:01:24"} +{"current_steps": 7177, "total_steps": 8674, "loss": 0.399054616689682, "lr": 1.5870899184629872e-07, "epoch": 1.6548305280147568, "percentage": 82.74, "elapsed_time": "4:54:13", "remaining_time": "1:01:22"} +{"current_steps": 7178, "total_steps": 8674, "loss": 0.41277164220809937, "lr": 1.5850294983964208e-07, "epoch": 1.6550611021443395, "percentage": 82.75, "elapsed_time": "4:54:16", "remaining_time": "1:01:19"} +{"current_steps": 7179, "total_steps": 8674, "loss": 0.4735640287399292, "lr": 1.5829703015311013e-07, "epoch": 1.655291676273922, "percentage": 82.76, "elapsed_time": "4:54:18", "remaining_time": "1:01:17"} 
+{"current_steps": 7180, "total_steps": 8674, "loss": 0.4244140386581421, "lr": 1.5809123281663516e-07, "epoch": 1.6555222504035048, "percentage": 82.78, "elapsed_time": "4:54:21", "remaining_time": "1:01:14"} +{"current_steps": 7181, "total_steps": 8674, "loss": 0.4291320741176605, "lr": 1.5788555786013212e-07, "epoch": 1.6557528245330873, "percentage": 82.79, "elapsed_time": "4:54:23", "remaining_time": "1:01:12"} +{"current_steps": 7182, "total_steps": 8674, "loss": 0.3965643048286438, "lr": 1.576800053134979e-07, "epoch": 1.65598339866267, "percentage": 82.8, "elapsed_time": "4:54:25", "remaining_time": "1:01:09"} +{"current_steps": 7183, "total_steps": 8674, "loss": 0.4087764620780945, "lr": 1.5747457520661123e-07, "epoch": 1.6562139727922527, "percentage": 82.81, "elapsed_time": "4:54:28", "remaining_time": "1:01:07"} +{"current_steps": 7184, "total_steps": 8674, "loss": 0.4207920432090759, "lr": 1.5726926756933411e-07, "epoch": 1.6564445469218354, "percentage": 82.82, "elapsed_time": "4:54:30", "remaining_time": "1:01:04"} +{"current_steps": 7185, "total_steps": 8674, "loss": 0.34311753511428833, "lr": 1.570640824315095e-07, "epoch": 1.6566751210514181, "percentage": 82.83, "elapsed_time": "4:54:32", "remaining_time": "1:01:02"} +{"current_steps": 7186, "total_steps": 8674, "loss": 0.44728145003318787, "lr": 1.5685901982296345e-07, "epoch": 1.6569056951810008, "percentage": 82.85, "elapsed_time": "4:54:35", "remaining_time": "1:01:00"} +{"current_steps": 7187, "total_steps": 8674, "loss": 0.38300156593322754, "lr": 1.5665407977350386e-07, "epoch": 1.6571362693105833, "percentage": 82.86, "elapsed_time": "4:54:37", "remaining_time": "1:00:57"} +{"current_steps": 7188, "total_steps": 8674, "loss": 0.32724204659461975, "lr": 1.56449262312921e-07, "epoch": 1.657366843440166, "percentage": 82.87, "elapsed_time": "4:54:40", "remaining_time": "1:00:55"} +{"current_steps": 7189, "total_steps": 8674, "loss": 0.4812743067741394, "lr": 1.562445674709868e-07, "epoch": 
1.6575974175697485, "percentage": 82.88, "elapsed_time": "4:54:42", "remaining_time": "1:00:52"} +{"current_steps": 7190, "total_steps": 8674, "loss": 0.3974485397338867, "lr": 1.5603999527745615e-07, "epoch": 1.6578279916993313, "percentage": 82.89, "elapsed_time": "4:54:45", "remaining_time": "1:00:50"} +{"current_steps": 7191, "total_steps": 8674, "loss": 0.5058138370513916, "lr": 1.5583554576206536e-07, "epoch": 1.658058565828914, "percentage": 82.9, "elapsed_time": "4:54:47", "remaining_time": "1:00:47"} +{"current_steps": 7192, "total_steps": 8674, "loss": 0.4442358613014221, "lr": 1.5563121895453323e-07, "epoch": 1.6582891399584967, "percentage": 82.91, "elapsed_time": "4:54:49", "remaining_time": "1:00:45"} +{"current_steps": 7193, "total_steps": 8674, "loss": 0.35400623083114624, "lr": 1.5542701488456077e-07, "epoch": 1.6585197140880794, "percentage": 82.93, "elapsed_time": "4:54:52", "remaining_time": "1:00:42"} +{"current_steps": 7194, "total_steps": 8674, "loss": 0.5046352744102478, "lr": 1.5522293358183125e-07, "epoch": 1.6587502882176621, "percentage": 82.94, "elapsed_time": "4:54:54", "remaining_time": "1:00:40"} +{"current_steps": 7195, "total_steps": 8674, "loss": 0.45344769954681396, "lr": 1.5501897507601015e-07, "epoch": 1.6589808623472446, "percentage": 82.95, "elapsed_time": "4:54:57", "remaining_time": "1:00:37"} +{"current_steps": 7196, "total_steps": 8674, "loss": 0.4251500368118286, "lr": 1.548151393967444e-07, "epoch": 1.6592114364768273, "percentage": 82.96, "elapsed_time": "4:54:59", "remaining_time": "1:00:35"} +{"current_steps": 7197, "total_steps": 8674, "loss": 0.3728788495063782, "lr": 1.5461142657366399e-07, "epoch": 1.6594420106064098, "percentage": 82.97, "elapsed_time": "4:55:02", "remaining_time": "1:00:32"} +{"current_steps": 7198, "total_steps": 8674, "loss": 0.3143829107284546, "lr": 1.5440783663638036e-07, "epoch": 1.6596725847359926, "percentage": 82.98, "elapsed_time": "4:55:04", "remaining_time": "1:00:30"} 
+{"current_steps": 7199, "total_steps": 8674, "loss": 0.5070813894271851, "lr": 1.5420436961448758e-07, "epoch": 1.6599031588655753, "percentage": 83.0, "elapsed_time": "4:55:06", "remaining_time": "1:00:27"} +{"current_steps": 7200, "total_steps": 8674, "loss": 0.3644014000892639, "lr": 1.5400102553756145e-07, "epoch": 1.660133732995158, "percentage": 83.01, "elapsed_time": "4:55:09", "remaining_time": "1:00:25"} +{"current_steps": 7201, "total_steps": 8674, "loss": 0.4120270609855652, "lr": 1.5379780443516023e-07, "epoch": 1.6603643071247407, "percentage": 83.02, "elapsed_time": "4:55:12", "remaining_time": "1:00:23"} +{"current_steps": 7202, "total_steps": 8674, "loss": 0.4327865242958069, "lr": 1.5359470633682425e-07, "epoch": 1.6605948812543234, "percentage": 83.03, "elapsed_time": "4:55:15", "remaining_time": "1:00:20"} +{"current_steps": 7203, "total_steps": 8674, "loss": 0.626624584197998, "lr": 1.5339173127207562e-07, "epoch": 1.660825455383906, "percentage": 83.04, "elapsed_time": "4:55:17", "remaining_time": "1:00:18"} +{"current_steps": 7204, "total_steps": 8674, "loss": 0.45536088943481445, "lr": 1.5318887927041913e-07, "epoch": 1.6610560295134886, "percentage": 83.05, "elapsed_time": "4:55:20", "remaining_time": "1:00:15"} +{"current_steps": 7205, "total_steps": 8674, "loss": 0.5306276082992554, "lr": 1.52986150361341e-07, "epoch": 1.6612866036430711, "percentage": 83.06, "elapsed_time": "4:55:22", "remaining_time": "1:00:13"} +{"current_steps": 7206, "total_steps": 8674, "loss": 0.4263244867324829, "lr": 1.5278354457431043e-07, "epoch": 1.6615171777726538, "percentage": 83.08, "elapsed_time": "4:55:24", "remaining_time": "1:00:10"} +{"current_steps": 7207, "total_steps": 8674, "loss": 0.4578266143798828, "lr": 1.5258106193877762e-07, "epoch": 1.6617477519022366, "percentage": 83.09, "elapsed_time": "4:55:27", "remaining_time": "1:00:08"} +{"current_steps": 7208, "total_steps": 8674, "loss": 0.5120365619659424, "lr": 1.5237870248417605e-07, "epoch": 
1.6619783260318193, "percentage": 83.1, "elapsed_time": "4:55:29", "remaining_time": "1:00:05"} +{"current_steps": 7209, "total_steps": 8674, "loss": 0.4491463005542755, "lr": 1.521764662399202e-07, "epoch": 1.662208900161402, "percentage": 83.11, "elapsed_time": "4:55:32", "remaining_time": "1:00:03"} +{"current_steps": 7210, "total_steps": 8674, "loss": 0.4810635447502136, "lr": 1.5197435323540752e-07, "epoch": 1.6624394742909847, "percentage": 83.12, "elapsed_time": "4:55:34", "remaining_time": "1:00:00"} +{"current_steps": 7211, "total_steps": 8674, "loss": 0.4250200390815735, "lr": 1.5177236350001722e-07, "epoch": 1.6626700484205672, "percentage": 83.13, "elapsed_time": "4:55:36", "remaining_time": "0:59:58"} +{"current_steps": 7212, "total_steps": 8674, "loss": 0.49981385469436646, "lr": 1.515704970631102e-07, "epoch": 1.66290062255015, "percentage": 83.15, "elapsed_time": "4:55:39", "remaining_time": "0:59:56"} +{"current_steps": 7213, "total_steps": 8674, "loss": 0.40204358100891113, "lr": 1.5136875395403027e-07, "epoch": 1.6631311966797324, "percentage": 83.16, "elapsed_time": "4:55:41", "remaining_time": "0:59:53"} +{"current_steps": 7214, "total_steps": 8674, "loss": 0.514127254486084, "lr": 1.5116713420210236e-07, "epoch": 1.6633617708093151, "percentage": 83.17, "elapsed_time": "4:55:44", "remaining_time": "0:59:51"} +{"current_steps": 7215, "total_steps": 8674, "loss": 0.5119338631629944, "lr": 1.509656378366343e-07, "epoch": 1.6635923449388978, "percentage": 83.18, "elapsed_time": "4:55:46", "remaining_time": "0:59:48"} +{"current_steps": 7216, "total_steps": 8674, "loss": 0.45031970739364624, "lr": 1.507642648869153e-07, "epoch": 1.6638229190684806, "percentage": 83.19, "elapsed_time": "4:55:48", "remaining_time": "0:59:46"} +{"current_steps": 7217, "total_steps": 8674, "loss": 0.4503582715988159, "lr": 1.5056301538221716e-07, "epoch": 1.6640534931980633, "percentage": 83.2, "elapsed_time": "4:55:51", "remaining_time": "0:59:43"} +{"current_steps": 
7218, "total_steps": 8674, "loss": 0.38793227076530457, "lr": 1.503618893517935e-07, "epoch": 1.664284067327646, "percentage": 83.21, "elapsed_time": "4:55:53", "remaining_time": "0:59:41"} +{"current_steps": 7219, "total_steps": 8674, "loss": 0.4446987211704254, "lr": 1.5016088682488026e-07, "epoch": 1.6645146414572285, "percentage": 83.23, "elapsed_time": "4:55:55", "remaining_time": "0:59:38"} +{"current_steps": 7220, "total_steps": 8674, "loss": 0.4687119722366333, "lr": 1.4996000783069485e-07, "epoch": 1.6647452155868112, "percentage": 83.24, "elapsed_time": "4:55:58", "remaining_time": "0:59:36"} +{"current_steps": 7221, "total_steps": 8674, "loss": 0.48283010721206665, "lr": 1.4975925239843734e-07, "epoch": 1.6649757897163937, "percentage": 83.25, "elapsed_time": "4:56:00", "remaining_time": "0:59:33"} +{"current_steps": 7222, "total_steps": 8674, "loss": 0.510201632976532, "lr": 1.4955862055728941e-07, "epoch": 1.6652063638459764, "percentage": 83.26, "elapsed_time": "4:56:03", "remaining_time": "0:59:31"} +{"current_steps": 7223, "total_steps": 8674, "loss": 0.4070482850074768, "lr": 1.4935811233641471e-07, "epoch": 1.6654369379755591, "percentage": 83.27, "elapsed_time": "4:56:05", "remaining_time": "0:59:28"} +{"current_steps": 7224, "total_steps": 8674, "loss": 0.44347989559173584, "lr": 1.4915772776495948e-07, "epoch": 1.6656675121051419, "percentage": 83.28, "elapsed_time": "4:56:08", "remaining_time": "0:59:26"} +{"current_steps": 7225, "total_steps": 8674, "loss": 0.4160166382789612, "lr": 1.4895746687205147e-07, "epoch": 1.6658980862347246, "percentage": 83.29, "elapsed_time": "4:56:10", "remaining_time": "0:59:23"} +{"current_steps": 7226, "total_steps": 8674, "loss": 0.39939236640930176, "lr": 1.4875732968680098e-07, "epoch": 1.6661286603643073, "percentage": 83.31, "elapsed_time": "4:56:13", "remaining_time": "0:59:21"} +{"current_steps": 7227, "total_steps": 8674, "loss": 0.4604174494743347, "lr": 1.4855731623829936e-07, "epoch": 
1.6663592344938898, "percentage": 83.32, "elapsed_time": "4:56:15", "remaining_time": "0:59:19"} +{"current_steps": 7228, "total_steps": 8674, "loss": 0.4691208004951477, "lr": 1.4835742655562134e-07, "epoch": 1.6665898086234725, "percentage": 83.33, "elapsed_time": "4:56:17", "remaining_time": "0:59:16"} +{"current_steps": 7229, "total_steps": 8674, "loss": 0.4146147668361664, "lr": 1.481576606678222e-07, "epoch": 1.666820382753055, "percentage": 83.34, "elapsed_time": "4:56:20", "remaining_time": "0:59:14"} +{"current_steps": 7230, "total_steps": 8674, "loss": 0.4064391255378723, "lr": 1.4795801860394041e-07, "epoch": 1.6670509568826377, "percentage": 83.35, "elapsed_time": "4:56:22", "remaining_time": "0:59:11"} +{"current_steps": 7231, "total_steps": 8674, "loss": 0.3696960210800171, "lr": 1.4775850039299587e-07, "epoch": 1.6672815310122204, "percentage": 83.36, "elapsed_time": "4:56:25", "remaining_time": "0:59:09"} +{"current_steps": 7232, "total_steps": 8674, "loss": 0.4356287121772766, "lr": 1.4755910606399023e-07, "epoch": 1.6675121051418031, "percentage": 83.38, "elapsed_time": "4:56:27", "remaining_time": "0:59:06"} +{"current_steps": 7233, "total_steps": 8674, "loss": 0.39327436685562134, "lr": 1.473598356459078e-07, "epoch": 1.6677426792713859, "percentage": 83.39, "elapsed_time": "4:56:30", "remaining_time": "0:59:04"} +{"current_steps": 7234, "total_steps": 8674, "loss": 0.4722225069999695, "lr": 1.4716068916771452e-07, "epoch": 1.6679732534009684, "percentage": 83.4, "elapsed_time": "4:56:32", "remaining_time": "0:59:01"} +{"current_steps": 7235, "total_steps": 8674, "loss": 0.3645583987236023, "lr": 1.4696166665835852e-07, "epoch": 1.668203827530551, "percentage": 83.41, "elapsed_time": "4:56:35", "remaining_time": "0:58:59"} +{"current_steps": 7236, "total_steps": 8674, "loss": 0.4153117537498474, "lr": 1.4676276814676935e-07, "epoch": 1.6684344016601336, "percentage": 83.42, "elapsed_time": "4:56:37", "remaining_time": "0:58:56"} 
+{"current_steps": 7237, "total_steps": 8674, "loss": 0.3470612168312073, "lr": 1.4656399366185933e-07, "epoch": 1.6686649757897163, "percentage": 83.43, "elapsed_time": "4:56:39", "remaining_time": "0:58:54"} +{"current_steps": 7238, "total_steps": 8674, "loss": 0.3934207260608673, "lr": 1.4636534323252203e-07, "epoch": 1.668895549919299, "percentage": 83.44, "elapsed_time": "4:56:42", "remaining_time": "0:58:51"} +{"current_steps": 7239, "total_steps": 8674, "loss": 0.35530412197113037, "lr": 1.4616681688763355e-07, "epoch": 1.6691261240488817, "percentage": 83.46, "elapsed_time": "4:56:44", "remaining_time": "0:58:49"} +{"current_steps": 7240, "total_steps": 8674, "loss": 0.5218726396560669, "lr": 1.4596841465605136e-07, "epoch": 1.6693566981784644, "percentage": 83.47, "elapsed_time": "4:56:47", "remaining_time": "0:58:47"} +{"current_steps": 7241, "total_steps": 8674, "loss": 0.4287494421005249, "lr": 1.4577013656661542e-07, "epoch": 1.6695872723080472, "percentage": 83.48, "elapsed_time": "4:56:49", "remaining_time": "0:58:44"} +{"current_steps": 7242, "total_steps": 8674, "loss": 0.5161805152893066, "lr": 1.4557198264814775e-07, "epoch": 1.6698178464376296, "percentage": 83.49, "elapsed_time": "4:56:52", "remaining_time": "0:58:42"} +{"current_steps": 7243, "total_steps": 8674, "loss": 0.4843006730079651, "lr": 1.4537395292945153e-07, "epoch": 1.6700484205672124, "percentage": 83.5, "elapsed_time": "4:56:54", "remaining_time": "0:58:39"} +{"current_steps": 7244, "total_steps": 8674, "loss": 0.526993989944458, "lr": 1.4517604743931288e-07, "epoch": 1.6702789946967949, "percentage": 83.51, "elapsed_time": "4:56:56", "remaining_time": "0:58:37"} +{"current_steps": 7245, "total_steps": 8674, "loss": 0.43705734610557556, "lr": 1.4497826620649888e-07, "epoch": 1.6705095688263776, "percentage": 83.53, "elapsed_time": "4:56:59", "remaining_time": "0:58:34"} +{"current_steps": 7246, "total_steps": 8674, "loss": 0.6001747846603394, "lr": 1.4478060925975942e-07, 
"epoch": 1.6707401429559603, "percentage": 83.54, "elapsed_time": "4:57:02", "remaining_time": "0:58:32"} +{"current_steps": 7247, "total_steps": 8674, "loss": 0.4041635990142822, "lr": 1.4458307662782564e-07, "epoch": 1.670970717085543, "percentage": 83.55, "elapsed_time": "4:57:04", "remaining_time": "0:58:29"} +{"current_steps": 7248, "total_steps": 8674, "loss": 0.4425908923149109, "lr": 1.4438566833941112e-07, "epoch": 1.6712012912151257, "percentage": 83.56, "elapsed_time": "4:57:06", "remaining_time": "0:58:27"} +{"current_steps": 7249, "total_steps": 8674, "loss": 0.5202267169952393, "lr": 1.4418838442321102e-07, "epoch": 1.6714318653447084, "percentage": 83.57, "elapsed_time": "4:57:09", "remaining_time": "0:58:24"} +{"current_steps": 7250, "total_steps": 8674, "loss": 0.44352006912231445, "lr": 1.4399122490790293e-07, "epoch": 1.671662439474291, "percentage": 83.58, "elapsed_time": "4:57:11", "remaining_time": "0:58:22"} +{"current_steps": 7251, "total_steps": 8674, "loss": 0.4757179021835327, "lr": 1.4379418982214542e-07, "epoch": 1.6718930136038737, "percentage": 83.59, "elapsed_time": "4:57:14", "remaining_time": "0:58:19"} +{"current_steps": 7252, "total_steps": 8674, "loss": 0.4748988747596741, "lr": 1.4359727919457998e-07, "epoch": 1.6721235877334562, "percentage": 83.61, "elapsed_time": "4:57:16", "remaining_time": "0:58:17"} +{"current_steps": 7253, "total_steps": 8674, "loss": 0.4280398190021515, "lr": 1.434004930538294e-07, "epoch": 1.6723541618630389, "percentage": 83.62, "elapsed_time": "4:57:18", "remaining_time": "0:58:14"} +{"current_steps": 7254, "total_steps": 8674, "loss": 0.4959871172904968, "lr": 1.4320383142849834e-07, "epoch": 1.6725847359926216, "percentage": 83.63, "elapsed_time": "4:57:21", "remaining_time": "0:58:12"} +{"current_steps": 7255, "total_steps": 8674, "loss": 0.506413996219635, "lr": 1.4300729434717396e-07, "epoch": 1.6728153101222043, "percentage": 83.64, "elapsed_time": "4:57:23", "remaining_time": "0:58:10"} 
+{"current_steps": 7256, "total_steps": 8674, "loss": 0.4723675847053528, "lr": 1.4281088183842448e-07, "epoch": 1.673045884251787, "percentage": 83.65, "elapsed_time": "4:57:26", "remaining_time": "0:58:07"} +{"current_steps": 7257, "total_steps": 8674, "loss": 0.41801339387893677, "lr": 1.4261459393080076e-07, "epoch": 1.6732764583813697, "percentage": 83.66, "elapsed_time": "4:57:28", "remaining_time": "0:58:05"} +{"current_steps": 7258, "total_steps": 8674, "loss": 0.4463369846343994, "lr": 1.424184306528351e-07, "epoch": 1.6735070325109522, "percentage": 83.68, "elapsed_time": "4:57:30", "remaining_time": "0:58:02"} +{"current_steps": 7259, "total_steps": 8674, "loss": 0.4167429506778717, "lr": 1.422223920330421e-07, "epoch": 1.673737606640535, "percentage": 83.69, "elapsed_time": "4:57:33", "remaining_time": "0:58:00"} +{"current_steps": 7260, "total_steps": 8674, "loss": 0.48808401823043823, "lr": 1.420264780999174e-07, "epoch": 1.6739681807701174, "percentage": 83.7, "elapsed_time": "4:57:35", "remaining_time": "0:57:57"} +{"current_steps": 7261, "total_steps": 8674, "loss": 0.515659749507904, "lr": 1.4183068888193973e-07, "epoch": 1.6741987548997002, "percentage": 83.71, "elapsed_time": "4:57:38", "remaining_time": "0:57:55"} +{"current_steps": 7262, "total_steps": 8674, "loss": 0.4393026530742645, "lr": 1.416350244075688e-07, "epoch": 1.6744293290292829, "percentage": 83.72, "elapsed_time": "4:57:40", "remaining_time": "0:57:52"} +{"current_steps": 7263, "total_steps": 8674, "loss": 0.35053056478500366, "lr": 1.4143948470524602e-07, "epoch": 1.6746599031588656, "percentage": 83.73, "elapsed_time": "4:57:43", "remaining_time": "0:57:50"} +{"current_steps": 7264, "total_steps": 8674, "loss": 0.35598453879356384, "lr": 1.4124406980339532e-07, "epoch": 1.6748904772884483, "percentage": 83.74, "elapsed_time": "4:57:45", "remaining_time": "0:57:47"} +{"current_steps": 7265, "total_steps": 8674, "loss": 0.47989165782928467, "lr": 1.410487797304224e-07, "epoch": 
1.675121051418031, "percentage": 83.76, "elapsed_time": "4:57:47", "remaining_time": "0:57:45"} +{"current_steps": 7266, "total_steps": 8674, "loss": 0.4621499180793762, "lr": 1.408536145147148e-07, "epoch": 1.6753516255476135, "percentage": 83.77, "elapsed_time": "4:57:50", "remaining_time": "0:57:42"} +{"current_steps": 7267, "total_steps": 8674, "loss": 0.40567925572395325, "lr": 1.4065857418464122e-07, "epoch": 1.6755821996771962, "percentage": 83.78, "elapsed_time": "4:57:52", "remaining_time": "0:57:40"} +{"current_steps": 7268, "total_steps": 8674, "loss": 0.38889849185943604, "lr": 1.4046365876855326e-07, "epoch": 1.6758127738067787, "percentage": 83.79, "elapsed_time": "4:57:55", "remaining_time": "0:57:37"} +{"current_steps": 7269, "total_steps": 8674, "loss": 0.516187846660614, "lr": 1.4026886829478345e-07, "epoch": 1.6760433479363614, "percentage": 83.8, "elapsed_time": "4:57:57", "remaining_time": "0:57:35"} +{"current_steps": 7270, "total_steps": 8674, "loss": 0.4007910192012787, "lr": 1.4007420279164706e-07, "epoch": 1.6762739220659442, "percentage": 83.81, "elapsed_time": "4:57:59", "remaining_time": "0:57:33"} +{"current_steps": 7271, "total_steps": 8674, "loss": 0.4426886737346649, "lr": 1.3987966228744007e-07, "epoch": 1.6765044961955269, "percentage": 83.83, "elapsed_time": "4:58:02", "remaining_time": "0:57:30"} +{"current_steps": 7272, "total_steps": 8674, "loss": 0.46890369057655334, "lr": 1.3968524681044114e-07, "epoch": 1.6767350703251096, "percentage": 83.84, "elapsed_time": "4:58:04", "remaining_time": "0:57:28"} +{"current_steps": 7273, "total_steps": 8674, "loss": 0.510369598865509, "lr": 1.3949095638891096e-07, "epoch": 1.6769656444546923, "percentage": 83.85, "elapsed_time": "4:58:07", "remaining_time": "0:57:25"} +{"current_steps": 7274, "total_steps": 8674, "loss": 0.47810226678848267, "lr": 1.3929679105109106e-07, "epoch": 1.6771962185842748, "percentage": 83.86, "elapsed_time": "4:58:09", "remaining_time": "0:57:23"} 
+{"current_steps": 7275, "total_steps": 8674, "loss": 0.48592591285705566, "lr": 1.3910275082520572e-07, "epoch": 1.6774267927138575, "percentage": 83.87, "elapsed_time": "4:58:12", "remaining_time": "0:57:20"} +{"current_steps": 7276, "total_steps": 8674, "loss": 0.4664943814277649, "lr": 1.3890883573946021e-07, "epoch": 1.67765736684344, "percentage": 83.88, "elapsed_time": "4:58:14", "remaining_time": "0:57:18"} +{"current_steps": 7277, "total_steps": 8674, "loss": 0.47146645188331604, "lr": 1.3871504582204263e-07, "epoch": 1.6778879409730227, "percentage": 83.89, "elapsed_time": "4:58:17", "remaining_time": "0:57:15"} +{"current_steps": 7278, "total_steps": 8674, "loss": 0.5171671509742737, "lr": 1.3852138110112166e-07, "epoch": 1.6781185151026055, "percentage": 83.91, "elapsed_time": "4:58:19", "remaining_time": "0:57:13"} +{"current_steps": 7279, "total_steps": 8674, "loss": 0.45887336134910583, "lr": 1.3832784160484913e-07, "epoch": 1.6783490892321882, "percentage": 83.92, "elapsed_time": "4:58:21", "remaining_time": "0:57:10"} +{"current_steps": 7280, "total_steps": 8674, "loss": 0.4363539516925812, "lr": 1.3813442736135728e-07, "epoch": 1.678579663361771, "percentage": 83.93, "elapsed_time": "4:58:24", "remaining_time": "0:57:08"} +{"current_steps": 7281, "total_steps": 8674, "loss": 0.4626097083091736, "lr": 1.379411383987612e-07, "epoch": 1.6788102374913536, "percentage": 83.94, "elapsed_time": "4:58:26", "remaining_time": "0:57:05"} +{"current_steps": 7282, "total_steps": 8674, "loss": 0.5939204096794128, "lr": 1.3774797474515766e-07, "epoch": 1.679040811620936, "percentage": 83.95, "elapsed_time": "4:58:29", "remaining_time": "0:57:03"} +{"current_steps": 7283, "total_steps": 8674, "loss": 0.5463666915893555, "lr": 1.3755493642862437e-07, "epoch": 1.6792713857505188, "percentage": 83.96, "elapsed_time": "4:58:31", "remaining_time": "0:57:01"} +{"current_steps": 7284, "total_steps": 8674, "loss": 0.3634001910686493, "lr": 1.3736202347722182e-07, 
"epoch": 1.6795019598801013, "percentage": 83.98, "elapsed_time": "4:58:34", "remaining_time": "0:56:58"} +{"current_steps": 7285, "total_steps": 8674, "loss": 0.39512360095977783, "lr": 1.3716923591899166e-07, "epoch": 1.679732534009684, "percentage": 83.99, "elapsed_time": "4:58:36", "remaining_time": "0:56:56"} +{"current_steps": 7286, "total_steps": 8674, "loss": 0.3858473300933838, "lr": 1.3697657378195772e-07, "epoch": 1.6799631081392667, "percentage": 84.0, "elapsed_time": "4:58:38", "remaining_time": "0:56:53"} +{"current_steps": 7287, "total_steps": 8674, "loss": 0.473757266998291, "lr": 1.36784037094125e-07, "epoch": 1.6801936822688495, "percentage": 84.01, "elapsed_time": "4:58:41", "remaining_time": "0:56:51"} +{"current_steps": 7288, "total_steps": 8674, "loss": 0.41679126024246216, "lr": 1.3659162588348107e-07, "epoch": 1.6804242563984322, "percentage": 84.02, "elapsed_time": "4:58:43", "remaining_time": "0:56:48"} +{"current_steps": 7289, "total_steps": 8674, "loss": 0.4267998933792114, "lr": 1.363993401779946e-07, "epoch": 1.680654830528015, "percentage": 84.03, "elapsed_time": "4:58:46", "remaining_time": "0:56:46"} +{"current_steps": 7290, "total_steps": 8674, "loss": 0.5453667044639587, "lr": 1.3620718000561648e-07, "epoch": 1.6808854046575974, "percentage": 84.04, "elapsed_time": "4:58:48", "remaining_time": "0:56:43"} +{"current_steps": 7291, "total_steps": 8674, "loss": 0.3882933259010315, "lr": 1.3601514539427895e-07, "epoch": 1.68111597878718, "percentage": 84.06, "elapsed_time": "4:58:51", "remaining_time": "0:56:41"} +{"current_steps": 7292, "total_steps": 8674, "loss": 0.5565635561943054, "lr": 1.3582323637189653e-07, "epoch": 1.6813465529167626, "percentage": 84.07, "elapsed_time": "4:58:53", "remaining_time": "0:56:38"} +{"current_steps": 7293, "total_steps": 8674, "loss": 0.49807024002075195, "lr": 1.356314529663647e-07, "epoch": 1.6815771270463453, "percentage": 84.08, "elapsed_time": "4:58:55", "remaining_time": "0:56:36"} 
+{"current_steps": 7294, "total_steps": 8674, "loss": 0.40868130326271057, "lr": 1.3543979520556116e-07, "epoch": 1.681807701175928, "percentage": 84.09, "elapsed_time": "4:58:58", "remaining_time": "0:56:33"} +{"current_steps": 7295, "total_steps": 8674, "loss": 0.46088406443595886, "lr": 1.352482631173455e-07, "epoch": 1.6820382753055108, "percentage": 84.1, "elapsed_time": "4:59:00", "remaining_time": "0:56:31"} +{"current_steps": 7296, "total_steps": 8674, "loss": 0.44346722960472107, "lr": 1.3505685672955869e-07, "epoch": 1.6822688494350935, "percentage": 84.11, "elapsed_time": "4:59:03", "remaining_time": "0:56:28"} +{"current_steps": 7297, "total_steps": 8674, "loss": 0.36585044860839844, "lr": 1.348655760700239e-07, "epoch": 1.6824994235646762, "percentage": 84.12, "elapsed_time": "4:59:05", "remaining_time": "0:56:26"} +{"current_steps": 7298, "total_steps": 8674, "loss": 0.46082472801208496, "lr": 1.3467442116654536e-07, "epoch": 1.6827299976942587, "percentage": 84.14, "elapsed_time": "4:59:08", "remaining_time": "0:56:24"} +{"current_steps": 7299, "total_steps": 8674, "loss": 0.5011709928512573, "lr": 1.3448339204690974e-07, "epoch": 1.6829605718238414, "percentage": 84.15, "elapsed_time": "4:59:10", "remaining_time": "0:56:21"} +{"current_steps": 7300, "total_steps": 8674, "loss": 0.4382838010787964, "lr": 1.3429248873888454e-07, "epoch": 1.683191145953424, "percentage": 84.16, "elapsed_time": "4:59:12", "remaining_time": "0:56:19"} +{"current_steps": 7301, "total_steps": 8674, "loss": 0.35204610228538513, "lr": 1.3410171127022008e-07, "epoch": 1.6834217200830066, "percentage": 84.17, "elapsed_time": "4:59:16", "remaining_time": "0:56:16"} +{"current_steps": 7302, "total_steps": 8674, "loss": 0.3915257453918457, "lr": 1.3391105966864745e-07, "epoch": 1.6836522942125893, "percentage": 84.18, "elapsed_time": "4:59:18", "remaining_time": "0:56:14"} +{"current_steps": 7303, "total_steps": 8674, "loss": 0.3945339322090149, "lr": 1.3372053396187967e-07, 
"epoch": 1.683882868342172, "percentage": 84.19, "elapsed_time": "4:59:21", "remaining_time": "0:56:11"} +{"current_steps": 7304, "total_steps": 8674, "loss": 0.48783642053604126, "lr": 1.335301341776117e-07, "epoch": 1.6841134424717548, "percentage": 84.21, "elapsed_time": "4:59:23", "remaining_time": "0:56:09"} +{"current_steps": 7305, "total_steps": 8674, "loss": 0.49026161432266235, "lr": 1.333398603435203e-07, "epoch": 1.6843440166013375, "percentage": 84.22, "elapsed_time": "4:59:26", "remaining_time": "0:56:07"} +{"current_steps": 7306, "total_steps": 8674, "loss": 0.5035061836242676, "lr": 1.3314971248726358e-07, "epoch": 1.68457459073092, "percentage": 84.23, "elapsed_time": "4:59:28", "remaining_time": "0:56:04"} +{"current_steps": 7307, "total_steps": 8674, "loss": 0.5452826023101807, "lr": 1.3295969063648126e-07, "epoch": 1.6848051648605027, "percentage": 84.24, "elapsed_time": "4:59:31", "remaining_time": "0:56:02"} +{"current_steps": 7308, "total_steps": 8674, "loss": 0.4609105885028839, "lr": 1.3276979481879524e-07, "epoch": 1.6850357389900852, "percentage": 84.25, "elapsed_time": "4:59:33", "remaining_time": "0:55:59"} +{"current_steps": 7309, "total_steps": 8674, "loss": 0.5799046754837036, "lr": 1.3258002506180855e-07, "epoch": 1.685266313119668, "percentage": 84.26, "elapsed_time": "4:59:36", "remaining_time": "0:55:57"} +{"current_steps": 7310, "total_steps": 8674, "loss": 0.42096465826034546, "lr": 1.3239038139310644e-07, "epoch": 1.6854968872492506, "percentage": 84.27, "elapsed_time": "4:59:38", "remaining_time": "0:55:54"} +{"current_steps": 7311, "total_steps": 8674, "loss": 0.4741813540458679, "lr": 1.3220086384025508e-07, "epoch": 1.6857274613788333, "percentage": 84.29, "elapsed_time": "4:59:40", "remaining_time": "0:55:52"} +{"current_steps": 7312, "total_steps": 8674, "loss": 0.4872191250324249, "lr": 1.3201147243080302e-07, "epoch": 1.685958035508416, "percentage": 84.3, "elapsed_time": "4:59:43", "remaining_time": "0:55:49"} 
+{"current_steps": 7313, "total_steps": 8674, "loss": 0.5210198163986206, "lr": 1.3182220719228054e-07, "epoch": 1.6861886096379988, "percentage": 84.31, "elapsed_time": "4:59:45", "remaining_time": "0:55:47"} +{"current_steps": 7314, "total_steps": 8674, "loss": 0.4873948395252228, "lr": 1.3163306815219878e-07, "epoch": 1.6864191837675813, "percentage": 84.32, "elapsed_time": "4:59:48", "remaining_time": "0:55:44"} +{"current_steps": 7315, "total_steps": 8674, "loss": 0.46856212615966797, "lr": 1.3144405533805136e-07, "epoch": 1.686649757897164, "percentage": 84.33, "elapsed_time": "4:59:50", "remaining_time": "0:55:42"} +{"current_steps": 7316, "total_steps": 8674, "loss": 0.3931645154953003, "lr": 1.3125516877731279e-07, "epoch": 1.6868803320267465, "percentage": 84.34, "elapsed_time": "4:59:52", "remaining_time": "0:55:39"} +{"current_steps": 7317, "total_steps": 8674, "loss": 0.4473317861557007, "lr": 1.3106640849744023e-07, "epoch": 1.6871109061563292, "percentage": 84.36, "elapsed_time": "4:59:55", "remaining_time": "0:55:37"} +{"current_steps": 7318, "total_steps": 8674, "loss": 0.4499043822288513, "lr": 1.3087777452587124e-07, "epoch": 1.687341480285912, "percentage": 84.37, "elapsed_time": "4:59:57", "remaining_time": "0:55:34"} +{"current_steps": 7319, "total_steps": 8674, "loss": 0.4992508292198181, "lr": 1.30689266890026e-07, "epoch": 1.6875720544154946, "percentage": 84.38, "elapsed_time": "5:00:00", "remaining_time": "0:55:32"} +{"current_steps": 7320, "total_steps": 8674, "loss": 0.4684743583202362, "lr": 1.305008856173061e-07, "epoch": 1.6878026285450773, "percentage": 84.39, "elapsed_time": "5:00:02", "remaining_time": "0:55:29"} +{"current_steps": 7321, "total_steps": 8674, "loss": 0.5543930530548096, "lr": 1.303126307350948e-07, "epoch": 1.68803320267466, "percentage": 84.4, "elapsed_time": "5:00:04", "remaining_time": "0:55:27"} +{"current_steps": 7322, "total_steps": 8674, "loss": 0.3812211751937866, "lr": 1.3012450227075655e-07, "epoch": 
1.6882637768042426, "percentage": 84.41, "elapsed_time": "5:00:07", "remaining_time": "0:55:25"} +{"current_steps": 7323, "total_steps": 8674, "loss": 0.5455845594406128, "lr": 1.299365002516377e-07, "epoch": 1.6884943509338253, "percentage": 84.42, "elapsed_time": "5:00:09", "remaining_time": "0:55:22"} +{"current_steps": 7324, "total_steps": 8674, "loss": 0.4256778657436371, "lr": 1.2974862470506654e-07, "epoch": 1.6887249250634078, "percentage": 84.44, "elapsed_time": "5:00:12", "remaining_time": "0:55:20"} +{"current_steps": 7325, "total_steps": 8674, "loss": 0.4973354637622833, "lr": 1.2956087565835228e-07, "epoch": 1.6889554991929905, "percentage": 84.45, "elapsed_time": "5:00:14", "remaining_time": "0:55:17"} +{"current_steps": 7326, "total_steps": 8674, "loss": 0.5141343474388123, "lr": 1.2937325313878666e-07, "epoch": 1.6891860733225732, "percentage": 84.46, "elapsed_time": "5:00:17", "remaining_time": "0:55:15"} +{"current_steps": 7327, "total_steps": 8674, "loss": 0.3872978687286377, "lr": 1.2918575717364178e-07, "epoch": 1.689416647452156, "percentage": 84.47, "elapsed_time": "5:00:19", "remaining_time": "0:55:12"} +{"current_steps": 7328, "total_steps": 8674, "loss": 0.4333486557006836, "lr": 1.2899838779017292e-07, "epoch": 1.6896472215817386, "percentage": 84.48, "elapsed_time": "5:00:21", "remaining_time": "0:55:10"} +{"current_steps": 7329, "total_steps": 8674, "loss": 0.42979496717453003, "lr": 1.2881114501561553e-07, "epoch": 1.6898777957113214, "percentage": 84.49, "elapsed_time": "5:00:24", "remaining_time": "0:55:07"} +{"current_steps": 7330, "total_steps": 8674, "loss": 0.43296414613723755, "lr": 1.2862402887718771e-07, "epoch": 1.6901083698409038, "percentage": 84.51, "elapsed_time": "5:00:27", "remaining_time": "0:55:05"} +{"current_steps": 7331, "total_steps": 8674, "loss": 0.41763681173324585, "lr": 1.2843703940208816e-07, "epoch": 1.6903389439704866, "percentage": 84.52, "elapsed_time": "5:00:29", "remaining_time": "0:55:02"} 
+{"current_steps": 7332, "total_steps": 8674, "loss": 0.4531592130661011, "lr": 1.2825017661749814e-07, "epoch": 1.690569518100069, "percentage": 84.53, "elapsed_time": "5:00:31", "remaining_time": "0:55:00"} +{"current_steps": 7333, "total_steps": 8674, "loss": 0.4608149826526642, "lr": 1.2806344055057995e-07, "epoch": 1.6908000922296518, "percentage": 84.54, "elapsed_time": "5:00:34", "remaining_time": "0:54:57"} +{"current_steps": 7334, "total_steps": 8674, "loss": 0.4298786520957947, "lr": 1.2787683122847726e-07, "epoch": 1.6910306663592345, "percentage": 84.55, "elapsed_time": "5:00:36", "remaining_time": "0:54:55"} +{"current_steps": 7335, "total_steps": 8674, "loss": 0.4404297471046448, "lr": 1.2769034867831586e-07, "epoch": 1.6912612404888172, "percentage": 84.56, "elapsed_time": "5:00:39", "remaining_time": "0:54:53"} +{"current_steps": 7336, "total_steps": 8674, "loss": 0.3857702910900116, "lr": 1.2750399292720281e-07, "epoch": 1.6914918146184, "percentage": 84.57, "elapsed_time": "5:00:41", "remaining_time": "0:54:50"} +{"current_steps": 7337, "total_steps": 8674, "loss": 0.351214200258255, "lr": 1.2731776400222716e-07, "epoch": 1.6917223887479826, "percentage": 84.59, "elapsed_time": "5:00:44", "remaining_time": "0:54:48"} +{"current_steps": 7338, "total_steps": 8674, "loss": 0.4711484909057617, "lr": 1.2713166193045854e-07, "epoch": 1.6919529628775651, "percentage": 84.6, "elapsed_time": "5:00:46", "remaining_time": "0:54:45"} +{"current_steps": 7339, "total_steps": 8674, "loss": 0.4819946587085724, "lr": 1.2694568673894946e-07, "epoch": 1.6921835370071479, "percentage": 84.61, "elapsed_time": "5:00:49", "remaining_time": "0:54:43"} +{"current_steps": 7340, "total_steps": 8674, "loss": 0.39870262145996094, "lr": 1.267598384547327e-07, "epoch": 1.6924141111367303, "percentage": 84.62, "elapsed_time": "5:00:51", "remaining_time": "0:54:40"} +{"current_steps": 7341, "total_steps": 8674, "loss": 0.4775997996330261, "lr": 1.265741171048237e-07, "epoch": 
1.692644685266313, "percentage": 84.63, "elapsed_time": "5:00:53", "remaining_time": "0:54:38"} +{"current_steps": 7342, "total_steps": 8674, "loss": 0.4166836738586426, "lr": 1.2638852271621836e-07, "epoch": 1.6928752593958958, "percentage": 84.64, "elapsed_time": "5:00:56", "remaining_time": "0:54:35"} +{"current_steps": 7343, "total_steps": 8674, "loss": 0.396761953830719, "lr": 1.2620305531589514e-07, "epoch": 1.6931058335254785, "percentage": 84.66, "elapsed_time": "5:00:58", "remaining_time": "0:54:33"} +{"current_steps": 7344, "total_steps": 8674, "loss": 0.36929184198379517, "lr": 1.260177149308136e-07, "epoch": 1.6933364076550612, "percentage": 84.67, "elapsed_time": "5:01:00", "remaining_time": "0:54:30"} +{"current_steps": 7345, "total_steps": 8674, "loss": 0.4664369821548462, "lr": 1.2583250158791459e-07, "epoch": 1.6935669817846437, "percentage": 84.68, "elapsed_time": "5:01:03", "remaining_time": "0:54:28"} +{"current_steps": 7346, "total_steps": 8674, "loss": 0.40877625346183777, "lr": 1.2564741531412115e-07, "epoch": 1.6937975559142264, "percentage": 84.69, "elapsed_time": "5:01:05", "remaining_time": "0:54:25"} +{"current_steps": 7347, "total_steps": 8674, "loss": 0.4282684922218323, "lr": 1.254624561363369e-07, "epoch": 1.694028130043809, "percentage": 84.7, "elapsed_time": "5:01:08", "remaining_time": "0:54:23"} +{"current_steps": 7348, "total_steps": 8674, "loss": 0.5430412292480469, "lr": 1.2527762408144805e-07, "epoch": 1.6942587041733916, "percentage": 84.71, "elapsed_time": "5:01:10", "remaining_time": "0:54:21"} +{"current_steps": 7349, "total_steps": 8674, "loss": 0.45990923047065735, "lr": 1.2509291917632147e-07, "epoch": 1.6944892783029744, "percentage": 84.72, "elapsed_time": "5:01:13", "remaining_time": "0:54:18"} +{"current_steps": 7350, "total_steps": 8674, "loss": 0.38062262535095215, "lr": 1.2490834144780593e-07, "epoch": 1.694719852432557, "percentage": 84.74, "elapsed_time": "5:01:15", "remaining_time": "0:54:16"} 
+{"current_steps": 7351, "total_steps": 8674, "loss": 0.4704701900482178, "lr": 1.2472389092273172e-07, "epoch": 1.6949504265621398, "percentage": 84.75, "elapsed_time": "5:01:18", "remaining_time": "0:54:13"} +{"current_steps": 7352, "total_steps": 8674, "loss": 0.4439951181411743, "lr": 1.2453956762791084e-07, "epoch": 1.6951810006917225, "percentage": 84.76, "elapsed_time": "5:01:20", "remaining_time": "0:54:11"} +{"current_steps": 7353, "total_steps": 8674, "loss": 0.49405014514923096, "lr": 1.2435537159013632e-07, "epoch": 1.695411574821305, "percentage": 84.77, "elapsed_time": "5:01:22", "remaining_time": "0:54:08"} +{"current_steps": 7354, "total_steps": 8674, "loss": 0.4282076060771942, "lr": 1.2417130283618282e-07, "epoch": 1.6956421489508877, "percentage": 84.78, "elapsed_time": "5:01:25", "remaining_time": "0:54:06"} +{"current_steps": 7355, "total_steps": 8674, "loss": 0.43492811918258667, "lr": 1.2398736139280687e-07, "epoch": 1.6958727230804702, "percentage": 84.79, "elapsed_time": "5:01:27", "remaining_time": "0:54:03"} +{"current_steps": 7356, "total_steps": 8674, "loss": 0.37239378690719604, "lr": 1.238035472867458e-07, "epoch": 1.696103297210053, "percentage": 84.81, "elapsed_time": "5:01:30", "remaining_time": "0:54:01"} +{"current_steps": 7357, "total_steps": 8674, "loss": 0.39911961555480957, "lr": 1.236198605447194e-07, "epoch": 1.6963338713396356, "percentage": 84.82, "elapsed_time": "5:01:32", "remaining_time": "0:53:58"} +{"current_steps": 7358, "total_steps": 8674, "loss": 0.4962255656719208, "lr": 1.2343630119342786e-07, "epoch": 1.6965644454692184, "percentage": 84.83, "elapsed_time": "5:01:35", "remaining_time": "0:53:56"} +{"current_steps": 7359, "total_steps": 8674, "loss": 0.37414759397506714, "lr": 1.2325286925955358e-07, "epoch": 1.696795019598801, "percentage": 84.84, "elapsed_time": "5:01:37", "remaining_time": "0:53:53"} +{"current_steps": 7360, "total_steps": 8674, "loss": 0.41224929690361023, "lr": 1.230695647697604e-07, 
"epoch": 1.6970255937283838, "percentage": 84.85, "elapsed_time": "5:01:39", "remaining_time": "0:53:51"} +{"current_steps": 7361, "total_steps": 8674, "loss": 0.43184489011764526, "lr": 1.228863877506936e-07, "epoch": 1.6972561678579663, "percentage": 84.86, "elapsed_time": "5:01:42", "remaining_time": "0:53:48"} +{"current_steps": 7362, "total_steps": 8674, "loss": 0.4741829037666321, "lr": 1.227033382289795e-07, "epoch": 1.697486741987549, "percentage": 84.87, "elapsed_time": "5:01:44", "remaining_time": "0:53:46"} +{"current_steps": 7363, "total_steps": 8674, "loss": 0.43224620819091797, "lr": 1.2252041623122643e-07, "epoch": 1.6977173161171315, "percentage": 84.89, "elapsed_time": "5:01:47", "remaining_time": "0:53:44"} +{"current_steps": 7364, "total_steps": 8674, "loss": 0.46645525097846985, "lr": 1.2233762178402386e-07, "epoch": 1.6979478902467142, "percentage": 84.9, "elapsed_time": "5:01:49", "remaining_time": "0:53:41"} +{"current_steps": 7365, "total_steps": 8674, "loss": 0.4237707555294037, "lr": 1.2215495491394256e-07, "epoch": 1.698178464376297, "percentage": 84.91, "elapsed_time": "5:01:52", "remaining_time": "0:53:39"} +{"current_steps": 7366, "total_steps": 8674, "loss": 0.36378395557403564, "lr": 1.2197241564753535e-07, "epoch": 1.6984090385058797, "percentage": 84.92, "elapsed_time": "5:01:54", "remaining_time": "0:53:36"} +{"current_steps": 7367, "total_steps": 8674, "loss": 0.4564269185066223, "lr": 1.21790004011336e-07, "epoch": 1.6986396126354624, "percentage": 84.93, "elapsed_time": "5:01:56", "remaining_time": "0:53:34"} +{"current_steps": 7368, "total_steps": 8674, "loss": 0.4492420256137848, "lr": 1.2160772003186027e-07, "epoch": 1.698870186765045, "percentage": 84.94, "elapsed_time": "5:01:59", "remaining_time": "0:53:31"} +{"current_steps": 7369, "total_steps": 8674, "loss": 0.515146017074585, "lr": 1.214255637356043e-07, "epoch": 1.6991007608946276, "percentage": 84.96, "elapsed_time": "5:02:01", "remaining_time": "0:53:29"} 
+{"current_steps": 7370, "total_steps": 8674, "loss": 0.41473329067230225, "lr": 1.2124353514904707e-07, "epoch": 1.6993313350242103, "percentage": 84.97, "elapsed_time": "5:02:04", "remaining_time": "0:53:26"} +{"current_steps": 7371, "total_steps": 8674, "loss": 0.4408412575721741, "lr": 1.210616342986477e-07, "epoch": 1.6995619091537928, "percentage": 84.98, "elapsed_time": "5:02:06", "remaining_time": "0:53:24"} +{"current_steps": 7372, "total_steps": 8674, "loss": 0.5370820760726929, "lr": 1.208798612108477e-07, "epoch": 1.6997924832833755, "percentage": 84.99, "elapsed_time": "5:02:09", "remaining_time": "0:53:21"} +{"current_steps": 7373, "total_steps": 8674, "loss": 0.46518170833587646, "lr": 1.206982159120693e-07, "epoch": 1.7000230574129582, "percentage": 85.0, "elapsed_time": "5:02:11", "remaining_time": "0:53:19"} +{"current_steps": 7374, "total_steps": 8674, "loss": 0.45063477754592896, "lr": 1.205166984287167e-07, "epoch": 1.700253631542541, "percentage": 85.01, "elapsed_time": "5:02:14", "remaining_time": "0:53:16"} +{"current_steps": 7375, "total_steps": 8674, "loss": 0.47391965985298157, "lr": 1.2033530878717546e-07, "epoch": 1.7004842056721237, "percentage": 85.02, "elapsed_time": "5:02:16", "remaining_time": "0:53:14"} +{"current_steps": 7376, "total_steps": 8674, "loss": 0.45812156796455383, "lr": 1.2015404701381205e-07, "epoch": 1.7007147798017064, "percentage": 85.04, "elapsed_time": "5:02:18", "remaining_time": "0:53:11"} +{"current_steps": 7377, "total_steps": 8674, "loss": 0.5174708366394043, "lr": 1.1997291313497503e-07, "epoch": 1.7009453539312889, "percentage": 85.05, "elapsed_time": "5:02:21", "remaining_time": "0:53:09"} +{"current_steps": 7378, "total_steps": 8674, "loss": 0.3412814736366272, "lr": 1.1979190717699373e-07, "epoch": 1.7011759280608716, "percentage": 85.06, "elapsed_time": "5:02:23", "remaining_time": "0:53:07"} +{"current_steps": 7379, "total_steps": 8674, "loss": 0.41912171244621277, "lr": 1.196110291661796e-07, 
"epoch": 1.701406502190454, "percentage": 85.07, "elapsed_time": "5:02:26", "remaining_time": "0:53:04"} +{"current_steps": 7380, "total_steps": 8674, "loss": 0.5569772720336914, "lr": 1.1943027912882464e-07, "epoch": 1.7016370763200368, "percentage": 85.08, "elapsed_time": "5:02:28", "remaining_time": "0:53:02"} +{"current_steps": 7381, "total_steps": 8674, "loss": 0.40875375270843506, "lr": 1.1924965709120304e-07, "epoch": 1.7018676504496195, "percentage": 85.09, "elapsed_time": "5:02:31", "remaining_time": "0:52:59"} +{"current_steps": 7382, "total_steps": 8674, "loss": 0.46906760334968567, "lr": 1.1906916307956983e-07, "epoch": 1.7020982245792022, "percentage": 85.1, "elapsed_time": "5:02:33", "remaining_time": "0:52:57"} +{"current_steps": 7383, "total_steps": 8674, "loss": 0.40830397605895996, "lr": 1.1888879712016165e-07, "epoch": 1.702328798708785, "percentage": 85.12, "elapsed_time": "5:02:35", "remaining_time": "0:52:54"} +{"current_steps": 7384, "total_steps": 8674, "loss": 0.4051646590232849, "lr": 1.1870855923919687e-07, "epoch": 1.7025593728383677, "percentage": 85.13, "elapsed_time": "5:02:38", "remaining_time": "0:52:52"} +{"current_steps": 7385, "total_steps": 8674, "loss": 0.5042610764503479, "lr": 1.1852844946287432e-07, "epoch": 1.7027899469679502, "percentage": 85.14, "elapsed_time": "5:02:40", "remaining_time": "0:52:49"} +{"current_steps": 7386, "total_steps": 8674, "loss": 0.5304923057556152, "lr": 1.183484678173754e-07, "epoch": 1.7030205210975329, "percentage": 85.15, "elapsed_time": "5:02:43", "remaining_time": "0:52:47"} +{"current_steps": 7387, "total_steps": 8674, "loss": 0.443366676568985, "lr": 1.1816861432886171e-07, "epoch": 1.7032510952271154, "percentage": 85.16, "elapsed_time": "5:02:45", "remaining_time": "0:52:44"} +{"current_steps": 7388, "total_steps": 8674, "loss": 0.4527779817581177, "lr": 1.1798888902347714e-07, "epoch": 1.703481669356698, "percentage": 85.17, "elapsed_time": "5:02:47", "remaining_time": "0:52:42"} 
+{"current_steps": 7389, "total_steps": 8674, "loss": 0.4277183413505554, "lr": 1.1780929192734634e-07, "epoch": 1.7037122434862808, "percentage": 85.19, "elapsed_time": "5:02:50", "remaining_time": "0:52:39"} +{"current_steps": 7390, "total_steps": 8674, "loss": 0.4908677637577057, "lr": 1.1762982306657577e-07, "epoch": 1.7039428176158635, "percentage": 85.2, "elapsed_time": "5:02:52", "remaining_time": "0:52:37"} +{"current_steps": 7391, "total_steps": 8674, "loss": 0.398892879486084, "lr": 1.1745048246725286e-07, "epoch": 1.7041733917454462, "percentage": 85.21, "elapsed_time": "5:02:55", "remaining_time": "0:52:35"} +{"current_steps": 7392, "total_steps": 8674, "loss": 0.4654615521430969, "lr": 1.1727127015544691e-07, "epoch": 1.704403965875029, "percentage": 85.22, "elapsed_time": "5:02:57", "remaining_time": "0:52:32"} +{"current_steps": 7393, "total_steps": 8674, "loss": 0.4850313663482666, "lr": 1.1709218615720806e-07, "epoch": 1.7046345400046115, "percentage": 85.23, "elapsed_time": "5:03:00", "remaining_time": "0:52:30"} +{"current_steps": 7394, "total_steps": 8674, "loss": 0.4036976099014282, "lr": 1.1691323049856772e-07, "epoch": 1.7048651141341942, "percentage": 85.24, "elapsed_time": "5:03:02", "remaining_time": "0:52:27"} +{"current_steps": 7395, "total_steps": 8674, "loss": 0.39174383878707886, "lr": 1.167344032055394e-07, "epoch": 1.7050956882637767, "percentage": 85.25, "elapsed_time": "5:03:05", "remaining_time": "0:52:25"} +{"current_steps": 7396, "total_steps": 8674, "loss": 0.44915109872817993, "lr": 1.1655570430411699e-07, "epoch": 1.7053262623933594, "percentage": 85.27, "elapsed_time": "5:03:07", "remaining_time": "0:52:22"} +{"current_steps": 7397, "total_steps": 8674, "loss": 0.4720522165298462, "lr": 1.1637713382027636e-07, "epoch": 1.705556836522942, "percentage": 85.28, "elapsed_time": "5:03:10", "remaining_time": "0:52:20"} +{"current_steps": 7398, "total_steps": 8674, "loss": 0.4452325105667114, "lr": 1.1619869177997455e-07, "epoch": 
1.7057874106525248, "percentage": 85.29, "elapsed_time": "5:03:12", "remaining_time": "0:52:17"} +{"current_steps": 7399, "total_steps": 8674, "loss": 0.4009271562099457, "lr": 1.1602037820915023e-07, "epoch": 1.7060179847821075, "percentage": 85.3, "elapsed_time": "5:03:14", "remaining_time": "0:52:15"} +{"current_steps": 7400, "total_steps": 8674, "loss": 0.37518051266670227, "lr": 1.1584219313372257e-07, "epoch": 1.7062485589116902, "percentage": 85.31, "elapsed_time": "5:03:17", "remaining_time": "0:52:12"} +{"current_steps": 7401, "total_steps": 8674, "loss": 0.42883241176605225, "lr": 1.1566413657959295e-07, "epoch": 1.7064791330412727, "percentage": 85.32, "elapsed_time": "5:03:20", "remaining_time": "0:52:10"} +{"current_steps": 7402, "total_steps": 8674, "loss": 0.4597551226615906, "lr": 1.1548620857264346e-07, "epoch": 1.7067097071708555, "percentage": 85.34, "elapsed_time": "5:03:23", "remaining_time": "0:52:08"} +{"current_steps": 7403, "total_steps": 8674, "loss": 0.5491876006126404, "lr": 1.1530840913873797e-07, "epoch": 1.706940281300438, "percentage": 85.35, "elapsed_time": "5:03:25", "remaining_time": "0:52:05"} +{"current_steps": 7404, "total_steps": 8674, "loss": 0.5632074475288391, "lr": 1.1513073830372122e-07, "epoch": 1.7071708554300207, "percentage": 85.36, "elapsed_time": "5:03:28", "remaining_time": "0:52:03"} +{"current_steps": 7405, "total_steps": 8674, "loss": 0.5251858234405518, "lr": 1.1495319609341947e-07, "epoch": 1.7074014295596034, "percentage": 85.37, "elapsed_time": "5:03:30", "remaining_time": "0:52:00"} +{"current_steps": 7406, "total_steps": 8674, "loss": 0.5388965606689453, "lr": 1.1477578253364028e-07, "epoch": 1.707632003689186, "percentage": 85.38, "elapsed_time": "5:03:33", "remaining_time": "0:51:58"} +{"current_steps": 7407, "total_steps": 8674, "loss": 0.4429551959037781, "lr": 1.145984976501726e-07, "epoch": 1.7078625778187688, "percentage": 85.39, "elapsed_time": "5:03:35", "remaining_time": "0:51:55"} 
+{"current_steps": 7408, "total_steps": 8674, "loss": 0.4702358841896057, "lr": 1.144213414687868e-07, "epoch": 1.7080931519483515, "percentage": 85.4, "elapsed_time": "5:03:37", "remaining_time": "0:51:53"} +{"current_steps": 7409, "total_steps": 8674, "loss": 0.4506569504737854, "lr": 1.1424431401523382e-07, "epoch": 1.708323726077934, "percentage": 85.42, "elapsed_time": "5:03:40", "remaining_time": "0:51:50"} +{"current_steps": 7410, "total_steps": 8674, "loss": 0.384244441986084, "lr": 1.1406741531524689e-07, "epoch": 1.7085543002075168, "percentage": 85.43, "elapsed_time": "5:03:42", "remaining_time": "0:51:48"} +{"current_steps": 7411, "total_steps": 8674, "loss": 0.4642629027366638, "lr": 1.1389064539453952e-07, "epoch": 1.7087848743370992, "percentage": 85.44, "elapsed_time": "5:03:45", "remaining_time": "0:51:45"} +{"current_steps": 7412, "total_steps": 8674, "loss": 0.4568482041358948, "lr": 1.1371400427880761e-07, "epoch": 1.709015448466682, "percentage": 85.45, "elapsed_time": "5:03:47", "remaining_time": "0:51:43"} +{"current_steps": 7413, "total_steps": 8674, "loss": 0.536895215511322, "lr": 1.135374919937272e-07, "epoch": 1.7092460225962647, "percentage": 85.46, "elapsed_time": "5:03:49", "remaining_time": "0:51:41"} +{"current_steps": 7414, "total_steps": 8674, "loss": 0.49696239829063416, "lr": 1.1336110856495628e-07, "epoch": 1.7094765967258474, "percentage": 85.47, "elapsed_time": "5:03:52", "remaining_time": "0:51:38"} +{"current_steps": 7415, "total_steps": 8674, "loss": 0.3857358694076538, "lr": 1.1318485401813438e-07, "epoch": 1.7097071708554301, "percentage": 85.49, "elapsed_time": "5:03:54", "remaining_time": "0:51:36"} +{"current_steps": 7416, "total_steps": 8674, "loss": 0.38111335039138794, "lr": 1.1300872837888121e-07, "epoch": 1.7099377449850128, "percentage": 85.5, "elapsed_time": "5:03:57", "remaining_time": "0:51:33"} +{"current_steps": 7417, "total_steps": 8674, "loss": 0.4255755543708801, "lr": 1.1283273167279906e-07, "epoch": 
1.7101683191145953, "percentage": 85.51, "elapsed_time": "5:03:59", "remaining_time": "0:51:31"} +{"current_steps": 7418, "total_steps": 8674, "loss": 0.5048757791519165, "lr": 1.1265686392547024e-07, "epoch": 1.710398893244178, "percentage": 85.52, "elapsed_time": "5:04:01", "remaining_time": "0:51:28"} +{"current_steps": 7419, "total_steps": 8674, "loss": 0.5402916073799133, "lr": 1.1248112516245944e-07, "epoch": 1.7106294673737605, "percentage": 85.53, "elapsed_time": "5:04:04", "remaining_time": "0:51:26"} +{"current_steps": 7420, "total_steps": 8674, "loss": 0.3617591857910156, "lr": 1.1230551540931165e-07, "epoch": 1.7108600415033433, "percentage": 85.54, "elapsed_time": "5:04:06", "remaining_time": "0:51:23"} +{"current_steps": 7421, "total_steps": 8674, "loss": 0.4636116921901703, "lr": 1.1213003469155369e-07, "epoch": 1.711090615632926, "percentage": 85.55, "elapsed_time": "5:04:09", "remaining_time": "0:51:21"} +{"current_steps": 7422, "total_steps": 8674, "loss": 0.4675198495388031, "lr": 1.1195468303469346e-07, "epoch": 1.7113211897625087, "percentage": 85.57, "elapsed_time": "5:04:12", "remaining_time": "0:51:18"} +{"current_steps": 7423, "total_steps": 8674, "loss": 0.48491787910461426, "lr": 1.1177946046422038e-07, "epoch": 1.7115517638920914, "percentage": 85.58, "elapsed_time": "5:04:14", "remaining_time": "0:51:16"} +{"current_steps": 7424, "total_steps": 8674, "loss": 0.3898283839225769, "lr": 1.1160436700560449e-07, "epoch": 1.7117823380216741, "percentage": 85.59, "elapsed_time": "5:04:16", "remaining_time": "0:51:13"} +{"current_steps": 7425, "total_steps": 8674, "loss": 0.41522908210754395, "lr": 1.1142940268429735e-07, "epoch": 1.7120129121512566, "percentage": 85.6, "elapsed_time": "5:04:19", "remaining_time": "0:51:11"} +{"current_steps": 7426, "total_steps": 8674, "loss": 0.4681985378265381, "lr": 1.1125456752573215e-07, "epoch": 1.7122434862808393, "percentage": 85.61, "elapsed_time": "5:04:21", "remaining_time": "0:51:09"} 
+{"current_steps": 7427, "total_steps": 8674, "loss": 0.4788553714752197, "lr": 1.1107986155532245e-07, "epoch": 1.7124740604104218, "percentage": 85.62, "elapsed_time": "5:04:23", "remaining_time": "0:51:06"} +{"current_steps": 7428, "total_steps": 8674, "loss": 0.43853843212127686, "lr": 1.1090528479846406e-07, "epoch": 1.7127046345400045, "percentage": 85.64, "elapsed_time": "5:04:26", "remaining_time": "0:51:04"} +{"current_steps": 7429, "total_steps": 8674, "loss": 0.3736591637134552, "lr": 1.107308372805329e-07, "epoch": 1.7129352086695873, "percentage": 85.65, "elapsed_time": "5:04:28", "remaining_time": "0:51:01"} +{"current_steps": 7430, "total_steps": 8674, "loss": 0.5770819783210754, "lr": 1.1055651902688712e-07, "epoch": 1.71316578279917, "percentage": 85.66, "elapsed_time": "5:04:31", "remaining_time": "0:50:59"} +{"current_steps": 7431, "total_steps": 8674, "loss": 0.5906555652618408, "lr": 1.1038233006286558e-07, "epoch": 1.7133963569287527, "percentage": 85.67, "elapsed_time": "5:04:33", "remaining_time": "0:50:56"} +{"current_steps": 7432, "total_steps": 8674, "loss": 0.4621407389640808, "lr": 1.1020827041378844e-07, "epoch": 1.7136269310583354, "percentage": 85.68, "elapsed_time": "5:04:36", "remaining_time": "0:50:54"} +{"current_steps": 7433, "total_steps": 8674, "loss": 0.4203164279460907, "lr": 1.1003434010495705e-07, "epoch": 1.713857505187918, "percentage": 85.69, "elapsed_time": "5:04:38", "remaining_time": "0:50:51"} +{"current_steps": 7434, "total_steps": 8674, "loss": 0.4607565104961395, "lr": 1.0986053916165373e-07, "epoch": 1.7140880793175006, "percentage": 85.7, "elapsed_time": "5:04:40", "remaining_time": "0:50:49"} +{"current_steps": 7435, "total_steps": 8674, "loss": 0.47256794571876526, "lr": 1.0968686760914248e-07, "epoch": 1.7143186534470831, "percentage": 85.72, "elapsed_time": "5:04:43", "remaining_time": "0:50:46"} +{"current_steps": 7436, "total_steps": 8674, "loss": 0.479513943195343, "lr": 1.0951332547266778e-07, "epoch": 
1.7145492275766658, "percentage": 85.73, "elapsed_time": "5:04:45", "remaining_time": "0:50:44"} +{"current_steps": 7437, "total_steps": 8674, "loss": 0.47687965631484985, "lr": 1.0933991277745614e-07, "epoch": 1.7147798017062486, "percentage": 85.74, "elapsed_time": "5:04:48", "remaining_time": "0:50:41"} +{"current_steps": 7438, "total_steps": 8674, "loss": 0.45799845457077026, "lr": 1.091666295487147e-07, "epoch": 1.7150103758358313, "percentage": 85.75, "elapsed_time": "5:04:50", "remaining_time": "0:50:39"} +{"current_steps": 7439, "total_steps": 8674, "loss": 0.43398863077163696, "lr": 1.089934758116322e-07, "epoch": 1.715240949965414, "percentage": 85.76, "elapsed_time": "5:04:53", "remaining_time": "0:50:36"} +{"current_steps": 7440, "total_steps": 8674, "loss": 0.4098217189311981, "lr": 1.0882045159137788e-07, "epoch": 1.7154715240949967, "percentage": 85.77, "elapsed_time": "5:04:55", "remaining_time": "0:50:34"} +{"current_steps": 7441, "total_steps": 8674, "loss": 0.49889707565307617, "lr": 1.086475569131029e-07, "epoch": 1.7157020982245792, "percentage": 85.79, "elapsed_time": "5:04:57", "remaining_time": "0:50:32"} +{"current_steps": 7442, "total_steps": 8674, "loss": 0.4187192916870117, "lr": 1.0847479180193897e-07, "epoch": 1.715932672354162, "percentage": 85.8, "elapsed_time": "5:05:00", "remaining_time": "0:50:29"} +{"current_steps": 7443, "total_steps": 8674, "loss": 0.44331133365631104, "lr": 1.0830215628299954e-07, "epoch": 1.7161632464837444, "percentage": 85.81, "elapsed_time": "5:05:02", "remaining_time": "0:50:27"} +{"current_steps": 7444, "total_steps": 8674, "loss": 0.4888196587562561, "lr": 1.0812965038137856e-07, "epoch": 1.7163938206133271, "percentage": 85.82, "elapsed_time": "5:05:04", "remaining_time": "0:50:24"} +{"current_steps": 7445, "total_steps": 8674, "loss": 0.4884798228740692, "lr": 1.0795727412215183e-07, "epoch": 1.7166243947429098, "percentage": 85.83, "elapsed_time": "5:05:07", "remaining_time": "0:50:22"} 
+{"current_steps": 7446, "total_steps": 8674, "loss": 0.45655232667922974, "lr": 1.07785027530376e-07, "epoch": 1.7168549688724926, "percentage": 85.84, "elapsed_time": "5:05:09", "remaining_time": "0:50:19"} +{"current_steps": 7447, "total_steps": 8674, "loss": 0.3086237907409668, "lr": 1.0761291063108857e-07, "epoch": 1.7170855430020753, "percentage": 85.85, "elapsed_time": "5:05:12", "remaining_time": "0:50:17"} +{"current_steps": 7448, "total_steps": 8674, "loss": 0.4279823899269104, "lr": 1.0744092344930888e-07, "epoch": 1.717316117131658, "percentage": 85.87, "elapsed_time": "5:05:14", "remaining_time": "0:50:14"} +{"current_steps": 7449, "total_steps": 8674, "loss": 0.4241681396961212, "lr": 1.072690660100366e-07, "epoch": 1.7175466912612405, "percentage": 85.88, "elapsed_time": "5:05:16", "remaining_time": "0:50:12"} +{"current_steps": 7450, "total_steps": 8674, "loss": 0.47086501121520996, "lr": 1.070973383382533e-07, "epoch": 1.7177772653908232, "percentage": 85.89, "elapsed_time": "5:05:19", "remaining_time": "0:50:09"} +{"current_steps": 7451, "total_steps": 8674, "loss": 0.43798619508743286, "lr": 1.0692574045892099e-07, "epoch": 1.7180078395204057, "percentage": 85.9, "elapsed_time": "5:05:21", "remaining_time": "0:50:07"} +{"current_steps": 7452, "total_steps": 8674, "loss": 0.5781964659690857, "lr": 1.0675427239698354e-07, "epoch": 1.7182384136499884, "percentage": 85.91, "elapsed_time": "5:05:24", "remaining_time": "0:50:04"} +{"current_steps": 7453, "total_steps": 8674, "loss": 0.4850879907608032, "lr": 1.0658293417736508e-07, "epoch": 1.7184689877795711, "percentage": 85.92, "elapsed_time": "5:05:26", "remaining_time": "0:50:02"} +{"current_steps": 7454, "total_steps": 8674, "loss": 0.40468811988830566, "lr": 1.064117258249717e-07, "epoch": 1.7186995619091539, "percentage": 85.93, "elapsed_time": "5:05:29", "remaining_time": "0:49:59"} +{"current_steps": 7455, "total_steps": 8674, "loss": 0.4054880142211914, "lr": 1.0624064736469052e-07, "epoch": 
1.7189301360387366, "percentage": 85.95, "elapsed_time": "5:05:31", "remaining_time": "0:49:57"} +{"current_steps": 7456, "total_steps": 8674, "loss": 0.38633522391319275, "lr": 1.0606969882138894e-07, "epoch": 1.719160710168319, "percentage": 85.96, "elapsed_time": "5:05:34", "remaining_time": "0:49:55"} +{"current_steps": 7457, "total_steps": 8674, "loss": 0.4287499785423279, "lr": 1.0589888021991644e-07, "epoch": 1.7193912842979018, "percentage": 85.97, "elapsed_time": "5:05:36", "remaining_time": "0:49:52"} +{"current_steps": 7458, "total_steps": 8674, "loss": 0.49269533157348633, "lr": 1.0572819158510316e-07, "epoch": 1.7196218584274843, "percentage": 85.98, "elapsed_time": "5:05:38", "remaining_time": "0:49:50"} +{"current_steps": 7459, "total_steps": 8674, "loss": 0.38874679803848267, "lr": 1.0555763294176045e-07, "epoch": 1.719852432557067, "percentage": 85.99, "elapsed_time": "5:05:41", "remaining_time": "0:49:47"} +{"current_steps": 7460, "total_steps": 8674, "loss": 0.4381089508533478, "lr": 1.0538720431468051e-07, "epoch": 1.7200830066866497, "percentage": 86.0, "elapsed_time": "5:05:43", "remaining_time": "0:49:45"} +{"current_steps": 7461, "total_steps": 8674, "loss": 0.4550422430038452, "lr": 1.0521690572863706e-07, "epoch": 1.7203135808162324, "percentage": 86.02, "elapsed_time": "5:05:45", "remaining_time": "0:49:42"} +{"current_steps": 7462, "total_steps": 8674, "loss": 0.5173785090446472, "lr": 1.0504673720838476e-07, "epoch": 1.7205441549458151, "percentage": 86.03, "elapsed_time": "5:05:48", "remaining_time": "0:49:40"} +{"current_steps": 7463, "total_steps": 8674, "loss": 0.5082184076309204, "lr": 1.0487669877865945e-07, "epoch": 1.7207747290753979, "percentage": 86.04, "elapsed_time": "5:05:50", "remaining_time": "0:49:37"} +{"current_steps": 7464, "total_steps": 8674, "loss": 0.49810969829559326, "lr": 1.0470679046417786e-07, "epoch": 1.7210053032049804, "percentage": 86.05, "elapsed_time": "5:05:53", "remaining_time": "0:49:35"} 
+{"current_steps": 7465, "total_steps": 8674, "loss": 0.47808337211608887, "lr": 1.0453701228963751e-07, "epoch": 1.721235877334563, "percentage": 86.06, "elapsed_time": "5:05:55", "remaining_time": "0:49:32"} +{"current_steps": 7466, "total_steps": 8674, "loss": 0.5100537538528442, "lr": 1.0436736427971782e-07, "epoch": 1.7214664514641456, "percentage": 86.07, "elapsed_time": "5:05:57", "remaining_time": "0:49:30"} +{"current_steps": 7467, "total_steps": 8674, "loss": 0.44948023557662964, "lr": 1.0419784645907858e-07, "epoch": 1.7216970255937283, "percentage": 86.08, "elapsed_time": "5:06:00", "remaining_time": "0:49:27"} +{"current_steps": 7468, "total_steps": 8674, "loss": 0.4653180241584778, "lr": 1.040284588523611e-07, "epoch": 1.721927599723311, "percentage": 86.1, "elapsed_time": "5:06:02", "remaining_time": "0:49:25"} +{"current_steps": 7469, "total_steps": 8674, "loss": 0.4930723309516907, "lr": 1.0385920148418737e-07, "epoch": 1.7221581738528937, "percentage": 86.11, "elapsed_time": "5:06:04", "remaining_time": "0:49:22"} +{"current_steps": 7470, "total_steps": 8674, "loss": 0.48883867263793945, "lr": 1.036900743791611e-07, "epoch": 1.7223887479824764, "percentage": 86.12, "elapsed_time": "5:06:07", "remaining_time": "0:49:20"} +{"current_steps": 7471, "total_steps": 8674, "loss": 0.4030319154262543, "lr": 1.0352107756186624e-07, "epoch": 1.7226193221120591, "percentage": 86.13, "elapsed_time": "5:06:09", "remaining_time": "0:49:17"} +{"current_steps": 7472, "total_steps": 8674, "loss": 0.4174875319004059, "lr": 1.033522110568683e-07, "epoch": 1.7228498962416416, "percentage": 86.14, "elapsed_time": "5:06:12", "remaining_time": "0:49:15"} +{"current_steps": 7473, "total_steps": 8674, "loss": 0.5152361392974854, "lr": 1.0318347488871371e-07, "epoch": 1.7230804703712244, "percentage": 86.15, "elapsed_time": "5:06:14", "remaining_time": "0:49:13"} +{"current_steps": 7474, "total_steps": 8674, "loss": 0.43221428990364075, "lr": 1.0301486908193014e-07, 
"epoch": 1.7233110445008069, "percentage": 86.17, "elapsed_time": "5:06:17", "remaining_time": "0:49:10"} +{"current_steps": 7475, "total_steps": 8674, "loss": 0.4239969849586487, "lr": 1.0284639366102598e-07, "epoch": 1.7235416186303896, "percentage": 86.18, "elapsed_time": "5:06:19", "remaining_time": "0:49:08"} +{"current_steps": 7476, "total_steps": 8674, "loss": 0.5171400904655457, "lr": 1.0267804865049068e-07, "epoch": 1.7237721927599723, "percentage": 86.19, "elapsed_time": "5:06:21", "remaining_time": "0:49:05"} +{"current_steps": 7477, "total_steps": 8674, "loss": 0.45670178532600403, "lr": 1.0250983407479518e-07, "epoch": 1.724002766889555, "percentage": 86.2, "elapsed_time": "5:06:24", "remaining_time": "0:49:03"} +{"current_steps": 7478, "total_steps": 8674, "loss": 0.36458373069763184, "lr": 1.0234174995839107e-07, "epoch": 1.7242333410191377, "percentage": 86.21, "elapsed_time": "5:06:26", "remaining_time": "0:49:00"} +{"current_steps": 7479, "total_steps": 8674, "loss": 0.4940750002861023, "lr": 1.0217379632571122e-07, "epoch": 1.7244639151487204, "percentage": 86.22, "elapsed_time": "5:06:29", "remaining_time": "0:48:58"} +{"current_steps": 7480, "total_steps": 8674, "loss": 0.43453872203826904, "lr": 1.0200597320116911e-07, "epoch": 1.724694489278303, "percentage": 86.23, "elapsed_time": "5:06:31", "remaining_time": "0:48:55"} +{"current_steps": 7481, "total_steps": 8674, "loss": 0.49255162477493286, "lr": 1.0183828060915989e-07, "epoch": 1.7249250634078857, "percentage": 86.25, "elapsed_time": "5:06:34", "remaining_time": "0:48:53"} +{"current_steps": 7482, "total_steps": 8674, "loss": 0.46221014857292175, "lr": 1.0167071857405906e-07, "epoch": 1.7251556375374681, "percentage": 86.26, "elapsed_time": "5:06:36", "remaining_time": "0:48:50"} +{"current_steps": 7483, "total_steps": 8674, "loss": 0.43426087498664856, "lr": 1.015032871202236e-07, "epoch": 1.7253862116670509, "percentage": 86.27, "elapsed_time": "5:06:38", "remaining_time": "0:48:48"} 
+{"current_steps": 7484, "total_steps": 8674, "loss": 0.45327985286712646, "lr": 1.0133598627199136e-07, "epoch": 1.7256167857966336, "percentage": 86.28, "elapsed_time": "5:06:41", "remaining_time": "0:48:45"} +{"current_steps": 7485, "total_steps": 8674, "loss": 0.4691676199436188, "lr": 1.011688160536811e-07, "epoch": 1.7258473599262163, "percentage": 86.29, "elapsed_time": "5:06:43", "remaining_time": "0:48:43"} +{"current_steps": 7486, "total_steps": 8674, "loss": 0.5080254077911377, "lr": 1.0100177648959296e-07, "epoch": 1.726077934055799, "percentage": 86.3, "elapsed_time": "5:06:45", "remaining_time": "0:48:40"} +{"current_steps": 7487, "total_steps": 8674, "loss": 0.34122025966644287, "lr": 1.008348676040075e-07, "epoch": 1.7263085081853817, "percentage": 86.32, "elapsed_time": "5:06:48", "remaining_time": "0:48:38"} +{"current_steps": 7488, "total_steps": 8674, "loss": 0.44408074021339417, "lr": 1.0066808942118699e-07, "epoch": 1.7265390823149642, "percentage": 86.33, "elapsed_time": "5:06:50", "remaining_time": "0:48:36"} +{"current_steps": 7489, "total_steps": 8674, "loss": 0.3777790665626526, "lr": 1.0050144196537402e-07, "epoch": 1.726769656444547, "percentage": 86.34, "elapsed_time": "5:06:53", "remaining_time": "0:48:33"} +{"current_steps": 7490, "total_steps": 8674, "loss": 0.48730146884918213, "lr": 1.0033492526079279e-07, "epoch": 1.7270002305741294, "percentage": 86.35, "elapsed_time": "5:06:55", "remaining_time": "0:48:31"} +{"current_steps": 7491, "total_steps": 8674, "loss": 0.35903626680374146, "lr": 1.001685393316477e-07, "epoch": 1.7272308047037122, "percentage": 86.36, "elapsed_time": "5:06:58", "remaining_time": "0:48:28"} +{"current_steps": 7492, "total_steps": 8674, "loss": 0.37729373574256897, "lr": 1.0000228420212509e-07, "epoch": 1.7274613788332949, "percentage": 86.37, "elapsed_time": "5:07:00", "remaining_time": "0:48:26"} +{"current_steps": 7493, "total_steps": 8674, "loss": 0.4388326406478882, "lr": 9.98361598963916e-08, 
"epoch": 1.7276919529628776, "percentage": 86.38, "elapsed_time": "5:07:02", "remaining_time": "0:48:23"} +{"current_steps": 7494, "total_steps": 8674, "loss": 0.45095232129096985, "lr": 9.967016643859527e-08, "epoch": 1.7279225270924603, "percentage": 86.4, "elapsed_time": "5:07:05", "remaining_time": "0:48:21"} +{"current_steps": 7495, "total_steps": 8674, "loss": 0.4736475944519043, "lr": 9.95043038528649e-08, "epoch": 1.728153101222043, "percentage": 86.41, "elapsed_time": "5:07:07", "remaining_time": "0:48:18"} +{"current_steps": 7496, "total_steps": 8674, "loss": 0.2984190285205841, "lr": 9.933857216330999e-08, "epoch": 1.7283836753516255, "percentage": 86.42, "elapsed_time": "5:07:10", "remaining_time": "0:48:16"} +{"current_steps": 7497, "total_steps": 8674, "loss": 0.45391780138015747, "lr": 9.91729713940218e-08, "epoch": 1.7286142494812082, "percentage": 86.43, "elapsed_time": "5:07:12", "remaining_time": "0:48:13"} +{"current_steps": 7498, "total_steps": 8674, "loss": 0.5150727033615112, "lr": 9.900750156907157e-08, "epoch": 1.7288448236107907, "percentage": 86.44, "elapsed_time": "5:07:15", "remaining_time": "0:48:11"} +{"current_steps": 7499, "total_steps": 8674, "loss": 0.41298598051071167, "lr": 9.884216271251256e-08, "epoch": 1.7290753977403734, "percentage": 86.45, "elapsed_time": "5:07:17", "remaining_time": "0:48:08"} +{"current_steps": 7500, "total_steps": 8674, "loss": 0.4820541441440582, "lr": 9.86769548483779e-08, "epoch": 1.7293059718699562, "percentage": 86.47, "elapsed_time": "5:07:19", "remaining_time": "0:48:06"} +{"current_steps": 7501, "total_steps": 8674, "loss": 0.4148511290550232, "lr": 9.85118780006825e-08, "epoch": 1.7295365459995389, "percentage": 86.48, "elapsed_time": "5:07:23", "remaining_time": "0:48:04"} +{"current_steps": 7502, "total_steps": 8674, "loss": 0.39676210284233093, "lr": 9.834693219342183e-08, "epoch": 1.7297671201291216, "percentage": 86.49, "elapsed_time": "5:07:25", "remaining_time": "0:48:01"} 
+{"current_steps": 7503, "total_steps": 8674, "loss": 0.3665908873081207, "lr": 9.818211745057292e-08, "epoch": 1.7299976942587043, "percentage": 86.5, "elapsed_time": "5:07:28", "remaining_time": "0:47:59"} +{"current_steps": 7504, "total_steps": 8674, "loss": 0.39340025186538696, "lr": 9.801743379609274e-08, "epoch": 1.7302282683882868, "percentage": 86.51, "elapsed_time": "5:07:30", "remaining_time": "0:47:56"} +{"current_steps": 7505, "total_steps": 8674, "loss": 0.4677412807941437, "lr": 9.785288125391977e-08, "epoch": 1.7304588425178695, "percentage": 86.52, "elapsed_time": "5:07:32", "remaining_time": "0:47:54"} +{"current_steps": 7506, "total_steps": 8674, "loss": 0.49413764476776123, "lr": 9.768845984797369e-08, "epoch": 1.730689416647452, "percentage": 86.53, "elapsed_time": "5:07:35", "remaining_time": "0:47:51"} +{"current_steps": 7507, "total_steps": 8674, "loss": 0.5312438607215881, "lr": 9.752416960215437e-08, "epoch": 1.7309199907770347, "percentage": 86.55, "elapsed_time": "5:07:37", "remaining_time": "0:47:49"} +{"current_steps": 7508, "total_steps": 8674, "loss": 0.38522863388061523, "lr": 9.736001054034338e-08, "epoch": 1.7311505649066175, "percentage": 86.56, "elapsed_time": "5:07:40", "remaining_time": "0:47:46"} +{"current_steps": 7509, "total_steps": 8674, "loss": 0.49167078733444214, "lr": 9.719598268640283e-08, "epoch": 1.7313811390362002, "percentage": 86.57, "elapsed_time": "5:07:42", "remaining_time": "0:47:44"} +{"current_steps": 7510, "total_steps": 8674, "loss": 0.4465949535369873, "lr": 9.7032086064176e-08, "epoch": 1.7316117131657829, "percentage": 86.58, "elapsed_time": "5:07:45", "remaining_time": "0:47:41"} +{"current_steps": 7511, "total_steps": 8674, "loss": 0.4627634882926941, "lr": 9.686832069748663e-08, "epoch": 1.7318422872953656, "percentage": 86.59, "elapsed_time": "5:07:47", "remaining_time": "0:47:39"} +{"current_steps": 7512, "total_steps": 8674, "loss": 0.4188409447669983, "lr": 9.670468661013998e-08, "epoch": 
1.732072861424948, "percentage": 86.6, "elapsed_time": "5:07:49", "remaining_time": "0:47:37"} +{"current_steps": 7513, "total_steps": 8674, "loss": 0.5775213241577148, "lr": 9.654118382592146e-08, "epoch": 1.7323034355545308, "percentage": 86.62, "elapsed_time": "5:07:52", "remaining_time": "0:47:34"} +{"current_steps": 7514, "total_steps": 8674, "loss": 0.43912672996520996, "lr": 9.637781236859843e-08, "epoch": 1.7325340096841133, "percentage": 86.63, "elapsed_time": "5:07:54", "remaining_time": "0:47:32"} +{"current_steps": 7515, "total_steps": 8674, "loss": 0.5364755392074585, "lr": 9.62145722619182e-08, "epoch": 1.732764583813696, "percentage": 86.64, "elapsed_time": "5:07:57", "remaining_time": "0:47:29"} +{"current_steps": 7516, "total_steps": 8674, "loss": 0.4832648038864136, "lr": 9.605146352960935e-08, "epoch": 1.7329951579432787, "percentage": 86.65, "elapsed_time": "5:07:59", "remaining_time": "0:47:27"} +{"current_steps": 7517, "total_steps": 8674, "loss": 0.36932459473609924, "lr": 9.588848619538182e-08, "epoch": 1.7332257320728615, "percentage": 86.66, "elapsed_time": "5:08:02", "remaining_time": "0:47:24"} +{"current_steps": 7518, "total_steps": 8674, "loss": 0.43458276987075806, "lr": 9.57256402829254e-08, "epoch": 1.7334563062024442, "percentage": 86.67, "elapsed_time": "5:08:04", "remaining_time": "0:47:22"} +{"current_steps": 7519, "total_steps": 8674, "loss": 0.41533568501472473, "lr": 9.556292581591196e-08, "epoch": 1.733686880332027, "percentage": 86.68, "elapsed_time": "5:08:06", "remaining_time": "0:47:19"} +{"current_steps": 7520, "total_steps": 8674, "loss": 0.45898690819740295, "lr": 9.540034281799325e-08, "epoch": 1.7339174544616094, "percentage": 86.7, "elapsed_time": "5:08:09", "remaining_time": "0:47:17"} +{"current_steps": 7521, "total_steps": 8674, "loss": 0.3321181535720825, "lr": 9.523789131280279e-08, "epoch": 1.734148028591192, "percentage": 86.71, "elapsed_time": "5:08:11", "remaining_time": "0:47:14"} +{"current_steps": 7522, 
"total_steps": 8674, "loss": 0.3926161229610443, "lr": 9.507557132395416e-08, "epoch": 1.7343786027207746, "percentage": 86.72, "elapsed_time": "5:08:14", "remaining_time": "0:47:12"} +{"current_steps": 7523, "total_steps": 8674, "loss": 0.41051846742630005, "lr": 9.491338287504247e-08, "epoch": 1.7346091768503573, "percentage": 86.73, "elapsed_time": "5:08:16", "remaining_time": "0:47:09"} +{"current_steps": 7524, "total_steps": 8674, "loss": 0.4440652132034302, "lr": 9.47513259896432e-08, "epoch": 1.73483975097994, "percentage": 86.74, "elapsed_time": "5:08:19", "remaining_time": "0:47:07"} +{"current_steps": 7525, "total_steps": 8674, "loss": 0.5175125598907471, "lr": 9.458940069131304e-08, "epoch": 1.7350703251095227, "percentage": 86.75, "elapsed_time": "5:08:21", "remaining_time": "0:47:05"} +{"current_steps": 7526, "total_steps": 8674, "loss": 0.45521751046180725, "lr": 9.442760700358987e-08, "epoch": 1.7353008992391055, "percentage": 86.77, "elapsed_time": "5:08:23", "remaining_time": "0:47:02"} +{"current_steps": 7527, "total_steps": 8674, "loss": 0.5133911967277527, "lr": 9.426594494999151e-08, "epoch": 1.7355314733686882, "percentage": 86.78, "elapsed_time": "5:08:26", "remaining_time": "0:47:00"} +{"current_steps": 7528, "total_steps": 8674, "loss": 0.4397609233856201, "lr": 9.410441455401752e-08, "epoch": 1.7357620474982707, "percentage": 86.79, "elapsed_time": "5:08:28", "remaining_time": "0:46:57"} +{"current_steps": 7529, "total_steps": 8674, "loss": 0.4503510594367981, "lr": 9.394301583914765e-08, "epoch": 1.7359926216278534, "percentage": 86.8, "elapsed_time": "5:08:31", "remaining_time": "0:46:55"} +{"current_steps": 7530, "total_steps": 8674, "loss": 0.44119834899902344, "lr": 9.378174882884327e-08, "epoch": 1.7362231957574359, "percentage": 86.81, "elapsed_time": "5:08:33", "remaining_time": "0:46:52"} +{"current_steps": 7531, "total_steps": 8674, "loss": 0.46257996559143066, "lr": 9.362061354654583e-08, "epoch": 1.7364537698870186, 
"percentage": 86.82, "elapsed_time": "5:08:36", "remaining_time": "0:46:50"} +{"current_steps": 7532, "total_steps": 8674, "loss": 0.4468308687210083, "lr": 9.345961001567792e-08, "epoch": 1.7366843440166013, "percentage": 86.83, "elapsed_time": "5:08:38", "remaining_time": "0:46:47"} +{"current_steps": 7533, "total_steps": 8674, "loss": 0.3837989568710327, "lr": 9.32987382596433e-08, "epoch": 1.736914918146184, "percentage": 86.85, "elapsed_time": "5:08:40", "remaining_time": "0:46:45"} +{"current_steps": 7534, "total_steps": 8674, "loss": 0.4224961996078491, "lr": 9.313799830182644e-08, "epoch": 1.7371454922757668, "percentage": 86.86, "elapsed_time": "5:08:43", "remaining_time": "0:46:42"} +{"current_steps": 7535, "total_steps": 8674, "loss": 0.37379956245422363, "lr": 9.297739016559225e-08, "epoch": 1.7373760664053495, "percentage": 86.87, "elapsed_time": "5:08:45", "remaining_time": "0:46:40"} +{"current_steps": 7536, "total_steps": 8674, "loss": 0.4204242527484894, "lr": 9.281691387428658e-08, "epoch": 1.737606640534932, "percentage": 86.88, "elapsed_time": "5:08:48", "remaining_time": "0:46:37"} +{"current_steps": 7537, "total_steps": 8674, "loss": 0.5270572900772095, "lr": 9.265656945123678e-08, "epoch": 1.7378372146645147, "percentage": 86.89, "elapsed_time": "5:08:50", "remaining_time": "0:46:35"} +{"current_steps": 7538, "total_steps": 8674, "loss": 0.44208282232284546, "lr": 9.249635691975e-08, "epoch": 1.7380677887940972, "percentage": 86.9, "elapsed_time": "5:08:53", "remaining_time": "0:46:33"} +{"current_steps": 7539, "total_steps": 8674, "loss": 0.32514283061027527, "lr": 9.233627630311502e-08, "epoch": 1.73829836292368, "percentage": 86.91, "elapsed_time": "5:08:57", "remaining_time": "0:46:30"} +{"current_steps": 7540, "total_steps": 8674, "loss": 0.35472434759140015, "lr": 9.217632762460126e-08, "epoch": 1.7385289370532626, "percentage": 86.93, "elapsed_time": "5:09:00", "remaining_time": "0:46:28"} +{"current_steps": 7541, "total_steps": 8674, 
"loss": 0.5034215450286865, "lr": 9.201651090745888e-08, "epoch": 1.7387595111828453, "percentage": 86.94, "elapsed_time": "5:09:04", "remaining_time": "0:46:26"} +{"current_steps": 7542, "total_steps": 8674, "loss": 0.4779762029647827, "lr": 9.185682617491863e-08, "epoch": 1.738990085312428, "percentage": 86.95, "elapsed_time": "5:09:07", "remaining_time": "0:46:23"} +{"current_steps": 7543, "total_steps": 8674, "loss": 0.4964079260826111, "lr": 9.169727345019263e-08, "epoch": 1.7392206594420108, "percentage": 86.96, "elapsed_time": "5:09:11", "remaining_time": "0:46:21"} +{"current_steps": 7544, "total_steps": 8674, "loss": 0.5125068426132202, "lr": 9.153785275647319e-08, "epoch": 1.7394512335715933, "percentage": 86.97, "elapsed_time": "5:09:14", "remaining_time": "0:46:19"} +{"current_steps": 7545, "total_steps": 8674, "loss": 0.39051756262779236, "lr": 9.13785641169339e-08, "epoch": 1.739681807701176, "percentage": 86.98, "elapsed_time": "5:09:18", "remaining_time": "0:46:16"} +{"current_steps": 7546, "total_steps": 8674, "loss": 0.45951950550079346, "lr": 9.121940755472901e-08, "epoch": 1.7399123818307585, "percentage": 87.0, "elapsed_time": "5:09:22", "remaining_time": "0:46:14"} +{"current_steps": 7547, "total_steps": 8674, "loss": 0.42676979303359985, "lr": 9.106038309299302e-08, "epoch": 1.7401429559603412, "percentage": 87.01, "elapsed_time": "5:09:26", "remaining_time": "0:46:12"} +{"current_steps": 7548, "total_steps": 8674, "loss": 0.3585033416748047, "lr": 9.090149075484255e-08, "epoch": 1.740373530089924, "percentage": 87.02, "elapsed_time": "5:09:30", "remaining_time": "0:46:10"} +{"current_steps": 7549, "total_steps": 8674, "loss": 0.4613775312900543, "lr": 9.074273056337366e-08, "epoch": 1.7406041042195066, "percentage": 87.03, "elapsed_time": "5:09:34", "remaining_time": "0:46:08"} +{"current_steps": 7550, "total_steps": 8674, "loss": 0.48934412002563477, "lr": 9.058410254166415e-08, "epoch": 1.7408346783490893, "percentage": 87.04, 
"elapsed_time": "5:09:38", "remaining_time": "0:46:05"} +{"current_steps": 7551, "total_steps": 8674, "loss": 0.5749069452285767, "lr": 9.042560671277177e-08, "epoch": 1.741065252478672, "percentage": 87.05, "elapsed_time": "5:09:41", "remaining_time": "0:46:03"} +{"current_steps": 7552, "total_steps": 8674, "loss": 0.4760423004627228, "lr": 9.026724309973588e-08, "epoch": 1.7412958266082545, "percentage": 87.06, "elapsed_time": "5:09:45", "remaining_time": "0:46:01"} +{"current_steps": 7553, "total_steps": 8674, "loss": 0.43080049753189087, "lr": 9.010901172557594e-08, "epoch": 1.7415264007378373, "percentage": 87.08, "elapsed_time": "5:09:49", "remaining_time": "0:45:59"} +{"current_steps": 7554, "total_steps": 8674, "loss": 0.44850271940231323, "lr": 8.99509126132928e-08, "epoch": 1.7417569748674198, "percentage": 87.09, "elapsed_time": "5:09:53", "remaining_time": "0:45:56"} +{"current_steps": 7555, "total_steps": 8674, "loss": 0.34593498706817627, "lr": 8.979294578586738e-08, "epoch": 1.7419875489970025, "percentage": 87.1, "elapsed_time": "5:09:57", "remaining_time": "0:45:54"} +{"current_steps": 7556, "total_steps": 8674, "loss": 0.3738324046134949, "lr": 8.963511126626188e-08, "epoch": 1.7422181231265852, "percentage": 87.11, "elapsed_time": "5:10:01", "remaining_time": "0:45:52"} +{"current_steps": 7557, "total_steps": 8674, "loss": 0.47988662123680115, "lr": 8.947740907741952e-08, "epoch": 1.742448697256168, "percentage": 87.12, "elapsed_time": "5:10:04", "remaining_time": "0:45:50"} +{"current_steps": 7558, "total_steps": 8674, "loss": 0.5863034725189209, "lr": 8.931983924226338e-08, "epoch": 1.7426792713857506, "percentage": 87.13, "elapsed_time": "5:10:08", "remaining_time": "0:45:47"} +{"current_steps": 7559, "total_steps": 8674, "loss": 0.38455232977867126, "lr": 8.916240178369827e-08, "epoch": 1.7429098455153333, "percentage": 87.15, "elapsed_time": "5:10:11", "remaining_time": "0:45:45"} +{"current_steps": 7560, "total_steps": 8674, "loss": 
0.3919760584831238, "lr": 8.900509672460899e-08, "epoch": 1.7431404196449158, "percentage": 87.16, "elapsed_time": "5:10:14", "remaining_time": "0:45:42"} +{"current_steps": 7561, "total_steps": 8674, "loss": 0.4090653657913208, "lr": 8.884792408786169e-08, "epoch": 1.7433709937744986, "percentage": 87.17, "elapsed_time": "5:10:17", "remaining_time": "0:45:40"} +{"current_steps": 7562, "total_steps": 8674, "loss": 0.42597073316574097, "lr": 8.869088389630264e-08, "epoch": 1.743601567904081, "percentage": 87.18, "elapsed_time": "5:10:20", "remaining_time": "0:45:38"} +{"current_steps": 7563, "total_steps": 8674, "loss": 0.38760805130004883, "lr": 8.853397617275959e-08, "epoch": 1.7438321420336638, "percentage": 87.19, "elapsed_time": "5:10:23", "remaining_time": "0:45:35"} +{"current_steps": 7564, "total_steps": 8674, "loss": 0.3753165900707245, "lr": 8.837720094004042e-08, "epoch": 1.7440627161632465, "percentage": 87.2, "elapsed_time": "5:10:26", "remaining_time": "0:45:33"} +{"current_steps": 7565, "total_steps": 8674, "loss": 0.5169536471366882, "lr": 8.822055822093432e-08, "epoch": 1.7442932902928292, "percentage": 87.21, "elapsed_time": "5:10:29", "remaining_time": "0:45:30"} +{"current_steps": 7566, "total_steps": 8674, "loss": 0.3886902332305908, "lr": 8.806404803821077e-08, "epoch": 1.744523864422412, "percentage": 87.23, "elapsed_time": "5:10:32", "remaining_time": "0:45:28"} +{"current_steps": 7567, "total_steps": 8674, "loss": 0.48971402645111084, "lr": 8.790767041461977e-08, "epoch": 1.7447544385519944, "percentage": 87.24, "elapsed_time": "5:10:35", "remaining_time": "0:45:26"} +{"current_steps": 7568, "total_steps": 8674, "loss": 0.4656449556350708, "lr": 8.775142537289282e-08, "epoch": 1.7449850126815771, "percentage": 87.25, "elapsed_time": "5:10:39", "remaining_time": "0:45:23"} +{"current_steps": 7569, "total_steps": 8674, "loss": 0.43197786808013916, "lr": 8.75953129357414e-08, "epoch": 1.7452155868111596, "percentage": 87.26, "elapsed_time": 
"5:10:42", "remaining_time": "0:45:21"} +{"current_steps": 7570, "total_steps": 8674, "loss": 0.5062606930732727, "lr": 8.743933312585816e-08, "epoch": 1.7454461609407423, "percentage": 87.27, "elapsed_time": "5:10:46", "remaining_time": "0:45:19"} +{"current_steps": 7571, "total_steps": 8674, "loss": 0.5489983558654785, "lr": 8.728348596591639e-08, "epoch": 1.745676735070325, "percentage": 87.28, "elapsed_time": "5:10:49", "remaining_time": "0:45:16"} +{"current_steps": 7572, "total_steps": 8674, "loss": 0.4351652264595032, "lr": 8.712777147857031e-08, "epoch": 1.7459073091999078, "percentage": 87.3, "elapsed_time": "5:10:52", "remaining_time": "0:45:14"} +{"current_steps": 7573, "total_steps": 8674, "loss": 0.5096884965896606, "lr": 8.697218968645403e-08, "epoch": 1.7461378833294905, "percentage": 87.31, "elapsed_time": "5:10:56", "remaining_time": "0:45:12"} +{"current_steps": 7574, "total_steps": 8674, "loss": 0.3127269744873047, "lr": 8.681674061218347e-08, "epoch": 1.7463684574590732, "percentage": 87.32, "elapsed_time": "5:10:59", "remaining_time": "0:45:10"} +{"current_steps": 7575, "total_steps": 8674, "loss": 0.4738629460334778, "lr": 8.666142427835443e-08, "epoch": 1.7465990315886557, "percentage": 87.33, "elapsed_time": "5:11:03", "remaining_time": "0:45:07"} +{"current_steps": 7576, "total_steps": 8674, "loss": 0.46921902894973755, "lr": 8.650624070754375e-08, "epoch": 1.7468296057182384, "percentage": 87.34, "elapsed_time": "5:11:06", "remaining_time": "0:45:05"} +{"current_steps": 7577, "total_steps": 8674, "loss": 0.5296987891197205, "lr": 8.635118992230906e-08, "epoch": 1.747060179847821, "percentage": 87.35, "elapsed_time": "5:11:09", "remaining_time": "0:45:03"} +{"current_steps": 7578, "total_steps": 8674, "loss": 0.3522387742996216, "lr": 8.619627194518819e-08, "epoch": 1.7472907539774036, "percentage": 87.36, "elapsed_time": "5:11:13", "remaining_time": "0:45:00"} +{"current_steps": 7579, "total_steps": 8674, "loss": 0.42747724056243896, "lr": 
8.604148679870049e-08, "epoch": 1.7475213281069863, "percentage": 87.38, "elapsed_time": "5:11:16", "remaining_time": "0:44:58"} +{"current_steps": 7580, "total_steps": 8674, "loss": 0.399990439414978, "lr": 8.588683450534528e-08, "epoch": 1.747751902236569, "percentage": 87.39, "elapsed_time": "5:11:20", "remaining_time": "0:44:56"} +{"current_steps": 7581, "total_steps": 8674, "loss": 0.48220518231391907, "lr": 8.573231508760315e-08, "epoch": 1.7479824763661518, "percentage": 87.4, "elapsed_time": "5:11:23", "remaining_time": "0:44:53"} +{"current_steps": 7582, "total_steps": 8674, "loss": 0.5227106213569641, "lr": 8.557792856793455e-08, "epoch": 1.7482130504957345, "percentage": 87.41, "elapsed_time": "5:11:27", "remaining_time": "0:44:51"} +{"current_steps": 7583, "total_steps": 8674, "loss": 0.5436732769012451, "lr": 8.542367496878178e-08, "epoch": 1.748443624625317, "percentage": 87.42, "elapsed_time": "5:11:30", "remaining_time": "0:44:49"} +{"current_steps": 7584, "total_steps": 8674, "loss": 0.48398053646087646, "lr": 8.526955431256644e-08, "epoch": 1.7486741987548997, "percentage": 87.43, "elapsed_time": "5:11:33", "remaining_time": "0:44:46"} +{"current_steps": 7585, "total_steps": 8674, "loss": 0.5727924108505249, "lr": 8.511556662169217e-08, "epoch": 1.7489047728844822, "percentage": 87.45, "elapsed_time": "5:11:37", "remaining_time": "0:44:44"} +{"current_steps": 7586, "total_steps": 8674, "loss": 0.48077693581581116, "lr": 8.496171191854229e-08, "epoch": 1.749135347014065, "percentage": 87.46, "elapsed_time": "5:11:40", "remaining_time": "0:44:42"} +{"current_steps": 7587, "total_steps": 8674, "loss": 0.45447635650634766, "lr": 8.480799022548113e-08, "epoch": 1.7493659211436476, "percentage": 87.47, "elapsed_time": "5:11:44", "remaining_time": "0:44:39"} +{"current_steps": 7588, "total_steps": 8674, "loss": 0.4605486989021301, "lr": 8.465440156485392e-08, "epoch": 1.7495964952732304, "percentage": 87.48, "elapsed_time": "5:11:47", "remaining_time": 
"0:44:37"} +{"current_steps": 7589, "total_steps": 8674, "loss": 0.4229927062988281, "lr": 8.450094595898604e-08, "epoch": 1.749827069402813, "percentage": 87.49, "elapsed_time": "5:11:50", "remaining_time": "0:44:35"} +{"current_steps": 7590, "total_steps": 8674, "loss": 0.43005260825157166, "lr": 8.434762343018408e-08, "epoch": 1.7500576435323958, "percentage": 87.5, "elapsed_time": "5:11:54", "remaining_time": "0:44:32"} +{"current_steps": 7591, "total_steps": 8674, "loss": 0.47446098923683167, "lr": 8.41944340007349e-08, "epoch": 1.7502882176619783, "percentage": 87.51, "elapsed_time": "5:11:57", "remaining_time": "0:44:30"} +{"current_steps": 7592, "total_steps": 8674, "loss": 0.40554216504096985, "lr": 8.40413776929062e-08, "epoch": 1.750518791791561, "percentage": 87.53, "elapsed_time": "5:12:01", "remaining_time": "0:44:28"} +{"current_steps": 7593, "total_steps": 8674, "loss": 0.4144189953804016, "lr": 8.38884545289461e-08, "epoch": 1.7507493659211435, "percentage": 87.54, "elapsed_time": "5:12:04", "remaining_time": "0:44:25"} +{"current_steps": 7594, "total_steps": 8674, "loss": 0.449351966381073, "lr": 8.373566453108361e-08, "epoch": 1.7509799400507262, "percentage": 87.55, "elapsed_time": "5:12:08", "remaining_time": "0:44:23"} +{"current_steps": 7595, "total_steps": 8674, "loss": 0.4584103226661682, "lr": 8.358300772152849e-08, "epoch": 1.751210514180309, "percentage": 87.56, "elapsed_time": "5:12:12", "remaining_time": "0:44:21"} +{"current_steps": 7596, "total_steps": 8674, "loss": 0.4739362895488739, "lr": 8.343048412247066e-08, "epoch": 1.7514410883098916, "percentage": 87.57, "elapsed_time": "5:12:15", "remaining_time": "0:44:18"} +{"current_steps": 7597, "total_steps": 8674, "loss": 0.3970356583595276, "lr": 8.327809375608131e-08, "epoch": 1.7516716624394744, "percentage": 87.58, "elapsed_time": "5:12:19", "remaining_time": "0:44:16"} +{"current_steps": 7598, "total_steps": 8674, "loss": 0.4298238754272461, "lr": 8.312583664451157e-08, "epoch": 
1.751902236569057, "percentage": 87.6, "elapsed_time": "5:12:23", "remaining_time": "0:44:14"} +{"current_steps": 7599, "total_steps": 8674, "loss": 0.4920361340045929, "lr": 8.297371280989385e-08, "epoch": 1.7521328106986396, "percentage": 87.61, "elapsed_time": "5:12:27", "remaining_time": "0:44:12"} +{"current_steps": 7600, "total_steps": 8674, "loss": 0.5035870671272278, "lr": 8.282172227434059e-08, "epoch": 1.7523633848282223, "percentage": 87.62, "elapsed_time": "5:12:31", "remaining_time": "0:44:09"} +{"current_steps": 7601, "total_steps": 8674, "loss": 0.373248815536499, "lr": 8.266986505994555e-08, "epoch": 1.7525939589578048, "percentage": 87.63, "elapsed_time": "5:12:37", "remaining_time": "0:44:07"} +{"current_steps": 7602, "total_steps": 8674, "loss": 0.48491543531417847, "lr": 8.25181411887822e-08, "epoch": 1.7528245330873875, "percentage": 87.64, "elapsed_time": "5:12:41", "remaining_time": "0:44:05"} +{"current_steps": 7603, "total_steps": 8674, "loss": 0.4298476576805115, "lr": 8.236655068290554e-08, "epoch": 1.7530551072169702, "percentage": 87.65, "elapsed_time": "5:12:45", "remaining_time": "0:44:03"} +{"current_steps": 7604, "total_steps": 8674, "loss": 0.48804932832717896, "lr": 8.221509356435064e-08, "epoch": 1.753285681346553, "percentage": 87.66, "elapsed_time": "5:12:49", "remaining_time": "0:44:01"} +{"current_steps": 7605, "total_steps": 8674, "loss": 0.467857301235199, "lr": 8.206376985513353e-08, "epoch": 1.7535162554761357, "percentage": 87.68, "elapsed_time": "5:12:53", "remaining_time": "0:43:58"} +{"current_steps": 7606, "total_steps": 8674, "loss": 0.48995548486709595, "lr": 8.19125795772504e-08, "epoch": 1.7537468296057184, "percentage": 87.69, "elapsed_time": "5:12:57", "remaining_time": "0:43:56"} +{"current_steps": 7607, "total_steps": 8674, "loss": 0.4459487795829773, "lr": 8.176152275267823e-08, "epoch": 1.7539774037353009, "percentage": 87.7, "elapsed_time": "5:13:01", "remaining_time": "0:43:54"} +{"current_steps": 7608, 
"total_steps": 8674, "loss": 0.5054866671562195, "lr": 8.1610599403375e-08, "epoch": 1.7542079778648836, "percentage": 87.71, "elapsed_time": "5:13:05", "remaining_time": "0:43:52"} +{"current_steps": 7609, "total_steps": 8674, "loss": 0.46223869919776917, "lr": 8.145980955127862e-08, "epoch": 1.754438551994466, "percentage": 87.72, "elapsed_time": "5:13:09", "remaining_time": "0:43:49"} +{"current_steps": 7610, "total_steps": 8674, "loss": 0.4743426442146301, "lr": 8.1309153218308e-08, "epoch": 1.7546691261240488, "percentage": 87.73, "elapsed_time": "5:13:12", "remaining_time": "0:43:47"} +{"current_steps": 7611, "total_steps": 8674, "loss": 0.40808072686195374, "lr": 8.115863042636262e-08, "epoch": 1.7548997002536315, "percentage": 87.74, "elapsed_time": "5:13:16", "remaining_time": "0:43:45"} +{"current_steps": 7612, "total_steps": 8674, "loss": 0.4452321231365204, "lr": 8.100824119732263e-08, "epoch": 1.7551302743832142, "percentage": 87.76, "elapsed_time": "5:13:19", "remaining_time": "0:43:42"} +{"current_steps": 7613, "total_steps": 8674, "loss": 0.4211857318878174, "lr": 8.085798555304824e-08, "epoch": 1.755360848512797, "percentage": 87.77, "elapsed_time": "5:13:22", "remaining_time": "0:43:40"} +{"current_steps": 7614, "total_steps": 8674, "loss": 0.3356667757034302, "lr": 8.070786351538117e-08, "epoch": 1.7555914226423797, "percentage": 87.78, "elapsed_time": "5:13:26", "remaining_time": "0:43:38"} +{"current_steps": 7615, "total_steps": 8674, "loss": 0.4636021852493286, "lr": 8.055787510614287e-08, "epoch": 1.7558219967719622, "percentage": 87.79, "elapsed_time": "5:13:29", "remaining_time": "0:43:35"} +{"current_steps": 7616, "total_steps": 8674, "loss": 0.4066168963909149, "lr": 8.040802034713546e-08, "epoch": 1.7560525709015449, "percentage": 87.8, "elapsed_time": "5:13:33", "remaining_time": "0:43:33"} +{"current_steps": 7617, "total_steps": 8674, "loss": 0.426937460899353, "lr": 8.025829926014216e-08, "epoch": 1.7562831450311274, "percentage": 
87.81, "elapsed_time": "5:13:37", "remaining_time": "0:43:31"} +{"current_steps": 7618, "total_steps": 8674, "loss": 0.464493989944458, "lr": 8.010871186692625e-08, "epoch": 1.75651371916071, "percentage": 87.83, "elapsed_time": "5:13:40", "remaining_time": "0:43:28"} +{"current_steps": 7619, "total_steps": 8674, "loss": 0.44130605459213257, "lr": 7.995925818923222e-08, "epoch": 1.7567442932902928, "percentage": 87.84, "elapsed_time": "5:13:44", "remaining_time": "0:43:26"} +{"current_steps": 7620, "total_steps": 8674, "loss": 0.5241909027099609, "lr": 7.980993824878402e-08, "epoch": 1.7569748674198755, "percentage": 87.85, "elapsed_time": "5:13:47", "remaining_time": "0:43:24"} +{"current_steps": 7621, "total_steps": 8674, "loss": 0.45450860261917114, "lr": 7.96607520672874e-08, "epoch": 1.7572054415494582, "percentage": 87.86, "elapsed_time": "5:13:50", "remaining_time": "0:43:21"} +{"current_steps": 7622, "total_steps": 8674, "loss": 0.443767786026001, "lr": 7.951169966642757e-08, "epoch": 1.757436015679041, "percentage": 87.87, "elapsed_time": "5:13:54", "remaining_time": "0:43:19"} +{"current_steps": 7623, "total_steps": 8674, "loss": 0.3951075077056885, "lr": 7.936278106787131e-08, "epoch": 1.7576665898086234, "percentage": 87.88, "elapsed_time": "5:13:57", "remaining_time": "0:43:17"} +{"current_steps": 7624, "total_steps": 8674, "loss": 0.44628477096557617, "lr": 7.921399629326509e-08, "epoch": 1.7578971639382062, "percentage": 87.89, "elapsed_time": "5:14:01", "remaining_time": "0:43:14"} +{"current_steps": 7625, "total_steps": 8674, "loss": 0.38743889331817627, "lr": 7.906534536423648e-08, "epoch": 1.7581277380677887, "percentage": 87.91, "elapsed_time": "5:14:04", "remaining_time": "0:43:12"} +{"current_steps": 7626, "total_steps": 8674, "loss": 0.4338032007217407, "lr": 7.891682830239311e-08, "epoch": 1.7583583121973714, "percentage": 87.92, "elapsed_time": "5:14:08", "remaining_time": "0:43:10"} +{"current_steps": 7627, "total_steps": 8674, "loss": 
0.47387874126434326, "lr": 7.876844512932367e-08, "epoch": 1.758588886326954, "percentage": 87.93, "elapsed_time": "5:14:11", "remaining_time": "0:43:07"} +{"current_steps": 7628, "total_steps": 8674, "loss": 0.4082717299461365, "lr": 7.86201958665973e-08, "epoch": 1.7588194604565368, "percentage": 87.94, "elapsed_time": "5:14:14", "remaining_time": "0:43:05"} +{"current_steps": 7629, "total_steps": 8674, "loss": 0.4254682958126068, "lr": 7.847208053576326e-08, "epoch": 1.7590500345861195, "percentage": 87.95, "elapsed_time": "5:14:18", "remaining_time": "0:43:03"} +{"current_steps": 7630, "total_steps": 8674, "loss": 0.3572045564651489, "lr": 7.832409915835181e-08, "epoch": 1.7592806087157022, "percentage": 87.96, "elapsed_time": "5:14:21", "remaining_time": "0:43:00"} +{"current_steps": 7631, "total_steps": 8674, "loss": 0.39110279083251953, "lr": 7.817625175587328e-08, "epoch": 1.7595111828452847, "percentage": 87.98, "elapsed_time": "5:14:24", "remaining_time": "0:42:58"} +{"current_steps": 7632, "total_steps": 8674, "loss": 0.49292176961898804, "lr": 7.802853834981926e-08, "epoch": 1.7597417569748675, "percentage": 87.99, "elapsed_time": "5:14:28", "remaining_time": "0:42:56"} +{"current_steps": 7633, "total_steps": 8674, "loss": 0.4271275997161865, "lr": 7.78809589616608e-08, "epoch": 1.75997233110445, "percentage": 88.0, "elapsed_time": "5:14:31", "remaining_time": "0:42:53"} +{"current_steps": 7634, "total_steps": 8674, "loss": 0.470772922039032, "lr": 7.77335136128503e-08, "epoch": 1.7602029052340327, "percentage": 88.01, "elapsed_time": "5:14:35", "remaining_time": "0:42:51"} +{"current_steps": 7635, "total_steps": 8674, "loss": 0.4872988760471344, "lr": 7.758620232482083e-08, "epoch": 1.7604334793636154, "percentage": 88.02, "elapsed_time": "5:14:38", "remaining_time": "0:42:49"} +{"current_steps": 7636, "total_steps": 8674, "loss": 0.4300990104675293, "lr": 7.743902511898492e-08, "epoch": 1.760664053493198, "percentage": 88.03, "elapsed_time": 
"5:14:42", "remaining_time": "0:42:46"} +{"current_steps": 7637, "total_steps": 8674, "loss": 0.4524795711040497, "lr": 7.729198201673682e-08, "epoch": 1.7608946276227808, "percentage": 88.04, "elapsed_time": "5:14:46", "remaining_time": "0:42:44"} +{"current_steps": 7638, "total_steps": 8674, "loss": 0.4673241376876831, "lr": 7.714507303945028e-08, "epoch": 1.7611252017523635, "percentage": 88.06, "elapsed_time": "5:14:49", "remaining_time": "0:42:42"} +{"current_steps": 7639, "total_steps": 8674, "loss": 0.5171443223953247, "lr": 7.699829820848048e-08, "epoch": 1.761355775881946, "percentage": 88.07, "elapsed_time": "5:14:51", "remaining_time": "0:42:39"} +{"current_steps": 7640, "total_steps": 8674, "loss": 0.44416171312332153, "lr": 7.68516575451621e-08, "epoch": 1.7615863500115287, "percentage": 88.08, "elapsed_time": "5:14:54", "remaining_time": "0:42:37"} +{"current_steps": 7641, "total_steps": 8674, "loss": 0.4456225633621216, "lr": 7.670515107081122e-08, "epoch": 1.7618169241411112, "percentage": 88.09, "elapsed_time": "5:14:57", "remaining_time": "0:42:34"} +{"current_steps": 7642, "total_steps": 8674, "loss": 0.5235984921455383, "lr": 7.65587788067239e-08, "epoch": 1.762047498270694, "percentage": 88.1, "elapsed_time": "5:15:00", "remaining_time": "0:42:32"} +{"current_steps": 7643, "total_steps": 8674, "loss": 0.4957311749458313, "lr": 7.641254077417702e-08, "epoch": 1.7622780724002767, "percentage": 88.11, "elapsed_time": "5:15:02", "remaining_time": "0:42:29"} +{"current_steps": 7644, "total_steps": 8674, "loss": 0.48401015996932983, "lr": 7.626643699442748e-08, "epoch": 1.7625086465298594, "percentage": 88.13, "elapsed_time": "5:15:04", "remaining_time": "0:42:27"} +{"current_steps": 7645, "total_steps": 8674, "loss": 0.5440249443054199, "lr": 7.612046748871326e-08, "epoch": 1.762739220659442, "percentage": 88.14, "elapsed_time": "5:15:07", "remaining_time": "0:42:24"} +{"current_steps": 7646, "total_steps": 8674, "loss": 0.3922181725502014, "lr": 
7.597463227825229e-08, "epoch": 1.7629697947890248, "percentage": 88.15, "elapsed_time": "5:15:09", "remaining_time": "0:42:22"} +{"current_steps": 7647, "total_steps": 8674, "loss": 0.4679541289806366, "lr": 7.582893138424318e-08, "epoch": 1.7632003689186073, "percentage": 88.16, "elapsed_time": "5:15:12", "remaining_time": "0:42:19"} +{"current_steps": 7648, "total_steps": 8674, "loss": 0.4461076557636261, "lr": 7.568336482786508e-08, "epoch": 1.76343094304819, "percentage": 88.17, "elapsed_time": "5:15:14", "remaining_time": "0:42:17"} +{"current_steps": 7649, "total_steps": 8674, "loss": 0.4028201997280121, "lr": 7.553793263027752e-08, "epoch": 1.7636615171777725, "percentage": 88.18, "elapsed_time": "5:15:17", "remaining_time": "0:42:14"} +{"current_steps": 7650, "total_steps": 8674, "loss": 0.47307640314102173, "lr": 7.53926348126206e-08, "epoch": 1.7638920913073552, "percentage": 88.19, "elapsed_time": "5:15:19", "remaining_time": "0:42:12"} +{"current_steps": 7651, "total_steps": 8674, "loss": 0.4763333201408386, "lr": 7.524747139601473e-08, "epoch": 1.764122665436938, "percentage": 88.21, "elapsed_time": "5:15:21", "remaining_time": "0:42:09"} +{"current_steps": 7652, "total_steps": 8674, "loss": 0.5062815546989441, "lr": 7.510244240156127e-08, "epoch": 1.7643532395665207, "percentage": 88.22, "elapsed_time": "5:15:24", "remaining_time": "0:42:07"} +{"current_steps": 7653, "total_steps": 8674, "loss": 0.38344740867614746, "lr": 7.495754785034114e-08, "epoch": 1.7645838136961034, "percentage": 88.23, "elapsed_time": "5:15:26", "remaining_time": "0:42:05"} +{"current_steps": 7654, "total_steps": 8674, "loss": 0.36255425214767456, "lr": 7.48127877634166e-08, "epoch": 1.7648143878256861, "percentage": 88.24, "elapsed_time": "5:15:29", "remaining_time": "0:42:02"} +{"current_steps": 7655, "total_steps": 8674, "loss": 0.4136468172073364, "lr": 7.466816216182969e-08, "epoch": 1.7650449619552686, "percentage": 88.25, "elapsed_time": "5:15:31", "remaining_time": 
"0:42:00"} +{"current_steps": 7656, "total_steps": 8674, "loss": 0.4294041395187378, "lr": 7.452367106660351e-08, "epoch": 1.7652755360848513, "percentage": 88.26, "elapsed_time": "5:15:33", "remaining_time": "0:41:57"} +{"current_steps": 7657, "total_steps": 8674, "loss": 0.3865356147289276, "lr": 7.437931449874101e-08, "epoch": 1.7655061102144338, "percentage": 88.28, "elapsed_time": "5:15:36", "remaining_time": "0:41:55"} +{"current_steps": 7658, "total_steps": 8674, "loss": 0.44538289308547974, "lr": 7.42350924792261e-08, "epoch": 1.7657366843440165, "percentage": 88.29, "elapsed_time": "5:15:38", "remaining_time": "0:41:52"} +{"current_steps": 7659, "total_steps": 8674, "loss": 0.4943844676017761, "lr": 7.409100502902299e-08, "epoch": 1.7659672584735993, "percentage": 88.3, "elapsed_time": "5:15:41", "remaining_time": "0:41:50"} +{"current_steps": 7660, "total_steps": 8674, "loss": 0.41705092787742615, "lr": 7.394705216907582e-08, "epoch": 1.766197832603182, "percentage": 88.31, "elapsed_time": "5:15:43", "remaining_time": "0:41:47"} +{"current_steps": 7661, "total_steps": 8674, "loss": 0.4304206967353821, "lr": 7.380323392031018e-08, "epoch": 1.7664284067327647, "percentage": 88.32, "elapsed_time": "5:15:45", "remaining_time": "0:41:45"} +{"current_steps": 7662, "total_steps": 8674, "loss": 0.4830179214477539, "lr": 7.365955030363102e-08, "epoch": 1.7666589808623474, "percentage": 88.33, "elapsed_time": "5:15:48", "remaining_time": "0:41:42"} +{"current_steps": 7663, "total_steps": 8674, "loss": 0.47749078273773193, "lr": 7.351600133992452e-08, "epoch": 1.76688955499193, "percentage": 88.34, "elapsed_time": "5:15:50", "remaining_time": "0:41:40"} +{"current_steps": 7664, "total_steps": 8674, "loss": 0.3899204730987549, "lr": 7.337258705005667e-08, "epoch": 1.7671201291215126, "percentage": 88.36, "elapsed_time": "5:15:53", "remaining_time": "0:41:37"} +{"current_steps": 7665, "total_steps": 8674, "loss": 0.4621524214744568, "lr": 7.322930745487443e-08, 
"epoch": 1.7673507032510951, "percentage": 88.37, "elapsed_time": "5:15:55", "remaining_time": "0:41:35"} +{"current_steps": 7666, "total_steps": 8674, "loss": 0.5305047035217285, "lr": 7.308616257520506e-08, "epoch": 1.7675812773806778, "percentage": 88.38, "elapsed_time": "5:15:58", "remaining_time": "0:41:32"} +{"current_steps": 7667, "total_steps": 8674, "loss": 0.5894631147384644, "lr": 7.294315243185578e-08, "epoch": 1.7678118515102605, "percentage": 88.39, "elapsed_time": "5:16:00", "remaining_time": "0:41:30"} +{"current_steps": 7668, "total_steps": 8674, "loss": 0.38509970903396606, "lr": 7.280027704561498e-08, "epoch": 1.7680424256398433, "percentage": 88.4, "elapsed_time": "5:16:02", "remaining_time": "0:41:27"} +{"current_steps": 7669, "total_steps": 8674, "loss": 0.45494410395622253, "lr": 7.265753643725048e-08, "epoch": 1.768272999769426, "percentage": 88.41, "elapsed_time": "5:16:05", "remaining_time": "0:41:25"} +{"current_steps": 7670, "total_steps": 8674, "loss": 0.4819248914718628, "lr": 7.251493062751169e-08, "epoch": 1.7685035738990087, "percentage": 88.43, "elapsed_time": "5:16:07", "remaining_time": "0:41:22"} +{"current_steps": 7671, "total_steps": 8674, "loss": 0.43286386132240295, "lr": 7.237245963712724e-08, "epoch": 1.7687341480285912, "percentage": 88.44, "elapsed_time": "5:16:10", "remaining_time": "0:41:20"} +{"current_steps": 7672, "total_steps": 8674, "loss": 0.4285479187965393, "lr": 7.223012348680724e-08, "epoch": 1.768964722158174, "percentage": 88.45, "elapsed_time": "5:16:12", "remaining_time": "0:41:17"} +{"current_steps": 7673, "total_steps": 8674, "loss": 0.42678505182266235, "lr": 7.208792219724124e-08, "epoch": 1.7691952962877564, "percentage": 88.46, "elapsed_time": "5:16:14", "remaining_time": "0:41:15"} +{"current_steps": 7674, "total_steps": 8674, "loss": 0.47091686725616455, "lr": 7.194585578909995e-08, "epoch": 1.7694258704173391, "percentage": 88.47, "elapsed_time": "5:16:17", "remaining_time": "0:41:12"} 
+{"current_steps": 7675, "total_steps": 8674, "loss": 0.41932445764541626, "lr": 7.180392428303394e-08, "epoch": 1.7696564445469218, "percentage": 88.48, "elapsed_time": "5:16:19", "remaining_time": "0:41:10"} +{"current_steps": 7676, "total_steps": 8674, "loss": 0.4043616056442261, "lr": 7.166212769967483e-08, "epoch": 1.7698870186765046, "percentage": 88.49, "elapsed_time": "5:16:22", "remaining_time": "0:41:07"} +{"current_steps": 7677, "total_steps": 8674, "loss": 0.395826518535614, "lr": 7.15204660596338e-08, "epoch": 1.7701175928060873, "percentage": 88.51, "elapsed_time": "5:16:24", "remaining_time": "0:41:05"} +{"current_steps": 7678, "total_steps": 8674, "loss": 0.4684498906135559, "lr": 7.13789393835027e-08, "epoch": 1.7703481669356698, "percentage": 88.52, "elapsed_time": "5:16:27", "remaining_time": "0:41:03"} +{"current_steps": 7679, "total_steps": 8674, "loss": 0.4713285565376282, "lr": 7.12375476918542e-08, "epoch": 1.7705787410652525, "percentage": 88.53, "elapsed_time": "5:16:29", "remaining_time": "0:41:00"} +{"current_steps": 7680, "total_steps": 8674, "loss": 0.47559499740600586, "lr": 7.109629100524073e-08, "epoch": 1.770809315194835, "percentage": 88.54, "elapsed_time": "5:16:31", "remaining_time": "0:40:58"} +{"current_steps": 7681, "total_steps": 8674, "loss": 0.5364210605621338, "lr": 7.095516934419554e-08, "epoch": 1.7710398893244177, "percentage": 88.55, "elapsed_time": "5:16:34", "remaining_time": "0:40:55"} +{"current_steps": 7682, "total_steps": 8674, "loss": 0.5731894969940186, "lr": 7.081418272923212e-08, "epoch": 1.7712704634540004, "percentage": 88.56, "elapsed_time": "5:16:36", "remaining_time": "0:40:53"} +{"current_steps": 7683, "total_steps": 8674, "loss": 0.4287458062171936, "lr": 7.067333118084428e-08, "epoch": 1.7715010375835831, "percentage": 88.58, "elapsed_time": "5:16:39", "remaining_time": "0:40:50"} +{"current_steps": 7684, "total_steps": 8674, "loss": 0.3849913775920868, "lr": 7.053261471950612e-08, "epoch": 
1.7717316117131658, "percentage": 88.59, "elapsed_time": "5:16:41", "remaining_time": "0:40:48"} +{"current_steps": 7685, "total_steps": 8674, "loss": 0.4933156371116638, "lr": 7.039203336567245e-08, "epoch": 1.7719621858427486, "percentage": 88.6, "elapsed_time": "5:16:43", "remaining_time": "0:40:45"} +{"current_steps": 7686, "total_steps": 8674, "loss": 0.5185002088546753, "lr": 7.025158713977808e-08, "epoch": 1.772192759972331, "percentage": 88.61, "elapsed_time": "5:16:46", "remaining_time": "0:40:43"} +{"current_steps": 7687, "total_steps": 8674, "loss": 0.514995276927948, "lr": 7.011127606223799e-08, "epoch": 1.7724233341019138, "percentage": 88.62, "elapsed_time": "5:16:48", "remaining_time": "0:40:40"} +{"current_steps": 7688, "total_steps": 8674, "loss": 0.4362761676311493, "lr": 6.99711001534481e-08, "epoch": 1.7726539082314963, "percentage": 88.63, "elapsed_time": "5:16:51", "remaining_time": "0:40:38"} +{"current_steps": 7689, "total_steps": 8674, "loss": 0.44117432832717896, "lr": 6.983105943378431e-08, "epoch": 1.772884482361079, "percentage": 88.64, "elapsed_time": "5:16:53", "remaining_time": "0:40:35"} +{"current_steps": 7690, "total_steps": 8674, "loss": 0.4940808415412903, "lr": 6.969115392360325e-08, "epoch": 1.7731150564906617, "percentage": 88.66, "elapsed_time": "5:16:56", "remaining_time": "0:40:33"} +{"current_steps": 7691, "total_steps": 8674, "loss": 0.4322758913040161, "lr": 6.955138364324109e-08, "epoch": 1.7733456306202444, "percentage": 88.67, "elapsed_time": "5:16:58", "remaining_time": "0:40:30"} +{"current_steps": 7692, "total_steps": 8674, "loss": 0.3867933750152588, "lr": 6.941174861301536e-08, "epoch": 1.7735762047498271, "percentage": 88.68, "elapsed_time": "5:17:00", "remaining_time": "0:40:28"} +{"current_steps": 7693, "total_steps": 8674, "loss": 0.4380000829696655, "lr": 6.927224885322302e-08, "epoch": 1.7738067788794099, "percentage": 88.69, "elapsed_time": "5:17:03", "remaining_time": "0:40:25"} +{"current_steps": 7694, 
"total_steps": 8674, "loss": 0.46499723196029663, "lr": 6.913288438414222e-08, "epoch": 1.7740373530089923, "percentage": 88.7, "elapsed_time": "5:17:05", "remaining_time": "0:40:23"} +{"current_steps": 7695, "total_steps": 8674, "loss": 0.4845675230026245, "lr": 6.89936552260304e-08, "epoch": 1.774267927138575, "percentage": 88.71, "elapsed_time": "5:17:08", "remaining_time": "0:40:20"} +{"current_steps": 7696, "total_steps": 8674, "loss": 0.3755526542663574, "lr": 6.88545613991266e-08, "epoch": 1.7744985012681576, "percentage": 88.72, "elapsed_time": "5:17:10", "remaining_time": "0:40:18"} +{"current_steps": 7697, "total_steps": 8674, "loss": 0.4765484929084778, "lr": 6.871560292364887e-08, "epoch": 1.7747290753977403, "percentage": 88.74, "elapsed_time": "5:17:12", "remaining_time": "0:40:15"} +{"current_steps": 7698, "total_steps": 8674, "loss": 0.4176154136657715, "lr": 6.857677981979659e-08, "epoch": 1.774959649527323, "percentage": 88.75, "elapsed_time": "5:17:15", "remaining_time": "0:40:13"} +{"current_steps": 7699, "total_steps": 8674, "loss": 0.410483717918396, "lr": 6.84380921077492e-08, "epoch": 1.7751902236569057, "percentage": 88.76, "elapsed_time": "5:17:17", "remaining_time": "0:40:10"} +{"current_steps": 7700, "total_steps": 8674, "loss": 0.5188060998916626, "lr": 6.829953980766612e-08, "epoch": 1.7754207977864884, "percentage": 88.77, "elapsed_time": "5:17:20", "remaining_time": "0:40:08"} +{"current_steps": 7701, "total_steps": 8674, "loss": 0.47039783000946045, "lr": 6.816112293968745e-08, "epoch": 1.7756513719160711, "percentage": 88.78, "elapsed_time": "5:17:24", "remaining_time": "0:40:06"} +{"current_steps": 7702, "total_steps": 8674, "loss": 0.5367648601531982, "lr": 6.802284152393345e-08, "epoch": 1.7758819460456536, "percentage": 88.79, "elapsed_time": "5:17:26", "remaining_time": "0:40:03"} +{"current_steps": 7703, "total_steps": 8674, "loss": 0.500449538230896, "lr": 6.78846955805048e-08, "epoch": 1.7761125201752364, "percentage": 
88.81, "elapsed_time": "5:17:28", "remaining_time": "0:40:01"} +{"current_steps": 7704, "total_steps": 8674, "loss": 0.4579819440841675, "lr": 6.774668512948234e-08, "epoch": 1.7763430943048188, "percentage": 88.82, "elapsed_time": "5:17:31", "remaining_time": "0:39:58"} +{"current_steps": 7705, "total_steps": 8674, "loss": 0.41459107398986816, "lr": 6.760881019092712e-08, "epoch": 1.7765736684344016, "percentage": 88.83, "elapsed_time": "5:17:33", "remaining_time": "0:39:56"} +{"current_steps": 7706, "total_steps": 8674, "loss": 0.46020573377609253, "lr": 6.747107078488112e-08, "epoch": 1.7768042425639843, "percentage": 88.84, "elapsed_time": "5:17:36", "remaining_time": "0:39:53"} +{"current_steps": 7707, "total_steps": 8674, "loss": 0.48069459199905396, "lr": 6.733346693136566e-08, "epoch": 1.777034816693567, "percentage": 88.85, "elapsed_time": "5:17:38", "remaining_time": "0:39:51"} +{"current_steps": 7708, "total_steps": 8674, "loss": 0.3514458239078522, "lr": 6.719599865038328e-08, "epoch": 1.7772653908231497, "percentage": 88.86, "elapsed_time": "5:17:41", "remaining_time": "0:39:48"} +{"current_steps": 7709, "total_steps": 8674, "loss": 0.4696041941642761, "lr": 6.705866596191601e-08, "epoch": 1.7774959649527324, "percentage": 88.87, "elapsed_time": "5:17:43", "remaining_time": "0:39:46"} +{"current_steps": 7710, "total_steps": 8674, "loss": 0.45286083221435547, "lr": 6.692146888592675e-08, "epoch": 1.777726539082315, "percentage": 88.89, "elapsed_time": "5:17:45", "remaining_time": "0:39:43"} +{"current_steps": 7711, "total_steps": 8674, "loss": 0.4659677743911743, "lr": 6.678440744235848e-08, "epoch": 1.7779571132118976, "percentage": 88.9, "elapsed_time": "5:17:48", "remaining_time": "0:39:41"} +{"current_steps": 7712, "total_steps": 8674, "loss": 0.4030906558036804, "lr": 6.664748165113432e-08, "epoch": 1.7781876873414801, "percentage": 88.91, "elapsed_time": "5:17:51", "remaining_time": "0:39:39"} +{"current_steps": 7713, "total_steps": 8674, "loss": 
0.4878493547439575, "lr": 6.651069153215804e-08, "epoch": 1.7784182614710629, "percentage": 88.92, "elapsed_time": "5:17:55", "remaining_time": "0:39:36"} +{"current_steps": 7714, "total_steps": 8674, "loss": 0.4651924669742584, "lr": 6.637403710531352e-08, "epoch": 1.7786488356006456, "percentage": 88.93, "elapsed_time": "5:17:58", "remaining_time": "0:39:34"} +{"current_steps": 7715, "total_steps": 8674, "loss": 0.37795954942703247, "lr": 6.623751839046455e-08, "epoch": 1.7788794097302283, "percentage": 88.94, "elapsed_time": "5:18:01", "remaining_time": "0:39:31"} +{"current_steps": 7716, "total_steps": 8674, "loss": 0.5722923278808594, "lr": 6.610113540745577e-08, "epoch": 1.779109983859811, "percentage": 88.96, "elapsed_time": "5:18:04", "remaining_time": "0:39:29"} +{"current_steps": 7717, "total_steps": 8674, "loss": 0.46933984756469727, "lr": 6.59648881761118e-08, "epoch": 1.7793405579893937, "percentage": 88.97, "elapsed_time": "5:18:07", "remaining_time": "0:39:27"} +{"current_steps": 7718, "total_steps": 8674, "loss": 0.5066707134246826, "lr": 6.582877671623732e-08, "epoch": 1.7795711321189762, "percentage": 88.98, "elapsed_time": "5:18:10", "remaining_time": "0:39:24"} +{"current_steps": 7719, "total_steps": 8674, "loss": 0.5064150094985962, "lr": 6.569280104761787e-08, "epoch": 1.779801706248559, "percentage": 88.99, "elapsed_time": "5:18:13", "remaining_time": "0:39:22"} +{"current_steps": 7720, "total_steps": 8674, "loss": 0.408633828163147, "lr": 6.555696119001853e-08, "epoch": 1.7800322803781414, "percentage": 89.0, "elapsed_time": "5:18:16", "remaining_time": "0:39:19"} +{"current_steps": 7721, "total_steps": 8674, "loss": 0.4960691034793854, "lr": 6.542125716318514e-08, "epoch": 1.7802628545077241, "percentage": 89.01, "elapsed_time": "5:18:19", "remaining_time": "0:39:17"} +{"current_steps": 7722, "total_steps": 8674, "loss": 0.4275667071342468, "lr": 6.528568898684373e-08, "epoch": 1.7804934286373069, "percentage": 89.02, "elapsed_time": 
"5:18:22", "remaining_time": "0:39:15"} +{"current_steps": 7723, "total_steps": 8674, "loss": 0.5309962630271912, "lr": 6.515025668070062e-08, "epoch": 1.7807240027668896, "percentage": 89.04, "elapsed_time": "5:18:25", "remaining_time": "0:39:12"} +{"current_steps": 7724, "total_steps": 8674, "loss": 0.42067253589630127, "lr": 6.501496026444197e-08, "epoch": 1.7809545768964723, "percentage": 89.05, "elapsed_time": "5:18:29", "remaining_time": "0:39:10"} +{"current_steps": 7725, "total_steps": 8674, "loss": 0.43419337272644043, "lr": 6.487979975773484e-08, "epoch": 1.781185151026055, "percentage": 89.06, "elapsed_time": "5:18:32", "remaining_time": "0:39:07"} +{"current_steps": 7726, "total_steps": 8674, "loss": 0.46563541889190674, "lr": 6.474477518022592e-08, "epoch": 1.7814157251556375, "percentage": 89.07, "elapsed_time": "5:18:35", "remaining_time": "0:39:05"} +{"current_steps": 7727, "total_steps": 8674, "loss": 0.4233010411262512, "lr": 6.460988655154232e-08, "epoch": 1.7816462992852202, "percentage": 89.08, "elapsed_time": "5:18:38", "remaining_time": "0:39:03"} +{"current_steps": 7728, "total_steps": 8674, "loss": 0.47119754552841187, "lr": 6.447513389129155e-08, "epoch": 1.7818768734148027, "percentage": 89.09, "elapsed_time": "5:18:41", "remaining_time": "0:39:00"} +{"current_steps": 7729, "total_steps": 8674, "loss": 0.5227707624435425, "lr": 6.434051721906142e-08, "epoch": 1.7821074475443854, "percentage": 89.11, "elapsed_time": "5:18:45", "remaining_time": "0:38:58"} +{"current_steps": 7730, "total_steps": 8674, "loss": 0.4521239399909973, "lr": 6.42060365544198e-08, "epoch": 1.7823380216739682, "percentage": 89.12, "elapsed_time": "5:18:48", "remaining_time": "0:38:56"} +{"current_steps": 7731, "total_steps": 8674, "loss": 0.36693084239959717, "lr": 6.407169191691464e-08, "epoch": 1.7825685958035509, "percentage": 89.13, "elapsed_time": "5:18:52", "remaining_time": "0:38:53"} +{"current_steps": 7732, "total_steps": 8674, "loss": 0.43610745668411255, 
"lr": 6.393748332607463e-08, "epoch": 1.7827991699331336, "percentage": 89.14, "elapsed_time": "5:18:57", "remaining_time": "0:38:51"} +{"current_steps": 7733, "total_steps": 8674, "loss": 0.4471576511859894, "lr": 6.380341080140794e-08, "epoch": 1.7830297440627163, "percentage": 89.15, "elapsed_time": "5:19:00", "remaining_time": "0:38:49"} +{"current_steps": 7734, "total_steps": 8674, "loss": 0.48119011521339417, "lr": 6.366947436240367e-08, "epoch": 1.7832603181922988, "percentage": 89.16, "elapsed_time": "5:19:04", "remaining_time": "0:38:46"} +{"current_steps": 7735, "total_steps": 8674, "loss": 0.44503623247146606, "lr": 6.353567402853055e-08, "epoch": 1.7834908923218815, "percentage": 89.17, "elapsed_time": "5:19:08", "remaining_time": "0:38:44"} +{"current_steps": 7736, "total_steps": 8674, "loss": 0.3350965678691864, "lr": 6.340200981923804e-08, "epoch": 1.783721466451464, "percentage": 89.19, "elapsed_time": "5:19:12", "remaining_time": "0:38:42"} +{"current_steps": 7737, "total_steps": 8674, "loss": 0.4814649224281311, "lr": 6.326848175395572e-08, "epoch": 1.7839520405810467, "percentage": 89.2, "elapsed_time": "5:19:16", "remaining_time": "0:38:39"} +{"current_steps": 7738, "total_steps": 8674, "loss": 0.42114442586898804, "lr": 6.313508985209281e-08, "epoch": 1.7841826147106294, "percentage": 89.21, "elapsed_time": "5:19:20", "remaining_time": "0:38:37"} +{"current_steps": 7739, "total_steps": 8674, "loss": 0.5044004917144775, "lr": 6.30018341330396e-08, "epoch": 1.7844131888402122, "percentage": 89.22, "elapsed_time": "5:19:24", "remaining_time": "0:38:35"} +{"current_steps": 7740, "total_steps": 8674, "loss": 0.46084678173065186, "lr": 6.286871461616594e-08, "epoch": 1.7846437629697949, "percentage": 89.23, "elapsed_time": "5:19:28", "remaining_time": "0:38:33"} +{"current_steps": 7741, "total_steps": 8674, "loss": 0.5159536600112915, "lr": 6.273573132082222e-08, "epoch": 1.7848743370993776, "percentage": 89.24, "elapsed_time": "5:19:32", 
"remaining_time": "0:38:30"} +{"current_steps": 7742, "total_steps": 8674, "loss": 0.4394105076789856, "lr": 6.260288426633875e-08, "epoch": 1.78510491122896, "percentage": 89.26, "elapsed_time": "5:19:36", "remaining_time": "0:38:28"} +{"current_steps": 7743, "total_steps": 8674, "loss": 0.39798909425735474, "lr": 6.247017347202643e-08, "epoch": 1.7853354853585428, "percentage": 89.27, "elapsed_time": "5:19:40", "remaining_time": "0:38:26"} +{"current_steps": 7744, "total_steps": 8674, "loss": 0.3865649104118347, "lr": 6.23375989571756e-08, "epoch": 1.7855660594881253, "percentage": 89.28, "elapsed_time": "5:19:44", "remaining_time": "0:38:23"} +{"current_steps": 7745, "total_steps": 8674, "loss": 0.3641304671764374, "lr": 6.220516074105808e-08, "epoch": 1.785796633617708, "percentage": 89.29, "elapsed_time": "5:19:48", "remaining_time": "0:38:21"} +{"current_steps": 7746, "total_steps": 8674, "loss": 0.5025773644447327, "lr": 6.207285884292468e-08, "epoch": 1.7860272077472907, "percentage": 89.3, "elapsed_time": "5:19:52", "remaining_time": "0:38:19"} +{"current_steps": 7747, "total_steps": 8674, "loss": 0.4289078414440155, "lr": 6.194069328200669e-08, "epoch": 1.7862577818768735, "percentage": 89.31, "elapsed_time": "5:19:56", "remaining_time": "0:38:17"} +{"current_steps": 7748, "total_steps": 8674, "loss": 0.37442147731781006, "lr": 6.180866407751595e-08, "epoch": 1.7864883560064562, "percentage": 89.32, "elapsed_time": "5:20:00", "remaining_time": "0:38:14"} +{"current_steps": 7749, "total_steps": 8674, "loss": 0.4975471794605255, "lr": 6.167677124864412e-08, "epoch": 1.7867189301360389, "percentage": 89.34, "elapsed_time": "5:20:04", "remaining_time": "0:38:12"} +{"current_steps": 7750, "total_steps": 8674, "loss": 0.42754751443862915, "lr": 6.154501481456331e-08, "epoch": 1.7869495042656214, "percentage": 89.35, "elapsed_time": "5:20:08", "remaining_time": "0:38:10"} +{"current_steps": 7751, "total_steps": 8674, "loss": 0.40203964710235596, "lr": 
6.141339479442542e-08, "epoch": 1.787180078395204, "percentage": 89.36, "elapsed_time": "5:20:12", "remaining_time": "0:38:07"} +{"current_steps": 7752, "total_steps": 8674, "loss": 0.46465349197387695, "lr": 6.128191120736293e-08, "epoch": 1.7874106525247866, "percentage": 89.37, "elapsed_time": "5:20:16", "remaining_time": "0:38:05"} +{"current_steps": 7753, "total_steps": 8674, "loss": 0.43915730714797974, "lr": 6.11505640724882e-08, "epoch": 1.7876412266543693, "percentage": 89.38, "elapsed_time": "5:20:20", "remaining_time": "0:38:03"} +{"current_steps": 7754, "total_steps": 8674, "loss": 0.5205652713775635, "lr": 6.101935340889419e-08, "epoch": 1.787871800783952, "percentage": 89.39, "elapsed_time": "5:20:23", "remaining_time": "0:38:00"} +{"current_steps": 7755, "total_steps": 8674, "loss": 0.39400190114974976, "lr": 6.088827923565321e-08, "epoch": 1.7881023749135347, "percentage": 89.41, "elapsed_time": "5:20:27", "remaining_time": "0:37:58"} +{"current_steps": 7756, "total_steps": 8674, "loss": 0.48021531105041504, "lr": 6.075734157181855e-08, "epoch": 1.7883329490431175, "percentage": 89.42, "elapsed_time": "5:20:31", "remaining_time": "0:37:56"} +{"current_steps": 7757, "total_steps": 8674, "loss": 0.42780327796936035, "lr": 6.062654043642334e-08, "epoch": 1.7885635231727002, "percentage": 89.43, "elapsed_time": "5:20:36", "remaining_time": "0:37:54"} +{"current_steps": 7758, "total_steps": 8674, "loss": 0.4307866096496582, "lr": 6.049587584848059e-08, "epoch": 1.7887940973022827, "percentage": 89.44, "elapsed_time": "5:20:39", "remaining_time": "0:37:51"} +{"current_steps": 7759, "total_steps": 8674, "loss": 0.4258533716201782, "lr": 6.036534782698377e-08, "epoch": 1.7890246714318654, "percentage": 89.45, "elapsed_time": "5:20:43", "remaining_time": "0:37:49"} +{"current_steps": 7760, "total_steps": 8674, "loss": 0.5159060955047607, "lr": 6.02349563909067e-08, "epoch": 1.7892552455614479, "percentage": 89.46, "elapsed_time": "5:20:47", "remaining_time": 
"0:37:47"} +{"current_steps": 7761, "total_steps": 8674, "loss": 0.4407171308994293, "lr": 6.0104701559203e-08, "epoch": 1.7894858196910306, "percentage": 89.47, "elapsed_time": "5:20:51", "remaining_time": "0:37:44"} +{"current_steps": 7762, "total_steps": 8674, "loss": 0.40273964405059814, "lr": 5.99745833508063e-08, "epoch": 1.7897163938206133, "percentage": 89.49, "elapsed_time": "5:20:55", "remaining_time": "0:37:42"} +{"current_steps": 7763, "total_steps": 8674, "loss": 0.42018163204193115, "lr": 5.984460178463102e-08, "epoch": 1.789946967950196, "percentage": 89.5, "elapsed_time": "5:20:59", "remaining_time": "0:37:40"} +{"current_steps": 7764, "total_steps": 8674, "loss": 0.519807755947113, "lr": 5.971475687957084e-08, "epoch": 1.7901775420797787, "percentage": 89.51, "elapsed_time": "5:21:03", "remaining_time": "0:37:37"} +{"current_steps": 7765, "total_steps": 8674, "loss": 0.42557477951049805, "lr": 5.9585048654500535e-08, "epoch": 1.7904081162093615, "percentage": 89.52, "elapsed_time": "5:21:07", "remaining_time": "0:37:35"} +{"current_steps": 7766, "total_steps": 8674, "loss": 0.39568305015563965, "lr": 5.9455477128273924e-08, "epoch": 1.790638690338944, "percentage": 89.53, "elapsed_time": "5:21:11", "remaining_time": "0:37:33"} +{"current_steps": 7767, "total_steps": 8674, "loss": 0.43125781416893005, "lr": 5.932604231972593e-08, "epoch": 1.7908692644685267, "percentage": 89.54, "elapsed_time": "5:21:15", "remaining_time": "0:37:30"} +{"current_steps": 7768, "total_steps": 8674, "loss": 0.46194958686828613, "lr": 5.919674424767129e-08, "epoch": 1.7910998385981092, "percentage": 89.55, "elapsed_time": "5:21:18", "remaining_time": "0:37:28"} +{"current_steps": 7769, "total_steps": 8674, "loss": 0.40115779638290405, "lr": 5.906758293090441e-08, "epoch": 1.791330412727692, "percentage": 89.57, "elapsed_time": "5:21:22", "remaining_time": "0:37:26"} +{"current_steps": 7770, "total_steps": 8674, "loss": 0.46589648723602295, "lr": 5.893855838820061e-08, 
"epoch": 1.7915609868572746, "percentage": 89.58, "elapsed_time": "5:21:25", "remaining_time": "0:37:23"} +{"current_steps": 7771, "total_steps": 8674, "loss": 0.3540228605270386, "lr": 5.880967063831455e-08, "epoch": 1.7917915609868573, "percentage": 89.59, "elapsed_time": "5:21:29", "remaining_time": "0:37:21"} +{"current_steps": 7772, "total_steps": 8674, "loss": 0.4324638545513153, "lr": 5.868091969998168e-08, "epoch": 1.79202213511644, "percentage": 89.6, "elapsed_time": "5:21:32", "remaining_time": "0:37:19"} +{"current_steps": 7773, "total_steps": 8674, "loss": 0.4301075339317322, "lr": 5.855230559191693e-08, "epoch": 1.7922527092460228, "percentage": 89.61, "elapsed_time": "5:21:36", "remaining_time": "0:37:16"} +{"current_steps": 7774, "total_steps": 8674, "loss": 0.4496096670627594, "lr": 5.842382833281612e-08, "epoch": 1.7924832833756053, "percentage": 89.62, "elapsed_time": "5:21:39", "remaining_time": "0:37:14"} +{"current_steps": 7775, "total_steps": 8674, "loss": 0.4554907977581024, "lr": 5.8295487941354195e-08, "epoch": 1.792713857505188, "percentage": 89.64, "elapsed_time": "5:21:43", "remaining_time": "0:37:11"} +{"current_steps": 7776, "total_steps": 8674, "loss": 0.5020148158073425, "lr": 5.816728443618701e-08, "epoch": 1.7929444316347705, "percentage": 89.65, "elapsed_time": "5:21:46", "remaining_time": "0:37:09"} +{"current_steps": 7777, "total_steps": 8674, "loss": 0.4073353409767151, "lr": 5.803921783595045e-08, "epoch": 1.7931750057643532, "percentage": 89.66, "elapsed_time": "5:21:50", "remaining_time": "0:37:07"} +{"current_steps": 7778, "total_steps": 8674, "loss": 0.4995894432067871, "lr": 5.791128815925983e-08, "epoch": 1.793405579893936, "percentage": 89.67, "elapsed_time": "5:21:54", "remaining_time": "0:37:04"} +{"current_steps": 7779, "total_steps": 8674, "loss": 0.5383706092834473, "lr": 5.778349542471139e-08, "epoch": 1.7936361540235186, "percentage": 89.68, "elapsed_time": "5:21:57", "remaining_time": "0:37:02"} 
+{"current_steps": 7780, "total_steps": 8674, "loss": 0.4206235408782959, "lr": 5.765583965088083e-08, "epoch": 1.7938667281531013, "percentage": 89.69, "elapsed_time": "5:22:01", "remaining_time": "0:37:00"} +{"current_steps": 7781, "total_steps": 8674, "loss": 0.49053555727005005, "lr": 5.752832085632453e-08, "epoch": 1.794097302282684, "percentage": 89.7, "elapsed_time": "5:22:04", "remaining_time": "0:36:57"} +{"current_steps": 7782, "total_steps": 8674, "loss": 0.4372660517692566, "lr": 5.740093905957832e-08, "epoch": 1.7943278764122665, "percentage": 89.72, "elapsed_time": "5:22:08", "remaining_time": "0:36:55"} +{"current_steps": 7783, "total_steps": 8674, "loss": 0.40125733613967896, "lr": 5.727369427915851e-08, "epoch": 1.7945584505418493, "percentage": 89.73, "elapsed_time": "5:22:12", "remaining_time": "0:36:53"} +{"current_steps": 7784, "total_steps": 8674, "loss": 0.3595162034034729, "lr": 5.714658653356153e-08, "epoch": 1.7947890246714318, "percentage": 89.74, "elapsed_time": "5:22:16", "remaining_time": "0:36:50"} +{"current_steps": 7785, "total_steps": 8674, "loss": 0.42618101835250854, "lr": 5.7019615841263915e-08, "epoch": 1.7950195988010145, "percentage": 89.75, "elapsed_time": "5:22:20", "remaining_time": "0:36:48"} +{"current_steps": 7786, "total_steps": 8674, "loss": 0.39135509729385376, "lr": 5.6892782220721694e-08, "epoch": 1.7952501729305972, "percentage": 89.76, "elapsed_time": "5:22:24", "remaining_time": "0:36:46"} +{"current_steps": 7787, "total_steps": 8674, "loss": 0.3792929947376251, "lr": 5.6766085690372004e-08, "epoch": 1.79548074706018, "percentage": 89.77, "elapsed_time": "5:22:28", "remaining_time": "0:36:43"} +{"current_steps": 7788, "total_steps": 8674, "loss": 0.5193231105804443, "lr": 5.6639526268631e-08, "epoch": 1.7957113211897626, "percentage": 89.79, "elapsed_time": "5:22:32", "remaining_time": "0:36:41"} +{"current_steps": 7789, "total_steps": 8674, "loss": 0.3896862268447876, "lr": 5.6513103973895415e-08, "epoch": 
1.7959418953193451, "percentage": 89.8, "elapsed_time": "5:22:36", "remaining_time": "0:36:39"} +{"current_steps": 7790, "total_steps": 8674, "loss": 0.5345273017883301, "lr": 5.638681882454211e-08, "epoch": 1.7961724694489278, "percentage": 89.81, "elapsed_time": "5:22:39", "remaining_time": "0:36:36"} +{"current_steps": 7791, "total_steps": 8674, "loss": 0.4297627806663513, "lr": 5.626067083892794e-08, "epoch": 1.7964030435785103, "percentage": 89.82, "elapsed_time": "5:22:44", "remaining_time": "0:36:34"} +{"current_steps": 7792, "total_steps": 8674, "loss": 0.3176969587802887, "lr": 5.6134660035389914e-08, "epoch": 1.796633617708093, "percentage": 89.83, "elapsed_time": "5:22:47", "remaining_time": "0:36:32"} +{"current_steps": 7793, "total_steps": 8674, "loss": 0.5449323654174805, "lr": 5.600878643224471e-08, "epoch": 1.7968641918376758, "percentage": 89.84, "elapsed_time": "5:22:51", "remaining_time": "0:36:29"} +{"current_steps": 7794, "total_steps": 8674, "loss": 0.38096293807029724, "lr": 5.588305004778959e-08, "epoch": 1.7970947659672585, "percentage": 89.85, "elapsed_time": "5:22:55", "remaining_time": "0:36:27"} +{"current_steps": 7795, "total_steps": 8674, "loss": 0.3917475938796997, "lr": 5.575745090030137e-08, "epoch": 1.7973253400968412, "percentage": 89.87, "elapsed_time": "5:22:59", "remaining_time": "0:36:25"} +{"current_steps": 7796, "total_steps": 8674, "loss": 0.41522616147994995, "lr": 5.563198900803734e-08, "epoch": 1.797555914226424, "percentage": 89.88, "elapsed_time": "5:23:03", "remaining_time": "0:36:23"} +{"current_steps": 7797, "total_steps": 8674, "loss": 0.46558207273483276, "lr": 5.550666438923468e-08, "epoch": 1.7977864883560064, "percentage": 89.89, "elapsed_time": "5:23:07", "remaining_time": "0:36:20"} +{"current_steps": 7798, "total_steps": 8674, "loss": 0.43256324529647827, "lr": 5.538147706211038e-08, "epoch": 1.7980170624855891, "percentage": 89.9, "elapsed_time": "5:23:11", "remaining_time": "0:36:18"} +{"current_steps": 
7799, "total_steps": 8674, "loss": 0.37302178144454956, "lr": 5.5256427044861666e-08, "epoch": 1.7982476366151716, "percentage": 89.91, "elapsed_time": "5:23:15", "remaining_time": "0:36:16"} +{"current_steps": 7800, "total_steps": 8674, "loss": 0.5247504711151123, "lr": 5.5131514355666095e-08, "epoch": 1.7984782107447543, "percentage": 89.92, "elapsed_time": "5:23:19", "remaining_time": "0:36:13"} +{"current_steps": 7801, "total_steps": 8674, "loss": 0.3906348943710327, "lr": 5.5006739012680934e-08, "epoch": 1.798708784874337, "percentage": 89.94, "elapsed_time": "5:23:25", "remaining_time": "0:36:11"} +{"current_steps": 7802, "total_steps": 8674, "loss": 0.5293325185775757, "lr": 5.488210103404345e-08, "epoch": 1.7989393590039198, "percentage": 89.95, "elapsed_time": "5:23:29", "remaining_time": "0:36:09"} +{"current_steps": 7803, "total_steps": 8674, "loss": 0.4189381003379822, "lr": 5.4757600437871146e-08, "epoch": 1.7991699331335025, "percentage": 89.96, "elapsed_time": "5:23:33", "remaining_time": "0:36:07"} +{"current_steps": 7804, "total_steps": 8674, "loss": 0.40476128458976746, "lr": 5.4633237242261207e-08, "epoch": 1.7994005072630852, "percentage": 89.97, "elapsed_time": "5:23:37", "remaining_time": "0:36:04"} +{"current_steps": 7805, "total_steps": 8674, "loss": 0.3908376097679138, "lr": 5.45090114652913e-08, "epoch": 1.7996310813926677, "percentage": 89.98, "elapsed_time": "5:23:41", "remaining_time": "0:36:02"} +{"current_steps": 7806, "total_steps": 8674, "loss": 0.42332786321640015, "lr": 5.438492312501885e-08, "epoch": 1.7998616555222504, "percentage": 89.99, "elapsed_time": "5:23:45", "remaining_time": "0:36:00"} +{"current_steps": 7807, "total_steps": 8674, "loss": 0.3398321866989136, "lr": 5.426097223948123e-08, "epoch": 1.800092229651833, "percentage": 90.0, "elapsed_time": "5:23:48", "remaining_time": "0:35:57"} +{"current_steps": 7808, "total_steps": 8674, "loss": 0.4610673189163208, "lr": 5.413715882669623e-08, "epoch": 1.8003228037814156, 
"percentage": 90.02, "elapsed_time": "5:23:51", "remaining_time": "0:35:55"} +{"current_steps": 7809, "total_steps": 8674, "loss": 0.4149124026298523, "lr": 5.401348290466112e-08, "epoch": 1.8005533779109983, "percentage": 90.03, "elapsed_time": "5:23:53", "remaining_time": "0:35:52"} +{"current_steps": 7810, "total_steps": 8674, "loss": 0.47464168071746826, "lr": 5.388994449135376e-08, "epoch": 1.800783952040581, "percentage": 90.04, "elapsed_time": "5:23:56", "remaining_time": "0:35:50"} +{"current_steps": 7811, "total_steps": 8674, "loss": 0.4530913829803467, "lr": 5.376654360473121e-08, "epoch": 1.8010145261701638, "percentage": 90.05, "elapsed_time": "5:23:58", "remaining_time": "0:35:47"} +{"current_steps": 7812, "total_steps": 8674, "loss": 0.5577078461647034, "lr": 5.364328026273157e-08, "epoch": 1.8012451002997465, "percentage": 90.06, "elapsed_time": "5:24:01", "remaining_time": "0:35:45"} +{"current_steps": 7813, "total_steps": 8674, "loss": 0.4772539436817169, "lr": 5.3520154483272075e-08, "epoch": 1.801475674429329, "percentage": 90.07, "elapsed_time": "5:24:03", "remaining_time": "0:35:42"} +{"current_steps": 7814, "total_steps": 8674, "loss": 0.5387610197067261, "lr": 5.339716628425039e-08, "epoch": 1.8017062485589117, "percentage": 90.09, "elapsed_time": "5:24:06", "remaining_time": "0:35:40"} +{"current_steps": 7815, "total_steps": 8674, "loss": 0.4505125880241394, "lr": 5.327431568354401e-08, "epoch": 1.8019368226884942, "percentage": 90.1, "elapsed_time": "5:24:08", "remaining_time": "0:35:37"} +{"current_steps": 7816, "total_steps": 8674, "loss": 0.43021589517593384, "lr": 5.3151602699010867e-08, "epoch": 1.802167396818077, "percentage": 90.11, "elapsed_time": "5:24:11", "remaining_time": "0:35:35"} +{"current_steps": 7817, "total_steps": 8674, "loss": 0.44107457995414734, "lr": 5.3029027348488244e-08, "epoch": 1.8023979709476596, "percentage": 90.12, "elapsed_time": "5:24:13", "remaining_time": "0:35:32"} +{"current_steps": 7818, "total_steps": 
8674, "loss": 0.42265504598617554, "lr": 5.2906589649793666e-08, "epoch": 1.8026285450772424, "percentage": 90.13, "elapsed_time": "5:24:15", "remaining_time": "0:35:30"} +{"current_steps": 7819, "total_steps": 8674, "loss": 0.4814263582229614, "lr": 5.2784289620724895e-08, "epoch": 1.802859119206825, "percentage": 90.14, "elapsed_time": "5:24:18", "remaining_time": "0:35:27"} +{"current_steps": 7820, "total_steps": 8674, "loss": 0.4255106747150421, "lr": 5.2662127279059275e-08, "epoch": 1.8030896933364078, "percentage": 90.15, "elapsed_time": "5:24:20", "remaining_time": "0:35:25"} +{"current_steps": 7821, "total_steps": 8674, "loss": 0.43405312299728394, "lr": 5.2540102642554593e-08, "epoch": 1.8033202674659903, "percentage": 90.17, "elapsed_time": "5:24:23", "remaining_time": "0:35:22"} +{"current_steps": 7822, "total_steps": 8674, "loss": 0.3986097574234009, "lr": 5.2418215728948004e-08, "epoch": 1.803550841595573, "percentage": 90.18, "elapsed_time": "5:24:25", "remaining_time": "0:35:20"} +{"current_steps": 7823, "total_steps": 8674, "loss": 0.4988093972206116, "lr": 5.2296466555957205e-08, "epoch": 1.8037814157251555, "percentage": 90.19, "elapsed_time": "5:24:27", "remaining_time": "0:35:17"} +{"current_steps": 7824, "total_steps": 8674, "loss": 0.5290527939796448, "lr": 5.217485514127973e-08, "epoch": 1.8040119898547382, "percentage": 90.2, "elapsed_time": "5:24:30", "remaining_time": "0:35:15"} +{"current_steps": 7825, "total_steps": 8674, "loss": 0.3705815076828003, "lr": 5.205338150259308e-08, "epoch": 1.804242563984321, "percentage": 90.21, "elapsed_time": "5:24:33", "remaining_time": "0:35:12"} +{"current_steps": 7826, "total_steps": 8674, "loss": 0.37735384702682495, "lr": 5.193204565755449e-08, "epoch": 1.8044731381139036, "percentage": 90.22, "elapsed_time": "5:24:35", "remaining_time": "0:35:10"} +{"current_steps": 7827, "total_steps": 8674, "loss": 0.39033758640289307, "lr": 5.1810847623801504e-08, "epoch": 1.8047037122434864, "percentage": 
90.24, "elapsed_time": "5:24:37", "remaining_time": "0:35:07"} +{"current_steps": 7828, "total_steps": 8674, "loss": 0.4669237732887268, "lr": 5.168978741895147e-08, "epoch": 1.804934286373069, "percentage": 90.25, "elapsed_time": "5:24:40", "remaining_time": "0:35:05"} +{"current_steps": 7829, "total_steps": 8674, "loss": 0.5178482532501221, "lr": 5.156886506060154e-08, "epoch": 1.8051648605026516, "percentage": 90.26, "elapsed_time": "5:24:42", "remaining_time": "0:35:02"} +{"current_steps": 7830, "total_steps": 8674, "loss": 0.44134122133255005, "lr": 5.14480805663291e-08, "epoch": 1.8053954346322343, "percentage": 90.27, "elapsed_time": "5:24:44", "remaining_time": "0:35:00"} +{"current_steps": 7831, "total_steps": 8674, "loss": 0.44371920824050903, "lr": 5.132743395369144e-08, "epoch": 1.8056260087618168, "percentage": 90.28, "elapsed_time": "5:24:47", "remaining_time": "0:34:57"} +{"current_steps": 7832, "total_steps": 8674, "loss": 0.43268662691116333, "lr": 5.1206925240225964e-08, "epoch": 1.8058565828913995, "percentage": 90.29, "elapsed_time": "5:24:49", "remaining_time": "0:34:55"} +{"current_steps": 7833, "total_steps": 8674, "loss": 0.5035665035247803, "lr": 5.1086554443449445e-08, "epoch": 1.8060871570209822, "percentage": 90.3, "elapsed_time": "5:24:52", "remaining_time": "0:34:52"} +{"current_steps": 7834, "total_steps": 8674, "loss": 0.4987141191959381, "lr": 5.0966321580859336e-08, "epoch": 1.806317731150565, "percentage": 90.32, "elapsed_time": "5:24:54", "remaining_time": "0:34:50"} +{"current_steps": 7835, "total_steps": 8674, "loss": 0.5951617956161499, "lr": 5.0846226669932437e-08, "epoch": 1.8065483052801476, "percentage": 90.33, "elapsed_time": "5:24:57", "remaining_time": "0:34:47"} +{"current_steps": 7836, "total_steps": 8674, "loss": 0.4710814654827118, "lr": 5.072626972812599e-08, "epoch": 1.8067788794097304, "percentage": 90.34, "elapsed_time": "5:24:59", "remaining_time": "0:34:45"} +{"current_steps": 7837, "total_steps": 8674, 
"loss": 0.5173348188400269, "lr": 5.060645077287662e-08, "epoch": 1.8070094535393129, "percentage": 90.35, "elapsed_time": "5:25:01", "remaining_time": "0:34:42"} +{"current_steps": 7838, "total_steps": 8674, "loss": 0.49508416652679443, "lr": 5.048676982160161e-08, "epoch": 1.8072400276688956, "percentage": 90.36, "elapsed_time": "5:25:04", "remaining_time": "0:34:40"} +{"current_steps": 7839, "total_steps": 8674, "loss": 0.4535290598869324, "lr": 5.03672268916977e-08, "epoch": 1.807470601798478, "percentage": 90.37, "elapsed_time": "5:25:06", "remaining_time": "0:34:37"} +{"current_steps": 7840, "total_steps": 8674, "loss": 0.5337553024291992, "lr": 5.024782200054145e-08, "epoch": 1.8077011759280608, "percentage": 90.39, "elapsed_time": "5:25:09", "remaining_time": "0:34:35"} +{"current_steps": 7841, "total_steps": 8674, "loss": 0.47118210792541504, "lr": 5.012855516548986e-08, "epoch": 1.8079317500576435, "percentage": 90.4, "elapsed_time": "5:25:11", "remaining_time": "0:34:32"} +{"current_steps": 7842, "total_steps": 8674, "loss": 0.4458848237991333, "lr": 5.0009426403879283e-08, "epoch": 1.8081623241872262, "percentage": 90.41, "elapsed_time": "5:25:14", "remaining_time": "0:34:30"} +{"current_steps": 7843, "total_steps": 8674, "loss": 0.5055558681488037, "lr": 4.9890435733026536e-08, "epoch": 1.808392898316809, "percentage": 90.42, "elapsed_time": "5:25:16", "remaining_time": "0:34:27"} +{"current_steps": 7844, "total_steps": 8674, "loss": 0.43715038895606995, "lr": 4.9771583170228006e-08, "epoch": 1.8086234724463917, "percentage": 90.43, "elapsed_time": "5:25:19", "remaining_time": "0:34:25"} +{"current_steps": 7845, "total_steps": 8674, "loss": 0.427906334400177, "lr": 4.96528687327602e-08, "epoch": 1.8088540465759742, "percentage": 90.44, "elapsed_time": "5:25:21", "remaining_time": "0:34:22"} +{"current_steps": 7846, "total_steps": 8674, "loss": 0.48160994052886963, "lr": 4.953429243787932e-08, "epoch": 1.8090846207055569, "percentage": 90.45, 
"elapsed_time": "5:25:23", "remaining_time": "0:34:20"} +{"current_steps": 7847, "total_steps": 8674, "loss": 0.40856754779815674, "lr": 4.941585430282158e-08, "epoch": 1.8093151948351394, "percentage": 90.47, "elapsed_time": "5:25:26", "remaining_time": "0:34:17"} +{"current_steps": 7848, "total_steps": 8674, "loss": 0.40482330322265625, "lr": 4.929755434480354e-08, "epoch": 1.809545768964722, "percentage": 90.48, "elapsed_time": "5:25:28", "remaining_time": "0:34:15"} +{"current_steps": 7849, "total_steps": 8674, "loss": 0.4286755323410034, "lr": 4.9179392581021e-08, "epoch": 1.8097763430943048, "percentage": 90.49, "elapsed_time": "5:25:31", "remaining_time": "0:34:12"} +{"current_steps": 7850, "total_steps": 8674, "loss": 0.4436051547527313, "lr": 4.906136902864999e-08, "epoch": 1.8100069172238875, "percentage": 90.5, "elapsed_time": "5:25:33", "remaining_time": "0:34:10"} +{"current_steps": 7851, "total_steps": 8674, "loss": 0.41794437170028687, "lr": 4.8943483704846465e-08, "epoch": 1.8102374913534702, "percentage": 90.51, "elapsed_time": "5:25:36", "remaining_time": "0:34:07"} +{"current_steps": 7852, "total_steps": 8674, "loss": 0.4308912754058838, "lr": 4.8825736626746384e-08, "epoch": 1.810468065483053, "percentage": 90.52, "elapsed_time": "5:25:38", "remaining_time": "0:34:05"} +{"current_steps": 7853, "total_steps": 8674, "loss": 0.43090081214904785, "lr": 4.870812781146516e-08, "epoch": 1.8106986396126354, "percentage": 90.53, "elapsed_time": "5:25:41", "remaining_time": "0:34:02"} +{"current_steps": 7854, "total_steps": 8674, "loss": 0.4329320192337036, "lr": 4.859065727609857e-08, "epoch": 1.8109292137422182, "percentage": 90.55, "elapsed_time": "5:25:43", "remaining_time": "0:34:00"} +{"current_steps": 7855, "total_steps": 8674, "loss": 0.3162953853607178, "lr": 4.8473325037722276e-08, "epoch": 1.8111597878718007, "percentage": 90.56, "elapsed_time": "5:25:45", "remaining_time": "0:33:57"} +{"current_steps": 7856, "total_steps": 8674, "loss": 
0.37513065338134766, "lr": 4.835613111339165e-08, "epoch": 1.8113903620013834, "percentage": 90.57, "elapsed_time": "5:25:48", "remaining_time": "0:33:55"} +{"current_steps": 7857, "total_steps": 8674, "loss": 0.4120938181877136, "lr": 4.823907552014195e-08, "epoch": 1.811620936130966, "percentage": 90.58, "elapsed_time": "5:25:50", "remaining_time": "0:33:52"} +{"current_steps": 7858, "total_steps": 8674, "loss": 0.4295421242713928, "lr": 4.8122158274988555e-08, "epoch": 1.8118515102605488, "percentage": 90.59, "elapsed_time": "5:25:53", "remaining_time": "0:33:50"} +{"current_steps": 7859, "total_steps": 8674, "loss": 0.44738203287124634, "lr": 4.8005379394926435e-08, "epoch": 1.8120820843901315, "percentage": 90.6, "elapsed_time": "5:25:55", "remaining_time": "0:33:47"} +{"current_steps": 7860, "total_steps": 8674, "loss": 0.447609007358551, "lr": 4.7888738896930456e-08, "epoch": 1.8123126585197142, "percentage": 90.62, "elapsed_time": "5:25:58", "remaining_time": "0:33:45"} +{"current_steps": 7861, "total_steps": 8674, "loss": 0.38288167119026184, "lr": 4.777223679795561e-08, "epoch": 1.8125432326492967, "percentage": 90.63, "elapsed_time": "5:26:00", "remaining_time": "0:33:42"} +{"current_steps": 7862, "total_steps": 8674, "loss": 0.5003981590270996, "lr": 4.765587311493668e-08, "epoch": 1.8127738067788794, "percentage": 90.64, "elapsed_time": "5:26:02", "remaining_time": "0:33:40"} +{"current_steps": 7863, "total_steps": 8674, "loss": 0.5244492888450623, "lr": 4.7539647864788476e-08, "epoch": 1.813004380908462, "percentage": 90.65, "elapsed_time": "5:26:05", "remaining_time": "0:33:37"} +{"current_steps": 7864, "total_steps": 8674, "loss": 0.505184531211853, "lr": 4.742356106440526e-08, "epoch": 1.8132349550380447, "percentage": 90.66, "elapsed_time": "5:26:07", "remaining_time": "0:33:35"} +{"current_steps": 7865, "total_steps": 8674, "loss": 0.5364291071891785, "lr": 4.7307612730661636e-08, "epoch": 1.8134655291676274, "percentage": 90.67, "elapsed_time": 
"5:26:10", "remaining_time": "0:33:32"} +{"current_steps": 7866, "total_steps": 8674, "loss": 0.4370742738246918, "lr": 4.719180288041158e-08, "epoch": 1.81369610329721, "percentage": 90.68, "elapsed_time": "5:26:12", "remaining_time": "0:33:30"} +{"current_steps": 7867, "total_steps": 8674, "loss": 0.37784355878829956, "lr": 4.7076131530489505e-08, "epoch": 1.8139266774267928, "percentage": 90.7, "elapsed_time": "5:26:15", "remaining_time": "0:33:28"} +{"current_steps": 7868, "total_steps": 8674, "loss": 0.5184513330459595, "lr": 4.6960598697709294e-08, "epoch": 1.8141572515563755, "percentage": 90.71, "elapsed_time": "5:26:17", "remaining_time": "0:33:25"} +{"current_steps": 7869, "total_steps": 8674, "loss": 0.41221511363983154, "lr": 4.6845204398864743e-08, "epoch": 1.814387825685958, "percentage": 90.72, "elapsed_time": "5:26:19", "remaining_time": "0:33:23"} +{"current_steps": 7870, "total_steps": 8674, "loss": 0.43040651082992554, "lr": 4.672994865072965e-08, "epoch": 1.8146183998155407, "percentage": 90.73, "elapsed_time": "5:26:22", "remaining_time": "0:33:20"} +{"current_steps": 7871, "total_steps": 8674, "loss": 0.4681999385356903, "lr": 4.6614831470057625e-08, "epoch": 1.8148489739451232, "percentage": 90.74, "elapsed_time": "5:26:24", "remaining_time": "0:33:18"} +{"current_steps": 7872, "total_steps": 8674, "loss": 0.49752098321914673, "lr": 4.649985287358227e-08, "epoch": 1.815079548074706, "percentage": 90.75, "elapsed_time": "5:26:27", "remaining_time": "0:33:15"} +{"current_steps": 7873, "total_steps": 8674, "loss": 0.4621706008911133, "lr": 4.6385012878016663e-08, "epoch": 1.8153101222042887, "percentage": 90.77, "elapsed_time": "5:26:29", "remaining_time": "0:33:13"} +{"current_steps": 7874, "total_steps": 8674, "loss": 0.4359724521636963, "lr": 4.627031150005401e-08, "epoch": 1.8155406963338714, "percentage": 90.78, "elapsed_time": "5:26:32", "remaining_time": "0:33:10"} +{"current_steps": 7875, "total_steps": 8674, "loss": 0.4901214838027954, 
"lr": 4.6155748756367294e-08, "epoch": 1.815771270463454, "percentage": 90.79, "elapsed_time": "5:26:34", "remaining_time": "0:33:08"} +{"current_steps": 7876, "total_steps": 8674, "loss": 0.5012428760528564, "lr": 4.604132466360955e-08, "epoch": 1.8160018445930368, "percentage": 90.8, "elapsed_time": "5:26:37", "remaining_time": "0:33:05"} +{"current_steps": 7877, "total_steps": 8674, "loss": 0.5048446655273438, "lr": 4.592703923841323e-08, "epoch": 1.8162324187226193, "percentage": 90.81, "elapsed_time": "5:26:39", "remaining_time": "0:33:03"} +{"current_steps": 7878, "total_steps": 8674, "loss": 0.5025140047073364, "lr": 4.5812892497390955e-08, "epoch": 1.816462992852202, "percentage": 90.82, "elapsed_time": "5:26:42", "remaining_time": "0:33:00"} +{"current_steps": 7879, "total_steps": 8674, "loss": 0.4456709623336792, "lr": 4.5698884457135324e-08, "epoch": 1.8166935669817845, "percentage": 90.83, "elapsed_time": "5:26:44", "remaining_time": "0:32:58"} +{"current_steps": 7880, "total_steps": 8674, "loss": 0.38283586502075195, "lr": 4.5585015134218196e-08, "epoch": 1.8169241411113672, "percentage": 90.85, "elapsed_time": "5:26:46", "remaining_time": "0:32:55"} +{"current_steps": 7881, "total_steps": 8674, "loss": 0.3458648920059204, "lr": 4.5471284545192004e-08, "epoch": 1.81715471524095, "percentage": 90.86, "elapsed_time": "5:26:49", "remaining_time": "0:32:53"} +{"current_steps": 7882, "total_steps": 8674, "loss": 0.4609532952308655, "lr": 4.53576927065884e-08, "epoch": 1.8173852893705327, "percentage": 90.87, "elapsed_time": "5:26:51", "remaining_time": "0:32:50"} +{"current_steps": 7883, "total_steps": 8674, "loss": 0.4250793159008026, "lr": 4.524423963491919e-08, "epoch": 1.8176158635001154, "percentage": 90.88, "elapsed_time": "5:26:54", "remaining_time": "0:32:48"} +{"current_steps": 7884, "total_steps": 8674, "loss": 0.41343796253204346, "lr": 4.513092534667584e-08, "epoch": 1.817846437629698, "percentage": 90.89, "elapsed_time": "5:26:56", 
"remaining_time": "0:32:45"} +{"current_steps": 7885, "total_steps": 8674, "loss": 0.46575528383255005, "lr": 4.5017749858329736e-08, "epoch": 1.8180770117592806, "percentage": 90.9, "elapsed_time": "5:26:59", "remaining_time": "0:32:43"} +{"current_steps": 7886, "total_steps": 8674, "loss": 0.47052180767059326, "lr": 4.4904713186332156e-08, "epoch": 1.8183075858888633, "percentage": 90.92, "elapsed_time": "5:27:01", "remaining_time": "0:32:40"} +{"current_steps": 7887, "total_steps": 8674, "loss": 0.42979568243026733, "lr": 4.479181534711429e-08, "epoch": 1.8185381600184458, "percentage": 90.93, "elapsed_time": "5:27:04", "remaining_time": "0:32:38"} +{"current_steps": 7888, "total_steps": 8674, "loss": 0.4278537929058075, "lr": 4.46790563570868e-08, "epoch": 1.8187687341480285, "percentage": 90.94, "elapsed_time": "5:27:07", "remaining_time": "0:32:35"} +{"current_steps": 7889, "total_steps": 8674, "loss": 0.45380616188049316, "lr": 4.456643623264022e-08, "epoch": 1.8189993082776112, "percentage": 90.95, "elapsed_time": "5:27:10", "remaining_time": "0:32:33"} +{"current_steps": 7890, "total_steps": 8674, "loss": 0.46085125207901, "lr": 4.445395499014526e-08, "epoch": 1.819229882407194, "percentage": 90.96, "elapsed_time": "5:27:14", "remaining_time": "0:32:30"} +{"current_steps": 7891, "total_steps": 8674, "loss": 0.47558531165122986, "lr": 4.434161264595204e-08, "epoch": 1.8194604565367767, "percentage": 90.97, "elapsed_time": "5:27:17", "remaining_time": "0:32:28"} +{"current_steps": 7892, "total_steps": 8674, "loss": 0.42082321643829346, "lr": 4.4229409216390845e-08, "epoch": 1.8196910306663594, "percentage": 90.98, "elapsed_time": "5:27:20", "remaining_time": "0:32:26"} +{"current_steps": 7893, "total_steps": 8674, "loss": 0.40222978591918945, "lr": 4.411734471777129e-08, "epoch": 1.819921604795942, "percentage": 91.0, "elapsed_time": "5:27:23", "remaining_time": "0:32:23"} +{"current_steps": 7894, "total_steps": 8674, "loss": 0.39737701416015625, "lr": 
4.400541916638323e-08, "epoch": 1.8201521789255246, "percentage": 91.01, "elapsed_time": "5:27:26", "remaining_time": "0:32:21"} +{"current_steps": 7895, "total_steps": 8674, "loss": 0.46538835763931274, "lr": 4.389363257849632e-08, "epoch": 1.820382753055107, "percentage": 91.02, "elapsed_time": "5:27:29", "remaining_time": "0:32:18"} +{"current_steps": 7896, "total_steps": 8674, "loss": 0.4994567036628723, "lr": 4.378198497035979e-08, "epoch": 1.8206133271846898, "percentage": 91.03, "elapsed_time": "5:27:33", "remaining_time": "0:32:16"} +{"current_steps": 7897, "total_steps": 8674, "loss": 0.4574298858642578, "lr": 4.367047635820264e-08, "epoch": 1.8208439013142725, "percentage": 91.04, "elapsed_time": "5:27:37", "remaining_time": "0:32:14"} +{"current_steps": 7898, "total_steps": 8674, "loss": 0.4716116786003113, "lr": 4.3559106758234044e-08, "epoch": 1.8210744754438553, "percentage": 91.05, "elapsed_time": "5:27:41", "remaining_time": "0:32:11"} +{"current_steps": 7899, "total_steps": 8674, "loss": 0.35549741983413696, "lr": 4.344787618664247e-08, "epoch": 1.821305049573438, "percentage": 91.07, "elapsed_time": "5:27:45", "remaining_time": "0:32:09"} +{"current_steps": 7900, "total_steps": 8674, "loss": 0.44955599308013916, "lr": 4.3336784659596226e-08, "epoch": 1.8215356237030205, "percentage": 91.08, "elapsed_time": "5:27:49", "remaining_time": "0:32:07"} +{"current_steps": 7901, "total_steps": 8674, "loss": 0.4047467112541199, "lr": 4.322583219324394e-08, "epoch": 1.8217661978326032, "percentage": 91.09, "elapsed_time": "5:27:55", "remaining_time": "0:32:04"} +{"current_steps": 7902, "total_steps": 8674, "loss": 0.40367889404296875, "lr": 4.3115018803713596e-08, "epoch": 1.8219967719621857, "percentage": 91.1, "elapsed_time": "5:27:58", "remaining_time": "0:32:02"} +{"current_steps": 7903, "total_steps": 8674, "loss": 0.32705235481262207, "lr": 4.3004344507113096e-08, "epoch": 1.8222273460917684, "percentage": 91.11, "elapsed_time": "5:28:02", 
"remaining_time": "0:32:00"} +{"current_steps": 7904, "total_steps": 8674, "loss": 0.3845488727092743, "lr": 4.2893809319529794e-08, "epoch": 1.8224579202213511, "percentage": 91.12, "elapsed_time": "5:28:05", "remaining_time": "0:31:57"} +{"current_steps": 7905, "total_steps": 8674, "loss": 0.49070197343826294, "lr": 4.2783413257031495e-08, "epoch": 1.8226884943509338, "percentage": 91.13, "elapsed_time": "5:28:08", "remaining_time": "0:31:55"} +{"current_steps": 7906, "total_steps": 8674, "loss": 0.550437867641449, "lr": 4.267315633566493e-08, "epoch": 1.8229190684805165, "percentage": 91.15, "elapsed_time": "5:28:12", "remaining_time": "0:31:52"} +{"current_steps": 7907, "total_steps": 8674, "loss": 0.5042926073074341, "lr": 4.25630385714576e-08, "epoch": 1.8231496426100993, "percentage": 91.16, "elapsed_time": "5:28:16", "remaining_time": "0:31:50"} +{"current_steps": 7908, "total_steps": 8674, "loss": 0.48839205503463745, "lr": 4.245305998041571e-08, "epoch": 1.8233802167396818, "percentage": 91.17, "elapsed_time": "5:28:20", "remaining_time": "0:31:48"} +{"current_steps": 7909, "total_steps": 8674, "loss": 0.4754030108451843, "lr": 4.234322057852602e-08, "epoch": 1.8236107908692645, "percentage": 91.18, "elapsed_time": "5:28:23", "remaining_time": "0:31:45"} +{"current_steps": 7910, "total_steps": 8674, "loss": 0.394174188375473, "lr": 4.223352038175487e-08, "epoch": 1.823841364998847, "percentage": 91.19, "elapsed_time": "5:28:27", "remaining_time": "0:31:43"} +{"current_steps": 7911, "total_steps": 8674, "loss": 0.39882469177246094, "lr": 4.2123959406048183e-08, "epoch": 1.8240719391284297, "percentage": 91.2, "elapsed_time": "5:28:31", "remaining_time": "0:31:41"} +{"current_steps": 7912, "total_steps": 8674, "loss": 0.4611927270889282, "lr": 4.201453766733176e-08, "epoch": 1.8243025132580124, "percentage": 91.22, "elapsed_time": "5:28:35", "remaining_time": "0:31:38"} +{"current_steps": 7913, "total_steps": 8674, "loss": 0.4164184331893921, "lr": 
4.190525518151122e-08, "epoch": 1.8245330873875951, "percentage": 91.23, "elapsed_time": "5:28:39", "remaining_time": "0:31:36"} +{"current_steps": 7914, "total_steps": 8674, "loss": 0.41586828231811523, "lr": 4.179611196447186e-08, "epoch": 1.8247636615171778, "percentage": 91.24, "elapsed_time": "5:28:43", "remaining_time": "0:31:34"} +{"current_steps": 7915, "total_steps": 8674, "loss": 0.4707748591899872, "lr": 4.168710803207864e-08, "epoch": 1.8249942356467606, "percentage": 91.25, "elapsed_time": "5:28:47", "remaining_time": "0:31:31"} +{"current_steps": 7916, "total_steps": 8674, "loss": 0.4235571622848511, "lr": 4.157824340017657e-08, "epoch": 1.825224809776343, "percentage": 91.26, "elapsed_time": "5:28:51", "remaining_time": "0:31:29"} +{"current_steps": 7917, "total_steps": 8674, "loss": 0.3761681914329529, "lr": 4.146951808458998e-08, "epoch": 1.8254553839059258, "percentage": 91.27, "elapsed_time": "5:28:55", "remaining_time": "0:31:27"} +{"current_steps": 7918, "total_steps": 8674, "loss": 0.45545494556427, "lr": 4.136093210112346e-08, "epoch": 1.8256859580355083, "percentage": 91.28, "elapsed_time": "5:28:59", "remaining_time": "0:31:24"} +{"current_steps": 7919, "total_steps": 8674, "loss": 0.4154251515865326, "lr": 4.1252485465561035e-08, "epoch": 1.825916532165091, "percentage": 91.3, "elapsed_time": "5:29:02", "remaining_time": "0:31:22"} +{"current_steps": 7920, "total_steps": 8674, "loss": 0.3664330244064331, "lr": 4.114417819366633e-08, "epoch": 1.8261471062946737, "percentage": 91.31, "elapsed_time": "5:29:06", "remaining_time": "0:31:19"} +{"current_steps": 7921, "total_steps": 8674, "loss": 0.4527730643749237, "lr": 4.10360103011832e-08, "epoch": 1.8263776804242564, "percentage": 91.32, "elapsed_time": "5:29:10", "remaining_time": "0:31:17"} +{"current_steps": 7922, "total_steps": 8674, "loss": 0.5245767831802368, "lr": 4.092798180383461e-08, "epoch": 1.8266082545538391, "percentage": 91.33, "elapsed_time": "5:29:14", "remaining_time": 
"0:31:15"} +{"current_steps": 7923, "total_steps": 8674, "loss": 0.39781343936920166, "lr": 4.0820092717323894e-08, "epoch": 1.8268388286834218, "percentage": 91.34, "elapsed_time": "5:29:18", "remaining_time": "0:31:12"} +{"current_steps": 7924, "total_steps": 8674, "loss": 0.4173957109451294, "lr": 4.071234305733362e-08, "epoch": 1.8270694028130043, "percentage": 91.35, "elapsed_time": "5:29:22", "remaining_time": "0:31:10"} +{"current_steps": 7925, "total_steps": 8674, "loss": 0.38840869069099426, "lr": 4.0604732839526256e-08, "epoch": 1.827299976942587, "percentage": 91.36, "elapsed_time": "5:29:26", "remaining_time": "0:31:08"} +{"current_steps": 7926, "total_steps": 8674, "loss": 0.4107547998428345, "lr": 4.0497262079544294e-08, "epoch": 1.8275305510721696, "percentage": 91.38, "elapsed_time": "5:29:30", "remaining_time": "0:31:05"} +{"current_steps": 7927, "total_steps": 8674, "loss": 0.41102874279022217, "lr": 4.038993079300956e-08, "epoch": 1.8277611252017523, "percentage": 91.39, "elapsed_time": "5:29:34", "remaining_time": "0:31:03"} +{"current_steps": 7928, "total_steps": 8674, "loss": 0.3393939733505249, "lr": 4.028273899552381e-08, "epoch": 1.827991699331335, "percentage": 91.4, "elapsed_time": "5:29:38", "remaining_time": "0:31:01"} +{"current_steps": 7929, "total_steps": 8674, "loss": 0.42469024658203125, "lr": 4.017568670266835e-08, "epoch": 1.8282222734609177, "percentage": 91.41, "elapsed_time": "5:29:42", "remaining_time": "0:30:58"} +{"current_steps": 7930, "total_steps": 8674, "loss": 0.4869099259376526, "lr": 4.006877393000441e-08, "epoch": 1.8284528475905004, "percentage": 91.42, "elapsed_time": "5:29:46", "remaining_time": "0:30:56"} +{"current_steps": 7931, "total_steps": 8674, "loss": 0.4463779926300049, "lr": 3.996200069307265e-08, "epoch": 1.8286834217200831, "percentage": 91.43, "elapsed_time": "5:29:50", "remaining_time": "0:30:53"} +{"current_steps": 7932, "total_steps": 8674, "loss": 0.429579496383667, "lr": 3.985536700739378e-08, 
"epoch": 1.8289139958496656, "percentage": 91.45, "elapsed_time": "5:29:54", "remaining_time": "0:30:51"} +{"current_steps": 7933, "total_steps": 8674, "loss": 0.38837558031082153, "lr": 3.9748872888468065e-08, "epoch": 1.8291445699792483, "percentage": 91.46, "elapsed_time": "5:29:58", "remaining_time": "0:30:49"} +{"current_steps": 7934, "total_steps": 8674, "loss": 0.4444499909877777, "lr": 3.964251835177568e-08, "epoch": 1.8293751441088308, "percentage": 91.47, "elapsed_time": "5:30:02", "remaining_time": "0:30:46"} +{"current_steps": 7935, "total_steps": 8674, "loss": 0.5216259360313416, "lr": 3.953630341277603e-08, "epoch": 1.8296057182384136, "percentage": 91.48, "elapsed_time": "5:30:06", "remaining_time": "0:30:44"} +{"current_steps": 7936, "total_steps": 8674, "loss": 0.46454817056655884, "lr": 3.943022808690888e-08, "epoch": 1.8298362923679963, "percentage": 91.49, "elapsed_time": "5:30:10", "remaining_time": "0:30:42"} +{"current_steps": 7937, "total_steps": 8674, "loss": 0.38960570096969604, "lr": 3.9324292389593005e-08, "epoch": 1.830066866497579, "percentage": 91.5, "elapsed_time": "5:30:13", "remaining_time": "0:30:39"} +{"current_steps": 7938, "total_steps": 8674, "loss": 0.3318006992340088, "lr": 3.9218496336227426e-08, "epoch": 1.8302974406271617, "percentage": 91.51, "elapsed_time": "5:30:17", "remaining_time": "0:30:37"} +{"current_steps": 7939, "total_steps": 8674, "loss": 0.41555076837539673, "lr": 3.9112839942190725e-08, "epoch": 1.8305280147567444, "percentage": 91.53, "elapsed_time": "5:30:21", "remaining_time": "0:30:35"} +{"current_steps": 7940, "total_steps": 8674, "loss": 0.4296320080757141, "lr": 3.900732322284095e-08, "epoch": 1.830758588886327, "percentage": 91.54, "elapsed_time": "5:30:25", "remaining_time": "0:30:32"} +{"current_steps": 7941, "total_steps": 8674, "loss": 0.4416658282279968, "lr": 3.8901946193516055e-08, "epoch": 1.8309891630159096, "percentage": 91.55, "elapsed_time": "5:30:29", "remaining_time": "0:30:30"} 
+{"current_steps": 7942, "total_steps": 8674, "loss": 0.4539029598236084, "lr": 3.8796708869533676e-08, "epoch": 1.8312197371454921, "percentage": 91.56, "elapsed_time": "5:30:33", "remaining_time": "0:30:28"} +{"current_steps": 7943, "total_steps": 8674, "loss": 0.4526992440223694, "lr": 3.869161126619136e-08, "epoch": 1.8314503112750748, "percentage": 91.57, "elapsed_time": "5:30:37", "remaining_time": "0:30:25"} +{"current_steps": 7944, "total_steps": 8674, "loss": 0.3991963863372803, "lr": 3.8586653398765766e-08, "epoch": 1.8316808854046576, "percentage": 91.58, "elapsed_time": "5:30:41", "remaining_time": "0:30:23"} +{"current_steps": 7945, "total_steps": 8674, "loss": 0.44474589824676514, "lr": 3.848183528251381e-08, "epoch": 1.8319114595342403, "percentage": 91.6, "elapsed_time": "5:30:45", "remaining_time": "0:30:20"} +{"current_steps": 7946, "total_steps": 8674, "loss": 0.5022028684616089, "lr": 3.837715693267174e-08, "epoch": 1.832142033663823, "percentage": 91.61, "elapsed_time": "5:30:49", "remaining_time": "0:30:18"} +{"current_steps": 7947, "total_steps": 8674, "loss": 0.4839058518409729, "lr": 3.8272618364455836e-08, "epoch": 1.8323726077934057, "percentage": 91.62, "elapsed_time": "5:30:53", "remaining_time": "0:30:16"} +{"current_steps": 7948, "total_steps": 8674, "loss": 0.3580874800682068, "lr": 3.8168219593061376e-08, "epoch": 1.8326031819229882, "percentage": 91.63, "elapsed_time": "5:30:56", "remaining_time": "0:30:13"} +{"current_steps": 7949, "total_steps": 8674, "loss": 0.4350799024105072, "lr": 3.806396063366424e-08, "epoch": 1.832833756052571, "percentage": 91.64, "elapsed_time": "5:31:01", "remaining_time": "0:30:11"} +{"current_steps": 7950, "total_steps": 8674, "loss": 0.4386145770549774, "lr": 3.79598415014194e-08, "epoch": 1.8330643301821534, "percentage": 91.65, "elapsed_time": "5:31:04", "remaining_time": "0:30:09"} +{"current_steps": 7951, "total_steps": 8674, "loss": 0.5122627019882202, "lr": 3.785586221146142e-08, "epoch": 
1.8332949043117361, "percentage": 91.66, "elapsed_time": "5:31:08", "remaining_time": "0:30:06"} +{"current_steps": 7952, "total_steps": 8674, "loss": 0.41197121143341064, "lr": 3.77520227789051e-08, "epoch": 1.8335254784413189, "percentage": 91.68, "elapsed_time": "5:31:12", "remaining_time": "0:30:04"} +{"current_steps": 7953, "total_steps": 8674, "loss": 0.5508084297180176, "lr": 3.764832321884426e-08, "epoch": 1.8337560525709016, "percentage": 91.69, "elapsed_time": "5:31:16", "remaining_time": "0:30:01"} +{"current_steps": 7954, "total_steps": 8674, "loss": 0.40791934728622437, "lr": 3.754476354635283e-08, "epoch": 1.8339866267004843, "percentage": 91.7, "elapsed_time": "5:31:20", "remaining_time": "0:29:59"} +{"current_steps": 7955, "total_steps": 8674, "loss": 0.3880457878112793, "lr": 3.7441343776484113e-08, "epoch": 1.834217200830067, "percentage": 91.71, "elapsed_time": "5:31:24", "remaining_time": "0:29:57"} +{"current_steps": 7956, "total_steps": 8674, "loss": 0.40519118309020996, "lr": 3.7338063924271304e-08, "epoch": 1.8344477749596495, "percentage": 91.72, "elapsed_time": "5:31:28", "remaining_time": "0:29:54"} +{"current_steps": 7957, "total_steps": 8674, "loss": 0.46081095933914185, "lr": 3.723492400472716e-08, "epoch": 1.8346783490892322, "percentage": 91.73, "elapsed_time": "5:31:32", "remaining_time": "0:29:52"} +{"current_steps": 7958, "total_steps": 8674, "loss": 0.3946321904659271, "lr": 3.713192403284438e-08, "epoch": 1.8349089232188147, "percentage": 91.75, "elapsed_time": "5:31:36", "remaining_time": "0:29:50"} +{"current_steps": 7959, "total_steps": 8674, "loss": 0.4699859023094177, "lr": 3.702906402359474e-08, "epoch": 1.8351394973483974, "percentage": 91.76, "elapsed_time": "5:31:40", "remaining_time": "0:29:47"} +{"current_steps": 7960, "total_steps": 8674, "loss": 0.43031781911849976, "lr": 3.692634399192995e-08, "epoch": 1.8353700714779801, "percentage": 91.77, "elapsed_time": "5:31:44", "remaining_time": "0:29:45"} 
+{"current_steps": 7961, "total_steps": 8674, "loss": 0.4072418212890625, "lr": 3.6823763952781636e-08, "epoch": 1.8356006456075629, "percentage": 91.78, "elapsed_time": "5:31:47", "remaining_time": "0:29:42"} +{"current_steps": 7962, "total_steps": 8674, "loss": 0.40659528970718384, "lr": 3.672132392106053e-08, "epoch": 1.8358312197371456, "percentage": 91.79, "elapsed_time": "5:31:51", "remaining_time": "0:29:40"} +{"current_steps": 7963, "total_steps": 8674, "loss": 0.41279205679893494, "lr": 3.661902391165772e-08, "epoch": 1.8360617938667283, "percentage": 91.8, "elapsed_time": "5:31:55", "remaining_time": "0:29:38"} +{"current_steps": 7964, "total_steps": 8674, "loss": 0.43887826800346375, "lr": 3.65168639394432e-08, "epoch": 1.8362923679963108, "percentage": 91.81, "elapsed_time": "5:31:59", "remaining_time": "0:29:35"} +{"current_steps": 7965, "total_steps": 8674, "loss": 0.46111762523651123, "lr": 3.6414844019267196e-08, "epoch": 1.8365229421258935, "percentage": 91.83, "elapsed_time": "5:32:03", "remaining_time": "0:29:33"} +{"current_steps": 7966, "total_steps": 8674, "loss": 0.42694801092147827, "lr": 3.63129641659593e-08, "epoch": 1.836753516255476, "percentage": 91.84, "elapsed_time": "5:32:07", "remaining_time": "0:29:31"} +{"current_steps": 7967, "total_steps": 8674, "loss": 0.4674855172634125, "lr": 3.6211224394328775e-08, "epoch": 1.8369840903850587, "percentage": 91.85, "elapsed_time": "5:32:11", "remaining_time": "0:29:28"} +{"current_steps": 7968, "total_steps": 8674, "loss": 0.48998844623565674, "lr": 3.610962471916435e-08, "epoch": 1.8372146645146414, "percentage": 91.86, "elapsed_time": "5:32:15", "remaining_time": "0:29:26"} +{"current_steps": 7969, "total_steps": 8674, "loss": 0.4162273406982422, "lr": 3.600816515523486e-08, "epoch": 1.8374452386442242, "percentage": 91.87, "elapsed_time": "5:32:19", "remaining_time": "0:29:24"} +{"current_steps": 7970, "total_steps": 8674, "loss": 0.4446166753768921, "lr": 3.5906845717288304e-08, "epoch": 
1.8376758127738069, "percentage": 91.88, "elapsed_time": "5:32:23", "remaining_time": "0:29:21"} +{"current_steps": 7971, "total_steps": 8674, "loss": 0.4782527983188629, "lr": 3.580566642005245e-08, "epoch": 1.8379063869033896, "percentage": 91.9, "elapsed_time": "5:32:27", "remaining_time": "0:29:19"} +{"current_steps": 7972, "total_steps": 8674, "loss": 0.43014609813690186, "lr": 3.570462727823476e-08, "epoch": 1.838136961032972, "percentage": 91.91, "elapsed_time": "5:32:31", "remaining_time": "0:29:16"} +{"current_steps": 7973, "total_steps": 8674, "loss": 0.5155357122421265, "lr": 3.560372830652225e-08, "epoch": 1.8383675351625548, "percentage": 91.92, "elapsed_time": "5:32:35", "remaining_time": "0:29:14"} +{"current_steps": 7974, "total_steps": 8674, "loss": 0.4231104254722595, "lr": 3.5502969519581984e-08, "epoch": 1.8385981092921373, "percentage": 91.93, "elapsed_time": "5:32:39", "remaining_time": "0:29:12"} +{"current_steps": 7975, "total_steps": 8674, "loss": 0.529877245426178, "lr": 3.540235093205979e-08, "epoch": 1.83882868342172, "percentage": 91.94, "elapsed_time": "5:32:42", "remaining_time": "0:29:09"} +{"current_steps": 7976, "total_steps": 8674, "loss": 0.4841991662979126, "lr": 3.530187255858186e-08, "epoch": 1.8390592575513027, "percentage": 91.95, "elapsed_time": "5:32:45", "remaining_time": "0:29:07"} +{"current_steps": 7977, "total_steps": 8674, "loss": 0.40202534198760986, "lr": 3.520153441375362e-08, "epoch": 1.8392898316808854, "percentage": 91.96, "elapsed_time": "5:32:49", "remaining_time": "0:29:04"} +{"current_steps": 7978, "total_steps": 8674, "loss": 0.398551344871521, "lr": 3.51013365121603e-08, "epoch": 1.8395204058104682, "percentage": 91.98, "elapsed_time": "5:32:53", "remaining_time": "0:29:02"} +{"current_steps": 7979, "total_steps": 8674, "loss": 0.49139225482940674, "lr": 3.500127886836668e-08, "epoch": 1.8397509799400509, "percentage": 91.99, "elapsed_time": "5:32:56", "remaining_time": "0:29:00"} +{"current_steps": 7980, 
"total_steps": 8674, "loss": 0.4708287715911865, "lr": 3.4901361496917135e-08, "epoch": 1.8399815540696334, "percentage": 92.0, "elapsed_time": "5:32:59", "remaining_time": "0:28:57"} +{"current_steps": 7981, "total_steps": 8674, "loss": 0.4174381494522095, "lr": 3.4801584412335714e-08, "epoch": 1.840212128199216, "percentage": 92.01, "elapsed_time": "5:33:02", "remaining_time": "0:28:55"} +{"current_steps": 7982, "total_steps": 8674, "loss": 0.535778284072876, "lr": 3.470194762912593e-08, "epoch": 1.8404427023287986, "percentage": 92.02, "elapsed_time": "5:33:04", "remaining_time": "0:28:52"} +{"current_steps": 7983, "total_steps": 8674, "loss": 0.540034294128418, "lr": 3.4602451161771186e-08, "epoch": 1.8406732764583813, "percentage": 92.03, "elapsed_time": "5:33:07", "remaining_time": "0:28:50"} +{"current_steps": 7984, "total_steps": 8674, "loss": 0.4399121403694153, "lr": 3.450309502473403e-08, "epoch": 1.840903850587964, "percentage": 92.05, "elapsed_time": "5:33:09", "remaining_time": "0:28:47"} +{"current_steps": 7985, "total_steps": 8674, "loss": 0.5011022686958313, "lr": 3.4403879232457134e-08, "epoch": 1.8411344247175467, "percentage": 92.06, "elapsed_time": "5:33:11", "remaining_time": "0:28:45"} +{"current_steps": 7986, "total_steps": 8674, "loss": 0.392477810382843, "lr": 3.4304803799362405e-08, "epoch": 1.8413649988471295, "percentage": 92.07, "elapsed_time": "5:33:14", "remaining_time": "0:28:42"} +{"current_steps": 7987, "total_steps": 8674, "loss": 0.4734686315059662, "lr": 3.420586873985132e-08, "epoch": 1.8415955729767122, "percentage": 92.08, "elapsed_time": "5:33:16", "remaining_time": "0:28:40"} +{"current_steps": 7988, "total_steps": 8674, "loss": 0.37347573041915894, "lr": 3.410707406830537e-08, "epoch": 1.8418261471062947, "percentage": 92.09, "elapsed_time": "5:33:19", "remaining_time": "0:28:37"} +{"current_steps": 7989, "total_steps": 8674, "loss": 0.38837599754333496, "lr": 3.400841979908531e-08, "epoch": 1.8420567212358774, 
"percentage": 92.1, "elapsed_time": "5:33:21", "remaining_time": "0:28:35"} +{"current_steps": 7990, "total_steps": 8674, "loss": 0.38598424196243286, "lr": 3.390990594653142e-08, "epoch": 1.8422872953654599, "percentage": 92.11, "elapsed_time": "5:33:24", "remaining_time": "0:28:32"} +{"current_steps": 7991, "total_steps": 8674, "loss": 0.48508739471435547, "lr": 3.381153252496371e-08, "epoch": 1.8425178694950426, "percentage": 92.13, "elapsed_time": "5:33:26", "remaining_time": "0:28:30"} +{"current_steps": 7992, "total_steps": 8674, "loss": 0.41946491599082947, "lr": 3.3713299548681736e-08, "epoch": 1.8427484436246253, "percentage": 92.14, "elapsed_time": "5:33:29", "remaining_time": "0:28:27"} +{"current_steps": 7993, "total_steps": 8674, "loss": 0.4803915023803711, "lr": 3.3615207031964744e-08, "epoch": 1.842979017754208, "percentage": 92.15, "elapsed_time": "5:33:31", "remaining_time": "0:28:24"} +{"current_steps": 7994, "total_steps": 8674, "loss": 0.39463797211647034, "lr": 3.351725498907143e-08, "epoch": 1.8432095918837907, "percentage": 92.16, "elapsed_time": "5:33:33", "remaining_time": "0:28:22"} +{"current_steps": 7995, "total_steps": 8674, "loss": 0.43345123529434204, "lr": 3.341944343424008e-08, "epoch": 1.8434401660133735, "percentage": 92.17, "elapsed_time": "5:33:36", "remaining_time": "0:28:19"} +{"current_steps": 7996, "total_steps": 8674, "loss": 0.5164570212364197, "lr": 3.332177238168854e-08, "epoch": 1.843670740142956, "percentage": 92.18, "elapsed_time": "5:33:38", "remaining_time": "0:28:17"} +{"current_steps": 7997, "total_steps": 8674, "loss": 0.5313355922698975, "lr": 3.322424184561445e-08, "epoch": 1.8439013142725387, "percentage": 92.2, "elapsed_time": "5:33:41", "remaining_time": "0:28:14"} +{"current_steps": 7998, "total_steps": 8674, "loss": 0.4488258361816406, "lr": 3.3126851840194815e-08, "epoch": 1.8441318884021212, "percentage": 92.21, "elapsed_time": "5:33:43", "remaining_time": "0:28:12"} +{"current_steps": 7999, 
"total_steps": 8674, "loss": 0.5122581720352173, "lr": 3.30296023795863e-08, "epoch": 1.8443624625317039, "percentage": 92.22, "elapsed_time": "5:33:46", "remaining_time": "0:28:09"} +{"current_steps": 8000, "total_steps": 8674, "loss": 0.4619610905647278, "lr": 3.293249347792493e-08, "epoch": 1.8445930366612866, "percentage": 92.23, "elapsed_time": "5:33:48", "remaining_time": "0:28:07"} +{"current_steps": 8001, "total_steps": 8674, "loss": 0.4214603006839752, "lr": 3.2835525149326636e-08, "epoch": 1.8448236107908693, "percentage": 92.24, "elapsed_time": "5:33:52", "remaining_time": "0:28:05"} +{"current_steps": 8002, "total_steps": 8674, "loss": 0.40279510617256165, "lr": 3.2738697407886485e-08, "epoch": 1.845054184920452, "percentage": 92.25, "elapsed_time": "5:33:54", "remaining_time": "0:28:02"} +{"current_steps": 8003, "total_steps": 8674, "loss": 0.4797242283821106, "lr": 3.264201026767977e-08, "epoch": 1.8452847590500348, "percentage": 92.26, "elapsed_time": "5:33:57", "remaining_time": "0:28:00"} +{"current_steps": 8004, "total_steps": 8674, "loss": 0.3833237588405609, "lr": 3.254546374276057e-08, "epoch": 1.8455153331796172, "percentage": 92.28, "elapsed_time": "5:33:59", "remaining_time": "0:27:57"} +{"current_steps": 8005, "total_steps": 8674, "loss": 0.41461342573165894, "lr": 3.244905784716323e-08, "epoch": 1.8457459073092, "percentage": 92.29, "elapsed_time": "5:34:02", "remaining_time": "0:27:54"} +{"current_steps": 8006, "total_steps": 8674, "loss": 0.592107892036438, "lr": 3.235279259490109e-08, "epoch": 1.8459764814387825, "percentage": 92.3, "elapsed_time": "5:34:04", "remaining_time": "0:27:52"} +{"current_steps": 8007, "total_steps": 8674, "loss": 0.39025670289993286, "lr": 3.2256667999967405e-08, "epoch": 1.8462070555683652, "percentage": 92.31, "elapsed_time": "5:34:07", "remaining_time": "0:27:49"} +{"current_steps": 8008, "total_steps": 8674, "loss": 0.40197378396987915, "lr": 3.2160684076334766e-08, "epoch": 1.846437629697948, 
"percentage": 92.32, "elapsed_time": "5:34:09", "remaining_time": "0:27:47"} +{"current_steps": 8009, "total_steps": 8674, "loss": 0.4013815224170685, "lr": 3.206484083795558e-08, "epoch": 1.8466682038275306, "percentage": 92.33, "elapsed_time": "5:34:12", "remaining_time": "0:27:44"} +{"current_steps": 8010, "total_steps": 8674, "loss": 0.45386412739753723, "lr": 3.1969138298761356e-08, "epoch": 1.8468987779571133, "percentage": 92.34, "elapsed_time": "5:34:14", "remaining_time": "0:27:42"} +{"current_steps": 8011, "total_steps": 8674, "loss": 0.43034985661506653, "lr": 3.187357647266353e-08, "epoch": 1.8471293520866958, "percentage": 92.36, "elapsed_time": "5:34:16", "remaining_time": "0:27:39"} +{"current_steps": 8012, "total_steps": 8674, "loss": 0.4346637725830078, "lr": 3.177815537355322e-08, "epoch": 1.8473599262162785, "percentage": 92.37, "elapsed_time": "5:34:19", "remaining_time": "0:27:37"} +{"current_steps": 8013, "total_steps": 8674, "loss": 0.5203511118888855, "lr": 3.1682875015300535e-08, "epoch": 1.847590500345861, "percentage": 92.38, "elapsed_time": "5:34:21", "remaining_time": "0:27:34"} +{"current_steps": 8014, "total_steps": 8674, "loss": 0.37658393383026123, "lr": 3.1587735411755636e-08, "epoch": 1.8478210744754437, "percentage": 92.39, "elapsed_time": "5:34:24", "remaining_time": "0:27:32"} +{"current_steps": 8015, "total_steps": 8674, "loss": 0.5473518371582031, "lr": 3.149273657674789e-08, "epoch": 1.8480516486050265, "percentage": 92.4, "elapsed_time": "5:34:26", "remaining_time": "0:27:29"} +{"current_steps": 8016, "total_steps": 8674, "loss": 0.5171597599983215, "lr": 3.1397878524086484e-08, "epoch": 1.8482822227346092, "percentage": 92.41, "elapsed_time": "5:34:29", "remaining_time": "0:27:27"} +{"current_steps": 8017, "total_steps": 8674, "loss": 0.46588706970214844, "lr": 3.130316126755983e-08, "epoch": 1.848512796864192, "percentage": 92.43, "elapsed_time": "5:34:31", "remaining_time": "0:27:24"} +{"current_steps": 8018, 
"total_steps": 8674, "loss": 0.5571366548538208, "lr": 3.1208584820936244e-08, "epoch": 1.8487433709937746, "percentage": 92.44, "elapsed_time": "5:34:33", "remaining_time": "0:27:22"} +{"current_steps": 8019, "total_steps": 8674, "loss": 0.45803195238113403, "lr": 3.111414919796318e-08, "epoch": 1.848973945123357, "percentage": 92.45, "elapsed_time": "5:34:36", "remaining_time": "0:27:19"} +{"current_steps": 8020, "total_steps": 8674, "loss": 0.4732629060745239, "lr": 3.1019854412367875e-08, "epoch": 1.8492045192529398, "percentage": 92.46, "elapsed_time": "5:34:38", "remaining_time": "0:27:17"} +{"current_steps": 8021, "total_steps": 8674, "loss": 0.5268767476081848, "lr": 3.092570047785714e-08, "epoch": 1.8494350933825223, "percentage": 92.47, "elapsed_time": "5:34:41", "remaining_time": "0:27:14"} +{"current_steps": 8022, "total_steps": 8674, "loss": 0.5179537534713745, "lr": 3.0831687408117035e-08, "epoch": 1.849665667512105, "percentage": 92.48, "elapsed_time": "5:34:43", "remaining_time": "0:27:12"} +{"current_steps": 8023, "total_steps": 8674, "loss": 0.5110389590263367, "lr": 3.073781521681351e-08, "epoch": 1.8498962416416878, "percentage": 92.49, "elapsed_time": "5:34:45", "remaining_time": "0:27:09"} +{"current_steps": 8024, "total_steps": 8674, "loss": 0.4078633189201355, "lr": 3.064408391759154e-08, "epoch": 1.8501268157712705, "percentage": 92.51, "elapsed_time": "5:34:48", "remaining_time": "0:27:07"} +{"current_steps": 8025, "total_steps": 8674, "loss": 0.4632648229598999, "lr": 3.055049352407624e-08, "epoch": 1.8503573899008532, "percentage": 92.52, "elapsed_time": "5:34:50", "remaining_time": "0:27:04"} +{"current_steps": 8026, "total_steps": 8674, "loss": 0.41569265723228455, "lr": 3.0457044049871705e-08, "epoch": 1.850587964030436, "percentage": 92.53, "elapsed_time": "5:34:53", "remaining_time": "0:27:02"} +{"current_steps": 8027, "total_steps": 8674, "loss": 0.4105853736400604, "lr": 3.036373550856186e-08, "epoch": 1.8508185381600184, 
"percentage": 92.54, "elapsed_time": "5:34:55", "remaining_time": "0:26:59"} +{"current_steps": 8028, "total_steps": 8674, "loss": 0.4415978789329529, "lr": 3.027056791370996e-08, "epoch": 1.8510491122896011, "percentage": 92.55, "elapsed_time": "5:34:58", "remaining_time": "0:26:57"} +{"current_steps": 8029, "total_steps": 8674, "loss": 0.3990614414215088, "lr": 3.017754127885908e-08, "epoch": 1.8512796864191836, "percentage": 92.56, "elapsed_time": "5:35:00", "remaining_time": "0:26:54"} +{"current_steps": 8030, "total_steps": 8674, "loss": 0.42349040508270264, "lr": 3.0084655617531376e-08, "epoch": 1.8515102605487663, "percentage": 92.58, "elapsed_time": "5:35:03", "remaining_time": "0:26:52"} +{"current_steps": 8031, "total_steps": 8674, "loss": 0.4687228798866272, "lr": 2.9991910943228725e-08, "epoch": 1.851740834678349, "percentage": 92.59, "elapsed_time": "5:35:05", "remaining_time": "0:26:49"} +{"current_steps": 8032, "total_steps": 8674, "loss": 0.6091229915618896, "lr": 2.989930726943268e-08, "epoch": 1.8519714088079318, "percentage": 92.6, "elapsed_time": "5:35:08", "remaining_time": "0:26:47"} +{"current_steps": 8033, "total_steps": 8674, "loss": 0.43401795625686646, "lr": 2.980684460960381e-08, "epoch": 1.8522019829375145, "percentage": 92.61, "elapsed_time": "5:35:11", "remaining_time": "0:26:44"} +{"current_steps": 8034, "total_steps": 8674, "loss": 0.47280481457710266, "lr": 2.9714522977182688e-08, "epoch": 1.8524325570670972, "percentage": 92.62, "elapsed_time": "5:35:13", "remaining_time": "0:26:42"} +{"current_steps": 8035, "total_steps": 8674, "loss": 0.5078729391098022, "lr": 2.962234238558925e-08, "epoch": 1.8526631311966797, "percentage": 92.63, "elapsed_time": "5:35:15", "remaining_time": "0:26:39"} +{"current_steps": 8036, "total_steps": 8674, "loss": 0.4279085695743561, "lr": 2.9530302848223e-08, "epoch": 1.8528937053262624, "percentage": 92.64, "elapsed_time": "5:35:18", "remaining_time": "0:26:37"} +{"current_steps": 8037, "total_steps": 
8674, "loss": 0.3720093369483948, "lr": 2.9438404378462455e-08, "epoch": 1.853124279455845, "percentage": 92.66, "elapsed_time": "5:35:20", "remaining_time": "0:26:34"} +{"current_steps": 8038, "total_steps": 8674, "loss": 0.26778513193130493, "lr": 2.934664698966627e-08, "epoch": 1.8533548535854276, "percentage": 92.67, "elapsed_time": "5:35:23", "remaining_time": "0:26:32"} +{"current_steps": 8039, "total_steps": 8674, "loss": 0.47606828808784485, "lr": 2.9255030695172324e-08, "epoch": 1.8535854277150103, "percentage": 92.68, "elapsed_time": "5:35:25", "remaining_time": "0:26:29"} +{"current_steps": 8040, "total_steps": 8674, "loss": 0.437153160572052, "lr": 2.9163555508297632e-08, "epoch": 1.853816001844593, "percentage": 92.69, "elapsed_time": "5:35:27", "remaining_time": "0:26:27"} +{"current_steps": 8041, "total_steps": 8674, "loss": 0.408009797334671, "lr": 2.907222144233945e-08, "epoch": 1.8540465759741758, "percentage": 92.7, "elapsed_time": "5:35:30", "remaining_time": "0:26:24"} +{"current_steps": 8042, "total_steps": 8674, "loss": 0.3435688018798828, "lr": 2.8981028510573824e-08, "epoch": 1.8542771501037585, "percentage": 92.71, "elapsed_time": "5:35:32", "remaining_time": "0:26:22"} +{"current_steps": 8043, "total_steps": 8674, "loss": 0.4829018712043762, "lr": 2.8889976726256705e-08, "epoch": 1.854507724233341, "percentage": 92.73, "elapsed_time": "5:35:35", "remaining_time": "0:26:19"} +{"current_steps": 8044, "total_steps": 8674, "loss": 0.44579288363456726, "lr": 2.879906610262339e-08, "epoch": 1.8547382983629237, "percentage": 92.74, "elapsed_time": "5:35:37", "remaining_time": "0:26:17"} +{"current_steps": 8045, "total_steps": 8674, "loss": 0.4952869415283203, "lr": 2.8708296652888764e-08, "epoch": 1.8549688724925062, "percentage": 92.75, "elapsed_time": "5:35:40", "remaining_time": "0:26:14"} +{"current_steps": 8046, "total_steps": 8674, "loss": 0.4870997965335846, "lr": 2.8617668390246818e-08, "epoch": 1.855199446622089, "percentage": 92.76, 
"elapsed_time": "5:35:42", "remaining_time": "0:26:12"} +{"current_steps": 8047, "total_steps": 8674, "loss": 0.5009135603904724, "lr": 2.8527181327871465e-08, "epoch": 1.8554300207516716, "percentage": 92.77, "elapsed_time": "5:35:44", "remaining_time": "0:26:09"} +{"current_steps": 8048, "total_steps": 8674, "loss": 0.4837114214897156, "lr": 2.8436835478915954e-08, "epoch": 1.8556605948812543, "percentage": 92.78, "elapsed_time": "5:35:47", "remaining_time": "0:26:07"} +{"current_steps": 8049, "total_steps": 8674, "loss": 0.47955578565597534, "lr": 2.8346630856512897e-08, "epoch": 1.855891169010837, "percentage": 92.79, "elapsed_time": "5:35:49", "remaining_time": "0:26:04"} +{"current_steps": 8050, "total_steps": 8674, "loss": 0.4882965385913849, "lr": 2.8256567473774363e-08, "epoch": 1.8561217431404198, "percentage": 92.81, "elapsed_time": "5:35:52", "remaining_time": "0:26:02"} +{"current_steps": 8051, "total_steps": 8674, "loss": 0.4542367458343506, "lr": 2.8166645343792094e-08, "epoch": 1.8563523172700023, "percentage": 92.82, "elapsed_time": "5:35:54", "remaining_time": "0:25:59"} +{"current_steps": 8052, "total_steps": 8674, "loss": 0.4506416916847229, "lr": 2.8076864479637198e-08, "epoch": 1.856582891399585, "percentage": 92.83, "elapsed_time": "5:35:57", "remaining_time": "0:25:57"} +{"current_steps": 8053, "total_steps": 8674, "loss": 0.5043084025382996, "lr": 2.798722489436012e-08, "epoch": 1.8568134655291675, "percentage": 92.84, "elapsed_time": "5:35:59", "remaining_time": "0:25:54"} +{"current_steps": 8054, "total_steps": 8674, "loss": 0.3711032271385193, "lr": 2.78977266009911e-08, "epoch": 1.8570440396587502, "percentage": 92.85, "elapsed_time": "5:36:01", "remaining_time": "0:25:52"} +{"current_steps": 8055, "total_steps": 8674, "loss": 0.33371198177337646, "lr": 2.7808369612539405e-08, "epoch": 1.857274613788333, "percentage": 92.86, "elapsed_time": "5:36:04", "remaining_time": "0:25:49"} +{"current_steps": 8056, "total_steps": 8674, "loss": 
0.5328178405761719, "lr": 2.771915394199409e-08, "epoch": 1.8575051879179156, "percentage": 92.88, "elapsed_time": "5:36:06", "remaining_time": "0:25:47"} +{"current_steps": 8057, "total_steps": 8674, "loss": 0.4615975618362427, "lr": 2.7630079602323443e-08, "epoch": 1.8577357620474984, "percentage": 92.89, "elapsed_time": "5:36:09", "remaining_time": "0:25:44"} +{"current_steps": 8058, "total_steps": 8674, "loss": 0.4667460024356842, "lr": 2.754114660647533e-08, "epoch": 1.857966336177081, "percentage": 92.9, "elapsed_time": "5:36:11", "remaining_time": "0:25:42"} +{"current_steps": 8059, "total_steps": 8674, "loss": 0.483825147151947, "lr": 2.745235496737719e-08, "epoch": 1.8581969103066636, "percentage": 92.91, "elapsed_time": "5:36:14", "remaining_time": "0:25:39"} +{"current_steps": 8060, "total_steps": 8674, "loss": 0.4376814365386963, "lr": 2.736370469793592e-08, "epoch": 1.8584274844362463, "percentage": 92.92, "elapsed_time": "5:36:16", "remaining_time": "0:25:37"} +{"current_steps": 8061, "total_steps": 8674, "loss": 0.4862465262413025, "lr": 2.7275195811037432e-08, "epoch": 1.8586580585658288, "percentage": 92.93, "elapsed_time": "5:36:19", "remaining_time": "0:25:34"} +{"current_steps": 8062, "total_steps": 8674, "loss": 0.48104172945022583, "lr": 2.718682831954744e-08, "epoch": 1.8588886326954115, "percentage": 92.94, "elapsed_time": "5:36:21", "remaining_time": "0:25:32"} +{"current_steps": 8063, "total_steps": 8674, "loss": 0.43358030915260315, "lr": 2.709860223631122e-08, "epoch": 1.8591192068249942, "percentage": 92.96, "elapsed_time": "5:36:23", "remaining_time": "0:25:29"} +{"current_steps": 8064, "total_steps": 8674, "loss": 0.44614607095718384, "lr": 2.701051757415307e-08, "epoch": 1.859349780954577, "percentage": 92.97, "elapsed_time": "5:36:26", "remaining_time": "0:25:26"} +{"current_steps": 8065, "total_steps": 8674, "loss": 0.49824249744415283, "lr": 2.6922574345877303e-08, "epoch": 1.8595803550841596, "percentage": 92.98, "elapsed_time": 
"5:36:28", "remaining_time": "0:25:24"} +{"current_steps": 8066, "total_steps": 8674, "loss": 0.39621901512145996, "lr": 2.683477256426714e-08, "epoch": 1.8598109292137424, "percentage": 92.99, "elapsed_time": "5:36:31", "remaining_time": "0:25:21"} +{"current_steps": 8067, "total_steps": 8674, "loss": 0.40166205167770386, "lr": 2.6747112242085478e-08, "epoch": 1.8600415033433249, "percentage": 93.0, "elapsed_time": "5:36:33", "remaining_time": "0:25:19"} +{"current_steps": 8068, "total_steps": 8674, "loss": 0.4249534606933594, "lr": 2.6659593392074575e-08, "epoch": 1.8602720774729076, "percentage": 93.01, "elapsed_time": "5:36:36", "remaining_time": "0:25:16"} +{"current_steps": 8069, "total_steps": 8674, "loss": 0.4015510678291321, "lr": 2.6572216026956473e-08, "epoch": 1.86050265160249, "percentage": 93.03, "elapsed_time": "5:36:38", "remaining_time": "0:25:14"} +{"current_steps": 8070, "total_steps": 8674, "loss": 0.4691264033317566, "lr": 2.6484980159432236e-08, "epoch": 1.8607332257320728, "percentage": 93.04, "elapsed_time": "5:36:40", "remaining_time": "0:25:11"} +{"current_steps": 8071, "total_steps": 8674, "loss": 0.5095053315162659, "lr": 2.639788580218216e-08, "epoch": 1.8609637998616555, "percentage": 93.05, "elapsed_time": "5:36:43", "remaining_time": "0:25:09"} +{"current_steps": 8072, "total_steps": 8674, "loss": 0.4658794403076172, "lr": 2.6310932967866794e-08, "epoch": 1.8611943739912382, "percentage": 93.06, "elapsed_time": "5:36:45", "remaining_time": "0:25:06"} +{"current_steps": 8073, "total_steps": 8674, "loss": 0.495827853679657, "lr": 2.622412166912513e-08, "epoch": 1.861424948120821, "percentage": 93.07, "elapsed_time": "5:36:48", "remaining_time": "0:25:04"} +{"current_steps": 8074, "total_steps": 8674, "loss": 0.43652772903442383, "lr": 2.6137451918576413e-08, "epoch": 1.8616555222504036, "percentage": 93.08, "elapsed_time": "5:36:50", "remaining_time": "0:25:01"} +{"current_steps": 8075, "total_steps": 8674, "loss": 0.4636423587799072, 
"lr": 2.6050923728818784e-08, "epoch": 1.8618860963799861, "percentage": 93.09, "elapsed_time": "5:36:53", "remaining_time": "0:24:59"} +{"current_steps": 8076, "total_steps": 8674, "loss": 0.4572441577911377, "lr": 2.5964537112430186e-08, "epoch": 1.8621166705095689, "percentage": 93.11, "elapsed_time": "5:36:55", "remaining_time": "0:24:56"} +{"current_steps": 8077, "total_steps": 8674, "loss": 0.4549320340156555, "lr": 2.587829208196757e-08, "epoch": 1.8623472446391514, "percentage": 93.12, "elapsed_time": "5:36:58", "remaining_time": "0:24:54"} +{"current_steps": 8078, "total_steps": 8674, "loss": 0.46412795782089233, "lr": 2.5792188649967795e-08, "epoch": 1.862577818768734, "percentage": 93.13, "elapsed_time": "5:37:00", "remaining_time": "0:24:51"} +{"current_steps": 8079, "total_steps": 8674, "loss": 0.40059781074523926, "lr": 2.570622682894652e-08, "epoch": 1.8628083928983168, "percentage": 93.14, "elapsed_time": "5:37:02", "remaining_time": "0:24:49"} +{"current_steps": 8080, "total_steps": 8674, "loss": 0.5396246910095215, "lr": 2.5620406631399416e-08, "epoch": 1.8630389670278995, "percentage": 93.15, "elapsed_time": "5:37:05", "remaining_time": "0:24:46"} +{"current_steps": 8081, "total_steps": 8674, "loss": 0.4793856143951416, "lr": 2.553472806980128e-08, "epoch": 1.8632695411574822, "percentage": 93.16, "elapsed_time": "5:37:07", "remaining_time": "0:24:44"} +{"current_steps": 8082, "total_steps": 8674, "loss": 0.4428815543651581, "lr": 2.5449191156606264e-08, "epoch": 1.863500115287065, "percentage": 93.18, "elapsed_time": "5:37:10", "remaining_time": "0:24:41"} +{"current_steps": 8083, "total_steps": 8674, "loss": 0.4024256467819214, "lr": 2.5363795904248086e-08, "epoch": 1.8637306894166474, "percentage": 93.19, "elapsed_time": "5:37:12", "remaining_time": "0:24:39"} +{"current_steps": 8084, "total_steps": 8674, "loss": 0.4868123531341553, "lr": 2.5278542325139818e-08, "epoch": 1.8639612635462302, "percentage": 93.2, "elapsed_time": "5:37:15", 
"remaining_time": "0:24:36"} +{"current_steps": 8085, "total_steps": 8674, "loss": 0.602108359336853, "lr": 2.519343043167399e-08, "epoch": 1.8641918376758126, "percentage": 93.21, "elapsed_time": "5:37:17", "remaining_time": "0:24:34"} +{"current_steps": 8086, "total_steps": 8674, "loss": 0.4500008225440979, "lr": 2.510846023622237e-08, "epoch": 1.8644224118053954, "percentage": 93.22, "elapsed_time": "5:37:20", "remaining_time": "0:24:31"} +{"current_steps": 8087, "total_steps": 8674, "loss": 0.3894640803337097, "lr": 2.502363175113642e-08, "epoch": 1.864652985934978, "percentage": 93.23, "elapsed_time": "5:37:22", "remaining_time": "0:24:29"} +{"current_steps": 8088, "total_steps": 8674, "loss": 0.4525550305843353, "lr": 2.493894498874649e-08, "epoch": 1.8648835600645608, "percentage": 93.24, "elapsed_time": "5:37:24", "remaining_time": "0:24:26"} +{"current_steps": 8089, "total_steps": 8674, "loss": 0.3908608555793762, "lr": 2.485439996136296e-08, "epoch": 1.8651141341941435, "percentage": 93.26, "elapsed_time": "5:37:27", "remaining_time": "0:24:24"} +{"current_steps": 8090, "total_steps": 8674, "loss": 0.4551984667778015, "lr": 2.4769996681275106e-08, "epoch": 1.8653447083237262, "percentage": 93.27, "elapsed_time": "5:37:29", "remaining_time": "0:24:21"} +{"current_steps": 8091, "total_steps": 8674, "loss": 0.34474045038223267, "lr": 2.468573516075201e-08, "epoch": 1.8655752824533087, "percentage": 93.28, "elapsed_time": "5:37:32", "remaining_time": "0:24:19"} +{"current_steps": 8092, "total_steps": 8674, "loss": 0.41480594873428345, "lr": 2.4601615412041755e-08, "epoch": 1.8658058565828914, "percentage": 93.29, "elapsed_time": "5:37:34", "remaining_time": "0:24:16"} +{"current_steps": 8093, "total_steps": 8674, "loss": 0.5043104887008667, "lr": 2.4517637447372007e-08, "epoch": 1.866036430712474, "percentage": 93.3, "elapsed_time": "5:37:37", "remaining_time": "0:24:14"} +{"current_steps": 8094, "total_steps": 8674, "loss": 0.4467152953147888, "lr": 
2.4433801278950007e-08, "epoch": 1.8662670048420567, "percentage": 93.31, "elapsed_time": "5:37:39", "remaining_time": "0:24:11"} +{"current_steps": 8095, "total_steps": 8674, "loss": 0.454445481300354, "lr": 2.4350106918962e-08, "epoch": 1.8664975789716394, "percentage": 93.32, "elapsed_time": "5:37:41", "remaining_time": "0:24:09"} +{"current_steps": 8096, "total_steps": 8674, "loss": 0.4639291763305664, "lr": 2.426655437957392e-08, "epoch": 1.866728153101222, "percentage": 93.34, "elapsed_time": "5:37:44", "remaining_time": "0:24:06"} +{"current_steps": 8097, "total_steps": 8674, "loss": 0.46178731322288513, "lr": 2.418314367293084e-08, "epoch": 1.8669587272308048, "percentage": 93.35, "elapsed_time": "5:37:46", "remaining_time": "0:24:04"} +{"current_steps": 8098, "total_steps": 8674, "loss": 0.43832290172576904, "lr": 2.4099874811157383e-08, "epoch": 1.8671893013603875, "percentage": 93.36, "elapsed_time": "5:37:49", "remaining_time": "0:24:01"} +{"current_steps": 8099, "total_steps": 8674, "loss": 0.4586114287376404, "lr": 2.4016747806357652e-08, "epoch": 1.86741987548997, "percentage": 93.37, "elapsed_time": "5:37:51", "remaining_time": "0:23:59"} +{"current_steps": 8100, "total_steps": 8674, "loss": 0.37975889444351196, "lr": 2.3933762670614978e-08, "epoch": 1.8676504496195527, "percentage": 93.38, "elapsed_time": "5:37:54", "remaining_time": "0:23:56"} +{"current_steps": 8101, "total_steps": 8674, "loss": 0.4579748511314392, "lr": 2.3850919415992042e-08, "epoch": 1.8678810237491352, "percentage": 93.39, "elapsed_time": "5:37:57", "remaining_time": "0:23:54"} +{"current_steps": 8102, "total_steps": 8674, "loss": 0.5120238661766052, "lr": 2.3768218054530775e-08, "epoch": 1.868111597878718, "percentage": 93.41, "elapsed_time": "5:38:00", "remaining_time": "0:23:51"} +{"current_steps": 8103, "total_steps": 8674, "loss": 0.41514822840690613, "lr": 2.3685658598253e-08, "epoch": 1.8683421720083007, "percentage": 93.42, "elapsed_time": "5:38:02", "remaining_time": 
"0:23:49"} +{"current_steps": 8104, "total_steps": 8674, "loss": 0.49480026960372925, "lr": 2.360324105915934e-08, "epoch": 1.8685727461378834, "percentage": 93.43, "elapsed_time": "5:38:05", "remaining_time": "0:23:46"} +{"current_steps": 8105, "total_steps": 8674, "loss": 0.41115111112594604, "lr": 2.352096544922999e-08, "epoch": 1.868803320267466, "percentage": 93.44, "elapsed_time": "5:38:07", "remaining_time": "0:23:44"} +{"current_steps": 8106, "total_steps": 8674, "loss": 0.44793501496315, "lr": 2.3438831780424607e-08, "epoch": 1.8690338943970488, "percentage": 93.45, "elapsed_time": "5:38:09", "remaining_time": "0:23:41"} +{"current_steps": 8107, "total_steps": 8674, "loss": 0.4197582006454468, "lr": 2.3356840064682305e-08, "epoch": 1.8692644685266313, "percentage": 93.46, "elapsed_time": "5:38:12", "remaining_time": "0:23:39"} +{"current_steps": 8108, "total_steps": 8674, "loss": 0.3654597997665405, "lr": 2.3274990313921218e-08, "epoch": 1.869495042656214, "percentage": 93.47, "elapsed_time": "5:38:14", "remaining_time": "0:23:36"} +{"current_steps": 8109, "total_steps": 8674, "loss": 0.5105487704277039, "lr": 2.319328254003927e-08, "epoch": 1.8697256167857965, "percentage": 93.49, "elapsed_time": "5:38:17", "remaining_time": "0:23:34"} +{"current_steps": 8110, "total_steps": 8674, "loss": 0.5202287435531616, "lr": 2.3111716754913192e-08, "epoch": 1.8699561909153792, "percentage": 93.5, "elapsed_time": "5:38:19", "remaining_time": "0:23:31"} +{"current_steps": 8111, "total_steps": 8674, "loss": 0.4475836753845215, "lr": 2.303029297039949e-08, "epoch": 1.870186765044962, "percentage": 93.51, "elapsed_time": "5:38:22", "remaining_time": "0:23:29"} +{"current_steps": 8112, "total_steps": 8674, "loss": 0.5010285973548889, "lr": 2.2949011198334144e-08, "epoch": 1.8704173391745447, "percentage": 93.52, "elapsed_time": "5:38:24", "remaining_time": "0:23:26"} +{"current_steps": 8113, "total_steps": 8674, "loss": 0.41949477791786194, "lr": 2.286787145053204e-08, 
"epoch": 1.8706479133041274, "percentage": 93.53, "elapsed_time": "5:38:26", "remaining_time": "0:23:24"} +{"current_steps": 8114, "total_steps": 8674, "loss": 0.38505449891090393, "lr": 2.2786873738787738e-08, "epoch": 1.87087848743371, "percentage": 93.54, "elapsed_time": "5:38:29", "remaining_time": "0:23:21"} +{"current_steps": 8115, "total_steps": 8674, "loss": 0.4854990839958191, "lr": 2.2706018074875043e-08, "epoch": 1.8711090615632926, "percentage": 93.56, "elapsed_time": "5:38:31", "remaining_time": "0:23:19"} +{"current_steps": 8116, "total_steps": 8674, "loss": 0.3846585154533386, "lr": 2.2625304470547336e-08, "epoch": 1.8713396356928753, "percentage": 93.57, "elapsed_time": "5:38:34", "remaining_time": "0:23:16"} +{"current_steps": 8117, "total_steps": 8674, "loss": 0.48948657512664795, "lr": 2.2544732937537003e-08, "epoch": 1.8715702098224578, "percentage": 93.58, "elapsed_time": "5:38:36", "remaining_time": "0:23:14"} +{"current_steps": 8118, "total_steps": 8674, "loss": 0.5571197867393494, "lr": 2.2464303487555902e-08, "epoch": 1.8718007839520405, "percentage": 93.59, "elapsed_time": "5:38:39", "remaining_time": "0:23:11"} +{"current_steps": 8119, "total_steps": 8674, "loss": 0.514819324016571, "lr": 2.2384016132295345e-08, "epoch": 1.8720313580816232, "percentage": 93.6, "elapsed_time": "5:38:41", "remaining_time": "0:23:09"} +{"current_steps": 8120, "total_steps": 8674, "loss": 0.4411713182926178, "lr": 2.230387088342589e-08, "epoch": 1.872261932211206, "percentage": 93.61, "elapsed_time": "5:38:44", "remaining_time": "0:23:06"} +{"current_steps": 8121, "total_steps": 8674, "loss": 0.4494340717792511, "lr": 2.2223867752597437e-08, "epoch": 1.8724925063407887, "percentage": 93.62, "elapsed_time": "5:38:46", "remaining_time": "0:23:04"} +{"current_steps": 8122, "total_steps": 8674, "loss": 0.4186316132545471, "lr": 2.2144006751439236e-08, "epoch": 1.8727230804703712, "percentage": 93.64, "elapsed_time": "5:38:48", "remaining_time": "0:23:01"} 
+{"current_steps": 8123, "total_steps": 8674, "loss": 0.45932692289352417, "lr": 2.2064287891560007e-08, "epoch": 1.8729536545999539, "percentage": 93.65, "elapsed_time": "5:38:51", "remaining_time": "0:22:59"} +{"current_steps": 8124, "total_steps": 8674, "loss": 0.4095005989074707, "lr": 2.1984711184547477e-08, "epoch": 1.8731842287295364, "percentage": 93.66, "elapsed_time": "5:38:53", "remaining_time": "0:22:56"} +{"current_steps": 8125, "total_steps": 8674, "loss": 0.3822292685508728, "lr": 2.1905276641969284e-08, "epoch": 1.873414802859119, "percentage": 93.67, "elapsed_time": "5:38:56", "remaining_time": "0:22:54"} +{"current_steps": 8126, "total_steps": 8674, "loss": 0.41837501525878906, "lr": 2.1825984275371633e-08, "epoch": 1.8736453769887018, "percentage": 93.68, "elapsed_time": "5:38:58", "remaining_time": "0:22:51"} +{"current_steps": 8127, "total_steps": 8674, "loss": 0.3903341591358185, "lr": 2.1746834096280752e-08, "epoch": 1.8738759511182845, "percentage": 93.69, "elapsed_time": "5:39:01", "remaining_time": "0:22:49"} +{"current_steps": 8128, "total_steps": 8674, "loss": 0.4760533571243286, "lr": 2.166782611620177e-08, "epoch": 1.8741065252478672, "percentage": 93.71, "elapsed_time": "5:39:03", "remaining_time": "0:22:46"} +{"current_steps": 8129, "total_steps": 8674, "loss": 0.43960827589035034, "lr": 2.1588960346619388e-08, "epoch": 1.87433709937745, "percentage": 93.72, "elapsed_time": "5:39:05", "remaining_time": "0:22:44"} +{"current_steps": 8130, "total_steps": 8674, "loss": 0.47941142320632935, "lr": 2.151023679899755e-08, "epoch": 1.8745676735070325, "percentage": 93.73, "elapsed_time": "5:39:08", "remaining_time": "0:22:41"} +{"current_steps": 8131, "total_steps": 8674, "loss": 0.4467000961303711, "lr": 2.143165548477943e-08, "epoch": 1.8747982476366152, "percentage": 93.74, "elapsed_time": "5:39:10", "remaining_time": "0:22:39"} +{"current_steps": 8132, "total_steps": 8674, "loss": 0.42472416162490845, "lr": 2.1353216415387788e-08, 
"epoch": 1.8750288217661977, "percentage": 93.75, "elapsed_time": "5:39:13", "remaining_time": "0:22:36"} +{"current_steps": 8133, "total_steps": 8674, "loss": 0.5127208232879639, "lr": 2.1274919602224273e-08, "epoch": 1.8752593958957804, "percentage": 93.76, "elapsed_time": "5:39:15", "remaining_time": "0:22:34"} +{"current_steps": 8134, "total_steps": 8674, "loss": 0.5362575650215149, "lr": 2.119676505667045e-08, "epoch": 1.875489970025363, "percentage": 93.77, "elapsed_time": "5:39:18", "remaining_time": "0:22:31"} +{"current_steps": 8135, "total_steps": 8674, "loss": 0.4025413990020752, "lr": 2.111875279008657e-08, "epoch": 1.8757205441549458, "percentage": 93.79, "elapsed_time": "5:39:20", "remaining_time": "0:22:29"} +{"current_steps": 8136, "total_steps": 8674, "loss": 0.49126237630844116, "lr": 2.1040882813812667e-08, "epoch": 1.8759511182845285, "percentage": 93.8, "elapsed_time": "5:39:23", "remaining_time": "0:22:26"} +{"current_steps": 8137, "total_steps": 8674, "loss": 0.40609198808670044, "lr": 2.096315513916791e-08, "epoch": 1.8761816924141113, "percentage": 93.81, "elapsed_time": "5:39:25", "remaining_time": "0:22:24"} +{"current_steps": 8138, "total_steps": 8674, "loss": 0.47826945781707764, "lr": 2.0885569777450707e-08, "epoch": 1.8764122665436938, "percentage": 93.82, "elapsed_time": "5:39:28", "remaining_time": "0:22:21"} +{"current_steps": 8139, "total_steps": 8674, "loss": 0.39987948536872864, "lr": 2.0808126739939035e-08, "epoch": 1.8766428406732765, "percentage": 93.83, "elapsed_time": "5:39:30", "remaining_time": "0:22:19"} +{"current_steps": 8140, "total_steps": 8674, "loss": 0.5727471113204956, "lr": 2.0730826037890003e-08, "epoch": 1.876873414802859, "percentage": 93.84, "elapsed_time": "5:39:32", "remaining_time": "0:22:16"} +{"current_steps": 8141, "total_steps": 8674, "loss": 0.4772847294807434, "lr": 2.0653667682540066e-08, "epoch": 1.8771039889324417, "percentage": 93.86, "elapsed_time": "5:39:35", "remaining_time": "0:22:14"} 
+{"current_steps": 8142, "total_steps": 8674, "loss": 0.3258974552154541, "lr": 2.0576651685104697e-08, "epoch": 1.8773345630620244, "percentage": 93.87, "elapsed_time": "5:39:37", "remaining_time": "0:22:11"} +{"current_steps": 8143, "total_steps": 8674, "loss": 0.5220766067504883, "lr": 2.049977805677938e-08, "epoch": 1.8775651371916071, "percentage": 93.88, "elapsed_time": "5:39:40", "remaining_time": "0:22:08"} +{"current_steps": 8144, "total_steps": 8674, "loss": 0.39550334215164185, "lr": 2.0423046808738077e-08, "epoch": 1.8777957113211898, "percentage": 93.89, "elapsed_time": "5:39:42", "remaining_time": "0:22:06"} +{"current_steps": 8145, "total_steps": 8674, "loss": 0.4487137198448181, "lr": 2.034645795213463e-08, "epoch": 1.8780262854507725, "percentage": 93.9, "elapsed_time": "5:39:45", "remaining_time": "0:22:03"} +{"current_steps": 8146, "total_steps": 8674, "loss": 0.3363339304924011, "lr": 2.0270011498102147e-08, "epoch": 1.878256859580355, "percentage": 93.91, "elapsed_time": "5:39:47", "remaining_time": "0:22:01"} +{"current_steps": 8147, "total_steps": 8674, "loss": 0.5161975026130676, "lr": 2.019370745775273e-08, "epoch": 1.8784874337099378, "percentage": 93.92, "elapsed_time": "5:39:50", "remaining_time": "0:21:58"} +{"current_steps": 8148, "total_steps": 8674, "loss": 0.359643816947937, "lr": 2.011754584217784e-08, "epoch": 1.8787180078395203, "percentage": 93.94, "elapsed_time": "5:39:52", "remaining_time": "0:21:56"} +{"current_steps": 8149, "total_steps": 8674, "loss": 0.4472349286079407, "lr": 2.0041526662448625e-08, "epoch": 1.878948581969103, "percentage": 93.95, "elapsed_time": "5:39:55", "remaining_time": "0:21:53"} +{"current_steps": 8150, "total_steps": 8674, "loss": 0.40363550186157227, "lr": 1.9965649929615135e-08, "epoch": 1.8791791560986857, "percentage": 93.96, "elapsed_time": "5:39:57", "remaining_time": "0:21:51"} +{"current_steps": 8151, "total_steps": 8674, "loss": 0.46063172817230225, "lr": 1.9889915654706656e-08, "epoch": 
1.8794097302282684, "percentage": 93.97, "elapsed_time": "5:40:00", "remaining_time": "0:21:48"} +{"current_steps": 8152, "total_steps": 8674, "loss": 0.4478832483291626, "lr": 1.981432384873205e-08, "epoch": 1.8796403043578511, "percentage": 93.98, "elapsed_time": "5:40:02", "remaining_time": "0:21:46"} +{"current_steps": 8153, "total_steps": 8674, "loss": 0.3438538908958435, "lr": 1.9738874522679304e-08, "epoch": 1.8798708784874338, "percentage": 93.99, "elapsed_time": "5:40:04", "remaining_time": "0:21:43"} +{"current_steps": 8154, "total_steps": 8674, "loss": 0.6035101413726807, "lr": 1.966356768751598e-08, "epoch": 1.8801014526170163, "percentage": 94.01, "elapsed_time": "5:40:07", "remaining_time": "0:21:41"} +{"current_steps": 8155, "total_steps": 8674, "loss": 0.42533814907073975, "lr": 1.958840335418832e-08, "epoch": 1.880332026746599, "percentage": 94.02, "elapsed_time": "5:40:09", "remaining_time": "0:21:38"} +{"current_steps": 8156, "total_steps": 8674, "loss": 0.4117417633533478, "lr": 1.9513381533622587e-08, "epoch": 1.8805626008761815, "percentage": 94.03, "elapsed_time": "5:40:12", "remaining_time": "0:21:36"} +{"current_steps": 8157, "total_steps": 8674, "loss": 0.4353973865509033, "lr": 1.943850223672361e-08, "epoch": 1.8807931750057643, "percentage": 94.04, "elapsed_time": "5:40:14", "remaining_time": "0:21:33"} +{"current_steps": 8158, "total_steps": 8674, "loss": 0.46115410327911377, "lr": 1.9363765474376125e-08, "epoch": 1.881023749135347, "percentage": 94.05, "elapsed_time": "5:40:17", "remaining_time": "0:21:31"} +{"current_steps": 8159, "total_steps": 8674, "loss": 0.3851476311683655, "lr": 1.9289171257443782e-08, "epoch": 1.8812543232649297, "percentage": 94.06, "elapsed_time": "5:40:19", "remaining_time": "0:21:28"} +{"current_steps": 8160, "total_steps": 8674, "loss": 0.4786919355392456, "lr": 1.921471959676957e-08, "epoch": 1.8814848973945124, "percentage": 94.07, "elapsed_time": "5:40:22", "remaining_time": "0:21:26"} 
+{"current_steps": 8161, "total_steps": 8674, "loss": 0.4427906274795532, "lr": 1.914041050317583e-08, "epoch": 1.8817154715240951, "percentage": 94.09, "elapsed_time": "5:40:24", "remaining_time": "0:21:23"} +{"current_steps": 8162, "total_steps": 8674, "loss": 0.37774696946144104, "lr": 1.906624398746415e-08, "epoch": 1.8819460456536776, "percentage": 94.1, "elapsed_time": "5:40:26", "remaining_time": "0:21:21"} +{"current_steps": 8163, "total_steps": 8674, "loss": 0.43793195486068726, "lr": 1.8992220060415343e-08, "epoch": 1.8821766197832603, "percentage": 94.11, "elapsed_time": "5:40:29", "remaining_time": "0:21:18"} +{"current_steps": 8164, "total_steps": 8674, "loss": 0.3869394063949585, "lr": 1.8918338732789587e-08, "epoch": 1.8824071939128428, "percentage": 94.12, "elapsed_time": "5:40:31", "remaining_time": "0:21:16"} +{"current_steps": 8165, "total_steps": 8674, "loss": 0.4963928461074829, "lr": 1.8844600015326283e-08, "epoch": 1.8826377680424256, "percentage": 94.13, "elapsed_time": "5:40:34", "remaining_time": "0:21:13"} +{"current_steps": 8166, "total_steps": 8674, "loss": 0.45727187395095825, "lr": 1.8771003918743978e-08, "epoch": 1.8828683421720083, "percentage": 94.14, "elapsed_time": "5:40:36", "remaining_time": "0:21:11"} +{"current_steps": 8167, "total_steps": 8674, "loss": 0.4878919720649719, "lr": 1.8697550453740884e-08, "epoch": 1.883098916301591, "percentage": 94.15, "elapsed_time": "5:40:39", "remaining_time": "0:21:08"} +{"current_steps": 8168, "total_steps": 8674, "loss": 0.5376998782157898, "lr": 1.862423963099391e-08, "epoch": 1.8833294904311737, "percentage": 94.17, "elapsed_time": "5:40:41", "remaining_time": "0:21:06"} +{"current_steps": 8169, "total_steps": 8674, "loss": 0.4534180760383606, "lr": 1.8551071461159638e-08, "epoch": 1.8835600645607564, "percentage": 94.18, "elapsed_time": "5:40:43", "remaining_time": "0:21:03"} +{"current_steps": 8170, "total_steps": 8674, "loss": 0.43389183282852173, "lr": 1.847804595487379e-08, 
"epoch": 1.883790638690339, "percentage": 94.19, "elapsed_time": "5:40:46", "remaining_time": "0:21:01"} +{"current_steps": 8171, "total_steps": 8674, "loss": 0.4833742678165436, "lr": 1.8405163122751532e-08, "epoch": 1.8840212128199216, "percentage": 94.2, "elapsed_time": "5:40:48", "remaining_time": "0:20:58"} +{"current_steps": 8172, "total_steps": 8674, "loss": 0.49344220757484436, "lr": 1.833242297538695e-08, "epoch": 1.8842517869495041, "percentage": 94.21, "elapsed_time": "5:40:51", "remaining_time": "0:20:56"} +{"current_steps": 8173, "total_steps": 8674, "loss": 0.49290287494659424, "lr": 1.8259825523353478e-08, "epoch": 1.8844823610790868, "percentage": 94.22, "elapsed_time": "5:40:54", "remaining_time": "0:20:53"} +{"current_steps": 8174, "total_steps": 8674, "loss": 0.3971661627292633, "lr": 1.8187370777204115e-08, "epoch": 1.8847129352086696, "percentage": 94.24, "elapsed_time": "5:40:56", "remaining_time": "0:20:51"} +{"current_steps": 8175, "total_steps": 8674, "loss": 0.4984559416770935, "lr": 1.811505874747066e-08, "epoch": 1.8849435093382523, "percentage": 94.25, "elapsed_time": "5:40:59", "remaining_time": "0:20:48"} +{"current_steps": 8176, "total_steps": 8674, "loss": 0.38448822498321533, "lr": 1.804288944466459e-08, "epoch": 1.885174083467835, "percentage": 94.26, "elapsed_time": "5:41:01", "remaining_time": "0:20:46"} +{"current_steps": 8177, "total_steps": 8674, "loss": 0.5468838214874268, "lr": 1.7970862879276406e-08, "epoch": 1.8854046575974177, "percentage": 94.27, "elapsed_time": "5:41:04", "remaining_time": "0:20:43"} +{"current_steps": 8178, "total_steps": 8674, "loss": 0.46132227778434753, "lr": 1.7898979061775844e-08, "epoch": 1.8856352317270002, "percentage": 94.28, "elapsed_time": "5:41:06", "remaining_time": "0:20:41"} +{"current_steps": 8179, "total_steps": 8674, "loss": 0.4636603593826294, "lr": 1.782723800261199e-08, "epoch": 1.885865805856583, "percentage": 94.29, "elapsed_time": "5:41:08", "remaining_time": "0:20:38"} 
+{"current_steps": 8180, "total_steps": 8674, "loss": 0.5302075147628784, "lr": 1.7755639712213057e-08, "epoch": 1.8860963799861654, "percentage": 94.3, "elapsed_time": "5:41:11", "remaining_time": "0:20:36"} +{"current_steps": 8181, "total_steps": 8674, "loss": 0.4817178249359131, "lr": 1.7684184200986718e-08, "epoch": 1.8863269541157481, "percentage": 94.32, "elapsed_time": "5:41:13", "remaining_time": "0:20:33"} +{"current_steps": 8182, "total_steps": 8674, "loss": 0.4535263180732727, "lr": 1.7612871479319668e-08, "epoch": 1.8865575282453309, "percentage": 94.33, "elapsed_time": "5:41:16", "remaining_time": "0:20:31"} +{"current_steps": 8183, "total_steps": 8674, "loss": 0.5260534286499023, "lr": 1.7541701557577837e-08, "epoch": 1.8867881023749136, "percentage": 94.34, "elapsed_time": "5:41:18", "remaining_time": "0:20:28"} +{"current_steps": 8184, "total_steps": 8674, "loss": 0.4526366591453552, "lr": 1.7470674446106614e-08, "epoch": 1.8870186765044963, "percentage": 94.35, "elapsed_time": "5:41:21", "remaining_time": "0:20:26"} +{"current_steps": 8185, "total_steps": 8674, "loss": 0.4721973240375519, "lr": 1.7399790155230632e-08, "epoch": 1.887249250634079, "percentage": 94.36, "elapsed_time": "5:41:23", "remaining_time": "0:20:23"} +{"current_steps": 8186, "total_steps": 8674, "loss": 0.4331268072128296, "lr": 1.7329048695253422e-08, "epoch": 1.8874798247636615, "percentage": 94.37, "elapsed_time": "5:41:25", "remaining_time": "0:20:21"} +{"current_steps": 8187, "total_steps": 8674, "loss": 0.5175650119781494, "lr": 1.7258450076458097e-08, "epoch": 1.8877103988932442, "percentage": 94.39, "elapsed_time": "5:41:28", "remaining_time": "0:20:18"} +{"current_steps": 8188, "total_steps": 8674, "loss": 0.45537033677101135, "lr": 1.718799430910678e-08, "epoch": 1.8879409730228267, "percentage": 94.4, "elapsed_time": "5:41:30", "remaining_time": "0:20:16"} +{"current_steps": 8189, "total_steps": 8674, "loss": 0.5055547952651978, "lr": 1.7117681403441054e-08, "epoch": 
1.8881715471524094, "percentage": 94.41, "elapsed_time": "5:41:33", "remaining_time": "0:20:13"} +{"current_steps": 8190, "total_steps": 8674, "loss": 0.45514553785324097, "lr": 1.7047511369681522e-08, "epoch": 1.8884021212819921, "percentage": 94.42, "elapsed_time": "5:41:35", "remaining_time": "0:20:11"} +{"current_steps": 8191, "total_steps": 8674, "loss": 0.44227129220962524, "lr": 1.6977484218028136e-08, "epoch": 1.8886326954115749, "percentage": 94.43, "elapsed_time": "5:41:38", "remaining_time": "0:20:08"} +{"current_steps": 8192, "total_steps": 8674, "loss": 0.4916682839393616, "lr": 1.690759995866009e-08, "epoch": 1.8888632695411576, "percentage": 94.44, "elapsed_time": "5:41:40", "remaining_time": "0:20:06"} +{"current_steps": 8193, "total_steps": 8674, "loss": 0.48626652359962463, "lr": 1.683785860173559e-08, "epoch": 1.8890938436707403, "percentage": 94.45, "elapsed_time": "5:41:42", "remaining_time": "0:20:03"} +{"current_steps": 8194, "total_steps": 8674, "loss": 0.39982378482818604, "lr": 1.676826015739252e-08, "epoch": 1.8893244178003228, "percentage": 94.47, "elapsed_time": "5:41:45", "remaining_time": "0:20:01"} +{"current_steps": 8195, "total_steps": 8674, "loss": 0.49218645691871643, "lr": 1.6698804635747576e-08, "epoch": 1.8895549919299055, "percentage": 94.48, "elapsed_time": "5:41:47", "remaining_time": "0:19:58"} +{"current_steps": 8196, "total_steps": 8674, "loss": 0.38896578550338745, "lr": 1.6629492046896897e-08, "epoch": 1.889785566059488, "percentage": 94.49, "elapsed_time": "5:41:49", "remaining_time": "0:19:56"} +{"current_steps": 8197, "total_steps": 8674, "loss": 0.4217762053012848, "lr": 1.6560322400915538e-08, "epoch": 1.8900161401890707, "percentage": 94.5, "elapsed_time": "5:41:52", "remaining_time": "0:19:53"} +{"current_steps": 8198, "total_steps": 8674, "loss": 0.4020112156867981, "lr": 1.6491295707858343e-08, "epoch": 1.8902467143186534, "percentage": 94.51, "elapsed_time": "5:41:54", "remaining_time": "0:19:51"} 
+{"current_steps": 8199, "total_steps": 8674, "loss": 0.4630794823169708, "lr": 1.6422411977758843e-08, "epoch": 1.8904772884482361, "percentage": 94.52, "elapsed_time": "5:41:57", "remaining_time": "0:19:48"} +{"current_steps": 8200, "total_steps": 8674, "loss": 0.3673272132873535, "lr": 1.6353671220629917e-08, "epoch": 1.8907078625778189, "percentage": 94.54, "elapsed_time": "5:41:59", "remaining_time": "0:19:46"} +{"current_steps": 8201, "total_steps": 8674, "loss": 0.4677228331565857, "lr": 1.6285073446463903e-08, "epoch": 1.8909384367074016, "percentage": 94.55, "elapsed_time": "5:42:03", "remaining_time": "0:19:43"} +{"current_steps": 8202, "total_steps": 8674, "loss": 0.4532579183578491, "lr": 1.621661866523216e-08, "epoch": 1.891169010836984, "percentage": 94.56, "elapsed_time": "5:42:05", "remaining_time": "0:19:41"} +{"current_steps": 8203, "total_steps": 8674, "loss": 0.3011256456375122, "lr": 1.6148306886885287e-08, "epoch": 1.8913995849665668, "percentage": 94.57, "elapsed_time": "5:42:08", "remaining_time": "0:19:38"} +{"current_steps": 8204, "total_steps": 8674, "loss": 0.43071651458740234, "lr": 1.6080138121352892e-08, "epoch": 1.8916301590961493, "percentage": 94.58, "elapsed_time": "5:42:10", "remaining_time": "0:19:36"} +{"current_steps": 8205, "total_steps": 8674, "loss": 0.3180675506591797, "lr": 1.6012112378544272e-08, "epoch": 1.891860733225732, "percentage": 94.59, "elapsed_time": "5:42:13", "remaining_time": "0:19:33"} +{"current_steps": 8206, "total_steps": 8674, "loss": 0.35130774974823, "lr": 1.594422966834741e-08, "epoch": 1.8920913073553147, "percentage": 94.6, "elapsed_time": "5:42:15", "remaining_time": "0:19:31"} +{"current_steps": 8207, "total_steps": 8674, "loss": 0.4953269958496094, "lr": 1.587649000062996e-08, "epoch": 1.8923218814848974, "percentage": 94.62, "elapsed_time": "5:42:18", "remaining_time": "0:19:28"} +{"current_steps": 8208, "total_steps": 8674, "loss": 0.3713166415691376, "lr": 1.5808893385238388e-08, "epoch": 
1.8925524556144802, "percentage": 94.63, "elapsed_time": "5:42:20", "remaining_time": "0:19:26"} +{"current_steps": 8209, "total_steps": 8674, "loss": 0.4273546040058136, "lr": 1.5741439831998827e-08, "epoch": 1.8927830297440629, "percentage": 94.64, "elapsed_time": "5:42:23", "remaining_time": "0:19:23"} +{"current_steps": 8210, "total_steps": 8674, "loss": 0.45312386751174927, "lr": 1.5674129350715994e-08, "epoch": 1.8930136038736454, "percentage": 94.65, "elapsed_time": "5:42:25", "remaining_time": "0:19:21"} +{"current_steps": 8211, "total_steps": 8674, "loss": 0.40246695280075073, "lr": 1.560696195117439e-08, "epoch": 1.893244178003228, "percentage": 94.66, "elapsed_time": "5:42:27", "remaining_time": "0:19:18"} +{"current_steps": 8212, "total_steps": 8674, "loss": 0.5229366421699524, "lr": 1.5539937643137325e-08, "epoch": 1.8934747521328106, "percentage": 94.67, "elapsed_time": "5:42:30", "remaining_time": "0:19:16"} +{"current_steps": 8213, "total_steps": 8674, "loss": 0.43834251165390015, "lr": 1.5473056436347554e-08, "epoch": 1.8937053262623933, "percentage": 94.69, "elapsed_time": "5:42:32", "remaining_time": "0:19:13"} +{"current_steps": 8214, "total_steps": 8674, "loss": 0.4423528015613556, "lr": 1.540631834052697e-08, "epoch": 1.893935900391976, "percentage": 94.7, "elapsed_time": "5:42:35", "remaining_time": "0:19:11"} +{"current_steps": 8215, "total_steps": 8674, "loss": 0.49888452887535095, "lr": 1.5339723365376478e-08, "epoch": 1.8941664745215587, "percentage": 94.71, "elapsed_time": "5:42:37", "remaining_time": "0:19:08"} +{"current_steps": 8216, "total_steps": 8674, "loss": 0.44023919105529785, "lr": 1.5273271520576448e-08, "epoch": 1.8943970486511414, "percentage": 94.72, "elapsed_time": "5:42:40", "remaining_time": "0:19:06"} +{"current_steps": 8217, "total_steps": 8674, "loss": 0.4733201861381531, "lr": 1.5206962815786262e-08, "epoch": 1.8946276227807242, "percentage": 94.73, "elapsed_time": "5:42:42", "remaining_time": "0:19:03"} 
+{"current_steps": 8218, "total_steps": 8674, "loss": 0.5393285751342773, "lr": 1.5140797260644768e-08, "epoch": 1.8948581969103067, "percentage": 94.74, "elapsed_time": "5:42:44", "remaining_time": "0:19:01"} +{"current_steps": 8219, "total_steps": 8674, "loss": 0.4240071773529053, "lr": 1.507477486476949e-08, "epoch": 1.8950887710398894, "percentage": 94.75, "elapsed_time": "5:42:47", "remaining_time": "0:18:58"} +{"current_steps": 8220, "total_steps": 8674, "loss": 0.42983078956604004, "lr": 1.5008895637757647e-08, "epoch": 1.8953193451694719, "percentage": 94.77, "elapsed_time": "5:42:49", "remaining_time": "0:18:56"} +{"current_steps": 8221, "total_steps": 8674, "loss": 0.47513502836227417, "lr": 1.4943159589185462e-08, "epoch": 1.8955499192990546, "percentage": 94.78, "elapsed_time": "5:42:52", "remaining_time": "0:18:53"} +{"current_steps": 8222, "total_steps": 8674, "loss": 0.41938167810440063, "lr": 1.4877566728608293e-08, "epoch": 1.8957804934286373, "percentage": 94.79, "elapsed_time": "5:42:54", "remaining_time": "0:18:51"} +{"current_steps": 8223, "total_steps": 8674, "loss": 0.44817137718200684, "lr": 1.4812117065560625e-08, "epoch": 1.89601106755822, "percentage": 94.8, "elapsed_time": "5:42:57", "remaining_time": "0:18:48"} +{"current_steps": 8224, "total_steps": 8674, "loss": 0.46840909123420715, "lr": 1.4746810609556292e-08, "epoch": 1.8962416416878027, "percentage": 94.81, "elapsed_time": "5:42:59", "remaining_time": "0:18:46"} +{"current_steps": 8225, "total_steps": 8674, "loss": 0.377409964799881, "lr": 1.4681647370088369e-08, "epoch": 1.8964722158173855, "percentage": 94.82, "elapsed_time": "5:43:02", "remaining_time": "0:18:43"} +{"current_steps": 8226, "total_steps": 8674, "loss": 0.41149425506591797, "lr": 1.4616627356628831e-08, "epoch": 1.896702789946968, "percentage": 94.84, "elapsed_time": "5:43:04", "remaining_time": "0:18:41"} +{"current_steps": 8227, "total_steps": 8674, "loss": 0.39183878898620605, "lr": 1.455175057862923e-08, 
"epoch": 1.8969333640765507, "percentage": 94.85, "elapsed_time": "5:43:06", "remaining_time": "0:18:38"} +{"current_steps": 8228, "total_steps": 8674, "loss": 0.3629387617111206, "lr": 1.448701704551969e-08, "epoch": 1.8971639382061332, "percentage": 94.86, "elapsed_time": "5:43:09", "remaining_time": "0:18:36"} +{"current_steps": 8229, "total_steps": 8674, "loss": 0.4007713794708252, "lr": 1.4422426766710239e-08, "epoch": 1.8973945123357159, "percentage": 94.87, "elapsed_time": "5:43:11", "remaining_time": "0:18:33"} +{"current_steps": 8230, "total_steps": 8674, "loss": 0.42354586720466614, "lr": 1.4357979751589477e-08, "epoch": 1.8976250864652986, "percentage": 94.88, "elapsed_time": "5:43:14", "remaining_time": "0:18:31"} +{"current_steps": 8231, "total_steps": 8674, "loss": 0.5321829319000244, "lr": 1.429367600952558e-08, "epoch": 1.8978556605948813, "percentage": 94.89, "elapsed_time": "5:43:16", "remaining_time": "0:18:28"} +{"current_steps": 8232, "total_steps": 8674, "loss": 0.4840988218784332, "lr": 1.4229515549865845e-08, "epoch": 1.898086234724464, "percentage": 94.9, "elapsed_time": "5:43:19", "remaining_time": "0:18:26"} +{"current_steps": 8233, "total_steps": 8674, "loss": 0.5006803870201111, "lr": 1.4165498381936369e-08, "epoch": 1.8983168088540465, "percentage": 94.92, "elapsed_time": "5:43:21", "remaining_time": "0:18:23"} +{"current_steps": 8234, "total_steps": 8674, "loss": 0.40582865476608276, "lr": 1.4101624515042821e-08, "epoch": 1.8985473829836292, "percentage": 94.93, "elapsed_time": "5:43:23", "remaining_time": "0:18:21"} +{"current_steps": 8235, "total_steps": 8674, "loss": 0.38199514150619507, "lr": 1.4037893958469993e-08, "epoch": 1.8987779571132117, "percentage": 94.94, "elapsed_time": "5:43:26", "remaining_time": "0:18:18"} +{"current_steps": 8236, "total_steps": 8674, "loss": 0.39234936237335205, "lr": 1.3974306721481699e-08, "epoch": 1.8990085312427945, "percentage": 94.95, "elapsed_time": "5:43:28", "remaining_time": "0:18:15"} 
+{"current_steps": 8237, "total_steps": 8674, "loss": 0.42211759090423584, "lr": 1.391086281332099e-08, "epoch": 1.8992391053723772, "percentage": 94.96, "elapsed_time": "5:43:31", "remaining_time": "0:18:13"} +{"current_steps": 8238, "total_steps": 8674, "loss": 0.4519961476325989, "lr": 1.3847562243210043e-08, "epoch": 1.8994696795019599, "percentage": 94.97, "elapsed_time": "5:43:33", "remaining_time": "0:18:10"} +{"current_steps": 8239, "total_steps": 8674, "loss": 0.4795762896537781, "lr": 1.3784405020350276e-08, "epoch": 1.8997002536315426, "percentage": 94.99, "elapsed_time": "5:43:35", "remaining_time": "0:18:08"} +{"current_steps": 8240, "total_steps": 8674, "loss": 0.4549542963504791, "lr": 1.3721391153922235e-08, "epoch": 1.8999308277611253, "percentage": 95.0, "elapsed_time": "5:43:38", "remaining_time": "0:18:05"} +{"current_steps": 8241, "total_steps": 8674, "loss": 0.5253233313560486, "lr": 1.3658520653085703e-08, "epoch": 1.9001614018907078, "percentage": 95.01, "elapsed_time": "5:43:40", "remaining_time": "0:18:03"} +{"current_steps": 8242, "total_steps": 8674, "loss": 0.44850921630859375, "lr": 1.3595793526979371e-08, "epoch": 1.9003919760202905, "percentage": 95.02, "elapsed_time": "5:43:43", "remaining_time": "0:18:00"} +{"current_steps": 8243, "total_steps": 8674, "loss": 0.4416281580924988, "lr": 1.35332097847215e-08, "epoch": 1.900622550149873, "percentage": 95.03, "elapsed_time": "5:43:45", "remaining_time": "0:17:58"} +{"current_steps": 8244, "total_steps": 8674, "loss": 0.5567417740821838, "lr": 1.3470769435409036e-08, "epoch": 1.9008531242794557, "percentage": 95.04, "elapsed_time": "5:43:48", "remaining_time": "0:17:55"} +{"current_steps": 8245, "total_steps": 8674, "loss": 0.43554848432540894, "lr": 1.3408472488118383e-08, "epoch": 1.9010836984090385, "percentage": 95.05, "elapsed_time": "5:43:50", "remaining_time": "0:17:53"} +{"current_steps": 8246, "total_steps": 8674, "loss": 0.4219995141029358, "lr": 1.3346318951905077e-08, 
"epoch": 1.9013142725386212, "percentage": 95.07, "elapsed_time": "5:43:52", "remaining_time": "0:17:50"} +{"current_steps": 8247, "total_steps": 8674, "loss": 0.45862913131713867, "lr": 1.328430883580367e-08, "epoch": 1.901544846668204, "percentage": 95.08, "elapsed_time": "5:43:55", "remaining_time": "0:17:48"} +{"current_steps": 8248, "total_steps": 8674, "loss": 0.5026064515113831, "lr": 1.3222442148828172e-08, "epoch": 1.9017754207977866, "percentage": 95.09, "elapsed_time": "5:43:57", "remaining_time": "0:17:45"} +{"current_steps": 8249, "total_steps": 8674, "loss": 0.46948713064193726, "lr": 1.316071889997139e-08, "epoch": 1.902005994927369, "percentage": 95.1, "elapsed_time": "5:44:00", "remaining_time": "0:17:43"} +{"current_steps": 8250, "total_steps": 8674, "loss": 0.4263686537742615, "lr": 1.3099139098205258e-08, "epoch": 1.9022365690569518, "percentage": 95.11, "elapsed_time": "5:44:02", "remaining_time": "0:17:40"} +{"current_steps": 8251, "total_steps": 8674, "loss": 0.4652191400527954, "lr": 1.3037702752481394e-08, "epoch": 1.9024671431865343, "percentage": 95.12, "elapsed_time": "5:44:05", "remaining_time": "0:17:38"} +{"current_steps": 8252, "total_steps": 8674, "loss": 0.4918743371963501, "lr": 1.2976409871729987e-08, "epoch": 1.902697717316117, "percentage": 95.13, "elapsed_time": "5:44:07", "remaining_time": "0:17:35"} +{"current_steps": 8253, "total_steps": 8674, "loss": 0.5297696590423584, "lr": 1.2915260464860466e-08, "epoch": 1.9029282914456997, "percentage": 95.15, "elapsed_time": "5:44:10", "remaining_time": "0:17:33"} +{"current_steps": 8254, "total_steps": 8674, "loss": 0.5320281982421875, "lr": 1.2854254540761722e-08, "epoch": 1.9031588655752825, "percentage": 95.16, "elapsed_time": "5:44:12", "remaining_time": "0:17:30"} +{"current_steps": 8255, "total_steps": 8674, "loss": 0.4424601197242737, "lr": 1.2793392108301437e-08, "epoch": 1.9033894397048652, "percentage": 95.17, "elapsed_time": "5:44:14", "remaining_time": "0:17:28"} 
+{"current_steps": 8256, "total_steps": 8674, "loss": 0.4811365008354187, "lr": 1.2732673176326758e-08, "epoch": 1.903620013834448, "percentage": 95.18, "elapsed_time": "5:44:17", "remaining_time": "0:17:25"} +{"current_steps": 8257, "total_steps": 8674, "loss": 0.3744504451751709, "lr": 1.2672097753663624e-08, "epoch": 1.9038505879640304, "percentage": 95.19, "elapsed_time": "5:44:19", "remaining_time": "0:17:23"} +{"current_steps": 8258, "total_steps": 8674, "loss": 0.4703986644744873, "lr": 1.2611665849117326e-08, "epoch": 1.904081162093613, "percentage": 95.2, "elapsed_time": "5:44:22", "remaining_time": "0:17:20"} +{"current_steps": 8259, "total_steps": 8674, "loss": 0.5431181192398071, "lr": 1.255137747147228e-08, "epoch": 1.9043117362231956, "percentage": 95.22, "elapsed_time": "5:44:24", "remaining_time": "0:17:18"} +{"current_steps": 8260, "total_steps": 8674, "loss": 0.5066450238227844, "lr": 1.2491232629492143e-08, "epoch": 1.9045423103527783, "percentage": 95.23, "elapsed_time": "5:44:27", "remaining_time": "0:17:15"} +{"current_steps": 8261, "total_steps": 8674, "loss": 0.4374620020389557, "lr": 1.2431231331919368e-08, "epoch": 1.904772884482361, "percentage": 95.24, "elapsed_time": "5:44:29", "remaining_time": "0:17:13"} +{"current_steps": 8262, "total_steps": 8674, "loss": 0.3628976345062256, "lr": 1.2371373587475753e-08, "epoch": 1.9050034586119438, "percentage": 95.25, "elapsed_time": "5:44:32", "remaining_time": "0:17:10"} +{"current_steps": 8263, "total_steps": 8674, "loss": 0.43471890687942505, "lr": 1.231165940486234e-08, "epoch": 1.9052340327415265, "percentage": 95.26, "elapsed_time": "5:44:34", "remaining_time": "0:17:08"} +{"current_steps": 8264, "total_steps": 8674, "loss": 0.5038785934448242, "lr": 1.2252088792759074e-08, "epoch": 1.9054646068711092, "percentage": 95.27, "elapsed_time": "5:44:37", "remaining_time": "0:17:05"} +{"current_steps": 8265, "total_steps": 8674, "loss": 0.44022035598754883, "lr": 1.2192661759825363e-08, "epoch": 
1.9056951810006917, "percentage": 95.28, "elapsed_time": "5:44:39", "remaining_time": "0:17:03"} +{"current_steps": 8266, "total_steps": 8674, "loss": 0.4924722909927368, "lr": 1.2133378314699294e-08, "epoch": 1.9059257551302744, "percentage": 95.3, "elapsed_time": "5:44:42", "remaining_time": "0:17:00"} +{"current_steps": 8267, "total_steps": 8674, "loss": 0.3824247121810913, "lr": 1.2074238465998532e-08, "epoch": 1.906156329259857, "percentage": 95.31, "elapsed_time": "5:44:44", "remaining_time": "0:16:58"} +{"current_steps": 8268, "total_steps": 8674, "loss": 0.47094473242759705, "lr": 1.2015242222319422e-08, "epoch": 1.9063869033894396, "percentage": 95.32, "elapsed_time": "5:44:46", "remaining_time": "0:16:55"} +{"current_steps": 8269, "total_steps": 8674, "loss": 0.5653735399246216, "lr": 1.1956389592237881e-08, "epoch": 1.9066174775190223, "percentage": 95.33, "elapsed_time": "5:44:49", "remaining_time": "0:16:53"} +{"current_steps": 8270, "total_steps": 8674, "loss": 0.4763476848602295, "lr": 1.1897680584308512e-08, "epoch": 1.906848051648605, "percentage": 95.34, "elapsed_time": "5:44:51", "remaining_time": "0:16:50"} +{"current_steps": 8271, "total_steps": 8674, "loss": 0.3845449686050415, "lr": 1.1839115207065487e-08, "epoch": 1.9070786257781878, "percentage": 95.35, "elapsed_time": "5:44:54", "remaining_time": "0:16:48"} +{"current_steps": 8272, "total_steps": 8674, "loss": 0.43071988224983215, "lr": 1.1780693469021775e-08, "epoch": 1.9073091999077705, "percentage": 95.37, "elapsed_time": "5:44:56", "remaining_time": "0:16:45"} +{"current_steps": 8273, "total_steps": 8674, "loss": 0.43860751390457153, "lr": 1.172241537866947e-08, "epoch": 1.907539774037353, "percentage": 95.38, "elapsed_time": "5:44:59", "remaining_time": "0:16:43"} +{"current_steps": 8274, "total_steps": 8674, "loss": 0.5077678561210632, "lr": 1.1664280944480132e-08, "epoch": 1.9077703481669357, "percentage": 95.39, "elapsed_time": "5:45:01", "remaining_time": "0:16:40"} 
+{"current_steps": 8275, "total_steps": 8674, "loss": 0.3832993805408478, "lr": 1.1606290174903888e-08, "epoch": 1.9080009222965182, "percentage": 95.4, "elapsed_time": "5:45:03", "remaining_time": "0:16:38"} +{"current_steps": 8276, "total_steps": 8674, "loss": 0.48003530502319336, "lr": 1.1548443078370551e-08, "epoch": 1.908231496426101, "percentage": 95.41, "elapsed_time": "5:45:06", "remaining_time": "0:16:35"} +{"current_steps": 8277, "total_steps": 8674, "loss": 0.6109439134597778, "lr": 1.1490739663288618e-08, "epoch": 1.9084620705556836, "percentage": 95.42, "elapsed_time": "5:45:08", "remaining_time": "0:16:33"} +{"current_steps": 8278, "total_steps": 8674, "loss": 0.4559859037399292, "lr": 1.1433179938045823e-08, "epoch": 1.9086926446852663, "percentage": 95.43, "elapsed_time": "5:45:11", "remaining_time": "0:16:30"} +{"current_steps": 8279, "total_steps": 8674, "loss": 0.3935600221157074, "lr": 1.137576391100925e-08, "epoch": 1.908923218814849, "percentage": 95.45, "elapsed_time": "5:45:13", "remaining_time": "0:16:28"} +{"current_steps": 8280, "total_steps": 8674, "loss": 0.44477611780166626, "lr": 1.1318491590524782e-08, "epoch": 1.9091537929444318, "percentage": 95.46, "elapsed_time": "5:45:16", "remaining_time": "0:16:25"} +{"current_steps": 8281, "total_steps": 8674, "loss": 0.47065627574920654, "lr": 1.1261362984917533e-08, "epoch": 1.9093843670740143, "percentage": 95.47, "elapsed_time": "5:45:18", "remaining_time": "0:16:23"} +{"current_steps": 8282, "total_steps": 8674, "loss": 0.44851434230804443, "lr": 1.1204378102491862e-08, "epoch": 1.909614941203597, "percentage": 95.48, "elapsed_time": "5:45:20", "remaining_time": "0:16:20"} +{"current_steps": 8283, "total_steps": 8674, "loss": 0.38606488704681396, "lr": 1.1147536951530923e-08, "epoch": 1.9098455153331795, "percentage": 95.49, "elapsed_time": "5:45:23", "remaining_time": "0:16:18"} +{"current_steps": 8284, "total_steps": 8674, "loss": 0.5400182008743286, "lr": 1.1090839540297103e-08, 
"epoch": 1.9100760894627622, "percentage": 95.5, "elapsed_time": "5:45:25", "remaining_time": "0:16:15"} +{"current_steps": 8285, "total_steps": 8674, "loss": 0.4225059449672699, "lr": 1.1034285877032146e-08, "epoch": 1.910306663592345, "percentage": 95.52, "elapsed_time": "5:45:28", "remaining_time": "0:16:13"} +{"current_steps": 8286, "total_steps": 8674, "loss": 0.5111556649208069, "lr": 1.0977875969956584e-08, "epoch": 1.9105372377219276, "percentage": 95.53, "elapsed_time": "5:45:30", "remaining_time": "0:16:10"} +{"current_steps": 8287, "total_steps": 8674, "loss": 0.40596213936805725, "lr": 1.0921609827270196e-08, "epoch": 1.9107678118515103, "percentage": 95.54, "elapsed_time": "5:45:33", "remaining_time": "0:16:08"} +{"current_steps": 8288, "total_steps": 8674, "loss": 0.47917360067367554, "lr": 1.0865487457151768e-08, "epoch": 1.910998385981093, "percentage": 95.55, "elapsed_time": "5:45:35", "remaining_time": "0:16:05"} +{"current_steps": 8289, "total_steps": 8674, "loss": 0.45154574513435364, "lr": 1.0809508867759331e-08, "epoch": 1.9112289601106756, "percentage": 95.56, "elapsed_time": "5:45:37", "remaining_time": "0:16:03"} +{"current_steps": 8290, "total_steps": 8674, "loss": 0.5024373531341553, "lr": 1.0753674067229935e-08, "epoch": 1.9114595342402583, "percentage": 95.57, "elapsed_time": "5:45:40", "remaining_time": "0:16:00"} +{"current_steps": 8291, "total_steps": 8674, "loss": 0.5084686875343323, "lr": 1.069798306367975e-08, "epoch": 1.9116901083698408, "percentage": 95.58, "elapsed_time": "5:45:42", "remaining_time": "0:15:58"} +{"current_steps": 8292, "total_steps": 8674, "loss": 0.3947920501232147, "lr": 1.064243586520408e-08, "epoch": 1.9119206824994235, "percentage": 95.6, "elapsed_time": "5:45:45", "remaining_time": "0:15:55"} +{"current_steps": 8293, "total_steps": 8674, "loss": 0.5011960864067078, "lr": 1.0587032479877023e-08, "epoch": 1.9121512566290062, "percentage": 95.61, "elapsed_time": "5:45:47", "remaining_time": "0:15:53"} 
+{"current_steps": 8294, "total_steps": 8674, "loss": 0.43622612953186035, "lr": 1.0531772915752247e-08, "epoch": 1.912381830758589, "percentage": 95.62, "elapsed_time": "5:45:49", "remaining_time": "0:15:50"} +{"current_steps": 8295, "total_steps": 8674, "loss": 0.380764365196228, "lr": 1.0476657180862325e-08, "epoch": 1.9126124048881716, "percentage": 95.63, "elapsed_time": "5:45:52", "remaining_time": "0:15:48"} +{"current_steps": 8296, "total_steps": 8674, "loss": 0.4183109700679779, "lr": 1.042168528321874e-08, "epoch": 1.9128429790177544, "percentage": 95.64, "elapsed_time": "5:45:54", "remaining_time": "0:15:45"} +{"current_steps": 8297, "total_steps": 8674, "loss": 0.4221222698688507, "lr": 1.036685723081221e-08, "epoch": 1.9130735531473368, "percentage": 95.65, "elapsed_time": "5:45:57", "remaining_time": "0:15:43"} +{"current_steps": 8298, "total_steps": 8674, "loss": 0.543656051158905, "lr": 1.0312173031612804e-08, "epoch": 1.9133041272769196, "percentage": 95.67, "elapsed_time": "5:45:59", "remaining_time": "0:15:40"} +{"current_steps": 8299, "total_steps": 8674, "loss": 0.48872441053390503, "lr": 1.0257632693569052e-08, "epoch": 1.913534701406502, "percentage": 95.68, "elapsed_time": "5:46:02", "remaining_time": "0:15:38"} +{"current_steps": 8300, "total_steps": 8674, "loss": 0.5447995662689209, "lr": 1.0203236224609169e-08, "epoch": 1.9137652755360848, "percentage": 95.69, "elapsed_time": "5:46:04", "remaining_time": "0:15:35"} +{"current_steps": 8301, "total_steps": 8674, "loss": 0.39448055624961853, "lr": 1.0148983632640162e-08, "epoch": 1.9139958496656675, "percentage": 95.7, "elapsed_time": "5:46:08", "remaining_time": "0:15:33"} +{"current_steps": 8302, "total_steps": 8674, "loss": 0.44735193252563477, "lr": 1.009487492554828e-08, "epoch": 1.9142264237952502, "percentage": 95.71, "elapsed_time": "5:46:10", "remaining_time": "0:15:30"} +{"current_steps": 8303, "total_steps": 8674, "loss": 0.4747859537601471, "lr": 1.0040910111198786e-08, "epoch": 
1.914456997924833, "percentage": 95.72, "elapsed_time": "5:46:12", "remaining_time": "0:15:28"} +{"current_steps": 8304, "total_steps": 8674, "loss": 0.5120220184326172, "lr": 9.987089197435739e-09, "epoch": 1.9146875720544156, "percentage": 95.73, "elapsed_time": "5:46:15", "remaining_time": "0:15:25"} +{"current_steps": 8305, "total_steps": 8674, "loss": 0.3889455795288086, "lr": 9.933412192082991e-09, "epoch": 1.9149181461839981, "percentage": 95.75, "elapsed_time": "5:46:17", "remaining_time": "0:15:23"} +{"current_steps": 8306, "total_steps": 8674, "loss": 0.36584073305130005, "lr": 9.879879102942635e-09, "epoch": 1.9151487203135809, "percentage": 95.76, "elapsed_time": "5:46:20", "remaining_time": "0:15:20"} +{"current_steps": 8307, "total_steps": 8674, "loss": 0.6259280443191528, "lr": 9.826489937796556e-09, "epoch": 1.9153792944431633, "percentage": 95.77, "elapsed_time": "5:46:22", "remaining_time": "0:15:18"} +{"current_steps": 8308, "total_steps": 8674, "loss": 0.45160970091819763, "lr": 9.773244704405104e-09, "epoch": 1.915609868572746, "percentage": 95.78, "elapsed_time": "5:46:25", "remaining_time": "0:15:15"} +{"current_steps": 8309, "total_steps": 8674, "loss": 0.47028589248657227, "lr": 9.720143410508309e-09, "epoch": 1.9158404427023288, "percentage": 95.79, "elapsed_time": "5:46:27", "remaining_time": "0:15:13"} +{"current_steps": 8310, "total_steps": 8674, "loss": 0.3850802183151245, "lr": 9.667186063824773e-09, "epoch": 1.9160710168319115, "percentage": 95.8, "elapsed_time": "5:46:30", "remaining_time": "0:15:10"} +{"current_steps": 8311, "total_steps": 8674, "loss": 0.4134417772293091, "lr": 9.614372672052451e-09, "epoch": 1.9163015909614942, "percentage": 95.82, "elapsed_time": "5:46:32", "remaining_time": "0:15:08"} +{"current_steps": 8312, "total_steps": 8674, "loss": 0.5340328216552734, "lr": 9.561703242868425e-09, "epoch": 1.916532165091077, "percentage": 95.83, "elapsed_time": "5:46:35", "remaining_time": "0:15:05"} +{"current_steps": 
8313, "total_steps": 8674, "loss": 0.4580942392349243, "lr": 9.509177783928569e-09, "epoch": 1.9167627392206594, "percentage": 95.84, "elapsed_time": "5:46:37", "remaining_time": "0:15:03"} +{"current_steps": 8314, "total_steps": 8674, "loss": 0.4227365553379059, "lr": 9.45679630286811e-09, "epoch": 1.9169933133502421, "percentage": 95.85, "elapsed_time": "5:46:39", "remaining_time": "0:15:00"} +{"current_steps": 8315, "total_steps": 8674, "loss": 0.42711400985717773, "lr": 9.404558807301065e-09, "epoch": 1.9172238874798246, "percentage": 95.86, "elapsed_time": "5:46:42", "remaining_time": "0:14:58"} +{"current_steps": 8316, "total_steps": 8674, "loss": 0.41088467836380005, "lr": 9.352465304820811e-09, "epoch": 1.9174544616094074, "percentage": 95.87, "elapsed_time": "5:46:45", "remaining_time": "0:14:55"} +{"current_steps": 8317, "total_steps": 8674, "loss": 0.4669058918952942, "lr": 9.30051580299962e-09, "epoch": 1.91768503573899, "percentage": 95.88, "elapsed_time": "5:46:47", "remaining_time": "0:14:53"} +{"current_steps": 8318, "total_steps": 8674, "loss": 0.34129124879837036, "lr": 9.248710309388896e-09, "epoch": 1.9179156098685728, "percentage": 95.9, "elapsed_time": "5:46:49", "remaining_time": "0:14:50"} +{"current_steps": 8319, "total_steps": 8674, "loss": 0.5538367033004761, "lr": 9.19704883151906e-09, "epoch": 1.9181461839981555, "percentage": 95.91, "elapsed_time": "5:46:52", "remaining_time": "0:14:48"} +{"current_steps": 8320, "total_steps": 8674, "loss": 0.4591939151287079, "lr": 9.145531376899773e-09, "epoch": 1.9183767581277382, "percentage": 95.92, "elapsed_time": "5:46:54", "remaining_time": "0:14:45"} +{"current_steps": 8321, "total_steps": 8674, "loss": 0.38709723949432373, "lr": 9.094157953019376e-09, "epoch": 1.9186073322573207, "percentage": 95.93, "elapsed_time": "5:46:57", "remaining_time": "0:14:43"} +{"current_steps": 8322, "total_steps": 8674, "loss": 0.503919780254364, "lr": 9.042928567345787e-09, "epoch": 1.9188379063869034, 
"percentage": 95.94, "elapsed_time": "5:46:59", "remaining_time": "0:14:40"} +{"current_steps": 8323, "total_steps": 8674, "loss": 0.510110080242157, "lr": 8.991843227325491e-09, "epoch": 1.919068480516486, "percentage": 95.95, "elapsed_time": "5:47:02", "remaining_time": "0:14:38"} +{"current_steps": 8324, "total_steps": 8674, "loss": 0.5100687146186829, "lr": 8.940901940384437e-09, "epoch": 1.9192990546460686, "percentage": 95.96, "elapsed_time": "5:47:04", "remaining_time": "0:14:35"} +{"current_steps": 8325, "total_steps": 8674, "loss": 0.44701308012008667, "lr": 8.89010471392726e-09, "epoch": 1.9195296287756514, "percentage": 95.98, "elapsed_time": "5:47:07", "remaining_time": "0:14:33"} +{"current_steps": 8326, "total_steps": 8674, "loss": 0.4657078981399536, "lr": 8.83945155533794e-09, "epoch": 1.919760202905234, "percentage": 95.99, "elapsed_time": "5:47:09", "remaining_time": "0:14:30"} +{"current_steps": 8327, "total_steps": 8674, "loss": 0.510329008102417, "lr": 8.788942471979588e-09, "epoch": 1.9199907770348168, "percentage": 96.0, "elapsed_time": "5:47:11", "remaining_time": "0:14:28"} +{"current_steps": 8328, "total_steps": 8674, "loss": 0.5373008847236633, "lr": 8.738577471193997e-09, "epoch": 1.9202213511643995, "percentage": 96.01, "elapsed_time": "5:47:14", "remaining_time": "0:14:25"} +{"current_steps": 8329, "total_steps": 8674, "loss": 0.46517014503479004, "lr": 8.688356560302313e-09, "epoch": 1.920451925293982, "percentage": 96.02, "elapsed_time": "5:47:16", "remaining_time": "0:14:23"} +{"current_steps": 8330, "total_steps": 8674, "loss": 0.3993692398071289, "lr": 8.638279746604582e-09, "epoch": 1.9206824994235647, "percentage": 96.03, "elapsed_time": "5:47:19", "remaining_time": "0:14:20"} +{"current_steps": 8331, "total_steps": 8674, "loss": 0.42480504512786865, "lr": 8.588347037380095e-09, "epoch": 1.9209130735531472, "percentage": 96.05, "elapsed_time": "5:47:21", "remaining_time": "0:14:18"} +{"current_steps": 8332, "total_steps": 8674, 
"loss": 0.44433218240737915, "lr": 8.538558439887044e-09, "epoch": 1.92114364768273, "percentage": 96.06, "elapsed_time": "5:47:24", "remaining_time": "0:14:15"} +{"current_steps": 8333, "total_steps": 8674, "loss": 0.4645090103149414, "lr": 8.488913961362643e-09, "epoch": 1.9213742218123127, "percentage": 96.07, "elapsed_time": "5:47:26", "remaining_time": "0:14:13"} +{"current_steps": 8334, "total_steps": 8674, "loss": 0.47265806794166565, "lr": 8.439413609023227e-09, "epoch": 1.9216047959418954, "percentage": 96.08, "elapsed_time": "5:47:29", "remaining_time": "0:14:10"} +{"current_steps": 8335, "total_steps": 8674, "loss": 0.46389561891555786, "lr": 8.390057390064265e-09, "epoch": 1.921835370071478, "percentage": 96.09, "elapsed_time": "5:47:31", "remaining_time": "0:14:08"} +{"current_steps": 8336, "total_steps": 8674, "loss": 0.45355337858200073, "lr": 8.340845311660127e-09, "epoch": 1.9220659442010608, "percentage": 96.1, "elapsed_time": "5:47:34", "remaining_time": "0:14:05"} +{"current_steps": 8337, "total_steps": 8674, "loss": 0.47136229276657104, "lr": 8.291777380964315e-09, "epoch": 1.9222965183306433, "percentage": 96.11, "elapsed_time": "5:47:36", "remaining_time": "0:14:03"} +{"current_steps": 8338, "total_steps": 8674, "loss": 0.4914461374282837, "lr": 8.242853605109234e-09, "epoch": 1.922527092460226, "percentage": 96.13, "elapsed_time": "5:47:38", "remaining_time": "0:14:00"} +{"current_steps": 8339, "total_steps": 8674, "loss": 0.48298412561416626, "lr": 8.194073991206641e-09, "epoch": 1.9227576665898085, "percentage": 96.14, "elapsed_time": "5:47:41", "remaining_time": "0:13:58"} +{"current_steps": 8340, "total_steps": 8674, "loss": 0.5316052436828613, "lr": 8.145438546346971e-09, "epoch": 1.9229882407193912, "percentage": 96.15, "elapsed_time": "5:47:43", "remaining_time": "0:13:55"} +{"current_steps": 8341, "total_steps": 8674, "loss": 0.45742303133010864, "lr": 8.09694727760002e-09, "epoch": 1.923218814848974, "percentage": 96.16, 
"elapsed_time": "5:47:46", "remaining_time": "0:13:53"} +{"current_steps": 8342, "total_steps": 8674, "loss": 0.41579365730285645, "lr": 8.048600192014365e-09, "epoch": 1.9234493889785567, "percentage": 96.17, "elapsed_time": "5:47:48", "remaining_time": "0:13:50"} +{"current_steps": 8343, "total_steps": 8674, "loss": 0.37775835394859314, "lr": 8.000397296617834e-09, "epoch": 1.9236799631081394, "percentage": 96.18, "elapsed_time": "5:47:51", "remaining_time": "0:13:48"} +{"current_steps": 8344, "total_steps": 8674, "loss": 0.4720783531665802, "lr": 7.95233859841704e-09, "epoch": 1.9239105372377219, "percentage": 96.2, "elapsed_time": "5:47:53", "remaining_time": "0:13:45"} +{"current_steps": 8345, "total_steps": 8674, "loss": 0.5015095472335815, "lr": 7.904424104398067e-09, "epoch": 1.9241411113673046, "percentage": 96.21, "elapsed_time": "5:47:56", "remaining_time": "0:13:43"} +{"current_steps": 8346, "total_steps": 8674, "loss": 0.6053783893585205, "lr": 7.856653821525672e-09, "epoch": 1.924371685496887, "percentage": 96.22, "elapsed_time": "5:47:58", "remaining_time": "0:13:40"} +{"current_steps": 8347, "total_steps": 8674, "loss": 0.47775521874427795, "lr": 7.809027756743635e-09, "epoch": 1.9246022596264698, "percentage": 96.23, "elapsed_time": "5:48:00", "remaining_time": "0:13:38"} +{"current_steps": 8348, "total_steps": 8674, "loss": 0.36487245559692383, "lr": 7.761545916974976e-09, "epoch": 1.9248328337560525, "percentage": 96.24, "elapsed_time": "5:48:03", "remaining_time": "0:13:35"} +{"current_steps": 8349, "total_steps": 8674, "loss": 0.48085975646972656, "lr": 7.714208309121617e-09, "epoch": 1.9250634078856352, "percentage": 96.25, "elapsed_time": "5:48:05", "remaining_time": "0:13:33"} +{"current_steps": 8350, "total_steps": 8674, "loss": 0.48800790309906006, "lr": 7.667014940064609e-09, "epoch": 1.925293982015218, "percentage": 96.26, "elapsed_time": "5:48:08", "remaining_time": "0:13:30"} +{"current_steps": 8351, "total_steps": 8674, "loss": 
0.5294181704521179, "lr": 7.61996581666402e-09, "epoch": 1.9255245561448007, "percentage": 96.28, "elapsed_time": "5:48:10", "remaining_time": "0:13:28"} +{"current_steps": 8352, "total_steps": 8674, "loss": 0.44024503231048584, "lr": 7.573060945758936e-09, "epoch": 1.9257551302743832, "percentage": 96.29, "elapsed_time": "5:48:13", "remaining_time": "0:13:25"} +{"current_steps": 8353, "total_steps": 8674, "loss": 0.4359186887741089, "lr": 7.526300334167235e-09, "epoch": 1.9259857044039659, "percentage": 96.3, "elapsed_time": "5:48:15", "remaining_time": "0:13:23"} +{"current_steps": 8354, "total_steps": 8674, "loss": 0.4803895652294159, "lr": 7.479683988686259e-09, "epoch": 1.9262162785335484, "percentage": 96.31, "elapsed_time": "5:48:18", "remaining_time": "0:13:20"} +{"current_steps": 8355, "total_steps": 8674, "loss": 0.43153274059295654, "lr": 7.433211916092141e-09, "epoch": 1.926446852663131, "percentage": 96.32, "elapsed_time": "5:48:20", "remaining_time": "0:13:17"} +{"current_steps": 8356, "total_steps": 8674, "loss": 0.38263070583343506, "lr": 7.386884123140036e-09, "epoch": 1.9266774267927138, "percentage": 96.33, "elapsed_time": "5:48:22", "remaining_time": "0:13:15"} +{"current_steps": 8357, "total_steps": 8674, "loss": 0.42121192812919617, "lr": 7.340700616564e-09, "epoch": 1.9269080009222965, "percentage": 96.35, "elapsed_time": "5:48:25", "remaining_time": "0:13:12"} +{"current_steps": 8358, "total_steps": 8674, "loss": 0.46008965373039246, "lr": 7.294661403077662e-09, "epoch": 1.9271385750518792, "percentage": 96.36, "elapsed_time": "5:48:27", "remaining_time": "0:13:10"} +{"current_steps": 8359, "total_steps": 8674, "loss": 0.48495203256607056, "lr": 7.248766489372893e-09, "epoch": 1.927369149181462, "percentage": 96.37, "elapsed_time": "5:48:30", "remaining_time": "0:13:07"} +{"current_steps": 8360, "total_steps": 8674, "loss": 0.5004169940948486, "lr": 7.203015882121244e-09, "epoch": 1.9275997233110445, "percentage": 96.38, "elapsed_time": 
"5:48:32", "remaining_time": "0:13:05"} +{"current_steps": 8361, "total_steps": 8674, "loss": 0.5660319328308105, "lr": 7.15740958797284e-09, "epoch": 1.9278302974406272, "percentage": 96.39, "elapsed_time": "5:48:35", "remaining_time": "0:13:02"} +{"current_steps": 8362, "total_steps": 8674, "loss": 0.43854010105133057, "lr": 7.111947613557268e-09, "epoch": 1.9280608715702097, "percentage": 96.4, "elapsed_time": "5:48:37", "remaining_time": "0:13:00"} +{"current_steps": 8363, "total_steps": 8674, "loss": 0.44730937480926514, "lr": 7.066629965482574e-09, "epoch": 1.9282914456997924, "percentage": 96.41, "elapsed_time": "5:48:40", "remaining_time": "0:12:57"} +{"current_steps": 8364, "total_steps": 8674, "loss": 0.45642590522766113, "lr": 7.021456650336377e-09, "epoch": 1.928522019829375, "percentage": 96.43, "elapsed_time": "5:48:42", "remaining_time": "0:12:55"} +{"current_steps": 8365, "total_steps": 8674, "loss": 0.5613523721694946, "lr": 6.976427674684871e-09, "epoch": 1.9287525939589578, "percentage": 96.44, "elapsed_time": "5:48:44", "remaining_time": "0:12:52"} +{"current_steps": 8366, "total_steps": 8674, "loss": 0.4231454133987427, "lr": 6.931543045073706e-09, "epoch": 1.9289831680885405, "percentage": 96.45, "elapsed_time": "5:48:47", "remaining_time": "0:12:50"} +{"current_steps": 8367, "total_steps": 8674, "loss": 0.464144766330719, "lr": 6.886802768027223e-09, "epoch": 1.9292137422181233, "percentage": 96.46, "elapsed_time": "5:48:49", "remaining_time": "0:12:47"} +{"current_steps": 8368, "total_steps": 8674, "loss": 0.4303344488143921, "lr": 6.8422068500487705e-09, "epoch": 1.9294443163477057, "percentage": 96.47, "elapsed_time": "5:48:52", "remaining_time": "0:12:45"} +{"current_steps": 8369, "total_steps": 8674, "loss": 0.4333549737930298, "lr": 6.797755297620944e-09, "epoch": 1.9296748904772885, "percentage": 96.48, "elapsed_time": "5:48:54", "remaining_time": "0:12:42"} +{"current_steps": 8370, "total_steps": 8674, "loss": 0.4656146466732025, 
"lr": 6.753448117205241e-09, "epoch": 1.929905464606871, "percentage": 96.5, "elapsed_time": "5:48:57", "remaining_time": "0:12:40"} +{"current_steps": 8371, "total_steps": 8674, "loss": 0.3823866844177246, "lr": 6.709285315242063e-09, "epoch": 1.9301360387364537, "percentage": 96.51, "elapsed_time": "5:48:59", "remaining_time": "0:12:37"} +{"current_steps": 8372, "total_steps": 8674, "loss": 0.4552363157272339, "lr": 6.665266898150946e-09, "epoch": 1.9303666128660364, "percentage": 96.52, "elapsed_time": "5:49:02", "remaining_time": "0:12:35"} +{"current_steps": 8373, "total_steps": 8674, "loss": 0.48757460713386536, "lr": 6.6213928723304335e-09, "epoch": 1.930597186995619, "percentage": 96.53, "elapsed_time": "5:49:04", "remaining_time": "0:12:32"} +{"current_steps": 8374, "total_steps": 8674, "loss": 0.3263235092163086, "lr": 6.577663244158094e-09, "epoch": 1.9308277611252018, "percentage": 96.54, "elapsed_time": "5:49:07", "remaining_time": "0:12:30"} +{"current_steps": 8375, "total_steps": 8674, "loss": 0.510450541973114, "lr": 6.534078019990397e-09, "epoch": 1.9310583352547845, "percentage": 96.55, "elapsed_time": "5:49:09", "remaining_time": "0:12:27"} +{"current_steps": 8376, "total_steps": 8674, "loss": 0.37407904863357544, "lr": 6.490637206162941e-09, "epoch": 1.931288909384367, "percentage": 96.56, "elapsed_time": "5:49:12", "remaining_time": "0:12:25"} +{"current_steps": 8377, "total_steps": 8674, "loss": 0.4216376543045044, "lr": 6.4473408089902315e-09, "epoch": 1.9315194835139498, "percentage": 96.58, "elapsed_time": "5:49:14", "remaining_time": "0:12:22"} +{"current_steps": 8378, "total_steps": 8674, "loss": 0.41611379384994507, "lr": 6.404188834766011e-09, "epoch": 1.9317500576435322, "percentage": 96.59, "elapsed_time": "5:49:16", "remaining_time": "0:12:20"} +{"current_steps": 8379, "total_steps": 8674, "loss": 0.5301774740219116, "lr": 6.361181289762596e-09, "epoch": 1.931980631773115, "percentage": 96.6, "elapsed_time": "5:49:19", 
"remaining_time": "0:12:17"} +{"current_steps": 8380, "total_steps": 8674, "loss": 0.43767407536506653, "lr": 6.3183181802317635e-09, "epoch": 1.9322112059026977, "percentage": 96.61, "elapsed_time": "5:49:21", "remaining_time": "0:12:15"} +{"current_steps": 8381, "total_steps": 8674, "loss": 0.417082279920578, "lr": 6.275599512404084e-09, "epoch": 1.9324417800322804, "percentage": 96.62, "elapsed_time": "5:49:24", "remaining_time": "0:12:12"} +{"current_steps": 8382, "total_steps": 8674, "loss": 0.41670864820480347, "lr": 6.233025292489147e-09, "epoch": 1.9326723541618631, "percentage": 96.63, "elapsed_time": "5:49:26", "remaining_time": "0:12:10"} +{"current_steps": 8383, "total_steps": 8674, "loss": 0.48778587579727173, "lr": 6.190595526675446e-09, "epoch": 1.9329029282914458, "percentage": 96.65, "elapsed_time": "5:49:29", "remaining_time": "0:12:07"} +{"current_steps": 8384, "total_steps": 8674, "loss": 0.44433802366256714, "lr": 6.148310221130604e-09, "epoch": 1.9331335024210283, "percentage": 96.66, "elapsed_time": "5:49:31", "remaining_time": "0:12:05"} +{"current_steps": 8385, "total_steps": 8674, "loss": 0.46826764941215515, "lr": 6.106169382001369e-09, "epoch": 1.933364076550611, "percentage": 96.67, "elapsed_time": "5:49:33", "remaining_time": "0:12:02"} +{"current_steps": 8386, "total_steps": 8674, "loss": 0.5509334802627563, "lr": 6.064173015413177e-09, "epoch": 1.9335946506801935, "percentage": 96.68, "elapsed_time": "5:49:36", "remaining_time": "0:12:00"} +{"current_steps": 8387, "total_steps": 8674, "loss": 0.4436245560646057, "lr": 6.022321127470698e-09, "epoch": 1.9338252248097763, "percentage": 96.69, "elapsed_time": "5:49:38", "remaining_time": "0:11:57"} +{"current_steps": 8388, "total_steps": 8674, "loss": 0.3577145040035248, "lr": 5.9806137242574e-09, "epoch": 1.934055798939359, "percentage": 96.7, "elapsed_time": "5:49:41", "remaining_time": "0:11:55"} +{"current_steps": 8389, "total_steps": 8674, "loss": 0.39893999695777893, "lr": 
5.939050811835988e-09, "epoch": 1.9342863730689417, "percentage": 96.71, "elapsed_time": "5:49:43", "remaining_time": "0:11:52"} +{"current_steps": 8390, "total_steps": 8674, "loss": 0.4109868109226227, "lr": 5.897632396248075e-09, "epoch": 1.9345169471985244, "percentage": 96.73, "elapsed_time": "5:49:45", "remaining_time": "0:11:50"} +{"current_steps": 8391, "total_steps": 8674, "loss": 0.4193134307861328, "lr": 5.85635848351429e-09, "epoch": 1.9347475213281071, "percentage": 96.74, "elapsed_time": "5:49:48", "remaining_time": "0:11:47"} +{"current_steps": 8392, "total_steps": 8674, "loss": 0.44189178943634033, "lr": 5.8152290796340545e-09, "epoch": 1.9349780954576896, "percentage": 96.75, "elapsed_time": "5:49:51", "remaining_time": "0:11:45"} +{"current_steps": 8393, "total_steps": 8674, "loss": 0.5014302730560303, "lr": 5.774244190586141e-09, "epoch": 1.9352086695872723, "percentage": 96.76, "elapsed_time": "5:49:53", "remaining_time": "0:11:42"} +{"current_steps": 8394, "total_steps": 8674, "loss": 0.4962024688720703, "lr": 5.733403822328009e-09, "epoch": 1.9354392437168548, "percentage": 96.77, "elapsed_time": "5:49:55", "remaining_time": "0:11:40"} +{"current_steps": 8395, "total_steps": 8674, "loss": 0.45495474338531494, "lr": 5.69270798079613e-09, "epoch": 1.9356698178464375, "percentage": 96.78, "elapsed_time": "5:49:58", "remaining_time": "0:11:37"} +{"current_steps": 8396, "total_steps": 8674, "loss": 0.49062758684158325, "lr": 5.652156671906105e-09, "epoch": 1.9359003919760203, "percentage": 96.8, "elapsed_time": "5:50:00", "remaining_time": "0:11:35"} +{"current_steps": 8397, "total_steps": 8674, "loss": 0.45899879932403564, "lr": 5.611749901552554e-09, "epoch": 1.936130966105603, "percentage": 96.81, "elapsed_time": "5:50:02", "remaining_time": "0:11:32"} +{"current_steps": 8398, "total_steps": 8674, "loss": 0.47287002205848694, "lr": 5.57148767560911e-09, "epoch": 1.9363615402351857, "percentage": 96.82, "elapsed_time": "5:50:05", "remaining_time": 
"0:11:30"} +{"current_steps": 8399, "total_steps": 8674, "loss": 0.439136266708374, "lr": 5.531369999927982e-09, "epoch": 1.9365921143647684, "percentage": 96.83, "elapsed_time": "5:50:08", "remaining_time": "0:11:27"} +{"current_steps": 8400, "total_steps": 8674, "loss": 0.3920954465866089, "lr": 5.4913968803410594e-09, "epoch": 1.936822688494351, "percentage": 96.84, "elapsed_time": "5:50:10", "remaining_time": "0:11:25"} +{"current_steps": 8401, "total_steps": 8674, "loss": 0.4608895480632782, "lr": 5.451568322658473e-09, "epoch": 1.9370532626239336, "percentage": 96.85, "elapsed_time": "5:50:14", "remaining_time": "0:11:22"} +{"current_steps": 8402, "total_steps": 8674, "loss": 0.4617875814437866, "lr": 5.4118843326699246e-09, "epoch": 1.9372838367535161, "percentage": 96.86, "elapsed_time": "5:50:16", "remaining_time": "0:11:20"} +{"current_steps": 8403, "total_steps": 8674, "loss": 0.5293254852294922, "lr": 5.372344916143912e-09, "epoch": 1.9375144108830988, "percentage": 96.88, "elapsed_time": "5:50:19", "remaining_time": "0:11:17"} +{"current_steps": 8404, "total_steps": 8674, "loss": 0.3935343623161316, "lr": 5.332950078827725e-09, "epoch": 1.9377449850126816, "percentage": 96.89, "elapsed_time": "5:50:21", "remaining_time": "0:11:15"} +{"current_steps": 8405, "total_steps": 8674, "loss": 0.4612414240837097, "lr": 5.293699826447895e-09, "epoch": 1.9379755591422643, "percentage": 96.9, "elapsed_time": "5:50:24", "remaining_time": "0:11:12"} +{"current_steps": 8406, "total_steps": 8674, "loss": 0.4779428243637085, "lr": 5.254594164709858e-09, "epoch": 1.938206133271847, "percentage": 96.91, "elapsed_time": "5:50:26", "remaining_time": "0:11:10"} +{"current_steps": 8407, "total_steps": 8674, "loss": 0.37436819076538086, "lr": 5.215633099298067e-09, "epoch": 1.9384367074014297, "percentage": 96.92, "elapsed_time": "5:50:28", "remaining_time": "0:11:07"} +{"current_steps": 8408, "total_steps": 8674, "loss": 0.458698570728302, "lr": 5.1768166358757695e-09, 
"epoch": 1.9386672815310122, "percentage": 96.93, "elapsed_time": "5:50:31", "remaining_time": "0:11:05"} +{"current_steps": 8409, "total_steps": 8674, "loss": 0.39365172386169434, "lr": 5.1381447800854515e-09, "epoch": 1.938897855660595, "percentage": 96.94, "elapsed_time": "5:50:33", "remaining_time": "0:11:02"} +{"current_steps": 8410, "total_steps": 8674, "loss": 0.46358722448349, "lr": 5.099617537548284e-09, "epoch": 1.9391284297901774, "percentage": 96.96, "elapsed_time": "5:50:36", "remaining_time": "0:11:00"} +{"current_steps": 8411, "total_steps": 8674, "loss": 0.4286697506904602, "lr": 5.061234913864898e-09, "epoch": 1.9393590039197601, "percentage": 96.97, "elapsed_time": "5:50:38", "remaining_time": "0:10:57"} +{"current_steps": 8412, "total_steps": 8674, "loss": 0.4925898015499115, "lr": 5.022996914614275e-09, "epoch": 1.9395895780493428, "percentage": 96.98, "elapsed_time": "5:50:41", "remaining_time": "0:10:55"} +{"current_steps": 8413, "total_steps": 8674, "loss": 0.46924275159835815, "lr": 4.984903545354857e-09, "epoch": 1.9398201521789256, "percentage": 96.99, "elapsed_time": "5:50:43", "remaining_time": "0:10:52"} +{"current_steps": 8414, "total_steps": 8674, "loss": 0.5326268672943115, "lr": 4.946954811623994e-09, "epoch": 1.9400507263085083, "percentage": 97.0, "elapsed_time": "5:50:46", "remaining_time": "0:10:50"} +{"current_steps": 8415, "total_steps": 8674, "loss": 0.4367690682411194, "lr": 4.909150718937716e-09, "epoch": 1.940281300438091, "percentage": 97.01, "elapsed_time": "5:50:48", "remaining_time": "0:10:47"} +{"current_steps": 8416, "total_steps": 8674, "loss": 0.45579224824905396, "lr": 4.8714912727914055e-09, "epoch": 1.9405118745676735, "percentage": 97.03, "elapsed_time": "5:50:51", "remaining_time": "0:10:45"} +{"current_steps": 8417, "total_steps": 8674, "loss": 0.4420431852340698, "lr": 4.8339764786590186e-09, "epoch": 1.9407424486972562, "percentage": 97.04, "elapsed_time": "5:50:53", "remaining_time": "0:10:42"} 
+{"current_steps": 8418, "total_steps": 8674, "loss": 0.4175274670124054, "lr": 4.79660634199397e-09, "epoch": 1.9409730228268387, "percentage": 97.05, "elapsed_time": "5:50:56", "remaining_time": "0:10:40"} +{"current_steps": 8419, "total_steps": 8674, "loss": 0.41451364755630493, "lr": 4.759380868228246e-09, "epoch": 1.9412035969564214, "percentage": 97.06, "elapsed_time": "5:50:58", "remaining_time": "0:10:37"} +{"current_steps": 8420, "total_steps": 8674, "loss": 0.4211805462837219, "lr": 4.722300062772966e-09, "epoch": 1.9414341710860041, "percentage": 97.07, "elapsed_time": "5:51:00", "remaining_time": "0:10:35"} +{"current_steps": 8421, "total_steps": 8674, "loss": 0.4458296000957489, "lr": 4.68536393101826e-09, "epoch": 1.9416647452155869, "percentage": 97.08, "elapsed_time": "5:51:03", "remaining_time": "0:10:32"} +{"current_steps": 8422, "total_steps": 8674, "loss": 0.6226488351821899, "lr": 4.648572478333057e-09, "epoch": 1.9418953193451696, "percentage": 97.09, "elapsed_time": "5:51:05", "remaining_time": "0:10:30"} +{"current_steps": 8423, "total_steps": 8674, "loss": 0.343037486076355, "lr": 4.611925710065523e-09, "epoch": 1.9421258934747523, "percentage": 97.11, "elapsed_time": "5:51:08", "remaining_time": "0:10:27"} +{"current_steps": 8424, "total_steps": 8674, "loss": 0.42478299140930176, "lr": 4.575423631542397e-09, "epoch": 1.9423564676043348, "percentage": 97.12, "elapsed_time": "5:51:10", "remaining_time": "0:10:25"} +{"current_steps": 8425, "total_steps": 8674, "loss": 0.4467424750328064, "lr": 4.539066248069878e-09, "epoch": 1.9425870417339175, "percentage": 97.13, "elapsed_time": "5:51:13", "remaining_time": "0:10:22"} +{"current_steps": 8426, "total_steps": 8674, "loss": 0.4598960876464844, "lr": 4.50285356493274e-09, "epoch": 1.9428176158635, "percentage": 97.14, "elapsed_time": "5:51:15", "remaining_time": "0:10:20"} +{"current_steps": 8427, "total_steps": 8674, "loss": 0.43005913496017456, "lr": 4.466785587394883e-09, "epoch": 
1.9430481899930827, "percentage": 97.15, "elapsed_time": "5:51:18", "remaining_time": "0:10:17"} +{"current_steps": 8428, "total_steps": 8674, "loss": 0.4259253740310669, "lr": 4.430862320699114e-09, "epoch": 1.9432787641226654, "percentage": 97.16, "elapsed_time": "5:51:20", "remaining_time": "0:10:15"} +{"current_steps": 8429, "total_steps": 8674, "loss": 0.4275285601615906, "lr": 4.395083770067476e-09, "epoch": 1.9435093382522481, "percentage": 97.18, "elapsed_time": "5:51:22", "remaining_time": "0:10:12"} +{"current_steps": 8430, "total_steps": 8674, "loss": 0.42151302099227905, "lr": 4.3594499407003656e-09, "epoch": 1.9437399123818309, "percentage": 97.19, "elapsed_time": "5:51:25", "remaining_time": "0:10:10"} +{"current_steps": 8431, "total_steps": 8674, "loss": 0.41727957129478455, "lr": 4.3239608377778625e-09, "epoch": 1.9439704865114136, "percentage": 97.2, "elapsed_time": "5:51:27", "remaining_time": "0:10:07"} +{"current_steps": 8432, "total_steps": 8674, "loss": 0.5026905536651611, "lr": 4.288616466458395e-09, "epoch": 1.944201060640996, "percentage": 97.21, "elapsed_time": "5:51:30", "remaining_time": "0:10:05"} +{"current_steps": 8433, "total_steps": 8674, "loss": 0.5170408487319946, "lr": 4.2534168318798524e-09, "epoch": 1.9444316347705788, "percentage": 97.22, "elapsed_time": "5:51:33", "remaining_time": "0:10:02"} +{"current_steps": 8434, "total_steps": 8674, "loss": 0.3918447196483612, "lr": 4.21836193915881e-09, "epoch": 1.9446622089001613, "percentage": 97.23, "elapsed_time": "5:51:36", "remaining_time": "0:10:00"} +{"current_steps": 8435, "total_steps": 8674, "loss": 0.49871906638145447, "lr": 4.183451793390747e-09, "epoch": 1.944892783029744, "percentage": 97.24, "elapsed_time": "5:51:38", "remaining_time": "0:09:57"} +{"current_steps": 8436, "total_steps": 8674, "loss": 0.43729400634765625, "lr": 4.1486863996502694e-09, "epoch": 1.9451233571593267, "percentage": 97.26, "elapsed_time": "5:51:41", "remaining_time": "0:09:55"} 
+{"current_steps": 8437, "total_steps": 8674, "loss": 0.49198442697525024, "lr": 4.114065762990781e-09, "epoch": 1.9453539312889094, "percentage": 97.27, "elapsed_time": "5:51:43", "remaining_time": "0:09:52"} +{"current_steps": 8438, "total_steps": 8674, "loss": 0.48610788583755493, "lr": 4.079589888444923e-09, "epoch": 1.9455845054184921, "percentage": 97.28, "elapsed_time": "5:51:45", "remaining_time": "0:09:50"} +{"current_steps": 8439, "total_steps": 8674, "loss": 0.43962734937667847, "lr": 4.045258781024019e-09, "epoch": 1.9458150795480749, "percentage": 97.29, "elapsed_time": "5:51:48", "remaining_time": "0:09:47"} +{"current_steps": 8440, "total_steps": 8674, "loss": 0.3320704400539398, "lr": 4.011072445718522e-09, "epoch": 1.9460456536776574, "percentage": 97.3, "elapsed_time": "5:51:50", "remaining_time": "0:09:45"} +{"current_steps": 8441, "total_steps": 8674, "loss": 0.4773918092250824, "lr": 3.977030887497568e-09, "epoch": 1.94627622780724, "percentage": 97.31, "elapsed_time": "5:51:53", "remaining_time": "0:09:42"} +{"current_steps": 8442, "total_steps": 8674, "loss": 0.424363911151886, "lr": 3.9431341113096425e-09, "epoch": 1.9465068019368226, "percentage": 97.33, "elapsed_time": "5:51:55", "remaining_time": "0:09:40"} +{"current_steps": 8443, "total_steps": 8674, "loss": 0.5321601033210754, "lr": 3.9093821220818055e-09, "epoch": 1.9467373760664053, "percentage": 97.34, "elapsed_time": "5:51:57", "remaining_time": "0:09:37"} +{"current_steps": 8444, "total_steps": 8674, "loss": 0.48579344153404236, "lr": 3.875774924720465e-09, "epoch": 1.946967950195988, "percentage": 97.35, "elapsed_time": "5:52:00", "remaining_time": "0:09:35"} +{"current_steps": 8445, "total_steps": 8674, "loss": 0.39313316345214844, "lr": 3.842312524110603e-09, "epoch": 1.9471985243255707, "percentage": 97.36, "elapsed_time": "5:52:02", "remaining_time": "0:09:32"} +{"current_steps": 8446, "total_steps": 8674, "loss": 0.522427499294281, "lr": 3.8089949251163264e-09, "epoch": 
1.9474290984551534, "percentage": 97.37, "elapsed_time": "5:52:05", "remaining_time": "0:09:30"} +{"current_steps": 8447, "total_steps": 8674, "loss": 0.3822653889656067, "lr": 3.775822132580875e-09, "epoch": 1.9476596725847362, "percentage": 97.38, "elapsed_time": "5:52:07", "remaining_time": "0:09:27"} +{"current_steps": 8448, "total_steps": 8674, "loss": 0.4322483241558075, "lr": 3.7427941513259454e-09, "epoch": 1.9478902467143187, "percentage": 97.39, "elapsed_time": "5:52:09", "remaining_time": "0:09:25"} +{"current_steps": 8449, "total_steps": 8674, "loss": 0.4862939715385437, "lr": 3.7099109861528087e-09, "epoch": 1.9481208208439014, "percentage": 97.41, "elapsed_time": "5:52:12", "remaining_time": "0:09:22"} +{"current_steps": 8450, "total_steps": 8674, "loss": 0.45388323068618774, "lr": 3.6771726418410863e-09, "epoch": 1.9483513949734839, "percentage": 97.42, "elapsed_time": "5:52:14", "remaining_time": "0:09:20"} +{"current_steps": 8451, "total_steps": 8674, "loss": 0.3937215805053711, "lr": 3.644579123149749e-09, "epoch": 1.9485819691030666, "percentage": 97.43, "elapsed_time": "5:52:17", "remaining_time": "0:09:17"} +{"current_steps": 8452, "total_steps": 8674, "loss": 0.46887993812561035, "lr": 3.6121304348165628e-09, "epoch": 1.9488125432326493, "percentage": 97.44, "elapsed_time": "5:52:19", "remaining_time": "0:09:15"} +{"current_steps": 8453, "total_steps": 8674, "loss": 0.4444226026535034, "lr": 3.5798265815584204e-09, "epoch": 1.949043117362232, "percentage": 97.45, "elapsed_time": "5:52:21", "remaining_time": "0:09:12"} +{"current_steps": 8454, "total_steps": 8674, "loss": 0.4938625991344452, "lr": 3.5476675680709e-09, "epoch": 1.9492736914918147, "percentage": 97.46, "elapsed_time": "5:52:24", "remaining_time": "0:09:10"} +{"current_steps": 8455, "total_steps": 8674, "loss": 0.37632471323013306, "lr": 3.5156533990285953e-09, "epoch": 1.9495042656213972, "percentage": 97.48, "elapsed_time": "5:52:26", "remaining_time": "0:09:07"} 
+{"current_steps": 8456, "total_steps": 8674, "loss": 0.4345025420188904, "lr": 3.483784079085117e-09, "epoch": 1.94973483975098, "percentage": 97.49, "elapsed_time": "5:52:29", "remaining_time": "0:09:05"} +{"current_steps": 8457, "total_steps": 8674, "loss": 0.3721727132797241, "lr": 3.4520596128729818e-09, "epoch": 1.9499654138805624, "percentage": 97.5, "elapsed_time": "5:52:31", "remaining_time": "0:09:02"} +{"current_steps": 8458, "total_steps": 8674, "loss": 0.4871670603752136, "lr": 3.4204800050037232e-09, "epoch": 1.9501959880101452, "percentage": 97.51, "elapsed_time": "5:52:34", "remaining_time": "0:09:00"} +{"current_steps": 8459, "total_steps": 8674, "loss": 0.578133225440979, "lr": 3.38904526006778e-09, "epoch": 1.9504265621397279, "percentage": 97.52, "elapsed_time": "5:52:36", "remaining_time": "0:08:57"} +{"current_steps": 8460, "total_steps": 8674, "loss": 0.4721870422363281, "lr": 3.357755382634386e-09, "epoch": 1.9506571362693106, "percentage": 97.53, "elapsed_time": "5:52:39", "remaining_time": "0:08:55"} +{"current_steps": 8461, "total_steps": 8674, "loss": 0.4569184184074402, "lr": 3.3266103772519037e-09, "epoch": 1.9508877103988933, "percentage": 97.54, "elapsed_time": "5:52:41", "remaining_time": "0:08:52"} +{"current_steps": 8462, "total_steps": 8674, "loss": 0.48763811588287354, "lr": 3.2956102484477112e-09, "epoch": 1.951118284528476, "percentage": 97.56, "elapsed_time": "5:52:44", "remaining_time": "0:08:50"} +{"current_steps": 8463, "total_steps": 8674, "loss": 0.45957818627357483, "lr": 3.264755000727759e-09, "epoch": 1.9513488586580585, "percentage": 97.57, "elapsed_time": "5:52:46", "remaining_time": "0:08:47"} +{"current_steps": 8464, "total_steps": 8674, "loss": 0.49398598074913025, "lr": 3.234044638577238e-09, "epoch": 1.9515794327876412, "percentage": 97.58, "elapsed_time": "5:52:48", "remaining_time": "0:08:45"} +{"current_steps": 8465, "total_steps": 8674, "loss": 0.48884931206703186, "lr": 3.2034791664603544e-09, "epoch": 
1.9518100069172237, "percentage": 97.59, "elapsed_time": "5:52:51", "remaining_time": "0:08:42"} +{"current_steps": 8466, "total_steps": 8674, "loss": 0.45171886682510376, "lr": 3.173058588819999e-09, "epoch": 1.9520405810468064, "percentage": 97.6, "elapsed_time": "5:52:53", "remaining_time": "0:08:40"} +{"current_steps": 8467, "total_steps": 8674, "loss": 0.45110028982162476, "lr": 3.142782910077968e-09, "epoch": 1.9522711551763892, "percentage": 97.61, "elapsed_time": "5:52:56", "remaining_time": "0:08:37"} +{"current_steps": 8468, "total_steps": 8674, "loss": 0.4602523446083069, "lr": 3.1126521346354074e-09, "epoch": 1.9525017293059719, "percentage": 97.63, "elapsed_time": "5:52:58", "remaining_time": "0:08:35"} +{"current_steps": 8469, "total_steps": 8674, "loss": 0.3908727169036865, "lr": 3.082666266872036e-09, "epoch": 1.9527323034355546, "percentage": 97.64, "elapsed_time": "5:53:01", "remaining_time": "0:08:32"} +{"current_steps": 8470, "total_steps": 8674, "loss": 0.4886831045150757, "lr": 3.0528253111464786e-09, "epoch": 1.9529628775651373, "percentage": 97.65, "elapsed_time": "5:53:03", "remaining_time": "0:08:30"} +{"current_steps": 8471, "total_steps": 8674, "loss": 0.4407721161842346, "lr": 3.023129271796598e-09, "epoch": 1.9531934516947198, "percentage": 97.66, "elapsed_time": "5:53:05", "remaining_time": "0:08:27"} +{"current_steps": 8472, "total_steps": 8674, "loss": 0.46958622336387634, "lr": 2.9935781531389425e-09, "epoch": 1.9534240258243025, "percentage": 97.67, "elapsed_time": "5:53:08", "remaining_time": "0:08:25"} +{"current_steps": 8473, "total_steps": 8674, "loss": 0.4642796516418457, "lr": 2.964171959469075e-09, "epoch": 1.953654599953885, "percentage": 97.68, "elapsed_time": "5:53:10", "remaining_time": "0:08:22"} +{"current_steps": 8474, "total_steps": 8674, "loss": 0.5124588012695312, "lr": 2.9349106950613545e-09, "epoch": 1.9538851740834677, "percentage": 97.69, "elapsed_time": "5:53:13", "remaining_time": "0:08:20"} 
+{"current_steps": 8475, "total_steps": 8674, "loss": 0.516730546951294, "lr": 2.9057943641693784e-09, "epoch": 1.9541157482130505, "percentage": 97.71, "elapsed_time": "5:53:16", "remaining_time": "0:08:17"} +{"current_steps": 8476, "total_steps": 8674, "loss": 0.47847944498062134, "lr": 2.876822971025428e-09, "epoch": 1.9543463223426332, "percentage": 97.72, "elapsed_time": "5:53:18", "remaining_time": "0:08:15"} +{"current_steps": 8477, "total_steps": 8674, "loss": 0.5167095065116882, "lr": 2.8479965198408007e-09, "epoch": 1.9545768964722159, "percentage": 97.73, "elapsed_time": "5:53:20", "remaining_time": "0:08:12"} +{"current_steps": 8478, "total_steps": 8674, "loss": 0.40728163719177246, "lr": 2.819315014805812e-09, "epoch": 1.9548074706017986, "percentage": 97.74, "elapsed_time": "5:53:23", "remaining_time": "0:08:10"} +{"current_steps": 8479, "total_steps": 8674, "loss": 0.49741852283477783, "lr": 2.790778460089349e-09, "epoch": 1.955038044731381, "percentage": 97.75, "elapsed_time": "5:53:25", "remaining_time": "0:08:07"} +{"current_steps": 8480, "total_steps": 8674, "loss": 0.33847475051879883, "lr": 2.7623868598397603e-09, "epoch": 1.9552686188609638, "percentage": 97.76, "elapsed_time": "5:53:28", "remaining_time": "0:08:05"} +{"current_steps": 8481, "total_steps": 8674, "loss": 0.39727652072906494, "lr": 2.734140218183856e-09, "epoch": 1.9554991929905463, "percentage": 97.77, "elapsed_time": "5:53:30", "remaining_time": "0:08:02"} +{"current_steps": 8482, "total_steps": 8674, "loss": 0.40332260727882385, "lr": 2.706038539227795e-09, "epoch": 1.955729767120129, "percentage": 97.79, "elapsed_time": "5:53:32", "remaining_time": "0:08:00"} +{"current_steps": 8483, "total_steps": 8674, "loss": 0.40296924114227295, "lr": 2.6780818270562e-09, "epoch": 1.9559603412497117, "percentage": 97.8, "elapsed_time": "5:53:35", "remaining_time": "0:07:57"} +{"current_steps": 8484, "total_steps": 8674, "loss": 0.4253476858139038, "lr": 2.650270085732931e-09, "epoch": 
1.9561909153792945, "percentage": 97.81, "elapsed_time": "5:53:37", "remaining_time": "0:07:55"} +{"current_steps": 8485, "total_steps": 8674, "loss": 0.448941171169281, "lr": 2.6226033193007535e-09, "epoch": 1.9564214895088772, "percentage": 97.82, "elapsed_time": "5:53:40", "remaining_time": "0:07:52"} +{"current_steps": 8486, "total_steps": 8674, "loss": 0.48213180899620056, "lr": 2.59508153178134e-09, "epoch": 1.95665206363846, "percentage": 97.83, "elapsed_time": "5:53:42", "remaining_time": "0:07:50"} +{"current_steps": 8487, "total_steps": 8674, "loss": 0.48886558413505554, "lr": 2.5677047271752683e-09, "epoch": 1.9568826377680424, "percentage": 97.84, "elapsed_time": "5:53:45", "remaining_time": "0:07:47"} +{"current_steps": 8488, "total_steps": 8674, "loss": 0.49786341190338135, "lr": 2.5404729094619103e-09, "epoch": 1.957113211897625, "percentage": 97.86, "elapsed_time": "5:53:47", "remaining_time": "0:07:45"} +{"current_steps": 8489, "total_steps": 8674, "loss": 0.4487866163253784, "lr": 2.5133860825997667e-09, "epoch": 1.9573437860272076, "percentage": 97.87, "elapsed_time": "5:53:50", "remaining_time": "0:07:42"} +{"current_steps": 8490, "total_steps": 8674, "loss": 0.46193206310272217, "lr": 2.486444250526243e-09, "epoch": 1.9575743601567903, "percentage": 97.88, "elapsed_time": "5:53:52", "remaining_time": "0:07:40"} +{"current_steps": 8491, "total_steps": 8674, "loss": 0.44729042053222656, "lr": 2.459647417157429e-09, "epoch": 1.957804934286373, "percentage": 97.89, "elapsed_time": "5:53:55", "remaining_time": "0:07:37"} +{"current_steps": 8492, "total_steps": 8674, "loss": 0.4646851718425751, "lr": 2.432995586388764e-09, "epoch": 1.9580355084159557, "percentage": 97.9, "elapsed_time": "5:53:57", "remaining_time": "0:07:35"} +{"current_steps": 8493, "total_steps": 8674, "loss": 0.49538400769233704, "lr": 2.40648876209415e-09, "epoch": 1.9582660825455385, "percentage": 97.91, "elapsed_time": "5:54:00", "remaining_time": "0:07:32"} +{"current_steps": 
8494, "total_steps": 8674, "loss": 0.5548783540725708, "lr": 2.3801269481267262e-09, "epoch": 1.9584966566751212, "percentage": 97.92, "elapsed_time": "5:54:02", "remaining_time": "0:07:30"} +{"current_steps": 8495, "total_steps": 8674, "loss": 0.4390280544757843, "lr": 2.3539101483184277e-09, "epoch": 1.9587272308047037, "percentage": 97.94, "elapsed_time": "5:54:04", "remaining_time": "0:07:27"} +{"current_steps": 8496, "total_steps": 8674, "loss": 0.3079942464828491, "lr": 2.327838366480095e-09, "epoch": 1.9589578049342864, "percentage": 97.95, "elapsed_time": "5:54:07", "remaining_time": "0:07:25"} +{"current_steps": 8497, "total_steps": 8674, "loss": 0.5199894309043884, "lr": 2.301911606401585e-09, "epoch": 1.959188379063869, "percentage": 97.96, "elapsed_time": "5:54:09", "remaining_time": "0:07:22"} +{"current_steps": 8498, "total_steps": 8674, "loss": 0.3403523564338684, "lr": 2.276129871851662e-09, "epoch": 1.9594189531934516, "percentage": 97.97, "elapsed_time": "5:54:12", "remaining_time": "0:07:20"} +{"current_steps": 8499, "total_steps": 8674, "loss": 0.49699991941452026, "lr": 2.2504931665777714e-09, "epoch": 1.9596495273230343, "percentage": 97.98, "elapsed_time": "5:54:14", "remaining_time": "0:07:17"} +{"current_steps": 8500, "total_steps": 8674, "loss": 0.4178547263145447, "lr": 2.2250014943066e-09, "epoch": 1.959880101452617, "percentage": 97.99, "elapsed_time": "5:54:17", "remaining_time": "0:07:15"} +{"current_steps": 8501, "total_steps": 8674, "loss": 0.5622760057449341, "lr": 2.199654858743627e-09, "epoch": 1.9601106755821998, "percentage": 98.01, "elapsed_time": "5:54:20", "remaining_time": "0:07:12"} +{"current_steps": 8502, "total_steps": 8674, "loss": 0.4072464406490326, "lr": 2.1744532635733505e-09, "epoch": 1.9603412497117825, "percentage": 98.02, "elapsed_time": "5:54:23", "remaining_time": "0:07:10"} +{"current_steps": 8503, "total_steps": 8674, "loss": 0.475033164024353, "lr": 2.1493967124587287e-09, "epoch": 1.960571823841365, 
"percentage": 98.03, "elapsed_time": "5:54:25", "remaining_time": "0:07:07"} +{"current_steps": 8504, "total_steps": 8674, "loss": 0.4734419584274292, "lr": 2.1244852090424035e-09, "epoch": 1.9608023979709477, "percentage": 98.04, "elapsed_time": "5:54:27", "remaining_time": "0:07:05"} +{"current_steps": 8505, "total_steps": 8674, "loss": 0.42523911595344543, "lr": 2.099718756945257e-09, "epoch": 1.9610329721005302, "percentage": 98.05, "elapsed_time": "5:54:30", "remaining_time": "0:07:02"} +{"current_steps": 8506, "total_steps": 8674, "loss": 0.5085049867630005, "lr": 2.075097359767297e-09, "epoch": 1.961263546230113, "percentage": 98.06, "elapsed_time": "5:54:32", "remaining_time": "0:07:00"} +{"current_steps": 8507, "total_steps": 8674, "loss": 0.5682120323181152, "lr": 2.0506210210877728e-09, "epoch": 1.9614941203596956, "percentage": 98.07, "elapsed_time": "5:54:35", "remaining_time": "0:06:57"} +{"current_steps": 8508, "total_steps": 8674, "loss": 0.4550264775753021, "lr": 2.0262897444642823e-09, "epoch": 1.9617246944892783, "percentage": 98.09, "elapsed_time": "5:54:37", "remaining_time": "0:06:55"} +{"current_steps": 8509, "total_steps": 8674, "loss": 0.43745940923690796, "lr": 2.0021035334337745e-09, "epoch": 1.961955268618861, "percentage": 98.1, "elapsed_time": "5:54:40", "remaining_time": "0:06:52"} +{"current_steps": 8510, "total_steps": 8674, "loss": 0.4523237347602844, "lr": 1.9780623915118812e-09, "epoch": 1.9621858427484438, "percentage": 98.11, "elapsed_time": "5:54:42", "remaining_time": "0:06:50"} +{"current_steps": 8511, "total_steps": 8674, "loss": 0.43080687522888184, "lr": 1.9541663221933623e-09, "epoch": 1.9624164168780263, "percentage": 98.12, "elapsed_time": "5:54:44", "remaining_time": "0:06:47"} +{"current_steps": 8512, "total_steps": 8674, "loss": 0.5265613794326782, "lr": 1.930415328951551e-09, "epoch": 1.962646991007609, "percentage": 98.13, "elapsed_time": "5:54:47", "remaining_time": "0:06:45"} +{"current_steps": 8513, 
"total_steps": 8674, "loss": 0.5482667684555054, "lr": 1.906809415239019e-09, "epoch": 1.9628775651371915, "percentage": 98.14, "elapsed_time": "5:54:49", "remaining_time": "0:06:42"} +{"current_steps": 8514, "total_steps": 8674, "loss": 0.43548330664634705, "lr": 1.8833485844871322e-09, "epoch": 1.9631081392667742, "percentage": 98.16, "elapsed_time": "5:54:52", "remaining_time": "0:06:40"} +{"current_steps": 8515, "total_steps": 8674, "loss": 0.45715010166168213, "lr": 1.8600328401061627e-09, "epoch": 1.963338713396357, "percentage": 98.17, "elapsed_time": "5:54:54", "remaining_time": "0:06:37"} +{"current_steps": 8516, "total_steps": 8674, "loss": 0.48137760162353516, "lr": 1.8368621854852884e-09, "epoch": 1.9635692875259396, "percentage": 98.18, "elapsed_time": "5:54:57", "remaining_time": "0:06:35"} +{"current_steps": 8517, "total_steps": 8674, "loss": 0.4607926607131958, "lr": 1.8138366239924818e-09, "epoch": 1.9637998616555223, "percentage": 98.19, "elapsed_time": "5:54:59", "remaining_time": "0:06:32"} +{"current_steps": 8518, "total_steps": 8674, "loss": 0.3551321029663086, "lr": 1.7909561589749545e-09, "epoch": 1.964030435785105, "percentage": 98.2, "elapsed_time": "5:55:02", "remaining_time": "0:06:30"} +{"current_steps": 8519, "total_steps": 8674, "loss": 0.4075126647949219, "lr": 1.7682207937583792e-09, "epoch": 1.9642610099146876, "percentage": 98.21, "elapsed_time": "5:55:04", "remaining_time": "0:06:27"} +{"current_steps": 8520, "total_steps": 8674, "loss": 0.4470815658569336, "lr": 1.7456305316477793e-09, "epoch": 1.9644915840442703, "percentage": 98.22, "elapsed_time": "5:55:06", "remaining_time": "0:06:25"} +{"current_steps": 8521, "total_steps": 8674, "loss": 0.5074938535690308, "lr": 1.72318537592675e-09, "epoch": 1.9647221581738528, "percentage": 98.24, "elapsed_time": "5:55:09", "remaining_time": "0:06:22"} +{"current_steps": 8522, "total_steps": 8674, "loss": 0.4799109697341919, "lr": 1.700885329857904e-09, "epoch": 1.9649527323034355, 
"percentage": 98.25, "elapsed_time": "5:55:11", "remaining_time": "0:06:20"} +{"current_steps": 8523, "total_steps": 8674, "loss": 0.5603263974189758, "lr": 1.6787303966828703e-09, "epoch": 1.9651833064330182, "percentage": 98.26, "elapsed_time": "5:55:14", "remaining_time": "0:06:17"} +{"current_steps": 8524, "total_steps": 8674, "loss": 0.45492851734161377, "lr": 1.656720579622073e-09, "epoch": 1.965413880562601, "percentage": 98.27, "elapsed_time": "5:55:16", "remaining_time": "0:06:15"} +{"current_steps": 8525, "total_steps": 8674, "loss": 0.47700050473213196, "lr": 1.6348558818748414e-09, "epoch": 1.9656444546921836, "percentage": 98.28, "elapsed_time": "5:55:19", "remaining_time": "0:06:12"} +{"current_steps": 8526, "total_steps": 8674, "loss": 0.5105462074279785, "lr": 1.6131363066194115e-09, "epoch": 1.9658750288217663, "percentage": 98.29, "elapsed_time": "5:55:21", "remaining_time": "0:06:10"} +{"current_steps": 8527, "total_steps": 8674, "loss": 0.47818124294281006, "lr": 1.5915618570130351e-09, "epoch": 1.9661056029513488, "percentage": 98.31, "elapsed_time": "5:55:24", "remaining_time": "0:06:07"} +{"current_steps": 8528, "total_steps": 8674, "loss": 0.4549172520637512, "lr": 1.5701325361916484e-09, "epoch": 1.9663361770809316, "percentage": 98.32, "elapsed_time": "5:55:26", "remaining_time": "0:06:05"} +{"current_steps": 8529, "total_steps": 8674, "loss": 0.406271755695343, "lr": 1.5488483472703151e-09, "epoch": 1.966566751210514, "percentage": 98.33, "elapsed_time": "5:55:28", "remaining_time": "0:06:02"} +{"current_steps": 8530, "total_steps": 8674, "loss": 0.4452788829803467, "lr": 1.5277092933427827e-09, "epoch": 1.9667973253400968, "percentage": 98.34, "elapsed_time": "5:55:31", "remaining_time": "0:06:00"} +{"current_steps": 8531, "total_steps": 8674, "loss": 0.46621495485305786, "lr": 1.5067153774820374e-09, "epoch": 1.9670278994696795, "percentage": 98.35, "elapsed_time": "5:55:33", "remaining_time": "0:05:57"} +{"current_steps": 8532, 
"total_steps": 8674, "loss": 0.47837382555007935, "lr": 1.4858666027395272e-09, "epoch": 1.9672584735992622, "percentage": 98.36, "elapsed_time": "5:55:36", "remaining_time": "0:05:55"} +{"current_steps": 8533, "total_steps": 8674, "loss": 0.5690933465957642, "lr": 1.4651629721460501e-09, "epoch": 1.967489047728845, "percentage": 98.37, "elapsed_time": "5:55:38", "remaining_time": "0:05:52"} +{"current_steps": 8534, "total_steps": 8674, "loss": 0.478906512260437, "lr": 1.4446044887109764e-09, "epoch": 1.9677196218584276, "percentage": 98.39, "elapsed_time": "5:55:41", "remaining_time": "0:05:50"} +{"current_steps": 8535, "total_steps": 8674, "loss": 0.5024028420448303, "lr": 1.4241911554225827e-09, "epoch": 1.9679501959880101, "percentage": 98.4, "elapsed_time": "5:55:43", "remaining_time": "0:05:47"} +{"current_steps": 8536, "total_steps": 8674, "loss": 0.4430769979953766, "lr": 1.4039229752483839e-09, "epoch": 1.9681807701175928, "percentage": 98.41, "elapsed_time": "5:55:45", "remaining_time": "0:05:45"} +{"current_steps": 8537, "total_steps": 8674, "loss": 0.34506234526634216, "lr": 1.3837999511343567e-09, "epoch": 1.9684113442471753, "percentage": 98.42, "elapsed_time": "5:55:48", "remaining_time": "0:05:42"} +{"current_steps": 8538, "total_steps": 8674, "loss": 0.47483426332473755, "lr": 1.363822086005717e-09, "epoch": 1.968641918376758, "percentage": 98.43, "elapsed_time": "5:55:50", "remaining_time": "0:05:40"} +{"current_steps": 8539, "total_steps": 8674, "loss": 0.3902367651462555, "lr": 1.343989382766475e-09, "epoch": 1.9688724925063408, "percentage": 98.44, "elapsed_time": "5:55:53", "remaining_time": "0:05:37"} +{"current_steps": 8540, "total_steps": 8674, "loss": 0.5114254951477051, "lr": 1.3243018442994358e-09, "epoch": 1.9691030666359235, "percentage": 98.46, "elapsed_time": "5:55:55", "remaining_time": "0:05:35"} +{"current_steps": 8541, "total_steps": 8674, "loss": 0.4048948287963867, "lr": 1.3047594734663104e-09, "epoch": 1.9693336407655062, 
"percentage": 98.47, "elapsed_time": "5:55:58", "remaining_time": "0:05:32"} +{"current_steps": 8542, "total_steps": 8674, "loss": 0.4168536067008972, "lr": 1.2853622731079372e-09, "epoch": 1.969564214895089, "percentage": 98.48, "elapsed_time": "5:56:00", "remaining_time": "0:05:30"} +{"current_steps": 8543, "total_steps": 8674, "loss": 0.38410186767578125, "lr": 1.2661102460437279e-09, "epoch": 1.9697947890246714, "percentage": 98.49, "elapsed_time": "5:56:03", "remaining_time": "0:05:27"} +{"current_steps": 8544, "total_steps": 8674, "loss": 0.4931117296218872, "lr": 1.2470033950724435e-09, "epoch": 1.9700253631542541, "percentage": 98.5, "elapsed_time": "5:56:05", "remaining_time": "0:05:25"} +{"current_steps": 8545, "total_steps": 8674, "loss": 0.41142135858535767, "lr": 1.228041722971085e-09, "epoch": 1.9702559372838366, "percentage": 98.51, "elapsed_time": "5:56:08", "remaining_time": "0:05:22"} +{"current_steps": 8546, "total_steps": 8674, "loss": 0.5165313482284546, "lr": 1.209225232496225e-09, "epoch": 1.9704865114134194, "percentage": 98.52, "elapsed_time": "5:56:10", "remaining_time": "0:05:20"} +{"current_steps": 8547, "total_steps": 8674, "loss": 0.3330427408218384, "lr": 1.190553926382898e-09, "epoch": 1.970717085543002, "percentage": 98.54, "elapsed_time": "5:56:12", "remaining_time": "0:05:17"} +{"current_steps": 8548, "total_steps": 8674, "loss": 0.43116509914398193, "lr": 1.172027807345155e-09, "epoch": 1.9709476596725848, "percentage": 98.55, "elapsed_time": "5:56:15", "remaining_time": "0:05:15"} +{"current_steps": 8549, "total_steps": 8674, "loss": 0.43564409017562866, "lr": 1.1536468780760643e-09, "epoch": 1.9711782338021675, "percentage": 98.56, "elapsed_time": "5:56:18", "remaining_time": "0:05:12"} +{"current_steps": 8550, "total_steps": 8674, "loss": 0.5361013412475586, "lr": 1.1354111412472666e-09, "epoch": 1.9714088079317502, "percentage": 98.57, "elapsed_time": "5:56:20", "remaining_time": "0:05:10"} +{"current_steps": 8551, 
"total_steps": 8674, "loss": 0.4049466550350189, "lr": 1.1173205995097524e-09, "epoch": 1.9716393820613327, "percentage": 98.58, "elapsed_time": "5:56:23", "remaining_time": "0:05:07"} +{"current_steps": 8552, "total_steps": 8674, "loss": 0.45090144872665405, "lr": 1.0993752554930847e-09, "epoch": 1.9718699561909154, "percentage": 98.59, "elapsed_time": "5:56:25", "remaining_time": "0:05:05"} +{"current_steps": 8553, "total_steps": 8674, "loss": 0.43933606147766113, "lr": 1.0815751118057326e-09, "epoch": 1.972100530320498, "percentage": 98.61, "elapsed_time": "5:56:27", "remaining_time": "0:05:02"} +{"current_steps": 8554, "total_steps": 8674, "loss": 0.5254300832748413, "lr": 1.063920171035182e-09, "epoch": 1.9723311044500806, "percentage": 98.62, "elapsed_time": "5:56:30", "remaining_time": "0:05:00"} +{"current_steps": 8555, "total_steps": 8674, "loss": 0.45544567704200745, "lr": 1.0464104357477132e-09, "epoch": 1.9725616785796634, "percentage": 98.63, "elapsed_time": "5:56:32", "remaining_time": "0:04:57"} +{"current_steps": 8556, "total_steps": 8674, "loss": 0.5177001357078552, "lr": 1.0290459084886238e-09, "epoch": 1.972792252709246, "percentage": 98.64, "elapsed_time": "5:56:34", "remaining_time": "0:04:55"} +{"current_steps": 8557, "total_steps": 8674, "loss": 0.4669674038887024, "lr": 1.0118265917818946e-09, "epoch": 1.9730228268388288, "percentage": 98.65, "elapsed_time": "5:56:37", "remaining_time": "0:04:52"} +{"current_steps": 8558, "total_steps": 8674, "loss": 0.4244263172149658, "lr": 9.947524881307456e-10, "epoch": 1.9732534009684115, "percentage": 98.66, "elapsed_time": "5:56:39", "remaining_time": "0:04:50"} +{"current_steps": 8559, "total_steps": 8674, "loss": 0.44121527671813965, "lr": 9.778236000168583e-10, "epoch": 1.973483975097994, "percentage": 98.67, "elapsed_time": "5:56:42", "remaining_time": "0:04:47"} +{"current_steps": 8560, "total_steps": 8674, "loss": 0.44209837913513184, "lr": 9.610399299010418e-10, "epoch": 1.9737145492275767, 
"percentage": 98.69, "elapsed_time": "5:56:44", "remaining_time": "0:04:45"} +{"current_steps": 8561, "total_steps": 8674, "loss": 0.4036273956298828, "lr": 9.444014802231226e-10, "epoch": 1.9739451233571592, "percentage": 98.7, "elapsed_time": "5:56:47", "remaining_time": "0:04:42"} +{"current_steps": 8562, "total_steps": 8674, "loss": 0.47106266021728516, "lr": 9.279082534014992e-10, "epoch": 1.974175697486742, "percentage": 98.71, "elapsed_time": "5:56:49", "remaining_time": "0:04:40"} +{"current_steps": 8563, "total_steps": 8674, "loss": 0.41080260276794434, "lr": 9.115602518338095e-10, "epoch": 1.9744062716163246, "percentage": 98.72, "elapsed_time": "5:56:51", "remaining_time": "0:04:37"} +{"current_steps": 8564, "total_steps": 8674, "loss": 0.4333069920539856, "lr": 8.953574778962635e-10, "epoch": 1.9746368457459074, "percentage": 98.73, "elapsed_time": "5:56:54", "remaining_time": "0:04:35"} +{"current_steps": 8565, "total_steps": 8674, "loss": 0.3939141631126404, "lr": 8.792999339440887e-10, "epoch": 1.97486741987549, "percentage": 98.74, "elapsed_time": "5:56:56", "remaining_time": "0:04:32"} +{"current_steps": 8566, "total_steps": 8674, "loss": 0.4202404022216797, "lr": 8.633876223114178e-10, "epoch": 1.9750979940050726, "percentage": 98.75, "elapsed_time": "5:56:59", "remaining_time": "0:04:30"} +{"current_steps": 8567, "total_steps": 8674, "loss": 0.44722893834114075, "lr": 8.476205453114005e-10, "epoch": 1.9753285681346553, "percentage": 98.77, "elapsed_time": "5:57:02", "remaining_time": "0:04:27"} +{"current_steps": 8568, "total_steps": 8674, "loss": 0.4095258414745331, "lr": 8.319987052357591e-10, "epoch": 1.9755591422642378, "percentage": 98.78, "elapsed_time": "5:57:04", "remaining_time": "0:04:25"} +{"current_steps": 8569, "total_steps": 8674, "loss": 0.43372297286987305, "lr": 8.165221043553439e-10, "epoch": 1.9757897163938205, "percentage": 98.79, "elapsed_time": "5:57:06", "remaining_time": "0:04:22"} +{"current_steps": 8570, "total_steps": 
8674, "loss": 0.4697731137275696, "lr": 8.011907449199106e-10, "epoch": 1.9760202905234032, "percentage": 98.8, "elapsed_time": "5:57:09", "remaining_time": "0:04:20"} +{"current_steps": 8571, "total_steps": 8674, "loss": 0.49179136753082275, "lr": 7.860046291580103e-10, "epoch": 1.976250864652986, "percentage": 98.81, "elapsed_time": "5:57:11", "remaining_time": "0:04:17"} +{"current_steps": 8572, "total_steps": 8674, "loss": 0.35898157954216003, "lr": 7.70963759277099e-10, "epoch": 1.9764814387825687, "percentage": 98.82, "elapsed_time": "5:57:14", "remaining_time": "0:04:15"} +{"current_steps": 8573, "total_steps": 8674, "loss": 0.48293429613113403, "lr": 7.560681374634282e-10, "epoch": 1.9767120129121514, "percentage": 98.84, "elapsed_time": "5:57:16", "remaining_time": "0:04:12"} +{"current_steps": 8574, "total_steps": 8674, "loss": 0.39636045694351196, "lr": 7.413177658822656e-10, "epoch": 1.9769425870417339, "percentage": 98.85, "elapsed_time": "5:57:19", "remaining_time": "0:04:10"} +{"current_steps": 8575, "total_steps": 8674, "loss": 0.375876784324646, "lr": 7.267126466777851e-10, "epoch": 1.9771731611713166, "percentage": 98.86, "elapsed_time": "5:57:21", "remaining_time": "0:04:07"} +{"current_steps": 8576, "total_steps": 8674, "loss": 0.4064311385154724, "lr": 7.122527819729551e-10, "epoch": 1.977403735300899, "percentage": 98.87, "elapsed_time": "5:57:24", "remaining_time": "0:04:05"} +{"current_steps": 8577, "total_steps": 8674, "loss": 0.4373857378959656, "lr": 6.979381738696499e-10, "epoch": 1.9776343094304818, "percentage": 98.88, "elapsed_time": "5:57:26", "remaining_time": "0:04:02"} +{"current_steps": 8578, "total_steps": 8674, "loss": 0.5008025765419006, "lr": 6.837688244486494e-10, "epoch": 1.9778648835600645, "percentage": 98.89, "elapsed_time": "5:57:28", "remaining_time": "0:04:00"} +{"current_steps": 8579, "total_steps": 8674, "loss": 0.4286271035671234, "lr": 6.697447357695285e-10, "epoch": 1.9780954576896472, "percentage": 98.9, 
"elapsed_time": "5:57:31", "remaining_time": "0:03:57"} +{"current_steps": 8580, "total_steps": 8674, "loss": 0.4420759081840515, "lr": 6.558659098711006e-10, "epoch": 1.97832603181923, "percentage": 98.92, "elapsed_time": "5:57:33", "remaining_time": "0:03:55"} +{"current_steps": 8581, "total_steps": 8674, "loss": 0.3946709632873535, "lr": 6.421323487705299e-10, "epoch": 1.9785566059488127, "percentage": 98.93, "elapsed_time": "5:57:36", "remaining_time": "0:03:52"} +{"current_steps": 8582, "total_steps": 8674, "loss": 0.42874544858932495, "lr": 6.285440544641085e-10, "epoch": 1.9787871800783952, "percentage": 98.94, "elapsed_time": "5:57:38", "remaining_time": "0:03:50"} +{"current_steps": 8583, "total_steps": 8674, "loss": 0.4728921055793762, "lr": 6.151010289272563e-10, "epoch": 1.9790177542079779, "percentage": 98.95, "elapsed_time": "5:57:41", "remaining_time": "0:03:47"} +{"current_steps": 8584, "total_steps": 8674, "loss": 0.3756295442581177, "lr": 6.018032741139656e-10, "epoch": 1.9792483283375604, "percentage": 98.96, "elapsed_time": "5:57:43", "remaining_time": "0:03:45"} +{"current_steps": 8585, "total_steps": 8674, "loss": 0.48663657903671265, "lr": 5.886507919570239e-10, "epoch": 1.979478902467143, "percentage": 98.97, "elapsed_time": "5:57:46", "remaining_time": "0:03:42"} +{"current_steps": 8586, "total_steps": 8674, "loss": 0.46127766370773315, "lr": 5.756435843685681e-10, "epoch": 1.9797094765967258, "percentage": 98.99, "elapsed_time": "5:57:48", "remaining_time": "0:03:40"} +{"current_steps": 8587, "total_steps": 8674, "loss": 0.493796169757843, "lr": 5.627816532390862e-10, "epoch": 1.9799400507263085, "percentage": 99.0, "elapsed_time": "5:57:50", "remaining_time": "0:03:37"} +{"current_steps": 8588, "total_steps": 8674, "loss": 0.3703004717826843, "lr": 5.500650004383045e-10, "epoch": 1.9801706248558912, "percentage": 99.01, "elapsed_time": "5:57:53", "remaining_time": "0:03:35"} +{"current_steps": 8589, "total_steps": 8674, "loss": 
0.5385284423828125, "lr": 5.374936278146336e-10, "epoch": 1.980401198985474, "percentage": 99.02, "elapsed_time": "5:57:55", "remaining_time": "0:03:32"} +{"current_steps": 8590, "total_steps": 8674, "loss": 0.3996584713459015, "lr": 5.250675371956115e-10, "epoch": 1.9806317731150564, "percentage": 99.03, "elapsed_time": "5:57:58", "remaining_time": "0:03:30"} +{"current_steps": 8591, "total_steps": 8674, "loss": 0.4513227641582489, "lr": 5.12786730387349e-10, "epoch": 1.9808623472446392, "percentage": 99.04, "elapsed_time": "5:58:00", "remaining_time": "0:03:27"} +{"current_steps": 8592, "total_steps": 8674, "loss": 0.46632474660873413, "lr": 5.006512091750848e-10, "epoch": 1.9810929213742217, "percentage": 99.05, "elapsed_time": "5:58:03", "remaining_time": "0:03:25"} +{"current_steps": 8593, "total_steps": 8674, "loss": 0.5379712581634521, "lr": 4.886609753227411e-10, "epoch": 1.9813234955038044, "percentage": 99.07, "elapsed_time": "5:58:05", "remaining_time": "0:03:22"} +{"current_steps": 8594, "total_steps": 8674, "loss": 0.3606422543525696, "lr": 4.768160305732572e-10, "epoch": 1.981554069633387, "percentage": 99.08, "elapsed_time": "5:58:07", "remaining_time": "0:03:20"} +{"current_steps": 8595, "total_steps": 8674, "loss": 0.39339596033096313, "lr": 4.651163766484778e-10, "epoch": 1.9817846437629698, "percentage": 99.09, "elapsed_time": "5:58:10", "remaining_time": "0:03:17"} +{"current_steps": 8596, "total_steps": 8674, "loss": 0.4606707692146301, "lr": 4.535620152489317e-10, "epoch": 1.9820152178925525, "percentage": 99.1, "elapsed_time": "5:58:12", "remaining_time": "0:03:15"} +{"current_steps": 8597, "total_steps": 8674, "loss": 0.4234154522418976, "lr": 4.421529480543862e-10, "epoch": 1.9822457920221352, "percentage": 99.11, "elapsed_time": "5:58:15", "remaining_time": "0:03:12"} +{"current_steps": 8598, "total_steps": 8674, "loss": 0.49317437410354614, "lr": 4.308891767229594e-10, "epoch": 1.9824763661517177, "percentage": 99.12, "elapsed_time": 
"5:58:17", "remaining_time": "0:03:10"} +{"current_steps": 8599, "total_steps": 8674, "loss": 0.47756847739219666, "lr": 4.197707028922304e-10, "epoch": 1.9827069402813005, "percentage": 99.14, "elapsed_time": "5:58:20", "remaining_time": "0:03:07"} +{"current_steps": 8600, "total_steps": 8674, "loss": 0.37664321064949036, "lr": 4.0879752817823963e-10, "epoch": 1.982937514410883, "percentage": 99.15, "elapsed_time": "5:58:22", "remaining_time": "0:03:05"} +{"current_steps": 8601, "total_steps": 8674, "loss": 0.3927830457687378, "lr": 3.9796965417604465e-10, "epoch": 1.9831680885404657, "percentage": 99.16, "elapsed_time": "5:58:26", "remaining_time": "0:03:02"} +{"current_steps": 8602, "total_steps": 8674, "loss": 0.41071420907974243, "lr": 3.8728708245971966e-10, "epoch": 1.9833986626700484, "percentage": 99.17, "elapsed_time": "5:58:28", "remaining_time": "0:03:00"} +{"current_steps": 8603, "total_steps": 8674, "loss": 0.49516505002975464, "lr": 3.7674981458191145e-10, "epoch": 1.983629236799631, "percentage": 99.18, "elapsed_time": "5:58:31", "remaining_time": "0:02:57"} +{"current_steps": 8604, "total_steps": 8674, "loss": 0.474129855632782, "lr": 3.6635785207439486e-10, "epoch": 1.9838598109292138, "percentage": 99.19, "elapsed_time": "5:58:33", "remaining_time": "0:02:55"} +{"current_steps": 8605, "total_steps": 8674, "loss": 0.4445813298225403, "lr": 3.5611119644773923e-10, "epoch": 1.9840903850587965, "percentage": 99.2, "elapsed_time": "5:58:35", "remaining_time": "0:02:52"} +{"current_steps": 8606, "total_steps": 8674, "loss": 0.46165329217910767, "lr": 3.4600984919141987e-10, "epoch": 1.984320959188379, "percentage": 99.22, "elapsed_time": "5:58:38", "remaining_time": "0:02:50"} +{"current_steps": 8607, "total_steps": 8674, "loss": 0.4073392152786255, "lr": 3.3605381177381764e-10, "epoch": 1.9845515333179617, "percentage": 99.23, "elapsed_time": "5:58:40", "remaining_time": "0:02:47"} +{"current_steps": 8608, "total_steps": 8674, "loss": 
0.46712470054626465, "lr": 3.262430856419973e-10, "epoch": 1.9847821074475442, "percentage": 99.24, "elapsed_time": "5:58:43", "remaining_time": "0:02:45"} +{"current_steps": 8609, "total_steps": 8674, "loss": 0.49993449449539185, "lr": 3.165776722222624e-10, "epoch": 1.985012681577127, "percentage": 99.25, "elapsed_time": "5:58:45", "remaining_time": "0:02:42"} +{"current_steps": 8610, "total_steps": 8674, "loss": 0.40737634897232056, "lr": 3.0705757291926705e-10, "epoch": 1.9852432557067097, "percentage": 99.26, "elapsed_time": "5:58:48", "remaining_time": "0:02:40"} +{"current_steps": 8611, "total_steps": 8674, "loss": 0.3714853823184967, "lr": 2.976827891172373e-10, "epoch": 1.9854738298362924, "percentage": 99.27, "elapsed_time": "5:58:50", "remaining_time": "0:02:37"} +{"current_steps": 8612, "total_steps": 8674, "loss": 0.3818984925746918, "lr": 2.884533221785279e-10, "epoch": 1.985704403965875, "percentage": 99.29, "elapsed_time": "5:58:53", "remaining_time": "0:02:35"} +{"current_steps": 8613, "total_steps": 8674, "loss": 0.4529988765716553, "lr": 2.7936917344495435e-10, "epoch": 1.9859349780954578, "percentage": 99.3, "elapsed_time": "5:58:55", "remaining_time": "0:02:32"} +{"current_steps": 8614, "total_steps": 8674, "loss": 0.44964706897735596, "lr": 2.7043034423701595e-10, "epoch": 1.9861655522250403, "percentage": 99.31, "elapsed_time": "5:58:57", "remaining_time": "0:02:30"} +{"current_steps": 8615, "total_steps": 8674, "loss": 0.49079659581184387, "lr": 2.616368358538956e-10, "epoch": 1.986396126354623, "percentage": 99.32, "elapsed_time": "5:59:00", "remaining_time": "0:02:27"} +{"current_steps": 8616, "total_steps": 8674, "loss": 0.4411408305168152, "lr": 2.529886495739042e-10, "epoch": 1.9866267004842055, "percentage": 99.33, "elapsed_time": "5:59:02", "remaining_time": "0:02:25"} +{"current_steps": 8617, "total_steps": 8674, "loss": 0.4386615455150604, "lr": 2.444857866541472e-10, "epoch": 1.9868572746137882, "percentage": 99.34, "elapsed_time": 
"5:59:05", "remaining_time": "0:02:22"} +{"current_steps": 8618, "total_steps": 8674, "loss": 0.4545249342918396, "lr": 2.3612824833063594e-10, "epoch": 1.987087848743371, "percentage": 99.35, "elapsed_time": "5:59:07", "remaining_time": "0:02:20"} +{"current_steps": 8619, "total_steps": 8674, "loss": 0.40094703435897827, "lr": 2.2791603581817643e-10, "epoch": 1.9873184228729537, "percentage": 99.37, "elapsed_time": "5:59:10", "remaining_time": "0:02:17"} +{"current_steps": 8620, "total_steps": 8674, "loss": 0.40233147144317627, "lr": 2.1984915031048047e-10, "epoch": 1.9875489970025364, "percentage": 99.38, "elapsed_time": "5:59:12", "remaining_time": "0:02:15"} +{"current_steps": 8621, "total_steps": 8674, "loss": 0.460537314414978, "lr": 2.1192759298016562e-10, "epoch": 1.9877795711321191, "percentage": 99.39, "elapsed_time": "5:59:14", "remaining_time": "0:02:12"} +{"current_steps": 8622, "total_steps": 8674, "loss": 0.4602966904640198, "lr": 2.0415136497875518e-10, "epoch": 1.9880101452617016, "percentage": 99.4, "elapsed_time": "5:59:17", "remaining_time": "0:02:10"} +{"current_steps": 8623, "total_steps": 8674, "loss": 0.5004392266273499, "lr": 1.9652046743656724e-10, "epoch": 1.9882407193912843, "percentage": 99.41, "elapsed_time": "5:59:19", "remaining_time": "0:02:07"} +{"current_steps": 8624, "total_steps": 8674, "loss": 0.48196107149124146, "lr": 1.8903490146282564e-10, "epoch": 1.9884712935208668, "percentage": 99.42, "elapsed_time": "5:59:22", "remaining_time": "0:02:05"} +{"current_steps": 8625, "total_steps": 8674, "loss": 0.45684510469436646, "lr": 1.8169466814565992e-10, "epoch": 1.9887018676504495, "percentage": 99.44, "elapsed_time": "5:59:24", "remaining_time": "0:02:02"} +{"current_steps": 8626, "total_steps": 8674, "loss": 0.44381850957870483, "lr": 1.7449976855199444e-10, "epoch": 1.9889324417800323, "percentage": 99.45, "elapsed_time": "5:59:27", "remaining_time": "0:02:00"} +{"current_steps": 8627, "total_steps": 8674, "loss": 
0.5301632881164551, "lr": 1.674502037277703e-10, "epoch": 1.989163015909615, "percentage": 99.46, "elapsed_time": "5:59:29", "remaining_time": "0:01:57"} +{"current_steps": 8628, "total_steps": 8674, "loss": 0.5154398679733276, "lr": 1.6054597469761233e-10, "epoch": 1.9893935900391977, "percentage": 99.47, "elapsed_time": "5:59:31", "remaining_time": "0:01:55"} +{"current_steps": 8629, "total_steps": 8674, "loss": 0.4334644079208374, "lr": 1.5378708246516215e-10, "epoch": 1.9896241641687804, "percentage": 99.48, "elapsed_time": "5:59:34", "remaining_time": "0:01:52"} +{"current_steps": 8630, "total_steps": 8674, "loss": 0.45578733086586, "lr": 1.4717352801296713e-10, "epoch": 1.989854738298363, "percentage": 99.49, "elapsed_time": "5:59:36", "remaining_time": "0:01:50"} +{"current_steps": 8631, "total_steps": 8674, "loss": 0.48997777700424194, "lr": 1.4070531230225834e-10, "epoch": 1.9900853124279456, "percentage": 99.5, "elapsed_time": "5:59:39", "remaining_time": "0:01:47"} +{"current_steps": 8632, "total_steps": 8674, "loss": 0.4760161340236664, "lr": 1.3438243627328371e-10, "epoch": 1.9903158865575281, "percentage": 99.52, "elapsed_time": "5:59:41", "remaining_time": "0:01:45"} +{"current_steps": 8633, "total_steps": 8674, "loss": 0.43040308356285095, "lr": 1.2820490084508583e-10, "epoch": 1.9905464606871108, "percentage": 99.53, "elapsed_time": "5:59:44", "remaining_time": "0:01:42"} +{"current_steps": 8634, "total_steps": 8674, "loss": 0.4588020443916321, "lr": 1.2217270691583514e-10, "epoch": 1.9907770348166935, "percentage": 99.54, "elapsed_time": "5:59:46", "remaining_time": "0:01:40"} +{"current_steps": 8635, "total_steps": 8674, "loss": 0.46267229318618774, "lr": 1.1628585536216374e-10, "epoch": 1.9910076089462763, "percentage": 99.55, "elapsed_time": "5:59:48", "remaining_time": "0:01:37"} +{"current_steps": 8636, "total_steps": 8674, "loss": 0.4159420132637024, "lr": 1.1054434703994253e-10, "epoch": 1.991238183075859, "percentage": 99.56, 
"elapsed_time": "5:59:51", "remaining_time": "0:01:35"} +{"current_steps": 8637, "total_steps": 8674, "loss": 0.47950947284698486, "lr": 1.0494818278361518e-10, "epoch": 1.9914687572054417, "percentage": 99.57, "elapsed_time": "5:59:53", "remaining_time": "0:01:32"} +{"current_steps": 8638, "total_steps": 8674, "loss": 0.4912334680557251, "lr": 9.949736340664206e-11, "epoch": 1.9916993313350242, "percentage": 99.58, "elapsed_time": "5:59:56", "remaining_time": "0:01:30"} +{"current_steps": 8639, "total_steps": 8674, "loss": 0.4895044267177582, "lr": 9.419188970150038e-11, "epoch": 1.991929905464607, "percentage": 99.6, "elapsed_time": "5:59:58", "remaining_time": "0:01:27"} +{"current_steps": 8640, "total_steps": 8674, "loss": 0.4822810888290405, "lr": 8.903176243935106e-11, "epoch": 1.9921604795941894, "percentage": 99.61, "elapsed_time": "6:00:00", "remaining_time": "0:01:25"} +{"current_steps": 8641, "total_steps": 8674, "loss": 0.4739280045032501, "lr": 8.401698237014975e-11, "epoch": 1.9923910537237721, "percentage": 99.62, "elapsed_time": "6:00:03", "remaining_time": "0:01:22"} +{"current_steps": 8642, "total_steps": 8674, "loss": 0.5394953489303589, "lr": 7.91475502228689e-11, "epoch": 1.9926216278533548, "percentage": 99.63, "elapsed_time": "6:00:06", "remaining_time": "0:01:20"} +{"current_steps": 8643, "total_steps": 8674, "loss": 0.38446712493896484, "lr": 7.44234667054977e-11, "epoch": 1.9928522019829376, "percentage": 99.64, "elapsed_time": "6:00:08", "remaining_time": "0:01:17"} +{"current_steps": 8644, "total_steps": 8674, "loss": 0.46814244985580444, "lr": 6.98447325045981e-11, "epoch": 1.9930827761125203, "percentage": 99.65, "elapsed_time": "6:00:10", "remaining_time": "0:01:15"} +{"current_steps": 8645, "total_steps": 8674, "loss": 0.5420444011688232, "lr": 6.541134828574879e-11, "epoch": 1.993313350242103, "percentage": 99.67, "elapsed_time": "6:00:13", "remaining_time": "0:01:12"} +{"current_steps": 8646, "total_steps": 8674, "loss": 
0.45574939250946045, "lr": 6.112331469332321e-11, "epoch": 1.9935439243716855, "percentage": 99.68, "elapsed_time": "6:00:15", "remaining_time": "0:01:10"} +{"current_steps": 8647, "total_steps": 8674, "loss": 0.37707841396331787, "lr": 5.69806323507116e-11, "epoch": 1.9937744985012682, "percentage": 99.69, "elapsed_time": "6:00:18", "remaining_time": "0:01:07"} +{"current_steps": 8648, "total_steps": 8674, "loss": 0.5139172077178955, "lr": 5.298330186020994e-11, "epoch": 1.9940050726308507, "percentage": 99.7, "elapsed_time": "6:00:20", "remaining_time": "0:01:05"} +{"current_steps": 8649, "total_steps": 8674, "loss": 0.5251332521438599, "lr": 4.913132380268692e-11, "epoch": 1.9942356467604334, "percentage": 99.71, "elapsed_time": "6:00:23", "remaining_time": "0:01:02"} +{"current_steps": 8650, "total_steps": 8674, "loss": 0.38396936655044556, "lr": 4.542469873802801e-11, "epoch": 1.9944662208900161, "percentage": 99.72, "elapsed_time": "6:00:25", "remaining_time": "0:01:00"} +{"current_steps": 8651, "total_steps": 8674, "loss": 0.42507076263427734, "lr": 4.1863427205246495e-11, "epoch": 1.9946967950195988, "percentage": 99.73, "elapsed_time": "6:00:27", "remaining_time": "0:00:57"} +{"current_steps": 8652, "total_steps": 8674, "loss": 0.3914533257484436, "lr": 3.8447509721817316e-11, "epoch": 1.9949273691491816, "percentage": 99.75, "elapsed_time": "6:00:30", "remaining_time": "0:00:55"} +{"current_steps": 8653, "total_steps": 8674, "loss": 0.46923860907554626, "lr": 3.5176946784343245e-11, "epoch": 1.9951579432787643, "percentage": 99.76, "elapsed_time": "6:00:32", "remaining_time": "0:00:52"} +{"current_steps": 8654, "total_steps": 8674, "loss": 0.35363346338272095, "lr": 3.205173886822177e-11, "epoch": 1.9953885174083468, "percentage": 99.77, "elapsed_time": "6:00:35", "remaining_time": "0:00:50"} +{"current_steps": 8655, "total_steps": 8674, "loss": 0.4142746925354004, "lr": 2.9071886427867175e-11, "epoch": 1.9956190915379295, "percentage": 99.78, 
"elapsed_time": "6:00:37", "remaining_time": "0:00:47"} +{"current_steps": 8656, "total_steps": 8674, "loss": 0.34989133477211, "lr": 2.623738989626645e-11, "epoch": 1.995849665667512, "percentage": 99.79, "elapsed_time": "6:00:39", "remaining_time": "0:00:44"} +{"current_steps": 8657, "total_steps": 8674, "loss": 0.5059055089950562, "lr": 2.354824968542335e-11, "epoch": 1.9960802397970947, "percentage": 99.8, "elapsed_time": "6:00:42", "remaining_time": "0:00:42"} +{"current_steps": 8658, "total_steps": 8674, "loss": 0.4772738516330719, "lr": 2.1004466186358426e-11, "epoch": 1.9963108139266774, "percentage": 99.82, "elapsed_time": "6:00:44", "remaining_time": "0:00:39"} +{"current_steps": 8659, "total_steps": 8674, "loss": 0.5055459141731262, "lr": 1.860603976877595e-11, "epoch": 1.9965413880562601, "percentage": 99.83, "elapsed_time": "6:00:47", "remaining_time": "0:00:37"} +{"current_steps": 8660, "total_steps": 8674, "loss": 0.3764510154724121, "lr": 1.6352970781285946e-11, "epoch": 1.9967719621858429, "percentage": 99.84, "elapsed_time": "6:00:49", "remaining_time": "0:00:34"} +{"current_steps": 8661, "total_steps": 8674, "loss": 0.42315495014190674, "lr": 1.424525955140421e-11, "epoch": 1.9970025363154256, "percentage": 99.85, "elapsed_time": "6:00:51", "remaining_time": "0:00:32"} +{"current_steps": 8662, "total_steps": 8674, "loss": 0.3647070527076721, "lr": 1.2282906385552295e-11, "epoch": 1.997233110445008, "percentage": 99.86, "elapsed_time": "6:00:54", "remaining_time": "0:00:29"} +{"current_steps": 8663, "total_steps": 8674, "loss": 0.3832179307937622, "lr": 1.0465911568946495e-11, "epoch": 1.9974636845745908, "percentage": 99.87, "elapsed_time": "6:00:56", "remaining_time": "0:00:27"} +{"current_steps": 8664, "total_steps": 8674, "loss": 0.46649307012557983, "lr": 8.79427536570887e-12, "epoch": 1.9976942587041733, "percentage": 99.88, "elapsed_time": "6:00:59", "remaining_time": "0:00:24"} +{"current_steps": 8665, "total_steps": 8674, "loss": 
0.5101447701454163, "lr": 7.267998018867238e-12, "epoch": 1.997924832833756, "percentage": 99.9, "elapsed_time": "6:01:01", "remaining_time": "0:00:22"} +{"current_steps": 8666, "total_steps": 8674, "loss": 0.48426300287246704, "lr": 5.8870797502441615e-12, "epoch": 1.9981554069633387, "percentage": 99.91, "elapsed_time": "6:01:04", "remaining_time": "0:00:19"} +{"current_steps": 8667, "total_steps": 8674, "loss": 0.5109666585922241, "lr": 4.65152076045694e-12, "epoch": 1.9983859810929214, "percentage": 99.92, "elapsed_time": "6:01:06", "remaining_time": "0:00:17"} +{"current_steps": 8668, "total_steps": 8674, "loss": 0.36605560779571533, "lr": 3.5613212293617023e-12, "epoch": 1.9986165552225041, "percentage": 99.93, "elapsed_time": "6:01:09", "remaining_time": "0:00:14"} +{"current_steps": 8669, "total_steps": 8674, "loss": 0.515751302242279, "lr": 2.6164813152762533e-12, "epoch": 1.9988471293520869, "percentage": 99.94, "elapsed_time": "6:01:11", "remaining_time": "0:00:12"} +{"current_steps": 8670, "total_steps": 8674, "loss": 0.48570311069488525, "lr": 1.8170011554241582e-12, "epoch": 1.9990777034816694, "percentage": 99.95, "elapsed_time": "6:01:14", "remaining_time": "0:00:09"} +{"current_steps": 8671, "total_steps": 8674, "loss": 0.5033636093139648, "lr": 1.1628808662678124e-12, "epoch": 1.999308277611252, "percentage": 99.97, "elapsed_time": "6:01:16", "remaining_time": "0:00:07"} +{"current_steps": 8672, "total_steps": 8674, "loss": 0.4581984281539917, "lr": 6.541205427312846e-13, "epoch": 1.9995388517408346, "percentage": 99.98, "elapsed_time": "6:01:18", "remaining_time": "0:00:04"} +{"current_steps": 8673, "total_steps": 8674, "loss": 0.4574134945869446, "lr": 2.9072025886645037e-13, "epoch": 1.9997694258704173, "percentage": 99.99, "elapsed_time": "6:01:21", "remaining_time": "0:00:02"} +{"current_steps": 8674, "total_steps": 8674, "loss": 0.39279258251190186, "lr": 7.268006729788112e-14, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "6:01:23", 
"remaining_time": "0:00:00"} +{"current_steps": 8674, "total_steps": 8674, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "6:01:25", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..59332348fbf1cc209f9ad48a87e47dd7c23e53f4 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,60761 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 8674, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00023057412958266084, + "grad_norm": 0.5456158480642083, + "learning_rate": 0.0, + "loss": 1.2793785333633423, + "step": 1 + }, + { + "epoch": 0.0004611482591653217, + "grad_norm": 0.5348414425588685, + "learning_rate": 4.6082949308755755e-09, + "loss": 1.2810249328613281, + "step": 2 + }, + { + "epoch": 0.0006917223887479825, + "grad_norm": 0.5742665952103186, + "learning_rate": 9.216589861751151e-09, + "loss": 1.5180970430374146, + "step": 3 + }, + { + "epoch": 0.0009222965183306433, + "grad_norm": 0.47570843593061296, + "learning_rate": 1.3824884792626728e-08, + "loss": 1.2771815061569214, + "step": 4 + }, + { + "epoch": 0.001152870647913304, + "grad_norm": 0.6179854753010914, + "learning_rate": 1.8433179723502302e-08, + "loss": 1.6275714635849, + "step": 5 + }, + { + "epoch": 0.001383444777495965, + "grad_norm": 0.5728287935763549, + "learning_rate": 2.304147465437788e-08, + "loss": 1.4852838516235352, + "step": 6 + }, + { + "epoch": 0.0016140189070786258, + "grad_norm": 0.7402806033919309, + "learning_rate": 2.7649769585253456e-08, + "loss": 1.3845010995864868, + "step": 7 + }, + { + "epoch": 0.0018445930366612867, + "grad_norm": 0.5357861516775319, + "learning_rate": 3.225806451612903e-08, + "loss": 1.2716574668884277, + "step": 8 + }, + { + "epoch": 0.0020751671662439476, + "grad_norm": 
0.49378309074438254, + "learning_rate": 3.6866359447004604e-08, + "loss": 1.4046194553375244, + "step": 9 + }, + { + "epoch": 0.002305741295826608, + "grad_norm": 0.5231726157264511, + "learning_rate": 4.1474654377880186e-08, + "loss": 1.4988269805908203, + "step": 10 + }, + { + "epoch": 0.002536315425409269, + "grad_norm": 0.5469518790093721, + "learning_rate": 4.608294930875576e-08, + "loss": 1.3523340225219727, + "step": 11 + }, + { + "epoch": 0.00276688955499193, + "grad_norm": 0.5125117134786147, + "learning_rate": 5.069124423963134e-08, + "loss": 1.3664941787719727, + "step": 12 + }, + { + "epoch": 0.0029974636845745907, + "grad_norm": 0.5526794406387441, + "learning_rate": 5.529953917050691e-08, + "loss": 1.4892609119415283, + "step": 13 + }, + { + "epoch": 0.0032280378141572516, + "grad_norm": 0.5197262159341672, + "learning_rate": 5.990783410138249e-08, + "loss": 1.305836796760559, + "step": 14 + }, + { + "epoch": 0.0034586119437399125, + "grad_norm": 0.5214120337499729, + "learning_rate": 6.451612903225806e-08, + "loss": 1.3458774089813232, + "step": 15 + }, + { + "epoch": 0.0036891860733225734, + "grad_norm": 0.5249821302153419, + "learning_rate": 6.912442396313364e-08, + "loss": 1.4305222034454346, + "step": 16 + }, + { + "epoch": 0.003919760202905234, + "grad_norm": 0.48597332722440695, + "learning_rate": 7.373271889400921e-08, + "loss": 1.4247705936431885, + "step": 17 + }, + { + "epoch": 0.004150334332487895, + "grad_norm": 0.5492563451667527, + "learning_rate": 7.834101382488478e-08, + "loss": 1.4151098728179932, + "step": 18 + }, + { + "epoch": 0.004380908462070556, + "grad_norm": 0.4931832122178826, + "learning_rate": 8.294930875576037e-08, + "loss": 1.4633708000183105, + "step": 19 + }, + { + "epoch": 0.004611482591653216, + "grad_norm": 0.4601872454406169, + "learning_rate": 8.755760368663594e-08, + "loss": 1.2271082401275635, + "step": 20 + }, + { + "epoch": 0.004842056721235877, + "grad_norm": 0.5482366075993729, + "learning_rate": 
9.216589861751152e-08, + "loss": 1.493757724761963, + "step": 21 + }, + { + "epoch": 0.005072630850818538, + "grad_norm": 0.5190439230451068, + "learning_rate": 9.677419354838709e-08, + "loss": 1.446916103363037, + "step": 22 + }, + { + "epoch": 0.005303204980401199, + "grad_norm": 0.5010656217784003, + "learning_rate": 1.0138248847926267e-07, + "loss": 1.4575269222259521, + "step": 23 + }, + { + "epoch": 0.00553377910998386, + "grad_norm": 0.5983934917725938, + "learning_rate": 1.0599078341013824e-07, + "loss": 1.5000505447387695, + "step": 24 + }, + { + "epoch": 0.005764353239566521, + "grad_norm": 0.5264341016273323, + "learning_rate": 1.1059907834101383e-07, + "loss": 1.32895827293396, + "step": 25 + }, + { + "epoch": 0.005994927369149181, + "grad_norm": 0.5507902323042685, + "learning_rate": 1.152073732718894e-07, + "loss": 1.479337215423584, + "step": 26 + }, + { + "epoch": 0.006225501498731842, + "grad_norm": 0.4597707182389027, + "learning_rate": 1.1981566820276498e-07, + "loss": 1.3543293476104736, + "step": 27 + }, + { + "epoch": 0.006456075628314503, + "grad_norm": 0.4984681813259071, + "learning_rate": 1.2442396313364054e-07, + "loss": 1.3075106143951416, + "step": 28 + }, + { + "epoch": 0.006686649757897164, + "grad_norm": 0.540668752320374, + "learning_rate": 1.2903225806451611e-07, + "loss": 1.2077248096466064, + "step": 29 + }, + { + "epoch": 0.006917223887479825, + "grad_norm": 0.5053904313535789, + "learning_rate": 1.336405529953917e-07, + "loss": 1.2841781377792358, + "step": 30 + }, + { + "epoch": 0.0071477980170624855, + "grad_norm": 0.5007265235886551, + "learning_rate": 1.3824884792626728e-07, + "loss": 1.4022557735443115, + "step": 31 + }, + { + "epoch": 0.007378372146645147, + "grad_norm": 0.5376464155945276, + "learning_rate": 1.4285714285714285e-07, + "loss": 1.4971141815185547, + "step": 32 + }, + { + "epoch": 0.007608946276227807, + "grad_norm": 0.49485432736210644, + "learning_rate": 1.4746543778801842e-07, + "loss": 
1.3699426651000977, + "step": 33 + }, + { + "epoch": 0.007839520405810468, + "grad_norm": 0.602690054138726, + "learning_rate": 1.52073732718894e-07, + "loss": 1.466570258140564, + "step": 34 + }, + { + "epoch": 0.008070094535393129, + "grad_norm": 0.544784030735669, + "learning_rate": 1.5668202764976955e-07, + "loss": 1.3031455278396606, + "step": 35 + }, + { + "epoch": 0.00830066866497579, + "grad_norm": 0.5516628365932859, + "learning_rate": 1.6129032258064515e-07, + "loss": 1.3989369869232178, + "step": 36 + }, + { + "epoch": 0.00853124279455845, + "grad_norm": 0.5375908894429152, + "learning_rate": 1.6589861751152074e-07, + "loss": 1.41139817237854, + "step": 37 + }, + { + "epoch": 0.008761816924141111, + "grad_norm": 0.4923010186613349, + "learning_rate": 1.705069124423963e-07, + "loss": 1.305363655090332, + "step": 38 + }, + { + "epoch": 0.008992391053723773, + "grad_norm": 0.5782996548067549, + "learning_rate": 1.7511520737327188e-07, + "loss": 1.3931915760040283, + "step": 39 + }, + { + "epoch": 0.009222965183306432, + "grad_norm": 0.5425552369520273, + "learning_rate": 1.7972350230414745e-07, + "loss": 1.4728009700775146, + "step": 40 + }, + { + "epoch": 0.009453539312889093, + "grad_norm": 0.5162050268750099, + "learning_rate": 1.8433179723502305e-07, + "loss": 1.4165544509887695, + "step": 41 + }, + { + "epoch": 0.009684113442471755, + "grad_norm": 0.509079818266607, + "learning_rate": 1.889400921658986e-07, + "loss": 1.3693115711212158, + "step": 42 + }, + { + "epoch": 0.009914687572054416, + "grad_norm": 0.5804116282906935, + "learning_rate": 1.9354838709677418e-07, + "loss": 1.468721866607666, + "step": 43 + }, + { + "epoch": 0.010145261701637076, + "grad_norm": 0.5466645633601509, + "learning_rate": 1.9815668202764975e-07, + "loss": 1.4732704162597656, + "step": 44 + }, + { + "epoch": 0.010375835831219737, + "grad_norm": 0.4534942899185725, + "learning_rate": 2.0276497695852535e-07, + "loss": 1.2579209804534912, + "step": 45 + }, + { + "epoch": 
0.010606409960802398, + "grad_norm": 0.4766380716605293, + "learning_rate": 2.073732718894009e-07, + "loss": 1.3587429523468018, + "step": 46 + }, + { + "epoch": 0.010836984090385058, + "grad_norm": 0.5409254453286721, + "learning_rate": 2.1198156682027649e-07, + "loss": 1.368800401687622, + "step": 47 + }, + { + "epoch": 0.01106755821996772, + "grad_norm": 0.5103994243466702, + "learning_rate": 2.1658986175115208e-07, + "loss": 1.2960132360458374, + "step": 48 + }, + { + "epoch": 0.01129813234955038, + "grad_norm": 0.47493679434319974, + "learning_rate": 2.2119815668202765e-07, + "loss": 1.3035235404968262, + "step": 49 + }, + { + "epoch": 0.011528706479133042, + "grad_norm": 0.5271868916321076, + "learning_rate": 2.2580645161290322e-07, + "loss": 1.5074443817138672, + "step": 50 + }, + { + "epoch": 0.011759280608715702, + "grad_norm": 0.5381217045242119, + "learning_rate": 2.304147465437788e-07, + "loss": 1.4689760208129883, + "step": 51 + }, + { + "epoch": 0.011989854738298363, + "grad_norm": 0.4629483381608022, + "learning_rate": 2.3502304147465438e-07, + "loss": 1.3542251586914062, + "step": 52 + }, + { + "epoch": 0.012220428867881024, + "grad_norm": 0.4592532760230554, + "learning_rate": 2.3963133640552995e-07, + "loss": 1.3521728515625, + "step": 53 + }, + { + "epoch": 0.012451002997463684, + "grad_norm": 0.5030837073491258, + "learning_rate": 2.442396313364055e-07, + "loss": 1.3577494621276855, + "step": 54 + }, + { + "epoch": 0.012681577127046345, + "grad_norm": 0.5438911836333451, + "learning_rate": 2.488479262672811e-07, + "loss": 1.459476351737976, + "step": 55 + }, + { + "epoch": 0.012912151256629006, + "grad_norm": 0.52516269169267, + "learning_rate": 2.534562211981567e-07, + "loss": 1.484410047531128, + "step": 56 + }, + { + "epoch": 0.013142725386211668, + "grad_norm": 0.5188914022486312, + "learning_rate": 2.5806451612903223e-07, + "loss": 1.3589065074920654, + "step": 57 + }, + { + "epoch": 0.013373299515794327, + "grad_norm": 0.5619229477118247, 
+ "learning_rate": 2.6267281105990777e-07, + "loss": 1.3558262586593628, + "step": 58 + }, + { + "epoch": 0.013603873645376989, + "grad_norm": 0.5534574014271282, + "learning_rate": 2.672811059907834e-07, + "loss": 1.5165367126464844, + "step": 59 + }, + { + "epoch": 0.01383444777495965, + "grad_norm": 0.47598313164662104, + "learning_rate": 2.7188940092165896e-07, + "loss": 1.3051776885986328, + "step": 60 + }, + { + "epoch": 0.01406502190454231, + "grad_norm": 0.45011107968146047, + "learning_rate": 2.7649769585253456e-07, + "loss": 1.2916524410247803, + "step": 61 + }, + { + "epoch": 0.014295596034124971, + "grad_norm": 0.513792634149487, + "learning_rate": 2.8110599078341015e-07, + "loss": 1.440261721611023, + "step": 62 + }, + { + "epoch": 0.014526170163707632, + "grad_norm": 0.5424492375693261, + "learning_rate": 2.857142857142857e-07, + "loss": 1.3422625064849854, + "step": 63 + }, + { + "epoch": 0.014756744293290294, + "grad_norm": 0.4598784526258713, + "learning_rate": 2.903225806451613e-07, + "loss": 1.374439001083374, + "step": 64 + }, + { + "epoch": 0.014987318422872953, + "grad_norm": 0.5339252174305668, + "learning_rate": 2.9493087557603683e-07, + "loss": 1.4382294416427612, + "step": 65 + }, + { + "epoch": 0.015217892552455614, + "grad_norm": 0.5302645203365586, + "learning_rate": 2.9953917050691243e-07, + "loss": 1.3971002101898193, + "step": 66 + }, + { + "epoch": 0.015448466682038276, + "grad_norm": 0.5711144083332746, + "learning_rate": 3.04147465437788e-07, + "loss": 1.376272439956665, + "step": 67 + }, + { + "epoch": 0.015679040811620935, + "grad_norm": 0.5016109357973636, + "learning_rate": 3.0875576036866356e-07, + "loss": 1.3135097026824951, + "step": 68 + }, + { + "epoch": 0.015909614941203597, + "grad_norm": 0.5041882505031982, + "learning_rate": 3.133640552995391e-07, + "loss": 1.2688875198364258, + "step": 69 + }, + { + "epoch": 0.016140189070786258, + "grad_norm": 0.544108037399583, + "learning_rate": 3.1797235023041476e-07, + "loss": 
1.4380691051483154, + "step": 70 + }, + { + "epoch": 0.01637076320036892, + "grad_norm": 0.5634345795303867, + "learning_rate": 3.225806451612903e-07, + "loss": 1.319260835647583, + "step": 71 + }, + { + "epoch": 0.01660133732995158, + "grad_norm": 0.5352869486400713, + "learning_rate": 3.271889400921659e-07, + "loss": 1.4083738327026367, + "step": 72 + }, + { + "epoch": 0.01683191145953424, + "grad_norm": 0.5524091199068598, + "learning_rate": 3.317972350230415e-07, + "loss": 1.4904775619506836, + "step": 73 + }, + { + "epoch": 0.0170624855891169, + "grad_norm": 0.5488563092854116, + "learning_rate": 3.3640552995391703e-07, + "loss": 1.4534616470336914, + "step": 74 + }, + { + "epoch": 0.01729305971869956, + "grad_norm": 0.621117268365485, + "learning_rate": 3.410138248847926e-07, + "loss": 1.6545689105987549, + "step": 75 + }, + { + "epoch": 0.017523633848282223, + "grad_norm": 0.4834761822798673, + "learning_rate": 3.4562211981566817e-07, + "loss": 1.2267192602157593, + "step": 76 + }, + { + "epoch": 0.017754207977864884, + "grad_norm": 0.5801091305703396, + "learning_rate": 3.5023041474654376e-07, + "loss": 1.4207227230072021, + "step": 77 + }, + { + "epoch": 0.017984782107447545, + "grad_norm": 0.5253671028782199, + "learning_rate": 3.5483870967741936e-07, + "loss": 1.4952092170715332, + "step": 78 + }, + { + "epoch": 0.018215356237030206, + "grad_norm": 0.4832223487637491, + "learning_rate": 3.594470046082949e-07, + "loss": 1.2932121753692627, + "step": 79 + }, + { + "epoch": 0.018445930366612864, + "grad_norm": 0.5623376259320272, + "learning_rate": 3.6405529953917044e-07, + "loss": 1.3855851888656616, + "step": 80 + }, + { + "epoch": 0.018676504496195526, + "grad_norm": 0.45682252121341854, + "learning_rate": 3.686635944700461e-07, + "loss": 1.3645650148391724, + "step": 81 + }, + { + "epoch": 0.018907078625778187, + "grad_norm": 0.49579660369860507, + "learning_rate": 3.7327188940092163e-07, + "loss": 1.322283387184143, + "step": 82 + }, + { + "epoch": 
0.01913765275536085, + "grad_norm": 0.5177315365924456, + "learning_rate": 3.778801843317972e-07, + "loss": 1.3363629579544067, + "step": 83 + }, + { + "epoch": 0.01936822688494351, + "grad_norm": 0.616201260540867, + "learning_rate": 3.824884792626728e-07, + "loss": 1.553279161453247, + "step": 84 + }, + { + "epoch": 0.01959880101452617, + "grad_norm": 0.5198473540371843, + "learning_rate": 3.8709677419354837e-07, + "loss": 1.4434814453125, + "step": 85 + }, + { + "epoch": 0.019829375144108832, + "grad_norm": 0.5923570018189629, + "learning_rate": 3.9170506912442396e-07, + "loss": 1.5134285688400269, + "step": 86 + }, + { + "epoch": 0.02005994927369149, + "grad_norm": 0.5850924486743854, + "learning_rate": 3.963133640552995e-07, + "loss": 1.4244651794433594, + "step": 87 + }, + { + "epoch": 0.02029052340327415, + "grad_norm": 0.560105193358992, + "learning_rate": 4.009216589861751e-07, + "loss": 1.4571855068206787, + "step": 88 + }, + { + "epoch": 0.020521097532856813, + "grad_norm": 0.48108556089196525, + "learning_rate": 4.055299539170507e-07, + "loss": 1.2940685749053955, + "step": 89 + }, + { + "epoch": 0.020751671662439474, + "grad_norm": 0.5203979535892653, + "learning_rate": 4.1013824884792624e-07, + "loss": 1.3537572622299194, + "step": 90 + }, + { + "epoch": 0.020982245792022135, + "grad_norm": 0.5791117780548783, + "learning_rate": 4.147465437788018e-07, + "loss": 1.524500846862793, + "step": 91 + }, + { + "epoch": 0.021212819921604797, + "grad_norm": 0.4890632694429427, + "learning_rate": 4.1935483870967743e-07, + "loss": 1.4414368867874146, + "step": 92 + }, + { + "epoch": 0.021443394051187458, + "grad_norm": 0.49954451696473423, + "learning_rate": 4.2396313364055297e-07, + "loss": 1.284010887145996, + "step": 93 + }, + { + "epoch": 0.021673968180770116, + "grad_norm": 0.6088073736973271, + "learning_rate": 4.285714285714285e-07, + "loss": 1.5901892185211182, + "step": 94 + }, + { + "epoch": 0.021904542310352777, + "grad_norm": 0.5856129890195899, + 
"learning_rate": 4.3317972350230416e-07, + "loss": 1.4408211708068848, + "step": 95 + }, + { + "epoch": 0.02213511643993544, + "grad_norm": 0.49571353442310634, + "learning_rate": 4.377880184331797e-07, + "loss": 1.2293554544448853, + "step": 96 + }, + { + "epoch": 0.0223656905695181, + "grad_norm": 0.570508723127356, + "learning_rate": 4.423963133640553e-07, + "loss": 1.4144377708435059, + "step": 97 + }, + { + "epoch": 0.02259626469910076, + "grad_norm": 0.5952794755762669, + "learning_rate": 4.4700460829493084e-07, + "loss": 1.359034776687622, + "step": 98 + }, + { + "epoch": 0.022826838828683423, + "grad_norm": 0.5878914385748992, + "learning_rate": 4.5161290322580644e-07, + "loss": 1.3299517631530762, + "step": 99 + }, + { + "epoch": 0.023057412958266084, + "grad_norm": 0.5039341997298462, + "learning_rate": 4.5622119815668203e-07, + "loss": 1.3072423934936523, + "step": 100 + }, + { + "epoch": 0.023287987087848742, + "grad_norm": 0.6205508042108064, + "learning_rate": 4.608294930875576e-07, + "loss": 1.5683096647262573, + "step": 101 + }, + { + "epoch": 0.023518561217431403, + "grad_norm": 0.6300075069307655, + "learning_rate": 4.654377880184331e-07, + "loss": 1.6294015645980835, + "step": 102 + }, + { + "epoch": 0.023749135347014064, + "grad_norm": 0.5245849244619794, + "learning_rate": 4.7004608294930877e-07, + "loss": 1.424511194229126, + "step": 103 + }, + { + "epoch": 0.023979709476596726, + "grad_norm": 0.5471205081131801, + "learning_rate": 4.746543778801843e-07, + "loss": 1.4169164896011353, + "step": 104 + }, + { + "epoch": 0.024210283606179387, + "grad_norm": 0.5854813174619509, + "learning_rate": 4.792626728110599e-07, + "loss": 1.3933480978012085, + "step": 105 + }, + { + "epoch": 0.02444085773576205, + "grad_norm": 0.6166413586526565, + "learning_rate": 4.838709677419355e-07, + "loss": 1.488750696182251, + "step": 106 + }, + { + "epoch": 0.02467143186534471, + "grad_norm": 0.6052025315612124, + "learning_rate": 4.88479262672811e-07, + "loss": 
1.4852150678634644, + "step": 107 + }, + { + "epoch": 0.024902005994927368, + "grad_norm": 0.5750922845804657, + "learning_rate": 4.930875576036866e-07, + "loss": 1.4256765842437744, + "step": 108 + }, + { + "epoch": 0.02513258012451003, + "grad_norm": 0.5231547313189364, + "learning_rate": 4.976958525345622e-07, + "loss": 1.3063642978668213, + "step": 109 + }, + { + "epoch": 0.02536315425409269, + "grad_norm": 0.5734263022927267, + "learning_rate": 5.023041474654378e-07, + "loss": 1.549802303314209, + "step": 110 + }, + { + "epoch": 0.02559372838367535, + "grad_norm": 0.5041709928346361, + "learning_rate": 5.069124423963134e-07, + "loss": 1.301950454711914, + "step": 111 + }, + { + "epoch": 0.025824302513258013, + "grad_norm": 0.5567596794280206, + "learning_rate": 5.11520737327189e-07, + "loss": 1.3025325536727905, + "step": 112 + }, + { + "epoch": 0.026054876642840674, + "grad_norm": 0.5369405016436734, + "learning_rate": 5.161290322580645e-07, + "loss": 1.40749192237854, + "step": 113 + }, + { + "epoch": 0.026285450772423335, + "grad_norm": 0.5208396194792263, + "learning_rate": 5.2073732718894e-07, + "loss": 1.3216793537139893, + "step": 114 + }, + { + "epoch": 0.026516024902005993, + "grad_norm": 0.5052494958784187, + "learning_rate": 5.253456221198155e-07, + "loss": 1.3189308643341064, + "step": 115 + }, + { + "epoch": 0.026746599031588655, + "grad_norm": 0.5632602249643789, + "learning_rate": 5.299539170506912e-07, + "loss": 1.430384635925293, + "step": 116 + }, + { + "epoch": 0.026977173161171316, + "grad_norm": 0.5516062364182813, + "learning_rate": 5.345622119815668e-07, + "loss": 1.4081478118896484, + "step": 117 + }, + { + "epoch": 0.027207747290753977, + "grad_norm": 0.6385508559977366, + "learning_rate": 5.391705069124423e-07, + "loss": 1.434388518333435, + "step": 118 + }, + { + "epoch": 0.02743832142033664, + "grad_norm": 0.6138756203209041, + "learning_rate": 5.437788018433179e-07, + "loss": 1.4139282703399658, + "step": 119 + }, + { + "epoch": 
0.0276688955499193, + "grad_norm": 0.5683069275087388, + "learning_rate": 5.483870967741935e-07, + "loss": 1.4511487483978271, + "step": 120 + }, + { + "epoch": 0.02789946967950196, + "grad_norm": 0.6423215590072974, + "learning_rate": 5.529953917050691e-07, + "loss": 1.5713481903076172, + "step": 121 + }, + { + "epoch": 0.02813004380908462, + "grad_norm": 0.5705917499340588, + "learning_rate": 5.576036866359447e-07, + "loss": 1.4315730333328247, + "step": 122 + }, + { + "epoch": 0.02836061793866728, + "grad_norm": 0.5316898536625556, + "learning_rate": 5.622119815668203e-07, + "loss": 1.3283708095550537, + "step": 123 + }, + { + "epoch": 0.028591192068249942, + "grad_norm": 0.6184222176453401, + "learning_rate": 5.668202764976958e-07, + "loss": 1.4329016208648682, + "step": 124 + }, + { + "epoch": 0.028821766197832603, + "grad_norm": 0.5872933055537319, + "learning_rate": 5.714285714285714e-07, + "loss": 1.444648265838623, + "step": 125 + }, + { + "epoch": 0.029052340327415264, + "grad_norm": 0.5205647887621043, + "learning_rate": 5.760368663594469e-07, + "loss": 1.3584785461425781, + "step": 126 + }, + { + "epoch": 0.029282914456997926, + "grad_norm": 0.5687232002808722, + "learning_rate": 5.806451612903226e-07, + "loss": 1.2815918922424316, + "step": 127 + }, + { + "epoch": 0.029513488586580587, + "grad_norm": 0.5252774303203537, + "learning_rate": 5.852534562211982e-07, + "loss": 1.3332037925720215, + "step": 128 + }, + { + "epoch": 0.029744062716163245, + "grad_norm": 0.5694649769044726, + "learning_rate": 5.898617511520737e-07, + "loss": 1.4522390365600586, + "step": 129 + }, + { + "epoch": 0.029974636845745906, + "grad_norm": 0.5607244925516301, + "learning_rate": 5.944700460829493e-07, + "loss": 1.4362024068832397, + "step": 130 + }, + { + "epoch": 0.030205210975328568, + "grad_norm": 0.5432906779366606, + "learning_rate": 5.990783410138249e-07, + "loss": 1.3271276950836182, + "step": 131 + }, + { + "epoch": 0.03043578510491123, + "grad_norm": 
0.6175056690394787, + "learning_rate": 6.036866359447004e-07, + "loss": 1.5936369895935059, + "step": 132 + }, + { + "epoch": 0.03066635923449389, + "grad_norm": 0.5887629397700789, + "learning_rate": 6.08294930875576e-07, + "loss": 1.4786381721496582, + "step": 133 + }, + { + "epoch": 0.03089693336407655, + "grad_norm": 0.5490770556101789, + "learning_rate": 6.129032258064516e-07, + "loss": 1.3499064445495605, + "step": 134 + }, + { + "epoch": 0.031127507493659213, + "grad_norm": 0.583021664079577, + "learning_rate": 6.175115207373271e-07, + "loss": 1.4434795379638672, + "step": 135 + }, + { + "epoch": 0.03135808162324187, + "grad_norm": 0.6037371306112707, + "learning_rate": 6.221198156682027e-07, + "loss": 1.4064602851867676, + "step": 136 + }, + { + "epoch": 0.03158865575282453, + "grad_norm": 0.5005511365111003, + "learning_rate": 6.267281105990782e-07, + "loss": 1.3325507640838623, + "step": 137 + }, + { + "epoch": 0.03181922988240719, + "grad_norm": 0.516984621863849, + "learning_rate": 6.313364055299539e-07, + "loss": 1.2584879398345947, + "step": 138 + }, + { + "epoch": 0.032049804011989855, + "grad_norm": 0.5401703370709408, + "learning_rate": 6.359447004608295e-07, + "loss": 1.3754582405090332, + "step": 139 + }, + { + "epoch": 0.032280378141572516, + "grad_norm": 0.5773695778497429, + "learning_rate": 6.40552995391705e-07, + "loss": 1.2700412273406982, + "step": 140 + }, + { + "epoch": 0.03251095227115518, + "grad_norm": 0.580045410672373, + "learning_rate": 6.451612903225806e-07, + "loss": 1.395858645439148, + "step": 141 + }, + { + "epoch": 0.03274152640073784, + "grad_norm": 0.6146943532430481, + "learning_rate": 6.497695852534562e-07, + "loss": 1.402890682220459, + "step": 142 + }, + { + "epoch": 0.0329721005303205, + "grad_norm": 0.5736524878471048, + "learning_rate": 6.543778801843318e-07, + "loss": 1.5405397415161133, + "step": 143 + }, + { + "epoch": 0.03320267465990316, + "grad_norm": 0.5418174501474893, + "learning_rate": 
6.589861751152074e-07, + "loss": 1.2394921779632568, + "step": 144 + }, + { + "epoch": 0.03343324878948582, + "grad_norm": 0.6276742940359161, + "learning_rate": 6.63594470046083e-07, + "loss": 1.453255295753479, + "step": 145 + }, + { + "epoch": 0.03366382291906848, + "grad_norm": 0.6191808042065741, + "learning_rate": 6.682027649769585e-07, + "loss": 1.3661112785339355, + "step": 146 + }, + { + "epoch": 0.03389439704865114, + "grad_norm": 0.5260230971069313, + "learning_rate": 6.728110599078341e-07, + "loss": 1.2952282428741455, + "step": 147 + }, + { + "epoch": 0.0341249711782338, + "grad_norm": 0.6693704726704671, + "learning_rate": 6.774193548387096e-07, + "loss": 1.396565318107605, + "step": 148 + }, + { + "epoch": 0.03435554530781646, + "grad_norm": 0.5881355966882998, + "learning_rate": 6.820276497695853e-07, + "loss": 1.3207082748413086, + "step": 149 + }, + { + "epoch": 0.03458611943739912, + "grad_norm": 0.5727010424261832, + "learning_rate": 6.866359447004608e-07, + "loss": 1.4085125923156738, + "step": 150 + }, + { + "epoch": 0.034816693566981784, + "grad_norm": 0.6667208730018341, + "learning_rate": 6.912442396313363e-07, + "loss": 1.5698528289794922, + "step": 151 + }, + { + "epoch": 0.035047267696564445, + "grad_norm": 0.5847511619477141, + "learning_rate": 6.958525345622119e-07, + "loss": 1.4091004133224487, + "step": 152 + }, + { + "epoch": 0.035277841826147106, + "grad_norm": 0.5143540253572731, + "learning_rate": 7.004608294930875e-07, + "loss": 1.2392504215240479, + "step": 153 + }, + { + "epoch": 0.03550841595572977, + "grad_norm": 0.6061996419355483, + "learning_rate": 7.05069124423963e-07, + "loss": 1.3355891704559326, + "step": 154 + }, + { + "epoch": 0.03573899008531243, + "grad_norm": 0.5654677060773288, + "learning_rate": 7.096774193548387e-07, + "loss": 1.330599308013916, + "step": 155 + }, + { + "epoch": 0.03596956421489509, + "grad_norm": 0.5625277163359125, + "learning_rate": 7.142857142857143e-07, + "loss": 1.344653844833374, + 
"step": 156 + }, + { + "epoch": 0.03620013834447775, + "grad_norm": 0.5693935421186345, + "learning_rate": 7.188940092165898e-07, + "loss": 1.341560959815979, + "step": 157 + }, + { + "epoch": 0.03643071247406041, + "grad_norm": 0.5761507210889462, + "learning_rate": 7.235023041474654e-07, + "loss": 1.2242077589035034, + "step": 158 + }, + { + "epoch": 0.036661286603643074, + "grad_norm": 0.61477283253827, + "learning_rate": 7.281105990783409e-07, + "loss": 1.2858202457427979, + "step": 159 + }, + { + "epoch": 0.03689186073322573, + "grad_norm": 0.6410836439864531, + "learning_rate": 7.327188940092166e-07, + "loss": 1.479524850845337, + "step": 160 + }, + { + "epoch": 0.03712243486280839, + "grad_norm": 0.5918139936623208, + "learning_rate": 7.373271889400922e-07, + "loss": 1.43915855884552, + "step": 161 + }, + { + "epoch": 0.03735300899239105, + "grad_norm": 0.6478814183526712, + "learning_rate": 7.419354838709677e-07, + "loss": 1.3939034938812256, + "step": 162 + }, + { + "epoch": 0.03758358312197371, + "grad_norm": 0.6065250961726126, + "learning_rate": 7.465437788018433e-07, + "loss": 1.2733443975448608, + "step": 163 + }, + { + "epoch": 0.037814157251556374, + "grad_norm": 0.5670760124517911, + "learning_rate": 7.511520737327189e-07, + "loss": 1.3436474800109863, + "step": 164 + }, + { + "epoch": 0.038044731381139035, + "grad_norm": 0.622037546591312, + "learning_rate": 7.557603686635944e-07, + "loss": 1.4250465631484985, + "step": 165 + }, + { + "epoch": 0.0382753055107217, + "grad_norm": 0.607298640184171, + "learning_rate": 7.603686635944701e-07, + "loss": 1.4244422912597656, + "step": 166 + }, + { + "epoch": 0.03850587964030436, + "grad_norm": 0.6986289389542176, + "learning_rate": 7.649769585253457e-07, + "loss": 1.5487544536590576, + "step": 167 + }, + { + "epoch": 0.03873645376988702, + "grad_norm": 0.5793907792629099, + "learning_rate": 7.695852534562211e-07, + "loss": 1.3282281160354614, + "step": 168 + }, + { + "epoch": 0.03896702789946968, + 
"grad_norm": 0.5428953608010194, + "learning_rate": 7.741935483870967e-07, + "loss": 1.2823774814605713, + "step": 169 + }, + { + "epoch": 0.03919760202905234, + "grad_norm": 0.5889853233557574, + "learning_rate": 7.788018433179722e-07, + "loss": 1.2402329444885254, + "step": 170 + }, + { + "epoch": 0.039428176158635, + "grad_norm": 0.6219537569729359, + "learning_rate": 7.834101382488479e-07, + "loss": 1.3755587339401245, + "step": 171 + }, + { + "epoch": 0.039658750288217665, + "grad_norm": 0.5509851701904478, + "learning_rate": 7.880184331797235e-07, + "loss": 1.3403921127319336, + "step": 172 + }, + { + "epoch": 0.039889324417800326, + "grad_norm": 0.5971512014225002, + "learning_rate": 7.92626728110599e-07, + "loss": 1.3742129802703857, + "step": 173 + }, + { + "epoch": 0.04011989854738298, + "grad_norm": 0.7068161569826883, + "learning_rate": 7.972350230414746e-07, + "loss": 1.6444599628448486, + "step": 174 + }, + { + "epoch": 0.04035047267696564, + "grad_norm": 0.6019721571978455, + "learning_rate": 8.018433179723502e-07, + "loss": 1.3891929388046265, + "step": 175 + }, + { + "epoch": 0.0405810468065483, + "grad_norm": 0.5520157347061957, + "learning_rate": 8.064516129032257e-07, + "loss": 1.2279409170150757, + "step": 176 + }, + { + "epoch": 0.040811620936130964, + "grad_norm": 0.6346481492269727, + "learning_rate": 8.110599078341014e-07, + "loss": 1.4576997756958008, + "step": 177 + }, + { + "epoch": 0.041042195065713626, + "grad_norm": 0.612489332435889, + "learning_rate": 8.15668202764977e-07, + "loss": 1.3585199117660522, + "step": 178 + }, + { + "epoch": 0.04127276919529629, + "grad_norm": 0.5908354773562909, + "learning_rate": 8.202764976958525e-07, + "loss": 1.3056905269622803, + "step": 179 + }, + { + "epoch": 0.04150334332487895, + "grad_norm": 0.5749600887070265, + "learning_rate": 8.248847926267281e-07, + "loss": 1.3029698133468628, + "step": 180 + }, + { + "epoch": 0.04173391745446161, + "grad_norm": 0.6598409427706357, + "learning_rate": 
8.294930875576036e-07, + "loss": 1.4368736743927002, + "step": 181 + }, + { + "epoch": 0.04196449158404427, + "grad_norm": 0.5781034108869284, + "learning_rate": 8.341013824884793e-07, + "loss": 1.3243422508239746, + "step": 182 + }, + { + "epoch": 0.04219506571362693, + "grad_norm": 0.5206395827762466, + "learning_rate": 8.387096774193549e-07, + "loss": 1.232081413269043, + "step": 183 + }, + { + "epoch": 0.042425639843209594, + "grad_norm": 0.656527379150416, + "learning_rate": 8.433179723502303e-07, + "loss": 1.4601390361785889, + "step": 184 + }, + { + "epoch": 0.042656213972792255, + "grad_norm": 0.7159376690159417, + "learning_rate": 8.479262672811059e-07, + "loss": 1.3778860569000244, + "step": 185 + }, + { + "epoch": 0.042886788102374916, + "grad_norm": 0.590059263278645, + "learning_rate": 8.525345622119815e-07, + "loss": 1.3235092163085938, + "step": 186 + }, + { + "epoch": 0.04311736223195758, + "grad_norm": 0.6886704124574455, + "learning_rate": 8.57142857142857e-07, + "loss": 1.4480581283569336, + "step": 187 + }, + { + "epoch": 0.04334793636154023, + "grad_norm": 0.6346582437238362, + "learning_rate": 8.617511520737327e-07, + "loss": 1.4530816078186035, + "step": 188 + }, + { + "epoch": 0.04357851049112289, + "grad_norm": 0.6767670706852607, + "learning_rate": 8.663594470046083e-07, + "loss": 1.4447407722473145, + "step": 189 + }, + { + "epoch": 0.043809084620705555, + "grad_norm": 0.6049885392306779, + "learning_rate": 8.709677419354838e-07, + "loss": 1.3610244989395142, + "step": 190 + }, + { + "epoch": 0.044039658750288216, + "grad_norm": 0.6415008170468611, + "learning_rate": 8.755760368663594e-07, + "loss": 1.4084277153015137, + "step": 191 + }, + { + "epoch": 0.04427023287987088, + "grad_norm": 0.579530872526008, + "learning_rate": 8.801843317972349e-07, + "loss": 1.3652758598327637, + "step": 192 + }, + { + "epoch": 0.04450080700945354, + "grad_norm": 0.7106489880805067, + "learning_rate": 8.847926267281106e-07, + "loss": 1.4791496992111206, + 
"step": 193 + }, + { + "epoch": 0.0447313811390362, + "grad_norm": 0.6211187249917176, + "learning_rate": 8.894009216589862e-07, + "loss": 1.3958008289337158, + "step": 194 + }, + { + "epoch": 0.04496195526861886, + "grad_norm": 0.700016972508283, + "learning_rate": 8.940092165898617e-07, + "loss": 1.4134410619735718, + "step": 195 + }, + { + "epoch": 0.04519252939820152, + "grad_norm": 0.6911089974612981, + "learning_rate": 8.986175115207373e-07, + "loss": 1.4062776565551758, + "step": 196 + }, + { + "epoch": 0.045423103527784184, + "grad_norm": 0.6823334536756955, + "learning_rate": 9.032258064516129e-07, + "loss": 1.375224232673645, + "step": 197 + }, + { + "epoch": 0.045653677657366845, + "grad_norm": 0.6003343488972004, + "learning_rate": 9.078341013824884e-07, + "loss": 1.2440606355667114, + "step": 198 + }, + { + "epoch": 0.045884251786949506, + "grad_norm": 0.6737684280449967, + "learning_rate": 9.124423963133641e-07, + "loss": 1.4068349599838257, + "step": 199 + }, + { + "epoch": 0.04611482591653217, + "grad_norm": 0.6181499859340271, + "learning_rate": 9.170506912442397e-07, + "loss": 1.3797581195831299, + "step": 200 + }, + { + "epoch": 0.04634540004611483, + "grad_norm": 0.6445170966825345, + "learning_rate": 9.216589861751152e-07, + "loss": 1.4441678524017334, + "step": 201 + }, + { + "epoch": 0.046575974175697483, + "grad_norm": 0.6677276378953197, + "learning_rate": 9.262672811059907e-07, + "loss": 1.4727370738983154, + "step": 202 + }, + { + "epoch": 0.046806548305280145, + "grad_norm": 0.7032332117559357, + "learning_rate": 9.308755760368662e-07, + "loss": 1.448495864868164, + "step": 203 + }, + { + "epoch": 0.047037122434862806, + "grad_norm": 0.674429398641426, + "learning_rate": 9.354838709677418e-07, + "loss": 1.3727293014526367, + "step": 204 + }, + { + "epoch": 0.04726769656444547, + "grad_norm": 0.6701259318687961, + "learning_rate": 9.400921658986175e-07, + "loss": 1.4234352111816406, + "step": 205 + }, + { + "epoch": 0.04749827069402813, + 
"grad_norm": 0.5974678653003657, + "learning_rate": 9.44700460829493e-07, + "loss": 1.2407056093215942, + "step": 206 + }, + { + "epoch": 0.04772884482361079, + "grad_norm": 0.672276356974357, + "learning_rate": 9.493087557603686e-07, + "loss": 1.3502311706542969, + "step": 207 + }, + { + "epoch": 0.04795941895319345, + "grad_norm": 0.7465400676066979, + "learning_rate": 9.539170506912442e-07, + "loss": 1.4618254899978638, + "step": 208 + }, + { + "epoch": 0.04818999308277611, + "grad_norm": 0.681303163705478, + "learning_rate": 9.585253456221198e-07, + "loss": 1.3624317646026611, + "step": 209 + }, + { + "epoch": 0.048420567212358774, + "grad_norm": 0.7608712138693399, + "learning_rate": 9.631336405529954e-07, + "loss": 1.512046456336975, + "step": 210 + }, + { + "epoch": 0.048651141341941435, + "grad_norm": 0.6018077766578277, + "learning_rate": 9.67741935483871e-07, + "loss": 1.2896164655685425, + "step": 211 + }, + { + "epoch": 0.0488817154715241, + "grad_norm": 0.7063578249182565, + "learning_rate": 9.723502304147466e-07, + "loss": 1.5507850646972656, + "step": 212 + }, + { + "epoch": 0.04911228960110676, + "grad_norm": 0.7081498572564182, + "learning_rate": 9.76958525345622e-07, + "loss": 1.425408124923706, + "step": 213 + }, + { + "epoch": 0.04934286373068942, + "grad_norm": 0.7025877080602252, + "learning_rate": 9.815668202764976e-07, + "loss": 1.347771406173706, + "step": 214 + }, + { + "epoch": 0.04957343786027208, + "grad_norm": 0.7201983919068122, + "learning_rate": 9.861751152073732e-07, + "loss": 1.4044904708862305, + "step": 215 + }, + { + "epoch": 0.049804011989854735, + "grad_norm": 0.7045020078596302, + "learning_rate": 9.907834101382488e-07, + "loss": 1.3507332801818848, + "step": 216 + }, + { + "epoch": 0.050034586119437396, + "grad_norm": 0.6820424993070572, + "learning_rate": 9.953917050691244e-07, + "loss": 1.3022946119308472, + "step": 217 + }, + { + "epoch": 0.05026516024902006, + "grad_norm": 0.6561516180690095, + "learning_rate": 1e-06, + 
"loss": 1.284754991531372, + "step": 218 + }, + { + "epoch": 0.05049573437860272, + "grad_norm": 0.6003085662526402, + "learning_rate": 1.0046082949308756e-06, + "loss": 1.2985923290252686, + "step": 219 + }, + { + "epoch": 0.05072630850818538, + "grad_norm": 0.6214608767923379, + "learning_rate": 1.0092165898617511e-06, + "loss": 1.3855717182159424, + "step": 220 + }, + { + "epoch": 0.05095688263776804, + "grad_norm": 0.675694738994849, + "learning_rate": 1.0138248847926267e-06, + "loss": 1.357919692993164, + "step": 221 + }, + { + "epoch": 0.0511874567673507, + "grad_norm": 0.6736529895786637, + "learning_rate": 1.0184331797235021e-06, + "loss": 1.2818949222564697, + "step": 222 + }, + { + "epoch": 0.051418030896933364, + "grad_norm": 0.6226203332882617, + "learning_rate": 1.023041474654378e-06, + "loss": 1.2488511800765991, + "step": 223 + }, + { + "epoch": 0.051648605026516026, + "grad_norm": 0.7420146271711324, + "learning_rate": 1.0276497695852535e-06, + "loss": 1.3824148178100586, + "step": 224 + }, + { + "epoch": 0.05187917915609869, + "grad_norm": 0.6473939851836901, + "learning_rate": 1.032258064516129e-06, + "loss": 1.3114633560180664, + "step": 225 + }, + { + "epoch": 0.05210975328568135, + "grad_norm": 0.6372141360329365, + "learning_rate": 1.0368663594470047e-06, + "loss": 1.272273063659668, + "step": 226 + }, + { + "epoch": 0.05234032741526401, + "grad_norm": 0.8216490037105428, + "learning_rate": 1.04147465437788e-06, + "loss": 1.5072649717330933, + "step": 227 + }, + { + "epoch": 0.05257090154484667, + "grad_norm": 0.7183581578734374, + "learning_rate": 1.0460829493087557e-06, + "loss": 1.4087142944335938, + "step": 228 + }, + { + "epoch": 0.05280147567442933, + "grad_norm": 0.8332625481322393, + "learning_rate": 1.050691244239631e-06, + "loss": 1.4866605997085571, + "step": 229 + }, + { + "epoch": 0.05303204980401199, + "grad_norm": 0.6315632875144884, + "learning_rate": 1.0552995391705069e-06, + "loss": 1.3377184867858887, + "step": 230 + }, + { 
+ "epoch": 0.05326262393359465, + "grad_norm": 0.6695801561741619, + "learning_rate": 1.0599078341013825e-06, + "loss": 1.4009103775024414, + "step": 231 + }, + { + "epoch": 0.05349319806317731, + "grad_norm": 0.7832755910275336, + "learning_rate": 1.0645161290322579e-06, + "loss": 1.4878556728363037, + "step": 232 + }, + { + "epoch": 0.05372377219275997, + "grad_norm": 0.7218421394327601, + "learning_rate": 1.0691244239631337e-06, + "loss": 1.4002021551132202, + "step": 233 + }, + { + "epoch": 0.05395434632234263, + "grad_norm": 0.6918832056192313, + "learning_rate": 1.073732718894009e-06, + "loss": 1.337146520614624, + "step": 234 + }, + { + "epoch": 0.05418492045192529, + "grad_norm": 0.7101215642172168, + "learning_rate": 1.0783410138248847e-06, + "loss": 1.4084792137145996, + "step": 235 + }, + { + "epoch": 0.054415494581507955, + "grad_norm": 0.8413614642264606, + "learning_rate": 1.0829493087557605e-06, + "loss": 1.4131449460983276, + "step": 236 + }, + { + "epoch": 0.054646068711090616, + "grad_norm": 0.6587637953772119, + "learning_rate": 1.0875576036866358e-06, + "loss": 1.1869292259216309, + "step": 237 + }, + { + "epoch": 0.05487664284067328, + "grad_norm": 0.7608337119634553, + "learning_rate": 1.0921658986175114e-06, + "loss": 1.3970961570739746, + "step": 238 + }, + { + "epoch": 0.05510721697025594, + "grad_norm": 0.7677503323555195, + "learning_rate": 1.096774193548387e-06, + "loss": 1.2682442665100098, + "step": 239 + }, + { + "epoch": 0.0553377910998386, + "grad_norm": 0.6546621813731868, + "learning_rate": 1.1013824884792626e-06, + "loss": 1.2983934879302979, + "step": 240 + }, + { + "epoch": 0.05556836522942126, + "grad_norm": 0.7451544478647047, + "learning_rate": 1.1059907834101382e-06, + "loss": 1.3980869054794312, + "step": 241 + }, + { + "epoch": 0.05579893935900392, + "grad_norm": 0.6116475273591584, + "learning_rate": 1.1105990783410138e-06, + "loss": 1.3068631887435913, + "step": 242 + }, + { + "epoch": 0.056029513488586584, + 
"grad_norm": 0.7974654782353883, + "learning_rate": 1.1152073732718894e-06, + "loss": 1.5353353023529053, + "step": 243 + }, + { + "epoch": 0.05626008761816924, + "grad_norm": 0.663054900024182, + "learning_rate": 1.1198156682027648e-06, + "loss": 1.290163278579712, + "step": 244 + }, + { + "epoch": 0.0564906617477519, + "grad_norm": 0.6761997400626832, + "learning_rate": 1.1244239631336406e-06, + "loss": 1.3671848773956299, + "step": 245 + }, + { + "epoch": 0.05672123587733456, + "grad_norm": 0.6294209937786865, + "learning_rate": 1.1290322580645162e-06, + "loss": 1.3020408153533936, + "step": 246 + }, + { + "epoch": 0.05695181000691722, + "grad_norm": 0.7207247726421506, + "learning_rate": 1.1336405529953916e-06, + "loss": 1.3159775733947754, + "step": 247 + }, + { + "epoch": 0.057182384136499884, + "grad_norm": 0.6708051542823367, + "learning_rate": 1.1382488479262674e-06, + "loss": 1.3163995742797852, + "step": 248 + }, + { + "epoch": 0.057412958266082545, + "grad_norm": 0.8019994049858626, + "learning_rate": 1.1428571428571428e-06, + "loss": 1.5215930938720703, + "step": 249 + }, + { + "epoch": 0.057643532395665206, + "grad_norm": 0.6559479072990889, + "learning_rate": 1.1474654377880184e-06, + "loss": 1.2870161533355713, + "step": 250 + }, + { + "epoch": 0.05787410652524787, + "grad_norm": 0.7147869966218979, + "learning_rate": 1.1520737327188938e-06, + "loss": 1.2624198198318481, + "step": 251 + }, + { + "epoch": 0.05810468065483053, + "grad_norm": 0.7319832858668294, + "learning_rate": 1.1566820276497696e-06, + "loss": 1.2778981924057007, + "step": 252 + }, + { + "epoch": 0.05833525478441319, + "grad_norm": 0.6564800467165074, + "learning_rate": 1.1612903225806452e-06, + "loss": 1.1934442520141602, + "step": 253 + }, + { + "epoch": 0.05856582891399585, + "grad_norm": 0.7291335446235057, + "learning_rate": 1.1658986175115205e-06, + "loss": 1.3840088844299316, + "step": 254 + }, + { + "epoch": 0.05879640304357851, + "grad_norm": 0.7017610521536986, + 
"learning_rate": 1.1705069124423963e-06, + "loss": 1.373002290725708, + "step": 255 + }, + { + "epoch": 0.059026977173161174, + "grad_norm": 0.6853330554611681, + "learning_rate": 1.1751152073732717e-06, + "loss": 1.3614685535430908, + "step": 256 + }, + { + "epoch": 0.059257551302743836, + "grad_norm": 0.7170055632885292, + "learning_rate": 1.1797235023041473e-06, + "loss": 1.3525335788726807, + "step": 257 + }, + { + "epoch": 0.05948812543232649, + "grad_norm": 0.7471586447698318, + "learning_rate": 1.1843317972350231e-06, + "loss": 1.3806469440460205, + "step": 258 + }, + { + "epoch": 0.05971869956190915, + "grad_norm": 0.7262354481718393, + "learning_rate": 1.1889400921658985e-06, + "loss": 1.372736930847168, + "step": 259 + }, + { + "epoch": 0.05994927369149181, + "grad_norm": 0.7470794959515278, + "learning_rate": 1.1935483870967741e-06, + "loss": 1.309061050415039, + "step": 260 + }, + { + "epoch": 0.060179847821074474, + "grad_norm": 0.7217295951903909, + "learning_rate": 1.1981566820276497e-06, + "loss": 1.3500525951385498, + "step": 261 + }, + { + "epoch": 0.060410421950657135, + "grad_norm": 0.7498906773328822, + "learning_rate": 1.2027649769585253e-06, + "loss": 1.4197357892990112, + "step": 262 + }, + { + "epoch": 0.0606409960802398, + "grad_norm": 0.9553336191863615, + "learning_rate": 1.207373271889401e-06, + "loss": 1.6454131603240967, + "step": 263 + }, + { + "epoch": 0.06087157020982246, + "grad_norm": 0.7361372249879211, + "learning_rate": 1.2119815668202765e-06, + "loss": 1.269604206085205, + "step": 264 + }, + { + "epoch": 0.06110214433940512, + "grad_norm": 0.6596823046141973, + "learning_rate": 1.216589861751152e-06, + "loss": 1.2358057498931885, + "step": 265 + }, + { + "epoch": 0.06133271846898778, + "grad_norm": 0.7203751630823346, + "learning_rate": 1.2211981566820275e-06, + "loss": 1.2713422775268555, + "step": 266 + }, + { + "epoch": 0.06156329259857044, + "grad_norm": 0.7033446179657081, + "learning_rate": 1.2258064516129033e-06, + 
"loss": 1.225820779800415, + "step": 267 + }, + { + "epoch": 0.0617938667281531, + "grad_norm": 0.6900817599997362, + "learning_rate": 1.2304147465437787e-06, + "loss": 1.279617190361023, + "step": 268 + }, + { + "epoch": 0.062024440857735764, + "grad_norm": 0.6800159728233099, + "learning_rate": 1.2350230414746543e-06, + "loss": 1.2081385850906372, + "step": 269 + }, + { + "epoch": 0.062255014987318426, + "grad_norm": 0.7378639399050563, + "learning_rate": 1.23963133640553e-06, + "loss": 1.3121249675750732, + "step": 270 + }, + { + "epoch": 0.06248558911690109, + "grad_norm": 0.7497904685097676, + "learning_rate": 1.2442396313364054e-06, + "loss": 1.28495454788208, + "step": 271 + }, + { + "epoch": 0.06271616324648374, + "grad_norm": 0.7749777957183016, + "learning_rate": 1.248847926267281e-06, + "loss": 1.3837053775787354, + "step": 272 + }, + { + "epoch": 0.0629467373760664, + "grad_norm": 0.7210838772374344, + "learning_rate": 1.2534562211981564e-06, + "loss": 1.2119230031967163, + "step": 273 + }, + { + "epoch": 0.06317731150564906, + "grad_norm": 0.7143072591295863, + "learning_rate": 1.2580645161290322e-06, + "loss": 1.323190450668335, + "step": 274 + }, + { + "epoch": 0.06340788563523173, + "grad_norm": 0.7546501032980093, + "learning_rate": 1.2626728110599078e-06, + "loss": 1.4300715923309326, + "step": 275 + }, + { + "epoch": 0.06363845976481439, + "grad_norm": 0.7154461007442852, + "learning_rate": 1.2672811059907832e-06, + "loss": 1.1680996417999268, + "step": 276 + }, + { + "epoch": 0.06386903389439705, + "grad_norm": 0.8088364505140268, + "learning_rate": 1.271889400921659e-06, + "loss": 1.3980211019515991, + "step": 277 + }, + { + "epoch": 0.06409960802397971, + "grad_norm": 0.7801914373505492, + "learning_rate": 1.2764976958525344e-06, + "loss": 1.40798020362854, + "step": 278 + }, + { + "epoch": 0.06433018215356237, + "grad_norm": 0.7237186405433459, + "learning_rate": 1.28110599078341e-06, + "loss": 1.2535033226013184, + "step": 279 + }, + { + 
"epoch": 0.06456075628314503, + "grad_norm": 0.7779219570683336, + "learning_rate": 1.2857142857142858e-06, + "loss": 1.3866907358169556, + "step": 280 + }, + { + "epoch": 0.0647913304127277, + "grad_norm": 0.7036374523288562, + "learning_rate": 1.2903225806451612e-06, + "loss": 1.1985647678375244, + "step": 281 + }, + { + "epoch": 0.06502190454231035, + "grad_norm": 0.8186126171093759, + "learning_rate": 1.2949308755760368e-06, + "loss": 1.3741936683654785, + "step": 282 + }, + { + "epoch": 0.06525247867189302, + "grad_norm": 0.7795060457073558, + "learning_rate": 1.2995391705069124e-06, + "loss": 1.3684422969818115, + "step": 283 + }, + { + "epoch": 0.06548305280147568, + "grad_norm": 0.7685811594695469, + "learning_rate": 1.304147465437788e-06, + "loss": 1.3792086839675903, + "step": 284 + }, + { + "epoch": 0.06571362693105834, + "grad_norm": 0.8541112738893439, + "learning_rate": 1.3087557603686636e-06, + "loss": 1.3252873420715332, + "step": 285 + }, + { + "epoch": 0.065944201060641, + "grad_norm": 0.7272989570317888, + "learning_rate": 1.3133640552995392e-06, + "loss": 1.1918525695800781, + "step": 286 + }, + { + "epoch": 0.06617477519022366, + "grad_norm": 0.8825171015262823, + "learning_rate": 1.3179723502304148e-06, + "loss": 1.3760654926300049, + "step": 287 + }, + { + "epoch": 0.06640534931980632, + "grad_norm": 0.8100539272477522, + "learning_rate": 1.3225806451612901e-06, + "loss": 1.3452839851379395, + "step": 288 + }, + { + "epoch": 0.06663592344938898, + "grad_norm": 0.7635396360128843, + "learning_rate": 1.327188940092166e-06, + "loss": 1.321220874786377, + "step": 289 + }, + { + "epoch": 0.06686649757897165, + "grad_norm": 0.724002123288283, + "learning_rate": 1.3317972350230413e-06, + "loss": 1.222012996673584, + "step": 290 + }, + { + "epoch": 0.0670970717085543, + "grad_norm": 0.7939713970528558, + "learning_rate": 1.336405529953917e-06, + "loss": 1.3209044933319092, + "step": 291 + }, + { + "epoch": 0.06732764583813695, + "grad_norm": 
0.834643855588948, + "learning_rate": 1.3410138248847927e-06, + "loss": 1.3250432014465332, + "step": 292 + }, + { + "epoch": 0.06755821996771962, + "grad_norm": 0.6522445861220314, + "learning_rate": 1.3456221198156681e-06, + "loss": 1.1738805770874023, + "step": 293 + }, + { + "epoch": 0.06778879409730228, + "grad_norm": 0.7430324759377445, + "learning_rate": 1.3502304147465437e-06, + "loss": 1.238675832748413, + "step": 294 + }, + { + "epoch": 0.06801936822688494, + "grad_norm": 0.6872443402637277, + "learning_rate": 1.354838709677419e-06, + "loss": 1.2162814140319824, + "step": 295 + }, + { + "epoch": 0.0682499423564676, + "grad_norm": 0.7451321254668013, + "learning_rate": 1.359447004608295e-06, + "loss": 1.2087210416793823, + "step": 296 + }, + { + "epoch": 0.06848051648605026, + "grad_norm": 0.7183129418570579, + "learning_rate": 1.3640552995391705e-06, + "loss": 1.2657420635223389, + "step": 297 + }, + { + "epoch": 0.06871109061563292, + "grad_norm": 0.8828866176671843, + "learning_rate": 1.3686635944700459e-06, + "loss": 1.496249794960022, + "step": 298 + }, + { + "epoch": 0.06894166474521558, + "grad_norm": 0.7852198432087445, + "learning_rate": 1.3732718894009217e-06, + "loss": 1.2698930501937866, + "step": 299 + }, + { + "epoch": 0.06917223887479824, + "grad_norm": 0.723866375282328, + "learning_rate": 1.377880184331797e-06, + "loss": 1.2088165283203125, + "step": 300 + }, + { + "epoch": 0.0694028130043809, + "grad_norm": 0.764377981893855, + "learning_rate": 1.3824884792626727e-06, + "loss": 1.392000436782837, + "step": 301 + }, + { + "epoch": 0.06963338713396357, + "grad_norm": 0.7252481501169622, + "learning_rate": 1.3870967741935485e-06, + "loss": 1.366544485092163, + "step": 302 + }, + { + "epoch": 0.06986396126354623, + "grad_norm": 0.7900814443800929, + "learning_rate": 1.3917050691244239e-06, + "loss": 1.3276031017303467, + "step": 303 + }, + { + "epoch": 0.07009453539312889, + "grad_norm": 0.7000339586583599, + "learning_rate": 
1.3963133640552995e-06, + "loss": 1.1413768529891968, + "step": 304 + }, + { + "epoch": 0.07032510952271155, + "grad_norm": 0.7903483195817192, + "learning_rate": 1.400921658986175e-06, + "loss": 1.2958520650863647, + "step": 305 + }, + { + "epoch": 0.07055568365229421, + "grad_norm": 0.7651988170590107, + "learning_rate": 1.4055299539170507e-06, + "loss": 1.3514549732208252, + "step": 306 + }, + { + "epoch": 0.07078625778187687, + "grad_norm": 0.767117117462576, + "learning_rate": 1.410138248847926e-06, + "loss": 1.332120418548584, + "step": 307 + }, + { + "epoch": 0.07101683191145954, + "grad_norm": 0.8380945550826328, + "learning_rate": 1.4147465437788018e-06, + "loss": 1.282820463180542, + "step": 308 + }, + { + "epoch": 0.0712474060410422, + "grad_norm": 0.7478573370757386, + "learning_rate": 1.4193548387096774e-06, + "loss": 1.3927665948867798, + "step": 309 + }, + { + "epoch": 0.07147798017062486, + "grad_norm": 0.7471336867744233, + "learning_rate": 1.4239631336405528e-06, + "loss": 1.2459386587142944, + "step": 310 + }, + { + "epoch": 0.07170855430020752, + "grad_norm": 0.715680538211599, + "learning_rate": 1.4285714285714286e-06, + "loss": 1.1996700763702393, + "step": 311 + }, + { + "epoch": 0.07193912842979018, + "grad_norm": 0.7466366577926873, + "learning_rate": 1.433179723502304e-06, + "loss": 1.1007883548736572, + "step": 312 + }, + { + "epoch": 0.07216970255937284, + "grad_norm": 0.6505103448142013, + "learning_rate": 1.4377880184331796e-06, + "loss": 1.211327314376831, + "step": 313 + }, + { + "epoch": 0.0724002766889555, + "grad_norm": 0.7475198907178121, + "learning_rate": 1.4423963133640554e-06, + "loss": 1.314349889755249, + "step": 314 + }, + { + "epoch": 0.07263085081853816, + "grad_norm": 0.7782372886671983, + "learning_rate": 1.4470046082949308e-06, + "loss": 1.2270662784576416, + "step": 315 + }, + { + "epoch": 0.07286142494812083, + "grad_norm": 0.7521500862086049, + "learning_rate": 1.4516129032258064e-06, + "loss": 1.1802537441253662, 
+ "step": 316 + }, + { + "epoch": 0.07309199907770349, + "grad_norm": 0.7684137773026678, + "learning_rate": 1.4562211981566818e-06, + "loss": 1.275806188583374, + "step": 317 + }, + { + "epoch": 0.07332257320728615, + "grad_norm": 0.789590997753613, + "learning_rate": 1.4608294930875576e-06, + "loss": 1.2713148593902588, + "step": 318 + }, + { + "epoch": 0.07355314733686881, + "grad_norm": 0.8345280857312554, + "learning_rate": 1.4654377880184332e-06, + "loss": 1.3091093301773071, + "step": 319 + }, + { + "epoch": 0.07378372146645146, + "grad_norm": 0.7108154017524825, + "learning_rate": 1.4700460829493086e-06, + "loss": 1.1274672746658325, + "step": 320 + }, + { + "epoch": 0.07401429559603412, + "grad_norm": 0.7137227522476419, + "learning_rate": 1.4746543778801844e-06, + "loss": 1.236955165863037, + "step": 321 + }, + { + "epoch": 0.07424486972561678, + "grad_norm": 0.7825967305477171, + "learning_rate": 1.4792626728110598e-06, + "loss": 1.2561366558074951, + "step": 322 + }, + { + "epoch": 0.07447544385519944, + "grad_norm": 0.7250730413423113, + "learning_rate": 1.4838709677419353e-06, + "loss": 1.1229519844055176, + "step": 323 + }, + { + "epoch": 0.0747060179847821, + "grad_norm": 0.7688658143017724, + "learning_rate": 1.4884792626728112e-06, + "loss": 1.200115442276001, + "step": 324 + }, + { + "epoch": 0.07493659211436476, + "grad_norm": 0.7499295220603182, + "learning_rate": 1.4930875576036865e-06, + "loss": 1.1930850744247437, + "step": 325 + }, + { + "epoch": 0.07516716624394743, + "grad_norm": 0.8209913282027874, + "learning_rate": 1.4976958525345621e-06, + "loss": 1.3204331398010254, + "step": 326 + }, + { + "epoch": 0.07539774037353009, + "grad_norm": 0.7429612395335268, + "learning_rate": 1.5023041474654377e-06, + "loss": 1.109247088432312, + "step": 327 + }, + { + "epoch": 0.07562831450311275, + "grad_norm": 0.7097388789784923, + "learning_rate": 1.5069124423963133e-06, + "loss": 1.1239254474639893, + "step": 328 + }, + { + "epoch": 
0.07585888863269541, + "grad_norm": 0.7867677832004493, + "learning_rate": 1.5115207373271887e-06, + "loss": 1.22686767578125, + "step": 329 + }, + { + "epoch": 0.07608946276227807, + "grad_norm": 0.8425243281826544, + "learning_rate": 1.5161290322580645e-06, + "loss": 1.2846856117248535, + "step": 330 + }, + { + "epoch": 0.07632003689186073, + "grad_norm": 0.7611030204070008, + "learning_rate": 1.5207373271889401e-06, + "loss": 1.1720764636993408, + "step": 331 + }, + { + "epoch": 0.0765506110214434, + "grad_norm": 0.6783089545901869, + "learning_rate": 1.5253456221198155e-06, + "loss": 1.05867338180542, + "step": 332 + }, + { + "epoch": 0.07678118515102605, + "grad_norm": 0.781197296597327, + "learning_rate": 1.5299539170506913e-06, + "loss": 1.2652220726013184, + "step": 333 + }, + { + "epoch": 0.07701175928060872, + "grad_norm": 0.7674267376615101, + "learning_rate": 1.5345622119815667e-06, + "loss": 1.1367218494415283, + "step": 334 + }, + { + "epoch": 0.07724233341019138, + "grad_norm": 0.7149265599125916, + "learning_rate": 1.5391705069124423e-06, + "loss": 1.169439673423767, + "step": 335 + }, + { + "epoch": 0.07747290753977404, + "grad_norm": 0.8284832797024527, + "learning_rate": 1.543778801843318e-06, + "loss": 1.265104055404663, + "step": 336 + }, + { + "epoch": 0.0777034816693567, + "grad_norm": 0.6605498491920537, + "learning_rate": 1.5483870967741935e-06, + "loss": 1.059098243713379, + "step": 337 + }, + { + "epoch": 0.07793405579893936, + "grad_norm": 0.8255024678570093, + "learning_rate": 1.552995391705069e-06, + "loss": 1.0998419523239136, + "step": 338 + }, + { + "epoch": 0.07816462992852202, + "grad_norm": 0.8285993940213782, + "learning_rate": 1.5576036866359445e-06, + "loss": 1.1361349821090698, + "step": 339 + }, + { + "epoch": 0.07839520405810468, + "grad_norm": 0.7677612111698353, + "learning_rate": 1.5622119815668203e-06, + "loss": 1.1051890850067139, + "step": 340 + }, + { + "epoch": 0.07862577818768735, + "grad_norm": 0.8204078401725609, 
+ "learning_rate": 1.5668202764976959e-06, + "loss": 1.1675043106079102, + "step": 341 + }, + { + "epoch": 0.07885635231727, + "grad_norm": 0.8428908363907526, + "learning_rate": 1.5714285714285712e-06, + "loss": 1.180741786956787, + "step": 342 + }, + { + "epoch": 0.07908692644685267, + "grad_norm": 0.8559354133772745, + "learning_rate": 1.576036866359447e-06, + "loss": 1.241147518157959, + "step": 343 + }, + { + "epoch": 0.07931750057643533, + "grad_norm": 0.848204694935563, + "learning_rate": 1.5806451612903224e-06, + "loss": 1.2831401824951172, + "step": 344 + }, + { + "epoch": 0.07954807470601799, + "grad_norm": 0.7281233645086155, + "learning_rate": 1.585253456221198e-06, + "loss": 1.2328094244003296, + "step": 345 + }, + { + "epoch": 0.07977864883560065, + "grad_norm": 0.7932743453051899, + "learning_rate": 1.5898617511520738e-06, + "loss": 1.296494960784912, + "step": 346 + }, + { + "epoch": 0.08000922296518331, + "grad_norm": 0.7368517201206619, + "learning_rate": 1.5944700460829492e-06, + "loss": 1.1802153587341309, + "step": 347 + }, + { + "epoch": 0.08023979709476596, + "grad_norm": 0.8829436639082808, + "learning_rate": 1.5990783410138248e-06, + "loss": 1.2387690544128418, + "step": 348 + }, + { + "epoch": 0.08047037122434862, + "grad_norm": 0.8002618721063425, + "learning_rate": 1.6036866359447004e-06, + "loss": 1.1307916641235352, + "step": 349 + }, + { + "epoch": 0.08070094535393128, + "grad_norm": 0.8185303488247757, + "learning_rate": 1.608294930875576e-06, + "loss": 1.117497444152832, + "step": 350 + }, + { + "epoch": 0.08093151948351394, + "grad_norm": 0.7524331692605707, + "learning_rate": 1.6129032258064514e-06, + "loss": 1.1360805034637451, + "step": 351 + }, + { + "epoch": 0.0811620936130966, + "grad_norm": 0.7626049955851422, + "learning_rate": 1.6175115207373272e-06, + "loss": 1.1756231784820557, + "step": 352 + }, + { + "epoch": 0.08139266774267927, + "grad_norm": 0.7605864356179197, + "learning_rate": 1.6221198156682028e-06, + "loss": 
1.0260417461395264, + "step": 353 + }, + { + "epoch": 0.08162324187226193, + "grad_norm": 0.6949706544727091, + "learning_rate": 1.6267281105990782e-06, + "loss": 1.0863536596298218, + "step": 354 + }, + { + "epoch": 0.08185381600184459, + "grad_norm": 0.7427032746567218, + "learning_rate": 1.631336405529954e-06, + "loss": 1.0529779195785522, + "step": 355 + }, + { + "epoch": 0.08208439013142725, + "grad_norm": 0.7626426518406405, + "learning_rate": 1.6359447004608294e-06, + "loss": 1.0374994277954102, + "step": 356 + }, + { + "epoch": 0.08231496426100991, + "grad_norm": 0.7762352327056515, + "learning_rate": 1.640552995391705e-06, + "loss": 1.153419017791748, + "step": 357 + }, + { + "epoch": 0.08254553839059257, + "grad_norm": 0.7455681546697154, + "learning_rate": 1.6451612903225808e-06, + "loss": 1.0155376195907593, + "step": 358 + }, + { + "epoch": 0.08277611252017524, + "grad_norm": 0.779838920397346, + "learning_rate": 1.6497695852534561e-06, + "loss": 1.1288530826568604, + "step": 359 + }, + { + "epoch": 0.0830066866497579, + "grad_norm": 0.8920666311969824, + "learning_rate": 1.6543778801843317e-06, + "loss": 1.1493456363677979, + "step": 360 + }, + { + "epoch": 0.08323726077934056, + "grad_norm": 0.8383114858680324, + "learning_rate": 1.6589861751152071e-06, + "loss": 1.1064895391464233, + "step": 361 + }, + { + "epoch": 0.08346783490892322, + "grad_norm": 0.752156167882629, + "learning_rate": 1.663594470046083e-06, + "loss": 1.0102828741073608, + "step": 362 + }, + { + "epoch": 0.08369840903850588, + "grad_norm": 0.8341451005387022, + "learning_rate": 1.6682027649769585e-06, + "loss": 1.0750138759613037, + "step": 363 + }, + { + "epoch": 0.08392898316808854, + "grad_norm": 0.8504953523340792, + "learning_rate": 1.672811059907834e-06, + "loss": 1.1611195802688599, + "step": 364 + }, + { + "epoch": 0.0841595572976712, + "grad_norm": 0.8228646683486963, + "learning_rate": 1.6774193548387097e-06, + "loss": 1.2799829244613647, + "step": 365 + }, + { + 
"epoch": 0.08439013142725386, + "grad_norm": 0.9626273899315478, + "learning_rate": 1.682027649769585e-06, + "loss": 1.2427947521209717, + "step": 366 + }, + { + "epoch": 0.08462070555683653, + "grad_norm": 0.724553415716276, + "learning_rate": 1.6866359447004607e-06, + "loss": 1.0379959344863892, + "step": 367 + }, + { + "epoch": 0.08485127968641919, + "grad_norm": 0.7173602639018404, + "learning_rate": 1.6912442396313363e-06, + "loss": 0.8439304828643799, + "step": 368 + }, + { + "epoch": 0.08508185381600185, + "grad_norm": 0.8477542480910312, + "learning_rate": 1.6958525345622119e-06, + "loss": 1.1249288320541382, + "step": 369 + }, + { + "epoch": 0.08531242794558451, + "grad_norm": 0.8715705993798011, + "learning_rate": 1.7004608294930875e-06, + "loss": 1.186207890510559, + "step": 370 + }, + { + "epoch": 0.08554300207516717, + "grad_norm": 0.9990300341847143, + "learning_rate": 1.705069124423963e-06, + "loss": 1.1181306838989258, + "step": 371 + }, + { + "epoch": 0.08577357620474983, + "grad_norm": 0.8792678686182055, + "learning_rate": 1.7096774193548387e-06, + "loss": 0.9828017950057983, + "step": 372 + }, + { + "epoch": 0.0860041503343325, + "grad_norm": 0.7710250186072433, + "learning_rate": 1.714285714285714e-06, + "loss": 1.1158804893493652, + "step": 373 + }, + { + "epoch": 0.08623472446391516, + "grad_norm": 0.9602707019706166, + "learning_rate": 1.7188940092165899e-06, + "loss": 1.1771481037139893, + "step": 374 + }, + { + "epoch": 0.08646529859349782, + "grad_norm": 0.8137176951163696, + "learning_rate": 1.7235023041474655e-06, + "loss": 1.1378540992736816, + "step": 375 + }, + { + "epoch": 0.08669587272308046, + "grad_norm": 0.819557644912057, + "learning_rate": 1.7281105990783408e-06, + "loss": 1.2011152505874634, + "step": 376 + }, + { + "epoch": 0.08692644685266313, + "grad_norm": 0.8779923853134601, + "learning_rate": 1.7327188940092167e-06, + "loss": 1.0932848453521729, + "step": 377 + }, + { + "epoch": 0.08715702098224579, + "grad_norm": 
0.7579888078286682, + "learning_rate": 1.737327188940092e-06, + "loss": 1.0530626773834229, + "step": 378 + }, + { + "epoch": 0.08738759511182845, + "grad_norm": 0.8123881302713649, + "learning_rate": 1.7419354838709676e-06, + "loss": 1.09238600730896, + "step": 379 + }, + { + "epoch": 0.08761816924141111, + "grad_norm": 0.8179032370650432, + "learning_rate": 1.7465437788018434e-06, + "loss": 1.10097336769104, + "step": 380 + }, + { + "epoch": 0.08784874337099377, + "grad_norm": 0.9066182701404021, + "learning_rate": 1.7511520737327188e-06, + "loss": 1.1483392715454102, + "step": 381 + }, + { + "epoch": 0.08807931750057643, + "grad_norm": 0.7929757896387074, + "learning_rate": 1.7557603686635944e-06, + "loss": 0.9776606559753418, + "step": 382 + }, + { + "epoch": 0.08830989163015909, + "grad_norm": 0.7070713392242878, + "learning_rate": 1.7603686635944698e-06, + "loss": 0.9363219738006592, + "step": 383 + }, + { + "epoch": 0.08854046575974175, + "grad_norm": 0.8829017901239412, + "learning_rate": 1.7649769585253456e-06, + "loss": 1.1259841918945312, + "step": 384 + }, + { + "epoch": 0.08877103988932442, + "grad_norm": 0.8379913612296851, + "learning_rate": 1.7695852534562212e-06, + "loss": 1.0652339458465576, + "step": 385 + }, + { + "epoch": 0.08900161401890708, + "grad_norm": 0.9016264696692738, + "learning_rate": 1.7741935483870966e-06, + "loss": 1.1088197231292725, + "step": 386 + }, + { + "epoch": 0.08923218814848974, + "grad_norm": 0.8434226175443441, + "learning_rate": 1.7788018433179724e-06, + "loss": 1.0171717405319214, + "step": 387 + }, + { + "epoch": 0.0894627622780724, + "grad_norm": 0.893116506697827, + "learning_rate": 1.7834101382488478e-06, + "loss": 1.0391405820846558, + "step": 388 + }, + { + "epoch": 0.08969333640765506, + "grad_norm": 0.9558704899064524, + "learning_rate": 1.7880184331797234e-06, + "loss": 0.9970325231552124, + "step": 389 + }, + { + "epoch": 0.08992391053723772, + "grad_norm": 0.8304308575964876, + "learning_rate": 
1.792626728110599e-06, + "loss": 1.1427147388458252, + "step": 390 + }, + { + "epoch": 0.09015448466682038, + "grad_norm": 0.8319398781501527, + "learning_rate": 1.7972350230414746e-06, + "loss": 0.8830767273902893, + "step": 391 + }, + { + "epoch": 0.09038505879640304, + "grad_norm": 0.8983385232838542, + "learning_rate": 1.8018433179723502e-06, + "loss": 1.0469788312911987, + "step": 392 + }, + { + "epoch": 0.0906156329259857, + "grad_norm": 1.0033385350969977, + "learning_rate": 1.8064516129032258e-06, + "loss": 1.022156834602356, + "step": 393 + }, + { + "epoch": 0.09084620705556837, + "grad_norm": 0.8626168210196775, + "learning_rate": 1.8110599078341013e-06, + "loss": 1.0723674297332764, + "step": 394 + }, + { + "epoch": 0.09107678118515103, + "grad_norm": 0.8060308252194399, + "learning_rate": 1.8156682027649767e-06, + "loss": 0.9089772701263428, + "step": 395 + }, + { + "epoch": 0.09130735531473369, + "grad_norm": 0.8875270675183294, + "learning_rate": 1.8202764976958525e-06, + "loss": 1.1029877662658691, + "step": 396 + }, + { + "epoch": 0.09153792944431635, + "grad_norm": 0.94113090982248, + "learning_rate": 1.8248847926267281e-06, + "loss": 0.998812198638916, + "step": 397 + }, + { + "epoch": 0.09176850357389901, + "grad_norm": 1.0016962443263888, + "learning_rate": 1.8294930875576035e-06, + "loss": 1.116652250289917, + "step": 398 + }, + { + "epoch": 0.09199907770348167, + "grad_norm": 0.8575568562545252, + "learning_rate": 1.8341013824884793e-06, + "loss": 1.0071923732757568, + "step": 399 + }, + { + "epoch": 0.09222965183306434, + "grad_norm": 0.9758059413772218, + "learning_rate": 1.8387096774193547e-06, + "loss": 1.0713586807250977, + "step": 400 + }, + { + "epoch": 0.092460225962647, + "grad_norm": 0.8883854169226675, + "learning_rate": 1.8433179723502303e-06, + "loss": 1.0897400379180908, + "step": 401 + }, + { + "epoch": 0.09269080009222966, + "grad_norm": 0.9342253113098401, + "learning_rate": 1.8479262672811061e-06, + "loss": 
0.9571444392204285, + "step": 402 + }, + { + "epoch": 0.09292137422181232, + "grad_norm": 0.9173411430110425, + "learning_rate": 1.8525345622119815e-06, + "loss": 0.9822309017181396, + "step": 403 + }, + { + "epoch": 0.09315194835139497, + "grad_norm": 0.8821702665182305, + "learning_rate": 1.857142857142857e-06, + "loss": 1.0010900497436523, + "step": 404 + }, + { + "epoch": 0.09338252248097763, + "grad_norm": 0.8417761058687274, + "learning_rate": 1.8617511520737325e-06, + "loss": 0.8548961877822876, + "step": 405 + }, + { + "epoch": 0.09361309661056029, + "grad_norm": 0.9390158571311362, + "learning_rate": 1.8663594470046083e-06, + "loss": 1.0856781005859375, + "step": 406 + }, + { + "epoch": 0.09384367074014295, + "grad_norm": 0.9100547740927183, + "learning_rate": 1.8709677419354837e-06, + "loss": 1.0913856029510498, + "step": 407 + }, + { + "epoch": 0.09407424486972561, + "grad_norm": 1.0379606890495185, + "learning_rate": 1.8755760368663593e-06, + "loss": 0.9409916400909424, + "step": 408 + }, + { + "epoch": 0.09430481899930827, + "grad_norm": 0.9523962354053698, + "learning_rate": 1.880184331797235e-06, + "loss": 0.9950551390647888, + "step": 409 + }, + { + "epoch": 0.09453539312889093, + "grad_norm": 0.861704297563458, + "learning_rate": 1.8847926267281104e-06, + "loss": 0.9915211200714111, + "step": 410 + }, + { + "epoch": 0.0947659672584736, + "grad_norm": 0.9290893256356082, + "learning_rate": 1.889400921658986e-06, + "loss": 1.0381574630737305, + "step": 411 + }, + { + "epoch": 0.09499654138805626, + "grad_norm": 0.9228539253940193, + "learning_rate": 1.8940092165898616e-06, + "loss": 0.8911284804344177, + "step": 412 + }, + { + "epoch": 0.09522711551763892, + "grad_norm": 0.9426577567548815, + "learning_rate": 1.8986175115207372e-06, + "loss": 0.8757172226905823, + "step": 413 + }, + { + "epoch": 0.09545768964722158, + "grad_norm": 0.7971911677154941, + "learning_rate": 1.9032258064516128e-06, + "loss": 0.8362075090408325, + "step": 414 + }, + { + 
"epoch": 0.09568826377680424, + "grad_norm": 0.9051810749284879, + "learning_rate": 1.9078341013824884e-06, + "loss": 0.906524658203125, + "step": 415 + }, + { + "epoch": 0.0959188379063869, + "grad_norm": 0.9304511138009018, + "learning_rate": 1.912442396313364e-06, + "loss": 1.100447654724121, + "step": 416 + }, + { + "epoch": 0.09614941203596956, + "grad_norm": 0.8321943001479206, + "learning_rate": 1.9170506912442396e-06, + "loss": 0.9658455848693848, + "step": 417 + }, + { + "epoch": 0.09637998616555223, + "grad_norm": 0.9393736008547379, + "learning_rate": 1.921658986175115e-06, + "loss": 0.971304714679718, + "step": 418 + }, + { + "epoch": 0.09661056029513489, + "grad_norm": 0.8792304256570437, + "learning_rate": 1.926267281105991e-06, + "loss": 0.916153073310852, + "step": 419 + }, + { + "epoch": 0.09684113442471755, + "grad_norm": 0.960700719296913, + "learning_rate": 1.930875576036866e-06, + "loss": 0.9166572093963623, + "step": 420 + }, + { + "epoch": 0.09707170855430021, + "grad_norm": 0.8385154496673872, + "learning_rate": 1.935483870967742e-06, + "loss": 0.8754867315292358, + "step": 421 + }, + { + "epoch": 0.09730228268388287, + "grad_norm": 0.8951117289542856, + "learning_rate": 1.9400921658986174e-06, + "loss": 0.9507668018341064, + "step": 422 + }, + { + "epoch": 0.09753285681346553, + "grad_norm": 1.0251554467069826, + "learning_rate": 1.944700460829493e-06, + "loss": 0.8977904319763184, + "step": 423 + }, + { + "epoch": 0.0977634309430482, + "grad_norm": 0.8433365129133346, + "learning_rate": 1.9493087557603686e-06, + "loss": 0.8359580039978027, + "step": 424 + }, + { + "epoch": 0.09799400507263085, + "grad_norm": 0.8653781711190967, + "learning_rate": 1.953917050691244e-06, + "loss": 0.8928875923156738, + "step": 425 + }, + { + "epoch": 0.09822457920221352, + "grad_norm": 1.016156538051323, + "learning_rate": 1.9585253456221198e-06, + "loss": 0.9031360149383545, + "step": 426 + }, + { + "epoch": 0.09845515333179618, + "grad_norm": 
0.9535004151409068, + "learning_rate": 1.963133640552995e-06, + "loss": 0.9135938286781311, + "step": 427 + }, + { + "epoch": 0.09868572746137884, + "grad_norm": 0.9913179989235431, + "learning_rate": 1.967741935483871e-06, + "loss": 0.8978056907653809, + "step": 428 + }, + { + "epoch": 0.0989163015909615, + "grad_norm": 0.7393338474601954, + "learning_rate": 1.9723502304147463e-06, + "loss": 0.8236517906188965, + "step": 429 + }, + { + "epoch": 0.09914687572054416, + "grad_norm": 0.9578937542491764, + "learning_rate": 1.976958525345622e-06, + "loss": 0.8279497027397156, + "step": 430 + }, + { + "epoch": 0.09937744985012681, + "grad_norm": 0.8687224271614162, + "learning_rate": 1.9815668202764975e-06, + "loss": 0.9273175001144409, + "step": 431 + }, + { + "epoch": 0.09960802397970947, + "grad_norm": 0.9008857811722423, + "learning_rate": 1.9861751152073733e-06, + "loss": 0.8990100622177124, + "step": 432 + }, + { + "epoch": 0.09983859810929213, + "grad_norm": 0.9051637314581525, + "learning_rate": 1.9907834101382487e-06, + "loss": 0.9221487045288086, + "step": 433 + }, + { + "epoch": 0.10006917223887479, + "grad_norm": 0.8468556051112544, + "learning_rate": 1.995391705069124e-06, + "loss": 0.7376757264137268, + "step": 434 + }, + { + "epoch": 0.10029974636845745, + "grad_norm": 0.8651656722450953, + "learning_rate": 2e-06, + "loss": 0.8496265411376953, + "step": 435 + }, + { + "epoch": 0.10053032049804012, + "grad_norm": 0.8177327534577133, + "learning_rate": 1.9999999273199326e-06, + "loss": 0.73260897397995, + "step": 436 + }, + { + "epoch": 0.10076089462762278, + "grad_norm": 1.2545811776233549, + "learning_rate": 1.999999709279741e-06, + "loss": 0.9583776593208313, + "step": 437 + }, + { + "epoch": 0.10099146875720544, + "grad_norm": 0.7771019547302918, + "learning_rate": 1.9999993458794573e-06, + "loss": 0.810507595539093, + "step": 438 + }, + { + "epoch": 0.1012220428867881, + "grad_norm": 0.8756547566965167, + "learning_rate": 1.9999988371191337e-06, + 
"loss": 0.7957329750061035, + "step": 439 + }, + { + "epoch": 0.10145261701637076, + "grad_norm": 0.8325539024899065, + "learning_rate": 1.9999981829988444e-06, + "loss": 0.8141027688980103, + "step": 440 + }, + { + "epoch": 0.10168319114595342, + "grad_norm": 0.9256731752358246, + "learning_rate": 1.9999973835186847e-06, + "loss": 0.8454669117927551, + "step": 441 + }, + { + "epoch": 0.10191376527553608, + "grad_norm": 0.9086105801784582, + "learning_rate": 1.9999964386787706e-06, + "loss": 0.7966687679290771, + "step": 442 + }, + { + "epoch": 0.10214433940511874, + "grad_norm": 0.8420803725442093, + "learning_rate": 1.9999953484792394e-06, + "loss": 0.8623852133750916, + "step": 443 + }, + { + "epoch": 0.1023749135347014, + "grad_norm": 0.976279238987049, + "learning_rate": 1.9999941129202494e-06, + "loss": 0.9604165554046631, + "step": 444 + }, + { + "epoch": 0.10260548766428407, + "grad_norm": 0.8427059790049124, + "learning_rate": 1.999992732001981e-06, + "loss": 0.7461415529251099, + "step": 445 + }, + { + "epoch": 0.10283606179386673, + "grad_norm": 0.8066869506045082, + "learning_rate": 1.9999912057246342e-06, + "loss": 0.7243722677230835, + "step": 446 + }, + { + "epoch": 0.10306663592344939, + "grad_norm": 0.8507773615519725, + "learning_rate": 1.999989534088431e-06, + "loss": 0.8466402292251587, + "step": 447 + }, + { + "epoch": 0.10329721005303205, + "grad_norm": 0.9504023717644374, + "learning_rate": 1.9999877170936142e-06, + "loss": 0.8062578439712524, + "step": 448 + }, + { + "epoch": 0.10352778418261471, + "grad_norm": 0.8134117517887439, + "learning_rate": 1.9999857547404484e-06, + "loss": 0.8979625701904297, + "step": 449 + }, + { + "epoch": 0.10375835831219737, + "grad_norm": 0.7889840834274454, + "learning_rate": 1.999983647029219e-06, + "loss": 0.7970046401023865, + "step": 450 + }, + { + "epoch": 0.10398893244178004, + "grad_norm": 0.8933195109789729, + "learning_rate": 1.999981393960231e-06, + "loss": 0.9027936458587646, + "step": 451 + }, + 
{ + "epoch": 0.1042195065713627, + "grad_norm": 0.9428128689196352, + "learning_rate": 1.9999789955338133e-06, + "loss": 0.8347916007041931, + "step": 452 + }, + { + "epoch": 0.10445008070094536, + "grad_norm": 0.7636783217821816, + "learning_rate": 1.9999764517503146e-06, + "loss": 0.7856979370117188, + "step": 453 + }, + { + "epoch": 0.10468065483052802, + "grad_norm": 0.8588750023960529, + "learning_rate": 1.9999737626101037e-06, + "loss": 0.8370383381843567, + "step": 454 + }, + { + "epoch": 0.10491122896011068, + "grad_norm": 0.7607065236764231, + "learning_rate": 1.9999709281135718e-06, + "loss": 0.8629742860794067, + "step": 455 + }, + { + "epoch": 0.10514180308969334, + "grad_norm": 0.7031266959727278, + "learning_rate": 1.9999679482611315e-06, + "loss": 0.8187414407730103, + "step": 456 + }, + { + "epoch": 0.105372377219276, + "grad_norm": 0.7996485745988237, + "learning_rate": 1.9999648230532156e-06, + "loss": 0.8169279098510742, + "step": 457 + }, + { + "epoch": 0.10560295134885866, + "grad_norm": 0.7291726430068795, + "learning_rate": 1.999961552490278e-06, + "loss": 0.7186012268066406, + "step": 458 + }, + { + "epoch": 0.10583352547844131, + "grad_norm": 0.8814433348597316, + "learning_rate": 1.9999581365727947e-06, + "loss": 0.8088201284408569, + "step": 459 + }, + { + "epoch": 0.10606409960802397, + "grad_norm": 0.8945815471698739, + "learning_rate": 1.999954575301262e-06, + "loss": 0.7067796587944031, + "step": 460 + }, + { + "epoch": 0.10629467373760663, + "grad_norm": 0.8727386643724712, + "learning_rate": 1.9999508686761974e-06, + "loss": 0.8839461803436279, + "step": 461 + }, + { + "epoch": 0.1065252478671893, + "grad_norm": 0.7752145606049893, + "learning_rate": 1.99994701669814e-06, + "loss": 0.750046968460083, + "step": 462 + }, + { + "epoch": 0.10675582199677196, + "grad_norm": 0.8246620057663118, + "learning_rate": 1.999943019367649e-06, + "loss": 0.7954964637756348, + "step": 463 + }, + { + "epoch": 0.10698639612635462, + "grad_norm": 
0.8139454190246876, + "learning_rate": 1.9999388766853065e-06, + "loss": 0.7178900241851807, + "step": 464 + }, + { + "epoch": 0.10721697025593728, + "grad_norm": 0.7775108685144316, + "learning_rate": 1.999934588651714e-06, + "loss": 0.7583869695663452, + "step": 465 + }, + { + "epoch": 0.10744754438551994, + "grad_norm": 0.7294165374555056, + "learning_rate": 1.999930155267495e-06, + "loss": 0.8068876266479492, + "step": 466 + }, + { + "epoch": 0.1076781185151026, + "grad_norm": 0.7396884936816651, + "learning_rate": 1.9999255765332946e-06, + "loss": 0.7507776021957397, + "step": 467 + }, + { + "epoch": 0.10790869264468526, + "grad_norm": 0.7418847797451098, + "learning_rate": 1.999920852449777e-06, + "loss": 0.7719494104385376, + "step": 468 + }, + { + "epoch": 0.10813926677426793, + "grad_norm": 0.7666886626519035, + "learning_rate": 1.99991598301763e-06, + "loss": 0.7420990467071533, + "step": 469 + }, + { + "epoch": 0.10836984090385059, + "grad_norm": 0.7701810012003275, + "learning_rate": 1.9999109682375606e-06, + "loss": 0.7152374386787415, + "step": 470 + }, + { + "epoch": 0.10860041503343325, + "grad_norm": 0.6850973266115482, + "learning_rate": 1.9999058081102985e-06, + "loss": 0.7971220016479492, + "step": 471 + }, + { + "epoch": 0.10883098916301591, + "grad_norm": 0.7306176016482578, + "learning_rate": 1.9999005026365936e-06, + "loss": 0.774874746799469, + "step": 472 + }, + { + "epoch": 0.10906156329259857, + "grad_norm": 0.8957955356096076, + "learning_rate": 1.999895051817216e-06, + "loss": 0.7567731142044067, + "step": 473 + }, + { + "epoch": 0.10929213742218123, + "grad_norm": 0.9679087986333686, + "learning_rate": 1.99988945565296e-06, + "loss": 0.7221060991287231, + "step": 474 + }, + { + "epoch": 0.1095227115517639, + "grad_norm": 0.7758710632294333, + "learning_rate": 1.9998837141446378e-06, + "loss": 0.8064852952957153, + "step": 475 + }, + { + "epoch": 0.10975328568134655, + "grad_norm": 0.7342367942239104, + "learning_rate": 
1.9998778272930842e-06, + "loss": 0.7329462766647339, + "step": 476 + }, + { + "epoch": 0.10998385981092922, + "grad_norm": 0.6944047501493505, + "learning_rate": 1.999871795099155e-06, + "loss": 0.715752363204956, + "step": 477 + }, + { + "epoch": 0.11021443394051188, + "grad_norm": 1.250464562888065, + "learning_rate": 1.9998656175637265e-06, + "loss": 0.8702882528305054, + "step": 478 + }, + { + "epoch": 0.11044500807009454, + "grad_norm": 0.9132853105204283, + "learning_rate": 1.9998592946876976e-06, + "loss": 0.8559622764587402, + "step": 479 + }, + { + "epoch": 0.1106755821996772, + "grad_norm": 1.0302853941011325, + "learning_rate": 1.999852826471987e-06, + "loss": 0.910442590713501, + "step": 480 + }, + { + "epoch": 0.11090615632925986, + "grad_norm": 0.7658983046756905, + "learning_rate": 1.9998462129175347e-06, + "loss": 0.8159372806549072, + "step": 481 + }, + { + "epoch": 0.11113673045884252, + "grad_norm": 0.6814545269174561, + "learning_rate": 1.9998394540253022e-06, + "loss": 0.8120635747909546, + "step": 482 + }, + { + "epoch": 0.11136730458842518, + "grad_norm": 0.9382461503301303, + "learning_rate": 1.999832549796272e-06, + "loss": 0.7867682576179504, + "step": 483 + }, + { + "epoch": 0.11159787871800785, + "grad_norm": 0.7285854274509946, + "learning_rate": 1.999825500231448e-06, + "loss": 0.695517897605896, + "step": 484 + }, + { + "epoch": 0.1118284528475905, + "grad_norm": 0.7426222297635688, + "learning_rate": 1.999818305331854e-06, + "loss": 0.8402971029281616, + "step": 485 + }, + { + "epoch": 0.11205902697717317, + "grad_norm": 0.9496598665654408, + "learning_rate": 1.9998109650985372e-06, + "loss": 0.7987074851989746, + "step": 486 + }, + { + "epoch": 0.11228960110675582, + "grad_norm": 0.7601824170608918, + "learning_rate": 1.9998034795325634e-06, + "loss": 0.6525362133979797, + "step": 487 + }, + { + "epoch": 0.11252017523633848, + "grad_norm": 0.6649425764525309, + "learning_rate": 1.999795848635021e-06, + "loss": 0.6218863725662231, + 
"step": 488 + }, + { + "epoch": 0.11275074936592114, + "grad_norm": 0.6793237780262881, + "learning_rate": 1.99978807240702e-06, + "loss": 0.7225729823112488, + "step": 489 + }, + { + "epoch": 0.1129813234955038, + "grad_norm": 0.7289774462660574, + "learning_rate": 1.9997801508496893e-06, + "loss": 0.7553551197052002, + "step": 490 + }, + { + "epoch": 0.11321189762508646, + "grad_norm": 0.7070554840091658, + "learning_rate": 1.999772083964182e-06, + "loss": 0.6695772409439087, + "step": 491 + }, + { + "epoch": 0.11344247175466912, + "grad_norm": 0.7937000317220514, + "learning_rate": 1.999763871751669e-06, + "loss": 0.7683162689208984, + "step": 492 + }, + { + "epoch": 0.11367304588425178, + "grad_norm": 0.7958897510308529, + "learning_rate": 1.9997555142133457e-06, + "loss": 0.7761441469192505, + "step": 493 + }, + { + "epoch": 0.11390362001383444, + "grad_norm": 0.8391915745578431, + "learning_rate": 1.999747011350426e-06, + "loss": 0.7204692959785461, + "step": 494 + }, + { + "epoch": 0.1141341941434171, + "grad_norm": 0.6535908344557003, + "learning_rate": 1.999738363164146e-06, + "loss": 0.6960519552230835, + "step": 495 + }, + { + "epoch": 0.11436476827299977, + "grad_norm": 0.669834933810116, + "learning_rate": 1.999729569655763e-06, + "loss": 0.7502788305282593, + "step": 496 + }, + { + "epoch": 0.11459534240258243, + "grad_norm": 0.7119093873273127, + "learning_rate": 1.999720630826555e-06, + "loss": 0.7649067640304565, + "step": 497 + }, + { + "epoch": 0.11482591653216509, + "grad_norm": 0.865452520980124, + "learning_rate": 1.9997115466778214e-06, + "loss": 0.6867918968200684, + "step": 498 + }, + { + "epoch": 0.11505649066174775, + "grad_norm": 0.7725462530919065, + "learning_rate": 1.9997023172108828e-06, + "loss": 0.7324330806732178, + "step": 499 + }, + { + "epoch": 0.11528706479133041, + "grad_norm": 0.7493898462804314, + "learning_rate": 1.999692942427081e-06, + "loss": 0.7452527284622192, + "step": 500 + }, + { + "epoch": 0.11551763892091307, + 
"grad_norm": 0.8849003751162662, + "learning_rate": 1.9996834223277775e-06, + "loss": 0.8311381340026855, + "step": 501 + }, + { + "epoch": 0.11574821305049574, + "grad_norm": 0.7698737492516583, + "learning_rate": 1.999673756914358e-06, + "loss": 0.6955340504646301, + "step": 502 + }, + { + "epoch": 0.1159787871800784, + "grad_norm": 0.9035827861690212, + "learning_rate": 1.999663946188226e-06, + "loss": 0.802892804145813, + "step": 503 + }, + { + "epoch": 0.11620936130966106, + "grad_norm": 0.9827928009523055, + "learning_rate": 1.9996539901508086e-06, + "loss": 0.8307123184204102, + "step": 504 + }, + { + "epoch": 0.11643993543924372, + "grad_norm": 0.7167523084062808, + "learning_rate": 1.9996438888035525e-06, + "loss": 0.7604272365570068, + "step": 505 + }, + { + "epoch": 0.11667050956882638, + "grad_norm": 0.7887244154559485, + "learning_rate": 1.9996336421479256e-06, + "loss": 0.798006534576416, + "step": 506 + }, + { + "epoch": 0.11690108369840904, + "grad_norm": 0.9102232519285063, + "learning_rate": 1.999623250185418e-06, + "loss": 0.7342728972434998, + "step": 507 + }, + { + "epoch": 0.1171316578279917, + "grad_norm": 0.689331248687117, + "learning_rate": 1.9996127129175402e-06, + "loss": 0.7659468650817871, + "step": 508 + }, + { + "epoch": 0.11736223195757436, + "grad_norm": 0.9057052272338976, + "learning_rate": 1.999602030345824e-06, + "loss": 0.6467913389205933, + "step": 509 + }, + { + "epoch": 0.11759280608715703, + "grad_norm": 0.9026632882900626, + "learning_rate": 1.9995912024718214e-06, + "loss": 0.8207371234893799, + "step": 510 + }, + { + "epoch": 0.11782338021673969, + "grad_norm": 0.6427345565408408, + "learning_rate": 1.999580229297108e-06, + "loss": 0.6865919232368469, + "step": 511 + }, + { + "epoch": 0.11805395434632235, + "grad_norm": 0.9123825063372557, + "learning_rate": 1.999569110823277e-06, + "loss": 0.7367759346961975, + "step": 512 + }, + { + "epoch": 0.11828452847590501, + "grad_norm": 0.7732312467631449, + "learning_rate": 
1.9995578470519455e-06, + "loss": 0.678460955619812, + "step": 513 + }, + { + "epoch": 0.11851510260548767, + "grad_norm": 0.9273893139854266, + "learning_rate": 1.999546437984751e-06, + "loss": 0.7442954182624817, + "step": 514 + }, + { + "epoch": 0.11874567673507032, + "grad_norm": 0.7064385006159516, + "learning_rate": 1.9995348836233515e-06, + "loss": 0.6881241798400879, + "step": 515 + }, + { + "epoch": 0.11897625086465298, + "grad_norm": 0.7494917485319132, + "learning_rate": 1.9995231839694267e-06, + "loss": 0.6957181692123413, + "step": 516 + }, + { + "epoch": 0.11920682499423564, + "grad_norm": 1.0228956088069594, + "learning_rate": 1.9995113390246773e-06, + "loss": 0.655665934085846, + "step": 517 + }, + { + "epoch": 0.1194373991238183, + "grad_norm": 0.8789756041062182, + "learning_rate": 1.9994993487908245e-06, + "loss": 0.8156173229217529, + "step": 518 + }, + { + "epoch": 0.11966797325340096, + "grad_norm": 0.8973364358315123, + "learning_rate": 1.9994872132696125e-06, + "loss": 0.7063135504722595, + "step": 519 + }, + { + "epoch": 0.11989854738298363, + "grad_norm": 0.91785396837973, + "learning_rate": 1.9994749324628046e-06, + "loss": 0.694409966468811, + "step": 520 + }, + { + "epoch": 0.12012912151256629, + "grad_norm": 0.7331348179727938, + "learning_rate": 1.9994625063721852e-06, + "loss": 0.8167020082473755, + "step": 521 + }, + { + "epoch": 0.12035969564214895, + "grad_norm": 0.9326590546614593, + "learning_rate": 1.9994499349995615e-06, + "loss": 0.7214051485061646, + "step": 522 + }, + { + "epoch": 0.12059026977173161, + "grad_norm": 0.8993621490561152, + "learning_rate": 1.999437218346761e-06, + "loss": 0.8798317909240723, + "step": 523 + }, + { + "epoch": 0.12082084390131427, + "grad_norm": 0.6552492075288662, + "learning_rate": 1.9994243564156316e-06, + "loss": 0.684230387210846, + "step": 524 + }, + { + "epoch": 0.12105141803089693, + "grad_norm": 0.9112132053465716, + "learning_rate": 1.999411349208043e-06, + "loss": 0.7519755363464355, 
+ "step": 525 + }, + { + "epoch": 0.1212819921604796, + "grad_norm": 0.8052315425352758, + "learning_rate": 1.9993981967258857e-06, + "loss": 0.8420398235321045, + "step": 526 + }, + { + "epoch": 0.12151256629006225, + "grad_norm": 0.7105743668928439, + "learning_rate": 1.999384898971073e-06, + "loss": 0.8349270820617676, + "step": 527 + }, + { + "epoch": 0.12174314041964492, + "grad_norm": 1.0983006521395142, + "learning_rate": 1.999371455945536e-06, + "loss": 0.794980525970459, + "step": 528 + }, + { + "epoch": 0.12197371454922758, + "grad_norm": 1.1816598770476783, + "learning_rate": 1.9993578676512294e-06, + "loss": 0.666529655456543, + "step": 529 + }, + { + "epoch": 0.12220428867881024, + "grad_norm": 0.7564948773505585, + "learning_rate": 1.999344134090129e-06, + "loss": 0.7356991767883301, + "step": 530 + }, + { + "epoch": 0.1224348628083929, + "grad_norm": 0.8210277180950322, + "learning_rate": 1.9993302552642305e-06, + "loss": 0.6289858818054199, + "step": 531 + }, + { + "epoch": 0.12266543693797556, + "grad_norm": 0.7570779839057131, + "learning_rate": 1.9993162311755516e-06, + "loss": 0.706937313079834, + "step": 532 + }, + { + "epoch": 0.12289601106755822, + "grad_norm": 0.8676215771749471, + "learning_rate": 1.99930206182613e-06, + "loss": 0.7265158891677856, + "step": 533 + }, + { + "epoch": 0.12312658519714088, + "grad_norm": 0.7802472371537522, + "learning_rate": 1.999287747218027e-06, + "loss": 0.6575910449028015, + "step": 534 + }, + { + "epoch": 0.12335715932672355, + "grad_norm": 0.6298254280489823, + "learning_rate": 1.999273287353322e-06, + "loss": 0.6696841716766357, + "step": 535 + }, + { + "epoch": 0.1235877334563062, + "grad_norm": 1.071079002554872, + "learning_rate": 1.9992586822341177e-06, + "loss": 0.7749101519584656, + "step": 536 + }, + { + "epoch": 0.12381830758588887, + "grad_norm": 0.9432884782892066, + "learning_rate": 1.9992439318625367e-06, + "loss": 0.6880518198013306, + "step": 537 + }, + { + "epoch": 0.12404888171547153, + 
"grad_norm": 0.7827285978985046, + "learning_rate": 1.999229036240723e-06, + "loss": 0.6871178150177002, + "step": 538 + }, + { + "epoch": 0.12427945584505419, + "grad_norm": 0.7976778538474537, + "learning_rate": 1.999213995370842e-06, + "loss": 0.5867285132408142, + "step": 539 + }, + { + "epoch": 0.12451002997463685, + "grad_norm": 0.9357527236724963, + "learning_rate": 1.99919880925508e-06, + "loss": 0.8276966214179993, + "step": 540 + }, + { + "epoch": 0.12474060410421951, + "grad_norm": 1.0175450529032033, + "learning_rate": 1.9991834778956445e-06, + "loss": 0.7710754871368408, + "step": 541 + }, + { + "epoch": 0.12497117823380217, + "grad_norm": 0.9390745817535735, + "learning_rate": 1.9991680012947642e-06, + "loss": 0.7753217816352844, + "step": 542 + }, + { + "epoch": 0.12520175236338482, + "grad_norm": 0.8094522929040034, + "learning_rate": 1.9991523794546886e-06, + "loss": 0.7906090617179871, + "step": 543 + }, + { + "epoch": 0.12543232649296748, + "grad_norm": 0.9340000664605023, + "learning_rate": 1.9991366123776885e-06, + "loss": 0.7199760675430298, + "step": 544 + }, + { + "epoch": 0.12566290062255014, + "grad_norm": 0.7023452308433018, + "learning_rate": 1.9991207000660556e-06, + "loss": 0.671667218208313, + "step": 545 + }, + { + "epoch": 0.1258934747521328, + "grad_norm": 0.8347026711317173, + "learning_rate": 1.9991046425221036e-06, + "loss": 0.7289182543754578, + "step": 546 + }, + { + "epoch": 0.12612404888171547, + "grad_norm": 0.7827652568460417, + "learning_rate": 1.999088439748166e-06, + "loss": 0.6894270181655884, + "step": 547 + }, + { + "epoch": 0.12635462301129813, + "grad_norm": 0.7280796152072353, + "learning_rate": 1.9990720917465983e-06, + "loss": 0.5861620306968689, + "step": 548 + }, + { + "epoch": 0.1265851971408808, + "grad_norm": 0.9057106564897087, + "learning_rate": 1.999055598519777e-06, + "loss": 0.7082245349884033, + "step": 549 + }, + { + "epoch": 0.12681577127046345, + "grad_norm": 0.9647506404446157, + "learning_rate": 
1.999038960070099e-06, + "loss": 0.6746149659156799, + "step": 550 + }, + { + "epoch": 0.1270463454000461, + "grad_norm": 0.8620899067636014, + "learning_rate": 1.999022176399983e-06, + "loss": 0.7791188955307007, + "step": 551 + }, + { + "epoch": 0.12727691952962877, + "grad_norm": 0.7157725370776972, + "learning_rate": 1.999005247511869e-06, + "loss": 0.6371017694473267, + "step": 552 + }, + { + "epoch": 0.12750749365921143, + "grad_norm": 1.0373263968991309, + "learning_rate": 1.9989881734082182e-06, + "loss": 0.7006558179855347, + "step": 553 + }, + { + "epoch": 0.1277380677887941, + "grad_norm": 1.0670128946400503, + "learning_rate": 1.9989709540915115e-06, + "loss": 0.7011476755142212, + "step": 554 + }, + { + "epoch": 0.12796864191837676, + "grad_norm": 0.7293348024241428, + "learning_rate": 1.998953589564252e-06, + "loss": 0.6518280506134033, + "step": 555 + }, + { + "epoch": 0.12819921604795942, + "grad_norm": 1.013490270581775, + "learning_rate": 1.9989360798289646e-06, + "loss": 0.703351616859436, + "step": 556 + }, + { + "epoch": 0.12842979017754208, + "grad_norm": 0.9007382613729068, + "learning_rate": 1.998918424888194e-06, + "loss": 0.7498817443847656, + "step": 557 + }, + { + "epoch": 0.12866036430712474, + "grad_norm": 0.7936147649672419, + "learning_rate": 1.998900624744507e-06, + "loss": 0.647042989730835, + "step": 558 + }, + { + "epoch": 0.1288909384367074, + "grad_norm": 1.058658035724676, + "learning_rate": 1.99888267940049e-06, + "loss": 0.7519131898880005, + "step": 559 + }, + { + "epoch": 0.12912151256629006, + "grad_norm": 0.9392201849899589, + "learning_rate": 1.9988645888587524e-06, + "loss": 0.8416757583618164, + "step": 560 + }, + { + "epoch": 0.12935208669587273, + "grad_norm": 0.7856467653874107, + "learning_rate": 1.9988463531219238e-06, + "loss": 0.7044156193733215, + "step": 561 + }, + { + "epoch": 0.1295826608254554, + "grad_norm": 0.7712707168267965, + "learning_rate": 1.9988279721926547e-06, + "loss": 0.5429179668426514, + 
"step": 562 + }, + { + "epoch": 0.12981323495503805, + "grad_norm": 0.8186921939471294, + "learning_rate": 1.9988094460736173e-06, + "loss": 0.6146735548973083, + "step": 563 + }, + { + "epoch": 0.1300438090846207, + "grad_norm": 0.8439852070799176, + "learning_rate": 1.9987907747675038e-06, + "loss": 0.7544587850570679, + "step": 564 + }, + { + "epoch": 0.13027438321420337, + "grad_norm": 0.9760725928946941, + "learning_rate": 1.998771958277029e-06, + "loss": 0.7344266772270203, + "step": 565 + }, + { + "epoch": 0.13050495734378603, + "grad_norm": 0.8485941936610121, + "learning_rate": 1.9987529966049276e-06, + "loss": 0.6952091455459595, + "step": 566 + }, + { + "epoch": 0.1307355314733687, + "grad_norm": 0.7996168239987546, + "learning_rate": 1.9987338897539563e-06, + "loss": 0.6164644956588745, + "step": 567 + }, + { + "epoch": 0.13096610560295135, + "grad_norm": 1.04815525718601, + "learning_rate": 1.998714637726892e-06, + "loss": 0.7554208636283875, + "step": 568 + }, + { + "epoch": 0.13119667973253402, + "grad_norm": 0.97358719596577, + "learning_rate": 1.9986952405265336e-06, + "loss": 0.6640980243682861, + "step": 569 + }, + { + "epoch": 0.13142725386211668, + "grad_norm": 0.8089360786109361, + "learning_rate": 1.9986756981557005e-06, + "loss": 0.6947968006134033, + "step": 570 + }, + { + "epoch": 0.13165782799169934, + "grad_norm": 0.8239726316605849, + "learning_rate": 1.9986560106172332e-06, + "loss": 0.5987592935562134, + "step": 571 + }, + { + "epoch": 0.131888402121282, + "grad_norm": 0.709030479654625, + "learning_rate": 1.9986361779139944e-06, + "loss": 0.5830701589584351, + "step": 572 + }, + { + "epoch": 0.13211897625086466, + "grad_norm": 1.1719328645727012, + "learning_rate": 1.9986162000488655e-06, + "loss": 0.6589827537536621, + "step": 573 + }, + { + "epoch": 0.13234955038044732, + "grad_norm": 0.795778409153881, + "learning_rate": 1.9985960770247514e-06, + "loss": 0.7761766910552979, + "step": 574 + }, + { + "epoch": 0.13258012451002998, + 
"grad_norm": 0.8403074018612, + "learning_rate": 1.998575808844577e-06, + "loss": 0.6817613244056702, + "step": 575 + }, + { + "epoch": 0.13281069863961265, + "grad_norm": 0.8817998372104671, + "learning_rate": 1.998555395511289e-06, + "loss": 0.553085207939148, + "step": 576 + }, + { + "epoch": 0.1330412727691953, + "grad_norm": 0.6885856342268037, + "learning_rate": 1.998534837027854e-06, + "loss": 0.6500711441040039, + "step": 577 + }, + { + "epoch": 0.13327184689877797, + "grad_norm": 1.046231764034874, + "learning_rate": 1.9985141333972605e-06, + "loss": 0.7818950414657593, + "step": 578 + }, + { + "epoch": 0.13350242102836063, + "grad_norm": 0.7987907466299384, + "learning_rate": 1.9984932846225178e-06, + "loss": 0.7030247449874878, + "step": 579 + }, + { + "epoch": 0.1337329951579433, + "grad_norm": 0.7031460051202854, + "learning_rate": 1.9984722907066572e-06, + "loss": 0.6336206197738647, + "step": 580 + }, + { + "epoch": 0.13396356928752595, + "grad_norm": 0.8178681347907562, + "learning_rate": 1.9984511516527295e-06, + "loss": 0.7483044862747192, + "step": 581 + }, + { + "epoch": 0.1341941434171086, + "grad_norm": 0.8070808524670383, + "learning_rate": 1.9984298674638084e-06, + "loss": 0.7124725580215454, + "step": 582 + }, + { + "epoch": 0.13442471754669127, + "grad_norm": 0.8209937510618921, + "learning_rate": 1.998408438142987e-06, + "loss": 0.623436450958252, + "step": 583 + }, + { + "epoch": 0.1346552916762739, + "grad_norm": 0.8592886051949084, + "learning_rate": 1.9983868636933804e-06, + "loss": 0.646303653717041, + "step": 584 + }, + { + "epoch": 0.13488586580585657, + "grad_norm": 0.715391883952278, + "learning_rate": 1.998365144118125e-06, + "loss": 0.6349619626998901, + "step": 585 + }, + { + "epoch": 0.13511643993543923, + "grad_norm": 0.842094849315078, + "learning_rate": 1.9983432794203778e-06, + "loss": 0.5222466588020325, + "step": 586 + }, + { + "epoch": 0.1353470140650219, + "grad_norm": 0.7893129778630776, + "learning_rate": 
1.998321269603317e-06, + "loss": 0.7210453152656555, + "step": 587 + }, + { + "epoch": 0.13557758819460455, + "grad_norm": 0.8260995902689467, + "learning_rate": 1.998299114670142e-06, + "loss": 0.6829872131347656, + "step": 588 + }, + { + "epoch": 0.13580816232418721, + "grad_norm": 0.714861095640182, + "learning_rate": 1.998276814624073e-06, + "loss": 0.6493744254112244, + "step": 589 + }, + { + "epoch": 0.13603873645376988, + "grad_norm": 0.8350239344719634, + "learning_rate": 1.998254369468352e-06, + "loss": 0.6885819435119629, + "step": 590 + }, + { + "epoch": 0.13626931058335254, + "grad_norm": 0.7070632175859811, + "learning_rate": 1.9982317792062415e-06, + "loss": 0.6393503546714783, + "step": 591 + }, + { + "epoch": 0.1364998847129352, + "grad_norm": 1.010551624947432, + "learning_rate": 1.998209043841025e-06, + "loss": 0.7243417501449585, + "step": 592 + }, + { + "epoch": 0.13673045884251786, + "grad_norm": 0.693273868923859, + "learning_rate": 1.9981861633760073e-06, + "loss": 0.5955190658569336, + "step": 593 + }, + { + "epoch": 0.13696103297210052, + "grad_norm": 0.89841301134605, + "learning_rate": 1.9981631378145147e-06, + "loss": 0.6907675862312317, + "step": 594 + }, + { + "epoch": 0.13719160710168318, + "grad_norm": 1.022542216960162, + "learning_rate": 1.9981399671598938e-06, + "loss": 0.8540418148040771, + "step": 595 + }, + { + "epoch": 0.13742218123126584, + "grad_norm": 0.850573072747265, + "learning_rate": 1.9981166514155128e-06, + "loss": 0.6558555364608765, + "step": 596 + }, + { + "epoch": 0.1376527553608485, + "grad_norm": 0.9448807343375427, + "learning_rate": 1.9980931905847607e-06, + "loss": 0.6902164220809937, + "step": 597 + }, + { + "epoch": 0.13788332949043117, + "grad_norm": 1.240663469028779, + "learning_rate": 1.9980695846710485e-06, + "loss": 0.7090387344360352, + "step": 598 + }, + { + "epoch": 0.13811390362001383, + "grad_norm": 0.8847772852436644, + "learning_rate": 1.9980458336778067e-06, + "loss": 0.5913621187210083, + 
"step": 599 + }, + { + "epoch": 0.1383444777495965, + "grad_norm": 0.864647475805302, + "learning_rate": 1.998021937608488e-06, + "loss": 0.6742709279060364, + "step": 600 + }, + { + "epoch": 0.13857505187917915, + "grad_norm": 0.9253166862332501, + "learning_rate": 1.997997896466566e-06, + "loss": 0.7156273126602173, + "step": 601 + }, + { + "epoch": 0.1388056260087618, + "grad_norm": 0.7104566809406643, + "learning_rate": 1.9979737102555358e-06, + "loss": 0.6039655208587646, + "step": 602 + }, + { + "epoch": 0.13903620013834447, + "grad_norm": 0.7521323143425293, + "learning_rate": 1.9979493789789123e-06, + "loss": 0.6437175273895264, + "step": 603 + }, + { + "epoch": 0.13926677426792713, + "grad_norm": 0.7922747435817725, + "learning_rate": 1.9979249026402327e-06, + "loss": 0.6037663221359253, + "step": 604 + }, + { + "epoch": 0.1394973483975098, + "grad_norm": 0.8526913554693543, + "learning_rate": 1.9979002812430544e-06, + "loss": 0.6014829874038696, + "step": 605 + }, + { + "epoch": 0.13972792252709246, + "grad_norm": 0.9960319429386536, + "learning_rate": 1.9978755147909575e-06, + "loss": 0.5644428133964539, + "step": 606 + }, + { + "epoch": 0.13995849665667512, + "grad_norm": 0.7146930597248379, + "learning_rate": 1.997850603287541e-06, + "loss": 0.5483256578445435, + "step": 607 + }, + { + "epoch": 0.14018907078625778, + "grad_norm": 0.941628560636658, + "learning_rate": 1.9978255467364264e-06, + "loss": 0.6323236227035522, + "step": 608 + }, + { + "epoch": 0.14041964491584044, + "grad_norm": 0.8661204864695959, + "learning_rate": 1.9978003451412563e-06, + "loss": 0.677186131477356, + "step": 609 + }, + { + "epoch": 0.1406502190454231, + "grad_norm": 0.7467694215725664, + "learning_rate": 1.9977749985056934e-06, + "loss": 0.6768285036087036, + "step": 610 + }, + { + "epoch": 0.14088079317500576, + "grad_norm": 0.6978429335446755, + "learning_rate": 1.997749506833422e-06, + "loss": 0.5347047448158264, + "step": 611 + }, + { + "epoch": 0.14111136730458843, + 
"grad_norm": 0.8856138167235749, + "learning_rate": 1.9977238701281484e-06, + "loss": 0.7459336519241333, + "step": 612 + }, + { + "epoch": 0.1413419414341711, + "grad_norm": 0.7081494897690513, + "learning_rate": 1.9976980883935982e-06, + "loss": 0.6617337465286255, + "step": 613 + }, + { + "epoch": 0.14157251556375375, + "grad_norm": 0.766248846701343, + "learning_rate": 1.9976721616335197e-06, + "loss": 0.6214765310287476, + "step": 614 + }, + { + "epoch": 0.1418030896933364, + "grad_norm": 0.9664061776833217, + "learning_rate": 1.9976460898516814e-06, + "loss": 0.7468793392181396, + "step": 615 + }, + { + "epoch": 0.14203366382291907, + "grad_norm": 0.9401860990707812, + "learning_rate": 1.9976198730518733e-06, + "loss": 0.676013708114624, + "step": 616 + }, + { + "epoch": 0.14226423795250173, + "grad_norm": 0.7984359669803877, + "learning_rate": 1.9975935112379057e-06, + "loss": 0.6350057125091553, + "step": 617 + }, + { + "epoch": 0.1424948120820844, + "grad_norm": 0.7941645196610473, + "learning_rate": 1.997567004413611e-06, + "loss": 0.6743426322937012, + "step": 618 + }, + { + "epoch": 0.14272538621166705, + "grad_norm": 0.9456320720036326, + "learning_rate": 1.9975403525828423e-06, + "loss": 0.5894836187362671, + "step": 619 + }, + { + "epoch": 0.14295596034124972, + "grad_norm": 1.1964423414511856, + "learning_rate": 1.9975135557494735e-06, + "loss": 0.7142415046691895, + "step": 620 + }, + { + "epoch": 0.14318653447083238, + "grad_norm": 0.7973360588907056, + "learning_rate": 1.9974866139174e-06, + "loss": 0.6402454972267151, + "step": 621 + }, + { + "epoch": 0.14341710860041504, + "grad_norm": 0.8197617379148621, + "learning_rate": 1.997459527090538e-06, + "loss": 0.6870661973953247, + "step": 622 + }, + { + "epoch": 0.1436476827299977, + "grad_norm": 0.9660987988063562, + "learning_rate": 1.9974322952728247e-06, + "loss": 0.5526704788208008, + "step": 623 + }, + { + "epoch": 0.14387825685958036, + "grad_norm": 0.8373386744091922, + "learning_rate": 
1.9974049184682186e-06, + "loss": 0.6712762117385864, + "step": 624 + }, + { + "epoch": 0.14410883098916302, + "grad_norm": 0.8330659804365839, + "learning_rate": 1.997377396680699e-06, + "loss": 0.6064080595970154, + "step": 625 + }, + { + "epoch": 0.14433940511874568, + "grad_norm": 0.7758896299152315, + "learning_rate": 1.997349729914267e-06, + "loss": 0.5540767908096313, + "step": 626 + }, + { + "epoch": 0.14456997924832835, + "grad_norm": 0.7444906414234538, + "learning_rate": 1.997321918172944e-06, + "loss": 0.52143394947052, + "step": 627 + }, + { + "epoch": 0.144800553377911, + "grad_norm": 0.8091707705607726, + "learning_rate": 1.9972939614607723e-06, + "loss": 0.7708792686462402, + "step": 628 + }, + { + "epoch": 0.14503112750749367, + "grad_norm": 1.0019252225174067, + "learning_rate": 1.997265859781816e-06, + "loss": 0.706872284412384, + "step": 629 + }, + { + "epoch": 0.14526170163707633, + "grad_norm": 0.7978488701627702, + "learning_rate": 1.99723761314016e-06, + "loss": 0.6643307209014893, + "step": 630 + }, + { + "epoch": 0.145492275766659, + "grad_norm": 1.0319728160628425, + "learning_rate": 1.9972092215399107e-06, + "loss": 0.6582880020141602, + "step": 631 + }, + { + "epoch": 0.14572284989624165, + "grad_norm": 0.7041979367649327, + "learning_rate": 1.997180684985194e-06, + "loss": 0.5704749822616577, + "step": 632 + }, + { + "epoch": 0.1459534240258243, + "grad_norm": 0.9160954038448087, + "learning_rate": 1.997152003480159e-06, + "loss": 0.6021866798400879, + "step": 633 + }, + { + "epoch": 0.14618399815540697, + "grad_norm": 1.0186739140184302, + "learning_rate": 1.9971231770289745e-06, + "loss": 0.6980762481689453, + "step": 634 + }, + { + "epoch": 0.14641457228498964, + "grad_norm": 0.9102171344238382, + "learning_rate": 1.9970942056358307e-06, + "loss": 0.6252140998840332, + "step": 635 + }, + { + "epoch": 0.1466451464145723, + "grad_norm": 0.8257085970836279, + "learning_rate": 1.9970650893049384e-06, + "loss": 0.5938589572906494, + 
"step": 636 + }, + { + "epoch": 0.14687572054415496, + "grad_norm": 0.7561297866548697, + "learning_rate": 1.997035828040531e-06, + "loss": 0.48420464992523193, + "step": 637 + }, + { + "epoch": 0.14710629467373762, + "grad_norm": 1.1749911282917564, + "learning_rate": 1.997006421846861e-06, + "loss": 0.6917499303817749, + "step": 638 + }, + { + "epoch": 0.14733686880332028, + "grad_norm": 0.9636395596462505, + "learning_rate": 1.9969768707282034e-06, + "loss": 0.7040522694587708, + "step": 639 + }, + { + "epoch": 0.14756744293290291, + "grad_norm": 0.7956128694692409, + "learning_rate": 1.9969471746888535e-06, + "loss": 0.6131860017776489, + "step": 640 + }, + { + "epoch": 0.14779801706248558, + "grad_norm": 0.8000550155014501, + "learning_rate": 1.996917333733128e-06, + "loss": 0.7042062282562256, + "step": 641 + }, + { + "epoch": 0.14802859119206824, + "grad_norm": 0.9440344299424565, + "learning_rate": 1.9968873478653647e-06, + "loss": 0.6729326844215393, + "step": 642 + }, + { + "epoch": 0.1482591653216509, + "grad_norm": 0.8065631083250541, + "learning_rate": 1.996857217089922e-06, + "loss": 0.5801228880882263, + "step": 643 + }, + { + "epoch": 0.14848973945123356, + "grad_norm": 0.9584481605552773, + "learning_rate": 1.99682694141118e-06, + "loss": 0.6657989025115967, + "step": 644 + }, + { + "epoch": 0.14872031358081622, + "grad_norm": 0.8276892521273487, + "learning_rate": 1.9967965208335395e-06, + "loss": 0.5915562510490417, + "step": 645 + }, + { + "epoch": 0.14895088771039888, + "grad_norm": 0.8005079741579677, + "learning_rate": 1.9967659553614225e-06, + "loss": 0.6651759147644043, + "step": 646 + }, + { + "epoch": 0.14918146183998154, + "grad_norm": 0.785500734493462, + "learning_rate": 1.996735244999272e-06, + "loss": 0.625860333442688, + "step": 647 + }, + { + "epoch": 0.1494120359695642, + "grad_norm": 0.891334856659417, + "learning_rate": 1.996704389751552e-06, + "loss": 0.5731238126754761, + "step": 648 + }, + { + "epoch": 0.14964261009914687, + 
"grad_norm": 0.8662032133236818, + "learning_rate": 1.996673389622748e-06, + "loss": 0.6233615875244141, + "step": 649 + }, + { + "epoch": 0.14987318422872953, + "grad_norm": 0.7037223780792468, + "learning_rate": 1.9966422446173655e-06, + "loss": 0.5294947028160095, + "step": 650 + }, + { + "epoch": 0.1501037583583122, + "grad_norm": 0.8024689158972043, + "learning_rate": 1.996610954739932e-06, + "loss": 0.6234334707260132, + "step": 651 + }, + { + "epoch": 0.15033433248789485, + "grad_norm": 0.9863259301950934, + "learning_rate": 1.996579519994996e-06, + "loss": 0.5800126194953918, + "step": 652 + }, + { + "epoch": 0.1505649066174775, + "grad_norm": 0.9145794705086053, + "learning_rate": 1.9965479403871268e-06, + "loss": 0.7072441577911377, + "step": 653 + }, + { + "epoch": 0.15079548074706017, + "grad_norm": 0.8604804316966843, + "learning_rate": 1.996516215920915e-06, + "loss": 0.6350210309028625, + "step": 654 + }, + { + "epoch": 0.15102605487664283, + "grad_norm": 0.8272551438363688, + "learning_rate": 1.996484346600971e-06, + "loss": 0.6098944544792175, + "step": 655 + }, + { + "epoch": 0.1512566290062255, + "grad_norm": 0.7942772112843086, + "learning_rate": 1.996452332431929e-06, + "loss": 0.6593213081359863, + "step": 656 + }, + { + "epoch": 0.15148720313580816, + "grad_norm": 1.0870788996229426, + "learning_rate": 1.9964201734184413e-06, + "loss": 0.6997909545898438, + "step": 657 + }, + { + "epoch": 0.15171777726539082, + "grad_norm": 0.8320533396880808, + "learning_rate": 1.996387869565183e-06, + "loss": 0.5672277212142944, + "step": 658 + }, + { + "epoch": 0.15194835139497348, + "grad_norm": 0.8777194103988153, + "learning_rate": 1.99635542087685e-06, + "loss": 0.5835613012313843, + "step": 659 + }, + { + "epoch": 0.15217892552455614, + "grad_norm": 1.0025309187744094, + "learning_rate": 1.9963228273581587e-06, + "loss": 0.6001917123794556, + "step": 660 + }, + { + "epoch": 0.1524094996541388, + "grad_norm": 0.9582174045063777, + "learning_rate": 
1.996290089013847e-06, + "loss": 0.6421242356300354, + "step": 661 + }, + { + "epoch": 0.15264007378372146, + "grad_norm": 0.8996449559898986, + "learning_rate": 1.996257205848674e-06, + "loss": 0.6888365745544434, + "step": 662 + }, + { + "epoch": 0.15287064791330413, + "grad_norm": 0.8017642329752841, + "learning_rate": 1.9962241778674193e-06, + "loss": 0.6694042682647705, + "step": 663 + }, + { + "epoch": 0.1531012220428868, + "grad_norm": 0.8362235694997654, + "learning_rate": 1.9961910050748836e-06, + "loss": 0.6754042506217957, + "step": 664 + }, + { + "epoch": 0.15333179617246945, + "grad_norm": 0.9429947161447709, + "learning_rate": 1.9961576874758893e-06, + "loss": 0.576134979724884, + "step": 665 + }, + { + "epoch": 0.1535623703020521, + "grad_norm": 0.8634505888713511, + "learning_rate": 1.9961242250752796e-06, + "loss": 0.6548957824707031, + "step": 666 + }, + { + "epoch": 0.15379294443163477, + "grad_norm": 0.8494612034918267, + "learning_rate": 1.9960906178779183e-06, + "loss": 0.553372859954834, + "step": 667 + }, + { + "epoch": 0.15402351856121743, + "grad_norm": 0.8776559544848238, + "learning_rate": 1.9960568658886904e-06, + "loss": 0.6749063730239868, + "step": 668 + }, + { + "epoch": 0.1542540926908001, + "grad_norm": 0.8490449157821316, + "learning_rate": 1.9960229691125023e-06, + "loss": 0.6083666086196899, + "step": 669 + }, + { + "epoch": 0.15448466682038275, + "grad_norm": 0.9102216407598661, + "learning_rate": 1.995988927554281e-06, + "loss": 0.6468017101287842, + "step": 670 + }, + { + "epoch": 0.15471524094996542, + "grad_norm": 0.9054463862187181, + "learning_rate": 1.995954741218976e-06, + "loss": 0.7095121145248413, + "step": 671 + }, + { + "epoch": 0.15494581507954808, + "grad_norm": 0.8984210973740085, + "learning_rate": 1.995920410111555e-06, + "loss": 0.7167302966117859, + "step": 672 + }, + { + "epoch": 0.15517638920913074, + "grad_norm": 0.9754903087688545, + "learning_rate": 1.995885934237009e-06, + "loss": 0.6563462018966675, 
+ "step": 673 + }, + { + "epoch": 0.1554069633387134, + "grad_norm": 0.7833661271069817, + "learning_rate": 1.9958513136003495e-06, + "loss": 0.638554573059082, + "step": 674 + }, + { + "epoch": 0.15563753746829606, + "grad_norm": 1.1119382875058637, + "learning_rate": 1.995816548206609e-06, + "loss": 0.7051291465759277, + "step": 675 + }, + { + "epoch": 0.15586811159787872, + "grad_norm": 0.879000690907415, + "learning_rate": 1.995781638060841e-06, + "loss": 0.6292394399642944, + "step": 676 + }, + { + "epoch": 0.15609868572746138, + "grad_norm": 0.7328696227145686, + "learning_rate": 1.99574658316812e-06, + "loss": 0.5266016721725464, + "step": 677 + }, + { + "epoch": 0.15632925985704405, + "grad_norm": 0.8021809147598078, + "learning_rate": 1.9957113835335415e-06, + "loss": 0.6059033870697021, + "step": 678 + }, + { + "epoch": 0.1565598339866267, + "grad_norm": 1.0012445200078677, + "learning_rate": 1.995676039162222e-06, + "loss": 0.5252447128295898, + "step": 679 + }, + { + "epoch": 0.15679040811620937, + "grad_norm": 0.9661534967224599, + "learning_rate": 1.9956405500593e-06, + "loss": 0.5963196754455566, + "step": 680 + }, + { + "epoch": 0.15702098224579203, + "grad_norm": 1.1191160767100459, + "learning_rate": 1.9956049162299322e-06, + "loss": 0.7262317538261414, + "step": 681 + }, + { + "epoch": 0.1572515563753747, + "grad_norm": 0.6929567178003186, + "learning_rate": 1.995569137679301e-06, + "loss": 0.6701623201370239, + "step": 682 + }, + { + "epoch": 0.15748213050495735, + "grad_norm": 1.1067508842107727, + "learning_rate": 1.9955332144126048e-06, + "loss": 0.6201569437980652, + "step": 683 + }, + { + "epoch": 0.15771270463454, + "grad_norm": 0.8729576302308473, + "learning_rate": 1.9954971464350673e-06, + "loss": 0.5338399410247803, + "step": 684 + }, + { + "epoch": 0.15794327876412267, + "grad_norm": 1.0541267316046437, + "learning_rate": 1.99546093375193e-06, + "loss": 0.6784210205078125, + "step": 685 + }, + { + "epoch": 0.15817385289370534, + 
"grad_norm": 0.7386088048688241, + "learning_rate": 1.9954245763684574e-06, + "loss": 0.6752813458442688, + "step": 686 + }, + { + "epoch": 0.158404427023288, + "grad_norm": 0.92655840240498, + "learning_rate": 1.9953880742899344e-06, + "loss": 0.6734355688095093, + "step": 687 + }, + { + "epoch": 0.15863500115287066, + "grad_norm": 1.0183777461857344, + "learning_rate": 1.995351427521667e-06, + "loss": 0.4857062101364136, + "step": 688 + }, + { + "epoch": 0.15886557528245332, + "grad_norm": 1.0292686670210065, + "learning_rate": 1.995314636068982e-06, + "loss": 0.6014343500137329, + "step": 689 + }, + { + "epoch": 0.15909614941203598, + "grad_norm": 0.6804392354384567, + "learning_rate": 1.995277699937227e-06, + "loss": 0.571649432182312, + "step": 690 + }, + { + "epoch": 0.15932672354161864, + "grad_norm": 0.8504096595688001, + "learning_rate": 1.9952406191317717e-06, + "loss": 0.5195556879043579, + "step": 691 + }, + { + "epoch": 0.1595572976712013, + "grad_norm": 1.0458950135227758, + "learning_rate": 1.995203393658006e-06, + "loss": 0.6520895957946777, + "step": 692 + }, + { + "epoch": 0.15978787180078396, + "grad_norm": 0.8415432435774023, + "learning_rate": 1.995166023521341e-06, + "loss": 0.7223460674285889, + "step": 693 + }, + { + "epoch": 0.16001844593036663, + "grad_norm": 0.9976828679541363, + "learning_rate": 1.9951285087272085e-06, + "loss": 0.5540120005607605, + "step": 694 + }, + { + "epoch": 0.1602490200599493, + "grad_norm": 0.9583028785849829, + "learning_rate": 1.995090849281062e-06, + "loss": 0.6539945602416992, + "step": 695 + }, + { + "epoch": 0.16047959418953192, + "grad_norm": 0.6996553037894581, + "learning_rate": 1.995053045188376e-06, + "loss": 0.595169186592102, + "step": 696 + }, + { + "epoch": 0.16071016831911458, + "grad_norm": 0.7841493951031693, + "learning_rate": 1.995015096454645e-06, + "loss": 0.564440131187439, + "step": 697 + }, + { + "epoch": 0.16094074244869724, + "grad_norm": 0.8288568147288248, + "learning_rate": 
1.9949770030853857e-06, + "loss": 0.5934277772903442, + "step": 698 + }, + { + "epoch": 0.1611713165782799, + "grad_norm": 0.8284586150514878, + "learning_rate": 1.9949387650861353e-06, + "loss": 0.5645352602005005, + "step": 699 + }, + { + "epoch": 0.16140189070786257, + "grad_norm": 0.7431587516594325, + "learning_rate": 1.9949003824624517e-06, + "loss": 0.6437552571296692, + "step": 700 + }, + { + "epoch": 0.16163246483744523, + "grad_norm": 0.9720884796741701, + "learning_rate": 1.9948618552199147e-06, + "loss": 0.7052004337310791, + "step": 701 + }, + { + "epoch": 0.1618630389670279, + "grad_norm": 0.869867046800395, + "learning_rate": 1.994823183364124e-06, + "loss": 0.6547686457633972, + "step": 702 + }, + { + "epoch": 0.16209361309661055, + "grad_norm": 0.8852938288883528, + "learning_rate": 1.994784366900702e-06, + "loss": 0.582744836807251, + "step": 703 + }, + { + "epoch": 0.1623241872261932, + "grad_norm": 0.9493941174588165, + "learning_rate": 1.99474540583529e-06, + "loss": 0.6668936014175415, + "step": 704 + }, + { + "epoch": 0.16255476135577587, + "grad_norm": 0.8294615633120708, + "learning_rate": 1.994706300173552e-06, + "loss": 0.6076918840408325, + "step": 705 + }, + { + "epoch": 0.16278533548535853, + "grad_norm": 0.8313694025786441, + "learning_rate": 1.994667049921172e-06, + "loss": 0.5053621530532837, + "step": 706 + }, + { + "epoch": 0.1630159096149412, + "grad_norm": 0.7898437620774408, + "learning_rate": 1.994627655083856e-06, + "loss": 0.5480915904045105, + "step": 707 + }, + { + "epoch": 0.16324648374452386, + "grad_norm": 0.8758549357955973, + "learning_rate": 1.99458811566733e-06, + "loss": 0.5851327776908875, + "step": 708 + }, + { + "epoch": 0.16347705787410652, + "grad_norm": 0.8484239464634123, + "learning_rate": 1.9945484316773415e-06, + "loss": 0.7058213949203491, + "step": 709 + }, + { + "epoch": 0.16370763200368918, + "grad_norm": 1.019538936894149, + "learning_rate": 1.9945086031196588e-06, + "loss": 0.6900246739387512, + 
"step": 710 + }, + { + "epoch": 0.16393820613327184, + "grad_norm": 0.9247299002550031, + "learning_rate": 1.994468630000072e-06, + "loss": 0.6088757514953613, + "step": 711 + }, + { + "epoch": 0.1641687802628545, + "grad_norm": 0.82117755294185, + "learning_rate": 1.9944285123243908e-06, + "loss": 0.6167945861816406, + "step": 712 + }, + { + "epoch": 0.16439935439243716, + "grad_norm": 0.8171354955480022, + "learning_rate": 1.994388250098447e-06, + "loss": 0.5842427015304565, + "step": 713 + }, + { + "epoch": 0.16462992852201982, + "grad_norm": 1.0833616769520091, + "learning_rate": 1.9943478433280937e-06, + "loss": 0.6709132194519043, + "step": 714 + }, + { + "epoch": 0.1648605026516025, + "grad_norm": 0.9486447603343945, + "learning_rate": 1.994307292019204e-06, + "loss": 0.5600479245185852, + "step": 715 + }, + { + "epoch": 0.16509107678118515, + "grad_norm": 0.9425877157645439, + "learning_rate": 1.994266596177672e-06, + "loss": 0.59420245885849, + "step": 716 + }, + { + "epoch": 0.1653216509107678, + "grad_norm": 0.8878954538957776, + "learning_rate": 1.994225755809414e-06, + "loss": 0.6098697185516357, + "step": 717 + }, + { + "epoch": 0.16555222504035047, + "grad_norm": 0.9792435497913993, + "learning_rate": 1.994184770920366e-06, + "loss": 0.5626084804534912, + "step": 718 + }, + { + "epoch": 0.16578279916993313, + "grad_norm": 0.827415177568412, + "learning_rate": 1.9941436415164854e-06, + "loss": 0.633317232131958, + "step": 719 + }, + { + "epoch": 0.1660133732995158, + "grad_norm": 0.7458775266643737, + "learning_rate": 1.994102367603752e-06, + "loss": 0.6629287004470825, + "step": 720 + }, + { + "epoch": 0.16624394742909845, + "grad_norm": 0.8804838237561229, + "learning_rate": 1.994060949188164e-06, + "loss": 0.6281176805496216, + "step": 721 + }, + { + "epoch": 0.16647452155868112, + "grad_norm": 0.7448717784104247, + "learning_rate": 1.994019386275743e-06, + "loss": 0.49195849895477295, + "step": 722 + }, + { + "epoch": 0.16670509568826378, + 
"grad_norm": 0.8001133040698483, + "learning_rate": 1.9939776788725295e-06, + "loss": 0.5165697932243347, + "step": 723 + }, + { + "epoch": 0.16693566981784644, + "grad_norm": 0.7747636914973149, + "learning_rate": 1.9939358269845867e-06, + "loss": 0.6294844150543213, + "step": 724 + }, + { + "epoch": 0.1671662439474291, + "grad_norm": 0.944854174617811, + "learning_rate": 1.9938938306179986e-06, + "loss": 0.6117822527885437, + "step": 725 + }, + { + "epoch": 0.16739681807701176, + "grad_norm": 0.8223415721013929, + "learning_rate": 1.9938516897788693e-06, + "loss": 0.5904515981674194, + "step": 726 + }, + { + "epoch": 0.16762739220659442, + "grad_norm": 0.9451811550082199, + "learning_rate": 1.9938094044733247e-06, + "loss": 0.5453853011131287, + "step": 727 + }, + { + "epoch": 0.16785796633617708, + "grad_norm": 1.0093698810967915, + "learning_rate": 1.9937669747075107e-06, + "loss": 0.6724731922149658, + "step": 728 + }, + { + "epoch": 0.16808854046575974, + "grad_norm": 0.8787203913390783, + "learning_rate": 1.993724400487596e-06, + "loss": 0.4844778776168823, + "step": 729 + }, + { + "epoch": 0.1683191145953424, + "grad_norm": 1.0150110817624924, + "learning_rate": 1.9936816818197682e-06, + "loss": 0.6666063070297241, + "step": 730 + }, + { + "epoch": 0.16854968872492507, + "grad_norm": 0.8363215992575103, + "learning_rate": 1.9936388187102374e-06, + "loss": 0.49354803562164307, + "step": 731 + }, + { + "epoch": 0.16878026285450773, + "grad_norm": 1.011739420494133, + "learning_rate": 1.993595811165234e-06, + "loss": 0.6587027311325073, + "step": 732 + }, + { + "epoch": 0.1690108369840904, + "grad_norm": 0.8706809761457309, + "learning_rate": 1.9935526591910095e-06, + "loss": 0.5618065595626831, + "step": 733 + }, + { + "epoch": 0.16924141111367305, + "grad_norm": 1.0230867510580486, + "learning_rate": 1.993509362793837e-06, + "loss": 0.6332052946090698, + "step": 734 + }, + { + "epoch": 0.1694719852432557, + "grad_norm": 0.8938300688074264, + "learning_rate": 
1.9934659219800095e-06, + "loss": 0.5888797044754028, + "step": 735 + }, + { + "epoch": 0.16970255937283837, + "grad_norm": 0.9600504381358347, + "learning_rate": 1.9934223367558418e-06, + "loss": 0.6995177865028381, + "step": 736 + }, + { + "epoch": 0.16993313350242104, + "grad_norm": 0.8183852978697493, + "learning_rate": 1.9933786071276693e-06, + "loss": 0.6117641925811768, + "step": 737 + }, + { + "epoch": 0.1701637076320037, + "grad_norm": 0.8824726889784998, + "learning_rate": 1.9933347331018487e-06, + "loss": 0.7138235569000244, + "step": 738 + }, + { + "epoch": 0.17039428176158636, + "grad_norm": 0.9234925675447027, + "learning_rate": 1.993290714684758e-06, + "loss": 0.6139661073684692, + "step": 739 + }, + { + "epoch": 0.17062485589116902, + "grad_norm": 0.9457487351494172, + "learning_rate": 1.9932465518827945e-06, + "loss": 0.6998997926712036, + "step": 740 + }, + { + "epoch": 0.17085543002075168, + "grad_norm": 0.8625145077640682, + "learning_rate": 1.9932022447023787e-06, + "loss": 0.5736757516860962, + "step": 741 + }, + { + "epoch": 0.17108600415033434, + "grad_norm": 0.7768775382949296, + "learning_rate": 1.993157793149951e-06, + "loss": 0.6069833040237427, + "step": 742 + }, + { + "epoch": 0.171316578279917, + "grad_norm": 0.9368489446003049, + "learning_rate": 1.9931131972319726e-06, + "loss": 0.618720531463623, + "step": 743 + }, + { + "epoch": 0.17154715240949966, + "grad_norm": 1.1182101771495103, + "learning_rate": 1.9930684569549263e-06, + "loss": 0.6918530464172363, + "step": 744 + }, + { + "epoch": 0.17177772653908233, + "grad_norm": 0.9107072762217621, + "learning_rate": 1.993023572325315e-06, + "loss": 0.5303134322166443, + "step": 745 + }, + { + "epoch": 0.172008300668665, + "grad_norm": 1.163525853024132, + "learning_rate": 1.9929785433496637e-06, + "loss": 0.5017606019973755, + "step": 746 + }, + { + "epoch": 0.17223887479824765, + "grad_norm": 0.8248835281602814, + "learning_rate": 1.9929333700345176e-06, + "loss": 0.5683910846710205, 
+ "step": 747 + }, + { + "epoch": 0.1724694489278303, + "grad_norm": 1.024957040527593, + "learning_rate": 1.992888052386443e-06, + "loss": 0.7594112157821655, + "step": 748 + }, + { + "epoch": 0.17270002305741297, + "grad_norm": 0.8415419064063624, + "learning_rate": 1.9928425904120272e-06, + "loss": 0.5817109942436218, + "step": 749 + }, + { + "epoch": 0.17293059718699563, + "grad_norm": 0.9772344685918459, + "learning_rate": 1.9927969841178785e-06, + "loss": 0.74810391664505, + "step": 750 + }, + { + "epoch": 0.17316117131657827, + "grad_norm": 0.7709842631317299, + "learning_rate": 1.992751233510627e-06, + "loss": 0.5620408654212952, + "step": 751 + }, + { + "epoch": 0.17339174544616093, + "grad_norm": 0.9147017514524429, + "learning_rate": 1.9927053385969224e-06, + "loss": 0.5661174654960632, + "step": 752 + }, + { + "epoch": 0.1736223195757436, + "grad_norm": 0.8721149149743948, + "learning_rate": 1.992659299383436e-06, + "loss": 0.6170656681060791, + "step": 753 + }, + { + "epoch": 0.17385289370532625, + "grad_norm": 0.8946316220934861, + "learning_rate": 1.99261311587686e-06, + "loss": 0.6399837136268616, + "step": 754 + }, + { + "epoch": 0.1740834678349089, + "grad_norm": 0.7741035474142021, + "learning_rate": 1.992566788083908e-06, + "loss": 0.646568775177002, + "step": 755 + }, + { + "epoch": 0.17431404196449157, + "grad_norm": 0.8936741351690501, + "learning_rate": 1.992520316011314e-06, + "loss": 0.6836358904838562, + "step": 756 + }, + { + "epoch": 0.17454461609407423, + "grad_norm": 0.8304614027509832, + "learning_rate": 1.9924736996658327e-06, + "loss": 0.7077229619026184, + "step": 757 + }, + { + "epoch": 0.1747751902236569, + "grad_norm": 0.87551528703017, + "learning_rate": 1.9924269390542408e-06, + "loss": 0.5127657651901245, + "step": 758 + }, + { + "epoch": 0.17500576435323956, + "grad_norm": 0.9006786249451013, + "learning_rate": 1.992380034183336e-06, + "loss": 0.49244552850723267, + "step": 759 + }, + { + "epoch": 0.17523633848282222, + 
"grad_norm": 0.8017561502743571, + "learning_rate": 1.9923329850599353e-06, + "loss": 0.6145986318588257, + "step": 760 + }, + { + "epoch": 0.17546691261240488, + "grad_norm": 1.0163805424999015, + "learning_rate": 1.9922857916908784e-06, + "loss": 0.5233397483825684, + "step": 761 + }, + { + "epoch": 0.17569748674198754, + "grad_norm": 0.9596772303146165, + "learning_rate": 1.992238454083025e-06, + "loss": 0.6296844482421875, + "step": 762 + }, + { + "epoch": 0.1759280608715702, + "grad_norm": 0.7860963753584104, + "learning_rate": 1.9921909722432565e-06, + "loss": 0.5274437665939331, + "step": 763 + }, + { + "epoch": 0.17615863500115286, + "grad_norm": 0.8930810667791799, + "learning_rate": 1.9921433461784744e-06, + "loss": 0.6365554332733154, + "step": 764 + }, + { + "epoch": 0.17638920913073552, + "grad_norm": 0.9611521576454714, + "learning_rate": 1.992095575895602e-06, + "loss": 0.6256603002548218, + "step": 765 + }, + { + "epoch": 0.17661978326031819, + "grad_norm": 0.9488006285824869, + "learning_rate": 1.9920476614015827e-06, + "loss": 0.6914918422698975, + "step": 766 + }, + { + "epoch": 0.17685035738990085, + "grad_norm": 0.9925839476608436, + "learning_rate": 1.9919996027033823e-06, + "loss": 0.618436336517334, + "step": 767 + }, + { + "epoch": 0.1770809315194835, + "grad_norm": 1.0637307823847924, + "learning_rate": 1.9919513998079857e-06, + "loss": 0.7496027946472168, + "step": 768 + }, + { + "epoch": 0.17731150564906617, + "grad_norm": 0.873569070894671, + "learning_rate": 1.9919030527224e-06, + "loss": 0.6188616752624512, + "step": 769 + }, + { + "epoch": 0.17754207977864883, + "grad_norm": 0.9573370107752551, + "learning_rate": 1.991854561453653e-06, + "loss": 0.6525505185127258, + "step": 770 + }, + { + "epoch": 0.1777726539082315, + "grad_norm": 0.8791752874309303, + "learning_rate": 1.9918059260087933e-06, + "loss": 0.6302521228790283, + "step": 771 + }, + { + "epoch": 0.17800322803781415, + "grad_norm": 0.7767159097983319, + "learning_rate": 
1.9917571463948905e-06, + "loss": 0.48817628622055054, + "step": 772 + }, + { + "epoch": 0.17823380216739682, + "grad_norm": 0.9997756560425097, + "learning_rate": 1.9917082226190357e-06, + "loss": 0.7571396231651306, + "step": 773 + }, + { + "epoch": 0.17846437629697948, + "grad_norm": 0.9019653117383005, + "learning_rate": 1.99165915468834e-06, + "loss": 0.6416890025138855, + "step": 774 + }, + { + "epoch": 0.17869495042656214, + "grad_norm": 0.9030141776784474, + "learning_rate": 1.9916099426099357e-06, + "loss": 0.5668659210205078, + "step": 775 + }, + { + "epoch": 0.1789255245561448, + "grad_norm": 0.8616948701360102, + "learning_rate": 1.991560586390977e-06, + "loss": 0.5491495132446289, + "step": 776 + }, + { + "epoch": 0.17915609868572746, + "grad_norm": 0.8461739489170892, + "learning_rate": 1.991511086038637e-06, + "loss": 0.5596655607223511, + "step": 777 + }, + { + "epoch": 0.17938667281531012, + "grad_norm": 0.948797979696852, + "learning_rate": 1.991461441560113e-06, + "loss": 0.606618344783783, + "step": 778 + }, + { + "epoch": 0.17961724694489278, + "grad_norm": 0.8682290862864503, + "learning_rate": 1.9914116529626195e-06, + "loss": 0.6534444093704224, + "step": 779 + }, + { + "epoch": 0.17984782107447544, + "grad_norm": 0.7942772802909244, + "learning_rate": 1.9913617202533956e-06, + "loss": 0.6566994190216064, + "step": 780 + }, + { + "epoch": 0.1800783952040581, + "grad_norm": 0.8753236598884384, + "learning_rate": 1.9913116434396976e-06, + "loss": 0.6745898723602295, + "step": 781 + }, + { + "epoch": 0.18030896933364077, + "grad_norm": 0.8904483654623074, + "learning_rate": 1.991261422528806e-06, + "loss": 0.6260639429092407, + "step": 782 + }, + { + "epoch": 0.18053954346322343, + "grad_norm": 1.095081708934966, + "learning_rate": 1.9912110575280203e-06, + "loss": 0.6937930583953857, + "step": 783 + }, + { + "epoch": 0.1807701175928061, + "grad_norm": 0.7535766751550929, + "learning_rate": 1.991160548444662e-06, + "loss": 0.5220614671707153, + 
"step": 784 + }, + { + "epoch": 0.18100069172238875, + "grad_norm": 1.0171096783148863, + "learning_rate": 1.9911098952860725e-06, + "loss": 0.630463719367981, + "step": 785 + }, + { + "epoch": 0.1812312658519714, + "grad_norm": 0.9064677619585607, + "learning_rate": 1.9910590980596154e-06, + "loss": 0.5476818084716797, + "step": 786 + }, + { + "epoch": 0.18146183998155407, + "grad_norm": 0.8827497683061851, + "learning_rate": 1.9910081567726745e-06, + "loss": 0.619910478591919, + "step": 787 + }, + { + "epoch": 0.18169241411113674, + "grad_norm": 0.9583246792904453, + "learning_rate": 1.990957071432654e-06, + "loss": 0.759405255317688, + "step": 788 + }, + { + "epoch": 0.1819229882407194, + "grad_norm": 0.9249642030902185, + "learning_rate": 1.9909058420469808e-06, + "loss": 0.6093606948852539, + "step": 789 + }, + { + "epoch": 0.18215356237030206, + "grad_norm": 1.0777393301256872, + "learning_rate": 1.9908544686231e-06, + "loss": 0.5358198285102844, + "step": 790 + }, + { + "epoch": 0.18238413649988472, + "grad_norm": 0.8619190562873736, + "learning_rate": 1.9908029511684806e-06, + "loss": 0.577926754951477, + "step": 791 + }, + { + "epoch": 0.18261471062946738, + "grad_norm": 1.0298704295501269, + "learning_rate": 1.990751289690611e-06, + "loss": 0.6232448816299438, + "step": 792 + }, + { + "epoch": 0.18284528475905004, + "grad_norm": 0.9837349749201401, + "learning_rate": 1.9906994841970005e-06, + "loss": 0.5461868047714233, + "step": 793 + }, + { + "epoch": 0.1830758588886327, + "grad_norm": 0.9430576362377001, + "learning_rate": 1.9906475346951793e-06, + "loss": 0.6074671745300293, + "step": 794 + }, + { + "epoch": 0.18330643301821536, + "grad_norm": 0.9936839742941572, + "learning_rate": 1.990595441192699e-06, + "loss": 0.7101696729660034, + "step": 795 + }, + { + "epoch": 0.18353700714779803, + "grad_norm": 0.950260898814123, + "learning_rate": 1.9905432036971318e-06, + "loss": 0.6507722735404968, + "step": 796 + }, + { + "epoch": 0.1837675812773807, + 
"grad_norm": 0.8942288113166778, + "learning_rate": 1.9904908222160715e-06, + "loss": 0.6497524380683899, + "step": 797 + }, + { + "epoch": 0.18399815540696335, + "grad_norm": 0.9396678930556792, + "learning_rate": 1.9904382967571315e-06, + "loss": 0.6359415054321289, + "step": 798 + }, + { + "epoch": 0.184228729536546, + "grad_norm": 0.8070326036364724, + "learning_rate": 1.9903856273279475e-06, + "loss": 0.6062989234924316, + "step": 799 + }, + { + "epoch": 0.18445930366612867, + "grad_norm": 0.9626677000162343, + "learning_rate": 1.9903328139361753e-06, + "loss": 0.5872690677642822, + "step": 800 + }, + { + "epoch": 0.18468987779571133, + "grad_norm": 0.7985705265040473, + "learning_rate": 1.9902798565894917e-06, + "loss": 0.541993260383606, + "step": 801 + }, + { + "epoch": 0.184920451925294, + "grad_norm": 0.9775943406877085, + "learning_rate": 1.9902267552955948e-06, + "loss": 0.6509004235267639, + "step": 802 + }, + { + "epoch": 0.18515102605487666, + "grad_norm": 1.032367389635004, + "learning_rate": 1.9901735100622034e-06, + "loss": 0.6994458436965942, + "step": 803 + }, + { + "epoch": 0.18538160018445932, + "grad_norm": 0.723727027388961, + "learning_rate": 1.9901201208970574e-06, + "loss": 0.5426214933395386, + "step": 804 + }, + { + "epoch": 0.18561217431404198, + "grad_norm": 0.9494744349432898, + "learning_rate": 1.9900665878079172e-06, + "loss": 0.5889894366264343, + "step": 805 + }, + { + "epoch": 0.18584274844362464, + "grad_norm": 0.8565255265724333, + "learning_rate": 1.990012910802564e-06, + "loss": 0.6455902457237244, + "step": 806 + }, + { + "epoch": 0.18607332257320727, + "grad_norm": 0.8487813974117321, + "learning_rate": 1.989959089888801e-06, + "loss": 0.6336048245429993, + "step": 807 + }, + { + "epoch": 0.18630389670278993, + "grad_norm": 0.8414189962242138, + "learning_rate": 1.9899051250744517e-06, + "loss": 0.6091762781143188, + "step": 808 + }, + { + "epoch": 0.1865344708323726, + "grad_norm": 0.9439572961008054, + "learning_rate": 
1.9898510163673594e-06, + "loss": 0.5551953315734863, + "step": 809 + }, + { + "epoch": 0.18676504496195526, + "grad_norm": 1.0494491780231465, + "learning_rate": 1.9897967637753907e-06, + "loss": 0.6441607475280762, + "step": 810 + }, + { + "epoch": 0.18699561909153792, + "grad_norm": 0.886313339848662, + "learning_rate": 1.989742367306431e-06, + "loss": 0.5766205787658691, + "step": 811 + }, + { + "epoch": 0.18722619322112058, + "grad_norm": 0.8129745295139125, + "learning_rate": 1.9896878269683872e-06, + "loss": 0.624677836894989, + "step": 812 + }, + { + "epoch": 0.18745676735070324, + "grad_norm": 1.0883386432883795, + "learning_rate": 1.9896331427691878e-06, + "loss": 0.5942056775093079, + "step": 813 + }, + { + "epoch": 0.1876873414802859, + "grad_norm": 0.9421668652395382, + "learning_rate": 1.989578314716781e-06, + "loss": 0.5194109082221985, + "step": 814 + }, + { + "epoch": 0.18791791560986856, + "grad_norm": 0.9041080200693152, + "learning_rate": 1.9895233428191375e-06, + "loss": 0.5851193070411682, + "step": 815 + }, + { + "epoch": 0.18814848973945122, + "grad_norm": 0.7963655717285544, + "learning_rate": 1.989468227084248e-06, + "loss": 0.5596088171005249, + "step": 816 + }, + { + "epoch": 0.18837906386903389, + "grad_norm": 0.9364254304069746, + "learning_rate": 1.989412967520123e-06, + "loss": 0.608109712600708, + "step": 817 + }, + { + "epoch": 0.18860963799861655, + "grad_norm": 0.8927696059217924, + "learning_rate": 1.9893575641347957e-06, + "loss": 0.6488924026489258, + "step": 818 + }, + { + "epoch": 0.1888402121281992, + "grad_norm": 0.9447086482881396, + "learning_rate": 1.9893020169363202e-06, + "loss": 0.6668595671653748, + "step": 819 + }, + { + "epoch": 0.18907078625778187, + "grad_norm": 0.9937318511996248, + "learning_rate": 1.9892463259327702e-06, + "loss": 0.6516261696815491, + "step": 820 + }, + { + "epoch": 0.18930136038736453, + "grad_norm": 1.0796549259081865, + "learning_rate": 1.9891904911322408e-06, + "loss": 
0.5960654020309448, + "step": 821 + }, + { + "epoch": 0.1895319345169472, + "grad_norm": 0.7909478658460368, + "learning_rate": 1.989134512542848e-06, + "loss": 0.5836078524589539, + "step": 822 + }, + { + "epoch": 0.18976250864652985, + "grad_norm": 0.8238472267757905, + "learning_rate": 1.98907839017273e-06, + "loss": 0.6233468651771545, + "step": 823 + }, + { + "epoch": 0.18999308277611252, + "grad_norm": 0.9807541829716023, + "learning_rate": 1.989022124030043e-06, + "loss": 0.6228024363517761, + "step": 824 + }, + { + "epoch": 0.19022365690569518, + "grad_norm": 0.8131035743107407, + "learning_rate": 1.9889657141229674e-06, + "loss": 0.5549489259719849, + "step": 825 + }, + { + "epoch": 0.19045423103527784, + "grad_norm": 1.04900407843417, + "learning_rate": 1.988909160459703e-06, + "loss": 0.572743833065033, + "step": 826 + }, + { + "epoch": 0.1906848051648605, + "grad_norm": 0.9532449351501632, + "learning_rate": 1.988852463048469e-06, + "loss": 0.5483371019363403, + "step": 827 + }, + { + "epoch": 0.19091537929444316, + "grad_norm": 0.8589634934665029, + "learning_rate": 1.988795621897508e-06, + "loss": 0.6489086151123047, + "step": 828 + }, + { + "epoch": 0.19114595342402582, + "grad_norm": 0.8093738620503291, + "learning_rate": 1.9887386370150823e-06, + "loss": 0.5885359644889832, + "step": 829 + }, + { + "epoch": 0.19137652755360848, + "grad_norm": 1.1233507395706857, + "learning_rate": 1.988681508409475e-06, + "loss": 0.5725297927856445, + "step": 830 + }, + { + "epoch": 0.19160710168319114, + "grad_norm": 0.9186016287497916, + "learning_rate": 1.9886242360889907e-06, + "loss": 0.5165927410125732, + "step": 831 + }, + { + "epoch": 0.1918376758127738, + "grad_norm": 0.9873812028582082, + "learning_rate": 1.988566820061954e-06, + "loss": 0.4909062385559082, + "step": 832 + }, + { + "epoch": 0.19206824994235647, + "grad_norm": 0.8524339429885558, + "learning_rate": 1.988509260336711e-06, + "loss": 0.6611230373382568, + "step": 833 + }, + { + "epoch": 
0.19229882407193913, + "grad_norm": 0.8054213393470881, + "learning_rate": 1.9884515569216296e-06, + "loss": 0.5702481269836426, + "step": 834 + }, + { + "epoch": 0.1925293982015218, + "grad_norm": 1.0204414620630202, + "learning_rate": 1.988393709825096e-06, + "loss": 0.5923126935958862, + "step": 835 + }, + { + "epoch": 0.19275997233110445, + "grad_norm": 0.9055032000924194, + "learning_rate": 1.98833571905552e-06, + "loss": 0.6054497957229614, + "step": 836 + }, + { + "epoch": 0.1929905464606871, + "grad_norm": 0.9248140875126212, + "learning_rate": 1.9882775846213305e-06, + "loss": 0.6688513159751892, + "step": 837 + }, + { + "epoch": 0.19322112059026977, + "grad_norm": 1.0273808455254545, + "learning_rate": 1.988219306530978e-06, + "loss": 0.5898394584655762, + "step": 838 + }, + { + "epoch": 0.19345169471985244, + "grad_norm": 0.9751112903331337, + "learning_rate": 1.9881608847929345e-06, + "loss": 0.575627326965332, + "step": 839 + }, + { + "epoch": 0.1936822688494351, + "grad_norm": 0.8673669914525766, + "learning_rate": 1.9881023194156913e-06, + "loss": 0.5392276048660278, + "step": 840 + }, + { + "epoch": 0.19391284297901776, + "grad_norm": 0.8706508008641746, + "learning_rate": 1.9880436104077624e-06, + "loss": 0.5464376211166382, + "step": 841 + }, + { + "epoch": 0.19414341710860042, + "grad_norm": 1.1088629334080236, + "learning_rate": 1.9879847577776804e-06, + "loss": 0.5483032464981079, + "step": 842 + }, + { + "epoch": 0.19437399123818308, + "grad_norm": 1.088158010228094, + "learning_rate": 1.9879257615340016e-06, + "loss": 0.583878219127655, + "step": 843 + }, + { + "epoch": 0.19460456536776574, + "grad_norm": 0.903659297701254, + "learning_rate": 1.9878666216853005e-06, + "loss": 0.5646623373031616, + "step": 844 + }, + { + "epoch": 0.1948351394973484, + "grad_norm": 0.8893037043091606, + "learning_rate": 1.9878073382401745e-06, + "loss": 0.4785343408584595, + "step": 845 + }, + { + "epoch": 0.19506571362693106, + "grad_norm": 0.8306997774077053, 
+ "learning_rate": 1.987747911207241e-06, + "loss": 0.6247695684432983, + "step": 846 + }, + { + "epoch": 0.19529628775651373, + "grad_norm": 0.8871051444384922, + "learning_rate": 1.9876883405951377e-06, + "loss": 0.5686244368553162, + "step": 847 + }, + { + "epoch": 0.1955268618860964, + "grad_norm": 1.0693338597203925, + "learning_rate": 1.9876286264125242e-06, + "loss": 0.5887250900268555, + "step": 848 + }, + { + "epoch": 0.19575743601567905, + "grad_norm": 1.009687803574172, + "learning_rate": 1.9875687686680808e-06, + "loss": 0.6225967407226562, + "step": 849 + }, + { + "epoch": 0.1959880101452617, + "grad_norm": 0.8424215047754778, + "learning_rate": 1.987508767370508e-06, + "loss": 0.4695369601249695, + "step": 850 + }, + { + "epoch": 0.19621858427484437, + "grad_norm": 1.0270923710251258, + "learning_rate": 1.9874486225285276e-06, + "loss": 0.5248171091079712, + "step": 851 + }, + { + "epoch": 0.19644915840442703, + "grad_norm": 1.0947189066196994, + "learning_rate": 1.9873883341508825e-06, + "loss": 0.573886513710022, + "step": 852 + }, + { + "epoch": 0.1966797325340097, + "grad_norm": 0.980074050730982, + "learning_rate": 1.9873279022463365e-06, + "loss": 0.5309966802597046, + "step": 853 + }, + { + "epoch": 0.19691030666359235, + "grad_norm": 1.2273525906968545, + "learning_rate": 1.987267326823673e-06, + "loss": 0.7115850448608398, + "step": 854 + }, + { + "epoch": 0.19714088079317502, + "grad_norm": 1.65154587276706, + "learning_rate": 1.9872066078916984e-06, + "loss": 0.6970044374465942, + "step": 855 + }, + { + "epoch": 0.19737145492275768, + "grad_norm": 1.0520569639047552, + "learning_rate": 1.987145745459238e-06, + "loss": 0.5956458449363708, + "step": 856 + }, + { + "epoch": 0.19760202905234034, + "grad_norm": 0.8621512966256671, + "learning_rate": 1.9870847395351395e-06, + "loss": 0.6200698614120483, + "step": 857 + }, + { + "epoch": 0.197832603181923, + "grad_norm": 0.8987981187104104, + "learning_rate": 1.98702359012827e-06, + "loss": 
0.6552712321281433, + "step": 858 + }, + { + "epoch": 0.19806317731150566, + "grad_norm": 0.8832934653512269, + "learning_rate": 1.986962297247519e-06, + "loss": 0.5995951294898987, + "step": 859 + }, + { + "epoch": 0.19829375144108832, + "grad_norm": 1.0415029103173328, + "learning_rate": 1.9869008609017946e-06, + "loss": 0.5903854966163635, + "step": 860 + }, + { + "epoch": 0.19852432557067098, + "grad_norm": 0.7946410320386238, + "learning_rate": 1.986839281100029e-06, + "loss": 0.49756956100463867, + "step": 861 + }, + { + "epoch": 0.19875489970025362, + "grad_norm": 0.8989937288923138, + "learning_rate": 1.986777557851172e-06, + "loss": 0.6726386547088623, + "step": 862 + }, + { + "epoch": 0.19898547382983628, + "grad_norm": 1.066877002121069, + "learning_rate": 1.9867156911641963e-06, + "loss": 0.5941756963729858, + "step": 863 + }, + { + "epoch": 0.19921604795941894, + "grad_norm": 1.1426428571577942, + "learning_rate": 1.986653681048095e-06, + "loss": 0.6148152351379395, + "step": 864 + }, + { + "epoch": 0.1994466220890016, + "grad_norm": 0.8574337846446602, + "learning_rate": 1.9865915275118815e-06, + "loss": 0.5484675765037537, + "step": 865 + }, + { + "epoch": 0.19967719621858426, + "grad_norm": 1.279305752369778, + "learning_rate": 1.986529230564591e-06, + "loss": 0.5835011601448059, + "step": 866 + }, + { + "epoch": 0.19990777034816692, + "grad_norm": 1.2828587747963143, + "learning_rate": 1.9864667902152785e-06, + "loss": 0.5505619049072266, + "step": 867 + }, + { + "epoch": 0.20013834447774959, + "grad_norm": 0.978792866059614, + "learning_rate": 1.986404206473021e-06, + "loss": 0.6170759797096252, + "step": 868 + }, + { + "epoch": 0.20036891860733225, + "grad_norm": 0.9063283607010307, + "learning_rate": 1.9863414793469144e-06, + "loss": 0.6302823424339294, + "step": 869 + }, + { + "epoch": 0.2005994927369149, + "grad_norm": 0.9919923586713045, + "learning_rate": 1.9862786088460778e-06, + "loss": 0.6265357732772827, + "step": 870 + }, + { + "epoch": 
0.20083006686649757, + "grad_norm": 0.8288163853607481, + "learning_rate": 1.9862155949796497e-06, + "loss": 0.5346760749816895, + "step": 871 + }, + { + "epoch": 0.20106064099608023, + "grad_norm": 1.0613032711669241, + "learning_rate": 1.98615243775679e-06, + "loss": 0.5480276346206665, + "step": 872 + }, + { + "epoch": 0.2012912151256629, + "grad_norm": 1.0504212966242243, + "learning_rate": 1.986089137186679e-06, + "loss": 0.615007758140564, + "step": 873 + }, + { + "epoch": 0.20152178925524555, + "grad_norm": 1.0424303204478471, + "learning_rate": 1.986025693278518e-06, + "loss": 0.598671555519104, + "step": 874 + }, + { + "epoch": 0.20175236338482821, + "grad_norm": 1.1162570964298844, + "learning_rate": 1.98596210604153e-06, + "loss": 0.6029553413391113, + "step": 875 + }, + { + "epoch": 0.20198293751441088, + "grad_norm": 0.9723766835428509, + "learning_rate": 1.985898375484957e-06, + "loss": 0.6854428052902222, + "step": 876 + }, + { + "epoch": 0.20221351164399354, + "grad_norm": 0.7502030102171089, + "learning_rate": 1.9858345016180636e-06, + "loss": 0.5032496452331543, + "step": 877 + }, + { + "epoch": 0.2024440857735762, + "grad_norm": 0.910423493721141, + "learning_rate": 1.9857704844501343e-06, + "loss": 0.5521007776260376, + "step": 878 + }, + { + "epoch": 0.20267465990315886, + "grad_norm": 0.9861926154372014, + "learning_rate": 1.9857063239904742e-06, + "loss": 0.6473567485809326, + "step": 879 + }, + { + "epoch": 0.20290523403274152, + "grad_norm": 0.9973567674127126, + "learning_rate": 1.9856420202484103e-06, + "loss": 0.528810977935791, + "step": 880 + }, + { + "epoch": 0.20313580816232418, + "grad_norm": 1.0663389238750165, + "learning_rate": 1.9855775732332898e-06, + "loss": 0.681857705116272, + "step": 881 + }, + { + "epoch": 0.20336638229190684, + "grad_norm": 0.9199566615284357, + "learning_rate": 1.9855129829544805e-06, + "loss": 0.6510526537895203, + "step": 882 + }, + { + "epoch": 0.2035969564214895, + "grad_norm": 1.0847608945381821, + 
"learning_rate": 1.985448249421371e-06, + "loss": 0.5690885782241821, + "step": 883 + }, + { + "epoch": 0.20382753055107217, + "grad_norm": 0.9067033263808438, + "learning_rate": 1.985383372643371e-06, + "loss": 0.6451331973075867, + "step": 884 + }, + { + "epoch": 0.20405810468065483, + "grad_norm": 0.7596187493834748, + "learning_rate": 1.9853183526299117e-06, + "loss": 0.493961900472641, + "step": 885 + }, + { + "epoch": 0.2042886788102375, + "grad_norm": 1.031307930072274, + "learning_rate": 1.9852531893904434e-06, + "loss": 0.5390207767486572, + "step": 886 + }, + { + "epoch": 0.20451925293982015, + "grad_norm": 0.9671201783822709, + "learning_rate": 1.9851878829344395e-06, + "loss": 0.5976558923721313, + "step": 887 + }, + { + "epoch": 0.2047498270694028, + "grad_norm": 0.9832697265495778, + "learning_rate": 1.9851224332713917e-06, + "loss": 0.539776623249054, + "step": 888 + }, + { + "epoch": 0.20498040119898547, + "grad_norm": 1.1606849770347532, + "learning_rate": 1.9850568404108144e-06, + "loss": 0.6791383624076843, + "step": 889 + }, + { + "epoch": 0.20521097532856813, + "grad_norm": 1.1599404347752247, + "learning_rate": 1.984991104362242e-06, + "loss": 0.6195741891860962, + "step": 890 + }, + { + "epoch": 0.2054415494581508, + "grad_norm": 1.0295013801913249, + "learning_rate": 1.9849252251352303e-06, + "loss": 0.5792666673660278, + "step": 891 + }, + { + "epoch": 0.20567212358773346, + "grad_norm": 0.7871401361859056, + "learning_rate": 1.984859202739355e-06, + "loss": 0.5633316040039062, + "step": 892 + }, + { + "epoch": 0.20590269771731612, + "grad_norm": 0.9078754261167402, + "learning_rate": 1.9847930371842137e-06, + "loss": 0.6152814626693726, + "step": 893 + }, + { + "epoch": 0.20613327184689878, + "grad_norm": 1.0024181714804654, + "learning_rate": 1.9847267284794234e-06, + "loss": 0.5584526658058167, + "step": 894 + }, + { + "epoch": 0.20636384597648144, + "grad_norm": 0.9442571191896375, + "learning_rate": 1.9846602766346235e-06, + "loss": 
0.5526787042617798, + "step": 895 + }, + { + "epoch": 0.2065944201060641, + "grad_norm": 1.114741515810547, + "learning_rate": 1.984593681659473e-06, + "loss": 0.6851564049720764, + "step": 896 + }, + { + "epoch": 0.20682499423564676, + "grad_norm": 0.9529867069899208, + "learning_rate": 1.9845269435636524e-06, + "loss": 0.6012386083602905, + "step": 897 + }, + { + "epoch": 0.20705556836522943, + "grad_norm": 0.9587418141612076, + "learning_rate": 1.9844600623568626e-06, + "loss": 0.5515716075897217, + "step": 898 + }, + { + "epoch": 0.2072861424948121, + "grad_norm": 1.0489716310270325, + "learning_rate": 1.9843930380488255e-06, + "loss": 0.6534323692321777, + "step": 899 + }, + { + "epoch": 0.20751671662439475, + "grad_norm": 0.9795829214559992, + "learning_rate": 1.9843258706492836e-06, + "loss": 0.726966381072998, + "step": 900 + }, + { + "epoch": 0.2077472907539774, + "grad_norm": 1.0154014646465384, + "learning_rate": 1.984258560168001e-06, + "loss": 0.6692399978637695, + "step": 901 + }, + { + "epoch": 0.20797786488356007, + "grad_norm": 0.8361205321250001, + "learning_rate": 1.9841911066147614e-06, + "loss": 0.5815941095352173, + "step": 902 + }, + { + "epoch": 0.20820843901314273, + "grad_norm": 0.8093430372283338, + "learning_rate": 1.98412350999937e-06, + "loss": 0.4850257933139801, + "step": 903 + }, + { + "epoch": 0.2084390131427254, + "grad_norm": 0.9321751727050823, + "learning_rate": 1.9840557703316524e-06, + "loss": 0.7309345006942749, + "step": 904 + }, + { + "epoch": 0.20866958727230805, + "grad_norm": 0.9487721653557605, + "learning_rate": 1.9839878876214556e-06, + "loss": 0.6246342658996582, + "step": 905 + }, + { + "epoch": 0.20890016140189072, + "grad_norm": 0.923401773715514, + "learning_rate": 1.983919861878647e-06, + "loss": 0.503870964050293, + "step": 906 + }, + { + "epoch": 0.20913073553147338, + "grad_norm": 0.9277576649885639, + "learning_rate": 1.9838516931131147e-06, + "loss": 0.5316766500473022, + "step": 907 + }, + { + "epoch": 
0.20936130966105604, + "grad_norm": 0.9488124820166146, + "learning_rate": 1.983783381334768e-06, + "loss": 0.5707069039344788, + "step": 908 + }, + { + "epoch": 0.2095918837906387, + "grad_norm": 1.1481758251998657, + "learning_rate": 1.983714926553536e-06, + "loss": 0.5482156276702881, + "step": 909 + }, + { + "epoch": 0.20982245792022136, + "grad_norm": 0.8868748652499737, + "learning_rate": 1.98364632877937e-06, + "loss": 0.45747748017311096, + "step": 910 + }, + { + "epoch": 0.21005303204980402, + "grad_norm": 1.070435205795932, + "learning_rate": 1.9835775880222414e-06, + "loss": 0.5599262118339539, + "step": 911 + }, + { + "epoch": 0.21028360617938668, + "grad_norm": 0.8833178195747919, + "learning_rate": 1.9835087042921416e-06, + "loss": 0.5115377902984619, + "step": 912 + }, + { + "epoch": 0.21051418030896935, + "grad_norm": 1.0026720443060566, + "learning_rate": 1.9834396775990846e-06, + "loss": 0.6577836275100708, + "step": 913 + }, + { + "epoch": 0.210744754438552, + "grad_norm": 1.0996458728397183, + "learning_rate": 1.9833705079531033e-06, + "loss": 0.4979211091995239, + "step": 914 + }, + { + "epoch": 0.21097532856813467, + "grad_norm": 0.9038590231228891, + "learning_rate": 1.983301195364252e-06, + "loss": 0.5052670240402222, + "step": 915 + }, + { + "epoch": 0.21120590269771733, + "grad_norm": 0.9375736925419242, + "learning_rate": 1.9832317398426076e-06, + "loss": 0.5480808019638062, + "step": 916 + }, + { + "epoch": 0.2114364768273, + "grad_norm": 1.1234174619828885, + "learning_rate": 1.983162141398264e-06, + "loss": 0.5328841209411621, + "step": 917 + }, + { + "epoch": 0.21166705095688262, + "grad_norm": 1.0661654042909894, + "learning_rate": 1.98309240004134e-06, + "loss": 0.5572643280029297, + "step": 918 + }, + { + "epoch": 0.21189762508646529, + "grad_norm": 0.7370595537346776, + "learning_rate": 1.983022515781972e-06, + "loss": 0.5180699825286865, + "step": 919 + }, + { + "epoch": 0.21212819921604795, + "grad_norm": 0.9467461169752135, + 
"learning_rate": 1.9829524886303182e-06, + "loss": 0.5031566619873047, + "step": 920 + }, + { + "epoch": 0.2123587733456306, + "grad_norm": 1.0924744776428812, + "learning_rate": 1.9828823185965587e-06, + "loss": 0.6579925417900085, + "step": 921 + }, + { + "epoch": 0.21258934747521327, + "grad_norm": 1.0635734753276387, + "learning_rate": 1.982812005690893e-06, + "loss": 0.6107230186462402, + "step": 922 + }, + { + "epoch": 0.21281992160479593, + "grad_norm": 0.8209241554677639, + "learning_rate": 1.982741549923542e-06, + "loss": 0.5244725942611694, + "step": 923 + }, + { + "epoch": 0.2130504957343786, + "grad_norm": 0.8970249012108504, + "learning_rate": 1.9826709513047466e-06, + "loss": 0.5857048630714417, + "step": 924 + }, + { + "epoch": 0.21328106986396125, + "grad_norm": 1.1702999413512643, + "learning_rate": 1.9826002098447694e-06, + "loss": 0.6417914628982544, + "step": 925 + }, + { + "epoch": 0.21351164399354391, + "grad_norm": 1.025740647317304, + "learning_rate": 1.982529325553893e-06, + "loss": 0.6062248945236206, + "step": 926 + }, + { + "epoch": 0.21374221812312658, + "grad_norm": 0.8397411976395659, + "learning_rate": 1.982458298442422e-06, + "loss": 0.4870455265045166, + "step": 927 + }, + { + "epoch": 0.21397279225270924, + "grad_norm": 0.8931294029793581, + "learning_rate": 1.9823871285206802e-06, + "loss": 0.6552037000656128, + "step": 928 + }, + { + "epoch": 0.2142033663822919, + "grad_norm": 0.9703019761386622, + "learning_rate": 1.9823158157990133e-06, + "loss": 0.531679093837738, + "step": 929 + }, + { + "epoch": 0.21443394051187456, + "grad_norm": 1.2664544243150397, + "learning_rate": 1.982244360287787e-06, + "loss": 0.516847550868988, + "step": 930 + }, + { + "epoch": 0.21466451464145722, + "grad_norm": 0.810392988957607, + "learning_rate": 1.982172761997388e-06, + "loss": 0.47147709131240845, + "step": 931 + }, + { + "epoch": 0.21489508877103988, + "grad_norm": 0.8771741979565738, + "learning_rate": 1.982101020938224e-06, + "loss": 
0.627938985824585, + "step": 932 + }, + { + "epoch": 0.21512566290062254, + "grad_norm": 1.0257080856710215, + "learning_rate": 1.9820291371207233e-06, + "loss": 0.639348030090332, + "step": 933 + }, + { + "epoch": 0.2153562370302052, + "grad_norm": 0.9702705556217962, + "learning_rate": 1.9819571105553354e-06, + "loss": 0.6480363607406616, + "step": 934 + }, + { + "epoch": 0.21558681115978787, + "grad_norm": 0.9260617050921398, + "learning_rate": 1.9818849412525293e-06, + "loss": 0.5776711702346802, + "step": 935 + }, + { + "epoch": 0.21581738528937053, + "grad_norm": 0.9042487017557694, + "learning_rate": 1.9818126292227957e-06, + "loss": 0.5891472101211548, + "step": 936 + }, + { + "epoch": 0.2160479594189532, + "grad_norm": 0.8905401941241984, + "learning_rate": 1.9817401744766465e-06, + "loss": 0.5977755784988403, + "step": 937 + }, + { + "epoch": 0.21627853354853585, + "grad_norm": 0.8626457448308078, + "learning_rate": 1.981667577024613e-06, + "loss": 0.5263733863830566, + "step": 938 + }, + { + "epoch": 0.2165091076781185, + "grad_norm": 1.0627291912482457, + "learning_rate": 1.9815948368772484e-06, + "loss": 0.5440605878829956, + "step": 939 + }, + { + "epoch": 0.21673968180770117, + "grad_norm": 0.9629159186929203, + "learning_rate": 1.9815219540451263e-06, + "loss": 0.5140440464019775, + "step": 940 + }, + { + "epoch": 0.21697025593728383, + "grad_norm": 1.0494365886675714, + "learning_rate": 1.9814489285388402e-06, + "loss": 0.6741353273391724, + "step": 941 + }, + { + "epoch": 0.2172008300668665, + "grad_norm": 1.1329427006993176, + "learning_rate": 1.981375760369006e-06, + "loss": 0.6243258714675903, + "step": 942 + }, + { + "epoch": 0.21743140419644916, + "grad_norm": 1.1054961559311265, + "learning_rate": 1.981302449546259e-06, + "loss": 0.6363699436187744, + "step": 943 + }, + { + "epoch": 0.21766197832603182, + "grad_norm": 0.9214231813217233, + "learning_rate": 1.981228996081256e-06, + "loss": 0.5849490165710449, + "step": 944 + }, + { + "epoch": 
0.21789255245561448, + "grad_norm": 0.8824229032075002, + "learning_rate": 1.9811553999846736e-06, + "loss": 0.43679118156433105, + "step": 945 + }, + { + "epoch": 0.21812312658519714, + "grad_norm": 0.8524209104471582, + "learning_rate": 1.9810816612672104e-06, + "loss": 0.5575870275497437, + "step": 946 + }, + { + "epoch": 0.2183537007147798, + "grad_norm": 1.2313981009960802, + "learning_rate": 1.9810077799395846e-06, + "loss": 0.5288122296333313, + "step": 947 + }, + { + "epoch": 0.21858427484436246, + "grad_norm": 0.9413824588491826, + "learning_rate": 1.9809337560125357e-06, + "loss": 0.5618559718132019, + "step": 948 + }, + { + "epoch": 0.21881484897394513, + "grad_norm": 0.900237395227137, + "learning_rate": 1.980859589496824e-06, + "loss": 0.6346654891967773, + "step": 949 + }, + { + "epoch": 0.2190454231035278, + "grad_norm": 0.7859619018047411, + "learning_rate": 1.98078528040323e-06, + "loss": 0.5456810593605042, + "step": 950 + }, + { + "epoch": 0.21927599723311045, + "grad_norm": 1.096845447650345, + "learning_rate": 1.980710828742556e-06, + "loss": 0.6463650465011597, + "step": 951 + }, + { + "epoch": 0.2195065713626931, + "grad_norm": 0.8708852946707265, + "learning_rate": 1.980636234525624e-06, + "loss": 0.5013638734817505, + "step": 952 + }, + { + "epoch": 0.21973714549227577, + "grad_norm": 1.0813749561311563, + "learning_rate": 1.9805614977632763e-06, + "loss": 0.6522110104560852, + "step": 953 + }, + { + "epoch": 0.21996771962185843, + "grad_norm": 1.1282712003155921, + "learning_rate": 1.9804866184663775e-06, + "loss": 0.5864803791046143, + "step": 954 + }, + { + "epoch": 0.2201982937514411, + "grad_norm": 1.0131587624930238, + "learning_rate": 1.9804115966458116e-06, + "loss": 0.5261500477790833, + "step": 955 + }, + { + "epoch": 0.22042886788102375, + "grad_norm": 0.9727651996633074, + "learning_rate": 1.980336432312484e-06, + "loss": 0.585462212562561, + "step": 956 + }, + { + "epoch": 0.22065944201060642, + "grad_norm": 0.913173290527313, 
+ "learning_rate": 1.9802611254773207e-06, + "loss": 0.5889539122581482, + "step": 957 + }, + { + "epoch": 0.22089001614018908, + "grad_norm": 0.9844451118331555, + "learning_rate": 1.980185676151268e-06, + "loss": 0.665162205696106, + "step": 958 + }, + { + "epoch": 0.22112059026977174, + "grad_norm": 0.9378356304402508, + "learning_rate": 1.9801100843452935e-06, + "loss": 0.5344980359077454, + "step": 959 + }, + { + "epoch": 0.2213511643993544, + "grad_norm": 0.9210142542004092, + "learning_rate": 1.980034350070385e-06, + "loss": 0.6301499009132385, + "step": 960 + }, + { + "epoch": 0.22158173852893706, + "grad_norm": 1.0404902143094334, + "learning_rate": 1.9799584733375512e-06, + "loss": 0.5114584565162659, + "step": 961 + }, + { + "epoch": 0.22181231265851972, + "grad_norm": 1.0168872016124533, + "learning_rate": 1.979882454157822e-06, + "loss": 0.5199861526489258, + "step": 962 + }, + { + "epoch": 0.22204288678810238, + "grad_norm": 1.1826380086118446, + "learning_rate": 1.9798062925422472e-06, + "loss": 0.5336212515830994, + "step": 963 + }, + { + "epoch": 0.22227346091768505, + "grad_norm": 1.0189277044162137, + "learning_rate": 1.9797299885018977e-06, + "loss": 0.535847544670105, + "step": 964 + }, + { + "epoch": 0.2225040350472677, + "grad_norm": 1.1943664941065335, + "learning_rate": 1.979653542047865e-06, + "loss": 0.6234130859375, + "step": 965 + }, + { + "epoch": 0.22273460917685037, + "grad_norm": 0.9414245062598806, + "learning_rate": 1.979576953191262e-06, + "loss": 0.5017205476760864, + "step": 966 + }, + { + "epoch": 0.22296518330643303, + "grad_norm": 0.8271602877368085, + "learning_rate": 1.9795002219432204e-06, + "loss": 0.4982973337173462, + "step": 967 + }, + { + "epoch": 0.2231957574360157, + "grad_norm": 1.0821521338057418, + "learning_rate": 1.979423348314895e-06, + "loss": 0.47946417331695557, + "step": 968 + }, + { + "epoch": 0.22342633156559835, + "grad_norm": 0.9333636639659694, + "learning_rate": 1.97934633231746e-06, + "loss": 
0.5431856513023376, + "step": 969 + }, + { + "epoch": 0.223656905695181, + "grad_norm": 1.010615347342822, + "learning_rate": 1.9792691739621097e-06, + "loss": 0.5355685949325562, + "step": 970 + }, + { + "epoch": 0.22388747982476367, + "grad_norm": 0.9115391310212676, + "learning_rate": 1.979191873260061e-06, + "loss": 0.6103906631469727, + "step": 971 + }, + { + "epoch": 0.22411805395434634, + "grad_norm": 0.9295016548118124, + "learning_rate": 1.9791144302225493e-06, + "loss": 0.538421094417572, + "step": 972 + }, + { + "epoch": 0.224348628083929, + "grad_norm": 1.2200934433979187, + "learning_rate": 1.9790368448608322e-06, + "loss": 0.6068445444107056, + "step": 973 + }, + { + "epoch": 0.22457920221351163, + "grad_norm": 0.8606144159525476, + "learning_rate": 1.9789591171861874e-06, + "loss": 0.463737815618515, + "step": 974 + }, + { + "epoch": 0.2248097763430943, + "grad_norm": 1.0217946560153375, + "learning_rate": 1.9788812472099135e-06, + "loss": 0.6588588953018188, + "step": 975 + }, + { + "epoch": 0.22504035047267695, + "grad_norm": 1.0288343828209117, + "learning_rate": 1.9788032349433297e-06, + "loss": 0.678712010383606, + "step": 976 + }, + { + "epoch": 0.22527092460225961, + "grad_norm": 1.1695805252394589, + "learning_rate": 1.9787250803977757e-06, + "loss": 0.6397948265075684, + "step": 977 + }, + { + "epoch": 0.22550149873184228, + "grad_norm": 1.029054993282064, + "learning_rate": 1.978646783584612e-06, + "loss": 0.5422782897949219, + "step": 978 + }, + { + "epoch": 0.22573207286142494, + "grad_norm": 0.9969509169785887, + "learning_rate": 1.9785683445152204e-06, + "loss": 0.5314444303512573, + "step": 979 + }, + { + "epoch": 0.2259626469910076, + "grad_norm": 1.0816366548169771, + "learning_rate": 1.9784897632010026e-06, + "loss": 0.6260710954666138, + "step": 980 + }, + { + "epoch": 0.22619322112059026, + "grad_norm": 1.6140506138107567, + "learning_rate": 1.9784110396533804e-06, + "loss": 0.6765384078025818, + "step": 981 + }, + { + "epoch": 
0.22642379525017292, + "grad_norm": 0.9741870993027198, + "learning_rate": 1.9783321738837983e-06, + "loss": 0.6716702580451965, + "step": 982 + }, + { + "epoch": 0.22665436937975558, + "grad_norm": 0.9800524570597025, + "learning_rate": 1.978253165903719e-06, + "loss": 0.5537375211715698, + "step": 983 + }, + { + "epoch": 0.22688494350933824, + "grad_norm": 1.2650751897909203, + "learning_rate": 1.9781740157246285e-06, + "loss": 0.525878369808197, + "step": 984 + }, + { + "epoch": 0.2271155176389209, + "grad_norm": 1.1285639712327624, + "learning_rate": 1.978094723358031e-06, + "loss": 0.6349027156829834, + "step": 985 + }, + { + "epoch": 0.22734609176850357, + "grad_norm": 0.9922350297605812, + "learning_rate": 1.9780152888154525e-06, + "loss": 0.5777440071105957, + "step": 986 + }, + { + "epoch": 0.22757666589808623, + "grad_norm": 0.8792919247604332, + "learning_rate": 1.9779357121084402e-06, + "loss": 0.6181483268737793, + "step": 987 + }, + { + "epoch": 0.2278072400276689, + "grad_norm": 1.113677830579263, + "learning_rate": 1.9778559932485606e-06, + "loss": 0.6364198923110962, + "step": 988 + }, + { + "epoch": 0.22803781415725155, + "grad_norm": 1.0528039871957056, + "learning_rate": 1.9777761322474024e-06, + "loss": 0.623460054397583, + "step": 989 + }, + { + "epoch": 0.2282683882868342, + "grad_norm": 1.0042426162492055, + "learning_rate": 1.977696129116574e-06, + "loss": 0.504749059677124, + "step": 990 + }, + { + "epoch": 0.22849896241641687, + "grad_norm": 0.9462650071116105, + "learning_rate": 1.9776159838677048e-06, + "loss": 0.5228890180587769, + "step": 991 + }, + { + "epoch": 0.22872953654599953, + "grad_norm": 0.983638268661895, + "learning_rate": 1.977535696512444e-06, + "loss": 0.5765929222106934, + "step": 992 + }, + { + "epoch": 0.2289601106755822, + "grad_norm": 1.0000819039461677, + "learning_rate": 1.977455267062463e-06, + "loss": 0.5165348052978516, + "step": 993 + }, + { + "epoch": 0.22919068480516486, + "grad_norm": 1.0528189784184039, + 
"learning_rate": 1.9773746955294525e-06, + "loss": 0.6056735515594482, + "step": 994 + }, + { + "epoch": 0.22942125893474752, + "grad_norm": 1.0625954437167437, + "learning_rate": 1.9772939819251245e-06, + "loss": 0.5430403351783752, + "step": 995 + }, + { + "epoch": 0.22965183306433018, + "grad_norm": 1.2611536344776966, + "learning_rate": 1.977213126261212e-06, + "loss": 0.5710945129394531, + "step": 996 + }, + { + "epoch": 0.22988240719391284, + "grad_norm": 0.9590894945496666, + "learning_rate": 1.977132128549468e-06, + "loss": 0.5189366936683655, + "step": 997 + }, + { + "epoch": 0.2301129813234955, + "grad_norm": 1.229825794085491, + "learning_rate": 1.977050988801666e-06, + "loss": 0.6578037738800049, + "step": 998 + }, + { + "epoch": 0.23034355545307816, + "grad_norm": 1.0761110723698188, + "learning_rate": 1.9769697070296006e-06, + "loss": 0.5787034034729004, + "step": 999 + }, + { + "epoch": 0.23057412958266083, + "grad_norm": 1.0414208441736372, + "learning_rate": 1.976888283245087e-06, + "loss": 0.5169408321380615, + "step": 1000 + }, + { + "epoch": 0.2308047037122435, + "grad_norm": 1.1228864795023747, + "learning_rate": 1.976806717459961e-06, + "loss": 0.6326704025268555, + "step": 1001 + }, + { + "epoch": 0.23103527784182615, + "grad_norm": 1.2998118201322668, + "learning_rate": 1.9767250096860785e-06, + "loss": 0.5188414454460144, + "step": 1002 + }, + { + "epoch": 0.2312658519714088, + "grad_norm": 0.9684429634366722, + "learning_rate": 1.9766431599353173e-06, + "loss": 0.5788798928260803, + "step": 1003 + }, + { + "epoch": 0.23149642610099147, + "grad_norm": 1.011079377555661, + "learning_rate": 1.976561168219575e-06, + "loss": 0.5513355731964111, + "step": 1004 + }, + { + "epoch": 0.23172700023057413, + "grad_norm": 0.9242770139183195, + "learning_rate": 1.97647903455077e-06, + "loss": 0.5810542106628418, + "step": 1005 + }, + { + "epoch": 0.2319575743601568, + "grad_norm": 0.9036081245550505, + "learning_rate": 1.9763967589408407e-06, + "loss": 
0.6541746854782104, + "step": 1006 + }, + { + "epoch": 0.23218814848973945, + "grad_norm": 0.972339176589073, + "learning_rate": 1.976314341401747e-06, + "loss": 0.48837774991989136, + "step": 1007 + }, + { + "epoch": 0.23241872261932212, + "grad_norm": 1.0622732331560878, + "learning_rate": 1.976231781945469e-06, + "loss": 0.514664888381958, + "step": 1008 + }, + { + "epoch": 0.23264929674890478, + "grad_norm": 1.1476741578183667, + "learning_rate": 1.976149080584008e-06, + "loss": 0.48295027017593384, + "step": 1009 + }, + { + "epoch": 0.23287987087848744, + "grad_norm": 0.9532553897028984, + "learning_rate": 1.9760662373293847e-06, + "loss": 0.5975791811943054, + "step": 1010 + }, + { + "epoch": 0.2331104450080701, + "grad_norm": 1.0101722687438028, + "learning_rate": 1.9759832521936424e-06, + "loss": 0.4810718297958374, + "step": 1011 + }, + { + "epoch": 0.23334101913765276, + "grad_norm": 0.8377461102160731, + "learning_rate": 1.9759001251888425e-06, + "loss": 0.5984642505645752, + "step": 1012 + }, + { + "epoch": 0.23357159326723542, + "grad_norm": 1.1428510363276687, + "learning_rate": 1.975816856327069e-06, + "loss": 0.600128710269928, + "step": 1013 + }, + { + "epoch": 0.23380216739681808, + "grad_norm": 0.976646115631477, + "learning_rate": 1.9757334456204263e-06, + "loss": 0.5036175847053528, + "step": 1014 + }, + { + "epoch": 0.23403274152640074, + "grad_norm": 0.781296299293608, + "learning_rate": 1.975649893081038e-06, + "loss": 0.49270063638687134, + "step": 1015 + }, + { + "epoch": 0.2342633156559834, + "grad_norm": 1.0782515218974933, + "learning_rate": 1.97556619872105e-06, + "loss": 0.5337218642234802, + "step": 1016 + }, + { + "epoch": 0.23449388978556607, + "grad_norm": 1.279305397178248, + "learning_rate": 1.9754823625526277e-06, + "loss": 0.5263136625289917, + "step": 1017 + }, + { + "epoch": 0.23472446391514873, + "grad_norm": 1.1321753640293293, + "learning_rate": 1.975398384587958e-06, + "loss": 0.6271284818649292, + "step": 1018 + }, + { 
+ "epoch": 0.2349550380447314, + "grad_norm": 0.9524936816808555, + "learning_rate": 1.975314264839248e-06, + "loss": 0.7009197473526001, + "step": 1019 + }, + { + "epoch": 0.23518561217431405, + "grad_norm": 1.0291281498015452, + "learning_rate": 1.9752300033187248e-06, + "loss": 0.5781605839729309, + "step": 1020 + }, + { + "epoch": 0.2354161863038967, + "grad_norm": 1.0439195983844425, + "learning_rate": 1.9751456000386367e-06, + "loss": 0.549934446811676, + "step": 1021 + }, + { + "epoch": 0.23564676043347937, + "grad_norm": 1.1313488046553661, + "learning_rate": 1.9750610550112535e-06, + "loss": 0.5856816172599792, + "step": 1022 + }, + { + "epoch": 0.23587733456306204, + "grad_norm": 1.1355877980298148, + "learning_rate": 1.9749763682488638e-06, + "loss": 0.6225322484970093, + "step": 1023 + }, + { + "epoch": 0.2361079086926447, + "grad_norm": 0.8829653489765357, + "learning_rate": 1.9748915397637775e-06, + "loss": 0.5533155202865601, + "step": 1024 + }, + { + "epoch": 0.23633848282222736, + "grad_norm": 0.9964032830251005, + "learning_rate": 1.974806569568326e-06, + "loss": 0.4960908889770508, + "step": 1025 + }, + { + "epoch": 0.23656905695181002, + "grad_norm": 1.0642112431572752, + "learning_rate": 1.97472145767486e-06, + "loss": 0.5960450768470764, + "step": 1026 + }, + { + "epoch": 0.23679963108139268, + "grad_norm": 1.0609331852795814, + "learning_rate": 1.9746362040957517e-06, + "loss": 0.5653714537620544, + "step": 1027 + }, + { + "epoch": 0.23703020521097534, + "grad_norm": 0.9636699324332547, + "learning_rate": 1.9745508088433936e-06, + "loss": 0.6400578022003174, + "step": 1028 + }, + { + "epoch": 0.23726077934055798, + "grad_norm": 1.0105210896498236, + "learning_rate": 1.9744652719301987e-06, + "loss": 0.5459057092666626, + "step": 1029 + }, + { + "epoch": 0.23749135347014064, + "grad_norm": 1.0859828591491134, + "learning_rate": 1.9743795933686005e-06, + "loss": 0.46735280752182007, + "step": 1030 + }, + { + "epoch": 0.2377219275997233, + 
"grad_norm": 0.9440768334185448, + "learning_rate": 1.9742937731710533e-06, + "loss": 0.526339590549469, + "step": 1031 + }, + { + "epoch": 0.23795250172930596, + "grad_norm": 1.013077702945683, + "learning_rate": 1.9742078113500323e-06, + "loss": 0.5976641178131104, + "step": 1032 + }, + { + "epoch": 0.23818307585888862, + "grad_norm": 0.9655038700233691, + "learning_rate": 1.9741217079180325e-06, + "loss": 0.5331728458404541, + "step": 1033 + }, + { + "epoch": 0.23841364998847128, + "grad_norm": 0.9368079955738086, + "learning_rate": 1.9740354628875696e-06, + "loss": 0.5743261575698853, + "step": 1034 + }, + { + "epoch": 0.23864422411805394, + "grad_norm": 0.9982653104570526, + "learning_rate": 1.973949076271181e-06, + "loss": 0.54700767993927, + "step": 1035 + }, + { + "epoch": 0.2388747982476366, + "grad_norm": 0.8919318869448586, + "learning_rate": 1.9738625480814235e-06, + "loss": 0.5483411550521851, + "step": 1036 + }, + { + "epoch": 0.23910537237721927, + "grad_norm": 0.9314153856468148, + "learning_rate": 1.973775878330875e-06, + "loss": 0.5677193403244019, + "step": 1037 + }, + { + "epoch": 0.23933594650680193, + "grad_norm": 0.9867371078797748, + "learning_rate": 1.973689067032133e-06, + "loss": 0.5092767477035522, + "step": 1038 + }, + { + "epoch": 0.2395665206363846, + "grad_norm": 0.9526587430164372, + "learning_rate": 1.973602114197818e-06, + "loss": 0.5618614554405212, + "step": 1039 + }, + { + "epoch": 0.23979709476596725, + "grad_norm": 1.1304270434054837, + "learning_rate": 1.9735150198405677e-06, + "loss": 0.5601966977119446, + "step": 1040 + }, + { + "epoch": 0.2400276688955499, + "grad_norm": 1.2376653334727166, + "learning_rate": 1.973427783973043e-06, + "loss": 0.5945397019386292, + "step": 1041 + }, + { + "epoch": 0.24025824302513257, + "grad_norm": 1.084452486357135, + "learning_rate": 1.9733404066079253e-06, + "loss": 0.42448002099990845, + "step": 1042 + }, + { + "epoch": 0.24048881715471523, + "grad_norm": 1.0671556472806993, + 
"learning_rate": 1.9732528877579146e-06, + "loss": 0.5237313508987427, + "step": 1043 + }, + { + "epoch": 0.2407193912842979, + "grad_norm": 1.085642930506958, + "learning_rate": 1.973165227435733e-06, + "loss": 0.6006743907928467, + "step": 1044 + }, + { + "epoch": 0.24094996541388056, + "grad_norm": 0.9267133414742948, + "learning_rate": 1.973077425654123e-06, + "loss": 0.547584056854248, + "step": 1045 + }, + { + "epoch": 0.24118053954346322, + "grad_norm": 1.0824218376223906, + "learning_rate": 1.972989482425847e-06, + "loss": 0.5472346544265747, + "step": 1046 + }, + { + "epoch": 0.24141111367304588, + "grad_norm": 1.1106806941355478, + "learning_rate": 1.972901397763689e-06, + "loss": 0.5962260365486145, + "step": 1047 + }, + { + "epoch": 0.24164168780262854, + "grad_norm": 0.9770536598072448, + "learning_rate": 1.9728131716804525e-06, + "loss": 0.561386227607727, + "step": 1048 + }, + { + "epoch": 0.2418722619322112, + "grad_norm": 1.2169602038706573, + "learning_rate": 1.9727248041889624e-06, + "loss": 0.46618524193763733, + "step": 1049 + }, + { + "epoch": 0.24210283606179386, + "grad_norm": 0.9641011081185654, + "learning_rate": 1.9726362953020643e-06, + "loss": 0.4684019088745117, + "step": 1050 + }, + { + "epoch": 0.24233341019137652, + "grad_norm": 1.1116892767931694, + "learning_rate": 1.9725476450326227e-06, + "loss": 0.5670303106307983, + "step": 1051 + }, + { + "epoch": 0.2425639843209592, + "grad_norm": 1.0413794589983083, + "learning_rate": 1.9724588533935246e-06, + "loss": 0.5451534986495972, + "step": 1052 + }, + { + "epoch": 0.24279455845054185, + "grad_norm": 1.3028651104025368, + "learning_rate": 1.9723699203976766e-06, + "loss": 0.578605592250824, + "step": 1053 + }, + { + "epoch": 0.2430251325801245, + "grad_norm": 1.072521418141734, + "learning_rate": 1.972280846058006e-06, + "loss": 0.5844857692718506, + "step": 1054 + }, + { + "epoch": 0.24325570670970717, + "grad_norm": 0.8882845471690917, + "learning_rate": 1.9721916303874603e-06, + 
"loss": 0.5152320861816406, + "step": 1055 + }, + { + "epoch": 0.24348628083928983, + "grad_norm": 0.994596822062513, + "learning_rate": 1.9721022733990087e-06, + "loss": 0.5108952522277832, + "step": 1056 + }, + { + "epoch": 0.2437168549688725, + "grad_norm": 1.2179028657479944, + "learning_rate": 1.97201277510564e-06, + "loss": 0.6345964670181274, + "step": 1057 + }, + { + "epoch": 0.24394742909845515, + "grad_norm": 1.0322609868377797, + "learning_rate": 1.9719231355203627e-06, + "loss": 0.6699639558792114, + "step": 1058 + }, + { + "epoch": 0.24417800322803782, + "grad_norm": 1.0786593444912098, + "learning_rate": 1.971833354656208e-06, + "loss": 0.5426750779151917, + "step": 1059 + }, + { + "epoch": 0.24440857735762048, + "grad_norm": 0.9469348439661489, + "learning_rate": 1.9717434325262253e-06, + "loss": 0.45797908306121826, + "step": 1060 + }, + { + "epoch": 0.24463915148720314, + "grad_norm": 0.9212142090514559, + "learning_rate": 1.9716533691434872e-06, + "loss": 0.46754708886146545, + "step": 1061 + }, + { + "epoch": 0.2448697256167858, + "grad_norm": 1.0419375830533737, + "learning_rate": 1.9715631645210838e-06, + "loss": 0.6593209505081177, + "step": 1062 + }, + { + "epoch": 0.24510029974636846, + "grad_norm": 0.8714440933836988, + "learning_rate": 1.9714728186721287e-06, + "loss": 0.5634866952896118, + "step": 1063 + }, + { + "epoch": 0.24533087387595112, + "grad_norm": 1.3414429697713321, + "learning_rate": 1.971382331609753e-06, + "loss": 0.5066277980804443, + "step": 1064 + }, + { + "epoch": 0.24556144800553378, + "grad_norm": 0.9735373407478976, + "learning_rate": 1.9712917033471113e-06, + "loss": 0.5721756219863892, + "step": 1065 + }, + { + "epoch": 0.24579202213511644, + "grad_norm": 0.9116883309182201, + "learning_rate": 1.9712009338973765e-06, + "loss": 0.5188664197921753, + "step": 1066 + }, + { + "epoch": 0.2460225962646991, + "grad_norm": 1.1314636983505006, + "learning_rate": 1.9711100232737434e-06, + "loss": 0.4879762828350067, + "step": 
1067 + }, + { + "epoch": 0.24625317039428177, + "grad_norm": 1.2412816829375237, + "learning_rate": 1.971018971489426e-06, + "loss": 0.5169111490249634, + "step": 1068 + }, + { + "epoch": 0.24648374452386443, + "grad_norm": 1.2239551353327036, + "learning_rate": 1.9709277785576605e-06, + "loss": 0.7341418862342834, + "step": 1069 + }, + { + "epoch": 0.2467143186534471, + "grad_norm": 0.9353793197150668, + "learning_rate": 1.970836444491702e-06, + "loss": 0.48676228523254395, + "step": 1070 + }, + { + "epoch": 0.24694489278302975, + "grad_norm": 1.1049152340951753, + "learning_rate": 1.9707449693048277e-06, + "loss": 0.5594040751457214, + "step": 1071 + }, + { + "epoch": 0.2471754669126124, + "grad_norm": 1.1275772388460679, + "learning_rate": 1.970653353010334e-06, + "loss": 0.575579047203064, + "step": 1072 + }, + { + "epoch": 0.24740604104219507, + "grad_norm": 0.9990792550863451, + "learning_rate": 1.9705615956215375e-06, + "loss": 0.5212938189506531, + "step": 1073 + }, + { + "epoch": 0.24763661517177774, + "grad_norm": 1.2242480620016798, + "learning_rate": 1.970469697151777e-06, + "loss": 0.49838072061538696, + "step": 1074 + }, + { + "epoch": 0.2478671893013604, + "grad_norm": 1.0069439526224342, + "learning_rate": 1.9703776576144106e-06, + "loss": 0.505547285079956, + "step": 1075 + }, + { + "epoch": 0.24809776343094306, + "grad_norm": 0.9320138812686547, + "learning_rate": 1.970285477022817e-06, + "loss": 0.5236082077026367, + "step": 1076 + }, + { + "epoch": 0.24832833756052572, + "grad_norm": 1.1096851604663263, + "learning_rate": 1.9701931553903963e-06, + "loss": 0.5417677760124207, + "step": 1077 + }, + { + "epoch": 0.24855891169010838, + "grad_norm": 1.4437484296393372, + "learning_rate": 1.9701006927305676e-06, + "loss": 0.624547004699707, + "step": 1078 + }, + { + "epoch": 0.24878948581969104, + "grad_norm": 1.1814609406249081, + "learning_rate": 1.9700080890567713e-06, + "loss": 0.7127759456634521, + "step": 1079 + }, + { + "epoch": 
0.2490200599492737, + "grad_norm": 1.1432146079503174, + "learning_rate": 1.9699153443824686e-06, + "loss": 0.44590264558792114, + "step": 1080 + }, + { + "epoch": 0.24925063407885636, + "grad_norm": 0.9565451374538135, + "learning_rate": 1.9698224587211407e-06, + "loss": 0.6311746835708618, + "step": 1081 + }, + { + "epoch": 0.24948120820843903, + "grad_norm": 0.870591902169041, + "learning_rate": 1.9697294320862898e-06, + "loss": 0.4837970733642578, + "step": 1082 + }, + { + "epoch": 0.2497117823380217, + "grad_norm": 0.8760016768814028, + "learning_rate": 1.969636264491438e-06, + "loss": 0.5749634504318237, + "step": 1083 + }, + { + "epoch": 0.24994235646760435, + "grad_norm": 0.9733867387062589, + "learning_rate": 1.9695429559501283e-06, + "loss": 0.5002774000167847, + "step": 1084 + }, + { + "epoch": 0.250172930597187, + "grad_norm": 0.9904270135981337, + "learning_rate": 1.9694495064759236e-06, + "loss": 0.5407592058181763, + "step": 1085 + }, + { + "epoch": 0.25040350472676964, + "grad_norm": 0.9112103184885231, + "learning_rate": 1.969355916082408e-06, + "loss": 0.5557315349578857, + "step": 1086 + }, + { + "epoch": 0.2506340788563523, + "grad_norm": 1.073902907739282, + "learning_rate": 1.9692621847831865e-06, + "loss": 0.4710160493850708, + "step": 1087 + }, + { + "epoch": 0.25086465298593497, + "grad_norm": 0.946965380647112, + "learning_rate": 1.969168312591883e-06, + "loss": 0.5935187339782715, + "step": 1088 + }, + { + "epoch": 0.2510952271155176, + "grad_norm": 0.9849357353961209, + "learning_rate": 1.969074299522143e-06, + "loss": 0.5358916521072388, + "step": 1089 + }, + { + "epoch": 0.2513258012451003, + "grad_norm": 0.9196749680008564, + "learning_rate": 1.968980145587632e-06, + "loss": 0.40736621618270874, + "step": 1090 + }, + { + "epoch": 0.25155637537468295, + "grad_norm": 0.8048789415521217, + "learning_rate": 1.968885850802037e-06, + "loss": 0.4986698627471924, + "step": 1091 + }, + { + "epoch": 0.2517869495042656, + "grad_norm": 
0.9340127152994311, + "learning_rate": 1.968791415179064e-06, + "loss": 0.5547258853912354, + "step": 1092 + }, + { + "epoch": 0.2520175236338483, + "grad_norm": 1.0477998347740531, + "learning_rate": 1.96869683873244e-06, + "loss": 0.5187167525291443, + "step": 1093 + }, + { + "epoch": 0.25224809776343093, + "grad_norm": 0.9456931065936238, + "learning_rate": 1.9686021214759136e-06, + "loss": 0.560575008392334, + "step": 1094 + }, + { + "epoch": 0.2524786718930136, + "grad_norm": 1.0595767044992972, + "learning_rate": 1.968507263423252e-06, + "loss": 0.6441233158111572, + "step": 1095 + }, + { + "epoch": 0.25270924602259626, + "grad_norm": 1.1650850474563572, + "learning_rate": 1.9684122645882446e-06, + "loss": 0.6693669557571411, + "step": 1096 + }, + { + "epoch": 0.2529398201521789, + "grad_norm": 0.9107773905688578, + "learning_rate": 1.9683171249846992e-06, + "loss": 0.4713742434978485, + "step": 1097 + }, + { + "epoch": 0.2531703942817616, + "grad_norm": 1.0855755163203802, + "learning_rate": 1.9682218446264466e-06, + "loss": 0.5393046140670776, + "step": 1098 + }, + { + "epoch": 0.25340096841134424, + "grad_norm": 0.8304628447343301, + "learning_rate": 1.968126423527336e-06, + "loss": 0.44416874647140503, + "step": 1099 + }, + { + "epoch": 0.2536315425409269, + "grad_norm": 0.8560775526129268, + "learning_rate": 1.9680308617012383e-06, + "loss": 0.486186683177948, + "step": 1100 + }, + { + "epoch": 0.25386211667050956, + "grad_norm": 0.8812542184427957, + "learning_rate": 1.9679351591620446e-06, + "loss": 0.5523893237113953, + "step": 1101 + }, + { + "epoch": 0.2540926908000922, + "grad_norm": 0.9964866126205207, + "learning_rate": 1.967839315923665e-06, + "loss": 0.49889492988586426, + "step": 1102 + }, + { + "epoch": 0.2543232649296749, + "grad_norm": 1.1438608764608638, + "learning_rate": 1.9677433320000325e-06, + "loss": 0.6084630489349365, + "step": 1103 + }, + { + "epoch": 0.25455383905925755, + "grad_norm": 0.9684259335546852, + "learning_rate": 
1.967647207405099e-06, + "loss": 0.5458555221557617, + "step": 1104 + }, + { + "epoch": 0.2547844131888402, + "grad_norm": 1.3299718075912128, + "learning_rate": 1.9675509421528367e-06, + "loss": 0.5453877449035645, + "step": 1105 + }, + { + "epoch": 0.25501498731842287, + "grad_norm": 1.0404901274691463, + "learning_rate": 1.9674545362572393e-06, + "loss": 0.5226954221725464, + "step": 1106 + }, + { + "epoch": 0.25524556144800553, + "grad_norm": 1.0740163604419912, + "learning_rate": 1.96735798973232e-06, + "loss": 0.5736720561981201, + "step": 1107 + }, + { + "epoch": 0.2554761355775882, + "grad_norm": 0.9184855028566775, + "learning_rate": 1.9672613025921135e-06, + "loss": 0.5474177598953247, + "step": 1108 + }, + { + "epoch": 0.25570670970717085, + "grad_norm": 1.2485055919980548, + "learning_rate": 1.967164474850673e-06, + "loss": 0.5146498084068298, + "step": 1109 + }, + { + "epoch": 0.2559372838367535, + "grad_norm": 1.1137167951471605, + "learning_rate": 1.967067506522075e-06, + "loss": 0.6319057941436768, + "step": 1110 + }, + { + "epoch": 0.2561678579663362, + "grad_norm": 0.9087550652455604, + "learning_rate": 1.9669703976204136e-06, + "loss": 0.44495588541030884, + "step": 1111 + }, + { + "epoch": 0.25639843209591884, + "grad_norm": 0.9108509097161608, + "learning_rate": 1.9668731481598052e-06, + "loss": 0.5331558585166931, + "step": 1112 + }, + { + "epoch": 0.2566290062255015, + "grad_norm": 0.9795245602848469, + "learning_rate": 1.9667757581543856e-06, + "loss": 0.5409468412399292, + "step": 1113 + }, + { + "epoch": 0.25685958035508416, + "grad_norm": 1.054007279778104, + "learning_rate": 1.9666782276183112e-06, + "loss": 0.5743308663368225, + "step": 1114 + }, + { + "epoch": 0.2570901544846668, + "grad_norm": 1.004577427685411, + "learning_rate": 1.96658055656576e-06, + "loss": 0.5612793564796448, + "step": 1115 + }, + { + "epoch": 0.2573207286142495, + "grad_norm": 0.9750416454144903, + "learning_rate": 1.9664827450109285e-06, + "loss": 
0.554356575012207, + "step": 1116 + }, + { + "epoch": 0.25755130274383214, + "grad_norm": 0.9682247695156199, + "learning_rate": 1.9663847929680352e-06, + "loss": 0.5999840497970581, + "step": 1117 + }, + { + "epoch": 0.2577818768734148, + "grad_norm": 1.0370889815397122, + "learning_rate": 1.9662867004513184e-06, + "loss": 0.5152497291564941, + "step": 1118 + }, + { + "epoch": 0.25801245100299747, + "grad_norm": 1.098663296506931, + "learning_rate": 1.966188467475036e-06, + "loss": 0.6333990097045898, + "step": 1119 + }, + { + "epoch": 0.25824302513258013, + "grad_norm": 0.9734180757824468, + "learning_rate": 1.9660900940534685e-06, + "loss": 0.5826340913772583, + "step": 1120 + }, + { + "epoch": 0.2584735992621628, + "grad_norm": 1.0258650855361047, + "learning_rate": 1.965991580200915e-06, + "loss": 0.5968586206436157, + "step": 1121 + }, + { + "epoch": 0.25870417339174545, + "grad_norm": 1.1400845768454182, + "learning_rate": 1.9658929259316945e-06, + "loss": 0.6164212226867676, + "step": 1122 + }, + { + "epoch": 0.2589347475213281, + "grad_norm": 0.9979393096335119, + "learning_rate": 1.9657941312601487e-06, + "loss": 0.6115970611572266, + "step": 1123 + }, + { + "epoch": 0.2591653216509108, + "grad_norm": 1.0595728674513747, + "learning_rate": 1.9656951962006376e-06, + "loss": 0.5490012168884277, + "step": 1124 + }, + { + "epoch": 0.25939589578049344, + "grad_norm": 0.9502072685023252, + "learning_rate": 1.9655961207675425e-06, + "loss": 0.6350439786911011, + "step": 1125 + }, + { + "epoch": 0.2596264699100761, + "grad_norm": 1.0657411847577343, + "learning_rate": 1.965496904975266e-06, + "loss": 0.5667803287506104, + "step": 1126 + }, + { + "epoch": 0.25985704403965876, + "grad_norm": 1.1821679518558437, + "learning_rate": 1.9653975488382287e-06, + "loss": 0.6443949937820435, + "step": 1127 + }, + { + "epoch": 0.2600876181692414, + "grad_norm": 0.9716559479774245, + "learning_rate": 1.965298052370874e-06, + "loss": 0.6085849404335022, + "step": 1128 + }, + { 
+ "epoch": 0.2603181922988241, + "grad_norm": 1.0823001356947075, + "learning_rate": 1.9651984155876644e-06, + "loss": 0.6633332967758179, + "step": 1129 + }, + { + "epoch": 0.26054876642840674, + "grad_norm": 1.2848504053653516, + "learning_rate": 1.965098638503083e-06, + "loss": 0.5997219085693359, + "step": 1130 + }, + { + "epoch": 0.2607793405579894, + "grad_norm": 1.0454096533900064, + "learning_rate": 1.9649987211316333e-06, + "loss": 0.5425878167152405, + "step": 1131 + }, + { + "epoch": 0.26100991468757206, + "grad_norm": 1.1511928917305188, + "learning_rate": 1.9648986634878397e-06, + "loss": 0.5894105434417725, + "step": 1132 + }, + { + "epoch": 0.2612404888171547, + "grad_norm": 1.0098199878370706, + "learning_rate": 1.9647984655862464e-06, + "loss": 0.5967395901679993, + "step": 1133 + }, + { + "epoch": 0.2614710629467374, + "grad_norm": 1.026032503619318, + "learning_rate": 1.964698127441418e-06, + "loss": 0.5129253268241882, + "step": 1134 + }, + { + "epoch": 0.26170163707632005, + "grad_norm": 0.8680242413092717, + "learning_rate": 1.96459764906794e-06, + "loss": 0.4503140449523926, + "step": 1135 + }, + { + "epoch": 0.2619322112059027, + "grad_norm": 1.3487730716398616, + "learning_rate": 1.964497030480418e-06, + "loss": 0.5533326864242554, + "step": 1136 + }, + { + "epoch": 0.26216278533548537, + "grad_norm": 1.020191268815397, + "learning_rate": 1.9643962716934776e-06, + "loss": 0.695278525352478, + "step": 1137 + }, + { + "epoch": 0.26239335946506803, + "grad_norm": 1.0637915159693183, + "learning_rate": 1.9642953727217654e-06, + "loss": 0.5198212265968323, + "step": 1138 + }, + { + "epoch": 0.2626239335946507, + "grad_norm": 0.8691408428805534, + "learning_rate": 1.9641943335799476e-06, + "loss": 0.4348503351211548, + "step": 1139 + }, + { + "epoch": 0.26285450772423335, + "grad_norm": 1.075781292907759, + "learning_rate": 1.9640931542827116e-06, + "loss": 0.5241343975067139, + "step": 1140 + }, + { + "epoch": 0.263085081853816, + "grad_norm": 
1.1170175690927264, + "learning_rate": 1.9639918348447654e-06, + "loss": 0.6621984839439392, + "step": 1141 + }, + { + "epoch": 0.2633156559833987, + "grad_norm": 0.9797970310895017, + "learning_rate": 1.9638903752808358e-06, + "loss": 0.6091395020484924, + "step": 1142 + }, + { + "epoch": 0.26354623011298134, + "grad_norm": 1.358580155566318, + "learning_rate": 1.963788775605671e-06, + "loss": 0.4857162833213806, + "step": 1143 + }, + { + "epoch": 0.263776804242564, + "grad_norm": 1.155872598215321, + "learning_rate": 1.9636870358340408e-06, + "loss": 0.5912413597106934, + "step": 1144 + }, + { + "epoch": 0.26400737837214666, + "grad_norm": 0.9493926626803307, + "learning_rate": 1.9635851559807326e-06, + "loss": 0.6006268858909607, + "step": 1145 + }, + { + "epoch": 0.2642379525017293, + "grad_norm": 1.0095494395510323, + "learning_rate": 1.9634831360605567e-06, + "loss": 0.5580735802650452, + "step": 1146 + }, + { + "epoch": 0.264468526631312, + "grad_norm": 1.09443652681985, + "learning_rate": 1.9633809760883423e-06, + "loss": 0.5554602146148682, + "step": 1147 + }, + { + "epoch": 0.26469910076089465, + "grad_norm": 1.0073361110439816, + "learning_rate": 1.9632786760789393e-06, + "loss": 0.5648301839828491, + "step": 1148 + }, + { + "epoch": 0.2649296748904773, + "grad_norm": 0.9958775096480507, + "learning_rate": 1.9631762360472186e-06, + "loss": 0.5317412614822388, + "step": 1149 + }, + { + "epoch": 0.26516024902005997, + "grad_norm": 0.8377541227122274, + "learning_rate": 1.96307365600807e-06, + "loss": 0.5608310699462891, + "step": 1150 + }, + { + "epoch": 0.26539082314964263, + "grad_norm": 0.9709108194630034, + "learning_rate": 1.962970935976405e-06, + "loss": 0.49922698736190796, + "step": 1151 + }, + { + "epoch": 0.2656213972792253, + "grad_norm": 1.0372577064435262, + "learning_rate": 1.9628680759671556e-06, + "loss": 0.5840054750442505, + "step": 1152 + }, + { + "epoch": 0.26585197140880795, + "grad_norm": 1.1264168952681184, + "learning_rate": 
1.9627650759952727e-06, + "loss": 0.6038475632667542, + "step": 1153 + }, + { + "epoch": 0.2660825455383906, + "grad_norm": 0.969212515968761, + "learning_rate": 1.9626619360757284e-06, + "loss": 0.5923193097114563, + "step": 1154 + }, + { + "epoch": 0.2663131196679733, + "grad_norm": 1.1606889211687668, + "learning_rate": 1.962558656223516e-06, + "loss": 0.5278598666191101, + "step": 1155 + }, + { + "epoch": 0.26654369379755594, + "grad_norm": 0.9873103600473375, + "learning_rate": 1.9624552364536472e-06, + "loss": 0.47691023349761963, + "step": 1156 + }, + { + "epoch": 0.2667742679271386, + "grad_norm": 0.9087676067471127, + "learning_rate": 1.962351676781156e-06, + "loss": 0.5801899433135986, + "step": 1157 + }, + { + "epoch": 0.26700484205672126, + "grad_norm": 1.253961482177072, + "learning_rate": 1.962247977221095e-06, + "loss": 0.5170506238937378, + "step": 1158 + }, + { + "epoch": 0.2672354161863039, + "grad_norm": 1.0951542684812736, + "learning_rate": 1.9621441377885387e-06, + "loss": 0.6114981174468994, + "step": 1159 + }, + { + "epoch": 0.2674659903158866, + "grad_norm": 1.0027892727643062, + "learning_rate": 1.9620401584985807e-06, + "loss": 0.6377004384994507, + "step": 1160 + }, + { + "epoch": 0.26769656444546924, + "grad_norm": 0.9961094597216124, + "learning_rate": 1.9619360393663356e-06, + "loss": 0.6177431344985962, + "step": 1161 + }, + { + "epoch": 0.2679271385750519, + "grad_norm": 1.1384478708718946, + "learning_rate": 1.9618317804069384e-06, + "loss": 0.579784095287323, + "step": 1162 + }, + { + "epoch": 0.26815771270463457, + "grad_norm": 0.8744752952973797, + "learning_rate": 1.9617273816355444e-06, + "loss": 0.6078776121139526, + "step": 1163 + }, + { + "epoch": 0.2683882868342172, + "grad_norm": 0.9801356210694869, + "learning_rate": 1.961622843067328e-06, + "loss": 0.5583093166351318, + "step": 1164 + }, + { + "epoch": 0.2686188609637999, + "grad_norm": 0.8741287294678143, + "learning_rate": 1.961518164717486e-06, + "loss": 
0.46033143997192383, + "step": 1165 + }, + { + "epoch": 0.26884943509338255, + "grad_norm": 1.250568820610365, + "learning_rate": 1.961413346601234e-06, + "loss": 0.5637123584747314, + "step": 1166 + }, + { + "epoch": 0.2690800092229652, + "grad_norm": 1.0360456860810905, + "learning_rate": 1.9613083887338085e-06, + "loss": 0.5943595170974731, + "step": 1167 + }, + { + "epoch": 0.2693105833525478, + "grad_norm": 1.0495419121458136, + "learning_rate": 1.961203291130466e-06, + "loss": 0.5440319776535034, + "step": 1168 + }, + { + "epoch": 0.2695411574821305, + "grad_norm": 0.9704830315061433, + "learning_rate": 1.961098053806484e-06, + "loss": 0.5665608048439026, + "step": 1169 + }, + { + "epoch": 0.26977173161171314, + "grad_norm": 1.0522625707521382, + "learning_rate": 1.960992676777159e-06, + "loss": 0.5707683563232422, + "step": 1170 + }, + { + "epoch": 0.2700023057412958, + "grad_norm": 1.034604689259721, + "learning_rate": 1.9608871600578093e-06, + "loss": 0.5447777509689331, + "step": 1171 + }, + { + "epoch": 0.27023287987087846, + "grad_norm": 1.1920689559592121, + "learning_rate": 1.9607815036637726e-06, + "loss": 0.5598857402801514, + "step": 1172 + }, + { + "epoch": 0.2704634540004611, + "grad_norm": 1.208701571232948, + "learning_rate": 1.960675707610407e-06, + "loss": 0.558403491973877, + "step": 1173 + }, + { + "epoch": 0.2706940281300438, + "grad_norm": 1.3006493228897391, + "learning_rate": 1.960569771913091e-06, + "loss": 0.6696962118148804, + "step": 1174 + }, + { + "epoch": 0.27092460225962645, + "grad_norm": 1.0597715788538418, + "learning_rate": 1.960463696587224e-06, + "loss": 0.519884467124939, + "step": 1175 + }, + { + "epoch": 0.2711551763892091, + "grad_norm": 1.0090714718428708, + "learning_rate": 1.9603574816482243e-06, + "loss": 0.6440261602401733, + "step": 1176 + }, + { + "epoch": 0.27138575051879177, + "grad_norm": 1.1163188497552168, + "learning_rate": 1.9602511271115317e-06, + "loss": 0.48713982105255127, + "step": 1177 + }, + { + 
"epoch": 0.27161632464837443, + "grad_norm": 0.9570997011710476, + "learning_rate": 1.960144632992606e-06, + "loss": 0.5257129073143005, + "step": 1178 + }, + { + "epoch": 0.2718468987779571, + "grad_norm": 1.3308862733434774, + "learning_rate": 1.9600379993069272e-06, + "loss": 0.5220426917076111, + "step": 1179 + }, + { + "epoch": 0.27207747290753975, + "grad_norm": 1.0690404222828096, + "learning_rate": 1.9599312260699955e-06, + "loss": 0.569817304611206, + "step": 1180 + }, + { + "epoch": 0.2723080470371224, + "grad_norm": 1.0650857331550394, + "learning_rate": 1.9598243132973317e-06, + "loss": 0.4370031952857971, + "step": 1181 + }, + { + "epoch": 0.2725386211667051, + "grad_norm": 1.125403283606087, + "learning_rate": 1.959717261004476e-06, + "loss": 0.6060882210731506, + "step": 1182 + }, + { + "epoch": 0.27276919529628774, + "grad_norm": 0.9065361051198069, + "learning_rate": 1.9596100692069905e-06, + "loss": 0.5830891132354736, + "step": 1183 + }, + { + "epoch": 0.2729997694258704, + "grad_norm": 1.4570032441462188, + "learning_rate": 1.9595027379204556e-06, + "loss": 0.5689493417739868, + "step": 1184 + }, + { + "epoch": 0.27323034355545306, + "grad_norm": 1.3244280690129522, + "learning_rate": 1.9593952671604735e-06, + "loss": 0.5550887584686279, + "step": 1185 + }, + { + "epoch": 0.2734609176850357, + "grad_norm": 1.0207521269848765, + "learning_rate": 1.9592876569426665e-06, + "loss": 0.48127567768096924, + "step": 1186 + }, + { + "epoch": 0.2736914918146184, + "grad_norm": 1.071211669612227, + "learning_rate": 1.9591799072826764e-06, + "loss": 0.640753984451294, + "step": 1187 + }, + { + "epoch": 0.27392206594420104, + "grad_norm": 1.1730143666350425, + "learning_rate": 1.959072018196165e-06, + "loss": 0.5266000032424927, + "step": 1188 + }, + { + "epoch": 0.2741526400737837, + "grad_norm": 0.927867514508325, + "learning_rate": 1.958963989698817e-06, + "loss": 0.5586614608764648, + "step": 1189 + }, + { + "epoch": 0.27438321420336637, + "grad_norm": 
1.1860842675481242, + "learning_rate": 1.9588558218063336e-06, + "loss": 0.5937967896461487, + "step": 1190 + }, + { + "epoch": 0.274613788332949, + "grad_norm": 1.3761930600193095, + "learning_rate": 1.958747514534439e-06, + "loss": 0.5887218713760376, + "step": 1191 + }, + { + "epoch": 0.2748443624625317, + "grad_norm": 1.0541442430853707, + "learning_rate": 1.9586390678988766e-06, + "loss": 0.5151614546775818, + "step": 1192 + }, + { + "epoch": 0.27507493659211435, + "grad_norm": 0.9782419657689414, + "learning_rate": 1.95853048191541e-06, + "loss": 0.5392748713493347, + "step": 1193 + }, + { + "epoch": 0.275305510721697, + "grad_norm": 1.330179141409128, + "learning_rate": 1.9584217565998237e-06, + "loss": 0.5649560689926147, + "step": 1194 + }, + { + "epoch": 0.2755360848512797, + "grad_norm": 1.0628047614804303, + "learning_rate": 1.9583128919679213e-06, + "loss": 0.4888305962085724, + "step": 1195 + }, + { + "epoch": 0.27576665898086233, + "grad_norm": 0.8838567368205815, + "learning_rate": 1.9582038880355282e-06, + "loss": 0.5026978850364685, + "step": 1196 + }, + { + "epoch": 0.275997233110445, + "grad_norm": 1.094585503881071, + "learning_rate": 1.9580947448184887e-06, + "loss": 0.5358047485351562, + "step": 1197 + }, + { + "epoch": 0.27622780724002766, + "grad_norm": 1.0838231861798517, + "learning_rate": 1.957985462332668e-06, + "loss": 0.6145739555358887, + "step": 1198 + }, + { + "epoch": 0.2764583813696103, + "grad_norm": 1.1469394336927528, + "learning_rate": 1.957876040593952e-06, + "loss": 0.5155332684516907, + "step": 1199 + }, + { + "epoch": 0.276688955499193, + "grad_norm": 0.9936014396625975, + "learning_rate": 1.957766479618245e-06, + "loss": 0.48794522881507874, + "step": 1200 + }, + { + "epoch": 0.27691952962877564, + "grad_norm": 1.135029138979863, + "learning_rate": 1.957656779421474e-06, + "loss": 0.5851761102676392, + "step": 1201 + }, + { + "epoch": 0.2771501037583583, + "grad_norm": 1.0236207003793518, + "learning_rate": 
1.957546940019584e-06, + "loss": 0.603874683380127, + "step": 1202 + }, + { + "epoch": 0.27738067788794096, + "grad_norm": 1.0658787224753152, + "learning_rate": 1.9574369614285426e-06, + "loss": 0.5022559762001038, + "step": 1203 + }, + { + "epoch": 0.2776112520175236, + "grad_norm": 1.4179237341040045, + "learning_rate": 1.9573268436643347e-06, + "loss": 0.6469730138778687, + "step": 1204 + }, + { + "epoch": 0.2778418261471063, + "grad_norm": 0.9207501665109726, + "learning_rate": 1.9572165867429685e-06, + "loss": 0.49918532371520996, + "step": 1205 + }, + { + "epoch": 0.27807240027668895, + "grad_norm": 0.9656836684424259, + "learning_rate": 1.95710619068047e-06, + "loss": 0.48623788356781006, + "step": 1206 + }, + { + "epoch": 0.2783029744062716, + "grad_norm": 0.9837814076450196, + "learning_rate": 1.956995655492887e-06, + "loss": 0.4868438243865967, + "step": 1207 + }, + { + "epoch": 0.27853354853585427, + "grad_norm": 1.3533879485069031, + "learning_rate": 1.9568849811962862e-06, + "loss": 0.5989904403686523, + "step": 1208 + }, + { + "epoch": 0.27876412266543693, + "grad_norm": 1.3345070230968985, + "learning_rate": 1.956774167806756e-06, + "loss": 0.5125104188919067, + "step": 1209 + }, + { + "epoch": 0.2789946967950196, + "grad_norm": 1.0305365483781255, + "learning_rate": 1.956663215340404e-06, + "loss": 0.5126978158950806, + "step": 1210 + }, + { + "epoch": 0.27922527092460225, + "grad_norm": 0.9524616726362105, + "learning_rate": 1.9565521238133576e-06, + "loss": 0.5009375810623169, + "step": 1211 + }, + { + "epoch": 0.2794558450541849, + "grad_norm": 1.0762476710184214, + "learning_rate": 1.956440893241766e-06, + "loss": 0.5601603984832764, + "step": 1212 + }, + { + "epoch": 0.2796864191837676, + "grad_norm": 1.2962045971613827, + "learning_rate": 1.956329523641797e-06, + "loss": 0.6310690641403198, + "step": 1213 + }, + { + "epoch": 0.27991699331335024, + "grad_norm": 1.0395130987242733, + "learning_rate": 1.95621801502964e-06, + "loss": 
0.498830646276474, + "step": 1214 + }, + { + "epoch": 0.2801475674429329, + "grad_norm": 1.0547121574701517, + "learning_rate": 1.9561063674215036e-06, + "loss": 0.6612650156021118, + "step": 1215 + }, + { + "epoch": 0.28037814157251556, + "grad_norm": 1.0369778810130763, + "learning_rate": 1.9559945808336166e-06, + "loss": 0.5651615858078003, + "step": 1216 + }, + { + "epoch": 0.2806087157020982, + "grad_norm": 1.0028009497915646, + "learning_rate": 1.955882655282229e-06, + "loss": 0.5675203800201416, + "step": 1217 + }, + { + "epoch": 0.2808392898316809, + "grad_norm": 1.0910384567165883, + "learning_rate": 1.9557705907836095e-06, + "loss": 0.5691455006599426, + "step": 1218 + }, + { + "epoch": 0.28106986396126354, + "grad_norm": 1.2440322291047097, + "learning_rate": 1.955658387354048e-06, + "loss": 0.6018673181533813, + "step": 1219 + }, + { + "epoch": 0.2813004380908462, + "grad_norm": 0.8594681913500082, + "learning_rate": 1.955546045009855e-06, + "loss": 0.5188831090927124, + "step": 1220 + }, + { + "epoch": 0.28153101222042887, + "grad_norm": 0.9611802055135819, + "learning_rate": 1.9554335637673596e-06, + "loss": 0.5161044597625732, + "step": 1221 + }, + { + "epoch": 0.28176158635001153, + "grad_norm": 1.0764912433641416, + "learning_rate": 1.9553209436429132e-06, + "loss": 0.5651452541351318, + "step": 1222 + }, + { + "epoch": 0.2819921604795942, + "grad_norm": 1.0362033432012678, + "learning_rate": 1.9552081846528858e-06, + "loss": 0.5763273239135742, + "step": 1223 + }, + { + "epoch": 0.28222273460917685, + "grad_norm": 1.0512305083546745, + "learning_rate": 1.9550952868136677e-06, + "loss": 0.6379664540290833, + "step": 1224 + }, + { + "epoch": 0.2824533087387595, + "grad_norm": 0.966358468685478, + "learning_rate": 1.95498225014167e-06, + "loss": 0.4021342396736145, + "step": 1225 + }, + { + "epoch": 0.2826838828683422, + "grad_norm": 1.3065298085361052, + "learning_rate": 1.954869074653324e-06, + "loss": 0.49230247735977173, + "step": 1226 + }, + { + 
"epoch": 0.28291445699792483, + "grad_norm": 0.9198430971109288, + "learning_rate": 1.954755760365081e-06, + "loss": 0.5921554565429688, + "step": 1227 + }, + { + "epoch": 0.2831450311275075, + "grad_norm": 1.2338068239582654, + "learning_rate": 1.954642307293412e-06, + "loss": 0.6495868563652039, + "step": 1228 + }, + { + "epoch": 0.28337560525709016, + "grad_norm": 1.0310593371372254, + "learning_rate": 1.954528715454808e-06, + "loss": 0.5699795484542847, + "step": 1229 + }, + { + "epoch": 0.2836061793866728, + "grad_norm": 1.3462988930710962, + "learning_rate": 1.9544149848657816e-06, + "loss": 0.582231879234314, + "step": 1230 + }, + { + "epoch": 0.2838367535162555, + "grad_norm": 1.0033811085419764, + "learning_rate": 1.9543011155428647e-06, + "loss": 0.5952359437942505, + "step": 1231 + }, + { + "epoch": 0.28406732764583814, + "grad_norm": 1.150479906025031, + "learning_rate": 1.9541871075026092e-06, + "loss": 0.646816611289978, + "step": 1232 + }, + { + "epoch": 0.2842979017754208, + "grad_norm": 1.2509776515814615, + "learning_rate": 1.9540729607615866e-06, + "loss": 0.5781043767929077, + "step": 1233 + }, + { + "epoch": 0.28452847590500346, + "grad_norm": 1.1718295930905136, + "learning_rate": 1.95395867533639e-06, + "loss": 0.609764814376831, + "step": 1234 + }, + { + "epoch": 0.2847590500345861, + "grad_norm": 1.2826152398089232, + "learning_rate": 1.9538442512436325e-06, + "loss": 0.4673759341239929, + "step": 1235 + }, + { + "epoch": 0.2849896241641688, + "grad_norm": 1.1343052125955835, + "learning_rate": 1.953729688499946e-06, + "loss": 0.6310999393463135, + "step": 1236 + }, + { + "epoch": 0.28522019829375145, + "grad_norm": 1.075568996273352, + "learning_rate": 1.953614987121983e-06, + "loss": 0.5103853344917297, + "step": 1237 + }, + { + "epoch": 0.2854507724233341, + "grad_norm": 1.1329951189185654, + "learning_rate": 1.9535001471264178e-06, + "loss": 0.5735328197479248, + "step": 1238 + }, + { + "epoch": 0.28568134655291677, + "grad_norm": 
1.010063337652323, + "learning_rate": 1.953385168529942e-06, + "loss": 0.5617454051971436, + "step": 1239 + }, + { + "epoch": 0.28591192068249943, + "grad_norm": 1.1392481671873862, + "learning_rate": 1.9532700513492705e-06, + "loss": 0.49873489141464233, + "step": 1240 + }, + { + "epoch": 0.2861424948120821, + "grad_norm": 0.9923008758606798, + "learning_rate": 1.9531547956011353e-06, + "loss": 0.49185073375701904, + "step": 1241 + }, + { + "epoch": 0.28637306894166475, + "grad_norm": 1.1119890456844754, + "learning_rate": 1.9530394013022907e-06, + "loss": 0.6016734838485718, + "step": 1242 + }, + { + "epoch": 0.2866036430712474, + "grad_norm": 0.984310677257317, + "learning_rate": 1.9529238684695105e-06, + "loss": 0.5922054052352905, + "step": 1243 + }, + { + "epoch": 0.2868342172008301, + "grad_norm": 1.2933601588161594, + "learning_rate": 1.952808197119588e-06, + "loss": 0.6498355269432068, + "step": 1244 + }, + { + "epoch": 0.28706479133041274, + "grad_norm": 1.106145681286101, + "learning_rate": 1.9526923872693382e-06, + "loss": 0.5564426183700562, + "step": 1245 + }, + { + "epoch": 0.2872953654599954, + "grad_norm": 1.0410162813090216, + "learning_rate": 1.9525764389355945e-06, + "loss": 0.6144154071807861, + "step": 1246 + }, + { + "epoch": 0.28752593958957806, + "grad_norm": 0.9304288925500919, + "learning_rate": 1.9524603521352116e-06, + "loss": 0.5958914756774902, + "step": 1247 + }, + { + "epoch": 0.2877565137191607, + "grad_norm": 1.167763375182377, + "learning_rate": 1.952344126885063e-06, + "loss": 0.5471549034118652, + "step": 1248 + }, + { + "epoch": 0.2879870878487434, + "grad_norm": 1.0658282088084226, + "learning_rate": 1.952227763202044e-06, + "loss": 0.5512329936027527, + "step": 1249 + }, + { + "epoch": 0.28821766197832605, + "grad_norm": 0.9336952567830841, + "learning_rate": 1.9521112611030695e-06, + "loss": 0.5545130968093872, + "step": 1250 + }, + { + "epoch": 0.2884482361079087, + "grad_norm": 0.9540157404500241, + "learning_rate": 
1.9519946206050734e-06, + "loss": 0.5409479737281799, + "step": 1251 + }, + { + "epoch": 0.28867881023749137, + "grad_norm": 1.0425656776824677, + "learning_rate": 1.9518778417250114e-06, + "loss": 0.5248778462409973, + "step": 1252 + }, + { + "epoch": 0.28890938436707403, + "grad_norm": 1.1108036883068904, + "learning_rate": 1.951760924479858e-06, + "loss": 0.4985620975494385, + "step": 1253 + }, + { + "epoch": 0.2891399584966567, + "grad_norm": 1.1956376798663733, + "learning_rate": 1.951643868886608e-06, + "loss": 0.5470424890518188, + "step": 1254 + }, + { + "epoch": 0.28937053262623935, + "grad_norm": 0.830517770820401, + "learning_rate": 1.9515266749622776e-06, + "loss": 0.5082905292510986, + "step": 1255 + }, + { + "epoch": 0.289601106755822, + "grad_norm": 1.1321002460273393, + "learning_rate": 1.9514093427239013e-06, + "loss": 0.5734596252441406, + "step": 1256 + }, + { + "epoch": 0.2898316808854047, + "grad_norm": 1.133005147672039, + "learning_rate": 1.951291872188535e-06, + "loss": 0.4727100431919098, + "step": 1257 + }, + { + "epoch": 0.29006225501498734, + "grad_norm": 1.044180363768592, + "learning_rate": 1.951174263373254e-06, + "loss": 0.6727551221847534, + "step": 1258 + }, + { + "epoch": 0.29029282914457, + "grad_norm": 0.9491498247436025, + "learning_rate": 1.9510565162951534e-06, + "loss": 0.5225725173950195, + "step": 1259 + }, + { + "epoch": 0.29052340327415266, + "grad_norm": 0.9861385624887246, + "learning_rate": 1.95093863097135e-06, + "loss": 0.46537530422210693, + "step": 1260 + }, + { + "epoch": 0.2907539774037353, + "grad_norm": 1.0433291271591505, + "learning_rate": 1.950820607418979e-06, + "loss": 0.4729498624801636, + "step": 1261 + }, + { + "epoch": 0.290984551533318, + "grad_norm": 1.0319083654914931, + "learning_rate": 1.950702445655196e-06, + "loss": 0.519434928894043, + "step": 1262 + }, + { + "epoch": 0.29121512566290064, + "grad_norm": 1.0839075745171884, + "learning_rate": 1.9505841456971784e-06, + "loss": 
0.5487297177314758, + "step": 1263 + }, + { + "epoch": 0.2914456997924833, + "grad_norm": 0.9970964597897494, + "learning_rate": 1.9504657075621207e-06, + "loss": 0.6228574514389038, + "step": 1264 + }, + { + "epoch": 0.29167627392206597, + "grad_norm": 1.076219157850212, + "learning_rate": 1.95034713126724e-06, + "loss": 0.486205518245697, + "step": 1265 + }, + { + "epoch": 0.2919068480516486, + "grad_norm": 1.220321517878089, + "learning_rate": 1.950228416829772e-06, + "loss": 0.6465567350387573, + "step": 1266 + }, + { + "epoch": 0.2921374221812313, + "grad_norm": 1.0227736343783316, + "learning_rate": 1.9501095642669735e-06, + "loss": 0.5160506963729858, + "step": 1267 + }, + { + "epoch": 0.29236799631081395, + "grad_norm": 1.0494858452172506, + "learning_rate": 1.9499905735961206e-06, + "loss": 0.47334107756614685, + "step": 1268 + }, + { + "epoch": 0.2925985704403966, + "grad_norm": 1.1563719640673416, + "learning_rate": 1.9498714448345103e-06, + "loss": 0.46453380584716797, + "step": 1269 + }, + { + "epoch": 0.29282914456997927, + "grad_norm": 0.9754273704287023, + "learning_rate": 1.9497521779994582e-06, + "loss": 0.5617728233337402, + "step": 1270 + }, + { + "epoch": 0.29305971869956193, + "grad_norm": 1.3129160300173046, + "learning_rate": 1.9496327731083026e-06, + "loss": 0.6129153966903687, + "step": 1271 + }, + { + "epoch": 0.2932902928291446, + "grad_norm": 1.2949114738936178, + "learning_rate": 1.9495132301783983e-06, + "loss": 0.4903183579444885, + "step": 1272 + }, + { + "epoch": 0.29352086695872726, + "grad_norm": 1.1167146830002543, + "learning_rate": 1.9493935492271235e-06, + "loss": 0.5087980628013611, + "step": 1273 + }, + { + "epoch": 0.2937514410883099, + "grad_norm": 1.0447162269466075, + "learning_rate": 1.949273730271874e-06, + "loss": 0.5102910399436951, + "step": 1274 + }, + { + "epoch": 0.2939820152178926, + "grad_norm": 1.0971342006057034, + "learning_rate": 1.9491537733300674e-06, + "loss": 0.5581132769584656, + "step": 1275 + }, + { 
+ "epoch": 0.29421258934747524, + "grad_norm": 1.0166201989797772, + "learning_rate": 1.949033678419141e-06, + "loss": 0.5668213367462158, + "step": 1276 + }, + { + "epoch": 0.2944431634770579, + "grad_norm": 1.1646263878722904, + "learning_rate": 1.9489134455565503e-06, + "loss": 0.5352080464363098, + "step": 1277 + }, + { + "epoch": 0.29467373760664056, + "grad_norm": 1.0375138174364513, + "learning_rate": 1.948793074759774e-06, + "loss": 0.47343915700912476, + "step": 1278 + }, + { + "epoch": 0.29490431173622317, + "grad_norm": 1.2395532163204355, + "learning_rate": 1.9486725660463084e-06, + "loss": 0.5169435143470764, + "step": 1279 + }, + { + "epoch": 0.29513488586580583, + "grad_norm": 1.2035025560649288, + "learning_rate": 1.9485519194336707e-06, + "loss": 0.4801402688026428, + "step": 1280 + }, + { + "epoch": 0.2953654599953885, + "grad_norm": 1.2115883619737033, + "learning_rate": 1.9484311349393984e-06, + "loss": 0.6537381410598755, + "step": 1281 + }, + { + "epoch": 0.29559603412497115, + "grad_norm": 0.9306094110342265, + "learning_rate": 1.9483102125810483e-06, + "loss": 0.5160089135169983, + "step": 1282 + }, + { + "epoch": 0.2958266082545538, + "grad_norm": 1.0525832312633145, + "learning_rate": 1.9481891523761985e-06, + "loss": 0.5332320332527161, + "step": 1283 + }, + { + "epoch": 0.2960571823841365, + "grad_norm": 0.9112280719646961, + "learning_rate": 1.9480679543424453e-06, + "loss": 0.5076215267181396, + "step": 1284 + }, + { + "epoch": 0.29628775651371914, + "grad_norm": 1.1265706213450601, + "learning_rate": 1.947946618497407e-06, + "loss": 0.607105016708374, + "step": 1285 + }, + { + "epoch": 0.2965183306433018, + "grad_norm": 1.076771624610464, + "learning_rate": 1.9478251448587203e-06, + "loss": 0.6265846490859985, + "step": 1286 + }, + { + "epoch": 0.29674890477288446, + "grad_norm": 1.164803442921585, + "learning_rate": 1.9477035334440426e-06, + "loss": 0.5313390493392944, + "step": 1287 + }, + { + "epoch": 0.2969794789024671, + 
"grad_norm": 1.0583207692233336, + "learning_rate": 1.947581784271052e-06, + "loss": 0.5059833526611328, + "step": 1288 + }, + { + "epoch": 0.2972100530320498, + "grad_norm": 1.171630953302918, + "learning_rate": 1.9474598973574455e-06, + "loss": 0.5550922155380249, + "step": 1289 + }, + { + "epoch": 0.29744062716163244, + "grad_norm": 0.9941233964259298, + "learning_rate": 1.947337872720941e-06, + "loss": 0.5594801306724548, + "step": 1290 + }, + { + "epoch": 0.2976712012912151, + "grad_norm": 1.1672729516761162, + "learning_rate": 1.9472157103792753e-06, + "loss": 0.6404933333396912, + "step": 1291 + }, + { + "epoch": 0.29790177542079777, + "grad_norm": 1.216836258446271, + "learning_rate": 1.947093410350206e-06, + "loss": 0.5884830355644226, + "step": 1292 + }, + { + "epoch": 0.2981323495503804, + "grad_norm": 1.313520165154308, + "learning_rate": 1.9469709726515114e-06, + "loss": 0.5723487138748169, + "step": 1293 + }, + { + "epoch": 0.2983629236799631, + "grad_norm": 1.047985941483805, + "learning_rate": 1.946848397300989e-06, + "loss": 0.5298895239830017, + "step": 1294 + }, + { + "epoch": 0.29859349780954575, + "grad_norm": 1.009793366380185, + "learning_rate": 1.9467256843164557e-06, + "loss": 0.6118877530097961, + "step": 1295 + }, + { + "epoch": 0.2988240719391284, + "grad_norm": 1.2369344702112195, + "learning_rate": 1.9466028337157498e-06, + "loss": 0.6014599800109863, + "step": 1296 + }, + { + "epoch": 0.29905464606871107, + "grad_norm": 0.9889478752374168, + "learning_rate": 1.9464798455167278e-06, + "loss": 0.5861071944236755, + "step": 1297 + }, + { + "epoch": 0.29928522019829373, + "grad_norm": 1.238998066636259, + "learning_rate": 1.9463567197372684e-06, + "loss": 0.5863409042358398, + "step": 1298 + }, + { + "epoch": 0.2995157943278764, + "grad_norm": 1.217300214744882, + "learning_rate": 1.9462334563952687e-06, + "loss": 0.6576352119445801, + "step": 1299 + }, + { + "epoch": 0.29974636845745906, + "grad_norm": 1.074029788035818, + 
"learning_rate": 1.9461100555086463e-06, + "loss": 0.5458395481109619, + "step": 1300 + }, + { + "epoch": 0.2999769425870417, + "grad_norm": 1.2759220903954522, + "learning_rate": 1.945986517095339e-06, + "loss": 0.48430997133255005, + "step": 1301 + }, + { + "epoch": 0.3002075167166244, + "grad_norm": 1.2436119574902915, + "learning_rate": 1.945862841173304e-06, + "loss": 0.4212522506713867, + "step": 1302 + }, + { + "epoch": 0.30043809084620704, + "grad_norm": 1.1823128908009017, + "learning_rate": 1.9457390277605188e-06, + "loss": 0.5671685934066772, + "step": 1303 + }, + { + "epoch": 0.3006686649757897, + "grad_norm": 1.0831721181422946, + "learning_rate": 1.945615076874981e-06, + "loss": 0.5350982546806335, + "step": 1304 + }, + { + "epoch": 0.30089923910537236, + "grad_norm": 0.9247033101108441, + "learning_rate": 1.9454909885347088e-06, + "loss": 0.45792657136917114, + "step": 1305 + }, + { + "epoch": 0.301129813234955, + "grad_norm": 1.0473073919925908, + "learning_rate": 1.9453667627577387e-06, + "loss": 0.5644106864929199, + "step": 1306 + }, + { + "epoch": 0.3013603873645377, + "grad_norm": 1.3332547603439018, + "learning_rate": 1.945242399562129e-06, + "loss": 0.554198145866394, + "step": 1307 + }, + { + "epoch": 0.30159096149412035, + "grad_norm": 0.9232575644574793, + "learning_rate": 1.9451178989659565e-06, + "loss": 0.5073474049568176, + "step": 1308 + }, + { + "epoch": 0.301821535623703, + "grad_norm": 1.0206284762622284, + "learning_rate": 1.944993260987319e-06, + "loss": 0.569359302520752, + "step": 1309 + }, + { + "epoch": 0.30205210975328567, + "grad_norm": 1.0382686851233573, + "learning_rate": 1.944868485644334e-06, + "loss": 0.5011791586875916, + "step": 1310 + }, + { + "epoch": 0.30228268388286833, + "grad_norm": 0.9869955270819804, + "learning_rate": 1.9447435729551384e-06, + "loss": 0.41121986508369446, + "step": 1311 + }, + { + "epoch": 0.302513258012451, + "grad_norm": 1.3489170954309295, + "learning_rate": 1.9446185229378896e-06, + 
"loss": 0.5615876913070679, + "step": 1312 + }, + { + "epoch": 0.30274383214203365, + "grad_norm": 1.2244043366760826, + "learning_rate": 1.9444933356107652e-06, + "loss": 0.5450695157051086, + "step": 1313 + }, + { + "epoch": 0.3029744062716163, + "grad_norm": 1.0371383598149113, + "learning_rate": 1.9443680109919626e-06, + "loss": 0.522222101688385, + "step": 1314 + }, + { + "epoch": 0.303204980401199, + "grad_norm": 0.9638880730108786, + "learning_rate": 1.9442425490996984e-06, + "loss": 0.5081876516342163, + "step": 1315 + }, + { + "epoch": 0.30343555453078164, + "grad_norm": 1.1506604859779093, + "learning_rate": 1.9441169499522104e-06, + "loss": 0.4955870509147644, + "step": 1316 + }, + { + "epoch": 0.3036661286603643, + "grad_norm": 1.0185303369767542, + "learning_rate": 1.9439912135677553e-06, + "loss": 0.5098991990089417, + "step": 1317 + }, + { + "epoch": 0.30389670278994696, + "grad_norm": 0.9949182918503017, + "learning_rate": 1.94386533996461e-06, + "loss": 0.5686191320419312, + "step": 1318 + }, + { + "epoch": 0.3041272769195296, + "grad_norm": 1.180090494573931, + "learning_rate": 1.943739329161072e-06, + "loss": 0.606401264667511, + "step": 1319 + }, + { + "epoch": 0.3043578510491123, + "grad_norm": 1.0411002752171188, + "learning_rate": 1.9436131811754576e-06, + "loss": 0.49249163269996643, + "step": 1320 + }, + { + "epoch": 0.30458842517869494, + "grad_norm": 1.1079741007732102, + "learning_rate": 1.9434868960261047e-06, + "loss": 0.5373499989509583, + "step": 1321 + }, + { + "epoch": 0.3048189993082776, + "grad_norm": 1.4236897413447511, + "learning_rate": 1.943360473731369e-06, + "loss": 0.4568977355957031, + "step": 1322 + }, + { + "epoch": 0.30504957343786027, + "grad_norm": 1.034905077800575, + "learning_rate": 1.943233914309628e-06, + "loss": 0.562126636505127, + "step": 1323 + }, + { + "epoch": 0.3052801475674429, + "grad_norm": 1.343019932527111, + "learning_rate": 1.943107217779278e-06, + "loss": 0.5795382261276245, + "step": 1324 + }, + 
{ + "epoch": 0.3055107216970256, + "grad_norm": 0.9852538064889438, + "learning_rate": 1.942980384158736e-06, + "loss": 0.5671530365943909, + "step": 1325 + }, + { + "epoch": 0.30574129582660825, + "grad_norm": 0.8981413519731547, + "learning_rate": 1.942853413466438e-06, + "loss": 0.5511401891708374, + "step": 1326 + }, + { + "epoch": 0.3059718699561909, + "grad_norm": 1.1491379693233763, + "learning_rate": 1.942726305720841e-06, + "loss": 0.5712149739265442, + "step": 1327 + }, + { + "epoch": 0.3062024440857736, + "grad_norm": 1.171535283311252, + "learning_rate": 1.9425990609404215e-06, + "loss": 0.5181496739387512, + "step": 1328 + }, + { + "epoch": 0.30643301821535623, + "grad_norm": 1.1968505005842098, + "learning_rate": 1.9424716791436753e-06, + "loss": 0.5758726596832275, + "step": 1329 + }, + { + "epoch": 0.3066635923449389, + "grad_norm": 0.9714627365066287, + "learning_rate": 1.942344160349119e-06, + "loss": 0.5757049322128296, + "step": 1330 + }, + { + "epoch": 0.30689416647452156, + "grad_norm": 0.9271633895158528, + "learning_rate": 1.9422165045752886e-06, + "loss": 0.47352534532546997, + "step": 1331 + }, + { + "epoch": 0.3071247406041042, + "grad_norm": 1.1418817146577889, + "learning_rate": 1.94208871184074e-06, + "loss": 0.5940845012664795, + "step": 1332 + }, + { + "epoch": 0.3073553147336869, + "grad_norm": 1.0590875448509756, + "learning_rate": 1.9419607821640496e-06, + "loss": 0.5225652456283569, + "step": 1333 + }, + { + "epoch": 0.30758588886326954, + "grad_norm": 1.0803440664833228, + "learning_rate": 1.9418327155638126e-06, + "loss": 0.5253404378890991, + "step": 1334 + }, + { + "epoch": 0.3078164629928522, + "grad_norm": 0.9995333811538123, + "learning_rate": 1.941704512058646e-06, + "loss": 0.5637744665145874, + "step": 1335 + }, + { + "epoch": 0.30804703712243486, + "grad_norm": 0.9947267518967771, + "learning_rate": 1.941576171667184e-06, + "loss": 0.48273587226867676, + "step": 1336 + }, + { + "epoch": 0.3082776112520175, + 
"grad_norm": 0.9569882979404835, + "learning_rate": 1.9414476944080833e-06, + "loss": 0.5989019870758057, + "step": 1337 + }, + { + "epoch": 0.3085081853816002, + "grad_norm": 1.1125936950721667, + "learning_rate": 1.9413190803000183e-06, + "loss": 0.5231547951698303, + "step": 1338 + }, + { + "epoch": 0.30873875951118285, + "grad_norm": 1.0300527191348772, + "learning_rate": 1.9411903293616853e-06, + "loss": 0.5125160217285156, + "step": 1339 + }, + { + "epoch": 0.3089693336407655, + "grad_norm": 1.251133475270548, + "learning_rate": 1.9410614416117993e-06, + "loss": 0.50664883852005, + "step": 1340 + }, + { + "epoch": 0.30919990777034817, + "grad_norm": 1.063411016331963, + "learning_rate": 1.9409324170690955e-06, + "loss": 0.5555824637413025, + "step": 1341 + }, + { + "epoch": 0.30943048189993083, + "grad_norm": 0.9621002533491156, + "learning_rate": 1.940803255752329e-06, + "loss": 0.5182096362113953, + "step": 1342 + }, + { + "epoch": 0.3096610560295135, + "grad_norm": 1.0359415249922332, + "learning_rate": 1.940673957680274e-06, + "loss": 0.5202751159667969, + "step": 1343 + }, + { + "epoch": 0.30989163015909615, + "grad_norm": 0.9908809268815285, + "learning_rate": 1.940544522871726e-06, + "loss": 0.49791598320007324, + "step": 1344 + }, + { + "epoch": 0.3101222042886788, + "grad_norm": 0.990495096784543, + "learning_rate": 1.9404149513454995e-06, + "loss": 0.48691657185554504, + "step": 1345 + }, + { + "epoch": 0.3103527784182615, + "grad_norm": 1.0649987362093034, + "learning_rate": 1.9402852431204293e-06, + "loss": 0.5726481676101685, + "step": 1346 + }, + { + "epoch": 0.31058335254784414, + "grad_norm": 0.9750258824279312, + "learning_rate": 1.940155398215369e-06, + "loss": 0.5443148016929626, + "step": 1347 + }, + { + "epoch": 0.3108139266774268, + "grad_norm": 1.1005441671416878, + "learning_rate": 1.9400254166491935e-06, + "loss": 0.5767767429351807, + "step": 1348 + }, + { + "epoch": 0.31104450080700946, + "grad_norm": 1.059167179602632, + 
"learning_rate": 1.9398952984407967e-06, + "loss": 0.5208882689476013, + "step": 1349 + }, + { + "epoch": 0.3112750749365921, + "grad_norm": 0.8304820941291429, + "learning_rate": 1.939765043609093e-06, + "loss": 0.5152548551559448, + "step": 1350 + }, + { + "epoch": 0.3115056490661748, + "grad_norm": 1.1875548530259965, + "learning_rate": 1.939634652173016e-06, + "loss": 0.42542198300361633, + "step": 1351 + }, + { + "epoch": 0.31173622319575744, + "grad_norm": 1.1424220130032787, + "learning_rate": 1.9395041241515197e-06, + "loss": 0.6471734046936035, + "step": 1352 + }, + { + "epoch": 0.3119667973253401, + "grad_norm": 1.1191897598164906, + "learning_rate": 1.9393734595635767e-06, + "loss": 0.6257486343383789, + "step": 1353 + }, + { + "epoch": 0.31219737145492277, + "grad_norm": 1.1348942815080005, + "learning_rate": 1.9392426584281815e-06, + "loss": 0.562118649482727, + "step": 1354 + }, + { + "epoch": 0.31242794558450543, + "grad_norm": 1.223083488663697, + "learning_rate": 1.939111720764347e-06, + "loss": 0.5602811574935913, + "step": 1355 + }, + { + "epoch": 0.3126585197140881, + "grad_norm": 1.041642546930775, + "learning_rate": 1.9389806465911056e-06, + "loss": 0.54469895362854, + "step": 1356 + }, + { + "epoch": 0.31288909384367075, + "grad_norm": 1.159034123821878, + "learning_rate": 1.9388494359275115e-06, + "loss": 0.5262914896011353, + "step": 1357 + }, + { + "epoch": 0.3131196679732534, + "grad_norm": 1.184281074720895, + "learning_rate": 1.938718088792637e-06, + "loss": 0.6137207746505737, + "step": 1358 + }, + { + "epoch": 0.3133502421028361, + "grad_norm": 1.0740150522099046, + "learning_rate": 1.9385866052055744e-06, + "loss": 0.5792986750602722, + "step": 1359 + }, + { + "epoch": 0.31358081623241874, + "grad_norm": 0.9946259290534466, + "learning_rate": 1.938454985185437e-06, + "loss": 0.4953799843788147, + "step": 1360 + }, + { + "epoch": 0.3138113903620014, + "grad_norm": 1.2906978669163651, + "learning_rate": 1.938323228751356e-06, + "loss": 
0.5722379684448242, + "step": 1361 + }, + { + "epoch": 0.31404196449158406, + "grad_norm": 0.9996513214249106, + "learning_rate": 1.938191335922484e-06, + "loss": 0.513651967048645, + "step": 1362 + }, + { + "epoch": 0.3142725386211667, + "grad_norm": 1.0509635344773647, + "learning_rate": 1.9380593067179935e-06, + "loss": 0.4911235272884369, + "step": 1363 + }, + { + "epoch": 0.3145031127507494, + "grad_norm": 1.0029036193486218, + "learning_rate": 1.9379271411570753e-06, + "loss": 0.5478678941726685, + "step": 1364 + }, + { + "epoch": 0.31473368688033204, + "grad_norm": 0.8901015021428158, + "learning_rate": 1.9377948392589417e-06, + "loss": 0.46698129177093506, + "step": 1365 + }, + { + "epoch": 0.3149642610099147, + "grad_norm": 1.3327357773387452, + "learning_rate": 1.9376624010428243e-06, + "loss": 0.5081343650817871, + "step": 1366 + }, + { + "epoch": 0.31519483513949736, + "grad_norm": 1.1172038301784757, + "learning_rate": 1.9375298265279735e-06, + "loss": 0.583903431892395, + "step": 1367 + }, + { + "epoch": 0.31542540926908, + "grad_norm": 1.0403870552320973, + "learning_rate": 1.937397115733661e-06, + "loss": 0.5249435901641846, + "step": 1368 + }, + { + "epoch": 0.3156559833986627, + "grad_norm": 1.184866053048378, + "learning_rate": 1.9372642686791777e-06, + "loss": 0.5463817119598389, + "step": 1369 + }, + { + "epoch": 0.31588655752824535, + "grad_norm": 1.2179956171685966, + "learning_rate": 1.9371312853838338e-06, + "loss": 0.4634520709514618, + "step": 1370 + }, + { + "epoch": 0.316117131657828, + "grad_norm": 1.2606144259751904, + "learning_rate": 1.93699816586696e-06, + "loss": 0.6018840074539185, + "step": 1371 + }, + { + "epoch": 0.31634770578741067, + "grad_norm": 1.1911067691024062, + "learning_rate": 1.9368649101479072e-06, + "loss": 0.5507885813713074, + "step": 1372 + }, + { + "epoch": 0.31657827991699333, + "grad_norm": 0.9991148637431415, + "learning_rate": 1.9367315182460442e-06, + "loss": 0.5520491600036621, + "step": 1373 + }, + { + 
"epoch": 0.316808854046576, + "grad_norm": 1.2455223208218802, + "learning_rate": 1.936597990180762e-06, + "loss": 0.5410347580909729, + "step": 1374 + }, + { + "epoch": 0.31703942817615866, + "grad_norm": 1.6049117927004484, + "learning_rate": 1.9364643259714694e-06, + "loss": 0.5771749019622803, + "step": 1375 + }, + { + "epoch": 0.3172700023057413, + "grad_norm": 1.123905862633382, + "learning_rate": 1.9363305256375965e-06, + "loss": 0.5071828365325928, + "step": 1376 + }, + { + "epoch": 0.317500576435324, + "grad_norm": 1.1240180544134455, + "learning_rate": 1.936196589198592e-06, + "loss": 0.558908224105835, + "step": 1377 + }, + { + "epoch": 0.31773115056490664, + "grad_norm": 1.1984781772064843, + "learning_rate": 1.9360625166739256e-06, + "loss": 0.5509803295135498, + "step": 1378 + }, + { + "epoch": 0.3179617246944893, + "grad_norm": 1.1703050385431384, + "learning_rate": 1.935928308083085e-06, + "loss": 0.5333945155143738, + "step": 1379 + }, + { + "epoch": 0.31819229882407196, + "grad_norm": 1.2141630137674275, + "learning_rate": 1.93579396344558e-06, + "loss": 0.5337819457054138, + "step": 1380 + }, + { + "epoch": 0.3184228729536546, + "grad_norm": 1.161230429960398, + "learning_rate": 1.9356594827809387e-06, + "loss": 0.5286899209022522, + "step": 1381 + }, + { + "epoch": 0.3186534470832373, + "grad_norm": 1.3042082103630104, + "learning_rate": 1.9355248661087083e-06, + "loss": 0.5915369987487793, + "step": 1382 + }, + { + "epoch": 0.31888402121281995, + "grad_norm": 1.2725859277548193, + "learning_rate": 1.9353901134484575e-06, + "loss": 0.5843492746353149, + "step": 1383 + }, + { + "epoch": 0.3191145953424026, + "grad_norm": 1.0723106790063142, + "learning_rate": 1.935255224819774e-06, + "loss": 0.5015528202056885, + "step": 1384 + }, + { + "epoch": 0.31934516947198527, + "grad_norm": 1.2053658641154292, + "learning_rate": 1.935120200242265e-06, + "loss": 0.5650957822799683, + "step": 1385 + }, + { + "epoch": 0.31957574360156793, + "grad_norm": 
0.9993056241167617, + "learning_rate": 1.9349850397355576e-06, + "loss": 0.5452740788459778, + "step": 1386 + }, + { + "epoch": 0.3198063177311506, + "grad_norm": 1.138341645042275, + "learning_rate": 1.934849743319299e-06, + "loss": 0.5069071054458618, + "step": 1387 + }, + { + "epoch": 0.32003689186073325, + "grad_norm": 1.3097523217194937, + "learning_rate": 1.934714311013156e-06, + "loss": 0.5350260734558105, + "step": 1388 + }, + { + "epoch": 0.3202674659903159, + "grad_norm": 1.065882395696928, + "learning_rate": 1.9345787428368146e-06, + "loss": 0.6002014875411987, + "step": 1389 + }, + { + "epoch": 0.3204980401198986, + "grad_norm": 1.0951548438177328, + "learning_rate": 1.9344430388099813e-06, + "loss": 0.5111383199691772, + "step": 1390 + }, + { + "epoch": 0.3207286142494812, + "grad_norm": 1.3896947100609738, + "learning_rate": 1.934307198952382e-06, + "loss": 0.6029741168022156, + "step": 1391 + }, + { + "epoch": 0.32095918837906384, + "grad_norm": 1.0076386708324083, + "learning_rate": 1.9341712232837628e-06, + "loss": 0.48339328169822693, + "step": 1392 + }, + { + "epoch": 0.3211897625086465, + "grad_norm": 1.5017597017671664, + "learning_rate": 1.9340351118238882e-06, + "loss": 0.6080894470214844, + "step": 1393 + }, + { + "epoch": 0.32142033663822916, + "grad_norm": 1.1935202429445742, + "learning_rate": 1.9338988645925444e-06, + "loss": 0.46375036239624023, + "step": 1394 + }, + { + "epoch": 0.3216509107678118, + "grad_norm": 1.2397479694281224, + "learning_rate": 1.9337624816095357e-06, + "loss": 0.5974088907241821, + "step": 1395 + }, + { + "epoch": 0.3218814848973945, + "grad_norm": 1.4525926184759388, + "learning_rate": 1.9336259628946865e-06, + "loss": 0.5759298801422119, + "step": 1396 + }, + { + "epoch": 0.32211205902697715, + "grad_norm": 1.0361695525185906, + "learning_rate": 1.9334893084678417e-06, + "loss": 0.6050859689712524, + "step": 1397 + }, + { + "epoch": 0.3223426331565598, + "grad_norm": 1.1306650773102374, + "learning_rate": 
1.9333525183488657e-06, + "loss": 0.5879993438720703, + "step": 1398 + }, + { + "epoch": 0.32257320728614247, + "grad_norm": 1.055350398289763, + "learning_rate": 1.933215592557642e-06, + "loss": 0.5496323108673096, + "step": 1399 + }, + { + "epoch": 0.32280378141572513, + "grad_norm": 1.2847712135798797, + "learning_rate": 1.9330785311140732e-06, + "loss": 0.48447534441947937, + "step": 1400 + }, + { + "epoch": 0.3230343555453078, + "grad_norm": 1.2583031445613762, + "learning_rate": 1.932941334038084e-06, + "loss": 0.5687322020530701, + "step": 1401 + }, + { + "epoch": 0.32326492967489046, + "grad_norm": 1.1545356458260727, + "learning_rate": 1.9328040013496166e-06, + "loss": 0.4070928990840912, + "step": 1402 + }, + { + "epoch": 0.3234955038044731, + "grad_norm": 0.9643847324304846, + "learning_rate": 1.9326665330686344e-06, + "loss": 0.5131539106369019, + "step": 1403 + }, + { + "epoch": 0.3237260779340558, + "grad_norm": 1.0846567553359194, + "learning_rate": 1.932528929215119e-06, + "loss": 0.47571802139282227, + "step": 1404 + }, + { + "epoch": 0.32395665206363844, + "grad_norm": 1.095169764239565, + "learning_rate": 1.9323911898090728e-06, + "loss": 0.5676391124725342, + "step": 1405 + }, + { + "epoch": 0.3241872261932211, + "grad_norm": 1.0653010445083047, + "learning_rate": 1.9322533148705177e-06, + "loss": 0.5464721322059631, + "step": 1406 + }, + { + "epoch": 0.32441780032280376, + "grad_norm": 1.044728614529827, + "learning_rate": 1.9321153044194953e-06, + "loss": 0.6130954027175903, + "step": 1407 + }, + { + "epoch": 0.3246483744523864, + "grad_norm": 1.6513732337511444, + "learning_rate": 1.9319771584760666e-06, + "loss": 0.6058028936386108, + "step": 1408 + }, + { + "epoch": 0.3248789485819691, + "grad_norm": 1.1251884535657009, + "learning_rate": 1.9318388770603123e-06, + "loss": 0.5326286554336548, + "step": 1409 + }, + { + "epoch": 0.32510952271155175, + "grad_norm": 1.2184625691329178, + "learning_rate": 1.9317004601923337e-06, + "loss": 
0.6046053767204285, + "step": 1410 + }, + { + "epoch": 0.3253400968411344, + "grad_norm": 1.058617017669887, + "learning_rate": 1.931561907892251e-06, + "loss": 0.4597975015640259, + "step": 1411 + }, + { + "epoch": 0.32557067097071707, + "grad_norm": 1.1843983331118075, + "learning_rate": 1.9314232201802035e-06, + "loss": 0.6024897694587708, + "step": 1412 + }, + { + "epoch": 0.32580124510029973, + "grad_norm": 1.037552834044261, + "learning_rate": 1.9312843970763512e-06, + "loss": 0.45463523268699646, + "step": 1413 + }, + { + "epoch": 0.3260318192298824, + "grad_norm": 0.9412245310618959, + "learning_rate": 1.9311454386008736e-06, + "loss": 0.512498140335083, + "step": 1414 + }, + { + "epoch": 0.32626239335946505, + "grad_norm": 0.8929271577435476, + "learning_rate": 1.9310063447739695e-06, + "loss": 0.4851795434951782, + "step": 1415 + }, + { + "epoch": 0.3264929674890477, + "grad_norm": 1.1131717345806365, + "learning_rate": 1.930867115615858e-06, + "loss": 0.5464169979095459, + "step": 1416 + }, + { + "epoch": 0.3267235416186304, + "grad_norm": 0.9649299588738096, + "learning_rate": 1.930727751146777e-06, + "loss": 0.5614463090896606, + "step": 1417 + }, + { + "epoch": 0.32695411574821304, + "grad_norm": 1.1279163828506724, + "learning_rate": 1.930588251386985e-06, + "loss": 0.635399341583252, + "step": 1418 + }, + { + "epoch": 0.3271846898777957, + "grad_norm": 1.0116750083389472, + "learning_rate": 1.9304486163567588e-06, + "loss": 0.4862840175628662, + "step": 1419 + }, + { + "epoch": 0.32741526400737836, + "grad_norm": 1.3810849020281415, + "learning_rate": 1.930308846076397e-06, + "loss": 0.6548877954483032, + "step": 1420 + }, + { + "epoch": 0.327645838136961, + "grad_norm": 0.9726550652757486, + "learning_rate": 1.9301689405662154e-06, + "loss": 0.5781031250953674, + "step": 1421 + }, + { + "epoch": 0.3278764122665437, + "grad_norm": 1.0075078554250574, + "learning_rate": 1.930028899846552e-06, + "loss": 0.4945180118083954, + "step": 1422 + }, + { + 
"epoch": 0.32810698639612634, + "grad_norm": 1.1661473529435082, + "learning_rate": 1.9298887239377623e-06, + "loss": 0.548690915107727, + "step": 1423 + }, + { + "epoch": 0.328337560525709, + "grad_norm": 1.0120278252177992, + "learning_rate": 1.929748412860222e-06, + "loss": 0.44515126943588257, + "step": 1424 + }, + { + "epoch": 0.32856813465529167, + "grad_norm": 0.8968526552864172, + "learning_rate": 1.9296079666343273e-06, + "loss": 0.433849573135376, + "step": 1425 + }, + { + "epoch": 0.3287987087848743, + "grad_norm": 1.185097032812299, + "learning_rate": 1.9294673852804938e-06, + "loss": 0.5600666403770447, + "step": 1426 + }, + { + "epoch": 0.329029282914457, + "grad_norm": 1.1490365285996864, + "learning_rate": 1.9293266688191555e-06, + "loss": 0.5302737355232239, + "step": 1427 + }, + { + "epoch": 0.32925985704403965, + "grad_norm": 1.1854633228597617, + "learning_rate": 1.929185817270768e-06, + "loss": 0.5590239763259888, + "step": 1428 + }, + { + "epoch": 0.3294904311736223, + "grad_norm": 0.9322915581005059, + "learning_rate": 1.929044830655804e-06, + "loss": 0.43225252628326416, + "step": 1429 + }, + { + "epoch": 0.329721005303205, + "grad_norm": 1.0987581728513967, + "learning_rate": 1.9289037089947595e-06, + "loss": 0.4932950735092163, + "step": 1430 + }, + { + "epoch": 0.32995157943278763, + "grad_norm": 1.1539316791656467, + "learning_rate": 1.9287624523081457e-06, + "loss": 0.48358941078186035, + "step": 1431 + }, + { + "epoch": 0.3301821535623703, + "grad_norm": 1.1348341469716536, + "learning_rate": 1.928621060616497e-06, + "loss": 0.48359012603759766, + "step": 1432 + }, + { + "epoch": 0.33041272769195296, + "grad_norm": 0.9278501695529541, + "learning_rate": 1.9284795339403663e-06, + "loss": 0.48462390899658203, + "step": 1433 + }, + { + "epoch": 0.3306433018215356, + "grad_norm": 1.439376655816269, + "learning_rate": 1.9283378723003253e-06, + "loss": 0.5167088508605957, + "step": 1434 + }, + { + "epoch": 0.3308738759511183, + "grad_norm": 
1.0184323306356053, + "learning_rate": 1.928196075716966e-06, + "loss": 0.47352856397628784, + "step": 1435 + }, + { + "epoch": 0.33110445008070094, + "grad_norm": 0.9676467825700396, + "learning_rate": 1.9280541442109e-06, + "loss": 0.5013144016265869, + "step": 1436 + }, + { + "epoch": 0.3313350242102836, + "grad_norm": 1.1746874818237374, + "learning_rate": 1.927912077802759e-06, + "loss": 0.5061586499214172, + "step": 1437 + }, + { + "epoch": 0.33156559833986626, + "grad_norm": 1.3055289684633111, + "learning_rate": 1.9277698765131927e-06, + "loss": 0.5718814134597778, + "step": 1438 + }, + { + "epoch": 0.3317961724694489, + "grad_norm": 1.147604660511156, + "learning_rate": 1.9276275403628727e-06, + "loss": 0.47547006607055664, + "step": 1439 + }, + { + "epoch": 0.3320267465990316, + "grad_norm": 1.1585259805283974, + "learning_rate": 1.9274850693724884e-06, + "loss": 0.5387942790985107, + "step": 1440 + }, + { + "epoch": 0.33225732072861425, + "grad_norm": 1.013907046172662, + "learning_rate": 1.9273424635627494e-06, + "loss": 0.524285078048706, + "step": 1441 + }, + { + "epoch": 0.3324878948581969, + "grad_norm": 1.1737357855070976, + "learning_rate": 1.927199722954385e-06, + "loss": 0.5073943138122559, + "step": 1442 + }, + { + "epoch": 0.33271846898777957, + "grad_norm": 1.2047946851654725, + "learning_rate": 1.927056847568144e-06, + "loss": 0.4609600007534027, + "step": 1443 + }, + { + "epoch": 0.33294904311736223, + "grad_norm": 1.0416538135601094, + "learning_rate": 1.926913837424795e-06, + "loss": 0.4861013889312744, + "step": 1444 + }, + { + "epoch": 0.3331796172469449, + "grad_norm": 1.0835107342484427, + "learning_rate": 1.9267706925451253e-06, + "loss": 0.5255436897277832, + "step": 1445 + }, + { + "epoch": 0.33341019137652755, + "grad_norm": 1.4634923921780199, + "learning_rate": 1.9266274129499434e-06, + "loss": 0.6673840880393982, + "step": 1446 + }, + { + "epoch": 0.3336407655061102, + "grad_norm": 0.9656915858584796, + "learning_rate": 
1.9264839986600757e-06, + "loss": 0.38582634925842285, + "step": 1447 + }, + { + "epoch": 0.3338713396356929, + "grad_norm": 0.9567963925410773, + "learning_rate": 1.926340449696369e-06, + "loss": 0.4597562253475189, + "step": 1448 + }, + { + "epoch": 0.33410191376527554, + "grad_norm": 1.130778436617546, + "learning_rate": 1.92619676607969e-06, + "loss": 0.5901148319244385, + "step": 1449 + }, + { + "epoch": 0.3343324878948582, + "grad_norm": 1.2252206522255358, + "learning_rate": 1.9260529478309242e-06, + "loss": 0.49872028827667236, + "step": 1450 + }, + { + "epoch": 0.33456306202444086, + "grad_norm": 0.9242619738807548, + "learning_rate": 1.925908994970977e-06, + "loss": 0.4611232578754425, + "step": 1451 + }, + { + "epoch": 0.3347936361540235, + "grad_norm": 1.1122995891321772, + "learning_rate": 1.9257649075207738e-06, + "loss": 0.5671408176422119, + "step": 1452 + }, + { + "epoch": 0.3350242102836062, + "grad_norm": 1.2073453603933548, + "learning_rate": 1.925620685501259e-06, + "loss": 0.4892054498195648, + "step": 1453 + }, + { + "epoch": 0.33525478441318884, + "grad_norm": 1.1748595063207394, + "learning_rate": 1.9254763289333966e-06, + "loss": 0.5506503582000732, + "step": 1454 + }, + { + "epoch": 0.3354853585427715, + "grad_norm": 1.4352362120603241, + "learning_rate": 1.9253318378381702e-06, + "loss": 0.6233078241348267, + "step": 1455 + }, + { + "epoch": 0.33571593267235417, + "grad_norm": 1.2159230168553836, + "learning_rate": 1.9251872122365835e-06, + "loss": 0.5551373958587646, + "step": 1456 + }, + { + "epoch": 0.33594650680193683, + "grad_norm": 1.0308435059717576, + "learning_rate": 1.925042452149659e-06, + "loss": 0.5561612844467163, + "step": 1457 + }, + { + "epoch": 0.3361770809315195, + "grad_norm": 1.0286600789295617, + "learning_rate": 1.924897557598439e-06, + "loss": 0.613766074180603, + "step": 1458 + }, + { + "epoch": 0.33640765506110215, + "grad_norm": 1.092154153863493, + "learning_rate": 1.9247525286039852e-06, + "loss": 
0.5767652988433838, + "step": 1459 + }, + { + "epoch": 0.3366382291906848, + "grad_norm": 1.1221153049255785, + "learning_rate": 1.9246073651873795e-06, + "loss": 0.49292564392089844, + "step": 1460 + }, + { + "epoch": 0.3368688033202675, + "grad_norm": 1.2909262812986786, + "learning_rate": 1.9244620673697224e-06, + "loss": 0.5901867151260376, + "step": 1461 + }, + { + "epoch": 0.33709937744985013, + "grad_norm": 1.1013040204716718, + "learning_rate": 1.924316635172135e-06, + "loss": 0.5543808937072754, + "step": 1462 + }, + { + "epoch": 0.3373299515794328, + "grad_norm": 1.3433064818976315, + "learning_rate": 1.9241710686157568e-06, + "loss": 0.528805136680603, + "step": 1463 + }, + { + "epoch": 0.33756052570901546, + "grad_norm": 1.2569454583762516, + "learning_rate": 1.924025367721748e-06, + "loss": 0.6396733522415161, + "step": 1464 + }, + { + "epoch": 0.3377910998385981, + "grad_norm": 0.9764691877916688, + "learning_rate": 1.9238795325112867e-06, + "loss": 0.5558862686157227, + "step": 1465 + }, + { + "epoch": 0.3380216739681808, + "grad_norm": 1.2329860923893396, + "learning_rate": 1.9237335630055724e-06, + "loss": 0.5863986015319824, + "step": 1466 + }, + { + "epoch": 0.33825224809776344, + "grad_norm": 1.0929132974739206, + "learning_rate": 1.923587459225823e-06, + "loss": 0.5636321306228638, + "step": 1467 + }, + { + "epoch": 0.3384828222273461, + "grad_norm": 1.1286586205882263, + "learning_rate": 1.923441221193276e-06, + "loss": 0.6065811514854431, + "step": 1468 + }, + { + "epoch": 0.33871339635692876, + "grad_norm": 1.4147716425908794, + "learning_rate": 1.9232948489291886e-06, + "loss": 0.580939769744873, + "step": 1469 + }, + { + "epoch": 0.3389439704865114, + "grad_norm": 1.1018333541876169, + "learning_rate": 1.9231483424548377e-06, + "loss": 0.5429994463920593, + "step": 1470 + }, + { + "epoch": 0.3391745446160941, + "grad_norm": 1.1834314239894592, + "learning_rate": 1.92300170179152e-06, + "loss": 0.5090892910957336, + "step": 1471 + }, + { + 
"epoch": 0.33940511874567675, + "grad_norm": 1.053685812356228, + "learning_rate": 1.9228549269605498e-06, + "loss": 0.5280312299728394, + "step": 1472 + }, + { + "epoch": 0.3396356928752594, + "grad_norm": 0.992641626439364, + "learning_rate": 1.9227080179832634e-06, + "loss": 0.5098810195922852, + "step": 1473 + }, + { + "epoch": 0.33986626700484207, + "grad_norm": 1.110706876976592, + "learning_rate": 1.922560974881015e-06, + "loss": 0.4554474353790283, + "step": 1474 + }, + { + "epoch": 0.34009684113442473, + "grad_norm": 1.042826154870894, + "learning_rate": 1.9224137976751793e-06, + "loss": 0.4492517113685608, + "step": 1475 + }, + { + "epoch": 0.3403274152640074, + "grad_norm": 1.3050966518961793, + "learning_rate": 1.9222664863871495e-06, + "loss": 0.47606343030929565, + "step": 1476 + }, + { + "epoch": 0.34055798939359005, + "grad_norm": 1.331553847580159, + "learning_rate": 1.9221190410383394e-06, + "loss": 0.5939435362815857, + "step": 1477 + }, + { + "epoch": 0.3407885635231727, + "grad_norm": 1.0156905582890146, + "learning_rate": 1.921971461650181e-06, + "loss": 0.5418350696563721, + "step": 1478 + }, + { + "epoch": 0.3410191376527554, + "grad_norm": 1.258400628812999, + "learning_rate": 1.9218237482441265e-06, + "loss": 0.5307733416557312, + "step": 1479 + }, + { + "epoch": 0.34124971178233804, + "grad_norm": 1.097634429758053, + "learning_rate": 1.9216759008416483e-06, + "loss": 0.5102016925811768, + "step": 1480 + }, + { + "epoch": 0.3414802859119207, + "grad_norm": 1.6070497683125828, + "learning_rate": 1.9215279194642366e-06, + "loss": 0.5043876767158508, + "step": 1481 + }, + { + "epoch": 0.34171086004150336, + "grad_norm": 1.0925329335071103, + "learning_rate": 1.9213798041334025e-06, + "loss": 0.5365253686904907, + "step": 1482 + }, + { + "epoch": 0.341941434171086, + "grad_norm": 1.1923005853358424, + "learning_rate": 1.921231554870676e-06, + "loss": 0.4938368797302246, + "step": 1483 + }, + { + "epoch": 0.3421720083006687, + "grad_norm": 
1.0865439416616147, + "learning_rate": 1.921083171697607e-06, + "loss": 0.5274159908294678, + "step": 1484 + }, + { + "epoch": 0.34240258243025135, + "grad_norm": 1.1913792015364102, + "learning_rate": 1.9209346546357637e-06, + "loss": 0.4720276892185211, + "step": 1485 + }, + { + "epoch": 0.342633156559834, + "grad_norm": 0.9383641214181552, + "learning_rate": 1.920786003706735e-06, + "loss": 0.42276352643966675, + "step": 1486 + }, + { + "epoch": 0.34286373068941667, + "grad_norm": 1.0581324959121157, + "learning_rate": 1.920637218932129e-06, + "loss": 0.5319294333457947, + "step": 1487 + }, + { + "epoch": 0.34309430481899933, + "grad_norm": 1.1819330354237378, + "learning_rate": 1.920488300333572e-06, + "loss": 0.5197560787200928, + "step": 1488 + }, + { + "epoch": 0.343324878948582, + "grad_norm": 1.5013538667422215, + "learning_rate": 1.9203392479327127e-06, + "loss": 0.550025463104248, + "step": 1489 + }, + { + "epoch": 0.34355545307816465, + "grad_norm": 1.0981284345294107, + "learning_rate": 1.920190061751216e-06, + "loss": 0.50255286693573, + "step": 1490 + }, + { + "epoch": 0.3437860272077473, + "grad_norm": 1.1895622589876538, + "learning_rate": 1.9200407418107678e-06, + "loss": 0.5952906608581543, + "step": 1491 + }, + { + "epoch": 0.34401660133733, + "grad_norm": 0.9421522918126589, + "learning_rate": 1.9198912881330737e-06, + "loss": 0.48161056637763977, + "step": 1492 + }, + { + "epoch": 0.34424717546691264, + "grad_norm": 1.177243819966174, + "learning_rate": 1.919741700739858e-06, + "loss": 0.5490972995758057, + "step": 1493 + }, + { + "epoch": 0.3444777495964953, + "grad_norm": 1.4788962836499655, + "learning_rate": 1.9195919796528647e-06, + "loss": 0.45651519298553467, + "step": 1494 + }, + { + "epoch": 0.34470832372607796, + "grad_norm": 1.2203060266370191, + "learning_rate": 1.919442124893857e-06, + "loss": 0.5318460464477539, + "step": 1495 + }, + { + "epoch": 0.3449388978556606, + "grad_norm": 1.0748079339537138, + "learning_rate": 
1.9192921364846187e-06, + "loss": 0.5052516460418701, + "step": 1496 + }, + { + "epoch": 0.3451694719852433, + "grad_norm": 1.3171544150804408, + "learning_rate": 1.9191420144469515e-06, + "loss": 0.6653434038162231, + "step": 1497 + }, + { + "epoch": 0.34540004611482594, + "grad_norm": 0.962422061512943, + "learning_rate": 1.9189917588026774e-06, + "loss": 0.47182875871658325, + "step": 1498 + }, + { + "epoch": 0.3456306202444086, + "grad_norm": 1.0305251609345925, + "learning_rate": 1.9188413695736376e-06, + "loss": 0.5257801413536072, + "step": 1499 + }, + { + "epoch": 0.34586119437399127, + "grad_norm": 1.1090254531285808, + "learning_rate": 1.918690846781692e-06, + "loss": 0.565075695514679, + "step": 1500 + }, + { + "epoch": 0.3460917685035739, + "grad_norm": 1.1909717210416553, + "learning_rate": 1.9185401904487214e-06, + "loss": 0.49737876653671265, + "step": 1501 + }, + { + "epoch": 0.34632234263315653, + "grad_norm": 1.021716441788736, + "learning_rate": 1.918389400596625e-06, + "loss": 0.5136237144470215, + "step": 1502 + }, + { + "epoch": 0.3465529167627392, + "grad_norm": 1.011829912931323, + "learning_rate": 1.9182384772473216e-06, + "loss": 0.5122819542884827, + "step": 1503 + }, + { + "epoch": 0.34678349089232186, + "grad_norm": 1.1232586653417744, + "learning_rate": 1.91808742042275e-06, + "loss": 0.4586041271686554, + "step": 1504 + }, + { + "epoch": 0.3470140650219045, + "grad_norm": 1.0599756649712084, + "learning_rate": 1.9179362301448666e-06, + "loss": 0.49752146005630493, + "step": 1505 + }, + { + "epoch": 0.3472446391514872, + "grad_norm": 1.0110535685015802, + "learning_rate": 1.917784906435649e-06, + "loss": 0.4423530101776123, + "step": 1506 + }, + { + "epoch": 0.34747521328106984, + "grad_norm": 1.2828635133632034, + "learning_rate": 1.9176334493170946e-06, + "loss": 0.4979468882083893, + "step": 1507 + }, + { + "epoch": 0.3477057874106525, + "grad_norm": 1.0086748218378025, + "learning_rate": 1.9174818588112178e-06, + "loss": 
0.5229524374008179, + "step": 1508 + }, + { + "epoch": 0.34793636154023516, + "grad_norm": 1.006104946386604, + "learning_rate": 1.9173301349400546e-06, + "loss": 0.47884654998779297, + "step": 1509 + }, + { + "epoch": 0.3481669356698178, + "grad_norm": 1.161430061405767, + "learning_rate": 1.9171782777256594e-06, + "loss": 0.5204922556877136, + "step": 1510 + }, + { + "epoch": 0.3483975097994005, + "grad_norm": 1.1268415177845295, + "learning_rate": 1.917026287190106e-06, + "loss": 0.5077674984931946, + "step": 1511 + }, + { + "epoch": 0.34862808392898315, + "grad_norm": 0.9750269271228661, + "learning_rate": 1.9168741633554885e-06, + "loss": 0.4171299934387207, + "step": 1512 + }, + { + "epoch": 0.3488586580585658, + "grad_norm": 1.065613083459404, + "learning_rate": 1.9167219062439187e-06, + "loss": 0.5228694081306458, + "step": 1513 + }, + { + "epoch": 0.34908923218814847, + "grad_norm": 1.188410464922724, + "learning_rate": 1.916569515877529e-06, + "loss": 0.5496635437011719, + "step": 1514 + }, + { + "epoch": 0.34931980631773113, + "grad_norm": 0.969674279609777, + "learning_rate": 1.9164169922784716e-06, + "loss": 0.5197573900222778, + "step": 1515 + }, + { + "epoch": 0.3495503804473138, + "grad_norm": 1.3265152215611398, + "learning_rate": 1.9162643354689163e-06, + "loss": 0.5726813077926636, + "step": 1516 + }, + { + "epoch": 0.34978095457689645, + "grad_norm": 1.0368094455843846, + "learning_rate": 1.916111545471054e-06, + "loss": 0.53382408618927, + "step": 1517 + }, + { + "epoch": 0.3500115287064791, + "grad_norm": 1.0676291023728657, + "learning_rate": 1.915958622307094e-06, + "loss": 0.5535515546798706, + "step": 1518 + }, + { + "epoch": 0.3502421028360618, + "grad_norm": 1.183098293067818, + "learning_rate": 1.9158055659992648e-06, + "loss": 0.5295307040214539, + "step": 1519 + }, + { + "epoch": 0.35047267696564444, + "grad_norm": 1.3231709310936663, + "learning_rate": 1.9156523765698158e-06, + "loss": 0.5397933125495911, + "step": 1520 + }, + { + 
"epoch": 0.3507032510952271, + "grad_norm": 1.217082341703879, + "learning_rate": 1.915499054041014e-06, + "loss": 0.5614666938781738, + "step": 1521 + }, + { + "epoch": 0.35093382522480976, + "grad_norm": 1.155125291987811, + "learning_rate": 1.915345598435146e-06, + "loss": 0.5321720838546753, + "step": 1522 + }, + { + "epoch": 0.3511643993543924, + "grad_norm": 1.172353935810673, + "learning_rate": 1.9151920097745185e-06, + "loss": 0.51869797706604, + "step": 1523 + }, + { + "epoch": 0.3513949734839751, + "grad_norm": 1.0936179296558388, + "learning_rate": 1.9150382880814577e-06, + "loss": 0.58238685131073, + "step": 1524 + }, + { + "epoch": 0.35162554761355774, + "grad_norm": 1.135142968184709, + "learning_rate": 1.914884433378308e-06, + "loss": 0.5617767572402954, + "step": 1525 + }, + { + "epoch": 0.3518561217431404, + "grad_norm": 0.9232400306777988, + "learning_rate": 1.9147304456874336e-06, + "loss": 0.5207428932189941, + "step": 1526 + }, + { + "epoch": 0.35208669587272307, + "grad_norm": 1.0829138732821308, + "learning_rate": 1.914576325031218e-06, + "loss": 0.5929840207099915, + "step": 1527 + }, + { + "epoch": 0.3523172700023057, + "grad_norm": 1.0372438860332964, + "learning_rate": 1.914422071432065e-06, + "loss": 0.510567307472229, + "step": 1528 + }, + { + "epoch": 0.3525478441318884, + "grad_norm": 1.2529291445912578, + "learning_rate": 1.914267684912397e-06, + "loss": 0.5524177551269531, + "step": 1529 + }, + { + "epoch": 0.35277841826147105, + "grad_norm": 1.0844290023080794, + "learning_rate": 1.9141131654946548e-06, + "loss": 0.5622289180755615, + "step": 1530 + }, + { + "epoch": 0.3530089923910537, + "grad_norm": 1.1655531028574153, + "learning_rate": 1.9139585132012995e-06, + "loss": 0.5085979700088501, + "step": 1531 + }, + { + "epoch": 0.35323956652063637, + "grad_norm": 1.0367412290626608, + "learning_rate": 1.9138037280548117e-06, + "loss": 0.47232770919799805, + "step": 1532 + }, + { + "epoch": 0.35347014065021903, + "grad_norm": 
1.3584148636864177, + "learning_rate": 1.913648810077691e-06, + "loss": 0.535300612449646, + "step": 1533 + }, + { + "epoch": 0.3537007147798017, + "grad_norm": 1.1457507125445123, + "learning_rate": 1.9134937592924562e-06, + "loss": 0.4351940155029297, + "step": 1534 + }, + { + "epoch": 0.35393128890938436, + "grad_norm": 0.9891980196576595, + "learning_rate": 1.9133385757216456e-06, + "loss": 0.4691917896270752, + "step": 1535 + }, + { + "epoch": 0.354161863038967, + "grad_norm": 1.03905005054118, + "learning_rate": 1.9131832593878167e-06, + "loss": 0.4911034107208252, + "step": 1536 + }, + { + "epoch": 0.3543924371685497, + "grad_norm": 0.9599946260153974, + "learning_rate": 1.9130278103135458e-06, + "loss": 0.3954068422317505, + "step": 1537 + }, + { + "epoch": 0.35462301129813234, + "grad_norm": 1.2512488183212185, + "learning_rate": 1.9128722285214297e-06, + "loss": 0.5541605949401855, + "step": 1538 + }, + { + "epoch": 0.354853585427715, + "grad_norm": 1.2362059407886639, + "learning_rate": 1.9127165140340832e-06, + "loss": 0.5719314217567444, + "step": 1539 + }, + { + "epoch": 0.35508415955729766, + "grad_norm": 1.342530930822934, + "learning_rate": 1.9125606668741418e-06, + "loss": 0.60889732837677, + "step": 1540 + }, + { + "epoch": 0.3553147336868803, + "grad_norm": 1.2098741685807175, + "learning_rate": 1.9124046870642587e-06, + "loss": 0.5247465968132019, + "step": 1541 + }, + { + "epoch": 0.355545307816463, + "grad_norm": 1.3096766952611592, + "learning_rate": 1.912248574627107e-06, + "loss": 0.5681591033935547, + "step": 1542 + }, + { + "epoch": 0.35577588194604565, + "grad_norm": 1.0008372683888578, + "learning_rate": 1.91209232958538e-06, + "loss": 0.5995845794677734, + "step": 1543 + }, + { + "epoch": 0.3560064560756283, + "grad_norm": 1.0463229098086306, + "learning_rate": 1.9119359519617893e-06, + "loss": 0.514456033706665, + "step": 1544 + }, + { + "epoch": 0.35623703020521097, + "grad_norm": 1.0680000709528683, + "learning_rate": 
1.9117794417790657e-06, + "loss": 0.45192602276802063, + "step": 1545 + }, + { + "epoch": 0.35646760433479363, + "grad_norm": 1.042670075197141, + "learning_rate": 1.911622799059959e-06, + "loss": 0.5529573559761047, + "step": 1546 + }, + { + "epoch": 0.3566981784643763, + "grad_norm": 1.2129822836493795, + "learning_rate": 1.9114660238272403e-06, + "loss": 0.4544152021408081, + "step": 1547 + }, + { + "epoch": 0.35692875259395895, + "grad_norm": 1.516629148023364, + "learning_rate": 1.9113091161036974e-06, + "loss": 0.5676225423812866, + "step": 1548 + }, + { + "epoch": 0.3571593267235416, + "grad_norm": 1.1320627323756525, + "learning_rate": 1.9111520759121384e-06, + "loss": 0.5571830868721008, + "step": 1549 + }, + { + "epoch": 0.3573899008531243, + "grad_norm": 1.1377531274302592, + "learning_rate": 1.910994903275391e-06, + "loss": 0.5091487765312195, + "step": 1550 + }, + { + "epoch": 0.35762047498270694, + "grad_norm": 1.107456889270875, + "learning_rate": 1.9108375982163015e-06, + "loss": 0.5484684705734253, + "step": 1551 + }, + { + "epoch": 0.3578510491122896, + "grad_norm": 1.261905478374622, + "learning_rate": 1.9106801607577364e-06, + "loss": 0.49742424488067627, + "step": 1552 + }, + { + "epoch": 0.35808162324187226, + "grad_norm": 1.2341261046425518, + "learning_rate": 1.9105225909225804e-06, + "loss": 0.5871520638465881, + "step": 1553 + }, + { + "epoch": 0.3583121973714549, + "grad_norm": 1.2329576492287886, + "learning_rate": 1.910364888733738e-06, + "loss": 0.5096076726913452, + "step": 1554 + }, + { + "epoch": 0.3585427715010376, + "grad_norm": 1.3375416968847058, + "learning_rate": 1.910207054214133e-06, + "loss": 0.7168693542480469, + "step": 1555 + }, + { + "epoch": 0.35877334563062024, + "grad_norm": 1.126707169388949, + "learning_rate": 1.910049087386707e-06, + "loss": 0.5603561997413635, + "step": 1556 + }, + { + "epoch": 0.3590039197602029, + "grad_norm": 1.299433383477777, + "learning_rate": 1.909890988274424e-06, + "loss": 
0.5857734680175781, + "step": 1557 + }, + { + "epoch": 0.35923449388978557, + "grad_norm": 1.040543925807462, + "learning_rate": 1.9097327569002642e-06, + "loss": 0.5612708926200867, + "step": 1558 + }, + { + "epoch": 0.35946506801936823, + "grad_norm": 1.146949414139332, + "learning_rate": 1.909574393287228e-06, + "loss": 0.5264564752578735, + "step": 1559 + }, + { + "epoch": 0.3596956421489509, + "grad_norm": 0.9390137754415148, + "learning_rate": 1.9094158974583357e-06, + "loss": 0.4163395166397095, + "step": 1560 + }, + { + "epoch": 0.35992621627853355, + "grad_norm": 1.0884801214343747, + "learning_rate": 1.909257269436626e-06, + "loss": 0.483236163854599, + "step": 1561 + }, + { + "epoch": 0.3601567904081162, + "grad_norm": 1.0086049535834347, + "learning_rate": 1.9090985092451572e-06, + "loss": 0.48892003297805786, + "step": 1562 + }, + { + "epoch": 0.3603873645376989, + "grad_norm": 1.0090138133688373, + "learning_rate": 1.908939616907007e-06, + "loss": 0.45310860872268677, + "step": 1563 + }, + { + "epoch": 0.36061793866728153, + "grad_norm": 1.0130833457744266, + "learning_rate": 1.908780592445271e-06, + "loss": 0.5242425799369812, + "step": 1564 + }, + { + "epoch": 0.3608485127968642, + "grad_norm": 1.0425805251353624, + "learning_rate": 1.9086214358830663e-06, + "loss": 0.47026845812797546, + "step": 1565 + }, + { + "epoch": 0.36107908692644686, + "grad_norm": 1.2209406413770176, + "learning_rate": 1.9084621472435267e-06, + "loss": 0.5783924460411072, + "step": 1566 + }, + { + "epoch": 0.3613096610560295, + "grad_norm": 1.0139793238266448, + "learning_rate": 1.9083027265498073e-06, + "loss": 0.5534437894821167, + "step": 1567 + }, + { + "epoch": 0.3615402351856122, + "grad_norm": 1.27522834837266, + "learning_rate": 1.9081431738250815e-06, + "loss": 0.49131953716278076, + "step": 1568 + }, + { + "epoch": 0.36177080931519484, + "grad_norm": 1.0466765845853998, + "learning_rate": 1.9079834890925412e-06, + "loss": 0.4798020124435425, + "step": 1569 + }, + 
{ + "epoch": 0.3620013834447775, + "grad_norm": 1.1201181573638213, + "learning_rate": 1.9078236723753987e-06, + "loss": 0.4928893446922302, + "step": 1570 + }, + { + "epoch": 0.36223195757436016, + "grad_norm": 0.884047440430311, + "learning_rate": 1.9076637236968847e-06, + "loss": 0.4483630657196045, + "step": 1571 + }, + { + "epoch": 0.3624625317039428, + "grad_norm": 1.0983581542959335, + "learning_rate": 1.90750364308025e-06, + "loss": 0.593490481376648, + "step": 1572 + }, + { + "epoch": 0.3626931058335255, + "grad_norm": 1.1430514811975505, + "learning_rate": 1.9073434305487631e-06, + "loss": 0.5944634675979614, + "step": 1573 + }, + { + "epoch": 0.36292367996310815, + "grad_norm": 1.003698560447405, + "learning_rate": 1.9071830861257134e-06, + "loss": 0.5010452270507812, + "step": 1574 + }, + { + "epoch": 0.3631542540926908, + "grad_norm": 1.0687566975761509, + "learning_rate": 1.9070226098344078e-06, + "loss": 0.5128473043441772, + "step": 1575 + }, + { + "epoch": 0.36338482822227347, + "grad_norm": 1.0854169038402666, + "learning_rate": 1.9068620016981733e-06, + "loss": 0.6256363987922668, + "step": 1576 + }, + { + "epoch": 0.36361540235185613, + "grad_norm": 1.0796360454107574, + "learning_rate": 1.9067012617403565e-06, + "loss": 0.5502322912216187, + "step": 1577 + }, + { + "epoch": 0.3638459764814388, + "grad_norm": 1.2842731628323776, + "learning_rate": 1.906540389984322e-06, + "loss": 0.5756800174713135, + "step": 1578 + }, + { + "epoch": 0.36407655061102145, + "grad_norm": 1.135643566986845, + "learning_rate": 1.9063793864534543e-06, + "loss": 0.5131359696388245, + "step": 1579 + }, + { + "epoch": 0.3643071247406041, + "grad_norm": 0.9714084254330834, + "learning_rate": 1.9062182511711567e-06, + "loss": 0.5776810646057129, + "step": 1580 + }, + { + "epoch": 0.3645376988701868, + "grad_norm": 1.0973639487789169, + "learning_rate": 1.9060569841608523e-06, + "loss": 0.49460822343826294, + "step": 1581 + }, + { + "epoch": 0.36476827299976944, + 
"grad_norm": 0.942012419923591, + "learning_rate": 1.9058955854459823e-06, + "loss": 0.5031022429466248, + "step": 1582 + }, + { + "epoch": 0.3649988471293521, + "grad_norm": 1.2106661637014209, + "learning_rate": 1.9057340550500082e-06, + "loss": 0.4957816004753113, + "step": 1583 + }, + { + "epoch": 0.36522942125893476, + "grad_norm": 0.9363710565312526, + "learning_rate": 1.9055723929964102e-06, + "loss": 0.47861093282699585, + "step": 1584 + }, + { + "epoch": 0.3654599953885174, + "grad_norm": 1.027272725701274, + "learning_rate": 1.9054105993086868e-06, + "loss": 0.44517919421195984, + "step": 1585 + }, + { + "epoch": 0.3656905695181001, + "grad_norm": 1.1724343492985738, + "learning_rate": 1.9052486740103568e-06, + "loss": 0.46661484241485596, + "step": 1586 + }, + { + "epoch": 0.36592114364768275, + "grad_norm": 0.9788001147307338, + "learning_rate": 1.9050866171249575e-06, + "loss": 0.517694890499115, + "step": 1587 + }, + { + "epoch": 0.3661517177772654, + "grad_norm": 1.1284193922698917, + "learning_rate": 1.904924428676046e-06, + "loss": 0.49465644359588623, + "step": 1588 + }, + { + "epoch": 0.36638229190684807, + "grad_norm": 1.0036913999315975, + "learning_rate": 1.9047621086871971e-06, + "loss": 0.41830652952194214, + "step": 1589 + }, + { + "epoch": 0.36661286603643073, + "grad_norm": 1.1944977036427056, + "learning_rate": 1.9045996571820067e-06, + "loss": 0.5540663003921509, + "step": 1590 + }, + { + "epoch": 0.3668434401660134, + "grad_norm": 1.072580109375711, + "learning_rate": 1.9044370741840882e-06, + "loss": 0.5619527101516724, + "step": 1591 + }, + { + "epoch": 0.36707401429559605, + "grad_norm": 1.1509533440805209, + "learning_rate": 1.9042743597170746e-06, + "loss": 0.5086055994033813, + "step": 1592 + }, + { + "epoch": 0.3673045884251787, + "grad_norm": 1.050425223739088, + "learning_rate": 1.9041115138046183e-06, + "loss": 0.5839927196502686, + "step": 1593 + }, + { + "epoch": 0.3675351625547614, + "grad_norm": 1.0464789939377692, + 
"learning_rate": 1.9039485364703904e-06, + "loss": 0.508616030216217, + "step": 1594 + }, + { + "epoch": 0.36776573668434404, + "grad_norm": 1.15877506638183, + "learning_rate": 1.903785427738082e-06, + "loss": 0.46514832973480225, + "step": 1595 + }, + { + "epoch": 0.3679963108139267, + "grad_norm": 1.525284603977575, + "learning_rate": 1.9036221876314016e-06, + "loss": 0.42142176628112793, + "step": 1596 + }, + { + "epoch": 0.36822688494350936, + "grad_norm": 1.3114380851226077, + "learning_rate": 1.9034588161740786e-06, + "loss": 0.42195791006088257, + "step": 1597 + }, + { + "epoch": 0.368457459073092, + "grad_norm": 1.0276642661247686, + "learning_rate": 1.9032953133898601e-06, + "loss": 0.46705931425094604, + "step": 1598 + }, + { + "epoch": 0.3686880332026747, + "grad_norm": 1.1002100436754347, + "learning_rate": 1.9031316793025134e-06, + "loss": 0.4741164743900299, + "step": 1599 + }, + { + "epoch": 0.36891860733225734, + "grad_norm": 1.269728601723268, + "learning_rate": 1.902967913935824e-06, + "loss": 0.49730339646339417, + "step": 1600 + }, + { + "epoch": 0.36914918146184, + "grad_norm": 0.9594474153361355, + "learning_rate": 1.902804017313597e-06, + "loss": 0.47678127884864807, + "step": 1601 + }, + { + "epoch": 0.36937975559142266, + "grad_norm": 1.1964394586929104, + "learning_rate": 1.9026399894596565e-06, + "loss": 0.4954279661178589, + "step": 1602 + }, + { + "epoch": 0.3696103297210053, + "grad_norm": 0.9685506818723637, + "learning_rate": 1.9024758303978456e-06, + "loss": 0.5115381479263306, + "step": 1603 + }, + { + "epoch": 0.369840903850588, + "grad_norm": 1.0632901548704432, + "learning_rate": 1.9023115401520264e-06, + "loss": 0.6147117614746094, + "step": 1604 + }, + { + "epoch": 0.37007147798017065, + "grad_norm": 1.4566806194426465, + "learning_rate": 1.9021471187460802e-06, + "loss": 0.5334371328353882, + "step": 1605 + }, + { + "epoch": 0.3703020521097533, + "grad_norm": 1.2820059739478686, + "learning_rate": 1.9019825662039073e-06, + 
"loss": 0.4702361226081848, + "step": 1606 + }, + { + "epoch": 0.37053262623933597, + "grad_norm": 1.1889012346736458, + "learning_rate": 1.901817882549427e-06, + "loss": 0.5049586892127991, + "step": 1607 + }, + { + "epoch": 0.37076320036891863, + "grad_norm": 1.2055092488358514, + "learning_rate": 1.901653067806578e-06, + "loss": 0.5063170194625854, + "step": 1608 + }, + { + "epoch": 0.3709937744985013, + "grad_norm": 1.1599393359430212, + "learning_rate": 1.9014881219993175e-06, + "loss": 0.540824294090271, + "step": 1609 + }, + { + "epoch": 0.37122434862808396, + "grad_norm": 1.372148291928607, + "learning_rate": 1.901323045151622e-06, + "loss": 0.4744170904159546, + "step": 1610 + }, + { + "epoch": 0.3714549227576666, + "grad_norm": 1.2144026597364277, + "learning_rate": 1.9011578372874876e-06, + "loss": 0.5090929269790649, + "step": 1611 + }, + { + "epoch": 0.3716854968872493, + "grad_norm": 1.0610635938586983, + "learning_rate": 1.9009924984309284e-06, + "loss": 0.3886772394180298, + "step": 1612 + }, + { + "epoch": 0.3719160710168319, + "grad_norm": 1.1192663585328575, + "learning_rate": 1.9008270286059782e-06, + "loss": 0.4976482391357422, + "step": 1613 + }, + { + "epoch": 0.37214664514641455, + "grad_norm": 1.0577168176218985, + "learning_rate": 1.9006614278366898e-06, + "loss": 0.4629209041595459, + "step": 1614 + }, + { + "epoch": 0.3723772192759972, + "grad_norm": 1.0381238100092287, + "learning_rate": 1.9004956961471352e-06, + "loss": 0.49334412813186646, + "step": 1615 + }, + { + "epoch": 0.37260779340557987, + "grad_norm": 1.2336018114177745, + "learning_rate": 1.9003298335614047e-06, + "loss": 0.614592432975769, + "step": 1616 + }, + { + "epoch": 0.37283836753516253, + "grad_norm": 0.9895019344615126, + "learning_rate": 1.9001638401036082e-06, + "loss": 0.5339843034744263, + "step": 1617 + }, + { + "epoch": 0.3730689416647452, + "grad_norm": 0.9743667038154072, + "learning_rate": 1.8999977157978749e-06, + "loss": 0.5516937375068665, + "step": 1618 
+ }, + { + "epoch": 0.37329951579432785, + "grad_norm": 1.2149293301312265, + "learning_rate": 1.8998314606683522e-06, + "loss": 0.5034124255180359, + "step": 1619 + }, + { + "epoch": 0.3735300899239105, + "grad_norm": 0.9412969527830801, + "learning_rate": 1.8996650747392073e-06, + "loss": 0.49766790866851807, + "step": 1620 + }, + { + "epoch": 0.3737606640534932, + "grad_norm": 1.1063112007683722, + "learning_rate": 1.899498558034626e-06, + "loss": 0.6662446856498718, + "step": 1621 + }, + { + "epoch": 0.37399123818307584, + "grad_norm": 1.3692241861945424, + "learning_rate": 1.8993319105788129e-06, + "loss": 0.5416747331619263, + "step": 1622 + }, + { + "epoch": 0.3742218123126585, + "grad_norm": 1.2377768970666951, + "learning_rate": 1.8991651323959922e-06, + "loss": 0.5137313604354858, + "step": 1623 + }, + { + "epoch": 0.37445238644224116, + "grad_norm": 1.0509326993065755, + "learning_rate": 1.8989982235104072e-06, + "loss": 0.566002607345581, + "step": 1624 + }, + { + "epoch": 0.3746829605718238, + "grad_norm": 1.314391237074608, + "learning_rate": 1.8988311839463188e-06, + "loss": 0.5201380252838135, + "step": 1625 + }, + { + "epoch": 0.3749135347014065, + "grad_norm": 1.2844709164103703, + "learning_rate": 1.8986640137280087e-06, + "loss": 0.5103918313980103, + "step": 1626 + }, + { + "epoch": 0.37514410883098914, + "grad_norm": 1.081063959726764, + "learning_rate": 1.8984967128797763e-06, + "loss": 0.47900843620300293, + "step": 1627 + }, + { + "epoch": 0.3753746829605718, + "grad_norm": 1.0524739811683044, + "learning_rate": 1.898329281425941e-06, + "loss": 0.42991960048675537, + "step": 1628 + }, + { + "epoch": 0.37560525709015447, + "grad_norm": 1.2087969734027784, + "learning_rate": 1.89816171939084e-06, + "loss": 0.5707317590713501, + "step": 1629 + }, + { + "epoch": 0.3758358312197371, + "grad_norm": 1.0714171850017424, + "learning_rate": 1.8979940267988309e-06, + "loss": 0.565521240234375, + "step": 1630 + }, + { + "epoch": 0.3760664053493198, + 
"grad_norm": 1.2721353238917528, + "learning_rate": 1.8978262036742888e-06, + "loss": 0.6584400534629822, + "step": 1631 + }, + { + "epoch": 0.37629697947890245, + "grad_norm": 1.1181726564305359, + "learning_rate": 1.897658250041609e-06, + "loss": 0.4749317169189453, + "step": 1632 + }, + { + "epoch": 0.3765275536084851, + "grad_norm": 1.3732616000652873, + "learning_rate": 1.8974901659252048e-06, + "loss": 0.5495604872703552, + "step": 1633 + }, + { + "epoch": 0.37675812773806777, + "grad_norm": 1.6408199477459455, + "learning_rate": 1.8973219513495094e-06, + "loss": 0.465708464384079, + "step": 1634 + }, + { + "epoch": 0.37698870186765043, + "grad_norm": 1.1887777428919946, + "learning_rate": 1.8971536063389742e-06, + "loss": 0.4599069058895111, + "step": 1635 + }, + { + "epoch": 0.3772192759972331, + "grad_norm": 1.1348638946303797, + "learning_rate": 1.89698513091807e-06, + "loss": 0.4716145694255829, + "step": 1636 + }, + { + "epoch": 0.37744985012681576, + "grad_norm": 0.990973234996169, + "learning_rate": 1.8968165251112863e-06, + "loss": 0.594079852104187, + "step": 1637 + }, + { + "epoch": 0.3776804242563984, + "grad_norm": 1.3300173886007076, + "learning_rate": 1.8966477889431317e-06, + "loss": 0.4588915705680847, + "step": 1638 + }, + { + "epoch": 0.3779109983859811, + "grad_norm": 1.5111913527277292, + "learning_rate": 1.8964789224381337e-06, + "loss": 0.5236901044845581, + "step": 1639 + }, + { + "epoch": 0.37814157251556374, + "grad_norm": 1.067104402214014, + "learning_rate": 1.8963099256208388e-06, + "loss": 0.4954737424850464, + "step": 1640 + }, + { + "epoch": 0.3783721466451464, + "grad_norm": 1.066408318154628, + "learning_rate": 1.8961407985158125e-06, + "loss": 0.4194701910018921, + "step": 1641 + }, + { + "epoch": 0.37860272077472906, + "grad_norm": 0.9999478144515371, + "learning_rate": 1.8959715411476388e-06, + "loss": 0.5368303060531616, + "step": 1642 + }, + { + "epoch": 0.3788332949043117, + "grad_norm": 1.2178837934755509, + 
"learning_rate": 1.8958021535409214e-06, + "loss": 0.5181677341461182, + "step": 1643 + }, + { + "epoch": 0.3790638690338944, + "grad_norm": 1.0342390187480546, + "learning_rate": 1.8956326357202821e-06, + "loss": 0.4755169749259949, + "step": 1644 + }, + { + "epoch": 0.37929444316347705, + "grad_norm": 1.1097461588236448, + "learning_rate": 1.8954629877103625e-06, + "loss": 0.5460895299911499, + "step": 1645 + }, + { + "epoch": 0.3795250172930597, + "grad_norm": 1.090972908814234, + "learning_rate": 1.8952932095358224e-06, + "loss": 0.47811684012413025, + "step": 1646 + }, + { + "epoch": 0.37975559142264237, + "grad_norm": 1.1794844360929688, + "learning_rate": 1.8951233012213405e-06, + "loss": 0.5791733860969543, + "step": 1647 + }, + { + "epoch": 0.37998616555222503, + "grad_norm": 1.1163036430533217, + "learning_rate": 1.8949532627916151e-06, + "loss": 0.4996911585330963, + "step": 1648 + }, + { + "epoch": 0.3802167396818077, + "grad_norm": 1.3190959058791496, + "learning_rate": 1.8947830942713628e-06, + "loss": 0.6108353137969971, + "step": 1649 + }, + { + "epoch": 0.38044731381139035, + "grad_norm": 1.2084081721604487, + "learning_rate": 1.8946127956853195e-06, + "loss": 0.5303040742874146, + "step": 1650 + }, + { + "epoch": 0.380677887940973, + "grad_norm": 1.0581391679258725, + "learning_rate": 1.8944423670582397e-06, + "loss": 0.4651896357536316, + "step": 1651 + }, + { + "epoch": 0.3809084620705557, + "grad_norm": 1.1464415021916683, + "learning_rate": 1.8942718084148969e-06, + "loss": 0.6321637630462646, + "step": 1652 + }, + { + "epoch": 0.38113903620013834, + "grad_norm": 1.1535120052175352, + "learning_rate": 1.8941011197800836e-06, + "loss": 0.5124787092208862, + "step": 1653 + }, + { + "epoch": 0.381369610329721, + "grad_norm": 1.2712538370269149, + "learning_rate": 1.893930301178611e-06, + "loss": 0.5779180526733398, + "step": 1654 + }, + { + "epoch": 0.38160018445930366, + "grad_norm": 1.2579128550158534, + "learning_rate": 1.8937593526353096e-06, 
+ "loss": 0.5723867416381836, + "step": 1655 + }, + { + "epoch": 0.3818307585888863, + "grad_norm": 1.0216965854263103, + "learning_rate": 1.8935882741750281e-06, + "loss": 0.4312398433685303, + "step": 1656 + }, + { + "epoch": 0.382061332718469, + "grad_norm": 1.7195703110538068, + "learning_rate": 1.893417065822635e-06, + "loss": 0.6503756046295166, + "step": 1657 + }, + { + "epoch": 0.38229190684805164, + "grad_norm": 1.2691180997694498, + "learning_rate": 1.8932457276030166e-06, + "loss": 0.508478045463562, + "step": 1658 + }, + { + "epoch": 0.3825224809776343, + "grad_norm": 0.9328619594784499, + "learning_rate": 1.8930742595410792e-06, + "loss": 0.46552446484565735, + "step": 1659 + }, + { + "epoch": 0.38275305510721697, + "grad_norm": 0.983497277362264, + "learning_rate": 1.8929026616617467e-06, + "loss": 0.4739278256893158, + "step": 1660 + }, + { + "epoch": 0.3829836292367996, + "grad_norm": 1.2642164913655083, + "learning_rate": 1.8927309339899634e-06, + "loss": 0.5584233403205872, + "step": 1661 + }, + { + "epoch": 0.3832142033663823, + "grad_norm": 1.0681648876128738, + "learning_rate": 1.8925590765506911e-06, + "loss": 0.6155074238777161, + "step": 1662 + }, + { + "epoch": 0.38344477749596495, + "grad_norm": 1.1479148469369402, + "learning_rate": 1.8923870893689112e-06, + "loss": 0.5253106951713562, + "step": 1663 + }, + { + "epoch": 0.3836753516255476, + "grad_norm": 1.2179992400932398, + "learning_rate": 1.8922149724696238e-06, + "loss": 0.4190565347671509, + "step": 1664 + }, + { + "epoch": 0.3839059257551303, + "grad_norm": 1.124098215736467, + "learning_rate": 1.892042725877848e-06, + "loss": 0.5263853073120117, + "step": 1665 + }, + { + "epoch": 0.38413649988471293, + "grad_norm": 1.0385777204325046, + "learning_rate": 1.8918703496186214e-06, + "loss": 0.4492432773113251, + "step": 1666 + }, + { + "epoch": 0.3843670740142956, + "grad_norm": 1.3356308613758272, + "learning_rate": 1.8916978437170004e-06, + "loss": 0.49745023250579834, + "step": 
1667 + }, + { + "epoch": 0.38459764814387826, + "grad_norm": 1.2023114319635457, + "learning_rate": 1.891525208198061e-06, + "loss": 0.6003707647323608, + "step": 1668 + }, + { + "epoch": 0.3848282222734609, + "grad_norm": 1.6371184982518272, + "learning_rate": 1.8913524430868973e-06, + "loss": 0.5430049300193787, + "step": 1669 + }, + { + "epoch": 0.3850587964030436, + "grad_norm": 1.0715049923324578, + "learning_rate": 1.8911795484086222e-06, + "loss": 0.5561289191246033, + "step": 1670 + }, + { + "epoch": 0.38528937053262624, + "grad_norm": 1.1416350409171048, + "learning_rate": 1.8910065241883678e-06, + "loss": 0.5488184690475464, + "step": 1671 + }, + { + "epoch": 0.3855199446622089, + "grad_norm": 1.0082475661815067, + "learning_rate": 1.890833370451285e-06, + "loss": 0.46347010135650635, + "step": 1672 + }, + { + "epoch": 0.38575051879179156, + "grad_norm": 1.0668592703569681, + "learning_rate": 1.8906600872225438e-06, + "loss": 0.553687334060669, + "step": 1673 + }, + { + "epoch": 0.3859810929213742, + "grad_norm": 1.1035800532005071, + "learning_rate": 1.8904866745273323e-06, + "loss": 0.46162208914756775, + "step": 1674 + }, + { + "epoch": 0.3862116670509569, + "grad_norm": 1.076914158561248, + "learning_rate": 1.8903131323908576e-06, + "loss": 0.4478996992111206, + "step": 1675 + }, + { + "epoch": 0.38644224118053955, + "grad_norm": 1.1488135535707533, + "learning_rate": 1.8901394608383463e-06, + "loss": 0.5857031345367432, + "step": 1676 + }, + { + "epoch": 0.3866728153101222, + "grad_norm": 1.5929334393746841, + "learning_rate": 1.8899656598950432e-06, + "loss": 0.592833399772644, + "step": 1677 + }, + { + "epoch": 0.38690338943970487, + "grad_norm": 1.0232228390237461, + "learning_rate": 1.8897917295862117e-06, + "loss": 0.6007786989212036, + "step": 1678 + }, + { + "epoch": 0.38713396356928753, + "grad_norm": 1.109869111259485, + "learning_rate": 1.8896176699371343e-06, + "loss": 0.5248164534568787, + "step": 1679 + }, + { + "epoch": 
0.3873645376988702, + "grad_norm": 0.856016560201164, + "learning_rate": 1.8894434809731128e-06, + "loss": 0.43112409114837646, + "step": 1680 + }, + { + "epoch": 0.38759511182845285, + "grad_norm": 1.318795823918729, + "learning_rate": 1.8892691627194673e-06, + "loss": 0.56545090675354, + "step": 1681 + }, + { + "epoch": 0.3878256859580355, + "grad_norm": 1.1470159881146635, + "learning_rate": 1.8890947152015363e-06, + "loss": 0.6287904977798462, + "step": 1682 + }, + { + "epoch": 0.3880562600876182, + "grad_norm": 1.155806897456587, + "learning_rate": 1.8889201384446775e-06, + "loss": 0.48461633920669556, + "step": 1683 + }, + { + "epoch": 0.38828683421720084, + "grad_norm": 1.2251476021613918, + "learning_rate": 1.888745432474268e-06, + "loss": 0.5089331865310669, + "step": 1684 + }, + { + "epoch": 0.3885174083467835, + "grad_norm": 0.9661641286318025, + "learning_rate": 1.8885705973157027e-06, + "loss": 0.4805281162261963, + "step": 1685 + }, + { + "epoch": 0.38874798247636616, + "grad_norm": 1.070887780603473, + "learning_rate": 1.8883956329943955e-06, + "loss": 0.5243096947669983, + "step": 1686 + }, + { + "epoch": 0.3889785566059488, + "grad_norm": 1.240979728566986, + "learning_rate": 1.8882205395357795e-06, + "loss": 0.5808781981468201, + "step": 1687 + }, + { + "epoch": 0.3892091307355315, + "grad_norm": 1.2574299318006046, + "learning_rate": 1.8880453169653063e-06, + "loss": 0.5397018194198608, + "step": 1688 + }, + { + "epoch": 0.38943970486511414, + "grad_norm": 1.182945649827907, + "learning_rate": 1.8878699653084462e-06, + "loss": 0.4475638270378113, + "step": 1689 + }, + { + "epoch": 0.3896702789946968, + "grad_norm": 1.3095447574792232, + "learning_rate": 1.8876944845906884e-06, + "loss": 0.6212958693504333, + "step": 1690 + }, + { + "epoch": 0.38990085312427947, + "grad_norm": 1.1726349359481907, + "learning_rate": 1.8875188748375407e-06, + "loss": 0.44465404748916626, + "step": 1691 + }, + { + "epoch": 0.39013142725386213, + "grad_norm": 
1.2650698772045321, + "learning_rate": 1.8873431360745297e-06, + "loss": 0.5711641311645508, + "step": 1692 + }, + { + "epoch": 0.3903620013834448, + "grad_norm": 1.2039233000565408, + "learning_rate": 1.8871672683272012e-06, + "loss": 0.4527866244316101, + "step": 1693 + }, + { + "epoch": 0.39059257551302745, + "grad_norm": 1.515756125658867, + "learning_rate": 1.8869912716211188e-06, + "loss": 0.6242899894714355, + "step": 1694 + }, + { + "epoch": 0.3908231496426101, + "grad_norm": 1.6198907712835393, + "learning_rate": 1.8868151459818656e-06, + "loss": 0.6294416189193726, + "step": 1695 + }, + { + "epoch": 0.3910537237721928, + "grad_norm": 1.2238875456694314, + "learning_rate": 1.8866388914350435e-06, + "loss": 0.49869638681411743, + "step": 1696 + }, + { + "epoch": 0.39128429790177544, + "grad_norm": 1.1755814842525432, + "learning_rate": 1.886462508006273e-06, + "loss": 0.5456752777099609, + "step": 1697 + }, + { + "epoch": 0.3915148720313581, + "grad_norm": 1.0114016306766007, + "learning_rate": 1.8862859957211926e-06, + "loss": 0.4197172224521637, + "step": 1698 + }, + { + "epoch": 0.39174544616094076, + "grad_norm": 1.0278658872450297, + "learning_rate": 1.8861093546054603e-06, + "loss": 0.5012276768684387, + "step": 1699 + }, + { + "epoch": 0.3919760202905234, + "grad_norm": 1.2065880303446173, + "learning_rate": 1.8859325846847531e-06, + "loss": 0.48108845949172974, + "step": 1700 + }, + { + "epoch": 0.3922065944201061, + "grad_norm": 1.1190986847477769, + "learning_rate": 1.885755685984766e-06, + "loss": 0.48592355847358704, + "step": 1701 + }, + { + "epoch": 0.39243716854968874, + "grad_norm": 1.136053467553038, + "learning_rate": 1.8855786585312132e-06, + "loss": 0.5744791030883789, + "step": 1702 + }, + { + "epoch": 0.3926677426792714, + "grad_norm": 1.1435558229801501, + "learning_rate": 1.8854015023498273e-06, + "loss": 0.5378769040107727, + "step": 1703 + }, + { + "epoch": 0.39289831680885406, + "grad_norm": 1.0710678493453967, + "learning_rate": 
1.8852242174663594e-06, + "loss": 0.5630123615264893, + "step": 1704 + }, + { + "epoch": 0.3931288909384367, + "grad_norm": 1.0913466409725974, + "learning_rate": 1.8850468039065806e-06, + "loss": 0.5247849225997925, + "step": 1705 + }, + { + "epoch": 0.3933594650680194, + "grad_norm": 1.282307381217427, + "learning_rate": 1.884869261696279e-06, + "loss": 0.5679286122322083, + "step": 1706 + }, + { + "epoch": 0.39359003919760205, + "grad_norm": 1.0140902583392881, + "learning_rate": 1.8846915908612622e-06, + "loss": 0.4505179524421692, + "step": 1707 + }, + { + "epoch": 0.3938206133271847, + "grad_norm": 1.233342858229108, + "learning_rate": 1.8845137914273566e-06, + "loss": 0.6077077388763428, + "step": 1708 + }, + { + "epoch": 0.39405118745676737, + "grad_norm": 1.1523756442286543, + "learning_rate": 1.8843358634204069e-06, + "loss": 0.4703037738800049, + "step": 1709 + }, + { + "epoch": 0.39428176158635003, + "grad_norm": 1.3467147447696661, + "learning_rate": 1.8841578068662773e-06, + "loss": 0.6085091829299927, + "step": 1710 + }, + { + "epoch": 0.3945123357159327, + "grad_norm": 1.3769264461225226, + "learning_rate": 1.8839796217908498e-06, + "loss": 0.6075730919837952, + "step": 1711 + }, + { + "epoch": 0.39474290984551536, + "grad_norm": 1.4068518720273175, + "learning_rate": 1.8838013082200252e-06, + "loss": 0.581851601600647, + "step": 1712 + }, + { + "epoch": 0.394973483975098, + "grad_norm": 0.9365976129961602, + "learning_rate": 1.8836228661797234e-06, + "loss": 0.555284857749939, + "step": 1713 + }, + { + "epoch": 0.3952040581046807, + "grad_norm": 1.205134330479215, + "learning_rate": 1.8834442956958832e-06, + "loss": 0.5342675447463989, + "step": 1714 + }, + { + "epoch": 0.39543463223426334, + "grad_norm": 1.2329889286532099, + "learning_rate": 1.8832655967944605e-06, + "loss": 0.47501081228256226, + "step": 1715 + }, + { + "epoch": 0.395665206363846, + "grad_norm": 1.1350943426800137, + "learning_rate": 1.8830867695014323e-06, + "loss": 
0.592293918132782, + "step": 1716 + }, + { + "epoch": 0.39589578049342866, + "grad_norm": 1.2591938264724012, + "learning_rate": 1.8829078138427921e-06, + "loss": 0.5903242826461792, + "step": 1717 + }, + { + "epoch": 0.3961263546230113, + "grad_norm": 1.203385992389072, + "learning_rate": 1.882728729844553e-06, + "loss": 0.5292568206787109, + "step": 1718 + }, + { + "epoch": 0.396356928752594, + "grad_norm": 1.070652075724697, + "learning_rate": 1.8825495175327468e-06, + "loss": 0.5748786926269531, + "step": 1719 + }, + { + "epoch": 0.39658750288217665, + "grad_norm": 1.230421737483, + "learning_rate": 1.8823701769334242e-06, + "loss": 0.6191601753234863, + "step": 1720 + }, + { + "epoch": 0.3968180770117593, + "grad_norm": 1.180452919869617, + "learning_rate": 1.8821907080726535e-06, + "loss": 0.5569231510162354, + "step": 1721 + }, + { + "epoch": 0.39704865114134197, + "grad_norm": 1.291275382361216, + "learning_rate": 1.882011110976523e-06, + "loss": 0.5103349089622498, + "step": 1722 + }, + { + "epoch": 0.39727922527092463, + "grad_norm": 1.1952555855906501, + "learning_rate": 1.8818313856711382e-06, + "loss": 0.4981175363063812, + "step": 1723 + }, + { + "epoch": 0.39750979940050724, + "grad_norm": 1.5157833486690673, + "learning_rate": 1.8816515321826248e-06, + "loss": 0.5429514050483704, + "step": 1724 + }, + { + "epoch": 0.3977403735300899, + "grad_norm": 1.1377768164918185, + "learning_rate": 1.8814715505371254e-06, + "loss": 0.5318386554718018, + "step": 1725 + }, + { + "epoch": 0.39797094765967256, + "grad_norm": 1.0451576127270763, + "learning_rate": 1.881291440760803e-06, + "loss": 0.47451460361480713, + "step": 1726 + }, + { + "epoch": 0.3982015217892552, + "grad_norm": 1.2815255131055066, + "learning_rate": 1.8811112028798384e-06, + "loss": 0.5141372680664062, + "step": 1727 + }, + { + "epoch": 0.3984320959188379, + "grad_norm": 1.0864089006893662, + "learning_rate": 1.8809308369204302e-06, + "loss": 0.4950217008590698, + "step": 1728 + }, + { + 
"epoch": 0.39866267004842054, + "grad_norm": 0.9530925154379366, + "learning_rate": 1.880750342908797e-06, + "loss": 0.4961693286895752, + "step": 1729 + }, + { + "epoch": 0.3988932441780032, + "grad_norm": 1.1860643451162984, + "learning_rate": 1.8805697208711752e-06, + "loss": 0.43443650007247925, + "step": 1730 + }, + { + "epoch": 0.39912381830758586, + "grad_norm": 1.1332453377909741, + "learning_rate": 1.8803889708338203e-06, + "loss": 0.6116896867752075, + "step": 1731 + }, + { + "epoch": 0.3993543924371685, + "grad_norm": 0.9403622624868753, + "learning_rate": 1.8802080928230062e-06, + "loss": 0.46244728565216064, + "step": 1732 + }, + { + "epoch": 0.3995849665667512, + "grad_norm": 1.3180964068285155, + "learning_rate": 1.880027086865025e-06, + "loss": 0.5728162527084351, + "step": 1733 + }, + { + "epoch": 0.39981554069633385, + "grad_norm": 1.1310284579414278, + "learning_rate": 1.8798459529861876e-06, + "loss": 0.4472135901451111, + "step": 1734 + }, + { + "epoch": 0.4000461148259165, + "grad_norm": 1.4100215542732757, + "learning_rate": 1.8796646912128246e-06, + "loss": 0.5862090587615967, + "step": 1735 + }, + { + "epoch": 0.40027668895549917, + "grad_norm": 1.428537555998266, + "learning_rate": 1.8794833015712831e-06, + "loss": 0.6406301259994507, + "step": 1736 + }, + { + "epoch": 0.40050726308508183, + "grad_norm": 1.3320783455965834, + "learning_rate": 1.8793017840879306e-06, + "loss": 0.5865743160247803, + "step": 1737 + }, + { + "epoch": 0.4007378372146645, + "grad_norm": 1.2736301947050057, + "learning_rate": 1.8791201387891524e-06, + "loss": 0.5521814823150635, + "step": 1738 + }, + { + "epoch": 0.40096841134424716, + "grad_norm": 0.9710129928143749, + "learning_rate": 1.8789383657013522e-06, + "loss": 0.40027791261672974, + "step": 1739 + }, + { + "epoch": 0.4011989854738298, + "grad_norm": 1.213730124395359, + "learning_rate": 1.8787564648509528e-06, + "loss": 0.5594751238822937, + "step": 1740 + }, + { + "epoch": 0.4014295596034125, + 
"grad_norm": 1.2077878384788876, + "learning_rate": 1.8785744362643955e-06, + "loss": 0.5029730796813965, + "step": 1741 + }, + { + "epoch": 0.40166013373299514, + "grad_norm": 1.086599940670418, + "learning_rate": 1.8783922799681397e-06, + "loss": 0.6089034676551819, + "step": 1742 + }, + { + "epoch": 0.4018907078625778, + "grad_norm": 1.178028157014987, + "learning_rate": 1.8782099959886639e-06, + "loss": 0.5238372683525085, + "step": 1743 + }, + { + "epoch": 0.40212128199216046, + "grad_norm": 1.0430681899893623, + "learning_rate": 1.8780275843524643e-06, + "loss": 0.47281232476234436, + "step": 1744 + }, + { + "epoch": 0.4023518561217431, + "grad_norm": 1.0603667709126336, + "learning_rate": 1.8778450450860571e-06, + "loss": 0.44885876774787903, + "step": 1745 + }, + { + "epoch": 0.4025824302513258, + "grad_norm": 1.1187549409367323, + "learning_rate": 1.8776623782159762e-06, + "loss": 0.5915139317512512, + "step": 1746 + }, + { + "epoch": 0.40281300438090845, + "grad_norm": 1.6743224234561098, + "learning_rate": 1.8774795837687736e-06, + "loss": 0.49341484904289246, + "step": 1747 + }, + { + "epoch": 0.4030435785104911, + "grad_norm": 1.1133076324661322, + "learning_rate": 1.8772966617710205e-06, + "loss": 0.43253493309020996, + "step": 1748 + }, + { + "epoch": 0.40327415264007377, + "grad_norm": 1.2596810310862556, + "learning_rate": 1.8771136122493064e-06, + "loss": 0.48660045862197876, + "step": 1749 + }, + { + "epoch": 0.40350472676965643, + "grad_norm": 1.158836920018239, + "learning_rate": 1.8769304352302396e-06, + "loss": 0.4493838846683502, + "step": 1750 + }, + { + "epoch": 0.4037353008992391, + "grad_norm": 1.1033409495303377, + "learning_rate": 1.8767471307404464e-06, + "loss": 0.5656435489654541, + "step": 1751 + }, + { + "epoch": 0.40396587502882175, + "grad_norm": 1.1945430976561655, + "learning_rate": 1.876563698806572e-06, + "loss": 0.48047327995300293, + "step": 1752 + }, + { + "epoch": 0.4041964491584044, + "grad_norm": 1.117811372759575, + 
"learning_rate": 1.8763801394552806e-06, + "loss": 0.5314204692840576, + "step": 1753 + }, + { + "epoch": 0.4044270232879871, + "grad_norm": 1.212293607312766, + "learning_rate": 1.876196452713254e-06, + "loss": 0.5436627864837646, + "step": 1754 + }, + { + "epoch": 0.40465759741756974, + "grad_norm": 1.1748084841171984, + "learning_rate": 1.8760126386071933e-06, + "loss": 0.5383991599082947, + "step": 1755 + }, + { + "epoch": 0.4048881715471524, + "grad_norm": 1.1737559222863878, + "learning_rate": 1.8758286971638171e-06, + "loss": 0.48271507024765015, + "step": 1756 + }, + { + "epoch": 0.40511874567673506, + "grad_norm": 1.0323965631837329, + "learning_rate": 1.8756446284098638e-06, + "loss": 0.5920745134353638, + "step": 1757 + }, + { + "epoch": 0.4053493198063177, + "grad_norm": 1.1254236464300211, + "learning_rate": 1.875460432372089e-06, + "loss": 0.4467526078224182, + "step": 1758 + }, + { + "epoch": 0.4055798939359004, + "grad_norm": 0.9503211623796617, + "learning_rate": 1.875276109077268e-06, + "loss": 0.425409734249115, + "step": 1759 + }, + { + "epoch": 0.40581046806548304, + "grad_norm": 1.1318149217921376, + "learning_rate": 1.8750916585521938e-06, + "loss": 0.4911944568157196, + "step": 1760 + }, + { + "epoch": 0.4060410421950657, + "grad_norm": 1.5865124774001016, + "learning_rate": 1.8749070808236787e-06, + "loss": 0.49605780839920044, + "step": 1761 + }, + { + "epoch": 0.40627161632464837, + "grad_norm": 1.322640956813398, + "learning_rate": 1.874722375918552e-06, + "loss": 0.5582889914512634, + "step": 1762 + }, + { + "epoch": 0.406502190454231, + "grad_norm": 1.0487904765861873, + "learning_rate": 1.874537543863663e-06, + "loss": 0.4867294132709503, + "step": 1763 + }, + { + "epoch": 0.4067327645838137, + "grad_norm": 1.062364022734449, + "learning_rate": 1.8743525846858787e-06, + "loss": 0.5050587058067322, + "step": 1764 + }, + { + "epoch": 0.40696333871339635, + "grad_norm": 1.0581562602291477, + "learning_rate": 1.8741674984120852e-06, + 
"loss": 0.4380977749824524, + "step": 1765 + }, + { + "epoch": 0.407193912842979, + "grad_norm": 1.326690473297383, + "learning_rate": 1.8739822850691865e-06, + "loss": 0.5159280300140381, + "step": 1766 + }, + { + "epoch": 0.4074244869725617, + "grad_norm": 1.3542586293022822, + "learning_rate": 1.8737969446841046e-06, + "loss": 0.6999780535697937, + "step": 1767 + }, + { + "epoch": 0.40765506110214433, + "grad_norm": 1.110421221417803, + "learning_rate": 1.8736114772837816e-06, + "loss": 0.5844931602478027, + "step": 1768 + }, + { + "epoch": 0.407885635231727, + "grad_norm": 1.2621793403708754, + "learning_rate": 1.8734258828951764e-06, + "loss": 0.5078610181808472, + "step": 1769 + }, + { + "epoch": 0.40811620936130966, + "grad_norm": 1.1260800835324682, + "learning_rate": 1.8732401615452673e-06, + "loss": 0.564793586730957, + "step": 1770 + }, + { + "epoch": 0.4083467834908923, + "grad_norm": 1.2906459398399637, + "learning_rate": 1.8730543132610506e-06, + "loss": 0.6145100593566895, + "step": 1771 + }, + { + "epoch": 0.408577357620475, + "grad_norm": 1.181953537531204, + "learning_rate": 1.8728683380695414e-06, + "loss": 0.45434027910232544, + "step": 1772 + }, + { + "epoch": 0.40880793175005764, + "grad_norm": 1.0716516851559217, + "learning_rate": 1.872682235997773e-06, + "loss": 0.4917553961277008, + "step": 1773 + }, + { + "epoch": 0.4090385058796403, + "grad_norm": 1.0983534367258283, + "learning_rate": 1.872496007072797e-06, + "loss": 0.5677252411842346, + "step": 1774 + }, + { + "epoch": 0.40926908000922296, + "grad_norm": 1.042591224606922, + "learning_rate": 1.872309651321684e-06, + "loss": 0.5516688823699951, + "step": 1775 + }, + { + "epoch": 0.4094996541388056, + "grad_norm": 0.9746786592567609, + "learning_rate": 1.8721231687715227e-06, + "loss": 0.46755337715148926, + "step": 1776 + }, + { + "epoch": 0.4097302282683883, + "grad_norm": 1.3130136596789415, + "learning_rate": 1.8719365594494202e-06, + "loss": 0.6575521230697632, + "step": 1777 + }, 
+ { + "epoch": 0.40996080239797095, + "grad_norm": 1.147271087293654, + "learning_rate": 1.8717498233825019e-06, + "loss": 0.6088716983795166, + "step": 1778 + }, + { + "epoch": 0.4101913765275536, + "grad_norm": 0.9692417840942277, + "learning_rate": 1.8715629605979118e-06, + "loss": 0.39476478099823, + "step": 1779 + }, + { + "epoch": 0.41042195065713627, + "grad_norm": 1.1915743629339146, + "learning_rate": 1.8713759711228123e-06, + "loss": 0.4893898665904999, + "step": 1780 + }, + { + "epoch": 0.41065252478671893, + "grad_norm": 1.298092223223541, + "learning_rate": 1.8711888549843842e-06, + "loss": 0.5077828764915466, + "step": 1781 + }, + { + "epoch": 0.4108830989163016, + "grad_norm": 1.0084481520460131, + "learning_rate": 1.8710016122098269e-06, + "loss": 0.5212582349777222, + "step": 1782 + }, + { + "epoch": 0.41111367304588425, + "grad_norm": 1.1325685052130308, + "learning_rate": 1.870814242826358e-06, + "loss": 0.5135321617126465, + "step": 1783 + }, + { + "epoch": 0.4113442471754669, + "grad_norm": 1.3281766258765773, + "learning_rate": 1.8706267468612133e-06, + "loss": 0.5398930311203003, + "step": 1784 + }, + { + "epoch": 0.4115748213050496, + "grad_norm": 1.3736547238310808, + "learning_rate": 1.8704391243416477e-06, + "loss": 0.49205562472343445, + "step": 1785 + }, + { + "epoch": 0.41180539543463224, + "grad_norm": 1.1386437791047925, + "learning_rate": 1.8702513752949335e-06, + "loss": 0.5145718455314636, + "step": 1786 + }, + { + "epoch": 0.4120359695642149, + "grad_norm": 0.9532031818658743, + "learning_rate": 1.8700634997483622e-06, + "loss": 0.4868374466896057, + "step": 1787 + }, + { + "epoch": 0.41226654369379756, + "grad_norm": 1.3881400467911258, + "learning_rate": 1.8698754977292435e-06, + "loss": 0.5409311652183533, + "step": 1788 + }, + { + "epoch": 0.4124971178233802, + "grad_norm": 1.307800898328953, + "learning_rate": 1.8696873692649052e-06, + "loss": 0.5476658344268799, + "step": 1789 + }, + { + "epoch": 0.4127276919529629, + 
"grad_norm": 1.251951597359409, + "learning_rate": 1.8694991143826937e-06, + "loss": 0.5545511245727539, + "step": 1790 + }, + { + "epoch": 0.41295826608254554, + "grad_norm": 1.1923559975321376, + "learning_rate": 1.869310733109974e-06, + "loss": 0.5479267835617065, + "step": 1791 + }, + { + "epoch": 0.4131888402121282, + "grad_norm": 1.1567279350887396, + "learning_rate": 1.8691222254741289e-06, + "loss": 0.5261585712432861, + "step": 1792 + }, + { + "epoch": 0.41341941434171087, + "grad_norm": 1.035636889065738, + "learning_rate": 1.8689335915025599e-06, + "loss": 0.5478091239929199, + "step": 1793 + }, + { + "epoch": 0.41364998847129353, + "grad_norm": 1.5699808716332777, + "learning_rate": 1.8687448312226872e-06, + "loss": 0.6739054322242737, + "step": 1794 + }, + { + "epoch": 0.4138805626008762, + "grad_norm": 1.2236857571837823, + "learning_rate": 1.8685559446619487e-06, + "loss": 0.613865315914154, + "step": 1795 + }, + { + "epoch": 0.41411113673045885, + "grad_norm": 1.0357788562325108, + "learning_rate": 1.8683669318478012e-06, + "loss": 0.3936721384525299, + "step": 1796 + }, + { + "epoch": 0.4143417108600415, + "grad_norm": 1.2330991076599302, + "learning_rate": 1.8681777928077197e-06, + "loss": 0.5508556365966797, + "step": 1797 + }, + { + "epoch": 0.4145722849896242, + "grad_norm": 1.1597942164225867, + "learning_rate": 1.867988527569197e-06, + "loss": 0.47734567523002625, + "step": 1798 + }, + { + "epoch": 0.41480285911920683, + "grad_norm": 1.0741273588884312, + "learning_rate": 1.8677991361597449e-06, + "loss": 0.46847039461135864, + "step": 1799 + }, + { + "epoch": 0.4150334332487895, + "grad_norm": 1.0364595457718502, + "learning_rate": 1.8676096186068937e-06, + "loss": 0.5202786326408386, + "step": 1800 + }, + { + "epoch": 0.41526400737837216, + "grad_norm": 1.2972392907268704, + "learning_rate": 1.8674199749381914e-06, + "loss": 0.5144700407981873, + "step": 1801 + }, + { + "epoch": 0.4154945815079548, + "grad_norm": 1.1959128972921023, + 
"learning_rate": 1.8672302051812048e-06, + "loss": 0.4394092559814453, + "step": 1802 + }, + { + "epoch": 0.4157251556375375, + "grad_norm": 1.159378410595036, + "learning_rate": 1.8670403093635185e-06, + "loss": 0.5017338991165161, + "step": 1803 + }, + { + "epoch": 0.41595572976712014, + "grad_norm": 1.173120824085894, + "learning_rate": 1.8668502875127366e-06, + "loss": 0.409381628036499, + "step": 1804 + }, + { + "epoch": 0.4161863038967028, + "grad_norm": 1.0538601271665184, + "learning_rate": 1.8666601396564795e-06, + "loss": 0.5193957090377808, + "step": 1805 + }, + { + "epoch": 0.41641687802628546, + "grad_norm": 1.1338279816499315, + "learning_rate": 1.8664698658223882e-06, + "loss": 0.5933586359024048, + "step": 1806 + }, + { + "epoch": 0.4166474521558681, + "grad_norm": 1.1304820859227924, + "learning_rate": 1.8662794660381204e-06, + "loss": 0.5283366441726685, + "step": 1807 + }, + { + "epoch": 0.4168780262854508, + "grad_norm": 1.118558214164988, + "learning_rate": 1.8660889403313526e-06, + "loss": 0.5063748359680176, + "step": 1808 + }, + { + "epoch": 0.41710860041503345, + "grad_norm": 1.087893149342631, + "learning_rate": 1.86589828872978e-06, + "loss": 0.6386028528213501, + "step": 1809 + }, + { + "epoch": 0.4173391745446161, + "grad_norm": 1.0041938541729358, + "learning_rate": 1.8657075112611153e-06, + "loss": 0.4618440270423889, + "step": 1810 + }, + { + "epoch": 0.41756974867419877, + "grad_norm": 1.3214046412105014, + "learning_rate": 1.8655166079530903e-06, + "loss": 0.4523535966873169, + "step": 1811 + }, + { + "epoch": 0.41780032280378143, + "grad_norm": 1.0747078557029888, + "learning_rate": 1.8653255788334544e-06, + "loss": 0.501311719417572, + "step": 1812 + }, + { + "epoch": 0.4180308969333641, + "grad_norm": 1.112333239244982, + "learning_rate": 1.865134423929976e-06, + "loss": 0.5504614114761353, + "step": 1813 + }, + { + "epoch": 0.41826147106294675, + "grad_norm": 1.0979124892402103, + "learning_rate": 1.864943143270441e-06, + 
"loss": 0.44275063276290894, + "step": 1814 + }, + { + "epoch": 0.4184920451925294, + "grad_norm": 1.2558217334961832, + "learning_rate": 1.8647517368826545e-06, + "loss": 0.5628173351287842, + "step": 1815 + }, + { + "epoch": 0.4187226193221121, + "grad_norm": 1.032119999950418, + "learning_rate": 1.864560204794439e-06, + "loss": 0.489221453666687, + "step": 1816 + }, + { + "epoch": 0.41895319345169474, + "grad_norm": 1.2211401188891802, + "learning_rate": 1.8643685470336355e-06, + "loss": 0.5440137386322021, + "step": 1817 + }, + { + "epoch": 0.4191837675812774, + "grad_norm": 1.169073111073683, + "learning_rate": 1.8641767636281035e-06, + "loss": 0.4518952965736389, + "step": 1818 + }, + { + "epoch": 0.41941434171086006, + "grad_norm": 1.3403542594346476, + "learning_rate": 1.8639848546057209e-06, + "loss": 0.591090977191925, + "step": 1819 + }, + { + "epoch": 0.4196449158404427, + "grad_norm": 1.1775626126130905, + "learning_rate": 1.8637928199943836e-06, + "loss": 0.5622411966323853, + "step": 1820 + }, + { + "epoch": 0.4198754899700254, + "grad_norm": 1.1913164061698733, + "learning_rate": 1.8636006598220052e-06, + "loss": 0.5086779594421387, + "step": 1821 + }, + { + "epoch": 0.42010606409960805, + "grad_norm": 1.1334153574078034, + "learning_rate": 1.8634083741165188e-06, + "loss": 0.5055384635925293, + "step": 1822 + }, + { + "epoch": 0.4203366382291907, + "grad_norm": 1.129676706405598, + "learning_rate": 1.863215962905875e-06, + "loss": 0.5076277852058411, + "step": 1823 + }, + { + "epoch": 0.42056721235877337, + "grad_norm": 1.2637764937692704, + "learning_rate": 1.8630234262180424e-06, + "loss": 0.5378403067588806, + "step": 1824 + }, + { + "epoch": 0.42079778648835603, + "grad_norm": 1.0886873342980177, + "learning_rate": 1.8628307640810083e-06, + "loss": 0.6133165955543518, + "step": 1825 + }, + { + "epoch": 0.4210283606179387, + "grad_norm": 1.1726755470049002, + "learning_rate": 1.8626379765227782e-06, + "loss": 0.4978156089782715, + "step": 1826 + 
}, + { + "epoch": 0.42125893474752135, + "grad_norm": 1.0651427070474233, + "learning_rate": 1.8624450635713759e-06, + "loss": 0.43159037828445435, + "step": 1827 + }, + { + "epoch": 0.421489508877104, + "grad_norm": 1.0498543002649237, + "learning_rate": 1.8622520252548424e-06, + "loss": 0.48821642994880676, + "step": 1828 + }, + { + "epoch": 0.4217200830066867, + "grad_norm": 1.016883491579865, + "learning_rate": 1.8620588616012387e-06, + "loss": 0.4666696786880493, + "step": 1829 + }, + { + "epoch": 0.42195065713626934, + "grad_norm": 1.3621906870852534, + "learning_rate": 1.8618655726386425e-06, + "loss": 0.5278067588806152, + "step": 1830 + }, + { + "epoch": 0.422181231265852, + "grad_norm": 1.0791230542588068, + "learning_rate": 1.8616721583951512e-06, + "loss": 0.4357749819755554, + "step": 1831 + }, + { + "epoch": 0.42241180539543466, + "grad_norm": 1.2299213864410639, + "learning_rate": 1.8614786188988782e-06, + "loss": 0.5388439893722534, + "step": 1832 + }, + { + "epoch": 0.4226423795250173, + "grad_norm": 1.4108572710321559, + "learning_rate": 1.8612849541779573e-06, + "loss": 0.5443956255912781, + "step": 1833 + }, + { + "epoch": 0.4228729536546, + "grad_norm": 1.2641105463427431, + "learning_rate": 1.86109116426054e-06, + "loss": 0.5614160895347595, + "step": 1834 + }, + { + "epoch": 0.4231035277841826, + "grad_norm": 1.2744746751945835, + "learning_rate": 1.8608972491747943e-06, + "loss": 0.45780229568481445, + "step": 1835 + }, + { + "epoch": 0.42333410191376525, + "grad_norm": 1.4638598184796152, + "learning_rate": 1.8607032089489088e-06, + "loss": 0.6354867219924927, + "step": 1836 + }, + { + "epoch": 0.4235646760433479, + "grad_norm": 1.2548140048045007, + "learning_rate": 1.860509043611089e-06, + "loss": 0.5172948241233826, + "step": 1837 + }, + { + "epoch": 0.42379525017293057, + "grad_norm": 1.1235697857312772, + "learning_rate": 1.8603147531895586e-06, + "loss": 0.4353157877922058, + "step": 1838 + }, + { + "epoch": 0.42402582430251323, + 
"grad_norm": 1.1680682893696177, + "learning_rate": 1.8601203377125599e-06, + "loss": 0.4971036911010742, + "step": 1839 + }, + { + "epoch": 0.4242563984320959, + "grad_norm": 1.0750331417799794, + "learning_rate": 1.859925797208353e-06, + "loss": 0.5037736296653748, + "step": 1840 + }, + { + "epoch": 0.42448697256167855, + "grad_norm": 1.052234823772871, + "learning_rate": 1.8597311317052165e-06, + "loss": 0.4480808675289154, + "step": 1841 + }, + { + "epoch": 0.4247175466912612, + "grad_norm": 1.2441100874175304, + "learning_rate": 1.8595363412314468e-06, + "loss": 0.5102680325508118, + "step": 1842 + }, + { + "epoch": 0.4249481208208439, + "grad_norm": 1.1806961844163353, + "learning_rate": 1.8593414258153585e-06, + "loss": 0.5979090929031372, + "step": 1843 + }, + { + "epoch": 0.42517869495042654, + "grad_norm": 1.0776260642041309, + "learning_rate": 1.8591463854852854e-06, + "loss": 0.4616047143936157, + "step": 1844 + }, + { + "epoch": 0.4254092690800092, + "grad_norm": 1.0059742827824252, + "learning_rate": 1.8589512202695773e-06, + "loss": 0.4893925189971924, + "step": 1845 + }, + { + "epoch": 0.42563984320959186, + "grad_norm": 1.0527785435538273, + "learning_rate": 1.8587559301966045e-06, + "loss": 0.49619823694229126, + "step": 1846 + }, + { + "epoch": 0.4258704173391745, + "grad_norm": 1.0558967393125807, + "learning_rate": 1.858560515294754e-06, + "loss": 0.5205181837081909, + "step": 1847 + }, + { + "epoch": 0.4261009914687572, + "grad_norm": 1.3589791827910958, + "learning_rate": 1.8583649755924315e-06, + "loss": 0.5910394191741943, + "step": 1848 + }, + { + "epoch": 0.42633156559833985, + "grad_norm": 1.0092224062378152, + "learning_rate": 1.8581693111180603e-06, + "loss": 0.4916709363460541, + "step": 1849 + }, + { + "epoch": 0.4265621397279225, + "grad_norm": 1.261654259944108, + "learning_rate": 1.8579735219000824e-06, + "loss": 0.5728994011878967, + "step": 1850 + }, + { + "epoch": 0.42679271385750517, + "grad_norm": 1.162885813109175, + 
"learning_rate": 1.857777607966958e-06, + "loss": 0.49620527029037476, + "step": 1851 + }, + { + "epoch": 0.42702328798708783, + "grad_norm": 1.2230754640158692, + "learning_rate": 1.8575815693471649e-06, + "loss": 0.5100233554840088, + "step": 1852 + }, + { + "epoch": 0.4272538621166705, + "grad_norm": 1.1713081386962017, + "learning_rate": 1.8573854060691994e-06, + "loss": 0.48981544375419617, + "step": 1853 + }, + { + "epoch": 0.42748443624625315, + "grad_norm": 1.0875128431195988, + "learning_rate": 1.8571891181615755e-06, + "loss": 0.44190293550491333, + "step": 1854 + }, + { + "epoch": 0.4277150103758358, + "grad_norm": 1.2645757986317834, + "learning_rate": 1.8569927056528264e-06, + "loss": 0.42867448925971985, + "step": 1855 + }, + { + "epoch": 0.4279455845054185, + "grad_norm": 1.849182592399251, + "learning_rate": 1.8567961685715016e-06, + "loss": 0.4873782694339752, + "step": 1856 + }, + { + "epoch": 0.42817615863500114, + "grad_norm": 1.2007241803680166, + "learning_rate": 1.8565995069461706e-06, + "loss": 0.4985312819480896, + "step": 1857 + }, + { + "epoch": 0.4284067327645838, + "grad_norm": 1.2242163730204847, + "learning_rate": 1.85640272080542e-06, + "loss": 0.5525496006011963, + "step": 1858 + }, + { + "epoch": 0.42863730689416646, + "grad_norm": 1.293851624108558, + "learning_rate": 1.8562058101778547e-06, + "loss": 0.5645877122879028, + "step": 1859 + }, + { + "epoch": 0.4288678810237491, + "grad_norm": 1.0805291431045556, + "learning_rate": 1.856008775092097e-06, + "loss": 0.4304332137107849, + "step": 1860 + }, + { + "epoch": 0.4290984551533318, + "grad_norm": 1.14759009112306, + "learning_rate": 1.8558116155767888e-06, + "loss": 0.4970170259475708, + "step": 1861 + }, + { + "epoch": 0.42932902928291444, + "grad_norm": 1.344010966492771, + "learning_rate": 1.8556143316605888e-06, + "loss": 0.5718003511428833, + "step": 1862 + }, + { + "epoch": 0.4295596034124971, + "grad_norm": 1.3157067542574963, + "learning_rate": 1.8554169233721741e-06, + 
"loss": 0.4445415139198303, + "step": 1863 + }, + { + "epoch": 0.42979017754207977, + "grad_norm": 1.1001033203387223, + "learning_rate": 1.8552193907402404e-06, + "loss": 0.5297178626060486, + "step": 1864 + }, + { + "epoch": 0.4300207516716624, + "grad_norm": 0.9618626645905404, + "learning_rate": 1.8550217337935013e-06, + "loss": 0.4564483165740967, + "step": 1865 + }, + { + "epoch": 0.4302513258012451, + "grad_norm": 1.2509575429906847, + "learning_rate": 1.8548239525606872e-06, + "loss": 0.4789202809333801, + "step": 1866 + }, + { + "epoch": 0.43048189993082775, + "grad_norm": 1.0950598228304256, + "learning_rate": 1.8546260470705485e-06, + "loss": 0.5240263938903809, + "step": 1867 + }, + { + "epoch": 0.4307124740604104, + "grad_norm": 1.0326884664902543, + "learning_rate": 1.8544280173518523e-06, + "loss": 0.4190866947174072, + "step": 1868 + }, + { + "epoch": 0.43094304818999307, + "grad_norm": 1.098749197470929, + "learning_rate": 1.8542298634333844e-06, + "loss": 0.502301812171936, + "step": 1869 + }, + { + "epoch": 0.43117362231957573, + "grad_norm": 1.3711612309046508, + "learning_rate": 1.8540315853439488e-06, + "loss": 0.5752545595169067, + "step": 1870 + }, + { + "epoch": 0.4314041964491584, + "grad_norm": 0.9641480143185914, + "learning_rate": 1.8538331831123667e-06, + "loss": 0.44959962368011475, + "step": 1871 + }, + { + "epoch": 0.43163477057874106, + "grad_norm": 1.2299121621798328, + "learning_rate": 1.8536346567674782e-06, + "loss": 0.5320106148719788, + "step": 1872 + }, + { + "epoch": 0.4318653447083237, + "grad_norm": 1.393182956860924, + "learning_rate": 1.8534360063381407e-06, + "loss": 0.5981979966163635, + "step": 1873 + }, + { + "epoch": 0.4320959188379064, + "grad_norm": 1.350381662747622, + "learning_rate": 1.8532372318532306e-06, + "loss": 0.5567579865455627, + "step": 1874 + }, + { + "epoch": 0.43232649296748904, + "grad_norm": 1.4350681093951811, + "learning_rate": 1.8530383333416415e-06, + "loss": 0.5604764223098755, + "step": 
1875 + }, + { + "epoch": 0.4325570670970717, + "grad_norm": 1.4048444099270982, + "learning_rate": 1.8528393108322852e-06, + "loss": 0.5410721302032471, + "step": 1876 + }, + { + "epoch": 0.43278764122665436, + "grad_norm": 1.1191045271107989, + "learning_rate": 1.852640164354092e-06, + "loss": 0.5417271852493286, + "step": 1877 + }, + { + "epoch": 0.433018215356237, + "grad_norm": 1.1925092385457925, + "learning_rate": 1.8524408939360096e-06, + "loss": 0.5831471681594849, + "step": 1878 + }, + { + "epoch": 0.4332487894858197, + "grad_norm": 1.0939224950949575, + "learning_rate": 1.8522414996070045e-06, + "loss": 0.45030760765075684, + "step": 1879 + }, + { + "epoch": 0.43347936361540235, + "grad_norm": 1.1520994484307991, + "learning_rate": 1.8520419813960596e-06, + "loss": 0.44657936692237854, + "step": 1880 + }, + { + "epoch": 0.433709937744985, + "grad_norm": 1.1691007631884454, + "learning_rate": 1.851842339332178e-06, + "loss": 0.5472795963287354, + "step": 1881 + }, + { + "epoch": 0.43394051187456767, + "grad_norm": 1.1388268257083902, + "learning_rate": 1.8516425734443786e-06, + "loss": 0.4883359968662262, + "step": 1882 + }, + { + "epoch": 0.43417108600415033, + "grad_norm": 1.0473976151781044, + "learning_rate": 1.8514426837617006e-06, + "loss": 0.5172675848007202, + "step": 1883 + }, + { + "epoch": 0.434401660133733, + "grad_norm": 1.2812470936666533, + "learning_rate": 1.851242670313199e-06, + "loss": 0.5253418684005737, + "step": 1884 + }, + { + "epoch": 0.43463223426331565, + "grad_norm": 1.2940121862284113, + "learning_rate": 1.8510425331279485e-06, + "loss": 0.4684918522834778, + "step": 1885 + }, + { + "epoch": 0.4348628083928983, + "grad_norm": 1.7313907662218715, + "learning_rate": 1.8508422722350404e-06, + "loss": 0.522485077381134, + "step": 1886 + }, + { + "epoch": 0.435093382522481, + "grad_norm": 1.0862530759153244, + "learning_rate": 1.8506418876635852e-06, + "loss": 0.5123787522315979, + "step": 1887 + }, + { + "epoch": 
0.43532395665206364, + "grad_norm": 1.2812741997977775, + "learning_rate": 1.8504413794427106e-06, + "loss": 0.5195976495742798, + "step": 1888 + }, + { + "epoch": 0.4355545307816463, + "grad_norm": 1.081503403719265, + "learning_rate": 1.8502407476015626e-06, + "loss": 0.48394906520843506, + "step": 1889 + }, + { + "epoch": 0.43578510491122896, + "grad_norm": 1.2031421687566246, + "learning_rate": 1.850039992169305e-06, + "loss": 0.5083323121070862, + "step": 1890 + }, + { + "epoch": 0.4360156790408116, + "grad_norm": 1.2379097603599272, + "learning_rate": 1.8498391131751196e-06, + "loss": 0.5303651094436646, + "step": 1891 + }, + { + "epoch": 0.4362462531703943, + "grad_norm": 1.010820397187413, + "learning_rate": 1.8496381106482062e-06, + "loss": 0.49429047107696533, + "step": 1892 + }, + { + "epoch": 0.43647682729997694, + "grad_norm": 1.2506572926955764, + "learning_rate": 1.8494369846177826e-06, + "loss": 0.5263347625732422, + "step": 1893 + }, + { + "epoch": 0.4367074014295596, + "grad_norm": 1.3195849148516783, + "learning_rate": 1.8492357351130848e-06, + "loss": 0.5332654714584351, + "step": 1894 + }, + { + "epoch": 0.43693797555914227, + "grad_norm": 1.1692381501686961, + "learning_rate": 1.8490343621633657e-06, + "loss": 0.5598278045654297, + "step": 1895 + }, + { + "epoch": 0.43716854968872493, + "grad_norm": 1.0323293964159153, + "learning_rate": 1.8488328657978975e-06, + "loss": 0.4026976227760315, + "step": 1896 + }, + { + "epoch": 0.4373991238183076, + "grad_norm": 1.3568102099956687, + "learning_rate": 1.8486312460459698e-06, + "loss": 0.4277791380882263, + "step": 1897 + }, + { + "epoch": 0.43762969794789025, + "grad_norm": 1.2550644818276735, + "learning_rate": 1.8484295029368896e-06, + "loss": 0.49567973613739014, + "step": 1898 + }, + { + "epoch": 0.4378602720774729, + "grad_norm": 1.3750960531365106, + "learning_rate": 1.8482276364999828e-06, + "loss": 0.4659258723258972, + "step": 1899 + }, + { + "epoch": 0.4380908462070556, + "grad_norm": 
1.4921650354400726, + "learning_rate": 1.8480256467645923e-06, + "loss": 0.4950314164161682, + "step": 1900 + }, + { + "epoch": 0.43832142033663823, + "grad_norm": 1.2407118809889077, + "learning_rate": 1.8478235337600796e-06, + "loss": 0.5584981441497803, + "step": 1901 + }, + { + "epoch": 0.4385519944662209, + "grad_norm": 1.4539173472262998, + "learning_rate": 1.847621297515824e-06, + "loss": 0.6322404146194458, + "step": 1902 + }, + { + "epoch": 0.43878256859580356, + "grad_norm": 1.6859923054790666, + "learning_rate": 1.8474189380612225e-06, + "loss": 0.49535471200942993, + "step": 1903 + }, + { + "epoch": 0.4390131427253862, + "grad_norm": 1.0079272515569784, + "learning_rate": 1.8472164554256897e-06, + "loss": 0.40703707933425903, + "step": 1904 + }, + { + "epoch": 0.4392437168549689, + "grad_norm": 1.1125525506446694, + "learning_rate": 1.8470138496386588e-06, + "loss": 0.4540821313858032, + "step": 1905 + }, + { + "epoch": 0.43947429098455154, + "grad_norm": 1.1572392182622382, + "learning_rate": 1.846811120729581e-06, + "loss": 0.45964252948760986, + "step": 1906 + }, + { + "epoch": 0.4397048651141342, + "grad_norm": 1.018497744556974, + "learning_rate": 1.8466082687279244e-06, + "loss": 0.4604472517967224, + "step": 1907 + }, + { + "epoch": 0.43993543924371686, + "grad_norm": 1.114828518838774, + "learning_rate": 1.8464052936631758e-06, + "loss": 0.44585052132606506, + "step": 1908 + }, + { + "epoch": 0.4401660133732995, + "grad_norm": 1.2189161284011176, + "learning_rate": 1.8462021955648397e-06, + "loss": 0.43862414360046387, + "step": 1909 + }, + { + "epoch": 0.4403965875028822, + "grad_norm": 1.0484346475063675, + "learning_rate": 1.8459989744624386e-06, + "loss": 0.5148224234580994, + "step": 1910 + }, + { + "epoch": 0.44062716163246485, + "grad_norm": 1.3041727396087255, + "learning_rate": 1.8457956303855124e-06, + "loss": 0.6201390027999878, + "step": 1911 + }, + { + "epoch": 0.4408577357620475, + "grad_norm": 1.322348681007624, + "learning_rate": 
1.8455921633636196e-06, + "loss": 0.5828813314437866, + "step": 1912 + }, + { + "epoch": 0.44108830989163017, + "grad_norm": 1.2413839772395276, + "learning_rate": 1.845388573426336e-06, + "loss": 0.5491579174995422, + "step": 1913 + }, + { + "epoch": 0.44131888402121283, + "grad_norm": 1.135006469141378, + "learning_rate": 1.8451848606032554e-06, + "loss": 0.4204079508781433, + "step": 1914 + }, + { + "epoch": 0.4415494581507955, + "grad_norm": 1.3248528862326203, + "learning_rate": 1.8449810249239898e-06, + "loss": 0.5734649300575256, + "step": 1915 + }, + { + "epoch": 0.44178003228037815, + "grad_norm": 1.1101812599659409, + "learning_rate": 1.8447770664181684e-06, + "loss": 0.48931679129600525, + "step": 1916 + }, + { + "epoch": 0.4420106064099608, + "grad_norm": 1.292831898773596, + "learning_rate": 1.8445729851154392e-06, + "loss": 0.5206375122070312, + "step": 1917 + }, + { + "epoch": 0.4422411805395435, + "grad_norm": 1.3590503413541226, + "learning_rate": 1.8443687810454666e-06, + "loss": 0.4916420578956604, + "step": 1918 + }, + { + "epoch": 0.44247175466912614, + "grad_norm": 1.0963843972341092, + "learning_rate": 1.8441644542379348e-06, + "loss": 0.5021753311157227, + "step": 1919 + }, + { + "epoch": 0.4427023287987088, + "grad_norm": 1.2556127492378621, + "learning_rate": 1.8439600047225441e-06, + "loss": 0.4615249037742615, + "step": 1920 + }, + { + "epoch": 0.44293290292829146, + "grad_norm": 1.3251855444784397, + "learning_rate": 1.8437554325290133e-06, + "loss": 0.4849514365196228, + "step": 1921 + }, + { + "epoch": 0.4431634770578741, + "grad_norm": 1.3926092312086646, + "learning_rate": 1.843550737687079e-06, + "loss": 0.5872727632522583, + "step": 1922 + }, + { + "epoch": 0.4433940511874568, + "grad_norm": 1.1422193923698303, + "learning_rate": 1.843345920226496e-06, + "loss": 0.48469966650009155, + "step": 1923 + }, + { + "epoch": 0.44362462531703944, + "grad_norm": 1.1078885152995024, + "learning_rate": 1.8431409801770364e-06, + "loss": 
0.45931774377822876, + "step": 1924 + }, + { + "epoch": 0.4438551994466221, + "grad_norm": 1.0630184817249293, + "learning_rate": 1.8429359175684907e-06, + "loss": 0.5138596296310425, + "step": 1925 + }, + { + "epoch": 0.44408577357620477, + "grad_norm": 1.1576378783801253, + "learning_rate": 1.8427307324306661e-06, + "loss": 0.5586874485015869, + "step": 1926 + }, + { + "epoch": 0.44431634770578743, + "grad_norm": 0.9982496919132913, + "learning_rate": 1.8425254247933887e-06, + "loss": 0.5373901724815369, + "step": 1927 + }, + { + "epoch": 0.4445469218353701, + "grad_norm": 1.3044317948619655, + "learning_rate": 1.8423199946865022e-06, + "loss": 0.46104729175567627, + "step": 1928 + }, + { + "epoch": 0.44477749596495275, + "grad_norm": 1.2637964058278408, + "learning_rate": 1.8421144421398678e-06, + "loss": 0.4837646782398224, + "step": 1929 + }, + { + "epoch": 0.4450080700945354, + "grad_norm": 1.0579849017335872, + "learning_rate": 1.8419087671833647e-06, + "loss": 0.47685718536376953, + "step": 1930 + }, + { + "epoch": 0.4452386442241181, + "grad_norm": 1.3061309074235694, + "learning_rate": 1.8417029698468897e-06, + "loss": 0.5904572606086731, + "step": 1931 + }, + { + "epoch": 0.44546921835370074, + "grad_norm": 1.0698778232309683, + "learning_rate": 1.8414970501603577e-06, + "loss": 0.5434018969535828, + "step": 1932 + }, + { + "epoch": 0.4456997924832834, + "grad_norm": 1.0813116335575876, + "learning_rate": 1.8412910081537012e-06, + "loss": 0.5532705783843994, + "step": 1933 + }, + { + "epoch": 0.44593036661286606, + "grad_norm": 1.2746241772853588, + "learning_rate": 1.8410848438568704e-06, + "loss": 0.4900597929954529, + "step": 1934 + }, + { + "epoch": 0.4461609407424487, + "grad_norm": 1.1321871851277807, + "learning_rate": 1.8408785572998334e-06, + "loss": 0.40426892042160034, + "step": 1935 + }, + { + "epoch": 0.4463915148720314, + "grad_norm": 1.2056959007702837, + "learning_rate": 1.840672148512576e-06, + "loss": 0.48805081844329834, + "step": 1936 
+ }, + { + "epoch": 0.44662208900161404, + "grad_norm": 1.247599925173634, + "learning_rate": 1.8404656175251019e-06, + "loss": 0.4997096657752991, + "step": 1937 + }, + { + "epoch": 0.4468526631311967, + "grad_norm": 1.1300078883402307, + "learning_rate": 1.8402589643674325e-06, + "loss": 0.5113422274589539, + "step": 1938 + }, + { + "epoch": 0.44708323726077936, + "grad_norm": 1.2034211237767165, + "learning_rate": 1.8400521890696065e-06, + "loss": 0.44080060720443726, + "step": 1939 + }, + { + "epoch": 0.447313811390362, + "grad_norm": 1.1365386964776252, + "learning_rate": 1.8398452916616816e-06, + "loss": 0.4477943778038025, + "step": 1940 + }, + { + "epoch": 0.4475443855199447, + "grad_norm": 1.2171142668463, + "learning_rate": 1.8396382721737318e-06, + "loss": 0.4597470760345459, + "step": 1941 + }, + { + "epoch": 0.44777495964952735, + "grad_norm": 1.1079547319265362, + "learning_rate": 1.8394311306358494e-06, + "loss": 0.4758293628692627, + "step": 1942 + }, + { + "epoch": 0.44800553377911, + "grad_norm": 1.1579717682654027, + "learning_rate": 1.8392238670781453e-06, + "loss": 0.4573550224304199, + "step": 1943 + }, + { + "epoch": 0.44823610790869267, + "grad_norm": 1.318176172591765, + "learning_rate": 1.8390164815307465e-06, + "loss": 0.504696786403656, + "step": 1944 + }, + { + "epoch": 0.44846668203827533, + "grad_norm": 1.176904108457006, + "learning_rate": 1.8388089740237991e-06, + "loss": 0.4936453700065613, + "step": 1945 + }, + { + "epoch": 0.448697256167858, + "grad_norm": 1.0847569291854338, + "learning_rate": 1.8386013445874661e-06, + "loss": 0.4851078987121582, + "step": 1946 + }, + { + "epoch": 0.4489278302974406, + "grad_norm": 1.184810595622898, + "learning_rate": 1.8383935932519288e-06, + "loss": 0.4881519377231598, + "step": 1947 + }, + { + "epoch": 0.44915840442702326, + "grad_norm": 1.2389121525709461, + "learning_rate": 1.8381857200473859e-06, + "loss": 0.5604408979415894, + "step": 1948 + }, + { + "epoch": 0.4493889785566059, + 
"grad_norm": 1.2909928460674411, + "learning_rate": 1.8379777250040535e-06, + "loss": 0.5022269487380981, + "step": 1949 + }, + { + "epoch": 0.4496195526861886, + "grad_norm": 1.5074815200191058, + "learning_rate": 1.8377696081521666e-06, + "loss": 0.6519315242767334, + "step": 1950 + }, + { + "epoch": 0.44985012681577125, + "grad_norm": 1.0636886048128833, + "learning_rate": 1.8375613695219766e-06, + "loss": 0.3820997476577759, + "step": 1951 + }, + { + "epoch": 0.4500807009453539, + "grad_norm": 1.2705283632306288, + "learning_rate": 1.8373530091437526e-06, + "loss": 0.5473283529281616, + "step": 1952 + }, + { + "epoch": 0.45031127507493657, + "grad_norm": 1.3245130391551474, + "learning_rate": 1.8371445270477828e-06, + "loss": 0.5835955142974854, + "step": 1953 + }, + { + "epoch": 0.45054184920451923, + "grad_norm": 0.9645583101230016, + "learning_rate": 1.8369359232643716e-06, + "loss": 0.5398194789886475, + "step": 1954 + }, + { + "epoch": 0.4507724233341019, + "grad_norm": 1.363319289299188, + "learning_rate": 1.8367271978238418e-06, + "loss": 0.36561834812164307, + "step": 1955 + }, + { + "epoch": 0.45100299746368455, + "grad_norm": 1.212738724980002, + "learning_rate": 1.8365183507565342e-06, + "loss": 0.319802463054657, + "step": 1956 + }, + { + "epoch": 0.4512335715932672, + "grad_norm": 1.2303957915062576, + "learning_rate": 1.8363093820928063e-06, + "loss": 0.46466606855392456, + "step": 1957 + }, + { + "epoch": 0.4514641457228499, + "grad_norm": 1.0793723825771542, + "learning_rate": 1.8361002918630338e-06, + "loss": 0.5839806199073792, + "step": 1958 + }, + { + "epoch": 0.45169471985243254, + "grad_norm": 1.1018651408043991, + "learning_rate": 1.8358910800976105e-06, + "loss": 0.4472346603870392, + "step": 1959 + }, + { + "epoch": 0.4519252939820152, + "grad_norm": 1.2384424942976882, + "learning_rate": 1.835681746826947e-06, + "loss": 0.5191199779510498, + "step": 1960 + }, + { + "epoch": 0.45215586811159786, + "grad_norm": 1.199344967008703, + 
"learning_rate": 1.8354722920814722e-06, + "loss": 0.5832456350326538, + "step": 1961 + }, + { + "epoch": 0.4523864422411805, + "grad_norm": 1.17539846221013, + "learning_rate": 1.8352627158916326e-06, + "loss": 0.604708194732666, + "step": 1962 + }, + { + "epoch": 0.4526170163707632, + "grad_norm": 1.0362921929144542, + "learning_rate": 1.8350530182878924e-06, + "loss": 0.5640981793403625, + "step": 1963 + }, + { + "epoch": 0.45284759050034584, + "grad_norm": 1.6578766467164143, + "learning_rate": 1.8348431993007326e-06, + "loss": 0.4816977381706238, + "step": 1964 + }, + { + "epoch": 0.4530781646299285, + "grad_norm": 1.1374005988930347, + "learning_rate": 1.8346332589606526e-06, + "loss": 0.4226726293563843, + "step": 1965 + }, + { + "epoch": 0.45330873875951117, + "grad_norm": 1.1547528745449813, + "learning_rate": 1.8344231972981701e-06, + "loss": 0.49635130167007446, + "step": 1966 + }, + { + "epoch": 0.4535393128890938, + "grad_norm": 1.1372879426647424, + "learning_rate": 1.8342130143438193e-06, + "loss": 0.5275523662567139, + "step": 1967 + }, + { + "epoch": 0.4537698870186765, + "grad_norm": 1.202496816282669, + "learning_rate": 1.834002710128152e-06, + "loss": 0.48517313599586487, + "step": 1968 + }, + { + "epoch": 0.45400046114825915, + "grad_norm": 1.1968500607132941, + "learning_rate": 1.8337922846817388e-06, + "loss": 0.4352126717567444, + "step": 1969 + }, + { + "epoch": 0.4542310352778418, + "grad_norm": 1.116289808278095, + "learning_rate": 1.8335817380351668e-06, + "loss": 0.48131102323532104, + "step": 1970 + }, + { + "epoch": 0.45446160940742447, + "grad_norm": 1.1124663257243492, + "learning_rate": 1.8333710702190408e-06, + "loss": 0.48989611864089966, + "step": 1971 + }, + { + "epoch": 0.45469218353700713, + "grad_norm": 1.4370850989895667, + "learning_rate": 1.8331602812639839e-06, + "loss": 0.4841296076774597, + "step": 1972 + }, + { + "epoch": 0.4549227576665898, + "grad_norm": 1.1830445801916494, + "learning_rate": 1.8329493712006364e-06, 
+ "loss": 0.5479841232299805, + "step": 1973 + }, + { + "epoch": 0.45515333179617246, + "grad_norm": 1.1923903658380426, + "learning_rate": 1.8327383400596559e-06, + "loss": 0.4732212424278259, + "step": 1974 + }, + { + "epoch": 0.4553839059257551, + "grad_norm": 1.0628413230145501, + "learning_rate": 1.8325271878717183e-06, + "loss": 0.46675610542297363, + "step": 1975 + }, + { + "epoch": 0.4556144800553378, + "grad_norm": 1.0416293786228703, + "learning_rate": 1.8323159146675163e-06, + "loss": 0.5464143753051758, + "step": 1976 + }, + { + "epoch": 0.45584505418492044, + "grad_norm": 1.0345078154587666, + "learning_rate": 1.832104520477761e-06, + "loss": 0.3888660669326782, + "step": 1977 + }, + { + "epoch": 0.4560756283145031, + "grad_norm": 1.4241654424068988, + "learning_rate": 1.8318930053331805e-06, + "loss": 0.5163271427154541, + "step": 1978 + }, + { + "epoch": 0.45630620244408576, + "grad_norm": 1.2347472844947731, + "learning_rate": 1.8316813692645208e-06, + "loss": 0.5471124649047852, + "step": 1979 + }, + { + "epoch": 0.4565367765736684, + "grad_norm": 1.1473833654009267, + "learning_rate": 1.8314696123025452e-06, + "loss": 0.5907406210899353, + "step": 1980 + }, + { + "epoch": 0.4567673507032511, + "grad_norm": 1.298768820373183, + "learning_rate": 1.8312577344780346e-06, + "loss": 0.5249447226524353, + "step": 1981 + }, + { + "epoch": 0.45699792483283375, + "grad_norm": 1.2135802460189444, + "learning_rate": 1.8310457358217879e-06, + "loss": 0.5063247084617615, + "step": 1982 + }, + { + "epoch": 0.4572284989624164, + "grad_norm": 1.361065103282706, + "learning_rate": 1.830833616364621e-06, + "loss": 0.4448107182979584, + "step": 1983 + }, + { + "epoch": 0.45745907309199907, + "grad_norm": 1.1036363497718666, + "learning_rate": 1.830621376137368e-06, + "loss": 0.5699697732925415, + "step": 1984 + }, + { + "epoch": 0.45768964722158173, + "grad_norm": 1.246349122018957, + "learning_rate": 1.8304090151708794e-06, + "loss": 0.5701720118522644, + "step": 
1985 + }, + { + "epoch": 0.4579202213511644, + "grad_norm": 1.2319947144837158, + "learning_rate": 1.830196533496025e-06, + "loss": 0.4754391014575958, + "step": 1986 + }, + { + "epoch": 0.45815079548074705, + "grad_norm": 1.3528306833221286, + "learning_rate": 1.8299839311436903e-06, + "loss": 0.47649019956588745, + "step": 1987 + }, + { + "epoch": 0.4583813696103297, + "grad_norm": 1.3311097062461437, + "learning_rate": 1.8297712081447797e-06, + "loss": 0.5524393320083618, + "step": 1988 + }, + { + "epoch": 0.4586119437399124, + "grad_norm": 1.0762480086961639, + "learning_rate": 1.8295583645302144e-06, + "loss": 0.45731648802757263, + "step": 1989 + }, + { + "epoch": 0.45884251786949504, + "grad_norm": 1.130533269973984, + "learning_rate": 1.8293454003309336e-06, + "loss": 0.4999742805957794, + "step": 1990 + }, + { + "epoch": 0.4590730919990777, + "grad_norm": 1.1313506863251181, + "learning_rate": 1.829132315577894e-06, + "loss": 0.49084147810935974, + "step": 1991 + }, + { + "epoch": 0.45930366612866036, + "grad_norm": 1.2521400943324308, + "learning_rate": 1.828919110302069e-06, + "loss": 0.45332348346710205, + "step": 1992 + }, + { + "epoch": 0.459534240258243, + "grad_norm": 1.0776738520694769, + "learning_rate": 1.8287057845344504e-06, + "loss": 0.5029363632202148, + "step": 1993 + }, + { + "epoch": 0.4597648143878257, + "grad_norm": 1.1554006749910666, + "learning_rate": 1.8284923383060475e-06, + "loss": 0.5373274087905884, + "step": 1994 + }, + { + "epoch": 0.45999538851740834, + "grad_norm": 1.372219905846735, + "learning_rate": 1.8282787716478867e-06, + "loss": 0.5022158622741699, + "step": 1995 + }, + { + "epoch": 0.460225962646991, + "grad_norm": 1.5170390306548123, + "learning_rate": 1.828065084591012e-06, + "loss": 0.5093190670013428, + "step": 1996 + }, + { + "epoch": 0.46045653677657367, + "grad_norm": 1.1628780385550688, + "learning_rate": 1.827851277166485e-06, + "loss": 0.5406581163406372, + "step": 1997 + }, + { + "epoch": 
0.4606871109061563, + "grad_norm": 1.0838824930169186, + "learning_rate": 1.8276373494053852e-06, + "loss": 0.4403364062309265, + "step": 1998 + }, + { + "epoch": 0.460917685035739, + "grad_norm": 1.0663930849179153, + "learning_rate": 1.8274233013388085e-06, + "loss": 0.48383134603500366, + "step": 1999 + }, + { + "epoch": 0.46114825916532165, + "grad_norm": 1.278024022767056, + "learning_rate": 1.8272091329978693e-06, + "loss": 0.5177836418151855, + "step": 2000 + }, + { + "epoch": 0.4613788332949043, + "grad_norm": 1.3026255484345248, + "learning_rate": 1.8269948444136991e-06, + "loss": 0.5699004530906677, + "step": 2001 + }, + { + "epoch": 0.461609407424487, + "grad_norm": 1.0712598167444656, + "learning_rate": 1.826780435617447e-06, + "loss": 0.5415153503417969, + "step": 2002 + }, + { + "epoch": 0.46183998155406963, + "grad_norm": 1.3243429308154806, + "learning_rate": 1.8265659066402792e-06, + "loss": 0.5521166920661926, + "step": 2003 + }, + { + "epoch": 0.4620705556836523, + "grad_norm": 1.0401918069659792, + "learning_rate": 1.8263512575133802e-06, + "loss": 0.4518507122993469, + "step": 2004 + }, + { + "epoch": 0.46230112981323496, + "grad_norm": 1.4036586027704223, + "learning_rate": 1.8261364882679508e-06, + "loss": 0.5997140407562256, + "step": 2005 + }, + { + "epoch": 0.4625317039428176, + "grad_norm": 1.2297832096563293, + "learning_rate": 1.8259215989352103e-06, + "loss": 0.5105265974998474, + "step": 2006 + }, + { + "epoch": 0.4627622780724003, + "grad_norm": 1.3620575066378895, + "learning_rate": 1.825706589546395e-06, + "loss": 0.5229371190071106, + "step": 2007 + }, + { + "epoch": 0.46299285220198294, + "grad_norm": 1.323713226525437, + "learning_rate": 1.825491460132759e-06, + "loss": 0.4833800792694092, + "step": 2008 + }, + { + "epoch": 0.4632234263315656, + "grad_norm": 1.443684310899243, + "learning_rate": 1.8252762107255727e-06, + "loss": 0.4323253035545349, + "step": 2009 + }, + { + "epoch": 0.46345400046114826, + "grad_norm": 
1.0890999093716327, + "learning_rate": 1.8250608413561253e-06, + "loss": 0.4563494026660919, + "step": 2010 + }, + { + "epoch": 0.4636845745907309, + "grad_norm": 1.5474519259744821, + "learning_rate": 1.8248453520557228e-06, + "loss": 0.5656196475028992, + "step": 2011 + }, + { + "epoch": 0.4639151487203136, + "grad_norm": 1.4798653425077055, + "learning_rate": 1.8246297428556887e-06, + "loss": 0.5448226928710938, + "step": 2012 + }, + { + "epoch": 0.46414572284989625, + "grad_norm": 1.1620535147248132, + "learning_rate": 1.8244140137873645e-06, + "loss": 0.4692860543727875, + "step": 2013 + }, + { + "epoch": 0.4643762969794789, + "grad_norm": 1.1643805671555858, + "learning_rate": 1.8241981648821079e-06, + "loss": 0.5948643088340759, + "step": 2014 + }, + { + "epoch": 0.46460687110906157, + "grad_norm": 1.1853722372788744, + "learning_rate": 1.823982196171295e-06, + "loss": 0.54410719871521, + "step": 2015 + }, + { + "epoch": 0.46483744523864423, + "grad_norm": 1.1149495485691443, + "learning_rate": 1.8237661076863192e-06, + "loss": 0.430447518825531, + "step": 2016 + }, + { + "epoch": 0.4650680193682269, + "grad_norm": 1.2520273819748522, + "learning_rate": 1.8235498994585913e-06, + "loss": 0.5420910716056824, + "step": 2017 + }, + { + "epoch": 0.46529859349780955, + "grad_norm": 1.119152189162338, + "learning_rate": 1.823333571519539e-06, + "loss": 0.5140334963798523, + "step": 2018 + }, + { + "epoch": 0.4655291676273922, + "grad_norm": 1.1399919106847334, + "learning_rate": 1.8231171239006075e-06, + "loss": 0.5901660323143005, + "step": 2019 + }, + { + "epoch": 0.4657597417569749, + "grad_norm": 1.174060044130563, + "learning_rate": 1.8229005566332603e-06, + "loss": 0.5025908350944519, + "step": 2020 + }, + { + "epoch": 0.46599031588655754, + "grad_norm": 1.3363070549997977, + "learning_rate": 1.8226838697489772e-06, + "loss": 0.4884544909000397, + "step": 2021 + }, + { + "epoch": 0.4662208900161402, + "grad_norm": 1.1349219249551332, + "learning_rate": 
1.822467063279256e-06, + "loss": 0.46449869871139526, + "step": 2022 + }, + { + "epoch": 0.46645146414572286, + "grad_norm": 1.2563720378844234, + "learning_rate": 1.8222501372556116e-06, + "loss": 0.49463552236557007, + "step": 2023 + }, + { + "epoch": 0.4666820382753055, + "grad_norm": 1.285405581097111, + "learning_rate": 1.8220330917095768e-06, + "loss": 0.5027149319648743, + "step": 2024 + }, + { + "epoch": 0.4669126124048882, + "grad_norm": 1.3048909901236199, + "learning_rate": 1.8218159266727007e-06, + "loss": 0.564018726348877, + "step": 2025 + }, + { + "epoch": 0.46714318653447084, + "grad_norm": 1.1965631228875364, + "learning_rate": 1.821598642176551e-06, + "loss": 0.4235766530036926, + "step": 2026 + }, + { + "epoch": 0.4673737606640535, + "grad_norm": 1.3354885477125742, + "learning_rate": 1.8213812382527118e-06, + "loss": 0.5696560144424438, + "step": 2027 + }, + { + "epoch": 0.46760433479363617, + "grad_norm": 1.2879943344932543, + "learning_rate": 1.8211637149327856e-06, + "loss": 0.6101738214492798, + "step": 2028 + }, + { + "epoch": 0.46783490892321883, + "grad_norm": 1.2787382273760666, + "learning_rate": 1.820946072248391e-06, + "loss": 0.46749603748321533, + "step": 2029 + }, + { + "epoch": 0.4680654830528015, + "grad_norm": 1.0137433334051962, + "learning_rate": 1.8207283102311646e-06, + "loss": 0.4713476300239563, + "step": 2030 + }, + { + "epoch": 0.46829605718238415, + "grad_norm": 1.1924917748606811, + "learning_rate": 1.8205104289127607e-06, + "loss": 0.5381859540939331, + "step": 2031 + }, + { + "epoch": 0.4685266313119668, + "grad_norm": 1.1753816722161505, + "learning_rate": 1.82029242832485e-06, + "loss": 0.4871833324432373, + "step": 2032 + }, + { + "epoch": 0.4687572054415495, + "grad_norm": 1.2889177236993268, + "learning_rate": 1.8200743084991217e-06, + "loss": 0.520627498626709, + "step": 2033 + }, + { + "epoch": 0.46898777957113214, + "grad_norm": 1.1168475824168262, + "learning_rate": 1.8198560694672813e-06, + "loss": 
0.5382364392280579, + "step": 2034 + }, + { + "epoch": 0.4692183537007148, + "grad_norm": 1.0953401197844614, + "learning_rate": 1.8196377112610524e-06, + "loss": 0.384588360786438, + "step": 2035 + }, + { + "epoch": 0.46944892783029746, + "grad_norm": 1.3337847292368636, + "learning_rate": 1.8194192339121752e-06, + "loss": 0.5515186786651611, + "step": 2036 + }, + { + "epoch": 0.4696795019598801, + "grad_norm": 1.2634192136555153, + "learning_rate": 1.819200637452408e-06, + "loss": 0.5405331254005432, + "step": 2037 + }, + { + "epoch": 0.4699100760894628, + "grad_norm": 1.3408838607377604, + "learning_rate": 1.818981921913526e-06, + "loss": 0.5565645694732666, + "step": 2038 + }, + { + "epoch": 0.47014065021904544, + "grad_norm": 1.1845986031026676, + "learning_rate": 1.818763087327321e-06, + "loss": 0.4856358468532562, + "step": 2039 + }, + { + "epoch": 0.4703712243486281, + "grad_norm": 1.1018414398540533, + "learning_rate": 1.8185441337256035e-06, + "loss": 0.5495761632919312, + "step": 2040 + }, + { + "epoch": 0.47060179847821076, + "grad_norm": 1.1792744067343253, + "learning_rate": 1.8183250611402007e-06, + "loss": 0.509435772895813, + "step": 2041 + }, + { + "epoch": 0.4708323726077934, + "grad_norm": 1.0107628293119386, + "learning_rate": 1.8181058696029564e-06, + "loss": 0.4663920998573303, + "step": 2042 + }, + { + "epoch": 0.4710629467373761, + "grad_norm": 1.5093599722992523, + "learning_rate": 1.817886559145733e-06, + "loss": 0.5976128578186035, + "step": 2043 + }, + { + "epoch": 0.47129352086695875, + "grad_norm": 1.2084791393616294, + "learning_rate": 1.817667129800409e-06, + "loss": 0.49167966842651367, + "step": 2044 + }, + { + "epoch": 0.4715240949965414, + "grad_norm": 1.1457657477052965, + "learning_rate": 1.817447581598881e-06, + "loss": 0.5889153480529785, + "step": 2045 + }, + { + "epoch": 0.47175466912612407, + "grad_norm": 1.206584712735091, + "learning_rate": 1.8172279145730622e-06, + "loss": 0.4970330595970154, + "step": 2046 + }, + { + 
"epoch": 0.47198524325570673, + "grad_norm": 1.1497751548880843, + "learning_rate": 1.817008128754884e-06, + "loss": 0.4840531051158905, + "step": 2047 + }, + { + "epoch": 0.4722158173852894, + "grad_norm": 1.0450687693806986, + "learning_rate": 1.816788224176294e-06, + "loss": 0.48297861218452454, + "step": 2048 + }, + { + "epoch": 0.47244639151487205, + "grad_norm": 1.184218710920589, + "learning_rate": 1.8165682008692578e-06, + "loss": 0.540350079536438, + "step": 2049 + }, + { + "epoch": 0.4726769656444547, + "grad_norm": 1.0359041945652345, + "learning_rate": 1.8163480588657578e-06, + "loss": 0.46405351161956787, + "step": 2050 + }, + { + "epoch": 0.4729075397740374, + "grad_norm": 1.1107404730922064, + "learning_rate": 1.816127798197794e-06, + "loss": 0.5175468921661377, + "step": 2051 + }, + { + "epoch": 0.47313811390362004, + "grad_norm": 1.3876726162535544, + "learning_rate": 1.8159074188973836e-06, + "loss": 0.5923771858215332, + "step": 2052 + }, + { + "epoch": 0.4733686880332027, + "grad_norm": 1.135618311389398, + "learning_rate": 1.815686920996561e-06, + "loss": 0.4999024569988251, + "step": 2053 + }, + { + "epoch": 0.47359926216278536, + "grad_norm": 1.260203747569289, + "learning_rate": 1.8154663045273775e-06, + "loss": 0.5630939602851868, + "step": 2054 + }, + { + "epoch": 0.473829836292368, + "grad_norm": 1.0446947469213006, + "learning_rate": 1.8152455695219021e-06, + "loss": 0.5505836009979248, + "step": 2055 + }, + { + "epoch": 0.4740604104219507, + "grad_norm": 1.0593378648910954, + "learning_rate": 1.8150247160122213e-06, + "loss": 0.44550588726997375, + "step": 2056 + }, + { + "epoch": 0.47429098455153335, + "grad_norm": 1.3784716647825315, + "learning_rate": 1.8148037440304375e-06, + "loss": 0.5387516021728516, + "step": 2057 + }, + { + "epoch": 0.47452155868111595, + "grad_norm": 1.2100168024707112, + "learning_rate": 1.814582653608672e-06, + "loss": 0.5941788554191589, + "step": 2058 + }, + { + "epoch": 0.4747521328106986, + "grad_norm": 
1.3537451578676338, + "learning_rate": 1.8143614447790622e-06, + "loss": 0.552179217338562, + "step": 2059 + }, + { + "epoch": 0.4749827069402813, + "grad_norm": 1.4352695047482156, + "learning_rate": 1.8141401175737632e-06, + "loss": 0.4475885033607483, + "step": 2060 + }, + { + "epoch": 0.47521328106986394, + "grad_norm": 1.560782042661122, + "learning_rate": 1.813918672024947e-06, + "loss": 0.5821356773376465, + "step": 2061 + }, + { + "epoch": 0.4754438551994466, + "grad_norm": 1.0378834941031638, + "learning_rate": 1.8136971081648027e-06, + "loss": 0.4673501253128052, + "step": 2062 + }, + { + "epoch": 0.47567442932902926, + "grad_norm": 1.278556049660224, + "learning_rate": 1.8134754260255373e-06, + "loss": 0.582427978515625, + "step": 2063 + }, + { + "epoch": 0.4759050034586119, + "grad_norm": 1.050202225169388, + "learning_rate": 1.8132536256393744e-06, + "loss": 0.4494328498840332, + "step": 2064 + }, + { + "epoch": 0.4761355775881946, + "grad_norm": 1.2125688329070163, + "learning_rate": 1.8130317070385552e-06, + "loss": 0.44775205850601196, + "step": 2065 + }, + { + "epoch": 0.47636615171777724, + "grad_norm": 1.6939798990457848, + "learning_rate": 1.8128096702553372e-06, + "loss": 0.5456822514533997, + "step": 2066 + }, + { + "epoch": 0.4765967258473599, + "grad_norm": 1.3273956589633653, + "learning_rate": 1.8125875153219963e-06, + "loss": 0.46396178007125854, + "step": 2067 + }, + { + "epoch": 0.47682729997694256, + "grad_norm": 1.1515186039412058, + "learning_rate": 1.8123652422708247e-06, + "loss": 0.4479365944862366, + "step": 2068 + }, + { + "epoch": 0.4770578741065252, + "grad_norm": 1.2802069282774096, + "learning_rate": 1.8121428511341322e-06, + "loss": 0.4633978605270386, + "step": 2069 + }, + { + "epoch": 0.4772884482361079, + "grad_norm": 1.0517363876370052, + "learning_rate": 1.811920341944245e-06, + "loss": 0.5190213918685913, + "step": 2070 + }, + { + "epoch": 0.47751902236569055, + "grad_norm": 1.1502023331468956, + "learning_rate": 
1.811697714733508e-06, + "loss": 0.3900855779647827, + "step": 2071 + }, + { + "epoch": 0.4777495964952732, + "grad_norm": 1.1255517906685018, + "learning_rate": 1.8114749695342816e-06, + "loss": 0.5130020380020142, + "step": 2072 + }, + { + "epoch": 0.47798017062485587, + "grad_norm": 1.181934216759251, + "learning_rate": 1.8112521063789444e-06, + "loss": 0.5279096364974976, + "step": 2073 + }, + { + "epoch": 0.47821074475443853, + "grad_norm": 1.1536132669518966, + "learning_rate": 1.8110291252998918e-06, + "loss": 0.5048732161521912, + "step": 2074 + }, + { + "epoch": 0.4784413188840212, + "grad_norm": 1.3979756779725594, + "learning_rate": 1.8108060263295362e-06, + "loss": 0.5410048365592957, + "step": 2075 + }, + { + "epoch": 0.47867189301360386, + "grad_norm": 1.2583345285712537, + "learning_rate": 1.8105828095003073e-06, + "loss": 0.5144593715667725, + "step": 2076 + }, + { + "epoch": 0.4789024671431865, + "grad_norm": 1.427505910251362, + "learning_rate": 1.810359474844652e-06, + "loss": 0.543846845626831, + "step": 2077 + }, + { + "epoch": 0.4791330412727692, + "grad_norm": 1.3389957969723305, + "learning_rate": 1.8101360223950346e-06, + "loss": 0.5628032684326172, + "step": 2078 + }, + { + "epoch": 0.47936361540235184, + "grad_norm": 1.2233623869672197, + "learning_rate": 1.8099124521839358e-06, + "loss": 0.5248516201972961, + "step": 2079 + }, + { + "epoch": 0.4795941895319345, + "grad_norm": 1.1882395736191633, + "learning_rate": 1.8096887642438537e-06, + "loss": 0.44171589612960815, + "step": 2080 + }, + { + "epoch": 0.47982476366151716, + "grad_norm": 1.1226478747483744, + "learning_rate": 1.809464958607304e-06, + "loss": 0.5003859996795654, + "step": 2081 + }, + { + "epoch": 0.4800553377910998, + "grad_norm": 1.2241972764897475, + "learning_rate": 1.8092410353068183e-06, + "loss": 0.5271269679069519, + "step": 2082 + }, + { + "epoch": 0.4802859119206825, + "grad_norm": 1.390627459359596, + "learning_rate": 1.8090169943749474e-06, + "loss": 
0.5191465616226196, + "step": 2083 + }, + { + "epoch": 0.48051648605026515, + "grad_norm": 1.229186901325219, + "learning_rate": 1.8087928358442567e-06, + "loss": 0.4569256007671356, + "step": 2084 + }, + { + "epoch": 0.4807470601798478, + "grad_norm": 1.2586566204343959, + "learning_rate": 1.8085685597473307e-06, + "loss": 0.521030068397522, + "step": 2085 + }, + { + "epoch": 0.48097763430943047, + "grad_norm": 1.8616539280014968, + "learning_rate": 1.80834416611677e-06, + "loss": 0.48959439992904663, + "step": 2086 + }, + { + "epoch": 0.48120820843901313, + "grad_norm": 1.37464754051939, + "learning_rate": 1.8081196549851925e-06, + "loss": 0.6536514163017273, + "step": 2087 + }, + { + "epoch": 0.4814387825685958, + "grad_norm": 1.2292193685806807, + "learning_rate": 1.8078950263852327e-06, + "loss": 0.5746080875396729, + "step": 2088 + }, + { + "epoch": 0.48166935669817845, + "grad_norm": 1.244000490897379, + "learning_rate": 1.8076702803495437e-06, + "loss": 0.5518802404403687, + "step": 2089 + }, + { + "epoch": 0.4818999308277611, + "grad_norm": 1.0641823457217219, + "learning_rate": 1.8074454169107934e-06, + "loss": 0.49385470151901245, + "step": 2090 + }, + { + "epoch": 0.4821305049573438, + "grad_norm": 1.0197781900207734, + "learning_rate": 1.8072204361016688e-06, + "loss": 0.4488806426525116, + "step": 2091 + }, + { + "epoch": 0.48236107908692644, + "grad_norm": 1.1424753749617582, + "learning_rate": 1.8069953379548727e-06, + "loss": 0.4167511761188507, + "step": 2092 + }, + { + "epoch": 0.4825916532165091, + "grad_norm": 1.0650805504939584, + "learning_rate": 1.8067701225031258e-06, + "loss": 0.4181321859359741, + "step": 2093 + }, + { + "epoch": 0.48282222734609176, + "grad_norm": 1.4930083094447149, + "learning_rate": 1.806544789779165e-06, + "loss": 0.5257805585861206, + "step": 2094 + }, + { + "epoch": 0.4830528014756744, + "grad_norm": 1.2055270290247748, + "learning_rate": 1.806319339815745e-06, + "loss": 0.4687056541442871, + "step": 2095 + }, + { 
+ "epoch": 0.4832833756052571, + "grad_norm": 1.4682007990950796, + "learning_rate": 1.8060937726456373e-06, + "loss": 0.48070380091667175, + "step": 2096 + }, + { + "epoch": 0.48351394973483974, + "grad_norm": 1.1555932423285984, + "learning_rate": 1.80586808830163e-06, + "loss": 0.516263484954834, + "step": 2097 + }, + { + "epoch": 0.4837445238644224, + "grad_norm": 1.1676344701764343, + "learning_rate": 1.805642286816529e-06, + "loss": 0.44018858671188354, + "step": 2098 + }, + { + "epoch": 0.48397509799400507, + "grad_norm": 1.1426045047454896, + "learning_rate": 1.8054163682231565e-06, + "loss": 0.469373881816864, + "step": 2099 + }, + { + "epoch": 0.4842056721235877, + "grad_norm": 1.2080131082183756, + "learning_rate": 1.8051903325543525e-06, + "loss": 0.4759753346443176, + "step": 2100 + }, + { + "epoch": 0.4844362462531704, + "grad_norm": 1.210070128706108, + "learning_rate": 1.804964179842973e-06, + "loss": 0.5002714395523071, + "step": 2101 + }, + { + "epoch": 0.48466682038275305, + "grad_norm": 1.5442585246670464, + "learning_rate": 1.804737910121892e-06, + "loss": 0.4869537353515625, + "step": 2102 + }, + { + "epoch": 0.4848973945123357, + "grad_norm": 1.0025531891942765, + "learning_rate": 1.804511523424e-06, + "loss": 0.4840247929096222, + "step": 2103 + }, + { + "epoch": 0.4851279686419184, + "grad_norm": 1.2125955941110753, + "learning_rate": 1.8042850197822049e-06, + "loss": 0.48390740156173706, + "step": 2104 + }, + { + "epoch": 0.48535854277150103, + "grad_norm": 1.2581816256760507, + "learning_rate": 1.8040583992294305e-06, + "loss": 0.5875431895256042, + "step": 2105 + }, + { + "epoch": 0.4855891169010837, + "grad_norm": 1.1530238586197006, + "learning_rate": 1.803831661798619e-06, + "loss": 0.4599287211894989, + "step": 2106 + }, + { + "epoch": 0.48581969103066636, + "grad_norm": 1.120967919274212, + "learning_rate": 1.803604807522729e-06, + "loss": 0.5266382694244385, + "step": 2107 + }, + { + "epoch": 0.486050265160249, + "grad_norm": 
1.6402953005136756, + "learning_rate": 1.8033778364347359e-06, + "loss": 0.5592058897018433, + "step": 2108 + }, + { + "epoch": 0.4862808392898317, + "grad_norm": 1.278433491122833, + "learning_rate": 1.8031507485676324e-06, + "loss": 0.4385683834552765, + "step": 2109 + }, + { + "epoch": 0.48651141341941434, + "grad_norm": 0.9409152493815139, + "learning_rate": 1.8029235439544277e-06, + "loss": 0.4205859303474426, + "step": 2110 + }, + { + "epoch": 0.486741987548997, + "grad_norm": 1.2334271425613326, + "learning_rate": 1.8026962226281484e-06, + "loss": 0.4179378151893616, + "step": 2111 + }, + { + "epoch": 0.48697256167857966, + "grad_norm": 1.3018247329424364, + "learning_rate": 1.8024687846218382e-06, + "loss": 0.5022565126419067, + "step": 2112 + }, + { + "epoch": 0.4872031358081623, + "grad_norm": 1.092822670373115, + "learning_rate": 1.8022412299685574e-06, + "loss": 0.4591484069824219, + "step": 2113 + }, + { + "epoch": 0.487433709937745, + "grad_norm": 1.135644170855214, + "learning_rate": 1.8020135587013836e-06, + "loss": 0.44381004571914673, + "step": 2114 + }, + { + "epoch": 0.48766428406732765, + "grad_norm": 1.4882998519827229, + "learning_rate": 1.8017857708534106e-06, + "loss": 0.5418124198913574, + "step": 2115 + }, + { + "epoch": 0.4878948581969103, + "grad_norm": 1.1899076485341344, + "learning_rate": 1.80155786645775e-06, + "loss": 0.45836228132247925, + "step": 2116 + }, + { + "epoch": 0.48812543232649297, + "grad_norm": 1.0900529156655503, + "learning_rate": 1.80132984554753e-06, + "loss": 0.6028016805648804, + "step": 2117 + }, + { + "epoch": 0.48835600645607563, + "grad_norm": 1.2082046720219188, + "learning_rate": 1.8011017081558956e-06, + "loss": 0.461037814617157, + "step": 2118 + }, + { + "epoch": 0.4885865805856583, + "grad_norm": 1.2201342507223627, + "learning_rate": 1.8008734543160092e-06, + "loss": 0.45145073533058167, + "step": 2119 + }, + { + "epoch": 0.48881715471524095, + "grad_norm": 1.0786402560770025, + "learning_rate": 
1.8006450840610495e-06, + "loss": 0.5074604153633118, + "step": 2120 + }, + { + "epoch": 0.4890477288448236, + "grad_norm": 1.047533414614444, + "learning_rate": 1.8004165974242124e-06, + "loss": 0.48518210649490356, + "step": 2121 + }, + { + "epoch": 0.4892783029744063, + "grad_norm": 1.3858118136014763, + "learning_rate": 1.800187994438711e-06, + "loss": 0.5427801609039307, + "step": 2122 + }, + { + "epoch": 0.48950887710398894, + "grad_norm": 1.1550068575676335, + "learning_rate": 1.799959275137775e-06, + "loss": 0.5002918839454651, + "step": 2123 + }, + { + "epoch": 0.4897394512335716, + "grad_norm": 1.1639768741422865, + "learning_rate": 1.799730439554651e-06, + "loss": 0.4417838454246521, + "step": 2124 + }, + { + "epoch": 0.48997002536315426, + "grad_norm": 1.1441558832004912, + "learning_rate": 1.7995014877226024e-06, + "loss": 0.4260700047016144, + "step": 2125 + }, + { + "epoch": 0.4902005994927369, + "grad_norm": 1.2965264900873492, + "learning_rate": 1.79927241967491e-06, + "loss": 0.5480694770812988, + "step": 2126 + }, + { + "epoch": 0.4904311736223196, + "grad_norm": 1.1303746553940783, + "learning_rate": 1.7990432354448713e-06, + "loss": 0.3911926746368408, + "step": 2127 + }, + { + "epoch": 0.49066174775190224, + "grad_norm": 1.6919718962195622, + "learning_rate": 1.7988139350657997e-06, + "loss": 0.5269262194633484, + "step": 2128 + }, + { + "epoch": 0.4908923218814849, + "grad_norm": 1.1850805062858767, + "learning_rate": 1.7985845185710272e-06, + "loss": 0.47482216358184814, + "step": 2129 + }, + { + "epoch": 0.49112289601106757, + "grad_norm": 1.1047509042558772, + "learning_rate": 1.7983549859939018e-06, + "loss": 0.5663374662399292, + "step": 2130 + }, + { + "epoch": 0.49135347014065023, + "grad_norm": 1.3067402879954033, + "learning_rate": 1.7981253373677875e-06, + "loss": 0.5322546362876892, + "step": 2131 + }, + { + "epoch": 0.4915840442702329, + "grad_norm": 1.3127111295082199, + "learning_rate": 1.797895572726067e-06, + "loss": 
0.4238794445991516, + "step": 2132 + }, + { + "epoch": 0.49181461839981555, + "grad_norm": 1.3803934905983801, + "learning_rate": 1.7976656921021384e-06, + "loss": 0.49363791942596436, + "step": 2133 + }, + { + "epoch": 0.4920451925293982, + "grad_norm": 1.2075981604593182, + "learning_rate": 1.7974356955294178e-06, + "loss": 0.5079565048217773, + "step": 2134 + }, + { + "epoch": 0.4922757666589809, + "grad_norm": 1.2533809097279895, + "learning_rate": 1.7972055830413369e-06, + "loss": 0.5259063243865967, + "step": 2135 + }, + { + "epoch": 0.49250634078856353, + "grad_norm": 1.1936271771370206, + "learning_rate": 1.7969753546713448e-06, + "loss": 0.49021831154823303, + "step": 2136 + }, + { + "epoch": 0.4927369149181462, + "grad_norm": 1.1560183810694227, + "learning_rate": 1.7967450104529078e-06, + "loss": 0.49721387028694153, + "step": 2137 + }, + { + "epoch": 0.49296748904772886, + "grad_norm": 1.523657234221405, + "learning_rate": 1.796514550419509e-06, + "loss": 0.6129348278045654, + "step": 2138 + }, + { + "epoch": 0.4931980631773115, + "grad_norm": 1.245217894172975, + "learning_rate": 1.7962839746046479e-06, + "loss": 0.5034269094467163, + "step": 2139 + }, + { + "epoch": 0.4934286373068942, + "grad_norm": 1.2009412202372387, + "learning_rate": 1.7960532830418408e-06, + "loss": 0.490216463804245, + "step": 2140 + }, + { + "epoch": 0.49365921143647684, + "grad_norm": 1.3063386967377661, + "learning_rate": 1.7958224757646212e-06, + "loss": 0.5609744787216187, + "step": 2141 + }, + { + "epoch": 0.4938897855660595, + "grad_norm": 1.2989425251267097, + "learning_rate": 1.7955915528065395e-06, + "loss": 0.4438238739967346, + "step": 2142 + }, + { + "epoch": 0.49412035969564216, + "grad_norm": 1.1724755739495214, + "learning_rate": 1.7953605142011626e-06, + "loss": 0.4704767167568207, + "step": 2143 + }, + { + "epoch": 0.4943509338252248, + "grad_norm": 1.0972580275821462, + "learning_rate": 1.795129359982074e-06, + "loss": 0.44819536805152893, + "step": 2144 + }, 
+ { + "epoch": 0.4945815079548075, + "grad_norm": 1.4390962273022694, + "learning_rate": 1.7948980901828746e-06, + "loss": 0.5311752557754517, + "step": 2145 + }, + { + "epoch": 0.49481208208439015, + "grad_norm": 1.524280309497039, + "learning_rate": 1.7946667048371818e-06, + "loss": 0.46144258975982666, + "step": 2146 + }, + { + "epoch": 0.4950426562139728, + "grad_norm": 1.719231407355215, + "learning_rate": 1.7944352039786297e-06, + "loss": 0.5973725914955139, + "step": 2147 + }, + { + "epoch": 0.49527323034355547, + "grad_norm": 1.4078850153564488, + "learning_rate": 1.7942035876408693e-06, + "loss": 0.4930835962295532, + "step": 2148 + }, + { + "epoch": 0.49550380447313813, + "grad_norm": 1.3404357985733748, + "learning_rate": 1.7939718558575685e-06, + "loss": 0.39137697219848633, + "step": 2149 + }, + { + "epoch": 0.4957343786027208, + "grad_norm": 1.364926902591579, + "learning_rate": 1.7937400086624117e-06, + "loss": 0.47618329524993896, + "step": 2150 + }, + { + "epoch": 0.49596495273230345, + "grad_norm": 1.1307446090872737, + "learning_rate": 1.7935080460891005e-06, + "loss": 0.4751483201980591, + "step": 2151 + }, + { + "epoch": 0.4961955268618861, + "grad_norm": 1.05862482163457, + "learning_rate": 1.7932759681713528e-06, + "loss": 0.4654052257537842, + "step": 2152 + }, + { + "epoch": 0.4964261009914688, + "grad_norm": 1.5078817597304273, + "learning_rate": 1.7930437749429035e-06, + "loss": 0.551579475402832, + "step": 2153 + }, + { + "epoch": 0.49665667512105144, + "grad_norm": 1.1496698915645684, + "learning_rate": 1.792811466437504e-06, + "loss": 0.4967789053916931, + "step": 2154 + }, + { + "epoch": 0.4968872492506341, + "grad_norm": 1.2983844202508301, + "learning_rate": 1.7925790426889234e-06, + "loss": 0.5826432108879089, + "step": 2155 + }, + { + "epoch": 0.49711782338021676, + "grad_norm": 1.1680445889037752, + "learning_rate": 1.792346503730946e-06, + "loss": 0.4260643720626831, + "step": 2156 + }, + { + "epoch": 0.4973483975097994, + 
"grad_norm": 1.287300561489553, + "learning_rate": 1.7921138495973741e-06, + "loss": 0.48679620027542114, + "step": 2157 + }, + { + "epoch": 0.4975789716393821, + "grad_norm": 1.219223301068072, + "learning_rate": 1.7918810803220266e-06, + "loss": 0.5048027634620667, + "step": 2158 + }, + { + "epoch": 0.49780954576896475, + "grad_norm": 1.3507694371861767, + "learning_rate": 1.7916481959387384e-06, + "loss": 0.5073787569999695, + "step": 2159 + }, + { + "epoch": 0.4980401198985474, + "grad_norm": 1.1692017846177098, + "learning_rate": 1.791415196481362e-06, + "loss": 0.47361671924591064, + "step": 2160 + }, + { + "epoch": 0.49827069402813007, + "grad_norm": 1.2422906508724816, + "learning_rate": 1.7911820819837659e-06, + "loss": 0.46382519602775574, + "step": 2161 + }, + { + "epoch": 0.49850126815771273, + "grad_norm": 1.2239936361904968, + "learning_rate": 1.7909488524798357e-06, + "loss": 0.5167688727378845, + "step": 2162 + }, + { + "epoch": 0.4987318422872954, + "grad_norm": 1.125831583037744, + "learning_rate": 1.7907155080034739e-06, + "loss": 0.4486730992794037, + "step": 2163 + }, + { + "epoch": 0.49896241641687805, + "grad_norm": 1.1343310195374692, + "learning_rate": 1.7904820485885991e-06, + "loss": 0.508470356464386, + "step": 2164 + }, + { + "epoch": 0.4991929905464607, + "grad_norm": 1.2928862741310794, + "learning_rate": 1.790248474269148e-06, + "loss": 0.4752856492996216, + "step": 2165 + }, + { + "epoch": 0.4994235646760434, + "grad_norm": 1.4158256008874892, + "learning_rate": 1.7900147850790713e-06, + "loss": 0.47191953659057617, + "step": 2166 + }, + { + "epoch": 0.49965413880562604, + "grad_norm": 1.2139421208311327, + "learning_rate": 1.7897809810523396e-06, + "loss": 0.48935621976852417, + "step": 2167 + }, + { + "epoch": 0.4998847129352087, + "grad_norm": 1.0547512942585364, + "learning_rate": 1.789547062222938e-06, + "loss": 0.5455219149589539, + "step": 2168 + }, + { + "epoch": 0.5001152870647914, + "grad_norm": 1.3471138253822197, + 
"learning_rate": 1.789313028624869e-06, + "loss": 0.5068193078041077, + "step": 2169 + }, + { + "epoch": 0.500345861194374, + "grad_norm": 1.354177516749214, + "learning_rate": 1.789078880292152e-06, + "loss": 0.5868322253227234, + "step": 2170 + }, + { + "epoch": 0.5005764353239567, + "grad_norm": 1.2474005261331733, + "learning_rate": 1.7888446172588222e-06, + "loss": 0.5132089853286743, + "step": 2171 + }, + { + "epoch": 0.5008070094535393, + "grad_norm": 1.6917901077948925, + "learning_rate": 1.788610239558933e-06, + "loss": 0.5673823356628418, + "step": 2172 + }, + { + "epoch": 0.501037583583122, + "grad_norm": 1.1902561905753382, + "learning_rate": 1.7883757472265533e-06, + "loss": 0.47085779905319214, + "step": 2173 + }, + { + "epoch": 0.5012681577127046, + "grad_norm": 1.38526914772559, + "learning_rate": 1.7881411402957685e-06, + "loss": 0.5286725163459778, + "step": 2174 + }, + { + "epoch": 0.5014987318422873, + "grad_norm": 1.1910792946448119, + "learning_rate": 1.7879064188006817e-06, + "loss": 0.5044010877609253, + "step": 2175 + }, + { + "epoch": 0.5017293059718699, + "grad_norm": 1.8451305262061892, + "learning_rate": 1.7876715827754113e-06, + "loss": 0.5329761505126953, + "step": 2176 + }, + { + "epoch": 0.5019598801014526, + "grad_norm": 1.1057498562542696, + "learning_rate": 1.7874366322540937e-06, + "loss": 0.5025275349617004, + "step": 2177 + }, + { + "epoch": 0.5021904542310353, + "grad_norm": 1.1913338911250846, + "learning_rate": 1.7872015672708814e-06, + "loss": 0.48466378450393677, + "step": 2178 + }, + { + "epoch": 0.502421028360618, + "grad_norm": 1.298497377256874, + "learning_rate": 1.7869663878599427e-06, + "loss": 0.505358099937439, + "step": 2179 + }, + { + "epoch": 0.5026516024902006, + "grad_norm": 1.3974305011742736, + "learning_rate": 1.7867310940554643e-06, + "loss": 0.4934875965118408, + "step": 2180 + }, + { + "epoch": 0.5028821766197833, + "grad_norm": 0.9670109365307766, + "learning_rate": 1.7864956858916482e-06, + "loss": 
0.4726678133010864, + "step": 2181 + }, + { + "epoch": 0.5031127507493659, + "grad_norm": 1.3043022336942207, + "learning_rate": 1.786260163402713e-06, + "loss": 0.4619986414909363, + "step": 2182 + }, + { + "epoch": 0.5033433248789486, + "grad_norm": 1.17201330946801, + "learning_rate": 1.7860245266228946e-06, + "loss": 0.4483926594257355, + "step": 2183 + }, + { + "epoch": 0.5035738990085312, + "grad_norm": 1.0474549975114675, + "learning_rate": 1.7857887755864451e-06, + "loss": 0.4756368100643158, + "step": 2184 + }, + { + "epoch": 0.5038044731381139, + "grad_norm": 1.248404397964203, + "learning_rate": 1.7855529103276334e-06, + "loss": 0.5610564351081848, + "step": 2185 + }, + { + "epoch": 0.5040350472676965, + "grad_norm": 1.178944045969772, + "learning_rate": 1.7853169308807447e-06, + "loss": 0.49948322772979736, + "step": 2186 + }, + { + "epoch": 0.5042656213972793, + "grad_norm": 1.203613939490818, + "learning_rate": 1.7850808372800813e-06, + "loss": 0.5023819208145142, + "step": 2187 + }, + { + "epoch": 0.5044961955268619, + "grad_norm": 1.1738403952666703, + "learning_rate": 1.7848446295599617e-06, + "loss": 0.45893096923828125, + "step": 2188 + }, + { + "epoch": 0.5047267696564446, + "grad_norm": 1.2621327179460875, + "learning_rate": 1.7846083077547212e-06, + "loss": 0.39129459857940674, + "step": 2189 + }, + { + "epoch": 0.5049573437860272, + "grad_norm": 0.9495823494613052, + "learning_rate": 1.784371871898711e-06, + "loss": 0.42348673939704895, + "step": 2190 + }, + { + "epoch": 0.5051879179156099, + "grad_norm": 1.4438634303858584, + "learning_rate": 1.7841353220263e-06, + "loss": 0.5760704278945923, + "step": 2191 + }, + { + "epoch": 0.5054184920451925, + "grad_norm": 1.1475240268019702, + "learning_rate": 1.7838986581718731e-06, + "loss": 0.5281997323036194, + "step": 2192 + }, + { + "epoch": 0.5056490661747752, + "grad_norm": 1.3139768062702608, + "learning_rate": 1.7836618803698315e-06, + "loss": 0.543775200843811, + "step": 2193 + }, + { + 
"epoch": 0.5058796403043578, + "grad_norm": 1.2497491249667418, + "learning_rate": 1.7834249886545934e-06, + "loss": 0.4148549437522888, + "step": 2194 + }, + { + "epoch": 0.5061102144339406, + "grad_norm": 1.183178207015322, + "learning_rate": 1.7831879830605936e-06, + "loss": 0.5165001153945923, + "step": 2195 + }, + { + "epoch": 0.5063407885635232, + "grad_norm": 1.0854657175123028, + "learning_rate": 1.782950863622283e-06, + "loss": 0.4183283746242523, + "step": 2196 + }, + { + "epoch": 0.5065713626931059, + "grad_norm": 1.2476527930959387, + "learning_rate": 1.7827136303741292e-06, + "loss": 0.46558016538619995, + "step": 2197 + }, + { + "epoch": 0.5068019368226885, + "grad_norm": 1.2829595269176914, + "learning_rate": 1.782476283350617e-06, + "loss": 0.5491806268692017, + "step": 2198 + }, + { + "epoch": 0.5070325109522712, + "grad_norm": 1.3547672961051511, + "learning_rate": 1.7822388225862466e-06, + "loss": 0.42999008297920227, + "step": 2199 + }, + { + "epoch": 0.5072630850818538, + "grad_norm": 1.2776437457035281, + "learning_rate": 1.7820012481155358e-06, + "loss": 0.42478299140930176, + "step": 2200 + }, + { + "epoch": 0.5074936592114365, + "grad_norm": 4.51069636831696, + "learning_rate": 1.781763559973018e-06, + "loss": 0.4175076186656952, + "step": 2201 + }, + { + "epoch": 0.5077242333410191, + "grad_norm": 1.1985836355289028, + "learning_rate": 1.7815257581932439e-06, + "loss": 0.42197084426879883, + "step": 2202 + }, + { + "epoch": 0.5079548074706018, + "grad_norm": 1.2175005553032592, + "learning_rate": 1.7812878428107803e-06, + "loss": 0.39872926473617554, + "step": 2203 + }, + { + "epoch": 0.5081853816001844, + "grad_norm": 1.2908474732070376, + "learning_rate": 1.7810498138602106e-06, + "loss": 0.4572516977787018, + "step": 2204 + }, + { + "epoch": 0.5084159557297672, + "grad_norm": 1.1254873587347531, + "learning_rate": 1.780811671376135e-06, + "loss": 0.5261520147323608, + "step": 2205 + }, + { + "epoch": 0.5086465298593498, + "grad_norm": 
1.8336847349223555, + "learning_rate": 1.7805734153931696e-06, + "loss": 0.4714658260345459, + "step": 2206 + }, + { + "epoch": 0.5088771039889325, + "grad_norm": 1.0757806041139168, + "learning_rate": 1.7803350459459472e-06, + "loss": 0.46184858679771423, + "step": 2207 + }, + { + "epoch": 0.5091076781185151, + "grad_norm": 1.2531712345918984, + "learning_rate": 1.7800965630691173e-06, + "loss": 0.48189157247543335, + "step": 2208 + }, + { + "epoch": 0.5093382522480978, + "grad_norm": 1.5363179586848308, + "learning_rate": 1.7798579667973463e-06, + "loss": 0.47865352034568787, + "step": 2209 + }, + { + "epoch": 0.5095688263776804, + "grad_norm": 1.1589101806191746, + "learning_rate": 1.7796192571653162e-06, + "loss": 0.46073317527770996, + "step": 2210 + }, + { + "epoch": 0.5097994005072631, + "grad_norm": 1.1781605500578527, + "learning_rate": 1.7793804342077253e-06, + "loss": 0.5099648237228394, + "step": 2211 + }, + { + "epoch": 0.5100299746368457, + "grad_norm": 1.2319682423717142, + "learning_rate": 1.7791414979592903e-06, + "loss": 0.5436147451400757, + "step": 2212 + }, + { + "epoch": 0.5102605487664285, + "grad_norm": 1.2305699349330186, + "learning_rate": 1.7789024484547417e-06, + "loss": 0.5455893278121948, + "step": 2213 + }, + { + "epoch": 0.5104911228960111, + "grad_norm": 1.2918560641722026, + "learning_rate": 1.7786632857288284e-06, + "loss": 0.4886546730995178, + "step": 2214 + }, + { + "epoch": 0.5107216970255938, + "grad_norm": 1.1611199451436964, + "learning_rate": 1.778424009816315e-06, + "loss": 0.4793723225593567, + "step": 2215 + }, + { + "epoch": 0.5109522711551764, + "grad_norm": 1.3312189289078886, + "learning_rate": 1.7781846207519826e-06, + "loss": 0.5814248323440552, + "step": 2216 + }, + { + "epoch": 0.5111828452847591, + "grad_norm": 1.1560984097631717, + "learning_rate": 1.777945118570629e-06, + "loss": 0.5057421326637268, + "step": 2217 + }, + { + "epoch": 0.5114134194143417, + "grad_norm": 1.3009634347843195, + "learning_rate": 
1.7777055033070682e-06, + "loss": 0.3913435935974121, + "step": 2218 + }, + { + "epoch": 0.5116439935439244, + "grad_norm": 0.9761581598604525, + "learning_rate": 1.7774657749961305e-06, + "loss": 0.4450770616531372, + "step": 2219 + }, + { + "epoch": 0.511874567673507, + "grad_norm": 1.731999332658399, + "learning_rate": 1.7772259336726636e-06, + "loss": 0.5164940357208252, + "step": 2220 + }, + { + "epoch": 0.5121051418030897, + "grad_norm": 1.257043827333845, + "learning_rate": 1.7769859793715298e-06, + "loss": 0.44231802225112915, + "step": 2221 + }, + { + "epoch": 0.5123357159326724, + "grad_norm": 1.2521439253976214, + "learning_rate": 1.7767459121276093e-06, + "loss": 0.516791820526123, + "step": 2222 + }, + { + "epoch": 0.5125662900622551, + "grad_norm": 1.2456616904380073, + "learning_rate": 1.7765057319757989e-06, + "loss": 0.4180450737476349, + "step": 2223 + }, + { + "epoch": 0.5127968641918377, + "grad_norm": 1.1350275613249636, + "learning_rate": 1.77626543895101e-06, + "loss": 0.49246734380722046, + "step": 2224 + }, + { + "epoch": 0.5130274383214203, + "grad_norm": 1.1582721424765736, + "learning_rate": 1.7760250330881728e-06, + "loss": 0.5058225393295288, + "step": 2225 + }, + { + "epoch": 0.513258012451003, + "grad_norm": 1.4118813849041838, + "learning_rate": 1.7757845144222321e-06, + "loss": 0.4752033054828644, + "step": 2226 + }, + { + "epoch": 0.5134885865805856, + "grad_norm": 1.2950831387397626, + "learning_rate": 1.77554388298815e-06, + "loss": 0.45163947343826294, + "step": 2227 + }, + { + "epoch": 0.5137191607101683, + "grad_norm": 1.387042973653302, + "learning_rate": 1.7753031388209044e-06, + "loss": 0.46295779943466187, + "step": 2228 + }, + { + "epoch": 0.5139497348397509, + "grad_norm": 1.2958875463664286, + "learning_rate": 1.7750622819554903e-06, + "loss": 0.5682947635650635, + "step": 2229 + }, + { + "epoch": 0.5141803089693336, + "grad_norm": 1.353052791820573, + "learning_rate": 1.7748213124269187e-06, + "loss": 
0.4890878200531006, + "step": 2230 + }, + { + "epoch": 0.5144108830989162, + "grad_norm": 1.4612536503294715, + "learning_rate": 1.7745802302702164e-06, + "loss": 0.5952332615852356, + "step": 2231 + }, + { + "epoch": 0.514641457228499, + "grad_norm": 1.1928368431775584, + "learning_rate": 1.7743390355204278e-06, + "loss": 0.43224406242370605, + "step": 2232 + }, + { + "epoch": 0.5148720313580816, + "grad_norm": 1.1851533508030387, + "learning_rate": 1.7740977282126122e-06, + "loss": 0.5010303258895874, + "step": 2233 + }, + { + "epoch": 0.5151026054876643, + "grad_norm": 1.105983766082305, + "learning_rate": 1.7738563083818469e-06, + "loss": 0.5166633725166321, + "step": 2234 + }, + { + "epoch": 0.5153331796172469, + "grad_norm": 1.0533784617555741, + "learning_rate": 1.7736147760632245e-06, + "loss": 0.4748263359069824, + "step": 2235 + }, + { + "epoch": 0.5155637537468296, + "grad_norm": 0.9010011595528595, + "learning_rate": 1.773373131291854e-06, + "loss": 0.46462053060531616, + "step": 2236 + }, + { + "epoch": 0.5157943278764122, + "grad_norm": 1.1288843437350349, + "learning_rate": 1.7731313741028608e-06, + "loss": 0.47799748182296753, + "step": 2237 + }, + { + "epoch": 0.5160249020059949, + "grad_norm": 1.2958124494051022, + "learning_rate": 1.772889504531387e-06, + "loss": 0.43448662757873535, + "step": 2238 + }, + { + "epoch": 0.5162554761355775, + "grad_norm": 1.2781442130344307, + "learning_rate": 1.7726475226125905e-06, + "loss": 0.4609360098838806, + "step": 2239 + }, + { + "epoch": 0.5164860502651603, + "grad_norm": 1.123946418980165, + "learning_rate": 1.7724054283816463e-06, + "loss": 0.505261242389679, + "step": 2240 + }, + { + "epoch": 0.5167166243947429, + "grad_norm": 1.1143888709548355, + "learning_rate": 1.772163221873745e-06, + "loss": 0.3812851905822754, + "step": 2241 + }, + { + "epoch": 0.5169471985243256, + "grad_norm": 1.1698544335678498, + "learning_rate": 1.7719209031240938e-06, + "loss": 0.42545294761657715, + "step": 2242 + }, + { + 
"epoch": 0.5171777726539082, + "grad_norm": 1.3964979839005025, + "learning_rate": 1.771678472167916e-06, + "loss": 0.45135340094566345, + "step": 2243 + }, + { + "epoch": 0.5174083467834909, + "grad_norm": 1.1118819857040387, + "learning_rate": 1.7714359290404514e-06, + "loss": 0.4499250650405884, + "step": 2244 + }, + { + "epoch": 0.5176389209130735, + "grad_norm": 1.2793420965554383, + "learning_rate": 1.7711932737769564e-06, + "loss": 0.4355557858943939, + "step": 2245 + }, + { + "epoch": 0.5178694950426562, + "grad_norm": 1.3068878220482505, + "learning_rate": 1.7709505064127036e-06, + "loss": 0.4140744209289551, + "step": 2246 + }, + { + "epoch": 0.5181000691722388, + "grad_norm": 1.2538619837975196, + "learning_rate": 1.7707076269829809e-06, + "loss": 0.5108504891395569, + "step": 2247 + }, + { + "epoch": 0.5183306433018215, + "grad_norm": 1.0866593797381727, + "learning_rate": 1.7704646355230936e-06, + "loss": 0.5064615607261658, + "step": 2248 + }, + { + "epoch": 0.5185612174314042, + "grad_norm": 1.4034267264652582, + "learning_rate": 1.7702215320683636e-06, + "loss": 0.5922794342041016, + "step": 2249 + }, + { + "epoch": 0.5187917915609869, + "grad_norm": 1.236045367714828, + "learning_rate": 1.7699783166541279e-06, + "loss": 0.3890082836151123, + "step": 2250 + }, + { + "epoch": 0.5190223656905695, + "grad_norm": 1.1663861833023768, + "learning_rate": 1.7697349893157402e-06, + "loss": 0.5585668087005615, + "step": 2251 + }, + { + "epoch": 0.5192529398201522, + "grad_norm": 1.2125542528327162, + "learning_rate": 1.7694915500885706e-06, + "loss": 0.3904608488082886, + "step": 2252 + }, + { + "epoch": 0.5194835139497348, + "grad_norm": 1.3213509465151734, + "learning_rate": 1.7692479990080056e-06, + "loss": 0.4764491617679596, + "step": 2253 + }, + { + "epoch": 0.5197140880793175, + "grad_norm": 1.3113796870909902, + "learning_rate": 1.769004336109448e-06, + "loss": 0.49443554878234863, + "step": 2254 + }, + { + "epoch": 0.5199446622089001, + "grad_norm": 
1.2196571448758133, + "learning_rate": 1.7687605614283165e-06, + "loss": 0.4679003357887268, + "step": 2255 + }, + { + "epoch": 0.5201752363384828, + "grad_norm": 1.6767016497784393, + "learning_rate": 1.7685166750000465e-06, + "loss": 0.6968683004379272, + "step": 2256 + }, + { + "epoch": 0.5204058104680654, + "grad_norm": 1.406455012631932, + "learning_rate": 1.7682726768600888e-06, + "loss": 0.5688217878341675, + "step": 2257 + }, + { + "epoch": 0.5206363845976482, + "grad_norm": 1.176050025614157, + "learning_rate": 1.7680285670439115e-06, + "loss": 0.4688011705875397, + "step": 2258 + }, + { + "epoch": 0.5208669587272308, + "grad_norm": 1.1772680288415673, + "learning_rate": 1.7677843455869984e-06, + "loss": 0.6447713971138, + "step": 2259 + }, + { + "epoch": 0.5210975328568135, + "grad_norm": 1.3187686937196665, + "learning_rate": 1.767540012524849e-06, + "loss": 0.578650951385498, + "step": 2260 + }, + { + "epoch": 0.5213281069863961, + "grad_norm": 1.4425748519700892, + "learning_rate": 1.76729556789298e-06, + "loss": 0.5001357197761536, + "step": 2261 + }, + { + "epoch": 0.5215586811159788, + "grad_norm": 1.2145912604177214, + "learning_rate": 1.7670510117269242e-06, + "loss": 0.5336331129074097, + "step": 2262 + }, + { + "epoch": 0.5217892552455614, + "grad_norm": 1.2105621787494676, + "learning_rate": 1.76680634406223e-06, + "loss": 0.5628900527954102, + "step": 2263 + }, + { + "epoch": 0.5220198293751441, + "grad_norm": 1.2476030455409495, + "learning_rate": 1.766561564934462e-06, + "loss": 0.46497443318367004, + "step": 2264 + }, + { + "epoch": 0.5222504035047267, + "grad_norm": 1.4921989012106511, + "learning_rate": 1.7663166743792019e-06, + "loss": 0.617607831954956, + "step": 2265 + }, + { + "epoch": 0.5224809776343095, + "grad_norm": 1.1582259137476871, + "learning_rate": 1.7660716724320468e-06, + "loss": 0.5236914157867432, + "step": 2266 + }, + { + "epoch": 0.5227115517638921, + "grad_norm": 1.2919028654437321, + "learning_rate": 
1.76582655912861e-06, + "loss": 0.5527941584587097, + "step": 2267 + }, + { + "epoch": 0.5229421258934748, + "grad_norm": 1.208274388494889, + "learning_rate": 1.7655813345045218e-06, + "loss": 0.5394654273986816, + "step": 2268 + }, + { + "epoch": 0.5231727000230574, + "grad_norm": 1.1822216818330542, + "learning_rate": 1.7653359985954275e-06, + "loss": 0.47050246596336365, + "step": 2269 + }, + { + "epoch": 0.5234032741526401, + "grad_norm": 1.2893306401147882, + "learning_rate": 1.7650905514369894e-06, + "loss": 0.49413689970970154, + "step": 2270 + }, + { + "epoch": 0.5236338482822227, + "grad_norm": 1.3086960549802995, + "learning_rate": 1.7648449930648856e-06, + "loss": 0.5568829774856567, + "step": 2271 + }, + { + "epoch": 0.5238644224118054, + "grad_norm": 1.2475799557753502, + "learning_rate": 1.7645993235148107e-06, + "loss": 0.49238815903663635, + "step": 2272 + }, + { + "epoch": 0.524094996541388, + "grad_norm": 1.16612817534413, + "learning_rate": 1.7643535428224752e-06, + "loss": 0.5580959320068359, + "step": 2273 + }, + { + "epoch": 0.5243255706709707, + "grad_norm": 1.4921637909191205, + "learning_rate": 1.7641076510236052e-06, + "loss": 0.5853499174118042, + "step": 2274 + }, + { + "epoch": 0.5245561448005533, + "grad_norm": 1.3988944269011947, + "learning_rate": 1.7638616481539448e-06, + "loss": 0.5638653635978699, + "step": 2275 + }, + { + "epoch": 0.5247867189301361, + "grad_norm": 1.2859178438597552, + "learning_rate": 1.7636155342492521e-06, + "loss": 0.5197241306304932, + "step": 2276 + }, + { + "epoch": 0.5250172930597187, + "grad_norm": 1.1094174928372944, + "learning_rate": 1.7633693093453026e-06, + "loss": 0.4137725234031677, + "step": 2277 + }, + { + "epoch": 0.5252478671893014, + "grad_norm": 1.2940062745509122, + "learning_rate": 1.7631229734778872e-06, + "loss": 0.54244065284729, + "step": 2278 + }, + { + "epoch": 0.525478441318884, + "grad_norm": 1.1871875469955007, + "learning_rate": 1.7628765266828137e-06, + "loss": 
0.5215432047843933, + "step": 2279 + }, + { + "epoch": 0.5257090154484667, + "grad_norm": 1.1984410258580116, + "learning_rate": 1.7626299689959057e-06, + "loss": 0.5559565424919128, + "step": 2280 + }, + { + "epoch": 0.5259395895780493, + "grad_norm": 1.1663711332671047, + "learning_rate": 1.7623833004530026e-06, + "loss": 0.5251328945159912, + "step": 2281 + }, + { + "epoch": 0.526170163707632, + "grad_norm": 1.241523894329925, + "learning_rate": 1.7621365210899598e-06, + "loss": 0.5351072549819946, + "step": 2282 + }, + { + "epoch": 0.5264007378372146, + "grad_norm": 1.1901641374825476, + "learning_rate": 1.7618896309426504e-06, + "loss": 0.46850037574768066, + "step": 2283 + }, + { + "epoch": 0.5266313119667974, + "grad_norm": 1.1697893294442419, + "learning_rate": 1.761642630046961e-06, + "loss": 0.5001033544540405, + "step": 2284 + }, + { + "epoch": 0.52686188609638, + "grad_norm": 0.9279299862604019, + "learning_rate": 1.7613955184387968e-06, + "loss": 0.47946250438690186, + "step": 2285 + }, + { + "epoch": 0.5270924602259627, + "grad_norm": 1.0539631796672029, + "learning_rate": 1.761148296154077e-06, + "loss": 0.4743049144744873, + "step": 2286 + }, + { + "epoch": 0.5273230343555453, + "grad_norm": 1.154224335020326, + "learning_rate": 1.7609009632287389e-06, + "loss": 0.4518652558326721, + "step": 2287 + }, + { + "epoch": 0.527553608485128, + "grad_norm": 1.0859896497705106, + "learning_rate": 1.7606535196987338e-06, + "loss": 0.5021224617958069, + "step": 2288 + }, + { + "epoch": 0.5277841826147106, + "grad_norm": 1.4832483769951506, + "learning_rate": 1.760405965600031e-06, + "loss": 0.4848078489303589, + "step": 2289 + }, + { + "epoch": 0.5280147567442933, + "grad_norm": 1.22421773905119, + "learning_rate": 1.7601583009686142e-06, + "loss": 0.49077051877975464, + "step": 2290 + }, + { + "epoch": 0.5282453308738759, + "grad_norm": 1.2916718452438969, + "learning_rate": 1.7599105258404848e-06, + "loss": 0.4802943468093872, + "step": 2291 + }, + { + 
"epoch": 0.5284759050034586, + "grad_norm": 1.4055248895326071, + "learning_rate": 1.7596626402516589e-06, + "loss": 0.5397455096244812, + "step": 2292 + }, + { + "epoch": 0.5287064791330413, + "grad_norm": 1.0497017336135974, + "learning_rate": 1.759414644238169e-06, + "loss": 0.478559672832489, + "step": 2293 + }, + { + "epoch": 0.528937053262624, + "grad_norm": 1.112359888255478, + "learning_rate": 1.7591665378360644e-06, + "loss": 0.5080797672271729, + "step": 2294 + }, + { + "epoch": 0.5291676273922066, + "grad_norm": 1.0468621326779766, + "learning_rate": 1.7589183210814093e-06, + "loss": 0.4959479868412018, + "step": 2295 + }, + { + "epoch": 0.5293982015217893, + "grad_norm": 1.1985868339045591, + "learning_rate": 1.7586699940102853e-06, + "loss": 0.512288510799408, + "step": 2296 + }, + { + "epoch": 0.5296287756513719, + "grad_norm": 1.1129893572343195, + "learning_rate": 1.7584215566587886e-06, + "loss": 0.525113046169281, + "step": 2297 + }, + { + "epoch": 0.5298593497809546, + "grad_norm": 1.2088844531850982, + "learning_rate": 1.7581730090630322e-06, + "loss": 0.3715069890022278, + "step": 2298 + }, + { + "epoch": 0.5300899239105372, + "grad_norm": 1.3852845244524983, + "learning_rate": 1.757924351259145e-06, + "loss": 0.5833072662353516, + "step": 2299 + }, + { + "epoch": 0.5303204980401199, + "grad_norm": 1.638098016270419, + "learning_rate": 1.7576755832832721e-06, + "loss": 0.5942450761795044, + "step": 2300 + }, + { + "epoch": 0.5305510721697025, + "grad_norm": 1.1523961468173722, + "learning_rate": 1.7574267051715745e-06, + "loss": 0.4754432737827301, + "step": 2301 + }, + { + "epoch": 0.5307816462992853, + "grad_norm": 1.3593694553922624, + "learning_rate": 1.7571777169602287e-06, + "loss": 0.5272700190544128, + "step": 2302 + }, + { + "epoch": 0.5310122204288679, + "grad_norm": 1.137089307163323, + "learning_rate": 1.7569286186854283e-06, + "loss": 0.48376554250717163, + "step": 2303 + }, + { + "epoch": 0.5312427945584506, + "grad_norm": 
1.324023805933818, + "learning_rate": 1.7566794103833816e-06, + "loss": 0.4324077367782593, + "step": 2304 + }, + { + "epoch": 0.5314733686880332, + "grad_norm": 1.2843168925212602, + "learning_rate": 1.7564300920903142e-06, + "loss": 0.44939202070236206, + "step": 2305 + }, + { + "epoch": 0.5317039428176159, + "grad_norm": 1.2413807013846574, + "learning_rate": 1.7561806638424662e-06, + "loss": 0.5256277322769165, + "step": 2306 + }, + { + "epoch": 0.5319345169471985, + "grad_norm": 1.0855894350628046, + "learning_rate": 1.7559311256760955e-06, + "loss": 0.43901991844177246, + "step": 2307 + }, + { + "epoch": 0.5321650910767812, + "grad_norm": 1.3134089338347328, + "learning_rate": 1.7556814776274746e-06, + "loss": 0.5256138443946838, + "step": 2308 + }, + { + "epoch": 0.5323956652063638, + "grad_norm": 1.3769537654510517, + "learning_rate": 1.7554317197328922e-06, + "loss": 0.4664478600025177, + "step": 2309 + }, + { + "epoch": 0.5326262393359465, + "grad_norm": 1.1227476903728313, + "learning_rate": 1.7551818520286532e-06, + "loss": 0.5042726397514343, + "step": 2310 + }, + { + "epoch": 0.5328568134655292, + "grad_norm": 1.3417267355052607, + "learning_rate": 1.754931874551079e-06, + "loss": 0.5682350397109985, + "step": 2311 + }, + { + "epoch": 0.5330873875951119, + "grad_norm": 1.2416043105842551, + "learning_rate": 1.754681787336505e-06, + "loss": 0.5082807540893555, + "step": 2312 + }, + { + "epoch": 0.5333179617246945, + "grad_norm": 1.4255568276367208, + "learning_rate": 1.754431590421285e-06, + "loss": 0.6020215749740601, + "step": 2313 + }, + { + "epoch": 0.5335485358542772, + "grad_norm": 1.4104154799235167, + "learning_rate": 1.7541812838417877e-06, + "loss": 0.5004276633262634, + "step": 2314 + }, + { + "epoch": 0.5337791099838598, + "grad_norm": 1.060415170291065, + "learning_rate": 1.753930867634397e-06, + "loss": 0.4889993667602539, + "step": 2315 + }, + { + "epoch": 0.5340096841134425, + "grad_norm": 1.0849217066026469, + "learning_rate": 
1.7536803418355141e-06, + "loss": 0.4179444909095764, + "step": 2316 + }, + { + "epoch": 0.5342402582430251, + "grad_norm": 1.2618059778728548, + "learning_rate": 1.7534297064815554e-06, + "loss": 0.46807605028152466, + "step": 2317 + }, + { + "epoch": 0.5344708323726078, + "grad_norm": 1.2827117317411258, + "learning_rate": 1.7531789616089528e-06, + "loss": 0.39173221588134766, + "step": 2318 + }, + { + "epoch": 0.5347014065021904, + "grad_norm": 1.2820357654319097, + "learning_rate": 1.7529281072541548e-06, + "loss": 0.4290514886379242, + "step": 2319 + }, + { + "epoch": 0.5349319806317732, + "grad_norm": 1.3778694052072273, + "learning_rate": 1.752677143453626e-06, + "loss": 0.6052347421646118, + "step": 2320 + }, + { + "epoch": 0.5351625547613558, + "grad_norm": 1.054542888313722, + "learning_rate": 1.752426070243846e-06, + "loss": 0.47622209787368774, + "step": 2321 + }, + { + "epoch": 0.5353931288909385, + "grad_norm": 1.128157779747108, + "learning_rate": 1.7521748876613112e-06, + "loss": 0.4216923415660858, + "step": 2322 + }, + { + "epoch": 0.5356237030205211, + "grad_norm": 2.0737049391078384, + "learning_rate": 1.751923595742533e-06, + "loss": 0.5527430772781372, + "step": 2323 + }, + { + "epoch": 0.5358542771501038, + "grad_norm": 1.1406433043117166, + "learning_rate": 1.75167219452404e-06, + "loss": 0.5562101602554321, + "step": 2324 + }, + { + "epoch": 0.5360848512796864, + "grad_norm": 1.2183539446117024, + "learning_rate": 1.7514206840423757e-06, + "loss": 0.546181321144104, + "step": 2325 + }, + { + "epoch": 0.5363154254092691, + "grad_norm": 1.5216852196360238, + "learning_rate": 1.7511690643340995e-06, + "loss": 0.5883532762527466, + "step": 2326 + }, + { + "epoch": 0.5365459995388517, + "grad_norm": 1.2667138111118152, + "learning_rate": 1.750917335435787e-06, + "loss": 0.5231350660324097, + "step": 2327 + }, + { + "epoch": 0.5367765736684345, + "grad_norm": 1.200525241411545, + "learning_rate": 1.7506654973840292e-06, + "loss": 
0.4846429228782654, + "step": 2328 + }, + { + "epoch": 0.5370071477980171, + "grad_norm": 1.0815584734915895, + "learning_rate": 1.7504135502154335e-06, + "loss": 0.43692171573638916, + "step": 2329 + }, + { + "epoch": 0.5372377219275998, + "grad_norm": 1.0658062374834336, + "learning_rate": 1.7501614939666234e-06, + "loss": 0.5076167583465576, + "step": 2330 + }, + { + "epoch": 0.5374682960571824, + "grad_norm": 1.2658937157989252, + "learning_rate": 1.7499093286742373e-06, + "loss": 0.5302891135215759, + "step": 2331 + }, + { + "epoch": 0.5376988701867651, + "grad_norm": 1.3200406937261826, + "learning_rate": 1.7496570543749303e-06, + "loss": 0.5827817916870117, + "step": 2332 + }, + { + "epoch": 0.5379294443163477, + "grad_norm": 1.3684047155196064, + "learning_rate": 1.7494046711053726e-06, + "loss": 0.6765470504760742, + "step": 2333 + }, + { + "epoch": 0.5381600184459304, + "grad_norm": 1.3001315312834418, + "learning_rate": 1.7491521789022513e-06, + "loss": 0.48666322231292725, + "step": 2334 + }, + { + "epoch": 0.538390592575513, + "grad_norm": 1.0490910849362622, + "learning_rate": 1.7488995778022685e-06, + "loss": 0.5163695812225342, + "step": 2335 + }, + { + "epoch": 0.5386211667050956, + "grad_norm": 1.1765286879203154, + "learning_rate": 1.748646867842142e-06, + "loss": 0.44487982988357544, + "step": 2336 + }, + { + "epoch": 0.5388517408346783, + "grad_norm": 1.2992285046307706, + "learning_rate": 1.7483940490586058e-06, + "loss": 0.5512663722038269, + "step": 2337 + }, + { + "epoch": 0.539082314964261, + "grad_norm": 1.1533551829707172, + "learning_rate": 1.7481411214884098e-06, + "loss": 0.461128294467926, + "step": 2338 + }, + { + "epoch": 0.5393128890938437, + "grad_norm": 1.2239639921661383, + "learning_rate": 1.7478880851683197e-06, + "loss": 0.47291088104248047, + "step": 2339 + }, + { + "epoch": 0.5395434632234263, + "grad_norm": 1.1568837363453548, + "learning_rate": 1.747634940135117e-06, + "loss": 0.5900166034698486, + "step": 2340 + }, + { 
+ "epoch": 0.539774037353009, + "grad_norm": 1.0385421801821113, + "learning_rate": 1.7473816864255983e-06, + "loss": 0.3878340721130371, + "step": 2341 + }, + { + "epoch": 0.5400046114825916, + "grad_norm": 1.442772155197814, + "learning_rate": 1.7471283240765775e-06, + "loss": 0.5671564340591431, + "step": 2342 + }, + { + "epoch": 0.5402351856121743, + "grad_norm": 1.1602673867587185, + "learning_rate": 1.7468748531248824e-06, + "loss": 0.5153918266296387, + "step": 2343 + }, + { + "epoch": 0.5404657597417569, + "grad_norm": 1.2187996046056446, + "learning_rate": 1.7466212736073585e-06, + "loss": 0.49520084261894226, + "step": 2344 + }, + { + "epoch": 0.5406963338713396, + "grad_norm": 1.0955374839449357, + "learning_rate": 1.7463675855608654e-06, + "loss": 0.4884970784187317, + "step": 2345 + }, + { + "epoch": 0.5409269080009222, + "grad_norm": 1.401002336922335, + "learning_rate": 1.7461137890222798e-06, + "loss": 0.5233277678489685, + "step": 2346 + }, + { + "epoch": 0.541157482130505, + "grad_norm": 1.272363275240415, + "learning_rate": 1.7458598840284928e-06, + "loss": 0.44011372327804565, + "step": 2347 + }, + { + "epoch": 0.5413880562600876, + "grad_norm": 1.1593134205382656, + "learning_rate": 1.745605870616413e-06, + "loss": 0.4833263158798218, + "step": 2348 + }, + { + "epoch": 0.5416186303896703, + "grad_norm": 1.186578949511732, + "learning_rate": 1.7453517488229634e-06, + "loss": 0.4852379262447357, + "step": 2349 + }, + { + "epoch": 0.5418492045192529, + "grad_norm": 1.527590855990685, + "learning_rate": 1.7450975186850831e-06, + "loss": 0.4710320830345154, + "step": 2350 + }, + { + "epoch": 0.5420797786488356, + "grad_norm": 1.4382691899722804, + "learning_rate": 1.744843180239727e-06, + "loss": 0.5144790410995483, + "step": 2351 + }, + { + "epoch": 0.5423103527784182, + "grad_norm": 1.3784898997392558, + "learning_rate": 1.7445887335238663e-06, + "loss": 0.5815445184707642, + "step": 2352 + }, + { + "epoch": 0.5425409269080009, + "grad_norm": 
1.1629274836022288, + "learning_rate": 1.7443341785744864e-06, + "loss": 0.5101407170295715, + "step": 2353 + }, + { + "epoch": 0.5427715010375835, + "grad_norm": 1.1760272227987194, + "learning_rate": 1.7440795154285905e-06, + "loss": 0.4584839940071106, + "step": 2354 + }, + { + "epoch": 0.5430020751671663, + "grad_norm": 1.323122873632264, + "learning_rate": 1.743824744123196e-06, + "loss": 0.482247531414032, + "step": 2355 + }, + { + "epoch": 0.5432326492967489, + "grad_norm": 1.1361176263052393, + "learning_rate": 1.7435698646953364e-06, + "loss": 0.5503325462341309, + "step": 2356 + }, + { + "epoch": 0.5434632234263316, + "grad_norm": 1.2952580221197654, + "learning_rate": 1.7433148771820612e-06, + "loss": 0.4803489148616791, + "step": 2357 + }, + { + "epoch": 0.5436937975559142, + "grad_norm": 1.303291620807208, + "learning_rate": 1.7430597816204351e-06, + "loss": 0.5388872027397156, + "step": 2358 + }, + { + "epoch": 0.5439243716854969, + "grad_norm": 1.6209081192397237, + "learning_rate": 1.742804578047539e-06, + "loss": 0.512636125087738, + "step": 2359 + }, + { + "epoch": 0.5441549458150795, + "grad_norm": 1.5943501598581358, + "learning_rate": 1.7425492665004699e-06, + "loss": 0.49154865741729736, + "step": 2360 + }, + { + "epoch": 0.5443855199446622, + "grad_norm": 1.1498651594774036, + "learning_rate": 1.7422938470163389e-06, + "loss": 0.5185250639915466, + "step": 2361 + }, + { + "epoch": 0.5446160940742448, + "grad_norm": 1.5663688017502957, + "learning_rate": 1.7420383196322747e-06, + "loss": 0.5474511384963989, + "step": 2362 + }, + { + "epoch": 0.5448466682038275, + "grad_norm": 1.3465441719791955, + "learning_rate": 1.7417826843854202e-06, + "loss": 0.48212137818336487, + "step": 2363 + }, + { + "epoch": 0.5450772423334102, + "grad_norm": 1.1320785808666363, + "learning_rate": 1.7415269413129348e-06, + "loss": 0.47983086109161377, + "step": 2364 + }, + { + "epoch": 0.5453078164629929, + "grad_norm": 1.1314426678618292, + "learning_rate": 
1.7412710904519932e-06, + "loss": 0.4935225546360016, + "step": 2365 + }, + { + "epoch": 0.5455383905925755, + "grad_norm": 1.2528535153373956, + "learning_rate": 1.7410151318397862e-06, + "loss": 0.5167664289474487, + "step": 2366 + }, + { + "epoch": 0.5457689647221582, + "grad_norm": 1.1782327982922274, + "learning_rate": 1.74075906551352e-06, + "loss": 0.5116056799888611, + "step": 2367 + }, + { + "epoch": 0.5459995388517408, + "grad_norm": 1.1184728717072068, + "learning_rate": 1.7405028915104158e-06, + "loss": 0.4709595739841461, + "step": 2368 + }, + { + "epoch": 0.5462301129813235, + "grad_norm": 1.560534410686712, + "learning_rate": 1.7402466098677118e-06, + "loss": 0.3989061117172241, + "step": 2369 + }, + { + "epoch": 0.5464606871109061, + "grad_norm": 1.1397817693321244, + "learning_rate": 1.739990220622661e-06, + "loss": 0.45720764994621277, + "step": 2370 + }, + { + "epoch": 0.5466912612404888, + "grad_norm": 1.6154705847610804, + "learning_rate": 1.739733723812532e-06, + "loss": 0.5865384936332703, + "step": 2371 + }, + { + "epoch": 0.5469218353700714, + "grad_norm": 1.3129437136284077, + "learning_rate": 1.7394771194746092e-06, + "loss": 0.4451501965522766, + "step": 2372 + }, + { + "epoch": 0.5471524094996542, + "grad_norm": 1.2213938230584949, + "learning_rate": 1.7392204076461928e-06, + "loss": 0.4628486633300781, + "step": 2373 + }, + { + "epoch": 0.5473829836292368, + "grad_norm": 1.2854198948482758, + "learning_rate": 1.7389635883645984e-06, + "loss": 0.4797760248184204, + "step": 2374 + }, + { + "epoch": 0.5476135577588195, + "grad_norm": 1.2890601616689177, + "learning_rate": 1.7387066616671571e-06, + "loss": 0.4716770648956299, + "step": 2375 + }, + { + "epoch": 0.5478441318884021, + "grad_norm": 1.071991179643841, + "learning_rate": 1.738449627591216e-06, + "loss": 0.504901647567749, + "step": 2376 + }, + { + "epoch": 0.5480747060179848, + "grad_norm": 1.259141194312177, + "learning_rate": 1.7381924861741375e-06, + "loss": 
0.5248615145683289, + "step": 2377 + }, + { + "epoch": 0.5483052801475674, + "grad_norm": 1.1551298194401718, + "learning_rate": 1.7379352374532998e-06, + "loss": 0.41704076528549194, + "step": 2378 + }, + { + "epoch": 0.5485358542771501, + "grad_norm": 1.1093382819710802, + "learning_rate": 1.7376778814660966e-06, + "loss": 0.42278197407722473, + "step": 2379 + }, + { + "epoch": 0.5487664284067327, + "grad_norm": 1.3240414194175114, + "learning_rate": 1.7374204182499372e-06, + "loss": 0.4104729890823364, + "step": 2380 + }, + { + "epoch": 0.5489970025363154, + "grad_norm": 1.237574436817826, + "learning_rate": 1.7371628478422467e-06, + "loss": 0.5205684304237366, + "step": 2381 + }, + { + "epoch": 0.549227576665898, + "grad_norm": 1.2914374831424469, + "learning_rate": 1.7369051702804648e-06, + "loss": 0.4743306040763855, + "step": 2382 + }, + { + "epoch": 0.5494581507954808, + "grad_norm": 1.4263628155545096, + "learning_rate": 1.7366473856020486e-06, + "loss": 0.6324253678321838, + "step": 2383 + }, + { + "epoch": 0.5496887249250634, + "grad_norm": 1.2093119037905458, + "learning_rate": 1.736389493844469e-06, + "loss": 0.46466588973999023, + "step": 2384 + }, + { + "epoch": 0.5499192990546461, + "grad_norm": 1.257464863029373, + "learning_rate": 1.7361314950452136e-06, + "loss": 0.4117918014526367, + "step": 2385 + }, + { + "epoch": 0.5501498731842287, + "grad_norm": 1.0582357147304537, + "learning_rate": 1.7358733892417848e-06, + "loss": 0.40341615676879883, + "step": 2386 + }, + { + "epoch": 0.5503804473138114, + "grad_norm": 1.2083128590610215, + "learning_rate": 1.735615176471701e-06, + "loss": 0.642855167388916, + "step": 2387 + }, + { + "epoch": 0.550611021443394, + "grad_norm": 1.3821025749968947, + "learning_rate": 1.7353568567724959e-06, + "loss": 0.5490958094596863, + "step": 2388 + }, + { + "epoch": 0.5508415955729767, + "grad_norm": 1.0972882559163057, + "learning_rate": 1.7350984301817192e-06, + "loss": 0.5154834985733032, + "step": 2389 + }, + { + 
"epoch": 0.5510721697025593, + "grad_norm": 1.5156914347306212, + "learning_rate": 1.7348398967369358e-06, + "loss": 0.49488651752471924, + "step": 2390 + }, + { + "epoch": 0.5513027438321421, + "grad_norm": 1.097164324799634, + "learning_rate": 1.7345812564757257e-06, + "loss": 0.4211215674877167, + "step": 2391 + }, + { + "epoch": 0.5515333179617247, + "grad_norm": 1.1060429845011046, + "learning_rate": 1.7343225094356855e-06, + "loss": 0.41840964555740356, + "step": 2392 + }, + { + "epoch": 0.5517638920913074, + "grad_norm": 1.1213399734290006, + "learning_rate": 1.7340636556544264e-06, + "loss": 0.540780782699585, + "step": 2393 + }, + { + "epoch": 0.55199446622089, + "grad_norm": 1.328334535307567, + "learning_rate": 1.7338046951695754e-06, + "loss": 0.4967775046825409, + "step": 2394 + }, + { + "epoch": 0.5522250403504727, + "grad_norm": 1.337457775660936, + "learning_rate": 1.733545628018775e-06, + "loss": 0.5155577659606934, + "step": 2395 + }, + { + "epoch": 0.5524556144800553, + "grad_norm": 1.3409169497631646, + "learning_rate": 1.7332864542396832e-06, + "loss": 0.5106005072593689, + "step": 2396 + }, + { + "epoch": 0.552686188609638, + "grad_norm": 1.106469342539302, + "learning_rate": 1.7330271738699737e-06, + "loss": 0.3459712862968445, + "step": 2397 + }, + { + "epoch": 0.5529167627392206, + "grad_norm": 1.238811250755909, + "learning_rate": 1.7327677869473356e-06, + "loss": 0.4877927303314209, + "step": 2398 + }, + { + "epoch": 0.5531473368688034, + "grad_norm": 1.298959309949219, + "learning_rate": 1.7325082935094732e-06, + "loss": 0.5183857679367065, + "step": 2399 + }, + { + "epoch": 0.553377910998386, + "grad_norm": 1.1165163437308863, + "learning_rate": 1.7322486935941068e-06, + "loss": 0.4326491057872772, + "step": 2400 + }, + { + "epoch": 0.5536084851279687, + "grad_norm": 1.2472729786065346, + "learning_rate": 1.7319889872389716e-06, + "loss": 0.4688712954521179, + "step": 2401 + }, + { + "epoch": 0.5538390592575513, + "grad_norm": 
1.2787851295656323, + "learning_rate": 1.7317291744818184e-06, + "loss": 0.4997788071632385, + "step": 2402 + }, + { + "epoch": 0.554069633387134, + "grad_norm": 1.3085189564145994, + "learning_rate": 1.731469255360414e-06, + "loss": 0.5271172523498535, + "step": 2403 + }, + { + "epoch": 0.5543002075167166, + "grad_norm": 1.3689434717845856, + "learning_rate": 1.73120922991254e-06, + "loss": 0.5339269042015076, + "step": 2404 + }, + { + "epoch": 0.5545307816462993, + "grad_norm": 1.2181123008680574, + "learning_rate": 1.7309490981759938e-06, + "loss": 0.47052568197250366, + "step": 2405 + }, + { + "epoch": 0.5547613557758819, + "grad_norm": 1.2508289898124627, + "learning_rate": 1.7306888601885885e-06, + "loss": 0.4112280309200287, + "step": 2406 + }, + { + "epoch": 0.5549919299054646, + "grad_norm": 1.1812487853939355, + "learning_rate": 1.730428515988152e-06, + "loss": 0.5473710298538208, + "step": 2407 + }, + { + "epoch": 0.5552225040350472, + "grad_norm": 1.6509587018432181, + "learning_rate": 1.7301680656125277e-06, + "loss": 0.5079115629196167, + "step": 2408 + }, + { + "epoch": 0.55545307816463, + "grad_norm": 1.193259996108104, + "learning_rate": 1.7299075090995755e-06, + "loss": 0.4805012345314026, + "step": 2409 + }, + { + "epoch": 0.5556836522942126, + "grad_norm": 1.1958830357632493, + "learning_rate": 1.729646846487169e-06, + "loss": 0.4657474756240845, + "step": 2410 + }, + { + "epoch": 0.5559142264237953, + "grad_norm": 1.2442110767414496, + "learning_rate": 1.729386077813199e-06, + "loss": 0.5887978076934814, + "step": 2411 + }, + { + "epoch": 0.5561448005533779, + "grad_norm": 1.0093517139206267, + "learning_rate": 1.7291252031155704e-06, + "loss": 0.43841421604156494, + "step": 2412 + }, + { + "epoch": 0.5563753746829606, + "grad_norm": 1.304380451031228, + "learning_rate": 1.728864222432204e-06, + "loss": 0.5026551485061646, + "step": 2413 + }, + { + "epoch": 0.5566059488125432, + "grad_norm": 1.2344100865196312, + "learning_rate": 
1.728603135801036e-06, + "loss": 0.4525277614593506, + "step": 2414 + }, + { + "epoch": 0.5568365229421259, + "grad_norm": 1.3128956010351178, + "learning_rate": 1.7283419432600182e-06, + "loss": 0.4095644950866699, + "step": 2415 + }, + { + "epoch": 0.5570670970717085, + "grad_norm": 1.2351186073808627, + "learning_rate": 1.7280806448471173e-06, + "loss": 0.5098834037780762, + "step": 2416 + }, + { + "epoch": 0.5572976712012913, + "grad_norm": 0.9689174321932323, + "learning_rate": 1.7278192406003159e-06, + "loss": 0.42802777886390686, + "step": 2417 + }, + { + "epoch": 0.5575282453308739, + "grad_norm": 1.283644069549869, + "learning_rate": 1.7275577305576113e-06, + "loss": 0.5036378502845764, + "step": 2418 + }, + { + "epoch": 0.5577588194604566, + "grad_norm": 1.2960652355454445, + "learning_rate": 1.7272961147570175e-06, + "loss": 0.5324885249137878, + "step": 2419 + }, + { + "epoch": 0.5579893935900392, + "grad_norm": 1.6334614504341187, + "learning_rate": 1.727034393236562e-06, + "loss": 0.5763842463493347, + "step": 2420 + }, + { + "epoch": 0.5582199677196219, + "grad_norm": 1.343133312027108, + "learning_rate": 1.7267725660342895e-06, + "loss": 0.49291908740997314, + "step": 2421 + }, + { + "epoch": 0.5584505418492045, + "grad_norm": 1.651006143174213, + "learning_rate": 1.7265106331882588e-06, + "loss": 0.5114868879318237, + "step": 2422 + }, + { + "epoch": 0.5586811159787872, + "grad_norm": 1.1152807378164393, + "learning_rate": 1.7262485947365449e-06, + "loss": 0.42442530393600464, + "step": 2423 + }, + { + "epoch": 0.5589116901083698, + "grad_norm": 1.1309517905090323, + "learning_rate": 1.725986450717237e-06, + "loss": 0.3680551052093506, + "step": 2424 + }, + { + "epoch": 0.5591422642379525, + "grad_norm": 1.2183025106634426, + "learning_rate": 1.725724201168441e-06, + "loss": 0.5849576592445374, + "step": 2425 + }, + { + "epoch": 0.5593728383675352, + "grad_norm": 1.3597945996239442, + "learning_rate": 1.7254618461282773e-06, + "loss": 
0.48919233679771423, + "step": 2426 + }, + { + "epoch": 0.5596034124971179, + "grad_norm": 1.1753552641156777, + "learning_rate": 1.7251993856348821e-06, + "loss": 0.4857720732688904, + "step": 2427 + }, + { + "epoch": 0.5598339866267005, + "grad_norm": 1.3324934167522995, + "learning_rate": 1.7249368197264062e-06, + "loss": 0.5106808543205261, + "step": 2428 + }, + { + "epoch": 0.5600645607562832, + "grad_norm": 1.305986731975411, + "learning_rate": 1.724674148441017e-06, + "loss": 0.500100314617157, + "step": 2429 + }, + { + "epoch": 0.5602951348858658, + "grad_norm": 1.226560051936561, + "learning_rate": 1.7244113718168957e-06, + "loss": 0.5389110445976257, + "step": 2430 + }, + { + "epoch": 0.5605257090154485, + "grad_norm": 1.2848731557614161, + "learning_rate": 1.72414848989224e-06, + "loss": 0.42860496044158936, + "step": 2431 + }, + { + "epoch": 0.5607562831450311, + "grad_norm": 1.2392935426075953, + "learning_rate": 1.723885502705262e-06, + "loss": 0.4867728352546692, + "step": 2432 + }, + { + "epoch": 0.5609868572746138, + "grad_norm": 1.215687300161219, + "learning_rate": 1.7236224102941899e-06, + "loss": 0.49194633960723877, + "step": 2433 + }, + { + "epoch": 0.5612174314041964, + "grad_norm": 1.278802988367442, + "learning_rate": 1.7233592126972667e-06, + "loss": 0.5194358229637146, + "step": 2434 + }, + { + "epoch": 0.5614480055337792, + "grad_norm": 1.518126298536734, + "learning_rate": 1.723095909952751e-06, + "loss": 0.4738645553588867, + "step": 2435 + }, + { + "epoch": 0.5616785796633618, + "grad_norm": 1.1842233457279843, + "learning_rate": 1.7228325020989165e-06, + "loss": 0.48232927918434143, + "step": 2436 + }, + { + "epoch": 0.5619091537929445, + "grad_norm": 1.0590325088103263, + "learning_rate": 1.7225689891740522e-06, + "loss": 0.5192145109176636, + "step": 2437 + }, + { + "epoch": 0.5621397279225271, + "grad_norm": 1.2756639382228332, + "learning_rate": 1.7223053712164621e-06, + "loss": 0.4934930205345154, + "step": 2438 + }, + { + 
"epoch": 0.5623703020521098, + "grad_norm": 1.294610704846241, + "learning_rate": 1.722041648264466e-06, + "loss": 0.5022200345993042, + "step": 2439 + }, + { + "epoch": 0.5626008761816924, + "grad_norm": 1.15319893327068, + "learning_rate": 1.7217778203563986e-06, + "loss": 0.45300528407096863, + "step": 2440 + }, + { + "epoch": 0.5628314503112751, + "grad_norm": 1.1335234735988557, + "learning_rate": 1.7215138875306103e-06, + "loss": 0.4965200126171112, + "step": 2441 + }, + { + "epoch": 0.5630620244408577, + "grad_norm": 1.3081789750993726, + "learning_rate": 1.721249849825466e-06, + "loss": 0.4618280231952667, + "step": 2442 + }, + { + "epoch": 0.5632925985704405, + "grad_norm": 1.255070715358214, + "learning_rate": 1.7209857072793464e-06, + "loss": 0.42270147800445557, + "step": 2443 + }, + { + "epoch": 0.5635231727000231, + "grad_norm": 1.0830436199918496, + "learning_rate": 1.720721459930647e-06, + "loss": 0.5200725793838501, + "step": 2444 + }, + { + "epoch": 0.5637537468296058, + "grad_norm": 1.1368018551382484, + "learning_rate": 1.7204571078177792e-06, + "loss": 0.47475337982177734, + "step": 2445 + }, + { + "epoch": 0.5639843209591884, + "grad_norm": 1.5482537414338693, + "learning_rate": 1.7201926509791693e-06, + "loss": 0.5493113994598389, + "step": 2446 + }, + { + "epoch": 0.564214895088771, + "grad_norm": 1.2861044506324582, + "learning_rate": 1.719928089453259e-06, + "loss": 0.4743562340736389, + "step": 2447 + }, + { + "epoch": 0.5644454692183537, + "grad_norm": 1.2343956116266135, + "learning_rate": 1.7196634232785038e-06, + "loss": 0.5145455598831177, + "step": 2448 + }, + { + "epoch": 0.5646760433479363, + "grad_norm": 1.5340568803714763, + "learning_rate": 1.719398652493377e-06, + "loss": 0.45072540640830994, + "step": 2449 + }, + { + "epoch": 0.564906617477519, + "grad_norm": 1.2363775684809537, + "learning_rate": 1.7191337771363651e-06, + "loss": 0.5150895714759827, + "step": 2450 + }, + { + "epoch": 0.5651371916071016, + "grad_norm": 
1.4238500687035243, + "learning_rate": 1.7188687972459705e-06, + "loss": 0.5025302171707153, + "step": 2451 + }, + { + "epoch": 0.5653677657366843, + "grad_norm": 1.2149895801108108, + "learning_rate": 1.7186037128607107e-06, + "loss": 0.618930459022522, + "step": 2452 + }, + { + "epoch": 0.565598339866267, + "grad_norm": 1.1681250836374313, + "learning_rate": 1.7183385240191183e-06, + "loss": 0.5841591358184814, + "step": 2453 + }, + { + "epoch": 0.5658289139958497, + "grad_norm": 1.2481599814364495, + "learning_rate": 1.7180732307597413e-06, + "loss": 0.4915233850479126, + "step": 2454 + }, + { + "epoch": 0.5660594881254323, + "grad_norm": 1.127625184290067, + "learning_rate": 1.7178078331211429e-06, + "loss": 0.46732476353645325, + "step": 2455 + }, + { + "epoch": 0.566290062255015, + "grad_norm": 1.1121526599443385, + "learning_rate": 1.7175423311419013e-06, + "loss": 0.4640737771987915, + "step": 2456 + }, + { + "epoch": 0.5665206363845976, + "grad_norm": 1.2800685498732043, + "learning_rate": 1.7172767248606095e-06, + "loss": 0.39535683393478394, + "step": 2457 + }, + { + "epoch": 0.5667512105141803, + "grad_norm": 1.196636942462094, + "learning_rate": 1.7170110143158766e-06, + "loss": 0.4782179594039917, + "step": 2458 + }, + { + "epoch": 0.5669817846437629, + "grad_norm": 1.5731644028680265, + "learning_rate": 1.7167451995463258e-06, + "loss": 0.6186003684997559, + "step": 2459 + }, + { + "epoch": 0.5672123587733456, + "grad_norm": 1.3163111292704002, + "learning_rate": 1.7164792805905965e-06, + "loss": 0.4915347099304199, + "step": 2460 + }, + { + "epoch": 0.5674429329029282, + "grad_norm": 1.2683630708246802, + "learning_rate": 1.7162132574873422e-06, + "loss": 0.4789005517959595, + "step": 2461 + }, + { + "epoch": 0.567673507032511, + "grad_norm": 1.6928847577315913, + "learning_rate": 1.7159471302752326e-06, + "loss": 0.6307233572006226, + "step": 2462 + }, + { + "epoch": 0.5679040811620936, + "grad_norm": 1.240574680316347, + "learning_rate": 
1.7156808989929514e-06, + "loss": 0.5278424024581909, + "step": 2463 + }, + { + "epoch": 0.5681346552916763, + "grad_norm": 1.4388020329709479, + "learning_rate": 1.7154145636791988e-06, + "loss": 0.48552995920181274, + "step": 2464 + }, + { + "epoch": 0.5683652294212589, + "grad_norm": 1.3679954470869684, + "learning_rate": 1.7151481243726885e-06, + "loss": 0.5125370621681213, + "step": 2465 + }, + { + "epoch": 0.5685958035508416, + "grad_norm": 1.3448408660581435, + "learning_rate": 1.7148815811121506e-06, + "loss": 0.44231730699539185, + "step": 2466 + }, + { + "epoch": 0.5688263776804242, + "grad_norm": 1.367567415522102, + "learning_rate": 1.7146149339363296e-06, + "loss": 0.5593529939651489, + "step": 2467 + }, + { + "epoch": 0.5690569518100069, + "grad_norm": 1.347377301704866, + "learning_rate": 1.714348182883986e-06, + "loss": 0.4830925464630127, + "step": 2468 + }, + { + "epoch": 0.5692875259395895, + "grad_norm": 1.4913136319748062, + "learning_rate": 1.714081327993894e-06, + "loss": 0.5538743734359741, + "step": 2469 + }, + { + "epoch": 0.5695181000691723, + "grad_norm": 1.4135532975212044, + "learning_rate": 1.7138143693048441e-06, + "loss": 0.5145905613899231, + "step": 2470 + }, + { + "epoch": 0.5697486741987549, + "grad_norm": 1.301183082915478, + "learning_rate": 1.713547306855641e-06, + "loss": 0.47706612944602966, + "step": 2471 + }, + { + "epoch": 0.5699792483283376, + "grad_norm": 1.2528774428968483, + "learning_rate": 1.7132801406851056e-06, + "loss": 0.45162689685821533, + "step": 2472 + }, + { + "epoch": 0.5702098224579202, + "grad_norm": 1.5721475156494655, + "learning_rate": 1.7130128708320727e-06, + "loss": 0.5141111612319946, + "step": 2473 + }, + { + "epoch": 0.5704403965875029, + "grad_norm": 1.0845779630695374, + "learning_rate": 1.7127454973353932e-06, + "loss": 0.4443173408508301, + "step": 2474 + }, + { + "epoch": 0.5706709707170855, + "grad_norm": 1.2704796440823871, + "learning_rate": 1.7124780202339317e-06, + "loss": 
0.4162046015262604, + "step": 2475 + }, + { + "epoch": 0.5709015448466682, + "grad_norm": 1.100254820278883, + "learning_rate": 1.7122104395665695e-06, + "loss": 0.44526439905166626, + "step": 2476 + }, + { + "epoch": 0.5711321189762508, + "grad_norm": 1.3237501807128542, + "learning_rate": 1.7119427553722016e-06, + "loss": 0.5069452524185181, + "step": 2477 + }, + { + "epoch": 0.5713626931058335, + "grad_norm": 1.2833720010816703, + "learning_rate": 1.7116749676897393e-06, + "loss": 0.46709829568862915, + "step": 2478 + }, + { + "epoch": 0.5715932672354161, + "grad_norm": 1.2011083992406753, + "learning_rate": 1.7114070765581078e-06, + "loss": 0.5443992614746094, + "step": 2479 + }, + { + "epoch": 0.5718238413649989, + "grad_norm": 1.5805836267397864, + "learning_rate": 1.7111390820162477e-06, + "loss": 0.4307284653186798, + "step": 2480 + }, + { + "epoch": 0.5720544154945815, + "grad_norm": 1.272693158326629, + "learning_rate": 1.7108709841031148e-06, + "loss": 0.4753509759902954, + "step": 2481 + }, + { + "epoch": 0.5722849896241642, + "grad_norm": 1.3966851487133662, + "learning_rate": 1.7106027828576798e-06, + "loss": 0.5689436197280884, + "step": 2482 + }, + { + "epoch": 0.5725155637537468, + "grad_norm": 1.3535603859222731, + "learning_rate": 1.710334478318929e-06, + "loss": 0.47182410955429077, + "step": 2483 + }, + { + "epoch": 0.5727461378833295, + "grad_norm": 1.4415402220476166, + "learning_rate": 1.7100660705258623e-06, + "loss": 0.4418888986110687, + "step": 2484 + }, + { + "epoch": 0.5729767120129121, + "grad_norm": 1.0842485548099412, + "learning_rate": 1.709797559517496e-06, + "loss": 0.4315544366836548, + "step": 2485 + }, + { + "epoch": 0.5732072861424948, + "grad_norm": 1.136143164844157, + "learning_rate": 1.709528945332861e-06, + "loss": 0.34541741013526917, + "step": 2486 + }, + { + "epoch": 0.5734378602720774, + "grad_norm": 1.444798755487831, + "learning_rate": 1.709260228011003e-06, + "loss": 0.5380317568778992, + "step": 2487 + }, + { + 
"epoch": 0.5736684344016602, + "grad_norm": 1.1490218932398577, + "learning_rate": 1.7089914075909824e-06, + "loss": 0.5017478466033936, + "step": 2488 + }, + { + "epoch": 0.5738990085312428, + "grad_norm": 1.317791376396268, + "learning_rate": 1.7087224841118756e-06, + "loss": 0.5608090162277222, + "step": 2489 + }, + { + "epoch": 0.5741295826608255, + "grad_norm": 1.3491498137629283, + "learning_rate": 1.708453457612773e-06, + "loss": 0.5360782146453857, + "step": 2490 + }, + { + "epoch": 0.5743601567904081, + "grad_norm": 1.3100243824681166, + "learning_rate": 1.7081843281327802e-06, + "loss": 0.5638090372085571, + "step": 2491 + }, + { + "epoch": 0.5745907309199908, + "grad_norm": 1.2532603581217905, + "learning_rate": 1.707915095711018e-06, + "loss": 0.45777082443237305, + "step": 2492 + }, + { + "epoch": 0.5748213050495734, + "grad_norm": 1.2028357712850113, + "learning_rate": 1.7076457603866224e-06, + "loss": 0.5423707962036133, + "step": 2493 + }, + { + "epoch": 0.5750518791791561, + "grad_norm": 1.3752974790416335, + "learning_rate": 1.7073763221987436e-06, + "loss": 0.4286508560180664, + "step": 2494 + }, + { + "epoch": 0.5752824533087387, + "grad_norm": 1.1304014566480758, + "learning_rate": 1.7071067811865474e-06, + "loss": 0.4197548031806946, + "step": 2495 + }, + { + "epoch": 0.5755130274383214, + "grad_norm": 1.1820720623961845, + "learning_rate": 1.7068371373892142e-06, + "loss": 0.47944843769073486, + "step": 2496 + }, + { + "epoch": 0.575743601567904, + "grad_norm": 1.5454364363464301, + "learning_rate": 1.7065673908459396e-06, + "loss": 0.49708908796310425, + "step": 2497 + }, + { + "epoch": 0.5759741756974868, + "grad_norm": 1.2002677488287707, + "learning_rate": 1.706297541595934e-06, + "loss": 0.46402662992477417, + "step": 2498 + }, + { + "epoch": 0.5762047498270694, + "grad_norm": 1.2375577528106843, + "learning_rate": 1.7060275896784222e-06, + "loss": 0.4665846824645996, + "step": 2499 + }, + { + "epoch": 0.5764353239566521, + "grad_norm": 
1.333335025499966, + "learning_rate": 1.7057575351326452e-06, + "loss": 0.511766791343689, + "step": 2500 + }, + { + "epoch": 0.5766658980862347, + "grad_norm": 1.3129729051878996, + "learning_rate": 1.7054873779978578e-06, + "loss": 0.5731323957443237, + "step": 2501 + }, + { + "epoch": 0.5768964722158174, + "grad_norm": 1.208575824869893, + "learning_rate": 1.70521711831333e-06, + "loss": 0.43246185779571533, + "step": 2502 + }, + { + "epoch": 0.5771270463454, + "grad_norm": 1.3743994267646191, + "learning_rate": 1.704946756118347e-06, + "loss": 0.5062395334243774, + "step": 2503 + }, + { + "epoch": 0.5773576204749827, + "grad_norm": 1.2169597850499592, + "learning_rate": 1.7046762914522087e-06, + "loss": 0.5010061264038086, + "step": 2504 + }, + { + "epoch": 0.5775881946045653, + "grad_norm": 1.1915100175955862, + "learning_rate": 1.7044057243542293e-06, + "loss": 0.5118759870529175, + "step": 2505 + }, + { + "epoch": 0.5778187687341481, + "grad_norm": 1.2406153903833703, + "learning_rate": 1.7041350548637392e-06, + "loss": 0.5796714425086975, + "step": 2506 + }, + { + "epoch": 0.5780493428637307, + "grad_norm": 1.198072830487735, + "learning_rate": 1.7038642830200828e-06, + "loss": 0.43587976694107056, + "step": 2507 + }, + { + "epoch": 0.5782799169933134, + "grad_norm": 1.0836383921827997, + "learning_rate": 1.7035934088626193e-06, + "loss": 0.4780135154724121, + "step": 2508 + }, + { + "epoch": 0.578510491122896, + "grad_norm": 1.2949967246283594, + "learning_rate": 1.7033224324307232e-06, + "loss": 0.48039600253105164, + "step": 2509 + }, + { + "epoch": 0.5787410652524787, + "grad_norm": 1.4288262034065056, + "learning_rate": 1.7030513537637835e-06, + "loss": 0.48075419664382935, + "step": 2510 + }, + { + "epoch": 0.5789716393820613, + "grad_norm": 1.294455603546607, + "learning_rate": 1.7027801729012044e-06, + "loss": 0.5006246566772461, + "step": 2511 + }, + { + "epoch": 0.579202213511644, + "grad_norm": 1.3239915881424993, + "learning_rate": 
1.7025088898824046e-06, + "loss": 0.550139307975769, + "step": 2512 + }, + { + "epoch": 0.5794327876412266, + "grad_norm": 1.273345251271078, + "learning_rate": 1.7022375047468178e-06, + "loss": 0.5228495001792908, + "step": 2513 + }, + { + "epoch": 0.5796633617708093, + "grad_norm": 1.223108155250479, + "learning_rate": 1.701966017533893e-06, + "loss": 0.4783739149570465, + "step": 2514 + }, + { + "epoch": 0.579893935900392, + "grad_norm": 1.3364695116135945, + "learning_rate": 1.701694428283093e-06, + "loss": 0.47218769788742065, + "step": 2515 + }, + { + "epoch": 0.5801245100299747, + "grad_norm": 1.271458214482931, + "learning_rate": 1.7014227370338967e-06, + "loss": 0.5340671539306641, + "step": 2516 + }, + { + "epoch": 0.5803550841595573, + "grad_norm": 1.1389068048001012, + "learning_rate": 1.7011509438257967e-06, + "loss": 0.4629259407520294, + "step": 2517 + }, + { + "epoch": 0.58058565828914, + "grad_norm": 1.6036419177897663, + "learning_rate": 1.7008790486983013e-06, + "loss": 0.6334242820739746, + "step": 2518 + }, + { + "epoch": 0.5808162324187226, + "grad_norm": 1.3328081079482175, + "learning_rate": 1.7006070516909327e-06, + "loss": 0.544147789478302, + "step": 2519 + }, + { + "epoch": 0.5810468065483053, + "grad_norm": 1.2269860514972317, + "learning_rate": 1.700334952843229e-06, + "loss": 0.47045618295669556, + "step": 2520 + }, + { + "epoch": 0.5812773806778879, + "grad_norm": 1.4613594501045561, + "learning_rate": 1.700062752194742e-06, + "loss": 0.4582393169403076, + "step": 2521 + }, + { + "epoch": 0.5815079548074706, + "grad_norm": 1.335231293513905, + "learning_rate": 1.699790449785039e-06, + "loss": 0.507327139377594, + "step": 2522 + }, + { + "epoch": 0.5817385289370532, + "grad_norm": 1.3812182502399277, + "learning_rate": 1.6995180456537022e-06, + "loss": 0.5345891714096069, + "step": 2523 + }, + { + "epoch": 0.581969103066636, + "grad_norm": 1.3766088909590293, + "learning_rate": 1.6992455398403277e-06, + "loss": 0.4847550094127655, + 
"step": 2524 + }, + { + "epoch": 0.5821996771962186, + "grad_norm": 1.2694420906725428, + "learning_rate": 1.6989729323845276e-06, + "loss": 0.4472479820251465, + "step": 2525 + }, + { + "epoch": 0.5824302513258013, + "grad_norm": 1.1676894033843348, + "learning_rate": 1.698700223325928e-06, + "loss": 0.4426107108592987, + "step": 2526 + }, + { + "epoch": 0.5826608254553839, + "grad_norm": 1.3669509353012406, + "learning_rate": 1.6984274127041696e-06, + "loss": 0.4814276099205017, + "step": 2527 + }, + { + "epoch": 0.5828913995849666, + "grad_norm": 1.3849093780882, + "learning_rate": 1.6981545005589084e-06, + "loss": 0.5286451578140259, + "step": 2528 + }, + { + "epoch": 0.5831219737145492, + "grad_norm": 1.3586645163698117, + "learning_rate": 1.6978814869298152e-06, + "loss": 0.5291767120361328, + "step": 2529 + }, + { + "epoch": 0.5833525478441319, + "grad_norm": 1.4376369092272532, + "learning_rate": 1.6976083718565748e-06, + "loss": 0.5807399749755859, + "step": 2530 + }, + { + "epoch": 0.5835831219737145, + "grad_norm": 1.5620885730430554, + "learning_rate": 1.6973351553788878e-06, + "loss": 0.5489222407341003, + "step": 2531 + }, + { + "epoch": 0.5838136961032973, + "grad_norm": 1.5080367455114985, + "learning_rate": 1.6970618375364683e-06, + "loss": 0.5295521020889282, + "step": 2532 + }, + { + "epoch": 0.5840442702328799, + "grad_norm": 1.281498688581256, + "learning_rate": 1.6967884183690467e-06, + "loss": 0.4979495406150818, + "step": 2533 + }, + { + "epoch": 0.5842748443624626, + "grad_norm": 1.0681769287073983, + "learning_rate": 1.6965148979163661e-06, + "loss": 0.45667344331741333, + "step": 2534 + }, + { + "epoch": 0.5845054184920452, + "grad_norm": 1.1552847245372566, + "learning_rate": 1.6962412762181866e-06, + "loss": 0.42687737941741943, + "step": 2535 + }, + { + "epoch": 0.5847359926216279, + "grad_norm": 1.2720388462434997, + "learning_rate": 1.6959675533142815e-06, + "loss": 0.5616278648376465, + "step": 2536 + }, + { + "epoch": 
0.5849665667512105, + "grad_norm": 1.245024966542371, + "learning_rate": 1.6956937292444386e-06, + "loss": 0.4961121678352356, + "step": 2537 + }, + { + "epoch": 0.5851971408807932, + "grad_norm": 1.1864554840937962, + "learning_rate": 1.6954198040484617e-06, + "loss": 0.5115770101547241, + "step": 2538 + }, + { + "epoch": 0.5854277150103758, + "grad_norm": 1.41778667190123, + "learning_rate": 1.6951457777661686e-06, + "loss": 0.540202260017395, + "step": 2539 + }, + { + "epoch": 0.5856582891399585, + "grad_norm": 1.3238570605319384, + "learning_rate": 1.6948716504373914e-06, + "loss": 0.5312114357948303, + "step": 2540 + }, + { + "epoch": 0.5858888632695411, + "grad_norm": 1.1842147435507233, + "learning_rate": 1.694597422101978e-06, + "loss": 0.49323517084121704, + "step": 2541 + }, + { + "epoch": 0.5861194373991239, + "grad_norm": 1.3138451660312804, + "learning_rate": 1.6943230927997894e-06, + "loss": 0.42929738759994507, + "step": 2542 + }, + { + "epoch": 0.5863500115287065, + "grad_norm": 1.2474057622168624, + "learning_rate": 1.6940486625707021e-06, + "loss": 0.45236462354660034, + "step": 2543 + }, + { + "epoch": 0.5865805856582892, + "grad_norm": 1.1944700996273265, + "learning_rate": 1.6937741314546084e-06, + "loss": 0.5129071474075317, + "step": 2544 + }, + { + "epoch": 0.5868111597878718, + "grad_norm": 1.303867373152147, + "learning_rate": 1.693499499491413e-06, + "loss": 0.5562577247619629, + "step": 2545 + }, + { + "epoch": 0.5870417339174545, + "grad_norm": 1.472236761409707, + "learning_rate": 1.6932247667210372e-06, + "loss": 0.5593177080154419, + "step": 2546 + }, + { + "epoch": 0.5872723080470371, + "grad_norm": 1.666463518969871, + "learning_rate": 1.692949933183416e-06, + "loss": 0.5536680221557617, + "step": 2547 + }, + { + "epoch": 0.5875028821766198, + "grad_norm": 1.552275933236934, + "learning_rate": 1.6926749989184993e-06, + "loss": 0.5523338317871094, + "step": 2548 + }, + { + "epoch": 0.5877334563062024, + "grad_norm": 
1.3066438958077835, + "learning_rate": 1.692399963966251e-06, + "loss": 0.41815924644470215, + "step": 2549 + }, + { + "epoch": 0.5879640304357852, + "grad_norm": 1.1800035534558937, + "learning_rate": 1.6921248283666508e-06, + "loss": 0.46959248185157776, + "step": 2550 + }, + { + "epoch": 0.5881946045653678, + "grad_norm": 1.2343992191174948, + "learning_rate": 1.6918495921596928e-06, + "loss": 0.4748489260673523, + "step": 2551 + }, + { + "epoch": 0.5884251786949505, + "grad_norm": 1.853505775613954, + "learning_rate": 1.6915742553853845e-06, + "loss": 0.4541524052619934, + "step": 2552 + }, + { + "epoch": 0.5886557528245331, + "grad_norm": 1.2688298570187295, + "learning_rate": 1.691298818083749e-06, + "loss": 0.47106000781059265, + "step": 2553 + }, + { + "epoch": 0.5888863269541158, + "grad_norm": 1.6112122400264717, + "learning_rate": 1.6910232802948246e-06, + "loss": 0.5364842414855957, + "step": 2554 + }, + { + "epoch": 0.5891169010836984, + "grad_norm": 1.402469759006704, + "learning_rate": 1.690747642058663e-06, + "loss": 0.48388350009918213, + "step": 2555 + }, + { + "epoch": 0.5893474752132811, + "grad_norm": 1.1992143425994695, + "learning_rate": 1.690471903415331e-06, + "loss": 0.5075609683990479, + "step": 2556 + }, + { + "epoch": 0.5895780493428637, + "grad_norm": 1.2039147901396619, + "learning_rate": 1.6901960644049102e-06, + "loss": 0.45098066329956055, + "step": 2557 + }, + { + "epoch": 0.5898086234724463, + "grad_norm": 1.1869247135212617, + "learning_rate": 1.6899201250674966e-06, + "loss": 0.5329077243804932, + "step": 2558 + }, + { + "epoch": 0.590039197602029, + "grad_norm": 1.2771607201573625, + "learning_rate": 1.6896440854432005e-06, + "loss": 0.4632904529571533, + "step": 2559 + }, + { + "epoch": 0.5902697717316117, + "grad_norm": 1.3016593794447966, + "learning_rate": 1.6893679455721474e-06, + "loss": 0.5302451848983765, + "step": 2560 + }, + { + "epoch": 0.5905003458611944, + "grad_norm": 1.1349040723062418, + "learning_rate": 
1.6890917054944768e-06, + "loss": 0.45363447070121765, + "step": 2561 + }, + { + "epoch": 0.590730919990777, + "grad_norm": 1.3869965053274627, + "learning_rate": 1.688815365250343e-06, + "loss": 0.5103914737701416, + "step": 2562 + }, + { + "epoch": 0.5909614941203597, + "grad_norm": 1.2859854063949494, + "learning_rate": 1.6885389248799152e-06, + "loss": 0.45474469661712646, + "step": 2563 + }, + { + "epoch": 0.5911920682499423, + "grad_norm": 1.3905925832105772, + "learning_rate": 1.6882623844233766e-06, + "loss": 0.517952024936676, + "step": 2564 + }, + { + "epoch": 0.591422642379525, + "grad_norm": 1.456181517852448, + "learning_rate": 1.6879857439209245e-06, + "loss": 0.4872232973575592, + "step": 2565 + }, + { + "epoch": 0.5916532165091076, + "grad_norm": 1.146992588808451, + "learning_rate": 1.6877090034127726e-06, + "loss": 0.4938408136367798, + "step": 2566 + }, + { + "epoch": 0.5918837906386903, + "grad_norm": 0.9819996395503116, + "learning_rate": 1.6874321629391469e-06, + "loss": 0.42687565088272095, + "step": 2567 + }, + { + "epoch": 0.592114364768273, + "grad_norm": 1.8882181325825955, + "learning_rate": 1.6871552225402896e-06, + "loss": 0.5272493362426758, + "step": 2568 + }, + { + "epoch": 0.5923449388978557, + "grad_norm": 1.265485903227574, + "learning_rate": 1.6868781822564565e-06, + "loss": 0.4643193185329437, + "step": 2569 + }, + { + "epoch": 0.5925755130274383, + "grad_norm": 1.5054555077342378, + "learning_rate": 1.6866010421279183e-06, + "loss": 0.4957782030105591, + "step": 2570 + }, + { + "epoch": 0.592806087157021, + "grad_norm": 1.2319191303045371, + "learning_rate": 1.6863238021949605e-06, + "loss": 0.442360520362854, + "step": 2571 + }, + { + "epoch": 0.5930366612866036, + "grad_norm": 1.365610357460579, + "learning_rate": 1.6860464624978824e-06, + "loss": 0.5108935832977295, + "step": 2572 + }, + { + "epoch": 0.5932672354161863, + "grad_norm": 1.1047616502548026, + "learning_rate": 1.6857690230769976e-06, + "loss": 
0.46559715270996094, + "step": 2573 + }, + { + "epoch": 0.5934978095457689, + "grad_norm": 1.2296310276846145, + "learning_rate": 1.6854914839726356e-06, + "loss": 0.44752076268196106, + "step": 2574 + }, + { + "epoch": 0.5937283836753516, + "grad_norm": 1.6735698653712807, + "learning_rate": 1.6852138452251387e-06, + "loss": 0.4018149971961975, + "step": 2575 + }, + { + "epoch": 0.5939589578049342, + "grad_norm": 1.407358523561205, + "learning_rate": 1.6849361068748652e-06, + "loss": 0.47711417078971863, + "step": 2576 + }, + { + "epoch": 0.594189531934517, + "grad_norm": 1.3386417354625197, + "learning_rate": 1.684658268962187e-06, + "loss": 0.4671875834465027, + "step": 2577 + }, + { + "epoch": 0.5944201060640996, + "grad_norm": 1.2780841808458634, + "learning_rate": 1.6843803315274906e-06, + "loss": 0.48041921854019165, + "step": 2578 + }, + { + "epoch": 0.5946506801936823, + "grad_norm": 1.105183308056311, + "learning_rate": 1.6841022946111772e-06, + "loss": 0.3444385528564453, + "step": 2579 + }, + { + "epoch": 0.5948812543232649, + "grad_norm": 1.3054472047651338, + "learning_rate": 1.6838241582536619e-06, + "loss": 0.46800029277801514, + "step": 2580 + }, + { + "epoch": 0.5951118284528476, + "grad_norm": 1.7022638621771704, + "learning_rate": 1.683545922495375e-06, + "loss": 0.4362339377403259, + "step": 2581 + }, + { + "epoch": 0.5953424025824302, + "grad_norm": 1.5138702229312708, + "learning_rate": 1.6832675873767606e-06, + "loss": 0.4818536043167114, + "step": 2582 + }, + { + "epoch": 0.5955729767120129, + "grad_norm": 1.1464685816902647, + "learning_rate": 1.6829891529382775e-06, + "loss": 0.47899681329727173, + "step": 2583 + }, + { + "epoch": 0.5958035508415955, + "grad_norm": 1.028545290493661, + "learning_rate": 1.6827106192203995e-06, + "loss": 0.4239576458930969, + "step": 2584 + }, + { + "epoch": 0.5960341249711782, + "grad_norm": 1.299757224081726, + "learning_rate": 1.6824319862636136e-06, + "loss": 0.545168399810791, + "step": 2585 + }, + { + 
"epoch": 0.5962646991007609, + "grad_norm": 1.1433294908143323, + "learning_rate": 1.6821532541084228e-06, + "loss": 0.4238642156124115, + "step": 2586 + }, + { + "epoch": 0.5964952732303436, + "grad_norm": 1.1214453575304018, + "learning_rate": 1.6818744227953422e-06, + "loss": 0.39589810371398926, + "step": 2587 + }, + { + "epoch": 0.5967258473599262, + "grad_norm": 1.1696584305728281, + "learning_rate": 1.6815954923649044e-06, + "loss": 0.4358367919921875, + "step": 2588 + }, + { + "epoch": 0.5969564214895089, + "grad_norm": 1.232714944175718, + "learning_rate": 1.6813164628576538e-06, + "loss": 0.5012080073356628, + "step": 2589 + }, + { + "epoch": 0.5971869956190915, + "grad_norm": 1.0762630624781258, + "learning_rate": 1.6810373343141503e-06, + "loss": 0.4637286365032196, + "step": 2590 + }, + { + "epoch": 0.5974175697486742, + "grad_norm": 1.4947457348694884, + "learning_rate": 1.6807581067749684e-06, + "loss": 0.6130828261375427, + "step": 2591 + }, + { + "epoch": 0.5976481438782568, + "grad_norm": 1.538167494741888, + "learning_rate": 1.680478780280696e-06, + "loss": 0.5430021286010742, + "step": 2592 + }, + { + "epoch": 0.5978787180078395, + "grad_norm": 1.4318445545867842, + "learning_rate": 1.6801993548719368e-06, + "loss": 0.5195741653442383, + "step": 2593 + }, + { + "epoch": 0.5981092921374221, + "grad_norm": 1.4741188457279395, + "learning_rate": 1.6799198305893077e-06, + "loss": 0.5452337265014648, + "step": 2594 + }, + { + "epoch": 0.5983398662670049, + "grad_norm": 1.1858829095847359, + "learning_rate": 1.6796402074734402e-06, + "loss": 0.4802110493183136, + "step": 2595 + }, + { + "epoch": 0.5985704403965875, + "grad_norm": 1.114234548006963, + "learning_rate": 1.679360485564981e-06, + "loss": 0.48554790019989014, + "step": 2596 + }, + { + "epoch": 0.5988010145261702, + "grad_norm": 1.3519600489481014, + "learning_rate": 1.6790806649045896e-06, + "loss": 0.5151324272155762, + "step": 2597 + }, + { + "epoch": 0.5990315886557528, + "grad_norm": 
1.4134149785589025, + "learning_rate": 1.6788007455329419e-06, + "loss": 0.5122699737548828, + "step": 2598 + }, + { + "epoch": 0.5992621627853355, + "grad_norm": 1.0762809832802989, + "learning_rate": 1.6785207274907258e-06, + "loss": 0.47776496410369873, + "step": 2599 + }, + { + "epoch": 0.5994927369149181, + "grad_norm": 1.3625217888513212, + "learning_rate": 1.6782406108186455e-06, + "loss": 0.5653492212295532, + "step": 2600 + }, + { + "epoch": 0.5997233110445008, + "grad_norm": 1.2197147141619178, + "learning_rate": 1.677960395557419e-06, + "loss": 0.44313424825668335, + "step": 2601 + }, + { + "epoch": 0.5999538851740834, + "grad_norm": 1.137470066753919, + "learning_rate": 1.677680081747778e-06, + "loss": 0.40465259552001953, + "step": 2602 + }, + { + "epoch": 0.6001844593036662, + "grad_norm": 1.4481779333184874, + "learning_rate": 1.6773996694304687e-06, + "loss": 0.5488068461418152, + "step": 2603 + }, + { + "epoch": 0.6004150334332488, + "grad_norm": 1.2545703783665254, + "learning_rate": 1.6771191586462523e-06, + "loss": 0.5122859477996826, + "step": 2604 + }, + { + "epoch": 0.6006456075628315, + "grad_norm": 1.2685821503383574, + "learning_rate": 1.6768385494359039e-06, + "loss": 0.47173869609832764, + "step": 2605 + }, + { + "epoch": 0.6008761816924141, + "grad_norm": 1.342808103655164, + "learning_rate": 1.6765578418402129e-06, + "loss": 0.527764081954956, + "step": 2606 + }, + { + "epoch": 0.6011067558219968, + "grad_norm": 1.7106657610470863, + "learning_rate": 1.6762770358999826e-06, + "loss": 0.5399610996246338, + "step": 2607 + }, + { + "epoch": 0.6013373299515794, + "grad_norm": 1.1677908773060481, + "learning_rate": 1.6759961316560314e-06, + "loss": 0.3441581428050995, + "step": 2608 + }, + { + "epoch": 0.6015679040811621, + "grad_norm": 1.2546350672529525, + "learning_rate": 1.6757151291491916e-06, + "loss": 0.5027580857276917, + "step": 2609 + }, + { + "epoch": 0.6017984782107447, + "grad_norm": 1.6099655975362483, + "learning_rate": 
1.6754340284203095e-06, + "loss": 0.3898310363292694, + "step": 2610 + }, + { + "epoch": 0.6020290523403274, + "grad_norm": 1.5075448921993653, + "learning_rate": 1.675152829510246e-06, + "loss": 0.5577199459075928, + "step": 2611 + }, + { + "epoch": 0.60225962646991, + "grad_norm": 1.178797634573082, + "learning_rate": 1.6748715324598763e-06, + "loss": 0.47849035263061523, + "step": 2612 + }, + { + "epoch": 0.6024902005994928, + "grad_norm": 1.2674537093214957, + "learning_rate": 1.6745901373100896e-06, + "loss": 0.46845290064811707, + "step": 2613 + }, + { + "epoch": 0.6027207747290754, + "grad_norm": 1.4078882858329094, + "learning_rate": 1.6743086441017899e-06, + "loss": 0.46008870005607605, + "step": 2614 + }, + { + "epoch": 0.6029513488586581, + "grad_norm": 1.3347721564783812, + "learning_rate": 1.6740270528758948e-06, + "loss": 0.44386154413223267, + "step": 2615 + }, + { + "epoch": 0.6031819229882407, + "grad_norm": 1.2103476019651458, + "learning_rate": 1.6737453636733364e-06, + "loss": 0.495368629693985, + "step": 2616 + }, + { + "epoch": 0.6034124971178234, + "grad_norm": 1.257056760083973, + "learning_rate": 1.6734635765350613e-06, + "loss": 0.519428551197052, + "step": 2617 + }, + { + "epoch": 0.603643071247406, + "grad_norm": 1.5181965589957365, + "learning_rate": 1.6731816915020302e-06, + "loss": 0.49346470832824707, + "step": 2618 + }, + { + "epoch": 0.6038736453769887, + "grad_norm": 1.3323089431428572, + "learning_rate": 1.6728997086152173e-06, + "loss": 0.554854691028595, + "step": 2619 + }, + { + "epoch": 0.6041042195065713, + "grad_norm": 1.503361315997137, + "learning_rate": 1.6726176279156125e-06, + "loss": 0.4930881857872009, + "step": 2620 + }, + { + "epoch": 0.604334793636154, + "grad_norm": 1.1576996092953873, + "learning_rate": 1.6723354494442186e-06, + "loss": 0.4082447588443756, + "step": 2621 + }, + { + "epoch": 0.6045653677657367, + "grad_norm": 1.2572245396068074, + "learning_rate": 1.6720531732420531e-06, + "loss": 
0.5151821374893188, + "step": 2622 + }, + { + "epoch": 0.6047959418953194, + "grad_norm": 1.6316483356509275, + "learning_rate": 1.671770799350148e-06, + "loss": 0.44579264521598816, + "step": 2623 + }, + { + "epoch": 0.605026516024902, + "grad_norm": 1.5349454914737826, + "learning_rate": 1.6714883278095489e-06, + "loss": 0.4937717020511627, + "step": 2624 + }, + { + "epoch": 0.6052570901544847, + "grad_norm": 1.4939841287703146, + "learning_rate": 1.671205758661316e-06, + "loss": 0.46298685669898987, + "step": 2625 + }, + { + "epoch": 0.6054876642840673, + "grad_norm": 1.3089529059854432, + "learning_rate": 1.6709230919465233e-06, + "loss": 0.5535221695899963, + "step": 2626 + }, + { + "epoch": 0.60571823841365, + "grad_norm": 1.2781536932155106, + "learning_rate": 1.6706403277062599e-06, + "loss": 0.5289112329483032, + "step": 2627 + }, + { + "epoch": 0.6059488125432326, + "grad_norm": 1.2619858231183905, + "learning_rate": 1.6703574659816285e-06, + "loss": 0.506280779838562, + "step": 2628 + }, + { + "epoch": 0.6061793866728153, + "grad_norm": 1.366142383501645, + "learning_rate": 1.6700745068137451e-06, + "loss": 0.504257082939148, + "step": 2629 + }, + { + "epoch": 0.606409960802398, + "grad_norm": 1.2835196483556859, + "learning_rate": 1.6697914502437411e-06, + "loss": 0.624682605266571, + "step": 2630 + }, + { + "epoch": 0.6066405349319807, + "grad_norm": 1.1715096985967743, + "learning_rate": 1.6695082963127617e-06, + "loss": 0.4539645314216614, + "step": 2631 + }, + { + "epoch": 0.6068711090615633, + "grad_norm": 1.2852717924915888, + "learning_rate": 1.6692250450619665e-06, + "loss": 0.5461890697479248, + "step": 2632 + }, + { + "epoch": 0.607101683191146, + "grad_norm": 1.2251930368732282, + "learning_rate": 1.6689416965325282e-06, + "loss": 0.615606427192688, + "step": 2633 + }, + { + "epoch": 0.6073322573207286, + "grad_norm": 1.3904526684847855, + "learning_rate": 1.668658250765635e-06, + "loss": 0.5355387926101685, + "step": 2634 + }, + { + "epoch": 
0.6075628314503113, + "grad_norm": 1.1464900003631002, + "learning_rate": 1.6683747078024886e-06, + "loss": 0.5804985165596008, + "step": 2635 + }, + { + "epoch": 0.6077934055798939, + "grad_norm": 1.1983123193544134, + "learning_rate": 1.6680910676843042e-06, + "loss": 0.4514031410217285, + "step": 2636 + }, + { + "epoch": 0.6080239797094766, + "grad_norm": 1.3446092692413514, + "learning_rate": 1.6678073304523123e-06, + "loss": 0.5621001720428467, + "step": 2637 + }, + { + "epoch": 0.6082545538390592, + "grad_norm": 1.3749875179413227, + "learning_rate": 1.667523496147757e-06, + "loss": 0.49387669563293457, + "step": 2638 + }, + { + "epoch": 0.608485127968642, + "grad_norm": 1.0479438264918854, + "learning_rate": 1.6672395648118966e-06, + "loss": 0.5857938528060913, + "step": 2639 + }, + { + "epoch": 0.6087157020982246, + "grad_norm": 1.149056345239141, + "learning_rate": 1.6669555364860029e-06, + "loss": 0.46403199434280396, + "step": 2640 + }, + { + "epoch": 0.6089462762278073, + "grad_norm": 1.2068025098167319, + "learning_rate": 1.6666714112113627e-06, + "loss": 0.4998488128185272, + "step": 2641 + }, + { + "epoch": 0.6091768503573899, + "grad_norm": 1.3686546841392573, + "learning_rate": 1.6663871890292765e-06, + "loss": 0.6291745901107788, + "step": 2642 + }, + { + "epoch": 0.6094074244869726, + "grad_norm": 1.7034971765108011, + "learning_rate": 1.6661028699810587e-06, + "loss": 0.6326058506965637, + "step": 2643 + }, + { + "epoch": 0.6096379986165552, + "grad_norm": 1.2748339439376004, + "learning_rate": 1.6658184541080378e-06, + "loss": 0.5737805366516113, + "step": 2644 + }, + { + "epoch": 0.6098685727461379, + "grad_norm": 1.435593858390691, + "learning_rate": 1.6655339414515568e-06, + "loss": 0.565047025680542, + "step": 2645 + }, + { + "epoch": 0.6100991468757205, + "grad_norm": 1.154269897254632, + "learning_rate": 1.6652493320529724e-06, + "loss": 0.5157296061515808, + "step": 2646 + }, + { + "epoch": 0.6103297210053032, + "grad_norm": 
1.2671967095996914, + "learning_rate": 1.6649646259536554e-06, + "loss": 0.4475112855434418, + "step": 2647 + }, + { + "epoch": 0.6105602951348859, + "grad_norm": 1.4397592539357233, + "learning_rate": 1.6646798231949911e-06, + "loss": 0.5072107315063477, + "step": 2648 + }, + { + "epoch": 0.6107908692644686, + "grad_norm": 1.3901386223871963, + "learning_rate": 1.6643949238183778e-06, + "loss": 0.44673952460289, + "step": 2649 + }, + { + "epoch": 0.6110214433940512, + "grad_norm": 1.4046630639478026, + "learning_rate": 1.6641099278652293e-06, + "loss": 0.47460734844207764, + "step": 2650 + }, + { + "epoch": 0.6112520175236339, + "grad_norm": 1.251836663583678, + "learning_rate": 1.6638248353769718e-06, + "loss": 0.4529770612716675, + "step": 2651 + }, + { + "epoch": 0.6114825916532165, + "grad_norm": 1.4298404685971746, + "learning_rate": 1.6635396463950473e-06, + "loss": 0.5200958251953125, + "step": 2652 + }, + { + "epoch": 0.6117131657827992, + "grad_norm": 1.4871792439140996, + "learning_rate": 1.66325436096091e-06, + "loss": 0.465969979763031, + "step": 2653 + }, + { + "epoch": 0.6119437399123818, + "grad_norm": 1.1085493213804483, + "learning_rate": 1.6629689791160298e-06, + "loss": 0.5173276662826538, + "step": 2654 + }, + { + "epoch": 0.6121743140419645, + "grad_norm": 1.246647464420017, + "learning_rate": 1.6626835009018892e-06, + "loss": 0.5539907217025757, + "step": 2655 + }, + { + "epoch": 0.6124048881715471, + "grad_norm": 1.1686862955670068, + "learning_rate": 1.6623979263599857e-06, + "loss": 0.5617278814315796, + "step": 2656 + }, + { + "epoch": 0.6126354623011299, + "grad_norm": 1.3640942620216159, + "learning_rate": 1.6621122555318304e-06, + "loss": 0.46238285303115845, + "step": 2657 + }, + { + "epoch": 0.6128660364307125, + "grad_norm": 1.4695540598112733, + "learning_rate": 1.6618264884589484e-06, + "loss": 0.49247878789901733, + "step": 2658 + }, + { + "epoch": 0.6130966105602952, + "grad_norm": 1.0811892876151687, + "learning_rate": 
1.6615406251828793e-06, + "loss": 0.4844072163105011, + "step": 2659 + }, + { + "epoch": 0.6133271846898778, + "grad_norm": 1.2024921886284354, + "learning_rate": 1.6612546657451754e-06, + "loss": 0.47372323274612427, + "step": 2660 + }, + { + "epoch": 0.6135577588194605, + "grad_norm": 1.299485129998275, + "learning_rate": 1.660968610187404e-06, + "loss": 0.5287426114082336, + "step": 2661 + }, + { + "epoch": 0.6137883329490431, + "grad_norm": 1.4640884136716181, + "learning_rate": 1.6606824585511471e-06, + "loss": 0.5862994194030762, + "step": 2662 + }, + { + "epoch": 0.6140189070786258, + "grad_norm": 1.0158009777389652, + "learning_rate": 1.6603962108779986e-06, + "loss": 0.4866197109222412, + "step": 2663 + }, + { + "epoch": 0.6142494812082084, + "grad_norm": 1.408246184243547, + "learning_rate": 1.660109867209568e-06, + "loss": 0.5561861991882324, + "step": 2664 + }, + { + "epoch": 0.6144800553377912, + "grad_norm": 1.214620364544681, + "learning_rate": 1.659823427587478e-06, + "loss": 0.4878644645214081, + "step": 2665 + }, + { + "epoch": 0.6147106294673738, + "grad_norm": 1.3262957238727335, + "learning_rate": 1.659536892053366e-06, + "loss": 0.5371976494789124, + "step": 2666 + }, + { + "epoch": 0.6149412035969565, + "grad_norm": 1.2817478175527077, + "learning_rate": 1.6592502606488824e-06, + "loss": 0.4816581606864929, + "step": 2667 + }, + { + "epoch": 0.6151717777265391, + "grad_norm": 1.1536826566839264, + "learning_rate": 1.6589635334156919e-06, + "loss": 0.5105183124542236, + "step": 2668 + }, + { + "epoch": 0.6154023518561217, + "grad_norm": 1.4584261311401567, + "learning_rate": 1.6586767103954737e-06, + "loss": 0.5524129271507263, + "step": 2669 + }, + { + "epoch": 0.6156329259857044, + "grad_norm": 1.3107384301518328, + "learning_rate": 1.6583897916299204e-06, + "loss": 0.42373913526535034, + "step": 2670 + }, + { + "epoch": 0.615863500115287, + "grad_norm": 1.3724263799580212, + "learning_rate": 1.658102777160738e-06, + "loss": 
0.5620803833007812, + "step": 2671 + }, + { + "epoch": 0.6160940742448697, + "grad_norm": 1.3004346965884186, + "learning_rate": 1.6578156670296472e-06, + "loss": 0.38180166482925415, + "step": 2672 + }, + { + "epoch": 0.6163246483744523, + "grad_norm": 1.2109058692777805, + "learning_rate": 1.6575284612783825e-06, + "loss": 0.48596519231796265, + "step": 2673 + }, + { + "epoch": 0.616555222504035, + "grad_norm": 1.1846928230852602, + "learning_rate": 1.657241159948692e-06, + "loss": 0.5098127126693726, + "step": 2674 + }, + { + "epoch": 0.6167857966336177, + "grad_norm": 1.5943292852368571, + "learning_rate": 1.6569537630823382e-06, + "loss": 0.5650018453598022, + "step": 2675 + }, + { + "epoch": 0.6170163707632004, + "grad_norm": 1.1501551859696775, + "learning_rate": 1.6566662707210967e-06, + "loss": 0.45061948895454407, + "step": 2676 + }, + { + "epoch": 0.617246944892783, + "grad_norm": 1.3028951742766879, + "learning_rate": 1.6563786829067576e-06, + "loss": 0.4292137622833252, + "step": 2677 + }, + { + "epoch": 0.6174775190223657, + "grad_norm": 1.269567036808456, + "learning_rate": 1.656090999681125e-06, + "loss": 0.4837046265602112, + "step": 2678 + }, + { + "epoch": 0.6177080931519483, + "grad_norm": 1.9486185906204885, + "learning_rate": 1.6558032210860162e-06, + "loss": 0.43580353260040283, + "step": 2679 + }, + { + "epoch": 0.617938667281531, + "grad_norm": 1.2529677917985589, + "learning_rate": 1.6555153471632628e-06, + "loss": 0.47321656346321106, + "step": 2680 + }, + { + "epoch": 0.6181692414111136, + "grad_norm": 1.1423229113084605, + "learning_rate": 1.65522737795471e-06, + "loss": 0.47431111335754395, + "step": 2681 + }, + { + "epoch": 0.6183998155406963, + "grad_norm": 0.9698177160310311, + "learning_rate": 1.6549393135022181e-06, + "loss": 0.38062599301338196, + "step": 2682 + }, + { + "epoch": 0.618630389670279, + "grad_norm": 1.2758905094442272, + "learning_rate": 1.6546511538476584e-06, + "loss": 0.5941839218139648, + "step": 2683 + }, + { + 
"epoch": 0.6188609637998617, + "grad_norm": 1.453087551621585, + "learning_rate": 1.6543628990329195e-06, + "loss": 0.5323158502578735, + "step": 2684 + }, + { + "epoch": 0.6190915379294443, + "grad_norm": 1.100143863509344, + "learning_rate": 1.654074549099901e-06, + "loss": 0.3814772367477417, + "step": 2685 + }, + { + "epoch": 0.619322112059027, + "grad_norm": 1.5499952709692644, + "learning_rate": 1.6537861040905181e-06, + "loss": 0.5520694255828857, + "step": 2686 + }, + { + "epoch": 0.6195526861886096, + "grad_norm": 1.297782443862308, + "learning_rate": 1.653497564046699e-06, + "loss": 0.5514999628067017, + "step": 2687 + }, + { + "epoch": 0.6197832603181923, + "grad_norm": 1.2170603559624027, + "learning_rate": 1.653208929010386e-06, + "loss": 0.39057493209838867, + "step": 2688 + }, + { + "epoch": 0.6200138344477749, + "grad_norm": 1.0224470752428403, + "learning_rate": 1.6529201990235352e-06, + "loss": 0.4941304922103882, + "step": 2689 + }, + { + "epoch": 0.6202444085773576, + "grad_norm": 1.2590211215766611, + "learning_rate": 1.6526313741281164e-06, + "loss": 0.539762020111084, + "step": 2690 + }, + { + "epoch": 0.6204749827069402, + "grad_norm": 1.3801421787603734, + "learning_rate": 1.6523424543661127e-06, + "loss": 0.49524787068367004, + "step": 2691 + }, + { + "epoch": 0.620705556836523, + "grad_norm": 1.2158625492501351, + "learning_rate": 1.6520534397795225e-06, + "loss": 0.4261528253555298, + "step": 2692 + }, + { + "epoch": 0.6209361309661056, + "grad_norm": 1.3188986304771895, + "learning_rate": 1.6517643304103563e-06, + "loss": 0.578548789024353, + "step": 2693 + }, + { + "epoch": 0.6211667050956883, + "grad_norm": 1.24168526725964, + "learning_rate": 1.6514751263006393e-06, + "loss": 0.4766680598258972, + "step": 2694 + }, + { + "epoch": 0.6213972792252709, + "grad_norm": 1.135518406763033, + "learning_rate": 1.6511858274924098e-06, + "loss": 0.4146459996700287, + "step": 2695 + }, + { + "epoch": 0.6216278533548536, + "grad_norm": 
1.4632792907408574, + "learning_rate": 1.650896434027721e-06, + "loss": 0.5148390531539917, + "step": 2696 + }, + { + "epoch": 0.6218584274844362, + "grad_norm": 1.1678475162221296, + "learning_rate": 1.6506069459486388e-06, + "loss": 0.4830890893936157, + "step": 2697 + }, + { + "epoch": 0.6220890016140189, + "grad_norm": 1.2027318756470287, + "learning_rate": 1.6503173632972434e-06, + "loss": 0.4550463557243347, + "step": 2698 + }, + { + "epoch": 0.6223195757436015, + "grad_norm": 1.3023820822101895, + "learning_rate": 1.6500276861156284e-06, + "loss": 0.5811448097229004, + "step": 2699 + }, + { + "epoch": 0.6225501498731842, + "grad_norm": 1.3807858518585416, + "learning_rate": 1.6497379144459014e-06, + "loss": 0.44733545184135437, + "step": 2700 + }, + { + "epoch": 0.6227807240027669, + "grad_norm": 1.103384717152327, + "learning_rate": 1.6494480483301835e-06, + "loss": 0.4379687011241913, + "step": 2701 + }, + { + "epoch": 0.6230112981323496, + "grad_norm": 1.326644045971959, + "learning_rate": 1.6491580878106102e-06, + "loss": 0.5163959860801697, + "step": 2702 + }, + { + "epoch": 0.6232418722619322, + "grad_norm": 1.2037310331107272, + "learning_rate": 1.6488680329293297e-06, + "loss": 0.5636980533599854, + "step": 2703 + }, + { + "epoch": 0.6234724463915149, + "grad_norm": 1.1847301227909297, + "learning_rate": 1.6485778837285044e-06, + "loss": 0.46942776441574097, + "step": 2704 + }, + { + "epoch": 0.6237030205210975, + "grad_norm": 1.3867166397057658, + "learning_rate": 1.6482876402503103e-06, + "loss": 0.5104436278343201, + "step": 2705 + }, + { + "epoch": 0.6239335946506802, + "grad_norm": 1.2701601489299654, + "learning_rate": 1.6479973025369379e-06, + "loss": 0.4689507484436035, + "step": 2706 + }, + { + "epoch": 0.6241641687802628, + "grad_norm": 1.2388644364900292, + "learning_rate": 1.64770687063059e-06, + "loss": 0.4009973406791687, + "step": 2707 + }, + { + "epoch": 0.6243947429098455, + "grad_norm": 1.4958191711517836, + "learning_rate": 
1.6474163445734846e-06, + "loss": 0.4938286542892456, + "step": 2708 + }, + { + "epoch": 0.6246253170394281, + "grad_norm": 1.2939637643231117, + "learning_rate": 1.6471257244078519e-06, + "loss": 0.4756525754928589, + "step": 2709 + }, + { + "epoch": 0.6248558911690109, + "grad_norm": 1.0308841763344028, + "learning_rate": 1.6468350101759366e-06, + "loss": 0.4322332739830017, + "step": 2710 + }, + { + "epoch": 0.6250864652985935, + "grad_norm": 1.381148895283306, + "learning_rate": 1.6465442019199972e-06, + "loss": 0.4605666995048523, + "step": 2711 + }, + { + "epoch": 0.6253170394281762, + "grad_norm": 1.3288993921232848, + "learning_rate": 1.6462532996823053e-06, + "loss": 0.4576036334037781, + "step": 2712 + }, + { + "epoch": 0.6255476135577588, + "grad_norm": 1.1587792990864858, + "learning_rate": 1.645962303505147e-06, + "loss": 0.4860233664512634, + "step": 2713 + }, + { + "epoch": 0.6257781876873415, + "grad_norm": 1.2195714743605923, + "learning_rate": 1.6456712134308213e-06, + "loss": 0.4717915654182434, + "step": 2714 + }, + { + "epoch": 0.6260087618169241, + "grad_norm": 1.1008237671202603, + "learning_rate": 1.645380029501641e-06, + "loss": 0.49637067317962646, + "step": 2715 + }, + { + "epoch": 0.6262393359465068, + "grad_norm": 1.2218828759453872, + "learning_rate": 1.6450887517599326e-06, + "loss": 0.45388346910476685, + "step": 2716 + }, + { + "epoch": 0.6264699100760894, + "grad_norm": 1.6333623536070287, + "learning_rate": 1.6447973802480362e-06, + "loss": 0.5549031496047974, + "step": 2717 + }, + { + "epoch": 0.6267004842056721, + "grad_norm": 1.333805192555573, + "learning_rate": 1.644505915008306e-06, + "loss": 0.39759719371795654, + "step": 2718 + }, + { + "epoch": 0.6269310583352548, + "grad_norm": 1.2648542744381963, + "learning_rate": 1.644214356083109e-06, + "loss": 0.5126739740371704, + "step": 2719 + }, + { + "epoch": 0.6271616324648375, + "grad_norm": 1.1846129595938097, + "learning_rate": 1.6439227035148265e-06, + "loss": 
0.41424083709716797, + "step": 2720 + }, + { + "epoch": 0.6273922065944201, + "grad_norm": 1.2295786085250646, + "learning_rate": 1.643630957345853e-06, + "loss": 0.5829803943634033, + "step": 2721 + }, + { + "epoch": 0.6276227807240028, + "grad_norm": 1.2114307243350246, + "learning_rate": 1.6433391176185972e-06, + "loss": 0.4736567437648773, + "step": 2722 + }, + { + "epoch": 0.6278533548535854, + "grad_norm": 1.4670818430092263, + "learning_rate": 1.6430471843754804e-06, + "loss": 0.41305306553840637, + "step": 2723 + }, + { + "epoch": 0.6280839289831681, + "grad_norm": 1.5480231340195962, + "learning_rate": 1.6427551576589383e-06, + "loss": 0.38422563672065735, + "step": 2724 + }, + { + "epoch": 0.6283145031127507, + "grad_norm": 1.3725795006115715, + "learning_rate": 1.6424630375114199e-06, + "loss": 0.48302626609802246, + "step": 2725 + }, + { + "epoch": 0.6285450772423334, + "grad_norm": 1.2880102228926575, + "learning_rate": 1.6421708239753875e-06, + "loss": 0.4657328128814697, + "step": 2726 + }, + { + "epoch": 0.628775651371916, + "grad_norm": 1.4057295929235551, + "learning_rate": 1.641878517093318e-06, + "loss": 0.46126431226730347, + "step": 2727 + }, + { + "epoch": 0.6290062255014988, + "grad_norm": 1.3246078376538457, + "learning_rate": 1.6415861169077007e-06, + "loss": 0.5196214914321899, + "step": 2728 + }, + { + "epoch": 0.6292367996310814, + "grad_norm": 1.4794856753558834, + "learning_rate": 1.641293623461039e-06, + "loss": 0.5007073879241943, + "step": 2729 + }, + { + "epoch": 0.6294673737606641, + "grad_norm": 1.1543847272279724, + "learning_rate": 1.64100103679585e-06, + "loss": 0.4699769616127014, + "step": 2730 + }, + { + "epoch": 0.6296979478902467, + "grad_norm": 1.3221766888407216, + "learning_rate": 1.6407083569546636e-06, + "loss": 0.5487842559814453, + "step": 2731 + }, + { + "epoch": 0.6299285220198294, + "grad_norm": 1.0556125358940756, + "learning_rate": 1.6404155839800244e-06, + "loss": 0.42733538150787354, + "step": 2732 + }, + { 
+ "epoch": 0.630159096149412, + "grad_norm": 1.1933689155818472, + "learning_rate": 1.64012271791449e-06, + "loss": 0.5105363726615906, + "step": 2733 + }, + { + "epoch": 0.6303896702789947, + "grad_norm": 1.3185367260440977, + "learning_rate": 1.6398297588006305e-06, + "loss": 0.5836968421936035, + "step": 2734 + }, + { + "epoch": 0.6306202444085773, + "grad_norm": 1.3830049962050668, + "learning_rate": 1.639536706681031e-06, + "loss": 0.4350558817386627, + "step": 2735 + }, + { + "epoch": 0.63085081853816, + "grad_norm": 1.4225393539645832, + "learning_rate": 1.63924356159829e-06, + "loss": 0.5388341546058655, + "step": 2736 + }, + { + "epoch": 0.6310813926677427, + "grad_norm": 1.1218759160612528, + "learning_rate": 1.6389503235950186e-06, + "loss": 0.4576529860496521, + "step": 2737 + }, + { + "epoch": 0.6313119667973254, + "grad_norm": 1.524583554785293, + "learning_rate": 1.6386569927138422e-06, + "loss": 0.4525975286960602, + "step": 2738 + }, + { + "epoch": 0.631542540926908, + "grad_norm": 1.56840988374272, + "learning_rate": 1.6383635689973993e-06, + "loss": 0.42143142223358154, + "step": 2739 + }, + { + "epoch": 0.6317731150564907, + "grad_norm": 1.0672209595897675, + "learning_rate": 1.6380700524883423e-06, + "loss": 0.4440336227416992, + "step": 2740 + }, + { + "epoch": 0.6320036891860733, + "grad_norm": 1.2412570194863743, + "learning_rate": 1.637776443229336e-06, + "loss": 0.5009843707084656, + "step": 2741 + }, + { + "epoch": 0.632234263315656, + "grad_norm": 1.6736573631214935, + "learning_rate": 1.6374827412630604e-06, + "loss": 0.538151741027832, + "step": 2742 + }, + { + "epoch": 0.6324648374452386, + "grad_norm": 1.1895254537976463, + "learning_rate": 1.6371889466322077e-06, + "loss": 0.550201416015625, + "step": 2743 + }, + { + "epoch": 0.6326954115748213, + "grad_norm": 1.3861259597044466, + "learning_rate": 1.6368950593794836e-06, + "loss": 0.5707399845123291, + "step": 2744 + }, + { + "epoch": 0.632925985704404, + "grad_norm": 
1.393827128295071, + "learning_rate": 1.6366010795476082e-06, + "loss": 0.5196787714958191, + "step": 2745 + }, + { + "epoch": 0.6331565598339867, + "grad_norm": 1.171378891149435, + "learning_rate": 1.636307007179314e-06, + "loss": 0.5243285894393921, + "step": 2746 + }, + { + "epoch": 0.6333871339635693, + "grad_norm": 1.249132441469792, + "learning_rate": 1.6360128423173473e-06, + "loss": 0.4202825427055359, + "step": 2747 + }, + { + "epoch": 0.633617708093152, + "grad_norm": 1.2547380834154716, + "learning_rate": 1.6357185850044681e-06, + "loss": 0.49080896377563477, + "step": 2748 + }, + { + "epoch": 0.6338482822227346, + "grad_norm": 1.2234752623414968, + "learning_rate": 1.6354242352834502e-06, + "loss": 0.5537371635437012, + "step": 2749 + }, + { + "epoch": 0.6340788563523173, + "grad_norm": 1.1077493127634728, + "learning_rate": 1.6351297931970796e-06, + "loss": 0.3744293451309204, + "step": 2750 + }, + { + "epoch": 0.6343094304818999, + "grad_norm": 1.237975564408939, + "learning_rate": 1.634835258788157e-06, + "loss": 0.5176748037338257, + "step": 2751 + }, + { + "epoch": 0.6345400046114826, + "grad_norm": 1.321137847220575, + "learning_rate": 1.6345406320994952e-06, + "loss": 0.5179395079612732, + "step": 2752 + }, + { + "epoch": 0.6347705787410652, + "grad_norm": 1.3158476651008661, + "learning_rate": 1.634245913173922e-06, + "loss": 0.4810818135738373, + "step": 2753 + }, + { + "epoch": 0.635001152870648, + "grad_norm": 1.2760288557710286, + "learning_rate": 1.6339511020542775e-06, + "loss": 0.5188307762145996, + "step": 2754 + }, + { + "epoch": 0.6352317270002306, + "grad_norm": 1.662662743900965, + "learning_rate": 1.6336561987834151e-06, + "loss": 0.41170865297317505, + "step": 2755 + }, + { + "epoch": 0.6354623011298133, + "grad_norm": 1.1982414473393, + "learning_rate": 1.6333612034042025e-06, + "loss": 0.48726415634155273, + "step": 2756 + }, + { + "epoch": 0.6356928752593959, + "grad_norm": 1.1790415390507374, + "learning_rate": 
1.63306611595952e-06, + "loss": 0.4483524560928345, + "step": 2757 + }, + { + "epoch": 0.6359234493889786, + "grad_norm": 1.2150870765180466, + "learning_rate": 1.6327709364922618e-06, + "loss": 0.3979623019695282, + "step": 2758 + }, + { + "epoch": 0.6361540235185612, + "grad_norm": 1.2093786796022739, + "learning_rate": 1.6324756650453346e-06, + "loss": 0.461483895778656, + "step": 2759 + }, + { + "epoch": 0.6363845976481439, + "grad_norm": 1.2350751043575534, + "learning_rate": 1.6321803016616598e-06, + "loss": 0.40054333209991455, + "step": 2760 + }, + { + "epoch": 0.6366151717777265, + "grad_norm": 1.1196609017801307, + "learning_rate": 1.6318848463841712e-06, + "loss": 0.534996747970581, + "step": 2761 + }, + { + "epoch": 0.6368457459073092, + "grad_norm": 1.260260551672407, + "learning_rate": 1.631589299255816e-06, + "loss": 0.49408137798309326, + "step": 2762 + }, + { + "epoch": 0.6370763200368919, + "grad_norm": 1.305230846296416, + "learning_rate": 1.6312936603195557e-06, + "loss": 0.49098217487335205, + "step": 2763 + }, + { + "epoch": 0.6373068941664746, + "grad_norm": 1.1344163970655265, + "learning_rate": 1.6309979296183636e-06, + "loss": 0.4990113377571106, + "step": 2764 + }, + { + "epoch": 0.6375374682960572, + "grad_norm": 1.2952446438426217, + "learning_rate": 1.6307021071952276e-06, + "loss": 0.49399930238723755, + "step": 2765 + }, + { + "epoch": 0.6377680424256399, + "grad_norm": 1.320323762194689, + "learning_rate": 1.6304061930931478e-06, + "loss": 0.5029928684234619, + "step": 2766 + }, + { + "epoch": 0.6379986165552225, + "grad_norm": 1.2455728900211775, + "learning_rate": 1.6301101873551396e-06, + "loss": 0.5732289552688599, + "step": 2767 + }, + { + "epoch": 0.6382291906848052, + "grad_norm": 1.2965522975146178, + "learning_rate": 1.6298140900242293e-06, + "loss": 0.47334790229797363, + "step": 2768 + }, + { + "epoch": 0.6384597648143878, + "grad_norm": 1.2464510374223752, + "learning_rate": 1.6295179011434578e-06, + "loss": 
0.44271016120910645, + "step": 2769 + }, + { + "epoch": 0.6386903389439705, + "grad_norm": 1.8250225519339747, + "learning_rate": 1.6292216207558798e-06, + "loss": 0.5768353939056396, + "step": 2770 + }, + { + "epoch": 0.6389209130735531, + "grad_norm": 1.074704735340539, + "learning_rate": 1.6289252489045625e-06, + "loss": 0.48315417766571045, + "step": 2771 + }, + { + "epoch": 0.6391514872031359, + "grad_norm": 1.338382007112913, + "learning_rate": 1.6286287856325855e-06, + "loss": 0.5745590925216675, + "step": 2772 + }, + { + "epoch": 0.6393820613327185, + "grad_norm": 1.473033213400145, + "learning_rate": 1.6283322309830444e-06, + "loss": 0.6084291934967041, + "step": 2773 + }, + { + "epoch": 0.6396126354623012, + "grad_norm": 1.083816855400547, + "learning_rate": 1.6280355849990451e-06, + "loss": 0.4995007812976837, + "step": 2774 + }, + { + "epoch": 0.6398432095918838, + "grad_norm": 1.1962451309299882, + "learning_rate": 1.6277388477237084e-06, + "loss": 0.45811381936073303, + "step": 2775 + }, + { + "epoch": 0.6400737837214665, + "grad_norm": 1.448203316971052, + "learning_rate": 1.6274420192001689e-06, + "loss": 0.5666211247444153, + "step": 2776 + }, + { + "epoch": 0.6403043578510491, + "grad_norm": 1.3871415999727634, + "learning_rate": 1.6271450994715723e-06, + "loss": 0.5059396028518677, + "step": 2777 + }, + { + "epoch": 0.6405349319806318, + "grad_norm": 1.4444216130733851, + "learning_rate": 1.6268480885810798e-06, + "loss": 0.5418530702590942, + "step": 2778 + }, + { + "epoch": 0.6407655061102144, + "grad_norm": 1.4034133564890543, + "learning_rate": 1.6265509865718647e-06, + "loss": 0.5047061443328857, + "step": 2779 + }, + { + "epoch": 0.6409960802397972, + "grad_norm": 1.6003350461542336, + "learning_rate": 1.6262537934871138e-06, + "loss": 0.5104432702064514, + "step": 2780 + }, + { + "epoch": 0.6412266543693798, + "grad_norm": 1.3065683677222188, + "learning_rate": 1.625956509370027e-06, + "loss": 0.44423484802246094, + "step": 2781 + }, + { + 
"epoch": 0.6414572284989624, + "grad_norm": 1.1820302321160245, + "learning_rate": 1.6256591342638179e-06, + "loss": 0.47618383169174194, + "step": 2782 + }, + { + "epoch": 0.6416878026285451, + "grad_norm": 1.3796601981562324, + "learning_rate": 1.625361668211713e-06, + "loss": 0.5423145890235901, + "step": 2783 + }, + { + "epoch": 0.6419183767581277, + "grad_norm": 1.380895745392916, + "learning_rate": 1.6250641112569515e-06, + "loss": 0.517102837562561, + "step": 2784 + }, + { + "epoch": 0.6421489508877104, + "grad_norm": 1.2388489917279923, + "learning_rate": 1.6247664634427864e-06, + "loss": 0.39601820707321167, + "step": 2785 + }, + { + "epoch": 0.642379525017293, + "grad_norm": 1.296572577942614, + "learning_rate": 1.6244687248124843e-06, + "loss": 0.5480250120162964, + "step": 2786 + }, + { + "epoch": 0.6426100991468757, + "grad_norm": 1.1105051491643492, + "learning_rate": 1.624170895409324e-06, + "loss": 0.4743092656135559, + "step": 2787 + }, + { + "epoch": 0.6428406732764583, + "grad_norm": 1.463202362201621, + "learning_rate": 1.6238729752765985e-06, + "loss": 0.4595726728439331, + "step": 2788 + }, + { + "epoch": 0.643071247406041, + "grad_norm": 1.2909676791556273, + "learning_rate": 1.6235749644576132e-06, + "loss": 0.5058779716491699, + "step": 2789 + }, + { + "epoch": 0.6433018215356237, + "grad_norm": 1.3145538108383794, + "learning_rate": 1.623276862995687e-06, + "loss": 0.5075543522834778, + "step": 2790 + }, + { + "epoch": 0.6435323956652064, + "grad_norm": 1.3185436913231439, + "learning_rate": 1.622978670934152e-06, + "loss": 0.5623351335525513, + "step": 2791 + }, + { + "epoch": 0.643762969794789, + "grad_norm": 1.1682118545924238, + "learning_rate": 1.6226803883163536e-06, + "loss": 0.3645760118961334, + "step": 2792 + }, + { + "epoch": 0.6439935439243717, + "grad_norm": 1.4617740663680228, + "learning_rate": 1.6223820151856501e-06, + "loss": 0.5666004419326782, + "step": 2793 + }, + { + "epoch": 0.6442241180539543, + "grad_norm": 
1.3342697895697784, + "learning_rate": 1.6220835515854133e-06, + "loss": 0.6571217775344849, + "step": 2794 + }, + { + "epoch": 0.644454692183537, + "grad_norm": 1.4229199895470708, + "learning_rate": 1.6217849975590271e-06, + "loss": 0.5684333443641663, + "step": 2795 + }, + { + "epoch": 0.6446852663131196, + "grad_norm": 1.5289890556459427, + "learning_rate": 1.62148635314989e-06, + "loss": 0.43374937772750854, + "step": 2796 + }, + { + "epoch": 0.6449158404427023, + "grad_norm": 1.1182458179152783, + "learning_rate": 1.6211876184014134e-06, + "loss": 0.5102420449256897, + "step": 2797 + }, + { + "epoch": 0.6451464145722849, + "grad_norm": 1.0775475511417847, + "learning_rate": 1.6208887933570203e-06, + "loss": 0.39345985651016235, + "step": 2798 + }, + { + "epoch": 0.6453769887018677, + "grad_norm": 1.4503631372644623, + "learning_rate": 1.620589878060149e-06, + "loss": 0.47554945945739746, + "step": 2799 + }, + { + "epoch": 0.6456075628314503, + "grad_norm": 1.601431882721041, + "learning_rate": 1.6202908725542495e-06, + "loss": 0.4385503828525543, + "step": 2800 + }, + { + "epoch": 0.645838136961033, + "grad_norm": 1.1168858860640334, + "learning_rate": 1.619991776882785e-06, + "loss": 0.5589696168899536, + "step": 2801 + }, + { + "epoch": 0.6460687110906156, + "grad_norm": 1.265570460008291, + "learning_rate": 1.619692591089232e-06, + "loss": 0.4827546179294586, + "step": 2802 + }, + { + "epoch": 0.6462992852201983, + "grad_norm": 1.3309974001593363, + "learning_rate": 1.6193933152170809e-06, + "loss": 0.491131067276001, + "step": 2803 + }, + { + "epoch": 0.6465298593497809, + "grad_norm": 1.2647545815457555, + "learning_rate": 1.6190939493098341e-06, + "loss": 0.47185173630714417, + "step": 2804 + }, + { + "epoch": 0.6467604334793636, + "grad_norm": 1.235826049412326, + "learning_rate": 1.6187944934110072e-06, + "loss": 0.4411182701587677, + "step": 2805 + }, + { + "epoch": 0.6469910076089462, + "grad_norm": 1.2245067812038697, + "learning_rate": 
1.6184949475641295e-06, + "loss": 0.47243285179138184, + "step": 2806 + }, + { + "epoch": 0.647221581738529, + "grad_norm": 1.3311536114931484, + "learning_rate": 1.6181953118127428e-06, + "loss": 0.4449295401573181, + "step": 2807 + }, + { + "epoch": 0.6474521558681116, + "grad_norm": 1.2292361204281614, + "learning_rate": 1.6178955862004024e-06, + "loss": 0.5148872137069702, + "step": 2808 + }, + { + "epoch": 0.6476827299976943, + "grad_norm": 1.2738055603189895, + "learning_rate": 1.6175957707706762e-06, + "loss": 0.5017277598381042, + "step": 2809 + }, + { + "epoch": 0.6479133041272769, + "grad_norm": 1.1324070696899262, + "learning_rate": 1.6172958655671458e-06, + "loss": 0.44220247864723206, + "step": 2810 + }, + { + "epoch": 0.6481438782568596, + "grad_norm": 1.215492495713019, + "learning_rate": 1.6169958706334053e-06, + "loss": 0.45421087741851807, + "step": 2811 + }, + { + "epoch": 0.6483744523864422, + "grad_norm": 1.5167053281985836, + "learning_rate": 1.6166957860130618e-06, + "loss": 0.4772147536277771, + "step": 2812 + }, + { + "epoch": 0.6486050265160249, + "grad_norm": 1.1252103890770975, + "learning_rate": 1.6163956117497357e-06, + "loss": 0.5319628715515137, + "step": 2813 + }, + { + "epoch": 0.6488356006456075, + "grad_norm": 1.2663721872672429, + "learning_rate": 1.6160953478870608e-06, + "loss": 0.5109438896179199, + "step": 2814 + }, + { + "epoch": 0.6490661747751902, + "grad_norm": 1.33543378668276, + "learning_rate": 1.6157949944686827e-06, + "loss": 0.4417513608932495, + "step": 2815 + }, + { + "epoch": 0.6492967489047728, + "grad_norm": 1.2535935822359765, + "learning_rate": 1.6154945515382616e-06, + "loss": 0.5013085007667542, + "step": 2816 + }, + { + "epoch": 0.6495273230343556, + "grad_norm": 1.1191581438601172, + "learning_rate": 1.6151940191394693e-06, + "loss": 0.5197368860244751, + "step": 2817 + }, + { + "epoch": 0.6497578971639382, + "grad_norm": 1.4218758858652996, + "learning_rate": 1.6148933973159914e-06, + "loss": 
0.46540898084640503, + "step": 2818 + }, + { + "epoch": 0.6499884712935209, + "grad_norm": 1.2080431861739462, + "learning_rate": 1.6145926861115268e-06, + "loss": 0.4867633581161499, + "step": 2819 + }, + { + "epoch": 0.6502190454231035, + "grad_norm": 1.1380395234486869, + "learning_rate": 1.6142918855697864e-06, + "loss": 0.426607221364975, + "step": 2820 + }, + { + "epoch": 0.6504496195526862, + "grad_norm": 1.2737116095131904, + "learning_rate": 1.613990995734495e-06, + "loss": 0.5183024406433105, + "step": 2821 + }, + { + "epoch": 0.6506801936822688, + "grad_norm": 1.3839354752611597, + "learning_rate": 1.6136900166493893e-06, + "loss": 0.48635101318359375, + "step": 2822 + }, + { + "epoch": 0.6509107678118515, + "grad_norm": 1.5911912747422927, + "learning_rate": 1.6133889483582204e-06, + "loss": 0.47468632459640503, + "step": 2823 + }, + { + "epoch": 0.6511413419414341, + "grad_norm": 1.1598857858501956, + "learning_rate": 1.6130877909047515e-06, + "loss": 0.4665389358997345, + "step": 2824 + }, + { + "epoch": 0.6513719160710169, + "grad_norm": 1.1793258331020087, + "learning_rate": 1.6127865443327585e-06, + "loss": 0.5069966316223145, + "step": 2825 + }, + { + "epoch": 0.6516024902005995, + "grad_norm": 1.4107626754859688, + "learning_rate": 1.612485208686031e-06, + "loss": 0.47820740938186646, + "step": 2826 + }, + { + "epoch": 0.6518330643301822, + "grad_norm": 1.2189859420338702, + "learning_rate": 1.612183784008371e-06, + "loss": 0.43017104268074036, + "step": 2827 + }, + { + "epoch": 0.6520636384597648, + "grad_norm": 1.158515500774614, + "learning_rate": 1.6118822703435937e-06, + "loss": 0.45495298504829407, + "step": 2828 + }, + { + "epoch": 0.6522942125893475, + "grad_norm": 1.7108375139007879, + "learning_rate": 1.6115806677355272e-06, + "loss": 0.4624331593513489, + "step": 2829 + }, + { + "epoch": 0.6525247867189301, + "grad_norm": 1.0788742222165304, + "learning_rate": 1.6112789762280125e-06, + "loss": 0.39458876848220825, + "step": 2830 + }, + 
{ + "epoch": 0.6527553608485128, + "grad_norm": 1.4194134450814206, + "learning_rate": 1.6109771958649035e-06, + "loss": 0.45552846789360046, + "step": 2831 + }, + { + "epoch": 0.6529859349780954, + "grad_norm": 1.4199555723058743, + "learning_rate": 1.6106753266900671e-06, + "loss": 0.4579755663871765, + "step": 2832 + }, + { + "epoch": 0.6532165091076781, + "grad_norm": 1.2589449636358518, + "learning_rate": 1.6103733687473823e-06, + "loss": 0.5164625644683838, + "step": 2833 + }, + { + "epoch": 0.6534470832372608, + "grad_norm": 1.3635551079325425, + "learning_rate": 1.6100713220807432e-06, + "loss": 0.43071237206459045, + "step": 2834 + }, + { + "epoch": 0.6536776573668435, + "grad_norm": 1.2757429725484968, + "learning_rate": 1.6097691867340543e-06, + "loss": 0.5174099802970886, + "step": 2835 + }, + { + "epoch": 0.6539082314964261, + "grad_norm": 1.31351831375575, + "learning_rate": 1.609466962751234e-06, + "loss": 0.5944932699203491, + "step": 2836 + }, + { + "epoch": 0.6541388056260088, + "grad_norm": 1.312815606757786, + "learning_rate": 1.6091646501762145e-06, + "loss": 0.45203912258148193, + "step": 2837 + }, + { + "epoch": 0.6543693797555914, + "grad_norm": 1.292859531347235, + "learning_rate": 1.6088622490529386e-06, + "loss": 0.4197826683521271, + "step": 2838 + }, + { + "epoch": 0.6545999538851741, + "grad_norm": 1.3008648230701247, + "learning_rate": 1.6085597594253649e-06, + "loss": 0.4806807339191437, + "step": 2839 + }, + { + "epoch": 0.6548305280147567, + "grad_norm": 1.233893928808971, + "learning_rate": 1.608257181337462e-06, + "loss": 0.4618797302246094, + "step": 2840 + }, + { + "epoch": 0.6550611021443394, + "grad_norm": 1.1215282144992917, + "learning_rate": 1.6079545148332137e-06, + "loss": 0.4901892840862274, + "step": 2841 + }, + { + "epoch": 0.655291676273922, + "grad_norm": 1.250624448026336, + "learning_rate": 1.607651759956615e-06, + "loss": 0.44869139790534973, + "step": 2842 + }, + { + "epoch": 0.6555222504035048, + "grad_norm": 
1.1064395173732657, + "learning_rate": 1.6073489167516747e-06, + "loss": 0.41470903158187866, + "step": 2843 + }, + { + "epoch": 0.6557528245330874, + "grad_norm": 1.2796938856852533, + "learning_rate": 1.6070459852624143e-06, + "loss": 0.5498615503311157, + "step": 2844 + }, + { + "epoch": 0.6559833986626701, + "grad_norm": 1.4741717641783516, + "learning_rate": 1.6067429655328675e-06, + "loss": 0.5462392568588257, + "step": 2845 + }, + { + "epoch": 0.6562139727922527, + "grad_norm": 1.5147243124828937, + "learning_rate": 1.6064398576070815e-06, + "loss": 0.3775100111961365, + "step": 2846 + }, + { + "epoch": 0.6564445469218354, + "grad_norm": 1.3806942156086204, + "learning_rate": 1.6061366615291161e-06, + "loss": 0.4712100028991699, + "step": 2847 + }, + { + "epoch": 0.656675121051418, + "grad_norm": 1.1320542857842297, + "learning_rate": 1.6058333773430439e-06, + "loss": 0.5152161121368408, + "step": 2848 + }, + { + "epoch": 0.6569056951810007, + "grad_norm": 1.2222287817453417, + "learning_rate": 1.6055300050929502e-06, + "loss": 0.46678972244262695, + "step": 2849 + }, + { + "epoch": 0.6571362693105833, + "grad_norm": 1.1948519980696821, + "learning_rate": 1.6052265448229338e-06, + "loss": 0.4622490108013153, + "step": 2850 + }, + { + "epoch": 0.657366843440166, + "grad_norm": 1.2601521252962713, + "learning_rate": 1.6049229965771052e-06, + "loss": 0.49909311532974243, + "step": 2851 + }, + { + "epoch": 0.6575974175697487, + "grad_norm": 1.1801405687475501, + "learning_rate": 1.6046193603995884e-06, + "loss": 0.4428306221961975, + "step": 2852 + }, + { + "epoch": 0.6578279916993314, + "grad_norm": 1.5295557154716768, + "learning_rate": 1.6043156363345196e-06, + "loss": 0.5842458009719849, + "step": 2853 + }, + { + "epoch": 0.658058565828914, + "grad_norm": 1.4945011678677886, + "learning_rate": 1.604011824426049e-06, + "loss": 0.47183722257614136, + "step": 2854 + }, + { + "epoch": 0.6582891399584967, + "grad_norm": 1.2843309395390234, + "learning_rate": 
1.6037079247183379e-06, + "loss": 0.44225364923477173, + "step": 2855 + }, + { + "epoch": 0.6585197140880793, + "grad_norm": 1.3795669225253144, + "learning_rate": 1.6034039372555617e-06, + "loss": 0.4820272922515869, + "step": 2856 + }, + { + "epoch": 0.658750288217662, + "grad_norm": 1.6263387244434722, + "learning_rate": 1.6030998620819075e-06, + "loss": 0.48118168115615845, + "step": 2857 + }, + { + "epoch": 0.6589808623472446, + "grad_norm": 1.4704169894155685, + "learning_rate": 1.6027956992415764e-06, + "loss": 0.4386011064052582, + "step": 2858 + }, + { + "epoch": 0.6592114364768273, + "grad_norm": 1.4148356020107666, + "learning_rate": 1.6024914487787814e-06, + "loss": 0.48740649223327637, + "step": 2859 + }, + { + "epoch": 0.65944201060641, + "grad_norm": 1.436235867684013, + "learning_rate": 1.602187110737748e-06, + "loss": 0.46782761812210083, + "step": 2860 + }, + { + "epoch": 0.6596725847359927, + "grad_norm": 1.2796166668007127, + "learning_rate": 1.6018826851627155e-06, + "loss": 0.5086358189582825, + "step": 2861 + }, + { + "epoch": 0.6599031588655753, + "grad_norm": 1.1582673721463366, + "learning_rate": 1.6015781720979344e-06, + "loss": 0.5631915330886841, + "step": 2862 + }, + { + "epoch": 0.660133732995158, + "grad_norm": 1.462417648098582, + "learning_rate": 1.6012735715876693e-06, + "loss": 0.5134458541870117, + "step": 2863 + }, + { + "epoch": 0.6603643071247406, + "grad_norm": 1.1268653967137703, + "learning_rate": 1.6009688836761969e-06, + "loss": 0.4308784008026123, + "step": 2864 + }, + { + "epoch": 0.6605948812543233, + "grad_norm": 1.3112517816231024, + "learning_rate": 1.6006641084078068e-06, + "loss": 0.5149765610694885, + "step": 2865 + }, + { + "epoch": 0.6608254553839059, + "grad_norm": 1.6101510783439525, + "learning_rate": 1.6003592458268005e-06, + "loss": 0.521892786026001, + "step": 2866 + }, + { + "epoch": 0.6610560295134886, + "grad_norm": 1.247084334907296, + "learning_rate": 1.6000542959774937e-06, + "loss": 
0.46611008048057556, + "step": 2867 + }, + { + "epoch": 0.6612866036430712, + "grad_norm": 1.2517698630875118, + "learning_rate": 1.5997492589042135e-06, + "loss": 0.43080392479896545, + "step": 2868 + }, + { + "epoch": 0.661517177772654, + "grad_norm": 1.2239680444750303, + "learning_rate": 1.5994441346513003e-06, + "loss": 0.48026901483535767, + "step": 2869 + }, + { + "epoch": 0.6617477519022366, + "grad_norm": 1.1948228818170457, + "learning_rate": 1.5991389232631068e-06, + "loss": 0.48706555366516113, + "step": 2870 + }, + { + "epoch": 0.6619783260318193, + "grad_norm": 1.205848115890533, + "learning_rate": 1.598833624783999e-06, + "loss": 0.5093512535095215, + "step": 2871 + }, + { + "epoch": 0.6622089001614019, + "grad_norm": 1.37517746631934, + "learning_rate": 1.5985282392583542e-06, + "loss": 0.5197086930274963, + "step": 2872 + }, + { + "epoch": 0.6624394742909846, + "grad_norm": 1.3389415544634544, + "learning_rate": 1.5982227667305646e-06, + "loss": 0.497372031211853, + "step": 2873 + }, + { + "epoch": 0.6626700484205672, + "grad_norm": 1.6851191621911175, + "learning_rate": 1.597917207245033e-06, + "loss": 0.4746604561805725, + "step": 2874 + }, + { + "epoch": 0.6629006225501499, + "grad_norm": 1.2864362072574318, + "learning_rate": 1.5976115608461755e-06, + "loss": 0.5531996488571167, + "step": 2875 + }, + { + "epoch": 0.6631311966797325, + "grad_norm": 1.2032344825838508, + "learning_rate": 1.5973058275784208e-06, + "loss": 0.44950544834136963, + "step": 2876 + }, + { + "epoch": 0.6633617708093152, + "grad_norm": 1.231321509427461, + "learning_rate": 1.597000007486211e-06, + "loss": 0.45596158504486084, + "step": 2877 + }, + { + "epoch": 0.6635923449388978, + "grad_norm": 1.1813154846400662, + "learning_rate": 1.596694100613999e-06, + "loss": 0.5243046879768372, + "step": 2878 + }, + { + "epoch": 0.6638229190684806, + "grad_norm": 1.2111771126184059, + "learning_rate": 1.5963881070062528e-06, + "loss": 0.46450644731521606, + "step": 2879 + }, + { + 
"epoch": 0.6640534931980632, + "grad_norm": 1.286085494147619, + "learning_rate": 1.5960820267074509e-06, + "loss": 0.5565767288208008, + "step": 2880 + }, + { + "epoch": 0.6642840673276459, + "grad_norm": 1.574495375498682, + "learning_rate": 1.595775859762085e-06, + "loss": 0.4351605176925659, + "step": 2881 + }, + { + "epoch": 0.6645146414572285, + "grad_norm": 1.3382136213218339, + "learning_rate": 1.5954696062146603e-06, + "loss": 0.5113346576690674, + "step": 2882 + }, + { + "epoch": 0.6647452155868112, + "grad_norm": 1.203285083111209, + "learning_rate": 1.5951632661096932e-06, + "loss": 0.5005035996437073, + "step": 2883 + }, + { + "epoch": 0.6649757897163938, + "grad_norm": 1.1502074786882042, + "learning_rate": 1.5948568394917138e-06, + "loss": 0.4539811611175537, + "step": 2884 + }, + { + "epoch": 0.6652063638459765, + "grad_norm": 1.234546797786613, + "learning_rate": 1.5945503264052637e-06, + "loss": 0.4519865810871124, + "step": 2885 + }, + { + "epoch": 0.6654369379755591, + "grad_norm": 1.1932724883335695, + "learning_rate": 1.5942437268948985e-06, + "loss": 0.5688626766204834, + "step": 2886 + }, + { + "epoch": 0.6656675121051419, + "grad_norm": 1.1582733834983177, + "learning_rate": 1.5939370410051846e-06, + "loss": 0.5038400888442993, + "step": 2887 + }, + { + "epoch": 0.6658980862347245, + "grad_norm": 1.4308591259843988, + "learning_rate": 1.5936302687807028e-06, + "loss": 0.6332568526268005, + "step": 2888 + }, + { + "epoch": 0.6661286603643072, + "grad_norm": 1.2020172387992982, + "learning_rate": 1.593323410266045e-06, + "loss": 0.4994644820690155, + "step": 2889 + }, + { + "epoch": 0.6663592344938898, + "grad_norm": 1.3423031921779223, + "learning_rate": 1.5930164655058165e-06, + "loss": 0.4952617883682251, + "step": 2890 + }, + { + "epoch": 0.6665898086234725, + "grad_norm": 1.1769489968231674, + "learning_rate": 1.5927094345446345e-06, + "loss": 0.4188910722732544, + "step": 2891 + }, + { + "epoch": 0.6668203827530551, + "grad_norm": 
1.319346697910086, + "learning_rate": 1.5924023174271295e-06, + "loss": 0.47160637378692627, + "step": 2892 + }, + { + "epoch": 0.6670509568826377, + "grad_norm": 1.0773369781050426, + "learning_rate": 1.592095114197944e-06, + "loss": 0.44884049892425537, + "step": 2893 + }, + { + "epoch": 0.6672815310122204, + "grad_norm": 1.3166895153069564, + "learning_rate": 1.5917878249017327e-06, + "loss": 0.4105216860771179, + "step": 2894 + }, + { + "epoch": 0.667512105141803, + "grad_norm": 1.3288589826448391, + "learning_rate": 1.5914804495831634e-06, + "loss": 0.5000967383384705, + "step": 2895 + }, + { + "epoch": 0.6677426792713858, + "grad_norm": 1.4772652615504442, + "learning_rate": 1.5911729882869163e-06, + "loss": 0.45515477657318115, + "step": 2896 + }, + { + "epoch": 0.6679732534009684, + "grad_norm": 1.2034912342077588, + "learning_rate": 1.590865441057684e-06, + "loss": 0.4492835998535156, + "step": 2897 + }, + { + "epoch": 0.6682038275305511, + "grad_norm": 1.5637287950189662, + "learning_rate": 1.5905578079401716e-06, + "loss": 0.553781270980835, + "step": 2898 + }, + { + "epoch": 0.6684344016601337, + "grad_norm": 1.235173143749482, + "learning_rate": 1.5902500889790967e-06, + "loss": 0.5085616111755371, + "step": 2899 + }, + { + "epoch": 0.6686649757897164, + "grad_norm": 1.2766607551584273, + "learning_rate": 1.5899422842191891e-06, + "loss": 0.4651145935058594, + "step": 2900 + }, + { + "epoch": 0.668895549919299, + "grad_norm": 1.3114841240621398, + "learning_rate": 1.5896343937051921e-06, + "loss": 0.5503841638565063, + "step": 2901 + }, + { + "epoch": 0.6691261240488817, + "grad_norm": 1.1881721760666544, + "learning_rate": 1.5893264174818599e-06, + "loss": 0.48213839530944824, + "step": 2902 + }, + { + "epoch": 0.6693566981784643, + "grad_norm": 1.2726619976847688, + "learning_rate": 1.5890183555939604e-06, + "loss": 0.4602949023246765, + "step": 2903 + }, + { + "epoch": 0.669587272308047, + "grad_norm": 1.213092004639277, + "learning_rate": 
1.5887102080862736e-06, + "loss": 0.43991196155548096, + "step": 2904 + }, + { + "epoch": 0.6698178464376296, + "grad_norm": 1.2472416336517922, + "learning_rate": 1.5884019750035914e-06, + "loss": 0.48186323046684265, + "step": 2905 + }, + { + "epoch": 0.6700484205672124, + "grad_norm": 1.3445409358829308, + "learning_rate": 1.5880936563907189e-06, + "loss": 0.44907671213150024, + "step": 2906 + }, + { + "epoch": 0.670278994696795, + "grad_norm": 1.874421138474627, + "learning_rate": 1.587785252292473e-06, + "loss": 0.4475386142730713, + "step": 2907 + }, + { + "epoch": 0.6705095688263777, + "grad_norm": 1.2649536391923781, + "learning_rate": 1.587476762753684e-06, + "loss": 0.4504704475402832, + "step": 2908 + }, + { + "epoch": 0.6707401429559603, + "grad_norm": 2.0624210450483376, + "learning_rate": 1.5871681878191937e-06, + "loss": 0.5090106129646301, + "step": 2909 + }, + { + "epoch": 0.670970717085543, + "grad_norm": 1.3010076823717651, + "learning_rate": 1.5868595275338561e-06, + "loss": 0.46150895953178406, + "step": 2910 + }, + { + "epoch": 0.6712012912151256, + "grad_norm": 1.2556909013752833, + "learning_rate": 1.586550781942539e-06, + "loss": 0.5499979257583618, + "step": 2911 + }, + { + "epoch": 0.6714318653447083, + "grad_norm": 1.2089730243488483, + "learning_rate": 1.5862419510901211e-06, + "loss": 0.46628689765930176, + "step": 2912 + }, + { + "epoch": 0.6716624394742909, + "grad_norm": 1.2998808024776154, + "learning_rate": 1.5859330350214941e-06, + "loss": 0.4517399072647095, + "step": 2913 + }, + { + "epoch": 0.6718930136038737, + "grad_norm": 1.0879313971673985, + "learning_rate": 1.5856240337815621e-06, + "loss": 0.4696923792362213, + "step": 2914 + }, + { + "epoch": 0.6721235877334563, + "grad_norm": 1.5676723620382764, + "learning_rate": 1.585314947415242e-06, + "loss": 0.41357535123825073, + "step": 2915 + }, + { + "epoch": 0.672354161863039, + "grad_norm": 1.2988881169526059, + "learning_rate": 1.5850057759674621e-06, + "loss": 
0.5223745107650757, + "step": 2916 + }, + { + "epoch": 0.6725847359926216, + "grad_norm": 1.5751566352241433, + "learning_rate": 1.584696519483164e-06, + "loss": 0.48562729358673096, + "step": 2917 + }, + { + "epoch": 0.6728153101222043, + "grad_norm": 1.147456021361514, + "learning_rate": 1.5843871780073009e-06, + "loss": 0.3675496280193329, + "step": 2918 + }, + { + "epoch": 0.6730458842517869, + "grad_norm": 1.4691177353786786, + "learning_rate": 1.5840777515848389e-06, + "loss": 0.5782667994499207, + "step": 2919 + }, + { + "epoch": 0.6732764583813696, + "grad_norm": 1.110911745804502, + "learning_rate": 1.583768240260756e-06, + "loss": 0.419716477394104, + "step": 2920 + }, + { + "epoch": 0.6735070325109522, + "grad_norm": 1.2625181785612978, + "learning_rate": 1.5834586440800434e-06, + "loss": 0.4004133939743042, + "step": 2921 + }, + { + "epoch": 0.673737606640535, + "grad_norm": 1.3860644175168617, + "learning_rate": 1.5831489630877037e-06, + "loss": 0.4917314350605011, + "step": 2922 + }, + { + "epoch": 0.6739681807701176, + "grad_norm": 1.3350109690747092, + "learning_rate": 1.5828391973287522e-06, + "loss": 0.5488141179084778, + "step": 2923 + }, + { + "epoch": 0.6741987548997003, + "grad_norm": 1.2547850876004316, + "learning_rate": 1.5825293468482163e-06, + "loss": 0.5047071576118469, + "step": 2924 + }, + { + "epoch": 0.6744293290292829, + "grad_norm": 1.3178326140677985, + "learning_rate": 1.5822194116911364e-06, + "loss": 0.4830411672592163, + "step": 2925 + }, + { + "epoch": 0.6746599031588656, + "grad_norm": 1.2591886503495524, + "learning_rate": 1.5819093919025641e-06, + "loss": 0.47517114877700806, + "step": 2926 + }, + { + "epoch": 0.6748904772884482, + "grad_norm": 1.3603729738722081, + "learning_rate": 1.5815992875275642e-06, + "loss": 0.5617963075637817, + "step": 2927 + }, + { + "epoch": 0.6751210514180309, + "grad_norm": 1.1752484838801127, + "learning_rate": 1.5812890986112137e-06, + "loss": 0.4360186457633972, + "step": 2928 + }, + { + 
"epoch": 0.6753516255476135, + "grad_norm": 1.5551926866200483, + "learning_rate": 1.5809788251986014e-06, + "loss": 0.49538636207580566, + "step": 2929 + }, + { + "epoch": 0.6755821996771962, + "grad_norm": 1.1285780293266063, + "learning_rate": 1.5806684673348288e-06, + "loss": 0.538766622543335, + "step": 2930 + }, + { + "epoch": 0.6758127738067788, + "grad_norm": 1.5395880930573347, + "learning_rate": 1.5803580250650094e-06, + "loss": 0.4113287329673767, + "step": 2931 + }, + { + "epoch": 0.6760433479363616, + "grad_norm": 1.4441179706006158, + "learning_rate": 1.5800474984342698e-06, + "loss": 0.5298923254013062, + "step": 2932 + }, + { + "epoch": 0.6762739220659442, + "grad_norm": 1.2285488161220737, + "learning_rate": 1.5797368874877472e-06, + "loss": 0.4891100227832794, + "step": 2933 + }, + { + "epoch": 0.6765044961955269, + "grad_norm": 1.3809520207822814, + "learning_rate": 1.579426192270593e-06, + "loss": 0.4412326216697693, + "step": 2934 + }, + { + "epoch": 0.6767350703251095, + "grad_norm": 1.3386538114869513, + "learning_rate": 1.5791154128279693e-06, + "loss": 0.5514793395996094, + "step": 2935 + }, + { + "epoch": 0.6769656444546922, + "grad_norm": 1.2065068425398038, + "learning_rate": 1.578804549205051e-06, + "loss": 0.44050243496894836, + "step": 2936 + }, + { + "epoch": 0.6771962185842748, + "grad_norm": 1.3084516018872256, + "learning_rate": 1.5784936014470256e-06, + "loss": 0.47503453493118286, + "step": 2937 + }, + { + "epoch": 0.6774267927138575, + "grad_norm": 1.445992727647949, + "learning_rate": 1.5781825695990922e-06, + "loss": 0.524544894695282, + "step": 2938 + }, + { + "epoch": 0.6776573668434401, + "grad_norm": 1.2672201923678605, + "learning_rate": 1.5778714537064628e-06, + "loss": 0.4203689694404602, + "step": 2939 + }, + { + "epoch": 0.6778879409730229, + "grad_norm": 1.255678429788082, + "learning_rate": 1.577560253814361e-06, + "loss": 0.4305247664451599, + "step": 2940 + }, + { + "epoch": 0.6781185151026055, + "grad_norm": 
1.2383698343036857, + "learning_rate": 1.577248969968023e-06, + "loss": 0.6129249930381775, + "step": 2941 + }, + { + "epoch": 0.6783490892321882, + "grad_norm": 1.4217586280781416, + "learning_rate": 1.5769376022126969e-06, + "loss": 0.44431981444358826, + "step": 2942 + }, + { + "epoch": 0.6785796633617708, + "grad_norm": 1.2327303005745092, + "learning_rate": 1.576626150593643e-06, + "loss": 0.4394958019256592, + "step": 2943 + }, + { + "epoch": 0.6788102374913535, + "grad_norm": 1.2593798978560244, + "learning_rate": 1.5763146151561345e-06, + "loss": 0.44481268525123596, + "step": 2944 + }, + { + "epoch": 0.6790408116209361, + "grad_norm": 1.4440486279504336, + "learning_rate": 1.5760029959454556e-06, + "loss": 0.4251822829246521, + "step": 2945 + }, + { + "epoch": 0.6792713857505188, + "grad_norm": 1.338830252556874, + "learning_rate": 1.575691293006904e-06, + "loss": 0.41041696071624756, + "step": 2946 + }, + { + "epoch": 0.6795019598801014, + "grad_norm": 1.357017341106407, + "learning_rate": 1.5753795063857883e-06, + "loss": 0.5710239410400391, + "step": 2947 + }, + { + "epoch": 0.6797325340096841, + "grad_norm": 1.2834985119403657, + "learning_rate": 1.57506763612743e-06, + "loss": 0.48825210332870483, + "step": 2948 + }, + { + "epoch": 0.6799631081392667, + "grad_norm": 1.263284608882453, + "learning_rate": 1.5747556822771628e-06, + "loss": 0.37077784538269043, + "step": 2949 + }, + { + "epoch": 0.6801936822688495, + "grad_norm": 1.2458271352531185, + "learning_rate": 1.5744436448803322e-06, + "loss": 0.4618649482727051, + "step": 2950 + }, + { + "epoch": 0.6804242563984321, + "grad_norm": 1.0624348057433408, + "learning_rate": 1.574131523982296e-06, + "loss": 0.4415496289730072, + "step": 2951 + }, + { + "epoch": 0.6806548305280148, + "grad_norm": 1.4732593030941656, + "learning_rate": 1.5738193196284239e-06, + "loss": 0.440029501914978, + "step": 2952 + }, + { + "epoch": 0.6808854046575974, + "grad_norm": 1.3992294210480754, + "learning_rate": 
1.5735070318640986e-06, + "loss": 0.5149378776550293, + "step": 2953 + }, + { + "epoch": 0.6811159787871801, + "grad_norm": 1.3173119180782331, + "learning_rate": 1.5731946607347136e-06, + "loss": 0.4838085174560547, + "step": 2954 + }, + { + "epoch": 0.6813465529167627, + "grad_norm": 1.3500402916158631, + "learning_rate": 1.5728822062856757e-06, + "loss": 0.48472005128860474, + "step": 2955 + }, + { + "epoch": 0.6815771270463454, + "grad_norm": 1.163167888868214, + "learning_rate": 1.572569668562403e-06, + "loss": 0.5154656767845154, + "step": 2956 + }, + { + "epoch": 0.681807701175928, + "grad_norm": 1.1906599654401737, + "learning_rate": 1.5722570476103263e-06, + "loss": 0.4094988703727722, + "step": 2957 + }, + { + "epoch": 0.6820382753055108, + "grad_norm": 1.2324943837281264, + "learning_rate": 1.5719443434748877e-06, + "loss": 0.5125937461853027, + "step": 2958 + }, + { + "epoch": 0.6822688494350934, + "grad_norm": 1.2538269370063608, + "learning_rate": 1.5716315562015428e-06, + "loss": 0.4807034730911255, + "step": 2959 + }, + { + "epoch": 0.6824994235646761, + "grad_norm": 1.3513545314522855, + "learning_rate": 1.5713186858357577e-06, + "loss": 0.6126741170883179, + "step": 2960 + }, + { + "epoch": 0.6827299976942587, + "grad_norm": 2.1674593801056887, + "learning_rate": 1.5710057324230113e-06, + "loss": 0.5450708866119385, + "step": 2961 + }, + { + "epoch": 0.6829605718238414, + "grad_norm": 1.8355809144200355, + "learning_rate": 1.5706926960087948e-06, + "loss": 0.47740328311920166, + "step": 2962 + }, + { + "epoch": 0.683191145953424, + "grad_norm": 1.311529987995532, + "learning_rate": 1.5703795766386112e-06, + "loss": 0.4731057584285736, + "step": 2963 + }, + { + "epoch": 0.6834217200830067, + "grad_norm": 1.3162153678952433, + "learning_rate": 1.5700663743579754e-06, + "loss": 0.49735045433044434, + "step": 2964 + }, + { + "epoch": 0.6836522942125893, + "grad_norm": 1.2346637447285915, + "learning_rate": 1.569753089212415e-06, + "loss": 
0.5257318019866943, + "step": 2965 + }, + { + "epoch": 0.683882868342172, + "grad_norm": 1.1458467925306592, + "learning_rate": 1.5694397212474685e-06, + "loss": 0.3947733938694, + "step": 2966 + }, + { + "epoch": 0.6841134424717547, + "grad_norm": 1.424176183527685, + "learning_rate": 1.5691262705086875e-06, + "loss": 0.5078107714653015, + "step": 2967 + }, + { + "epoch": 0.6843440166013374, + "grad_norm": 1.7316538509871626, + "learning_rate": 1.5688127370416351e-06, + "loss": 0.5921520590782166, + "step": 2968 + }, + { + "epoch": 0.68457459073092, + "grad_norm": 1.2277129646213039, + "learning_rate": 1.5684991208918866e-06, + "loss": 0.45995181798934937, + "step": 2969 + }, + { + "epoch": 0.6848051648605027, + "grad_norm": 1.1894548452861071, + "learning_rate": 1.5681854221050293e-06, + "loss": 0.4874386787414551, + "step": 2970 + }, + { + "epoch": 0.6850357389900853, + "grad_norm": 1.3695475422493124, + "learning_rate": 1.5678716407266625e-06, + "loss": 0.4522739052772522, + "step": 2971 + }, + { + "epoch": 0.685266313119668, + "grad_norm": 1.3244142914830208, + "learning_rate": 1.5675577768023977e-06, + "loss": 0.4596391022205353, + "step": 2972 + }, + { + "epoch": 0.6854968872492506, + "grad_norm": 1.6847382830263626, + "learning_rate": 1.567243830377858e-06, + "loss": 0.5391427278518677, + "step": 2973 + }, + { + "epoch": 0.6857274613788333, + "grad_norm": 1.2164543996098884, + "learning_rate": 1.5669298014986786e-06, + "loss": 0.5583066940307617, + "step": 2974 + }, + { + "epoch": 0.6859580355084159, + "grad_norm": 1.3656527800334406, + "learning_rate": 1.566615690210507e-06, + "loss": 0.5410330295562744, + "step": 2975 + }, + { + "epoch": 0.6861886096379987, + "grad_norm": 1.2007908045124778, + "learning_rate": 1.566301496559002e-06, + "loss": 0.5145233273506165, + "step": 2976 + }, + { + "epoch": 0.6864191837675813, + "grad_norm": 1.4168885241389684, + "learning_rate": 1.5659872205898356e-06, + "loss": 0.5021970272064209, + "step": 2977 + }, + { + 
"epoch": 0.686649757897164, + "grad_norm": 1.0896663307775538, + "learning_rate": 1.5656728623486903e-06, + "loss": 0.48251593112945557, + "step": 2978 + }, + { + "epoch": 0.6868803320267466, + "grad_norm": 1.2502610536872558, + "learning_rate": 1.5653584218812617e-06, + "loss": 0.4228450655937195, + "step": 2979 + }, + { + "epoch": 0.6871109061563293, + "grad_norm": 1.4048596098114436, + "learning_rate": 1.5650438992332567e-06, + "loss": 0.3975197374820709, + "step": 2980 + }, + { + "epoch": 0.6873414802859119, + "grad_norm": 1.386478606714872, + "learning_rate": 1.5647292944503945e-06, + "loss": 0.5441234707832336, + "step": 2981 + }, + { + "epoch": 0.6875720544154946, + "grad_norm": 1.3552115877356068, + "learning_rate": 1.5644146075784057e-06, + "loss": 0.5357148051261902, + "step": 2982 + }, + { + "epoch": 0.6878026285450772, + "grad_norm": 1.2605289404512496, + "learning_rate": 1.5640998386630337e-06, + "loss": 0.530154824256897, + "step": 2983 + }, + { + "epoch": 0.68803320267466, + "grad_norm": 1.3830405468746736, + "learning_rate": 1.563784987750033e-06, + "loss": 0.480657696723938, + "step": 2984 + }, + { + "epoch": 0.6882637768042426, + "grad_norm": 1.2595390052779563, + "learning_rate": 1.5634700548851712e-06, + "loss": 0.4822859764099121, + "step": 2985 + }, + { + "epoch": 0.6884943509338253, + "grad_norm": 1.4511024891592457, + "learning_rate": 1.5631550401142257e-06, + "loss": 0.48551490902900696, + "step": 2986 + }, + { + "epoch": 0.6887249250634079, + "grad_norm": 1.252088599015217, + "learning_rate": 1.562839943482988e-06, + "loss": 0.43080294132232666, + "step": 2987 + }, + { + "epoch": 0.6889554991929906, + "grad_norm": 1.1661214157780933, + "learning_rate": 1.56252476503726e-06, + "loss": 0.42780637741088867, + "step": 2988 + }, + { + "epoch": 0.6891860733225732, + "grad_norm": 1.3057809079761946, + "learning_rate": 1.5622095048228565e-06, + "loss": 0.539027214050293, + "step": 2989 + }, + { + "epoch": 0.6894166474521559, + "grad_norm": 
1.2289425463506802, + "learning_rate": 1.5618941628856037e-06, + "loss": 0.4529460668563843, + "step": 2990 + }, + { + "epoch": 0.6896472215817385, + "grad_norm": 1.4016140654354556, + "learning_rate": 1.5615787392713395e-06, + "loss": 0.49724727869033813, + "step": 2991 + }, + { + "epoch": 0.6898777957113212, + "grad_norm": 1.25157972103927, + "learning_rate": 1.5612632340259144e-06, + "loss": 0.4711928963661194, + "step": 2992 + }, + { + "epoch": 0.6901083698409038, + "grad_norm": 1.3707143585352468, + "learning_rate": 1.56094764719519e-06, + "loss": 0.42258220911026, + "step": 2993 + }, + { + "epoch": 0.6903389439704866, + "grad_norm": 1.371187363460567, + "learning_rate": 1.5606319788250398e-06, + "loss": 0.47754064202308655, + "step": 2994 + }, + { + "epoch": 0.6905695181000692, + "grad_norm": 1.307708883093593, + "learning_rate": 1.5603162289613501e-06, + "loss": 0.47200560569763184, + "step": 2995 + }, + { + "epoch": 0.6908000922296519, + "grad_norm": 1.359798809074, + "learning_rate": 1.5600003976500173e-06, + "loss": 0.5194537043571472, + "step": 2996 + }, + { + "epoch": 0.6910306663592345, + "grad_norm": 1.707437655194179, + "learning_rate": 1.5596844849369518e-06, + "loss": 0.4874703586101532, + "step": 2997 + }, + { + "epoch": 0.6912612404888172, + "grad_norm": 1.262990523197611, + "learning_rate": 1.5593684908680738e-06, + "loss": 0.5028672218322754, + "step": 2998 + }, + { + "epoch": 0.6914918146183998, + "grad_norm": 1.2420345591817543, + "learning_rate": 1.5590524154893169e-06, + "loss": 0.44250521063804626, + "step": 2999 + }, + { + "epoch": 0.6917223887479825, + "grad_norm": 1.6089998258276121, + "learning_rate": 1.5587362588466253e-06, + "loss": 0.536510705947876, + "step": 3000 + }, + { + "epoch": 0.6919529628775651, + "grad_norm": 1.3333649931769909, + "learning_rate": 1.5584200209859558e-06, + "loss": 0.4514959752559662, + "step": 3001 + }, + { + "epoch": 0.6921835370071479, + "grad_norm": 1.1923376457733827, + "learning_rate": 
1.5581037019532773e-06, + "loss": 0.4402197301387787, + "step": 3002 + }, + { + "epoch": 0.6924141111367305, + "grad_norm": 1.1940429657833775, + "learning_rate": 1.5577873017945691e-06, + "loss": 0.508256196975708, + "step": 3003 + }, + { + "epoch": 0.6926446852663131, + "grad_norm": 1.2600794916577294, + "learning_rate": 1.5574708205558236e-06, + "loss": 0.5123175978660583, + "step": 3004 + }, + { + "epoch": 0.6928752593958958, + "grad_norm": 1.4303227599201425, + "learning_rate": 1.5571542582830447e-06, + "loss": 0.4874982237815857, + "step": 3005 + }, + { + "epoch": 0.6931058335254784, + "grad_norm": 1.314228379499143, + "learning_rate": 1.556837615022248e-06, + "loss": 0.44554391503334045, + "step": 3006 + }, + { + "epoch": 0.6933364076550611, + "grad_norm": 1.5428941228634732, + "learning_rate": 1.5565208908194603e-06, + "loss": 0.5899895429611206, + "step": 3007 + }, + { + "epoch": 0.6935669817846437, + "grad_norm": 1.2685614762262514, + "learning_rate": 1.5562040857207208e-06, + "loss": 0.5137951374053955, + "step": 3008 + }, + { + "epoch": 0.6937975559142264, + "grad_norm": 1.2863812659603593, + "learning_rate": 1.5558871997720805e-06, + "loss": 0.5435892343521118, + "step": 3009 + }, + { + "epoch": 0.694028130043809, + "grad_norm": 1.4463505314835092, + "learning_rate": 1.5555702330196021e-06, + "loss": 0.45998525619506836, + "step": 3010 + }, + { + "epoch": 0.6942587041733917, + "grad_norm": 1.324515476398786, + "learning_rate": 1.5552531855093597e-06, + "loss": 0.4676332473754883, + "step": 3011 + }, + { + "epoch": 0.6944892783029744, + "grad_norm": 1.2595225568514163, + "learning_rate": 1.5549360572874397e-06, + "loss": 0.48250633478164673, + "step": 3012 + }, + { + "epoch": 0.6947198524325571, + "grad_norm": 1.4537609539003187, + "learning_rate": 1.5546188483999396e-06, + "loss": 0.4841402769088745, + "step": 3013 + }, + { + "epoch": 0.6949504265621397, + "grad_norm": 1.401637069375295, + "learning_rate": 1.5543015588929688e-06, + "loss": 
0.4717336893081665, + "step": 3014 + }, + { + "epoch": 0.6951810006917224, + "grad_norm": 1.3276052543558161, + "learning_rate": 1.5539841888126488e-06, + "loss": 0.48844897747039795, + "step": 3015 + }, + { + "epoch": 0.695411574821305, + "grad_norm": 1.539947517538627, + "learning_rate": 1.5536667382051127e-06, + "loss": 0.5244781970977783, + "step": 3016 + }, + { + "epoch": 0.6956421489508877, + "grad_norm": 1.2794123200247822, + "learning_rate": 1.5533492071165046e-06, + "loss": 0.4612278938293457, + "step": 3017 + }, + { + "epoch": 0.6958727230804703, + "grad_norm": 1.1978546028008836, + "learning_rate": 1.5530315955929817e-06, + "loss": 0.40461257100105286, + "step": 3018 + }, + { + "epoch": 0.696103297210053, + "grad_norm": 1.387518032200497, + "learning_rate": 1.5527139036807112e-06, + "loss": 0.5191174745559692, + "step": 3019 + }, + { + "epoch": 0.6963338713396356, + "grad_norm": 1.510370534054042, + "learning_rate": 1.5523961314258731e-06, + "loss": 0.45882558822631836, + "step": 3020 + }, + { + "epoch": 0.6965644454692184, + "grad_norm": 1.230362803290169, + "learning_rate": 1.552078278874659e-06, + "loss": 0.4766819477081299, + "step": 3021 + }, + { + "epoch": 0.696795019598801, + "grad_norm": 1.2822436220739486, + "learning_rate": 1.5517603460732724e-06, + "loss": 0.4572867751121521, + "step": 3022 + }, + { + "epoch": 0.6970255937283837, + "grad_norm": 1.5677891937472022, + "learning_rate": 1.5514423330679272e-06, + "loss": 0.4689183235168457, + "step": 3023 + }, + { + "epoch": 0.6972561678579663, + "grad_norm": 1.18549719550499, + "learning_rate": 1.5511242399048504e-06, + "loss": 0.45769914984703064, + "step": 3024 + }, + { + "epoch": 0.697486741987549, + "grad_norm": 1.3095011770493485, + "learning_rate": 1.5508060666302796e-06, + "loss": 0.47367236018180847, + "step": 3025 + }, + { + "epoch": 0.6977173161171316, + "grad_norm": 1.5441644429162589, + "learning_rate": 1.550487813290465e-06, + "loss": 0.40873080492019653, + "step": 3026 + }, + { + 
"epoch": 0.6979478902467143, + "grad_norm": 1.2349195465907241, + "learning_rate": 1.5501694799316671e-06, + "loss": 0.42366844415664673, + "step": 3027 + }, + { + "epoch": 0.6981784643762969, + "grad_norm": 1.2587292360565243, + "learning_rate": 1.5498510666001602e-06, + "loss": 0.3133828043937683, + "step": 3028 + }, + { + "epoch": 0.6984090385058797, + "grad_norm": 1.5168032500602213, + "learning_rate": 1.549532573342228e-06, + "loss": 0.5188712477684021, + "step": 3029 + }, + { + "epoch": 0.6986396126354623, + "grad_norm": 1.2707264640547211, + "learning_rate": 1.5492140002041668e-06, + "loss": 0.4374960660934448, + "step": 3030 + }, + { + "epoch": 0.698870186765045, + "grad_norm": 1.6828882278794643, + "learning_rate": 1.5488953472322845e-06, + "loss": 0.5285592079162598, + "step": 3031 + }, + { + "epoch": 0.6991007608946276, + "grad_norm": 1.5111090584536853, + "learning_rate": 1.5485766144729006e-06, + "loss": 0.5331767797470093, + "step": 3032 + }, + { + "epoch": 0.6993313350242103, + "grad_norm": 1.3626863062762309, + "learning_rate": 1.5482578019723462e-06, + "loss": 0.4546147584915161, + "step": 3033 + }, + { + "epoch": 0.6995619091537929, + "grad_norm": 1.2127032724557087, + "learning_rate": 1.5479389097769639e-06, + "loss": 0.47674182057380676, + "step": 3034 + }, + { + "epoch": 0.6997924832833756, + "grad_norm": 1.2042624102453106, + "learning_rate": 1.5476199379331078e-06, + "loss": 0.496138334274292, + "step": 3035 + }, + { + "epoch": 0.7000230574129582, + "grad_norm": 1.367736432364491, + "learning_rate": 1.547300886487144e-06, + "loss": 0.4843756854534149, + "step": 3036 + }, + { + "epoch": 0.7002536315425409, + "grad_norm": 1.5043582093976149, + "learning_rate": 1.5469817554854494e-06, + "loss": 0.6028264760971069, + "step": 3037 + }, + { + "epoch": 0.7004842056721235, + "grad_norm": 1.4959257460685322, + "learning_rate": 1.5466625449744134e-06, + "loss": 0.49528858065605164, + "step": 3038 + }, + { + "epoch": 0.7007147798017063, + "grad_norm": 
1.1403876193260207, + "learning_rate": 1.5463432550004358e-06, + "loss": 0.466439425945282, + "step": 3039 + }, + { + "epoch": 0.7009453539312889, + "grad_norm": 1.1012676712945453, + "learning_rate": 1.5460238856099292e-06, + "loss": 0.4196532368659973, + "step": 3040 + }, + { + "epoch": 0.7011759280608716, + "grad_norm": 1.40353983379054, + "learning_rate": 1.5457044368493173e-06, + "loss": 0.47679999470710754, + "step": 3041 + }, + { + "epoch": 0.7014065021904542, + "grad_norm": 1.2594197008827683, + "learning_rate": 1.5453849087650346e-06, + "loss": 0.4368046522140503, + "step": 3042 + }, + { + "epoch": 0.7016370763200369, + "grad_norm": 1.2211703865137815, + "learning_rate": 1.5450653014035285e-06, + "loss": 0.45165273547172546, + "step": 3043 + }, + { + "epoch": 0.7018676504496195, + "grad_norm": 1.1456058151260982, + "learning_rate": 1.5447456148112563e-06, + "loss": 0.44813454151153564, + "step": 3044 + }, + { + "epoch": 0.7020982245792022, + "grad_norm": 1.269275990698592, + "learning_rate": 1.5444258490346882e-06, + "loss": 0.44681504368782043, + "step": 3045 + }, + { + "epoch": 0.7023287987087848, + "grad_norm": 1.3036360811480283, + "learning_rate": 1.5441060041203057e-06, + "loss": 0.44788169860839844, + "step": 3046 + }, + { + "epoch": 0.7025593728383676, + "grad_norm": 1.3232925218771132, + "learning_rate": 1.5437860801146013e-06, + "loss": 0.3754178285598755, + "step": 3047 + }, + { + "epoch": 0.7027899469679502, + "grad_norm": 1.001044690167693, + "learning_rate": 1.5434660770640787e-06, + "loss": 0.3582305908203125, + "step": 3048 + }, + { + "epoch": 0.7030205210975329, + "grad_norm": 1.3449464333610996, + "learning_rate": 1.543145995015254e-06, + "loss": 0.42649000883102417, + "step": 3049 + }, + { + "epoch": 0.7032510952271155, + "grad_norm": 1.2880551855073363, + "learning_rate": 1.5428258340146543e-06, + "loss": 0.5164098143577576, + "step": 3050 + }, + { + "epoch": 0.7034816693566982, + "grad_norm": 1.2456398303270981, + "learning_rate": 
1.5425055941088181e-06, + "loss": 0.4193584620952606, + "step": 3051 + }, + { + "epoch": 0.7037122434862808, + "grad_norm": 1.3825374305431077, + "learning_rate": 1.5421852753442957e-06, + "loss": 0.5230807662010193, + "step": 3052 + }, + { + "epoch": 0.7039428176158635, + "grad_norm": 1.466681367301644, + "learning_rate": 1.5418648777676488e-06, + "loss": 0.4573478102684021, + "step": 3053 + }, + { + "epoch": 0.7041733917454461, + "grad_norm": 1.1343088214156583, + "learning_rate": 1.5415444014254503e-06, + "loss": 0.47031426429748535, + "step": 3054 + }, + { + "epoch": 0.7044039658750288, + "grad_norm": 1.3599997528041683, + "learning_rate": 1.5412238463642844e-06, + "loss": 0.4499198794364929, + "step": 3055 + }, + { + "epoch": 0.7046345400046115, + "grad_norm": 1.4014132343100743, + "learning_rate": 1.5409032126307477e-06, + "loss": 0.4775800406932831, + "step": 3056 + }, + { + "epoch": 0.7048651141341942, + "grad_norm": 1.4264420683743835, + "learning_rate": 1.540582500271447e-06, + "loss": 0.535969614982605, + "step": 3057 + }, + { + "epoch": 0.7050956882637768, + "grad_norm": 1.3808494199198469, + "learning_rate": 1.5402617093330013e-06, + "loss": 0.5358741283416748, + "step": 3058 + }, + { + "epoch": 0.7053262623933595, + "grad_norm": 1.2492824573732915, + "learning_rate": 1.5399408398620406e-06, + "loss": 0.5392765998840332, + "step": 3059 + }, + { + "epoch": 0.7055568365229421, + "grad_norm": 1.275809486426879, + "learning_rate": 1.5396198919052066e-06, + "loss": 0.47976016998291016, + "step": 3060 + }, + { + "epoch": 0.7057874106525248, + "grad_norm": 1.2226120465526635, + "learning_rate": 1.5392988655091526e-06, + "loss": 0.39919328689575195, + "step": 3061 + }, + { + "epoch": 0.7060179847821074, + "grad_norm": 1.6011371731611943, + "learning_rate": 1.538977760720543e-06, + "loss": 0.4503553509712219, + "step": 3062 + }, + { + "epoch": 0.7062485589116901, + "grad_norm": 1.2363983734925073, + "learning_rate": 1.5386565775860531e-06, + "loss": 
0.4570388197898865, + "step": 3063 + }, + { + "epoch": 0.7064791330412727, + "grad_norm": 1.2640125065615475, + "learning_rate": 1.5383353161523706e-06, + "loss": 0.54588782787323, + "step": 3064 + }, + { + "epoch": 0.7067097071708555, + "grad_norm": 1.3495245665399438, + "learning_rate": 1.5380139764661945e-06, + "loss": 0.40369170904159546, + "step": 3065 + }, + { + "epoch": 0.7069402813004381, + "grad_norm": 1.40505470554238, + "learning_rate": 1.5376925585742341e-06, + "loss": 0.5079206228256226, + "step": 3066 + }, + { + "epoch": 0.7071708554300208, + "grad_norm": 1.2407138703812135, + "learning_rate": 1.5373710625232107e-06, + "loss": 0.41418159008026123, + "step": 3067 + }, + { + "epoch": 0.7074014295596034, + "grad_norm": 1.2523103492462024, + "learning_rate": 1.5370494883598575e-06, + "loss": 0.4546199142932892, + "step": 3068 + }, + { + "epoch": 0.7076320036891861, + "grad_norm": 1.1794904786936184, + "learning_rate": 1.5367278361309183e-06, + "loss": 0.48041367530822754, + "step": 3069 + }, + { + "epoch": 0.7078625778187687, + "grad_norm": 1.3468711432386478, + "learning_rate": 1.5364061058831486e-06, + "loss": 0.47676384449005127, + "step": 3070 + }, + { + "epoch": 0.7080931519483514, + "grad_norm": 1.1888236379295274, + "learning_rate": 1.5360842976633148e-06, + "loss": 0.47341692447662354, + "step": 3071 + }, + { + "epoch": 0.708323726077934, + "grad_norm": 1.3227579498868685, + "learning_rate": 1.5357624115181956e-06, + "loss": 0.38436269760131836, + "step": 3072 + }, + { + "epoch": 0.7085543002075168, + "grad_norm": 1.4827200040386144, + "learning_rate": 1.5354404474945798e-06, + "loss": 0.5369806289672852, + "step": 3073 + }, + { + "epoch": 0.7087848743370994, + "grad_norm": 1.404704151375413, + "learning_rate": 1.535118405639269e-06, + "loss": 0.5314677953720093, + "step": 3074 + }, + { + "epoch": 0.7090154484666821, + "grad_norm": 1.1927563297298747, + "learning_rate": 1.5347962859990742e-06, + "loss": 0.49233007431030273, + "step": 3075 + }, + { 
+ "epoch": 0.7092460225962647, + "grad_norm": 1.3477590726762334, + "learning_rate": 1.5344740886208194e-06, + "loss": 0.4834766983985901, + "step": 3076 + }, + { + "epoch": 0.7094765967258474, + "grad_norm": 1.432138793969477, + "learning_rate": 1.534151813551339e-06, + "loss": 0.505670428276062, + "step": 3077 + }, + { + "epoch": 0.70970717085543, + "grad_norm": 1.3290190812046396, + "learning_rate": 1.533829460837479e-06, + "loss": 0.5256010293960571, + "step": 3078 + }, + { + "epoch": 0.7099377449850127, + "grad_norm": 1.463108893430833, + "learning_rate": 1.5335070305260967e-06, + "loss": 0.4186098873615265, + "step": 3079 + }, + { + "epoch": 0.7101683191145953, + "grad_norm": 1.2048981968166306, + "learning_rate": 1.5331845226640607e-06, + "loss": 0.4034464359283447, + "step": 3080 + }, + { + "epoch": 0.710398893244178, + "grad_norm": 1.346673761335588, + "learning_rate": 1.5328619372982505e-06, + "loss": 0.4521537721157074, + "step": 3081 + }, + { + "epoch": 0.7106294673737606, + "grad_norm": 1.5250190734837208, + "learning_rate": 1.5325392744755574e-06, + "loss": 0.4919602572917938, + "step": 3082 + }, + { + "epoch": 0.7108600415033434, + "grad_norm": 1.1734195700346683, + "learning_rate": 1.5322165342428835e-06, + "loss": 0.4464415907859802, + "step": 3083 + }, + { + "epoch": 0.711090615632926, + "grad_norm": 1.2610549525832775, + "learning_rate": 1.5318937166471427e-06, + "loss": 0.47444385290145874, + "step": 3084 + }, + { + "epoch": 0.7113211897625087, + "grad_norm": 1.1782687896584645, + "learning_rate": 1.5315708217352595e-06, + "loss": 0.4014730453491211, + "step": 3085 + }, + { + "epoch": 0.7115517638920913, + "grad_norm": 1.1806273152667501, + "learning_rate": 1.5312478495541703e-06, + "loss": 0.4528852701187134, + "step": 3086 + }, + { + "epoch": 0.711782338021674, + "grad_norm": 1.4716504682159035, + "learning_rate": 1.5309248001508216e-06, + "loss": 0.4919637441635132, + "step": 3087 + }, + { + "epoch": 0.7120129121512566, + "grad_norm": 
1.3824738486934829, + "learning_rate": 1.530601673572173e-06, + "loss": 0.5630985498428345, + "step": 3088 + }, + { + "epoch": 0.7122434862808393, + "grad_norm": 1.4462966182250279, + "learning_rate": 1.5302784698651935e-06, + "loss": 0.3920522630214691, + "step": 3089 + }, + { + "epoch": 0.7124740604104219, + "grad_norm": 1.3282823423467587, + "learning_rate": 1.5299551890768642e-06, + "loss": 0.5502145290374756, + "step": 3090 + }, + { + "epoch": 0.7127046345400047, + "grad_norm": 1.2547204060730106, + "learning_rate": 1.5296318312541767e-06, + "loss": 0.4839448928833008, + "step": 3091 + }, + { + "epoch": 0.7129352086695873, + "grad_norm": 1.3486430423834108, + "learning_rate": 1.5293083964441355e-06, + "loss": 0.5029735565185547, + "step": 3092 + }, + { + "epoch": 0.71316578279917, + "grad_norm": 1.2299483009823662, + "learning_rate": 1.5289848846937544e-06, + "loss": 0.4724803566932678, + "step": 3093 + }, + { + "epoch": 0.7133963569287526, + "grad_norm": 1.1015042263762262, + "learning_rate": 1.528661296050059e-06, + "loss": 0.4609840512275696, + "step": 3094 + }, + { + "epoch": 0.7136269310583353, + "grad_norm": 1.4829248198628113, + "learning_rate": 1.5283376305600863e-06, + "loss": 0.49763959646224976, + "step": 3095 + }, + { + "epoch": 0.7138575051879179, + "grad_norm": 1.2090810088725865, + "learning_rate": 1.5280138882708847e-06, + "loss": 0.42384523153305054, + "step": 3096 + }, + { + "epoch": 0.7140880793175006, + "grad_norm": 1.3550047979469209, + "learning_rate": 1.5276900692295134e-06, + "loss": 0.5034611225128174, + "step": 3097 + }, + { + "epoch": 0.7143186534470832, + "grad_norm": 1.3321189275554508, + "learning_rate": 1.5273661734830423e-06, + "loss": 0.5617417097091675, + "step": 3098 + }, + { + "epoch": 0.714549227576666, + "grad_norm": 1.320340684589947, + "learning_rate": 1.527042201078553e-06, + "loss": 0.4562014937400818, + "step": 3099 + }, + { + "epoch": 0.7147798017062486, + "grad_norm": 1.6932438225785027, + "learning_rate": 
1.5267181520631386e-06, + "loss": 0.5626288056373596, + "step": 3100 + }, + { + "epoch": 0.7150103758358313, + "grad_norm": 1.4526784651389733, + "learning_rate": 1.5263940264839028e-06, + "loss": 0.4882054924964905, + "step": 3101 + }, + { + "epoch": 0.7152409499654139, + "grad_norm": 1.523666745804484, + "learning_rate": 1.5260698243879603e-06, + "loss": 0.5371058583259583, + "step": 3102 + }, + { + "epoch": 0.7154715240949966, + "grad_norm": 1.1599798656247362, + "learning_rate": 1.5257455458224368e-06, + "loss": 0.4683259129524231, + "step": 3103 + }, + { + "epoch": 0.7157020982245792, + "grad_norm": 1.223986374608111, + "learning_rate": 1.5254211908344704e-06, + "loss": 0.4894726872444153, + "step": 3104 + }, + { + "epoch": 0.7159326723541619, + "grad_norm": 1.3226351110788483, + "learning_rate": 1.5250967594712089e-06, + "loss": 0.4517880082130432, + "step": 3105 + }, + { + "epoch": 0.7161632464837445, + "grad_norm": 1.162528176566508, + "learning_rate": 1.5247722517798118e-06, + "loss": 0.5062767267227173, + "step": 3106 + }, + { + "epoch": 0.7163938206133272, + "grad_norm": 1.6349408984878264, + "learning_rate": 1.5244476678074494e-06, + "loss": 0.5029302835464478, + "step": 3107 + }, + { + "epoch": 0.7166243947429098, + "grad_norm": 1.3765367207185526, + "learning_rate": 1.5241230076013035e-06, + "loss": 0.44112175703048706, + "step": 3108 + }, + { + "epoch": 0.7168549688724926, + "grad_norm": 1.3847966627377115, + "learning_rate": 1.5237982712085665e-06, + "loss": 0.43693509697914124, + "step": 3109 + }, + { + "epoch": 0.7170855430020752, + "grad_norm": 1.3509946026255297, + "learning_rate": 1.5234734586764422e-06, + "loss": 0.4544166922569275, + "step": 3110 + }, + { + "epoch": 0.7173161171316579, + "grad_norm": 1.1949924477500942, + "learning_rate": 1.5231485700521451e-06, + "loss": 0.5470178127288818, + "step": 3111 + }, + { + "epoch": 0.7175466912612405, + "grad_norm": 1.5007057362656466, + "learning_rate": 1.5228236053829017e-06, + "loss": 
0.5215972065925598, + "step": 3112 + }, + { + "epoch": 0.7177772653908232, + "grad_norm": 1.1400006826022246, + "learning_rate": 1.5224985647159488e-06, + "loss": 0.3922381103038788, + "step": 3113 + }, + { + "epoch": 0.7180078395204058, + "grad_norm": 1.3432802481675237, + "learning_rate": 1.5221734480985341e-06, + "loss": 0.47455158829689026, + "step": 3114 + }, + { + "epoch": 0.7182384136499884, + "grad_norm": 1.517078162476979, + "learning_rate": 1.5218482555779164e-06, + "loss": 0.5776175260543823, + "step": 3115 + }, + { + "epoch": 0.7184689877795711, + "grad_norm": 1.4757174936390305, + "learning_rate": 1.521522987201366e-06, + "loss": 0.40414175391197205, + "step": 3116 + }, + { + "epoch": 0.7186995619091537, + "grad_norm": 1.5441693701407133, + "learning_rate": 1.5211976430161643e-06, + "loss": 0.44597384333610535, + "step": 3117 + }, + { + "epoch": 0.7189301360387365, + "grad_norm": 1.6495022083145716, + "learning_rate": 1.5208722230696024e-06, + "loss": 0.50276118516922, + "step": 3118 + }, + { + "epoch": 0.7191607101683191, + "grad_norm": 1.255966386168249, + "learning_rate": 1.5205467274089844e-06, + "loss": 0.43281811475753784, + "step": 3119 + }, + { + "epoch": 0.7193912842979018, + "grad_norm": 1.196003407991791, + "learning_rate": 1.5202211560816243e-06, + "loss": 0.3796764016151428, + "step": 3120 + }, + { + "epoch": 0.7196218584274844, + "grad_norm": 1.1855608567240021, + "learning_rate": 1.5198955091348463e-06, + "loss": 0.47820231318473816, + "step": 3121 + }, + { + "epoch": 0.7198524325570671, + "grad_norm": 1.3809241508956476, + "learning_rate": 1.5195697866159875e-06, + "loss": 0.4737284779548645, + "step": 3122 + }, + { + "epoch": 0.7200830066866497, + "grad_norm": 1.3019928778593748, + "learning_rate": 1.519243988572394e-06, + "loss": 0.44652169942855835, + "step": 3123 + }, + { + "epoch": 0.7203135808162324, + "grad_norm": 1.0393403987452434, + "learning_rate": 1.518918115051425e-06, + "loss": 0.42702072858810425, + "step": 3124 + }, + { 
+ "epoch": 0.720544154945815, + "grad_norm": 1.3835329760109338, + "learning_rate": 1.5185921661004483e-06, + "loss": 0.5003541707992554, + "step": 3125 + }, + { + "epoch": 0.7207747290753977, + "grad_norm": 1.3444035589789487, + "learning_rate": 1.518266141766845e-06, + "loss": 0.5045102834701538, + "step": 3126 + }, + { + "epoch": 0.7210053032049804, + "grad_norm": 1.3069630488439725, + "learning_rate": 1.5179400420980052e-06, + "loss": 0.46619412302970886, + "step": 3127 + }, + { + "epoch": 0.7212358773345631, + "grad_norm": 1.7755918931491346, + "learning_rate": 1.5176138671413314e-06, + "loss": 0.5006855726242065, + "step": 3128 + }, + { + "epoch": 0.7214664514641457, + "grad_norm": 1.4202077937995432, + "learning_rate": 1.5172876169442362e-06, + "loss": 0.4394634962081909, + "step": 3129 + }, + { + "epoch": 0.7216970255937284, + "grad_norm": 1.203576429459206, + "learning_rate": 1.5169612915541428e-06, + "loss": 0.49311593174934387, + "step": 3130 + }, + { + "epoch": 0.721927599723311, + "grad_norm": 1.2610358507024448, + "learning_rate": 1.5166348910184868e-06, + "loss": 0.38406768441200256, + "step": 3131 + }, + { + "epoch": 0.7221581738528937, + "grad_norm": 1.52088025341024, + "learning_rate": 1.5163084153847132e-06, + "loss": 0.547613799571991, + "step": 3132 + }, + { + "epoch": 0.7223887479824763, + "grad_norm": 1.4599825671580298, + "learning_rate": 1.515981864700279e-06, + "loss": 0.43875589966773987, + "step": 3133 + }, + { + "epoch": 0.722619322112059, + "grad_norm": 1.3276172293945816, + "learning_rate": 1.5156552390126516e-06, + "loss": 0.41515982151031494, + "step": 3134 + }, + { + "epoch": 0.7228498962416416, + "grad_norm": 1.400170522869638, + "learning_rate": 1.5153285383693088e-06, + "loss": 0.43297481536865234, + "step": 3135 + }, + { + "epoch": 0.7230804703712244, + "grad_norm": 1.3346402467183769, + "learning_rate": 1.5150017628177408e-06, + "loss": 0.5059916377067566, + "step": 3136 + }, + { + "epoch": 0.723311044500807, + "grad_norm": 
1.4474439218451525, + "learning_rate": 1.514674912405447e-06, + "loss": 0.4776325225830078, + "step": 3137 + }, + { + "epoch": 0.7235416186303897, + "grad_norm": 1.4332410620248028, + "learning_rate": 1.5143479871799381e-06, + "loss": 0.4925272464752197, + "step": 3138 + }, + { + "epoch": 0.7237721927599723, + "grad_norm": 0.9806444224416654, + "learning_rate": 1.5140209871887368e-06, + "loss": 0.3825960159301758, + "step": 3139 + }, + { + "epoch": 0.724002766889555, + "grad_norm": 1.811554812935443, + "learning_rate": 1.513693912479376e-06, + "loss": 0.5582098960876465, + "step": 3140 + }, + { + "epoch": 0.7242333410191376, + "grad_norm": 1.4229587145535472, + "learning_rate": 1.5133667630993983e-06, + "loss": 0.4079757630825043, + "step": 3141 + }, + { + "epoch": 0.7244639151487203, + "grad_norm": 1.3307764336864334, + "learning_rate": 1.513039539096359e-06, + "loss": 0.4996449947357178, + "step": 3142 + }, + { + "epoch": 0.7246944892783029, + "grad_norm": 1.2360600034220603, + "learning_rate": 1.5127122405178233e-06, + "loss": 0.4822157323360443, + "step": 3143 + }, + { + "epoch": 0.7249250634078857, + "grad_norm": 1.2687974509229507, + "learning_rate": 1.512384867411367e-06, + "loss": 0.43123728036880493, + "step": 3144 + }, + { + "epoch": 0.7251556375374683, + "grad_norm": 1.2723246094506335, + "learning_rate": 1.5120574198245776e-06, + "loss": 0.4942808151245117, + "step": 3145 + }, + { + "epoch": 0.725386211667051, + "grad_norm": 1.1117112525626116, + "learning_rate": 1.5117298978050525e-06, + "loss": 0.49165093898773193, + "step": 3146 + }, + { + "epoch": 0.7256167857966336, + "grad_norm": 1.2668452294382095, + "learning_rate": 1.5114023014004008e-06, + "loss": 0.4700804352760315, + "step": 3147 + }, + { + "epoch": 0.7258473599262163, + "grad_norm": 1.9638712043686382, + "learning_rate": 1.5110746306582413e-06, + "loss": 0.4703143835067749, + "step": 3148 + }, + { + "epoch": 0.7260779340557989, + "grad_norm": 1.2418379131661055, + "learning_rate": 
1.5107468856262048e-06, + "loss": 0.47312211990356445, + "step": 3149 + }, + { + "epoch": 0.7263085081853816, + "grad_norm": 1.3558937860977873, + "learning_rate": 1.5104190663519323e-06, + "loss": 0.49607813358306885, + "step": 3150 + }, + { + "epoch": 0.7265390823149642, + "grad_norm": 1.2747447528869889, + "learning_rate": 1.5100911728830754e-06, + "loss": 0.4401499629020691, + "step": 3151 + }, + { + "epoch": 0.7267696564445469, + "grad_norm": 1.3050498169083122, + "learning_rate": 1.5097632052672973e-06, + "loss": 0.4979579448699951, + "step": 3152 + }, + { + "epoch": 0.7270002305741295, + "grad_norm": 1.1477032098667286, + "learning_rate": 1.5094351635522706e-06, + "loss": 0.42917048931121826, + "step": 3153 + }, + { + "epoch": 0.7272308047037123, + "grad_norm": 1.2688450847611672, + "learning_rate": 1.50910704778568e-06, + "loss": 0.41664260625839233, + "step": 3154 + }, + { + "epoch": 0.7274613788332949, + "grad_norm": 1.4083630490412662, + "learning_rate": 1.5087788580152206e-06, + "loss": 0.5000253915786743, + "step": 3155 + }, + { + "epoch": 0.7276919529628776, + "grad_norm": 1.2424572303309531, + "learning_rate": 1.5084505942885976e-06, + "loss": 0.5075093507766724, + "step": 3156 + }, + { + "epoch": 0.7279225270924602, + "grad_norm": 1.319578470826436, + "learning_rate": 1.508122256653528e-06, + "loss": 0.44975680112838745, + "step": 3157 + }, + { + "epoch": 0.7281531012220429, + "grad_norm": 1.1450711263341298, + "learning_rate": 1.5077938451577383e-06, + "loss": 0.44494926929473877, + "step": 3158 + }, + { + "epoch": 0.7283836753516255, + "grad_norm": 1.3333716905743178, + "learning_rate": 1.5074653598489673e-06, + "loss": 0.5664352178573608, + "step": 3159 + }, + { + "epoch": 0.7286142494812082, + "grad_norm": 1.1840094617058035, + "learning_rate": 1.507136800774963e-06, + "loss": 0.5694705247879028, + "step": 3160 + }, + { + "epoch": 0.7288448236107908, + "grad_norm": 1.5658434570152957, + "learning_rate": 1.506808167983485e-06, + "loss": 
0.5121151804924011, + "step": 3161 + }, + { + "epoch": 0.7290753977403736, + "grad_norm": 1.3559529766390859, + "learning_rate": 1.5064794615223034e-06, + "loss": 0.45935380458831787, + "step": 3162 + }, + { + "epoch": 0.7293059718699562, + "grad_norm": 1.2036749528520703, + "learning_rate": 1.506150681439199e-06, + "loss": 0.517521858215332, + "step": 3163 + }, + { + "epoch": 0.7295365459995389, + "grad_norm": 1.271352713883254, + "learning_rate": 1.5058218277819638e-06, + "loss": 0.5078546404838562, + "step": 3164 + }, + { + "epoch": 0.7297671201291215, + "grad_norm": 1.4877111530715366, + "learning_rate": 1.5054929005983992e-06, + "loss": 0.47892552614212036, + "step": 3165 + }, + { + "epoch": 0.7299976942587042, + "grad_norm": 1.5569470487033794, + "learning_rate": 1.5051638999363185e-06, + "loss": 0.48825597763061523, + "step": 3166 + }, + { + "epoch": 0.7302282683882868, + "grad_norm": 1.2181600327145499, + "learning_rate": 1.5048348258435457e-06, + "loss": 0.488031804561615, + "step": 3167 + }, + { + "epoch": 0.7304588425178695, + "grad_norm": 1.178638754387744, + "learning_rate": 1.5045056783679143e-06, + "loss": 0.4669504761695862, + "step": 3168 + }, + { + "epoch": 0.7306894166474521, + "grad_norm": 1.364305786110939, + "learning_rate": 1.5041764575572695e-06, + "loss": 0.45620614290237427, + "step": 3169 + }, + { + "epoch": 0.7309199907770348, + "grad_norm": 1.4607481202185084, + "learning_rate": 1.5038471634594667e-06, + "loss": 0.4271177649497986, + "step": 3170 + }, + { + "epoch": 0.7311505649066175, + "grad_norm": 1.4441980354968733, + "learning_rate": 1.5035177961223726e-06, + "loss": 0.5170531272888184, + "step": 3171 + }, + { + "epoch": 0.7313811390362002, + "grad_norm": 1.046719642579895, + "learning_rate": 1.5031883555938638e-06, + "loss": 0.4261493682861328, + "step": 3172 + }, + { + "epoch": 0.7316117131657828, + "grad_norm": 1.4357281868096983, + "learning_rate": 1.502858841921828e-06, + "loss": 0.4958994686603546, + "step": 3173 + }, + { + 
"epoch": 0.7318422872953655, + "grad_norm": 1.631538220078115, + "learning_rate": 1.502529255154163e-06, + "loss": 0.49798572063446045, + "step": 3174 + }, + { + "epoch": 0.7320728614249481, + "grad_norm": 1.3524076496726538, + "learning_rate": 1.502199595338778e-06, + "loss": 0.4067850708961487, + "step": 3175 + }, + { + "epoch": 0.7323034355545308, + "grad_norm": 1.2000506588677564, + "learning_rate": 1.5018698625235916e-06, + "loss": 0.4680994153022766, + "step": 3176 + }, + { + "epoch": 0.7325340096841134, + "grad_norm": 1.3054261583860276, + "learning_rate": 1.501540056756535e-06, + "loss": 0.49181580543518066, + "step": 3177 + }, + { + "epoch": 0.7327645838136961, + "grad_norm": 1.485479754545564, + "learning_rate": 1.501210178085548e-06, + "loss": 0.5425546169281006, + "step": 3178 + }, + { + "epoch": 0.7329951579432787, + "grad_norm": 1.1514309763496005, + "learning_rate": 1.500880226558582e-06, + "loss": 0.4869355261325836, + "step": 3179 + }, + { + "epoch": 0.7332257320728615, + "grad_norm": 1.5737536993523387, + "learning_rate": 1.500550202223599e-06, + "loss": 0.5157885551452637, + "step": 3180 + }, + { + "epoch": 0.7334563062024441, + "grad_norm": 1.4471157017235972, + "learning_rate": 1.5002201051285707e-06, + "loss": 0.528350293636322, + "step": 3181 + }, + { + "epoch": 0.7336868803320268, + "grad_norm": 1.0924579051997452, + "learning_rate": 1.499889935321481e-06, + "loss": 0.3963279128074646, + "step": 3182 + }, + { + "epoch": 0.7339174544616094, + "grad_norm": 1.0536411378011648, + "learning_rate": 1.499559692850323e-06, + "loss": 0.36777108907699585, + "step": 3183 + }, + { + "epoch": 0.7341480285911921, + "grad_norm": 1.3572066258310391, + "learning_rate": 1.4992293777631004e-06, + "loss": 0.4592905044555664, + "step": 3184 + }, + { + "epoch": 0.7343786027207747, + "grad_norm": 1.3801194879873266, + "learning_rate": 1.4988989901078285e-06, + "loss": 0.458257257938385, + "step": 3185 + }, + { + "epoch": 0.7346091768503574, + "grad_norm": 
1.2823442631336313, + "learning_rate": 1.4985685299325316e-06, + "loss": 0.4844989478588104, + "step": 3186 + }, + { + "epoch": 0.73483975097994, + "grad_norm": 1.3019212093413413, + "learning_rate": 1.498237997285247e-06, + "loss": 0.381417453289032, + "step": 3187 + }, + { + "epoch": 0.7350703251095227, + "grad_norm": 1.267517645310936, + "learning_rate": 1.4979073922140196e-06, + "loss": 0.42452555894851685, + "step": 3188 + }, + { + "epoch": 0.7353008992391054, + "grad_norm": 1.2143530957836637, + "learning_rate": 1.4975767147669063e-06, + "loss": 0.4660685956478119, + "step": 3189 + }, + { + "epoch": 0.7355314733686881, + "grad_norm": 1.243568614271109, + "learning_rate": 1.4972459649919748e-06, + "loss": 0.4332653880119324, + "step": 3190 + }, + { + "epoch": 0.7357620474982707, + "grad_norm": 1.4818958085574696, + "learning_rate": 1.496915142937303e-06, + "loss": 0.5580132007598877, + "step": 3191 + }, + { + "epoch": 0.7359926216278534, + "grad_norm": 1.102415574688255, + "learning_rate": 1.4965842486509792e-06, + "loss": 0.43711793422698975, + "step": 3192 + }, + { + "epoch": 0.736223195757436, + "grad_norm": 1.1786805187530485, + "learning_rate": 1.496253282181102e-06, + "loss": 0.44969767332077026, + "step": 3193 + }, + { + "epoch": 0.7364537698870187, + "grad_norm": 1.5017804708887366, + "learning_rate": 1.4959222435757809e-06, + "loss": 0.5288668870925903, + "step": 3194 + }, + { + "epoch": 0.7366843440166013, + "grad_norm": 1.2442315862489326, + "learning_rate": 1.4955911328831353e-06, + "loss": 0.45993220806121826, + "step": 3195 + }, + { + "epoch": 0.736914918146184, + "grad_norm": 1.6618645292728147, + "learning_rate": 1.4952599501512963e-06, + "loss": 0.5360512733459473, + "step": 3196 + }, + { + "epoch": 0.7371454922757666, + "grad_norm": 1.2833906478614454, + "learning_rate": 1.4949286954284044e-06, + "loss": 0.3923282325267792, + "step": 3197 + }, + { + "epoch": 0.7373760664053494, + "grad_norm": 1.2830570803742403, + "learning_rate": 
1.4945973687626103e-06, + "loss": 0.5051449537277222, + "step": 3198 + }, + { + "epoch": 0.737606640534932, + "grad_norm": 1.288727241344276, + "learning_rate": 1.4942659702020763e-06, + "loss": 0.5035187602043152, + "step": 3199 + }, + { + "epoch": 0.7378372146645147, + "grad_norm": 1.1929311231536464, + "learning_rate": 1.4939344997949742e-06, + "loss": 0.4922195076942444, + "step": 3200 + }, + { + "epoch": 0.7380677887940973, + "grad_norm": 1.1654414900260779, + "learning_rate": 1.4936029575894865e-06, + "loss": 0.49664247035980225, + "step": 3201 + }, + { + "epoch": 0.73829836292368, + "grad_norm": 1.2090144084254086, + "learning_rate": 1.4932713436338065e-06, + "loss": 0.4240155816078186, + "step": 3202 + }, + { + "epoch": 0.7385289370532626, + "grad_norm": 1.150655085488804, + "learning_rate": 1.4929396579761376e-06, + "loss": 0.3830781579017639, + "step": 3203 + }, + { + "epoch": 0.7387595111828453, + "grad_norm": 1.2626520886498587, + "learning_rate": 1.4926079006646936e-06, + "loss": 0.37983447313308716, + "step": 3204 + }, + { + "epoch": 0.7389900853124279, + "grad_norm": 1.37294258180721, + "learning_rate": 1.4922760717476989e-06, + "loss": 0.4680769443511963, + "step": 3205 + }, + { + "epoch": 0.7392206594420107, + "grad_norm": 1.0992782157194299, + "learning_rate": 1.4919441712733878e-06, + "loss": 0.3801664710044861, + "step": 3206 + }, + { + "epoch": 0.7394512335715933, + "grad_norm": 1.2101909370157682, + "learning_rate": 1.4916121992900062e-06, + "loss": 0.5506627559661865, + "step": 3207 + }, + { + "epoch": 0.739681807701176, + "grad_norm": 1.4326210599966231, + "learning_rate": 1.4912801558458087e-06, + "loss": 0.4976215660572052, + "step": 3208 + }, + { + "epoch": 0.7399123818307586, + "grad_norm": 1.269851030633043, + "learning_rate": 1.4909480409890615e-06, + "loss": 0.42806485295295715, + "step": 3209 + }, + { + "epoch": 0.7401429559603413, + "grad_norm": 1.5738327378318604, + "learning_rate": 1.4906158547680413e-06, + "loss": 
0.3850712180137634, + "step": 3210 + }, + { + "epoch": 0.7403735300899239, + "grad_norm": 1.1706966056418486, + "learning_rate": 1.4902835972310342e-06, + "loss": 0.4356945753097534, + "step": 3211 + }, + { + "epoch": 0.7406041042195066, + "grad_norm": 1.3196733008465567, + "learning_rate": 1.4899512684263373e-06, + "loss": 0.4806904196739197, + "step": 3212 + }, + { + "epoch": 0.7408346783490892, + "grad_norm": 1.6634902313002624, + "learning_rate": 1.489618868402258e-06, + "loss": 0.544597327709198, + "step": 3213 + }, + { + "epoch": 0.7410652524786719, + "grad_norm": 1.2400106880376924, + "learning_rate": 1.4892863972071141e-06, + "loss": 0.39847469329833984, + "step": 3214 + }, + { + "epoch": 0.7412958266082545, + "grad_norm": 1.165782132875825, + "learning_rate": 1.4889538548892336e-06, + "loss": 0.4959847331047058, + "step": 3215 + }, + { + "epoch": 0.7415264007378373, + "grad_norm": 1.1727701470106202, + "learning_rate": 1.488621241496955e-06, + "loss": 0.3839089870452881, + "step": 3216 + }, + { + "epoch": 0.7417569748674199, + "grad_norm": 1.4119004491894294, + "learning_rate": 1.4882885570786266e-06, + "loss": 0.5187599658966064, + "step": 3217 + }, + { + "epoch": 0.7419875489970026, + "grad_norm": 1.1715648701346035, + "learning_rate": 1.4879558016826082e-06, + "loss": 0.45735663175582886, + "step": 3218 + }, + { + "epoch": 0.7422181231265852, + "grad_norm": 1.2093385209256575, + "learning_rate": 1.4876229753572687e-06, + "loss": 0.5635267496109009, + "step": 3219 + }, + { + "epoch": 0.7424486972561679, + "grad_norm": 1.5737635031230153, + "learning_rate": 1.4872900781509876e-06, + "loss": 0.5255833268165588, + "step": 3220 + }, + { + "epoch": 0.7426792713857505, + "grad_norm": 1.3608013352784492, + "learning_rate": 1.486957110112155e-06, + "loss": 0.4563497304916382, + "step": 3221 + }, + { + "epoch": 0.7429098455153332, + "grad_norm": 1.2494840959741684, + "learning_rate": 1.4866240712891714e-06, + "loss": 0.3737669885158539, + "step": 3222 + }, + { + 
"epoch": 0.7431404196449158, + "grad_norm": 1.3341042787752078, + "learning_rate": 1.4862909617304473e-06, + "loss": 0.48965659737586975, + "step": 3223 + }, + { + "epoch": 0.7433709937744986, + "grad_norm": 1.138792861067833, + "learning_rate": 1.4859577814844036e-06, + "loss": 0.40867483615875244, + "step": 3224 + }, + { + "epoch": 0.7436015679040812, + "grad_norm": 1.6873709244395776, + "learning_rate": 1.4856245305994711e-06, + "loss": 0.5870566368103027, + "step": 3225 + }, + { + "epoch": 0.7438321420336638, + "grad_norm": 1.9479920905112817, + "learning_rate": 1.4852912091240914e-06, + "loss": 0.5424025654792786, + "step": 3226 + }, + { + "epoch": 0.7440627161632465, + "grad_norm": 1.3117337551828157, + "learning_rate": 1.4849578171067166e-06, + "loss": 0.5305285453796387, + "step": 3227 + }, + { + "epoch": 0.7442932902928291, + "grad_norm": 1.6524409541791285, + "learning_rate": 1.4846243545958078e-06, + "loss": 0.4189227819442749, + "step": 3228 + }, + { + "epoch": 0.7445238644224118, + "grad_norm": 1.3163917938675591, + "learning_rate": 1.4842908216398379e-06, + "loss": 0.44568121433258057, + "step": 3229 + }, + { + "epoch": 0.7447544385519944, + "grad_norm": 1.57546318763007, + "learning_rate": 1.4839572182872883e-06, + "loss": 0.5177523493766785, + "step": 3230 + }, + { + "epoch": 0.7449850126815771, + "grad_norm": 2.0231485633083213, + "learning_rate": 1.4836235445866528e-06, + "loss": 0.5100630521774292, + "step": 3231 + }, + { + "epoch": 0.7452155868111597, + "grad_norm": 1.2988766977840327, + "learning_rate": 1.4832898005864336e-06, + "loss": 0.45731791853904724, + "step": 3232 + }, + { + "epoch": 0.7454461609407425, + "grad_norm": 1.4418312758556044, + "learning_rate": 1.4829559863351437e-06, + "loss": 0.5161736011505127, + "step": 3233 + }, + { + "epoch": 0.7456767350703251, + "grad_norm": 1.2131599613200943, + "learning_rate": 1.4826221018813067e-06, + "loss": 0.4778611660003662, + "step": 3234 + }, + { + "epoch": 0.7459073091999078, + 
"grad_norm": 1.208766404583587, + "learning_rate": 1.482288147273456e-06, + "loss": 0.467506468296051, + "step": 3235 + }, + { + "epoch": 0.7461378833294904, + "grad_norm": 1.3564852786094337, + "learning_rate": 1.4819541225601352e-06, + "loss": 0.5061084032058716, + "step": 3236 + }, + { + "epoch": 0.7463684574590731, + "grad_norm": 1.3693293129226278, + "learning_rate": 1.4816200277898983e-06, + "loss": 0.5066365599632263, + "step": 3237 + }, + { + "epoch": 0.7465990315886557, + "grad_norm": 1.2091939411250054, + "learning_rate": 1.4812858630113093e-06, + "loss": 0.44285398721694946, + "step": 3238 + }, + { + "epoch": 0.7468296057182384, + "grad_norm": 1.3395886619598594, + "learning_rate": 1.4809516282729426e-06, + "loss": 0.5325936079025269, + "step": 3239 + }, + { + "epoch": 0.747060179847821, + "grad_norm": 1.2575363206535257, + "learning_rate": 1.4806173236233818e-06, + "loss": 0.37296950817108154, + "step": 3240 + }, + { + "epoch": 0.7472907539774037, + "grad_norm": 1.3466058050144787, + "learning_rate": 1.4802829491112228e-06, + "loss": 0.4596887230873108, + "step": 3241 + }, + { + "epoch": 0.7475213281069863, + "grad_norm": 1.4791727382559166, + "learning_rate": 1.4799485047850693e-06, + "loss": 0.4344385266304016, + "step": 3242 + }, + { + "epoch": 0.7477519022365691, + "grad_norm": 1.235031250671636, + "learning_rate": 1.4796139906935365e-06, + "loss": 0.458631306886673, + "step": 3243 + }, + { + "epoch": 0.7479824763661517, + "grad_norm": 1.3676048590005543, + "learning_rate": 1.4792794068852494e-06, + "loss": 0.5425032377243042, + "step": 3244 + }, + { + "epoch": 0.7482130504957344, + "grad_norm": 1.1764717045773245, + "learning_rate": 1.478944753408843e-06, + "loss": 0.4240065813064575, + "step": 3245 + }, + { + "epoch": 0.748443624625317, + "grad_norm": 1.3527342191314002, + "learning_rate": 1.478610030312963e-06, + "loss": 0.5533365607261658, + "step": 3246 + }, + { + "epoch": 0.7486741987548997, + "grad_norm": 1.4574041701217884, + 
"learning_rate": 1.4782752376462647e-06, + "loss": 0.4089345335960388, + "step": 3247 + }, + { + "epoch": 0.7489047728844823, + "grad_norm": 1.3793731191813918, + "learning_rate": 1.4779403754574131e-06, + "loss": 0.5098259449005127, + "step": 3248 + }, + { + "epoch": 0.749135347014065, + "grad_norm": 1.3041128935188901, + "learning_rate": 1.4776054437950842e-06, + "loss": 0.4615677297115326, + "step": 3249 + }, + { + "epoch": 0.7493659211436476, + "grad_norm": 1.3216071057711354, + "learning_rate": 1.4772704427079639e-06, + "loss": 0.460266649723053, + "step": 3250 + }, + { + "epoch": 0.7495964952732304, + "grad_norm": 1.4054347579351087, + "learning_rate": 1.4769353722447476e-06, + "loss": 0.4727064371109009, + "step": 3251 + }, + { + "epoch": 0.749827069402813, + "grad_norm": 1.3954753679563598, + "learning_rate": 1.4766002324541411e-06, + "loss": 0.4733152985572815, + "step": 3252 + }, + { + "epoch": 0.7500576435323957, + "grad_norm": 1.408517900798552, + "learning_rate": 1.4762650233848609e-06, + "loss": 0.5055218935012817, + "step": 3253 + }, + { + "epoch": 0.7502882176619783, + "grad_norm": 1.3285058616446128, + "learning_rate": 1.4759297450856324e-06, + "loss": 0.6129124164581299, + "step": 3254 + }, + { + "epoch": 0.750518791791561, + "grad_norm": 1.6354094862337523, + "learning_rate": 1.4755943976051926e-06, + "loss": 0.46197545528411865, + "step": 3255 + }, + { + "epoch": 0.7507493659211436, + "grad_norm": 1.3239897164772563, + "learning_rate": 1.4752589809922868e-06, + "loss": 0.5227653980255127, + "step": 3256 + }, + { + "epoch": 0.7509799400507263, + "grad_norm": 1.4638577740242362, + "learning_rate": 1.4749234952956715e-06, + "loss": 0.5189518928527832, + "step": 3257 + }, + { + "epoch": 0.7512105141803089, + "grad_norm": 1.2059107130307087, + "learning_rate": 1.474587940564113e-06, + "loss": 0.4850584864616394, + "step": 3258 + }, + { + "epoch": 0.7514410883098916, + "grad_norm": 1.4809027704015267, + "learning_rate": 1.4742523168463876e-06, + 
"loss": 0.5218943357467651, + "step": 3259 + }, + { + "epoch": 0.7516716624394743, + "grad_norm": 1.130064311367936, + "learning_rate": 1.4739166241912814e-06, + "loss": 0.4311223030090332, + "step": 3260 + }, + { + "epoch": 0.751902236569057, + "grad_norm": 1.372801682112421, + "learning_rate": 1.473580862647591e-06, + "loss": 0.525306224822998, + "step": 3261 + }, + { + "epoch": 0.7521328106986396, + "grad_norm": 1.291063350632538, + "learning_rate": 1.4732450322641225e-06, + "loss": 0.506609320640564, + "step": 3262 + }, + { + "epoch": 0.7523633848282223, + "grad_norm": 1.4043846834415283, + "learning_rate": 1.4729091330896926e-06, + "loss": 0.5477846264839172, + "step": 3263 + }, + { + "epoch": 0.7525939589578049, + "grad_norm": 1.1342853276703964, + "learning_rate": 1.4725731651731268e-06, + "loss": 0.48802629113197327, + "step": 3264 + }, + { + "epoch": 0.7528245330873876, + "grad_norm": 1.5090127096652195, + "learning_rate": 1.4722371285632626e-06, + "loss": 0.4774906635284424, + "step": 3265 + }, + { + "epoch": 0.7530551072169702, + "grad_norm": 1.4537920297241385, + "learning_rate": 1.4719010233089458e-06, + "loss": 0.4220488667488098, + "step": 3266 + }, + { + "epoch": 0.7532856813465529, + "grad_norm": 1.441465153643324, + "learning_rate": 1.4715648494590324e-06, + "loss": 0.43912187218666077, + "step": 3267 + }, + { + "epoch": 0.7535162554761355, + "grad_norm": 1.3653901674246531, + "learning_rate": 1.4712286070623892e-06, + "loss": 0.5302494764328003, + "step": 3268 + }, + { + "epoch": 0.7537468296057183, + "grad_norm": 1.3282339539348487, + "learning_rate": 1.4708922961678923e-06, + "loss": 0.4800306260585785, + "step": 3269 + }, + { + "epoch": 0.7539774037353009, + "grad_norm": 1.2634165352126685, + "learning_rate": 1.4705559168244275e-06, + "loss": 0.3993161618709564, + "step": 3270 + }, + { + "epoch": 0.7542079778648836, + "grad_norm": 1.446141365903489, + "learning_rate": 1.4702194690808916e-06, + "loss": 0.37037837505340576, + "step": 3271 + }, + 
{ + "epoch": 0.7544385519944662, + "grad_norm": 1.3105522613811469, + "learning_rate": 1.4698829529861898e-06, + "loss": 0.44288602471351624, + "step": 3272 + }, + { + "epoch": 0.7546691261240489, + "grad_norm": 1.542566998549956, + "learning_rate": 1.469546368589239e-06, + "loss": 0.5480727553367615, + "step": 3273 + }, + { + "epoch": 0.7548997002536315, + "grad_norm": 1.5093924463506492, + "learning_rate": 1.4692097159389649e-06, + "loss": 0.4964104890823364, + "step": 3274 + }, + { + "epoch": 0.7551302743832142, + "grad_norm": 1.5912503319666471, + "learning_rate": 1.4688729950843033e-06, + "loss": 0.4744144082069397, + "step": 3275 + }, + { + "epoch": 0.7553608485127968, + "grad_norm": 1.1258853516330976, + "learning_rate": 1.4685362060741997e-06, + "loss": 0.44675350189208984, + "step": 3276 + }, + { + "epoch": 0.7555914226423796, + "grad_norm": 1.4768191837188436, + "learning_rate": 1.46819934895761e-06, + "loss": 0.45261216163635254, + "step": 3277 + }, + { + "epoch": 0.7558219967719622, + "grad_norm": 1.3183121513891758, + "learning_rate": 1.4678624237835005e-06, + "loss": 0.4180977940559387, + "step": 3278 + }, + { + "epoch": 0.7560525709015449, + "grad_norm": 1.34629761070606, + "learning_rate": 1.4675254306008456e-06, + "loss": 0.39477843046188354, + "step": 3279 + }, + { + "epoch": 0.7562831450311275, + "grad_norm": 1.439585323315283, + "learning_rate": 1.467188369458631e-06, + "loss": 0.5033801198005676, + "step": 3280 + }, + { + "epoch": 0.7565137191607102, + "grad_norm": 1.3522884656136929, + "learning_rate": 1.4668512404058527e-06, + "loss": 0.5719846487045288, + "step": 3281 + }, + { + "epoch": 0.7567442932902928, + "grad_norm": 1.6993262990855147, + "learning_rate": 1.4665140434915147e-06, + "loss": 0.5198945999145508, + "step": 3282 + }, + { + "epoch": 0.7569748674198755, + "grad_norm": 1.6486008286234453, + "learning_rate": 1.4661767787646326e-06, + "loss": 0.4641912579536438, + "step": 3283 + }, + { + "epoch": 0.7572054415494581, + "grad_norm": 
1.542363438136225, + "learning_rate": 1.4658394462742309e-06, + "loss": 0.44070225954055786, + "step": 3284 + }, + { + "epoch": 0.7574360156790408, + "grad_norm": 1.1923089532877131, + "learning_rate": 1.465502046069345e-06, + "loss": 0.4324581027030945, + "step": 3285 + }, + { + "epoch": 0.7576665898086234, + "grad_norm": 1.5168087965785, + "learning_rate": 1.4651645781990187e-06, + "loss": 0.5789060592651367, + "step": 3286 + }, + { + "epoch": 0.7578971639382062, + "grad_norm": 1.7886030443223944, + "learning_rate": 1.4648270427123068e-06, + "loss": 0.45642149448394775, + "step": 3287 + }, + { + "epoch": 0.7581277380677888, + "grad_norm": 1.222780244920245, + "learning_rate": 1.4644894396582732e-06, + "loss": 0.4587763547897339, + "step": 3288 + }, + { + "epoch": 0.7583583121973715, + "grad_norm": 1.570757900264253, + "learning_rate": 1.4641517690859924e-06, + "loss": 0.5472866892814636, + "step": 3289 + }, + { + "epoch": 0.7585888863269541, + "grad_norm": 1.4662287757114318, + "learning_rate": 1.4638140310445476e-06, + "loss": 0.5274207592010498, + "step": 3290 + }, + { + "epoch": 0.7588194604565368, + "grad_norm": 1.5317060576828687, + "learning_rate": 1.4634762255830326e-06, + "loss": 0.46280741691589355, + "step": 3291 + }, + { + "epoch": 0.7590500345861194, + "grad_norm": 1.357303550008307, + "learning_rate": 1.4631383527505515e-06, + "loss": 0.5395090579986572, + "step": 3292 + }, + { + "epoch": 0.7592806087157021, + "grad_norm": 1.3556569618907826, + "learning_rate": 1.4628004125962168e-06, + "loss": 0.49923229217529297, + "step": 3293 + }, + { + "epoch": 0.7595111828452847, + "grad_norm": 1.437270857620585, + "learning_rate": 1.462462405169152e-06, + "loss": 0.5414037108421326, + "step": 3294 + }, + { + "epoch": 0.7597417569748675, + "grad_norm": 1.2450139122326453, + "learning_rate": 1.4621243305184895e-06, + "loss": 0.4246688485145569, + "step": 3295 + }, + { + "epoch": 0.7599723311044501, + "grad_norm": 1.2346000309431113, + "learning_rate": 
1.461786188693372e-06, + "loss": 0.4997994005680084, + "step": 3296 + }, + { + "epoch": 0.7602029052340328, + "grad_norm": 1.2539682682883548, + "learning_rate": 1.4614479797429523e-06, + "loss": 0.4571123719215393, + "step": 3297 + }, + { + "epoch": 0.7604334793636154, + "grad_norm": 1.3546747118119653, + "learning_rate": 1.4611097037163917e-06, + "loss": 0.5178083181381226, + "step": 3298 + }, + { + "epoch": 0.7606640534931981, + "grad_norm": 1.438807896221459, + "learning_rate": 1.4607713606628625e-06, + "loss": 0.538001298904419, + "step": 3299 + }, + { + "epoch": 0.7608946276227807, + "grad_norm": 1.6495208547410056, + "learning_rate": 1.4604329506315464e-06, + "loss": 0.45941218733787537, + "step": 3300 + }, + { + "epoch": 0.7611252017523634, + "grad_norm": 1.469904127152949, + "learning_rate": 1.4600944736716344e-06, + "loss": 0.619648277759552, + "step": 3301 + }, + { + "epoch": 0.761355775881946, + "grad_norm": 1.3648924598961014, + "learning_rate": 1.4597559298323281e-06, + "loss": 0.4035170376300812, + "step": 3302 + }, + { + "epoch": 0.7615863500115287, + "grad_norm": 1.4623041349874883, + "learning_rate": 1.4594173191628374e-06, + "loss": 0.48657041788101196, + "step": 3303 + }, + { + "epoch": 0.7618169241411114, + "grad_norm": 1.3486514765257445, + "learning_rate": 1.4590786417123838e-06, + "loss": 0.43324801325798035, + "step": 3304 + }, + { + "epoch": 0.7620474982706941, + "grad_norm": 1.3543990457839288, + "learning_rate": 1.4587398975301968e-06, + "loss": 0.5020644664764404, + "step": 3305 + }, + { + "epoch": 0.7622780724002767, + "grad_norm": 1.4758408294809282, + "learning_rate": 1.4584010866655163e-06, + "loss": 0.4123230576515198, + "step": 3306 + }, + { + "epoch": 0.7625086465298594, + "grad_norm": 1.4629462638568174, + "learning_rate": 1.4580622091675925e-06, + "loss": 0.5110459327697754, + "step": 3307 + }, + { + "epoch": 0.762739220659442, + "grad_norm": 1.3128675599733384, + "learning_rate": 1.4577232650856842e-06, + "loss": 
0.3956744074821472, + "step": 3308 + }, + { + "epoch": 0.7629697947890247, + "grad_norm": 1.028092913473986, + "learning_rate": 1.4573842544690602e-06, + "loss": 0.44418880343437195, + "step": 3309 + }, + { + "epoch": 0.7632003689186073, + "grad_norm": 1.2935675774179733, + "learning_rate": 1.4570451773669993e-06, + "loss": 0.46690821647644043, + "step": 3310 + }, + { + "epoch": 0.76343094304819, + "grad_norm": 1.7250402170715877, + "learning_rate": 1.45670603382879e-06, + "loss": 0.5631324052810669, + "step": 3311 + }, + { + "epoch": 0.7636615171777726, + "grad_norm": 1.3197309301962783, + "learning_rate": 1.4563668239037301e-06, + "loss": 0.42355209589004517, + "step": 3312 + }, + { + "epoch": 0.7638920913073554, + "grad_norm": 1.1819135136971526, + "learning_rate": 1.4560275476411273e-06, + "loss": 0.4509078860282898, + "step": 3313 + }, + { + "epoch": 0.764122665436938, + "grad_norm": 1.2704317123198696, + "learning_rate": 1.4556882050902986e-06, + "loss": 0.48707491159439087, + "step": 3314 + }, + { + "epoch": 0.7643532395665207, + "grad_norm": 1.2817274130067733, + "learning_rate": 1.455348796300571e-06, + "loss": 0.4768955707550049, + "step": 3315 + }, + { + "epoch": 0.7645838136961033, + "grad_norm": 1.1995539933150834, + "learning_rate": 1.4550093213212812e-06, + "loss": 0.44231370091438293, + "step": 3316 + }, + { + "epoch": 0.764814387825686, + "grad_norm": 1.283098801050818, + "learning_rate": 1.4546697802017752e-06, + "loss": 0.41919445991516113, + "step": 3317 + }, + { + "epoch": 0.7650449619552686, + "grad_norm": 1.3370966440445557, + "learning_rate": 1.4543301729914086e-06, + "loss": 0.5004634857177734, + "step": 3318 + }, + { + "epoch": 0.7652755360848513, + "grad_norm": 1.3058062554730827, + "learning_rate": 1.4539904997395467e-06, + "loss": 0.5327651500701904, + "step": 3319 + }, + { + "epoch": 0.7655061102144339, + "grad_norm": 1.2690140519120048, + "learning_rate": 1.4536507604955647e-06, + "loss": 0.4571789801120758, + "step": 3320 + }, + { + 
"epoch": 0.7657366843440166, + "grad_norm": 1.4712336124149359, + "learning_rate": 1.4533109553088474e-06, + "loss": 0.3989352583885193, + "step": 3321 + }, + { + "epoch": 0.7659672584735993, + "grad_norm": 1.390525487190819, + "learning_rate": 1.452971084228788e-06, + "loss": 0.4661702513694763, + "step": 3322 + }, + { + "epoch": 0.766197832603182, + "grad_norm": 1.4525582608827485, + "learning_rate": 1.4526311473047911e-06, + "loss": 0.5007051825523376, + "step": 3323 + }, + { + "epoch": 0.7664284067327646, + "grad_norm": 1.4087277102322913, + "learning_rate": 1.4522911445862697e-06, + "loss": 0.44391199946403503, + "step": 3324 + }, + { + "epoch": 0.7666589808623473, + "grad_norm": 1.5508781982933997, + "learning_rate": 1.4519510761226466e-06, + "loss": 0.48606377840042114, + "step": 3325 + }, + { + "epoch": 0.7668895549919299, + "grad_norm": 1.4942248011879364, + "learning_rate": 1.4516109419633543e-06, + "loss": 0.4831564426422119, + "step": 3326 + }, + { + "epoch": 0.7671201291215126, + "grad_norm": 1.2492238673667777, + "learning_rate": 1.4512707421578344e-06, + "loss": 0.5033055543899536, + "step": 3327 + }, + { + "epoch": 0.7673507032510952, + "grad_norm": 1.268639260981401, + "learning_rate": 1.4509304767555385e-06, + "loss": 0.40440869331359863, + "step": 3328 + }, + { + "epoch": 0.7675812773806779, + "grad_norm": 1.154540060885232, + "learning_rate": 1.4505901458059282e-06, + "loss": 0.4281578063964844, + "step": 3329 + }, + { + "epoch": 0.7678118515102605, + "grad_norm": 1.2646658661078, + "learning_rate": 1.4502497493584735e-06, + "loss": 0.45301395654678345, + "step": 3330 + }, + { + "epoch": 0.7680424256398433, + "grad_norm": 1.2708958618179473, + "learning_rate": 1.4499092874626545e-06, + "loss": 0.3971232771873474, + "step": 3331 + }, + { + "epoch": 0.7682729997694259, + "grad_norm": 1.470304815457328, + "learning_rate": 1.4495687601679607e-06, + "loss": 0.45382559299468994, + "step": 3332 + }, + { + "epoch": 0.7685035738990086, + "grad_norm": 
1.5230375908041864, + "learning_rate": 1.4492281675238916e-06, + "loss": 0.4101349711418152, + "step": 3333 + }, + { + "epoch": 0.7687341480285912, + "grad_norm": 1.7708001369907398, + "learning_rate": 1.4488875095799555e-06, + "loss": 0.5322436690330505, + "step": 3334 + }, + { + "epoch": 0.7689647221581739, + "grad_norm": 1.4488936734065874, + "learning_rate": 1.4485467863856703e-06, + "loss": 0.5497866272926331, + "step": 3335 + }, + { + "epoch": 0.7691952962877565, + "grad_norm": 1.5286830910755105, + "learning_rate": 1.4482059979905642e-06, + "loss": 0.5088074207305908, + "step": 3336 + }, + { + "epoch": 0.7694258704173391, + "grad_norm": 1.2530470288119384, + "learning_rate": 1.4478651444441736e-06, + "loss": 0.4444946050643921, + "step": 3337 + }, + { + "epoch": 0.7696564445469218, + "grad_norm": 1.1602955966590311, + "learning_rate": 1.4475242257960454e-06, + "loss": 0.41257357597351074, + "step": 3338 + }, + { + "epoch": 0.7698870186765044, + "grad_norm": 1.3512416855290101, + "learning_rate": 1.4471832420957356e-06, + "loss": 0.47933512926101685, + "step": 3339 + }, + { + "epoch": 0.7701175928060872, + "grad_norm": 1.204411185284335, + "learning_rate": 1.4468421933928093e-06, + "loss": 0.41331803798675537, + "step": 3340 + }, + { + "epoch": 0.7703481669356698, + "grad_norm": 1.3617384100749454, + "learning_rate": 1.4465010797368416e-06, + "loss": 0.5047392845153809, + "step": 3341 + }, + { + "epoch": 0.7705787410652525, + "grad_norm": 1.2651645489335748, + "learning_rate": 1.446159901177417e-06, + "loss": 0.5265953540802002, + "step": 3342 + }, + { + "epoch": 0.7708093151948351, + "grad_norm": 1.5538943468041178, + "learning_rate": 1.4458186577641285e-06, + "loss": 0.48366689682006836, + "step": 3343 + }, + { + "epoch": 0.7710398893244178, + "grad_norm": 1.3170443751716914, + "learning_rate": 1.4454773495465805e-06, + "loss": 0.4303058087825775, + "step": 3344 + }, + { + "epoch": 0.7712704634540004, + "grad_norm": 1.2782967712931992, + "learning_rate": 
1.4451359765743845e-06, + "loss": 0.44936758279800415, + "step": 3345 + }, + { + "epoch": 0.7715010375835831, + "grad_norm": 1.1273529926323729, + "learning_rate": 1.4447945388971631e-06, + "loss": 0.37891095876693726, + "step": 3346 + }, + { + "epoch": 0.7717316117131657, + "grad_norm": 1.3818395750162065, + "learning_rate": 1.4444530365645477e-06, + "loss": 0.4958759546279907, + "step": 3347 + }, + { + "epoch": 0.7719621858427484, + "grad_norm": 1.2809802910956953, + "learning_rate": 1.4441114696261791e-06, + "loss": 0.5180525183677673, + "step": 3348 + }, + { + "epoch": 0.772192759972331, + "grad_norm": 1.3137706702012002, + "learning_rate": 1.4437698381317076e-06, + "loss": 0.4760133624076843, + "step": 3349 + }, + { + "epoch": 0.7724233341019138, + "grad_norm": 1.6019634089420207, + "learning_rate": 1.4434281421307923e-06, + "loss": 0.5095269680023193, + "step": 3350 + }, + { + "epoch": 0.7726539082314964, + "grad_norm": 1.3897770832286553, + "learning_rate": 1.443086381673103e-06, + "loss": 0.41132962703704834, + "step": 3351 + }, + { + "epoch": 0.7728844823610791, + "grad_norm": 2.1191686086439687, + "learning_rate": 1.442744556808317e-06, + "loss": 0.5617398023605347, + "step": 3352 + }, + { + "epoch": 0.7731150564906617, + "grad_norm": 1.3926070515875653, + "learning_rate": 1.4424026675861229e-06, + "loss": 0.4421590566635132, + "step": 3353 + }, + { + "epoch": 0.7733456306202444, + "grad_norm": 1.3079796762796725, + "learning_rate": 1.4420607140562175e-06, + "loss": 0.5533363223075867, + "step": 3354 + }, + { + "epoch": 0.773576204749827, + "grad_norm": 1.2259362177236217, + "learning_rate": 1.441718696268307e-06, + "loss": 0.3703731298446655, + "step": 3355 + }, + { + "epoch": 0.7738067788794097, + "grad_norm": 1.3132566837825874, + "learning_rate": 1.4413766142721074e-06, + "loss": 0.4078833758831024, + "step": 3356 + }, + { + "epoch": 0.7740373530089923, + "grad_norm": 1.3669338987803128, + "learning_rate": 1.4410344681173436e-06, + "loss": 
0.47297823429107666, + "step": 3357 + }, + { + "epoch": 0.7742679271385751, + "grad_norm": 1.44476399239333, + "learning_rate": 1.4406922578537501e-06, + "loss": 0.4586789309978485, + "step": 3358 + }, + { + "epoch": 0.7744985012681577, + "grad_norm": 2.005996053014414, + "learning_rate": 1.440349983531071e-06, + "loss": 0.5284359455108643, + "step": 3359 + }, + { + "epoch": 0.7747290753977404, + "grad_norm": 1.453810263762319, + "learning_rate": 1.4400076451990585e-06, + "loss": 0.47153323888778687, + "step": 3360 + }, + { + "epoch": 0.774959649527323, + "grad_norm": 1.277395230723769, + "learning_rate": 1.4396652429074758e-06, + "loss": 0.3862396478652954, + "step": 3361 + }, + { + "epoch": 0.7751902236569057, + "grad_norm": 1.4585054412515979, + "learning_rate": 1.4393227767060938e-06, + "loss": 0.48918354511260986, + "step": 3362 + }, + { + "epoch": 0.7754207977864883, + "grad_norm": 1.2680408475983538, + "learning_rate": 1.4389802466446942e-06, + "loss": 0.5541480779647827, + "step": 3363 + }, + { + "epoch": 0.775651371916071, + "grad_norm": 1.3507983643401953, + "learning_rate": 1.4386376527730665e-06, + "loss": 0.48972445726394653, + "step": 3364 + }, + { + "epoch": 0.7758819460456536, + "grad_norm": 1.7557497204808084, + "learning_rate": 1.4382949951410109e-06, + "loss": 0.5016083717346191, + "step": 3365 + }, + { + "epoch": 0.7761125201752364, + "grad_norm": 1.3196221720148595, + "learning_rate": 1.4379522737983351e-06, + "loss": 0.40227651596069336, + "step": 3366 + }, + { + "epoch": 0.776343094304819, + "grad_norm": 1.596207218013102, + "learning_rate": 1.4376094887948584e-06, + "loss": 0.42994722723960876, + "step": 3367 + }, + { + "epoch": 0.7765736684344017, + "grad_norm": 1.516975070106083, + "learning_rate": 1.4372666401804073e-06, + "loss": 0.5087350010871887, + "step": 3368 + }, + { + "epoch": 0.7768042425639843, + "grad_norm": 1.2618017709219296, + "learning_rate": 1.4369237280048186e-06, + "loss": 0.39419132471084595, + "step": 3369 + }, + { + 
"epoch": 0.777034816693567, + "grad_norm": 1.3456260179482487, + "learning_rate": 1.4365807523179376e-06, + "loss": 0.500682532787323, + "step": 3370 + }, + { + "epoch": 0.7772653908231496, + "grad_norm": 1.4316905894274476, + "learning_rate": 1.4362377131696198e-06, + "loss": 0.49243754148483276, + "step": 3371 + }, + { + "epoch": 0.7774959649527323, + "grad_norm": 1.4395314935622772, + "learning_rate": 1.4358946106097295e-06, + "loss": 0.5479283332824707, + "step": 3372 + }, + { + "epoch": 0.7777265390823149, + "grad_norm": 1.08521870178353, + "learning_rate": 1.4355514446881396e-06, + "loss": 0.43217700719833374, + "step": 3373 + }, + { + "epoch": 0.7779571132118976, + "grad_norm": 1.292406809665349, + "learning_rate": 1.435208215454733e-06, + "loss": 0.5351289510726929, + "step": 3374 + }, + { + "epoch": 0.7781876873414802, + "grad_norm": 1.2023765125576906, + "learning_rate": 1.4348649229594016e-06, + "loss": 0.45523375272750854, + "step": 3375 + }, + { + "epoch": 0.778418261471063, + "grad_norm": 1.1345172738470508, + "learning_rate": 1.4345215672520465e-06, + "loss": 0.49811118841171265, + "step": 3376 + }, + { + "epoch": 0.7786488356006456, + "grad_norm": 1.3017016981868919, + "learning_rate": 1.434178148382578e-06, + "loss": 0.40621131658554077, + "step": 3377 + }, + { + "epoch": 0.7788794097302283, + "grad_norm": 1.322929743849566, + "learning_rate": 1.4338346664009152e-06, + "loss": 0.43339842557907104, + "step": 3378 + }, + { + "epoch": 0.7791099838598109, + "grad_norm": 1.4276417953872829, + "learning_rate": 1.433491121356987e-06, + "loss": 0.4397253096103668, + "step": 3379 + }, + { + "epoch": 0.7793405579893936, + "grad_norm": 1.3957946390360352, + "learning_rate": 1.433147513300731e-06, + "loss": 0.5146217942237854, + "step": 3380 + }, + { + "epoch": 0.7795711321189762, + "grad_norm": 1.3181842447854462, + "learning_rate": 1.432803842282094e-06, + "loss": 0.46328768134117126, + "step": 3381 + }, + { + "epoch": 0.7798017062485589, + "grad_norm": 
1.4008272791948313, + "learning_rate": 1.432460108351032e-06, + "loss": 0.47743386030197144, + "step": 3382 + }, + { + "epoch": 0.7800322803781415, + "grad_norm": 1.4765555896470939, + "learning_rate": 1.4321163115575105e-06, + "loss": 0.467747300863266, + "step": 3383 + }, + { + "epoch": 0.7802628545077243, + "grad_norm": 1.2334202034705792, + "learning_rate": 1.431772451951504e-06, + "loss": 0.4269976019859314, + "step": 3384 + }, + { + "epoch": 0.7804934286373069, + "grad_norm": 1.4332482963337814, + "learning_rate": 1.4314285295829956e-06, + "loss": 0.5440881252288818, + "step": 3385 + }, + { + "epoch": 0.7807240027668896, + "grad_norm": 1.5634188347498899, + "learning_rate": 1.431084544501978e-06, + "loss": 0.42413994669914246, + "step": 3386 + }, + { + "epoch": 0.7809545768964722, + "grad_norm": 1.250472551312306, + "learning_rate": 1.4307404967584528e-06, + "loss": 0.5563687086105347, + "step": 3387 + }, + { + "epoch": 0.7811851510260549, + "grad_norm": 1.2530390736213655, + "learning_rate": 1.4303963864024314e-06, + "loss": 0.4822027087211609, + "step": 3388 + }, + { + "epoch": 0.7814157251556375, + "grad_norm": 1.265644144731409, + "learning_rate": 1.430052213483933e-06, + "loss": 0.5267205834388733, + "step": 3389 + }, + { + "epoch": 0.7816462992852202, + "grad_norm": 1.464631682134491, + "learning_rate": 1.4297079780529868e-06, + "loss": 0.49257054924964905, + "step": 3390 + }, + { + "epoch": 0.7818768734148028, + "grad_norm": 1.4967498256417051, + "learning_rate": 1.4293636801596314e-06, + "loss": 0.45225608348846436, + "step": 3391 + }, + { + "epoch": 0.7821074475443855, + "grad_norm": 1.3090966398510886, + "learning_rate": 1.4290193198539133e-06, + "loss": 0.4891412854194641, + "step": 3392 + }, + { + "epoch": 0.7823380216739682, + "grad_norm": 1.2913501590758174, + "learning_rate": 1.4286748971858893e-06, + "loss": 0.4411062002182007, + "step": 3393 + }, + { + "epoch": 0.7825685958035509, + "grad_norm": 1.3634871078304074, + "learning_rate": 
1.4283304122056242e-06, + "loss": 0.4584164619445801, + "step": 3394 + }, + { + "epoch": 0.7827991699331335, + "grad_norm": 1.2884433704058607, + "learning_rate": 1.4279858649631928e-06, + "loss": 0.46913737058639526, + "step": 3395 + }, + { + "epoch": 0.7830297440627162, + "grad_norm": 1.320207574562506, + "learning_rate": 1.4276412555086786e-06, + "loss": 0.40582767128944397, + "step": 3396 + }, + { + "epoch": 0.7832603181922988, + "grad_norm": 1.4930886994867976, + "learning_rate": 1.4272965838921737e-06, + "loss": 0.5089453458786011, + "step": 3397 + }, + { + "epoch": 0.7834908923218815, + "grad_norm": 1.3151641529095257, + "learning_rate": 1.4269518501637798e-06, + "loss": 0.4744444489479065, + "step": 3398 + }, + { + "epoch": 0.7837214664514641, + "grad_norm": 1.3271165993445435, + "learning_rate": 1.426607054373608e-06, + "loss": 0.49168163537979126, + "step": 3399 + }, + { + "epoch": 0.7839520405810468, + "grad_norm": 1.4774301348156431, + "learning_rate": 1.4262621965717768e-06, + "loss": 0.4423940181732178, + "step": 3400 + }, + { + "epoch": 0.7841826147106294, + "grad_norm": 1.541226385884193, + "learning_rate": 1.4259172768084152e-06, + "loss": 0.5138403177261353, + "step": 3401 + }, + { + "epoch": 0.7844131888402122, + "grad_norm": 1.5691210214340656, + "learning_rate": 1.425572295133661e-06, + "loss": 0.5248140096664429, + "step": 3402 + }, + { + "epoch": 0.7846437629697948, + "grad_norm": 1.4659537352972094, + "learning_rate": 1.4252272515976607e-06, + "loss": 0.39161059260368347, + "step": 3403 + }, + { + "epoch": 0.7848743370993775, + "grad_norm": 1.307338649596764, + "learning_rate": 1.4248821462505699e-06, + "loss": 0.46826744079589844, + "step": 3404 + }, + { + "epoch": 0.7851049112289601, + "grad_norm": 1.3428424961182877, + "learning_rate": 1.424536979142553e-06, + "loss": 0.4329161047935486, + "step": 3405 + }, + { + "epoch": 0.7853354853585428, + "grad_norm": 1.3831028347986385, + "learning_rate": 1.4241917503237834e-06, + "loss": 
0.4691393971443176, + "step": 3406 + }, + { + "epoch": 0.7855660594881254, + "grad_norm": 1.819344171969547, + "learning_rate": 1.423846459844444e-06, + "loss": 0.5130072236061096, + "step": 3407 + }, + { + "epoch": 0.7857966336177081, + "grad_norm": 1.4381134289937085, + "learning_rate": 1.4235011077547264e-06, + "loss": 0.37478166818618774, + "step": 3408 + }, + { + "epoch": 0.7860272077472907, + "grad_norm": 1.1654669583674488, + "learning_rate": 1.4231556941048307e-06, + "loss": 0.46112769842147827, + "step": 3409 + }, + { + "epoch": 0.7862577818768735, + "grad_norm": 1.3711520199030207, + "learning_rate": 1.422810218944966e-06, + "loss": 0.5095282793045044, + "step": 3410 + }, + { + "epoch": 0.7864883560064561, + "grad_norm": 1.4830709787042864, + "learning_rate": 1.422464682325351e-06, + "loss": 0.4182342290878296, + "step": 3411 + }, + { + "epoch": 0.7867189301360388, + "grad_norm": 1.4898619625675633, + "learning_rate": 1.422119084296213e-06, + "loss": 0.3892830014228821, + "step": 3412 + }, + { + "epoch": 0.7869495042656214, + "grad_norm": 1.655445800570714, + "learning_rate": 1.4217734249077877e-06, + "loss": 0.5294528603553772, + "step": 3413 + }, + { + "epoch": 0.7871800783952041, + "grad_norm": 1.501568458574139, + "learning_rate": 1.4214277042103208e-06, + "loss": 0.471803218126297, + "step": 3414 + }, + { + "epoch": 0.7874106525247867, + "grad_norm": 1.2078819401351728, + "learning_rate": 1.4210819222540662e-06, + "loss": 0.4363842010498047, + "step": 3415 + }, + { + "epoch": 0.7876412266543694, + "grad_norm": 1.191025232167839, + "learning_rate": 1.4207360790892867e-06, + "loss": 0.3834928870201111, + "step": 3416 + }, + { + "epoch": 0.787871800783952, + "grad_norm": 1.342904245190706, + "learning_rate": 1.4203901747662539e-06, + "loss": 0.4639194905757904, + "step": 3417 + }, + { + "epoch": 0.7881023749135347, + "grad_norm": 1.4526860275619324, + "learning_rate": 1.4200442093352486e-06, + "loss": 0.47130632400512695, + "step": 3418 + }, + { + 
"epoch": 0.7883329490431173, + "grad_norm": 1.2585342771790389, + "learning_rate": 1.4196981828465606e-06, + "loss": 0.4848192632198334, + "step": 3419 + }, + { + "epoch": 0.7885635231727001, + "grad_norm": 1.2424140051596944, + "learning_rate": 1.4193520953504884e-06, + "loss": 0.5137286186218262, + "step": 3420 + }, + { + "epoch": 0.7887940973022827, + "grad_norm": 1.4833943072924853, + "learning_rate": 1.4190059468973385e-06, + "loss": 0.47639960050582886, + "step": 3421 + }, + { + "epoch": 0.7890246714318654, + "grad_norm": 1.3974399628621321, + "learning_rate": 1.418659737537428e-06, + "loss": 0.4300975799560547, + "step": 3422 + }, + { + "epoch": 0.789255245561448, + "grad_norm": 1.6248920549834995, + "learning_rate": 1.4183134673210817e-06, + "loss": 0.5669160485267639, + "step": 3423 + }, + { + "epoch": 0.7894858196910307, + "grad_norm": 1.3431432318053507, + "learning_rate": 1.4179671362986336e-06, + "loss": 0.4113837480545044, + "step": 3424 + }, + { + "epoch": 0.7897163938206133, + "grad_norm": 1.3611327690280945, + "learning_rate": 1.417620744520426e-06, + "loss": 0.4992315173149109, + "step": 3425 + }, + { + "epoch": 0.789946967950196, + "grad_norm": 1.6418572453635272, + "learning_rate": 1.417274292036811e-06, + "loss": 0.5556696653366089, + "step": 3426 + }, + { + "epoch": 0.7901775420797786, + "grad_norm": 1.367999541896107, + "learning_rate": 1.4169277788981485e-06, + "loss": 0.47911009192466736, + "step": 3427 + }, + { + "epoch": 0.7904081162093614, + "grad_norm": 1.2100320134669527, + "learning_rate": 1.416581205154808e-06, + "loss": 0.45395466685295105, + "step": 3428 + }, + { + "epoch": 0.790638690338944, + "grad_norm": 1.5386887400015699, + "learning_rate": 1.4162345708571674e-06, + "loss": 0.4404561519622803, + "step": 3429 + }, + { + "epoch": 0.7908692644685267, + "grad_norm": 1.3845404606780534, + "learning_rate": 1.4158878760556136e-06, + "loss": 0.5541578531265259, + "step": 3430 + }, + { + "epoch": 0.7910998385981093, + "grad_norm": 
1.4234082473199938, + "learning_rate": 1.4155411208005422e-06, + "loss": 0.5517834424972534, + "step": 3431 + }, + { + "epoch": 0.791330412727692, + "grad_norm": 1.2851916229874634, + "learning_rate": 1.4151943051423574e-06, + "loss": 0.42650169134140015, + "step": 3432 + }, + { + "epoch": 0.7915609868572746, + "grad_norm": 1.7886227172970943, + "learning_rate": 1.414847429131472e-06, + "loss": 0.42724043130874634, + "step": 3433 + }, + { + "epoch": 0.7917915609868573, + "grad_norm": 1.3978336018588784, + "learning_rate": 1.414500492818309e-06, + "loss": 0.41757941246032715, + "step": 3434 + }, + { + "epoch": 0.7920221351164399, + "grad_norm": 1.4250040620354028, + "learning_rate": 1.4141534962532984e-06, + "loss": 0.47318267822265625, + "step": 3435 + }, + { + "epoch": 0.7922527092460226, + "grad_norm": 1.5092267765141392, + "learning_rate": 1.41380643948688e-06, + "loss": 0.5540967583656311, + "step": 3436 + }, + { + "epoch": 0.7924832833756053, + "grad_norm": 1.2943595959957308, + "learning_rate": 1.4134593225695013e-06, + "loss": 0.4459697902202606, + "step": 3437 + }, + { + "epoch": 0.792713857505188, + "grad_norm": 1.2950911274447663, + "learning_rate": 1.41311214555162e-06, + "loss": 0.5263698101043701, + "step": 3438 + }, + { + "epoch": 0.7929444316347706, + "grad_norm": 1.321260987570187, + "learning_rate": 1.4127649084837016e-06, + "loss": 0.40453940629959106, + "step": 3439 + }, + { + "epoch": 0.7931750057643533, + "grad_norm": 1.4138023773004598, + "learning_rate": 1.412417611416221e-06, + "loss": 0.3859207034111023, + "step": 3440 + }, + { + "epoch": 0.7934055798939359, + "grad_norm": 1.3373104076984894, + "learning_rate": 1.4120702543996603e-06, + "loss": 0.4604511260986328, + "step": 3441 + }, + { + "epoch": 0.7936361540235186, + "grad_norm": 1.2912472996688542, + "learning_rate": 1.411722837484512e-06, + "loss": 0.40292084217071533, + "step": 3442 + }, + { + "epoch": 0.7938667281531012, + "grad_norm": 1.3099743009304052, + "learning_rate": 
1.4113753607212766e-06, + "loss": 0.40447625517845154, + "step": 3443 + }, + { + "epoch": 0.7940973022826839, + "grad_norm": 1.1711578682822494, + "learning_rate": 1.4110278241604635e-06, + "loss": 0.48472997546195984, + "step": 3444 + }, + { + "epoch": 0.7943278764122665, + "grad_norm": 1.304688924593958, + "learning_rate": 1.4106802278525902e-06, + "loss": 0.5404670238494873, + "step": 3445 + }, + { + "epoch": 0.7945584505418493, + "grad_norm": 1.2201185877258616, + "learning_rate": 1.4103325718481838e-06, + "loss": 0.5885064005851746, + "step": 3446 + }, + { + "epoch": 0.7947890246714319, + "grad_norm": 1.2045708529585497, + "learning_rate": 1.4099848561977794e-06, + "loss": 0.47806939482688904, + "step": 3447 + }, + { + "epoch": 0.7950195988010145, + "grad_norm": 1.2183758256079422, + "learning_rate": 1.4096370809519213e-06, + "loss": 0.4247834086418152, + "step": 3448 + }, + { + "epoch": 0.7952501729305972, + "grad_norm": 1.4701805176850054, + "learning_rate": 1.409289246161162e-06, + "loss": 0.508902370929718, + "step": 3449 + }, + { + "epoch": 0.7954807470601798, + "grad_norm": 1.3709386014599791, + "learning_rate": 1.4089413518760626e-06, + "loss": 0.4866124987602234, + "step": 3450 + }, + { + "epoch": 0.7957113211897625, + "grad_norm": 1.4351510328158692, + "learning_rate": 1.408593398147193e-06, + "loss": 0.5168731212615967, + "step": 3451 + }, + { + "epoch": 0.7959418953193451, + "grad_norm": 1.257672253058261, + "learning_rate": 1.4082453850251326e-06, + "loss": 0.5039271712303162, + "step": 3452 + }, + { + "epoch": 0.7961724694489278, + "grad_norm": 1.3767040030777011, + "learning_rate": 1.4078973125604674e-06, + "loss": 0.3660929799079895, + "step": 3453 + }, + { + "epoch": 0.7964030435785104, + "grad_norm": 1.5330992916300397, + "learning_rate": 1.407549180803794e-06, + "loss": 0.514503538608551, + "step": 3454 + }, + { + "epoch": 0.7966336177080932, + "grad_norm": 1.5704286671243526, + "learning_rate": 1.4072009898057172e-06, + "loss": 
0.4803028702735901, + "step": 3455 + }, + { + "epoch": 0.7968641918376758, + "grad_norm": 1.2332119133725918, + "learning_rate": 1.4068527396168492e-06, + "loss": 0.43116262555122375, + "step": 3456 + }, + { + "epoch": 0.7970947659672585, + "grad_norm": 1.522287028583898, + "learning_rate": 1.4065044302878125e-06, + "loss": 0.5009680986404419, + "step": 3457 + }, + { + "epoch": 0.7973253400968411, + "grad_norm": 1.1307500814268987, + "learning_rate": 1.406156061869237e-06, + "loss": 0.4047713875770569, + "step": 3458 + }, + { + "epoch": 0.7975559142264238, + "grad_norm": 1.348066090689188, + "learning_rate": 1.4058076344117615e-06, + "loss": 0.5287230014801025, + "step": 3459 + }, + { + "epoch": 0.7977864883560064, + "grad_norm": 1.7810979263679612, + "learning_rate": 1.4054591479660335e-06, + "loss": 0.5602750778198242, + "step": 3460 + }, + { + "epoch": 0.7980170624855891, + "grad_norm": 1.0587308388288128, + "learning_rate": 1.4051106025827096e-06, + "loss": 0.4178144335746765, + "step": 3461 + }, + { + "epoch": 0.7982476366151717, + "grad_norm": 1.408691487644406, + "learning_rate": 1.4047619983124536e-06, + "loss": 0.5061960220336914, + "step": 3462 + }, + { + "epoch": 0.7984782107447544, + "grad_norm": 1.5043212480263244, + "learning_rate": 1.4044133352059392e-06, + "loss": 0.5091691017150879, + "step": 3463 + }, + { + "epoch": 0.798708784874337, + "grad_norm": 1.3793897642043385, + "learning_rate": 1.4040646133138478e-06, + "loss": 0.5100894570350647, + "step": 3464 + }, + { + "epoch": 0.7989393590039198, + "grad_norm": 1.2188849241203001, + "learning_rate": 1.4037158326868697e-06, + "loss": 0.47493505477905273, + "step": 3465 + }, + { + "epoch": 0.7991699331335024, + "grad_norm": 1.637846674977116, + "learning_rate": 1.4033669933757038e-06, + "loss": 0.5561350584030151, + "step": 3466 + }, + { + "epoch": 0.7994005072630851, + "grad_norm": 1.4971197328143675, + "learning_rate": 1.4030180954310574e-06, + "loss": 0.44552814960479736, + "step": 3467 + }, + { + 
"epoch": 0.7996310813926677, + "grad_norm": 1.219192969590734, + "learning_rate": 1.4026691389036465e-06, + "loss": 0.4624238908290863, + "step": 3468 + }, + { + "epoch": 0.7998616555222504, + "grad_norm": 1.348458578104898, + "learning_rate": 1.4023201238441951e-06, + "loss": 0.5424448251724243, + "step": 3469 + }, + { + "epoch": 0.800092229651833, + "grad_norm": 1.2410568882309463, + "learning_rate": 1.4019710503034367e-06, + "loss": 0.4629395008087158, + "step": 3470 + }, + { + "epoch": 0.8003228037814157, + "grad_norm": 1.3564725845833965, + "learning_rate": 1.401621918332112e-06, + "loss": 0.4375717043876648, + "step": 3471 + }, + { + "epoch": 0.8005533779109983, + "grad_norm": 1.5212509367699154, + "learning_rate": 1.401272727980971e-06, + "loss": 0.4419640302658081, + "step": 3472 + }, + { + "epoch": 0.8007839520405811, + "grad_norm": 1.3621301015547722, + "learning_rate": 1.4009234793007724e-06, + "loss": 0.42077577114105225, + "step": 3473 + }, + { + "epoch": 0.8010145261701637, + "grad_norm": 1.394506766094276, + "learning_rate": 1.400574172342283e-06, + "loss": 0.3735182583332062, + "step": 3474 + }, + { + "epoch": 0.8012451002997464, + "grad_norm": 1.3325918102604086, + "learning_rate": 1.4002248071562778e-06, + "loss": 0.4263458251953125, + "step": 3475 + }, + { + "epoch": 0.801475674429329, + "grad_norm": 1.3278985843191269, + "learning_rate": 1.3998753837935406e-06, + "loss": 0.42377904057502747, + "step": 3476 + }, + { + "epoch": 0.8017062485589117, + "grad_norm": 1.4415172635554745, + "learning_rate": 1.399525902304864e-06, + "loss": 0.5017589330673218, + "step": 3477 + }, + { + "epoch": 0.8019368226884943, + "grad_norm": 1.2695777372701094, + "learning_rate": 1.3991763627410485e-06, + "loss": 0.41022592782974243, + "step": 3478 + }, + { + "epoch": 0.802167396818077, + "grad_norm": 1.6097549722001219, + "learning_rate": 1.3988267651529028e-06, + "loss": 0.49957793951034546, + "step": 3479 + }, + { + "epoch": 0.8023979709476596, + "grad_norm": 
1.4695518489034636, + "learning_rate": 1.398477109591245e-06, + "loss": 0.5065722465515137, + "step": 3480 + }, + { + "epoch": 0.8026285450772424, + "grad_norm": 1.264735145451503, + "learning_rate": 1.398127396106901e-06, + "loss": 0.4353798031806946, + "step": 3481 + }, + { + "epoch": 0.802859119206825, + "grad_norm": 1.5800938751579423, + "learning_rate": 1.3977776247507049e-06, + "loss": 0.41438236832618713, + "step": 3482 + }, + { + "epoch": 0.8030896933364077, + "grad_norm": 1.2712154799989346, + "learning_rate": 1.3974277955734996e-06, + "loss": 0.4348248839378357, + "step": 3483 + }, + { + "epoch": 0.8033202674659903, + "grad_norm": 1.3020033760882643, + "learning_rate": 1.3970779086261363e-06, + "loss": 0.49369150400161743, + "step": 3484 + }, + { + "epoch": 0.803550841595573, + "grad_norm": 1.445427514378273, + "learning_rate": 1.396727963959475e-06, + "loss": 0.5694580078125, + "step": 3485 + }, + { + "epoch": 0.8037814157251556, + "grad_norm": 1.3859575121879733, + "learning_rate": 1.3963779616243834e-06, + "loss": 0.5357070565223694, + "step": 3486 + }, + { + "epoch": 0.8040119898547383, + "grad_norm": 1.3071217267808923, + "learning_rate": 1.3960279016717377e-06, + "loss": 0.41300907731056213, + "step": 3487 + }, + { + "epoch": 0.8042425639843209, + "grad_norm": 1.4713226080636248, + "learning_rate": 1.395677784152423e-06, + "loss": 0.5058030486106873, + "step": 3488 + }, + { + "epoch": 0.8044731381139036, + "grad_norm": 1.394990226330868, + "learning_rate": 1.3953276091173326e-06, + "loss": 0.5225522518157959, + "step": 3489 + }, + { + "epoch": 0.8047037122434862, + "grad_norm": 1.3669211701935395, + "learning_rate": 1.3949773766173675e-06, + "loss": 0.43893736600875854, + "step": 3490 + }, + { + "epoch": 0.804934286373069, + "grad_norm": 1.575168458794386, + "learning_rate": 1.3946270867034375e-06, + "loss": 0.4583659768104553, + "step": 3491 + }, + { + "epoch": 0.8051648605026516, + "grad_norm": 1.2728568882138123, + "learning_rate": 
1.394276739426461e-06, + "loss": 0.49550747871398926, + "step": 3492 + }, + { + "epoch": 0.8053954346322343, + "grad_norm": 1.9438900883437185, + "learning_rate": 1.3939263348373648e-06, + "loss": 0.5637674331665039, + "step": 3493 + }, + { + "epoch": 0.8056260087618169, + "grad_norm": 1.3206034443977903, + "learning_rate": 1.3935758729870835e-06, + "loss": 0.4853670299053192, + "step": 3494 + }, + { + "epoch": 0.8058565828913996, + "grad_norm": 1.479029501570459, + "learning_rate": 1.3932253539265603e-06, + "loss": 0.4535500407218933, + "step": 3495 + }, + { + "epoch": 0.8060871570209822, + "grad_norm": 1.4461411101486477, + "learning_rate": 1.3928747777067464e-06, + "loss": 0.4198870062828064, + "step": 3496 + }, + { + "epoch": 0.8063177311505649, + "grad_norm": 1.3336585529006162, + "learning_rate": 1.392524144378602e-06, + "loss": 0.45773670077323914, + "step": 3497 + }, + { + "epoch": 0.8065483052801475, + "grad_norm": 1.718264798623436, + "learning_rate": 1.3921734539930952e-06, + "loss": 0.45263248682022095, + "step": 3498 + }, + { + "epoch": 0.8067788794097303, + "grad_norm": 1.300886470112164, + "learning_rate": 1.3918227066012025e-06, + "loss": 0.473066508769989, + "step": 3499 + }, + { + "epoch": 0.8070094535393129, + "grad_norm": 1.1261914460441818, + "learning_rate": 1.3914719022539082e-06, + "loss": 0.35737159848213196, + "step": 3500 + }, + { + "epoch": 0.8072400276688956, + "grad_norm": 1.4095537979750905, + "learning_rate": 1.3911210410022054e-06, + "loss": 0.5162703394889832, + "step": 3501 + }, + { + "epoch": 0.8074706017984782, + "grad_norm": 1.494617165800155, + "learning_rate": 1.3907701228970955e-06, + "loss": 0.5347551703453064, + "step": 3502 + }, + { + "epoch": 0.8077011759280609, + "grad_norm": 1.7642790890319513, + "learning_rate": 1.390419147989588e-06, + "loss": 0.4889448881149292, + "step": 3503 + }, + { + "epoch": 0.8079317500576435, + "grad_norm": 1.380092267420659, + "learning_rate": 1.3900681163306999e-06, + "loss": 
0.47468650341033936, + "step": 3504 + }, + { + "epoch": 0.8081623241872262, + "grad_norm": 1.4749480234582377, + "learning_rate": 1.3897170279714585e-06, + "loss": 0.43236857652664185, + "step": 3505 + }, + { + "epoch": 0.8083928983168088, + "grad_norm": 1.4419786763918543, + "learning_rate": 1.3893658829628974e-06, + "loss": 0.46778976917266846, + "step": 3506 + }, + { + "epoch": 0.8086234724463915, + "grad_norm": 1.353368455676612, + "learning_rate": 1.389014681356059e-06, + "loss": 0.49447667598724365, + "step": 3507 + }, + { + "epoch": 0.8088540465759742, + "grad_norm": 1.3574196281726325, + "learning_rate": 1.388663423201994e-06, + "loss": 0.5221220254898071, + "step": 3508 + }, + { + "epoch": 0.8090846207055569, + "grad_norm": 1.8319434066548141, + "learning_rate": 1.3883121085517615e-06, + "loss": 0.5037325620651245, + "step": 3509 + }, + { + "epoch": 0.8093151948351395, + "grad_norm": 1.1547190760847952, + "learning_rate": 1.387960737456429e-06, + "loss": 0.46879589557647705, + "step": 3510 + }, + { + "epoch": 0.8095457689647222, + "grad_norm": 1.3552976314399992, + "learning_rate": 1.387609309967071e-06, + "loss": 0.44216716289520264, + "step": 3511 + }, + { + "epoch": 0.8097763430943048, + "grad_norm": 1.2016377736710804, + "learning_rate": 1.3872578261347716e-06, + "loss": 0.4525749981403351, + "step": 3512 + }, + { + "epoch": 0.8100069172238875, + "grad_norm": 1.3138421579944453, + "learning_rate": 1.3869062860106224e-06, + "loss": 0.44681644439697266, + "step": 3513 + }, + { + "epoch": 0.8102374913534701, + "grad_norm": 1.5030736189155554, + "learning_rate": 1.3865546896457233e-06, + "loss": 0.4162617325782776, + "step": 3514 + }, + { + "epoch": 0.8104680654830528, + "grad_norm": 1.4360914568156404, + "learning_rate": 1.3862030370911827e-06, + "loss": 0.5262776613235474, + "step": 3515 + }, + { + "epoch": 0.8106986396126354, + "grad_norm": 1.3010389916824352, + "learning_rate": 1.3858513283981163e-06, + "loss": 0.48102372884750366, + "step": 3516 + }, 
+ { + "epoch": 0.8109292137422182, + "grad_norm": 1.41037363508679, + "learning_rate": 1.385499563617649e-06, + "loss": 0.46166497468948364, + "step": 3517 + }, + { + "epoch": 0.8111597878718008, + "grad_norm": 1.4145741054815544, + "learning_rate": 1.3851477428009133e-06, + "loss": 0.43523284792900085, + "step": 3518 + }, + { + "epoch": 0.8113903620013835, + "grad_norm": 1.3662294611202825, + "learning_rate": 1.3847958659990497e-06, + "loss": 0.5413048267364502, + "step": 3519 + }, + { + "epoch": 0.8116209361309661, + "grad_norm": 1.1462124150969017, + "learning_rate": 1.3844439332632073e-06, + "loss": 0.4257383346557617, + "step": 3520 + }, + { + "epoch": 0.8118515102605488, + "grad_norm": 1.5928313905350753, + "learning_rate": 1.3840919446445427e-06, + "loss": 0.4812018871307373, + "step": 3521 + }, + { + "epoch": 0.8120820843901314, + "grad_norm": 1.5231442697754751, + "learning_rate": 1.3837399001942216e-06, + "loss": 0.4890254735946655, + "step": 3522 + }, + { + "epoch": 0.8123126585197141, + "grad_norm": 1.7091323269762855, + "learning_rate": 1.3833877999634166e-06, + "loss": 0.5079991817474365, + "step": 3523 + }, + { + "epoch": 0.8125432326492967, + "grad_norm": 1.6148941470526432, + "learning_rate": 1.3830356440033096e-06, + "loss": 0.44703438878059387, + "step": 3524 + }, + { + "epoch": 0.8127738067788794, + "grad_norm": 1.4685605039032132, + "learning_rate": 1.3826834323650898e-06, + "loss": 0.4218645989894867, + "step": 3525 + }, + { + "epoch": 0.813004380908462, + "grad_norm": 1.585977018929449, + "learning_rate": 1.3823311650999547e-06, + "loss": 0.4544546902179718, + "step": 3526 + }, + { + "epoch": 0.8132349550380448, + "grad_norm": 1.2954656146833265, + "learning_rate": 1.3819788422591099e-06, + "loss": 0.4978422224521637, + "step": 3527 + }, + { + "epoch": 0.8134655291676274, + "grad_norm": 1.3262250095489831, + "learning_rate": 1.3816264638937688e-06, + "loss": 0.42122140526771545, + "step": 3528 + }, + { + "epoch": 0.8136961032972101, + 
"grad_norm": 1.0995613789441223, + "learning_rate": 1.381274030055154e-06, + "loss": 0.45674729347229004, + "step": 3529 + }, + { + "epoch": 0.8139266774267927, + "grad_norm": 1.5614041042611542, + "learning_rate": 1.3809215407944947e-06, + "loss": 0.5075385570526123, + "step": 3530 + }, + { + "epoch": 0.8141572515563754, + "grad_norm": 1.4231357002591019, + "learning_rate": 1.380568996163029e-06, + "loss": 0.45952552556991577, + "step": 3531 + }, + { + "epoch": 0.814387825685958, + "grad_norm": 1.239122573849665, + "learning_rate": 1.3802163962120025e-06, + "loss": 0.5062624216079712, + "step": 3532 + }, + { + "epoch": 0.8146183998155407, + "grad_norm": 1.4910945652834293, + "learning_rate": 1.3798637409926698e-06, + "loss": 0.49294552206993103, + "step": 3533 + }, + { + "epoch": 0.8148489739451233, + "grad_norm": 1.347255149566569, + "learning_rate": 1.3795110305562926e-06, + "loss": 0.4389861822128296, + "step": 3534 + }, + { + "epoch": 0.8150795480747061, + "grad_norm": 1.5704776908584448, + "learning_rate": 1.3791582649541401e-06, + "loss": 0.47733181715011597, + "step": 3535 + }, + { + "epoch": 0.8153101222042887, + "grad_norm": 1.3661823105841888, + "learning_rate": 1.3788054442374918e-06, + "loss": 0.5007725358009338, + "step": 3536 + }, + { + "epoch": 0.8155406963338714, + "grad_norm": 1.617600694156108, + "learning_rate": 1.378452568457633e-06, + "loss": 0.4857913553714752, + "step": 3537 + }, + { + "epoch": 0.815771270463454, + "grad_norm": 1.4509204702050165, + "learning_rate": 1.3780996376658577e-06, + "loss": 0.5330549478530884, + "step": 3538 + }, + { + "epoch": 0.8160018445930367, + "grad_norm": 1.283827597345967, + "learning_rate": 1.3777466519134684e-06, + "loss": 0.45034217834472656, + "step": 3539 + }, + { + "epoch": 0.8162324187226193, + "grad_norm": 1.313177908039173, + "learning_rate": 1.3773936112517746e-06, + "loss": 0.4442213773727417, + "step": 3540 + }, + { + "epoch": 0.816462992852202, + "grad_norm": 1.479375223581317, + 
"learning_rate": 1.377040515732095e-06, + "loss": 0.5000369548797607, + "step": 3541 + }, + { + "epoch": 0.8166935669817846, + "grad_norm": 1.3177535399447533, + "learning_rate": 1.3766873654057551e-06, + "loss": 0.5117775797843933, + "step": 3542 + }, + { + "epoch": 0.8169241411113674, + "grad_norm": 1.4163300067502158, + "learning_rate": 1.3763341603240889e-06, + "loss": 0.431648850440979, + "step": 3543 + }, + { + "epoch": 0.81715471524095, + "grad_norm": 1.230235072546183, + "learning_rate": 1.3759809005384387e-06, + "loss": 0.39463019371032715, + "step": 3544 + }, + { + "epoch": 0.8173852893705327, + "grad_norm": 1.4412595458793114, + "learning_rate": 1.375627586100154e-06, + "loss": 0.38739651441574097, + "step": 3545 + }, + { + "epoch": 0.8176158635001153, + "grad_norm": 1.1409525851258608, + "learning_rate": 1.3752742170605927e-06, + "loss": 0.3973360061645508, + "step": 3546 + }, + { + "epoch": 0.817846437629698, + "grad_norm": 1.3276328290635366, + "learning_rate": 1.3749207934711207e-06, + "loss": 0.4791724383831024, + "step": 3547 + }, + { + "epoch": 0.8180770117592806, + "grad_norm": 1.2963607541712077, + "learning_rate": 1.3745673153831114e-06, + "loss": 0.5245905518531799, + "step": 3548 + }, + { + "epoch": 0.8183075858888633, + "grad_norm": 1.4724838776986868, + "learning_rate": 1.3742137828479472e-06, + "loss": 0.5507007241249084, + "step": 3549 + }, + { + "epoch": 0.8185381600184459, + "grad_norm": 1.6416778504866436, + "learning_rate": 1.373860195917017e-06, + "loss": 0.4555748701095581, + "step": 3550 + }, + { + "epoch": 0.8187687341480286, + "grad_norm": 1.2633428656921684, + "learning_rate": 1.3735065546417182e-06, + "loss": 0.39309239387512207, + "step": 3551 + }, + { + "epoch": 0.8189993082776112, + "grad_norm": 1.205265119124541, + "learning_rate": 1.3731528590734564e-06, + "loss": 0.4984157681465149, + "step": 3552 + }, + { + "epoch": 0.819229882407194, + "grad_norm": 1.4373490041823445, + "learning_rate": 1.3727991092636448e-06, + "loss": 
0.45853057503700256, + "step": 3553 + }, + { + "epoch": 0.8194604565367766, + "grad_norm": 1.427750473352885, + "learning_rate": 1.3724453052637043e-06, + "loss": 0.47412237524986267, + "step": 3554 + }, + { + "epoch": 0.8196910306663593, + "grad_norm": 1.5140095273509309, + "learning_rate": 1.3720914471250642e-06, + "loss": 0.46433544158935547, + "step": 3555 + }, + { + "epoch": 0.8199216047959419, + "grad_norm": 1.3530305082066354, + "learning_rate": 1.3717375348991612e-06, + "loss": 0.5773437023162842, + "step": 3556 + }, + { + "epoch": 0.8201521789255246, + "grad_norm": 1.519657617219548, + "learning_rate": 1.37138356863744e-06, + "loss": 0.5943500995635986, + "step": 3557 + }, + { + "epoch": 0.8203827530551072, + "grad_norm": 1.1903323655602067, + "learning_rate": 1.3710295483913533e-06, + "loss": 0.4970731735229492, + "step": 3558 + }, + { + "epoch": 0.8206133271846898, + "grad_norm": 1.3936455952745408, + "learning_rate": 1.3706754742123611e-06, + "loss": 0.44726189970970154, + "step": 3559 + }, + { + "epoch": 0.8208439013142725, + "grad_norm": 1.257368755928624, + "learning_rate": 1.3703213461519325e-06, + "loss": 0.3980759382247925, + "step": 3560 + }, + { + "epoch": 0.8210744754438551, + "grad_norm": 1.510740752003684, + "learning_rate": 1.3699671642615434e-06, + "loss": 0.5521829724311829, + "step": 3561 + }, + { + "epoch": 0.8213050495734379, + "grad_norm": 1.4257916187791417, + "learning_rate": 1.3696129285926769e-06, + "loss": 0.42630624771118164, + "step": 3562 + }, + { + "epoch": 0.8215356237030205, + "grad_norm": 1.3813571407602123, + "learning_rate": 1.3692586391968254e-06, + "loss": 0.5060243606567383, + "step": 3563 + }, + { + "epoch": 0.8217661978326032, + "grad_norm": 1.553405319049413, + "learning_rate": 1.3689042961254884e-06, + "loss": 0.5803407430648804, + "step": 3564 + }, + { + "epoch": 0.8219967719621858, + "grad_norm": 1.1610478816524794, + "learning_rate": 1.3685498994301735e-06, + "loss": 0.4510403871536255, + "step": 3565 + }, + { + 
"epoch": 0.8222273460917685, + "grad_norm": 1.668001711945016, + "learning_rate": 1.3681954491623953e-06, + "loss": 0.5350467562675476, + "step": 3566 + }, + { + "epoch": 0.8224579202213511, + "grad_norm": 1.4589682016059282, + "learning_rate": 1.367840945373677e-06, + "loss": 0.5194679498672485, + "step": 3567 + }, + { + "epoch": 0.8226884943509338, + "grad_norm": 1.5164701950999842, + "learning_rate": 1.3674863881155495e-06, + "loss": 0.43574345111846924, + "step": 3568 + }, + { + "epoch": 0.8229190684805164, + "grad_norm": 1.2235692010100727, + "learning_rate": 1.367131777439551e-06, + "loss": 0.43051451444625854, + "step": 3569 + }, + { + "epoch": 0.8231496426100992, + "grad_norm": 1.4294583851960962, + "learning_rate": 1.3667771133972278e-06, + "loss": 0.44449925422668457, + "step": 3570 + }, + { + "epoch": 0.8233802167396818, + "grad_norm": 1.4281775124274958, + "learning_rate": 1.3664223960401342e-06, + "loss": 0.4466608464717865, + "step": 3571 + }, + { + "epoch": 0.8236107908692645, + "grad_norm": 1.506734312309144, + "learning_rate": 1.3660676254198318e-06, + "loss": 0.6172389984130859, + "step": 3572 + }, + { + "epoch": 0.8238413649988471, + "grad_norm": 1.3071294444794341, + "learning_rate": 1.36571280158789e-06, + "loss": 0.3789742588996887, + "step": 3573 + }, + { + "epoch": 0.8240719391284298, + "grad_norm": 1.2713531694738989, + "learning_rate": 1.365357924595886e-06, + "loss": 0.3871726095676422, + "step": 3574 + }, + { + "epoch": 0.8243025132580124, + "grad_norm": 1.3659394637334186, + "learning_rate": 1.3650029944954047e-06, + "loss": 0.5464534759521484, + "step": 3575 + }, + { + "epoch": 0.8245330873875951, + "grad_norm": 1.4254183485118588, + "learning_rate": 1.3646480113380392e-06, + "loss": 0.4924513101577759, + "step": 3576 + }, + { + "epoch": 0.8247636615171777, + "grad_norm": 1.3350624286567714, + "learning_rate": 1.3642929751753896e-06, + "loss": 0.39648669958114624, + "step": 3577 + }, + { + "epoch": 0.8249942356467604, + "grad_norm": 
1.155634552535419, + "learning_rate": 1.3639378860590642e-06, + "loss": 0.44139498472213745, + "step": 3578 + }, + { + "epoch": 0.825224809776343, + "grad_norm": 1.4016430263315434, + "learning_rate": 1.3635827440406784e-06, + "loss": 0.4477856159210205, + "step": 3579 + }, + { + "epoch": 0.8254553839059258, + "grad_norm": 1.2543072909410065, + "learning_rate": 1.363227549171856e-06, + "loss": 0.48722583055496216, + "step": 3580 + }, + { + "epoch": 0.8256859580355084, + "grad_norm": 1.5407337854642607, + "learning_rate": 1.3628723015042285e-06, + "loss": 0.44485795497894287, + "step": 3581 + }, + { + "epoch": 0.8259165321650911, + "grad_norm": 1.481687909768813, + "learning_rate": 1.362517001089434e-06, + "loss": 0.510918140411377, + "step": 3582 + }, + { + "epoch": 0.8261471062946737, + "grad_norm": 1.4714123899535927, + "learning_rate": 1.3621616479791196e-06, + "loss": 0.5157535076141357, + "step": 3583 + }, + { + "epoch": 0.8263776804242564, + "grad_norm": 1.601097277197277, + "learning_rate": 1.361806242224939e-06, + "loss": 0.6120826005935669, + "step": 3584 + }, + { + "epoch": 0.826608254553839, + "grad_norm": 1.379062804125132, + "learning_rate": 1.3614507838785545e-06, + "loss": 0.47521674633026123, + "step": 3585 + }, + { + "epoch": 0.8268388286834217, + "grad_norm": 1.2544051986437676, + "learning_rate": 1.3610952729916352e-06, + "loss": 0.431441068649292, + "step": 3586 + }, + { + "epoch": 0.8270694028130043, + "grad_norm": 1.4333858511847595, + "learning_rate": 1.3607397096158587e-06, + "loss": 0.5168293118476868, + "step": 3587 + }, + { + "epoch": 0.8272999769425871, + "grad_norm": 1.4075386997192105, + "learning_rate": 1.3603840938029092e-06, + "loss": 0.47669821977615356, + "step": 3588 + }, + { + "epoch": 0.8275305510721697, + "grad_norm": 1.6345113020695277, + "learning_rate": 1.3600284256044791e-06, + "loss": 0.5170806050300598, + "step": 3589 + }, + { + "epoch": 0.8277611252017524, + "grad_norm": 1.3443972777893194, + "learning_rate": 
1.359672705072269e-06, + "loss": 0.5578932762145996, + "step": 3590 + }, + { + "epoch": 0.827991699331335, + "grad_norm": 1.2931790064355784, + "learning_rate": 1.3593169322579855e-06, + "loss": 0.45000678300857544, + "step": 3591 + }, + { + "epoch": 0.8282222734609177, + "grad_norm": 1.7408157234389992, + "learning_rate": 1.3589611072133448e-06, + "loss": 0.47859635949134827, + "step": 3592 + }, + { + "epoch": 0.8284528475905003, + "grad_norm": 1.629320946493551, + "learning_rate": 1.3586052299900693e-06, + "loss": 0.5373919606208801, + "step": 3593 + }, + { + "epoch": 0.828683421720083, + "grad_norm": 1.4093194136520946, + "learning_rate": 1.3582493006398888e-06, + "loss": 0.5461571216583252, + "step": 3594 + }, + { + "epoch": 0.8289139958496656, + "grad_norm": 1.4221547222488737, + "learning_rate": 1.357893319214542e-06, + "loss": 0.522891640663147, + "step": 3595 + }, + { + "epoch": 0.8291445699792483, + "grad_norm": 1.3931497044748549, + "learning_rate": 1.3575372857657739e-06, + "loss": 0.503441572189331, + "step": 3596 + }, + { + "epoch": 0.829375144108831, + "grad_norm": 1.4755218467347275, + "learning_rate": 1.357181200345338e-06, + "loss": 0.45475268363952637, + "step": 3597 + }, + { + "epoch": 0.8296057182384137, + "grad_norm": 1.3529340787561033, + "learning_rate": 1.3568250630049944e-06, + "loss": 0.4626728296279907, + "step": 3598 + }, + { + "epoch": 0.8298362923679963, + "grad_norm": 1.5106243497530205, + "learning_rate": 1.3564688737965118e-06, + "loss": 0.590618371963501, + "step": 3599 + }, + { + "epoch": 0.830066866497579, + "grad_norm": 1.1729232075760356, + "learning_rate": 1.3561126327716658e-06, + "loss": 0.4252029061317444, + "step": 3600 + }, + { + "epoch": 0.8302974406271616, + "grad_norm": 1.5093126003070163, + "learning_rate": 1.3557563399822396e-06, + "loss": 0.5741503238677979, + "step": 3601 + }, + { + "epoch": 0.8305280147567443, + "grad_norm": 1.346541706093541, + "learning_rate": 1.3553999954800236e-06, + "loss": 
0.4591038227081299, + "step": 3602 + }, + { + "epoch": 0.8307585888863269, + "grad_norm": 1.5342817778823432, + "learning_rate": 1.3550435993168164e-06, + "loss": 0.5761657953262329, + "step": 3603 + }, + { + "epoch": 0.8309891630159096, + "grad_norm": 1.4873747737215213, + "learning_rate": 1.3546871515444239e-06, + "loss": 0.4835323691368103, + "step": 3604 + }, + { + "epoch": 0.8312197371454922, + "grad_norm": 1.3474153162620106, + "learning_rate": 1.3543306522146594e-06, + "loss": 0.6152533292770386, + "step": 3605 + }, + { + "epoch": 0.831450311275075, + "grad_norm": 1.7615931586989606, + "learning_rate": 1.3539741013793431e-06, + "loss": 0.48106616735458374, + "step": 3606 + }, + { + "epoch": 0.8316808854046576, + "grad_norm": 1.3977429311647935, + "learning_rate": 1.3536174990903042e-06, + "loss": 0.48128771781921387, + "step": 3607 + }, + { + "epoch": 0.8319114595342403, + "grad_norm": 1.5624866131401935, + "learning_rate": 1.353260845399378e-06, + "loss": 0.4395609498023987, + "step": 3608 + }, + { + "epoch": 0.8321420336638229, + "grad_norm": 1.6243424583265862, + "learning_rate": 1.3529041403584076e-06, + "loss": 0.5298231840133667, + "step": 3609 + }, + { + "epoch": 0.8323726077934056, + "grad_norm": 1.610376085646533, + "learning_rate": 1.3525473840192436e-06, + "loss": 0.4694434404373169, + "step": 3610 + }, + { + "epoch": 0.8326031819229882, + "grad_norm": 1.3870293085196028, + "learning_rate": 1.3521905764337449e-06, + "loss": 0.4264890253543854, + "step": 3611 + }, + { + "epoch": 0.8328337560525709, + "grad_norm": 1.3900907609641087, + "learning_rate": 1.3518337176537762e-06, + "loss": 0.3266828656196594, + "step": 3612 + }, + { + "epoch": 0.8330643301821535, + "grad_norm": 1.548598004244933, + "learning_rate": 1.351476807731211e-06, + "loss": 0.5554935336112976, + "step": 3613 + }, + { + "epoch": 0.8332949043117363, + "grad_norm": 1.3139574983210685, + "learning_rate": 1.3511198467179295e-06, + "loss": 0.4375999867916107, + "step": 3614 + }, + { + 
"epoch": 0.8335254784413189, + "grad_norm": 1.3568296792682797, + "learning_rate": 1.35076283466582e-06, + "loss": 0.564457893371582, + "step": 3615 + }, + { + "epoch": 0.8337560525709016, + "grad_norm": 1.5648573569840147, + "learning_rate": 1.3504057716267776e-06, + "loss": 0.5141148567199707, + "step": 3616 + }, + { + "epoch": 0.8339866267004842, + "grad_norm": 1.2607282701974722, + "learning_rate": 1.350048657652705e-06, + "loss": 0.45514535903930664, + "step": 3617 + }, + { + "epoch": 0.8342172008300669, + "grad_norm": 1.298858308641179, + "learning_rate": 1.3496914927955122e-06, + "loss": 0.5224772691726685, + "step": 3618 + }, + { + "epoch": 0.8344477749596495, + "grad_norm": 1.3773935543957632, + "learning_rate": 1.349334277107117e-06, + "loss": 0.45185205340385437, + "step": 3619 + }, + { + "epoch": 0.8346783490892322, + "grad_norm": 1.3400411570126707, + "learning_rate": 1.3489770106394444e-06, + "loss": 0.47232794761657715, + "step": 3620 + }, + { + "epoch": 0.8349089232188148, + "grad_norm": 1.3564585933268873, + "learning_rate": 1.3486196934444264e-06, + "loss": 0.44031190872192383, + "step": 3621 + }, + { + "epoch": 0.8351394973483975, + "grad_norm": 1.2921832515242213, + "learning_rate": 1.3482623255740028e-06, + "loss": 0.4594510793685913, + "step": 3622 + }, + { + "epoch": 0.8353700714779801, + "grad_norm": 1.3491628541071723, + "learning_rate": 1.347904907080121e-06, + "loss": 0.38726723194122314, + "step": 3623 + }, + { + "epoch": 0.8356006456075629, + "grad_norm": 1.4086239991990677, + "learning_rate": 1.3475474380147347e-06, + "loss": 0.544617772102356, + "step": 3624 + }, + { + "epoch": 0.8358312197371455, + "grad_norm": 1.5645995914963535, + "learning_rate": 1.347189918429806e-06, + "loss": 0.503423810005188, + "step": 3625 + }, + { + "epoch": 0.8360617938667282, + "grad_norm": 1.3950432339665733, + "learning_rate": 1.3468323483773038e-06, + "loss": 0.4395143985748291, + "step": 3626 + }, + { + "epoch": 0.8362923679963108, + "grad_norm": 
1.6308000434387062, + "learning_rate": 1.346474727909205e-06, + "loss": 0.41464856266975403, + "step": 3627 + }, + { + "epoch": 0.8365229421258935, + "grad_norm": 1.4008674771220466, + "learning_rate": 1.346117057077493e-06, + "loss": 0.4782845079898834, + "step": 3628 + }, + { + "epoch": 0.8367535162554761, + "grad_norm": 1.2484540580184977, + "learning_rate": 1.345759335934159e-06, + "loss": 0.48308104276657104, + "step": 3629 + }, + { + "epoch": 0.8369840903850588, + "grad_norm": 1.3935764281095124, + "learning_rate": 1.345401564531201e-06, + "loss": 0.5759967565536499, + "step": 3630 + }, + { + "epoch": 0.8372146645146414, + "grad_norm": 1.421077506310717, + "learning_rate": 1.3450437429206256e-06, + "loss": 0.5900512337684631, + "step": 3631 + }, + { + "epoch": 0.8374452386442242, + "grad_norm": 1.3643346247687353, + "learning_rate": 1.3446858711544451e-06, + "loss": 0.4776286482810974, + "step": 3632 + }, + { + "epoch": 0.8376758127738068, + "grad_norm": 1.5796891796446009, + "learning_rate": 1.34432794928468e-06, + "loss": 0.5123563408851624, + "step": 3633 + }, + { + "epoch": 0.8379063869033895, + "grad_norm": 1.6272139775850447, + "learning_rate": 1.3439699773633574e-06, + "loss": 0.5505821108818054, + "step": 3634 + }, + { + "epoch": 0.8381369610329721, + "grad_norm": 1.4456391396483874, + "learning_rate": 1.343611955442513e-06, + "loss": 0.5525364875793457, + "step": 3635 + }, + { + "epoch": 0.8383675351625548, + "grad_norm": 1.1644228181066894, + "learning_rate": 1.3432538835741884e-06, + "loss": 0.44074952602386475, + "step": 3636 + }, + { + "epoch": 0.8385981092921374, + "grad_norm": 1.3792820862390651, + "learning_rate": 1.3428957618104331e-06, + "loss": 0.5488649606704712, + "step": 3637 + }, + { + "epoch": 0.8388286834217201, + "grad_norm": 1.159150884236996, + "learning_rate": 1.3425375902033034e-06, + "loss": 0.4427725672721863, + "step": 3638 + }, + { + "epoch": 0.8390592575513027, + "grad_norm": 1.5753495335559473, + "learning_rate": 
1.3421793688048636e-06, + "loss": 0.5244250297546387, + "step": 3639 + }, + { + "epoch": 0.8392898316808854, + "grad_norm": 1.2853956216426152, + "learning_rate": 1.3418210976671845e-06, + "loss": 0.4684640169143677, + "step": 3640 + }, + { + "epoch": 0.839520405810468, + "grad_norm": 1.4767228704961965, + "learning_rate": 1.3414627768423449e-06, + "loss": 0.4518035054206848, + "step": 3641 + }, + { + "epoch": 0.8397509799400508, + "grad_norm": 1.5338085000094812, + "learning_rate": 1.34110440638243e-06, + "loss": 0.47504323720932007, + "step": 3642 + }, + { + "epoch": 0.8399815540696334, + "grad_norm": 1.7182899921711987, + "learning_rate": 1.3407459863395326e-06, + "loss": 0.3835057020187378, + "step": 3643 + }, + { + "epoch": 0.8402121281992161, + "grad_norm": 1.4517538314936977, + "learning_rate": 1.3403875167657529e-06, + "loss": 0.4103546738624573, + "step": 3644 + }, + { + "epoch": 0.8404427023287987, + "grad_norm": 1.3338056576205999, + "learning_rate": 1.3400289977131974e-06, + "loss": 0.48064136505126953, + "step": 3645 + }, + { + "epoch": 0.8406732764583814, + "grad_norm": 1.5606949897639386, + "learning_rate": 1.3396704292339813e-06, + "loss": 0.49655234813690186, + "step": 3646 + }, + { + "epoch": 0.840903850587964, + "grad_norm": 1.3180737586627664, + "learning_rate": 1.3393118113802259e-06, + "loss": 0.5559303760528564, + "step": 3647 + }, + { + "epoch": 0.8411344247175467, + "grad_norm": 1.3902505896601203, + "learning_rate": 1.3389531442040599e-06, + "loss": 0.5173505544662476, + "step": 3648 + }, + { + "epoch": 0.8413649988471293, + "grad_norm": 1.4997400095057662, + "learning_rate": 1.338594427757619e-06, + "loss": 0.500524640083313, + "step": 3649 + }, + { + "epoch": 0.8415955729767121, + "grad_norm": 1.3017945585861477, + "learning_rate": 1.3382356620930467e-06, + "loss": 0.5167285203933716, + "step": 3650 + }, + { + "epoch": 0.8418261471062947, + "grad_norm": 1.4661199659605932, + "learning_rate": 1.3378768472624929e-06, + "loss": 
0.5006825923919678, + "step": 3651 + }, + { + "epoch": 0.8420567212358774, + "grad_norm": 1.5253217794534257, + "learning_rate": 1.3375179833181153e-06, + "loss": 0.5421864986419678, + "step": 3652 + }, + { + "epoch": 0.84228729536546, + "grad_norm": 1.5304567180850979, + "learning_rate": 1.337159070312078e-06, + "loss": 0.4964475929737091, + "step": 3653 + }, + { + "epoch": 0.8425178694950427, + "grad_norm": 1.2795061721511742, + "learning_rate": 1.3368001082965528e-06, + "loss": 0.4020928144454956, + "step": 3654 + }, + { + "epoch": 0.8427484436246253, + "grad_norm": 1.3457912405228358, + "learning_rate": 1.3364410973237183e-06, + "loss": 0.43009278178215027, + "step": 3655 + }, + { + "epoch": 0.842979017754208, + "grad_norm": 1.3663101783603413, + "learning_rate": 1.3360820374457608e-06, + "loss": 0.5939761400222778, + "step": 3656 + }, + { + "epoch": 0.8432095918837906, + "grad_norm": 1.3723718945789372, + "learning_rate": 1.335722928714873e-06, + "loss": 0.43889346718788147, + "step": 3657 + }, + { + "epoch": 0.8434401660133733, + "grad_norm": 1.510811137049935, + "learning_rate": 1.335363771183255e-06, + "loss": 0.5125945806503296, + "step": 3658 + }, + { + "epoch": 0.843670740142956, + "grad_norm": 1.2988273180041983, + "learning_rate": 1.3350045649031143e-06, + "loss": 0.516818642616272, + "step": 3659 + }, + { + "epoch": 0.8439013142725387, + "grad_norm": 1.2172726171902464, + "learning_rate": 1.3346453099266649e-06, + "loss": 0.5098299980163574, + "step": 3660 + }, + { + "epoch": 0.8441318884021213, + "grad_norm": 1.4809835823543989, + "learning_rate": 1.334286006306128e-06, + "loss": 0.46228134632110596, + "step": 3661 + }, + { + "epoch": 0.844362462531704, + "grad_norm": 1.518730905252404, + "learning_rate": 1.3339266540937324e-06, + "loss": 0.38364481925964355, + "step": 3662 + }, + { + "epoch": 0.8445930366612866, + "grad_norm": 1.2447229933483466, + "learning_rate": 1.3335672533417134e-06, + "loss": 0.4363073706626892, + "step": 3663 + }, + { + 
"epoch": 0.8448236107908693, + "grad_norm": 1.5445839123019949, + "learning_rate": 1.3332078041023133e-06, + "loss": 0.463603675365448, + "step": 3664 + }, + { + "epoch": 0.8450541849204519, + "grad_norm": 1.118250112497339, + "learning_rate": 1.3328483064277816e-06, + "loss": 0.4173084795475006, + "step": 3665 + }, + { + "epoch": 0.8452847590500346, + "grad_norm": 1.2905398126594152, + "learning_rate": 1.3324887603703756e-06, + "loss": 0.41451913118362427, + "step": 3666 + }, + { + "epoch": 0.8455153331796172, + "grad_norm": 1.3301474043831027, + "learning_rate": 1.3321291659823587e-06, + "loss": 0.49418264627456665, + "step": 3667 + }, + { + "epoch": 0.8457459073092, + "grad_norm": 1.323747824550861, + "learning_rate": 1.3317695233160015e-06, + "loss": 0.48787444829940796, + "step": 3668 + }, + { + "epoch": 0.8459764814387826, + "grad_norm": 1.419516654753041, + "learning_rate": 1.3314098324235814e-06, + "loss": 0.484865665435791, + "step": 3669 + }, + { + "epoch": 0.8462070555683652, + "grad_norm": 1.4996660725713626, + "learning_rate": 1.3310500933573837e-06, + "loss": 0.44162076711654663, + "step": 3670 + }, + { + "epoch": 0.8464376296979479, + "grad_norm": 1.4496595059902684, + "learning_rate": 1.3306903061696999e-06, + "loss": 0.39880990982055664, + "step": 3671 + }, + { + "epoch": 0.8466682038275305, + "grad_norm": 1.596735486600776, + "learning_rate": 1.3303304709128288e-06, + "loss": 0.4405972957611084, + "step": 3672 + }, + { + "epoch": 0.8468987779571132, + "grad_norm": 1.8476371944591239, + "learning_rate": 1.3299705876390755e-06, + "loss": 0.4228917956352234, + "step": 3673 + }, + { + "epoch": 0.8471293520866958, + "grad_norm": 1.3245854918753257, + "learning_rate": 1.3296106564007532e-06, + "loss": 0.44533059000968933, + "step": 3674 + }, + { + "epoch": 0.8473599262162785, + "grad_norm": 1.324480419314636, + "learning_rate": 1.3292506772501816e-06, + "loss": 0.4672505855560303, + "step": 3675 + }, + { + "epoch": 0.8475905003458611, + "grad_norm": 
1.5345690520656405, + "learning_rate": 1.3288906502396873e-06, + "loss": 0.5651025772094727, + "step": 3676 + }, + { + "epoch": 0.8478210744754439, + "grad_norm": 1.4113200785742674, + "learning_rate": 1.3285305754216034e-06, + "loss": 0.4877372086048126, + "step": 3677 + }, + { + "epoch": 0.8480516486050265, + "grad_norm": 1.6156626909271148, + "learning_rate": 1.3281704528482713e-06, + "loss": 0.43767499923706055, + "step": 3678 + }, + { + "epoch": 0.8482822227346092, + "grad_norm": 1.6309175000442955, + "learning_rate": 1.3278102825720376e-06, + "loss": 0.5077182650566101, + "step": 3679 + }, + { + "epoch": 0.8485127968641918, + "grad_norm": 1.5150502093819094, + "learning_rate": 1.3274500646452573e-06, + "loss": 0.4814456105232239, + "step": 3680 + }, + { + "epoch": 0.8487433709937745, + "grad_norm": 1.3626740483959299, + "learning_rate": 1.3270897991202913e-06, + "loss": 0.4454193115234375, + "step": 3681 + }, + { + "epoch": 0.8489739451233571, + "grad_norm": 1.1173863119708762, + "learning_rate": 1.3267294860495084e-06, + "loss": 0.3973482549190521, + "step": 3682 + }, + { + "epoch": 0.8492045192529398, + "grad_norm": 1.5337644837004238, + "learning_rate": 1.3263691254852834e-06, + "loss": 0.5115909576416016, + "step": 3683 + }, + { + "epoch": 0.8494350933825224, + "grad_norm": 1.2962888350788886, + "learning_rate": 1.3260087174799982e-06, + "loss": 0.4217768907546997, + "step": 3684 + }, + { + "epoch": 0.8496656675121051, + "grad_norm": 1.5676465439666392, + "learning_rate": 1.3256482620860414e-06, + "loss": 0.4462714195251465, + "step": 3685 + }, + { + "epoch": 0.8498962416416878, + "grad_norm": 1.278085511550712, + "learning_rate": 1.32528775935581e-06, + "loss": 0.4617312550544739, + "step": 3686 + }, + { + "epoch": 0.8501268157712705, + "grad_norm": 1.2760475898780375, + "learning_rate": 1.324927209341706e-06, + "loss": 0.4774616062641144, + "step": 3687 + }, + { + "epoch": 0.8503573899008531, + "grad_norm": 1.389927333157612, + "learning_rate": 
1.3245666120961389e-06, + "loss": 0.38730189204216003, + "step": 3688 + }, + { + "epoch": 0.8505879640304358, + "grad_norm": 1.5164687032364252, + "learning_rate": 1.324205967671525e-06, + "loss": 0.45189517736434937, + "step": 3689 + }, + { + "epoch": 0.8508185381600184, + "grad_norm": 1.489462413187487, + "learning_rate": 1.3238452761202887e-06, + "loss": 0.4965584874153137, + "step": 3690 + }, + { + "epoch": 0.8510491122896011, + "grad_norm": 1.2283217886481297, + "learning_rate": 1.3234845374948591e-06, + "loss": 0.4409075975418091, + "step": 3691 + }, + { + "epoch": 0.8512796864191837, + "grad_norm": 1.3545920303070538, + "learning_rate": 1.3231237518476737e-06, + "loss": 0.4457218647003174, + "step": 3692 + }, + { + "epoch": 0.8515102605487664, + "grad_norm": 1.2432481704868787, + "learning_rate": 1.3227629192311762e-06, + "loss": 0.42810603976249695, + "step": 3693 + }, + { + "epoch": 0.851740834678349, + "grad_norm": 1.3504737245283156, + "learning_rate": 1.3224020396978172e-06, + "loss": 0.40753173828125, + "step": 3694 + }, + { + "epoch": 0.8519714088079318, + "grad_norm": 1.5063309076640758, + "learning_rate": 1.3220411133000542e-06, + "loss": 0.5057830810546875, + "step": 3695 + }, + { + "epoch": 0.8522019829375144, + "grad_norm": 1.4625648008354504, + "learning_rate": 1.3216801400903515e-06, + "loss": 0.42498981952667236, + "step": 3696 + }, + { + "epoch": 0.8524325570670971, + "grad_norm": 1.736302707969947, + "learning_rate": 1.3213191201211806e-06, + "loss": 0.44985881447792053, + "step": 3697 + }, + { + "epoch": 0.8526631311966797, + "grad_norm": 1.5257289791960187, + "learning_rate": 1.3209580534450192e-06, + "loss": 0.39984816312789917, + "step": 3698 + }, + { + "epoch": 0.8528937053262624, + "grad_norm": 1.4859934204912078, + "learning_rate": 1.3205969401143516e-06, + "loss": 0.4773896038532257, + "step": 3699 + }, + { + "epoch": 0.853124279455845, + "grad_norm": 1.5299580963987478, + "learning_rate": 1.3202357801816698e-06, + "loss": 
0.5699855089187622, + "step": 3700 + }, + { + "epoch": 0.8533548535854277, + "grad_norm": 1.5124437197630332, + "learning_rate": 1.3198745736994714e-06, + "loss": 0.4486675262451172, + "step": 3701 + }, + { + "epoch": 0.8535854277150103, + "grad_norm": 1.3641053506348044, + "learning_rate": 1.3195133207202625e-06, + "loss": 0.47909995913505554, + "step": 3702 + }, + { + "epoch": 0.853816001844593, + "grad_norm": 1.3267279385735278, + "learning_rate": 1.3191520212965542e-06, + "loss": 0.4356222450733185, + "step": 3703 + }, + { + "epoch": 0.8540465759741757, + "grad_norm": 1.5161594053893233, + "learning_rate": 1.3187906754808646e-06, + "loss": 0.4734821319580078, + "step": 3704 + }, + { + "epoch": 0.8542771501037584, + "grad_norm": 1.1414361983546972, + "learning_rate": 1.3184292833257197e-06, + "loss": 0.4164031744003296, + "step": 3705 + }, + { + "epoch": 0.854507724233341, + "grad_norm": 1.5194682024268111, + "learning_rate": 1.3180678448836516e-06, + "loss": 0.505548357963562, + "step": 3706 + }, + { + "epoch": 0.8547382983629237, + "grad_norm": 1.4180879233512311, + "learning_rate": 1.3177063602071985e-06, + "loss": 0.4443202316761017, + "step": 3707 + }, + { + "epoch": 0.8549688724925063, + "grad_norm": 1.4808642334806548, + "learning_rate": 1.317344829348906e-06, + "loss": 0.4594070017337799, + "step": 3708 + }, + { + "epoch": 0.855199446622089, + "grad_norm": 1.595149298191138, + "learning_rate": 1.3169832523613265e-06, + "loss": 0.5346768498420715, + "step": 3709 + }, + { + "epoch": 0.8554300207516716, + "grad_norm": 1.4211934536480004, + "learning_rate": 1.3166216292970185e-06, + "loss": 0.44471168518066406, + "step": 3710 + }, + { + "epoch": 0.8556605948812543, + "grad_norm": 1.3967510109946715, + "learning_rate": 1.3162599602085482e-06, + "loss": 0.4414154589176178, + "step": 3711 + }, + { + "epoch": 0.855891169010837, + "grad_norm": 1.2591243363727789, + "learning_rate": 1.3158982451484873e-06, + "loss": 0.4267842769622803, + "step": 3712 + }, + { + 
"epoch": 0.8561217431404197, + "grad_norm": 1.5517519524370356, + "learning_rate": 1.315536484169415e-06, + "loss": 0.5282812118530273, + "step": 3713 + }, + { + "epoch": 0.8563523172700023, + "grad_norm": 1.3747848129200213, + "learning_rate": 1.3151746773239167e-06, + "loss": 0.3831692934036255, + "step": 3714 + }, + { + "epoch": 0.856582891399585, + "grad_norm": 1.3399055617764033, + "learning_rate": 1.3148128246645848e-06, + "loss": 0.4714779853820801, + "step": 3715 + }, + { + "epoch": 0.8568134655291676, + "grad_norm": 1.5957966977407376, + "learning_rate": 1.3144509262440185e-06, + "loss": 0.515029788017273, + "step": 3716 + }, + { + "epoch": 0.8570440396587503, + "grad_norm": 1.6565005005078866, + "learning_rate": 1.314088982114823e-06, + "loss": 0.48407065868377686, + "step": 3717 + }, + { + "epoch": 0.8572746137883329, + "grad_norm": 1.2250893853794216, + "learning_rate": 1.3137269923296111e-06, + "loss": 0.4756847620010376, + "step": 3718 + }, + { + "epoch": 0.8575051879179156, + "grad_norm": 1.4417516161095163, + "learning_rate": 1.313364956941001e-06, + "loss": 0.47744277119636536, + "step": 3719 + }, + { + "epoch": 0.8577357620474982, + "grad_norm": 1.4540506451139732, + "learning_rate": 1.3130028760016187e-06, + "loss": 0.4967440366744995, + "step": 3720 + }, + { + "epoch": 0.857966336177081, + "grad_norm": 1.5755023694033539, + "learning_rate": 1.312640749564096e-06, + "loss": 0.44999921321868896, + "step": 3721 + }, + { + "epoch": 0.8581969103066636, + "grad_norm": 1.1829331105101752, + "learning_rate": 1.3122785776810723e-06, + "loss": 0.4454652667045593, + "step": 3722 + }, + { + "epoch": 0.8584274844362463, + "grad_norm": 1.220523426514953, + "learning_rate": 1.3119163604051923e-06, + "loss": 0.37483078241348267, + "step": 3723 + }, + { + "epoch": 0.8586580585658289, + "grad_norm": 1.45963624909142, + "learning_rate": 1.3115540977891076e-06, + "loss": 0.3732140064239502, + "step": 3724 + }, + { + "epoch": 0.8588886326954116, + "grad_norm": 
1.5667872254799649, + "learning_rate": 1.3111917898854779e-06, + "loss": 0.5709421634674072, + "step": 3725 + }, + { + "epoch": 0.8591192068249942, + "grad_norm": 2.0482790256244514, + "learning_rate": 1.3108294367469677e-06, + "loss": 0.5301297307014465, + "step": 3726 + }, + { + "epoch": 0.8593497809545769, + "grad_norm": 1.2253994153188903, + "learning_rate": 1.3104670384262484e-06, + "loss": 0.45979735255241394, + "step": 3727 + }, + { + "epoch": 0.8595803550841595, + "grad_norm": 1.5172885339612137, + "learning_rate": 1.3101045949759985e-06, + "loss": 0.5051921606063843, + "step": 3728 + }, + { + "epoch": 0.8598109292137422, + "grad_norm": 1.5432212262669465, + "learning_rate": 1.309742106448903e-06, + "loss": 0.5057204365730286, + "step": 3729 + }, + { + "epoch": 0.8600415033433249, + "grad_norm": 1.3029916397805466, + "learning_rate": 1.3093795728976535e-06, + "loss": 0.4265059530735016, + "step": 3730 + }, + { + "epoch": 0.8602720774729076, + "grad_norm": 1.2392416355330595, + "learning_rate": 1.3090169943749473e-06, + "loss": 0.39166492223739624, + "step": 3731 + }, + { + "epoch": 0.8605026516024902, + "grad_norm": 1.4335892651385718, + "learning_rate": 1.308654370933489e-06, + "loss": 0.4321832060813904, + "step": 3732 + }, + { + "epoch": 0.8607332257320729, + "grad_norm": 1.4026009292758175, + "learning_rate": 1.3082917026259906e-06, + "loss": 0.5028939247131348, + "step": 3733 + }, + { + "epoch": 0.8609637998616555, + "grad_norm": 1.461263824354524, + "learning_rate": 1.3079289895051681e-06, + "loss": 0.4642373323440552, + "step": 3734 + }, + { + "epoch": 0.8611943739912382, + "grad_norm": 1.2616373488525174, + "learning_rate": 1.3075662316237464e-06, + "loss": 0.416348397731781, + "step": 3735 + }, + { + "epoch": 0.8614249481208208, + "grad_norm": 1.9156143459520234, + "learning_rate": 1.3072034290344556e-06, + "loss": 0.48442524671554565, + "step": 3736 + }, + { + "epoch": 0.8616555222504035, + "grad_norm": 1.4675369296005183, + "learning_rate": 
1.3068405817900332e-06, + "loss": 0.46903935074806213, + "step": 3737 + }, + { + "epoch": 0.8618860963799861, + "grad_norm": 1.433982633948309, + "learning_rate": 1.3064776899432224e-06, + "loss": 0.48172008991241455, + "step": 3738 + }, + { + "epoch": 0.8621166705095689, + "grad_norm": 1.4697783322173945, + "learning_rate": 1.3061147535467734e-06, + "loss": 0.44460922479629517, + "step": 3739 + }, + { + "epoch": 0.8623472446391515, + "grad_norm": 1.4552688390934359, + "learning_rate": 1.3057517726534423e-06, + "loss": 0.4728608727455139, + "step": 3740 + }, + { + "epoch": 0.8625778187687342, + "grad_norm": 1.2981084774118934, + "learning_rate": 1.3053887473159928e-06, + "loss": 0.36457544565200806, + "step": 3741 + }, + { + "epoch": 0.8628083928983168, + "grad_norm": 1.3219603285138386, + "learning_rate": 1.3050256775871936e-06, + "loss": 0.3753359317779541, + "step": 3742 + }, + { + "epoch": 0.8630389670278995, + "grad_norm": 1.71764180047156, + "learning_rate": 1.304662563519821e-06, + "loss": 0.38679057359695435, + "step": 3743 + }, + { + "epoch": 0.8632695411574821, + "grad_norm": 1.2517686459377946, + "learning_rate": 1.304299405166657e-06, + "loss": 0.5008635520935059, + "step": 3744 + }, + { + "epoch": 0.8635001152870648, + "grad_norm": 1.6524585351681906, + "learning_rate": 1.3039362025804903e-06, + "loss": 0.3723052740097046, + "step": 3745 + }, + { + "epoch": 0.8637306894166474, + "grad_norm": 1.4101013037777343, + "learning_rate": 1.3035729558141166e-06, + "loss": 0.4227592945098877, + "step": 3746 + }, + { + "epoch": 0.8639612635462302, + "grad_norm": 1.2385954175555658, + "learning_rate": 1.3032096649203369e-06, + "loss": 0.44072139263153076, + "step": 3747 + }, + { + "epoch": 0.8641918376758128, + "grad_norm": 1.330285491132409, + "learning_rate": 1.3028463299519594e-06, + "loss": 0.49321871995925903, + "step": 3748 + }, + { + "epoch": 0.8644224118053955, + "grad_norm": 1.1777120494442346, + "learning_rate": 1.3024829509617987e-06, + "loss": 
0.3751382827758789, + "step": 3749 + }, + { + "epoch": 0.8646529859349781, + "grad_norm": 1.2092220891938048, + "learning_rate": 1.3021195280026755e-06, + "loss": 0.43967729806900024, + "step": 3750 + }, + { + "epoch": 0.8648835600645608, + "grad_norm": 1.2227774970491123, + "learning_rate": 1.3017560611274172e-06, + "loss": 0.4102880358695984, + "step": 3751 + }, + { + "epoch": 0.8651141341941434, + "grad_norm": 1.4524327131347594, + "learning_rate": 1.301392550388857e-06, + "loss": 0.5225233435630798, + "step": 3752 + }, + { + "epoch": 0.8653447083237261, + "grad_norm": 1.7121734467218848, + "learning_rate": 1.3010289958398352e-06, + "loss": 0.6021677255630493, + "step": 3753 + }, + { + "epoch": 0.8655752824533087, + "grad_norm": 1.294116122042798, + "learning_rate": 1.300665397533198e-06, + "loss": 0.5031560063362122, + "step": 3754 + }, + { + "epoch": 0.8658058565828914, + "grad_norm": 1.2573123861588813, + "learning_rate": 1.300301755521798e-06, + "loss": 0.5406110286712646, + "step": 3755 + }, + { + "epoch": 0.866036430712474, + "grad_norm": 1.3123644187859618, + "learning_rate": 1.2999380698584945e-06, + "loss": 0.5359587669372559, + "step": 3756 + }, + { + "epoch": 0.8662670048420568, + "grad_norm": 1.4006997771166723, + "learning_rate": 1.2995743405961525e-06, + "loss": 0.46089720726013184, + "step": 3757 + }, + { + "epoch": 0.8664975789716394, + "grad_norm": 1.3064464980724229, + "learning_rate": 1.2992105677876444e-06, + "loss": 0.4611746668815613, + "step": 3758 + }, + { + "epoch": 0.8667281531012221, + "grad_norm": 1.3860871410802968, + "learning_rate": 1.2988467514858478e-06, + "loss": 0.47040778398513794, + "step": 3759 + }, + { + "epoch": 0.8669587272308047, + "grad_norm": 1.4624604845389892, + "learning_rate": 1.2984828917436469e-06, + "loss": 0.5118452310562134, + "step": 3760 + }, + { + "epoch": 0.8671893013603874, + "grad_norm": 1.3248325273306294, + "learning_rate": 1.2981189886139326e-06, + "loss": 0.42349302768707275, + "step": 3761 + }, + { 
+ "epoch": 0.86741987548997, + "grad_norm": 1.4983666129317725, + "learning_rate": 1.2977550421496022e-06, + "loss": 0.4888027310371399, + "step": 3762 + }, + { + "epoch": 0.8676504496195527, + "grad_norm": 1.5557430857836938, + "learning_rate": 1.2973910524035587e-06, + "loss": 0.5637897849082947, + "step": 3763 + }, + { + "epoch": 0.8678810237491353, + "grad_norm": 1.2906063231523421, + "learning_rate": 1.2970270194287119e-06, + "loss": 0.4159572124481201, + "step": 3764 + }, + { + "epoch": 0.868111597878718, + "grad_norm": 1.613449710248156, + "learning_rate": 1.2966629432779775e-06, + "loss": 0.4558612108230591, + "step": 3765 + }, + { + "epoch": 0.8683421720083007, + "grad_norm": 1.229959300374187, + "learning_rate": 1.2962988240042775e-06, + "loss": 0.4235115647315979, + "step": 3766 + }, + { + "epoch": 0.8685727461378834, + "grad_norm": 1.5042750051225975, + "learning_rate": 1.2959346616605404e-06, + "loss": 0.5096476078033447, + "step": 3767 + }, + { + "epoch": 0.868803320267466, + "grad_norm": 1.3849812365321899, + "learning_rate": 1.2955704562997013e-06, + "loss": 0.47097906470298767, + "step": 3768 + }, + { + "epoch": 0.8690338943970487, + "grad_norm": 1.2057643302548011, + "learning_rate": 1.2952062079747008e-06, + "loss": 0.4508157968521118, + "step": 3769 + }, + { + "epoch": 0.8692644685266313, + "grad_norm": 1.3904260388472953, + "learning_rate": 1.2948419167384864e-06, + "loss": 0.43800675868988037, + "step": 3770 + }, + { + "epoch": 0.869495042656214, + "grad_norm": 1.3552023829739699, + "learning_rate": 1.2944775826440108e-06, + "loss": 0.5512480735778809, + "step": 3771 + }, + { + "epoch": 0.8697256167857966, + "grad_norm": 1.4428129453899297, + "learning_rate": 1.2941132057442342e-06, + "loss": 0.4654430150985718, + "step": 3772 + }, + { + "epoch": 0.8699561909153793, + "grad_norm": 1.3297596373891312, + "learning_rate": 1.293748786092123e-06, + "loss": 0.5429458618164062, + "step": 3773 + }, + { + "epoch": 0.870186765044962, + "grad_norm": 
1.7953090529311853, + "learning_rate": 1.2933843237406481e-06, + "loss": 0.415671169757843, + "step": 3774 + }, + { + "epoch": 0.8704173391745447, + "grad_norm": 1.3784118855195835, + "learning_rate": 1.2930198187427884e-06, + "loss": 0.4347325563430786, + "step": 3775 + }, + { + "epoch": 0.8706479133041273, + "grad_norm": 1.3858530201589612, + "learning_rate": 1.2926552711515287e-06, + "loss": 0.41997528076171875, + "step": 3776 + }, + { + "epoch": 0.87087848743371, + "grad_norm": 1.4475652450278216, + "learning_rate": 1.292290681019859e-06, + "loss": 0.45956090092658997, + "step": 3777 + }, + { + "epoch": 0.8711090615632926, + "grad_norm": 1.3318373392521217, + "learning_rate": 1.2919260484007767e-06, + "loss": 0.4615165889263153, + "step": 3778 + }, + { + "epoch": 0.8713396356928753, + "grad_norm": 1.5526291007190895, + "learning_rate": 1.2915613733472848e-06, + "loss": 0.3919866681098938, + "step": 3779 + }, + { + "epoch": 0.8715702098224579, + "grad_norm": 1.5182901628405527, + "learning_rate": 1.2911966559123922e-06, + "loss": 0.5324772000312805, + "step": 3780 + }, + { + "epoch": 0.8718007839520405, + "grad_norm": 1.4899431097732017, + "learning_rate": 1.2908318961491147e-06, + "loss": 0.4813354015350342, + "step": 3781 + }, + { + "epoch": 0.8720313580816232, + "grad_norm": 1.6904916219237236, + "learning_rate": 1.2904670941104735e-06, + "loss": 0.5617851614952087, + "step": 3782 + }, + { + "epoch": 0.8722619322112058, + "grad_norm": 1.5869523154671146, + "learning_rate": 1.2901022498494963e-06, + "loss": 0.5369905233383179, + "step": 3783 + }, + { + "epoch": 0.8724925063407886, + "grad_norm": 1.4103839502113327, + "learning_rate": 1.289737363419217e-06, + "loss": 0.469723641872406, + "step": 3784 + }, + { + "epoch": 0.8727230804703712, + "grad_norm": 1.5392452648373567, + "learning_rate": 1.2893724348726757e-06, + "loss": 0.5100580453872681, + "step": 3785 + }, + { + "epoch": 0.8729536545999539, + "grad_norm": 1.4522390007049084, + "learning_rate": 
1.289007464262918e-06, + "loss": 0.3959219455718994, + "step": 3786 + }, + { + "epoch": 0.8731842287295365, + "grad_norm": 1.3370969443139462, + "learning_rate": 1.2886424516429967e-06, + "loss": 0.4237936735153198, + "step": 3787 + }, + { + "epoch": 0.8734148028591192, + "grad_norm": 1.6505369649722645, + "learning_rate": 1.2882773970659693e-06, + "loss": 0.4604552984237671, + "step": 3788 + }, + { + "epoch": 0.8736453769887018, + "grad_norm": 1.4408188813706955, + "learning_rate": 1.287912300584901e-06, + "loss": 0.4265769124031067, + "step": 3789 + }, + { + "epoch": 0.8738759511182845, + "grad_norm": 1.185765484689313, + "learning_rate": 1.2875471622528617e-06, + "loss": 0.4644312262535095, + "step": 3790 + }, + { + "epoch": 0.8741065252478671, + "grad_norm": 1.5605966972230738, + "learning_rate": 1.2871819821229282e-06, + "loss": 0.5520300269126892, + "step": 3791 + }, + { + "epoch": 0.8743370993774499, + "grad_norm": 1.2172431342127952, + "learning_rate": 1.2868167602481831e-06, + "loss": 0.42350637912750244, + "step": 3792 + }, + { + "epoch": 0.8745676735070325, + "grad_norm": 1.3605025828289865, + "learning_rate": 1.2864514966817155e-06, + "loss": 0.5148683786392212, + "step": 3793 + }, + { + "epoch": 0.8747982476366152, + "grad_norm": 1.2825363473778824, + "learning_rate": 1.2860861914766191e-06, + "loss": 0.4506865441799164, + "step": 3794 + }, + { + "epoch": 0.8750288217661978, + "grad_norm": 1.240014068038836, + "learning_rate": 1.2857208446859957e-06, + "loss": 0.4042026996612549, + "step": 3795 + }, + { + "epoch": 0.8752593958957805, + "grad_norm": 1.749789157467437, + "learning_rate": 1.2853554563629521e-06, + "loss": 0.4601382613182068, + "step": 3796 + }, + { + "epoch": 0.8754899700253631, + "grad_norm": 1.1956968937229655, + "learning_rate": 1.2849900265606007e-06, + "loss": 0.3387809097766876, + "step": 3797 + }, + { + "epoch": 0.8757205441549458, + "grad_norm": 1.3296970918872935, + "learning_rate": 1.2846245553320604e-06, + "loss": 
0.5295180082321167, + "step": 3798 + }, + { + "epoch": 0.8759511182845284, + "grad_norm": 1.518762035085977, + "learning_rate": 1.2842590427304564e-06, + "loss": 0.47733891010284424, + "step": 3799 + }, + { + "epoch": 0.8761816924141111, + "grad_norm": 1.3675518552119075, + "learning_rate": 1.2838934888089198e-06, + "loss": 0.46294957399368286, + "step": 3800 + }, + { + "epoch": 0.8764122665436938, + "grad_norm": 1.3892016156570253, + "learning_rate": 1.2835278936205877e-06, + "loss": 0.4638972580432892, + "step": 3801 + }, + { + "epoch": 0.8766428406732765, + "grad_norm": 1.2670627732920314, + "learning_rate": 1.2831622572186027e-06, + "loss": 0.5078087449073792, + "step": 3802 + }, + { + "epoch": 0.8768734148028591, + "grad_norm": 1.2490466990727205, + "learning_rate": 1.2827965796561138e-06, + "loss": 0.49626827239990234, + "step": 3803 + }, + { + "epoch": 0.8771039889324418, + "grad_norm": 1.3784871825818807, + "learning_rate": 1.2824308609862758e-06, + "loss": 0.4857192635536194, + "step": 3804 + }, + { + "epoch": 0.8773345630620244, + "grad_norm": 1.5003545684747548, + "learning_rate": 1.2820651012622498e-06, + "loss": 0.5403131246566772, + "step": 3805 + }, + { + "epoch": 0.8775651371916071, + "grad_norm": 1.532730699853752, + "learning_rate": 1.2816993005372029e-06, + "loss": 0.519463837146759, + "step": 3806 + }, + { + "epoch": 0.8777957113211897, + "grad_norm": 1.648937105926222, + "learning_rate": 1.2813334588643077e-06, + "loss": 0.6038607954978943, + "step": 3807 + }, + { + "epoch": 0.8780262854507724, + "grad_norm": 1.5251750284604964, + "learning_rate": 1.280967576296743e-06, + "loss": 0.4892663359642029, + "step": 3808 + }, + { + "epoch": 0.878256859580355, + "grad_norm": 1.4437992115831912, + "learning_rate": 1.2806016528876934e-06, + "loss": 0.47872501611709595, + "step": 3809 + }, + { + "epoch": 0.8784874337099378, + "grad_norm": 1.401497704596745, + "learning_rate": 1.28023568869035e-06, + "loss": 0.4863993227481842, + "step": 3810 + }, + { + 
"epoch": 0.8787180078395204, + "grad_norm": 1.2319881889422357, + "learning_rate": 1.2798696837579088e-06, + "loss": 0.45241546630859375, + "step": 3811 + }, + { + "epoch": 0.8789485819691031, + "grad_norm": 1.26957816055566, + "learning_rate": 1.2795036381435728e-06, + "loss": 0.48720863461494446, + "step": 3812 + }, + { + "epoch": 0.8791791560986857, + "grad_norm": 1.4244000796725484, + "learning_rate": 1.2791375519005507e-06, + "loss": 0.49139827489852905, + "step": 3813 + }, + { + "epoch": 0.8794097302282684, + "grad_norm": 1.1021730064681352, + "learning_rate": 1.278771425082056e-06, + "loss": 0.41915225982666016, + "step": 3814 + }, + { + "epoch": 0.879640304357851, + "grad_norm": 1.164668093587021, + "learning_rate": 1.2784052577413095e-06, + "loss": 0.41831016540527344, + "step": 3815 + }, + { + "epoch": 0.8798708784874337, + "grad_norm": 1.392466935090571, + "learning_rate": 1.2780390499315374e-06, + "loss": 0.49456197023391724, + "step": 3816 + }, + { + "epoch": 0.8801014526170163, + "grad_norm": 1.4645341817096265, + "learning_rate": 1.2776728017059714e-06, + "loss": 0.4656866192817688, + "step": 3817 + }, + { + "epoch": 0.880332026746599, + "grad_norm": 1.375452516729426, + "learning_rate": 1.2773065131178494e-06, + "loss": 0.449514776468277, + "step": 3818 + }, + { + "epoch": 0.8805626008761817, + "grad_norm": 1.320026502962018, + "learning_rate": 1.2769401842204156e-06, + "loss": 0.3762073516845703, + "step": 3819 + }, + { + "epoch": 0.8807931750057644, + "grad_norm": 1.6471923718834367, + "learning_rate": 1.2765738150669192e-06, + "loss": 0.5680521130561829, + "step": 3820 + }, + { + "epoch": 0.881023749135347, + "grad_norm": 1.227867578043664, + "learning_rate": 1.276207405710616e-06, + "loss": 0.35371482372283936, + "step": 3821 + }, + { + "epoch": 0.8812543232649297, + "grad_norm": 1.6584454245429339, + "learning_rate": 1.2758409562047669e-06, + "loss": 0.5145018100738525, + "step": 3822 + }, + { + "epoch": 0.8814848973945123, + "grad_norm": 
1.4264603788288566, + "learning_rate": 1.2754744666026392e-06, + "loss": 0.5425234436988831, + "step": 3823 + }, + { + "epoch": 0.881715471524095, + "grad_norm": 1.605664005655016, + "learning_rate": 1.275107936957506e-06, + "loss": 0.48439931869506836, + "step": 3824 + }, + { + "epoch": 0.8819460456536776, + "grad_norm": 1.4836193722422002, + "learning_rate": 1.2747413673226462e-06, + "loss": 0.5177323818206787, + "step": 3825 + }, + { + "epoch": 0.8821766197832603, + "grad_norm": 1.4672524591279896, + "learning_rate": 1.2743747577513437e-06, + "loss": 0.4718499779701233, + "step": 3826 + }, + { + "epoch": 0.882407193912843, + "grad_norm": 1.3580668132517044, + "learning_rate": 1.27400810829689e-06, + "loss": 0.5140804648399353, + "step": 3827 + }, + { + "epoch": 0.8826377680424257, + "grad_norm": 1.2476007061260952, + "learning_rate": 1.2736414190125805e-06, + "loss": 0.4611731767654419, + "step": 3828 + }, + { + "epoch": 0.8828683421720083, + "grad_norm": 1.3574827964922753, + "learning_rate": 1.2732746899517175e-06, + "loss": 0.526127815246582, + "step": 3829 + }, + { + "epoch": 0.883098916301591, + "grad_norm": 1.3368001624765957, + "learning_rate": 1.2729079211676085e-06, + "loss": 0.4039766192436218, + "step": 3830 + }, + { + "epoch": 0.8833294904311736, + "grad_norm": 1.5033466347185125, + "learning_rate": 1.2725411127135676e-06, + "loss": 0.4232807159423828, + "step": 3831 + }, + { + "epoch": 0.8835600645607563, + "grad_norm": 1.2556638937655993, + "learning_rate": 1.2721742646429142e-06, + "loss": 0.48490262031555176, + "step": 3832 + }, + { + "epoch": 0.8837906386903389, + "grad_norm": 1.278298782194165, + "learning_rate": 1.2718073770089729e-06, + "loss": 0.4664677083492279, + "step": 3833 + }, + { + "epoch": 0.8840212128199216, + "grad_norm": 1.3387833207328181, + "learning_rate": 1.2714404498650742e-06, + "loss": 0.4402846097946167, + "step": 3834 + }, + { + "epoch": 0.8842517869495042, + "grad_norm": 1.195436797590032, + "learning_rate": 
1.2710734832645555e-06, + "loss": 0.45942988991737366, + "step": 3835 + }, + { + "epoch": 0.884482361079087, + "grad_norm": 1.3235253441897963, + "learning_rate": 1.2707064772607587e-06, + "loss": 0.45924365520477295, + "step": 3836 + }, + { + "epoch": 0.8847129352086696, + "grad_norm": 1.2350134713864223, + "learning_rate": 1.270339431907032e-06, + "loss": 0.3877851963043213, + "step": 3837 + }, + { + "epoch": 0.8849435093382523, + "grad_norm": 1.381311043724791, + "learning_rate": 1.2699723472567288e-06, + "loss": 0.45364105701446533, + "step": 3838 + }, + { + "epoch": 0.8851740834678349, + "grad_norm": 1.2798000201692457, + "learning_rate": 1.2696052233632089e-06, + "loss": 0.3527877926826477, + "step": 3839 + }, + { + "epoch": 0.8854046575974176, + "grad_norm": 1.7105597319107566, + "learning_rate": 1.2692380602798375e-06, + "loss": 0.499268501996994, + "step": 3840 + }, + { + "epoch": 0.8856352317270002, + "grad_norm": 1.2823188650483364, + "learning_rate": 1.2688708580599854e-06, + "loss": 0.39443689584732056, + "step": 3841 + }, + { + "epoch": 0.8858658058565829, + "grad_norm": 1.442355552170661, + "learning_rate": 1.268503616757029e-06, + "loss": 0.5262328386306763, + "step": 3842 + }, + { + "epoch": 0.8860963799861655, + "grad_norm": 1.4602798515117177, + "learning_rate": 1.2681363364243509e-06, + "loss": 0.4761236608028412, + "step": 3843 + }, + { + "epoch": 0.8863269541157482, + "grad_norm": 1.3806283660695482, + "learning_rate": 1.2677690171153391e-06, + "loss": 0.5173169374465942, + "step": 3844 + }, + { + "epoch": 0.8865575282453309, + "grad_norm": 1.4796905287439253, + "learning_rate": 1.2674016588833866e-06, + "loss": 0.5304574966430664, + "step": 3845 + }, + { + "epoch": 0.8867881023749136, + "grad_norm": 1.2451043989470143, + "learning_rate": 1.2670342617818925e-06, + "loss": 0.44707632064819336, + "step": 3846 + }, + { + "epoch": 0.8870186765044962, + "grad_norm": 1.4327430501013436, + "learning_rate": 1.2666668258642628e-06, + "loss": 
0.44395360350608826, + "step": 3847 + }, + { + "epoch": 0.8872492506340789, + "grad_norm": 1.5382701800989709, + "learning_rate": 1.266299351183907e-06, + "loss": 0.4993078112602234, + "step": 3848 + }, + { + "epoch": 0.8874798247636615, + "grad_norm": 1.447761685140105, + "learning_rate": 1.2659318377942418e-06, + "loss": 0.4836229681968689, + "step": 3849 + }, + { + "epoch": 0.8877103988932442, + "grad_norm": 1.1586406035440977, + "learning_rate": 1.2655642857486885e-06, + "loss": 0.4898098111152649, + "step": 3850 + }, + { + "epoch": 0.8879409730228268, + "grad_norm": 1.4550595650341691, + "learning_rate": 1.2651966951006753e-06, + "loss": 0.5117218494415283, + "step": 3851 + }, + { + "epoch": 0.8881715471524095, + "grad_norm": 1.1751749847019868, + "learning_rate": 1.2648290659036347e-06, + "loss": 0.3920857906341553, + "step": 3852 + }, + { + "epoch": 0.8884021212819921, + "grad_norm": 1.2103531492140316, + "learning_rate": 1.2644613982110055e-06, + "loss": 0.42527467012405396, + "step": 3853 + }, + { + "epoch": 0.8886326954115749, + "grad_norm": 1.4673474591941762, + "learning_rate": 1.2640936920762318e-06, + "loss": 0.5283650159835815, + "step": 3854 + }, + { + "epoch": 0.8888632695411575, + "grad_norm": 1.1384795561192926, + "learning_rate": 1.2637259475527634e-06, + "loss": 0.3976718783378601, + "step": 3855 + }, + { + "epoch": 0.8890938436707402, + "grad_norm": 1.3777221980377923, + "learning_rate": 1.2633581646940555e-06, + "loss": 0.3767106533050537, + "step": 3856 + }, + { + "epoch": 0.8893244178003228, + "grad_norm": 1.2421308508382682, + "learning_rate": 1.2629903435535695e-06, + "loss": 0.4002486765384674, + "step": 3857 + }, + { + "epoch": 0.8895549919299055, + "grad_norm": 1.7761729251417224, + "learning_rate": 1.2626224841847718e-06, + "loss": 0.3829443156719208, + "step": 3858 + }, + { + "epoch": 0.8897855660594881, + "grad_norm": 1.6906089339859913, + "learning_rate": 1.2622545866411342e-06, + "loss": 0.5338312983512878, + "step": 3859 + }, + { 
+ "epoch": 0.8900161401890708, + "grad_norm": 1.3435755743208722, + "learning_rate": 1.2618866509761347e-06, + "loss": 0.49615299701690674, + "step": 3860 + }, + { + "epoch": 0.8902467143186534, + "grad_norm": 1.3772165276715471, + "learning_rate": 1.2615186772432562e-06, + "loss": 0.5080281496047974, + "step": 3861 + }, + { + "epoch": 0.8904772884482361, + "grad_norm": 1.3191602759544514, + "learning_rate": 1.2611506654959877e-06, + "loss": 0.4631335139274597, + "step": 3862 + }, + { + "epoch": 0.8907078625778188, + "grad_norm": 1.6754337710064344, + "learning_rate": 1.2607826157878232e-06, + "loss": 0.5179207921028137, + "step": 3863 + }, + { + "epoch": 0.8909384367074015, + "grad_norm": 1.8689690583071528, + "learning_rate": 1.260414528172263e-06, + "loss": 0.5107406973838806, + "step": 3864 + }, + { + "epoch": 0.8911690108369841, + "grad_norm": 1.4263135964434357, + "learning_rate": 1.2600464027028112e-06, + "loss": 0.3719855844974518, + "step": 3865 + }, + { + "epoch": 0.8913995849665668, + "grad_norm": 1.2717821474296322, + "learning_rate": 1.2596782394329797e-06, + "loss": 0.4703129231929779, + "step": 3866 + }, + { + "epoch": 0.8916301590961494, + "grad_norm": 1.4971801597034615, + "learning_rate": 1.2593100384162842e-06, + "loss": 0.49239644408226013, + "step": 3867 + }, + { + "epoch": 0.8918607332257321, + "grad_norm": 1.505796830220407, + "learning_rate": 1.2589417997062468e-06, + "loss": 0.5194324851036072, + "step": 3868 + }, + { + "epoch": 0.8920913073553147, + "grad_norm": 1.2722329079463401, + "learning_rate": 1.2585735233563943e-06, + "loss": 0.4224633574485779, + "step": 3869 + }, + { + "epoch": 0.8923218814848974, + "grad_norm": 1.7020995758876771, + "learning_rate": 1.2582052094202594e-06, + "loss": 0.4377749562263489, + "step": 3870 + }, + { + "epoch": 0.89255245561448, + "grad_norm": 1.2037908365106704, + "learning_rate": 1.2578368579513809e-06, + "loss": 0.42847269773483276, + "step": 3871 + }, + { + "epoch": 0.8927830297440628, + 
"grad_norm": 1.4087908465200083, + "learning_rate": 1.2574684690033018e-06, + "loss": 0.5194802284240723, + "step": 3872 + }, + { + "epoch": 0.8930136038736454, + "grad_norm": 1.3553883811442613, + "learning_rate": 1.2571000426295716e-06, + "loss": 0.4401082396507263, + "step": 3873 + }, + { + "epoch": 0.8932441780032281, + "grad_norm": 1.5117708123403886, + "learning_rate": 1.2567315788837442e-06, + "loss": 0.38890570402145386, + "step": 3874 + }, + { + "epoch": 0.8934747521328107, + "grad_norm": 1.4931972330534145, + "learning_rate": 1.2563630778193802e-06, + "loss": 0.522612452507019, + "step": 3875 + }, + { + "epoch": 0.8937053262623934, + "grad_norm": 1.757870637645656, + "learning_rate": 1.2559945394900447e-06, + "loss": 0.516444981098175, + "step": 3876 + }, + { + "epoch": 0.893935900391976, + "grad_norm": 1.193092685346779, + "learning_rate": 1.255625963949308e-06, + "loss": 0.4084436297416687, + "step": 3877 + }, + { + "epoch": 0.8941664745215587, + "grad_norm": 1.4364911954858623, + "learning_rate": 1.2552573512507474e-06, + "loss": 0.4561755657196045, + "step": 3878 + }, + { + "epoch": 0.8943970486511413, + "grad_norm": 1.3498949478529019, + "learning_rate": 1.2548887014479435e-06, + "loss": 0.44372665882110596, + "step": 3879 + }, + { + "epoch": 0.894627622780724, + "grad_norm": 1.4181034577590674, + "learning_rate": 1.2545200145944837e-06, + "loss": 0.4714791774749756, + "step": 3880 + }, + { + "epoch": 0.8948581969103067, + "grad_norm": 1.506508633299638, + "learning_rate": 1.25415129074396e-06, + "loss": 0.48050814867019653, + "step": 3881 + }, + { + "epoch": 0.8950887710398894, + "grad_norm": 1.7788226663138391, + "learning_rate": 1.2537825299499708e-06, + "loss": 0.4078127145767212, + "step": 3882 + }, + { + "epoch": 0.895319345169472, + "grad_norm": 1.1273639481853348, + "learning_rate": 1.2534137322661187e-06, + "loss": 0.41556763648986816, + "step": 3883 + }, + { + "epoch": 0.8955499192990547, + "grad_norm": 1.2916565664076916, + 
"learning_rate": 1.2530448977460127e-06, + "loss": 0.3862306475639343, + "step": 3884 + }, + { + "epoch": 0.8957804934286373, + "grad_norm": 1.2417402269481763, + "learning_rate": 1.2526760264432656e-06, + "loss": 0.4071112871170044, + "step": 3885 + }, + { + "epoch": 0.89601106755822, + "grad_norm": 1.2074121865816745, + "learning_rate": 1.2523071184114978e-06, + "loss": 0.36956706643104553, + "step": 3886 + }, + { + "epoch": 0.8962416416878026, + "grad_norm": 1.5187969981751328, + "learning_rate": 1.251938173704333e-06, + "loss": 0.5087941884994507, + "step": 3887 + }, + { + "epoch": 0.8964722158173853, + "grad_norm": 1.5300476571906632, + "learning_rate": 1.2515691923754017e-06, + "loss": 0.5636804103851318, + "step": 3888 + }, + { + "epoch": 0.896702789946968, + "grad_norm": 1.2028947296679213, + "learning_rate": 1.2512001744783383e-06, + "loss": 0.40899237990379333, + "step": 3889 + }, + { + "epoch": 0.8969333640765507, + "grad_norm": 1.2319974158201112, + "learning_rate": 1.2508311200667839e-06, + "loss": 0.3964187800884247, + "step": 3890 + }, + { + "epoch": 0.8971639382061333, + "grad_norm": 1.1881521968898023, + "learning_rate": 1.2504620291943838e-06, + "loss": 0.43190568685531616, + "step": 3891 + }, + { + "epoch": 0.897394512335716, + "grad_norm": 1.5323277954151004, + "learning_rate": 1.25009290191479e-06, + "loss": 0.5640079379081726, + "step": 3892 + }, + { + "epoch": 0.8976250864652986, + "grad_norm": 1.5228387521540339, + "learning_rate": 1.2497237382816577e-06, + "loss": 0.4969727396965027, + "step": 3893 + }, + { + "epoch": 0.8978556605948812, + "grad_norm": 1.438395912517929, + "learning_rate": 1.2493545383486497e-06, + "loss": 0.43710076808929443, + "step": 3894 + }, + { + "epoch": 0.8980862347244639, + "grad_norm": 1.217545409086522, + "learning_rate": 1.248985302169432e-06, + "loss": 0.4246212840080261, + "step": 3895 + }, + { + "epoch": 0.8983168088540465, + "grad_norm": 1.1837244532547113, + "learning_rate": 1.2486160297976776e-06, + 
"loss": 0.3812369108200073, + "step": 3896 + }, + { + "epoch": 0.8985473829836292, + "grad_norm": 2.1554879190255685, + "learning_rate": 1.248246721287063e-06, + "loss": 0.6407653093338013, + "step": 3897 + }, + { + "epoch": 0.8987779571132118, + "grad_norm": 1.6947319293322312, + "learning_rate": 1.247877376691272e-06, + "loss": 0.47748661041259766, + "step": 3898 + }, + { + "epoch": 0.8990085312427946, + "grad_norm": 1.5504399903750061, + "learning_rate": 1.2475079960639922e-06, + "loss": 0.5047964453697205, + "step": 3899 + }, + { + "epoch": 0.8992391053723772, + "grad_norm": 1.1781117181895115, + "learning_rate": 1.2471385794589167e-06, + "loss": 0.37989485263824463, + "step": 3900 + }, + { + "epoch": 0.8994696795019599, + "grad_norm": 1.2955755733611327, + "learning_rate": 1.2467691269297437e-06, + "loss": 0.38857924938201904, + "step": 3901 + }, + { + "epoch": 0.8997002536315425, + "grad_norm": 1.2312069291338004, + "learning_rate": 1.2463996385301776e-06, + "loss": 0.45452386140823364, + "step": 3902 + }, + { + "epoch": 0.8999308277611252, + "grad_norm": 1.5565774035889273, + "learning_rate": 1.2460301143139267e-06, + "loss": 0.41920900344848633, + "step": 3903 + }, + { + "epoch": 0.9001614018907078, + "grad_norm": 1.542875547138451, + "learning_rate": 1.2456605543347051e-06, + "loss": 0.5979125499725342, + "step": 3904 + }, + { + "epoch": 0.9003919760202905, + "grad_norm": 1.5505304900467811, + "learning_rate": 1.2452909586462323e-06, + "loss": 0.5517082214355469, + "step": 3905 + }, + { + "epoch": 0.9006225501498731, + "grad_norm": 1.2381443535248697, + "learning_rate": 1.244921327302233e-06, + "loss": 0.4558248519897461, + "step": 3906 + }, + { + "epoch": 0.9008531242794559, + "grad_norm": 1.5503878716470787, + "learning_rate": 1.2445516603564362e-06, + "loss": 0.5637399554252625, + "step": 3907 + }, + { + "epoch": 0.9010836984090385, + "grad_norm": 1.2396897738245216, + "learning_rate": 1.2441819578625775e-06, + "loss": 0.5208043456077576, + "step": 3908 
+ }, + { + "epoch": 0.9013142725386212, + "grad_norm": 1.400218770913741, + "learning_rate": 1.243812219874396e-06, + "loss": 0.3901744484901428, + "step": 3909 + }, + { + "epoch": 0.9015448466682038, + "grad_norm": 1.4025338042989108, + "learning_rate": 1.2434424464456376e-06, + "loss": 0.5770972967147827, + "step": 3910 + }, + { + "epoch": 0.9017754207977865, + "grad_norm": 1.375223010916462, + "learning_rate": 1.2430726376300525e-06, + "loss": 0.3457295894622803, + "step": 3911 + }, + { + "epoch": 0.9020059949273691, + "grad_norm": 1.3118554362154196, + "learning_rate": 1.242702793481396e-06, + "loss": 0.4487595558166504, + "step": 3912 + }, + { + "epoch": 0.9022365690569518, + "grad_norm": 1.2548104794507453, + "learning_rate": 1.2423329140534286e-06, + "loss": 0.4369876980781555, + "step": 3913 + }, + { + "epoch": 0.9024671431865344, + "grad_norm": 1.5693012853497335, + "learning_rate": 1.2419629993999165e-06, + "loss": 0.43154388666152954, + "step": 3914 + }, + { + "epoch": 0.9026977173161171, + "grad_norm": 1.313977531855456, + "learning_rate": 1.24159304957463e-06, + "loss": 0.4528294801712036, + "step": 3915 + }, + { + "epoch": 0.9029282914456997, + "grad_norm": 1.4152554930408472, + "learning_rate": 1.2412230646313452e-06, + "loss": 0.4204830527305603, + "step": 3916 + }, + { + "epoch": 0.9031588655752825, + "grad_norm": 1.3117655747531898, + "learning_rate": 1.2408530446238433e-06, + "loss": 0.46544623374938965, + "step": 3917 + }, + { + "epoch": 0.9033894397048651, + "grad_norm": 1.19103055945586, + "learning_rate": 1.2404829896059107e-06, + "loss": 0.39419203996658325, + "step": 3918 + }, + { + "epoch": 0.9036200138344478, + "grad_norm": 1.3085505059347724, + "learning_rate": 1.240112899631338e-06, + "loss": 0.4214451014995575, + "step": 3919 + }, + { + "epoch": 0.9038505879640304, + "grad_norm": 1.310156094815825, + "learning_rate": 1.239742774753922e-06, + "loss": 0.42385220527648926, + "step": 3920 + }, + { + "epoch": 0.9040811620936131, + 
"grad_norm": 1.4457769612459037, + "learning_rate": 1.2393726150274636e-06, + "loss": 0.5206592082977295, + "step": 3921 + }, + { + "epoch": 0.9043117362231957, + "grad_norm": 1.4602545667694231, + "learning_rate": 1.23900242050577e-06, + "loss": 0.4358803629875183, + "step": 3922 + }, + { + "epoch": 0.9045423103527784, + "grad_norm": 1.3596132034754325, + "learning_rate": 1.2386321912426524e-06, + "loss": 0.4525173306465149, + "step": 3923 + }, + { + "epoch": 0.904772884482361, + "grad_norm": 1.4736466426478543, + "learning_rate": 1.2382619272919273e-06, + "loss": 0.48877185583114624, + "step": 3924 + }, + { + "epoch": 0.9050034586119438, + "grad_norm": 1.152358955118646, + "learning_rate": 1.2378916287074162e-06, + "loss": 0.4401814341545105, + "step": 3925 + }, + { + "epoch": 0.9052340327415264, + "grad_norm": 1.337265572878916, + "learning_rate": 1.2375212955429459e-06, + "loss": 0.37818846106529236, + "step": 3926 + }, + { + "epoch": 0.9054646068711091, + "grad_norm": 1.285760527835995, + "learning_rate": 1.2371509278523482e-06, + "loss": 0.36472904682159424, + "step": 3927 + }, + { + "epoch": 0.9056951810006917, + "grad_norm": 1.2999097028645303, + "learning_rate": 1.2367805256894596e-06, + "loss": 0.5113309025764465, + "step": 3928 + }, + { + "epoch": 0.9059257551302744, + "grad_norm": 1.2052405163032573, + "learning_rate": 1.2364100891081218e-06, + "loss": 0.36074432730674744, + "step": 3929 + }, + { + "epoch": 0.906156329259857, + "grad_norm": 1.3493065976556424, + "learning_rate": 1.2360396181621819e-06, + "loss": 0.39177048206329346, + "step": 3930 + }, + { + "epoch": 0.9063869033894397, + "grad_norm": 1.3736058093352046, + "learning_rate": 1.2356691129054912e-06, + "loss": 0.4758113622665405, + "step": 3931 + }, + { + "epoch": 0.9066174775190223, + "grad_norm": 1.3614234520329223, + "learning_rate": 1.2352985733919065e-06, + "loss": 0.3840598464012146, + "step": 3932 + }, + { + "epoch": 0.906848051648605, + "grad_norm": 1.510763334369694, + 
"learning_rate": 1.2349279996752892e-06, + "loss": 0.5103816986083984, + "step": 3933 + }, + { + "epoch": 0.9070786257781877, + "grad_norm": 1.466046011323441, + "learning_rate": 1.234557391809507e-06, + "loss": 0.4175255298614502, + "step": 3934 + }, + { + "epoch": 0.9073091999077704, + "grad_norm": 2.627411026682294, + "learning_rate": 1.2341867498484302e-06, + "loss": 0.4504377245903015, + "step": 3935 + }, + { + "epoch": 0.907539774037353, + "grad_norm": 1.2868923632717955, + "learning_rate": 1.2338160738459355e-06, + "loss": 0.45868122577667236, + "step": 3936 + }, + { + "epoch": 0.9077703481669357, + "grad_norm": 1.3231771761325972, + "learning_rate": 1.2334453638559054e-06, + "loss": 0.5161639451980591, + "step": 3937 + }, + { + "epoch": 0.9080009222965183, + "grad_norm": 1.5486748129834036, + "learning_rate": 1.2330746199322257e-06, + "loss": 0.44561630487442017, + "step": 3938 + }, + { + "epoch": 0.908231496426101, + "grad_norm": 1.595486700598371, + "learning_rate": 1.2327038421287876e-06, + "loss": 0.4780126214027405, + "step": 3939 + }, + { + "epoch": 0.9084620705556836, + "grad_norm": 1.2226582649026916, + "learning_rate": 1.2323330304994877e-06, + "loss": 0.505066990852356, + "step": 3940 + }, + { + "epoch": 0.9086926446852663, + "grad_norm": 1.3041405659013958, + "learning_rate": 1.2319621850982274e-06, + "loss": 0.5053813457489014, + "step": 3941 + }, + { + "epoch": 0.9089232188148489, + "grad_norm": 1.178162092657054, + "learning_rate": 1.2315913059789125e-06, + "loss": 0.3579134941101074, + "step": 3942 + }, + { + "epoch": 0.9091537929444317, + "grad_norm": 1.4949007072050957, + "learning_rate": 1.2312203931954543e-06, + "loss": 0.5703507661819458, + "step": 3943 + }, + { + "epoch": 0.9093843670740143, + "grad_norm": 1.4141867956521472, + "learning_rate": 1.2308494468017685e-06, + "loss": 0.4972035884857178, + "step": 3944 + }, + { + "epoch": 0.909614941203597, + "grad_norm": 1.8338477540837272, + "learning_rate": 1.230478466851776e-06, + "loss": 
0.5528955459594727, + "step": 3945 + }, + { + "epoch": 0.9098455153331796, + "grad_norm": 1.4009292239467905, + "learning_rate": 1.2301074533994024e-06, + "loss": 0.4099786877632141, + "step": 3946 + }, + { + "epoch": 0.9100760894627623, + "grad_norm": 1.3414325662099453, + "learning_rate": 1.2297364064985786e-06, + "loss": 0.41020166873931885, + "step": 3947 + }, + { + "epoch": 0.9103066635923449, + "grad_norm": 1.4112377219226224, + "learning_rate": 1.2293653262032395e-06, + "loss": 0.4340355694293976, + "step": 3948 + }, + { + "epoch": 0.9105372377219276, + "grad_norm": 1.376446280407005, + "learning_rate": 1.2289942125673261e-06, + "loss": 0.4369847774505615, + "step": 3949 + }, + { + "epoch": 0.9107678118515102, + "grad_norm": 1.4688076477466663, + "learning_rate": 1.228623065644783e-06, + "loss": 0.406423956155777, + "step": 3950 + }, + { + "epoch": 0.910998385981093, + "grad_norm": 1.4230223897567287, + "learning_rate": 1.22825188548956e-06, + "loss": 0.5081946849822998, + "step": 3951 + }, + { + "epoch": 0.9112289601106756, + "grad_norm": 1.7017899930713631, + "learning_rate": 1.2278806721556124e-06, + "loss": 0.43494492769241333, + "step": 3952 + }, + { + "epoch": 0.9114595342402583, + "grad_norm": 1.348884752431283, + "learning_rate": 1.2275094256968996e-06, + "loss": 0.35356831550598145, + "step": 3953 + }, + { + "epoch": 0.9116901083698409, + "grad_norm": 1.2260567341450548, + "learning_rate": 1.227138146167386e-06, + "loss": 0.36741551756858826, + "step": 3954 + }, + { + "epoch": 0.9119206824994236, + "grad_norm": 1.4686302016765889, + "learning_rate": 1.226766833621041e-06, + "loss": 0.491504430770874, + "step": 3955 + }, + { + "epoch": 0.9121512566290062, + "grad_norm": 1.266294151631501, + "learning_rate": 1.2263954881118384e-06, + "loss": 0.4558037519454956, + "step": 3956 + }, + { + "epoch": 0.9123818307585889, + "grad_norm": 1.398276341256052, + "learning_rate": 1.2260241096937571e-06, + "loss": 0.3941671848297119, + "step": 3957 + }, + { + 
"epoch": 0.9126124048881715, + "grad_norm": 1.7133993603535684, + "learning_rate": 1.2256526984207809e-06, + "loss": 0.40505191683769226, + "step": 3958 + }, + { + "epoch": 0.9128429790177542, + "grad_norm": 1.3369540241008888, + "learning_rate": 1.2252812543468982e-06, + "loss": 0.4669588804244995, + "step": 3959 + }, + { + "epoch": 0.9130735531473368, + "grad_norm": 1.6346862522902008, + "learning_rate": 1.2249097775261014e-06, + "loss": 0.535057544708252, + "step": 3960 + }, + { + "epoch": 0.9133041272769196, + "grad_norm": 1.465530924269544, + "learning_rate": 1.2245382680123898e-06, + "loss": 0.5127478837966919, + "step": 3961 + }, + { + "epoch": 0.9135347014065022, + "grad_norm": 1.239878706419753, + "learning_rate": 1.224166725859765e-06, + "loss": 0.5004767179489136, + "step": 3962 + }, + { + "epoch": 0.9137652755360849, + "grad_norm": 1.3382850542269662, + "learning_rate": 1.2237951511222346e-06, + "loss": 0.47929924726486206, + "step": 3963 + }, + { + "epoch": 0.9139958496656675, + "grad_norm": 1.3650943807220162, + "learning_rate": 1.2234235438538109e-06, + "loss": 0.5619359016418457, + "step": 3964 + }, + { + "epoch": 0.9142264237952502, + "grad_norm": 2.173999313160228, + "learning_rate": 1.223051904108511e-06, + "loss": 0.44648507237434387, + "step": 3965 + }, + { + "epoch": 0.9144569979248328, + "grad_norm": 1.5081082363333118, + "learning_rate": 1.2226802319403562e-06, + "loss": 0.4451872706413269, + "step": 3966 + }, + { + "epoch": 0.9146875720544155, + "grad_norm": 1.1999813764066747, + "learning_rate": 1.222308527403373e-06, + "loss": 0.44295474886894226, + "step": 3967 + }, + { + "epoch": 0.9149181461839981, + "grad_norm": 1.4510785821223537, + "learning_rate": 1.221936790551592e-06, + "loss": 0.517430305480957, + "step": 3968 + }, + { + "epoch": 0.9151487203135809, + "grad_norm": 1.2648448897941866, + "learning_rate": 1.2215650214390493e-06, + "loss": 0.4819454252719879, + "step": 3969 + }, + { + "epoch": 0.9153792944431635, + "grad_norm": 
1.40726836834287, + "learning_rate": 1.2211932201197855e-06, + "loss": 0.41739264130592346, + "step": 3970 + }, + { + "epoch": 0.9156098685727462, + "grad_norm": 1.214750449543567, + "learning_rate": 1.2208213866478452e-06, + "loss": 0.38833269476890564, + "step": 3971 + }, + { + "epoch": 0.9158404427023288, + "grad_norm": 1.4780394203565799, + "learning_rate": 1.2204495210772784e-06, + "loss": 0.48899054527282715, + "step": 3972 + }, + { + "epoch": 0.9160710168319115, + "grad_norm": 1.4236888721907983, + "learning_rate": 1.2200776234621395e-06, + "loss": 0.5201622247695923, + "step": 3973 + }, + { + "epoch": 0.9163015909614941, + "grad_norm": 1.4696703280770271, + "learning_rate": 1.219705693856488e-06, + "loss": 0.4105098843574524, + "step": 3974 + }, + { + "epoch": 0.9165321650910768, + "grad_norm": 1.2658629585457457, + "learning_rate": 1.2193337323143865e-06, + "loss": 0.45458245277404785, + "step": 3975 + }, + { + "epoch": 0.9167627392206594, + "grad_norm": 1.4906657502786624, + "learning_rate": 1.2189617388899049e-06, + "loss": 0.5013390779495239, + "step": 3976 + }, + { + "epoch": 0.9169933133502421, + "grad_norm": 1.3837275498584536, + "learning_rate": 1.218589713637115e-06, + "loss": 0.37065303325653076, + "step": 3977 + }, + { + "epoch": 0.9172238874798248, + "grad_norm": 1.4237915808433583, + "learning_rate": 1.218217656610095e-06, + "loss": 0.45158177614212036, + "step": 3978 + }, + { + "epoch": 0.9174544616094075, + "grad_norm": 1.3261399530988285, + "learning_rate": 1.2178455678629271e-06, + "loss": 0.4439426064491272, + "step": 3979 + }, + { + "epoch": 0.9176850357389901, + "grad_norm": 1.4056969202356144, + "learning_rate": 1.217473447449698e-06, + "loss": 0.42215704917907715, + "step": 3980 + }, + { + "epoch": 0.9179156098685728, + "grad_norm": 1.6572776500354818, + "learning_rate": 1.2171012954244991e-06, + "loss": 0.42273545265197754, + "step": 3981 + }, + { + "epoch": 0.9181461839981554, + "grad_norm": 1.5659197643503024, + "learning_rate": 
1.216729111841427e-06, + "loss": 0.6045219898223877, + "step": 3982 + }, + { + "epoch": 0.9183767581277381, + "grad_norm": 1.318642532575583, + "learning_rate": 1.216356896754582e-06, + "loss": 0.49316874146461487, + "step": 3983 + }, + { + "epoch": 0.9186073322573207, + "grad_norm": 1.2984174252340932, + "learning_rate": 1.2159846502180692e-06, + "loss": 0.5222599506378174, + "step": 3984 + }, + { + "epoch": 0.9188379063869034, + "grad_norm": 1.21924477747188, + "learning_rate": 1.2156123722859988e-06, + "loss": 0.4513903856277466, + "step": 3985 + }, + { + "epoch": 0.919068480516486, + "grad_norm": 1.5286242494549134, + "learning_rate": 1.2152400630124846e-06, + "loss": 0.4946150779724121, + "step": 3986 + }, + { + "epoch": 0.9192990546460688, + "grad_norm": 1.6287340554518628, + "learning_rate": 1.2148677224516458e-06, + "loss": 0.5482569336891174, + "step": 3987 + }, + { + "epoch": 0.9195296287756514, + "grad_norm": 1.4490082622042646, + "learning_rate": 1.2144953506576061e-06, + "loss": 0.457091361284256, + "step": 3988 + }, + { + "epoch": 0.9197602029052341, + "grad_norm": 1.378032718586854, + "learning_rate": 1.2141229476844933e-06, + "loss": 0.4262084364891052, + "step": 3989 + }, + { + "epoch": 0.9199907770348167, + "grad_norm": 1.2394422456854066, + "learning_rate": 1.2137505135864402e-06, + "loss": 0.4905529022216797, + "step": 3990 + }, + { + "epoch": 0.9202213511643994, + "grad_norm": 1.3246738813802295, + "learning_rate": 1.2133780484175833e-06, + "loss": 0.5001873970031738, + "step": 3991 + }, + { + "epoch": 0.920451925293982, + "grad_norm": 1.4663495799657225, + "learning_rate": 1.2130055522320647e-06, + "loss": 0.396418035030365, + "step": 3992 + }, + { + "epoch": 0.9206824994235647, + "grad_norm": 1.5742445852004807, + "learning_rate": 1.2126330250840302e-06, + "loss": 0.5743722915649414, + "step": 3993 + }, + { + "epoch": 0.9209130735531473, + "grad_norm": 1.720134285882963, + "learning_rate": 1.212260467027631e-06, + "loss": 0.5134707689285278, 
+ "step": 3994 + }, + { + "epoch": 0.92114364768273, + "grad_norm": 1.2913764867867046, + "learning_rate": 1.2118878781170213e-06, + "loss": 0.4191853404045105, + "step": 3995 + }, + { + "epoch": 0.9213742218123127, + "grad_norm": 1.8061166260156263, + "learning_rate": 1.2115152584063613e-06, + "loss": 0.3430103063583374, + "step": 3996 + }, + { + "epoch": 0.9216047959418954, + "grad_norm": 1.491788048135039, + "learning_rate": 1.2111426079498147e-06, + "loss": 0.5229896903038025, + "step": 3997 + }, + { + "epoch": 0.921835370071478, + "grad_norm": 1.9288487767080142, + "learning_rate": 1.2107699268015501e-06, + "loss": 0.5028181076049805, + "step": 3998 + }, + { + "epoch": 0.9220659442010607, + "grad_norm": 1.8323250729268132, + "learning_rate": 1.2103972150157407e-06, + "loss": 0.4662501811981201, + "step": 3999 + }, + { + "epoch": 0.9222965183306433, + "grad_norm": 1.7877363086665337, + "learning_rate": 1.2100244726465636e-06, + "loss": 0.5581385493278503, + "step": 4000 + }, + { + "epoch": 0.922527092460226, + "grad_norm": 1.5059656153682595, + "learning_rate": 1.2096516997482012e-06, + "loss": 0.3925841450691223, + "step": 4001 + }, + { + "epoch": 0.9227576665898086, + "grad_norm": 1.4478402824011334, + "learning_rate": 1.2092788963748393e-06, + "loss": 0.4021197557449341, + "step": 4002 + }, + { + "epoch": 0.9229882407193913, + "grad_norm": 1.5875480480080288, + "learning_rate": 1.2089060625806683e-06, + "loss": 0.5519800186157227, + "step": 4003 + }, + { + "epoch": 0.923218814848974, + "grad_norm": 1.4740215502095901, + "learning_rate": 1.2085331984198847e-06, + "loss": 0.4426038861274719, + "step": 4004 + }, + { + "epoch": 0.9234493889785566, + "grad_norm": 1.3127950735735558, + "learning_rate": 1.2081603039466872e-06, + "loss": 0.4370608925819397, + "step": 4005 + }, + { + "epoch": 0.9236799631081393, + "grad_norm": 1.6270244555647773, + "learning_rate": 1.2077873792152797e-06, + "loss": 0.5535042881965637, + "step": 4006 + }, + { + "epoch": 
0.9239105372377219, + "grad_norm": 1.4254025319676356, + "learning_rate": 1.2074144242798708e-06, + "loss": 0.45786774158477783, + "step": 4007 + }, + { + "epoch": 0.9241411113673046, + "grad_norm": 1.305332226115227, + "learning_rate": 1.207041439194673e-06, + "loss": 0.38189244270324707, + "step": 4008 + }, + { + "epoch": 0.9243716854968872, + "grad_norm": 1.4825176983109143, + "learning_rate": 1.206668424013904e-06, + "loss": 0.48782190680503845, + "step": 4009 + }, + { + "epoch": 0.9246022596264699, + "grad_norm": 1.4182276344304934, + "learning_rate": 1.2062953787917852e-06, + "loss": 0.46295344829559326, + "step": 4010 + }, + { + "epoch": 0.9248328337560525, + "grad_norm": 1.370453601452758, + "learning_rate": 1.205922303582542e-06, + "loss": 0.5205795764923096, + "step": 4011 + }, + { + "epoch": 0.9250634078856352, + "grad_norm": 1.431830816120071, + "learning_rate": 1.205549198440405e-06, + "loss": 0.47622987627983093, + "step": 4012 + }, + { + "epoch": 0.9252939820152178, + "grad_norm": 1.3190370245605134, + "learning_rate": 1.2051760634196091e-06, + "loss": 0.4826146960258484, + "step": 4013 + }, + { + "epoch": 0.9255245561448006, + "grad_norm": 1.608771307027525, + "learning_rate": 1.2048028985743928e-06, + "loss": 0.46193474531173706, + "step": 4014 + }, + { + "epoch": 0.9257551302743832, + "grad_norm": 1.4926107871852312, + "learning_rate": 1.2044297039589996e-06, + "loss": 0.523394763469696, + "step": 4015 + }, + { + "epoch": 0.9259857044039659, + "grad_norm": 1.3096026982819484, + "learning_rate": 1.2040564796276773e-06, + "loss": 0.3963446617126465, + "step": 4016 + }, + { + "epoch": 0.9262162785335485, + "grad_norm": 1.3803899653039033, + "learning_rate": 1.2036832256346774e-06, + "loss": 0.5016456842422485, + "step": 4017 + }, + { + "epoch": 0.9264468526631312, + "grad_norm": 1.2198633348825472, + "learning_rate": 1.2033099420342566e-06, + "loss": 0.47298160195350647, + "step": 4018 + }, + { + "epoch": 0.9266774267927138, + "grad_norm": 
1.5448162104307424, + "learning_rate": 1.2029366288806748e-06, + "loss": 0.387129545211792, + "step": 4019 + }, + { + "epoch": 0.9269080009222965, + "grad_norm": 1.4210281769521962, + "learning_rate": 1.2025632862281976e-06, + "loss": 0.46101367473602295, + "step": 4020 + }, + { + "epoch": 0.9271385750518791, + "grad_norm": 1.364554371793265, + "learning_rate": 1.2021899141310938e-06, + "loss": 0.4242950677871704, + "step": 4021 + }, + { + "epoch": 0.9273691491814618, + "grad_norm": 1.5524341283687932, + "learning_rate": 1.201816512643637e-06, + "loss": 0.45983830094337463, + "step": 4022 + }, + { + "epoch": 0.9275997233110445, + "grad_norm": 1.3760025635830133, + "learning_rate": 1.2014430818201044e-06, + "loss": 0.39785802364349365, + "step": 4023 + }, + { + "epoch": 0.9278302974406272, + "grad_norm": 1.254017871701417, + "learning_rate": 1.2010696217147783e-06, + "loss": 0.39265739917755127, + "step": 4024 + }, + { + "epoch": 0.9280608715702098, + "grad_norm": 1.4761130221315304, + "learning_rate": 1.2006961323819455e-06, + "loss": 0.49783703684806824, + "step": 4025 + }, + { + "epoch": 0.9282914456997925, + "grad_norm": 1.3764899481486361, + "learning_rate": 1.2003226138758953e-06, + "loss": 0.4479181170463562, + "step": 4026 + }, + { + "epoch": 0.9285220198293751, + "grad_norm": 1.4404345233811269, + "learning_rate": 1.199949066250923e-06, + "loss": 0.5205901265144348, + "step": 4027 + }, + { + "epoch": 0.9287525939589578, + "grad_norm": 1.3718010528366764, + "learning_rate": 1.1995754895613277e-06, + "loss": 0.5163009762763977, + "step": 4028 + }, + { + "epoch": 0.9289831680885404, + "grad_norm": 1.6219891318512447, + "learning_rate": 1.1992018838614124e-06, + "loss": 0.5746268033981323, + "step": 4029 + }, + { + "epoch": 0.9292137422181231, + "grad_norm": 1.2896226756922917, + "learning_rate": 1.1988282492054844e-06, + "loss": 0.5306442975997925, + "step": 4030 + }, + { + "epoch": 0.9294443163477057, + "grad_norm": 1.1978686339854372, + "learning_rate": 
1.198454585647855e-06, + "loss": 0.4219534993171692, + "step": 4031 + }, + { + "epoch": 0.9296748904772885, + "grad_norm": 1.3997557750947305, + "learning_rate": 1.1980808932428406e-06, + "loss": 0.4167936444282532, + "step": 4032 + }, + { + "epoch": 0.9299054646068711, + "grad_norm": 1.2271684703243566, + "learning_rate": 1.197707172044761e-06, + "loss": 0.42376089096069336, + "step": 4033 + }, + { + "epoch": 0.9301360387364538, + "grad_norm": 1.5370602561856461, + "learning_rate": 1.1973334221079398e-06, + "loss": 0.48729848861694336, + "step": 4034 + }, + { + "epoch": 0.9303666128660364, + "grad_norm": 1.2353226603771892, + "learning_rate": 1.1969596434867062e-06, + "loss": 0.45877987146377563, + "step": 4035 + }, + { + "epoch": 0.9305971869956191, + "grad_norm": 1.2531522631367908, + "learning_rate": 1.196585836235392e-06, + "loss": 0.504621684551239, + "step": 4036 + }, + { + "epoch": 0.9308277611252017, + "grad_norm": 1.202880043912139, + "learning_rate": 1.1962120004083342e-06, + "loss": 0.45170748233795166, + "step": 4037 + }, + { + "epoch": 0.9310583352547844, + "grad_norm": 1.3604906368473153, + "learning_rate": 1.1958381360598737e-06, + "loss": 0.3969152569770813, + "step": 4038 + }, + { + "epoch": 0.931288909384367, + "grad_norm": 1.2718279913855612, + "learning_rate": 1.1954642432443553e-06, + "loss": 0.4286048412322998, + "step": 4039 + }, + { + "epoch": 0.9315194835139498, + "grad_norm": 1.4261317138789782, + "learning_rate": 1.1950903220161284e-06, + "loss": 0.3755400776863098, + "step": 4040 + }, + { + "epoch": 0.9317500576435324, + "grad_norm": 1.7559058405972485, + "learning_rate": 1.1947163724295457e-06, + "loss": 0.553135871887207, + "step": 4041 + }, + { + "epoch": 0.9319806317731151, + "grad_norm": 1.3529681190465184, + "learning_rate": 1.194342394538965e-06, + "loss": 0.53995281457901, + "step": 4042 + }, + { + "epoch": 0.9322112059026977, + "grad_norm": 1.3239114086556873, + "learning_rate": 1.1939683883987476e-06, + "loss": 
0.4405739903450012, + "step": 4043 + }, + { + "epoch": 0.9324417800322804, + "grad_norm": 1.4320084668753248, + "learning_rate": 1.1935943540632591e-06, + "loss": 0.5046489238739014, + "step": 4044 + }, + { + "epoch": 0.932672354161863, + "grad_norm": 1.63220562819442, + "learning_rate": 1.1932202915868694e-06, + "loss": 0.4699453115463257, + "step": 4045 + }, + { + "epoch": 0.9329029282914457, + "grad_norm": 1.791152379500816, + "learning_rate": 1.192846201023952e-06, + "loss": 0.5643539428710938, + "step": 4046 + }, + { + "epoch": 0.9331335024210283, + "grad_norm": 1.3213038373558907, + "learning_rate": 1.192472082428885e-06, + "loss": 0.4423527121543884, + "step": 4047 + }, + { + "epoch": 0.933364076550611, + "grad_norm": 1.488626793530787, + "learning_rate": 1.1920979358560498e-06, + "loss": 0.4446362257003784, + "step": 4048 + }, + { + "epoch": 0.9335946506801936, + "grad_norm": 1.6284188135746005, + "learning_rate": 1.1917237613598332e-06, + "loss": 0.48347601294517517, + "step": 4049 + }, + { + "epoch": 0.9338252248097764, + "grad_norm": 1.339621886087554, + "learning_rate": 1.1913495589946243e-06, + "loss": 0.4736206531524658, + "step": 4050 + }, + { + "epoch": 0.934055798939359, + "grad_norm": 1.5821523477294297, + "learning_rate": 1.1909753288148181e-06, + "loss": 0.4896177053451538, + "step": 4051 + }, + { + "epoch": 0.9342863730689417, + "grad_norm": 1.3503870180183308, + "learning_rate": 1.1906010708748124e-06, + "loss": 0.3953405022621155, + "step": 4052 + }, + { + "epoch": 0.9345169471985243, + "grad_norm": 1.75805064255455, + "learning_rate": 1.1902267852290092e-06, + "loss": 0.30871689319610596, + "step": 4053 + }, + { + "epoch": 0.934747521328107, + "grad_norm": 1.4966149449301516, + "learning_rate": 1.1898524719318151e-06, + "loss": 0.44187474250793457, + "step": 4054 + }, + { + "epoch": 0.9349780954576896, + "grad_norm": 1.3440011557143472, + "learning_rate": 1.1894781310376396e-06, + "loss": 0.4069768488407135, + "step": 4055 + }, + { + 
"epoch": 0.9352086695872723, + "grad_norm": 1.2938244564986259, + "learning_rate": 1.1891037626008982e-06, + "loss": 0.36307692527770996, + "step": 4056 + }, + { + "epoch": 0.9354392437168549, + "grad_norm": 1.2107088826138788, + "learning_rate": 1.188729366676008e-06, + "loss": 0.38535594940185547, + "step": 4057 + }, + { + "epoch": 0.9356698178464377, + "grad_norm": 1.416105966319888, + "learning_rate": 1.1883549433173916e-06, + "loss": 0.46454256772994995, + "step": 4058 + }, + { + "epoch": 0.9359003919760203, + "grad_norm": 1.5618282514551205, + "learning_rate": 1.1879804925794752e-06, + "loss": 0.48537465929985046, + "step": 4059 + }, + { + "epoch": 0.936130966105603, + "grad_norm": 1.4027831120439134, + "learning_rate": 1.1876060145166893e-06, + "loss": 0.4355062246322632, + "step": 4060 + }, + { + "epoch": 0.9363615402351856, + "grad_norm": 1.4619447190479122, + "learning_rate": 1.1872315091834676e-06, + "loss": 0.47248804569244385, + "step": 4061 + }, + { + "epoch": 0.9365921143647683, + "grad_norm": 1.4336627602293526, + "learning_rate": 1.1868569766342488e-06, + "loss": 0.4896939992904663, + "step": 4062 + }, + { + "epoch": 0.9368226884943509, + "grad_norm": 1.7008224797561309, + "learning_rate": 1.1864824169234744e-06, + "loss": 0.4259600043296814, + "step": 4063 + }, + { + "epoch": 0.9370532626239336, + "grad_norm": 1.4119659383453314, + "learning_rate": 1.186107830105591e-06, + "loss": 0.4228817820549011, + "step": 4064 + }, + { + "epoch": 0.9372838367535162, + "grad_norm": 1.4911543620584802, + "learning_rate": 1.1857332162350484e-06, + "loss": 0.44750750064849854, + "step": 4065 + }, + { + "epoch": 0.937514410883099, + "grad_norm": 1.4424129451647476, + "learning_rate": 1.1853585753663003e-06, + "loss": 0.49125558137893677, + "step": 4066 + }, + { + "epoch": 0.9377449850126816, + "grad_norm": 1.2540485430842725, + "learning_rate": 1.1849839075538048e-06, + "loss": 0.446805477142334, + "step": 4067 + }, + { + "epoch": 0.9379755591422643, + 
"grad_norm": 1.6527694351266196, + "learning_rate": 1.1846092128520235e-06, + "loss": 0.4516616463661194, + "step": 4068 + }, + { + "epoch": 0.9382061332718469, + "grad_norm": 1.2461495462560317, + "learning_rate": 1.1842344913154223e-06, + "loss": 0.5271207690238953, + "step": 4069 + }, + { + "epoch": 0.9384367074014296, + "grad_norm": 1.3340471888093621, + "learning_rate": 1.1838597429984702e-06, + "loss": 0.46718811988830566, + "step": 4070 + }, + { + "epoch": 0.9386672815310122, + "grad_norm": 1.6970586095771742, + "learning_rate": 1.1834849679556416e-06, + "loss": 0.4948880672454834, + "step": 4071 + }, + { + "epoch": 0.9388978556605949, + "grad_norm": 1.570925891079885, + "learning_rate": 1.183110166241413e-06, + "loss": 0.5141744613647461, + "step": 4072 + }, + { + "epoch": 0.9391284297901775, + "grad_norm": 1.683475962747206, + "learning_rate": 1.1827353379102662e-06, + "loss": 0.43921130895614624, + "step": 4073 + }, + { + "epoch": 0.9393590039197602, + "grad_norm": 1.458461387708897, + "learning_rate": 1.182360483016686e-06, + "loss": 0.35931193828582764, + "step": 4074 + }, + { + "epoch": 0.9395895780493428, + "grad_norm": 1.4562814179425503, + "learning_rate": 1.1819856016151615e-06, + "loss": 0.4376310408115387, + "step": 4075 + }, + { + "epoch": 0.9398201521789256, + "grad_norm": 1.1615675527476144, + "learning_rate": 1.1816106937601856e-06, + "loss": 0.45419907569885254, + "step": 4076 + }, + { + "epoch": 0.9400507263085082, + "grad_norm": 1.447994335613413, + "learning_rate": 1.1812357595062545e-06, + "loss": 0.4077754616737366, + "step": 4077 + }, + { + "epoch": 0.9402813004380909, + "grad_norm": 1.4463033622550583, + "learning_rate": 1.1808607989078686e-06, + "loss": 0.5555585622787476, + "step": 4078 + }, + { + "epoch": 0.9405118745676735, + "grad_norm": 1.4616481074430372, + "learning_rate": 1.1804858120195334e-06, + "loss": 0.4566183090209961, + "step": 4079 + }, + { + "epoch": 0.9407424486972562, + "grad_norm": 1.3314435652232666, + 
"learning_rate": 1.180110798895756e-06, + "loss": 0.39149847626686096, + "step": 4080 + }, + { + "epoch": 0.9409730228268388, + "grad_norm": 1.3122400287018474, + "learning_rate": 1.1797357595910485e-06, + "loss": 0.42695966362953186, + "step": 4081 + }, + { + "epoch": 0.9412035969564215, + "grad_norm": 1.4264504061469645, + "learning_rate": 1.1793606941599266e-06, + "loss": 0.49673956632614136, + "step": 4082 + }, + { + "epoch": 0.9414341710860041, + "grad_norm": 1.3703442162376693, + "learning_rate": 1.17898560265691e-06, + "loss": 0.44765836000442505, + "step": 4083 + }, + { + "epoch": 0.9416647452155869, + "grad_norm": 1.2694691955405566, + "learning_rate": 1.1786104851365227e-06, + "loss": 0.40580642223358154, + "step": 4084 + }, + { + "epoch": 0.9418953193451695, + "grad_norm": 1.6554640938571203, + "learning_rate": 1.1782353416532907e-06, + "loss": 0.5389235019683838, + "step": 4085 + }, + { + "epoch": 0.9421258934747522, + "grad_norm": 1.4858385739097846, + "learning_rate": 1.1778601722617456e-06, + "loss": 0.5130764245986938, + "step": 4086 + }, + { + "epoch": 0.9423564676043348, + "grad_norm": 1.4406092108567712, + "learning_rate": 1.1774849770164218e-06, + "loss": 0.5031291842460632, + "step": 4087 + }, + { + "epoch": 0.9425870417339175, + "grad_norm": 1.474863885181778, + "learning_rate": 1.1771097559718581e-06, + "loss": 0.463434636592865, + "step": 4088 + }, + { + "epoch": 0.9428176158635001, + "grad_norm": 1.3059771334220434, + "learning_rate": 1.1767345091825962e-06, + "loss": 0.4249681234359741, + "step": 4089 + }, + { + "epoch": 0.9430481899930828, + "grad_norm": 1.322875104249168, + "learning_rate": 1.176359236703182e-06, + "loss": 0.39353805780410767, + "step": 4090 + }, + { + "epoch": 0.9432787641226654, + "grad_norm": 1.1645299347166784, + "learning_rate": 1.1759839385881657e-06, + "loss": 0.4554273188114166, + "step": 4091 + }, + { + "epoch": 0.9435093382522481, + "grad_norm": 1.5935626726835685, + "learning_rate": 1.1756086148921005e-06, + 
"loss": 0.6275606155395508, + "step": 4092 + }, + { + "epoch": 0.9437399123818307, + "grad_norm": 1.40548177481024, + "learning_rate": 1.1752332656695432e-06, + "loss": 0.5058892965316772, + "step": 4093 + }, + { + "epoch": 0.9439704865114135, + "grad_norm": 1.4618963991295721, + "learning_rate": 1.1748578909750547e-06, + "loss": 0.4318118095397949, + "step": 4094 + }, + { + "epoch": 0.9442010606409961, + "grad_norm": 1.5133013388223657, + "learning_rate": 1.1744824908631996e-06, + "loss": 0.4873964190483093, + "step": 4095 + }, + { + "epoch": 0.9444316347705788, + "grad_norm": 1.7199346017960337, + "learning_rate": 1.1741070653885467e-06, + "loss": 0.5026696920394897, + "step": 4096 + }, + { + "epoch": 0.9446622089001614, + "grad_norm": 1.1838920009196625, + "learning_rate": 1.1737316146056667e-06, + "loss": 0.4337490200996399, + "step": 4097 + }, + { + "epoch": 0.9448927830297441, + "grad_norm": 1.4841621540296046, + "learning_rate": 1.173356138569136e-06, + "loss": 0.4552634358406067, + "step": 4098 + }, + { + "epoch": 0.9451233571593267, + "grad_norm": 1.50340660176824, + "learning_rate": 1.1729806373335336e-06, + "loss": 0.4631303548812866, + "step": 4099 + }, + { + "epoch": 0.9453539312889094, + "grad_norm": 1.2840677998534646, + "learning_rate": 1.1726051109534424e-06, + "loss": 0.5004513263702393, + "step": 4100 + }, + { + "epoch": 0.945584505418492, + "grad_norm": 1.4218926297879624, + "learning_rate": 1.172229559483449e-06, + "loss": 0.4634668827056885, + "step": 4101 + }, + { + "epoch": 0.9458150795480748, + "grad_norm": 1.3580815662313042, + "learning_rate": 1.171853982978144e-06, + "loss": 0.4034295678138733, + "step": 4102 + }, + { + "epoch": 0.9460456536776574, + "grad_norm": 1.4066326558267837, + "learning_rate": 1.1714783814921206e-06, + "loss": 0.4981224536895752, + "step": 4103 + }, + { + "epoch": 0.9462762278072401, + "grad_norm": 1.637441573047362, + "learning_rate": 1.1711027550799767e-06, + "loss": 0.460249125957489, + "step": 4104 + }, + { + 
"epoch": 0.9465068019368227, + "grad_norm": 1.7282687422797383, + "learning_rate": 1.170727103796313e-06, + "loss": 0.4794936180114746, + "step": 4105 + }, + { + "epoch": 0.9467373760664054, + "grad_norm": 1.679442128589896, + "learning_rate": 1.170351427695735e-06, + "loss": 0.42724454402923584, + "step": 4106 + }, + { + "epoch": 0.946967950195988, + "grad_norm": 1.5092304593591768, + "learning_rate": 1.16997572683285e-06, + "loss": 0.4612593948841095, + "step": 4107 + }, + { + "epoch": 0.9471985243255707, + "grad_norm": 1.4462371891962704, + "learning_rate": 1.169600001262271e-06, + "loss": 0.49512046575546265, + "step": 4108 + }, + { + "epoch": 0.9474290984551533, + "grad_norm": 1.382963972341291, + "learning_rate": 1.1692242510386124e-06, + "loss": 0.49438196420669556, + "step": 4109 + }, + { + "epoch": 0.947659672584736, + "grad_norm": 1.246967438511099, + "learning_rate": 1.1688484762164938e-06, + "loss": 0.4833865165710449, + "step": 4110 + }, + { + "epoch": 0.9478902467143187, + "grad_norm": 1.6394354229670154, + "learning_rate": 1.1684726768505385e-06, + "loss": 0.49647942185401917, + "step": 4111 + }, + { + "epoch": 0.9481208208439014, + "grad_norm": 1.3141370309593936, + "learning_rate": 1.1680968529953718e-06, + "loss": 0.4299147129058838, + "step": 4112 + }, + { + "epoch": 0.948351394973484, + "grad_norm": 1.2751791494481195, + "learning_rate": 1.167721004705624e-06, + "loss": 0.42613041400909424, + "step": 4113 + }, + { + "epoch": 0.9485819691030667, + "grad_norm": 1.5850112492057793, + "learning_rate": 1.1673451320359284e-06, + "loss": 0.3989883065223694, + "step": 4114 + }, + { + "epoch": 0.9488125432326493, + "grad_norm": 1.6195345588406382, + "learning_rate": 1.1669692350409222e-06, + "loss": 0.41362684965133667, + "step": 4115 + }, + { + "epoch": 0.9490431173622319, + "grad_norm": 1.3043186455514282, + "learning_rate": 1.1665933137752452e-06, + "loss": 0.3807048201560974, + "step": 4116 + }, + { + "epoch": 0.9492736914918146, + "grad_norm": 
1.452270133487064, + "learning_rate": 1.1662173682935414e-06, + "loss": 0.3440876007080078, + "step": 4117 + }, + { + "epoch": 0.9495042656213972, + "grad_norm": 1.5051121617765968, + "learning_rate": 1.165841398650459e-06, + "loss": 0.43534499406814575, + "step": 4118 + }, + { + "epoch": 0.9497348397509799, + "grad_norm": 1.2124174426672352, + "learning_rate": 1.1654654049006484e-06, + "loss": 0.4900544285774231, + "step": 4119 + }, + { + "epoch": 0.9499654138805625, + "grad_norm": 1.4219346573372744, + "learning_rate": 1.1650893870987643e-06, + "loss": 0.5189288854598999, + "step": 4120 + }, + { + "epoch": 0.9501959880101453, + "grad_norm": 1.5561303354373495, + "learning_rate": 1.1647133452994643e-06, + "loss": 0.587873101234436, + "step": 4121 + }, + { + "epoch": 0.9504265621397279, + "grad_norm": 1.2947612520331362, + "learning_rate": 1.1643372795574106e-06, + "loss": 0.4367108941078186, + "step": 4122 + }, + { + "epoch": 0.9506571362693106, + "grad_norm": 1.3855876287330298, + "learning_rate": 1.1639611899272679e-06, + "loss": 0.4121246635913849, + "step": 4123 + }, + { + "epoch": 0.9508877103988932, + "grad_norm": 1.371083137252789, + "learning_rate": 1.1635850764637042e-06, + "loss": 0.4993973672389984, + "step": 4124 + }, + { + "epoch": 0.9511182845284759, + "grad_norm": 1.3729377845652901, + "learning_rate": 1.163208939221392e-06, + "loss": 0.39145413041114807, + "step": 4125 + }, + { + "epoch": 0.9513488586580585, + "grad_norm": 1.5515816392895183, + "learning_rate": 1.1628327782550065e-06, + "loss": 0.45954760909080505, + "step": 4126 + }, + { + "epoch": 0.9515794327876412, + "grad_norm": 1.5137997254417062, + "learning_rate": 1.1624565936192263e-06, + "loss": 0.5159680843353271, + "step": 4127 + }, + { + "epoch": 0.9518100069172238, + "grad_norm": 1.5429829982679306, + "learning_rate": 1.1620803853687337e-06, + "loss": 0.4441346228122711, + "step": 4128 + }, + { + "epoch": 0.9520405810468066, + "grad_norm": 1.1994992888255296, + "learning_rate": 
1.1617041535582144e-06, + "loss": 0.3842248320579529, + "step": 4129 + }, + { + "epoch": 0.9522711551763892, + "grad_norm": 1.5742838715827387, + "learning_rate": 1.1613278982423577e-06, + "loss": 0.5332437753677368, + "step": 4130 + }, + { + "epoch": 0.9525017293059719, + "grad_norm": 1.416443461852387, + "learning_rate": 1.160951619475856e-06, + "loss": 0.4265931248664856, + "step": 4131 + }, + { + "epoch": 0.9527323034355545, + "grad_norm": 1.344407559333665, + "learning_rate": 1.1605753173134052e-06, + "loss": 0.47442418336868286, + "step": 4132 + }, + { + "epoch": 0.9529628775651372, + "grad_norm": 1.4385000789860496, + "learning_rate": 1.1601989918097044e-06, + "loss": 0.6128898859024048, + "step": 4133 + }, + { + "epoch": 0.9531934516947198, + "grad_norm": 1.3167710707989233, + "learning_rate": 1.159822643019457e-06, + "loss": 0.5347775220870972, + "step": 4134 + }, + { + "epoch": 0.9534240258243025, + "grad_norm": 1.1478699481046142, + "learning_rate": 1.1594462709973682e-06, + "loss": 0.39984625577926636, + "step": 4135 + }, + { + "epoch": 0.9536545999538851, + "grad_norm": 1.411910940206958, + "learning_rate": 1.1590698757981483e-06, + "loss": 0.5146951675415039, + "step": 4136 + }, + { + "epoch": 0.9538851740834678, + "grad_norm": 1.4057451726772026, + "learning_rate": 1.1586934574765097e-06, + "loss": 0.3589641749858856, + "step": 4137 + }, + { + "epoch": 0.9541157482130505, + "grad_norm": 1.4047870659239305, + "learning_rate": 1.1583170160871689e-06, + "loss": 0.428930401802063, + "step": 4138 + }, + { + "epoch": 0.9543463223426332, + "grad_norm": 1.3760779428564116, + "learning_rate": 1.1579405516848452e-06, + "loss": 0.46921080350875854, + "step": 4139 + }, + { + "epoch": 0.9545768964722158, + "grad_norm": 1.462957669946579, + "learning_rate": 1.1575640643242616e-06, + "loss": 0.39079514145851135, + "step": 4140 + }, + { + "epoch": 0.9548074706017985, + "grad_norm": 1.5322762323160557, + "learning_rate": 1.1571875540601443e-06, + "loss": 
0.4475102424621582, + "step": 4141 + }, + { + "epoch": 0.9550380447313811, + "grad_norm": 1.3964952325110702, + "learning_rate": 1.1568110209472232e-06, + "loss": 0.43881016969680786, + "step": 4142 + }, + { + "epoch": 0.9552686188609638, + "grad_norm": 1.2846843095885363, + "learning_rate": 1.156434465040231e-06, + "loss": 0.4382214844226837, + "step": 4143 + }, + { + "epoch": 0.9554991929905464, + "grad_norm": 1.6590322564778253, + "learning_rate": 1.1560578863939037e-06, + "loss": 0.5390958786010742, + "step": 4144 + }, + { + "epoch": 0.9557297671201291, + "grad_norm": 1.2966408722030756, + "learning_rate": 1.155681285062981e-06, + "loss": 0.4276137948036194, + "step": 4145 + }, + { + "epoch": 0.9559603412497117, + "grad_norm": 1.3756682316204962, + "learning_rate": 1.1553046611022058e-06, + "loss": 0.4541968107223511, + "step": 4146 + }, + { + "epoch": 0.9561909153792945, + "grad_norm": 1.4806679512404375, + "learning_rate": 1.1549280145663242e-06, + "loss": 0.43287473917007446, + "step": 4147 + }, + { + "epoch": 0.9564214895088771, + "grad_norm": 1.5507500145218385, + "learning_rate": 1.1545513455100855e-06, + "loss": 0.432822585105896, + "step": 4148 + }, + { + "epoch": 0.9566520636384598, + "grad_norm": 1.4662390355071035, + "learning_rate": 1.1541746539882424e-06, + "loss": 0.519271969795227, + "step": 4149 + }, + { + "epoch": 0.9568826377680424, + "grad_norm": 1.4521470663351335, + "learning_rate": 1.1537979400555506e-06, + "loss": 0.4158627390861511, + "step": 4150 + }, + { + "epoch": 0.9571132118976251, + "grad_norm": 1.4834584070713739, + "learning_rate": 1.1534212037667698e-06, + "loss": 0.42122989892959595, + "step": 4151 + }, + { + "epoch": 0.9573437860272077, + "grad_norm": 1.696588703842723, + "learning_rate": 1.1530444451766623e-06, + "loss": 0.4141794443130493, + "step": 4152 + }, + { + "epoch": 0.9575743601567904, + "grad_norm": 1.3149219500885996, + "learning_rate": 1.1526676643399933e-06, + "loss": 0.4935780167579651, + "step": 4153 + }, + { + 
"epoch": 0.957804934286373, + "grad_norm": 1.3661965645097156, + "learning_rate": 1.152290861311532e-06, + "loss": 0.5075733661651611, + "step": 4154 + }, + { + "epoch": 0.9580355084159557, + "grad_norm": 1.37824406851626, + "learning_rate": 1.151914036146051e-06, + "loss": 0.4852841794490814, + "step": 4155 + }, + { + "epoch": 0.9582660825455384, + "grad_norm": 1.2576277022731817, + "learning_rate": 1.151537188898325e-06, + "loss": 0.46114620566368103, + "step": 4156 + }, + { + "epoch": 0.9584966566751211, + "grad_norm": 1.6662322349225411, + "learning_rate": 1.1511603196231327e-06, + "loss": 0.519254207611084, + "step": 4157 + }, + { + "epoch": 0.9587272308047037, + "grad_norm": 1.3283960828325414, + "learning_rate": 1.1507834283752562e-06, + "loss": 0.43635690212249756, + "step": 4158 + }, + { + "epoch": 0.9589578049342864, + "grad_norm": 1.3730336798021219, + "learning_rate": 1.1504065152094802e-06, + "loss": 0.48448023200035095, + "step": 4159 + }, + { + "epoch": 0.959188379063869, + "grad_norm": 1.320755520801986, + "learning_rate": 1.1500295801805927e-06, + "loss": 0.4461054801940918, + "step": 4160 + }, + { + "epoch": 0.9594189531934517, + "grad_norm": 1.3183810948385437, + "learning_rate": 1.1496526233433852e-06, + "loss": 0.44869595766067505, + "step": 4161 + }, + { + "epoch": 0.9596495273230343, + "grad_norm": 1.5137169599039804, + "learning_rate": 1.1492756447526524e-06, + "loss": 0.4592103660106659, + "step": 4162 + }, + { + "epoch": 0.959880101452617, + "grad_norm": 1.3625000210250673, + "learning_rate": 1.1488986444631918e-06, + "loss": 0.48352301120758057, + "step": 4163 + }, + { + "epoch": 0.9601106755821996, + "grad_norm": 1.2039059688900335, + "learning_rate": 1.1485216225298043e-06, + "loss": 0.44718503952026367, + "step": 4164 + }, + { + "epoch": 0.9603412497117824, + "grad_norm": 1.7796976813489804, + "learning_rate": 1.1481445790072933e-06, + "loss": 0.44659486413002014, + "step": 4165 + }, + { + "epoch": 0.960571823841365, + "grad_norm": 
1.464260426957605, + "learning_rate": 1.1477675139504665e-06, + "loss": 0.5143063068389893, + "step": 4166 + }, + { + "epoch": 0.9608023979709477, + "grad_norm": 1.825014649582591, + "learning_rate": 1.1473904274141344e-06, + "loss": 0.6708887815475464, + "step": 4167 + }, + { + "epoch": 0.9610329721005303, + "grad_norm": 1.4397638416262573, + "learning_rate": 1.1470133194531094e-06, + "loss": 0.3889666199684143, + "step": 4168 + }, + { + "epoch": 0.961263546230113, + "grad_norm": 1.2805774485856607, + "learning_rate": 1.1466361901222086e-06, + "loss": 0.4610622227191925, + "step": 4169 + }, + { + "epoch": 0.9614941203596956, + "grad_norm": 1.4320030308850267, + "learning_rate": 1.1462590394762514e-06, + "loss": 0.46372538805007935, + "step": 4170 + }, + { + "epoch": 0.9617246944892783, + "grad_norm": 1.5638922992309852, + "learning_rate": 1.1458818675700607e-06, + "loss": 0.5197097063064575, + "step": 4171 + }, + { + "epoch": 0.9619552686188609, + "grad_norm": 1.2417860513603916, + "learning_rate": 1.145504674458462e-06, + "loss": 0.3849745988845825, + "step": 4172 + }, + { + "epoch": 0.9621858427484437, + "grad_norm": 1.5196854039542969, + "learning_rate": 1.1451274601962841e-06, + "loss": 0.4572817385196686, + "step": 4173 + }, + { + "epoch": 0.9624164168780263, + "grad_norm": 1.4154832612934123, + "learning_rate": 1.1447502248383594e-06, + "loss": 0.4383746385574341, + "step": 4174 + }, + { + "epoch": 0.962646991007609, + "grad_norm": 1.473681287130909, + "learning_rate": 1.1443729684395222e-06, + "loss": 0.5319672226905823, + "step": 4175 + }, + { + "epoch": 0.9628775651371916, + "grad_norm": 1.2307542062760268, + "learning_rate": 1.143995691054611e-06, + "loss": 0.4351249933242798, + "step": 4176 + }, + { + "epoch": 0.9631081392667743, + "grad_norm": 1.42416527435209, + "learning_rate": 1.1436183927384668e-06, + "loss": 0.5453774929046631, + "step": 4177 + }, + { + "epoch": 0.9633387133963569, + "grad_norm": 1.569291329857932, + "learning_rate": 
1.1432410735459336e-06, + "loss": 0.5605905055999756, + "step": 4178 + }, + { + "epoch": 0.9635692875259396, + "grad_norm": 1.3825364023898294, + "learning_rate": 1.1428637335318587e-06, + "loss": 0.4556693434715271, + "step": 4179 + }, + { + "epoch": 0.9637998616555222, + "grad_norm": 1.316766347101971, + "learning_rate": 1.142486372751092e-06, + "loss": 0.45428892970085144, + "step": 4180 + }, + { + "epoch": 0.9640304357851049, + "grad_norm": 1.4252168865652697, + "learning_rate": 1.142108991258487e-06, + "loss": 0.4897412657737732, + "step": 4181 + }, + { + "epoch": 0.9642610099146876, + "grad_norm": 1.984637391356181, + "learning_rate": 1.1417315891089004e-06, + "loss": 0.5478836894035339, + "step": 4182 + }, + { + "epoch": 0.9644915840442703, + "grad_norm": 1.4620834191298895, + "learning_rate": 1.1413541663571904e-06, + "loss": 0.42394131422042847, + "step": 4183 + }, + { + "epoch": 0.9647221581738529, + "grad_norm": 1.585175673978148, + "learning_rate": 1.1409767230582199e-06, + "loss": 0.5047104954719543, + "step": 4184 + }, + { + "epoch": 0.9649527323034356, + "grad_norm": 1.4749915601759833, + "learning_rate": 1.1405992592668538e-06, + "loss": 0.43985825777053833, + "step": 4185 + }, + { + "epoch": 0.9651833064330182, + "grad_norm": 1.3061643078097422, + "learning_rate": 1.1402217750379608e-06, + "loss": 0.4338407516479492, + "step": 4186 + }, + { + "epoch": 0.9654138805626009, + "grad_norm": 1.5404850502320075, + "learning_rate": 1.1398442704264118e-06, + "loss": 0.4532614052295685, + "step": 4187 + }, + { + "epoch": 0.9656444546921835, + "grad_norm": 1.2345047018331374, + "learning_rate": 1.1394667454870802e-06, + "loss": 0.4546123445034027, + "step": 4188 + }, + { + "epoch": 0.9658750288217662, + "grad_norm": 1.5321856096614175, + "learning_rate": 1.139089200274844e-06, + "loss": 0.44743451476097107, + "step": 4189 + }, + { + "epoch": 0.9661056029513488, + "grad_norm": 1.3411063865526411, + "learning_rate": 1.138711634844583e-06, + "loss": 
0.4566968083381653, + "step": 4190 + }, + { + "epoch": 0.9663361770809316, + "grad_norm": 1.481468600614622, + "learning_rate": 1.13833404925118e-06, + "loss": 0.46385467052459717, + "step": 4191 + }, + { + "epoch": 0.9665667512105142, + "grad_norm": 1.2411450691863102, + "learning_rate": 1.137956443549521e-06, + "loss": 0.4614461660385132, + "step": 4192 + }, + { + "epoch": 0.9667973253400969, + "grad_norm": 1.3326432316915904, + "learning_rate": 1.1375788177944945e-06, + "loss": 0.4351955056190491, + "step": 4193 + }, + { + "epoch": 0.9670278994696795, + "grad_norm": 1.368161025215393, + "learning_rate": 1.1372011720409927e-06, + "loss": 0.4172135591506958, + "step": 4194 + }, + { + "epoch": 0.9672584735992622, + "grad_norm": 1.6941620223477674, + "learning_rate": 1.1368235063439102e-06, + "loss": 0.5482916831970215, + "step": 4195 + }, + { + "epoch": 0.9674890477288448, + "grad_norm": 1.3508434751874687, + "learning_rate": 1.136445820758144e-06, + "loss": 0.4336891770362854, + "step": 4196 + }, + { + "epoch": 0.9677196218584275, + "grad_norm": 1.5072664158429512, + "learning_rate": 1.1360681153385956e-06, + "loss": 0.42612385749816895, + "step": 4197 + }, + { + "epoch": 0.9679501959880101, + "grad_norm": 1.5000454097568379, + "learning_rate": 1.135690390140167e-06, + "loss": 0.513736367225647, + "step": 4198 + }, + { + "epoch": 0.9681807701175928, + "grad_norm": 1.8279069537189752, + "learning_rate": 1.1353126452177656e-06, + "loss": 0.45551058650016785, + "step": 4199 + }, + { + "epoch": 0.9684113442471755, + "grad_norm": 1.3479770342549766, + "learning_rate": 1.1349348806262994e-06, + "loss": 0.45450061559677124, + "step": 4200 + }, + { + "epoch": 0.9686419183767582, + "grad_norm": 1.5942392384347237, + "learning_rate": 1.1345570964206807e-06, + "loss": 0.43962353467941284, + "step": 4201 + }, + { + "epoch": 0.9688724925063408, + "grad_norm": 1.4695533515040724, + "learning_rate": 1.1341792926558245e-06, + "loss": 0.5304821729660034, + "step": 4202 + }, + { + 
"epoch": 0.9691030666359235, + "grad_norm": 1.57215629996827, + "learning_rate": 1.1338014693866483e-06, + "loss": 0.6079045534133911, + "step": 4203 + }, + { + "epoch": 0.9693336407655061, + "grad_norm": 1.3451772860900804, + "learning_rate": 1.1334236266680724e-06, + "loss": 0.39895182847976685, + "step": 4204 + }, + { + "epoch": 0.9695642148950888, + "grad_norm": 1.4224201035305835, + "learning_rate": 1.1330457645550202e-06, + "loss": 0.5264945030212402, + "step": 4205 + }, + { + "epoch": 0.9697947890246714, + "grad_norm": 1.3209691457440123, + "learning_rate": 1.1326678831024178e-06, + "loss": 0.4794533848762512, + "step": 4206 + }, + { + "epoch": 0.9700253631542541, + "grad_norm": 1.472204632290126, + "learning_rate": 1.1322899823651938e-06, + "loss": 0.42917680740356445, + "step": 4207 + }, + { + "epoch": 0.9702559372838367, + "grad_norm": 1.4163025348687577, + "learning_rate": 1.1319120623982804e-06, + "loss": 0.42155951261520386, + "step": 4208 + }, + { + "epoch": 0.9704865114134195, + "grad_norm": 1.455345134423215, + "learning_rate": 1.1315341232566121e-06, + "loss": 0.5119719505310059, + "step": 4209 + }, + { + "epoch": 0.9707170855430021, + "grad_norm": 1.4441630965274395, + "learning_rate": 1.1311561649951255e-06, + "loss": 0.5261529684066772, + "step": 4210 + }, + { + "epoch": 0.9709476596725848, + "grad_norm": 1.3046857195112773, + "learning_rate": 1.1307781876687609e-06, + "loss": 0.5133010149002075, + "step": 4211 + }, + { + "epoch": 0.9711782338021674, + "grad_norm": 1.4061037707348525, + "learning_rate": 1.1304001913324617e-06, + "loss": 0.5214196443557739, + "step": 4212 + }, + { + "epoch": 0.9714088079317501, + "grad_norm": 1.4191122003483587, + "learning_rate": 1.1300221760411732e-06, + "loss": 0.4665095806121826, + "step": 4213 + }, + { + "epoch": 0.9716393820613327, + "grad_norm": 1.2917310787961995, + "learning_rate": 1.1296441418498435e-06, + "loss": 0.44912537932395935, + "step": 4214 + }, + { + "epoch": 0.9718699561909154, + "grad_norm": 
1.384060094796334, + "learning_rate": 1.1292660888134241e-06, + "loss": 0.48622840642929077, + "step": 4215 + }, + { + "epoch": 0.972100530320498, + "grad_norm": 1.3952506250953003, + "learning_rate": 1.128888016986868e-06, + "loss": 0.40099745988845825, + "step": 4216 + }, + { + "epoch": 0.9723311044500808, + "grad_norm": 1.6661609433762745, + "learning_rate": 1.1285099264251331e-06, + "loss": 0.4981631934642792, + "step": 4217 + }, + { + "epoch": 0.9725616785796634, + "grad_norm": 1.3061541456837051, + "learning_rate": 1.1281318171831778e-06, + "loss": 0.3902980387210846, + "step": 4218 + }, + { + "epoch": 0.9727922527092461, + "grad_norm": 1.646940009523485, + "learning_rate": 1.1277536893159641e-06, + "loss": 0.5120723843574524, + "step": 4219 + }, + { + "epoch": 0.9730228268388287, + "grad_norm": 1.4050676349560098, + "learning_rate": 1.1273755428784568e-06, + "loss": 0.47908157110214233, + "step": 4220 + }, + { + "epoch": 0.9732534009684114, + "grad_norm": 1.3980215754858654, + "learning_rate": 1.126997377925624e-06, + "loss": 0.44935697317123413, + "step": 4221 + }, + { + "epoch": 0.973483975097994, + "grad_norm": 1.7936737063106103, + "learning_rate": 1.1266191945124345e-06, + "loss": 0.46883124113082886, + "step": 4222 + }, + { + "epoch": 0.9737145492275767, + "grad_norm": 1.3605023071963889, + "learning_rate": 1.1262409926938622e-06, + "loss": 0.41385799646377563, + "step": 4223 + }, + { + "epoch": 0.9739451233571593, + "grad_norm": 1.352097187992639, + "learning_rate": 1.1258627725248821e-06, + "loss": 0.5450118780136108, + "step": 4224 + }, + { + "epoch": 0.974175697486742, + "grad_norm": 1.3149598759310381, + "learning_rate": 1.1254845340604725e-06, + "loss": 0.4728820323944092, + "step": 4225 + }, + { + "epoch": 0.9744062716163246, + "grad_norm": 1.490906480143449, + "learning_rate": 1.1251062773556143e-06, + "loss": 0.5111296772956848, + "step": 4226 + }, + { + "epoch": 0.9746368457459073, + "grad_norm": 1.6529549144482583, + "learning_rate": 
1.1247280024652908e-06, + "loss": 0.4538743793964386, + "step": 4227 + }, + { + "epoch": 0.97486741987549, + "grad_norm": 1.4130886870951611, + "learning_rate": 1.1243497094444877e-06, + "loss": 0.4917091131210327, + "step": 4228 + }, + { + "epoch": 0.9750979940050726, + "grad_norm": 1.387244231549714, + "learning_rate": 1.1239713983481945e-06, + "loss": 0.40376198291778564, + "step": 4229 + }, + { + "epoch": 0.9753285681346553, + "grad_norm": 1.4554658551428983, + "learning_rate": 1.1235930692314019e-06, + "loss": 0.5356566905975342, + "step": 4230 + }, + { + "epoch": 0.9755591422642379, + "grad_norm": 1.4359135131794967, + "learning_rate": 1.123214722149104e-06, + "loss": 0.4374624490737915, + "step": 4231 + }, + { + "epoch": 0.9757897163938206, + "grad_norm": 1.4746549529981767, + "learning_rate": 1.1228363571562976e-06, + "loss": 0.4225429594516754, + "step": 4232 + }, + { + "epoch": 0.9760202905234032, + "grad_norm": 1.4500544144002923, + "learning_rate": 1.1224579743079819e-06, + "loss": 0.5389699935913086, + "step": 4233 + }, + { + "epoch": 0.9762508646529859, + "grad_norm": 1.39848035447059, + "learning_rate": 1.1220795736591584e-06, + "loss": 0.4925463795661926, + "step": 4234 + }, + { + "epoch": 0.9764814387825685, + "grad_norm": 1.2916834361485914, + "learning_rate": 1.1217011552648315e-06, + "loss": 0.4694328308105469, + "step": 4235 + }, + { + "epoch": 0.9767120129121513, + "grad_norm": 1.377557176325016, + "learning_rate": 1.1213227191800086e-06, + "loss": 0.39887624979019165, + "step": 4236 + }, + { + "epoch": 0.9769425870417339, + "grad_norm": 1.5555659299458584, + "learning_rate": 1.120944265459699e-06, + "loss": 0.4930388927459717, + "step": 4237 + }, + { + "epoch": 0.9771731611713166, + "grad_norm": 1.2486101676760866, + "learning_rate": 1.1205657941589143e-06, + "loss": 0.4595404863357544, + "step": 4238 + }, + { + "epoch": 0.9774037353008992, + "grad_norm": 1.4574273243269236, + "learning_rate": 1.1201873053326695e-06, + "loss": 
0.44177496433258057, + "step": 4239 + }, + { + "epoch": 0.9776343094304819, + "grad_norm": 1.4308970126871865, + "learning_rate": 1.119808799035982e-06, + "loss": 0.47095373272895813, + "step": 4240 + }, + { + "epoch": 0.9778648835600645, + "grad_norm": 1.4049777741841016, + "learning_rate": 1.1194302753238716e-06, + "loss": 0.4649583697319031, + "step": 4241 + }, + { + "epoch": 0.9780954576896472, + "grad_norm": 1.5269711326381101, + "learning_rate": 1.1190517342513598e-06, + "loss": 0.44815266132354736, + "step": 4242 + }, + { + "epoch": 0.9783260318192298, + "grad_norm": 1.462868793648971, + "learning_rate": 1.118673175873472e-06, + "loss": 0.4861665368080139, + "step": 4243 + }, + { + "epoch": 0.9785566059488126, + "grad_norm": 1.3395897424173215, + "learning_rate": 1.1182946002452354e-06, + "loss": 0.5196468830108643, + "step": 4244 + }, + { + "epoch": 0.9787871800783952, + "grad_norm": 1.5910002582718288, + "learning_rate": 1.11791600742168e-06, + "loss": 0.49746841192245483, + "step": 4245 + }, + { + "epoch": 0.9790177542079779, + "grad_norm": 1.2919062217717159, + "learning_rate": 1.1175373974578377e-06, + "loss": 0.4637739956378937, + "step": 4246 + }, + { + "epoch": 0.9792483283375605, + "grad_norm": 1.228394275609753, + "learning_rate": 1.1171587704087434e-06, + "loss": 0.46009692549705505, + "step": 4247 + }, + { + "epoch": 0.9794789024671432, + "grad_norm": 2.1569798034684706, + "learning_rate": 1.1167801263294346e-06, + "loss": 0.49036258459091187, + "step": 4248 + }, + { + "epoch": 0.9797094765967258, + "grad_norm": 1.395933426650918, + "learning_rate": 1.1164014652749509e-06, + "loss": 0.4730580449104309, + "step": 4249 + }, + { + "epoch": 0.9799400507263085, + "grad_norm": 1.618438538763921, + "learning_rate": 1.1160227873003345e-06, + "loss": 0.5029968023300171, + "step": 4250 + }, + { + "epoch": 0.9801706248558911, + "grad_norm": 1.4870951402562973, + "learning_rate": 1.1156440924606299e-06, + "loss": 0.5149805545806885, + "step": 4251 + }, + { + 
"epoch": 0.9804011989854738, + "grad_norm": 1.6248587467562292, + "learning_rate": 1.1152653808108845e-06, + "loss": 0.5017384886741638, + "step": 4252 + }, + { + "epoch": 0.9806317731150564, + "grad_norm": 1.486462967422998, + "learning_rate": 1.114886652406148e-06, + "loss": 0.47569048404693604, + "step": 4253 + }, + { + "epoch": 0.9808623472446392, + "grad_norm": 1.4476623501612873, + "learning_rate": 1.1145079073014722e-06, + "loss": 0.5127655863761902, + "step": 4254 + }, + { + "epoch": 0.9810929213742218, + "grad_norm": 1.4943063660203757, + "learning_rate": 1.1141291455519114e-06, + "loss": 0.4014360308647156, + "step": 4255 + }, + { + "epoch": 0.9813234955038045, + "grad_norm": 1.4814879590427052, + "learning_rate": 1.1137503672125228e-06, + "loss": 0.43737465143203735, + "step": 4256 + }, + { + "epoch": 0.9815540696333871, + "grad_norm": 1.413525212350489, + "learning_rate": 1.1133715723383655e-06, + "loss": 0.4389764070510864, + "step": 4257 + }, + { + "epoch": 0.9817846437629698, + "grad_norm": 1.3532173754404184, + "learning_rate": 1.112992760984501e-06, + "loss": 0.5105381608009338, + "step": 4258 + }, + { + "epoch": 0.9820152178925524, + "grad_norm": 1.4052776017835835, + "learning_rate": 1.1126139332059937e-06, + "loss": 0.4393002688884735, + "step": 4259 + }, + { + "epoch": 0.9822457920221351, + "grad_norm": 1.3179147448132482, + "learning_rate": 1.1122350890579102e-06, + "loss": 0.541419267654419, + "step": 4260 + }, + { + "epoch": 0.9824763661517177, + "grad_norm": 1.5177150542407778, + "learning_rate": 1.1118562285953186e-06, + "loss": 0.4153546094894409, + "step": 4261 + }, + { + "epoch": 0.9827069402813005, + "grad_norm": 1.4649176443917427, + "learning_rate": 1.1114773518732907e-06, + "loss": 0.5060696601867676, + "step": 4262 + }, + { + "epoch": 0.9829375144108831, + "grad_norm": 1.6266321171712574, + "learning_rate": 1.1110984589468998e-06, + "loss": 0.5975456237792969, + "step": 4263 + }, + { + "epoch": 0.9831680885404658, + "grad_norm": 
1.4920078622156363, + "learning_rate": 1.110719549871222e-06, + "loss": 0.5729621648788452, + "step": 4264 + }, + { + "epoch": 0.9833986626700484, + "grad_norm": 1.3838030985279757, + "learning_rate": 1.1103406247013356e-06, + "loss": 0.3948165476322174, + "step": 4265 + }, + { + "epoch": 0.9836292367996311, + "grad_norm": 1.3893062538653607, + "learning_rate": 1.1099616834923212e-06, + "loss": 0.41744932532310486, + "step": 4266 + }, + { + "epoch": 0.9838598109292137, + "grad_norm": 1.3638196246051946, + "learning_rate": 1.1095827262992611e-06, + "loss": 0.4701330065727234, + "step": 4267 + }, + { + "epoch": 0.9840903850587964, + "grad_norm": 1.4764746527882953, + "learning_rate": 1.109203753177242e-06, + "loss": 0.4841681718826294, + "step": 4268 + }, + { + "epoch": 0.984320959188379, + "grad_norm": 1.3604414964396274, + "learning_rate": 1.10882476418135e-06, + "loss": 0.4180435538291931, + "step": 4269 + }, + { + "epoch": 0.9845515333179617, + "grad_norm": 1.4211218067668543, + "learning_rate": 1.1084457593666758e-06, + "loss": 0.39362633228302, + "step": 4270 + }, + { + "epoch": 0.9847821074475444, + "grad_norm": 1.4239354595534417, + "learning_rate": 1.1080667387883116e-06, + "loss": 0.5192993879318237, + "step": 4271 + }, + { + "epoch": 0.9850126815771271, + "grad_norm": 1.5201720088447181, + "learning_rate": 1.1076877025013517e-06, + "loss": 0.48835504055023193, + "step": 4272 + }, + { + "epoch": 0.9852432557067097, + "grad_norm": 1.5142338003412266, + "learning_rate": 1.1073086505608925e-06, + "loss": 0.44442474842071533, + "step": 4273 + }, + { + "epoch": 0.9854738298362924, + "grad_norm": 1.3436041344969518, + "learning_rate": 1.1069295830220339e-06, + "loss": 0.4544455409049988, + "step": 4274 + }, + { + "epoch": 0.985704403965875, + "grad_norm": 1.5833831369807498, + "learning_rate": 1.106550499939876e-06, + "loss": 0.482341468334198, + "step": 4275 + }, + { + "epoch": 0.9859349780954577, + "grad_norm": 1.421534858967002, + "learning_rate": 
1.1061714013695236e-06, + "loss": 0.5251357555389404, + "step": 4276 + }, + { + "epoch": 0.9861655522250403, + "grad_norm": 1.2537356796939523, + "learning_rate": 1.1057922873660819e-06, + "loss": 0.4538683295249939, + "step": 4277 + }, + { + "epoch": 0.986396126354623, + "grad_norm": 2.0128553783671728, + "learning_rate": 1.105413157984659e-06, + "loss": 0.5112448930740356, + "step": 4278 + }, + { + "epoch": 0.9866267004842056, + "grad_norm": 1.4914994042257563, + "learning_rate": 1.1050340132803654e-06, + "loss": 0.48863890767097473, + "step": 4279 + }, + { + "epoch": 0.9868572746137884, + "grad_norm": 1.494741313695512, + "learning_rate": 1.1046548533083134e-06, + "loss": 0.43637439608573914, + "step": 4280 + }, + { + "epoch": 0.987087848743371, + "grad_norm": 1.5727176113962202, + "learning_rate": 1.104275678123618e-06, + "loss": 0.5231983065605164, + "step": 4281 + }, + { + "epoch": 0.9873184228729537, + "grad_norm": 1.7169447967595874, + "learning_rate": 1.1038964877813955e-06, + "loss": 0.46838122606277466, + "step": 4282 + }, + { + "epoch": 0.9875489970025363, + "grad_norm": 1.3537630033218837, + "learning_rate": 1.1035172823367658e-06, + "loss": 0.4330589473247528, + "step": 4283 + }, + { + "epoch": 0.987779571132119, + "grad_norm": 1.4178119046272273, + "learning_rate": 1.1031380618448501e-06, + "loss": 0.44962531328201294, + "step": 4284 + }, + { + "epoch": 0.9880101452617016, + "grad_norm": 1.3547255909489988, + "learning_rate": 1.1027588263607719e-06, + "loss": 0.44549795985221863, + "step": 4285 + }, + { + "epoch": 0.9882407193912843, + "grad_norm": 1.7082954293487662, + "learning_rate": 1.1023795759396568e-06, + "loss": 0.43510758876800537, + "step": 4286 + }, + { + "epoch": 0.9884712935208669, + "grad_norm": 1.3135837847563279, + "learning_rate": 1.1020003106366324e-06, + "loss": 0.4369906187057495, + "step": 4287 + }, + { + "epoch": 0.9887018676504497, + "grad_norm": 1.416650593568537, + "learning_rate": 1.1016210305068296e-06, + "loss": 
0.42049574851989746, + "step": 4288 + }, + { + "epoch": 0.9889324417800323, + "grad_norm": 1.6285692706476314, + "learning_rate": 1.10124173560538e-06, + "loss": 0.449156790971756, + "step": 4289 + }, + { + "epoch": 0.989163015909615, + "grad_norm": 1.5784410678150576, + "learning_rate": 1.1008624259874177e-06, + "loss": 0.4736451506614685, + "step": 4290 + }, + { + "epoch": 0.9893935900391976, + "grad_norm": 1.3029401584123959, + "learning_rate": 1.10048310170808e-06, + "loss": 0.3988722860813141, + "step": 4291 + }, + { + "epoch": 0.9896241641687803, + "grad_norm": 1.4221756045070393, + "learning_rate": 1.100103762822505e-06, + "loss": 0.44330862164497375, + "step": 4292 + }, + { + "epoch": 0.9898547382983629, + "grad_norm": 1.5471015099626197, + "learning_rate": 1.0997244093858336e-06, + "loss": 0.5294286608695984, + "step": 4293 + }, + { + "epoch": 0.9900853124279456, + "grad_norm": 1.3808712553027187, + "learning_rate": 1.0993450414532082e-06, + "loss": 0.463120698928833, + "step": 4294 + }, + { + "epoch": 0.9903158865575282, + "grad_norm": 1.294463919332552, + "learning_rate": 1.0989656590797747e-06, + "loss": 0.4481865167617798, + "step": 4295 + }, + { + "epoch": 0.9905464606871109, + "grad_norm": 1.4153337646078945, + "learning_rate": 1.0985862623206794e-06, + "loss": 0.4467630386352539, + "step": 4296 + }, + { + "epoch": 0.9907770348166935, + "grad_norm": 1.8865527079498654, + "learning_rate": 1.0982068512310717e-06, + "loss": 0.43485027551651, + "step": 4297 + }, + { + "epoch": 0.9910076089462763, + "grad_norm": 1.5277390713389145, + "learning_rate": 1.0978274258661032e-06, + "loss": 0.4556450843811035, + "step": 4298 + }, + { + "epoch": 0.9912381830758589, + "grad_norm": 1.4768070925377026, + "learning_rate": 1.0974479862809268e-06, + "loss": 0.48326122760772705, + "step": 4299 + }, + { + "epoch": 0.9914687572054416, + "grad_norm": 1.1782147993424035, + "learning_rate": 1.097068532530698e-06, + "loss": 0.42254534363746643, + "step": 4300 + }, + { + 
"epoch": 0.9916993313350242, + "grad_norm": 1.3623288149981243, + "learning_rate": 1.096689064670574e-06, + "loss": 0.4076887369155884, + "step": 4301 + }, + { + "epoch": 0.9919299054646069, + "grad_norm": 1.4246737986617306, + "learning_rate": 1.0963095827557146e-06, + "loss": 0.40615612268447876, + "step": 4302 + }, + { + "epoch": 0.9921604795941895, + "grad_norm": 1.391998245639926, + "learning_rate": 1.095930086841281e-06, + "loss": 0.47794467210769653, + "step": 4303 + }, + { + "epoch": 0.9923910537237722, + "grad_norm": 1.479591301344316, + "learning_rate": 1.0955505769824375e-06, + "loss": 0.4927758574485779, + "step": 4304 + }, + { + "epoch": 0.9926216278533548, + "grad_norm": 1.1962407216416377, + "learning_rate": 1.0951710532343493e-06, + "loss": 0.40777790546417236, + "step": 4305 + }, + { + "epoch": 0.9928522019829376, + "grad_norm": 1.2781565166204398, + "learning_rate": 1.0947915156521837e-06, + "loss": 0.41996532678604126, + "step": 4306 + }, + { + "epoch": 0.9930827761125202, + "grad_norm": 1.3495931588969972, + "learning_rate": 1.0944119642911107e-06, + "loss": 0.4366680383682251, + "step": 4307 + }, + { + "epoch": 0.9933133502421029, + "grad_norm": 1.4609250216040512, + "learning_rate": 1.094032399206302e-06, + "loss": 0.5350530743598938, + "step": 4308 + }, + { + "epoch": 0.9935439243716855, + "grad_norm": 1.5545326791900604, + "learning_rate": 1.093652820452931e-06, + "loss": 0.5166209936141968, + "step": 4309 + }, + { + "epoch": 0.9937744985012682, + "grad_norm": 1.3624754056256652, + "learning_rate": 1.0932732280861734e-06, + "loss": 0.5104992389678955, + "step": 4310 + }, + { + "epoch": 0.9940050726308508, + "grad_norm": 1.293281056582964, + "learning_rate": 1.0928936221612068e-06, + "loss": 0.38249820470809937, + "step": 4311 + }, + { + "epoch": 0.9942356467604335, + "grad_norm": 1.5718744647134053, + "learning_rate": 1.0925140027332107e-06, + "loss": 0.4930746555328369, + "step": 4312 + }, + { + "epoch": 0.9944662208900161, + "grad_norm": 
1.5006868919231642, + "learning_rate": 1.092134369857367e-06, + "loss": 0.46536654233932495, + "step": 4313 + }, + { + "epoch": 0.9946967950195988, + "grad_norm": 1.5384946564391833, + "learning_rate": 1.0917547235888582e-06, + "loss": 0.4591559171676636, + "step": 4314 + }, + { + "epoch": 0.9949273691491815, + "grad_norm": 1.609102883203802, + "learning_rate": 1.0913750639828709e-06, + "loss": 0.5034719705581665, + "step": 4315 + }, + { + "epoch": 0.9951579432787642, + "grad_norm": 1.3461654572756176, + "learning_rate": 1.0909953910945921e-06, + "loss": 0.5289135575294495, + "step": 4316 + }, + { + "epoch": 0.9953885174083468, + "grad_norm": 1.5181970245510374, + "learning_rate": 1.090615704979211e-06, + "loss": 0.48736900091171265, + "step": 4317 + }, + { + "epoch": 0.9956190915379295, + "grad_norm": 1.347314123709775, + "learning_rate": 1.0902360056919186e-06, + "loss": 0.44812899827957153, + "step": 4318 + }, + { + "epoch": 0.9958496656675121, + "grad_norm": 1.717313100956624, + "learning_rate": 1.0898562932879083e-06, + "loss": 0.42837953567504883, + "step": 4319 + }, + { + "epoch": 0.9960802397970948, + "grad_norm": 1.3616068420969312, + "learning_rate": 1.089476567822375e-06, + "loss": 0.4946538805961609, + "step": 4320 + }, + { + "epoch": 0.9963108139266774, + "grad_norm": 1.3738772638549184, + "learning_rate": 1.089096829350516e-06, + "loss": 0.472694993019104, + "step": 4321 + }, + { + "epoch": 0.9965413880562601, + "grad_norm": 1.51102718471871, + "learning_rate": 1.0887170779275297e-06, + "loss": 0.546560525894165, + "step": 4322 + }, + { + "epoch": 0.9967719621858427, + "grad_norm": 1.7144585803126207, + "learning_rate": 1.088337313608617e-06, + "loss": 0.5098580718040466, + "step": 4323 + }, + { + "epoch": 0.9970025363154255, + "grad_norm": 1.4511718916783138, + "learning_rate": 1.0879575364489807e-06, + "loss": 0.4127371907234192, + "step": 4324 + }, + { + "epoch": 0.9972331104450081, + "grad_norm": 1.361622993253284, + "learning_rate": 
1.0875777465038249e-06, + "loss": 0.41234201192855835, + "step": 4325 + }, + { + "epoch": 0.9974636845745908, + "grad_norm": 1.334187068919988, + "learning_rate": 1.087197943828356e-06, + "loss": 0.42657697200775146, + "step": 4326 + }, + { + "epoch": 0.9976942587041734, + "grad_norm": 1.5731685077464828, + "learning_rate": 1.0868181284777825e-06, + "loss": 0.5168975591659546, + "step": 4327 + }, + { + "epoch": 0.9979248328337561, + "grad_norm": 1.3417267376651396, + "learning_rate": 1.0864383005073142e-06, + "loss": 0.4712294340133667, + "step": 4328 + }, + { + "epoch": 0.9981554069633387, + "grad_norm": 1.514146578387226, + "learning_rate": 1.0860584599721624e-06, + "loss": 0.4685649871826172, + "step": 4329 + }, + { + "epoch": 0.9983859810929214, + "grad_norm": 1.4104009699586146, + "learning_rate": 1.0856786069275417e-06, + "loss": 0.4699268937110901, + "step": 4330 + }, + { + "epoch": 0.998616555222504, + "grad_norm": 1.5072273981885642, + "learning_rate": 1.0852987414286669e-06, + "loss": 0.44216299057006836, + "step": 4331 + }, + { + "epoch": 0.9988471293520867, + "grad_norm": 1.489870947647978, + "learning_rate": 1.0849188635307558e-06, + "loss": 0.4374035894870758, + "step": 4332 + }, + { + "epoch": 0.9990777034816694, + "grad_norm": 1.396380314188184, + "learning_rate": 1.0845389732890269e-06, + "loss": 0.4538502097129822, + "step": 4333 + }, + { + "epoch": 0.9993082776112521, + "grad_norm": 1.5201233043344708, + "learning_rate": 1.0841590707587017e-06, + "loss": 0.4432523250579834, + "step": 4334 + }, + { + "epoch": 0.9995388517408347, + "grad_norm": 1.3401246835224159, + "learning_rate": 1.0837791559950026e-06, + "loss": 0.3614054322242737, + "step": 4335 + }, + { + "epoch": 0.9997694258704174, + "grad_norm": 1.5241184734301618, + "learning_rate": 1.0833992290531542e-06, + "loss": 0.5412651300430298, + "step": 4336 + }, + { + "epoch": 1.0, + "grad_norm": 1.3961487739465548, + "learning_rate": 1.0830192899883825e-06, + "loss": 0.43333327770233154, + 
"step": 4337 + }, + { + "epoch": 1.0002305741295827, + "grad_norm": 1.3739097269887006, + "learning_rate": 1.0826393388559156e-06, + "loss": 0.40433377027511597, + "step": 4338 + }, + { + "epoch": 1.0004611482591652, + "grad_norm": 1.5246903566917884, + "learning_rate": 1.0822593757109835e-06, + "loss": 0.49699902534484863, + "step": 4339 + }, + { + "epoch": 1.000691722388748, + "grad_norm": 1.4093275236950669, + "learning_rate": 1.0818794006088174e-06, + "loss": 0.4992629289627075, + "step": 4340 + }, + { + "epoch": 1.0009222965183306, + "grad_norm": 1.546985643456235, + "learning_rate": 1.0814994136046503e-06, + "loss": 0.39532744884490967, + "step": 4341 + }, + { + "epoch": 1.0011528706479134, + "grad_norm": 1.4715614082094945, + "learning_rate": 1.0811194147537177e-06, + "loss": 0.48260024189949036, + "step": 4342 + }, + { + "epoch": 1.0013834447774959, + "grad_norm": 1.1813818983438111, + "learning_rate": 1.0807394041112562e-06, + "loss": 0.40896737575531006, + "step": 4343 + }, + { + "epoch": 1.0016140189070786, + "grad_norm": 1.373003199387245, + "learning_rate": 1.0803593817325037e-06, + "loss": 0.361757755279541, + "step": 4344 + }, + { + "epoch": 1.0018445930366613, + "grad_norm": 1.3113582417275997, + "learning_rate": 1.0799793476727006e-06, + "loss": 0.5524640083312988, + "step": 4345 + }, + { + "epoch": 1.002075167166244, + "grad_norm": 1.4504745740569693, + "learning_rate": 1.0795993019870891e-06, + "loss": 0.4798622727394104, + "step": 4346 + }, + { + "epoch": 1.0023057412958265, + "grad_norm": 1.1125620580650875, + "learning_rate": 1.079219244730912e-06, + "loss": 0.3408532440662384, + "step": 4347 + }, + { + "epoch": 1.0025363154254092, + "grad_norm": 1.6198320758392701, + "learning_rate": 1.0788391759594152e-06, + "loss": 0.4185452461242676, + "step": 4348 + }, + { + "epoch": 1.002766889554992, + "grad_norm": 1.4569047754589481, + "learning_rate": 1.078459095727845e-06, + "loss": 0.4656596779823303, + "step": 4349 + }, + { + "epoch": 
1.0029974636845747, + "grad_norm": 1.2861299587948707, + "learning_rate": 1.07807900409145e-06, + "loss": 0.45649081468582153, + "step": 4350 + }, + { + "epoch": 1.0032280378141571, + "grad_norm": 1.4368410869138808, + "learning_rate": 1.0776989011054806e-06, + "loss": 0.4732903242111206, + "step": 4351 + }, + { + "epoch": 1.0034586119437399, + "grad_norm": 1.4875640347613817, + "learning_rate": 1.0773187868251882e-06, + "loss": 0.5313757658004761, + "step": 4352 + }, + { + "epoch": 1.0036891860733226, + "grad_norm": 1.7663418153227872, + "learning_rate": 1.0769386613058267e-06, + "loss": 0.5373719334602356, + "step": 4353 + }, + { + "epoch": 1.0039197602029053, + "grad_norm": 1.4108655227977445, + "learning_rate": 1.076558524602651e-06, + "loss": 0.4530528783798218, + "step": 4354 + }, + { + "epoch": 1.0041503343324878, + "grad_norm": 2.0172927781638816, + "learning_rate": 1.076178376770918e-06, + "loss": 0.361511766910553, + "step": 4355 + }, + { + "epoch": 1.0043809084620705, + "grad_norm": 1.5430566364369291, + "learning_rate": 1.0757982178658857e-06, + "loss": 0.4260486364364624, + "step": 4356 + }, + { + "epoch": 1.0046114825916532, + "grad_norm": 1.4352564218347874, + "learning_rate": 1.0754180479428142e-06, + "loss": 0.4765712320804596, + "step": 4357 + }, + { + "epoch": 1.004842056721236, + "grad_norm": 1.408849526827852, + "learning_rate": 1.0750378670569652e-06, + "loss": 0.485443115234375, + "step": 4358 + }, + { + "epoch": 1.0050726308508184, + "grad_norm": 1.3833154190721015, + "learning_rate": 1.074657675263602e-06, + "loss": 0.5010418891906738, + "step": 4359 + }, + { + "epoch": 1.0053032049804012, + "grad_norm": 1.2138138176978153, + "learning_rate": 1.074277472617989e-06, + "loss": 0.42195719480514526, + "step": 4360 + }, + { + "epoch": 1.0055337791099839, + "grad_norm": 1.4341592826356415, + "learning_rate": 1.073897259175392e-06, + "loss": 0.48555606603622437, + "step": 4361 + }, + { + "epoch": 1.0057643532395666, + "grad_norm": 
1.4030257216310642, + "learning_rate": 1.07351703499108e-06, + "loss": 0.4991112947463989, + "step": 4362 + }, + { + "epoch": 1.005994927369149, + "grad_norm": 1.365972754336138, + "learning_rate": 1.0731368001203217e-06, + "loss": 0.43016430735588074, + "step": 4363 + }, + { + "epoch": 1.0062255014987318, + "grad_norm": 1.635861674358112, + "learning_rate": 1.0727565546183883e-06, + "loss": 0.47147876024246216, + "step": 4364 + }, + { + "epoch": 1.0064560756283145, + "grad_norm": 1.4724107461573035, + "learning_rate": 1.0723762985405522e-06, + "loss": 0.4695407748222351, + "step": 4365 + }, + { + "epoch": 1.0066866497578972, + "grad_norm": 1.4167512288976294, + "learning_rate": 1.0719960319420878e-06, + "loss": 0.42666512727737427, + "step": 4366 + }, + { + "epoch": 1.0069172238874797, + "grad_norm": 1.4965231034133355, + "learning_rate": 1.0716157548782705e-06, + "loss": 0.5685237050056458, + "step": 4367 + }, + { + "epoch": 1.0071477980170624, + "grad_norm": 1.2856237164503312, + "learning_rate": 1.0712354674043774e-06, + "loss": 0.45181894302368164, + "step": 4368 + }, + { + "epoch": 1.0073783721466452, + "grad_norm": 1.479568259964695, + "learning_rate": 1.070855169575687e-06, + "loss": 0.4079795479774475, + "step": 4369 + }, + { + "epoch": 1.0076089462762279, + "grad_norm": 1.196685278300245, + "learning_rate": 1.0704748614474798e-06, + "loss": 0.4011094570159912, + "step": 4370 + }, + { + "epoch": 1.0078395204058104, + "grad_norm": 1.5280378960817975, + "learning_rate": 1.0700945430750373e-06, + "loss": 0.48842671513557434, + "step": 4371 + }, + { + "epoch": 1.008070094535393, + "grad_norm": 1.237232307792151, + "learning_rate": 1.0697142145136425e-06, + "loss": 0.5183907151222229, + "step": 4372 + }, + { + "epoch": 1.0083006686649758, + "grad_norm": 1.4080736997180416, + "learning_rate": 1.0693338758185797e-06, + "loss": 0.5022784471511841, + "step": 4373 + }, + { + "epoch": 1.0085312427945585, + "grad_norm": 1.5160750764739457, + "learning_rate": 
1.0689535270451358e-06, + "loss": 0.500054121017456, + "step": 4374 + }, + { + "epoch": 1.008761816924141, + "grad_norm": 1.331407944528498, + "learning_rate": 1.068573168248598e-06, + "loss": 0.43674880266189575, + "step": 4375 + }, + { + "epoch": 1.0089923910537237, + "grad_norm": 1.3441260000045296, + "learning_rate": 1.068192799484255e-06, + "loss": 0.4272059202194214, + "step": 4376 + }, + { + "epoch": 1.0092229651833065, + "grad_norm": 1.3188087584834265, + "learning_rate": 1.0678124208073972e-06, + "loss": 0.41053932905197144, + "step": 4377 + }, + { + "epoch": 1.0094535393128892, + "grad_norm": 1.3285405544041065, + "learning_rate": 1.0674320322733173e-06, + "loss": 0.4571593701839447, + "step": 4378 + }, + { + "epoch": 1.0096841134424717, + "grad_norm": 1.2947195973212757, + "learning_rate": 1.0670516339373081e-06, + "loss": 0.464965283870697, + "step": 4379 + }, + { + "epoch": 1.0099146875720544, + "grad_norm": 1.2757697611295247, + "learning_rate": 1.0666712258546639e-06, + "loss": 0.4086726903915405, + "step": 4380 + }, + { + "epoch": 1.010145261701637, + "grad_norm": 1.3664230084580502, + "learning_rate": 1.0662908080806815e-06, + "loss": 0.49988412857055664, + "step": 4381 + }, + { + "epoch": 1.0103758358312198, + "grad_norm": 1.33263070405775, + "learning_rate": 1.0659103806706587e-06, + "loss": 0.3976360559463501, + "step": 4382 + }, + { + "epoch": 1.0106064099608023, + "grad_norm": 1.3554444243435904, + "learning_rate": 1.065529943679894e-06, + "loss": 0.4500683546066284, + "step": 4383 + }, + { + "epoch": 1.010836984090385, + "grad_norm": 1.4532099828866123, + "learning_rate": 1.0651494971636875e-06, + "loss": 0.5617754459381104, + "step": 4384 + }, + { + "epoch": 1.0110675582199677, + "grad_norm": 1.2285766706051995, + "learning_rate": 1.0647690411773414e-06, + "loss": 0.4180886745452881, + "step": 4385 + }, + { + "epoch": 1.0112981323495505, + "grad_norm": 1.3797895213155087, + "learning_rate": 1.0643885757761588e-06, + "loss": 
0.406663179397583, + "step": 4386 + }, + { + "epoch": 1.011528706479133, + "grad_norm": 1.2899676326462104, + "learning_rate": 1.0640081010154443e-06, + "loss": 0.4698946475982666, + "step": 4387 + }, + { + "epoch": 1.0117592806087157, + "grad_norm": 1.2421672055806043, + "learning_rate": 1.0636276169505034e-06, + "loss": 0.4845995306968689, + "step": 4388 + }, + { + "epoch": 1.0119898547382984, + "grad_norm": 1.7127723444190444, + "learning_rate": 1.0632471236366435e-06, + "loss": 0.5065066814422607, + "step": 4389 + }, + { + "epoch": 1.012220428867881, + "grad_norm": 1.5183614166838566, + "learning_rate": 1.0628666211291735e-06, + "loss": 0.4302946925163269, + "step": 4390 + }, + { + "epoch": 1.0124510029974636, + "grad_norm": 1.682116735922279, + "learning_rate": 1.0624861094834029e-06, + "loss": 0.5772345066070557, + "step": 4391 + }, + { + "epoch": 1.0126815771270463, + "grad_norm": 1.3399536785573158, + "learning_rate": 1.0621055887546425e-06, + "loss": 0.5294336080551147, + "step": 4392 + }, + { + "epoch": 1.012912151256629, + "grad_norm": 1.1967430772955985, + "learning_rate": 1.0617250589982059e-06, + "loss": 0.5028249621391296, + "step": 4393 + }, + { + "epoch": 1.0131427253862118, + "grad_norm": 1.3120231857267954, + "learning_rate": 1.0613445202694065e-06, + "loss": 0.5072348713874817, + "step": 4394 + }, + { + "epoch": 1.0133732995157942, + "grad_norm": 1.3107230472369709, + "learning_rate": 1.060963972623559e-06, + "loss": 0.3632262945175171, + "step": 4395 + }, + { + "epoch": 1.013603873645377, + "grad_norm": 1.4739700660925632, + "learning_rate": 1.06058341611598e-06, + "loss": 0.419277161359787, + "step": 4396 + }, + { + "epoch": 1.0138344477749597, + "grad_norm": 1.4201089967708693, + "learning_rate": 1.060202850801988e-06, + "loss": 0.4056069850921631, + "step": 4397 + }, + { + "epoch": 1.0140650219045424, + "grad_norm": 1.4908298419223913, + "learning_rate": 1.0598222767369014e-06, + "loss": 0.5591505765914917, + "step": 4398 + }, + { + "epoch": 
1.014295596034125, + "grad_norm": 1.2646885984398546, + "learning_rate": 1.0594416939760408e-06, + "loss": 0.38529443740844727, + "step": 4399 + }, + { + "epoch": 1.0145261701637076, + "grad_norm": 1.3255980825912217, + "learning_rate": 1.0590611025747272e-06, + "loss": 0.3609437644481659, + "step": 4400 + }, + { + "epoch": 1.0147567442932903, + "grad_norm": 1.3538282738769345, + "learning_rate": 1.058680502588284e-06, + "loss": 0.4849050045013428, + "step": 4401 + }, + { + "epoch": 1.014987318422873, + "grad_norm": 1.4516377120705455, + "learning_rate": 1.058299894072035e-06, + "loss": 0.39454251527786255, + "step": 4402 + }, + { + "epoch": 1.0152178925524555, + "grad_norm": 1.5578248119945644, + "learning_rate": 1.0579192770813052e-06, + "loss": 0.39726459980010986, + "step": 4403 + }, + { + "epoch": 1.0154484666820383, + "grad_norm": 1.4398814364290877, + "learning_rate": 1.0575386516714218e-06, + "loss": 0.4730626940727234, + "step": 4404 + }, + { + "epoch": 1.015679040811621, + "grad_norm": 1.5842749126492264, + "learning_rate": 1.0571580178977123e-06, + "loss": 0.5436214804649353, + "step": 4405 + }, + { + "epoch": 1.0159096149412037, + "grad_norm": 1.4188700773135285, + "learning_rate": 1.0567773758155055e-06, + "loss": 0.4197273850440979, + "step": 4406 + }, + { + "epoch": 1.0161401890707862, + "grad_norm": 1.2873423308659837, + "learning_rate": 1.0563967254801316e-06, + "loss": 0.46460944414138794, + "step": 4407 + }, + { + "epoch": 1.016370763200369, + "grad_norm": 1.3771325056314752, + "learning_rate": 1.056016066946922e-06, + "loss": 0.3504630923271179, + "step": 4408 + }, + { + "epoch": 1.0166013373299516, + "grad_norm": 1.3484234762530152, + "learning_rate": 1.0556354002712098e-06, + "loss": 0.4620180130004883, + "step": 4409 + }, + { + "epoch": 1.0168319114595343, + "grad_norm": 1.414975730602458, + "learning_rate": 1.0552547255083283e-06, + "loss": 0.5642764568328857, + "step": 4410 + }, + { + "epoch": 1.0170624855891168, + "grad_norm": 
1.3858649703726607, + "learning_rate": 1.054874042713612e-06, + "loss": 0.48283201456069946, + "step": 4411 + }, + { + "epoch": 1.0172930597186995, + "grad_norm": 1.3477248933257546, + "learning_rate": 1.0544933519423976e-06, + "loss": 0.5346091985702515, + "step": 4412 + }, + { + "epoch": 1.0175236338482823, + "grad_norm": 1.216774984460132, + "learning_rate": 1.0541126532500224e-06, + "loss": 0.4710259437561035, + "step": 4413 + }, + { + "epoch": 1.017754207977865, + "grad_norm": 1.6611025915045114, + "learning_rate": 1.0537319466918243e-06, + "loss": 0.535955548286438, + "step": 4414 + }, + { + "epoch": 1.0179847821074475, + "grad_norm": 1.298601209078171, + "learning_rate": 1.0533512323231438e-06, + "loss": 0.4127902388572693, + "step": 4415 + }, + { + "epoch": 1.0182153562370302, + "grad_norm": 1.6222892430544704, + "learning_rate": 1.0529705101993203e-06, + "loss": 0.5209894180297852, + "step": 4416 + }, + { + "epoch": 1.018445930366613, + "grad_norm": 1.5702821211846574, + "learning_rate": 1.0525897803756967e-06, + "loss": 0.45600390434265137, + "step": 4417 + }, + { + "epoch": 1.0186765044961956, + "grad_norm": 1.6858904509627837, + "learning_rate": 1.0522090429076155e-06, + "loss": 0.5043426156044006, + "step": 4418 + }, + { + "epoch": 1.0189070786257781, + "grad_norm": 1.8442717417612486, + "learning_rate": 1.0518282978504207e-06, + "loss": 0.43386173248291016, + "step": 4419 + }, + { + "epoch": 1.0191376527553608, + "grad_norm": 1.4810433748538916, + "learning_rate": 1.0514475452594578e-06, + "loss": 0.44956767559051514, + "step": 4420 + }, + { + "epoch": 1.0193682268849436, + "grad_norm": 1.4162663845873593, + "learning_rate": 1.0510667851900726e-06, + "loss": 0.47164878249168396, + "step": 4421 + }, + { + "epoch": 1.0195988010145263, + "grad_norm": 1.3111398742961289, + "learning_rate": 1.0506860176976127e-06, + "loss": 0.4977136552333832, + "step": 4422 + }, + { + "epoch": 1.0198293751441088, + "grad_norm": 1.2272027402421368, + "learning_rate": 
1.0503052428374264e-06, + "loss": 0.4344305396080017, + "step": 4423 + }, + { + "epoch": 1.0200599492736915, + "grad_norm": 1.4594484344103595, + "learning_rate": 1.049924460664863e-06, + "loss": 0.46536487340927124, + "step": 4424 + }, + { + "epoch": 1.0202905234032742, + "grad_norm": 1.5676489928965973, + "learning_rate": 1.0495436712352733e-06, + "loss": 0.4583844840526581, + "step": 4425 + }, + { + "epoch": 1.020521097532857, + "grad_norm": 1.3353943490467204, + "learning_rate": 1.049162874604009e-06, + "loss": 0.4098002314567566, + "step": 4426 + }, + { + "epoch": 1.0207516716624394, + "grad_norm": 1.5212892459953231, + "learning_rate": 1.0487820708264227e-06, + "loss": 0.48168665170669556, + "step": 4427 + }, + { + "epoch": 1.0209822457920221, + "grad_norm": 1.575752706874104, + "learning_rate": 1.048401259957868e-06, + "loss": 0.5517562627792358, + "step": 4428 + }, + { + "epoch": 1.0212128199216048, + "grad_norm": 1.4762864972879257, + "learning_rate": 1.0480204420536998e-06, + "loss": 0.5131476521492004, + "step": 4429 + }, + { + "epoch": 1.0214433940511876, + "grad_norm": 1.3669237261259728, + "learning_rate": 1.0476396171692734e-06, + "loss": 0.4590519666671753, + "step": 4430 + }, + { + "epoch": 1.02167396818077, + "grad_norm": 1.6209541549743127, + "learning_rate": 1.0472587853599458e-06, + "loss": 0.5581461191177368, + "step": 4431 + }, + { + "epoch": 1.0219045423103528, + "grad_norm": 1.9464318549736228, + "learning_rate": 1.046877946681075e-06, + "loss": 0.4169657826423645, + "step": 4432 + }, + { + "epoch": 1.0221351164399355, + "grad_norm": 1.6990409231148407, + "learning_rate": 1.0464971011880195e-06, + "loss": 0.48135459423065186, + "step": 4433 + }, + { + "epoch": 1.0223656905695182, + "grad_norm": 1.5888684830629844, + "learning_rate": 1.046116248936139e-06, + "loss": 0.5116040706634521, + "step": 4434 + }, + { + "epoch": 1.0225962646991007, + "grad_norm": 1.2239425777755701, + "learning_rate": 1.0457353899807946e-06, + "loss": 
0.4369809329509735, + "step": 4435 + }, + { + "epoch": 1.0228268388286834, + "grad_norm": 1.3094581394180187, + "learning_rate": 1.0453545243773474e-06, + "loss": 0.42936772108078003, + "step": 4436 + }, + { + "epoch": 1.0230574129582661, + "grad_norm": 1.4191745941139933, + "learning_rate": 1.0449736521811605e-06, + "loss": 0.3614712357521057, + "step": 4437 + }, + { + "epoch": 1.0232879870878488, + "grad_norm": 1.4958077731615864, + "learning_rate": 1.0445927734475977e-06, + "loss": 0.40728119015693665, + "step": 4438 + }, + { + "epoch": 1.0235185612174313, + "grad_norm": 1.6199665099354292, + "learning_rate": 1.0442118882320233e-06, + "loss": 0.4940561056137085, + "step": 4439 + }, + { + "epoch": 1.023749135347014, + "grad_norm": 1.5292135898443935, + "learning_rate": 1.0438309965898027e-06, + "loss": 0.49529674649238586, + "step": 4440 + }, + { + "epoch": 1.0239797094765968, + "grad_norm": 1.3839632419664316, + "learning_rate": 1.0434500985763027e-06, + "loss": 0.4849408268928528, + "step": 4441 + }, + { + "epoch": 1.0242102836061795, + "grad_norm": 1.2306090654878221, + "learning_rate": 1.0430691942468903e-06, + "loss": 0.4121132791042328, + "step": 4442 + }, + { + "epoch": 1.024440857735762, + "grad_norm": 1.3788405992777184, + "learning_rate": 1.042688283656934e-06, + "loss": 0.4348478317260742, + "step": 4443 + }, + { + "epoch": 1.0246714318653447, + "grad_norm": 1.4946594419770094, + "learning_rate": 1.0423073668618033e-06, + "loss": 0.46817919611930847, + "step": 4444 + }, + { + "epoch": 1.0249020059949274, + "grad_norm": 1.4309128927667782, + "learning_rate": 1.041926443916868e-06, + "loss": 0.4422008991241455, + "step": 4445 + }, + { + "epoch": 1.02513258012451, + "grad_norm": 1.4766353003575698, + "learning_rate": 1.041545514877499e-06, + "loss": 0.5108183026313782, + "step": 4446 + }, + { + "epoch": 1.0253631542540926, + "grad_norm": 1.4287581583003561, + "learning_rate": 1.0411645797990685e-06, + "loss": 0.4759529232978821, + "step": 4447 + }, + { + 
"epoch": 1.0255937283836754, + "grad_norm": 1.4822019265627726, + "learning_rate": 1.040783638736949e-06, + "loss": 0.44447648525238037, + "step": 4448 + }, + { + "epoch": 1.025824302513258, + "grad_norm": 1.9820121270715096, + "learning_rate": 1.0404026917465144e-06, + "loss": 0.4558752477169037, + "step": 4449 + }, + { + "epoch": 1.0260548766428408, + "grad_norm": 1.5117188074263472, + "learning_rate": 1.0400217388831393e-06, + "loss": 0.4728459417819977, + "step": 4450 + }, + { + "epoch": 1.0262854507724233, + "grad_norm": 1.2832295949174854, + "learning_rate": 1.0396407802021985e-06, + "loss": 0.4815519452095032, + "step": 4451 + }, + { + "epoch": 1.026516024902006, + "grad_norm": 1.493224641636315, + "learning_rate": 1.0392598157590685e-06, + "loss": 0.5173656344413757, + "step": 4452 + }, + { + "epoch": 1.0267465990315887, + "grad_norm": 1.389267472286255, + "learning_rate": 1.0388788456091267e-06, + "loss": 0.5280762910842896, + "step": 4453 + }, + { + "epoch": 1.0269771731611712, + "grad_norm": 1.3239342530675255, + "learning_rate": 1.0384978698077506e-06, + "loss": 0.4524118900299072, + "step": 4454 + }, + { + "epoch": 1.027207747290754, + "grad_norm": 1.3855017021962426, + "learning_rate": 1.0381168884103186e-06, + "loss": 0.4011715054512024, + "step": 4455 + }, + { + "epoch": 1.0274383214203366, + "grad_norm": 1.6664926632341406, + "learning_rate": 1.0377359014722108e-06, + "loss": 0.518020749092102, + "step": 4456 + }, + { + "epoch": 1.0276688955499194, + "grad_norm": 1.3443799803410221, + "learning_rate": 1.0373549090488073e-06, + "loss": 0.44726112484931946, + "step": 4457 + }, + { + "epoch": 1.0278994696795019, + "grad_norm": 1.5697915792497608, + "learning_rate": 1.0369739111954894e-06, + "loss": 0.5344264507293701, + "step": 4458 + }, + { + "epoch": 1.0281300438090846, + "grad_norm": 1.3300732692572412, + "learning_rate": 1.0365929079676387e-06, + "loss": 0.4902813732624054, + "step": 4459 + }, + { + "epoch": 1.0283606179386673, + "grad_norm": 
1.6676294678142136, + "learning_rate": 1.0362118994206378e-06, + "loss": 0.38346555829048157, + "step": 4460 + }, + { + "epoch": 1.02859119206825, + "grad_norm": 1.4992112279059755, + "learning_rate": 1.0358308856098705e-06, + "loss": 0.4232872724533081, + "step": 4461 + }, + { + "epoch": 1.0288217661978325, + "grad_norm": 1.4973168899301483, + "learning_rate": 1.0354498665907207e-06, + "loss": 0.5184470415115356, + "step": 4462 + }, + { + "epoch": 1.0290523403274152, + "grad_norm": 1.3344202325848402, + "learning_rate": 1.0350688424185733e-06, + "loss": 0.4989054203033447, + "step": 4463 + }, + { + "epoch": 1.029282914456998, + "grad_norm": 1.4348006325476266, + "learning_rate": 1.0346878131488145e-06, + "loss": 0.5204064249992371, + "step": 4464 + }, + { + "epoch": 1.0295134885865806, + "grad_norm": 1.5066284997527284, + "learning_rate": 1.0343067788368307e-06, + "loss": 0.47872811555862427, + "step": 4465 + }, + { + "epoch": 1.0297440627161631, + "grad_norm": 1.4195028916227292, + "learning_rate": 1.0339257395380087e-06, + "loss": 0.4104915261268616, + "step": 4466 + }, + { + "epoch": 1.0299746368457459, + "grad_norm": 1.3696214178005537, + "learning_rate": 1.0335446953077366e-06, + "loss": 0.39327263832092285, + "step": 4467 + }, + { + "epoch": 1.0302052109753286, + "grad_norm": 1.4702497550106948, + "learning_rate": 1.033163646201403e-06, + "loss": 0.4395657777786255, + "step": 4468 + }, + { + "epoch": 1.0304357851049113, + "grad_norm": 1.419425725268843, + "learning_rate": 1.0327825922743976e-06, + "loss": 0.462537944316864, + "step": 4469 + }, + { + "epoch": 1.0306663592344938, + "grad_norm": 1.3686105119540095, + "learning_rate": 1.03240153358211e-06, + "loss": 0.4399976134300232, + "step": 4470 + }, + { + "epoch": 1.0308969333640765, + "grad_norm": 1.2004518913155955, + "learning_rate": 1.0320204701799311e-06, + "loss": 0.4289684593677521, + "step": 4471 + }, + { + "epoch": 1.0311275074936592, + "grad_norm": 1.700414177665105, + "learning_rate": 
1.0316394021232524e-06, + "loss": 0.4771305322647095, + "step": 4472 + }, + { + "epoch": 1.031358081623242, + "grad_norm": 1.3381367861828992, + "learning_rate": 1.031258329467466e-06, + "loss": 0.4544849395751953, + "step": 4473 + }, + { + "epoch": 1.0315886557528244, + "grad_norm": 1.7319531178301495, + "learning_rate": 1.0308772522679646e-06, + "loss": 0.5362099409103394, + "step": 4474 + }, + { + "epoch": 1.0318192298824072, + "grad_norm": 1.564907240947497, + "learning_rate": 1.0304961705801413e-06, + "loss": 0.48966753482818604, + "step": 4475 + }, + { + "epoch": 1.0320498040119899, + "grad_norm": 1.379783010020372, + "learning_rate": 1.0301150844593908e-06, + "loss": 0.3750344216823578, + "step": 4476 + }, + { + "epoch": 1.0322803781415726, + "grad_norm": 1.3651499470494945, + "learning_rate": 1.0297339939611076e-06, + "loss": 0.453983873128891, + "step": 4477 + }, + { + "epoch": 1.032510952271155, + "grad_norm": 1.837467998410361, + "learning_rate": 1.029352899140687e-06, + "loss": 0.5096027255058289, + "step": 4478 + }, + { + "epoch": 1.0327415264007378, + "grad_norm": 1.395622916901131, + "learning_rate": 1.028971800053525e-06, + "loss": 0.4387558698654175, + "step": 4479 + }, + { + "epoch": 1.0329721005303205, + "grad_norm": 1.324708629656248, + "learning_rate": 1.0285906967550184e-06, + "loss": 0.45710843801498413, + "step": 4480 + }, + { + "epoch": 1.0332026746599032, + "grad_norm": 1.631576144246761, + "learning_rate": 1.0282095893005643e-06, + "loss": 0.5258994102478027, + "step": 4481 + }, + { + "epoch": 1.0334332487894857, + "grad_norm": 1.320456527047697, + "learning_rate": 1.0278284777455603e-06, + "loss": 0.5037236213684082, + "step": 4482 + }, + { + "epoch": 1.0336638229190684, + "grad_norm": 1.3671446032683054, + "learning_rate": 1.027447362145405e-06, + "loss": 0.4730300307273865, + "step": 4483 + }, + { + "epoch": 1.0338943970486512, + "grad_norm": 1.5284074958618745, + "learning_rate": 1.0270662425554974e-06, + "loss": 0.4373326301574707, + 
"step": 4484 + }, + { + "epoch": 1.0341249711782339, + "grad_norm": 1.379045843622324, + "learning_rate": 1.0266851190312373e-06, + "loss": 0.3915579319000244, + "step": 4485 + }, + { + "epoch": 1.0343555453078164, + "grad_norm": 1.3482794503547837, + "learning_rate": 1.0263039916280247e-06, + "loss": 0.36588191986083984, + "step": 4486 + }, + { + "epoch": 1.034586119437399, + "grad_norm": 1.2333606023937755, + "learning_rate": 1.0259228604012602e-06, + "loss": 0.4287286400794983, + "step": 4487 + }, + { + "epoch": 1.0348166935669818, + "grad_norm": 1.3775270616642934, + "learning_rate": 1.0255417254063454e-06, + "loss": 0.4405861496925354, + "step": 4488 + }, + { + "epoch": 1.0350472676965645, + "grad_norm": 1.443831892269548, + "learning_rate": 1.0251605866986818e-06, + "loss": 0.4859738349914551, + "step": 4489 + }, + { + "epoch": 1.035277841826147, + "grad_norm": 1.4103288990509777, + "learning_rate": 1.0247794443336722e-06, + "loss": 0.40879446268081665, + "step": 4490 + }, + { + "epoch": 1.0355084159557297, + "grad_norm": 1.4900612923986292, + "learning_rate": 1.024398298366719e-06, + "loss": 0.44872337579727173, + "step": 4491 + }, + { + "epoch": 1.0357389900853124, + "grad_norm": 1.3707597883324278, + "learning_rate": 1.0240171488532258e-06, + "loss": 0.41155117750167847, + "step": 4492 + }, + { + "epoch": 1.0359695642148952, + "grad_norm": 1.4935319402234073, + "learning_rate": 1.0236359958485966e-06, + "loss": 0.48941487073898315, + "step": 4493 + }, + { + "epoch": 1.0362001383444777, + "grad_norm": 1.3889526979110256, + "learning_rate": 1.0232548394082362e-06, + "loss": 0.4462544322013855, + "step": 4494 + }, + { + "epoch": 1.0364307124740604, + "grad_norm": 1.7635931454030804, + "learning_rate": 1.0228736795875487e-06, + "loss": 0.3791837692260742, + "step": 4495 + }, + { + "epoch": 1.036661286603643, + "grad_norm": 1.7988283203699307, + "learning_rate": 1.0224925164419404e-06, + "loss": 0.5037285685539246, + "step": 4496 + }, + { + "epoch": 
1.0368918607332258, + "grad_norm": 1.5033654685782605, + "learning_rate": 1.0221113500268169e-06, + "loss": 0.4762890636920929, + "step": 4497 + }, + { + "epoch": 1.0371224348628083, + "grad_norm": 1.2678994584792878, + "learning_rate": 1.0217301803975844e-06, + "loss": 0.4673793315887451, + "step": 4498 + }, + { + "epoch": 1.037353008992391, + "grad_norm": 1.4491139066226089, + "learning_rate": 1.0213490076096501e-06, + "loss": 0.37522250413894653, + "step": 4499 + }, + { + "epoch": 1.0375835831219737, + "grad_norm": 1.4197729369573655, + "learning_rate": 1.020967831718421e-06, + "loss": 0.4986375570297241, + "step": 4500 + }, + { + "epoch": 1.0378141572515565, + "grad_norm": 1.3424622189818292, + "learning_rate": 1.0205866527793053e-06, + "loss": 0.488337904214859, + "step": 4501 + }, + { + "epoch": 1.038044731381139, + "grad_norm": 1.2513264252251595, + "learning_rate": 1.0202054708477107e-06, + "loss": 0.37420767545700073, + "step": 4502 + }, + { + "epoch": 1.0382753055107217, + "grad_norm": 1.1901249454864467, + "learning_rate": 1.0198242859790465e-06, + "loss": 0.42453843355178833, + "step": 4503 + }, + { + "epoch": 1.0385058796403044, + "grad_norm": 1.5998980096348292, + "learning_rate": 1.0194430982287211e-06, + "loss": 0.4431978166103363, + "step": 4504 + }, + { + "epoch": 1.038736453769887, + "grad_norm": 1.2584649975167521, + "learning_rate": 1.0190619076521445e-06, + "loss": 0.5079195499420166, + "step": 4505 + }, + { + "epoch": 1.0389670278994696, + "grad_norm": 1.3630757915855334, + "learning_rate": 1.0186807143047263e-06, + "loss": 0.442915678024292, + "step": 4506 + }, + { + "epoch": 1.0391976020290523, + "grad_norm": 1.4946032354137926, + "learning_rate": 1.018299518241877e-06, + "loss": 0.4720972180366516, + "step": 4507 + }, + { + "epoch": 1.039428176158635, + "grad_norm": 1.407838633939113, + "learning_rate": 1.0179183195190073e-06, + "loss": 0.4637352526187897, + "step": 4508 + }, + { + "epoch": 1.0396587502882177, + "grad_norm": 
1.3457342565284411, + "learning_rate": 1.0175371181915283e-06, + "loss": 0.4207759499549866, + "step": 4509 + }, + { + "epoch": 1.0398893244178002, + "grad_norm": 1.5872196626053143, + "learning_rate": 1.0171559143148514e-06, + "loss": 0.49227845668792725, + "step": 4510 + }, + { + "epoch": 1.040119898547383, + "grad_norm": 1.4565076836431372, + "learning_rate": 1.0167747079443884e-06, + "loss": 0.5006893873214722, + "step": 4511 + }, + { + "epoch": 1.0403504726769657, + "grad_norm": 1.4618469895611303, + "learning_rate": 1.016393499135552e-06, + "loss": 0.42048192024230957, + "step": 4512 + }, + { + "epoch": 1.0405810468065484, + "grad_norm": 1.5634742093932859, + "learning_rate": 1.0160122879437538e-06, + "loss": 0.5275895595550537, + "step": 4513 + }, + { + "epoch": 1.0408116209361309, + "grad_norm": 1.1544305266604897, + "learning_rate": 1.0156310744244073e-06, + "loss": 0.4677985906600952, + "step": 4514 + }, + { + "epoch": 1.0410421950657136, + "grad_norm": 1.422644417212902, + "learning_rate": 1.015249858632926e-06, + "loss": 0.5214150547981262, + "step": 4515 + }, + { + "epoch": 1.0412727691952963, + "grad_norm": 1.2418435857264525, + "learning_rate": 1.0148686406247232e-06, + "loss": 0.40790024399757385, + "step": 4516 + }, + { + "epoch": 1.041503343324879, + "grad_norm": 1.6199751141856524, + "learning_rate": 1.0144874204552125e-06, + "loss": 0.5943785309791565, + "step": 4517 + }, + { + "epoch": 1.0417339174544615, + "grad_norm": 1.531988684910503, + "learning_rate": 1.0141061981798086e-06, + "loss": 0.4590263366699219, + "step": 4518 + }, + { + "epoch": 1.0419644915840443, + "grad_norm": 1.3212940799821826, + "learning_rate": 1.0137249738539257e-06, + "loss": 0.4106098413467407, + "step": 4519 + }, + { + "epoch": 1.042195065713627, + "grad_norm": 1.4102973636174063, + "learning_rate": 1.013343747532979e-06, + "loss": 0.4730203151702881, + "step": 4520 + }, + { + "epoch": 1.0424256398432097, + "grad_norm": 1.2769276209650842, + "learning_rate": 
1.0129625192723833e-06, + "loss": 0.43245944380760193, + "step": 4521 + }, + { + "epoch": 1.0426562139727922, + "grad_norm": 1.3088740452256564, + "learning_rate": 1.012581289127554e-06, + "loss": 0.40828272700309753, + "step": 4522 + }, + { + "epoch": 1.042886788102375, + "grad_norm": 1.5940499075267438, + "learning_rate": 1.0122000571539069e-06, + "loss": 0.4232874810695648, + "step": 4523 + }, + { + "epoch": 1.0431173622319576, + "grad_norm": 1.45477003479617, + "learning_rate": 1.0118188234068579e-06, + "loss": 0.43044984340667725, + "step": 4524 + }, + { + "epoch": 1.0433479363615403, + "grad_norm": 1.6545172631907663, + "learning_rate": 1.011437587941823e-06, + "loss": 0.4502897262573242, + "step": 4525 + }, + { + "epoch": 1.0435785104911228, + "grad_norm": 2.0995258586192467, + "learning_rate": 1.0110563508142185e-06, + "loss": 0.5505340099334717, + "step": 4526 + }, + { + "epoch": 1.0438090846207055, + "grad_norm": 1.5629586322344833, + "learning_rate": 1.0106751120794617e-06, + "loss": 0.4026086628437042, + "step": 4527 + }, + { + "epoch": 1.0440396587502883, + "grad_norm": 1.5105039899180257, + "learning_rate": 1.0102938717929692e-06, + "loss": 0.3910222053527832, + "step": 4528 + }, + { + "epoch": 1.044270232879871, + "grad_norm": 1.6830902678008934, + "learning_rate": 1.009912630010158e-06, + "loss": 0.4134068191051483, + "step": 4529 + }, + { + "epoch": 1.0445008070094535, + "grad_norm": 1.4825250898714368, + "learning_rate": 1.0095313867864457e-06, + "loss": 0.4801563024520874, + "step": 4530 + }, + { + "epoch": 1.0447313811390362, + "grad_norm": 1.2424640239796358, + "learning_rate": 1.0091501421772495e-06, + "loss": 0.4269358515739441, + "step": 4531 + }, + { + "epoch": 1.044961955268619, + "grad_norm": 1.3485994976026512, + "learning_rate": 1.0087688962379877e-06, + "loss": 0.5300281047821045, + "step": 4532 + }, + { + "epoch": 1.0451925293982016, + "grad_norm": 1.6865287595757648, + "learning_rate": 1.0083876490240777e-06, + "loss": 
0.4634189009666443, + "step": 4533 + }, + { + "epoch": 1.0454231035277841, + "grad_norm": 1.5187760856795984, + "learning_rate": 1.0080064005909379e-06, + "loss": 0.37037551403045654, + "step": 4534 + }, + { + "epoch": 1.0456536776573668, + "grad_norm": 1.2977267015714409, + "learning_rate": 1.0076251509939867e-06, + "loss": 0.4740016460418701, + "step": 4535 + }, + { + "epoch": 1.0458842517869495, + "grad_norm": 1.4686161726335998, + "learning_rate": 1.0072439002886426e-06, + "loss": 0.4824775159358978, + "step": 4536 + }, + { + "epoch": 1.0461148259165323, + "grad_norm": 1.4032368341998698, + "learning_rate": 1.0068626485303242e-06, + "loss": 0.4891430735588074, + "step": 4537 + }, + { + "epoch": 1.0463454000461148, + "grad_norm": 1.440410031419601, + "learning_rate": 1.00648139577445e-06, + "loss": 0.48089975118637085, + "step": 4538 + }, + { + "epoch": 1.0465759741756975, + "grad_norm": 1.3280505427696812, + "learning_rate": 1.0061001420764395e-06, + "loss": 0.4353799521923065, + "step": 4539 + }, + { + "epoch": 1.0468065483052802, + "grad_norm": 1.5425308952951848, + "learning_rate": 1.0057188874917117e-06, + "loss": 0.4259982705116272, + "step": 4540 + }, + { + "epoch": 1.047037122434863, + "grad_norm": 1.502788920344227, + "learning_rate": 1.0053376320756852e-06, + "loss": 0.4400532841682434, + "step": 4541 + }, + { + "epoch": 1.0472676965644454, + "grad_norm": 1.398609267878258, + "learning_rate": 1.00495637588378e-06, + "loss": 0.48598533868789673, + "step": 4542 + }, + { + "epoch": 1.0474982706940281, + "grad_norm": 1.7261761893493324, + "learning_rate": 1.0045751189714153e-06, + "loss": 0.6310586929321289, + "step": 4543 + }, + { + "epoch": 1.0477288448236108, + "grad_norm": 1.4822203646620422, + "learning_rate": 1.0041938613940108e-06, + "loss": 0.49084293842315674, + "step": 4544 + }, + { + "epoch": 1.0479594189531936, + "grad_norm": 1.6167393331453148, + "learning_rate": 1.003812603206986e-06, + "loss": 0.5144428014755249, + "step": 4545 + }, + { + 
"epoch": 1.048189993082776, + "grad_norm": 1.4962485615696877, + "learning_rate": 1.0034313444657605e-06, + "loss": 0.4480917155742645, + "step": 4546 + }, + { + "epoch": 1.0484205672123588, + "grad_norm": 1.4833727438286728, + "learning_rate": 1.0030500852257545e-06, + "loss": 0.4505491852760315, + "step": 4547 + }, + { + "epoch": 1.0486511413419415, + "grad_norm": 1.3728340651335322, + "learning_rate": 1.0026688255423876e-06, + "loss": 0.3344930410385132, + "step": 4548 + }, + { + "epoch": 1.0488817154715242, + "grad_norm": 1.3493238342876126, + "learning_rate": 1.0022875654710801e-06, + "loss": 0.4006739854812622, + "step": 4549 + }, + { + "epoch": 1.0491122896011067, + "grad_norm": 1.4777604777161095, + "learning_rate": 1.0019063050672517e-06, + "loss": 0.4815717935562134, + "step": 4550 + }, + { + "epoch": 1.0493428637306894, + "grad_norm": 1.4182246513528267, + "learning_rate": 1.0015250443863223e-06, + "loss": 0.4660469889640808, + "step": 4551 + }, + { + "epoch": 1.0495734378602721, + "grad_norm": 1.4298035442899577, + "learning_rate": 1.0011437834837125e-06, + "loss": 0.5233521461486816, + "step": 4552 + }, + { + "epoch": 1.0498040119898548, + "grad_norm": 1.7530768174577198, + "learning_rate": 1.0007625224148418e-06, + "loss": 0.6037864685058594, + "step": 4553 + }, + { + "epoch": 1.0500345861194373, + "grad_norm": 1.726860458569315, + "learning_rate": 1.000381261235131e-06, + "loss": 0.469952255487442, + "step": 4554 + }, + { + "epoch": 1.05026516024902, + "grad_norm": 1.302712404041117, + "learning_rate": 1e-06, + "loss": 0.4577752649784088, + "step": 4555 + }, + { + "epoch": 1.0504957343786028, + "grad_norm": 1.537724574807554, + "learning_rate": 9.996187387648692e-07, + "loss": 0.46796074509620667, + "step": 4556 + }, + { + "epoch": 1.0507263085081853, + "grad_norm": 1.3633141581703183, + "learning_rate": 9.992374775851583e-07, + "loss": 0.40709036588668823, + "step": 4557 + }, + { + "epoch": 1.050956882637768, + "grad_norm": 1.2121351653860253, + 
"learning_rate": 9.988562165162878e-07, + "loss": 0.3997795879840851, + "step": 4558 + }, + { + "epoch": 1.0511874567673507, + "grad_norm": 1.6938685288563167, + "learning_rate": 9.984749556136779e-07, + "loss": 0.4677845239639282, + "step": 4559 + }, + { + "epoch": 1.0514180308969334, + "grad_norm": 1.315537055431831, + "learning_rate": 9.980936949327487e-07, + "loss": 0.40411800146102905, + "step": 4560 + }, + { + "epoch": 1.0516486050265161, + "grad_norm": 1.3999939149032237, + "learning_rate": 9.9771243452892e-07, + "loss": 0.50546795129776, + "step": 4561 + }, + { + "epoch": 1.0518791791560986, + "grad_norm": 1.5468163611837324, + "learning_rate": 9.973311744576125e-07, + "loss": 0.4116637110710144, + "step": 4562 + }, + { + "epoch": 1.0521097532856813, + "grad_norm": 1.2997915019544943, + "learning_rate": 9.969499147742454e-07, + "loss": 0.4271109700202942, + "step": 4563 + }, + { + "epoch": 1.052340327415264, + "grad_norm": 1.1760164248835672, + "learning_rate": 9.965686555342396e-07, + "loss": 0.37195074558258057, + "step": 4564 + }, + { + "epoch": 1.0525709015448466, + "grad_norm": 1.6759945376385115, + "learning_rate": 9.96187396793014e-07, + "loss": 0.4020707607269287, + "step": 4565 + }, + { + "epoch": 1.0528014756744293, + "grad_norm": 1.5880882887273124, + "learning_rate": 9.95806138605989e-07, + "loss": 0.4980151951313019, + "step": 4566 + }, + { + "epoch": 1.053032049804012, + "grad_norm": 1.419377079967674, + "learning_rate": 9.95424881028585e-07, + "loss": 0.39553767442703247, + "step": 4567 + }, + { + "epoch": 1.0532626239335947, + "grad_norm": 1.3361167736969362, + "learning_rate": 9.9504362411622e-07, + "loss": 0.47618645429611206, + "step": 4568 + }, + { + "epoch": 1.0534931980631772, + "grad_norm": 1.6469408967264108, + "learning_rate": 9.94662367924315e-07, + "loss": 0.4613817036151886, + "step": 4569 + }, + { + "epoch": 1.05372377219276, + "grad_norm": 1.4563205269464143, + "learning_rate": 9.942811125082884e-07, + "loss": 
0.35888034105300903, + "step": 4570 + }, + { + "epoch": 1.0539543463223426, + "grad_norm": 1.896669698951033, + "learning_rate": 9.938998579235606e-07, + "loss": 0.45810097455978394, + "step": 4571 + }, + { + "epoch": 1.0541849204519254, + "grad_norm": 1.4115626759758866, + "learning_rate": 9.935186042255499e-07, + "loss": 0.5351384878158569, + "step": 4572 + }, + { + "epoch": 1.0544154945815079, + "grad_norm": 1.4888165757644622, + "learning_rate": 9.931373514696759e-07, + "loss": 0.5261274576187134, + "step": 4573 + }, + { + "epoch": 1.0546460687110906, + "grad_norm": 1.368295507669899, + "learning_rate": 9.927560997113573e-07, + "loss": 0.483295202255249, + "step": 4574 + }, + { + "epoch": 1.0548766428406733, + "grad_norm": 1.5639325535974613, + "learning_rate": 9.923748490060132e-07, + "loss": 0.5371580719947815, + "step": 4575 + }, + { + "epoch": 1.055107216970256, + "grad_norm": 1.8721225876517977, + "learning_rate": 9.919935994090622e-07, + "loss": 0.4863673746585846, + "step": 4576 + }, + { + "epoch": 1.0553377910998385, + "grad_norm": 1.5391981555318386, + "learning_rate": 9.916123509759224e-07, + "loss": 0.47929099202156067, + "step": 4577 + }, + { + "epoch": 1.0555683652294212, + "grad_norm": 1.3884034720788059, + "learning_rate": 9.912311037620126e-07, + "loss": 0.4687851667404175, + "step": 4578 + }, + { + "epoch": 1.055798939359004, + "grad_norm": 1.5841867302150618, + "learning_rate": 9.908498578227504e-07, + "loss": 0.5308720469474792, + "step": 4579 + }, + { + "epoch": 1.0560295134885866, + "grad_norm": 1.8691314272616926, + "learning_rate": 9.904686132135546e-07, + "loss": 0.45900580286979675, + "step": 4580 + }, + { + "epoch": 1.0562600876181691, + "grad_norm": 1.4586686619480431, + "learning_rate": 9.900873699898422e-07, + "loss": 0.49392157793045044, + "step": 4581 + }, + { + "epoch": 1.0564906617477519, + "grad_norm": 1.6139111586944341, + "learning_rate": 9.89706128207031e-07, + "loss": 0.47190070152282715, + "step": 4582 + }, + { + "epoch": 
1.0567212358773346, + "grad_norm": 1.7781894650458763, + "learning_rate": 9.893248879205382e-07, + "loss": 0.4431575834751129, + "step": 4583 + }, + { + "epoch": 1.0569518100069173, + "grad_norm": 1.293421470994464, + "learning_rate": 9.889436491857814e-07, + "loss": 0.49873441457748413, + "step": 4584 + }, + { + "epoch": 1.0571823841364998, + "grad_norm": 1.4263954197349762, + "learning_rate": 9.885624120581772e-07, + "loss": 0.41190844774246216, + "step": 4585 + }, + { + "epoch": 1.0574129582660825, + "grad_norm": 1.5698735406284627, + "learning_rate": 9.881811765931423e-07, + "loss": 0.5164123773574829, + "step": 4586 + }, + { + "epoch": 1.0576435323956652, + "grad_norm": 1.5034141006108586, + "learning_rate": 9.877999428460933e-07, + "loss": 0.4141567349433899, + "step": 4587 + }, + { + "epoch": 1.057874106525248, + "grad_norm": 1.557658840701198, + "learning_rate": 9.87418710872446e-07, + "loss": 0.457628458738327, + "step": 4588 + }, + { + "epoch": 1.0581046806548304, + "grad_norm": 1.4732865673601758, + "learning_rate": 9.870374807276168e-07, + "loss": 0.41788995265960693, + "step": 4589 + }, + { + "epoch": 1.0583352547844131, + "grad_norm": 1.6240063497851516, + "learning_rate": 9.866562524670209e-07, + "loss": 0.5124667882919312, + "step": 4590 + }, + { + "epoch": 1.0585658289139959, + "grad_norm": 1.1619873853554898, + "learning_rate": 9.862750261460742e-07, + "loss": 0.4192196726799011, + "step": 4591 + }, + { + "epoch": 1.0587964030435786, + "grad_norm": 1.3804521479784477, + "learning_rate": 9.858938018201913e-07, + "loss": 0.4345153868198395, + "step": 4592 + }, + { + "epoch": 1.059026977173161, + "grad_norm": 1.3186049119261667, + "learning_rate": 9.855125795447874e-07, + "loss": 0.391804963350296, + "step": 4593 + }, + { + "epoch": 1.0592575513027438, + "grad_norm": 1.3394610780120433, + "learning_rate": 9.851313593752767e-07, + "loss": 0.3904710114002228, + "step": 4594 + }, + { + "epoch": 1.0594881254323265, + "grad_norm": 1.4234043935357816, + 
"learning_rate": 9.847501413670742e-07, + "loss": 0.37314411997795105, + "step": 4595 + }, + { + "epoch": 1.0597186995619092, + "grad_norm": 1.7572920451540888, + "learning_rate": 9.843689255755926e-07, + "loss": 0.5402779579162598, + "step": 4596 + }, + { + "epoch": 1.0599492736914917, + "grad_norm": 1.4688689617213957, + "learning_rate": 9.839877120562463e-07, + "loss": 0.4243565797805786, + "step": 4597 + }, + { + "epoch": 1.0601798478210744, + "grad_norm": 1.6330717694890693, + "learning_rate": 9.836065008644484e-07, + "loss": 0.4504585564136505, + "step": 4598 + }, + { + "epoch": 1.0604104219506572, + "grad_norm": 1.3073319656874434, + "learning_rate": 9.832252920556115e-07, + "loss": 0.46487870812416077, + "step": 4599 + }, + { + "epoch": 1.0606409960802399, + "grad_norm": 1.452752590173503, + "learning_rate": 9.828440856851487e-07, + "loss": 0.470059871673584, + "step": 4600 + }, + { + "epoch": 1.0608715702098224, + "grad_norm": 1.4580866952416336, + "learning_rate": 9.824628818084716e-07, + "loss": 0.4307391047477722, + "step": 4601 + }, + { + "epoch": 1.061102144339405, + "grad_norm": 1.545423985207434, + "learning_rate": 9.820816804809927e-07, + "loss": 0.49449142813682556, + "step": 4602 + }, + { + "epoch": 1.0613327184689878, + "grad_norm": 1.4803985945664777, + "learning_rate": 9.817004817581229e-07, + "loss": 0.4932701885700226, + "step": 4603 + }, + { + "epoch": 1.0615632925985705, + "grad_norm": 1.4502372729626234, + "learning_rate": 9.813192856952739e-07, + "loss": 0.49543553590774536, + "step": 4604 + }, + { + "epoch": 1.061793866728153, + "grad_norm": 1.1578379554584357, + "learning_rate": 9.809380923478554e-07, + "loss": 0.3906818926334381, + "step": 4605 + }, + { + "epoch": 1.0620244408577357, + "grad_norm": 1.4436425775524195, + "learning_rate": 9.80556901771279e-07, + "loss": 0.41667112708091736, + "step": 4606 + }, + { + "epoch": 1.0622550149873184, + "grad_norm": 1.475010908303335, + "learning_rate": 9.801757140209538e-07, + "loss": 
0.36195361614227295, + "step": 4607 + }, + { + "epoch": 1.0624855891169012, + "grad_norm": 1.4053500417900708, + "learning_rate": 9.797945291522892e-07, + "loss": 0.4056081175804138, + "step": 4608 + }, + { + "epoch": 1.0627161632464837, + "grad_norm": 1.4310559040175581, + "learning_rate": 9.794133472206948e-07, + "loss": 0.5048736929893494, + "step": 4609 + }, + { + "epoch": 1.0629467373760664, + "grad_norm": 1.3896886111265523, + "learning_rate": 9.790321682815788e-07, + "loss": 0.4846169352531433, + "step": 4610 + }, + { + "epoch": 1.063177311505649, + "grad_norm": 1.3569892439901554, + "learning_rate": 9.7865099239035e-07, + "loss": 0.5149316787719727, + "step": 4611 + }, + { + "epoch": 1.0634078856352318, + "grad_norm": 1.5344870466099163, + "learning_rate": 9.782698196024155e-07, + "loss": 0.3816874623298645, + "step": 4612 + }, + { + "epoch": 1.0636384597648143, + "grad_norm": 1.39688044025804, + "learning_rate": 9.77888649973183e-07, + "loss": 0.5469645261764526, + "step": 4613 + }, + { + "epoch": 1.063869033894397, + "grad_norm": 1.2954034757094786, + "learning_rate": 9.775074835580593e-07, + "loss": 0.42796647548675537, + "step": 4614 + }, + { + "epoch": 1.0640996080239797, + "grad_norm": 1.4924945772778404, + "learning_rate": 9.771263204124512e-07, + "loss": 0.4931715726852417, + "step": 4615 + }, + { + "epoch": 1.0643301821535625, + "grad_norm": 1.367565961969811, + "learning_rate": 9.767451605917641e-07, + "loss": 0.5435268878936768, + "step": 4616 + }, + { + "epoch": 1.064560756283145, + "grad_norm": 1.6066093331363582, + "learning_rate": 9.763640041514033e-07, + "loss": 0.46361953020095825, + "step": 4617 + }, + { + "epoch": 1.0647913304127277, + "grad_norm": 1.240667858579194, + "learning_rate": 9.759828511467743e-07, + "loss": 0.3742775619029999, + "step": 4618 + }, + { + "epoch": 1.0650219045423104, + "grad_norm": 1.5520509510364326, + "learning_rate": 9.75601701633281e-07, + "loss": 0.4060659408569336, + "step": 4619 + }, + { + "epoch": 
1.065252478671893, + "grad_norm": 1.2052909018096978, + "learning_rate": 9.75220555666328e-07, + "loss": 0.45316505432128906, + "step": 4620 + }, + { + "epoch": 1.0654830528014756, + "grad_norm": 1.4180749825165042, + "learning_rate": 9.748394133013179e-07, + "loss": 0.4548850655555725, + "step": 4621 + }, + { + "epoch": 1.0657136269310583, + "grad_norm": 1.2793215690458788, + "learning_rate": 9.744582745936547e-07, + "loss": 0.5065705180168152, + "step": 4622 + }, + { + "epoch": 1.065944201060641, + "grad_norm": 1.4912306578981507, + "learning_rate": 9.740771395987395e-07, + "loss": 0.4114503860473633, + "step": 4623 + }, + { + "epoch": 1.0661747751902237, + "grad_norm": 1.4280192292492455, + "learning_rate": 9.736960083719752e-07, + "loss": 0.4568501114845276, + "step": 4624 + }, + { + "epoch": 1.0664053493198062, + "grad_norm": 1.2972553921673455, + "learning_rate": 9.733148809687624e-07, + "loss": 0.49967026710510254, + "step": 4625 + }, + { + "epoch": 1.066635923449389, + "grad_norm": 1.4642812597554793, + "learning_rate": 9.729337574445025e-07, + "loss": 0.529681384563446, + "step": 4626 + }, + { + "epoch": 1.0668664975789717, + "grad_norm": 1.4791668180519966, + "learning_rate": 9.72552637854595e-07, + "loss": 0.4819791316986084, + "step": 4627 + }, + { + "epoch": 1.0670970717085544, + "grad_norm": 1.3549019355661691, + "learning_rate": 9.721715222544396e-07, + "loss": 0.4186001718044281, + "step": 4628 + }, + { + "epoch": 1.0673276458381369, + "grad_norm": 1.221767945169434, + "learning_rate": 9.717904106994359e-07, + "loss": 0.4442529082298279, + "step": 4629 + }, + { + "epoch": 1.0675582199677196, + "grad_norm": 1.886711265076429, + "learning_rate": 9.714093032449815e-07, + "loss": 0.4655953049659729, + "step": 4630 + }, + { + "epoch": 1.0677887940973023, + "grad_norm": 1.2641786187672595, + "learning_rate": 9.71028199946475e-07, + "loss": 0.45248714089393616, + "step": 4631 + }, + { + "epoch": 1.068019368226885, + "grad_norm": 1.547270813258376, + 
"learning_rate": 9.706471008593128e-07, + "loss": 0.4244336485862732, + "step": 4632 + }, + { + "epoch": 1.0682499423564675, + "grad_norm": 1.441914160495435, + "learning_rate": 9.702660060388923e-07, + "loss": 0.4396495819091797, + "step": 4633 + }, + { + "epoch": 1.0684805164860502, + "grad_norm": 1.3832490714301353, + "learning_rate": 9.698849155406089e-07, + "loss": 0.4504232406616211, + "step": 4634 + }, + { + "epoch": 1.068711090615633, + "grad_norm": 1.5660708185651993, + "learning_rate": 9.695038294198588e-07, + "loss": 0.40112000703811646, + "step": 4635 + }, + { + "epoch": 1.0689416647452157, + "grad_norm": 1.5797332497697052, + "learning_rate": 9.691227477320357e-07, + "loss": 0.4511067271232605, + "step": 4636 + }, + { + "epoch": 1.0691722388747982, + "grad_norm": 1.4624732720511697, + "learning_rate": 9.687416705325342e-07, + "loss": 0.44541406631469727, + "step": 4637 + }, + { + "epoch": 1.069402813004381, + "grad_norm": 1.3872197811900322, + "learning_rate": 9.68360597876748e-07, + "loss": 0.5038847327232361, + "step": 4638 + }, + { + "epoch": 1.0696333871339636, + "grad_norm": 1.2356986255488158, + "learning_rate": 9.67979529820069e-07, + "loss": 0.41960060596466064, + "step": 4639 + }, + { + "epoch": 1.0698639612635463, + "grad_norm": 1.6121133741192841, + "learning_rate": 9.6759846641789e-07, + "loss": 0.49760064482688904, + "step": 4640 + }, + { + "epoch": 1.0700945353931288, + "grad_norm": 1.7920934015909264, + "learning_rate": 9.672174077256023e-07, + "loss": 0.46513333916664124, + "step": 4641 + }, + { + "epoch": 1.0703251095227115, + "grad_norm": 1.5128396951273724, + "learning_rate": 9.66836353798597e-07, + "loss": 0.41129356622695923, + "step": 4642 + }, + { + "epoch": 1.0705556836522943, + "grad_norm": 1.1803503202020598, + "learning_rate": 9.664553046922634e-07, + "loss": 0.5021853446960449, + "step": 4643 + }, + { + "epoch": 1.070786257781877, + "grad_norm": 1.7444146178498035, + "learning_rate": 9.660742604619912e-07, + "loss": 
0.5184302926063538, + "step": 4644 + }, + { + "epoch": 1.0710168319114595, + "grad_norm": 1.8278981381437267, + "learning_rate": 9.65693221163169e-07, + "loss": 0.4793940484523773, + "step": 4645 + }, + { + "epoch": 1.0712474060410422, + "grad_norm": 1.6157027564363053, + "learning_rate": 9.653121868511854e-07, + "loss": 0.43454456329345703, + "step": 4646 + }, + { + "epoch": 1.071477980170625, + "grad_norm": 1.3605748894383922, + "learning_rate": 9.649311575814266e-07, + "loss": 0.49123185873031616, + "step": 4647 + }, + { + "epoch": 1.0717085543002076, + "grad_norm": 1.2316654311751212, + "learning_rate": 9.645501334092792e-07, + "loss": 0.37020617723464966, + "step": 4648 + }, + { + "epoch": 1.0719391284297901, + "grad_norm": 1.3370776970957903, + "learning_rate": 9.641691143901296e-07, + "loss": 0.461778849363327, + "step": 4649 + }, + { + "epoch": 1.0721697025593728, + "grad_norm": 1.7402606402657241, + "learning_rate": 9.63788100579362e-07, + "loss": 0.46640273928642273, + "step": 4650 + }, + { + "epoch": 1.0724002766889555, + "grad_norm": 1.543123481033078, + "learning_rate": 9.634070920323614e-07, + "loss": 0.44978517293930054, + "step": 4651 + }, + { + "epoch": 1.0726308508185383, + "grad_norm": 1.5280216878422028, + "learning_rate": 9.630260888045103e-07, + "loss": 0.5070945024490356, + "step": 4652 + }, + { + "epoch": 1.0728614249481208, + "grad_norm": 1.3361545028178132, + "learning_rate": 9.626450909511926e-07, + "loss": 0.4513545334339142, + "step": 4653 + }, + { + "epoch": 1.0730919990777035, + "grad_norm": 1.2352969540055843, + "learning_rate": 9.622640985277889e-07, + "loss": 0.4430030584335327, + "step": 4654 + }, + { + "epoch": 1.0733225732072862, + "grad_norm": 1.7185507494111099, + "learning_rate": 9.618831115896814e-07, + "loss": 0.45619165897369385, + "step": 4655 + }, + { + "epoch": 1.073553147336869, + "grad_norm": 1.3452693944435885, + "learning_rate": 9.615021301922497e-07, + "loss": 0.411594033241272, + "step": 4656 + }, + { + "epoch": 
1.0737837214664514, + "grad_norm": 1.696260647190632, + "learning_rate": 9.611211543908732e-07, + "loss": 0.5230164527893066, + "step": 4657 + }, + { + "epoch": 1.0740142955960341, + "grad_norm": 1.2546383850728546, + "learning_rate": 9.607401842409316e-07, + "loss": 0.45379406213760376, + "step": 4658 + }, + { + "epoch": 1.0742448697256168, + "grad_norm": 1.4465974878955368, + "learning_rate": 9.603592197978016e-07, + "loss": 0.47254839539527893, + "step": 4659 + }, + { + "epoch": 1.0744754438551993, + "grad_norm": 1.4899733507525732, + "learning_rate": 9.59978261116861e-07, + "loss": 0.3990492820739746, + "step": 4660 + }, + { + "epoch": 1.074706017984782, + "grad_norm": 1.2629235312972213, + "learning_rate": 9.595973082534855e-07, + "loss": 0.41671720147132874, + "step": 4661 + }, + { + "epoch": 1.0749365921143648, + "grad_norm": 1.3769486256402874, + "learning_rate": 9.59216361263051e-07, + "loss": 0.4269324839115143, + "step": 4662 + }, + { + "epoch": 1.0751671662439475, + "grad_norm": 1.7548425902665015, + "learning_rate": 9.588354202009314e-07, + "loss": 0.42989516258239746, + "step": 4663 + }, + { + "epoch": 1.0753977403735302, + "grad_norm": 1.5474664125691167, + "learning_rate": 9.584544851225008e-07, + "loss": 0.5224605798721313, + "step": 4664 + }, + { + "epoch": 1.0756283145031127, + "grad_norm": 1.393419713492626, + "learning_rate": 9.580735560831318e-07, + "loss": 0.3853871524333954, + "step": 4665 + }, + { + "epoch": 1.0758588886326954, + "grad_norm": 1.360242198109215, + "learning_rate": 9.576926331381968e-07, + "loss": 0.4460698366165161, + "step": 4666 + }, + { + "epoch": 1.0760894627622781, + "grad_norm": 1.524802030014046, + "learning_rate": 9.57311716343066e-07, + "loss": 0.45617812871932983, + "step": 4667 + }, + { + "epoch": 1.0763200368918606, + "grad_norm": 1.7079854681006486, + "learning_rate": 9.569308057531096e-07, + "loss": 0.5631355047225952, + "step": 4668 + }, + { + "epoch": 1.0765506110214433, + "grad_norm": 1.3155596598859882, + 
"learning_rate": 9.565499014236977e-07, + "loss": 0.4197179973125458, + "step": 4669 + }, + { + "epoch": 1.076781185151026, + "grad_norm": 1.5894301477582775, + "learning_rate": 9.561690034101973e-07, + "loss": 0.4262646436691284, + "step": 4670 + }, + { + "epoch": 1.0770117592806088, + "grad_norm": 1.4805271814916348, + "learning_rate": 9.557881117679768e-07, + "loss": 0.42719966173171997, + "step": 4671 + }, + { + "epoch": 1.0772423334101915, + "grad_norm": 1.3479731294807211, + "learning_rate": 9.554072265524022e-07, + "loss": 0.4278491735458374, + "step": 4672 + }, + { + "epoch": 1.077472907539774, + "grad_norm": 1.4324931591130032, + "learning_rate": 9.550263478188396e-07, + "loss": 0.3915478587150574, + "step": 4673 + }, + { + "epoch": 1.0777034816693567, + "grad_norm": 1.4807606218185139, + "learning_rate": 9.546454756226525e-07, + "loss": 0.4391477704048157, + "step": 4674 + }, + { + "epoch": 1.0779340557989394, + "grad_norm": 1.6230153652074522, + "learning_rate": 9.542646100192055e-07, + "loss": 0.47325795888900757, + "step": 4675 + }, + { + "epoch": 1.078164629928522, + "grad_norm": 1.3326185339285364, + "learning_rate": 9.538837510638607e-07, + "loss": 0.4698373079299927, + "step": 4676 + }, + { + "epoch": 1.0783952040581046, + "grad_norm": 1.5843176103578385, + "learning_rate": 9.535028988119805e-07, + "loss": 0.4252272844314575, + "step": 4677 + }, + { + "epoch": 1.0786257781876873, + "grad_norm": 1.4642476960881914, + "learning_rate": 9.531220533189253e-07, + "loss": 0.46726179122924805, + "step": 4678 + }, + { + "epoch": 1.07885635231727, + "grad_norm": 1.3792408296611596, + "learning_rate": 9.527412146400542e-07, + "loss": 0.46616411209106445, + "step": 4679 + }, + { + "epoch": 1.0790869264468528, + "grad_norm": 1.3938952826758202, + "learning_rate": 9.523603828307268e-07, + "loss": 0.5607181787490845, + "step": 4680 + }, + { + "epoch": 1.0793175005764353, + "grad_norm": 1.6234566687004295, + "learning_rate": 9.519795579463002e-07, + "loss": 
0.5039520859718323, + "step": 4681 + }, + { + "epoch": 1.079548074706018, + "grad_norm": 1.6358698645091259, + "learning_rate": 9.515987400421322e-07, + "loss": 0.45532113313674927, + "step": 4682 + }, + { + "epoch": 1.0797786488356007, + "grad_norm": 1.3987490622653254, + "learning_rate": 9.512179291735772e-07, + "loss": 0.4198398292064667, + "step": 4683 + }, + { + "epoch": 1.0800092229651832, + "grad_norm": 2.0745649369110577, + "learning_rate": 9.508371253959909e-07, + "loss": 0.371380090713501, + "step": 4684 + }, + { + "epoch": 1.080239797094766, + "grad_norm": 1.6602368865180097, + "learning_rate": 9.504563287647265e-07, + "loss": 0.44341978430747986, + "step": 4685 + }, + { + "epoch": 1.0804703712243486, + "grad_norm": 1.3233390600316475, + "learning_rate": 9.500755393351372e-07, + "loss": 0.4184574484825134, + "step": 4686 + }, + { + "epoch": 1.0807009453539314, + "grad_norm": 1.554478033670439, + "learning_rate": 9.496947571625739e-07, + "loss": 0.5584033727645874, + "step": 4687 + }, + { + "epoch": 1.0809315194835138, + "grad_norm": 1.4303675439776025, + "learning_rate": 9.493139823023874e-07, + "loss": 0.44405317306518555, + "step": 4688 + }, + { + "epoch": 1.0811620936130966, + "grad_norm": 1.5109921870756446, + "learning_rate": 9.489332148099277e-07, + "loss": 0.41137009859085083, + "step": 4689 + }, + { + "epoch": 1.0813926677426793, + "grad_norm": 1.5933695881826222, + "learning_rate": 9.485524547405424e-07, + "loss": 0.4831092357635498, + "step": 4690 + }, + { + "epoch": 1.081623241872262, + "grad_norm": 1.3224307777817799, + "learning_rate": 9.481717021495793e-07, + "loss": 0.41243845224380493, + "step": 4691 + }, + { + "epoch": 1.0818538160018445, + "grad_norm": 1.506253034871724, + "learning_rate": 9.477909570923844e-07, + "loss": 0.33649003505706787, + "step": 4692 + }, + { + "epoch": 1.0820843901314272, + "grad_norm": 1.3759728989311568, + "learning_rate": 9.474102196243033e-07, + "loss": 0.4959014654159546, + "step": 4693 + }, + { + "epoch": 
1.08231496426101, + "grad_norm": 1.4717496348190642, + "learning_rate": 9.470294898006795e-07, + "loss": 0.43924248218536377, + "step": 4694 + }, + { + "epoch": 1.0825455383905926, + "grad_norm": 1.5425758669304555, + "learning_rate": 9.466487676768563e-07, + "loss": 0.4777243137359619, + "step": 4695 + }, + { + "epoch": 1.0827761125201751, + "grad_norm": 1.7258911046059784, + "learning_rate": 9.462680533081752e-07, + "loss": 0.4488077759742737, + "step": 4696 + }, + { + "epoch": 1.0830066866497579, + "grad_norm": 1.5375128445555653, + "learning_rate": 9.458873467499778e-07, + "loss": 0.5058270692825317, + "step": 4697 + }, + { + "epoch": 1.0832372607793406, + "grad_norm": 1.5052517610014813, + "learning_rate": 9.455066480576025e-07, + "loss": 0.4537619650363922, + "step": 4698 + }, + { + "epoch": 1.0834678349089233, + "grad_norm": 1.5194044905455244, + "learning_rate": 9.45125957286388e-07, + "loss": 0.4725874960422516, + "step": 4699 + }, + { + "epoch": 1.0836984090385058, + "grad_norm": 1.61840988882087, + "learning_rate": 9.447452744916722e-07, + "loss": 0.4967196583747864, + "step": 4700 + }, + { + "epoch": 1.0839289831680885, + "grad_norm": 1.3272496966479597, + "learning_rate": 9.443645997287902e-07, + "loss": 0.43682345747947693, + "step": 4701 + }, + { + "epoch": 1.0841595572976712, + "grad_norm": 1.4038050893134464, + "learning_rate": 9.439839330530781e-07, + "loss": 0.48844271898269653, + "step": 4702 + }, + { + "epoch": 1.084390131427254, + "grad_norm": 1.3581740542884078, + "learning_rate": 9.436032745198682e-07, + "loss": 0.43654918670654297, + "step": 4703 + }, + { + "epoch": 1.0846207055568364, + "grad_norm": 1.6070546851567389, + "learning_rate": 9.432226241844947e-07, + "loss": 0.5034382939338684, + "step": 4704 + }, + { + "epoch": 1.0848512796864191, + "grad_norm": 1.9516449815592325, + "learning_rate": 9.428419821022877e-07, + "loss": 0.5407527089118958, + "step": 4705 + }, + { + "epoch": 1.0850818538160019, + "grad_norm": 1.3188521673213394, + 
"learning_rate": 9.424613483285783e-07, + "loss": 0.4372078478336334, + "step": 4706 + }, + { + "epoch": 1.0853124279455846, + "grad_norm": 1.3673238165045705, + "learning_rate": 9.420807229186949e-07, + "loss": 0.5264855623245239, + "step": 4707 + }, + { + "epoch": 1.085543002075167, + "grad_norm": 1.2884056915833075, + "learning_rate": 9.417001059279652e-07, + "loss": 0.3810223937034607, + "step": 4708 + }, + { + "epoch": 1.0857735762047498, + "grad_norm": 1.318670262430079, + "learning_rate": 9.413194974117163e-07, + "loss": 0.368865430355072, + "step": 4709 + }, + { + "epoch": 1.0860041503343325, + "grad_norm": 1.3202107346651724, + "learning_rate": 9.409388974252729e-07, + "loss": 0.41845810413360596, + "step": 4710 + }, + { + "epoch": 1.0862347244639152, + "grad_norm": 1.4709870024189373, + "learning_rate": 9.405583060239594e-07, + "loss": 0.5185590982437134, + "step": 4711 + }, + { + "epoch": 1.0864652985934977, + "grad_norm": 1.7793671382372165, + "learning_rate": 9.401777232630983e-07, + "loss": 0.4848501682281494, + "step": 4712 + }, + { + "epoch": 1.0866958727230804, + "grad_norm": 1.5218788678149173, + "learning_rate": 9.397971491980119e-07, + "loss": 0.5581566691398621, + "step": 4713 + }, + { + "epoch": 1.0869264468526632, + "grad_norm": 1.475012350727374, + "learning_rate": 9.394165838840196e-07, + "loss": 0.42043447494506836, + "step": 4714 + }, + { + "epoch": 1.0871570209822459, + "grad_norm": 1.3731967040929853, + "learning_rate": 9.39036027376441e-07, + "loss": 0.45076289772987366, + "step": 4715 + }, + { + "epoch": 1.0873875951118284, + "grad_norm": 1.353578451117457, + "learning_rate": 9.386554797305934e-07, + "loss": 0.3650796413421631, + "step": 4716 + }, + { + "epoch": 1.087618169241411, + "grad_norm": 1.436571768450736, + "learning_rate": 9.38274941001794e-07, + "loss": 0.4837912321090698, + "step": 4717 + }, + { + "epoch": 1.0878487433709938, + "grad_norm": 1.5272898845570653, + "learning_rate": 9.378944112453574e-07, + "loss": 
0.41277679800987244, + "step": 4718 + }, + { + "epoch": 1.0880793175005765, + "grad_norm": 1.7344713328668464, + "learning_rate": 9.375138905165973e-07, + "loss": 0.48409390449523926, + "step": 4719 + }, + { + "epoch": 1.088309891630159, + "grad_norm": 1.360949967282617, + "learning_rate": 9.371333788708268e-07, + "loss": 0.3952450752258301, + "step": 4720 + }, + { + "epoch": 1.0885404657597417, + "grad_norm": 1.6450358552008089, + "learning_rate": 9.367528763633563e-07, + "loss": 0.42314866185188293, + "step": 4721 + }, + { + "epoch": 1.0887710398893244, + "grad_norm": 1.492846868063658, + "learning_rate": 9.363723830494966e-07, + "loss": 0.5322449207305908, + "step": 4722 + }, + { + "epoch": 1.0890016140189072, + "grad_norm": 1.3552869600155872, + "learning_rate": 9.359918989845557e-07, + "loss": 0.42307883501052856, + "step": 4723 + }, + { + "epoch": 1.0892321881484897, + "grad_norm": 1.3481901437941268, + "learning_rate": 9.356114242238413e-07, + "loss": 0.39321061968803406, + "step": 4724 + }, + { + "epoch": 1.0894627622780724, + "grad_norm": 1.6333273110158268, + "learning_rate": 9.352309588226585e-07, + "loss": 0.5064421892166138, + "step": 4725 + }, + { + "epoch": 1.089693336407655, + "grad_norm": 1.4475724274606394, + "learning_rate": 9.348505028363125e-07, + "loss": 0.44825220108032227, + "step": 4726 + }, + { + "epoch": 1.0899239105372378, + "grad_norm": 1.384316241889946, + "learning_rate": 9.344700563201065e-07, + "loss": 0.4323306679725647, + "step": 4727 + }, + { + "epoch": 1.0901544846668203, + "grad_norm": 1.3254947105842285, + "learning_rate": 9.340896193293414e-07, + "loss": 0.44907987117767334, + "step": 4728 + }, + { + "epoch": 1.090385058796403, + "grad_norm": 1.3161326376052391, + "learning_rate": 9.337091919193185e-07, + "loss": 0.416559636592865, + "step": 4729 + }, + { + "epoch": 1.0906156329259857, + "grad_norm": 1.6044534711260028, + "learning_rate": 9.33328774145336e-07, + "loss": 0.5361836552619934, + "step": 4730 + }, + { + "epoch": 
1.0908462070555685, + "grad_norm": 1.3742080048163032, + "learning_rate": 9.329483660626922e-07, + "loss": 0.4815465211868286, + "step": 4731 + }, + { + "epoch": 1.091076781185151, + "grad_norm": 1.4553535934080677, + "learning_rate": 9.325679677266826e-07, + "loss": 0.5205050110816956, + "step": 4732 + }, + { + "epoch": 1.0913073553147337, + "grad_norm": 1.9887709257052897, + "learning_rate": 9.321875791926028e-07, + "loss": 0.4830896258354187, + "step": 4733 + }, + { + "epoch": 1.0915379294443164, + "grad_norm": 1.3739860439026885, + "learning_rate": 9.318072005157451e-07, + "loss": 0.4394579827785492, + "step": 4734 + }, + { + "epoch": 1.091768503573899, + "grad_norm": 1.6664317769247758, + "learning_rate": 9.314268317514022e-07, + "loss": 0.4614049792289734, + "step": 4735 + }, + { + "epoch": 1.0919990777034816, + "grad_norm": 1.5989711566807139, + "learning_rate": 9.31046472954864e-07, + "loss": 0.5123867988586426, + "step": 4736 + }, + { + "epoch": 1.0922296518330643, + "grad_norm": 1.879970895540274, + "learning_rate": 9.306661241814204e-07, + "loss": 0.43548035621643066, + "step": 4737 + }, + { + "epoch": 1.092460225962647, + "grad_norm": 1.4190205685105515, + "learning_rate": 9.302857854863579e-07, + "loss": 0.4102709889411926, + "step": 4738 + }, + { + "epoch": 1.0926908000922297, + "grad_norm": 1.7007344632271022, + "learning_rate": 9.299054569249628e-07, + "loss": 0.46276605129241943, + "step": 4739 + }, + { + "epoch": 1.0929213742218122, + "grad_norm": 1.5950261365712695, + "learning_rate": 9.295251385525204e-07, + "loss": 0.47700244188308716, + "step": 4740 + }, + { + "epoch": 1.093151948351395, + "grad_norm": 1.5081940540312389, + "learning_rate": 9.29144830424313e-07, + "loss": 0.5492758750915527, + "step": 4741 + }, + { + "epoch": 1.0933825224809777, + "grad_norm": 1.6521559747103167, + "learning_rate": 9.287645325956228e-07, + "loss": 0.3846803307533264, + "step": 4742 + }, + { + "epoch": 1.0936130966105604, + "grad_norm": 1.4300122822608972, + 
"learning_rate": 9.283842451217294e-07, + "loss": 0.47237372398376465, + "step": 4743 + }, + { + "epoch": 1.0938436707401429, + "grad_norm": 1.6996074936661776, + "learning_rate": 9.280039680579122e-07, + "loss": 0.4651675820350647, + "step": 4744 + }, + { + "epoch": 1.0940742448697256, + "grad_norm": 1.6397662048344088, + "learning_rate": 9.276237014594476e-07, + "loss": 0.5472640991210938, + "step": 4745 + }, + { + "epoch": 1.0943048189993083, + "grad_norm": 1.3158004626748314, + "learning_rate": 9.272434453816117e-07, + "loss": 0.45672351121902466, + "step": 4746 + }, + { + "epoch": 1.094535393128891, + "grad_norm": 1.4246135812847533, + "learning_rate": 9.268631998796785e-07, + "loss": 0.4589729905128479, + "step": 4747 + }, + { + "epoch": 1.0947659672584735, + "grad_norm": 1.4398967186683822, + "learning_rate": 9.264829650089201e-07, + "loss": 0.45882588624954224, + "step": 4748 + }, + { + "epoch": 1.0949965413880562, + "grad_norm": 1.8586265213095916, + "learning_rate": 9.26102740824608e-07, + "loss": 0.6183863282203674, + "step": 4749 + }, + { + "epoch": 1.095227115517639, + "grad_norm": 1.4631882562588927, + "learning_rate": 9.257225273820112e-07, + "loss": 0.4512014389038086, + "step": 4750 + }, + { + "epoch": 1.0954576896472217, + "grad_norm": 1.5706161838979387, + "learning_rate": 9.253423247363983e-07, + "loss": 0.5006139874458313, + "step": 4751 + }, + { + "epoch": 1.0956882637768042, + "grad_norm": 1.4110458948787974, + "learning_rate": 9.249621329430346e-07, + "loss": 0.5394018888473511, + "step": 4752 + }, + { + "epoch": 1.095918837906387, + "grad_norm": 1.5150959480945791, + "learning_rate": 9.245819520571858e-07, + "loss": 0.35523056983947754, + "step": 4753 + }, + { + "epoch": 1.0961494120359696, + "grad_norm": 1.3819812548856059, + "learning_rate": 9.242017821341143e-07, + "loss": 0.44379743933677673, + "step": 4754 + }, + { + "epoch": 1.0963799861655523, + "grad_norm": 1.6129174796361336, + "learning_rate": 9.238216232290821e-07, + "loss": 
0.4190908968448639, + "step": 4755 + }, + { + "epoch": 1.0966105602951348, + "grad_norm": 1.6222067534589701, + "learning_rate": 9.234414753973488e-07, + "loss": 0.44818970561027527, + "step": 4756 + }, + { + "epoch": 1.0968411344247175, + "grad_norm": 1.4925644141379035, + "learning_rate": 9.230613386941734e-07, + "loss": 0.4134204685688019, + "step": 4757 + }, + { + "epoch": 1.0970717085543003, + "grad_norm": 1.2148478016107016, + "learning_rate": 9.226812131748118e-07, + "loss": 0.3554952144622803, + "step": 4758 + }, + { + "epoch": 1.097302282683883, + "grad_norm": 1.674922299722459, + "learning_rate": 9.223010988945194e-07, + "loss": 0.522594690322876, + "step": 4759 + }, + { + "epoch": 1.0975328568134655, + "grad_norm": 1.4320622438584156, + "learning_rate": 9.219209959085502e-07, + "loss": 0.44814133644104004, + "step": 4760 + }, + { + "epoch": 1.0977634309430482, + "grad_norm": 1.4723286174250931, + "learning_rate": 9.215409042721551e-07, + "loss": 0.42479634284973145, + "step": 4761 + }, + { + "epoch": 1.097994005072631, + "grad_norm": 1.5414891522514993, + "learning_rate": 9.211608240405849e-07, + "loss": 0.4384934902191162, + "step": 4762 + }, + { + "epoch": 1.0982245792022136, + "grad_norm": 1.4811013868533904, + "learning_rate": 9.207807552690878e-07, + "loss": 0.5378658771514893, + "step": 4763 + }, + { + "epoch": 1.098455153331796, + "grad_norm": 1.4445039209024981, + "learning_rate": 9.204006980129111e-07, + "loss": 0.5071386694908142, + "step": 4764 + }, + { + "epoch": 1.0986857274613788, + "grad_norm": 1.5460474623164162, + "learning_rate": 9.200206523272992e-07, + "loss": 0.46085822582244873, + "step": 4765 + }, + { + "epoch": 1.0989163015909615, + "grad_norm": 1.544747382675103, + "learning_rate": 9.196406182674964e-07, + "loss": 0.5083057880401611, + "step": 4766 + }, + { + "epoch": 1.0991468757205443, + "grad_norm": 1.2845065354356755, + "learning_rate": 9.192605958887438e-07, + "loss": 0.48307740688323975, + "step": 4767 + }, + { + "epoch": 
1.0993774498501268, + "grad_norm": 1.8405581264672015, + "learning_rate": 9.188805852462824e-07, + "loss": 0.5195509791374207, + "step": 4768 + }, + { + "epoch": 1.0996080239797095, + "grad_norm": 1.5537273798526559, + "learning_rate": 9.185005863953498e-07, + "loss": 0.5161266326904297, + "step": 4769 + }, + { + "epoch": 1.0998385981092922, + "grad_norm": 1.5985708455901557, + "learning_rate": 9.181205993911827e-07, + "loss": 0.4757764935493469, + "step": 4770 + }, + { + "epoch": 1.1000691722388747, + "grad_norm": 1.5307887938016926, + "learning_rate": 9.177406242890167e-07, + "loss": 0.4071381688117981, + "step": 4771 + }, + { + "epoch": 1.1002997463684574, + "grad_norm": 1.3525378547606768, + "learning_rate": 9.173606611440842e-07, + "loss": 0.4794449210166931, + "step": 4772 + }, + { + "epoch": 1.1005303204980401, + "grad_norm": 1.3205547171467464, + "learning_rate": 9.169807100116175e-07, + "loss": 0.4678712487220764, + "step": 4773 + }, + { + "epoch": 1.1007608946276228, + "grad_norm": 1.2863487713029464, + "learning_rate": 9.166007709468456e-07, + "loss": 0.43200960755348206, + "step": 4774 + }, + { + "epoch": 1.1009914687572055, + "grad_norm": 1.8114336882311408, + "learning_rate": 9.162208440049974e-07, + "loss": 0.49283260107040405, + "step": 4775 + }, + { + "epoch": 1.101222042886788, + "grad_norm": 1.2265456496064566, + "learning_rate": 9.158409292412982e-07, + "loss": 0.4430215358734131, + "step": 4776 + }, + { + "epoch": 1.1014526170163708, + "grad_norm": 1.282698473472426, + "learning_rate": 9.154610267109731e-07, + "loss": 0.4529581069946289, + "step": 4777 + }, + { + "epoch": 1.1016831911459535, + "grad_norm": 1.3698366211761768, + "learning_rate": 9.150811364692446e-07, + "loss": 0.3872554302215576, + "step": 4778 + }, + { + "epoch": 1.101913765275536, + "grad_norm": 1.4034579683870105, + "learning_rate": 9.147012585713331e-07, + "loss": 0.466983437538147, + "step": 4779 + }, + { + "epoch": 1.1021443394051187, + "grad_norm": 1.3799350437064777, + 
"learning_rate": 9.143213930724587e-07, + "loss": 0.4841456115245819, + "step": 4780 + }, + { + "epoch": 1.1023749135347014, + "grad_norm": 2.083063073101601, + "learning_rate": 9.139415400278376e-07, + "loss": 0.4506613612174988, + "step": 4781 + }, + { + "epoch": 1.1026054876642841, + "grad_norm": 1.5047320834529434, + "learning_rate": 9.135616994926861e-07, + "loss": 0.428241491317749, + "step": 4782 + }, + { + "epoch": 1.1028360617938668, + "grad_norm": 1.3329992006000018, + "learning_rate": 9.131818715222175e-07, + "loss": 0.46940821409225464, + "step": 4783 + }, + { + "epoch": 1.1030666359234493, + "grad_norm": 1.5416614978551508, + "learning_rate": 9.12802056171644e-07, + "loss": 0.4527658224105835, + "step": 4784 + }, + { + "epoch": 1.103297210053032, + "grad_norm": 1.3412511641642377, + "learning_rate": 9.124222534961749e-07, + "loss": 0.3284989893436432, + "step": 4785 + }, + { + "epoch": 1.1035277841826148, + "grad_norm": 1.497248247266052, + "learning_rate": 9.120424635510193e-07, + "loss": 0.448346883058548, + "step": 4786 + }, + { + "epoch": 1.1037583583121973, + "grad_norm": 1.5413647461227613, + "learning_rate": 9.116626863913826e-07, + "loss": 0.4625587463378906, + "step": 4787 + }, + { + "epoch": 1.10398893244178, + "grad_norm": 1.398727589269655, + "learning_rate": 9.112829220724703e-07, + "loss": 0.37891942262649536, + "step": 4788 + }, + { + "epoch": 1.1042195065713627, + "grad_norm": 1.510309439727558, + "learning_rate": 9.109031706494841e-07, + "loss": 0.48719239234924316, + "step": 4789 + }, + { + "epoch": 1.1044500807009454, + "grad_norm": 1.695631911449914, + "learning_rate": 9.105234321776247e-07, + "loss": 0.5341615676879883, + "step": 4790 + }, + { + "epoch": 1.1046806548305281, + "grad_norm": 1.30752453253924, + "learning_rate": 9.101437067120918e-07, + "loss": 0.36677777767181396, + "step": 4791 + }, + { + "epoch": 1.1049112289601106, + "grad_norm": 1.3000512165603213, + "learning_rate": 9.097639943080813e-07, + "loss": 
0.4348159432411194, + "step": 4792 + }, + { + "epoch": 1.1051418030896933, + "grad_norm": 1.3763164723830184, + "learning_rate": 9.093842950207891e-07, + "loss": 0.44912683963775635, + "step": 4793 + }, + { + "epoch": 1.105372377219276, + "grad_norm": 1.655048045877048, + "learning_rate": 9.090046089054077e-07, + "loss": 0.5576057434082031, + "step": 4794 + }, + { + "epoch": 1.1056029513488586, + "grad_norm": 1.4655907130631036, + "learning_rate": 9.08624936017129e-07, + "loss": 0.43964770436286926, + "step": 4795 + }, + { + "epoch": 1.1058335254784413, + "grad_norm": 1.3648059541391266, + "learning_rate": 9.082452764111415e-07, + "loss": 0.4285386800765991, + "step": 4796 + }, + { + "epoch": 1.106064099608024, + "grad_norm": 1.6322901017927212, + "learning_rate": 9.078656301426332e-07, + "loss": 0.4257868230342865, + "step": 4797 + }, + { + "epoch": 1.1062946737376067, + "grad_norm": 1.9314022304382554, + "learning_rate": 9.074859972667895e-07, + "loss": 0.4540346562862396, + "step": 4798 + }, + { + "epoch": 1.1065252478671892, + "grad_norm": 1.6801359554397164, + "learning_rate": 9.071063778387933e-07, + "loss": 0.5273457765579224, + "step": 4799 + }, + { + "epoch": 1.106755821996772, + "grad_norm": 1.4107980839711056, + "learning_rate": 9.067267719138268e-07, + "loss": 0.391310453414917, + "step": 4800 + }, + { + "epoch": 1.1069863961263546, + "grad_norm": 1.4182050274963418, + "learning_rate": 9.063471795470691e-07, + "loss": 0.47945383191108704, + "step": 4801 + }, + { + "epoch": 1.1072169702559373, + "grad_norm": 1.7087277476088294, + "learning_rate": 9.05967600793698e-07, + "loss": 0.49561476707458496, + "step": 4802 + }, + { + "epoch": 1.1074475443855198, + "grad_norm": 1.3070252929290396, + "learning_rate": 9.05588035708889e-07, + "loss": 0.4505256414413452, + "step": 4803 + }, + { + "epoch": 1.1076781185151026, + "grad_norm": 1.6864844579974707, + "learning_rate": 9.052084843478164e-07, + "loss": 0.37591490149497986, + "step": 4804 + }, + { + "epoch": 
1.1079086926446853, + "grad_norm": 1.486226704077577, + "learning_rate": 9.048289467656508e-07, + "loss": 0.478586345911026, + "step": 4805 + }, + { + "epoch": 1.108139266774268, + "grad_norm": 1.3819959446941394, + "learning_rate": 9.044494230175625e-07, + "loss": 0.4373725354671478, + "step": 4806 + }, + { + "epoch": 1.1083698409038505, + "grad_norm": 1.4091791216138099, + "learning_rate": 9.040699131587186e-07, + "loss": 0.3976345360279083, + "step": 4807 + }, + { + "epoch": 1.1086004150334332, + "grad_norm": 1.3848852740812903, + "learning_rate": 9.036904172442857e-07, + "loss": 0.44611310958862305, + "step": 4808 + }, + { + "epoch": 1.108830989163016, + "grad_norm": 1.3117584806534919, + "learning_rate": 9.033109353294262e-07, + "loss": 0.40816667675971985, + "step": 4809 + }, + { + "epoch": 1.1090615632925986, + "grad_norm": 1.359605756890841, + "learning_rate": 9.029314674693023e-07, + "loss": 0.37462317943573, + "step": 4810 + }, + { + "epoch": 1.1092921374221811, + "grad_norm": 1.3641846963299056, + "learning_rate": 9.025520137190735e-07, + "loss": 0.3856509327888489, + "step": 4811 + }, + { + "epoch": 1.1095227115517639, + "grad_norm": 1.5740711616700624, + "learning_rate": 9.021725741338969e-07, + "loss": 0.4728443920612335, + "step": 4812 + }, + { + "epoch": 1.1097532856813466, + "grad_norm": 2.0717537833557773, + "learning_rate": 9.017931487689282e-07, + "loss": 0.4614938795566559, + "step": 4813 + }, + { + "epoch": 1.1099838598109293, + "grad_norm": 1.4925546437709947, + "learning_rate": 9.014137376793203e-07, + "loss": 0.4137331247329712, + "step": 4814 + }, + { + "epoch": 1.1102144339405118, + "grad_norm": 1.2481779358565226, + "learning_rate": 9.010343409202255e-07, + "loss": 0.42436620593070984, + "step": 4815 + }, + { + "epoch": 1.1104450080700945, + "grad_norm": 1.3339513565407848, + "learning_rate": 9.006549585467916e-07, + "loss": 0.43592822551727295, + "step": 4816 + }, + { + "epoch": 1.1106755821996772, + "grad_norm": 1.3742872645989155, + 
"learning_rate": 9.002755906141666e-07, + "loss": 0.45627349615097046, + "step": 4817 + }, + { + "epoch": 1.11090615632926, + "grad_norm": 1.819907938722267, + "learning_rate": 8.998962371774953e-07, + "loss": 0.5103771686553955, + "step": 4818 + }, + { + "epoch": 1.1111367304588424, + "grad_norm": 1.4418115437773273, + "learning_rate": 8.995168982919201e-07, + "loss": 0.470276802778244, + "step": 4819 + }, + { + "epoch": 1.1113673045884251, + "grad_norm": 1.3186176277536419, + "learning_rate": 8.991375740125823e-07, + "loss": 0.49486416578292847, + "step": 4820 + }, + { + "epoch": 1.1115978787180079, + "grad_norm": 1.143316450397621, + "learning_rate": 8.987582643946201e-07, + "loss": 0.338329017162323, + "step": 4821 + }, + { + "epoch": 1.1118284528475906, + "grad_norm": 1.4885392176771477, + "learning_rate": 8.983789694931706e-07, + "loss": 0.38252198696136475, + "step": 4822 + }, + { + "epoch": 1.112059026977173, + "grad_norm": 1.4537319037859584, + "learning_rate": 8.979996893633675e-07, + "loss": 0.47691571712493896, + "step": 4823 + }, + { + "epoch": 1.1122896011067558, + "grad_norm": 1.41954873904419, + "learning_rate": 8.976204240603433e-07, + "loss": 0.40156808495521545, + "step": 4824 + }, + { + "epoch": 1.1125201752363385, + "grad_norm": 1.312743475511893, + "learning_rate": 8.97241173639228e-07, + "loss": 0.3837090730667114, + "step": 4825 + }, + { + "epoch": 1.1127507493659212, + "grad_norm": 1.6300077035939553, + "learning_rate": 8.968619381551499e-07, + "loss": 0.5094380378723145, + "step": 4826 + }, + { + "epoch": 1.1129813234955037, + "grad_norm": 1.4389159508234053, + "learning_rate": 8.964827176632339e-07, + "loss": 0.48674100637435913, + "step": 4827 + }, + { + "epoch": 1.1132118976250864, + "grad_norm": 1.7742534070601, + "learning_rate": 8.961035122186045e-07, + "loss": 0.49288761615753174, + "step": 4828 + }, + { + "epoch": 1.1134424717546691, + "grad_norm": 1.4156686622304593, + "learning_rate": 8.957243218763824e-07, + "loss": 
0.42933952808380127, + "step": 4829 + }, + { + "epoch": 1.1136730458842519, + "grad_norm": 1.838762036908513, + "learning_rate": 8.953451466916866e-07, + "loss": 0.39244914054870605, + "step": 4830 + }, + { + "epoch": 1.1139036200138344, + "grad_norm": 1.3776049792093739, + "learning_rate": 8.949659867196348e-07, + "loss": 0.44688090682029724, + "step": 4831 + }, + { + "epoch": 1.114134194143417, + "grad_norm": 1.6923430022628052, + "learning_rate": 8.945868420153409e-07, + "loss": 0.5388743877410889, + "step": 4832 + }, + { + "epoch": 1.1143647682729998, + "grad_norm": 1.6108426528928312, + "learning_rate": 8.942077126339182e-07, + "loss": 0.4320666193962097, + "step": 4833 + }, + { + "epoch": 1.1145953424025825, + "grad_norm": 1.3700008221476991, + "learning_rate": 8.938285986304762e-07, + "loss": 0.37623411417007446, + "step": 4834 + }, + { + "epoch": 1.114825916532165, + "grad_norm": 1.4274453986312428, + "learning_rate": 8.93449500060124e-07, + "loss": 0.4743962287902832, + "step": 4835 + }, + { + "epoch": 1.1150564906617477, + "grad_norm": 1.4687481503878526, + "learning_rate": 8.930704169779663e-07, + "loss": 0.4833221435546875, + "step": 4836 + }, + { + "epoch": 1.1152870647913304, + "grad_norm": 1.580828459296504, + "learning_rate": 8.926913494391074e-07, + "loss": 0.48811084032058716, + "step": 4837 + }, + { + "epoch": 1.1155176389209132, + "grad_norm": 1.4663777441823886, + "learning_rate": 8.923122974986487e-07, + "loss": 0.42525774240493774, + "step": 4838 + }, + { + "epoch": 1.1157482130504957, + "grad_norm": 1.4773669175093567, + "learning_rate": 8.919332612116884e-07, + "loss": 0.4347909688949585, + "step": 4839 + }, + { + "epoch": 1.1159787871800784, + "grad_norm": 1.9619203877260345, + "learning_rate": 8.915542406333241e-07, + "loss": 0.5085601806640625, + "step": 4840 + }, + { + "epoch": 1.116209361309661, + "grad_norm": 1.4214902735687815, + "learning_rate": 8.911752358186497e-07, + "loss": 0.4620482325553894, + "step": 4841 + }, + { + "epoch": 
1.1164399354392438, + "grad_norm": 1.3147570239530335, + "learning_rate": 8.907962468227582e-07, + "loss": 0.44923216104507446, + "step": 4842 + }, + { + "epoch": 1.1166705095688263, + "grad_norm": 1.6422580107908513, + "learning_rate": 8.904172737007386e-07, + "loss": 0.547439694404602, + "step": 4843 + }, + { + "epoch": 1.116901083698409, + "grad_norm": 1.7769022711207687, + "learning_rate": 8.900383165076789e-07, + "loss": 0.4609268307685852, + "step": 4844 + }, + { + "epoch": 1.1171316578279917, + "grad_norm": 1.4046866803141593, + "learning_rate": 8.896593752986642e-07, + "loss": 0.41780030727386475, + "step": 4845 + }, + { + "epoch": 1.1173622319575744, + "grad_norm": 1.3641825367692086, + "learning_rate": 8.89280450128778e-07, + "loss": 0.506212592124939, + "step": 4846 + }, + { + "epoch": 1.117592806087157, + "grad_norm": 1.4049897839890735, + "learning_rate": 8.889015410531001e-07, + "loss": 0.4436545968055725, + "step": 4847 + }, + { + "epoch": 1.1178233802167397, + "grad_norm": 1.3856199735325436, + "learning_rate": 8.885226481267093e-07, + "loss": 0.4473826289176941, + "step": 4848 + }, + { + "epoch": 1.1180539543463224, + "grad_norm": 1.42622736433257, + "learning_rate": 8.881437714046815e-07, + "loss": 0.43499836325645447, + "step": 4849 + }, + { + "epoch": 1.118284528475905, + "grad_norm": 1.5927469786677344, + "learning_rate": 8.877649109420899e-07, + "loss": 0.522705078125, + "step": 4850 + }, + { + "epoch": 1.1185151026054876, + "grad_norm": 1.5596781330511842, + "learning_rate": 8.873860667940064e-07, + "loss": 0.42146036028862, + "step": 4851 + }, + { + "epoch": 1.1187456767350703, + "grad_norm": 1.649425162171124, + "learning_rate": 8.870072390154989e-07, + "loss": 0.5875130891799927, + "step": 4852 + }, + { + "epoch": 1.118976250864653, + "grad_norm": 1.6372722830693418, + "learning_rate": 8.866284276616345e-07, + "loss": 0.5187985301017761, + "step": 4853 + }, + { + "epoch": 1.1192068249942357, + "grad_norm": 2.6266893474509474, + 
"learning_rate": 8.86249632787477e-07, + "loss": 0.46115952730178833, + "step": 4854 + }, + { + "epoch": 1.1194373991238182, + "grad_norm": 1.4714921061709185, + "learning_rate": 8.858708544480886e-07, + "loss": 0.4926493167877197, + "step": 4855 + }, + { + "epoch": 1.119667973253401, + "grad_norm": 1.5525331026142626, + "learning_rate": 8.854920926985278e-07, + "loss": 0.44512006640434265, + "step": 4856 + }, + { + "epoch": 1.1198985473829837, + "grad_norm": 1.5145408688074757, + "learning_rate": 8.85113347593852e-07, + "loss": 0.45973241329193115, + "step": 4857 + }, + { + "epoch": 1.1201291215125664, + "grad_norm": 1.5400172209521554, + "learning_rate": 8.847346191891157e-07, + "loss": 0.4915385842323303, + "step": 4858 + }, + { + "epoch": 1.1203596956421489, + "grad_norm": 1.4900152202768027, + "learning_rate": 8.843559075393701e-07, + "loss": 0.4457864463329315, + "step": 4859 + }, + { + "epoch": 1.1205902697717316, + "grad_norm": 1.3414730221020197, + "learning_rate": 8.839772126996658e-07, + "loss": 0.4782453775405884, + "step": 4860 + }, + { + "epoch": 1.1208208439013143, + "grad_norm": 1.3591384899787133, + "learning_rate": 8.835985347250492e-07, + "loss": 0.42789584398269653, + "step": 4861 + }, + { + "epoch": 1.121051418030897, + "grad_norm": 1.8532602863182117, + "learning_rate": 8.832198736705657e-07, + "loss": 0.49990910291671753, + "step": 4862 + }, + { + "epoch": 1.1212819921604795, + "grad_norm": 1.4158258863269764, + "learning_rate": 8.828412295912566e-07, + "loss": 0.3735005855560303, + "step": 4863 + }, + { + "epoch": 1.1215125662900622, + "grad_norm": 1.3744374187815367, + "learning_rate": 8.824626025421624e-07, + "loss": 0.402673602104187, + "step": 4864 + }, + { + "epoch": 1.121743140419645, + "grad_norm": 1.57241412674585, + "learning_rate": 8.820839925783198e-07, + "loss": 0.4675491452217102, + "step": 4865 + }, + { + "epoch": 1.1219737145492277, + "grad_norm": 2.0200104658377254, + "learning_rate": 8.817053997547645e-07, + "loss": 
0.5098662376403809, + "step": 4866 + }, + { + "epoch": 1.1222042886788102, + "grad_norm": 1.3880207155981488, + "learning_rate": 8.813268241265278e-07, + "loss": 0.44478029012680054, + "step": 4867 + }, + { + "epoch": 1.1224348628083929, + "grad_norm": 1.4983402004688406, + "learning_rate": 8.809482657486401e-07, + "loss": 0.410754919052124, + "step": 4868 + }, + { + "epoch": 1.1226654369379756, + "grad_norm": 1.193726420763111, + "learning_rate": 8.805697246761288e-07, + "loss": 0.4198191165924072, + "step": 4869 + }, + { + "epoch": 1.1228960110675583, + "grad_norm": 1.6015778378598091, + "learning_rate": 8.801912009640178e-07, + "loss": 0.5399911403656006, + "step": 4870 + }, + { + "epoch": 1.1231265851971408, + "grad_norm": 1.3209581029003303, + "learning_rate": 8.798126946673305e-07, + "loss": 0.3879680633544922, + "step": 4871 + }, + { + "epoch": 1.1233571593267235, + "grad_norm": 1.7893299917127135, + "learning_rate": 8.794342058410856e-07, + "loss": 0.4629073739051819, + "step": 4872 + }, + { + "epoch": 1.1235877334563062, + "grad_norm": 1.25180398717926, + "learning_rate": 8.790557345403013e-07, + "loss": 0.42299884557724, + "step": 4873 + }, + { + "epoch": 1.123818307585889, + "grad_norm": 1.5467146262725529, + "learning_rate": 8.786772808199912e-07, + "loss": 0.509437620639801, + "step": 4874 + }, + { + "epoch": 1.1240488817154715, + "grad_norm": 1.3436359029840506, + "learning_rate": 8.782988447351684e-07, + "loss": 0.4682687222957611, + "step": 4875 + }, + { + "epoch": 1.1242794558450542, + "grad_norm": 1.2884743737928093, + "learning_rate": 8.779204263408416e-07, + "loss": 0.41155606508255005, + "step": 4876 + }, + { + "epoch": 1.124510029974637, + "grad_norm": 1.6449136860944156, + "learning_rate": 8.775420256920182e-07, + "loss": 0.4705810844898224, + "step": 4877 + }, + { + "epoch": 1.1247406041042196, + "grad_norm": 1.4648471947605348, + "learning_rate": 8.771636428437022e-07, + "loss": 0.36571264266967773, + "step": 4878 + }, + { + "epoch": 
1.124971178233802, + "grad_norm": 1.1768139651906544, + "learning_rate": 8.76785277850896e-07, + "loss": 0.36618396639823914, + "step": 4879 + }, + { + "epoch": 1.1252017523633848, + "grad_norm": 1.5334328638730685, + "learning_rate": 8.764069307685983e-07, + "loss": 0.4861210584640503, + "step": 4880 + }, + { + "epoch": 1.1254323264929675, + "grad_norm": 1.457839206264918, + "learning_rate": 8.760286016518056e-07, + "loss": 0.43346846103668213, + "step": 4881 + }, + { + "epoch": 1.12566290062255, + "grad_norm": 1.28421921022301, + "learning_rate": 8.756502905555123e-07, + "loss": 0.40088707208633423, + "step": 4882 + }, + { + "epoch": 1.1258934747521328, + "grad_norm": 1.4643062187844458, + "learning_rate": 8.752719975347092e-07, + "loss": 0.4088619649410248, + "step": 4883 + }, + { + "epoch": 1.1261240488817155, + "grad_norm": 1.5527291710325282, + "learning_rate": 8.748937226443857e-07, + "loss": 0.4988909661769867, + "step": 4884 + }, + { + "epoch": 1.1263546230112982, + "grad_norm": 1.5377239167998313, + "learning_rate": 8.745154659395271e-07, + "loss": 0.47022196650505066, + "step": 4885 + }, + { + "epoch": 1.126585197140881, + "grad_norm": 1.3259626220698026, + "learning_rate": 8.741372274751178e-07, + "loss": 0.45005398988723755, + "step": 4886 + }, + { + "epoch": 1.1268157712704634, + "grad_norm": 1.5001674672720546, + "learning_rate": 8.737590073061376e-07, + "loss": 0.4632537364959717, + "step": 4887 + }, + { + "epoch": 1.1270463454000461, + "grad_norm": 1.2983235840008036, + "learning_rate": 8.733808054875653e-07, + "loss": 0.41034963726997375, + "step": 4888 + }, + { + "epoch": 1.1272769195296288, + "grad_norm": 1.423352740140202, + "learning_rate": 8.730026220743765e-07, + "loss": 0.5169668793678284, + "step": 4889 + }, + { + "epoch": 1.1275074936592113, + "grad_norm": 1.46630659535839, + "learning_rate": 8.726244571215431e-07, + "loss": 0.44972485303878784, + "step": 4890 + }, + { + "epoch": 1.127738067788794, + "grad_norm": 1.5712937661942725, + 
"learning_rate": 8.722463106840361e-07, + "loss": 0.4854368567466736, + "step": 4891 + }, + { + "epoch": 1.1279686419183768, + "grad_norm": 1.0525840961962005, + "learning_rate": 8.718681828168223e-07, + "loss": 0.3029147982597351, + "step": 4892 + }, + { + "epoch": 1.1281992160479595, + "grad_norm": 1.5856241308624208, + "learning_rate": 8.714900735748671e-07, + "loss": 0.4770504832267761, + "step": 4893 + }, + { + "epoch": 1.1284297901775422, + "grad_norm": 1.3799690323722245, + "learning_rate": 8.711119830131317e-07, + "loss": 0.48508110642433167, + "step": 4894 + }, + { + "epoch": 1.1286603643071247, + "grad_norm": 1.4227656672873528, + "learning_rate": 8.707339111865761e-07, + "loss": 0.43302488327026367, + "step": 4895 + }, + { + "epoch": 1.1288909384367074, + "grad_norm": 1.3481652076868464, + "learning_rate": 8.703558581501563e-07, + "loss": 0.5720575451850891, + "step": 4896 + }, + { + "epoch": 1.1291215125662901, + "grad_norm": 1.1736572520471924, + "learning_rate": 8.69977823958827e-07, + "loss": 0.48236098885536194, + "step": 4897 + }, + { + "epoch": 1.1293520866958726, + "grad_norm": 1.6539784416028527, + "learning_rate": 8.69599808667538e-07, + "loss": 0.48531901836395264, + "step": 4898 + }, + { + "epoch": 1.1295826608254553, + "grad_norm": 1.390226643422974, + "learning_rate": 8.69221812331239e-07, + "loss": 0.4150174856185913, + "step": 4899 + }, + { + "epoch": 1.129813234955038, + "grad_norm": 1.4594360531114157, + "learning_rate": 8.688438350048748e-07, + "loss": 0.4729560911655426, + "step": 4900 + }, + { + "epoch": 1.1300438090846208, + "grad_norm": 1.5805161631694824, + "learning_rate": 8.684658767433881e-07, + "loss": 0.5081748962402344, + "step": 4901 + }, + { + "epoch": 1.1302743832142035, + "grad_norm": 1.3577399194161552, + "learning_rate": 8.680879376017197e-07, + "loss": 0.4552333354949951, + "step": 4902 + }, + { + "epoch": 1.130504957343786, + "grad_norm": 1.666206186626053, + "learning_rate": 8.67710017634806e-07, + "loss": 
0.4784387946128845, + "step": 4903 + }, + { + "epoch": 1.1307355314733687, + "grad_norm": 1.7781011363806714, + "learning_rate": 8.673321168975823e-07, + "loss": 0.46922338008880615, + "step": 4904 + }, + { + "epoch": 1.1309661056029514, + "grad_norm": 1.414520843561148, + "learning_rate": 8.669542354449797e-07, + "loss": 0.38181525468826294, + "step": 4905 + }, + { + "epoch": 1.131196679732534, + "grad_norm": 1.409807627526861, + "learning_rate": 8.665763733319278e-07, + "loss": 0.4729689359664917, + "step": 4906 + }, + { + "epoch": 1.1314272538621166, + "grad_norm": 1.3128859029806206, + "learning_rate": 8.661985306133517e-07, + "loss": 0.3934294581413269, + "step": 4907 + }, + { + "epoch": 1.1316578279916993, + "grad_norm": 1.1525332387894895, + "learning_rate": 8.658207073441754e-07, + "loss": 0.40270352363586426, + "step": 4908 + }, + { + "epoch": 1.131888402121282, + "grad_norm": 1.245477282269021, + "learning_rate": 8.654429035793196e-07, + "loss": 0.43291163444519043, + "step": 4909 + }, + { + "epoch": 1.1321189762508648, + "grad_norm": 1.8011937733870678, + "learning_rate": 8.650651193737009e-07, + "loss": 0.5054877996444702, + "step": 4910 + }, + { + "epoch": 1.1323495503804473, + "grad_norm": 1.4188548576207016, + "learning_rate": 8.646873547822347e-07, + "loss": 0.5043776035308838, + "step": 4911 + }, + { + "epoch": 1.13258012451003, + "grad_norm": 1.511127988179462, + "learning_rate": 8.643096098598328e-07, + "loss": 0.4246225953102112, + "step": 4912 + }, + { + "epoch": 1.1328106986396127, + "grad_norm": 1.3198976342579845, + "learning_rate": 8.639318846614048e-07, + "loss": 0.4514849781990051, + "step": 4913 + }, + { + "epoch": 1.1330412727691952, + "grad_norm": 1.5409054507370947, + "learning_rate": 8.635541792418557e-07, + "loss": 0.4780477285385132, + "step": 4914 + }, + { + "epoch": 1.133271846898778, + "grad_norm": 1.4447509965410514, + "learning_rate": 8.631764936560899e-07, + "loss": 0.47164270281791687, + "step": 4915 + }, + { + "epoch": 
1.1335024210283606, + "grad_norm": 1.4642572467177732, + "learning_rate": 8.62798827959007e-07, + "loss": 0.5462276339530945, + "step": 4916 + }, + { + "epoch": 1.1337329951579433, + "grad_norm": 1.3611348332418316, + "learning_rate": 8.624211822055055e-07, + "loss": 0.37229591608047485, + "step": 4917 + }, + { + "epoch": 1.133963569287526, + "grad_norm": 1.6004056206114348, + "learning_rate": 8.620435564504791e-07, + "loss": 0.46595901250839233, + "step": 4918 + }, + { + "epoch": 1.1341941434171086, + "grad_norm": 1.899603419019246, + "learning_rate": 8.616659507488201e-07, + "loss": 0.4645708203315735, + "step": 4919 + }, + { + "epoch": 1.1344247175466913, + "grad_norm": 1.3014565799840314, + "learning_rate": 8.612883651554173e-07, + "loss": 0.4309888482093811, + "step": 4920 + }, + { + "epoch": 1.134655291676274, + "grad_norm": 1.2254662174184374, + "learning_rate": 8.60910799725156e-07, + "loss": 0.4000548720359802, + "step": 4921 + }, + { + "epoch": 1.1348858658058565, + "grad_norm": 1.2990272231335294, + "learning_rate": 8.6053325451292e-07, + "loss": 0.41321274638175964, + "step": 4922 + }, + { + "epoch": 1.1351164399354392, + "grad_norm": 1.7479036509525407, + "learning_rate": 8.601557295735884e-07, + "loss": 0.38982951641082764, + "step": 4923 + }, + { + "epoch": 1.135347014065022, + "grad_norm": 1.3265126570648142, + "learning_rate": 8.597782249620394e-07, + "loss": 0.44623300433158875, + "step": 4924 + }, + { + "epoch": 1.1355775881946046, + "grad_norm": 1.6004563551212632, + "learning_rate": 8.594007407331458e-07, + "loss": 0.46876993775367737, + "step": 4925 + }, + { + "epoch": 1.1358081623241871, + "grad_norm": 1.4785026933128127, + "learning_rate": 8.590232769417803e-07, + "loss": 0.41345149278640747, + "step": 4926 + }, + { + "epoch": 1.1360387364537698, + "grad_norm": 1.6712340860086734, + "learning_rate": 8.586458336428095e-07, + "loss": 0.4199402332305908, + "step": 4927 + }, + { + "epoch": 1.1362693105833526, + "grad_norm": 1.5807454346525946, + 
"learning_rate": 8.582684108910998e-07, + "loss": 0.4424753785133362, + "step": 4928 + }, + { + "epoch": 1.1364998847129353, + "grad_norm": 1.5318763722061228, + "learning_rate": 8.57891008741513e-07, + "loss": 0.5066598057746887, + "step": 4929 + }, + { + "epoch": 1.1367304588425178, + "grad_norm": 1.409045447069904, + "learning_rate": 8.575136272489081e-07, + "loss": 0.45959407091140747, + "step": 4930 + }, + { + "epoch": 1.1369610329721005, + "grad_norm": 1.191773933725539, + "learning_rate": 8.571362664681415e-07, + "loss": 0.4579051733016968, + "step": 4931 + }, + { + "epoch": 1.1371916071016832, + "grad_norm": 1.4061203144708347, + "learning_rate": 8.567589264540665e-07, + "loss": 0.5125559568405151, + "step": 4932 + }, + { + "epoch": 1.137422181231266, + "grad_norm": 1.484125992313306, + "learning_rate": 8.563816072615335e-07, + "loss": 0.4236595630645752, + "step": 4933 + }, + { + "epoch": 1.1376527553608484, + "grad_norm": 1.3909472723060943, + "learning_rate": 8.56004308945389e-07, + "loss": 0.40187013149261475, + "step": 4934 + }, + { + "epoch": 1.1378833294904311, + "grad_norm": 1.7306785223672838, + "learning_rate": 8.556270315604778e-07, + "loss": 0.5069487690925598, + "step": 4935 + }, + { + "epoch": 1.1381139036200139, + "grad_norm": 1.2666499948179348, + "learning_rate": 8.552497751616406e-07, + "loss": 0.4032680094242096, + "step": 4936 + }, + { + "epoch": 1.1383444777495966, + "grad_norm": 1.5147949059405765, + "learning_rate": 8.548725398037158e-07, + "loss": 0.4745323061943054, + "step": 4937 + }, + { + "epoch": 1.138575051879179, + "grad_norm": 1.6025857024716508, + "learning_rate": 8.544953255415379e-07, + "loss": 0.5203470587730408, + "step": 4938 + }, + { + "epoch": 1.1388056260087618, + "grad_norm": 1.3018365690111693, + "learning_rate": 8.541181324299392e-07, + "loss": 0.3780457079410553, + "step": 4939 + }, + { + "epoch": 1.1390362001383445, + "grad_norm": 1.4908739703097478, + "learning_rate": 8.537409605237486e-07, + "loss": 
0.4544069766998291, + "step": 4940 + }, + { + "epoch": 1.1392667742679272, + "grad_norm": 1.3726631913286653, + "learning_rate": 8.533638098777914e-07, + "loss": 0.3692469000816345, + "step": 4941 + }, + { + "epoch": 1.1394973483975097, + "grad_norm": 1.7461198015621147, + "learning_rate": 8.529866805468907e-07, + "loss": 0.4733508825302124, + "step": 4942 + }, + { + "epoch": 1.1397279225270924, + "grad_norm": 1.7055847796006547, + "learning_rate": 8.526095725858658e-07, + "loss": 0.5165152549743652, + "step": 4943 + }, + { + "epoch": 1.1399584966566751, + "grad_norm": 1.5781652989183093, + "learning_rate": 8.522324860495336e-07, + "loss": 0.40220290422439575, + "step": 4944 + }, + { + "epoch": 1.1401890707862579, + "grad_norm": 1.676524129553008, + "learning_rate": 8.518554209927066e-07, + "loss": 0.511976957321167, + "step": 4945 + }, + { + "epoch": 1.1404196449158404, + "grad_norm": 1.4578766238891505, + "learning_rate": 8.514783774701959e-07, + "loss": 0.4472247362136841, + "step": 4946 + }, + { + "epoch": 1.140650219045423, + "grad_norm": 1.3731717985494665, + "learning_rate": 8.51101355536808e-07, + "loss": 0.4368797242641449, + "step": 4947 + }, + { + "epoch": 1.1408807931750058, + "grad_norm": 1.3383514367818596, + "learning_rate": 8.507243552473476e-07, + "loss": 0.3794320225715637, + "step": 4948 + }, + { + "epoch": 1.1411113673045885, + "grad_norm": 1.7604514892248042, + "learning_rate": 8.50347376656615e-07, + "loss": 0.5229817628860474, + "step": 4949 + }, + { + "epoch": 1.141341941434171, + "grad_norm": 1.4803188344976619, + "learning_rate": 8.499704198194075e-07, + "loss": 0.4771326780319214, + "step": 4950 + }, + { + "epoch": 1.1415725155637537, + "grad_norm": 1.406078110966921, + "learning_rate": 8.495934847905201e-07, + "loss": 0.45151978731155396, + "step": 4951 + }, + { + "epoch": 1.1418030896933364, + "grad_norm": 1.3579359781108167, + "learning_rate": 8.492165716247439e-07, + "loss": 0.3963208496570587, + "step": 4952 + }, + { + "epoch": 
1.1420336638229192, + "grad_norm": 1.2797227148111936, + "learning_rate": 8.488396803768675e-07, + "loss": 0.37465882301330566, + "step": 4953 + }, + { + "epoch": 1.1422642379525016, + "grad_norm": 1.7257432451816517, + "learning_rate": 8.484628111016752e-07, + "loss": 0.437372088432312, + "step": 4954 + }, + { + "epoch": 1.1424948120820844, + "grad_norm": 1.3198726990576308, + "learning_rate": 8.480859638539492e-07, + "loss": 0.40495651960372925, + "step": 4955 + }, + { + "epoch": 1.142725386211667, + "grad_norm": 1.5937176142563847, + "learning_rate": 8.477091386884677e-07, + "loss": 0.5346927642822266, + "step": 4956 + }, + { + "epoch": 1.1429559603412498, + "grad_norm": 1.7035083737998966, + "learning_rate": 8.473323356600068e-07, + "loss": 0.42448925971984863, + "step": 4957 + }, + { + "epoch": 1.1431865344708323, + "grad_norm": 1.4329878189218077, + "learning_rate": 8.469555548233378e-07, + "loss": 0.4715193808078766, + "step": 4958 + }, + { + "epoch": 1.143417108600415, + "grad_norm": 1.5249370547485697, + "learning_rate": 8.465787962332301e-07, + "loss": 0.4721440076828003, + "step": 4959 + }, + { + "epoch": 1.1436476827299977, + "grad_norm": 1.4963659204960478, + "learning_rate": 8.462020599444495e-07, + "loss": 0.5478333234786987, + "step": 4960 + }, + { + "epoch": 1.1438782568595804, + "grad_norm": 1.5534391969085817, + "learning_rate": 8.458253460117577e-07, + "loss": 0.4005582928657532, + "step": 4961 + }, + { + "epoch": 1.144108830989163, + "grad_norm": 1.4816205297794078, + "learning_rate": 8.454486544899146e-07, + "loss": 0.43886178731918335, + "step": 4962 + }, + { + "epoch": 1.1443394051187457, + "grad_norm": 1.2296294541393762, + "learning_rate": 8.450719854336758e-07, + "loss": 0.4404095709323883, + "step": 4963 + }, + { + "epoch": 1.1445699792483284, + "grad_norm": 1.5412493838775327, + "learning_rate": 8.446953388977943e-07, + "loss": 0.5386335849761963, + "step": 4964 + }, + { + "epoch": 1.144800553377911, + "grad_norm": 1.5969922474986569, + 
"learning_rate": 8.44318714937019e-07, + "loss": 0.4576258659362793, + "step": 4965 + }, + { + "epoch": 1.1450311275074936, + "grad_norm": 1.2968718824878773, + "learning_rate": 8.439421136060964e-07, + "loss": 0.4619024991989136, + "step": 4966 + }, + { + "epoch": 1.1452617016370763, + "grad_norm": 1.4106895392209726, + "learning_rate": 8.435655349597689e-07, + "loss": 0.4071081876754761, + "step": 4967 + }, + { + "epoch": 1.145492275766659, + "grad_norm": 1.3534750631649812, + "learning_rate": 8.431889790527769e-07, + "loss": 0.4605948328971863, + "step": 4968 + }, + { + "epoch": 1.1457228498962417, + "grad_norm": 1.4715761177473734, + "learning_rate": 8.428124459398554e-07, + "loss": 0.46706438064575195, + "step": 4969 + }, + { + "epoch": 1.1459534240258242, + "grad_norm": 1.480784825415981, + "learning_rate": 8.424359356757383e-07, + "loss": 0.39674657583236694, + "step": 4970 + }, + { + "epoch": 1.146183998155407, + "grad_norm": 1.4606371633345823, + "learning_rate": 8.42059448315155e-07, + "loss": 0.4421246647834778, + "step": 4971 + }, + { + "epoch": 1.1464145722849897, + "grad_norm": 1.6921922922853865, + "learning_rate": 8.416829839128312e-07, + "loss": 0.5220682621002197, + "step": 4972 + }, + { + "epoch": 1.1466451464145724, + "grad_norm": 1.338254387958773, + "learning_rate": 8.413065425234904e-07, + "loss": 0.40189129114151, + "step": 4973 + }, + { + "epoch": 1.1468757205441549, + "grad_norm": 1.3011913252808138, + "learning_rate": 8.409301242018517e-07, + "loss": 0.448421835899353, + "step": 4974 + }, + { + "epoch": 1.1471062946737376, + "grad_norm": 1.5996651322296722, + "learning_rate": 8.405537290026318e-07, + "loss": 0.49476757645606995, + "step": 4975 + }, + { + "epoch": 1.1473368688033203, + "grad_norm": 1.4573872381246367, + "learning_rate": 8.401773569805431e-07, + "loss": 0.3888528347015381, + "step": 4976 + }, + { + "epoch": 1.1475674429329028, + "grad_norm": 1.4760563096119323, + "learning_rate": 8.398010081902956e-07, + "loss": 
0.49057653546333313, + "step": 4977 + }, + { + "epoch": 1.1477980170624855, + "grad_norm": 1.3851559333900214, + "learning_rate": 8.39424682686595e-07, + "loss": 0.41700610518455505, + "step": 4978 + }, + { + "epoch": 1.1480285911920682, + "grad_norm": 1.5382531029836037, + "learning_rate": 8.390483805241441e-07, + "loss": 0.4801902770996094, + "step": 4979 + }, + { + "epoch": 1.148259165321651, + "grad_norm": 1.5691797878096674, + "learning_rate": 8.386721017576426e-07, + "loss": 0.5438926219940186, + "step": 4980 + }, + { + "epoch": 1.1484897394512337, + "grad_norm": 1.3886510011393631, + "learning_rate": 8.382958464417857e-07, + "loss": 0.3991735577583313, + "step": 4981 + }, + { + "epoch": 1.1487203135808162, + "grad_norm": 1.5064271527131172, + "learning_rate": 8.379196146312664e-07, + "loss": 0.4918370246887207, + "step": 4982 + }, + { + "epoch": 1.1489508877103989, + "grad_norm": 1.713149481922198, + "learning_rate": 8.375434063807737e-07, + "loss": 0.5280467867851257, + "step": 4983 + }, + { + "epoch": 1.1491814618399816, + "grad_norm": 1.2990876069782782, + "learning_rate": 8.371672217449936e-07, + "loss": 0.4186179041862488, + "step": 4984 + }, + { + "epoch": 1.149412035969564, + "grad_norm": 1.3742464834005608, + "learning_rate": 8.367910607786079e-07, + "loss": 0.3698224723339081, + "step": 4985 + }, + { + "epoch": 1.1496426100991468, + "grad_norm": 1.4766762383505605, + "learning_rate": 8.364149235362956e-07, + "loss": 0.45402267575263977, + "step": 4986 + }, + { + "epoch": 1.1498731842287295, + "grad_norm": 1.530758978566143, + "learning_rate": 8.36038810072732e-07, + "loss": 0.5145484209060669, + "step": 4987 + }, + { + "epoch": 1.1501037583583122, + "grad_norm": 1.2257671687651395, + "learning_rate": 8.356627204425893e-07, + "loss": 0.4293951392173767, + "step": 4988 + }, + { + "epoch": 1.150334332487895, + "grad_norm": 1.5415847348488914, + "learning_rate": 8.352866547005354e-07, + "loss": 0.3916272521018982, + "step": 4989 + }, + { + "epoch": 
1.1505649066174775, + "grad_norm": 1.6777087516004896, + "learning_rate": 8.349106129012357e-07, + "loss": 0.40171611309051514, + "step": 4990 + }, + { + "epoch": 1.1507954807470602, + "grad_norm": 1.5767244212385862, + "learning_rate": 8.345345950993518e-07, + "loss": 0.49580252170562744, + "step": 4991 + }, + { + "epoch": 1.151026054876643, + "grad_norm": 1.491822308561489, + "learning_rate": 8.34158601349541e-07, + "loss": 0.4521256685256958, + "step": 4992 + }, + { + "epoch": 1.1512566290062254, + "grad_norm": 1.5317445246777317, + "learning_rate": 8.337826317064585e-07, + "loss": 0.3920813798904419, + "step": 4993 + }, + { + "epoch": 1.151487203135808, + "grad_norm": 1.4336055128806646, + "learning_rate": 8.334066862247547e-07, + "loss": 0.4263145923614502, + "step": 4994 + }, + { + "epoch": 1.1517177772653908, + "grad_norm": 1.513949850078891, + "learning_rate": 8.330307649590779e-07, + "loss": 0.4746140241622925, + "step": 4995 + }, + { + "epoch": 1.1519483513949735, + "grad_norm": 1.6708741885004843, + "learning_rate": 8.326548679640713e-07, + "loss": 0.37520158290863037, + "step": 4996 + }, + { + "epoch": 1.1521789255245563, + "grad_norm": 1.4060610690176367, + "learning_rate": 8.322789952943759e-07, + "loss": 0.4481951892375946, + "step": 4997 + }, + { + "epoch": 1.1524094996541387, + "grad_norm": 1.4336851088246751, + "learning_rate": 8.319031470046281e-07, + "loss": 0.40319859981536865, + "step": 4998 + }, + { + "epoch": 1.1526400737837215, + "grad_norm": 1.805948160607668, + "learning_rate": 8.315273231494615e-07, + "loss": 0.47720152139663696, + "step": 4999 + }, + { + "epoch": 1.1528706479133042, + "grad_norm": 1.2994404231083814, + "learning_rate": 8.311515237835063e-07, + "loss": 0.4027557969093323, + "step": 5000 + }, + { + "epoch": 1.1531012220428867, + "grad_norm": 1.5346692874582604, + "learning_rate": 8.307757489613878e-07, + "loss": 0.3939552307128906, + "step": 5001 + }, + { + "epoch": 1.1533317961724694, + "grad_norm": 1.541801101637957, + 
"learning_rate": 8.303999987377295e-07, + "loss": 0.379425585269928, + "step": 5002 + }, + { + "epoch": 1.153562370302052, + "grad_norm": 1.3222707927494204, + "learning_rate": 8.300242731671499e-07, + "loss": 0.46231499314308167, + "step": 5003 + }, + { + "epoch": 1.1537929444316348, + "grad_norm": 1.5623820882279815, + "learning_rate": 8.296485723042654e-07, + "loss": 0.4639621675014496, + "step": 5004 + }, + { + "epoch": 1.1540235185612175, + "grad_norm": 1.4577901713449948, + "learning_rate": 8.29272896203687e-07, + "loss": 0.49264025688171387, + "step": 5005 + }, + { + "epoch": 1.1542540926908, + "grad_norm": 1.2796677798690286, + "learning_rate": 8.288972449200233e-07, + "loss": 0.4145156145095825, + "step": 5006 + }, + { + "epoch": 1.1544846668203828, + "grad_norm": 1.3338594060824709, + "learning_rate": 8.285216185078792e-07, + "loss": 0.39693811535835266, + "step": 5007 + }, + { + "epoch": 1.1547152409499655, + "grad_norm": 1.356694069152444, + "learning_rate": 8.281460170218561e-07, + "loss": 0.46224820613861084, + "step": 5008 + }, + { + "epoch": 1.154945815079548, + "grad_norm": 1.5380330607680774, + "learning_rate": 8.277704405165506e-07, + "loss": 0.48868128657341003, + "step": 5009 + }, + { + "epoch": 1.1551763892091307, + "grad_norm": 1.4024811483349113, + "learning_rate": 8.273948890465574e-07, + "loss": 0.5127776265144348, + "step": 5010 + }, + { + "epoch": 1.1554069633387134, + "grad_norm": 1.4092381840768406, + "learning_rate": 8.270193626664665e-07, + "loss": 0.4039389491081238, + "step": 5011 + }, + { + "epoch": 1.1556375374682961, + "grad_norm": 1.5807780806971976, + "learning_rate": 8.266438614308641e-07, + "loss": 0.4224502444267273, + "step": 5012 + }, + { + "epoch": 1.1558681115978788, + "grad_norm": 1.42726619115002, + "learning_rate": 8.262683853943335e-07, + "loss": 0.4392918050289154, + "step": 5013 + }, + { + "epoch": 1.1560986857274613, + "grad_norm": 1.5001771531608157, + "learning_rate": 8.258929346114534e-07, + "loss": 
0.5055289268493652, + "step": 5014 + }, + { + "epoch": 1.156329259857044, + "grad_norm": 1.3839083181087675, + "learning_rate": 8.255175091368003e-07, + "loss": 0.43851351737976074, + "step": 5015 + }, + { + "epoch": 1.1565598339866268, + "grad_norm": 1.576893376736649, + "learning_rate": 8.251421090249451e-07, + "loss": 0.4557814598083496, + "step": 5016 + }, + { + "epoch": 1.1567904081162093, + "grad_norm": 1.2994912796642604, + "learning_rate": 8.247667343304568e-07, + "loss": 0.4288882613182068, + "step": 5017 + }, + { + "epoch": 1.157020982245792, + "grad_norm": 1.4237104241903844, + "learning_rate": 8.243913851078994e-07, + "loss": 0.42711886763572693, + "step": 5018 + }, + { + "epoch": 1.1572515563753747, + "grad_norm": 1.8597293679946851, + "learning_rate": 8.240160614118342e-07, + "loss": 0.515809953212738, + "step": 5019 + }, + { + "epoch": 1.1574821305049574, + "grad_norm": 1.828777504717114, + "learning_rate": 8.236407632968182e-07, + "loss": 0.5754632949829102, + "step": 5020 + }, + { + "epoch": 1.1577127046345401, + "grad_norm": 1.553176542229762, + "learning_rate": 8.232654908174038e-07, + "loss": 0.4601830244064331, + "step": 5021 + }, + { + "epoch": 1.1579432787641226, + "grad_norm": 1.500802040492981, + "learning_rate": 8.228902440281422e-07, + "loss": 0.4740797281265259, + "step": 5022 + }, + { + "epoch": 1.1581738528937053, + "grad_norm": 1.688304974088827, + "learning_rate": 8.225150229835781e-07, + "loss": 0.4066367745399475, + "step": 5023 + }, + { + "epoch": 1.158404427023288, + "grad_norm": 1.357187761009418, + "learning_rate": 8.221398277382546e-07, + "loss": 0.4664362668991089, + "step": 5024 + }, + { + "epoch": 1.1586350011528705, + "grad_norm": 1.3912425171719864, + "learning_rate": 8.217646583467093e-07, + "loss": 0.5204637050628662, + "step": 5025 + }, + { + "epoch": 1.1588655752824533, + "grad_norm": 1.4227227145637968, + "learning_rate": 8.213895148634775e-07, + "loss": 0.4991419017314911, + "step": 5026 + }, + { + "epoch": 
1.159096149412036, + "grad_norm": 1.2844880437163813, + "learning_rate": 8.210143973430896e-07, + "loss": 0.40420424938201904, + "step": 5027 + }, + { + "epoch": 1.1593267235416187, + "grad_norm": 1.4946107412544847, + "learning_rate": 8.206393058400736e-07, + "loss": 0.523331880569458, + "step": 5028 + }, + { + "epoch": 1.1595572976712014, + "grad_norm": 1.4908780499938201, + "learning_rate": 8.202642404089516e-07, + "loss": 0.5019216537475586, + "step": 5029 + }, + { + "epoch": 1.159787871800784, + "grad_norm": 1.6451488656605473, + "learning_rate": 8.198892011042442e-07, + "loss": 0.522672712802887, + "step": 5030 + }, + { + "epoch": 1.1600184459303666, + "grad_norm": 1.505727418733034, + "learning_rate": 8.195141879804668e-07, + "loss": 0.418377548456192, + "step": 5031 + }, + { + "epoch": 1.1602490200599493, + "grad_norm": 1.5635210393713965, + "learning_rate": 8.191392010921312e-07, + "loss": 0.4914432764053345, + "step": 5032 + }, + { + "epoch": 1.1604795941895318, + "grad_norm": 1.3929576184838368, + "learning_rate": 8.187642404937459e-07, + "loss": 0.42149683833122253, + "step": 5033 + }, + { + "epoch": 1.1607101683191146, + "grad_norm": 1.6811040317548793, + "learning_rate": 8.183893062398145e-07, + "loss": 0.5637058019638062, + "step": 5034 + }, + { + "epoch": 1.1609407424486973, + "grad_norm": 1.2252559322458123, + "learning_rate": 8.180143983848387e-07, + "loss": 0.49930211901664734, + "step": 5035 + }, + { + "epoch": 1.16117131657828, + "grad_norm": 1.626369547940987, + "learning_rate": 8.176395169833139e-07, + "loss": 0.4217071235179901, + "step": 5036 + }, + { + "epoch": 1.1614018907078625, + "grad_norm": 1.9654976691842632, + "learning_rate": 8.172646620897336e-07, + "loss": 0.4208733141422272, + "step": 5037 + }, + { + "epoch": 1.1616324648374452, + "grad_norm": 1.434216808832, + "learning_rate": 8.168898337585866e-07, + "loss": 0.42970529198646545, + "step": 5038 + }, + { + "epoch": 1.161863038967028, + "grad_norm": 1.429859410744686, + 
"learning_rate": 8.165150320443584e-07, + "loss": 0.49482622742652893, + "step": 5039 + }, + { + "epoch": 1.1620936130966106, + "grad_norm": 1.2888747437309156, + "learning_rate": 8.161402570015297e-07, + "loss": 0.4106384217739105, + "step": 5040 + }, + { + "epoch": 1.1623241872261931, + "grad_norm": 1.8632515092828725, + "learning_rate": 8.157655086845778e-07, + "loss": 0.4550397992134094, + "step": 5041 + }, + { + "epoch": 1.1625547613557758, + "grad_norm": 1.4636128502892785, + "learning_rate": 8.153907871479768e-07, + "loss": 0.5144504308700562, + "step": 5042 + }, + { + "epoch": 1.1627853354853586, + "grad_norm": 1.4308354935014596, + "learning_rate": 8.150160924461953e-07, + "loss": 0.3970009684562683, + "step": 5043 + }, + { + "epoch": 1.1630159096149413, + "grad_norm": 1.4674063038688332, + "learning_rate": 8.146414246336998e-07, + "loss": 0.45825856924057007, + "step": 5044 + }, + { + "epoch": 1.1632464837445238, + "grad_norm": 1.6850972190756333, + "learning_rate": 8.142667837649515e-07, + "loss": 0.4515247344970703, + "step": 5045 + }, + { + "epoch": 1.1634770578741065, + "grad_norm": 1.347770803032681, + "learning_rate": 8.13892169894409e-07, + "loss": 0.41265833377838135, + "step": 5046 + }, + { + "epoch": 1.1637076320036892, + "grad_norm": 1.4117996459358377, + "learning_rate": 8.135175830765254e-07, + "loss": 0.39820557832717896, + "step": 5047 + }, + { + "epoch": 1.163938206133272, + "grad_norm": 1.4272016239744356, + "learning_rate": 8.131430233657514e-07, + "loss": 0.41528987884521484, + "step": 5048 + }, + { + "epoch": 1.1641687802628544, + "grad_norm": 1.3404996701874776, + "learning_rate": 8.127684908165323e-07, + "loss": 0.4453636407852173, + "step": 5049 + }, + { + "epoch": 1.1643993543924371, + "grad_norm": 1.846029547761043, + "learning_rate": 8.123939854833107e-07, + "loss": 0.45008519291877747, + "step": 5050 + }, + { + "epoch": 1.1646299285220199, + "grad_norm": 1.7254544812081525, + "learning_rate": 8.120195074205249e-07, + "loss": 
0.456550657749176, + "step": 5051 + }, + { + "epoch": 1.1648605026516026, + "grad_norm": 1.4455041595835194, + "learning_rate": 8.116450566826086e-07, + "loss": 0.44465887546539307, + "step": 5052 + }, + { + "epoch": 1.165091076781185, + "grad_norm": 1.4606872040412728, + "learning_rate": 8.112706333239923e-07, + "loss": 0.4769172668457031, + "step": 5053 + }, + { + "epoch": 1.1653216509107678, + "grad_norm": 1.5800176181940382, + "learning_rate": 8.108962373991019e-07, + "loss": 0.42662739753723145, + "step": 5054 + }, + { + "epoch": 1.1655522250403505, + "grad_norm": 1.533727299161298, + "learning_rate": 8.105218689623603e-07, + "loss": 0.4923250079154968, + "step": 5055 + }, + { + "epoch": 1.1657827991699332, + "grad_norm": 1.5783599756682145, + "learning_rate": 8.10147528068185e-07, + "loss": 0.42462587356567383, + "step": 5056 + }, + { + "epoch": 1.1660133732995157, + "grad_norm": 1.3458818448335859, + "learning_rate": 8.097732147709908e-07, + "loss": 0.47610223293304443, + "step": 5057 + }, + { + "epoch": 1.1662439474290984, + "grad_norm": 1.6207397386125497, + "learning_rate": 8.093989291251875e-07, + "loss": 0.47519630193710327, + "step": 5058 + }, + { + "epoch": 1.1664745215586811, + "grad_norm": 1.3901575117179885, + "learning_rate": 8.090246711851819e-07, + "loss": 0.38865840435028076, + "step": 5059 + }, + { + "epoch": 1.1667050956882639, + "grad_norm": 1.271312682478528, + "learning_rate": 8.086504410053757e-07, + "loss": 0.39990776777267456, + "step": 5060 + }, + { + "epoch": 1.1669356698178464, + "grad_norm": 1.4665951386644982, + "learning_rate": 8.082762386401669e-07, + "loss": 0.4330836534500122, + "step": 5061 + }, + { + "epoch": 1.167166243947429, + "grad_norm": 1.286707043518209, + "learning_rate": 8.079020641439504e-07, + "loss": 0.4285934865474701, + "step": 5062 + }, + { + "epoch": 1.1673968180770118, + "grad_norm": 1.7499199825760443, + "learning_rate": 8.075279175711152e-07, + "loss": 0.3900645077228546, + "step": 5063 + }, + { + "epoch": 
1.1676273922065945, + "grad_norm": 1.3606445329404238, + "learning_rate": 8.07153798976048e-07, + "loss": 0.48145759105682373, + "step": 5064 + }, + { + "epoch": 1.167857966336177, + "grad_norm": 1.7592322949259351, + "learning_rate": 8.067797084131305e-07, + "loss": 0.4239045977592468, + "step": 5065 + }, + { + "epoch": 1.1680885404657597, + "grad_norm": 1.7501505795878665, + "learning_rate": 8.064056459367409e-07, + "loss": 0.55517578125, + "step": 5066 + }, + { + "epoch": 1.1683191145953424, + "grad_norm": 1.588400616006081, + "learning_rate": 8.060316116012524e-07, + "loss": 0.4956046938896179, + "step": 5067 + }, + { + "epoch": 1.1685496887249252, + "grad_norm": 1.3607022789051413, + "learning_rate": 8.05657605461035e-07, + "loss": 0.4051878750324249, + "step": 5068 + }, + { + "epoch": 1.1687802628545076, + "grad_norm": 1.6471264462607456, + "learning_rate": 8.052836275704541e-07, + "loss": 0.47389912605285645, + "step": 5069 + }, + { + "epoch": 1.1690108369840904, + "grad_norm": 1.3462872241997197, + "learning_rate": 8.049096779838717e-07, + "loss": 0.5023842453956604, + "step": 5070 + }, + { + "epoch": 1.169241411113673, + "grad_norm": 1.3943514778037218, + "learning_rate": 8.045357567556449e-07, + "loss": 0.4895137548446655, + "step": 5071 + }, + { + "epoch": 1.1694719852432558, + "grad_norm": 1.5328176046123796, + "learning_rate": 8.041618639401264e-07, + "loss": 0.47874224185943604, + "step": 5072 + }, + { + "epoch": 1.1697025593728383, + "grad_norm": 1.4666773972258982, + "learning_rate": 8.037879995916659e-07, + "loss": 0.4784395694732666, + "step": 5073 + }, + { + "epoch": 1.169933133502421, + "grad_norm": 1.4433652991816976, + "learning_rate": 8.034141637646079e-07, + "loss": 0.45289772748947144, + "step": 5074 + }, + { + "epoch": 1.1701637076320037, + "grad_norm": 1.931933746015264, + "learning_rate": 8.030403565132942e-07, + "loss": 0.5375204682350159, + "step": 5075 + }, + { + "epoch": 1.1703942817615864, + "grad_norm": 1.4956339972756536, + 
"learning_rate": 8.026665778920602e-07, + "loss": 0.45003899931907654, + "step": 5076 + }, + { + "epoch": 1.170624855891169, + "grad_norm": 1.348037979358877, + "learning_rate": 8.022928279552392e-07, + "loss": 0.4236389994621277, + "step": 5077 + }, + { + "epoch": 1.1708554300207517, + "grad_norm": 1.3333943245649609, + "learning_rate": 8.019191067571592e-07, + "loss": 0.43182557821273804, + "step": 5078 + }, + { + "epoch": 1.1710860041503344, + "grad_norm": 1.7521692166476222, + "learning_rate": 8.01545414352145e-07, + "loss": 0.5171953439712524, + "step": 5079 + }, + { + "epoch": 1.171316578279917, + "grad_norm": 1.5319548219026522, + "learning_rate": 8.011717507945157e-07, + "loss": 0.5084770321846008, + "step": 5080 + }, + { + "epoch": 1.1715471524094996, + "grad_norm": 1.6342595542262888, + "learning_rate": 8.007981161385876e-07, + "loss": 0.4685532748699188, + "step": 5081 + }, + { + "epoch": 1.1717777265390823, + "grad_norm": 1.5086552244362486, + "learning_rate": 8.004245104386724e-07, + "loss": 0.4647448658943176, + "step": 5082 + }, + { + "epoch": 1.172008300668665, + "grad_norm": 1.4914913702780284, + "learning_rate": 8.000509337490768e-07, + "loss": 0.4038098454475403, + "step": 5083 + }, + { + "epoch": 1.1722388747982477, + "grad_norm": 1.435384500623052, + "learning_rate": 7.996773861241047e-07, + "loss": 0.4153759479522705, + "step": 5084 + }, + { + "epoch": 1.1724694489278302, + "grad_norm": 1.5573715225755111, + "learning_rate": 7.993038676180545e-07, + "loss": 0.4569447636604309, + "step": 5085 + }, + { + "epoch": 1.172700023057413, + "grad_norm": 1.4307958679817, + "learning_rate": 7.989303782852215e-07, + "loss": 0.4419426918029785, + "step": 5086 + }, + { + "epoch": 1.1729305971869957, + "grad_norm": 1.4177391878017933, + "learning_rate": 7.985569181798955e-07, + "loss": 0.3902894854545593, + "step": 5087 + }, + { + "epoch": 1.1731611713165782, + "grad_norm": 1.3935681641299988, + "learning_rate": 7.981834873563631e-07, + "loss": 
0.4066358208656311, + "step": 5088 + }, + { + "epoch": 1.1733917454461609, + "grad_norm": 1.579270038843054, + "learning_rate": 7.978100858689059e-07, + "loss": 0.4589639902114868, + "step": 5089 + }, + { + "epoch": 1.1736223195757436, + "grad_norm": 1.5868805646941586, + "learning_rate": 7.974367137718024e-07, + "loss": 0.4431188106536865, + "step": 5090 + }, + { + "epoch": 1.1738528937053263, + "grad_norm": 1.3420666663317198, + "learning_rate": 7.970633711193252e-07, + "loss": 0.43412742018699646, + "step": 5091 + }, + { + "epoch": 1.174083467834909, + "grad_norm": 1.360898150528172, + "learning_rate": 7.966900579657435e-07, + "loss": 0.40296387672424316, + "step": 5092 + }, + { + "epoch": 1.1743140419644915, + "grad_norm": 1.4702894316239854, + "learning_rate": 7.963167743653228e-07, + "loss": 0.4814741611480713, + "step": 5093 + }, + { + "epoch": 1.1745446160940742, + "grad_norm": 1.7678935112109417, + "learning_rate": 7.959435203723228e-07, + "loss": 0.4412423372268677, + "step": 5094 + }, + { + "epoch": 1.174775190223657, + "grad_norm": 1.698823813376211, + "learning_rate": 7.955702960410006e-07, + "loss": 0.49773266911506653, + "step": 5095 + }, + { + "epoch": 1.1750057643532394, + "grad_norm": 1.445996901779518, + "learning_rate": 7.951971014256073e-07, + "loss": 0.4657529592514038, + "step": 5096 + }, + { + "epoch": 1.1752363384828222, + "grad_norm": 1.4844953949134, + "learning_rate": 7.94823936580391e-07, + "loss": 0.4062782824039459, + "step": 5097 + }, + { + "epoch": 1.1754669126124049, + "grad_norm": 1.3280643963390701, + "learning_rate": 7.944508015595948e-07, + "loss": 0.4154980182647705, + "step": 5098 + }, + { + "epoch": 1.1756974867419876, + "grad_norm": 1.3235405382692107, + "learning_rate": 7.940776964174582e-07, + "loss": 0.4724680185317993, + "step": 5099 + }, + { + "epoch": 1.1759280608715703, + "grad_norm": 1.4212228031547876, + "learning_rate": 7.937046212082149e-07, + "loss": 0.48808538913726807, + "step": 5100 + }, + { + "epoch": 
1.1761586350011528, + "grad_norm": 1.3949555418133748, + "learning_rate": 7.933315759860959e-07, + "loss": 0.4985845983028412, + "step": 5101 + }, + { + "epoch": 1.1763892091307355, + "grad_norm": 1.2192149824969183, + "learning_rate": 7.92958560805327e-07, + "loss": 0.3735587000846863, + "step": 5102 + }, + { + "epoch": 1.1766197832603182, + "grad_norm": 1.3793872147262238, + "learning_rate": 7.925855757201294e-07, + "loss": 0.4198414385318756, + "step": 5103 + }, + { + "epoch": 1.1768503573899007, + "grad_norm": 1.7231390796467927, + "learning_rate": 7.922126207847204e-07, + "loss": 0.41973787546157837, + "step": 5104 + }, + { + "epoch": 1.1770809315194835, + "grad_norm": 1.8258365265115961, + "learning_rate": 7.918396960533128e-07, + "loss": 0.5179545283317566, + "step": 5105 + }, + { + "epoch": 1.1773115056490662, + "grad_norm": 1.5757377934881964, + "learning_rate": 7.914668015801153e-07, + "loss": 0.4917227625846863, + "step": 5106 + }, + { + "epoch": 1.1775420797786489, + "grad_norm": 1.5132865673859617, + "learning_rate": 7.910939374193312e-07, + "loss": 0.41775548458099365, + "step": 5107 + }, + { + "epoch": 1.1777726539082316, + "grad_norm": 1.484971286444874, + "learning_rate": 7.907211036251608e-07, + "loss": 0.45468997955322266, + "step": 5108 + }, + { + "epoch": 1.178003228037814, + "grad_norm": 1.292166499414124, + "learning_rate": 7.903483002517988e-07, + "loss": 0.3749620318412781, + "step": 5109 + }, + { + "epoch": 1.1782338021673968, + "grad_norm": 1.3945828421286317, + "learning_rate": 7.899755273534365e-07, + "loss": 0.48940956592559814, + "step": 5110 + }, + { + "epoch": 1.1784643762969795, + "grad_norm": 1.3575927994558319, + "learning_rate": 7.896027849842594e-07, + "loss": 0.4561386704444885, + "step": 5111 + }, + { + "epoch": 1.178694950426562, + "grad_norm": 1.4968176209501343, + "learning_rate": 7.892300731984498e-07, + "loss": 0.441898375749588, + "step": 5112 + }, + { + "epoch": 1.1789255245561447, + "grad_norm": 1.7617220832230103, + 
"learning_rate": 7.888573920501856e-07, + "loss": 0.43445056676864624, + "step": 5113 + }, + { + "epoch": 1.1791560986857275, + "grad_norm": 1.4680500200302005, + "learning_rate": 7.884847415936389e-07, + "loss": 0.42653167247772217, + "step": 5114 + }, + { + "epoch": 1.1793866728153102, + "grad_norm": 1.3867120793190437, + "learning_rate": 7.881121218829787e-07, + "loss": 0.42003321647644043, + "step": 5115 + }, + { + "epoch": 1.179617246944893, + "grad_norm": 1.613544333660259, + "learning_rate": 7.87739532972369e-07, + "loss": 0.4920128881931305, + "step": 5116 + }, + { + "epoch": 1.1798478210744754, + "grad_norm": 1.430783098871577, + "learning_rate": 7.873669749159697e-07, + "loss": 0.49529707431793213, + "step": 5117 + }, + { + "epoch": 1.180078395204058, + "grad_norm": 1.4915607575501106, + "learning_rate": 7.869944477679351e-07, + "loss": 0.4813005328178406, + "step": 5118 + }, + { + "epoch": 1.1803089693336408, + "grad_norm": 1.4923304237688, + "learning_rate": 7.866219515824168e-07, + "loss": 0.47239556908607483, + "step": 5119 + }, + { + "epoch": 1.1805395434632233, + "grad_norm": 1.7203098580351979, + "learning_rate": 7.862494864135596e-07, + "loss": 0.4808405935764313, + "step": 5120 + }, + { + "epoch": 1.180770117592806, + "grad_norm": 1.5206410201181635, + "learning_rate": 7.858770523155066e-07, + "loss": 0.44946521520614624, + "step": 5121 + }, + { + "epoch": 1.1810006917223888, + "grad_norm": 1.8958199353441048, + "learning_rate": 7.85504649342394e-07, + "loss": 0.5344874858856201, + "step": 5122 + }, + { + "epoch": 1.1812312658519715, + "grad_norm": 1.729692211161555, + "learning_rate": 7.851322775483542e-07, + "loss": 0.49354079365730286, + "step": 5123 + }, + { + "epoch": 1.1814618399815542, + "grad_norm": 1.6407900723292905, + "learning_rate": 7.847599369875155e-07, + "loss": 0.414085328578949, + "step": 5124 + }, + { + "epoch": 1.1816924141111367, + "grad_norm": 1.51838750003237, + "learning_rate": 7.843876277140013e-07, + "loss": 
0.4638150632381439, + "step": 5125 + }, + { + "epoch": 1.1819229882407194, + "grad_norm": 1.5309477954820934, + "learning_rate": 7.84015349781931e-07, + "loss": 0.39239877462387085, + "step": 5126 + }, + { + "epoch": 1.1821535623703021, + "grad_norm": 1.456140160914471, + "learning_rate": 7.83643103245418e-07, + "loss": 0.46846455335617065, + "step": 5127 + }, + { + "epoch": 1.1823841364998846, + "grad_norm": 1.7368044200229882, + "learning_rate": 7.832708881585729e-07, + "loss": 0.5257229804992676, + "step": 5128 + }, + { + "epoch": 1.1826147106294673, + "grad_norm": 1.246852967804398, + "learning_rate": 7.828987045755006e-07, + "loss": 0.3858698904514313, + "step": 5129 + }, + { + "epoch": 1.18284528475905, + "grad_norm": 1.526790126487461, + "learning_rate": 7.82526552550302e-07, + "loss": 0.48664575815200806, + "step": 5130 + }, + { + "epoch": 1.1830758588886328, + "grad_norm": 1.4370667079865387, + "learning_rate": 7.821544321370731e-07, + "loss": 0.5246836543083191, + "step": 5131 + }, + { + "epoch": 1.1833064330182155, + "grad_norm": 1.6695741670894575, + "learning_rate": 7.817823433899049e-07, + "loss": 0.5538516640663147, + "step": 5132 + }, + { + "epoch": 1.183537007147798, + "grad_norm": 1.5154692060299837, + "learning_rate": 7.814102863628852e-07, + "loss": 0.4563618302345276, + "step": 5133 + }, + { + "epoch": 1.1837675812773807, + "grad_norm": 1.6013623117191365, + "learning_rate": 7.810382611100952e-07, + "loss": 0.48093757033348083, + "step": 5134 + }, + { + "epoch": 1.1839981554069634, + "grad_norm": 1.4079128694512013, + "learning_rate": 7.806662676856133e-07, + "loss": 0.41152772307395935, + "step": 5135 + }, + { + "epoch": 1.184228729536546, + "grad_norm": 1.470828934761741, + "learning_rate": 7.802943061435121e-07, + "loss": 0.4429926574230194, + "step": 5136 + }, + { + "epoch": 1.1844593036661286, + "grad_norm": 1.6844871985058756, + "learning_rate": 7.799223765378604e-07, + "loss": 0.5795058012008667, + "step": 5137 + }, + { + "epoch": 
1.1846898777957113, + "grad_norm": 1.3964078038325152, + "learning_rate": 7.795504789227214e-07, + "loss": 0.43219637870788574, + "step": 5138 + }, + { + "epoch": 1.184920451925294, + "grad_norm": 1.3120429368988666, + "learning_rate": 7.791786133521547e-07, + "loss": 0.472915917634964, + "step": 5139 + }, + { + "epoch": 1.1851510260548768, + "grad_norm": 1.8547533260703066, + "learning_rate": 7.788067798802144e-07, + "loss": 0.609251081943512, + "step": 5140 + }, + { + "epoch": 1.1853816001844593, + "grad_norm": 1.5647854614729606, + "learning_rate": 7.784349785609506e-07, + "loss": 0.5051882266998291, + "step": 5141 + }, + { + "epoch": 1.185612174314042, + "grad_norm": 1.8256847598733492, + "learning_rate": 7.780632094484081e-07, + "loss": 0.5062044858932495, + "step": 5142 + }, + { + "epoch": 1.1858427484436247, + "grad_norm": 1.6792228276022907, + "learning_rate": 7.77691472596627e-07, + "loss": 0.48717936873435974, + "step": 5143 + }, + { + "epoch": 1.1860733225732072, + "grad_norm": 1.4962691739334948, + "learning_rate": 7.773197680596439e-07, + "loss": 0.4755759537220001, + "step": 5144 + }, + { + "epoch": 1.18630389670279, + "grad_norm": 1.5701944534084074, + "learning_rate": 7.769480958914889e-07, + "loss": 0.4549487829208374, + "step": 5145 + }, + { + "epoch": 1.1865344708323726, + "grad_norm": 1.3416043214582947, + "learning_rate": 7.765764561461891e-07, + "loss": 0.39759546518325806, + "step": 5146 + }, + { + "epoch": 1.1867650449619553, + "grad_norm": 1.7321999626139561, + "learning_rate": 7.762048488777654e-07, + "loss": 0.5151915550231934, + "step": 5147 + }, + { + "epoch": 1.1869956190915378, + "grad_norm": 1.739537041268416, + "learning_rate": 7.758332741402351e-07, + "loss": 0.4555166959762573, + "step": 5148 + }, + { + "epoch": 1.1872261932211206, + "grad_norm": 1.246823148309275, + "learning_rate": 7.754617319876102e-07, + "loss": 0.3639993667602539, + "step": 5149 + }, + { + "epoch": 1.1874567673507033, + "grad_norm": 1.4228626603425891, + 
"learning_rate": 7.750902224738984e-07, + "loss": 0.4158916473388672, + "step": 5150 + }, + { + "epoch": 1.187687341480286, + "grad_norm": 1.5159845507016538, + "learning_rate": 7.747187456531021e-07, + "loss": 0.44933754205703735, + "step": 5151 + }, + { + "epoch": 1.1879179156098685, + "grad_norm": 1.1574431418082898, + "learning_rate": 7.74347301579219e-07, + "loss": 0.35436397790908813, + "step": 5152 + }, + { + "epoch": 1.1881484897394512, + "grad_norm": 1.7559371420298944, + "learning_rate": 7.73975890306243e-07, + "loss": 0.40650928020477295, + "step": 5153 + }, + { + "epoch": 1.188379063869034, + "grad_norm": 1.655955114095899, + "learning_rate": 7.736045118881615e-07, + "loss": 0.424211710691452, + "step": 5154 + }, + { + "epoch": 1.1886096379986166, + "grad_norm": 1.386370427214692, + "learning_rate": 7.73233166378959e-07, + "loss": 0.38909512758255005, + "step": 5155 + }, + { + "epoch": 1.1888402121281991, + "grad_norm": 1.6273556393891413, + "learning_rate": 7.728618538326139e-07, + "loss": 0.4452083110809326, + "step": 5156 + }, + { + "epoch": 1.1890707862577818, + "grad_norm": 1.7325341862894768, + "learning_rate": 7.724905743031005e-07, + "loss": 0.45061540603637695, + "step": 5157 + }, + { + "epoch": 1.1893013603873646, + "grad_norm": 1.875195364158454, + "learning_rate": 7.721193278443875e-07, + "loss": 0.5301374197006226, + "step": 5158 + }, + { + "epoch": 1.1895319345169473, + "grad_norm": 1.32653936253781, + "learning_rate": 7.717481145104398e-07, + "loss": 0.4386521577835083, + "step": 5159 + }, + { + "epoch": 1.1897625086465298, + "grad_norm": 1.5893013583646332, + "learning_rate": 7.713769343552169e-07, + "loss": 0.447623074054718, + "step": 5160 + }, + { + "epoch": 1.1899930827761125, + "grad_norm": 1.4757184491338362, + "learning_rate": 7.71005787432674e-07, + "loss": 0.44326454401016235, + "step": 5161 + }, + { + "epoch": 1.1902236569056952, + "grad_norm": 1.4868394681814385, + "learning_rate": 7.706346737967603e-07, + "loss": 
0.564007043838501, + "step": 5162 + }, + { + "epoch": 1.190454231035278, + "grad_norm": 1.4497565739191507, + "learning_rate": 7.702635935014213e-07, + "loss": 0.5338540077209473, + "step": 5163 + }, + { + "epoch": 1.1906848051648604, + "grad_norm": 1.5430964424900424, + "learning_rate": 7.698925466005977e-07, + "loss": 0.45307862758636475, + "step": 5164 + }, + { + "epoch": 1.1909153792944431, + "grad_norm": 1.4703583168080245, + "learning_rate": 7.69521533148224e-07, + "loss": 0.5383142232894897, + "step": 5165 + }, + { + "epoch": 1.1911459534240258, + "grad_norm": 1.46357622305891, + "learning_rate": 7.691505531982316e-07, + "loss": 0.3794770836830139, + "step": 5166 + }, + { + "epoch": 1.1913765275536086, + "grad_norm": 1.73725405615964, + "learning_rate": 7.687796068045455e-07, + "loss": 0.4633198082447052, + "step": 5167 + }, + { + "epoch": 1.191607101683191, + "grad_norm": 1.4824242158713679, + "learning_rate": 7.684086940210875e-07, + "loss": 0.5080294609069824, + "step": 5168 + }, + { + "epoch": 1.1918376758127738, + "grad_norm": 1.4742940614632714, + "learning_rate": 7.680378149017724e-07, + "loss": 0.3952289819717407, + "step": 5169 + }, + { + "epoch": 1.1920682499423565, + "grad_norm": 1.6284523488523228, + "learning_rate": 7.676669695005122e-07, + "loss": 0.4518551528453827, + "step": 5170 + }, + { + "epoch": 1.1922988240719392, + "grad_norm": 1.3915500318606786, + "learning_rate": 7.672961578712125e-07, + "loss": 0.4752943515777588, + "step": 5171 + }, + { + "epoch": 1.1925293982015217, + "grad_norm": 1.4424968675316805, + "learning_rate": 7.669253800677744e-07, + "loss": 0.5059680342674255, + "step": 5172 + }, + { + "epoch": 1.1927599723311044, + "grad_norm": 1.4513506332822887, + "learning_rate": 7.665546361440949e-07, + "loss": 0.47073960304260254, + "step": 5173 + }, + { + "epoch": 1.1929905464606871, + "grad_norm": 1.6974826094600077, + "learning_rate": 7.661839261540644e-07, + "loss": 0.5851496458053589, + "step": 5174 + }, + { + "epoch": 
1.1932211205902699, + "grad_norm": 1.4255244135326766, + "learning_rate": 7.658132501515701e-07, + "loss": 0.44255387783050537, + "step": 5175 + }, + { + "epoch": 1.1934516947198524, + "grad_norm": 1.7360033352973823, + "learning_rate": 7.654426081904931e-07, + "loss": 0.543785810470581, + "step": 5176 + }, + { + "epoch": 1.193682268849435, + "grad_norm": 1.697289945139709, + "learning_rate": 7.650720003247107e-07, + "loss": 0.503501296043396, + "step": 5177 + }, + { + "epoch": 1.1939128429790178, + "grad_norm": 1.6448034142146566, + "learning_rate": 7.647014266080935e-07, + "loss": 0.43894368410110474, + "step": 5178 + }, + { + "epoch": 1.1941434171086005, + "grad_norm": 1.9780925681836061, + "learning_rate": 7.643308870945088e-07, + "loss": 0.5014036297798157, + "step": 5179 + }, + { + "epoch": 1.194373991238183, + "grad_norm": 1.3813934145743847, + "learning_rate": 7.639603818378178e-07, + "loss": 0.4859309196472168, + "step": 5180 + }, + { + "epoch": 1.1946045653677657, + "grad_norm": 1.611175852060371, + "learning_rate": 7.635899108918781e-07, + "loss": 0.40631920099258423, + "step": 5181 + }, + { + "epoch": 1.1948351394973484, + "grad_norm": 1.923584573200039, + "learning_rate": 7.632194743105405e-07, + "loss": 0.5206565856933594, + "step": 5182 + }, + { + "epoch": 1.1950657136269311, + "grad_norm": 1.659582338573284, + "learning_rate": 7.628490721476517e-07, + "loss": 0.5052351355552673, + "step": 5183 + }, + { + "epoch": 1.1952962877565136, + "grad_norm": 1.3967739180573415, + "learning_rate": 7.624787044570543e-07, + "loss": 0.4921465516090393, + "step": 5184 + }, + { + "epoch": 1.1955268618860964, + "grad_norm": 1.2706689377506823, + "learning_rate": 7.621083712925839e-07, + "loss": 0.3307859003543854, + "step": 5185 + }, + { + "epoch": 1.195757436015679, + "grad_norm": 1.5942715812711645, + "learning_rate": 7.617380727080728e-07, + "loss": 0.4276743531227112, + "step": 5186 + }, + { + "epoch": 1.1959880101452618, + "grad_norm": 1.434739100338101, + 
"learning_rate": 7.613678087573475e-07, + "loss": 0.5065702795982361, + "step": 5187 + }, + { + "epoch": 1.1962185842748443, + "grad_norm": 1.2918886211693255, + "learning_rate": 7.609975794942301e-07, + "loss": 0.3588709533214569, + "step": 5188 + }, + { + "epoch": 1.196449158404427, + "grad_norm": 1.4907134183008088, + "learning_rate": 7.606273849725362e-07, + "loss": 0.4296506941318512, + "step": 5189 + }, + { + "epoch": 1.1966797325340097, + "grad_norm": 1.5501182036176049, + "learning_rate": 7.602572252460782e-07, + "loss": 0.517792820930481, + "step": 5190 + }, + { + "epoch": 1.1969103066635924, + "grad_norm": 1.6883448687359832, + "learning_rate": 7.598871003686619e-07, + "loss": 0.38939881324768066, + "step": 5191 + }, + { + "epoch": 1.197140880793175, + "grad_norm": 1.5288548185908284, + "learning_rate": 7.595170103940896e-07, + "loss": 0.5759290456771851, + "step": 5192 + }, + { + "epoch": 1.1973714549227576, + "grad_norm": 1.975229876516129, + "learning_rate": 7.591469553761569e-07, + "loss": 0.4705851078033447, + "step": 5193 + }, + { + "epoch": 1.1976020290523404, + "grad_norm": 1.4820736709912923, + "learning_rate": 7.587769353686548e-07, + "loss": 0.5137619972229004, + "step": 5194 + }, + { + "epoch": 1.197832603181923, + "grad_norm": 1.426346211238444, + "learning_rate": 7.584069504253701e-07, + "loss": 0.43207496404647827, + "step": 5195 + }, + { + "epoch": 1.1980631773115056, + "grad_norm": 1.7446559629267169, + "learning_rate": 7.580370006000835e-07, + "loss": 0.3976139426231384, + "step": 5196 + }, + { + "epoch": 1.1982937514410883, + "grad_norm": 1.3117053560833851, + "learning_rate": 7.576670859465715e-07, + "loss": 0.41323673725128174, + "step": 5197 + }, + { + "epoch": 1.198524325570671, + "grad_norm": 1.5110343718270132, + "learning_rate": 7.57297206518604e-07, + "loss": 0.404024600982666, + "step": 5198 + }, + { + "epoch": 1.1987548997002535, + "grad_norm": 1.3684281900258655, + "learning_rate": 7.569273623699475e-07, + "loss": 
0.4010540843009949, + "step": 5199 + }, + { + "epoch": 1.1989854738298362, + "grad_norm": 1.5739020793077496, + "learning_rate": 7.565575535543623e-07, + "loss": 0.44299256801605225, + "step": 5200 + }, + { + "epoch": 1.199216047959419, + "grad_norm": 1.5204166282494558, + "learning_rate": 7.561877801256041e-07, + "loss": 0.5217546820640564, + "step": 5201 + }, + { + "epoch": 1.1994466220890017, + "grad_norm": 1.868873770331591, + "learning_rate": 7.558180421374229e-07, + "loss": 0.5192688703536987, + "step": 5202 + }, + { + "epoch": 1.1996771962185844, + "grad_norm": 1.5743910950617057, + "learning_rate": 7.554483396435637e-07, + "loss": 0.38272884488105774, + "step": 5203 + }, + { + "epoch": 1.1999077703481669, + "grad_norm": 1.4246723536184043, + "learning_rate": 7.550786726977673e-07, + "loss": 0.474464476108551, + "step": 5204 + }, + { + "epoch": 1.2001383444777496, + "grad_norm": 1.6360159300410695, + "learning_rate": 7.547090413537676e-07, + "loss": 0.540134072303772, + "step": 5205 + }, + { + "epoch": 1.2003689186073323, + "grad_norm": 1.4752644193711169, + "learning_rate": 7.543394456652948e-07, + "loss": 0.4662882089614868, + "step": 5206 + }, + { + "epoch": 1.2005994927369148, + "grad_norm": 1.6858064119472538, + "learning_rate": 7.539698856860732e-07, + "loss": 0.440970778465271, + "step": 5207 + }, + { + "epoch": 1.2008300668664975, + "grad_norm": 1.3786365004169476, + "learning_rate": 7.536003614698225e-07, + "loss": 0.41787397861480713, + "step": 5208 + }, + { + "epoch": 1.2010606409960802, + "grad_norm": 1.4726677497641942, + "learning_rate": 7.532308730702561e-07, + "loss": 0.5503408908843994, + "step": 5209 + }, + { + "epoch": 1.201291215125663, + "grad_norm": 1.4739960164302617, + "learning_rate": 7.528614205410833e-07, + "loss": 0.43713903427124023, + "step": 5210 + }, + { + "epoch": 1.2015217892552457, + "grad_norm": 1.5362481289460599, + "learning_rate": 7.524920039360076e-07, + "loss": 0.4145667552947998, + "step": 5211 + }, + { + "epoch": 
1.2017523633848282, + "grad_norm": 1.4800845890771783, + "learning_rate": 7.521226233087279e-07, + "loss": 0.4307587146759033, + "step": 5212 + }, + { + "epoch": 1.2019829375144109, + "grad_norm": 1.436182742461266, + "learning_rate": 7.517532787129369e-07, + "loss": 0.43784570693969727, + "step": 5213 + }, + { + "epoch": 1.2022135116439936, + "grad_norm": 1.3395031095564736, + "learning_rate": 7.513839702023226e-07, + "loss": 0.40003830194473267, + "step": 5214 + }, + { + "epoch": 1.202444085773576, + "grad_norm": 1.4786298792735793, + "learning_rate": 7.510146978305682e-07, + "loss": 0.4880738854408264, + "step": 5215 + }, + { + "epoch": 1.2026746599031588, + "grad_norm": 1.31895753202322, + "learning_rate": 7.506454616513505e-07, + "loss": 0.39548349380493164, + "step": 5216 + }, + { + "epoch": 1.2029052340327415, + "grad_norm": 1.5189592384869435, + "learning_rate": 7.502762617183425e-07, + "loss": 0.4060090184211731, + "step": 5217 + }, + { + "epoch": 1.2031358081623242, + "grad_norm": 1.6902238907281657, + "learning_rate": 7.499070980852101e-07, + "loss": 0.44657808542251587, + "step": 5218 + }, + { + "epoch": 1.203366382291907, + "grad_norm": 1.553015362629627, + "learning_rate": 7.495379708056161e-07, + "loss": 0.5283595323562622, + "step": 5219 + }, + { + "epoch": 1.2035969564214895, + "grad_norm": 1.5940858647104894, + "learning_rate": 7.49168879933216e-07, + "loss": 0.4424205422401428, + "step": 5220 + }, + { + "epoch": 1.2038275305510722, + "grad_norm": 1.4929497446465205, + "learning_rate": 7.487998255216619e-07, + "loss": 0.4998319745063782, + "step": 5221 + }, + { + "epoch": 1.2040581046806549, + "grad_norm": 1.3437939609448373, + "learning_rate": 7.484308076245987e-07, + "loss": 0.3821876645088196, + "step": 5222 + }, + { + "epoch": 1.2042886788102374, + "grad_norm": 1.4227177114495277, + "learning_rate": 7.480618262956669e-07, + "loss": 0.4567919373512268, + "step": 5223 + }, + { + "epoch": 1.20451925293982, + "grad_norm": 1.4207326358395804, + 
"learning_rate": 7.476928815885026e-07, + "loss": 0.4561428427696228, + "step": 5224 + }, + { + "epoch": 1.2047498270694028, + "grad_norm": 1.5720016799439587, + "learning_rate": 7.473239735567344e-07, + "loss": 0.4384823739528656, + "step": 5225 + }, + { + "epoch": 1.2049804011989855, + "grad_norm": 1.518914607229236, + "learning_rate": 7.469551022539877e-07, + "loss": 0.42840123176574707, + "step": 5226 + }, + { + "epoch": 1.2052109753285682, + "grad_norm": 1.4031825092609558, + "learning_rate": 7.465862677338812e-07, + "loss": 0.39553213119506836, + "step": 5227 + }, + { + "epoch": 1.2054415494581507, + "grad_norm": 1.521464998921144, + "learning_rate": 7.462174700500295e-07, + "loss": 0.4325043559074402, + "step": 5228 + }, + { + "epoch": 1.2056721235877335, + "grad_norm": 1.7451009485961195, + "learning_rate": 7.4584870925604e-07, + "loss": 0.5004623532295227, + "step": 5229 + }, + { + "epoch": 1.2059026977173162, + "grad_norm": 1.6975060246760258, + "learning_rate": 7.454799854055165e-07, + "loss": 0.42296791076660156, + "step": 5230 + }, + { + "epoch": 1.2061332718468987, + "grad_norm": 1.7859122255595659, + "learning_rate": 7.451112985520565e-07, + "loss": 0.45638370513916016, + "step": 5231 + }, + { + "epoch": 1.2063638459764814, + "grad_norm": 1.9018837416313183, + "learning_rate": 7.447426487492528e-07, + "loss": 0.5134493112564087, + "step": 5232 + }, + { + "epoch": 1.206594420106064, + "grad_norm": 1.382989024686568, + "learning_rate": 7.443740360506918e-07, + "loss": 0.4132578372955322, + "step": 5233 + }, + { + "epoch": 1.2068249942356468, + "grad_norm": 1.321784021070878, + "learning_rate": 7.440054605099552e-07, + "loss": 0.4363224506378174, + "step": 5234 + }, + { + "epoch": 1.2070555683652295, + "grad_norm": 1.4395608486144074, + "learning_rate": 7.4363692218062e-07, + "loss": 0.44970041513442993, + "step": 5235 + }, + { + "epoch": 1.207286142494812, + "grad_norm": 1.3219627332758312, + "learning_rate": 7.432684211162556e-07, + "loss": 
0.39787235856056213, + "step": 5236 + }, + { + "epoch": 1.2075167166243947, + "grad_norm": 1.694639970069785, + "learning_rate": 7.428999573704284e-07, + "loss": 0.46057572960853577, + "step": 5237 + }, + { + "epoch": 1.2077472907539775, + "grad_norm": 1.3954230269661139, + "learning_rate": 7.42531530996698e-07, + "loss": 0.46754559874534607, + "step": 5238 + }, + { + "epoch": 1.20797786488356, + "grad_norm": 1.4060087118514482, + "learning_rate": 7.42163142048619e-07, + "loss": 0.5072697401046753, + "step": 5239 + }, + { + "epoch": 1.2082084390131427, + "grad_norm": 1.5355585762921151, + "learning_rate": 7.417947905797403e-07, + "loss": 0.4691959023475647, + "step": 5240 + }, + { + "epoch": 1.2084390131427254, + "grad_norm": 1.4596733170422231, + "learning_rate": 7.414264766436056e-07, + "loss": 0.43248072266578674, + "step": 5241 + }, + { + "epoch": 1.208669587272308, + "grad_norm": 1.8386458599943265, + "learning_rate": 7.410582002937534e-07, + "loss": 0.4748457968235016, + "step": 5242 + }, + { + "epoch": 1.2089001614018908, + "grad_norm": 1.413498638420547, + "learning_rate": 7.406899615837157e-07, + "loss": 0.4682820439338684, + "step": 5243 + }, + { + "epoch": 1.2091307355314733, + "grad_norm": 1.3788557575990639, + "learning_rate": 7.403217605670205e-07, + "loss": 0.41747021675109863, + "step": 5244 + }, + { + "epoch": 1.209361309661056, + "grad_norm": 1.5523861247321795, + "learning_rate": 7.399535972971886e-07, + "loss": 0.4968727231025696, + "step": 5245 + }, + { + "epoch": 1.2095918837906388, + "grad_norm": 1.6255626899279143, + "learning_rate": 7.395854718277372e-07, + "loss": 0.486778199672699, + "step": 5246 + }, + { + "epoch": 1.2098224579202213, + "grad_norm": 1.938770231002498, + "learning_rate": 7.392173842121765e-07, + "loss": 0.5153725147247314, + "step": 5247 + }, + { + "epoch": 1.210053032049804, + "grad_norm": 1.6258479412197122, + "learning_rate": 7.388493345040123e-07, + "loss": 0.42352354526519775, + "step": 5248 + }, + { + "epoch": 
1.2102836061793867, + "grad_norm": 1.477454043811349, + "learning_rate": 7.384813227567437e-07, + "loss": 0.363994300365448, + "step": 5249 + }, + { + "epoch": 1.2105141803089694, + "grad_norm": 1.3450193947115454, + "learning_rate": 7.381133490238654e-07, + "loss": 0.44195863604545593, + "step": 5250 + }, + { + "epoch": 1.2107447544385521, + "grad_norm": 1.6510262733932026, + "learning_rate": 7.377454133588657e-07, + "loss": 0.5031026601791382, + "step": 5251 + }, + { + "epoch": 1.2109753285681346, + "grad_norm": 1.1126223170422647, + "learning_rate": 7.373775158152284e-07, + "loss": 0.3900304436683655, + "step": 5252 + }, + { + "epoch": 1.2112059026977173, + "grad_norm": 1.4718461813811798, + "learning_rate": 7.370096564464308e-07, + "loss": 0.406912624835968, + "step": 5253 + }, + { + "epoch": 1.2114364768273, + "grad_norm": 1.2742945351379469, + "learning_rate": 7.366418353059445e-07, + "loss": 0.407238632440567, + "step": 5254 + }, + { + "epoch": 1.2116670509568825, + "grad_norm": 2.3145771276343625, + "learning_rate": 7.36274052447237e-07, + "loss": 0.5605549216270447, + "step": 5255 + }, + { + "epoch": 1.2118976250864653, + "grad_norm": 1.7547311772877803, + "learning_rate": 7.359063079237684e-07, + "loss": 0.5016111731529236, + "step": 5256 + }, + { + "epoch": 1.212128199216048, + "grad_norm": 1.31999939383151, + "learning_rate": 7.355386017889946e-07, + "loss": 0.38812315464019775, + "step": 5257 + }, + { + "epoch": 1.2123587733456307, + "grad_norm": 1.5177330463551633, + "learning_rate": 7.35170934096365e-07, + "loss": 0.46022963523864746, + "step": 5258 + }, + { + "epoch": 1.2125893474752132, + "grad_norm": 1.4118628857930515, + "learning_rate": 7.348033048993246e-07, + "loss": 0.40029624104499817, + "step": 5259 + }, + { + "epoch": 1.212819921604796, + "grad_norm": 1.4051430521275825, + "learning_rate": 7.344357142513111e-07, + "loss": 0.4331943392753601, + "step": 5260 + }, + { + "epoch": 1.2130504957343786, + "grad_norm": 1.565074125850335, + 
"learning_rate": 7.340681622057582e-07, + "loss": 0.43757596611976624, + "step": 5261 + }, + { + "epoch": 1.2132810698639613, + "grad_norm": 1.7743971563599887, + "learning_rate": 7.337006488160931e-07, + "loss": 0.49733203649520874, + "step": 5262 + }, + { + "epoch": 1.2135116439935438, + "grad_norm": 1.341577967095045, + "learning_rate": 7.333331741357373e-07, + "loss": 0.35552018880844116, + "step": 5263 + }, + { + "epoch": 1.2137422181231265, + "grad_norm": 1.6321675762702066, + "learning_rate": 7.329657382181074e-07, + "loss": 0.4102798104286194, + "step": 5264 + }, + { + "epoch": 1.2139727922527093, + "grad_norm": 1.4184297160567871, + "learning_rate": 7.325983411166136e-07, + "loss": 0.4517349600791931, + "step": 5265 + }, + { + "epoch": 1.214203366382292, + "grad_norm": 1.6427775893660324, + "learning_rate": 7.322309828846613e-07, + "loss": 0.48924458026885986, + "step": 5266 + }, + { + "epoch": 1.2144339405118745, + "grad_norm": 1.4030974508932201, + "learning_rate": 7.31863663575649e-07, + "loss": 0.38971561193466187, + "step": 5267 + }, + { + "epoch": 1.2146645146414572, + "grad_norm": 1.6155044970268224, + "learning_rate": 7.31496383242971e-07, + "loss": 0.6503559350967407, + "step": 5268 + }, + { + "epoch": 1.21489508877104, + "grad_norm": 1.6905359606856467, + "learning_rate": 7.311291419400146e-07, + "loss": 0.4615272879600525, + "step": 5269 + }, + { + "epoch": 1.2151256629006226, + "grad_norm": 1.6629441467357413, + "learning_rate": 7.307619397201625e-07, + "loss": 0.3793429732322693, + "step": 5270 + }, + { + "epoch": 1.2153562370302051, + "grad_norm": 1.3076578533376795, + "learning_rate": 7.303947766367909e-07, + "loss": 0.48186585307121277, + "step": 5271 + }, + { + "epoch": 1.2155868111597878, + "grad_norm": 1.4243590091370186, + "learning_rate": 7.300276527432713e-07, + "loss": 0.4051778018474579, + "step": 5272 + }, + { + "epoch": 1.2158173852893706, + "grad_norm": 1.6820510248806995, + "learning_rate": 7.296605680929684e-07, + "loss": 
0.43364250659942627, + "step": 5273 + }, + { + "epoch": 1.2160479594189533, + "grad_norm": 1.6130796939421093, + "learning_rate": 7.292935227392414e-07, + "loss": 0.4893898367881775, + "step": 5274 + }, + { + "epoch": 1.2162785335485358, + "grad_norm": 1.240780138685616, + "learning_rate": 7.289265167354448e-07, + "loss": 0.43125462532043457, + "step": 5275 + }, + { + "epoch": 1.2165091076781185, + "grad_norm": 1.6108443522760163, + "learning_rate": 7.285595501349258e-07, + "loss": 0.4086509943008423, + "step": 5276 + }, + { + "epoch": 1.2167396818077012, + "grad_norm": 1.838256686394942, + "learning_rate": 7.281926229910274e-07, + "loss": 0.5176471471786499, + "step": 5277 + }, + { + "epoch": 1.216970255937284, + "grad_norm": 1.8145364687667531, + "learning_rate": 7.278257353570857e-07, + "loss": 0.4783210754394531, + "step": 5278 + }, + { + "epoch": 1.2172008300668664, + "grad_norm": 1.5012148176529632, + "learning_rate": 7.274588872864322e-07, + "loss": 0.4847145080566406, + "step": 5279 + }, + { + "epoch": 1.2174314041964491, + "grad_norm": 1.4076947828029491, + "learning_rate": 7.270920788323911e-07, + "loss": 0.4691849946975708, + "step": 5280 + }, + { + "epoch": 1.2176619783260318, + "grad_norm": 1.8729494542899485, + "learning_rate": 7.267253100482824e-07, + "loss": 0.5755687952041626, + "step": 5281 + }, + { + "epoch": 1.2178925524556146, + "grad_norm": 1.3639853941099451, + "learning_rate": 7.263585809874193e-07, + "loss": 0.42995721101760864, + "step": 5282 + }, + { + "epoch": 1.218123126585197, + "grad_norm": 1.4560966669318844, + "learning_rate": 7.259918917031101e-07, + "loss": 0.501590371131897, + "step": 5283 + }, + { + "epoch": 1.2183537007147798, + "grad_norm": 1.5326641731074693, + "learning_rate": 7.256252422486563e-07, + "loss": 0.5499469041824341, + "step": 5284 + }, + { + "epoch": 1.2185842748443625, + "grad_norm": 1.7075536366613502, + "learning_rate": 7.25258632677354e-07, + "loss": 0.4567297399044037, + "step": 5285 + }, + { + "epoch": 
1.2188148489739452, + "grad_norm": 1.3251311548344207, + "learning_rate": 7.248920630424942e-07, + "loss": 0.4046020805835724, + "step": 5286 + }, + { + "epoch": 1.2190454231035277, + "grad_norm": 1.4721989927884918, + "learning_rate": 7.245255333973608e-07, + "loss": 0.3534840941429138, + "step": 5287 + }, + { + "epoch": 1.2192759972331104, + "grad_norm": 1.4151850401024268, + "learning_rate": 7.241590437952331e-07, + "loss": 0.45795637369155884, + "step": 5288 + }, + { + "epoch": 1.2195065713626931, + "grad_norm": 1.4921564176260302, + "learning_rate": 7.237925942893839e-07, + "loss": 0.3984150290489197, + "step": 5289 + }, + { + "epoch": 1.2197371454922759, + "grad_norm": 1.5617581917582364, + "learning_rate": 7.234261849330807e-07, + "loss": 0.46833336353302, + "step": 5290 + }, + { + "epoch": 1.2199677196218583, + "grad_norm": 1.6200691445613622, + "learning_rate": 7.230598157795842e-07, + "loss": 0.5395709276199341, + "step": 5291 + }, + { + "epoch": 1.220198293751441, + "grad_norm": 1.300141768975315, + "learning_rate": 7.226934868821505e-07, + "loss": 0.4556152820587158, + "step": 5292 + }, + { + "epoch": 1.2204288678810238, + "grad_norm": 1.5916352600329198, + "learning_rate": 7.223271982940287e-07, + "loss": 0.49564266204833984, + "step": 5293 + }, + { + "epoch": 1.2206594420106065, + "grad_norm": 1.5492667362910795, + "learning_rate": 7.219609500684625e-07, + "loss": 0.5389127731323242, + "step": 5294 + }, + { + "epoch": 1.220890016140189, + "grad_norm": 1.3125997254034645, + "learning_rate": 7.215947422586905e-07, + "loss": 0.48815661668777466, + "step": 5295 + }, + { + "epoch": 1.2211205902697717, + "grad_norm": 1.6576709424363434, + "learning_rate": 7.21228574917944e-07, + "loss": 0.4204339385032654, + "step": 5296 + }, + { + "epoch": 1.2213511643993544, + "grad_norm": 1.2807688149232648, + "learning_rate": 7.208624480994494e-07, + "loss": 0.39993199706077576, + "step": 5297 + }, + { + "epoch": 1.2215817385289371, + "grad_norm": 1.7420778835945019, + 
"learning_rate": 7.204963618564268e-07, + "loss": 0.5679433941841125, + "step": 5298 + }, + { + "epoch": 1.2218123126585196, + "grad_norm": 1.819503614929131, + "learning_rate": 7.201303162420913e-07, + "loss": 0.46620815992355347, + "step": 5299 + }, + { + "epoch": 1.2220428867881024, + "grad_norm": 1.4667553556365653, + "learning_rate": 7.1976431130965e-07, + "loss": 0.44684547185897827, + "step": 5300 + }, + { + "epoch": 1.222273460917685, + "grad_norm": 1.6182813529173974, + "learning_rate": 7.193983471123066e-07, + "loss": 0.4518858790397644, + "step": 5301 + }, + { + "epoch": 1.2225040350472678, + "grad_norm": 1.497058969625444, + "learning_rate": 7.190324237032569e-07, + "loss": 0.3966304659843445, + "step": 5302 + }, + { + "epoch": 1.2227346091768503, + "grad_norm": 1.7688402904846452, + "learning_rate": 7.186665411356925e-07, + "loss": 0.5541782379150391, + "step": 5303 + }, + { + "epoch": 1.222965183306433, + "grad_norm": 1.5748150394963076, + "learning_rate": 7.183006994627972e-07, + "loss": 0.3986799120903015, + "step": 5304 + }, + { + "epoch": 1.2231957574360157, + "grad_norm": 1.3179167901427211, + "learning_rate": 7.1793489873775e-07, + "loss": 0.485867977142334, + "step": 5305 + }, + { + "epoch": 1.2234263315655984, + "grad_norm": 1.6264368495030206, + "learning_rate": 7.175691390137244e-07, + "loss": 0.40187692642211914, + "step": 5306 + }, + { + "epoch": 1.223656905695181, + "grad_norm": 1.5085798270078894, + "learning_rate": 7.172034203438864e-07, + "loss": 0.4679393172264099, + "step": 5307 + }, + { + "epoch": 1.2238874798247636, + "grad_norm": 1.3178949369734356, + "learning_rate": 7.168377427813974e-07, + "loss": 0.512301504611969, + "step": 5308 + }, + { + "epoch": 1.2241180539543464, + "grad_norm": 1.4684075358167812, + "learning_rate": 7.164721063794122e-07, + "loss": 0.5340646505355835, + "step": 5309 + }, + { + "epoch": 1.224348628083929, + "grad_norm": 1.6528941936609833, + "learning_rate": 7.1610651119108e-07, + "loss": 
0.4757506847381592, + "step": 5310 + }, + { + "epoch": 1.2245792022135116, + "grad_norm": 1.5982652868975813, + "learning_rate": 7.157409572695434e-07, + "loss": 0.5697519779205322, + "step": 5311 + }, + { + "epoch": 1.2248097763430943, + "grad_norm": 1.4427165421847559, + "learning_rate": 7.153754446679395e-07, + "loss": 0.47521811723709106, + "step": 5312 + }, + { + "epoch": 1.225040350472677, + "grad_norm": 1.4092560589123113, + "learning_rate": 7.150099734393997e-07, + "loss": 0.40484973788261414, + "step": 5313 + }, + { + "epoch": 1.2252709246022597, + "grad_norm": 1.4095470452598946, + "learning_rate": 7.146445436370481e-07, + "loss": 0.4465969204902649, + "step": 5314 + }, + { + "epoch": 1.2255014987318422, + "grad_norm": 1.5543895211488108, + "learning_rate": 7.142791553140044e-07, + "loss": 0.44878089427948, + "step": 5315 + }, + { + "epoch": 1.225732072861425, + "grad_norm": 1.657847170962442, + "learning_rate": 7.139138085233809e-07, + "loss": 0.5049536228179932, + "step": 5316 + }, + { + "epoch": 1.2259626469910077, + "grad_norm": 1.377588971885486, + "learning_rate": 7.135485033182847e-07, + "loss": 0.42945951223373413, + "step": 5317 + }, + { + "epoch": 1.2261932211205901, + "grad_norm": 1.607627236207016, + "learning_rate": 7.131832397518167e-07, + "loss": 0.4668564200401306, + "step": 5318 + }, + { + "epoch": 1.2264237952501729, + "grad_norm": 1.640684584420395, + "learning_rate": 7.128180178770718e-07, + "loss": 0.4691551625728607, + "step": 5319 + }, + { + "epoch": 1.2266543693797556, + "grad_norm": 1.4653351758865718, + "learning_rate": 7.124528377471382e-07, + "loss": 0.4306211769580841, + "step": 5320 + }, + { + "epoch": 1.2268849435093383, + "grad_norm": 1.7130888177954928, + "learning_rate": 7.120876994150991e-07, + "loss": 0.4986322522163391, + "step": 5321 + }, + { + "epoch": 1.227115517638921, + "grad_norm": 1.4775997138779564, + "learning_rate": 7.117226029340304e-07, + "loss": 0.4058566093444824, + "step": 5322 + }, + { + "epoch": 
1.2273460917685035, + "grad_norm": 1.3729187298835452, + "learning_rate": 7.113575483570036e-07, + "loss": 0.390174925327301, + "step": 5323 + }, + { + "epoch": 1.2275766658980862, + "grad_norm": 1.3070483816242904, + "learning_rate": 7.109925357370821e-07, + "loss": 0.38822996616363525, + "step": 5324 + }, + { + "epoch": 1.227807240027669, + "grad_norm": 1.3599088173875424, + "learning_rate": 7.106275651273244e-07, + "loss": 0.47792741656303406, + "step": 5325 + }, + { + "epoch": 1.2280378141572514, + "grad_norm": 1.52666177684785, + "learning_rate": 7.102626365807833e-07, + "loss": 0.5332789421081543, + "step": 5326 + }, + { + "epoch": 1.2282683882868342, + "grad_norm": 1.4337525635961101, + "learning_rate": 7.098977501505036e-07, + "loss": 0.5325096845626831, + "step": 5327 + }, + { + "epoch": 1.2284989624164169, + "grad_norm": 1.6185088994304762, + "learning_rate": 7.095329058895267e-07, + "loss": 0.4184231162071228, + "step": 5328 + }, + { + "epoch": 1.2287295365459996, + "grad_norm": 1.7570013482364435, + "learning_rate": 7.091681038508852e-07, + "loss": 0.43037641048431396, + "step": 5329 + }, + { + "epoch": 1.2289601106755823, + "grad_norm": 1.5067774692843796, + "learning_rate": 7.088033440876078e-07, + "loss": 0.4466821551322937, + "step": 5330 + }, + { + "epoch": 1.2291906848051648, + "grad_norm": 1.5083021571464743, + "learning_rate": 7.084386266527151e-07, + "loss": 0.35853004455566406, + "step": 5331 + }, + { + "epoch": 1.2294212589347475, + "grad_norm": 1.542402337323393, + "learning_rate": 7.080739515992231e-07, + "loss": 0.44986268877983093, + "step": 5332 + }, + { + "epoch": 1.2296518330643302, + "grad_norm": 1.7104999289185845, + "learning_rate": 7.07709318980141e-07, + "loss": 0.3563602566719055, + "step": 5333 + }, + { + "epoch": 1.2298824071939127, + "grad_norm": 1.5401970805558025, + "learning_rate": 7.073447288484715e-07, + "loss": 0.4505435824394226, + "step": 5334 + }, + { + "epoch": 1.2301129813234954, + "grad_norm": 1.3508208021904817, + 
"learning_rate": 7.069801812572116e-07, + "loss": 0.4477807283401489, + "step": 5335 + }, + { + "epoch": 1.2303435554530782, + "grad_norm": 1.5084663891676386, + "learning_rate": 7.066156762593518e-07, + "loss": 0.4470565915107727, + "step": 5336 + }, + { + "epoch": 1.2305741295826609, + "grad_norm": 1.4627780913359043, + "learning_rate": 7.062512139078773e-07, + "loss": 0.4236464500427246, + "step": 5337 + }, + { + "epoch": 1.2308047037122436, + "grad_norm": 1.3002436810863733, + "learning_rate": 7.058867942557655e-07, + "loss": 0.3221476376056671, + "step": 5338 + }, + { + "epoch": 1.231035277841826, + "grad_norm": 1.818660153327524, + "learning_rate": 7.055224173559891e-07, + "loss": 0.502305269241333, + "step": 5339 + }, + { + "epoch": 1.2312658519714088, + "grad_norm": 1.655814956644188, + "learning_rate": 7.051580832615136e-07, + "loss": 0.5121853351593018, + "step": 5340 + }, + { + "epoch": 1.2314964261009915, + "grad_norm": 1.713071870874518, + "learning_rate": 7.047937920252991e-07, + "loss": 0.5468438863754272, + "step": 5341 + }, + { + "epoch": 1.231727000230574, + "grad_norm": 1.2030374980808431, + "learning_rate": 7.044295437002985e-07, + "loss": 0.5026402473449707, + "step": 5342 + }, + { + "epoch": 1.2319575743601567, + "grad_norm": 1.9445671085046203, + "learning_rate": 7.040653383394596e-07, + "loss": 0.5205342173576355, + "step": 5343 + }, + { + "epoch": 1.2321881484897395, + "grad_norm": 1.5970504229179872, + "learning_rate": 7.037011759957228e-07, + "loss": 0.5184727311134338, + "step": 5344 + }, + { + "epoch": 1.2324187226193222, + "grad_norm": 1.3779493729990695, + "learning_rate": 7.033370567220227e-07, + "loss": 0.414316862821579, + "step": 5345 + }, + { + "epoch": 1.2326492967489049, + "grad_norm": 1.4260441300832385, + "learning_rate": 7.029729805712885e-07, + "loss": 0.42133980989456177, + "step": 5346 + }, + { + "epoch": 1.2328798708784874, + "grad_norm": 1.8139584962445312, + "learning_rate": 7.026089475964414e-07, + "loss": 
0.4888553321361542, + "step": 5347 + }, + { + "epoch": 1.23311044500807, + "grad_norm": 1.3419182130591616, + "learning_rate": 7.022449578503979e-07, + "loss": 0.4702431857585907, + "step": 5348 + }, + { + "epoch": 1.2333410191376528, + "grad_norm": 1.7237576970327266, + "learning_rate": 7.018810113860672e-07, + "loss": 0.5312628746032715, + "step": 5349 + }, + { + "epoch": 1.2335715932672353, + "grad_norm": 1.3183810824607851, + "learning_rate": 7.015171082563533e-07, + "loss": 0.5297777056694031, + "step": 5350 + }, + { + "epoch": 1.233802167396818, + "grad_norm": 1.4423147751678271, + "learning_rate": 7.011532485141524e-07, + "loss": 0.5172504782676697, + "step": 5351 + }, + { + "epoch": 1.2340327415264007, + "grad_norm": 1.4663357988839691, + "learning_rate": 7.007894322123556e-07, + "loss": 0.4288995862007141, + "step": 5352 + }, + { + "epoch": 1.2342633156559835, + "grad_norm": 1.373863251988179, + "learning_rate": 7.004256594038475e-07, + "loss": 0.4194108247756958, + "step": 5353 + }, + { + "epoch": 1.2344938897855662, + "grad_norm": 1.6567765897983155, + "learning_rate": 7.000619301415056e-07, + "loss": 0.48825979232788086, + "step": 5354 + }, + { + "epoch": 1.2347244639151487, + "grad_norm": 1.5674749005570563, + "learning_rate": 6.99698244478202e-07, + "loss": 0.4721163213253021, + "step": 5355 + }, + { + "epoch": 1.2349550380447314, + "grad_norm": 1.4292932334311201, + "learning_rate": 6.993346024668019e-07, + "loss": 0.5104520916938782, + "step": 5356 + }, + { + "epoch": 1.235185612174314, + "grad_norm": 1.757397862406759, + "learning_rate": 6.98971004160165e-07, + "loss": 0.5257378816604614, + "step": 5357 + }, + { + "epoch": 1.2354161863038966, + "grad_norm": 1.5756368498047397, + "learning_rate": 6.986074496111429e-07, + "loss": 0.5624911785125732, + "step": 5358 + }, + { + "epoch": 1.2356467604334793, + "grad_norm": 1.4832170020848512, + "learning_rate": 6.982439388725828e-07, + "loss": 0.5186502933502197, + "step": 5359 + }, + { + "epoch": 
1.235877334563062, + "grad_norm": 1.4333093290057806, + "learning_rate": 6.978804719973241e-07, + "loss": 0.42711856961250305, + "step": 5360 + }, + { + "epoch": 1.2361079086926448, + "grad_norm": 1.5710112274218073, + "learning_rate": 6.975170490382013e-07, + "loss": 0.525848388671875, + "step": 5361 + }, + { + "epoch": 1.2363384828222275, + "grad_norm": 1.475742371846223, + "learning_rate": 6.971536700480405e-07, + "loss": 0.41279107332229614, + "step": 5362 + }, + { + "epoch": 1.23656905695181, + "grad_norm": 1.381610773190275, + "learning_rate": 6.967903350796632e-07, + "loss": 0.38868075609207153, + "step": 5363 + }, + { + "epoch": 1.2367996310813927, + "grad_norm": 1.2852056850014901, + "learning_rate": 6.964270441858837e-07, + "loss": 0.41875284910202026, + "step": 5364 + }, + { + "epoch": 1.2370302052109754, + "grad_norm": 1.6506819982730945, + "learning_rate": 6.960637974195096e-07, + "loss": 0.4754808843135834, + "step": 5365 + }, + { + "epoch": 1.237260779340558, + "grad_norm": 1.367170455716087, + "learning_rate": 6.957005948333434e-07, + "loss": 0.5073249340057373, + "step": 5366 + }, + { + "epoch": 1.2374913534701406, + "grad_norm": 1.4682970250918908, + "learning_rate": 6.953374364801792e-07, + "loss": 0.4545915126800537, + "step": 5367 + }, + { + "epoch": 1.2377219275997233, + "grad_norm": 1.4664699450973697, + "learning_rate": 6.949743224128064e-07, + "loss": 0.42797422409057617, + "step": 5368 + }, + { + "epoch": 1.237952501729306, + "grad_norm": 1.7409270878989862, + "learning_rate": 6.946112526840071e-07, + "loss": 0.570556104183197, + "step": 5369 + }, + { + "epoch": 1.2381830758588885, + "grad_norm": 1.21807525986395, + "learning_rate": 6.942482273465577e-07, + "loss": 0.3866136074066162, + "step": 5370 + }, + { + "epoch": 1.2384136499884713, + "grad_norm": 1.385922338157159, + "learning_rate": 6.938852464532267e-07, + "loss": 0.3716529309749603, + "step": 5371 + }, + { + "epoch": 1.238644224118054, + "grad_norm": 1.5756601150848535, + 
"learning_rate": 6.935223100567776e-07, + "loss": 0.4781096577644348, + "step": 5372 + }, + { + "epoch": 1.2388747982476367, + "grad_norm": 1.5023911555765588, + "learning_rate": 6.931594182099671e-07, + "loss": 0.4262877106666565, + "step": 5373 + }, + { + "epoch": 1.2391053723772192, + "grad_norm": 1.6023295142223875, + "learning_rate": 6.927965709655444e-07, + "loss": 0.49859267473220825, + "step": 5374 + }, + { + "epoch": 1.239335946506802, + "grad_norm": 1.8550612096678925, + "learning_rate": 6.924337683762539e-07, + "loss": 0.4710119664669037, + "step": 5375 + }, + { + "epoch": 1.2395665206363846, + "grad_norm": 1.518585467890365, + "learning_rate": 6.92071010494832e-07, + "loss": 0.4974974989891052, + "step": 5376 + }, + { + "epoch": 1.2397970947659673, + "grad_norm": 2.029509938602293, + "learning_rate": 6.917082973740098e-07, + "loss": 0.4118514657020569, + "step": 5377 + }, + { + "epoch": 1.2400276688955498, + "grad_norm": 1.391922482329176, + "learning_rate": 6.913456290665106e-07, + "loss": 0.4223165214061737, + "step": 5378 + }, + { + "epoch": 1.2402582430251325, + "grad_norm": 1.5760276199817416, + "learning_rate": 6.909830056250526e-07, + "loss": 0.4896865487098694, + "step": 5379 + }, + { + "epoch": 1.2404888171547153, + "grad_norm": 1.35318854532684, + "learning_rate": 6.906204271023463e-07, + "loss": 0.36112266778945923, + "step": 5380 + }, + { + "epoch": 1.240719391284298, + "grad_norm": 1.4255868593911465, + "learning_rate": 6.902578935510969e-07, + "loss": 0.4665502905845642, + "step": 5381 + }, + { + "epoch": 1.2409499654138805, + "grad_norm": 1.6036447338223971, + "learning_rate": 6.898954050240013e-07, + "loss": 0.46059858798980713, + "step": 5382 + }, + { + "epoch": 1.2411805395434632, + "grad_norm": 1.4844055015741944, + "learning_rate": 6.895329615737515e-07, + "loss": 0.46149420738220215, + "step": 5383 + }, + { + "epoch": 1.241411113673046, + "grad_norm": 1.5602784439666317, + "learning_rate": 6.891705632530327e-07, + "loss": 
0.42226743698120117, + "step": 5384 + }, + { + "epoch": 1.2416416878026286, + "grad_norm": 1.4308699177023212, + "learning_rate": 6.88808210114522e-07, + "loss": 0.45789939165115356, + "step": 5385 + }, + { + "epoch": 1.2418722619322111, + "grad_norm": 1.5754200685163184, + "learning_rate": 6.884459022108922e-07, + "loss": 0.44569891691207886, + "step": 5386 + }, + { + "epoch": 1.2421028360617938, + "grad_norm": 1.4099412845136035, + "learning_rate": 6.880836395948078e-07, + "loss": 0.3971112370491028, + "step": 5387 + }, + { + "epoch": 1.2423334101913766, + "grad_norm": 1.6636550459216706, + "learning_rate": 6.877214223189278e-07, + "loss": 0.46052566170692444, + "step": 5388 + }, + { + "epoch": 1.2425639843209593, + "grad_norm": 1.2735689149473257, + "learning_rate": 6.873592504359037e-07, + "loss": 0.42730599641799927, + "step": 5389 + }, + { + "epoch": 1.2427945584505418, + "grad_norm": 1.5806143555224212, + "learning_rate": 6.869971239983814e-07, + "loss": 0.4391734004020691, + "step": 5390 + }, + { + "epoch": 1.2430251325801245, + "grad_norm": 1.5314248582389964, + "learning_rate": 6.866350430589989e-07, + "loss": 0.4523593485355377, + "step": 5391 + }, + { + "epoch": 1.2432557067097072, + "grad_norm": 1.587550694342246, + "learning_rate": 6.86273007670389e-07, + "loss": 0.5398315787315369, + "step": 5392 + }, + { + "epoch": 1.24348628083929, + "grad_norm": 1.2298139407771986, + "learning_rate": 6.859110178851767e-07, + "loss": 0.40480807423591614, + "step": 5393 + }, + { + "epoch": 1.2437168549688724, + "grad_norm": 1.4233815325100456, + "learning_rate": 6.855490737559816e-07, + "loss": 0.42483675479888916, + "step": 5394 + }, + { + "epoch": 1.2439474290984551, + "grad_norm": 1.611497963721617, + "learning_rate": 6.851871753354153e-07, + "loss": 0.39951619505882263, + "step": 5395 + }, + { + "epoch": 1.2441780032280378, + "grad_norm": 1.5084898015563448, + "learning_rate": 6.848253226760833e-07, + "loss": 0.48650771379470825, + "step": 5396 + }, + { + 
"epoch": 1.2444085773576206, + "grad_norm": 1.5899141960647352, + "learning_rate": 6.844635158305853e-07, + "loss": 0.5377830266952515, + "step": 5397 + }, + { + "epoch": 1.244639151487203, + "grad_norm": 1.667763606347776, + "learning_rate": 6.841017548515127e-07, + "loss": 0.4365614950656891, + "step": 5398 + }, + { + "epoch": 1.2448697256167858, + "grad_norm": 1.2560105349082187, + "learning_rate": 6.837400397914519e-07, + "loss": 0.39739400148391724, + "step": 5399 + }, + { + "epoch": 1.2451002997463685, + "grad_norm": 1.3287360038901976, + "learning_rate": 6.833783707029812e-07, + "loss": 0.4005683660507202, + "step": 5400 + }, + { + "epoch": 1.2453308738759512, + "grad_norm": 1.6646043641444999, + "learning_rate": 6.830167476386737e-07, + "loss": 0.5635108351707458, + "step": 5401 + }, + { + "epoch": 1.2455614480055337, + "grad_norm": 1.6642180514990483, + "learning_rate": 6.82655170651094e-07, + "loss": 0.4332388639450073, + "step": 5402 + }, + { + "epoch": 1.2457920221351164, + "grad_norm": 1.525164084943155, + "learning_rate": 6.822936397928015e-07, + "loss": 0.47506433725357056, + "step": 5403 + }, + { + "epoch": 1.2460225962646991, + "grad_norm": 1.600563207739989, + "learning_rate": 6.819321551163486e-07, + "loss": 0.5081777572631836, + "step": 5404 + }, + { + "epoch": 1.2462531703942819, + "grad_norm": 1.6650056699718765, + "learning_rate": 6.815707166742801e-07, + "loss": 0.4038957953453064, + "step": 5405 + }, + { + "epoch": 1.2464837445238643, + "grad_norm": 1.759676797230376, + "learning_rate": 6.812093245191354e-07, + "loss": 0.4665706753730774, + "step": 5406 + }, + { + "epoch": 1.246714318653447, + "grad_norm": 1.8957165771048585, + "learning_rate": 6.808479787034459e-07, + "loss": 0.45610785484313965, + "step": 5407 + }, + { + "epoch": 1.2469448927830298, + "grad_norm": 1.443572019443965, + "learning_rate": 6.804866792797377e-07, + "loss": 0.4334958493709564, + "step": 5408 + }, + { + "epoch": 1.2471754669126125, + "grad_norm": 
1.4719822396111175, + "learning_rate": 6.801254263005283e-07, + "loss": 0.5505996942520142, + "step": 5409 + }, + { + "epoch": 1.247406041042195, + "grad_norm": 1.5261896109132582, + "learning_rate": 6.797642198183303e-07, + "loss": 0.5589424967765808, + "step": 5410 + }, + { + "epoch": 1.2476366151717777, + "grad_norm": 1.892082521677576, + "learning_rate": 6.794030598856483e-07, + "loss": 0.48142847418785095, + "step": 5411 + }, + { + "epoch": 1.2478671893013604, + "grad_norm": 1.6606812394072976, + "learning_rate": 6.790419465549811e-07, + "loss": 0.5549830198287964, + "step": 5412 + }, + { + "epoch": 1.2480977634309431, + "grad_norm": 1.6097248774465256, + "learning_rate": 6.786808798788193e-07, + "loss": 0.5974072217941284, + "step": 5413 + }, + { + "epoch": 1.2483283375605256, + "grad_norm": 1.3333137403479542, + "learning_rate": 6.783198599096484e-07, + "loss": 0.38189029693603516, + "step": 5414 + }, + { + "epoch": 1.2485589116901084, + "grad_norm": 1.4543286006354934, + "learning_rate": 6.779588866999459e-07, + "loss": 0.41150039434432983, + "step": 5415 + }, + { + "epoch": 1.248789485819691, + "grad_norm": 1.451215833026304, + "learning_rate": 6.775979603021828e-07, + "loss": 0.4291636645793915, + "step": 5416 + }, + { + "epoch": 1.2490200599492738, + "grad_norm": 1.2798211834451962, + "learning_rate": 6.772370807688242e-07, + "loss": 0.45324140787124634, + "step": 5417 + }, + { + "epoch": 1.2492506340788563, + "grad_norm": 1.3895968147090427, + "learning_rate": 6.768762481523262e-07, + "loss": 0.4748731851577759, + "step": 5418 + }, + { + "epoch": 1.249481208208439, + "grad_norm": 1.618628812481624, + "learning_rate": 6.765154625051408e-07, + "loss": 0.43602505326271057, + "step": 5419 + }, + { + "epoch": 1.2497117823380217, + "grad_norm": 1.4027608933739075, + "learning_rate": 6.761547238797112e-07, + "loss": 0.49135684967041016, + "step": 5420 + }, + { + "epoch": 1.2499423564676044, + "grad_norm": 1.6315360373382408, + "learning_rate": 
6.757940323284747e-07, + "loss": 0.47508272528648376, + "step": 5421 + }, + { + "epoch": 1.250172930597187, + "grad_norm": 1.612865868213556, + "learning_rate": 6.754333879038611e-07, + "loss": 0.399259090423584, + "step": 5422 + }, + { + "epoch": 1.2504035047267696, + "grad_norm": 1.6878741312884291, + "learning_rate": 6.750727906582941e-07, + "loss": 0.426364004611969, + "step": 5423 + }, + { + "epoch": 1.2506340788563524, + "grad_norm": 1.4584807010931917, + "learning_rate": 6.747122406441903e-07, + "loss": 0.4641951322555542, + "step": 5424 + }, + { + "epoch": 1.250864652985935, + "grad_norm": 1.3880451781756755, + "learning_rate": 6.743517379139585e-07, + "loss": 0.35008323192596436, + "step": 5425 + }, + { + "epoch": 1.2510952271155176, + "grad_norm": 1.4485633708895984, + "learning_rate": 6.739912825200022e-07, + "loss": 0.49627771973609924, + "step": 5426 + }, + { + "epoch": 1.2513258012451003, + "grad_norm": 1.628398042874366, + "learning_rate": 6.736308745147168e-07, + "loss": 0.4926851987838745, + "step": 5427 + }, + { + "epoch": 1.251556375374683, + "grad_norm": 1.622960147434406, + "learning_rate": 6.732705139504917e-07, + "loss": 0.44777536392211914, + "step": 5428 + }, + { + "epoch": 1.2517869495042655, + "grad_norm": 1.6523545202218224, + "learning_rate": 6.729102008797085e-07, + "loss": 0.39160430431365967, + "step": 5429 + }, + { + "epoch": 1.2520175236338482, + "grad_norm": 1.5184849781676724, + "learning_rate": 6.725499353547426e-07, + "loss": 0.4585273861885071, + "step": 5430 + }, + { + "epoch": 1.252248097763431, + "grad_norm": 1.5327675196324342, + "learning_rate": 6.721897174279621e-07, + "loss": 0.5245224237442017, + "step": 5431 + }, + { + "epoch": 1.2524786718930137, + "grad_norm": 1.5257069000403813, + "learning_rate": 6.718295471517288e-07, + "loss": 0.4217349886894226, + "step": 5432 + }, + { + "epoch": 1.2527092460225964, + "grad_norm": 1.4826939266004133, + "learning_rate": 6.714694245783963e-07, + "loss": 0.4944193661212921, + 
"step": 5433 + }, + { + "epoch": 1.2529398201521789, + "grad_norm": 1.387839760206308, + "learning_rate": 6.711093497603127e-07, + "loss": 0.5058057904243469, + "step": 5434 + }, + { + "epoch": 1.2531703942817616, + "grad_norm": 1.381621888753065, + "learning_rate": 6.707493227498186e-07, + "loss": 0.45669037103652954, + "step": 5435 + }, + { + "epoch": 1.2534009684113443, + "grad_norm": 1.5997486257834712, + "learning_rate": 6.703893435992469e-07, + "loss": 0.4248945116996765, + "step": 5436 + }, + { + "epoch": 1.2536315425409268, + "grad_norm": 1.6056111266165571, + "learning_rate": 6.700294123609249e-07, + "loss": 0.3984343707561493, + "step": 5437 + }, + { + "epoch": 1.2538621166705095, + "grad_norm": 1.5349078061254786, + "learning_rate": 6.696695290871715e-07, + "loss": 0.435299813747406, + "step": 5438 + }, + { + "epoch": 1.2540926908000922, + "grad_norm": 1.6277363060500583, + "learning_rate": 6.693096938303002e-07, + "loss": 0.4225304126739502, + "step": 5439 + }, + { + "epoch": 1.254323264929675, + "grad_norm": 1.6495416759002697, + "learning_rate": 6.689499066426161e-07, + "loss": 0.4686669111251831, + "step": 5440 + }, + { + "epoch": 1.2545538390592577, + "grad_norm": 1.5168957851404996, + "learning_rate": 6.685901675764186e-07, + "loss": 0.45163553953170776, + "step": 5441 + }, + { + "epoch": 1.2547844131888402, + "grad_norm": 1.3593822737620262, + "learning_rate": 6.682304766839986e-07, + "loss": 0.44223567843437195, + "step": 5442 + }, + { + "epoch": 1.2550149873184229, + "grad_norm": 1.5363469724843986, + "learning_rate": 6.678708340176413e-07, + "loss": 0.4008648991584778, + "step": 5443 + }, + { + "epoch": 1.2552455614480056, + "grad_norm": 1.4199248627467993, + "learning_rate": 6.675112396296245e-07, + "loss": 0.4500792324542999, + "step": 5444 + }, + { + "epoch": 1.255476135577588, + "grad_norm": 1.490145734356762, + "learning_rate": 6.671516935722183e-07, + "loss": 0.42558690905570984, + "step": 5445 + }, + { + "epoch": 1.2557067097071708, + 
"grad_norm": 1.7098682543926618, + "learning_rate": 6.667921958976871e-07, + "loss": 0.4676043391227722, + "step": 5446 + }, + { + "epoch": 1.2559372838367535, + "grad_norm": 1.8041492407407758, + "learning_rate": 6.664327466582869e-07, + "loss": 0.44114184379577637, + "step": 5447 + }, + { + "epoch": 1.2561678579663362, + "grad_norm": 1.6102069805165957, + "learning_rate": 6.660733459062679e-07, + "loss": 0.33865463733673096, + "step": 5448 + }, + { + "epoch": 1.256398432095919, + "grad_norm": 1.8619975614063338, + "learning_rate": 6.65713993693872e-07, + "loss": 0.5397414565086365, + "step": 5449 + }, + { + "epoch": 1.2566290062255014, + "grad_norm": 1.4730562973077854, + "learning_rate": 6.653546900733352e-07, + "loss": 0.49249517917633057, + "step": 5450 + }, + { + "epoch": 1.2568595803550842, + "grad_norm": 1.5757041605280757, + "learning_rate": 6.649954350968855e-07, + "loss": 0.5438433885574341, + "step": 5451 + }, + { + "epoch": 1.2570901544846669, + "grad_norm": 1.4727448576353426, + "learning_rate": 6.646362288167448e-07, + "loss": 0.43725037574768066, + "step": 5452 + }, + { + "epoch": 1.2573207286142494, + "grad_norm": 1.5159104216766552, + "learning_rate": 6.642770712851269e-07, + "loss": 0.5369226336479187, + "step": 5453 + }, + { + "epoch": 1.257551302743832, + "grad_norm": 1.4915531986930697, + "learning_rate": 6.63917962554239e-07, + "loss": 0.45022842288017273, + "step": 5454 + }, + { + "epoch": 1.2577818768734148, + "grad_norm": 1.6219974371712227, + "learning_rate": 6.635589026762818e-07, + "loss": 0.42483362555503845, + "step": 5455 + }, + { + "epoch": 1.2580124510029975, + "grad_norm": 1.4115832140490556, + "learning_rate": 6.631998917034474e-07, + "loss": 0.4909497797489166, + "step": 5456 + }, + { + "epoch": 1.2582430251325802, + "grad_norm": 1.3159817254483799, + "learning_rate": 6.628409296879223e-07, + "loss": 0.4927433431148529, + "step": 5457 + }, + { + "epoch": 1.2584735992621627, + "grad_norm": 1.550356576361105, + "learning_rate": 
6.624820166818847e-07, + "loss": 0.4452761113643646, + "step": 5458 + }, + { + "epoch": 1.2587041733917455, + "grad_norm": 1.5683413746620685, + "learning_rate": 6.62123152737507e-07, + "loss": 0.4637982249259949, + "step": 5459 + }, + { + "epoch": 1.2589347475213282, + "grad_norm": 1.3293268937895057, + "learning_rate": 6.617643379069532e-07, + "loss": 0.3189438581466675, + "step": 5460 + }, + { + "epoch": 1.2591653216509107, + "grad_norm": 1.3296675722252447, + "learning_rate": 6.614055722423808e-07, + "loss": 0.420698881149292, + "step": 5461 + }, + { + "epoch": 1.2593958957804934, + "grad_norm": 1.5202476608747133, + "learning_rate": 6.610468557959398e-07, + "loss": 0.5187642574310303, + "step": 5462 + }, + { + "epoch": 1.259626469910076, + "grad_norm": 1.4954844764147424, + "learning_rate": 6.606881886197741e-07, + "loss": 0.48519381880760193, + "step": 5463 + }, + { + "epoch": 1.2598570440396588, + "grad_norm": 1.4755140585184632, + "learning_rate": 6.60329570766019e-07, + "loss": 0.3930806815624237, + "step": 5464 + }, + { + "epoch": 1.2600876181692415, + "grad_norm": 1.8617928902566707, + "learning_rate": 6.599710022868027e-07, + "loss": 0.4890612065792084, + "step": 5465 + }, + { + "epoch": 1.260318192298824, + "grad_norm": 1.2781262224531547, + "learning_rate": 6.596124832342476e-07, + "loss": 0.4202774465084076, + "step": 5466 + }, + { + "epoch": 1.2605487664284067, + "grad_norm": 1.5196012608537903, + "learning_rate": 6.592540136604674e-07, + "loss": 0.5053761005401611, + "step": 5467 + }, + { + "epoch": 1.2607793405579895, + "grad_norm": 1.4874107682553572, + "learning_rate": 6.588955936175702e-07, + "loss": 0.4827175736427307, + "step": 5468 + }, + { + "epoch": 1.261009914687572, + "grad_norm": 1.4659080652243894, + "learning_rate": 6.585372231576551e-07, + "loss": 0.45179229974746704, + "step": 5469 + }, + { + "epoch": 1.2612404888171547, + "grad_norm": 1.3781712796058982, + "learning_rate": 6.581789023328155e-07, + "loss": 0.4024949073791504, + 
"step": 5470 + }, + { + "epoch": 1.2614710629467374, + "grad_norm": 1.7288759385339574, + "learning_rate": 6.578206311951363e-07, + "loss": 0.48839491605758667, + "step": 5471 + }, + { + "epoch": 1.26170163707632, + "grad_norm": 1.4778086795689929, + "learning_rate": 6.574624097966968e-07, + "loss": 0.45897620916366577, + "step": 5472 + }, + { + "epoch": 1.2619322112059028, + "grad_norm": 1.5548512112712307, + "learning_rate": 6.571042381895671e-07, + "loss": 0.48471882939338684, + "step": 5473 + }, + { + "epoch": 1.2621627853354853, + "grad_norm": 2.0045804163216414, + "learning_rate": 6.567461164258117e-07, + "loss": 0.44159913063049316, + "step": 5474 + }, + { + "epoch": 1.262393359465068, + "grad_norm": 1.5752243442253915, + "learning_rate": 6.563880445574872e-07, + "loss": 0.39186012744903564, + "step": 5475 + }, + { + "epoch": 1.2626239335946507, + "grad_norm": 1.818057995697113, + "learning_rate": 6.560300226366425e-07, + "loss": 0.5332233905792236, + "step": 5476 + }, + { + "epoch": 1.2628545077242332, + "grad_norm": 1.350222227503923, + "learning_rate": 6.556720507153201e-07, + "loss": 0.4252084195613861, + "step": 5477 + }, + { + "epoch": 1.263085081853816, + "grad_norm": 1.4204993118440263, + "learning_rate": 6.553141288455548e-07, + "loss": 0.36927711963653564, + "step": 5478 + }, + { + "epoch": 1.2633156559833987, + "grad_norm": 1.5676826878414558, + "learning_rate": 6.549562570793745e-07, + "loss": 0.4405602216720581, + "step": 5479 + }, + { + "epoch": 1.2635462301129814, + "grad_norm": 1.5245742985153417, + "learning_rate": 6.545984354687986e-07, + "loss": 0.5691590309143066, + "step": 5480 + }, + { + "epoch": 1.2637768042425641, + "grad_norm": 1.468644623890153, + "learning_rate": 6.542406640658411e-07, + "loss": 0.3750354051589966, + "step": 5481 + }, + { + "epoch": 1.2640073783721466, + "grad_norm": 1.5266320276968284, + "learning_rate": 6.538829429225068e-07, + "loss": 0.47816041111946106, + "step": 5482 + }, + { + "epoch": 1.2642379525017293, + 
"grad_norm": 1.4911563737024116, + "learning_rate": 6.535252720907951e-07, + "loss": 0.42470186948776245, + "step": 5483 + }, + { + "epoch": 1.264468526631312, + "grad_norm": 1.4256480441382235, + "learning_rate": 6.531676516226961e-07, + "loss": 0.37356555461883545, + "step": 5484 + }, + { + "epoch": 1.2646991007608945, + "grad_norm": 1.4604810104028516, + "learning_rate": 6.528100815701942e-07, + "loss": 0.4895293116569519, + "step": 5485 + }, + { + "epoch": 1.2649296748904773, + "grad_norm": 1.9575945537740915, + "learning_rate": 6.524525619852656e-07, + "loss": 0.4963725805282593, + "step": 5486 + }, + { + "epoch": 1.26516024902006, + "grad_norm": 1.7629474018170985, + "learning_rate": 6.520950929198792e-07, + "loss": 0.5443764925003052, + "step": 5487 + }, + { + "epoch": 1.2653908231496427, + "grad_norm": 1.2536482779264142, + "learning_rate": 6.517376744259972e-07, + "loss": 0.400549054145813, + "step": 5488 + }, + { + "epoch": 1.2656213972792254, + "grad_norm": 1.8850482793273033, + "learning_rate": 6.513803065555736e-07, + "loss": 0.46384042501449585, + "step": 5489 + }, + { + "epoch": 1.265851971408808, + "grad_norm": 1.4893040501119004, + "learning_rate": 6.510229893605556e-07, + "loss": 0.5044240951538086, + "step": 5490 + }, + { + "epoch": 1.2660825455383906, + "grad_norm": 1.477450831039122, + "learning_rate": 6.506657228928827e-07, + "loss": 0.4544214904308319, + "step": 5491 + }, + { + "epoch": 1.2663131196679733, + "grad_norm": 1.441487086349296, + "learning_rate": 6.503085072044878e-07, + "loss": 0.36688071489334106, + "step": 5492 + }, + { + "epoch": 1.2665436937975558, + "grad_norm": 1.4594163949727883, + "learning_rate": 6.499513423472951e-07, + "loss": 0.4058225154876709, + "step": 5493 + }, + { + "epoch": 1.2667742679271385, + "grad_norm": 1.4647938941101153, + "learning_rate": 6.495942283732225e-07, + "loss": 0.36429229378700256, + "step": 5494 + }, + { + "epoch": 1.2670048420567213, + "grad_norm": 1.7674965095028434, + "learning_rate": 
6.492371653341802e-07, + "loss": 0.47116899490356445, + "step": 5495 + }, + { + "epoch": 1.267235416186304, + "grad_norm": 1.4923904627456126, + "learning_rate": 6.488801532820706e-07, + "loss": 0.4437965750694275, + "step": 5496 + }, + { + "epoch": 1.2674659903158867, + "grad_norm": 1.5533994295939695, + "learning_rate": 6.485231922687893e-07, + "loss": 0.4810328483581543, + "step": 5497 + }, + { + "epoch": 1.2676965644454692, + "grad_norm": 1.4632129166419525, + "learning_rate": 6.481662823462238e-07, + "loss": 0.362907350063324, + "step": 5498 + }, + { + "epoch": 1.267927138575052, + "grad_norm": 1.375729756251652, + "learning_rate": 6.478094235662554e-07, + "loss": 0.43647170066833496, + "step": 5499 + }, + { + "epoch": 1.2681577127046346, + "grad_norm": 1.422215620145209, + "learning_rate": 6.474526159807563e-07, + "loss": 0.4566631317138672, + "step": 5500 + }, + { + "epoch": 1.2683882868342171, + "grad_norm": 1.5097982290449063, + "learning_rate": 6.470958596415925e-07, + "loss": 0.3940081298351288, + "step": 5501 + }, + { + "epoch": 1.2686188609637998, + "grad_norm": 1.617526881385646, + "learning_rate": 6.46739154600622e-07, + "loss": 0.5275603532791138, + "step": 5502 + }, + { + "epoch": 1.2688494350933825, + "grad_norm": 1.846449658895825, + "learning_rate": 6.463825009096959e-07, + "loss": 0.42546436190605164, + "step": 5503 + }, + { + "epoch": 1.2690800092229653, + "grad_norm": 1.6068032996774941, + "learning_rate": 6.460258986206566e-07, + "loss": 0.3833821713924408, + "step": 5504 + }, + { + "epoch": 1.2693105833525478, + "grad_norm": 1.4806797403979666, + "learning_rate": 6.456693477853408e-07, + "loss": 0.5056046843528748, + "step": 5505 + }, + { + "epoch": 1.2695411574821305, + "grad_norm": 1.6345259734279236, + "learning_rate": 6.453128484555764e-07, + "loss": 0.3544192910194397, + "step": 5506 + }, + { + "epoch": 1.2697717316117132, + "grad_norm": 1.684231386275673, + "learning_rate": 6.449564006831836e-07, + "loss": 0.47164130210876465, + 
"step": 5507 + }, + { + "epoch": 1.2700023057412957, + "grad_norm": 1.3334241214641123, + "learning_rate": 6.446000045199765e-07, + "loss": 0.4580638110637665, + "step": 5508 + }, + { + "epoch": 1.2702328798708784, + "grad_norm": 1.2809631136030655, + "learning_rate": 6.442436600177606e-07, + "loss": 0.45945844054222107, + "step": 5509 + }, + { + "epoch": 1.2704634540004611, + "grad_norm": 1.447660138842985, + "learning_rate": 6.438873672283343e-07, + "loss": 0.5539910793304443, + "step": 5510 + }, + { + "epoch": 1.2706940281300438, + "grad_norm": 1.6550705344684873, + "learning_rate": 6.43531126203488e-07, + "loss": 0.4661790132522583, + "step": 5511 + }, + { + "epoch": 1.2709246022596266, + "grad_norm": 1.7015547164246037, + "learning_rate": 6.431749369950057e-07, + "loss": 0.3781178891658783, + "step": 5512 + }, + { + "epoch": 1.271155176389209, + "grad_norm": 1.571227420481097, + "learning_rate": 6.428187996546621e-07, + "loss": 0.4858461618423462, + "step": 5513 + }, + { + "epoch": 1.2713857505187918, + "grad_norm": 1.5308384830726272, + "learning_rate": 6.424627142342262e-07, + "loss": 0.5003963708877563, + "step": 5514 + }, + { + "epoch": 1.2716163246483745, + "grad_norm": 1.3605664168425382, + "learning_rate": 6.421066807854584e-07, + "loss": 0.4620795249938965, + "step": 5515 + }, + { + "epoch": 1.271846898777957, + "grad_norm": 1.385915858471925, + "learning_rate": 6.417506993601114e-07, + "loss": 0.43998581171035767, + "step": 5516 + }, + { + "epoch": 1.2720774729075397, + "grad_norm": 1.6777446711260993, + "learning_rate": 6.413947700099311e-07, + "loss": 0.5204107165336609, + "step": 5517 + }, + { + "epoch": 1.2723080470371224, + "grad_norm": 1.5515853600398104, + "learning_rate": 6.410388927866551e-07, + "loss": 0.46675950288772583, + "step": 5518 + }, + { + "epoch": 1.2725386211667051, + "grad_norm": 1.4020610518461032, + "learning_rate": 6.406830677420146e-07, + "loss": 0.4002436101436615, + "step": 5519 + }, + { + "epoch": 1.2727691952962878, + 
"grad_norm": 1.6847281008342299, + "learning_rate": 6.403272949277312e-07, + "loss": 0.4051012396812439, + "step": 5520 + }, + { + "epoch": 1.2729997694258703, + "grad_norm": 1.4780078562694616, + "learning_rate": 6.399715743955209e-07, + "loss": 0.4847797751426697, + "step": 5521 + }, + { + "epoch": 1.273230343555453, + "grad_norm": 1.6389704995828815, + "learning_rate": 6.396159061970907e-07, + "loss": 0.4742053151130676, + "step": 5522 + }, + { + "epoch": 1.2734609176850358, + "grad_norm": 1.4123933831310747, + "learning_rate": 6.392602903841415e-07, + "loss": 0.44291001558303833, + "step": 5523 + }, + { + "epoch": 1.2736914918146183, + "grad_norm": 1.438016627678946, + "learning_rate": 6.389047270083646e-07, + "loss": 0.38993996381759644, + "step": 5524 + }, + { + "epoch": 1.273922065944201, + "grad_norm": 1.5621491080936318, + "learning_rate": 6.385492161214454e-07, + "loss": 0.5045995116233826, + "step": 5525 + }, + { + "epoch": 1.2741526400737837, + "grad_norm": 1.4769511790871679, + "learning_rate": 6.381937577750611e-07, + "loss": 0.4377788305282593, + "step": 5526 + }, + { + "epoch": 1.2743832142033664, + "grad_norm": 1.470801087764595, + "learning_rate": 6.378383520208806e-07, + "loss": 0.5363353490829468, + "step": 5527 + }, + { + "epoch": 1.2746137883329491, + "grad_norm": 1.340047582641372, + "learning_rate": 6.374829989105661e-07, + "loss": 0.42230546474456787, + "step": 5528 + }, + { + "epoch": 1.2748443624625316, + "grad_norm": 1.2882420810653734, + "learning_rate": 6.371276984957715e-07, + "loss": 0.39565908908843994, + "step": 5529 + }, + { + "epoch": 1.2750749365921143, + "grad_norm": 1.3633189139651096, + "learning_rate": 6.36772450828144e-07, + "loss": 0.4375323951244354, + "step": 5530 + }, + { + "epoch": 1.275305510721697, + "grad_norm": 1.5028848525750826, + "learning_rate": 6.364172559593215e-07, + "loss": 0.4901241660118103, + "step": 5531 + }, + { + "epoch": 1.2755360848512796, + "grad_norm": 1.3653729298225772, + "learning_rate": 
6.360621139409359e-07, + "loss": 0.4108780026435852, + "step": 5532 + }, + { + "epoch": 1.2757666589808623, + "grad_norm": 1.4800363393725149, + "learning_rate": 6.357070248246102e-07, + "loss": 0.43631279468536377, + "step": 5533 + }, + { + "epoch": 1.275997233110445, + "grad_norm": 1.5982504223136969, + "learning_rate": 6.353519886619607e-07, + "loss": 0.4623757004737854, + "step": 5534 + }, + { + "epoch": 1.2762278072400277, + "grad_norm": 1.5284512936045929, + "learning_rate": 6.349970055045954e-07, + "loss": 0.41303062438964844, + "step": 5535 + }, + { + "epoch": 1.2764583813696104, + "grad_norm": 1.7689201212047627, + "learning_rate": 6.34642075404114e-07, + "loss": 0.5157878994941711, + "step": 5536 + }, + { + "epoch": 1.276688955499193, + "grad_norm": 1.6093049161057067, + "learning_rate": 6.342871984121103e-07, + "loss": 0.41295093297958374, + "step": 5537 + }, + { + "epoch": 1.2769195296287756, + "grad_norm": 1.4185213028911483, + "learning_rate": 6.339323745801682e-07, + "loss": 0.4636460542678833, + "step": 5538 + }, + { + "epoch": 1.2771501037583584, + "grad_norm": 1.44057433861511, + "learning_rate": 6.335776039598659e-07, + "loss": 0.45273804664611816, + "step": 5539 + }, + { + "epoch": 1.2773806778879409, + "grad_norm": 1.7212686324453035, + "learning_rate": 6.332228866027721e-07, + "loss": 0.4562758803367615, + "step": 5540 + }, + { + "epoch": 1.2776112520175236, + "grad_norm": 1.5821328258880776, + "learning_rate": 6.328682225604491e-07, + "loss": 0.3162837326526642, + "step": 5541 + }, + { + "epoch": 1.2778418261471063, + "grad_norm": 1.4226618207277133, + "learning_rate": 6.325136118844504e-07, + "loss": 0.48594871163368225, + "step": 5542 + }, + { + "epoch": 1.278072400276689, + "grad_norm": 1.398820126458318, + "learning_rate": 6.321590546263231e-07, + "loss": 0.4346798360347748, + "step": 5543 + }, + { + "epoch": 1.2783029744062717, + "grad_norm": 1.7945463027279862, + "learning_rate": 6.318045508376046e-07, + "loss": 0.5133204460144043, + 
"step": 5544 + }, + { + "epoch": 1.2785335485358542, + "grad_norm": 1.6462955147402891, + "learning_rate": 6.314501005698266e-07, + "loss": 0.40679338574409485, + "step": 5545 + }, + { + "epoch": 1.278764122665437, + "grad_norm": 1.341754342655084, + "learning_rate": 6.310957038745117e-07, + "loss": 0.363874614238739, + "step": 5546 + }, + { + "epoch": 1.2789946967950196, + "grad_norm": 1.3013776361069782, + "learning_rate": 6.307413608031746e-07, + "loss": 0.43020665645599365, + "step": 5547 + }, + { + "epoch": 1.2792252709246021, + "grad_norm": 1.301444097702827, + "learning_rate": 6.303870714073233e-07, + "loss": 0.5280083417892456, + "step": 5548 + }, + { + "epoch": 1.2794558450541849, + "grad_norm": 1.803757705570539, + "learning_rate": 6.300328357384568e-07, + "loss": 0.4584185481071472, + "step": 5549 + }, + { + "epoch": 1.2796864191837676, + "grad_norm": 1.4682285924702114, + "learning_rate": 6.296786538480675e-07, + "loss": 0.4068162441253662, + "step": 5550 + }, + { + "epoch": 1.2799169933133503, + "grad_norm": 1.361515758715701, + "learning_rate": 6.293245257876387e-07, + "loss": 0.4336085915565491, + "step": 5551 + }, + { + "epoch": 1.280147567442933, + "grad_norm": 1.4906971509519245, + "learning_rate": 6.289704516086468e-07, + "loss": 0.4932886064052582, + "step": 5552 + }, + { + "epoch": 1.2803781415725155, + "grad_norm": 1.3660207414526373, + "learning_rate": 6.2861643136256e-07, + "loss": 0.437292218208313, + "step": 5553 + }, + { + "epoch": 1.2806087157020982, + "grad_norm": 1.5017461161180483, + "learning_rate": 6.28262465100839e-07, + "loss": 0.4131085276603699, + "step": 5554 + }, + { + "epoch": 1.280839289831681, + "grad_norm": 1.441603184912447, + "learning_rate": 6.27908552874936e-07, + "loss": 0.4146266579627991, + "step": 5555 + }, + { + "epoch": 1.2810698639612634, + "grad_norm": 1.6115588407174422, + "learning_rate": 6.275546947362957e-07, + "loss": 0.4778539538383484, + "step": 5556 + }, + { + "epoch": 1.2813004380908461, + "grad_norm": 
1.4722189673341872, + "learning_rate": 6.272008907363555e-07, + "loss": 0.3989019989967346, + "step": 5557 + }, + { + "epoch": 1.2815310122204289, + "grad_norm": 1.5188067628601776, + "learning_rate": 6.268471409265436e-07, + "loss": 0.4433528184890747, + "step": 5558 + }, + { + "epoch": 1.2817615863500116, + "grad_norm": 1.4551631195697798, + "learning_rate": 6.264934453582817e-07, + "loss": 0.46929931640625, + "step": 5559 + }, + { + "epoch": 1.2819921604795943, + "grad_norm": 1.749202490253535, + "learning_rate": 6.261398040829829e-07, + "loss": 0.4908202886581421, + "step": 5560 + }, + { + "epoch": 1.2822227346091768, + "grad_norm": 1.766310768413501, + "learning_rate": 6.257862171520528e-07, + "loss": 0.44195377826690674, + "step": 5561 + }, + { + "epoch": 1.2824533087387595, + "grad_norm": 1.8716445464357578, + "learning_rate": 6.254326846168882e-07, + "loss": 0.548696756362915, + "step": 5562 + }, + { + "epoch": 1.2826838828683422, + "grad_norm": 1.6355324229757326, + "learning_rate": 6.250792065288794e-07, + "loss": 0.4015994668006897, + "step": 5563 + }, + { + "epoch": 1.2829144569979247, + "grad_norm": 1.5798153885574688, + "learning_rate": 6.247257829394074e-07, + "loss": 0.4281688928604126, + "step": 5564 + }, + { + "epoch": 1.2831450311275074, + "grad_norm": 1.2159971773233473, + "learning_rate": 6.243724138998462e-07, + "loss": 0.37623634934425354, + "step": 5565 + }, + { + "epoch": 1.2833756052570902, + "grad_norm": 1.7282596196498647, + "learning_rate": 6.240190994615617e-07, + "loss": 0.4753819704055786, + "step": 5566 + }, + { + "epoch": 1.2836061793866729, + "grad_norm": 1.8092084567061366, + "learning_rate": 6.236658396759111e-07, + "loss": 0.4584893584251404, + "step": 5567 + }, + { + "epoch": 1.2838367535162556, + "grad_norm": 1.598249680169706, + "learning_rate": 6.23312634594245e-07, + "loss": 0.445067435503006, + "step": 5568 + }, + { + "epoch": 1.284067327645838, + "grad_norm": 1.402901275205923, + "learning_rate": 6.229594842679049e-07, + 
"loss": 0.4209640920162201, + "step": 5569 + }, + { + "epoch": 1.2842979017754208, + "grad_norm": 1.3481434606649714, + "learning_rate": 6.226063887482254e-07, + "loss": 0.34620141983032227, + "step": 5570 + }, + { + "epoch": 1.2845284759050035, + "grad_norm": 1.2702834444597235, + "learning_rate": 6.222533480865315e-07, + "loss": 0.43683767318725586, + "step": 5571 + }, + { + "epoch": 1.284759050034586, + "grad_norm": 1.5394879174992184, + "learning_rate": 6.219003623341421e-07, + "loss": 0.45881450176239014, + "step": 5572 + }, + { + "epoch": 1.2849896241641687, + "grad_norm": 1.2015099259152706, + "learning_rate": 6.215474315423667e-07, + "loss": 0.40115928649902344, + "step": 5573 + }, + { + "epoch": 1.2852201982937514, + "grad_norm": 1.5480428253925462, + "learning_rate": 6.211945557625082e-07, + "loss": 0.4181373119354248, + "step": 5574 + }, + { + "epoch": 1.2854507724233342, + "grad_norm": 1.6874872010842208, + "learning_rate": 6.208417350458598e-07, + "loss": 0.4743300676345825, + "step": 5575 + }, + { + "epoch": 1.2856813465529169, + "grad_norm": 1.6331906817141153, + "learning_rate": 6.204889694437077e-07, + "loss": 0.4236707091331482, + "step": 5576 + }, + { + "epoch": 1.2859119206824994, + "grad_norm": 1.1887995996963334, + "learning_rate": 6.201362590073305e-07, + "loss": 0.4105497896671295, + "step": 5577 + }, + { + "epoch": 1.286142494812082, + "grad_norm": 1.3982883240902815, + "learning_rate": 6.197836037879973e-07, + "loss": 0.4164474606513977, + "step": 5578 + }, + { + "epoch": 1.2863730689416648, + "grad_norm": 1.648111600369129, + "learning_rate": 6.19431003836971e-07, + "loss": 0.49809616804122925, + "step": 5579 + }, + { + "epoch": 1.2866036430712473, + "grad_norm": 1.608787056057215, + "learning_rate": 6.19078459205505e-07, + "loss": 0.4902994632720947, + "step": 5580 + }, + { + "epoch": 1.28683421720083, + "grad_norm": 1.336430500063446, + "learning_rate": 6.18725969944846e-07, + "loss": 0.3697085380554199, + "step": 5581 + }, + { + 
"epoch": 1.2870647913304127, + "grad_norm": 1.353359914681952, + "learning_rate": 6.183735361062309e-07, + "loss": 0.446627140045166, + "step": 5582 + }, + { + "epoch": 1.2872953654599955, + "grad_norm": 1.590519620379444, + "learning_rate": 6.180211577408901e-07, + "loss": 0.39521220326423645, + "step": 5583 + }, + { + "epoch": 1.2875259395895782, + "grad_norm": 1.7929636253307002, + "learning_rate": 6.176688349000452e-07, + "loss": 0.6308573484420776, + "step": 5584 + }, + { + "epoch": 1.2877565137191607, + "grad_norm": 1.5017758457543093, + "learning_rate": 6.173165676349102e-07, + "loss": 0.4558343291282654, + "step": 5585 + }, + { + "epoch": 1.2879870878487434, + "grad_norm": 1.4546689222111522, + "learning_rate": 6.169643559966906e-07, + "loss": 0.5487015247344971, + "step": 5586 + }, + { + "epoch": 1.288217661978326, + "grad_norm": 1.3949279502201517, + "learning_rate": 6.166122000365834e-07, + "loss": 0.39074039459228516, + "step": 5587 + }, + { + "epoch": 1.2884482361079086, + "grad_norm": 1.4687466147876906, + "learning_rate": 6.162600998057787e-07, + "loss": 0.5136120915412903, + "step": 5588 + }, + { + "epoch": 1.2886788102374913, + "grad_norm": 1.5457442901158343, + "learning_rate": 6.159080553554572e-07, + "loss": 0.5344336628913879, + "step": 5589 + }, + { + "epoch": 1.288909384367074, + "grad_norm": 1.5840783894802135, + "learning_rate": 6.15556066736793e-07, + "loss": 0.5204205513000488, + "step": 5590 + }, + { + "epoch": 1.2891399584966567, + "grad_norm": 1.588345092971114, + "learning_rate": 6.152041340009504e-07, + "loss": 0.4768211245536804, + "step": 5591 + }, + { + "epoch": 1.2893705326262395, + "grad_norm": 2.0914169507965936, + "learning_rate": 6.148522571990868e-07, + "loss": 0.44098299741744995, + "step": 5592 + }, + { + "epoch": 1.289601106755822, + "grad_norm": 1.6411833405865308, + "learning_rate": 6.145004363823509e-07, + "loss": 0.5038055181503296, + "step": 5593 + }, + { + "epoch": 1.2898316808854047, + "grad_norm": 
1.6256634474518743, + "learning_rate": 6.141486716018837e-07, + "loss": 0.417998343706131, + "step": 5594 + }, + { + "epoch": 1.2900622550149874, + "grad_norm": 1.755327490864145, + "learning_rate": 6.137969629088174e-07, + "loss": 0.48858124017715454, + "step": 5595 + }, + { + "epoch": 1.2902928291445699, + "grad_norm": 1.6236287189755654, + "learning_rate": 6.134453103542765e-07, + "loss": 0.46988582611083984, + "step": 5596 + }, + { + "epoch": 1.2905234032741526, + "grad_norm": 1.4715150644247719, + "learning_rate": 6.130937139893779e-07, + "loss": 0.5100589394569397, + "step": 5597 + }, + { + "epoch": 1.2907539774037353, + "grad_norm": 1.861124742863941, + "learning_rate": 6.127421738652286e-07, + "loss": 0.490558922290802, + "step": 5598 + }, + { + "epoch": 1.290984551533318, + "grad_norm": 1.624496792014592, + "learning_rate": 6.123906900329291e-07, + "loss": 0.4749597907066345, + "step": 5599 + }, + { + "epoch": 1.2912151256629008, + "grad_norm": 1.4155787175262067, + "learning_rate": 6.12039262543571e-07, + "loss": 0.5006792545318604, + "step": 5600 + }, + { + "epoch": 1.2914456997924832, + "grad_norm": 1.6772265070157861, + "learning_rate": 6.116878914482384e-07, + "loss": 0.46902909874916077, + "step": 5601 + }, + { + "epoch": 1.291676273922066, + "grad_norm": 1.4563548131763813, + "learning_rate": 6.113365767980059e-07, + "loss": 0.46765559911727905, + "step": 5602 + }, + { + "epoch": 1.2919068480516487, + "grad_norm": 1.4143636586875892, + "learning_rate": 6.10985318643941e-07, + "loss": 0.45960646867752075, + "step": 5603 + }, + { + "epoch": 1.2921374221812312, + "grad_norm": 1.578129032516793, + "learning_rate": 6.106341170371024e-07, + "loss": 0.4067912697792053, + "step": 5604 + }, + { + "epoch": 1.292367996310814, + "grad_norm": 1.653263856685772, + "learning_rate": 6.102829720285414e-07, + "loss": 0.45004114508628845, + "step": 5605 + }, + { + "epoch": 1.2925985704403966, + "grad_norm": 1.698803058368325, + "learning_rate": 6.099318836692999e-07, 
+ "loss": 0.5086014270782471, + "step": 5606 + }, + { + "epoch": 1.2928291445699793, + "grad_norm": 1.5400277013654406, + "learning_rate": 6.095808520104122e-07, + "loss": 0.49985191226005554, + "step": 5607 + }, + { + "epoch": 1.293059718699562, + "grad_norm": 1.5622376081366391, + "learning_rate": 6.092298771029047e-07, + "loss": 0.5066381096839905, + "step": 5608 + }, + { + "epoch": 1.2932902928291445, + "grad_norm": 1.5786958248418999, + "learning_rate": 6.088789589977947e-07, + "loss": 0.49626559019088745, + "step": 5609 + }, + { + "epoch": 1.2935208669587273, + "grad_norm": 1.6542820345168319, + "learning_rate": 6.085280977460921e-07, + "loss": 0.4837498962879181, + "step": 5610 + }, + { + "epoch": 1.29375144108831, + "grad_norm": 1.3607897650960659, + "learning_rate": 6.081772933987977e-07, + "loss": 0.41308102011680603, + "step": 5611 + }, + { + "epoch": 1.2939820152178925, + "grad_norm": 1.4026215025684987, + "learning_rate": 6.078265460069048e-07, + "loss": 0.4453086853027344, + "step": 5612 + }, + { + "epoch": 1.2942125893474752, + "grad_norm": 1.5506248233039113, + "learning_rate": 6.074758556213976e-07, + "loss": 0.4700174927711487, + "step": 5613 + }, + { + "epoch": 1.294443163477058, + "grad_norm": 1.6021152444285431, + "learning_rate": 6.071252222932537e-07, + "loss": 0.578227162361145, + "step": 5614 + }, + { + "epoch": 1.2946737376066406, + "grad_norm": 1.3711009132002785, + "learning_rate": 6.067746460734398e-07, + "loss": 0.36468571424484253, + "step": 5615 + }, + { + "epoch": 1.2949043117362231, + "grad_norm": 1.7197393040240752, + "learning_rate": 6.064241270129166e-07, + "loss": 0.4793199896812439, + "step": 5616 + }, + { + "epoch": 1.2951348858658058, + "grad_norm": 1.4731744493442007, + "learning_rate": 6.060736651626355e-07, + "loss": 0.40342214703559875, + "step": 5617 + }, + { + "epoch": 1.2953654599953885, + "grad_norm": 1.2868571274228024, + "learning_rate": 6.05723260573539e-07, + "loss": 0.4212435185909271, + "step": 5618 + }, + { + 
"epoch": 1.295596034124971, + "grad_norm": 1.592545901664945, + "learning_rate": 6.053729132965626e-07, + "loss": 0.44668713212013245, + "step": 5619 + }, + { + "epoch": 1.2958266082545538, + "grad_norm": 1.3590289444558108, + "learning_rate": 6.050226233826326e-07, + "loss": 0.5159831643104553, + "step": 5620 + }, + { + "epoch": 1.2960571823841365, + "grad_norm": 1.792827614220507, + "learning_rate": 6.046723908826676e-07, + "loss": 0.5091866850852966, + "step": 5621 + }, + { + "epoch": 1.2962877565137192, + "grad_norm": 1.3636713576072057, + "learning_rate": 6.043222158475767e-07, + "loss": 0.34838563203811646, + "step": 5622 + }, + { + "epoch": 1.296518330643302, + "grad_norm": 1.679394698956229, + "learning_rate": 6.039720983282621e-07, + "loss": 0.46576952934265137, + "step": 5623 + }, + { + "epoch": 1.2967489047728844, + "grad_norm": 1.5739745386461328, + "learning_rate": 6.036220383756163e-07, + "loss": 0.4971234202384949, + "step": 5624 + }, + { + "epoch": 1.2969794789024671, + "grad_norm": 1.3832811037885837, + "learning_rate": 6.03272036040525e-07, + "loss": 0.4792482256889343, + "step": 5625 + }, + { + "epoch": 1.2972100530320498, + "grad_norm": 1.5438407741127544, + "learning_rate": 6.029220913738636e-07, + "loss": 0.45584213733673096, + "step": 5626 + }, + { + "epoch": 1.2974406271616323, + "grad_norm": 2.1628056802136686, + "learning_rate": 6.025722044265004e-07, + "loss": 0.5094096064567566, + "step": 5627 + }, + { + "epoch": 1.297671201291215, + "grad_norm": 1.2707985126710273, + "learning_rate": 6.022223752492954e-07, + "loss": 0.33178865909576416, + "step": 5628 + }, + { + "epoch": 1.2979017754207978, + "grad_norm": 1.4977758648466553, + "learning_rate": 6.018726038930991e-07, + "loss": 0.4955121874809265, + "step": 5629 + }, + { + "epoch": 1.2981323495503805, + "grad_norm": 1.9087861970540962, + "learning_rate": 6.01522890408755e-07, + "loss": 0.46253639459609985, + "step": 5630 + }, + { + "epoch": 1.2983629236799632, + "grad_norm": 
1.725580686624441, + "learning_rate": 6.011732348470971e-07, + "loss": 0.4760236442089081, + "step": 5631 + }, + { + "epoch": 1.2985934978095457, + "grad_norm": 1.487451213133888, + "learning_rate": 6.008236372589516e-07, + "loss": 0.44413092732429504, + "step": 5632 + }, + { + "epoch": 1.2988240719391284, + "grad_norm": 1.5710401716420814, + "learning_rate": 6.004740976951358e-07, + "loss": 0.5431559681892395, + "step": 5633 + }, + { + "epoch": 1.2990546460687111, + "grad_norm": 1.448678008923642, + "learning_rate": 6.001246162064592e-07, + "loss": 0.41276806592941284, + "step": 5634 + }, + { + "epoch": 1.2992852201982936, + "grad_norm": 1.8698453553316883, + "learning_rate": 5.997751928437219e-07, + "loss": 0.3998986482620239, + "step": 5635 + }, + { + "epoch": 1.2995157943278763, + "grad_norm": 1.7019145009400753, + "learning_rate": 5.994258276577169e-07, + "loss": 0.47741782665252686, + "step": 5636 + }, + { + "epoch": 1.299746368457459, + "grad_norm": 1.8471752326794122, + "learning_rate": 5.990765206992277e-07, + "loss": 0.4294115900993347, + "step": 5637 + }, + { + "epoch": 1.2999769425870418, + "grad_norm": 1.2676173155963009, + "learning_rate": 5.987272720190288e-07, + "loss": 0.4717773199081421, + "step": 5638 + }, + { + "epoch": 1.3002075167166245, + "grad_norm": 1.4764264012124577, + "learning_rate": 5.983780816678881e-07, + "loss": 0.5169499516487122, + "step": 5639 + }, + { + "epoch": 1.300438090846207, + "grad_norm": 1.3402196455719508, + "learning_rate": 5.980289496965634e-07, + "loss": 0.3796359598636627, + "step": 5640 + }, + { + "epoch": 1.3006686649757897, + "grad_norm": 1.439771899645747, + "learning_rate": 5.976798761558048e-07, + "loss": 0.44377613067626953, + "step": 5641 + }, + { + "epoch": 1.3008992391053724, + "grad_norm": 1.4787491173073983, + "learning_rate": 5.973308610963534e-07, + "loss": 0.46863383054733276, + "step": 5642 + }, + { + "epoch": 1.301129813234955, + "grad_norm": 1.6231703309548882, + "learning_rate": 
5.969819045689426e-07, + "loss": 0.5437184572219849, + "step": 5643 + }, + { + "epoch": 1.3013603873645376, + "grad_norm": 1.3526724102376106, + "learning_rate": 5.96633006624296e-07, + "loss": 0.4487720727920532, + "step": 5644 + }, + { + "epoch": 1.3015909614941203, + "grad_norm": 1.4099594164441491, + "learning_rate": 5.962841673131305e-07, + "loss": 0.42834270000457764, + "step": 5645 + }, + { + "epoch": 1.301821535623703, + "grad_norm": 1.6303538612123332, + "learning_rate": 5.959353866861525e-07, + "loss": 0.5242533087730408, + "step": 5646 + }, + { + "epoch": 1.3020521097532858, + "grad_norm": 1.467793467454458, + "learning_rate": 5.955866647940609e-07, + "loss": 0.4529950022697449, + "step": 5647 + }, + { + "epoch": 1.3022826838828683, + "grad_norm": 1.704233159172443, + "learning_rate": 5.952380016875465e-07, + "loss": 0.41109561920166016, + "step": 5648 + }, + { + "epoch": 1.302513258012451, + "grad_norm": 2.1978948521850237, + "learning_rate": 5.948893974172904e-07, + "loss": 0.5468418598175049, + "step": 5649 + }, + { + "epoch": 1.3027438321420337, + "grad_norm": 1.6524182777322811, + "learning_rate": 5.945408520339663e-07, + "loss": 0.4594927430152893, + "step": 5650 + }, + { + "epoch": 1.3029744062716162, + "grad_norm": 1.8822005278969978, + "learning_rate": 5.941923655882383e-07, + "loss": 0.5011999011039734, + "step": 5651 + }, + { + "epoch": 1.303204980401199, + "grad_norm": 1.3940543055361847, + "learning_rate": 5.938439381307632e-07, + "loss": 0.519101083278656, + "step": 5652 + }, + { + "epoch": 1.3034355545307816, + "grad_norm": 1.3048743953658823, + "learning_rate": 5.934955697121875e-07, + "loss": 0.521979570388794, + "step": 5653 + }, + { + "epoch": 1.3036661286603644, + "grad_norm": 1.5140544105240696, + "learning_rate": 5.931472603831507e-07, + "loss": 0.5969122648239136, + "step": 5654 + }, + { + "epoch": 1.303896702789947, + "grad_norm": 1.6283257057537612, + "learning_rate": 5.927990101942826e-07, + "loss": 0.47013232111930847, + 
"step": 5655 + }, + { + "epoch": 1.3041272769195296, + "grad_norm": 1.485470149052559, + "learning_rate": 5.924508191962059e-07, + "loss": 0.4135271906852722, + "step": 5656 + }, + { + "epoch": 1.3043578510491123, + "grad_norm": 1.6826248484124529, + "learning_rate": 5.921026874395327e-07, + "loss": 0.45639151334762573, + "step": 5657 + }, + { + "epoch": 1.304588425178695, + "grad_norm": 1.4851105420204929, + "learning_rate": 5.917546149748676e-07, + "loss": 0.4047633409500122, + "step": 5658 + }, + { + "epoch": 1.3048189993082775, + "grad_norm": 1.470073094956581, + "learning_rate": 5.91406601852807e-07, + "loss": 0.4352290630340576, + "step": 5659 + }, + { + "epoch": 1.3050495734378602, + "grad_norm": 1.569723084578139, + "learning_rate": 5.910586481239375e-07, + "loss": 0.4912130534648895, + "step": 5660 + }, + { + "epoch": 1.305280147567443, + "grad_norm": 1.4302762159123064, + "learning_rate": 5.907107538388383e-07, + "loss": 0.4114433526992798, + "step": 5661 + }, + { + "epoch": 1.3055107216970256, + "grad_norm": 1.6307461117750972, + "learning_rate": 5.903629190480786e-07, + "loss": 0.4230955243110657, + "step": 5662 + }, + { + "epoch": 1.3057412958266084, + "grad_norm": 1.525164874833489, + "learning_rate": 5.900151438022205e-07, + "loss": 0.5020648241043091, + "step": 5663 + }, + { + "epoch": 1.3059718699561909, + "grad_norm": 1.6834639607808413, + "learning_rate": 5.89667428151816e-07, + "loss": 0.48636388778686523, + "step": 5664 + }, + { + "epoch": 1.3062024440857736, + "grad_norm": 1.376635193773143, + "learning_rate": 5.893197721474099e-07, + "loss": 0.412000447511673, + "step": 5665 + }, + { + "epoch": 1.3064330182153563, + "grad_norm": 1.8328035722486296, + "learning_rate": 5.889721758395369e-07, + "loss": 0.3584952652454376, + "step": 5666 + }, + { + "epoch": 1.3066635923449388, + "grad_norm": 1.599166825150926, + "learning_rate": 5.886246392787234e-07, + "loss": 0.4538918733596802, + "step": 5667 + }, + { + "epoch": 1.3068941664745215, + 
"grad_norm": 1.3551701558323133, + "learning_rate": 5.882771625154883e-07, + "loss": 0.478498637676239, + "step": 5668 + }, + { + "epoch": 1.3071247406041042, + "grad_norm": 1.5353917292288828, + "learning_rate": 5.879297456003398e-07, + "loss": 0.49535906314849854, + "step": 5669 + }, + { + "epoch": 1.307355314733687, + "grad_norm": 1.4516733372645705, + "learning_rate": 5.875823885837793e-07, + "loss": 0.48975661396980286, + "step": 5670 + }, + { + "epoch": 1.3075858888632697, + "grad_norm": 1.675865776424194, + "learning_rate": 5.87235091516298e-07, + "loss": 0.4870087802410126, + "step": 5671 + }, + { + "epoch": 1.3078164629928521, + "grad_norm": 1.5358758810801338, + "learning_rate": 5.8688785444838e-07, + "loss": 0.43411481380462646, + "step": 5672 + }, + { + "epoch": 1.3080470371224349, + "grad_norm": 1.5956307221574964, + "learning_rate": 5.865406774304986e-07, + "loss": 0.5108835697174072, + "step": 5673 + }, + { + "epoch": 1.3082776112520176, + "grad_norm": 1.6165992027891032, + "learning_rate": 5.861935605131202e-07, + "loss": 0.47449198365211487, + "step": 5674 + }, + { + "epoch": 1.3085081853816, + "grad_norm": 1.8165499378032328, + "learning_rate": 5.858465037467014e-07, + "loss": 0.5550234913825989, + "step": 5675 + }, + { + "epoch": 1.3087387595111828, + "grad_norm": 1.5758581559369806, + "learning_rate": 5.854995071816911e-07, + "loss": 0.4548208713531494, + "step": 5676 + }, + { + "epoch": 1.3089693336407655, + "grad_norm": 1.4849539841305146, + "learning_rate": 5.851525708685279e-07, + "loss": 0.5176935195922852, + "step": 5677 + }, + { + "epoch": 1.3091999077703482, + "grad_norm": 1.5664760566663032, + "learning_rate": 5.848056948576428e-07, + "loss": 0.4460016191005707, + "step": 5678 + }, + { + "epoch": 1.309430481899931, + "grad_norm": 1.808203061607658, + "learning_rate": 5.84458879199458e-07, + "loss": 0.5344464182853699, + "step": 5679 + }, + { + "epoch": 1.3096610560295134, + "grad_norm": 1.3109840468073877, + "learning_rate": 
5.841121239443863e-07, + "loss": 0.48601672053337097, + "step": 5680 + }, + { + "epoch": 1.3098916301590962, + "grad_norm": 1.3467689115963568, + "learning_rate": 5.837654291428327e-07, + "loss": 0.46849286556243896, + "step": 5681 + }, + { + "epoch": 1.3101222042886789, + "grad_norm": 1.2665516862618484, + "learning_rate": 5.834187948451918e-07, + "loss": 0.4353019893169403, + "step": 5682 + }, + { + "epoch": 1.3103527784182614, + "grad_norm": 1.7099740749541261, + "learning_rate": 5.830722211018516e-07, + "loss": 0.5345665812492371, + "step": 5683 + }, + { + "epoch": 1.310583352547844, + "grad_norm": 1.4659221660940824, + "learning_rate": 5.827257079631886e-07, + "loss": 0.4060036540031433, + "step": 5684 + }, + { + "epoch": 1.3108139266774268, + "grad_norm": 1.3640742579072, + "learning_rate": 5.823792554795738e-07, + "loss": 0.43724536895751953, + "step": 5685 + }, + { + "epoch": 1.3110445008070095, + "grad_norm": 1.550163679413481, + "learning_rate": 5.820328637013665e-07, + "loss": 0.4600690007209778, + "step": 5686 + }, + { + "epoch": 1.3112750749365922, + "grad_norm": 1.5199243554334652, + "learning_rate": 5.816865326789182e-07, + "loss": 0.4352531433105469, + "step": 5687 + }, + { + "epoch": 1.3115056490661747, + "grad_norm": 1.4575114943022274, + "learning_rate": 5.813402624625722e-07, + "loss": 0.39384984970092773, + "step": 5688 + }, + { + "epoch": 1.3117362231957574, + "grad_norm": 1.329194110980277, + "learning_rate": 5.809940531026616e-07, + "loss": 0.44367098808288574, + "step": 5689 + }, + { + "epoch": 1.3119667973253402, + "grad_norm": 1.4497223943190725, + "learning_rate": 5.806479046495123e-07, + "loss": 0.4757416546344757, + "step": 5690 + }, + { + "epoch": 1.3121973714549227, + "grad_norm": 1.5821654764353048, + "learning_rate": 5.803018171534396e-07, + "loss": 0.521708607673645, + "step": 5691 + }, + { + "epoch": 1.3124279455845054, + "grad_norm": 1.3510537988002305, + "learning_rate": 5.799557906647514e-07, + "loss": 0.4127439260482788, + 
"step": 5692 + }, + { + "epoch": 1.312658519714088, + "grad_norm": 1.4570205213875538, + "learning_rate": 5.79609825233746e-07, + "loss": 0.4809693396091461, + "step": 5693 + }, + { + "epoch": 1.3128890938436708, + "grad_norm": 1.2590938015478794, + "learning_rate": 5.792639209107134e-07, + "loss": 0.5075684189796448, + "step": 5694 + }, + { + "epoch": 1.3131196679732535, + "grad_norm": 1.3738792104421846, + "learning_rate": 5.789180777459336e-07, + "loss": 0.416393518447876, + "step": 5695 + }, + { + "epoch": 1.313350242102836, + "grad_norm": 1.4282126857493198, + "learning_rate": 5.78572295789679e-07, + "loss": 0.4456642270088196, + "step": 5696 + }, + { + "epoch": 1.3135808162324187, + "grad_norm": 1.327521871832615, + "learning_rate": 5.782265750922124e-07, + "loss": 0.4757812023162842, + "step": 5697 + }, + { + "epoch": 1.3138113903620015, + "grad_norm": 1.6103197546493997, + "learning_rate": 5.778809157037872e-07, + "loss": 0.5081768035888672, + "step": 5698 + }, + { + "epoch": 1.314041964491584, + "grad_norm": 1.6849043068796357, + "learning_rate": 5.775353176746489e-07, + "loss": 0.4604584872722626, + "step": 5699 + }, + { + "epoch": 1.3142725386211667, + "grad_norm": 1.3964100189157245, + "learning_rate": 5.771897810550339e-07, + "loss": 0.4153773784637451, + "step": 5700 + }, + { + "epoch": 1.3145031127507494, + "grad_norm": 1.5346514188080242, + "learning_rate": 5.768443058951695e-07, + "loss": 0.5194085836410522, + "step": 5701 + }, + { + "epoch": 1.314733686880332, + "grad_norm": 1.6610989574168062, + "learning_rate": 5.764988922452733e-07, + "loss": 0.4398482143878937, + "step": 5702 + }, + { + "epoch": 1.3149642610099148, + "grad_norm": 1.747178590910114, + "learning_rate": 5.761535401555558e-07, + "loss": 0.5148836374282837, + "step": 5703 + }, + { + "epoch": 1.3151948351394973, + "grad_norm": 1.8977812861580863, + "learning_rate": 5.758082496762163e-07, + "loss": 0.533142626285553, + "step": 5704 + }, + { + "epoch": 1.31542540926908, + "grad_norm": 
1.3488739739710767, + "learning_rate": 5.754630208574473e-07, + "loss": 0.4059423804283142, + "step": 5705 + }, + { + "epoch": 1.3156559833986627, + "grad_norm": 1.3213051571946475, + "learning_rate": 5.751178537494302e-07, + "loss": 0.4685533940792084, + "step": 5706 + }, + { + "epoch": 1.3158865575282452, + "grad_norm": 1.5403217644159128, + "learning_rate": 5.747727484023392e-07, + "loss": 0.4454694986343384, + "step": 5707 + }, + { + "epoch": 1.316117131657828, + "grad_norm": 1.481350859430692, + "learning_rate": 5.74427704866339e-07, + "loss": 0.4058796167373657, + "step": 5708 + }, + { + "epoch": 1.3163477057874107, + "grad_norm": 1.3294270142641733, + "learning_rate": 5.740827231915847e-07, + "loss": 0.3891766369342804, + "step": 5709 + }, + { + "epoch": 1.3165782799169934, + "grad_norm": 1.5072356875610937, + "learning_rate": 5.737378034282235e-07, + "loss": 0.47912657260894775, + "step": 5710 + }, + { + "epoch": 1.316808854046576, + "grad_norm": 1.5228549079910219, + "learning_rate": 5.733929456263922e-07, + "loss": 0.4221952557563782, + "step": 5711 + }, + { + "epoch": 1.3170394281761586, + "grad_norm": 1.5405159904484362, + "learning_rate": 5.730481498362202e-07, + "loss": 0.39018404483795166, + "step": 5712 + }, + { + "epoch": 1.3172700023057413, + "grad_norm": 1.6184406292698126, + "learning_rate": 5.727034161078262e-07, + "loss": 0.5388307571411133, + "step": 5713 + }, + { + "epoch": 1.317500576435324, + "grad_norm": 1.5278965195377916, + "learning_rate": 5.723587444913216e-07, + "loss": 0.3243408501148224, + "step": 5714 + }, + { + "epoch": 1.3177311505649065, + "grad_norm": 1.6496814482710773, + "learning_rate": 5.720141350368072e-07, + "loss": 0.46480363607406616, + "step": 5715 + }, + { + "epoch": 1.3179617246944892, + "grad_norm": 1.6265951465013608, + "learning_rate": 5.716695877943757e-07, + "loss": 0.5286417603492737, + "step": 5716 + }, + { + "epoch": 1.318192298824072, + "grad_norm": 1.455901542591345, + "learning_rate": 
5.71325102814111e-07, + "loss": 0.4170069694519043, + "step": 5717 + }, + { + "epoch": 1.3184228729536547, + "grad_norm": 1.5051159019770526, + "learning_rate": 5.709806801460867e-07, + "loss": 0.5738973617553711, + "step": 5718 + }, + { + "epoch": 1.3186534470832374, + "grad_norm": 1.4473352410585376, + "learning_rate": 5.706363198403689e-07, + "loss": 0.5309658050537109, + "step": 5719 + }, + { + "epoch": 1.31888402121282, + "grad_norm": 1.588487236125564, + "learning_rate": 5.70292021947013e-07, + "loss": 0.4569379389286041, + "step": 5720 + }, + { + "epoch": 1.3191145953424026, + "grad_norm": 1.5641598702256398, + "learning_rate": 5.699477865160674e-07, + "loss": 0.46686258912086487, + "step": 5721 + }, + { + "epoch": 1.3193451694719853, + "grad_norm": 1.551220703032623, + "learning_rate": 5.696036135975688e-07, + "loss": 0.5333213806152344, + "step": 5722 + }, + { + "epoch": 1.3195757436015678, + "grad_norm": 1.6027893782611593, + "learning_rate": 5.69259503241547e-07, + "loss": 0.3519536256790161, + "step": 5723 + }, + { + "epoch": 1.3198063177311505, + "grad_norm": 1.5104260104986362, + "learning_rate": 5.689154554980218e-07, + "loss": 0.4763161242008209, + "step": 5724 + }, + { + "epoch": 1.3200368918607333, + "grad_norm": 1.5061315373489772, + "learning_rate": 5.685714704170044e-07, + "loss": 0.43600207567214966, + "step": 5725 + }, + { + "epoch": 1.320267465990316, + "grad_norm": 1.4992417251350876, + "learning_rate": 5.682275480484958e-07, + "loss": 0.41991305351257324, + "step": 5726 + }, + { + "epoch": 1.3204980401198987, + "grad_norm": 1.663551629444692, + "learning_rate": 5.678836884424894e-07, + "loss": 0.44275131821632385, + "step": 5727 + }, + { + "epoch": 1.3207286142494812, + "grad_norm": 1.65999947024113, + "learning_rate": 5.675398916489682e-07, + "loss": 0.4339372515678406, + "step": 5728 + }, + { + "epoch": 1.320959188379064, + "grad_norm": 1.484455134036602, + "learning_rate": 5.671961577179062e-07, + "loss": 0.4462248384952545, + "step": 
5729 + }, + { + "epoch": 1.3211897625086464, + "grad_norm": 1.4704913213821902, + "learning_rate": 5.668524866992693e-07, + "loss": 0.36548441648483276, + "step": 5730 + }, + { + "epoch": 1.321420336638229, + "grad_norm": 1.5370532211440713, + "learning_rate": 5.665088786430129e-07, + "loss": 0.4709678888320923, + "step": 5731 + }, + { + "epoch": 1.3216509107678118, + "grad_norm": 1.4993066403144744, + "learning_rate": 5.661653335990848e-07, + "loss": 0.40125030279159546, + "step": 5732 + }, + { + "epoch": 1.3218814848973945, + "grad_norm": 1.8517319571144346, + "learning_rate": 5.658218516174218e-07, + "loss": 0.5288605690002441, + "step": 5733 + }, + { + "epoch": 1.3221120590269773, + "grad_norm": 1.2954018601150643, + "learning_rate": 5.654784327479534e-07, + "loss": 0.41306072473526, + "step": 5734 + }, + { + "epoch": 1.3223426331565598, + "grad_norm": 1.3199807449430407, + "learning_rate": 5.651350770405983e-07, + "loss": 0.34327009320259094, + "step": 5735 + }, + { + "epoch": 1.3225732072861425, + "grad_norm": 1.4524630442098247, + "learning_rate": 5.647917845452671e-07, + "loss": 0.5055800080299377, + "step": 5736 + }, + { + "epoch": 1.3228037814157252, + "grad_norm": 1.7153085926535214, + "learning_rate": 5.644485553118609e-07, + "loss": 0.45496249198913574, + "step": 5737 + }, + { + "epoch": 1.3230343555453077, + "grad_norm": 1.6142993934275558, + "learning_rate": 5.641053893902708e-07, + "loss": 0.4626169502735138, + "step": 5738 + }, + { + "epoch": 1.3232649296748904, + "grad_norm": 1.3569624734396053, + "learning_rate": 5.637622868303802e-07, + "loss": 0.46621328592300415, + "step": 5739 + }, + { + "epoch": 1.3234955038044731, + "grad_norm": 1.5833136701466524, + "learning_rate": 5.634192476820623e-07, + "loss": 0.47793662548065186, + "step": 5740 + }, + { + "epoch": 1.3237260779340558, + "grad_norm": 1.5367680790773321, + "learning_rate": 5.630762719951816e-07, + "loss": 0.42578715085983276, + "step": 5741 + }, + { + "epoch": 1.3239566520636386, + 
"grad_norm": 1.7421270871218182, + "learning_rate": 5.627333598195927e-07, + "loss": 0.3146113157272339, + "step": 5742 + }, + { + "epoch": 1.324187226193221, + "grad_norm": 1.376620002714832, + "learning_rate": 5.623905112051417e-07, + "loss": 0.39731544256210327, + "step": 5743 + }, + { + "epoch": 1.3244178003228038, + "grad_norm": 1.6655684412604148, + "learning_rate": 5.620477262016647e-07, + "loss": 0.3755846619606018, + "step": 5744 + }, + { + "epoch": 1.3246483744523865, + "grad_norm": 1.5953907301532468, + "learning_rate": 5.617050048589896e-07, + "loss": 0.43060415983200073, + "step": 5745 + }, + { + "epoch": 1.324878948581969, + "grad_norm": 1.54564820857706, + "learning_rate": 5.613623472269334e-07, + "loss": 0.4213481545448303, + "step": 5746 + }, + { + "epoch": 1.3251095227115517, + "grad_norm": 1.2422408749001486, + "learning_rate": 5.610197533553057e-07, + "loss": 0.3923456072807312, + "step": 5747 + }, + { + "epoch": 1.3253400968411344, + "grad_norm": 1.6088447345623693, + "learning_rate": 5.606772232939061e-07, + "loss": 0.42293328046798706, + "step": 5748 + }, + { + "epoch": 1.3255706709707171, + "grad_norm": 1.596682526932072, + "learning_rate": 5.603347570925242e-07, + "loss": 0.4545479118824005, + "step": 5749 + }, + { + "epoch": 1.3258012451002998, + "grad_norm": 1.4262513090332916, + "learning_rate": 5.599923548009416e-07, + "loss": 0.3969312310218811, + "step": 5750 + }, + { + "epoch": 1.3260318192298823, + "grad_norm": 1.687653911460881, + "learning_rate": 5.59650016468929e-07, + "loss": 0.4296644330024719, + "step": 5751 + }, + { + "epoch": 1.326262393359465, + "grad_norm": 1.4928189267328964, + "learning_rate": 5.5930774214625e-07, + "loss": 0.43291348218917847, + "step": 5752 + }, + { + "epoch": 1.3264929674890478, + "grad_norm": 1.4463941028108167, + "learning_rate": 5.589655318826564e-07, + "loss": 0.47684454917907715, + "step": 5753 + }, + { + "epoch": 1.3267235416186303, + "grad_norm": 1.3515496302725483, + "learning_rate": 
5.586233857278924e-07, + "loss": 0.48520004749298096, + "step": 5754 + }, + { + "epoch": 1.326954115748213, + "grad_norm": 1.6127441732883512, + "learning_rate": 5.582813037316926e-07, + "loss": 0.4434587359428406, + "step": 5755 + }, + { + "epoch": 1.3271846898777957, + "grad_norm": 1.7808352880972456, + "learning_rate": 5.579392859437825e-07, + "loss": 0.47306808829307556, + "step": 5756 + }, + { + "epoch": 1.3274152640073784, + "grad_norm": 1.5663021335869645, + "learning_rate": 5.575973324138772e-07, + "loss": 0.4349653720855713, + "step": 5757 + }, + { + "epoch": 1.3276458381369611, + "grad_norm": 1.2914359149982935, + "learning_rate": 5.572554431916829e-07, + "loss": 0.31277602910995483, + "step": 5758 + }, + { + "epoch": 1.3278764122665436, + "grad_norm": 1.5658319454866303, + "learning_rate": 5.569136183268974e-07, + "loss": 0.4281114637851715, + "step": 5759 + }, + { + "epoch": 1.3281069863961263, + "grad_norm": 1.2867721627127386, + "learning_rate": 5.565718578692076e-07, + "loss": 0.45071113109588623, + "step": 5760 + }, + { + "epoch": 1.328337560525709, + "grad_norm": 1.4460147363867, + "learning_rate": 5.562301618682927e-07, + "loss": 0.426133394241333, + "step": 5761 + }, + { + "epoch": 1.3285681346552916, + "grad_norm": 1.3630920926710801, + "learning_rate": 5.558885303738209e-07, + "loss": 0.3882424235343933, + "step": 5762 + }, + { + "epoch": 1.3287987087848743, + "grad_norm": 1.3878174095068123, + "learning_rate": 5.55546963435452e-07, + "loss": 0.4706958532333374, + "step": 5763 + }, + { + "epoch": 1.329029282914457, + "grad_norm": 1.9122348340273743, + "learning_rate": 5.552054611028365e-07, + "loss": 0.4868433475494385, + "step": 5764 + }, + { + "epoch": 1.3292598570440397, + "grad_norm": 1.4411048310630292, + "learning_rate": 5.548640234256154e-07, + "loss": 0.41839566826820374, + "step": 5765 + }, + { + "epoch": 1.3294904311736224, + "grad_norm": 1.9627530346102546, + "learning_rate": 5.545226504534195e-07, + "loss": 0.4088629484176636, + 
"step": 5766 + }, + { + "epoch": 1.329721005303205, + "grad_norm": 1.3819218540316194, + "learning_rate": 5.541813422358715e-07, + "loss": 0.34617769718170166, + "step": 5767 + }, + { + "epoch": 1.3299515794327876, + "grad_norm": 1.5711021474470717, + "learning_rate": 5.538400988225835e-07, + "loss": 0.5098900198936462, + "step": 5768 + }, + { + "epoch": 1.3301821535623704, + "grad_norm": 1.5683015797269382, + "learning_rate": 5.534989202631586e-07, + "loss": 0.4294108748435974, + "step": 5769 + }, + { + "epoch": 1.3304127276919528, + "grad_norm": 1.3488716534216894, + "learning_rate": 5.531578066071907e-07, + "loss": 0.42205139994621277, + "step": 5770 + }, + { + "epoch": 1.3306433018215356, + "grad_norm": 1.8657910300729754, + "learning_rate": 5.528167579042645e-07, + "loss": 0.5009530186653137, + "step": 5771 + }, + { + "epoch": 1.3308738759511183, + "grad_norm": 1.468249228101101, + "learning_rate": 5.524757742039545e-07, + "loss": 0.554497241973877, + "step": 5772 + }, + { + "epoch": 1.331104450080701, + "grad_norm": 1.711116822757576, + "learning_rate": 5.521348555558263e-07, + "loss": 0.3514432907104492, + "step": 5773 + }, + { + "epoch": 1.3313350242102837, + "grad_norm": 1.4224522574801144, + "learning_rate": 5.51794002009436e-07, + "loss": 0.4712038040161133, + "step": 5774 + }, + { + "epoch": 1.3315655983398662, + "grad_norm": 1.6288850118765847, + "learning_rate": 5.514532136143295e-07, + "loss": 0.48556071519851685, + "step": 5775 + }, + { + "epoch": 1.331796172469449, + "grad_norm": 1.42798680480441, + "learning_rate": 5.511124904200448e-07, + "loss": 0.43158456683158875, + "step": 5776 + }, + { + "epoch": 1.3320267465990316, + "grad_norm": 1.8128360066016722, + "learning_rate": 5.507718324761085e-07, + "loss": 0.5376255512237549, + "step": 5777 + }, + { + "epoch": 1.3322573207286141, + "grad_norm": 1.446480187929883, + "learning_rate": 5.504312398320392e-07, + "loss": 0.3800685405731201, + "step": 5778 + }, + { + "epoch": 1.3324878948581969, + 
"grad_norm": 1.3675185316121448, + "learning_rate": 5.500907125373458e-07, + "loss": 0.4015260338783264, + "step": 5779 + }, + { + "epoch": 1.3327184689877796, + "grad_norm": 1.7400186621828952, + "learning_rate": 5.497502506415266e-07, + "loss": 0.42762285470962524, + "step": 5780 + }, + { + "epoch": 1.3329490431173623, + "grad_norm": 1.4501572722598215, + "learning_rate": 5.494098541940719e-07, + "loss": 0.4467644691467285, + "step": 5781 + }, + { + "epoch": 1.333179617246945, + "grad_norm": 1.9298171674754279, + "learning_rate": 5.490695232444613e-07, + "loss": 0.42699599266052246, + "step": 5782 + }, + { + "epoch": 1.3334101913765275, + "grad_norm": 1.6654850032985582, + "learning_rate": 5.487292578421659e-07, + "loss": 0.586537778377533, + "step": 5783 + }, + { + "epoch": 1.3336407655061102, + "grad_norm": 1.761605169999467, + "learning_rate": 5.48389058036646e-07, + "loss": 0.4525066018104553, + "step": 5784 + }, + { + "epoch": 1.333871339635693, + "grad_norm": 1.4697934550209713, + "learning_rate": 5.480489238773535e-07, + "loss": 0.40520548820495605, + "step": 5785 + }, + { + "epoch": 1.3341019137652754, + "grad_norm": 1.7127717596843188, + "learning_rate": 5.477088554137304e-07, + "loss": 0.3910450339317322, + "step": 5786 + }, + { + "epoch": 1.3343324878948581, + "grad_norm": 1.781985995356997, + "learning_rate": 5.473688526952087e-07, + "loss": 0.45285511016845703, + "step": 5787 + }, + { + "epoch": 1.3345630620244409, + "grad_norm": 1.3079701521023397, + "learning_rate": 5.47028915771212e-07, + "loss": 0.39207279682159424, + "step": 5788 + }, + { + "epoch": 1.3347936361540236, + "grad_norm": 1.3401224496215014, + "learning_rate": 5.466890446911527e-07, + "loss": 0.40281063318252563, + "step": 5789 + }, + { + "epoch": 1.3350242102836063, + "grad_norm": 1.5855589292084546, + "learning_rate": 5.463492395044354e-07, + "loss": 0.5087814927101135, + "step": 5790 + }, + { + "epoch": 1.3352547844131888, + "grad_norm": 1.6443172906836578, + "learning_rate": 
5.460095002604532e-07, + "loss": 0.47597891092300415, + "step": 5791 + }, + { + "epoch": 1.3354853585427715, + "grad_norm": 1.656230003127049, + "learning_rate": 5.456698270085917e-07, + "loss": 0.5722953677177429, + "step": 5792 + }, + { + "epoch": 1.3357159326723542, + "grad_norm": 1.6424947586218923, + "learning_rate": 5.45330219798225e-07, + "loss": 0.5133349299430847, + "step": 5793 + }, + { + "epoch": 1.3359465068019367, + "grad_norm": 1.5413030595202453, + "learning_rate": 5.449906786787187e-07, + "loss": 0.46230804920196533, + "step": 5794 + }, + { + "epoch": 1.3361770809315194, + "grad_norm": 1.6839619437291453, + "learning_rate": 5.446512036994286e-07, + "loss": 0.42002394795417786, + "step": 5795 + }, + { + "epoch": 1.3364076550611022, + "grad_norm": 1.46623243210155, + "learning_rate": 5.443117949097013e-07, + "loss": 0.42281097173690796, + "step": 5796 + }, + { + "epoch": 1.3366382291906849, + "grad_norm": 1.4476698476010996, + "learning_rate": 5.439724523588726e-07, + "loss": 0.511898398399353, + "step": 5797 + }, + { + "epoch": 1.3368688033202676, + "grad_norm": 1.4307520026731049, + "learning_rate": 5.4363317609627e-07, + "loss": 0.4475559592247009, + "step": 5798 + }, + { + "epoch": 1.33709937744985, + "grad_norm": 1.509864957359139, + "learning_rate": 5.432939661712103e-07, + "loss": 0.4872414469718933, + "step": 5799 + }, + { + "epoch": 1.3373299515794328, + "grad_norm": 1.3480605234272842, + "learning_rate": 5.429548226330009e-07, + "loss": 0.40401679277420044, + "step": 5800 + }, + { + "epoch": 1.3375605257090155, + "grad_norm": 2.083088707198395, + "learning_rate": 5.426157455309399e-07, + "loss": 0.43559926748275757, + "step": 5801 + }, + { + "epoch": 1.337791099838598, + "grad_norm": 1.6000855398004097, + "learning_rate": 5.422767349143158e-07, + "loss": 0.44283759593963623, + "step": 5802 + }, + { + "epoch": 1.3380216739681807, + "grad_norm": 1.310277684226626, + "learning_rate": 5.419377908324077e-07, + "loss": 0.3770032525062561, + 
"step": 5803 + }, + { + "epoch": 1.3382522480977634, + "grad_norm": 1.3856773934136148, + "learning_rate": 5.415989133344834e-07, + "loss": 0.4497501850128174, + "step": 5804 + }, + { + "epoch": 1.3384828222273462, + "grad_norm": 1.49195449044666, + "learning_rate": 5.412601024698033e-07, + "loss": 0.5008253455162048, + "step": 5805 + }, + { + "epoch": 1.3387133963569289, + "grad_norm": 1.3694796854029274, + "learning_rate": 5.409213582876162e-07, + "loss": 0.46178537607192993, + "step": 5806 + }, + { + "epoch": 1.3389439704865114, + "grad_norm": 1.1951838089282807, + "learning_rate": 5.405826808371625e-07, + "loss": 0.39843931794166565, + "step": 5807 + }, + { + "epoch": 1.339174544616094, + "grad_norm": 1.4243934050525646, + "learning_rate": 5.402440701676724e-07, + "loss": 0.4829174280166626, + "step": 5808 + }, + { + "epoch": 1.3394051187456768, + "grad_norm": 1.0859530853021675, + "learning_rate": 5.399055263283656e-07, + "loss": 0.36173316836357117, + "step": 5809 + }, + { + "epoch": 1.3396356928752593, + "grad_norm": 1.5741135880130834, + "learning_rate": 5.395670493684536e-07, + "loss": 0.400304913520813, + "step": 5810 + }, + { + "epoch": 1.339866267004842, + "grad_norm": 1.507879612413509, + "learning_rate": 5.392286393371372e-07, + "loss": 0.4536975622177124, + "step": 5811 + }, + { + "epoch": 1.3400968411344247, + "grad_norm": 1.7310508291395992, + "learning_rate": 5.388902962836084e-07, + "loss": 0.6474577188491821, + "step": 5812 + }, + { + "epoch": 1.3403274152640074, + "grad_norm": 1.6348182443046517, + "learning_rate": 5.385520202570477e-07, + "loss": 0.48008009791374207, + "step": 5813 + }, + { + "epoch": 1.3405579893935902, + "grad_norm": 1.6214175923335088, + "learning_rate": 5.38213811306628e-07, + "loss": 0.4518657326698303, + "step": 5814 + }, + { + "epoch": 1.3407885635231727, + "grad_norm": 1.280530895656809, + "learning_rate": 5.378756694815105e-07, + "loss": 0.449008584022522, + "step": 5815 + }, + { + "epoch": 1.3410191376527554, + 
"grad_norm": 1.689898643370083, + "learning_rate": 5.375375948308483e-07, + "loss": 0.5448319315910339, + "step": 5816 + }, + { + "epoch": 1.341249711782338, + "grad_norm": 1.5166178678578832, + "learning_rate": 5.371995874037832e-07, + "loss": 0.5078369379043579, + "step": 5817 + }, + { + "epoch": 1.3414802859119206, + "grad_norm": 1.611364899344997, + "learning_rate": 5.368616472494482e-07, + "loss": 0.508685290813446, + "step": 5818 + }, + { + "epoch": 1.3417108600415033, + "grad_norm": 1.3809568946566115, + "learning_rate": 5.365237744169672e-07, + "loss": 0.4166705012321472, + "step": 5819 + }, + { + "epoch": 1.341941434171086, + "grad_norm": 1.432431964622234, + "learning_rate": 5.361859689554524e-07, + "loss": 0.4741361737251282, + "step": 5820 + }, + { + "epoch": 1.3421720083006687, + "grad_norm": 1.5546451283342237, + "learning_rate": 5.358482309140079e-07, + "loss": 0.36658185720443726, + "step": 5821 + }, + { + "epoch": 1.3424025824302515, + "grad_norm": 1.9632157270552801, + "learning_rate": 5.355105603417267e-07, + "loss": 0.38921263813972473, + "step": 5822 + }, + { + "epoch": 1.342633156559834, + "grad_norm": 1.9732368197118861, + "learning_rate": 5.351729572876935e-07, + "loss": 0.5553977489471436, + "step": 5823 + }, + { + "epoch": 1.3428637306894167, + "grad_norm": 1.4618484003422054, + "learning_rate": 5.348354218009813e-07, + "loss": 0.3968391418457031, + "step": 5824 + }, + { + "epoch": 1.3430943048189994, + "grad_norm": 1.4937275325292458, + "learning_rate": 5.344979539306549e-07, + "loss": 0.4289783239364624, + "step": 5825 + }, + { + "epoch": 1.3433248789485819, + "grad_norm": 1.313862309148984, + "learning_rate": 5.341605537257686e-07, + "loss": 0.45359861850738525, + "step": 5826 + }, + { + "epoch": 1.3435554530781646, + "grad_norm": 1.366684570776694, + "learning_rate": 5.338232212353675e-07, + "loss": 0.3571642339229584, + "step": 5827 + }, + { + "epoch": 1.3437860272077473, + "grad_norm": 1.1954938252676188, + "learning_rate": 
5.334859565084855e-07, + "loss": 0.3784096837043762, + "step": 5828 + }, + { + "epoch": 1.34401660133733, + "grad_norm": 1.5372749019268697, + "learning_rate": 5.331487595941475e-07, + "loss": 0.44996407628059387, + "step": 5829 + }, + { + "epoch": 1.3442471754669127, + "grad_norm": 1.4793854978740197, + "learning_rate": 5.32811630541369e-07, + "loss": 0.4466405510902405, + "step": 5830 + }, + { + "epoch": 1.3444777495964952, + "grad_norm": 1.3432081322840168, + "learning_rate": 5.324745693991545e-07, + "loss": 0.34488850831985474, + "step": 5831 + }, + { + "epoch": 1.344708323726078, + "grad_norm": 1.589654871057016, + "learning_rate": 5.321375762164999e-07, + "loss": 0.5530165433883667, + "step": 5832 + }, + { + "epoch": 1.3449388978556607, + "grad_norm": 1.6555576202053326, + "learning_rate": 5.318006510423898e-07, + "loss": 0.40732342004776, + "step": 5833 + }, + { + "epoch": 1.3451694719852432, + "grad_norm": 1.5528027430812303, + "learning_rate": 5.314637939258002e-07, + "loss": 0.3364611566066742, + "step": 5834 + }, + { + "epoch": 1.3454000461148259, + "grad_norm": 1.4557702222082582, + "learning_rate": 5.311270049156966e-07, + "loss": 0.43964290618896484, + "step": 5835 + }, + { + "epoch": 1.3456306202444086, + "grad_norm": 1.5963363545263636, + "learning_rate": 5.30790284061035e-07, + "loss": 0.5203431844711304, + "step": 5836 + }, + { + "epoch": 1.3458611943739913, + "grad_norm": 1.356219303149177, + "learning_rate": 5.304536314107607e-07, + "loss": 0.4779793620109558, + "step": 5837 + }, + { + "epoch": 1.346091768503574, + "grad_norm": 1.4030454651132978, + "learning_rate": 5.301170470138102e-07, + "loss": 0.4769410490989685, + "step": 5838 + }, + { + "epoch": 1.3463223426331565, + "grad_norm": 1.5437367488200047, + "learning_rate": 5.297805309191089e-07, + "loss": 0.42390304803848267, + "step": 5839 + }, + { + "epoch": 1.3465529167627392, + "grad_norm": 1.6498587295444291, + "learning_rate": 5.294440831755727e-07, + "loss": 0.5550302863121033, + 
"step": 5840 + }, + { + "epoch": 1.3467834908923217, + "grad_norm": 1.5927381474044073, + "learning_rate": 5.291077038321078e-07, + "loss": 0.4897978901863098, + "step": 5841 + }, + { + "epoch": 1.3470140650219045, + "grad_norm": 1.5707311912828865, + "learning_rate": 5.287713929376105e-07, + "loss": 0.4014284610748291, + "step": 5842 + }, + { + "epoch": 1.3472446391514872, + "grad_norm": 1.61036503253005, + "learning_rate": 5.284351505409675e-07, + "loss": 0.4299513101577759, + "step": 5843 + }, + { + "epoch": 1.34747521328107, + "grad_norm": 1.382725158348277, + "learning_rate": 5.280989766910541e-07, + "loss": 0.44863104820251465, + "step": 5844 + }, + { + "epoch": 1.3477057874106526, + "grad_norm": 1.4391517424186664, + "learning_rate": 5.277628714367374e-07, + "loss": 0.41933274269104004, + "step": 5845 + }, + { + "epoch": 1.347936361540235, + "grad_norm": 1.5110585127257306, + "learning_rate": 5.274268348268729e-07, + "loss": 0.48257556557655334, + "step": 5846 + }, + { + "epoch": 1.3481669356698178, + "grad_norm": 1.6840388322451993, + "learning_rate": 5.270908669103078e-07, + "loss": 0.435384064912796, + "step": 5847 + }, + { + "epoch": 1.3483975097994005, + "grad_norm": 1.502056490079635, + "learning_rate": 5.267549677358775e-07, + "loss": 0.43291670083999634, + "step": 5848 + }, + { + "epoch": 1.348628083928983, + "grad_norm": 2.07427587572329, + "learning_rate": 5.264191373524089e-07, + "loss": 0.4584086537361145, + "step": 5849 + }, + { + "epoch": 1.3488586580585658, + "grad_norm": 1.4212548389061759, + "learning_rate": 5.260833758087187e-07, + "loss": 0.44879037141799927, + "step": 5850 + }, + { + "epoch": 1.3490892321881485, + "grad_norm": 1.4876230861981237, + "learning_rate": 5.257476831536124e-07, + "loss": 0.48467326164245605, + "step": 5851 + }, + { + "epoch": 1.3493198063177312, + "grad_norm": 1.4803329007154076, + "learning_rate": 5.254120594358871e-07, + "loss": 0.4126189947128296, + "step": 5852 + }, + { + "epoch": 1.349550380447314, + 
"grad_norm": 1.494164620045959, + "learning_rate": 5.250765047043284e-07, + "loss": 0.5592546463012695, + "step": 5853 + }, + { + "epoch": 1.3497809545768964, + "grad_norm": 1.2572079660485564, + "learning_rate": 5.247410190077134e-07, + "loss": 0.3269529342651367, + "step": 5854 + }, + { + "epoch": 1.3500115287064791, + "grad_norm": 1.4784058003593112, + "learning_rate": 5.244056023948075e-07, + "loss": 0.42812949419021606, + "step": 5855 + }, + { + "epoch": 1.3502421028360618, + "grad_norm": 1.643847647603701, + "learning_rate": 5.240702549143676e-07, + "loss": 0.4266297221183777, + "step": 5856 + }, + { + "epoch": 1.3504726769656443, + "grad_norm": 1.6490610440384348, + "learning_rate": 5.237349766151392e-07, + "loss": 0.43848085403442383, + "step": 5857 + }, + { + "epoch": 1.350703251095227, + "grad_norm": 1.5778355488021025, + "learning_rate": 5.233997675458588e-07, + "loss": 0.47512906789779663, + "step": 5858 + }, + { + "epoch": 1.3509338252248098, + "grad_norm": 1.4893970639177625, + "learning_rate": 5.230646277552527e-07, + "loss": 0.3484492897987366, + "step": 5859 + }, + { + "epoch": 1.3511643993543925, + "grad_norm": 1.5529244445697006, + "learning_rate": 5.227295572920363e-07, + "loss": 0.48915669322013855, + "step": 5860 + }, + { + "epoch": 1.3513949734839752, + "grad_norm": 1.687195391171769, + "learning_rate": 5.223945562049159e-07, + "loss": 0.415932834148407, + "step": 5861 + }, + { + "epoch": 1.3516255476135577, + "grad_norm": 1.8036222540660396, + "learning_rate": 5.220596245425869e-07, + "loss": 0.47945982217788696, + "step": 5862 + }, + { + "epoch": 1.3518561217431404, + "grad_norm": 1.7032993247582504, + "learning_rate": 5.217247623537356e-07, + "loss": 0.4322330951690674, + "step": 5863 + }, + { + "epoch": 1.3520866958727231, + "grad_norm": 1.7271334098970212, + "learning_rate": 5.213899696870369e-07, + "loss": 0.4608469605445862, + "step": 5864 + }, + { + "epoch": 1.3523172700023056, + "grad_norm": 1.4726583260713841, + "learning_rate": 
5.210552465911566e-07, + "loss": 0.5108528137207031, + "step": 5865 + }, + { + "epoch": 1.3525478441318883, + "grad_norm": 1.3172906919344538, + "learning_rate": 5.207205931147502e-07, + "loss": 0.37947285175323486, + "step": 5866 + }, + { + "epoch": 1.352778418261471, + "grad_norm": 1.5825329658520386, + "learning_rate": 5.203860093064635e-07, + "loss": 0.49094486236572266, + "step": 5867 + }, + { + "epoch": 1.3530089923910538, + "grad_norm": 1.7057097538270483, + "learning_rate": 5.200514952149308e-07, + "loss": 0.34238702058792114, + "step": 5868 + }, + { + "epoch": 1.3532395665206365, + "grad_norm": 1.4815052827701158, + "learning_rate": 5.197170508887774e-07, + "loss": 0.46390393376350403, + "step": 5869 + }, + { + "epoch": 1.353470140650219, + "grad_norm": 1.517083535949924, + "learning_rate": 5.193826763766183e-07, + "loss": 0.44219160079956055, + "step": 5870 + }, + { + "epoch": 1.3537007147798017, + "grad_norm": 1.2444078580604416, + "learning_rate": 5.190483717270578e-07, + "loss": 0.42801350355148315, + "step": 5871 + }, + { + "epoch": 1.3539312889093844, + "grad_norm": 1.5276855271974423, + "learning_rate": 5.187141369886906e-07, + "loss": 0.43861454725265503, + "step": 5872 + }, + { + "epoch": 1.354161863038967, + "grad_norm": 1.3684710867849712, + "learning_rate": 5.183799722101014e-07, + "loss": 0.4381449222564697, + "step": 5873 + }, + { + "epoch": 1.3543924371685496, + "grad_norm": 1.6990772878337996, + "learning_rate": 5.180458774398646e-07, + "loss": 0.4341619610786438, + "step": 5874 + }, + { + "epoch": 1.3546230112981323, + "grad_norm": 1.5170997767832792, + "learning_rate": 5.177118527265437e-07, + "loss": 0.4376588463783264, + "step": 5875 + }, + { + "epoch": 1.354853585427715, + "grad_norm": 1.4712846387139202, + "learning_rate": 5.173778981186932e-07, + "loss": 0.38568538427352905, + "step": 5876 + }, + { + "epoch": 1.3550841595572978, + "grad_norm": 1.4162179235966152, + "learning_rate": 5.170440136648561e-07, + "loss": 
0.44178056716918945, + "step": 5877 + }, + { + "epoch": 1.3553147336868803, + "grad_norm": 1.434763306400174, + "learning_rate": 5.167101994135665e-07, + "loss": 0.49847882986068726, + "step": 5878 + }, + { + "epoch": 1.355545307816463, + "grad_norm": 1.3114035605969607, + "learning_rate": 5.163764554133476e-07, + "loss": 0.33697545528411865, + "step": 5879 + }, + { + "epoch": 1.3557758819460457, + "grad_norm": 1.9314852987462174, + "learning_rate": 5.160427817127117e-07, + "loss": 0.5216578841209412, + "step": 5880 + }, + { + "epoch": 1.3560064560756282, + "grad_norm": 1.5367735086016923, + "learning_rate": 5.157091783601624e-07, + "loss": 0.5101301670074463, + "step": 5881 + }, + { + "epoch": 1.356237030205211, + "grad_norm": 1.4437708354871932, + "learning_rate": 5.15375645404192e-07, + "loss": 0.47876495122909546, + "step": 5882 + }, + { + "epoch": 1.3564676043347936, + "grad_norm": 1.413429948502146, + "learning_rate": 5.150421828932837e-07, + "loss": 0.4656233787536621, + "step": 5883 + }, + { + "epoch": 1.3566981784643763, + "grad_norm": 1.4503708847221477, + "learning_rate": 5.147087908759082e-07, + "loss": 0.4392930269241333, + "step": 5884 + }, + { + "epoch": 1.356928752593959, + "grad_norm": 1.6187538312851866, + "learning_rate": 5.143754694005289e-07, + "loss": 0.5044047832489014, + "step": 5885 + }, + { + "epoch": 1.3571593267235416, + "grad_norm": 1.3914560087628793, + "learning_rate": 5.140422185155964e-07, + "loss": 0.4345476031303406, + "step": 5886 + }, + { + "epoch": 1.3573899008531243, + "grad_norm": 1.768236932460398, + "learning_rate": 5.137090382695528e-07, + "loss": 0.49207669496536255, + "step": 5887 + }, + { + "epoch": 1.357620474982707, + "grad_norm": 1.531417533887488, + "learning_rate": 5.133759287108286e-07, + "loss": 0.4054356813430786, + "step": 5888 + }, + { + "epoch": 1.3578510491122895, + "grad_norm": 1.9704323937726442, + "learning_rate": 5.130428898878449e-07, + "loss": 0.5436004400253296, + "step": 5889 + }, + { + "epoch": 
1.3580816232418722, + "grad_norm": 1.521959500035041, + "learning_rate": 5.127099218490127e-07, + "loss": 0.4832550287246704, + "step": 5890 + }, + { + "epoch": 1.358312197371455, + "grad_norm": 1.4438750839498624, + "learning_rate": 5.123770246427315e-07, + "loss": 0.38890475034713745, + "step": 5891 + }, + { + "epoch": 1.3585427715010376, + "grad_norm": 1.3028583829520697, + "learning_rate": 5.12044198317392e-07, + "loss": 0.49784210324287415, + "step": 5892 + }, + { + "epoch": 1.3587733456306204, + "grad_norm": 1.5058620289816076, + "learning_rate": 5.117114429213732e-07, + "loss": 0.5033924579620361, + "step": 5893 + }, + { + "epoch": 1.3590039197602028, + "grad_norm": 1.5069016697055244, + "learning_rate": 5.113787585030454e-07, + "loss": 0.4857698678970337, + "step": 5894 + }, + { + "epoch": 1.3592344938897856, + "grad_norm": 1.6430229342698937, + "learning_rate": 5.110461451107663e-07, + "loss": 0.4269944429397583, + "step": 5895 + }, + { + "epoch": 1.3594650680193683, + "grad_norm": 1.5554523008644683, + "learning_rate": 5.107136027928858e-07, + "loss": 0.44045162200927734, + "step": 5896 + }, + { + "epoch": 1.3596956421489508, + "grad_norm": 1.6719472262672752, + "learning_rate": 5.103811315977418e-07, + "loss": 0.5223391056060791, + "step": 5897 + }, + { + "epoch": 1.3599262162785335, + "grad_norm": 1.6234993813736853, + "learning_rate": 5.100487315736627e-07, + "loss": 0.45988473296165466, + "step": 5898 + }, + { + "epoch": 1.3601567904081162, + "grad_norm": 1.3494964030299075, + "learning_rate": 5.097164027689661e-07, + "loss": 0.46342164278030396, + "step": 5899 + }, + { + "epoch": 1.360387364537699, + "grad_norm": 1.6151646749241875, + "learning_rate": 5.093841452319588e-07, + "loss": 0.48150479793548584, + "step": 5900 + }, + { + "epoch": 1.3606179386672816, + "grad_norm": 1.3258214555354595, + "learning_rate": 5.090519590109386e-07, + "loss": 0.3971351981163025, + "step": 5901 + }, + { + "epoch": 1.3608485127968641, + "grad_norm": 1.755266254483419, 
+ "learning_rate": 5.087198441541914e-07, + "loss": 0.44869956374168396, + "step": 5902 + }, + { + "epoch": 1.3610790869264469, + "grad_norm": 1.4425507935259798, + "learning_rate": 5.083878007099943e-07, + "loss": 0.3402775526046753, + "step": 5903 + }, + { + "epoch": 1.3613096610560296, + "grad_norm": 1.3415772700158808, + "learning_rate": 5.080558287266119e-07, + "loss": 0.4031033515930176, + "step": 5904 + }, + { + "epoch": 1.361540235185612, + "grad_norm": 1.6435607583739225, + "learning_rate": 5.077239282523012e-07, + "loss": 0.493259459733963, + "step": 5905 + }, + { + "epoch": 1.3617708093151948, + "grad_norm": 1.4120722192098578, + "learning_rate": 5.073920993353063e-07, + "loss": 0.39178919792175293, + "step": 5906 + }, + { + "epoch": 1.3620013834447775, + "grad_norm": 1.6684880889475469, + "learning_rate": 5.070603420238624e-07, + "loss": 0.5091253519058228, + "step": 5907 + }, + { + "epoch": 1.3622319575743602, + "grad_norm": 1.3497137288112562, + "learning_rate": 5.067286563661934e-07, + "loss": 0.416462779045105, + "step": 5908 + }, + { + "epoch": 1.362462531703943, + "grad_norm": 1.7821137618482668, + "learning_rate": 5.063970424105137e-07, + "loss": 0.5018768310546875, + "step": 5909 + }, + { + "epoch": 1.3626931058335254, + "grad_norm": 1.4656990143163084, + "learning_rate": 5.060655002050262e-07, + "loss": 0.5512624979019165, + "step": 5910 + }, + { + "epoch": 1.3629236799631081, + "grad_norm": 1.3507263825947706, + "learning_rate": 5.057340297979241e-07, + "loss": 0.3953768014907837, + "step": 5911 + }, + { + "epoch": 1.3631542540926909, + "grad_norm": 1.2807145092132266, + "learning_rate": 5.054026312373896e-07, + "loss": 0.4355456233024597, + "step": 5912 + }, + { + "epoch": 1.3633848282222734, + "grad_norm": 1.7515987196576535, + "learning_rate": 5.050713045715955e-07, + "loss": 0.4826827645301819, + "step": 5913 + }, + { + "epoch": 1.363615402351856, + "grad_norm": 1.5075633708078446, + "learning_rate": 5.047400498487035e-07, + "loss": 
0.47084230184555054, + "step": 5914 + }, + { + "epoch": 1.3638459764814388, + "grad_norm": 1.750968751768445, + "learning_rate": 5.044088671168644e-07, + "loss": 0.5273452997207642, + "step": 5915 + }, + { + "epoch": 1.3640765506110215, + "grad_norm": 1.484245498844297, + "learning_rate": 5.040777564242194e-07, + "loss": 0.44878947734832764, + "step": 5916 + }, + { + "epoch": 1.3643071247406042, + "grad_norm": 1.5815904358854045, + "learning_rate": 5.03746717818898e-07, + "loss": 0.47986388206481934, + "step": 5917 + }, + { + "epoch": 1.3645376988701867, + "grad_norm": 1.4148899602283196, + "learning_rate": 5.034157513490211e-07, + "loss": 0.4807628393173218, + "step": 5918 + }, + { + "epoch": 1.3647682729997694, + "grad_norm": 1.3747301384734179, + "learning_rate": 5.030848570626969e-07, + "loss": 0.46027708053588867, + "step": 5919 + }, + { + "epoch": 1.3649988471293522, + "grad_norm": 1.517934310152821, + "learning_rate": 5.027540350080249e-07, + "loss": 0.3803088963031769, + "step": 5920 + }, + { + "epoch": 1.3652294212589347, + "grad_norm": 1.7239494972976075, + "learning_rate": 5.024232852330939e-07, + "loss": 0.5530920028686523, + "step": 5921 + }, + { + "epoch": 1.3654599953885174, + "grad_norm": 1.7183928961648565, + "learning_rate": 5.020926077859805e-07, + "loss": 0.45984846353530884, + "step": 5922 + }, + { + "epoch": 1.3656905695181, + "grad_norm": 1.5752429840016822, + "learning_rate": 5.017620027147533e-07, + "loss": 0.4448089301586151, + "step": 5923 + }, + { + "epoch": 1.3659211436476828, + "grad_norm": 1.713335636587649, + "learning_rate": 5.01431470067468e-07, + "loss": 0.4226706326007843, + "step": 5924 + }, + { + "epoch": 1.3661517177772655, + "grad_norm": 1.9953320185051966, + "learning_rate": 5.011010098921718e-07, + "loss": 0.5243814587593079, + "step": 5925 + }, + { + "epoch": 1.366382291906848, + "grad_norm": 1.6278540239253128, + "learning_rate": 5.007706222368995e-07, + "loss": 0.5733383893966675, + "step": 5926 + }, + { + "epoch": 
1.3666128660364307, + "grad_norm": 1.373199955472141, + "learning_rate": 5.00440307149677e-07, + "loss": 0.4583539366722107, + "step": 5927 + }, + { + "epoch": 1.3668434401660134, + "grad_norm": 1.5871148090703988, + "learning_rate": 5.001100646785186e-07, + "loss": 0.474712610244751, + "step": 5928 + }, + { + "epoch": 1.367074014295596, + "grad_norm": 1.6888872351824356, + "learning_rate": 4.997798948714291e-07, + "loss": 0.3995950222015381, + "step": 5929 + }, + { + "epoch": 1.3673045884251787, + "grad_norm": 1.7317310910620232, + "learning_rate": 4.994497977764011e-07, + "loss": 0.4236767888069153, + "step": 5930 + }, + { + "epoch": 1.3675351625547614, + "grad_norm": 1.6853541022393534, + "learning_rate": 4.991197734414178e-07, + "loss": 0.4972396492958069, + "step": 5931 + }, + { + "epoch": 1.367765736684344, + "grad_norm": 1.503037819471691, + "learning_rate": 4.98789821914452e-07, + "loss": 0.444613516330719, + "step": 5932 + }, + { + "epoch": 1.3679963108139268, + "grad_norm": 1.6912958330957677, + "learning_rate": 4.984599432434649e-07, + "loss": 0.4955690801143646, + "step": 5933 + }, + { + "epoch": 1.3682268849435093, + "grad_norm": 1.559115794882019, + "learning_rate": 4.981301374764084e-07, + "loss": 0.4983398914337158, + "step": 5934 + }, + { + "epoch": 1.368457459073092, + "grad_norm": 1.5588186216828477, + "learning_rate": 4.978004046612223e-07, + "loss": 0.45190921425819397, + "step": 5935 + }, + { + "epoch": 1.3686880332026747, + "grad_norm": 1.757499738470118, + "learning_rate": 4.974707448458369e-07, + "loss": 0.5014151334762573, + "step": 5936 + }, + { + "epoch": 1.3689186073322572, + "grad_norm": 1.5399509659752455, + "learning_rate": 4.971411580781719e-07, + "loss": 0.3868405818939209, + "step": 5937 + }, + { + "epoch": 1.36914918146184, + "grad_norm": 1.42775142494789, + "learning_rate": 4.968116444061363e-07, + "loss": 0.4093654155731201, + "step": 5938 + }, + { + "epoch": 1.3693797555914227, + "grad_norm": 1.318689202230345, + 
"learning_rate": 4.964822038776276e-07, + "loss": 0.3945506513118744, + "step": 5939 + }, + { + "epoch": 1.3696103297210054, + "grad_norm": 1.5874458283663229, + "learning_rate": 4.961528365405333e-07, + "loss": 0.3645547330379486, + "step": 5940 + }, + { + "epoch": 1.369840903850588, + "grad_norm": 1.760752800086673, + "learning_rate": 4.958235424427309e-07, + "loss": 0.36679786443710327, + "step": 5941 + }, + { + "epoch": 1.3700714779801706, + "grad_norm": 1.5458160371079348, + "learning_rate": 4.954943216320861e-07, + "loss": 0.4892774820327759, + "step": 5942 + }, + { + "epoch": 1.3703020521097533, + "grad_norm": 1.4817693224477149, + "learning_rate": 4.951651741564544e-07, + "loss": 0.40406349301338196, + "step": 5943 + }, + { + "epoch": 1.370532626239336, + "grad_norm": 1.277384097830529, + "learning_rate": 4.948361000636812e-07, + "loss": 0.4219849407672882, + "step": 5944 + }, + { + "epoch": 1.3707632003689185, + "grad_norm": 1.7190062313169097, + "learning_rate": 4.945070994016008e-07, + "loss": 0.5329363346099854, + "step": 5945 + }, + { + "epoch": 1.3709937744985012, + "grad_norm": 1.5495655705207303, + "learning_rate": 4.941781722180361e-07, + "loss": 0.42577850818634033, + "step": 5946 + }, + { + "epoch": 1.371224348628084, + "grad_norm": 1.3916296167797302, + "learning_rate": 4.938493185608008e-07, + "loss": 0.4157155156135559, + "step": 5947 + }, + { + "epoch": 1.3714549227576667, + "grad_norm": 1.5016286739703502, + "learning_rate": 4.935205384776965e-07, + "loss": 0.46491485834121704, + "step": 5948 + }, + { + "epoch": 1.3716854968872494, + "grad_norm": 1.6766694792768029, + "learning_rate": 4.931918320165151e-07, + "loss": 0.39582759141921997, + "step": 5949 + }, + { + "epoch": 1.3719160710168319, + "grad_norm": 1.3277840228822322, + "learning_rate": 4.928631992250371e-07, + "loss": 0.4380473792552948, + "step": 5950 + }, + { + "epoch": 1.3721466451464146, + "grad_norm": 1.5358043238579873, + "learning_rate": 4.925346401510327e-07, + "loss": 
0.5044572949409485, + "step": 5951 + }, + { + "epoch": 1.372377219275997, + "grad_norm": 1.6172521688559274, + "learning_rate": 4.922061548422617e-07, + "loss": 0.4808889627456665, + "step": 5952 + }, + { + "epoch": 1.3726077934055798, + "grad_norm": 1.370713689883329, + "learning_rate": 4.91877743346472e-07, + "loss": 0.4215632677078247, + "step": 5953 + }, + { + "epoch": 1.3728383675351625, + "grad_norm": 1.4640509349497177, + "learning_rate": 4.915494057114025e-07, + "loss": 0.4999268651008606, + "step": 5954 + }, + { + "epoch": 1.3730689416647452, + "grad_norm": 1.593000178254792, + "learning_rate": 4.912211419847793e-07, + "loss": 0.476152241230011, + "step": 5955 + }, + { + "epoch": 1.373299515794328, + "grad_norm": 1.5436036358421792, + "learning_rate": 4.908929522143201e-07, + "loss": 0.4253045320510864, + "step": 5956 + }, + { + "epoch": 1.3735300899239105, + "grad_norm": 1.6726587032262756, + "learning_rate": 4.905648364477293e-07, + "loss": 0.4251098036766052, + "step": 5957 + }, + { + "epoch": 1.3737606640534932, + "grad_norm": 1.5635582188699524, + "learning_rate": 4.902367947327029e-07, + "loss": 0.3820844888687134, + "step": 5958 + }, + { + "epoch": 1.373991238183076, + "grad_norm": 1.5563353591748068, + "learning_rate": 4.899088271169245e-07, + "loss": 0.4725508689880371, + "step": 5959 + }, + { + "epoch": 1.3742218123126584, + "grad_norm": 1.4545077693536257, + "learning_rate": 4.895809336480675e-07, + "loss": 0.48313626646995544, + "step": 5960 + }, + { + "epoch": 1.374452386442241, + "grad_norm": 1.6596316713803083, + "learning_rate": 4.892531143737952e-07, + "loss": 0.5344939231872559, + "step": 5961 + }, + { + "epoch": 1.3746829605718238, + "grad_norm": 1.7551620350578117, + "learning_rate": 4.889253693417585e-07, + "loss": 0.4305552840232849, + "step": 5962 + }, + { + "epoch": 1.3749135347014065, + "grad_norm": 1.4302106398553562, + "learning_rate": 4.885976985995996e-07, + "loss": 0.3564034700393677, + "step": 5963 + }, + { + "epoch": 
1.3751441088309893, + "grad_norm": 1.4796542999179279, + "learning_rate": 4.882701021949475e-07, + "loss": 0.5498751997947693, + "step": 5964 + }, + { + "epoch": 1.3753746829605717, + "grad_norm": 1.5956710623028654, + "learning_rate": 4.879425801754226e-07, + "loss": 0.4489964246749878, + "step": 5965 + }, + { + "epoch": 1.3756052570901545, + "grad_norm": 1.7595842751992934, + "learning_rate": 4.87615132588633e-07, + "loss": 0.4142688810825348, + "step": 5966 + }, + { + "epoch": 1.3758358312197372, + "grad_norm": 1.483255834477138, + "learning_rate": 4.872877594821767e-07, + "loss": 0.3823632597923279, + "step": 5967 + }, + { + "epoch": 1.3760664053493197, + "grad_norm": 1.603982795420405, + "learning_rate": 4.869604609036408e-07, + "loss": 0.39014697074890137, + "step": 5968 + }, + { + "epoch": 1.3762969794789024, + "grad_norm": 1.5363032345717058, + "learning_rate": 4.866332369006016e-07, + "loss": 0.3907933235168457, + "step": 5969 + }, + { + "epoch": 1.376527553608485, + "grad_norm": 1.5125931439342233, + "learning_rate": 4.863060875206244e-07, + "loss": 0.3872087001800537, + "step": 5970 + }, + { + "epoch": 1.3767581277380678, + "grad_norm": 1.5847290584713085, + "learning_rate": 4.85979012811263e-07, + "loss": 0.40380537509918213, + "step": 5971 + }, + { + "epoch": 1.3769887018676505, + "grad_norm": 1.3127541034285726, + "learning_rate": 4.856520128200621e-07, + "loss": 0.39867663383483887, + "step": 5972 + }, + { + "epoch": 1.377219275997233, + "grad_norm": 1.7829413941875683, + "learning_rate": 4.853250875945534e-07, + "loss": 0.5337423086166382, + "step": 5973 + }, + { + "epoch": 1.3774498501268158, + "grad_norm": 1.4903518724810052, + "learning_rate": 4.849982371822593e-07, + "loss": 0.3824300765991211, + "step": 5974 + }, + { + "epoch": 1.3776804242563985, + "grad_norm": 1.4611697760932394, + "learning_rate": 4.846714616306907e-07, + "loss": 0.3613823652267456, + "step": 5975 + }, + { + "epoch": 1.377910998385981, + "grad_norm": 1.5701851835478555, + 
"learning_rate": 4.843447609873484e-07, + "loss": 0.5040241479873657, + "step": 5976 + }, + { + "epoch": 1.3781415725155637, + "grad_norm": 1.5801365248176698, + "learning_rate": 4.840181352997207e-07, + "loss": 0.4639400243759155, + "step": 5977 + }, + { + "epoch": 1.3783721466451464, + "grad_norm": 1.730401874176074, + "learning_rate": 4.836915846152867e-07, + "loss": 0.503246009349823, + "step": 5978 + }, + { + "epoch": 1.3786027207747291, + "grad_norm": 1.6695377873006745, + "learning_rate": 4.833651089815135e-07, + "loss": 0.3974607586860657, + "step": 5979 + }, + { + "epoch": 1.3788332949043118, + "grad_norm": 1.556324884896908, + "learning_rate": 4.830387084458573e-07, + "loss": 0.43200844526290894, + "step": 5980 + }, + { + "epoch": 1.3790638690338943, + "grad_norm": 1.8355646307086506, + "learning_rate": 4.827123830557644e-07, + "loss": 0.547272801399231, + "step": 5981 + }, + { + "epoch": 1.379294443163477, + "grad_norm": 1.5723785141918243, + "learning_rate": 4.823861328586688e-07, + "loss": 0.4509696960449219, + "step": 5982 + }, + { + "epoch": 1.3795250172930598, + "grad_norm": 1.53889123165165, + "learning_rate": 4.820599579019946e-07, + "loss": 0.46022483706474304, + "step": 5983 + }, + { + "epoch": 1.3797555914226423, + "grad_norm": 1.5251655198087088, + "learning_rate": 4.817338582331548e-07, + "loss": 0.40973198413848877, + "step": 5984 + }, + { + "epoch": 1.379986165552225, + "grad_norm": 1.6235538954137896, + "learning_rate": 4.814078338995515e-07, + "loss": 0.39012736082077026, + "step": 5985 + }, + { + "epoch": 1.3802167396818077, + "grad_norm": 1.6954879615528178, + "learning_rate": 4.810818849485749e-07, + "loss": 0.40657323598861694, + "step": 5986 + }, + { + "epoch": 1.3804473138113904, + "grad_norm": 1.4158383607530642, + "learning_rate": 4.80756011427606e-07, + "loss": 0.38662189245224, + "step": 5987 + }, + { + "epoch": 1.3806778879409731, + "grad_norm": 1.629559894183336, + "learning_rate": 4.804302133840126e-07, + "loss": 
0.4888705015182495, + "step": 5988 + }, + { + "epoch": 1.3809084620705556, + "grad_norm": 1.4732586688358036, + "learning_rate": 4.801044908651537e-07, + "loss": 0.4559556245803833, + "step": 5989 + }, + { + "epoch": 1.3811390362001383, + "grad_norm": 1.773370569584542, + "learning_rate": 4.797788439183757e-07, + "loss": 0.40912386775016785, + "step": 5990 + }, + { + "epoch": 1.381369610329721, + "grad_norm": 1.3364334005028415, + "learning_rate": 4.794532725910152e-07, + "loss": 0.3848627209663391, + "step": 5991 + }, + { + "epoch": 1.3816001844593035, + "grad_norm": 1.3860556916017956, + "learning_rate": 4.791277769303975e-07, + "loss": 0.4995359778404236, + "step": 5992 + }, + { + "epoch": 1.3818307585888863, + "grad_norm": 1.3898521995378452, + "learning_rate": 4.788023569838356e-07, + "loss": 0.38717859983444214, + "step": 5993 + }, + { + "epoch": 1.382061332718469, + "grad_norm": 1.7766923949498086, + "learning_rate": 4.784770127986339e-07, + "loss": 0.39855217933654785, + "step": 5994 + }, + { + "epoch": 1.3822919068480517, + "grad_norm": 1.337680228597258, + "learning_rate": 4.781517444220835e-07, + "loss": 0.38494858145713806, + "step": 5995 + }, + { + "epoch": 1.3825224809776344, + "grad_norm": 1.4735802599680248, + "learning_rate": 4.778265519014661e-07, + "loss": 0.44064784049987793, + "step": 5996 + }, + { + "epoch": 1.382753055107217, + "grad_norm": 1.8926413264660993, + "learning_rate": 4.775014352840512e-07, + "loss": 0.39377373456954956, + "step": 5997 + }, + { + "epoch": 1.3829836292367996, + "grad_norm": 1.5108151654480286, + "learning_rate": 4.771763946170979e-07, + "loss": 0.45127296447753906, + "step": 5998 + }, + { + "epoch": 1.3832142033663823, + "grad_norm": 1.4916107560429466, + "learning_rate": 4.768514299478545e-07, + "loss": 0.4999358654022217, + "step": 5999 + }, + { + "epoch": 1.3834447774959648, + "grad_norm": 1.7185286370183794, + "learning_rate": 4.7652654132355784e-07, + "loss": 0.49552851915359497, + "step": 6000 + }, + { + 
"epoch": 1.3836753516255476, + "grad_norm": 1.7765151369959267, + "learning_rate": 4.762017287914338e-07, + "loss": 0.49196135997772217, + "step": 6001 + }, + { + "epoch": 1.3839059257551303, + "grad_norm": 1.6417248034868954, + "learning_rate": 4.758769923986966e-07, + "loss": 0.3870600461959839, + "step": 6002 + }, + { + "epoch": 1.384136499884713, + "grad_norm": 1.6104154654929026, + "learning_rate": 4.7555233219255074e-07, + "loss": 0.4585425853729248, + "step": 6003 + }, + { + "epoch": 1.3843670740142957, + "grad_norm": 1.3699827425500786, + "learning_rate": 4.752277482201882e-07, + "loss": 0.4332588315010071, + "step": 6004 + }, + { + "epoch": 1.3845976481438782, + "grad_norm": 1.6005942921335146, + "learning_rate": 4.749032405287913e-07, + "loss": 0.4386274814605713, + "step": 6005 + }, + { + "epoch": 1.384828222273461, + "grad_norm": 1.430715117905666, + "learning_rate": 4.745788091655295e-07, + "loss": 0.5064895749092102, + "step": 6006 + }, + { + "epoch": 1.3850587964030436, + "grad_norm": 1.470846994377081, + "learning_rate": 4.7425445417756295e-07, + "loss": 0.4441327452659607, + "step": 6007 + }, + { + "epoch": 1.3852893705326261, + "grad_norm": 1.6191746478584856, + "learning_rate": 4.7393017561203965e-07, + "loss": 0.4415687918663025, + "step": 6008 + }, + { + "epoch": 1.3855199446622088, + "grad_norm": 1.4021203224812295, + "learning_rate": 4.736059735160973e-07, + "loss": 0.4668382704257965, + "step": 6009 + }, + { + "epoch": 1.3857505187917916, + "grad_norm": 1.6079029250549948, + "learning_rate": 4.732818479368615e-07, + "loss": 0.3981805443763733, + "step": 6010 + }, + { + "epoch": 1.3859810929213743, + "grad_norm": 1.4448652226463723, + "learning_rate": 4.7295779892144694e-07, + "loss": 0.4465348720550537, + "step": 6011 + }, + { + "epoch": 1.386211667050957, + "grad_norm": 1.7530840597871544, + "learning_rate": 4.7263382651695805e-07, + "loss": 0.4844682812690735, + "step": 6012 + }, + { + "epoch": 1.3864422411805395, + "grad_norm": 
1.417618664232542, + "learning_rate": 4.723099307704868e-07, + "loss": 0.4261378347873688, + "step": 6013 + }, + { + "epoch": 1.3866728153101222, + "grad_norm": 1.4997543603341101, + "learning_rate": 4.7198611172911506e-07, + "loss": 0.457815945148468, + "step": 6014 + }, + { + "epoch": 1.386903389439705, + "grad_norm": 1.570655771567204, + "learning_rate": 4.7166236943991333e-07, + "loss": 0.46352216601371765, + "step": 6015 + }, + { + "epoch": 1.3871339635692874, + "grad_norm": 1.486567492766103, + "learning_rate": 4.7133870394994104e-07, + "loss": 0.4166485667228699, + "step": 6016 + }, + { + "epoch": 1.3873645376988701, + "grad_norm": 1.6982826579565595, + "learning_rate": 4.710151153062456e-07, + "loss": 0.405789852142334, + "step": 6017 + }, + { + "epoch": 1.3875951118284529, + "grad_norm": 1.7459761562612983, + "learning_rate": 4.7069160355586456e-07, + "loss": 0.47718119621276855, + "step": 6018 + }, + { + "epoch": 1.3878256859580356, + "grad_norm": 1.5824023496617, + "learning_rate": 4.7036816874582307e-07, + "loss": 0.5040356516838074, + "step": 6019 + }, + { + "epoch": 1.3880562600876183, + "grad_norm": 1.5657039890557007, + "learning_rate": 4.700448109231362e-07, + "loss": 0.45093637704849243, + "step": 6020 + }, + { + "epoch": 1.3882868342172008, + "grad_norm": 1.4929438188817195, + "learning_rate": 4.6972153013480666e-07, + "loss": 0.5363638997077942, + "step": 6021 + }, + { + "epoch": 1.3885174083467835, + "grad_norm": 1.6076509313088967, + "learning_rate": 4.6939832642782684e-07, + "loss": 0.4917050004005432, + "step": 6022 + }, + { + "epoch": 1.3887479824763662, + "grad_norm": 1.692377103708349, + "learning_rate": 4.690751998491782e-07, + "loss": 0.43033331632614136, + "step": 6023 + }, + { + "epoch": 1.3889785566059487, + "grad_norm": 1.5272594017885164, + "learning_rate": 4.6875215044582973e-07, + "loss": 0.36168330907821655, + "step": 6024 + }, + { + "epoch": 1.3892091307355314, + "grad_norm": 1.693805471797637, + "learning_rate": 
4.6842917826474047e-07, + "loss": 0.48347967863082886, + "step": 6025 + }, + { + "epoch": 1.3894397048651141, + "grad_norm": 1.332022962916858, + "learning_rate": 4.681062833528572e-07, + "loss": 0.4493439495563507, + "step": 6026 + }, + { + "epoch": 1.3896702789946969, + "grad_norm": 1.4842335012941816, + "learning_rate": 4.677834657571165e-07, + "loss": 0.385773628950119, + "step": 6027 + }, + { + "epoch": 1.3899008531242796, + "grad_norm": 1.396017775513053, + "learning_rate": 4.674607255244426e-07, + "loss": 0.4254469573497772, + "step": 6028 + }, + { + "epoch": 1.390131427253862, + "grad_norm": 1.6964811881797437, + "learning_rate": 4.671380627017497e-07, + "loss": 0.5070454478263855, + "step": 6029 + }, + { + "epoch": 1.3903620013834448, + "grad_norm": 1.4647574188657595, + "learning_rate": 4.668154773359394e-07, + "loss": 0.44099801778793335, + "step": 6030 + }, + { + "epoch": 1.3905925755130275, + "grad_norm": 1.6731498815474952, + "learning_rate": 4.6649296947390314e-07, + "loss": 0.4965481162071228, + "step": 6031 + }, + { + "epoch": 1.39082314964261, + "grad_norm": 1.6621123973009748, + "learning_rate": 4.6617053916252116e-07, + "loss": 0.4085753262042999, + "step": 6032 + }, + { + "epoch": 1.3910537237721927, + "grad_norm": 1.473260966023028, + "learning_rate": 4.6584818644866106e-07, + "loss": 0.3768424391746521, + "step": 6033 + }, + { + "epoch": 1.3912842979017754, + "grad_norm": 1.7152094772871185, + "learning_rate": 4.6552591137918087e-07, + "loss": 0.4330044388771057, + "step": 6034 + }, + { + "epoch": 1.3915148720313582, + "grad_norm": 1.5907700374750249, + "learning_rate": 4.6520371400092584e-07, + "loss": 0.4669216275215149, + "step": 6035 + }, + { + "epoch": 1.3917454461609409, + "grad_norm": 1.8634085835731031, + "learning_rate": 4.648815943607314e-07, + "loss": 0.5491182208061218, + "step": 6036 + }, + { + "epoch": 1.3919760202905234, + "grad_norm": 1.439715262819595, + "learning_rate": 4.6455955250542e-07, + "loss": 0.4842255413532257, + 
"step": 6037 + }, + { + "epoch": 1.392206594420106, + "grad_norm": 1.598726710739168, + "learning_rate": 4.6423758848180427e-07, + "loss": 0.45479631423950195, + "step": 6038 + }, + { + "epoch": 1.3924371685496888, + "grad_norm": 1.5770365297702393, + "learning_rate": 4.6391570233668486e-07, + "loss": 0.4209587574005127, + "step": 6039 + }, + { + "epoch": 1.3926677426792713, + "grad_norm": 1.4722680740741498, + "learning_rate": 4.6359389411685145e-07, + "loss": 0.5061464905738831, + "step": 6040 + }, + { + "epoch": 1.392898316808854, + "grad_norm": 1.5166334201375402, + "learning_rate": 4.6327216386908196e-07, + "loss": 0.39443570375442505, + "step": 6041 + }, + { + "epoch": 1.3931288909384367, + "grad_norm": 1.6936024892202146, + "learning_rate": 4.6295051164014256e-07, + "loss": 0.4784463942050934, + "step": 6042 + }, + { + "epoch": 1.3933594650680194, + "grad_norm": 1.623401531095956, + "learning_rate": 4.6262893747678957e-07, + "loss": 0.41256606578826904, + "step": 6043 + }, + { + "epoch": 1.3935900391976022, + "grad_norm": 1.430742297932055, + "learning_rate": 4.623074414257662e-07, + "loss": 0.4507666230201721, + "step": 6044 + }, + { + "epoch": 1.3938206133271847, + "grad_norm": 1.4646678303979026, + "learning_rate": 4.6198602353380545e-07, + "loss": 0.3783376216888428, + "step": 6045 + }, + { + "epoch": 1.3940511874567674, + "grad_norm": 1.5485119918407955, + "learning_rate": 4.616646838476289e-07, + "loss": 0.47854840755462646, + "step": 6046 + }, + { + "epoch": 1.39428176158635, + "grad_norm": 1.506150277535636, + "learning_rate": 4.6134342241394685e-07, + "loss": 0.47121208906173706, + "step": 6047 + }, + { + "epoch": 1.3945123357159326, + "grad_norm": 1.4779397331062858, + "learning_rate": 4.610222392794569e-07, + "loss": 0.5211559534072876, + "step": 6048 + }, + { + "epoch": 1.3947429098455153, + "grad_norm": 2.0522570691736606, + "learning_rate": 4.6070113449084747e-07, + "loss": 0.5846370458602905, + "step": 6049 + }, + { + "epoch": 
1.394973483975098, + "grad_norm": 1.6651959806589232, + "learning_rate": 4.6038010809479365e-07, + "loss": 0.4787401854991913, + "step": 6050 + }, + { + "epoch": 1.3952040581046807, + "grad_norm": 1.336725780471279, + "learning_rate": 4.600591601379596e-07, + "loss": 0.36429738998413086, + "step": 6051 + }, + { + "epoch": 1.3954346322342635, + "grad_norm": 1.606284081701607, + "learning_rate": 4.597382906669992e-07, + "loss": 0.49923771619796753, + "step": 6052 + }, + { + "epoch": 1.395665206363846, + "grad_norm": 1.5476584348847333, + "learning_rate": 4.5941749972855326e-07, + "loss": 0.408005028963089, + "step": 6053 + }, + { + "epoch": 1.3958957804934287, + "grad_norm": 1.72927604568786, + "learning_rate": 4.590967873692523e-07, + "loss": 0.4524402618408203, + "step": 6054 + }, + { + "epoch": 1.3961263546230114, + "grad_norm": 1.5041096845532136, + "learning_rate": 4.587761536357152e-07, + "loss": 0.5264980792999268, + "step": 6055 + }, + { + "epoch": 1.3963569287525939, + "grad_norm": 1.6066275699787076, + "learning_rate": 4.5845559857454976e-07, + "loss": 0.5324279069900513, + "step": 6056 + }, + { + "epoch": 1.3965875028821766, + "grad_norm": 1.4996065290876746, + "learning_rate": 4.581351222323511e-07, + "loss": 0.5197574496269226, + "step": 6057 + }, + { + "epoch": 1.3968180770117593, + "grad_norm": 1.6418756331716369, + "learning_rate": 4.578147246557043e-07, + "loss": 0.4549001157283783, + "step": 6058 + }, + { + "epoch": 1.397048651141342, + "grad_norm": 1.374490396915421, + "learning_rate": 4.5749440589118183e-07, + "loss": 0.38597673177719116, + "step": 6059 + }, + { + "epoch": 1.3972792252709247, + "grad_norm": 1.3707652210777583, + "learning_rate": 4.57174165985346e-07, + "loss": 0.4104316532611847, + "step": 6060 + }, + { + "epoch": 1.3975097994005072, + "grad_norm": 1.7242255092716443, + "learning_rate": 4.5685400498474614e-07, + "loss": 0.5241787433624268, + "step": 6061 + }, + { + "epoch": 1.39774037353009, + "grad_norm": 1.668574015144598, + 
"learning_rate": 4.565339229359213e-07, + "loss": 0.5033289790153503, + "step": 6062 + }, + { + "epoch": 1.3979709476596724, + "grad_norm": 1.3309384356199967, + "learning_rate": 4.5621391988539894e-07, + "loss": 0.436188280582428, + "step": 6063 + }, + { + "epoch": 1.3982015217892552, + "grad_norm": 1.4783680897212301, + "learning_rate": 4.5589399587969414e-07, + "loss": 0.3885838985443115, + "step": 6064 + }, + { + "epoch": 1.3984320959188379, + "grad_norm": 1.6395174483956128, + "learning_rate": 4.555741509653116e-07, + "loss": 0.5140193104743958, + "step": 6065 + }, + { + "epoch": 1.3986626700484206, + "grad_norm": 1.360236032045127, + "learning_rate": 4.552543851887436e-07, + "loss": 0.41084468364715576, + "step": 6066 + }, + { + "epoch": 1.3988932441780033, + "grad_norm": 1.417896120601143, + "learning_rate": 4.549346985964718e-07, + "loss": 0.3606417179107666, + "step": 6067 + }, + { + "epoch": 1.3991238183075858, + "grad_norm": 1.5212574193639694, + "learning_rate": 4.546150912349653e-07, + "loss": 0.48518556356430054, + "step": 6068 + }, + { + "epoch": 1.3993543924371685, + "grad_norm": 1.6821671640024862, + "learning_rate": 4.5429556315068264e-07, + "loss": 0.5394424200057983, + "step": 6069 + }, + { + "epoch": 1.3995849665667512, + "grad_norm": 1.3734997636022714, + "learning_rate": 4.539761143900708e-07, + "loss": 0.40272367000579834, + "step": 6070 + }, + { + "epoch": 1.3998155406963337, + "grad_norm": 1.6175896107942709, + "learning_rate": 4.536567449995641e-07, + "loss": 0.4279879331588745, + "step": 6071 + }, + { + "epoch": 1.4000461148259165, + "grad_norm": 1.4620694447822713, + "learning_rate": 4.5333745502558695e-07, + "loss": 0.48560982942581177, + "step": 6072 + }, + { + "epoch": 1.4002766889554992, + "grad_norm": 1.7184355426607418, + "learning_rate": 4.530182445145506e-07, + "loss": 0.49256429076194763, + "step": 6073 + }, + { + "epoch": 1.4005072630850819, + "grad_norm": 1.4236944961072253, + "learning_rate": 4.5269911351285614e-07, + 
"loss": 0.5015553832054138, + "step": 6074 + }, + { + "epoch": 1.4007378372146646, + "grad_norm": 1.4505255602543088, + "learning_rate": 4.5238006206689204e-07, + "loss": 0.4313800632953644, + "step": 6075 + }, + { + "epoch": 1.400968411344247, + "grad_norm": 1.311079736416616, + "learning_rate": 4.520610902230363e-07, + "loss": 0.3440586030483246, + "step": 6076 + }, + { + "epoch": 1.4011989854738298, + "grad_norm": 1.4064686390113332, + "learning_rate": 4.517421980276538e-07, + "loss": 0.43868017196655273, + "step": 6077 + }, + { + "epoch": 1.4014295596034125, + "grad_norm": 1.6307364330463041, + "learning_rate": 4.5142338552709923e-07, + "loss": 0.5581029057502747, + "step": 6078 + }, + { + "epoch": 1.401660133732995, + "grad_norm": 1.6962393590938891, + "learning_rate": 4.5110465276771524e-07, + "loss": 0.4543154835700989, + "step": 6079 + }, + { + "epoch": 1.4018907078625777, + "grad_norm": 1.5554679193557313, + "learning_rate": 4.507859997958333e-07, + "loss": 0.5229466557502747, + "step": 6080 + }, + { + "epoch": 1.4021212819921605, + "grad_norm": 1.5285075075955497, + "learning_rate": 4.504674266577724e-07, + "loss": 0.46781739592552185, + "step": 6081 + }, + { + "epoch": 1.4023518561217432, + "grad_norm": 1.6198419428344395, + "learning_rate": 4.5014893339983993e-07, + "loss": 0.48040711879730225, + "step": 6082 + }, + { + "epoch": 1.402582430251326, + "grad_norm": 1.5279313939865138, + "learning_rate": 4.49830520068333e-07, + "loss": 0.5039708018302917, + "step": 6083 + }, + { + "epoch": 1.4028130043809084, + "grad_norm": 1.4998739241266676, + "learning_rate": 4.495121867095354e-07, + "loss": 0.43496155738830566, + "step": 6084 + }, + { + "epoch": 1.403043578510491, + "grad_norm": 1.3838778339679694, + "learning_rate": 4.4919393336972045e-07, + "loss": 0.4603109061717987, + "step": 6085 + }, + { + "epoch": 1.4032741526400738, + "grad_norm": 1.476085268646584, + "learning_rate": 4.488757600951496e-07, + "loss": 0.4571962356567383, + "step": 6086 + }, + { + 
"epoch": 1.4035047267696563, + "grad_norm": 1.4791952167701867, + "learning_rate": 4.485576669320729e-07, + "loss": 0.46302443742752075, + "step": 6087 + }, + { + "epoch": 1.403735300899239, + "grad_norm": 1.675302072516594, + "learning_rate": 4.482396539267275e-07, + "loss": 0.39066869020462036, + "step": 6088 + }, + { + "epoch": 1.4039658750288218, + "grad_norm": 1.704176039322231, + "learning_rate": 4.4792172112534076e-07, + "loss": 0.4797130823135376, + "step": 6089 + }, + { + "epoch": 1.4041964491584045, + "grad_norm": 1.5835144658620484, + "learning_rate": 4.4760386857412704e-07, + "loss": 0.4578198492527008, + "step": 6090 + }, + { + "epoch": 1.4044270232879872, + "grad_norm": 1.3987211085891795, + "learning_rate": 4.472860963192889e-07, + "loss": 0.40768736600875854, + "step": 6091 + }, + { + "epoch": 1.4046575974175697, + "grad_norm": 1.4530633567004236, + "learning_rate": 4.4696840440701846e-07, + "loss": 0.4201413094997406, + "step": 6092 + }, + { + "epoch": 1.4048881715471524, + "grad_norm": 1.3648395822246437, + "learning_rate": 4.466507928834951e-07, + "loss": 0.45901796221733093, + "step": 6093 + }, + { + "epoch": 1.4051187456767351, + "grad_norm": 1.6465847208416895, + "learning_rate": 4.463332617948874e-07, + "loss": 0.4699435830116272, + "step": 6094 + }, + { + "epoch": 1.4053493198063176, + "grad_norm": 1.4755445259366653, + "learning_rate": 4.46015811187351e-07, + "loss": 0.4526669383049011, + "step": 6095 + }, + { + "epoch": 1.4055798939359003, + "grad_norm": 1.5721685230021194, + "learning_rate": 4.456984411070313e-07, + "loss": 0.46754884719848633, + "step": 6096 + }, + { + "epoch": 1.405810468065483, + "grad_norm": 2.1874728205075495, + "learning_rate": 4.453811516000604e-07, + "loss": 0.5119268894195557, + "step": 6097 + }, + { + "epoch": 1.4060410421950658, + "grad_norm": 2.056110026644097, + "learning_rate": 4.4506394271256043e-07, + "loss": 0.42980802059173584, + "step": 6098 + }, + { + "epoch": 1.4062716163246485, + "grad_norm": 
1.5339161636381375, + "learning_rate": 4.447468144906401e-07, + "loss": 0.5895063281059265, + "step": 6099 + }, + { + "epoch": 1.406502190454231, + "grad_norm": 1.3796241305160553, + "learning_rate": 4.4442976698039803e-07, + "loss": 0.42768803238868713, + "step": 6100 + }, + { + "epoch": 1.4067327645838137, + "grad_norm": 1.608854909074267, + "learning_rate": 4.4411280022791943e-07, + "loss": 0.44234544038772583, + "step": 6101 + }, + { + "epoch": 1.4069633387133964, + "grad_norm": 1.3028889839673445, + "learning_rate": 4.437959142792791e-07, + "loss": 0.4382736086845398, + "step": 6102 + }, + { + "epoch": 1.407193912842979, + "grad_norm": 1.6088674485493302, + "learning_rate": 4.4347910918054e-07, + "loss": 0.47603681683540344, + "step": 6103 + }, + { + "epoch": 1.4074244869725616, + "grad_norm": 1.8816511615485159, + "learning_rate": 4.431623849777522e-07, + "loss": 0.5562035441398621, + "step": 6104 + }, + { + "epoch": 1.4076550611021443, + "grad_norm": 2.2517510056002763, + "learning_rate": 4.4284574171695535e-07, + "loss": 0.4153141677379608, + "step": 6105 + }, + { + "epoch": 1.407885635231727, + "grad_norm": 1.2534764690727898, + "learning_rate": 4.425291794441762e-07, + "loss": 0.4825887680053711, + "step": 6106 + }, + { + "epoch": 1.4081162093613098, + "grad_norm": 1.4829126230878127, + "learning_rate": 4.4221269820543104e-07, + "loss": 0.4853668808937073, + "step": 6107 + }, + { + "epoch": 1.4083467834908923, + "grad_norm": 1.6140810272295893, + "learning_rate": 4.418962980467229e-07, + "loss": 0.5615251064300537, + "step": 6108 + }, + { + "epoch": 1.408577357620475, + "grad_norm": 1.8397680714752904, + "learning_rate": 4.4157997901404396e-07, + "loss": 0.38605546951293945, + "step": 6109 + }, + { + "epoch": 1.4088079317500577, + "grad_norm": 1.412066772348378, + "learning_rate": 4.412637411533745e-07, + "loss": 0.41582173109054565, + "step": 6110 + }, + { + "epoch": 1.4090385058796402, + "grad_norm": 1.4963267141581975, + "learning_rate": 
4.4094758451068327e-07, + "loss": 0.38091376423835754, + "step": 6111 + }, + { + "epoch": 1.409269080009223, + "grad_norm": 1.5465721612260863, + "learning_rate": 4.4063150913192635e-07, + "loss": 0.43319058418273926, + "step": 6112 + }, + { + "epoch": 1.4094996541388056, + "grad_norm": 1.2123497825560654, + "learning_rate": 4.403155150630484e-07, + "loss": 0.43207013607025146, + "step": 6113 + }, + { + "epoch": 1.4097302282683883, + "grad_norm": 1.7217391258871346, + "learning_rate": 4.399996023499829e-07, + "loss": 0.43750250339508057, + "step": 6114 + }, + { + "epoch": 1.409960802397971, + "grad_norm": 1.5123653802002535, + "learning_rate": 4.3968377103865016e-07, + "loss": 0.44084444642066956, + "step": 6115 + }, + { + "epoch": 1.4101913765275536, + "grad_norm": 1.4135580211481893, + "learning_rate": 4.3936802117495997e-07, + "loss": 0.4752010405063629, + "step": 6116 + }, + { + "epoch": 1.4104219506571363, + "grad_norm": 1.384945744446678, + "learning_rate": 4.390523528048098e-07, + "loss": 0.39239025115966797, + "step": 6117 + }, + { + "epoch": 1.410652524786719, + "grad_norm": 1.7179287290824201, + "learning_rate": 4.387367659740856e-07, + "loss": 0.46021080017089844, + "step": 6118 + }, + { + "epoch": 1.4108830989163015, + "grad_norm": 1.3751290560349647, + "learning_rate": 4.3842126072866014e-07, + "loss": 0.4079766571521759, + "step": 6119 + }, + { + "epoch": 1.4111136730458842, + "grad_norm": 1.5182170234243058, + "learning_rate": 4.381058371143964e-07, + "loss": 0.4922672510147095, + "step": 6120 + }, + { + "epoch": 1.411344247175467, + "grad_norm": 1.5200373777326295, + "learning_rate": 4.377904951771438e-07, + "loss": 0.3950929045677185, + "step": 6121 + }, + { + "epoch": 1.4115748213050496, + "grad_norm": 1.6189013836504815, + "learning_rate": 4.374752349627402e-07, + "loss": 0.503406286239624, + "step": 6122 + }, + { + "epoch": 1.4118053954346323, + "grad_norm": 1.724327270706253, + "learning_rate": 4.3716005651701215e-07, + "loss": 
0.49198317527770996, + "step": 6123 + }, + { + "epoch": 1.4120359695642148, + "grad_norm": 1.424527206510087, + "learning_rate": 4.368449598857742e-07, + "loss": 0.47396305203437805, + "step": 6124 + }, + { + "epoch": 1.4122665436937976, + "grad_norm": 1.7537535213801698, + "learning_rate": 4.365299451148291e-07, + "loss": 0.5248152017593384, + "step": 6125 + }, + { + "epoch": 1.4124971178233803, + "grad_norm": 1.310814657820865, + "learning_rate": 4.362150122499666e-07, + "loss": 0.44327419996261597, + "step": 6126 + }, + { + "epoch": 1.4127276919529628, + "grad_norm": 1.5885906377106098, + "learning_rate": 4.3590016133696626e-07, + "loss": 0.4628877639770508, + "step": 6127 + }, + { + "epoch": 1.4129582660825455, + "grad_norm": 1.5166490469327556, + "learning_rate": 4.355853924215942e-07, + "loss": 0.5277193188667297, + "step": 6128 + }, + { + "epoch": 1.4131888402121282, + "grad_norm": 1.6202759290555122, + "learning_rate": 4.3527070554960577e-07, + "loss": 0.4675426781177521, + "step": 6129 + }, + { + "epoch": 1.413419414341711, + "grad_norm": 1.668904355836008, + "learning_rate": 4.349561007667433e-07, + "loss": 0.3762160539627075, + "step": 6130 + }, + { + "epoch": 1.4136499884712936, + "grad_norm": 1.5686457690092273, + "learning_rate": 4.346415781187385e-07, + "loss": 0.4797256588935852, + "step": 6131 + }, + { + "epoch": 1.4138805626008761, + "grad_norm": 1.283129438483415, + "learning_rate": 4.3432713765130967e-07, + "loss": 0.4348931312561035, + "step": 6132 + }, + { + "epoch": 1.4141111367304589, + "grad_norm": 1.72495987311985, + "learning_rate": 4.3401277941016435e-07, + "loss": 0.5080585479736328, + "step": 6133 + }, + { + "epoch": 1.4143417108600416, + "grad_norm": 1.5083246190317607, + "learning_rate": 4.33698503440998e-07, + "loss": 0.40223604440689087, + "step": 6134 + }, + { + "epoch": 1.414572284989624, + "grad_norm": 1.5888336584861464, + "learning_rate": 4.3338430978949315e-07, + "loss": 0.4460202753543854, + "step": 6135 + }, + { + "epoch": 
1.4148028591192068, + "grad_norm": 1.6992292342961226, + "learning_rate": 4.3307019850132167e-07, + "loss": 0.5814889669418335, + "step": 6136 + }, + { + "epoch": 1.4150334332487895, + "grad_norm": 1.366462724450419, + "learning_rate": 4.3275616962214214e-07, + "loss": 0.39237886667251587, + "step": 6137 + }, + { + "epoch": 1.4152640073783722, + "grad_norm": 1.8844588932900945, + "learning_rate": 4.324422231976025e-07, + "loss": 0.4621772766113281, + "step": 6138 + }, + { + "epoch": 1.415494581507955, + "grad_norm": 1.2090393738968102, + "learning_rate": 4.3212835927333745e-07, + "loss": 0.3722139596939087, + "step": 6139 + }, + { + "epoch": 1.4157251556375374, + "grad_norm": 1.4849768206374545, + "learning_rate": 4.3181457789497055e-07, + "loss": 0.5007534623146057, + "step": 6140 + }, + { + "epoch": 1.4159557297671201, + "grad_norm": 1.603501037396303, + "learning_rate": 4.315008791081135e-07, + "loss": 0.470672607421875, + "step": 6141 + }, + { + "epoch": 1.4161863038967029, + "grad_norm": 1.6882048347200689, + "learning_rate": 4.3118726295836495e-07, + "loss": 0.5196114778518677, + "step": 6142 + }, + { + "epoch": 1.4164168780262854, + "grad_norm": 1.686399785386393, + "learning_rate": 4.3087372949131275e-07, + "loss": 0.4606804847717285, + "step": 6143 + }, + { + "epoch": 1.416647452155868, + "grad_norm": 1.2427386262927842, + "learning_rate": 4.3056027875253156e-07, + "loss": 0.3926661014556885, + "step": 6144 + }, + { + "epoch": 1.4168780262854508, + "grad_norm": 1.5075319697699416, + "learning_rate": 4.3024691078758536e-07, + "loss": 0.4570828080177307, + "step": 6145 + }, + { + "epoch": 1.4171086004150335, + "grad_norm": 1.4876286685500335, + "learning_rate": 4.299336256420245e-07, + "loss": 0.398615300655365, + "step": 6146 + }, + { + "epoch": 1.4173391745446162, + "grad_norm": 1.5413174329970663, + "learning_rate": 4.2962042336138873e-07, + "loss": 0.47571802139282227, + "step": 6147 + }, + { + "epoch": 1.4175697486741987, + "grad_norm": 
1.5960399575320494, + "learning_rate": 4.2930730399120487e-07, + "loss": 0.4266431927680969, + "step": 6148 + }, + { + "epoch": 1.4178003228037814, + "grad_norm": 1.5511638894349447, + "learning_rate": 4.289942675769886e-07, + "loss": 0.47870057821273804, + "step": 6149 + }, + { + "epoch": 1.4180308969333641, + "grad_norm": 1.3514029969532406, + "learning_rate": 4.2868131416424223e-07, + "loss": 0.3947669267654419, + "step": 6150 + }, + { + "epoch": 1.4182614710629466, + "grad_norm": 1.6045441623823578, + "learning_rate": 4.283684437984573e-07, + "loss": 0.49074164032936096, + "step": 6151 + }, + { + "epoch": 1.4184920451925294, + "grad_norm": 1.5267380397937564, + "learning_rate": 4.280556565251123e-07, + "loss": 0.5540445446968079, + "step": 6152 + }, + { + "epoch": 1.418722619322112, + "grad_norm": 1.4292058799019856, + "learning_rate": 4.2774295238967386e-07, + "loss": 0.4898286461830139, + "step": 6153 + }, + { + "epoch": 1.4189531934516948, + "grad_norm": 1.5872207462828773, + "learning_rate": 4.2743033143759733e-07, + "loss": 0.5432708859443665, + "step": 6154 + }, + { + "epoch": 1.4191837675812775, + "grad_norm": 1.811563729099354, + "learning_rate": 4.2711779371432445e-07, + "loss": 0.4438853859901428, + "step": 6155 + }, + { + "epoch": 1.41941434171086, + "grad_norm": 1.4197202159023756, + "learning_rate": 4.268053392652863e-07, + "loss": 0.4885905385017395, + "step": 6156 + }, + { + "epoch": 1.4196449158404427, + "grad_norm": 2.10234923243058, + "learning_rate": 4.264929681359013e-07, + "loss": 0.4465547204017639, + "step": 6157 + }, + { + "epoch": 1.4198754899700254, + "grad_norm": 1.5987256760741122, + "learning_rate": 4.2618068037157594e-07, + "loss": 0.4392780661582947, + "step": 6158 + }, + { + "epoch": 1.420106064099608, + "grad_norm": 1.7421664904589054, + "learning_rate": 4.258684760177039e-07, + "loss": 0.4501269459724426, + "step": 6159 + }, + { + "epoch": 1.4203366382291907, + "grad_norm": 1.399976858224263, + "learning_rate": 
4.2555635511966783e-07, + "loss": 0.38439738750457764, + "step": 6160 + }, + { + "epoch": 1.4205672123587734, + "grad_norm": 1.4211214514262747, + "learning_rate": 4.2524431772283743e-07, + "loss": 0.4679202437400818, + "step": 6161 + }, + { + "epoch": 1.420797786488356, + "grad_norm": 1.3094843029172225, + "learning_rate": 4.2493236387257e-07, + "loss": 0.33505773544311523, + "step": 6162 + }, + { + "epoch": 1.4210283606179388, + "grad_norm": 1.7083049967506945, + "learning_rate": 4.246204936142116e-07, + "loss": 0.39141514897346497, + "step": 6163 + }, + { + "epoch": 1.4212589347475213, + "grad_norm": 1.5786326298364493, + "learning_rate": 4.243087069930958e-07, + "loss": 0.49278295040130615, + "step": 6164 + }, + { + "epoch": 1.421489508877104, + "grad_norm": 2.2314439595882214, + "learning_rate": 4.239970040545442e-07, + "loss": 0.44093143939971924, + "step": 6165 + }, + { + "epoch": 1.4217200830066867, + "grad_norm": 1.5138193694081605, + "learning_rate": 4.236853848438654e-07, + "loss": 0.3840683102607727, + "step": 6166 + }, + { + "epoch": 1.4219506571362692, + "grad_norm": 1.7654139979291832, + "learning_rate": 4.23373849406357e-07, + "loss": 0.49814748764038086, + "step": 6167 + }, + { + "epoch": 1.422181231265852, + "grad_norm": 1.672205831624779, + "learning_rate": 4.2306239778730314e-07, + "loss": 0.37481504678726196, + "step": 6168 + }, + { + "epoch": 1.4224118053954347, + "grad_norm": 1.6089555356775624, + "learning_rate": 4.227510300319772e-07, + "loss": 0.3936859965324402, + "step": 6169 + }, + { + "epoch": 1.4226423795250174, + "grad_norm": 1.6958111197730896, + "learning_rate": 4.224397461856389e-07, + "loss": 0.4448816478252411, + "step": 6170 + }, + { + "epoch": 1.4228729536546, + "grad_norm": 1.7506080980818486, + "learning_rate": 4.22128546293537e-07, + "loss": 0.5494886040687561, + "step": 6171 + }, + { + "epoch": 1.4231035277841826, + "grad_norm": 1.6093955633210433, + "learning_rate": 4.218174304009078e-07, + "loss": 0.4532161355018616, + 
"step": 6172 + }, + { + "epoch": 1.4233341019137653, + "grad_norm": 1.5423276922709723, + "learning_rate": 4.215063985529743e-07, + "loss": 0.4771450161933899, + "step": 6173 + }, + { + "epoch": 1.4235646760433478, + "grad_norm": 1.4359456178719159, + "learning_rate": 4.211954507949491e-07, + "loss": 0.40784329175949097, + "step": 6174 + }, + { + "epoch": 1.4237952501729305, + "grad_norm": 1.6548161498628766, + "learning_rate": 4.208845871720308e-07, + "loss": 0.5336268544197083, + "step": 6175 + }, + { + "epoch": 1.4240258243025132, + "grad_norm": 1.495644640745375, + "learning_rate": 4.205738077294072e-07, + "loss": 0.44641751050949097, + "step": 6176 + }, + { + "epoch": 1.424256398432096, + "grad_norm": 1.650188328042211, + "learning_rate": 4.2026311251225264e-07, + "loss": 0.4370793104171753, + "step": 6177 + }, + { + "epoch": 1.4244869725616787, + "grad_norm": 1.5423618719597711, + "learning_rate": 4.1995250156573046e-07, + "loss": 0.4290730953216553, + "step": 6178 + }, + { + "epoch": 1.4247175466912612, + "grad_norm": 1.8757556733756044, + "learning_rate": 4.196419749349904e-07, + "loss": 0.5021491646766663, + "step": 6179 + }, + { + "epoch": 1.4249481208208439, + "grad_norm": 1.4243786827618563, + "learning_rate": 4.193315326651711e-07, + "loss": 0.3880186080932617, + "step": 6180 + }, + { + "epoch": 1.4251786949504266, + "grad_norm": 1.6032235222838507, + "learning_rate": 4.1902117480139876e-07, + "loss": 0.46498721837997437, + "step": 6181 + }, + { + "epoch": 1.425409269080009, + "grad_norm": 1.6074916356613946, + "learning_rate": 4.187109013887863e-07, + "loss": 0.45799821615219116, + "step": 6182 + }, + { + "epoch": 1.4256398432095918, + "grad_norm": 1.7936327965955485, + "learning_rate": 4.1840071247243594e-07, + "loss": 0.47459733486175537, + "step": 6183 + }, + { + "epoch": 1.4258704173391745, + "grad_norm": 1.7628830057109544, + "learning_rate": 4.18090608097436e-07, + "loss": 0.47636276483535767, + "step": 6184 + }, + { + "epoch": 
1.4261009914687572, + "grad_norm": 1.4575388433663756, + "learning_rate": 4.17780588308864e-07, + "loss": 0.4710165858268738, + "step": 6185 + }, + { + "epoch": 1.42633156559834, + "grad_norm": 1.6068491390352067, + "learning_rate": 4.174706531517836e-07, + "loss": 0.4222904443740845, + "step": 6186 + }, + { + "epoch": 1.4265621397279225, + "grad_norm": 1.6136307494472921, + "learning_rate": 4.171608026712476e-07, + "loss": 0.43496620655059814, + "step": 6187 + }, + { + "epoch": 1.4267927138575052, + "grad_norm": 1.6637888441260775, + "learning_rate": 4.1685103691229597e-07, + "loss": 0.5178344249725342, + "step": 6188 + }, + { + "epoch": 1.4270232879870879, + "grad_norm": 1.2438461713878222, + "learning_rate": 4.1654135591995644e-07, + "loss": 0.4033231735229492, + "step": 6189 + }, + { + "epoch": 1.4272538621166704, + "grad_norm": 1.6711330724791171, + "learning_rate": 4.162317597392436e-07, + "loss": 0.3368793725967407, + "step": 6190 + }, + { + "epoch": 1.427484436246253, + "grad_norm": 1.6185157962363963, + "learning_rate": 4.159222484151612e-07, + "loss": 0.44133609533309937, + "step": 6191 + }, + { + "epoch": 1.4277150103758358, + "grad_norm": 1.4778493402771002, + "learning_rate": 4.1561282199269944e-07, + "loss": 0.431888222694397, + "step": 6192 + }, + { + "epoch": 1.4279455845054185, + "grad_norm": 1.6042487363335018, + "learning_rate": 4.1530348051683615e-07, + "loss": 0.4319697618484497, + "step": 6193 + }, + { + "epoch": 1.4281761586350012, + "grad_norm": 2.1012743912812986, + "learning_rate": 4.1499422403253783e-07, + "loss": 0.5468018054962158, + "step": 6194 + }, + { + "epoch": 1.4284067327645837, + "grad_norm": 1.5851271799276925, + "learning_rate": 4.1468505258475784e-07, + "loss": 0.5083246231079102, + "step": 6195 + }, + { + "epoch": 1.4286373068941665, + "grad_norm": 1.5639019523203612, + "learning_rate": 4.1437596621843774e-07, + "loss": 0.3767821788787842, + "step": 6196 + }, + { + "epoch": 1.4288678810237492, + "grad_norm": 
1.7459586887034657, + "learning_rate": 4.140669649785058e-07, + "loss": 0.5210238099098206, + "step": 6197 + }, + { + "epoch": 1.4290984551533317, + "grad_norm": 1.7429606479800976, + "learning_rate": 4.1375804890987907e-07, + "loss": 0.4498119354248047, + "step": 6198 + }, + { + "epoch": 1.4293290292829144, + "grad_norm": 1.8267093368864302, + "learning_rate": 4.134492180574609e-07, + "loss": 0.5093557238578796, + "step": 6199 + }, + { + "epoch": 1.429559603412497, + "grad_norm": 1.422406352052411, + "learning_rate": 4.131404724661438e-07, + "loss": 0.4745742082595825, + "step": 6200 + }, + { + "epoch": 1.4297901775420798, + "grad_norm": 1.506088588333767, + "learning_rate": 4.128318121808068e-07, + "loss": 0.45697301626205444, + "step": 6201 + }, + { + "epoch": 1.4300207516716625, + "grad_norm": 1.7309660786915744, + "learning_rate": 4.125232372463161e-07, + "loss": 0.4690994918346405, + "step": 6202 + }, + { + "epoch": 1.430251325801245, + "grad_norm": 1.6241026421208185, + "learning_rate": 4.1221474770752696e-07, + "loss": 0.49369046092033386, + "step": 6203 + }, + { + "epoch": 1.4304818999308277, + "grad_norm": 1.573925179309737, + "learning_rate": 4.1190634360928113e-07, + "loss": 0.5137126445770264, + "step": 6204 + }, + { + "epoch": 1.4307124740604105, + "grad_norm": 1.492371449937338, + "learning_rate": 4.1159802499640883e-07, + "loss": 0.43663549423217773, + "step": 6205 + }, + { + "epoch": 1.430943048189993, + "grad_norm": 1.373244593865611, + "learning_rate": 4.112897919137265e-07, + "loss": 0.40197718143463135, + "step": 6206 + }, + { + "epoch": 1.4311736223195757, + "grad_norm": 1.782636444844866, + "learning_rate": 4.1098164440603967e-07, + "loss": 0.5537480115890503, + "step": 6207 + }, + { + "epoch": 1.4314041964491584, + "grad_norm": 1.415124349915093, + "learning_rate": 4.1067358251814e-07, + "loss": 0.36077365279197693, + "step": 6208 + }, + { + "epoch": 1.4316347705787411, + "grad_norm": 1.8848844116732066, + "learning_rate": 
4.103656062948081e-07, + "loss": 0.5421038866043091, + "step": 6209 + }, + { + "epoch": 1.4318653447083238, + "grad_norm": 1.5989095555214856, + "learning_rate": 4.100577157808107e-07, + "loss": 0.4330317974090576, + "step": 6210 + }, + { + "epoch": 1.4320959188379063, + "grad_norm": 1.5778977933757077, + "learning_rate": 4.0974991102090315e-07, + "loss": 0.4734618067741394, + "step": 6211 + }, + { + "epoch": 1.432326492967489, + "grad_norm": 1.7307541730622933, + "learning_rate": 4.0944219205982853e-07, + "loss": 0.4664125442504883, + "step": 6212 + }, + { + "epoch": 1.4325570670970718, + "grad_norm": 1.5163510968488794, + "learning_rate": 4.09134558942316e-07, + "loss": 0.5214053988456726, + "step": 6213 + }, + { + "epoch": 1.4327876412266543, + "grad_norm": 1.4446024999002893, + "learning_rate": 4.08827011713084e-07, + "loss": 0.4694370627403259, + "step": 6214 + }, + { + "epoch": 1.433018215356237, + "grad_norm": 1.4399092047479434, + "learning_rate": 4.0851955041683674e-07, + "loss": 0.46517378091812134, + "step": 6215 + }, + { + "epoch": 1.4332487894858197, + "grad_norm": 1.589744461016997, + "learning_rate": 4.0821217509826766e-07, + "loss": 0.49152523279190063, + "step": 6216 + }, + { + "epoch": 1.4334793636154024, + "grad_norm": 1.3335404796705832, + "learning_rate": 4.0790488580205616e-07, + "loss": 0.4272884726524353, + "step": 6217 + }, + { + "epoch": 1.4337099377449851, + "grad_norm": 1.7167989658225775, + "learning_rate": 4.075976825728703e-07, + "loss": 0.4585829973220825, + "step": 6218 + }, + { + "epoch": 1.4339405118745676, + "grad_norm": 1.4284884424474726, + "learning_rate": 4.07290565455365e-07, + "loss": 0.33463186025619507, + "step": 6219 + }, + { + "epoch": 1.4341710860041503, + "grad_norm": 1.618873724040505, + "learning_rate": 4.0698353449418344e-07, + "loss": 0.4228953719139099, + "step": 6220 + }, + { + "epoch": 1.434401660133733, + "grad_norm": 1.688194150248175, + "learning_rate": 4.066765897339547e-07, + "loss": 0.5336583256721497, + 
"step": 6221 + }, + { + "epoch": 1.4346322342633155, + "grad_norm": 1.590308662997971, + "learning_rate": 4.063697312192972e-07, + "loss": 0.4779771864414215, + "step": 6222 + }, + { + "epoch": 1.4348628083928983, + "grad_norm": 1.4786534556099964, + "learning_rate": 4.060629589948155e-07, + "loss": 0.35226666927337646, + "step": 6223 + }, + { + "epoch": 1.435093382522481, + "grad_norm": 1.7110004239307235, + "learning_rate": 4.0575627310510174e-07, + "loss": 0.5006309747695923, + "step": 6224 + }, + { + "epoch": 1.4353239566520637, + "grad_norm": 1.5102552970375984, + "learning_rate": 4.0544967359473645e-07, + "loss": 0.3925382196903229, + "step": 6225 + }, + { + "epoch": 1.4355545307816464, + "grad_norm": 1.4323897305301354, + "learning_rate": 4.0514316050828643e-07, + "loss": 0.3443659543991089, + "step": 6226 + }, + { + "epoch": 1.435785104911229, + "grad_norm": 1.3832333833383677, + "learning_rate": 4.048367338903067e-07, + "loss": 0.35585030913352966, + "step": 6227 + }, + { + "epoch": 1.4360156790408116, + "grad_norm": 1.551815991519559, + "learning_rate": 4.045303937853395e-07, + "loss": 0.4147206246852875, + "step": 6228 + }, + { + "epoch": 1.4362462531703943, + "grad_norm": 1.2817256800052734, + "learning_rate": 4.0422414023791486e-07, + "loss": 0.4475427567958832, + "step": 6229 + }, + { + "epoch": 1.4364768272999768, + "grad_norm": 1.3842198366935599, + "learning_rate": 4.0391797329254897e-07, + "loss": 0.5235386490821838, + "step": 6230 + }, + { + "epoch": 1.4367074014295595, + "grad_norm": 1.4929978689012695, + "learning_rate": 4.036118929937472e-07, + "loss": 0.3543087840080261, + "step": 6231 + }, + { + "epoch": 1.4369379755591423, + "grad_norm": 1.793735853632873, + "learning_rate": 4.03305899386001e-07, + "loss": 0.4718255400657654, + "step": 6232 + }, + { + "epoch": 1.437168549688725, + "grad_norm": 1.338180352532036, + "learning_rate": 4.0299999251378924e-07, + "loss": 0.41239792108535767, + "step": 6233 + }, + { + "epoch": 1.4373991238183077, + 
"grad_norm": 1.5900128771725797, + "learning_rate": 4.026941724215791e-07, + "loss": 0.4241238236427307, + "step": 6234 + }, + { + "epoch": 1.4376296979478902, + "grad_norm": 1.4625134538700348, + "learning_rate": 4.0238843915382435e-07, + "loss": 0.43678992986679077, + "step": 6235 + }, + { + "epoch": 1.437860272077473, + "grad_norm": 1.3845075397304552, + "learning_rate": 4.0208279275496706e-07, + "loss": 0.4304202198982239, + "step": 6236 + }, + { + "epoch": 1.4380908462070556, + "grad_norm": 1.4379971371115365, + "learning_rate": 4.0177723326943516e-07, + "loss": 0.4297143816947937, + "step": 6237 + }, + { + "epoch": 1.4383214203366381, + "grad_norm": 1.4713452003345164, + "learning_rate": 4.0147176074164557e-07, + "loss": 0.4823951721191406, + "step": 6238 + }, + { + "epoch": 1.4385519944662208, + "grad_norm": 1.4766475893290447, + "learning_rate": 4.0116637521600104e-07, + "loss": 0.41384291648864746, + "step": 6239 + }, + { + "epoch": 1.4387825685958036, + "grad_norm": 1.4772189735738515, + "learning_rate": 4.008610767368933e-07, + "loss": 0.5725995898246765, + "step": 6240 + }, + { + "epoch": 1.4390131427253863, + "grad_norm": 1.580155865045121, + "learning_rate": 4.0055586534869976e-07, + "loss": 0.5222553014755249, + "step": 6241 + }, + { + "epoch": 1.439243716854969, + "grad_norm": 1.3886146191032183, + "learning_rate": 4.002507410957864e-07, + "loss": 0.33871912956237793, + "step": 6242 + }, + { + "epoch": 1.4394742909845515, + "grad_norm": 1.6215524550661136, + "learning_rate": 3.9994570402250647e-07, + "loss": 0.423028826713562, + "step": 6243 + }, + { + "epoch": 1.4397048651141342, + "grad_norm": 1.5682836985778081, + "learning_rate": 3.996407541731994e-07, + "loss": 0.4235682785511017, + "step": 6244 + }, + { + "epoch": 1.439935439243717, + "grad_norm": 1.231022526448631, + "learning_rate": 3.993358915921936e-07, + "loss": 0.43758147954940796, + "step": 6245 + }, + { + "epoch": 1.4401660133732994, + "grad_norm": 1.4111669631590298, + 
"learning_rate": 3.9903111632380314e-07, + "loss": 0.4462485611438751, + "step": 6246 + }, + { + "epoch": 1.4403965875028821, + "grad_norm": 1.4290246546090093, + "learning_rate": 3.9872642841233086e-07, + "loss": 0.4650310277938843, + "step": 6247 + }, + { + "epoch": 1.4406271616324648, + "grad_norm": 1.4998946903017614, + "learning_rate": 3.984218279020656e-07, + "loss": 0.36653342843055725, + "step": 6248 + }, + { + "epoch": 1.4408577357620476, + "grad_norm": 1.4936296304301175, + "learning_rate": 3.9811731483728483e-07, + "loss": 0.4102433919906616, + "step": 6249 + }, + { + "epoch": 1.4410883098916303, + "grad_norm": 1.6065631349936378, + "learning_rate": 3.9781288926225187e-07, + "loss": 0.46611371636390686, + "step": 6250 + }, + { + "epoch": 1.4413188840212128, + "grad_norm": 1.4339333577964222, + "learning_rate": 3.9750855122121854e-07, + "loss": 0.39757978916168213, + "step": 6251 + }, + { + "epoch": 1.4415494581507955, + "grad_norm": 1.762654016187883, + "learning_rate": 3.972043007584236e-07, + "loss": 0.3736093044281006, + "step": 6252 + }, + { + "epoch": 1.4417800322803782, + "grad_norm": 1.463877920104907, + "learning_rate": 3.9690013791809243e-07, + "loss": 0.4907599091529846, + "step": 6253 + }, + { + "epoch": 1.4420106064099607, + "grad_norm": 1.8306810417206691, + "learning_rate": 3.965960627444387e-07, + "loss": 0.4852679967880249, + "step": 6254 + }, + { + "epoch": 1.4422411805395434, + "grad_norm": 1.379992571943406, + "learning_rate": 3.962920752816622e-07, + "loss": 0.3681846261024475, + "step": 6255 + }, + { + "epoch": 1.4424717546691261, + "grad_norm": 1.3930271555712797, + "learning_rate": 3.9598817557395136e-07, + "loss": 0.36029407382011414, + "step": 6256 + }, + { + "epoch": 1.4427023287987089, + "grad_norm": 1.5468752557100751, + "learning_rate": 3.9568436366548044e-07, + "loss": 0.4156547486782074, + "step": 6257 + }, + { + "epoch": 1.4429329029282916, + "grad_norm": 1.2893479866141693, + "learning_rate": 3.9538063960041155e-07, + 
"loss": 0.417999804019928, + "step": 6258 + }, + { + "epoch": 1.443163477057874, + "grad_norm": 1.5873772931626444, + "learning_rate": 3.9507700342289454e-07, + "loss": 0.34347790479660034, + "step": 6259 + }, + { + "epoch": 1.4433940511874568, + "grad_norm": 1.6747174695424258, + "learning_rate": 3.9477345517706606e-07, + "loss": 0.5093958973884583, + "step": 6260 + }, + { + "epoch": 1.4436246253170395, + "grad_norm": 1.3786087360846342, + "learning_rate": 3.9446999490704935e-07, + "loss": 0.45406264066696167, + "step": 6261 + }, + { + "epoch": 1.443855199446622, + "grad_norm": 1.4643807349818905, + "learning_rate": 3.941666226569561e-07, + "loss": 0.35074740648269653, + "step": 6262 + }, + { + "epoch": 1.4440857735762047, + "grad_norm": 1.9209061652207753, + "learning_rate": 3.9386333847088414e-07, + "loss": 0.4588093161582947, + "step": 6263 + }, + { + "epoch": 1.4443163477057874, + "grad_norm": 1.706957598822881, + "learning_rate": 3.935601423929187e-07, + "loss": 0.5431508421897888, + "step": 6264 + }, + { + "epoch": 1.4445469218353701, + "grad_norm": 2.1293944579193744, + "learning_rate": 3.9325703446713253e-07, + "loss": 0.5942284464836121, + "step": 6265 + }, + { + "epoch": 1.4447774959649529, + "grad_norm": 1.563688512589723, + "learning_rate": 3.929540147375856e-07, + "loss": 0.45533287525177, + "step": 6266 + }, + { + "epoch": 1.4450080700945354, + "grad_norm": 1.4069649860322977, + "learning_rate": 3.926510832483252e-07, + "loss": 0.41154634952545166, + "step": 6267 + }, + { + "epoch": 1.445238644224118, + "grad_norm": 1.7442081379649044, + "learning_rate": 3.923482400433847e-07, + "loss": 0.548882246017456, + "step": 6268 + }, + { + "epoch": 1.4454692183537008, + "grad_norm": 1.6064445647457797, + "learning_rate": 3.9204548516678635e-07, + "loss": 0.4062466621398926, + "step": 6269 + }, + { + "epoch": 1.4456997924832833, + "grad_norm": 1.4970160030578672, + "learning_rate": 3.917428186625378e-07, + "loss": 0.39035165309906006, + "step": 6270 + }, + { + 
"epoch": 1.445930366612866, + "grad_norm": 1.647666751716306, + "learning_rate": 3.9144024057463545e-07, + "loss": 0.44899889826774597, + "step": 6271 + }, + { + "epoch": 1.4461609407424487, + "grad_norm": 1.6865824844286113, + "learning_rate": 3.911377509470616e-07, + "loss": 0.5676968097686768, + "step": 6272 + }, + { + "epoch": 1.4463915148720314, + "grad_norm": 1.5001442753287921, + "learning_rate": 3.9083534982378596e-07, + "loss": 0.5157150626182556, + "step": 6273 + }, + { + "epoch": 1.4466220890016142, + "grad_norm": 1.3999116109701921, + "learning_rate": 3.9053303724876595e-07, + "loss": 0.4405839443206787, + "step": 6274 + }, + { + "epoch": 1.4468526631311966, + "grad_norm": 1.4027072316284976, + "learning_rate": 3.9023081326594564e-07, + "loss": 0.4184240400791168, + "step": 6275 + }, + { + "epoch": 1.4470832372607794, + "grad_norm": 1.4676581347164595, + "learning_rate": 3.8992867791925687e-07, + "loss": 0.46825113892555237, + "step": 6276 + }, + { + "epoch": 1.447313811390362, + "grad_norm": 1.5974669468558875, + "learning_rate": 3.896266312526174e-07, + "loss": 0.39870697259902954, + "step": 6277 + }, + { + "epoch": 1.4475443855199446, + "grad_norm": 1.5056097224989398, + "learning_rate": 3.893246733099332e-07, + "loss": 0.5021681785583496, + "step": 6278 + }, + { + "epoch": 1.4477749596495273, + "grad_norm": 1.6448123845050522, + "learning_rate": 3.890228041350966e-07, + "loss": 0.5453378558158875, + "step": 6279 + }, + { + "epoch": 1.44800553377911, + "grad_norm": 1.6411917622938994, + "learning_rate": 3.887210237719877e-07, + "loss": 0.4488704800605774, + "step": 6280 + }, + { + "epoch": 1.4482361079086927, + "grad_norm": 1.5018657352386517, + "learning_rate": 3.8841933226447274e-07, + "loss": 0.45669007301330566, + "step": 6281 + }, + { + "epoch": 1.4484666820382754, + "grad_norm": 1.704954137797073, + "learning_rate": 3.881177296564061e-07, + "loss": 0.43954944610595703, + "step": 6282 + }, + { + "epoch": 1.448697256167858, + "grad_norm": 
1.3077525799414271, + "learning_rate": 3.8781621599162896e-07, + "loss": 0.39490729570388794, + "step": 6283 + }, + { + "epoch": 1.4489278302974407, + "grad_norm": 1.8875404119821422, + "learning_rate": 3.875147913139688e-07, + "loss": 0.44206392765045166, + "step": 6284 + }, + { + "epoch": 1.4491584044270232, + "grad_norm": 1.5003627073617865, + "learning_rate": 3.872134556672415e-07, + "loss": 0.3874932527542114, + "step": 6285 + }, + { + "epoch": 1.4493889785566059, + "grad_norm": 1.616983828039009, + "learning_rate": 3.8691220909524847e-07, + "loss": 0.4762042760848999, + "step": 6286 + }, + { + "epoch": 1.4496195526861886, + "grad_norm": 1.4983771405139852, + "learning_rate": 3.8661105164177955e-07, + "loss": 0.45220378041267395, + "step": 6287 + }, + { + "epoch": 1.4498501268157713, + "grad_norm": 1.5182044259213916, + "learning_rate": 3.863099833506105e-07, + "loss": 0.48711973428726196, + "step": 6288 + }, + { + "epoch": 1.450080700945354, + "grad_norm": 1.795485740865634, + "learning_rate": 3.8600900426550495e-07, + "loss": 0.3985457420349121, + "step": 6289 + }, + { + "epoch": 1.4503112750749365, + "grad_norm": 1.8111920220274738, + "learning_rate": 3.8570811443021324e-07, + "loss": 0.4626576006412506, + "step": 6290 + }, + { + "epoch": 1.4505418492045192, + "grad_norm": 1.3056530217454654, + "learning_rate": 3.8540731388847303e-07, + "loss": 0.49909156560897827, + "step": 6291 + }, + { + "epoch": 1.450772423334102, + "grad_norm": 1.6088418800938844, + "learning_rate": 3.8510660268400853e-07, + "loss": 0.47779160737991333, + "step": 6292 + }, + { + "epoch": 1.4510029974636844, + "grad_norm": 1.7546373602134575, + "learning_rate": 3.8480598086053073e-07, + "loss": 0.41273951530456543, + "step": 6293 + }, + { + "epoch": 1.4512335715932672, + "grad_norm": 1.372334717947673, + "learning_rate": 3.8450544846173873e-07, + "loss": 0.49659836292266846, + "step": 6294 + }, + { + "epoch": 1.4514641457228499, + "grad_norm": 1.5745738888755318, + "learning_rate": 
3.842050055313174e-07, + "loss": 0.48864418268203735, + "step": 6295 + }, + { + "epoch": 1.4516947198524326, + "grad_norm": 1.5511685453466029, + "learning_rate": 3.8390465211293964e-07, + "loss": 0.4437263011932373, + "step": 6296 + }, + { + "epoch": 1.4519252939820153, + "grad_norm": 1.425822828962689, + "learning_rate": 3.83604388250264e-07, + "loss": 0.4785847067832947, + "step": 6297 + }, + { + "epoch": 1.4521558681115978, + "grad_norm": 1.4667204310824673, + "learning_rate": 3.8330421398693815e-07, + "loss": 0.4376726746559143, + "step": 6298 + }, + { + "epoch": 1.4523864422411805, + "grad_norm": 1.3570227959381094, + "learning_rate": 3.8300412936659456e-07, + "loss": 0.39121049642562866, + "step": 6299 + }, + { + "epoch": 1.4526170163707632, + "grad_norm": 1.3658035995507571, + "learning_rate": 3.827041344328541e-07, + "loss": 0.4635738730430603, + "step": 6300 + }, + { + "epoch": 1.4528475905003457, + "grad_norm": 2.0304852722065068, + "learning_rate": 3.8240422922932345e-07, + "loss": 0.502306342124939, + "step": 6301 + }, + { + "epoch": 1.4530781646299284, + "grad_norm": 1.4029845821737765, + "learning_rate": 3.8210441379959765e-07, + "loss": 0.4401247799396515, + "step": 6302 + }, + { + "epoch": 1.4533087387595112, + "grad_norm": 1.3861824238158087, + "learning_rate": 3.8180468818725744e-07, + "loss": 0.5291532874107361, + "step": 6303 + }, + { + "epoch": 1.4535393128890939, + "grad_norm": 1.6276608547131342, + "learning_rate": 3.8150505243587074e-07, + "loss": 0.44658181071281433, + "step": 6304 + }, + { + "epoch": 1.4537698870186766, + "grad_norm": 1.6458326531407963, + "learning_rate": 3.8120550658899284e-07, + "loss": 0.45127803087234497, + "step": 6305 + }, + { + "epoch": 1.454000461148259, + "grad_norm": 1.492007208083286, + "learning_rate": 3.809060506901659e-07, + "loss": 0.42187097668647766, + "step": 6306 + }, + { + "epoch": 1.4542310352778418, + "grad_norm": 1.5038936507089915, + "learning_rate": 3.806066847829191e-07, + "loss": 
0.3573130667209625, + "step": 6307 + }, + { + "epoch": 1.4544616094074245, + "grad_norm": 1.9148379623538745, + "learning_rate": 3.8030740891076775e-07, + "loss": 0.4350733757019043, + "step": 6308 + }, + { + "epoch": 1.454692183537007, + "grad_norm": 1.541900067739278, + "learning_rate": 3.8000822311721526e-07, + "loss": 0.48514148592948914, + "step": 6309 + }, + { + "epoch": 1.4549227576665897, + "grad_norm": 1.4827947959124368, + "learning_rate": 3.797091274457507e-07, + "loss": 0.41036373376846313, + "step": 6310 + }, + { + "epoch": 1.4551533317961725, + "grad_norm": 1.494922453363639, + "learning_rate": 3.7941012193985113e-07, + "loss": 0.4141424298286438, + "step": 6311 + }, + { + "epoch": 1.4553839059257552, + "grad_norm": 1.273366480801725, + "learning_rate": 3.7911120664297947e-07, + "loss": 0.4465962052345276, + "step": 6312 + }, + { + "epoch": 1.455614480055338, + "grad_norm": 1.5781844793110138, + "learning_rate": 3.7881238159858653e-07, + "loss": 0.42370718717575073, + "step": 6313 + }, + { + "epoch": 1.4558450541849204, + "grad_norm": 1.5971127849956464, + "learning_rate": 3.785136468501098e-07, + "loss": 0.5199419260025024, + "step": 6314 + }, + { + "epoch": 1.456075628314503, + "grad_norm": 1.617344004292436, + "learning_rate": 3.782150024409727e-07, + "loss": 0.4802842140197754, + "step": 6315 + }, + { + "epoch": 1.4563062024440858, + "grad_norm": 1.24431475405318, + "learning_rate": 3.77916448414587e-07, + "loss": 0.4640405476093292, + "step": 6316 + }, + { + "epoch": 1.4565367765736683, + "grad_norm": 1.4636172678889559, + "learning_rate": 3.776179848143497e-07, + "loss": 0.4338728189468384, + "step": 6317 + }, + { + "epoch": 1.456767350703251, + "grad_norm": 2.139264242241595, + "learning_rate": 3.7731961168364644e-07, + "loss": 0.42709267139434814, + "step": 6318 + }, + { + "epoch": 1.4569979248328337, + "grad_norm": 1.6617712318798017, + "learning_rate": 3.7702132906584784e-07, + "loss": 0.4985729455947876, + "step": 6319 + }, + { + "epoch": 
1.4572284989624165, + "grad_norm": 1.441274937368423, + "learning_rate": 3.7672313700431277e-07, + "loss": 0.46335911750793457, + "step": 6320 + }, + { + "epoch": 1.4574590730919992, + "grad_norm": 1.416712646344965, + "learning_rate": 3.7642503554238657e-07, + "loss": 0.39897364377975464, + "step": 6321 + }, + { + "epoch": 1.4576896472215817, + "grad_norm": 1.7524170106258121, + "learning_rate": 3.761270247234014e-07, + "loss": 0.4338347017765045, + "step": 6322 + }, + { + "epoch": 1.4579202213511644, + "grad_norm": 1.5421394568485456, + "learning_rate": 3.7582910459067607e-07, + "loss": 0.4619752764701843, + "step": 6323 + }, + { + "epoch": 1.458150795480747, + "grad_norm": 1.6592584693059589, + "learning_rate": 3.7553127518751583e-07, + "loss": 0.4676104784011841, + "step": 6324 + }, + { + "epoch": 1.4583813696103296, + "grad_norm": 1.495504668484879, + "learning_rate": 3.752335365572138e-07, + "loss": 0.37536361813545227, + "step": 6325 + }, + { + "epoch": 1.4586119437399123, + "grad_norm": 1.5747560176376743, + "learning_rate": 3.749358887430487e-07, + "loss": 0.4389209449291229, + "step": 6326 + }, + { + "epoch": 1.458842517869495, + "grad_norm": 1.561809426616513, + "learning_rate": 3.746383317882874e-07, + "loss": 0.44722115993499756, + "step": 6327 + }, + { + "epoch": 1.4590730919990778, + "grad_norm": 1.8177515516918266, + "learning_rate": 3.743408657361821e-07, + "loss": 0.39179277420043945, + "step": 6328 + }, + { + "epoch": 1.4593036661286605, + "grad_norm": 1.5511886302037754, + "learning_rate": 3.7404349062997275e-07, + "loss": 0.4704967737197876, + "step": 6329 + }, + { + "epoch": 1.459534240258243, + "grad_norm": 1.4679557991806869, + "learning_rate": 3.737462065128859e-07, + "loss": 0.4294360876083374, + "step": 6330 + }, + { + "epoch": 1.4597648143878257, + "grad_norm": 1.5082268745032619, + "learning_rate": 3.734490134281353e-07, + "loss": 0.5070170760154724, + "step": 6331 + }, + { + "epoch": 1.4599953885174084, + "grad_norm": 
1.4285887900302483, + "learning_rate": 3.7315191141892013e-07, + "loss": 0.3670409023761749, + "step": 6332 + }, + { + "epoch": 1.460225962646991, + "grad_norm": 1.4866250279072872, + "learning_rate": 3.7285490052842785e-07, + "loss": 0.5043025016784668, + "step": 6333 + }, + { + "epoch": 1.4604565367765736, + "grad_norm": 1.5557807366245089, + "learning_rate": 3.725579807998316e-07, + "loss": 0.43942689895629883, + "step": 6334 + }, + { + "epoch": 1.4606871109061563, + "grad_norm": 1.61242194971354, + "learning_rate": 3.7226115227629164e-07, + "loss": 0.3444882035255432, + "step": 6335 + }, + { + "epoch": 1.460917685035739, + "grad_norm": 1.4093154726677697, + "learning_rate": 3.71964415000955e-07, + "loss": 0.3994483947753906, + "step": 6336 + }, + { + "epoch": 1.4611482591653218, + "grad_norm": 1.799524270186483, + "learning_rate": 3.7166776901695564e-07, + "loss": 0.3581928014755249, + "step": 6337 + }, + { + "epoch": 1.4613788332949043, + "grad_norm": 1.4094806965107296, + "learning_rate": 3.7137121436741423e-07, + "loss": 0.4068276286125183, + "step": 6338 + }, + { + "epoch": 1.461609407424487, + "grad_norm": 1.5430920931361498, + "learning_rate": 3.710747510954376e-07, + "loss": 0.4140080213546753, + "step": 6339 + }, + { + "epoch": 1.4618399815540697, + "grad_norm": 1.5667918006300834, + "learning_rate": 3.707783792441201e-07, + "loss": 0.4328460097312927, + "step": 6340 + }, + { + "epoch": 1.4620705556836522, + "grad_norm": 1.7344820768552758, + "learning_rate": 3.704820988565419e-07, + "loss": 0.49252209067344666, + "step": 6341 + }, + { + "epoch": 1.462301129813235, + "grad_norm": 1.4564646974830249, + "learning_rate": 3.7018590997577093e-07, + "loss": 0.43051671981811523, + "step": 6342 + }, + { + "epoch": 1.4625317039428176, + "grad_norm": 1.5901870751351228, + "learning_rate": 3.698898126448605e-07, + "loss": 0.5131059288978577, + "step": 6343 + }, + { + "epoch": 1.4627622780724003, + "grad_norm": 2.025312431684147, + "learning_rate": 
3.6959380690685185e-07, + "loss": 0.4633597731590271, + "step": 6344 + }, + { + "epoch": 1.462992852201983, + "grad_norm": 1.5138095102076332, + "learning_rate": 3.6929789280477265e-07, + "loss": 0.3603428602218628, + "step": 6345 + }, + { + "epoch": 1.4632234263315655, + "grad_norm": 1.4981993836978438, + "learning_rate": 3.6900207038163633e-07, + "loss": 0.5337490439414978, + "step": 6346 + }, + { + "epoch": 1.4634540004611483, + "grad_norm": 1.8305905685338713, + "learning_rate": 3.687063396804444e-07, + "loss": 0.4940665066242218, + "step": 6347 + }, + { + "epoch": 1.463684574590731, + "grad_norm": 2.012256207996667, + "learning_rate": 3.6841070074418367e-07, + "loss": 0.45664387941360474, + "step": 6348 + }, + { + "epoch": 1.4639151487203135, + "grad_norm": 1.6965611532451377, + "learning_rate": 3.681151536158289e-07, + "loss": 0.4546254277229309, + "step": 6349 + }, + { + "epoch": 1.4641457228498962, + "grad_norm": 1.4760234786987596, + "learning_rate": 3.6781969833834015e-07, + "loss": 0.37474149465560913, + "step": 6350 + }, + { + "epoch": 1.464376296979479, + "grad_norm": 1.473821341410815, + "learning_rate": 3.675243349546655e-07, + "loss": 0.38016337156295776, + "step": 6351 + }, + { + "epoch": 1.4646068711090616, + "grad_norm": 1.3725937182091388, + "learning_rate": 3.672290635077384e-07, + "loss": 0.46079233288764954, + "step": 6352 + }, + { + "epoch": 1.4648374452386443, + "grad_norm": 1.754716547965532, + "learning_rate": 3.669338840404799e-07, + "loss": 0.39382117986679077, + "step": 6353 + }, + { + "epoch": 1.4650680193682268, + "grad_norm": 1.5018040161914972, + "learning_rate": 3.6663879659579766e-07, + "loss": 0.4502074718475342, + "step": 6354 + }, + { + "epoch": 1.4652985934978096, + "grad_norm": 1.4446726503170868, + "learning_rate": 3.663438012165848e-07, + "loss": 0.38199833035469055, + "step": 6355 + }, + { + "epoch": 1.4655291676273923, + "grad_norm": 1.4760781012903512, + "learning_rate": 3.660488979457228e-07, + "loss": 
0.4340086579322815, + "step": 6356 + }, + { + "epoch": 1.4657597417569748, + "grad_norm": 1.7005769563076596, + "learning_rate": 3.65754086826078e-07, + "loss": 0.5425105094909668, + "step": 6357 + }, + { + "epoch": 1.4659903158865575, + "grad_norm": 1.4480393161895644, + "learning_rate": 3.654593679005048e-07, + "loss": 0.4671604633331299, + "step": 6358 + }, + { + "epoch": 1.4662208900161402, + "grad_norm": 1.6404775976624013, + "learning_rate": 3.6516474121184317e-07, + "loss": 0.4608290195465088, + "step": 6359 + }, + { + "epoch": 1.466451464145723, + "grad_norm": 1.9415349791307541, + "learning_rate": 3.6487020680292023e-07, + "loss": 0.5272650122642517, + "step": 6360 + }, + { + "epoch": 1.4666820382753056, + "grad_norm": 1.4115666654764834, + "learning_rate": 3.645757647165495e-07, + "loss": 0.40990152955055237, + "step": 6361 + }, + { + "epoch": 1.4669126124048881, + "grad_norm": 1.405277693008717, + "learning_rate": 3.6428141499553166e-07, + "loss": 0.4723639488220215, + "step": 6362 + }, + { + "epoch": 1.4671431865344708, + "grad_norm": 1.7789473556982454, + "learning_rate": 3.639871576826529e-07, + "loss": 0.5115963220596313, + "step": 6363 + }, + { + "epoch": 1.4673737606640536, + "grad_norm": 1.669989973617769, + "learning_rate": 3.636929928206862e-07, + "loss": 0.44548431038856506, + "step": 6364 + }, + { + "epoch": 1.467604334793636, + "grad_norm": 1.5904330694852653, + "learning_rate": 3.633989204523922e-07, + "loss": 0.48599356412887573, + "step": 6365 + }, + { + "epoch": 1.4678349089232188, + "grad_norm": 1.4664661517676485, + "learning_rate": 3.631049406205164e-07, + "loss": 0.463236004114151, + "step": 6366 + }, + { + "epoch": 1.4680654830528015, + "grad_norm": 1.7238002544119735, + "learning_rate": 3.6281105336779225e-07, + "loss": 0.4840255379676819, + "step": 6367 + }, + { + "epoch": 1.4682960571823842, + "grad_norm": 1.5727046676978498, + "learning_rate": 3.6251725873693926e-07, + "loss": 0.39191675186157227, + "step": 6368 + }, + { + 
"epoch": 1.468526631311967, + "grad_norm": 1.4333992251496341, + "learning_rate": 3.622235567706637e-07, + "loss": 0.5161769986152649, + "step": 6369 + }, + { + "epoch": 1.4687572054415494, + "grad_norm": 1.811820117175508, + "learning_rate": 3.6192994751165764e-07, + "loss": 0.4579160213470459, + "step": 6370 + }, + { + "epoch": 1.4689877795711321, + "grad_norm": 1.5348364339019953, + "learning_rate": 3.616364310026006e-07, + "loss": 0.4254727363586426, + "step": 6371 + }, + { + "epoch": 1.4692183537007149, + "grad_norm": 1.60846510703603, + "learning_rate": 3.613430072861575e-07, + "loss": 0.3614911139011383, + "step": 6372 + }, + { + "epoch": 1.4694489278302973, + "grad_norm": 1.332197813540827, + "learning_rate": 3.610496764049814e-07, + "loss": 0.4501386284828186, + "step": 6373 + }, + { + "epoch": 1.46967950195988, + "grad_norm": 1.4207205401720155, + "learning_rate": 3.607564384017102e-07, + "loss": 0.4988802671432495, + "step": 6374 + }, + { + "epoch": 1.4699100760894628, + "grad_norm": 1.5751788296655767, + "learning_rate": 3.6046329331896907e-07, + "loss": 0.4277713894844055, + "step": 6375 + }, + { + "epoch": 1.4701406502190455, + "grad_norm": 1.5414838298104503, + "learning_rate": 3.601702411993697e-07, + "loss": 0.5007919073104858, + "step": 6376 + }, + { + "epoch": 1.4703712243486282, + "grad_norm": 1.5705777345927519, + "learning_rate": 3.5987728208551015e-07, + "loss": 0.4857282042503357, + "step": 6377 + }, + { + "epoch": 1.4706017984782107, + "grad_norm": 1.3913774043642957, + "learning_rate": 3.595844160199756e-07, + "loss": 0.45752188563346863, + "step": 6378 + }, + { + "epoch": 1.4708323726077934, + "grad_norm": 1.3374827793978188, + "learning_rate": 3.592916430453361e-07, + "loss": 0.4364059269428253, + "step": 6379 + }, + { + "epoch": 1.4710629467373761, + "grad_norm": 1.4896729369612345, + "learning_rate": 3.589989632041501e-07, + "loss": 0.48765695095062256, + "step": 6380 + }, + { + "epoch": 1.4712935208669586, + "grad_norm": 
1.8321401665511103, + "learning_rate": 3.5870637653896087e-07, + "loss": 0.5505347847938538, + "step": 6381 + }, + { + "epoch": 1.4715240949965414, + "grad_norm": 1.5940287914496154, + "learning_rate": 3.584138830922994e-07, + "loss": 0.4468069076538086, + "step": 6382 + }, + { + "epoch": 1.471754669126124, + "grad_norm": 1.2639532856264213, + "learning_rate": 3.5812148290668186e-07, + "loss": 0.4050968289375305, + "step": 6383 + }, + { + "epoch": 1.4719852432557068, + "grad_norm": 1.6709771008348266, + "learning_rate": 3.578291760246122e-07, + "loss": 0.47324883937835693, + "step": 6384 + }, + { + "epoch": 1.4722158173852895, + "grad_norm": 1.646291535207369, + "learning_rate": 3.5753696248858025e-07, + "loss": 0.4431450366973877, + "step": 6385 + }, + { + "epoch": 1.472446391514872, + "grad_norm": 1.3398593447687968, + "learning_rate": 3.5724484234106166e-07, + "loss": 0.4599822163581848, + "step": 6386 + }, + { + "epoch": 1.4726769656444547, + "grad_norm": 1.6764694987177748, + "learning_rate": 3.5695281562451964e-07, + "loss": 0.3655046224594116, + "step": 6387 + }, + { + "epoch": 1.4729075397740374, + "grad_norm": 1.925765064850511, + "learning_rate": 3.5666088238140267e-07, + "loss": 0.4543811082839966, + "step": 6388 + }, + { + "epoch": 1.47313811390362, + "grad_norm": 1.7682119668466059, + "learning_rate": 3.563690426541469e-07, + "loss": 0.45380568504333496, + "step": 6389 + }, + { + "epoch": 1.4733686880332026, + "grad_norm": 1.3928278789748259, + "learning_rate": 3.5607729648517336e-07, + "loss": 0.3640294373035431, + "step": 6390 + }, + { + "epoch": 1.4735992621627854, + "grad_norm": 1.4826659174775283, + "learning_rate": 3.557856439168907e-07, + "loss": 0.39890235662460327, + "step": 6391 + }, + { + "epoch": 1.473829836292368, + "grad_norm": 1.7657939773449876, + "learning_rate": 3.5549408499169374e-07, + "loss": 0.47551727294921875, + "step": 6392 + }, + { + "epoch": 1.4740604104219508, + "grad_norm": 1.5946717850777934, + "learning_rate": 
3.5520261975196364e-07, + "loss": 0.43851834535598755, + "step": 6393 + }, + { + "epoch": 1.4742909845515333, + "grad_norm": 1.7160257871535318, + "learning_rate": 3.549112482400676e-07, + "loss": 0.45289307832717896, + "step": 6394 + }, + { + "epoch": 1.474521558681116, + "grad_norm": 1.660677297447299, + "learning_rate": 3.546199704983591e-07, + "loss": 0.5229180455207825, + "step": 6395 + }, + { + "epoch": 1.4747521328106985, + "grad_norm": 1.5089259577077747, + "learning_rate": 3.5432878656917884e-07, + "loss": 0.47332310676574707, + "step": 6396 + }, + { + "epoch": 1.4749827069402812, + "grad_norm": 1.402371205517633, + "learning_rate": 3.540376964948529e-07, + "loss": 0.4079092741012573, + "step": 6397 + }, + { + "epoch": 1.475213281069864, + "grad_norm": 1.607654850710184, + "learning_rate": 3.5374670031769484e-07, + "loss": 0.43366020917892456, + "step": 6398 + }, + { + "epoch": 1.4754438551994467, + "grad_norm": 1.6067458113996615, + "learning_rate": 3.5345579808000294e-07, + "loss": 0.45040106773376465, + "step": 6399 + }, + { + "epoch": 1.4756744293290294, + "grad_norm": 1.584960802510298, + "learning_rate": 3.531649898240634e-07, + "loss": 0.4409756064414978, + "step": 6400 + }, + { + "epoch": 1.4759050034586119, + "grad_norm": 1.5204759785794038, + "learning_rate": 3.528742755921481e-07, + "loss": 0.4141521751880646, + "step": 6401 + }, + { + "epoch": 1.4761355775881946, + "grad_norm": 1.6363482264143396, + "learning_rate": 3.525836554265156e-07, + "loss": 0.4697296619415283, + "step": 6402 + }, + { + "epoch": 1.4763661517177773, + "grad_norm": 1.3771953803345143, + "learning_rate": 3.5229312936941013e-07, + "loss": 0.4369434714317322, + "step": 6403 + }, + { + "epoch": 1.4765967258473598, + "grad_norm": 1.3415133870830294, + "learning_rate": 3.5200269746306224e-07, + "loss": 0.4197359085083008, + "step": 6404 + }, + { + "epoch": 1.4768272999769425, + "grad_norm": 1.8249279231813902, + "learning_rate": 3.5171235974968996e-07, + "loss": 
0.495933473110199, + "step": 6405 + }, + { + "epoch": 1.4770578741065252, + "grad_norm": 1.3638396377453934, + "learning_rate": 3.51422116271496e-07, + "loss": 0.4177231192588806, + "step": 6406 + }, + { + "epoch": 1.477288448236108, + "grad_norm": 1.5336568107147823, + "learning_rate": 3.511319670706705e-07, + "loss": 0.5366500020027161, + "step": 6407 + }, + { + "epoch": 1.4775190223656907, + "grad_norm": 1.5479295323166011, + "learning_rate": 3.508419121893897e-07, + "loss": 0.3900446891784668, + "step": 6408 + }, + { + "epoch": 1.4777495964952732, + "grad_norm": 1.8223854522009124, + "learning_rate": 3.5055195166981646e-07, + "loss": 0.40877431631088257, + "step": 6409 + }, + { + "epoch": 1.4779801706248559, + "grad_norm": 1.3594177124317366, + "learning_rate": 3.502620855540985e-07, + "loss": 0.4381163716316223, + "step": 6410 + }, + { + "epoch": 1.4782107447544386, + "grad_norm": 1.2256800281998605, + "learning_rate": 3.4997231388437167e-07, + "loss": 0.3449817895889282, + "step": 6411 + }, + { + "epoch": 1.478441318884021, + "grad_norm": 1.4879818959728963, + "learning_rate": 3.4968263670275653e-07, + "loss": 0.4879523515701294, + "step": 6412 + }, + { + "epoch": 1.4786718930136038, + "grad_norm": 1.5651020351069762, + "learning_rate": 3.493930540513613e-07, + "loss": 0.3781365156173706, + "step": 6413 + }, + { + "epoch": 1.4789024671431865, + "grad_norm": 1.6645622352676888, + "learning_rate": 3.49103565972279e-07, + "loss": 0.4505656361579895, + "step": 6414 + }, + { + "epoch": 1.4791330412727692, + "grad_norm": 1.4565716791756764, + "learning_rate": 3.4881417250759006e-07, + "loss": 0.4285612106323242, + "step": 6415 + }, + { + "epoch": 1.479363615402352, + "grad_norm": 1.5357416036601346, + "learning_rate": 3.48524873699361e-07, + "loss": 0.5285177826881409, + "step": 6416 + }, + { + "epoch": 1.4795941895319344, + "grad_norm": 1.6484784065232339, + "learning_rate": 3.482356695896437e-07, + "loss": 0.4504782259464264, + "step": 6417 + }, + { + "epoch": 
1.4798247636615172, + "grad_norm": 1.5658620514352724, + "learning_rate": 3.4794656022047765e-07, + "loss": 0.45295125246047974, + "step": 6418 + }, + { + "epoch": 1.4800553377910999, + "grad_norm": 1.3627022105594853, + "learning_rate": 3.47657545633887e-07, + "loss": 0.35889285802841187, + "step": 6419 + }, + { + "epoch": 1.4802859119206824, + "grad_norm": 1.5560865897069756, + "learning_rate": 3.4736862587188384e-07, + "loss": 0.49129703640937805, + "step": 6420 + }, + { + "epoch": 1.480516486050265, + "grad_norm": 1.6626930717329957, + "learning_rate": 3.4707980097646474e-07, + "loss": 0.5018036365509033, + "step": 6421 + }, + { + "epoch": 1.4807470601798478, + "grad_norm": 1.6557207215915222, + "learning_rate": 3.46791070989614e-07, + "loss": 0.48743095993995667, + "step": 6422 + }, + { + "epoch": 1.4809776343094305, + "grad_norm": 1.5043027194300391, + "learning_rate": 3.46502435953301e-07, + "loss": 0.4876127243041992, + "step": 6423 + }, + { + "epoch": 1.4812082084390132, + "grad_norm": 1.971149486413709, + "learning_rate": 3.462138959094818e-07, + "loss": 0.517420768737793, + "step": 6424 + }, + { + "epoch": 1.4814387825685957, + "grad_norm": 1.8274785313456325, + "learning_rate": 3.4592545090009907e-07, + "loss": 0.49587076902389526, + "step": 6425 + }, + { + "epoch": 1.4816693566981785, + "grad_norm": 1.5362037346917286, + "learning_rate": 3.4563710096708063e-07, + "loss": 0.43007123470306396, + "step": 6426 + }, + { + "epoch": 1.4818999308277612, + "grad_norm": 1.358212427456112, + "learning_rate": 3.4534884615234163e-07, + "loss": 0.41231095790863037, + "step": 6427 + }, + { + "epoch": 1.4821305049573437, + "grad_norm": 1.6451517308598724, + "learning_rate": 3.450606864977822e-07, + "loss": 0.4454977512359619, + "step": 6428 + }, + { + "epoch": 1.4823610790869264, + "grad_norm": 1.3739971676037328, + "learning_rate": 3.447726220452899e-07, + "loss": 0.4432292878627777, + "step": 6429 + }, + { + "epoch": 1.482591653216509, + "grad_norm": 
1.6222705799101154, + "learning_rate": 3.444846528367372e-07, + "loss": 0.47547852993011475, + "step": 6430 + }, + { + "epoch": 1.4828222273460918, + "grad_norm": 1.522255385470065, + "learning_rate": 3.441967789139837e-07, + "loss": 0.45712774991989136, + "step": 6431 + }, + { + "epoch": 1.4830528014756745, + "grad_norm": 2.2700209255759107, + "learning_rate": 3.439090003188748e-07, + "loss": 0.4485551714897156, + "step": 6432 + }, + { + "epoch": 1.483283375605257, + "grad_norm": 1.4019614855782472, + "learning_rate": 3.4362131709324225e-07, + "loss": 0.5157139301300049, + "step": 6433 + }, + { + "epoch": 1.4835139497348397, + "grad_norm": 1.6970431173839349, + "learning_rate": 3.4333372927890346e-07, + "loss": 0.3786337375640869, + "step": 6434 + }, + { + "epoch": 1.4837445238644225, + "grad_norm": 1.430215191007922, + "learning_rate": 3.430462369176619e-07, + "loss": 0.444644033908844, + "step": 6435 + }, + { + "epoch": 1.483975097994005, + "grad_norm": 1.5213084700296855, + "learning_rate": 3.427588400513082e-07, + "loss": 0.450777530670166, + "step": 6436 + }, + { + "epoch": 1.4842056721235877, + "grad_norm": 1.6553650689166306, + "learning_rate": 3.424715387216176e-07, + "loss": 0.4547499418258667, + "step": 6437 + }, + { + "epoch": 1.4844362462531704, + "grad_norm": 1.3603667716838959, + "learning_rate": 3.4218433297035274e-07, + "loss": 0.41394394636154175, + "step": 6438 + }, + { + "epoch": 1.484666820382753, + "grad_norm": 1.3921623882761025, + "learning_rate": 3.4189722283926194e-07, + "loss": 0.46392822265625, + "step": 6439 + }, + { + "epoch": 1.4848973945123358, + "grad_norm": 1.3499969732544597, + "learning_rate": 3.416102083700797e-07, + "loss": 0.443311870098114, + "step": 6440 + }, + { + "epoch": 1.4851279686419183, + "grad_norm": 1.3830140570978715, + "learning_rate": 3.4132328960452594e-07, + "loss": 0.49744826555252075, + "step": 6441 + }, + { + "epoch": 1.485358542771501, + "grad_norm": 1.5191431970911358, + "learning_rate": 
3.4103646658430787e-07, + "loss": 0.3906005620956421, + "step": 6442 + }, + { + "epoch": 1.4855891169010838, + "grad_norm": 1.3526583076340324, + "learning_rate": 3.407497393511175e-07, + "loss": 0.4236280918121338, + "step": 6443 + }, + { + "epoch": 1.4858196910306662, + "grad_norm": 1.6787824686307624, + "learning_rate": 3.4046310794663403e-07, + "loss": 0.5457645654678345, + "step": 6444 + }, + { + "epoch": 1.486050265160249, + "grad_norm": 1.7325001007084588, + "learning_rate": 3.4017657241252217e-07, + "loss": 0.541573703289032, + "step": 6445 + }, + { + "epoch": 1.4862808392898317, + "grad_norm": 1.9081537369674455, + "learning_rate": 3.398901327904322e-07, + "loss": 0.496945858001709, + "step": 6446 + }, + { + "epoch": 1.4865114134194144, + "grad_norm": 1.5413856714091914, + "learning_rate": 3.3960378912200136e-07, + "loss": 0.46119701862335205, + "step": 6447 + }, + { + "epoch": 1.4867419875489971, + "grad_norm": 1.8976464043536114, + "learning_rate": 3.3931754144885284e-07, + "loss": 0.5169441103935242, + "step": 6448 + }, + { + "epoch": 1.4869725616785796, + "grad_norm": 1.7130869588848308, + "learning_rate": 3.390313898125957e-07, + "loss": 0.525173544883728, + "step": 6449 + }, + { + "epoch": 1.4872031358081623, + "grad_norm": 1.6684348208587065, + "learning_rate": 3.3874533425482457e-07, + "loss": 0.46877139806747437, + "step": 6450 + }, + { + "epoch": 1.487433709937745, + "grad_norm": 1.6810644095850389, + "learning_rate": 3.3845937481712096e-07, + "loss": 0.49436479806900024, + "step": 6451 + }, + { + "epoch": 1.4876642840673275, + "grad_norm": 1.2950679928032611, + "learning_rate": 3.3817351154105145e-07, + "loss": 0.40879231691360474, + "step": 6452 + }, + { + "epoch": 1.4878948581969103, + "grad_norm": 1.5253823933458253, + "learning_rate": 3.378877444681697e-07, + "loss": 0.5060825347900391, + "step": 6453 + }, + { + "epoch": 1.488125432326493, + "grad_norm": 1.4561081118713566, + "learning_rate": 3.3760207364001434e-07, + "loss": 
0.4875546097755432, + "step": 6454 + }, + { + "epoch": 1.4883560064560757, + "grad_norm": 1.5036556031092911, + "learning_rate": 3.373164990981108e-07, + "loss": 0.3791916072368622, + "step": 6455 + }, + { + "epoch": 1.4885865805856584, + "grad_norm": 1.4585716739422292, + "learning_rate": 3.370310208839704e-07, + "loss": 0.46757322549819946, + "step": 6456 + }, + { + "epoch": 1.488817154715241, + "grad_norm": 1.4061567541704671, + "learning_rate": 3.3674563903908994e-07, + "loss": 0.4334050416946411, + "step": 6457 + }, + { + "epoch": 1.4890477288448236, + "grad_norm": 1.4217577265821555, + "learning_rate": 3.3646035360495294e-07, + "loss": 0.4408720135688782, + "step": 6458 + }, + { + "epoch": 1.4892783029744063, + "grad_norm": 1.637938092148249, + "learning_rate": 3.3617516462302795e-07, + "loss": 0.46556228399276733, + "step": 6459 + }, + { + "epoch": 1.4895088771039888, + "grad_norm": 1.3694379850190115, + "learning_rate": 3.3589007213477096e-07, + "loss": 0.5212184190750122, + "step": 6460 + }, + { + "epoch": 1.4897394512335715, + "grad_norm": 1.6425370019041445, + "learning_rate": 3.35605076181622e-07, + "loss": 0.5340084433555603, + "step": 6461 + }, + { + "epoch": 1.4899700253631543, + "grad_norm": 1.4674031830711234, + "learning_rate": 3.353201768050088e-07, + "loss": 0.38049495220184326, + "step": 6462 + }, + { + "epoch": 1.490200599492737, + "grad_norm": 1.5849611777401629, + "learning_rate": 3.350353740463442e-07, + "loss": 0.5480734705924988, + "step": 6463 + }, + { + "epoch": 1.4904311736223197, + "grad_norm": 1.4050939080217109, + "learning_rate": 3.3475066794702756e-07, + "loss": 0.4179231524467468, + "step": 6464 + }, + { + "epoch": 1.4906617477519022, + "grad_norm": 1.8331951463468434, + "learning_rate": 3.3446605854844335e-07, + "loss": 0.5380987524986267, + "step": 6465 + }, + { + "epoch": 1.490892321881485, + "grad_norm": 1.4221970681414315, + "learning_rate": 3.3418154589196226e-07, + "loss": 0.41146454215049744, + "step": 6466 + }, + { + 
"epoch": 1.4911228960110676, + "grad_norm": 1.5814296524447065, + "learning_rate": 3.3389713001894157e-07, + "loss": 0.4586387276649475, + "step": 6467 + }, + { + "epoch": 1.4913534701406501, + "grad_norm": 1.1757977126470995, + "learning_rate": 3.336128109707236e-07, + "loss": 0.4023931920528412, + "step": 6468 + }, + { + "epoch": 1.4915840442702328, + "grad_norm": 1.6673237012516164, + "learning_rate": 3.333285887886373e-07, + "loss": 0.5373448133468628, + "step": 6469 + }, + { + "epoch": 1.4918146183998156, + "grad_norm": 1.4523946751037105, + "learning_rate": 3.330444635139971e-07, + "loss": 0.4413643479347229, + "step": 6470 + }, + { + "epoch": 1.4920451925293983, + "grad_norm": 1.3734904271626787, + "learning_rate": 3.3276043518810327e-07, + "loss": 0.399494469165802, + "step": 6471 + }, + { + "epoch": 1.492275766658981, + "grad_norm": 1.4170973987364872, + "learning_rate": 3.3247650385224256e-07, + "loss": 0.4353644847869873, + "step": 6472 + }, + { + "epoch": 1.4925063407885635, + "grad_norm": 1.7462483377307876, + "learning_rate": 3.3219266954768743e-07, + "loss": 0.5231607556343079, + "step": 6473 + }, + { + "epoch": 1.4927369149181462, + "grad_norm": 1.55800999194994, + "learning_rate": 3.3190893231569596e-07, + "loss": 0.414408802986145, + "step": 6474 + }, + { + "epoch": 1.492967489047729, + "grad_norm": 1.6408204727748315, + "learning_rate": 3.3162529219751155e-07, + "loss": 0.3921009302139282, + "step": 6475 + }, + { + "epoch": 1.4931980631773114, + "grad_norm": 1.6197044883986413, + "learning_rate": 3.3134174923436506e-07, + "loss": 0.4317164421081543, + "step": 6476 + }, + { + "epoch": 1.4934286373068941, + "grad_norm": 1.5697343564549593, + "learning_rate": 3.3105830346747175e-07, + "loss": 0.46302181482315063, + "step": 6477 + }, + { + "epoch": 1.4936592114364768, + "grad_norm": 1.464087037907405, + "learning_rate": 3.307749549380335e-07, + "loss": 0.45704615116119385, + "step": 6478 + }, + { + "epoch": 1.4938897855660596, + "grad_norm": 
1.5032451370482525, + "learning_rate": 3.304917036872379e-07, + "loss": 0.45455485582351685, + "step": 6479 + }, + { + "epoch": 1.4941203596956423, + "grad_norm": 1.5465084069557762, + "learning_rate": 3.302085497562588e-07, + "loss": 0.41939157247543335, + "step": 6480 + }, + { + "epoch": 1.4943509338252248, + "grad_norm": 1.3682263746176198, + "learning_rate": 3.2992549318625487e-07, + "loss": 0.4109286367893219, + "step": 6481 + }, + { + "epoch": 1.4945815079548075, + "grad_norm": 2.0164734849697, + "learning_rate": 3.2964253401837173e-07, + "loss": 0.44710463285446167, + "step": 6482 + }, + { + "epoch": 1.4948120820843902, + "grad_norm": 1.6884711291100036, + "learning_rate": 3.2935967229373986e-07, + "loss": 0.4330691695213318, + "step": 6483 + }, + { + "epoch": 1.4950426562139727, + "grad_norm": 1.4066891595951536, + "learning_rate": 3.2907690805347667e-07, + "loss": 0.41174834966659546, + "step": 6484 + }, + { + "epoch": 1.4952732303435554, + "grad_norm": 1.5235589172624593, + "learning_rate": 3.2879424133868406e-07, + "loss": 0.4368870258331299, + "step": 6485 + }, + { + "epoch": 1.4955038044731381, + "grad_norm": 1.581699276196859, + "learning_rate": 3.2851167219045107e-07, + "loss": 0.5155518651008606, + "step": 6486 + }, + { + "epoch": 1.4957343786027208, + "grad_norm": 1.4965040692694338, + "learning_rate": 3.282292006498522e-07, + "loss": 0.47015419602394104, + "step": 6487 + }, + { + "epoch": 1.4959649527323036, + "grad_norm": 1.4271101962383341, + "learning_rate": 3.2794682675794684e-07, + "loss": 0.41059884428977966, + "step": 6488 + }, + { + "epoch": 1.496195526861886, + "grad_norm": 1.7728377181019612, + "learning_rate": 3.2766455055578157e-07, + "loss": 0.4864136278629303, + "step": 6489 + }, + { + "epoch": 1.4964261009914688, + "grad_norm": 1.1780419841322618, + "learning_rate": 3.2738237208438744e-07, + "loss": 0.3599165976047516, + "step": 6490 + }, + { + "epoch": 1.4966566751210515, + "grad_norm": 1.4373611771192503, + "learning_rate": 
3.2710029138478267e-07, + "loss": 0.4734029769897461, + "step": 6491 + }, + { + "epoch": 1.496887249250634, + "grad_norm": 1.5053587105753783, + "learning_rate": 3.268183084979699e-07, + "loss": 0.46739861369132996, + "step": 6492 + }, + { + "epoch": 1.4971178233802167, + "grad_norm": 1.745789102022849, + "learning_rate": 3.265364234649387e-07, + "loss": 0.46794670820236206, + "step": 6493 + }, + { + "epoch": 1.4973483975097994, + "grad_norm": 1.6683012395243093, + "learning_rate": 3.262546363266635e-07, + "loss": 0.463203489780426, + "step": 6494 + }, + { + "epoch": 1.4975789716393821, + "grad_norm": 1.4489172807794646, + "learning_rate": 3.2597294712410504e-07, + "loss": 0.4495059847831726, + "step": 6495 + }, + { + "epoch": 1.4978095457689649, + "grad_norm": 1.464704014292867, + "learning_rate": 3.256913558982101e-07, + "loss": 0.43549245595932007, + "step": 6496 + }, + { + "epoch": 1.4980401198985474, + "grad_norm": 1.552183908593376, + "learning_rate": 3.254098626899102e-07, + "loss": 0.40582704544067383, + "step": 6497 + }, + { + "epoch": 1.49827069402813, + "grad_norm": 1.527774566610999, + "learning_rate": 3.251284675401238e-07, + "loss": 0.3720378279685974, + "step": 6498 + }, + { + "epoch": 1.4985012681577128, + "grad_norm": 1.4814613073983138, + "learning_rate": 3.24847170489754e-07, + "loss": 0.42694520950317383, + "step": 6499 + }, + { + "epoch": 1.4987318422872953, + "grad_norm": 1.4768231117771715, + "learning_rate": 3.2456597157969066e-07, + "loss": 0.442158043384552, + "step": 6500 + }, + { + "epoch": 1.498962416416878, + "grad_norm": 1.4765054194953837, + "learning_rate": 3.2428487085080846e-07, + "loss": 0.44245558977127075, + "step": 6501 + }, + { + "epoch": 1.4991929905464607, + "grad_norm": 1.3559485373971267, + "learning_rate": 3.240038683439684e-07, + "loss": 0.4127236008644104, + "step": 6502 + }, + { + "epoch": 1.4994235646760434, + "grad_norm": 1.4985576311709152, + "learning_rate": 3.237229641000171e-07, + "loss": 0.4262787103652954, + 
"step": 6503 + }, + { + "epoch": 1.4996541388056261, + "grad_norm": 1.6706445028718073, + "learning_rate": 3.2344215815978714e-07, + "loss": 0.4181264042854309, + "step": 6504 + }, + { + "epoch": 1.4998847129352086, + "grad_norm": 1.6044294628436637, + "learning_rate": 3.2316145056409616e-07, + "loss": 0.4416937530040741, + "step": 6505 + }, + { + "epoch": 1.5001152870647914, + "grad_norm": 1.8850023720212492, + "learning_rate": 3.228808413537476e-07, + "loss": 0.4901489019393921, + "step": 6506 + }, + { + "epoch": 1.5003458611943739, + "grad_norm": 1.3996173090866784, + "learning_rate": 3.2260033056953153e-07, + "loss": 0.37932026386260986, + "step": 6507 + }, + { + "epoch": 1.5005764353239566, + "grad_norm": 1.649923361135509, + "learning_rate": 3.223199182522223e-07, + "loss": 0.4680899381637573, + "step": 6508 + }, + { + "epoch": 1.5008070094535393, + "grad_norm": 1.6955418693371036, + "learning_rate": 3.2203960444258105e-07, + "loss": 0.508334219455719, + "step": 6509 + }, + { + "epoch": 1.501037583583122, + "grad_norm": 2.0480591557575685, + "learning_rate": 3.2175938918135415e-07, + "loss": 0.3386784791946411, + "step": 6510 + }, + { + "epoch": 1.5012681577127047, + "grad_norm": 1.860117074212897, + "learning_rate": 3.214792725092741e-07, + "loss": 0.4315892457962036, + "step": 6511 + }, + { + "epoch": 1.5014987318422874, + "grad_norm": 1.4533616152071933, + "learning_rate": 3.211992544670582e-07, + "loss": 0.3709627389907837, + "step": 6512 + }, + { + "epoch": 1.50172930597187, + "grad_norm": 1.6433224440752017, + "learning_rate": 3.2091933509541023e-07, + "loss": 0.5260987877845764, + "step": 6513 + }, + { + "epoch": 1.5019598801014526, + "grad_norm": 1.5201640514539732, + "learning_rate": 3.20639514435019e-07, + "loss": 0.5379073619842529, + "step": 6514 + }, + { + "epoch": 1.5021904542310351, + "grad_norm": 1.2867052063244526, + "learning_rate": 3.2035979252655976e-07, + "loss": 0.47530391812324524, + "step": 6515 + }, + { + "epoch": 1.5024210283606179, 
+ "grad_norm": 1.5201328820105404, + "learning_rate": 3.200801694106926e-07, + "loss": 0.459227979183197, + "step": 6516 + }, + { + "epoch": 1.5026516024902006, + "grad_norm": 1.5330729417783509, + "learning_rate": 3.19800645128063e-07, + "loss": 0.4867238998413086, + "step": 6517 + }, + { + "epoch": 1.5028821766197833, + "grad_norm": 1.4246709864782185, + "learning_rate": 3.195212197193039e-07, + "loss": 0.38478928804397583, + "step": 6518 + }, + { + "epoch": 1.503112750749366, + "grad_norm": 1.625989812299007, + "learning_rate": 3.192418932250316e-07, + "loss": 0.3938423991203308, + "step": 6519 + }, + { + "epoch": 1.5033433248789487, + "grad_norm": 1.8227844221564524, + "learning_rate": 3.1896266568584975e-07, + "loss": 0.457303911447525, + "step": 6520 + }, + { + "epoch": 1.5035738990085312, + "grad_norm": 1.5422494994233005, + "learning_rate": 3.1868353714234607e-07, + "loss": 0.5007269382476807, + "step": 6521 + }, + { + "epoch": 1.503804473138114, + "grad_norm": 1.4891205198132078, + "learning_rate": 3.1840450763509576e-07, + "loss": 0.3878381848335266, + "step": 6522 + }, + { + "epoch": 1.5040350472676964, + "grad_norm": 1.798955261342233, + "learning_rate": 3.181255772046575e-07, + "loss": 0.488269567489624, + "step": 6523 + }, + { + "epoch": 1.5042656213972792, + "grad_norm": 1.4981578078592954, + "learning_rate": 3.1784674589157767e-07, + "loss": 0.41664889454841614, + "step": 6524 + }, + { + "epoch": 1.5044961955268619, + "grad_norm": 1.6014375227212925, + "learning_rate": 3.175680137363863e-07, + "loss": 0.4862533509731293, + "step": 6525 + }, + { + "epoch": 1.5047267696564446, + "grad_norm": 1.599713126186934, + "learning_rate": 3.172893807796004e-07, + "loss": 0.4629037380218506, + "step": 6526 + }, + { + "epoch": 1.5049573437860273, + "grad_norm": 1.6094632634811818, + "learning_rate": 3.1701084706172245e-07, + "loss": 0.46300196647644043, + "step": 6527 + }, + { + "epoch": 1.50518791791561, + "grad_norm": 1.4186362500626026, + "learning_rate": 
3.1673241262323934e-07, + "loss": 0.40698888897895813, + "step": 6528 + }, + { + "epoch": 1.5054184920451925, + "grad_norm": 1.484473947418196, + "learning_rate": 3.1645407750462514e-07, + "loss": 0.4344380497932434, + "step": 6529 + }, + { + "epoch": 1.5056490661747752, + "grad_norm": 1.6200348544461498, + "learning_rate": 3.1617584174633806e-07, + "loss": 0.49757128953933716, + "step": 6530 + }, + { + "epoch": 1.5058796403043577, + "grad_norm": 1.6256839483530447, + "learning_rate": 3.15897705388823e-07, + "loss": 0.4506916105747223, + "step": 6531 + }, + { + "epoch": 1.5061102144339404, + "grad_norm": 1.5009759227514647, + "learning_rate": 3.156196684725093e-07, + "loss": 0.3941146731376648, + "step": 6532 + }, + { + "epoch": 1.5063407885635232, + "grad_norm": 1.9065405733956409, + "learning_rate": 3.153417310378127e-07, + "loss": 0.5400820374488831, + "step": 6533 + }, + { + "epoch": 1.5065713626931059, + "grad_norm": 1.774411964329925, + "learning_rate": 3.1506389312513435e-07, + "loss": 0.4418470859527588, + "step": 6534 + }, + { + "epoch": 1.5068019368226886, + "grad_norm": 1.3196915654196755, + "learning_rate": 3.1478615477486113e-07, + "loss": 0.3897334933280945, + "step": 6535 + }, + { + "epoch": 1.5070325109522713, + "grad_norm": 1.5772083777596413, + "learning_rate": 3.145085160273647e-07, + "loss": 0.4923437833786011, + "step": 6536 + }, + { + "epoch": 1.5072630850818538, + "grad_norm": 1.575539005736493, + "learning_rate": 3.142309769230025e-07, + "loss": 0.41996920108795166, + "step": 6537 + }, + { + "epoch": 1.5074936592114365, + "grad_norm": 1.5634954618427415, + "learning_rate": 3.1395353750211806e-07, + "loss": 0.38584667444229126, + "step": 6538 + }, + { + "epoch": 1.507724233341019, + "grad_norm": 1.5469052539454182, + "learning_rate": 3.136761978050395e-07, + "loss": 0.5093455910682678, + "step": 6539 + }, + { + "epoch": 1.5079548074706017, + "grad_norm": 1.8844111555093896, + "learning_rate": 3.1339895787208126e-07, + "loss": 
0.5592935681343079, + "step": 6540 + }, + { + "epoch": 1.5081853816001844, + "grad_norm": 1.7670191671756568, + "learning_rate": 3.1312181774354306e-07, + "loss": 0.38311779499053955, + "step": 6541 + }, + { + "epoch": 1.5084159557297672, + "grad_norm": 1.6894588927823573, + "learning_rate": 3.1284477745971025e-07, + "loss": 0.4422299265861511, + "step": 6542 + }, + { + "epoch": 1.5086465298593499, + "grad_norm": 1.5653024747826005, + "learning_rate": 3.125678370608528e-07, + "loss": 0.5097527503967285, + "step": 6543 + }, + { + "epoch": 1.5088771039889326, + "grad_norm": 1.4635088499535702, + "learning_rate": 3.1229099658722747e-07, + "loss": 0.42586642503738403, + "step": 6544 + }, + { + "epoch": 1.509107678118515, + "grad_norm": 1.7853929312810684, + "learning_rate": 3.120142560790755e-07, + "loss": 0.5006861686706543, + "step": 6545 + }, + { + "epoch": 1.5093382522480978, + "grad_norm": 1.292111562170076, + "learning_rate": 3.117376155766237e-07, + "loss": 0.4361686706542969, + "step": 6546 + }, + { + "epoch": 1.5095688263776803, + "grad_norm": 1.4890005224956508, + "learning_rate": 3.11461075120085e-07, + "loss": 0.45466339588165283, + "step": 6547 + }, + { + "epoch": 1.509799400507263, + "grad_norm": 1.4657261766322067, + "learning_rate": 3.1118463474965697e-07, + "loss": 0.39591068029403687, + "step": 6548 + }, + { + "epoch": 1.5100299746368457, + "grad_norm": 1.669083463008409, + "learning_rate": 3.1090829450552316e-07, + "loss": 0.4672427475452423, + "step": 6549 + }, + { + "epoch": 1.5102605487664285, + "grad_norm": 1.6273442700037082, + "learning_rate": 3.1063205442785234e-07, + "loss": 0.4785880148410797, + "step": 6550 + }, + { + "epoch": 1.5104911228960112, + "grad_norm": 1.3915985235576667, + "learning_rate": 3.103559145567994e-07, + "loss": 0.441936731338501, + "step": 6551 + }, + { + "epoch": 1.510721697025594, + "grad_norm": 1.5501390159164539, + "learning_rate": 3.1007987493250334e-07, + "loss": 0.49719512462615967, + "step": 6552 + }, + { + 
"epoch": 1.5109522711551764, + "grad_norm": 1.7806538694012621, + "learning_rate": 3.098039355950899e-07, + "loss": 0.40702491998672485, + "step": 6553 + }, + { + "epoch": 1.511182845284759, + "grad_norm": 1.4605232780084745, + "learning_rate": 3.0952809658466896e-07, + "loss": 0.44754648208618164, + "step": 6554 + }, + { + "epoch": 1.5114134194143416, + "grad_norm": 1.7119927234849008, + "learning_rate": 3.0925235794133717e-07, + "loss": 0.5370102524757385, + "step": 6555 + }, + { + "epoch": 1.5116439935439243, + "grad_norm": 1.4781444883115034, + "learning_rate": 3.089767197051755e-07, + "loss": 0.46693646907806396, + "step": 6556 + }, + { + "epoch": 1.511874567673507, + "grad_norm": 1.3940905139236526, + "learning_rate": 3.0870118191625084e-07, + "loss": 0.3887597322463989, + "step": 6557 + }, + { + "epoch": 1.5121051418030897, + "grad_norm": 1.509297997221229, + "learning_rate": 3.0842574461461577e-07, + "loss": 0.4783397912979126, + "step": 6558 + }, + { + "epoch": 1.5123357159326725, + "grad_norm": 2.254982960205746, + "learning_rate": 3.081504078403073e-07, + "loss": 0.5305588245391846, + "step": 6559 + }, + { + "epoch": 1.5125662900622552, + "grad_norm": 1.867807225680096, + "learning_rate": 3.078751716333492e-07, + "loss": 0.45315784215927124, + "step": 6560 + }, + { + "epoch": 1.5127968641918377, + "grad_norm": 1.6356411182801975, + "learning_rate": 3.0760003603374897e-07, + "loss": 0.4805132746696472, + "step": 6561 + }, + { + "epoch": 1.5130274383214202, + "grad_norm": 1.5579254915377003, + "learning_rate": 3.0732500108150104e-07, + "loss": 0.4956076145172119, + "step": 6562 + }, + { + "epoch": 1.5132580124510029, + "grad_norm": 1.6872988549232402, + "learning_rate": 3.07050066816584e-07, + "loss": 0.3629196882247925, + "step": 6563 + }, + { + "epoch": 1.5134885865805856, + "grad_norm": 1.4271734684348691, + "learning_rate": 3.067752332789626e-07, + "loss": 0.43240371346473694, + "step": 6564 + }, + { + "epoch": 1.5137191607101683, + "grad_norm": 
1.4730845718882644, + "learning_rate": 3.065005005085869e-07, + "loss": 0.4933302402496338, + "step": 6565 + }, + { + "epoch": 1.513949734839751, + "grad_norm": 1.5594123406832316, + "learning_rate": 3.0622586854539155e-07, + "loss": 0.47905197739601135, + "step": 6566 + }, + { + "epoch": 1.5141803089693338, + "grad_norm": 1.3120965583955209, + "learning_rate": 3.059513374292978e-07, + "loss": 0.4245232343673706, + "step": 6567 + }, + { + "epoch": 1.5144108830989162, + "grad_norm": 1.6401225191596096, + "learning_rate": 3.0567690720021077e-07, + "loss": 0.40526312589645386, + "step": 6568 + }, + { + "epoch": 1.514641457228499, + "grad_norm": 1.7208705138340397, + "learning_rate": 3.0540257789802227e-07, + "loss": 0.5808804631233215, + "step": 6569 + }, + { + "epoch": 1.5148720313580815, + "grad_norm": 1.791338069752229, + "learning_rate": 3.0512834956260836e-07, + "loss": 0.44997286796569824, + "step": 6570 + }, + { + "epoch": 1.5151026054876642, + "grad_norm": 1.6800897456169108, + "learning_rate": 3.048542222338315e-07, + "loss": 0.44051581621170044, + "step": 6571 + }, + { + "epoch": 1.515333179617247, + "grad_norm": 1.525217042834723, + "learning_rate": 3.045801959515382e-07, + "loss": 0.5113236308097839, + "step": 6572 + }, + { + "epoch": 1.5155637537468296, + "grad_norm": 1.5439102757372205, + "learning_rate": 3.0430627075556125e-07, + "loss": 0.554703950881958, + "step": 6573 + }, + { + "epoch": 1.5157943278764123, + "grad_norm": 1.600156572288611, + "learning_rate": 3.0403244668571847e-07, + "loss": 0.3819808065891266, + "step": 6574 + }, + { + "epoch": 1.516024902005995, + "grad_norm": 1.4872928405937125, + "learning_rate": 3.037587237818133e-07, + "loss": 0.47970864176750183, + "step": 6575 + }, + { + "epoch": 1.5162554761355775, + "grad_norm": 1.4776778157711579, + "learning_rate": 3.0348510208363386e-07, + "loss": 0.4296469986438751, + "step": 6576 + }, + { + "epoch": 1.5164860502651603, + "grad_norm": 1.462836798021035, + "learning_rate": 
3.032115816309535e-07, + "loss": 0.4372752904891968, + "step": 6577 + }, + { + "epoch": 1.5167166243947428, + "grad_norm": 1.673613757204577, + "learning_rate": 3.029381624635318e-07, + "loss": 0.4711950719356537, + "step": 6578 + }, + { + "epoch": 1.5169471985243255, + "grad_norm": 1.3932522433513406, + "learning_rate": 3.026648446211124e-07, + "loss": 0.4448170065879822, + "step": 6579 + }, + { + "epoch": 1.5171777726539082, + "grad_norm": 1.6184181695445041, + "learning_rate": 3.02391628143425e-07, + "loss": 0.4527873992919922, + "step": 6580 + }, + { + "epoch": 1.517408346783491, + "grad_norm": 1.6799725255249693, + "learning_rate": 3.0211851307018463e-07, + "loss": 0.453765332698822, + "step": 6581 + }, + { + "epoch": 1.5176389209130736, + "grad_norm": 1.686193810125547, + "learning_rate": 3.018454994410915e-07, + "loss": 0.46818265318870544, + "step": 6582 + }, + { + "epoch": 1.5178694950426563, + "grad_norm": 1.6601834563107158, + "learning_rate": 3.0157258729583026e-07, + "loss": 0.38551369309425354, + "step": 6583 + }, + { + "epoch": 1.5181000691722388, + "grad_norm": 1.2759146716130436, + "learning_rate": 3.012997766740721e-07, + "loss": 0.3651260733604431, + "step": 6584 + }, + { + "epoch": 1.5183306433018215, + "grad_norm": 1.4942378521466573, + "learning_rate": 3.010270676154726e-07, + "loss": 0.36894726753234863, + "step": 6585 + }, + { + "epoch": 1.518561217431404, + "grad_norm": 1.5163949110289714, + "learning_rate": 3.007544601596722e-07, + "loss": 0.42595791816711426, + "step": 6586 + }, + { + "epoch": 1.5187917915609868, + "grad_norm": 1.9011368495730705, + "learning_rate": 3.004819543462979e-07, + "loss": 0.4916795492172241, + "step": 6587 + }, + { + "epoch": 1.5190223656905695, + "grad_norm": 3.958756092482824, + "learning_rate": 3.0020955021496073e-07, + "loss": 0.5098932385444641, + "step": 6588 + }, + { + "epoch": 1.5192529398201522, + "grad_norm": 1.7429564765653418, + "learning_rate": 2.9993724780525796e-07, + "loss": 0.6336305737495422, + 
"step": 6589 + }, + { + "epoch": 1.519483513949735, + "grad_norm": 1.6454779446539551, + "learning_rate": 2.996650471567709e-07, + "loss": 0.4911893606185913, + "step": 6590 + }, + { + "epoch": 1.5197140880793176, + "grad_norm": 1.6053455149976412, + "learning_rate": 2.9939294830906727e-07, + "loss": 0.4388008117675781, + "step": 6591 + }, + { + "epoch": 1.5199446622089001, + "grad_norm": 1.4960203678707569, + "learning_rate": 2.991209513016986e-07, + "loss": 0.392263799905777, + "step": 6592 + }, + { + "epoch": 1.5201752363384828, + "grad_norm": 1.4101720949081316, + "learning_rate": 2.988490561742032e-07, + "loss": 0.36495402455329895, + "step": 6593 + }, + { + "epoch": 1.5204058104680653, + "grad_norm": 1.6817212910549741, + "learning_rate": 2.985772629661032e-07, + "loss": 0.5280855298042297, + "step": 6594 + }, + { + "epoch": 1.520636384597648, + "grad_norm": 1.4575719708434207, + "learning_rate": 2.9830557171690693e-07, + "loss": 0.43953752517700195, + "step": 6595 + }, + { + "epoch": 1.5208669587272308, + "grad_norm": 1.261754251016282, + "learning_rate": 2.980339824661071e-07, + "loss": 0.41361862421035767, + "step": 6596 + }, + { + "epoch": 1.5210975328568135, + "grad_norm": 1.4525947923531464, + "learning_rate": 2.977624952531821e-07, + "loss": 0.39955854415893555, + "step": 6597 + }, + { + "epoch": 1.5213281069863962, + "grad_norm": 1.664684863463753, + "learning_rate": 2.9749111011759565e-07, + "loss": 0.505165696144104, + "step": 6598 + }, + { + "epoch": 1.521558681115979, + "grad_norm": 1.5619432117854901, + "learning_rate": 2.9721982709879566e-07, + "loss": 0.4388153851032257, + "step": 6599 + }, + { + "epoch": 1.5217892552455614, + "grad_norm": 1.454152411615684, + "learning_rate": 2.969486462362167e-07, + "loss": 0.4479100704193115, + "step": 6600 + }, + { + "epoch": 1.5220198293751441, + "grad_norm": 1.4345831092951191, + "learning_rate": 2.9667756756927686e-07, + "loss": 0.4005380868911743, + "step": 6601 + }, + { + "epoch": 1.5222504035047266, + 
"grad_norm": 1.707280681236192, + "learning_rate": 2.9640659113738087e-07, + "loss": 0.43774881958961487, + "step": 6602 + }, + { + "epoch": 1.5224809776343093, + "grad_norm": 1.5608510724785551, + "learning_rate": 2.9613571697991725e-07, + "loss": 0.4449707865715027, + "step": 6603 + }, + { + "epoch": 1.522711551763892, + "grad_norm": 1.6567386639534631, + "learning_rate": 2.958649451362606e-07, + "loss": 0.454499751329422, + "step": 6604 + }, + { + "epoch": 1.5229421258934748, + "grad_norm": 1.2977143159727098, + "learning_rate": 2.955942756457707e-07, + "loss": 0.35601305961608887, + "step": 6605 + }, + { + "epoch": 1.5231727000230575, + "grad_norm": 1.6684183476509384, + "learning_rate": 2.9532370854779143e-07, + "loss": 0.5252523422241211, + "step": 6606 + }, + { + "epoch": 1.5234032741526402, + "grad_norm": 1.3731317276647081, + "learning_rate": 2.950532438816531e-07, + "loss": 0.4311884939670563, + "step": 6607 + }, + { + "epoch": 1.5236338482822227, + "grad_norm": 1.5784692430456444, + "learning_rate": 2.9478288168667e-07, + "loss": 0.43956485390663147, + "step": 6608 + }, + { + "epoch": 1.5238644224118054, + "grad_norm": 1.4213527447836085, + "learning_rate": 2.9451262200214235e-07, + "loss": 0.400115430355072, + "step": 6609 + }, + { + "epoch": 1.524094996541388, + "grad_norm": 1.6612091081011793, + "learning_rate": 2.942424648673548e-07, + "loss": 0.41738802194595337, + "step": 6610 + }, + { + "epoch": 1.5243255706709706, + "grad_norm": 1.5951584459105572, + "learning_rate": 2.939724103215776e-07, + "loss": 0.412765771150589, + "step": 6611 + }, + { + "epoch": 1.5245561448005533, + "grad_norm": 1.6739308031441762, + "learning_rate": 2.937024584040659e-07, + "loss": 0.44869422912597656, + "step": 6612 + }, + { + "epoch": 1.524786718930136, + "grad_norm": 1.5443554211834334, + "learning_rate": 2.934326091540603e-07, + "loss": 0.39191997051239014, + "step": 6613 + }, + { + "epoch": 1.5250172930597188, + "grad_norm": 1.307209963924962, + "learning_rate": 
2.9316286261078547e-07, + "loss": 0.36575692892074585, + "step": 6614 + }, + { + "epoch": 1.5252478671893015, + "grad_norm": 1.5775953874602453, + "learning_rate": 2.9289321881345254e-07, + "loss": 0.49928778409957886, + "step": 6615 + }, + { + "epoch": 1.525478441318884, + "grad_norm": 1.5029437064522762, + "learning_rate": 2.926236778012565e-07, + "loss": 0.49619296193122864, + "step": 6616 + }, + { + "epoch": 1.5257090154484667, + "grad_norm": 1.5175956935877304, + "learning_rate": 2.923542396133777e-07, + "loss": 0.4614447355270386, + "step": 6617 + }, + { + "epoch": 1.5259395895780492, + "grad_norm": 1.5326379965687464, + "learning_rate": 2.9208490428898213e-07, + "loss": 0.43820804357528687, + "step": 6618 + }, + { + "epoch": 1.526170163707632, + "grad_norm": 1.7297859153701105, + "learning_rate": 2.9181567186722e-07, + "loss": 0.46856528520584106, + "step": 6619 + }, + { + "epoch": 1.5264007378372146, + "grad_norm": 1.5560178508678546, + "learning_rate": 2.915465423872272e-07, + "loss": 0.45428818464279175, + "step": 6620 + }, + { + "epoch": 1.5266313119667974, + "grad_norm": 1.765757281110695, + "learning_rate": 2.912775158881243e-07, + "loss": 0.44715386629104614, + "step": 6621 + }, + { + "epoch": 1.52686188609638, + "grad_norm": 1.845941311143575, + "learning_rate": 2.9100859240901764e-07, + "loss": 0.537441611289978, + "step": 6622 + }, + { + "epoch": 1.5270924602259628, + "grad_norm": 2.100811269468338, + "learning_rate": 2.9073977198899714e-07, + "loss": 0.4430112838745117, + "step": 6623 + }, + { + "epoch": 1.5273230343555453, + "grad_norm": 1.625928583733216, + "learning_rate": 2.904710546671392e-07, + "loss": 0.41713255643844604, + "step": 6624 + }, + { + "epoch": 1.527553608485128, + "grad_norm": 1.639578198355071, + "learning_rate": 2.9020244048250396e-07, + "loss": 0.4313931465148926, + "step": 6625 + }, + { + "epoch": 1.5277841826147105, + "grad_norm": 1.617455818460061, + "learning_rate": 2.899339294741379e-07, + "loss": 0.5038034319877625, + 
"step": 6626 + }, + { + "epoch": 1.5280147567442932, + "grad_norm": 1.6017224429954546, + "learning_rate": 2.8966552168107127e-07, + "loss": 0.45088762044906616, + "step": 6627 + }, + { + "epoch": 1.528245330873876, + "grad_norm": 1.6027378992570083, + "learning_rate": 2.8939721714232e-07, + "loss": 0.40857064723968506, + "step": 6628 + }, + { + "epoch": 1.5284759050034586, + "grad_norm": 1.5432592985198028, + "learning_rate": 2.891290158968853e-07, + "loss": 0.43766242265701294, + "step": 6629 + }, + { + "epoch": 1.5287064791330414, + "grad_norm": 1.6663524119863393, + "learning_rate": 2.888609179837523e-07, + "loss": 0.45986247062683105, + "step": 6630 + }, + { + "epoch": 1.528937053262624, + "grad_norm": 1.5102818288035118, + "learning_rate": 2.8859292344189236e-07, + "loss": 0.4681728482246399, + "step": 6631 + }, + { + "epoch": 1.5291676273922066, + "grad_norm": 1.4009274503220306, + "learning_rate": 2.883250323102605e-07, + "loss": 0.36730295419692993, + "step": 6632 + }, + { + "epoch": 1.5293982015217893, + "grad_norm": 1.6785355662696937, + "learning_rate": 2.880572446277982e-07, + "loss": 0.43494418263435364, + "step": 6633 + }, + { + "epoch": 1.5296287756513718, + "grad_norm": 1.6257441783659756, + "learning_rate": 2.877895604334305e-07, + "loss": 0.49145790934562683, + "step": 6634 + }, + { + "epoch": 1.5298593497809545, + "grad_norm": 1.4638603112091872, + "learning_rate": 2.875219797660681e-07, + "loss": 0.4166264832019806, + "step": 6635 + }, + { + "epoch": 1.5300899239105372, + "grad_norm": 1.3504636181719787, + "learning_rate": 2.8725450266460704e-07, + "loss": 0.4336514472961426, + "step": 6636 + }, + { + "epoch": 1.53032049804012, + "grad_norm": 1.6796430942391267, + "learning_rate": 2.869871291679271e-07, + "loss": 0.44186240434646606, + "step": 6637 + }, + { + "epoch": 1.5305510721697027, + "grad_norm": 1.4751166079505253, + "learning_rate": 2.867198593148945e-07, + "loss": 0.40619733929634094, + "step": 6638 + }, + { + "epoch": 
1.5307816462992854, + "grad_norm": 1.4034694689938345, + "learning_rate": 2.864526931443588e-07, + "loss": 0.45552101731300354, + "step": 6639 + }, + { + "epoch": 1.5310122204288679, + "grad_norm": 1.3563039501008287, + "learning_rate": 2.861856306951562e-07, + "loss": 0.45153865218162537, + "step": 6640 + }, + { + "epoch": 1.5312427945584506, + "grad_norm": 1.5793746333655185, + "learning_rate": 2.859186720061061e-07, + "loss": 0.5146148204803467, + "step": 6641 + }, + { + "epoch": 1.531473368688033, + "grad_norm": 1.5627792728055054, + "learning_rate": 2.856518171160143e-07, + "loss": 0.4566080868244171, + "step": 6642 + }, + { + "epoch": 1.5317039428176158, + "grad_norm": 1.93802928616596, + "learning_rate": 2.853850660636703e-07, + "loss": 0.4390585124492645, + "step": 6643 + }, + { + "epoch": 1.5319345169471985, + "grad_norm": 1.7734959004013588, + "learning_rate": 2.851184188878493e-07, + "loss": 0.5508195757865906, + "step": 6644 + }, + { + "epoch": 1.5321650910767812, + "grad_norm": 1.6721581584041076, + "learning_rate": 2.8485187562731126e-07, + "loss": 0.47640183568000793, + "step": 6645 + }, + { + "epoch": 1.532395665206364, + "grad_norm": 1.421769874384772, + "learning_rate": 2.8458543632080123e-07, + "loss": 0.4511566758155823, + "step": 6646 + }, + { + "epoch": 1.5326262393359467, + "grad_norm": 1.5003089507123706, + "learning_rate": 2.843191010070486e-07, + "loss": 0.414367139339447, + "step": 6647 + }, + { + "epoch": 1.5328568134655292, + "grad_norm": 1.5192326893049226, + "learning_rate": 2.840528697247674e-07, + "loss": 0.4611589312553406, + "step": 6648 + }, + { + "epoch": 1.5330873875951119, + "grad_norm": 1.6397285440449882, + "learning_rate": 2.8378674251265787e-07, + "loss": 0.4675883948802948, + "step": 6649 + }, + { + "epoch": 1.5333179617246944, + "grad_norm": 1.6281144487220143, + "learning_rate": 2.835207194094036e-07, + "loss": 0.49039095640182495, + "step": 6650 + }, + { + "epoch": 1.533548535854277, + "grad_norm": 1.6636356702139277, 
+ "learning_rate": 2.832548004536741e-07, + "loss": 0.45641693472862244, + "step": 6651 + }, + { + "epoch": 1.5337791099838598, + "grad_norm": 1.7323507398911224, + "learning_rate": 2.829889856841233e-07, + "loss": 0.4858587682247162, + "step": 6652 + }, + { + "epoch": 1.5340096841134425, + "grad_norm": 1.3640056940377991, + "learning_rate": 2.8272327513939055e-07, + "loss": 0.3640017807483673, + "step": 6653 + }, + { + "epoch": 1.5342402582430252, + "grad_norm": 1.5342226074105705, + "learning_rate": 2.8245766885809865e-07, + "loss": 0.42915207147598267, + "step": 6654 + }, + { + "epoch": 1.534470832372608, + "grad_norm": 1.5250515427099394, + "learning_rate": 2.8219216687885707e-07, + "loss": 0.5041407346725464, + "step": 6655 + }, + { + "epoch": 1.5347014065021904, + "grad_norm": 1.479165849869464, + "learning_rate": 2.8192676924025885e-07, + "loss": 0.4748334288597107, + "step": 6656 + }, + { + "epoch": 1.5349319806317732, + "grad_norm": 1.5854109757101433, + "learning_rate": 2.8166147598088173e-07, + "loss": 0.4745975136756897, + "step": 6657 + }, + { + "epoch": 1.5351625547613557, + "grad_norm": 1.6430139570672564, + "learning_rate": 2.813962871392893e-07, + "loss": 0.49246084690093994, + "step": 6658 + }, + { + "epoch": 1.5353931288909384, + "grad_norm": 1.3796442061928538, + "learning_rate": 2.8113120275402936e-07, + "loss": 0.47876033186912537, + "step": 6659 + }, + { + "epoch": 1.535623703020521, + "grad_norm": 1.6460545742229191, + "learning_rate": 2.808662228636348e-07, + "loss": 0.5244987607002258, + "step": 6660 + }, + { + "epoch": 1.5358542771501038, + "grad_norm": 1.6433381019004774, + "learning_rate": 2.8060134750662277e-07, + "loss": 0.44661569595336914, + "step": 6661 + }, + { + "epoch": 1.5360848512796865, + "grad_norm": 1.4583799872096337, + "learning_rate": 2.8033657672149615e-07, + "loss": 0.4508060812950134, + "step": 6662 + }, + { + "epoch": 1.5363154254092692, + "grad_norm": 1.3497148067649773, + "learning_rate": 2.8007191054674117e-07, + 
"loss": 0.4657326340675354, + "step": 6663 + }, + { + "epoch": 1.5365459995388517, + "grad_norm": 1.4227603766742651, + "learning_rate": 2.798073490208307e-07, + "loss": 0.495077520608902, + "step": 6664 + }, + { + "epoch": 1.5367765736684345, + "grad_norm": 1.4557135691757939, + "learning_rate": 2.795428921822206e-07, + "loss": 0.40721309185028076, + "step": 6665 + }, + { + "epoch": 1.537007147798017, + "grad_norm": 1.4109014285343175, + "learning_rate": 2.7927854006935315e-07, + "loss": 0.3279367685317993, + "step": 6666 + }, + { + "epoch": 1.5372377219275997, + "grad_norm": 1.6893419118169095, + "learning_rate": 2.790142927206538e-07, + "loss": 0.4849242866039276, + "step": 6667 + }, + { + "epoch": 1.5374682960571824, + "grad_norm": 1.7502055418971636, + "learning_rate": 2.7875015017453394e-07, + "loss": 0.45151397585868835, + "step": 6668 + }, + { + "epoch": 1.537698870186765, + "grad_norm": 1.7275509884274352, + "learning_rate": 2.784861124693898e-07, + "loss": 0.43480992317199707, + "step": 6669 + }, + { + "epoch": 1.5379294443163478, + "grad_norm": 1.606181868361543, + "learning_rate": 2.782221796436012e-07, + "loss": 0.48764440417289734, + "step": 6670 + }, + { + "epoch": 1.5381600184459305, + "grad_norm": 1.5345831310523104, + "learning_rate": 2.7795835173553407e-07, + "loss": 0.4164161682128906, + "step": 6671 + }, + { + "epoch": 1.538390592575513, + "grad_norm": 1.8060994369656536, + "learning_rate": 2.7769462878353777e-07, + "loss": 0.49934858083724976, + "step": 6672 + }, + { + "epoch": 1.5386211667050955, + "grad_norm": 1.4004311994850918, + "learning_rate": 2.77431010825948e-07, + "loss": 0.4877321124076843, + "step": 6673 + }, + { + "epoch": 1.5388517408346782, + "grad_norm": 1.7442704894714258, + "learning_rate": 2.771674979010834e-07, + "loss": 0.44518858194351196, + "step": 6674 + }, + { + "epoch": 1.539082314964261, + "grad_norm": 1.4902795732558884, + "learning_rate": 2.769040900472488e-07, + "loss": 0.4237474203109741, + "step": 6675 + }, + { 
+ "epoch": 1.5393128890938437, + "grad_norm": 1.8818051716593445, + "learning_rate": 2.7664078730273335e-07, + "loss": 0.45270341634750366, + "step": 6676 + }, + { + "epoch": 1.5395434632234264, + "grad_norm": 1.9777420597791724, + "learning_rate": 2.7637758970581004e-07, + "loss": 0.3866819739341736, + "step": 6677 + }, + { + "epoch": 1.539774037353009, + "grad_norm": 1.709571144624541, + "learning_rate": 2.7611449729473825e-07, + "loss": 0.4384220838546753, + "step": 6678 + }, + { + "epoch": 1.5400046114825916, + "grad_norm": 1.523752237168306, + "learning_rate": 2.758515101077602e-07, + "loss": 0.4462182819843292, + "step": 6679 + }, + { + "epoch": 1.5402351856121743, + "grad_norm": 1.6129576485586044, + "learning_rate": 2.755886281831046e-07, + "loss": 0.3927033245563507, + "step": 6680 + }, + { + "epoch": 1.5404657597417568, + "grad_norm": 1.7095013933604486, + "learning_rate": 2.7532585155898314e-07, + "loss": 0.4678634703159332, + "step": 6681 + }, + { + "epoch": 1.5406963338713395, + "grad_norm": 1.4524055684149206, + "learning_rate": 2.750631802735935e-07, + "loss": 0.4165131151676178, + "step": 6682 + }, + { + "epoch": 1.5409269080009222, + "grad_norm": 1.1494402193253566, + "learning_rate": 2.748006143651178e-07, + "loss": 0.3705793023109436, + "step": 6683 + }, + { + "epoch": 1.541157482130505, + "grad_norm": 1.5819526439113667, + "learning_rate": 2.745381538717226e-07, + "loss": 0.5428882837295532, + "step": 6684 + }, + { + "epoch": 1.5413880562600877, + "grad_norm": 1.6426127293668795, + "learning_rate": 2.742757988315589e-07, + "loss": 0.4116673171520233, + "step": 6685 + }, + { + "epoch": 1.5416186303896704, + "grad_norm": 1.4540567592422353, + "learning_rate": 2.740135492827631e-07, + "loss": 0.4617515802383423, + "step": 6686 + }, + { + "epoch": 1.541849204519253, + "grad_norm": 1.6140828940427878, + "learning_rate": 2.737514052634555e-07, + "loss": 0.5002453923225403, + "step": 6687 + }, + { + "epoch": 1.5420797786488356, + "grad_norm": 
1.4130856063185002, + "learning_rate": 2.734893668117412e-07, + "loss": 0.46029362082481384, + "step": 6688 + }, + { + "epoch": 1.542310352778418, + "grad_norm": 1.4809565956171882, + "learning_rate": 2.732274339657107e-07, + "loss": 0.40502026677131653, + "step": 6689 + }, + { + "epoch": 1.5425409269080008, + "grad_norm": 1.6538580711421296, + "learning_rate": 2.7296560676343803e-07, + "loss": 0.5267831087112427, + "step": 6690 + }, + { + "epoch": 1.5427715010375835, + "grad_norm": 1.3087993674480496, + "learning_rate": 2.727038852429826e-07, + "loss": 0.3464335799217224, + "step": 6691 + }, + { + "epoch": 1.5430020751671663, + "grad_norm": 1.5384863769893498, + "learning_rate": 2.7244226944238847e-07, + "loss": 0.36635881662368774, + "step": 6692 + }, + { + "epoch": 1.543232649296749, + "grad_norm": 1.7314925345176482, + "learning_rate": 2.7218075939968435e-07, + "loss": 0.4567757844924927, + "step": 6693 + }, + { + "epoch": 1.5434632234263317, + "grad_norm": 1.9452957704897642, + "learning_rate": 2.719193551528827e-07, + "loss": 0.539220929145813, + "step": 6694 + }, + { + "epoch": 1.5436937975559142, + "grad_norm": 1.653206530012829, + "learning_rate": 2.71658056739982e-07, + "loss": 0.48553818464279175, + "step": 6695 + }, + { + "epoch": 1.543924371685497, + "grad_norm": 1.5040526715775615, + "learning_rate": 2.7139686419896424e-07, + "loss": 0.48564499616622925, + "step": 6696 + }, + { + "epoch": 1.5441549458150794, + "grad_norm": 1.3502417010865393, + "learning_rate": 2.7113577756779616e-07, + "loss": 0.4163014590740204, + "step": 6697 + }, + { + "epoch": 1.544385519944662, + "grad_norm": 1.864828438533457, + "learning_rate": 2.708747968844296e-07, + "loss": 0.5686431527137756, + "step": 6698 + }, + { + "epoch": 1.5446160940742448, + "grad_norm": 1.8608147536494253, + "learning_rate": 2.706139221868008e-07, + "loss": 0.5365211963653564, + "step": 6699 + }, + { + "epoch": 1.5448466682038275, + "grad_norm": 1.5480523179756653, + "learning_rate": 
2.7035315351283084e-07, + "loss": 0.4147397577762604, + "step": 6700 + }, + { + "epoch": 1.5450772423334103, + "grad_norm": 1.5279455451058772, + "learning_rate": 2.7009249090042454e-07, + "loss": 0.3938590884208679, + "step": 6701 + }, + { + "epoch": 1.545307816462993, + "grad_norm": 1.726862148896079, + "learning_rate": 2.698319343874722e-07, + "loss": 0.3521370589733124, + "step": 6702 + }, + { + "epoch": 1.5455383905925755, + "grad_norm": 1.6305887024948476, + "learning_rate": 2.69571484011848e-07, + "loss": 0.430014967918396, + "step": 6703 + }, + { + "epoch": 1.5457689647221582, + "grad_norm": 1.636933956561892, + "learning_rate": 2.6931113981141164e-07, + "loss": 0.4697108864784241, + "step": 6704 + }, + { + "epoch": 1.5459995388517407, + "grad_norm": 1.5552943329509785, + "learning_rate": 2.69050901824006e-07, + "loss": 0.46567851305007935, + "step": 6705 + }, + { + "epoch": 1.5462301129813234, + "grad_norm": 1.620367133120872, + "learning_rate": 2.6879077008745986e-07, + "loss": 0.46061819791793823, + "step": 6706 + }, + { + "epoch": 1.5464606871109061, + "grad_norm": 1.5411435279833592, + "learning_rate": 2.6853074463958614e-07, + "loss": 0.568658709526062, + "step": 6707 + }, + { + "epoch": 1.5466912612404888, + "grad_norm": 1.3834999667432357, + "learning_rate": 2.682708255181815e-07, + "loss": 0.42816412448883057, + "step": 6708 + }, + { + "epoch": 1.5469218353700716, + "grad_norm": 1.576410551372393, + "learning_rate": 2.6801101276102866e-07, + "loss": 0.42515552043914795, + "step": 6709 + }, + { + "epoch": 1.5471524094996543, + "grad_norm": 1.5447523266389376, + "learning_rate": 2.677513064058932e-07, + "loss": 0.46513399481773376, + "step": 6710 + }, + { + "epoch": 1.5473829836292368, + "grad_norm": 1.3853944144224488, + "learning_rate": 2.6749170649052675e-07, + "loss": 0.4194756746292114, + "step": 6711 + }, + { + "epoch": 1.5476135577588195, + "grad_norm": 1.4035563039276318, + "learning_rate": 2.672322130526643e-07, + "loss": 0.4456541836261749, 
+ "step": 6712 + }, + { + "epoch": 1.547844131888402, + "grad_norm": 1.5113453932130136, + "learning_rate": 2.669728261300264e-07, + "loss": 0.493444561958313, + "step": 6713 + }, + { + "epoch": 1.5480747060179847, + "grad_norm": 1.582884732282312, + "learning_rate": 2.6671354576031645e-07, + "loss": 0.47202616930007935, + "step": 6714 + }, + { + "epoch": 1.5483052801475674, + "grad_norm": 1.824788636144565, + "learning_rate": 2.66454371981225e-07, + "loss": 0.4584811329841614, + "step": 6715 + }, + { + "epoch": 1.5485358542771501, + "grad_norm": 1.3167028831683925, + "learning_rate": 2.6619530483042485e-07, + "loss": 0.4072091579437256, + "step": 6716 + }, + { + "epoch": 1.5487664284067328, + "grad_norm": 1.5656021898929726, + "learning_rate": 2.6593634434557365e-07, + "loss": 0.49742361903190613, + "step": 6717 + }, + { + "epoch": 1.5489970025363156, + "grad_norm": 1.6686846450785309, + "learning_rate": 2.6567749056431467e-07, + "loss": 0.49291643500328064, + "step": 6718 + }, + { + "epoch": 1.549227576665898, + "grad_norm": 1.5234565390584587, + "learning_rate": 2.6541874352427427e-07, + "loss": 0.5210362076759338, + "step": 6719 + }, + { + "epoch": 1.5494581507954808, + "grad_norm": 1.523136615036839, + "learning_rate": 2.651601032630645e-07, + "loss": 0.4489557147026062, + "step": 6720 + }, + { + "epoch": 1.5496887249250633, + "grad_norm": 1.515706035484409, + "learning_rate": 2.649015698182808e-07, + "loss": 0.4417908191680908, + "step": 6721 + }, + { + "epoch": 1.549919299054646, + "grad_norm": 1.5123745571810647, + "learning_rate": 2.6464314322750404e-07, + "loss": 0.45177266001701355, + "step": 6722 + }, + { + "epoch": 1.5501498731842287, + "grad_norm": 1.5422888438788165, + "learning_rate": 2.6438482352829896e-07, + "loss": 0.37720638513565063, + "step": 6723 + }, + { + "epoch": 1.5503804473138114, + "grad_norm": 1.5572735157633186, + "learning_rate": 2.641266107582153e-07, + "loss": 0.5108897089958191, + "step": 6724 + }, + { + "epoch": 
1.5506110214433941, + "grad_norm": 1.5098940840101445, + "learning_rate": 2.638685049547863e-07, + "loss": 0.449248731136322, + "step": 6725 + }, + { + "epoch": 1.5508415955729768, + "grad_norm": 1.4667668469814954, + "learning_rate": 2.636105061555309e-07, + "loss": 0.4692652225494385, + "step": 6726 + }, + { + "epoch": 1.5510721697025593, + "grad_norm": 1.5150559633489926, + "learning_rate": 2.6335261439795153e-07, + "loss": 0.49128347635269165, + "step": 6727 + }, + { + "epoch": 1.551302743832142, + "grad_norm": 1.5725646817979666, + "learning_rate": 2.630948297195351e-07, + "loss": 0.4618053436279297, + "step": 6728 + }, + { + "epoch": 1.5515333179617246, + "grad_norm": 1.5786249232029208, + "learning_rate": 2.6283715215775336e-07, + "loss": 0.4342828094959259, + "step": 6729 + }, + { + "epoch": 1.5517638920913073, + "grad_norm": 1.5592983853420144, + "learning_rate": 2.625795817500626e-07, + "loss": 0.5214434862136841, + "step": 6730 + }, + { + "epoch": 1.55199446622089, + "grad_norm": 1.521395946192631, + "learning_rate": 2.623221185339034e-07, + "loss": 0.4873029589653015, + "step": 6731 + }, + { + "epoch": 1.5522250403504727, + "grad_norm": 1.5014817933254478, + "learning_rate": 2.6206476254670007e-07, + "loss": 0.4510548412799835, + "step": 6732 + }, + { + "epoch": 1.5524556144800554, + "grad_norm": 1.5931454307395074, + "learning_rate": 2.6180751382586265e-07, + "loss": 0.4832548499107361, + "step": 6733 + }, + { + "epoch": 1.5526861886096381, + "grad_norm": 1.8273040799326088, + "learning_rate": 2.6155037240878406e-07, + "loss": 0.5438823699951172, + "step": 6734 + }, + { + "epoch": 1.5529167627392206, + "grad_norm": 1.488758610712305, + "learning_rate": 2.6129333833284315e-07, + "loss": 0.4967566728591919, + "step": 6735 + }, + { + "epoch": 1.5531473368688034, + "grad_norm": 1.419700158234616, + "learning_rate": 2.610364116354018e-07, + "loss": 0.5187437534332275, + "step": 6736 + }, + { + "epoch": 1.5533779109983858, + "grad_norm": 1.3624978155475462, 
+ "learning_rate": 2.607795923538072e-07, + "loss": 0.4199862480163574, + "step": 6737 + }, + { + "epoch": 1.5536084851279686, + "grad_norm": 1.463828508781327, + "learning_rate": 2.6052288052539084e-07, + "loss": 0.5009325742721558, + "step": 6738 + }, + { + "epoch": 1.5538390592575513, + "grad_norm": 1.5361155892650822, + "learning_rate": 2.602662761874679e-07, + "loss": 0.48698678612709045, + "step": 6739 + }, + { + "epoch": 1.554069633387134, + "grad_norm": 1.4600353762817446, + "learning_rate": 2.6000977937733905e-07, + "loss": 0.4845883846282959, + "step": 6740 + }, + { + "epoch": 1.5543002075167167, + "grad_norm": 1.6153802807658302, + "learning_rate": 2.59753390132288e-07, + "loss": 0.512161135673523, + "step": 6741 + }, + { + "epoch": 1.5545307816462994, + "grad_norm": 1.756231295082545, + "learning_rate": 2.5949710848958415e-07, + "loss": 0.42334964871406555, + "step": 6742 + }, + { + "epoch": 1.554761355775882, + "grad_norm": 1.2927501946290025, + "learning_rate": 2.592409344864801e-07, + "loss": 0.3781980276107788, + "step": 6743 + }, + { + "epoch": 1.5549919299054646, + "grad_norm": 1.5363470406300028, + "learning_rate": 2.5898486816021394e-07, + "loss": 0.4989853501319885, + "step": 6744 + }, + { + "epoch": 1.5552225040350471, + "grad_norm": 1.5873964925893267, + "learning_rate": 2.5872890954800676e-07, + "loss": 0.45715585350990295, + "step": 6745 + }, + { + "epoch": 1.5554530781646299, + "grad_norm": 1.3499060893753405, + "learning_rate": 2.5847305868706515e-07, + "loss": 0.5025684833526611, + "step": 6746 + }, + { + "epoch": 1.5556836522942126, + "grad_norm": 1.5290460697986008, + "learning_rate": 2.5821731561457994e-07, + "loss": 0.47298115491867065, + "step": 6747 + }, + { + "epoch": 1.5559142264237953, + "grad_norm": 1.4250590830459762, + "learning_rate": 2.5796168036772524e-07, + "loss": 0.45412957668304443, + "step": 6748 + }, + { + "epoch": 1.556144800553378, + "grad_norm": 1.6230149340497857, + "learning_rate": 2.5770615298366107e-07, + 
"loss": 0.3958669602870941, + "step": 6749 + }, + { + "epoch": 1.5563753746829607, + "grad_norm": 1.4992477100706287, + "learning_rate": 2.574507334995302e-07, + "loss": 0.4748396873474121, + "step": 6750 + }, + { + "epoch": 1.5566059488125432, + "grad_norm": 2.1473408883216534, + "learning_rate": 2.5719542195246093e-07, + "loss": 0.4741169810295105, + "step": 6751 + }, + { + "epoch": 1.556836522942126, + "grad_norm": 1.5072269547692108, + "learning_rate": 2.569402183795648e-07, + "loss": 0.4362972378730774, + "step": 6752 + }, + { + "epoch": 1.5570670970717084, + "grad_norm": 1.5695384848079892, + "learning_rate": 2.5668512281793873e-07, + "loss": 0.48013412952423096, + "step": 6753 + }, + { + "epoch": 1.5572976712012911, + "grad_norm": 1.4514603270444408, + "learning_rate": 2.564301353046634e-07, + "loss": 0.4728567600250244, + "step": 6754 + }, + { + "epoch": 1.5575282453308739, + "grad_norm": 1.7592773476195727, + "learning_rate": 2.56175255876804e-07, + "loss": 0.4304337501525879, + "step": 6755 + }, + { + "epoch": 1.5577588194604566, + "grad_norm": 1.5275686028016913, + "learning_rate": 2.5592048457140926e-07, + "loss": 0.43467870354652405, + "step": 6756 + }, + { + "epoch": 1.5579893935900393, + "grad_norm": 1.9596482130933712, + "learning_rate": 2.556658214255134e-07, + "loss": 0.3912844657897949, + "step": 6757 + }, + { + "epoch": 1.558219967719622, + "grad_norm": 1.5284327791141838, + "learning_rate": 2.5541126647613397e-07, + "loss": 0.4462862014770508, + "step": 6758 + }, + { + "epoch": 1.5584505418492045, + "grad_norm": 1.5847675751494867, + "learning_rate": 2.551568197602729e-07, + "loss": 0.43929487466812134, + "step": 6759 + }, + { + "epoch": 1.5586811159787872, + "grad_norm": 1.5077581986013873, + "learning_rate": 2.549024813149169e-07, + "loss": 0.44473958015441895, + "step": 6760 + }, + { + "epoch": 1.5589116901083697, + "grad_norm": 1.5536876763085832, + "learning_rate": 2.546482511770365e-07, + "loss": 0.5159727931022644, + "step": 6761 + }, + 
{ + "epoch": 1.5591422642379524, + "grad_norm": 1.7371461951042986, + "learning_rate": 2.5439412938358696e-07, + "loss": 0.3975204825401306, + "step": 6762 + }, + { + "epoch": 1.5593728383675352, + "grad_norm": 1.493493619365051, + "learning_rate": 2.54140115971507e-07, + "loss": 0.5198286175727844, + "step": 6763 + }, + { + "epoch": 1.5596034124971179, + "grad_norm": 1.4309109790386, + "learning_rate": 2.5388621097772046e-07, + "loss": 0.4815763831138611, + "step": 6764 + }, + { + "epoch": 1.5598339866267006, + "grad_norm": 1.3803469238514527, + "learning_rate": 2.5363241443913454e-07, + "loss": 0.365215539932251, + "step": 6765 + }, + { + "epoch": 1.5600645607562833, + "grad_norm": 1.6088793691676593, + "learning_rate": 2.533787263926417e-07, + "loss": 0.486020028591156, + "step": 6766 + }, + { + "epoch": 1.5602951348858658, + "grad_norm": 1.5355383857513338, + "learning_rate": 2.5312514687511766e-07, + "loss": 0.38536715507507324, + "step": 6767 + }, + { + "epoch": 1.5605257090154485, + "grad_norm": 1.649862765507334, + "learning_rate": 2.528716759234227e-07, + "loss": 0.44713371992111206, + "step": 6768 + }, + { + "epoch": 1.560756283145031, + "grad_norm": 1.868794454538197, + "learning_rate": 2.5261831357440154e-07, + "loss": 0.4122806489467621, + "step": 6769 + }, + { + "epoch": 1.5609868572746137, + "grad_norm": 1.6234940940069353, + "learning_rate": 2.523650598648829e-07, + "loss": 0.40514320135116577, + "step": 6770 + }, + { + "epoch": 1.5612174314041964, + "grad_norm": 1.4417973525561176, + "learning_rate": 2.5211191483168027e-07, + "loss": 0.4273102283477783, + "step": 6771 + }, + { + "epoch": 1.5614480055337792, + "grad_norm": 1.4229504510118502, + "learning_rate": 2.5185887851159005e-07, + "loss": 0.4774209260940552, + "step": 6772 + }, + { + "epoch": 1.5616785796633619, + "grad_norm": 1.583645566960067, + "learning_rate": 2.5160595094139436e-07, + "loss": 0.3928600549697876, + "step": 6773 + }, + { + "epoch": 1.5619091537929446, + "grad_norm": 
1.6757793450729852, + "learning_rate": 2.5135313215785816e-07, + "loss": 0.4414944052696228, + "step": 6774 + }, + { + "epoch": 1.562139727922527, + "grad_norm": 1.733143939427008, + "learning_rate": 2.5110042219773176e-07, + "loss": 0.36133646965026855, + "step": 6775 + }, + { + "epoch": 1.5623703020521098, + "grad_norm": 1.8443586806925936, + "learning_rate": 2.508478210977486e-07, + "loss": 0.44824904203414917, + "step": 6776 + }, + { + "epoch": 1.5626008761816923, + "grad_norm": 1.1693439456079453, + "learning_rate": 2.5059532889462707e-07, + "loss": 0.3699820637702942, + "step": 6777 + }, + { + "epoch": 1.562831450311275, + "grad_norm": 1.9309547773144982, + "learning_rate": 2.5034294562506976e-07, + "loss": 0.4809808135032654, + "step": 6778 + }, + { + "epoch": 1.5630620244408577, + "grad_norm": 1.7665230327633363, + "learning_rate": 2.5009067132576256e-07, + "loss": 0.487751841545105, + "step": 6779 + }, + { + "epoch": 1.5632925985704405, + "grad_norm": 1.5839144124062823, + "learning_rate": 2.4983850603337675e-07, + "loss": 0.47932374477386475, + "step": 6780 + }, + { + "epoch": 1.5635231727000232, + "grad_norm": 1.4782012523005248, + "learning_rate": 2.495864497845663e-07, + "loss": 0.42852234840393066, + "step": 6781 + }, + { + "epoch": 1.5637537468296059, + "grad_norm": 1.4802387383863571, + "learning_rate": 2.49334502615971e-07, + "loss": 0.4392131567001343, + "step": 6782 + }, + { + "epoch": 1.5639843209591884, + "grad_norm": 1.5042475261036963, + "learning_rate": 2.4908266456421323e-07, + "loss": 0.45050233602523804, + "step": 6783 + }, + { + "epoch": 1.5642148950887709, + "grad_norm": 1.4962883173938244, + "learning_rate": 2.488309356659004e-07, + "loss": 0.45328110456466675, + "step": 6784 + }, + { + "epoch": 1.5644454692183536, + "grad_norm": 1.451199382042834, + "learning_rate": 2.4857931595762403e-07, + "loss": 0.3851325511932373, + "step": 6785 + }, + { + "epoch": 1.5646760433479363, + "grad_norm": 1.5269726027188475, + "learning_rate": 
2.4832780547595976e-07, + "loss": 0.4096960127353668, + "step": 6786 + }, + { + "epoch": 1.564906617477519, + "grad_norm": 1.4158017969205454, + "learning_rate": 2.480764042574669e-07, + "loss": 0.4439825117588043, + "step": 6787 + }, + { + "epoch": 1.5651371916071017, + "grad_norm": 1.5084778231824414, + "learning_rate": 2.4782511233868895e-07, + "loss": 0.4259459972381592, + "step": 6788 + }, + { + "epoch": 1.5653677657366845, + "grad_norm": 1.6383230301383533, + "learning_rate": 2.475739297561542e-07, + "loss": 0.4701216220855713, + "step": 6789 + }, + { + "epoch": 1.565598339866267, + "grad_norm": 1.4707071600317903, + "learning_rate": 2.473228565463742e-07, + "loss": 0.4435737133026123, + "step": 6790 + }, + { + "epoch": 1.5658289139958497, + "grad_norm": 1.4361527011832544, + "learning_rate": 2.4707189274584537e-07, + "loss": 0.4476662278175354, + "step": 6791 + }, + { + "epoch": 1.5660594881254322, + "grad_norm": 1.8319243980176085, + "learning_rate": 2.468210383910474e-07, + "loss": 0.4399911165237427, + "step": 6792 + }, + { + "epoch": 1.5662900622550149, + "grad_norm": 1.5617800363149925, + "learning_rate": 2.465702935184446e-07, + "loss": 0.4206039309501648, + "step": 6793 + }, + { + "epoch": 1.5665206363845976, + "grad_norm": 1.5998109403316092, + "learning_rate": 2.463196581644855e-07, + "loss": 0.44936686754226685, + "step": 6794 + }, + { + "epoch": 1.5667512105141803, + "grad_norm": 1.4750351364947134, + "learning_rate": 2.4606913236560277e-07, + "loss": 0.39926016330718994, + "step": 6795 + }, + { + "epoch": 1.566981784643763, + "grad_norm": 1.607414705164721, + "learning_rate": 2.4581871615821216e-07, + "loss": 0.4338487982749939, + "step": 6796 + }, + { + "epoch": 1.5672123587733457, + "grad_norm": 1.6693881073802184, + "learning_rate": 2.455684095787148e-07, + "loss": 0.5047430992126465, + "step": 6797 + }, + { + "epoch": 1.5674429329029282, + "grad_norm": 1.623571142038879, + "learning_rate": 2.4531821266349504e-07, + "loss": 
0.46082550287246704, + "step": 6798 + }, + { + "epoch": 1.567673507032511, + "grad_norm": 1.5687485332342288, + "learning_rate": 2.450681254489214e-07, + "loss": 0.44586509466171265, + "step": 6799 + }, + { + "epoch": 1.5679040811620935, + "grad_norm": 1.6011741376497353, + "learning_rate": 2.4481814797134657e-07, + "loss": 0.5167746543884277, + "step": 6800 + }, + { + "epoch": 1.5681346552916762, + "grad_norm": 1.4074512111564024, + "learning_rate": 2.4456828026710753e-07, + "loss": 0.44062116742134094, + "step": 6801 + }, + { + "epoch": 1.5683652294212589, + "grad_norm": 1.718295945554571, + "learning_rate": 2.4431852237252524e-07, + "loss": 0.5096040368080139, + "step": 6802 + }, + { + "epoch": 1.5685958035508416, + "grad_norm": 1.3369851313651875, + "learning_rate": 2.440688743239042e-07, + "loss": 0.44234153628349304, + "step": 6803 + }, + { + "epoch": 1.5688263776804243, + "grad_norm": 1.7878168925295264, + "learning_rate": 2.4381933615753357e-07, + "loss": 0.431011825799942, + "step": 6804 + }, + { + "epoch": 1.569056951810007, + "grad_norm": 1.5221569168970472, + "learning_rate": 2.435699079096858e-07, + "loss": 0.4903266131877899, + "step": 6805 + }, + { + "epoch": 1.5692875259395895, + "grad_norm": 1.4830626229942445, + "learning_rate": 2.433205896166185e-07, + "loss": 0.4698626399040222, + "step": 6806 + }, + { + "epoch": 1.5695181000691723, + "grad_norm": 1.7678576287420633, + "learning_rate": 2.4307138131457184e-07, + "loss": 0.37576574087142944, + "step": 6807 + }, + { + "epoch": 1.5697486741987547, + "grad_norm": 1.442601981615427, + "learning_rate": 2.4282228303977113e-07, + "loss": 0.47068172693252563, + "step": 6808 + }, + { + "epoch": 1.5699792483283375, + "grad_norm": 1.5121414961596256, + "learning_rate": 2.425732948284257e-07, + "loss": 0.45246315002441406, + "step": 6809 + }, + { + "epoch": 1.5702098224579202, + "grad_norm": 1.670746435704044, + "learning_rate": 2.423244167167278e-07, + "loss": 0.4746376574039459, + "step": 6810 + }, + { + 
"epoch": 1.570440396587503, + "grad_norm": 1.6491072802367082, + "learning_rate": 2.420756487408551e-07, + "loss": 0.413469135761261, + "step": 6811 + }, + { + "epoch": 1.5706709707170856, + "grad_norm": 1.4392614299059656, + "learning_rate": 2.418269909369678e-07, + "loss": 0.3567890226840973, + "step": 6812 + }, + { + "epoch": 1.5709015448466683, + "grad_norm": 1.9034789277869502, + "learning_rate": 2.415784433412116e-07, + "loss": 0.4676034450531006, + "step": 6813 + }, + { + "epoch": 1.5711321189762508, + "grad_norm": 1.5100461636177536, + "learning_rate": 2.4133000598971477e-07, + "loss": 0.429337739944458, + "step": 6814 + }, + { + "epoch": 1.5713626931058335, + "grad_norm": 1.657098818036463, + "learning_rate": 2.4108167891859065e-07, + "loss": 0.35861289501190186, + "step": 6815 + }, + { + "epoch": 1.571593267235416, + "grad_norm": 1.7985300174152374, + "learning_rate": 2.4083346216393564e-07, + "loss": 0.43728363513946533, + "step": 6816 + }, + { + "epoch": 1.5718238413649988, + "grad_norm": 1.6655671112295587, + "learning_rate": 2.405853557618308e-07, + "loss": 0.44594380259513855, + "step": 6817 + }, + { + "epoch": 1.5720544154945815, + "grad_norm": 1.430621764890317, + "learning_rate": 2.403373597483414e-07, + "loss": 0.36871337890625, + "step": 6818 + }, + { + "epoch": 1.5722849896241642, + "grad_norm": 1.4284927159530842, + "learning_rate": 2.400894741595152e-07, + "loss": 0.3769477307796478, + "step": 6819 + }, + { + "epoch": 1.572515563753747, + "grad_norm": 1.6803573488891066, + "learning_rate": 2.3984169903138583e-07, + "loss": 0.503145694732666, + "step": 6820 + }, + { + "epoch": 1.5727461378833296, + "grad_norm": 1.552866324250783, + "learning_rate": 2.395940343999691e-07, + "loss": 0.4082655906677246, + "step": 6821 + }, + { + "epoch": 1.5729767120129121, + "grad_norm": 1.4215190376699491, + "learning_rate": 2.3934648030126625e-07, + "loss": 0.4106418192386627, + "step": 6822 + }, + { + "epoch": 1.5732072861424948, + "grad_norm": 
1.663561714777188, + "learning_rate": 2.390990367712613e-07, + "loss": 0.45363783836364746, + "step": 6823 + }, + { + "epoch": 1.5734378602720773, + "grad_norm": 1.4253235303875884, + "learning_rate": 2.388517038459227e-07, + "loss": 0.4416825473308563, + "step": 6824 + }, + { + "epoch": 1.57366843440166, + "grad_norm": 1.5727508875619094, + "learning_rate": 2.3860448156120304e-07, + "loss": 0.5106863379478455, + "step": 6825 + }, + { + "epoch": 1.5738990085312428, + "grad_norm": 1.431151413456896, + "learning_rate": 2.3835736995303879e-07, + "loss": 0.4618466794490814, + "step": 6826 + }, + { + "epoch": 1.5741295826608255, + "grad_norm": 1.6611294255159201, + "learning_rate": 2.381103690573495e-07, + "loss": 0.414678692817688, + "step": 6827 + }, + { + "epoch": 1.5743601567904082, + "grad_norm": 1.3583782134926532, + "learning_rate": 2.3786347891004e-07, + "loss": 0.39774662256240845, + "step": 6828 + }, + { + "epoch": 1.574590730919991, + "grad_norm": 1.3689702631653482, + "learning_rate": 2.376166995469977e-07, + "loss": 0.4513537287712097, + "step": 6829 + }, + { + "epoch": 1.5748213050495734, + "grad_norm": 1.5433747348092586, + "learning_rate": 2.3737003100409447e-07, + "loss": 0.44062697887420654, + "step": 6830 + }, + { + "epoch": 1.5750518791791561, + "grad_norm": 1.6549219639884087, + "learning_rate": 2.3712347331718617e-07, + "loss": 0.42305582761764526, + "step": 6831 + }, + { + "epoch": 1.5752824533087386, + "grad_norm": 1.628456252942963, + "learning_rate": 2.3687702652211262e-07, + "loss": 0.46731626987457275, + "step": 6832 + }, + { + "epoch": 1.5755130274383213, + "grad_norm": 1.569042371408869, + "learning_rate": 2.3663069065469753e-07, + "loss": 0.4926149845123291, + "step": 6833 + }, + { + "epoch": 1.575743601567904, + "grad_norm": 1.8433451746214373, + "learning_rate": 2.3638446575074777e-07, + "loss": 0.49002933502197266, + "step": 6834 + }, + { + "epoch": 1.5759741756974868, + "grad_norm": 1.9286763636552064, + "learning_rate": 
2.3613835184605523e-07, + "loss": 0.47110694646835327, + "step": 6835 + }, + { + "epoch": 1.5762047498270695, + "grad_norm": 1.7003781450027053, + "learning_rate": 2.3589234897639444e-07, + "loss": 0.4257816672325134, + "step": 6836 + }, + { + "epoch": 1.5764353239566522, + "grad_norm": 1.4515610553726317, + "learning_rate": 2.3564645717752506e-07, + "loss": 0.4031051695346832, + "step": 6837 + }, + { + "epoch": 1.5766658980862347, + "grad_norm": 1.7208107126331553, + "learning_rate": 2.3540067648518957e-07, + "loss": 0.5077808499336243, + "step": 6838 + }, + { + "epoch": 1.5768964722158174, + "grad_norm": 1.4184547433402042, + "learning_rate": 2.3515500693511449e-07, + "loss": 0.3877585232257843, + "step": 6839 + }, + { + "epoch": 1.5771270463454, + "grad_norm": 1.6806127701824354, + "learning_rate": 2.3490944856301064e-07, + "loss": 0.4356805682182312, + "step": 6840 + }, + { + "epoch": 1.5773576204749826, + "grad_norm": 1.5102184976880006, + "learning_rate": 2.346640014045723e-07, + "loss": 0.46679362654685974, + "step": 6841 + }, + { + "epoch": 1.5775881946045653, + "grad_norm": 1.4361079018846885, + "learning_rate": 2.3441866549547817e-07, + "loss": 0.4837648272514343, + "step": 6842 + }, + { + "epoch": 1.577818768734148, + "grad_norm": 1.5395603940472438, + "learning_rate": 2.341734408713897e-07, + "loss": 0.42723533511161804, + "step": 6843 + }, + { + "epoch": 1.5780493428637308, + "grad_norm": 1.7296429757269751, + "learning_rate": 2.3392832756795322e-07, + "loss": 0.3680928647518158, + "step": 6844 + }, + { + "epoch": 1.5782799169933135, + "grad_norm": 1.3398871717628533, + "learning_rate": 2.3368332562079797e-07, + "loss": 0.434980571269989, + "step": 6845 + }, + { + "epoch": 1.578510491122896, + "grad_norm": 1.5976407072584213, + "learning_rate": 2.3343843506553805e-07, + "loss": 0.45552271604537964, + "step": 6846 + }, + { + "epoch": 1.5787410652524787, + "grad_norm": 1.5496903398620734, + "learning_rate": 2.331936559377702e-07, + "loss": 
0.4292616844177246, + "step": 6847 + }, + { + "epoch": 1.5789716393820612, + "grad_norm": 1.6907239258434268, + "learning_rate": 2.3294898827307573e-07, + "loss": 0.5025339126586914, + "step": 6848 + }, + { + "epoch": 1.579202213511644, + "grad_norm": 1.434142265629081, + "learning_rate": 2.3270443210701996e-07, + "loss": 0.47567370533943176, + "step": 6849 + }, + { + "epoch": 1.5794327876412266, + "grad_norm": 1.9792768486961878, + "learning_rate": 2.3245998747515095e-07, + "loss": 0.5435467958450317, + "step": 6850 + }, + { + "epoch": 1.5796633617708093, + "grad_norm": 1.2141081677893035, + "learning_rate": 2.3221565441300194e-07, + "loss": 0.4409145712852478, + "step": 6851 + }, + { + "epoch": 1.579893935900392, + "grad_norm": 1.3643265195449554, + "learning_rate": 2.3197143295608845e-07, + "loss": 0.40482181310653687, + "step": 6852 + }, + { + "epoch": 1.5801245100299748, + "grad_norm": 1.8983898955785605, + "learning_rate": 2.317273231399113e-07, + "loss": 0.40231794118881226, + "step": 6853 + }, + { + "epoch": 1.5803550841595573, + "grad_norm": 1.3860542767537625, + "learning_rate": 2.314833249999535e-07, + "loss": 0.43245166540145874, + "step": 6854 + }, + { + "epoch": 1.58058565828914, + "grad_norm": 1.5386782332278715, + "learning_rate": 2.3123943857168315e-07, + "loss": 0.40237659215927124, + "step": 6855 + }, + { + "epoch": 1.5808162324187225, + "grad_norm": 1.7869361833965254, + "learning_rate": 2.309956638905517e-07, + "loss": 0.48900318145751953, + "step": 6856 + }, + { + "epoch": 1.5810468065483052, + "grad_norm": 1.482622476685355, + "learning_rate": 2.3075200099199422e-07, + "loss": 0.42364567518234253, + "step": 6857 + }, + { + "epoch": 1.581277380677888, + "grad_norm": 1.6159587255295897, + "learning_rate": 2.3050844991142958e-07, + "loss": 0.4658735990524292, + "step": 6858 + }, + { + "epoch": 1.5815079548074706, + "grad_norm": 1.4775627716781476, + "learning_rate": 2.3026501068426007e-07, + "loss": 0.42268991470336914, + "step": 6859 + }, + { + 
"epoch": 1.5817385289370534, + "grad_norm": 1.4348002511722773, + "learning_rate": 2.3002168334587247e-07, + "loss": 0.44876742362976074, + "step": 6860 + }, + { + "epoch": 1.581969103066636, + "grad_norm": 1.5171591869453156, + "learning_rate": 2.2977846793163646e-07, + "loss": 0.42540132999420166, + "step": 6861 + }, + { + "epoch": 1.5821996771962186, + "grad_norm": 1.4296859038074168, + "learning_rate": 2.2953536447690636e-07, + "loss": 0.48768138885498047, + "step": 6862 + }, + { + "epoch": 1.5824302513258013, + "grad_norm": 1.5445046236967466, + "learning_rate": 2.292923730170192e-07, + "loss": 0.42905953526496887, + "step": 6863 + }, + { + "epoch": 1.5826608254553838, + "grad_norm": 1.4472242985886439, + "learning_rate": 2.2904949358729653e-07, + "loss": 0.4103778004646301, + "step": 6864 + }, + { + "epoch": 1.5828913995849665, + "grad_norm": 1.5180272333652802, + "learning_rate": 2.2880672622304331e-07, + "loss": 0.39303290843963623, + "step": 6865 + }, + { + "epoch": 1.5831219737145492, + "grad_norm": 1.4702183686842207, + "learning_rate": 2.2856407095954843e-07, + "loss": 0.5087130069732666, + "step": 6866 + }, + { + "epoch": 1.583352547844132, + "grad_norm": 1.5644640444387603, + "learning_rate": 2.283215278320839e-07, + "loss": 0.33117055892944336, + "step": 6867 + }, + { + "epoch": 1.5835831219737146, + "grad_norm": 1.7090383225203818, + "learning_rate": 2.280790968759063e-07, + "loss": 0.41781488060951233, + "step": 6868 + }, + { + "epoch": 1.5838136961032974, + "grad_norm": 1.4121975925065597, + "learning_rate": 2.2783677812625523e-07, + "loss": 0.5104382634162903, + "step": 6869 + }, + { + "epoch": 1.5840442702328799, + "grad_norm": 1.5723614045021508, + "learning_rate": 2.2759457161835372e-07, + "loss": 0.3987969160079956, + "step": 6870 + }, + { + "epoch": 1.5842748443624626, + "grad_norm": 1.705658009146651, + "learning_rate": 2.2735247738740936e-07, + "loss": 0.4723064601421356, + "step": 6871 + }, + { + "epoch": 1.584505418492045, + "grad_norm": 
1.707721278006975, + "learning_rate": 2.2711049546861293e-07, + "loss": 0.3942141830921173, + "step": 6872 + }, + { + "epoch": 1.5847359926216278, + "grad_norm": 1.5657011191058785, + "learning_rate": 2.268686258971393e-07, + "loss": 0.38271787762641907, + "step": 6873 + }, + { + "epoch": 1.5849665667512105, + "grad_norm": 1.3977071321322045, + "learning_rate": 2.2662686870814607e-07, + "loss": 0.4944665729999542, + "step": 6874 + }, + { + "epoch": 1.5851971408807932, + "grad_norm": 1.7910306093530013, + "learning_rate": 2.2638522393677562e-07, + "loss": 0.46695005893707275, + "step": 6875 + }, + { + "epoch": 1.585427715010376, + "grad_norm": 1.7074115790208728, + "learning_rate": 2.2614369161815295e-07, + "loss": 0.4620080888271332, + "step": 6876 + }, + { + "epoch": 1.5856582891399587, + "grad_norm": 1.6877087434684872, + "learning_rate": 2.2590227178738776e-07, + "loss": 0.5650279521942139, + "step": 6877 + }, + { + "epoch": 1.5858888632695411, + "grad_norm": 1.3471081039016284, + "learning_rate": 2.2566096447957227e-07, + "loss": 0.3556622564792633, + "step": 6878 + }, + { + "epoch": 1.5861194373991239, + "grad_norm": 1.3889188451731431, + "learning_rate": 2.254197697297834e-07, + "loss": 0.4978718161582947, + "step": 6879 + }, + { + "epoch": 1.5863500115287064, + "grad_norm": 1.375490517958548, + "learning_rate": 2.2517868757308146e-07, + "loss": 0.4759003520011902, + "step": 6880 + }, + { + "epoch": 1.586580585658289, + "grad_norm": 1.579013983466932, + "learning_rate": 2.2493771804450945e-07, + "loss": 0.5078370571136475, + "step": 6881 + }, + { + "epoch": 1.5868111597878718, + "grad_norm": 1.3607586792133322, + "learning_rate": 2.2469686117909547e-07, + "loss": 0.4188239276409149, + "step": 6882 + }, + { + "epoch": 1.5870417339174545, + "grad_norm": 1.3488510335317552, + "learning_rate": 2.2445611701184997e-07, + "loss": 0.4075232744216919, + "step": 6883 + }, + { + "epoch": 1.5872723080470372, + "grad_norm": 1.5004910712339554, + "learning_rate": 
2.2421548557776794e-07, + "loss": 0.3643442988395691, + "step": 6884 + }, + { + "epoch": 1.58750288217662, + "grad_norm": 1.4193604715362476, + "learning_rate": 2.2397496691182716e-07, + "loss": 0.38767147064208984, + "step": 6885 + }, + { + "epoch": 1.5877334563062024, + "grad_norm": 1.6373352976605955, + "learning_rate": 2.2373456104899e-07, + "loss": 0.4874354600906372, + "step": 6886 + }, + { + "epoch": 1.5879640304357852, + "grad_norm": 1.5573200679287742, + "learning_rate": 2.2349426802420134e-07, + "loss": 0.46412762999534607, + "step": 6887 + }, + { + "epoch": 1.5881946045653677, + "grad_norm": 1.3720639419051985, + "learning_rate": 2.2325408787239054e-07, + "loss": 0.4299372434616089, + "step": 6888 + }, + { + "epoch": 1.5884251786949504, + "grad_norm": 1.6309152140238423, + "learning_rate": 2.230140206284703e-07, + "loss": 0.3962220549583435, + "step": 6889 + }, + { + "epoch": 1.588655752824533, + "grad_norm": 1.617512400235996, + "learning_rate": 2.2277406632733653e-07, + "loss": 0.5048998594284058, + "step": 6890 + }, + { + "epoch": 1.5888863269541158, + "grad_norm": 2.0443646004817024, + "learning_rate": 2.2253422500386932e-07, + "loss": 0.35463857650756836, + "step": 6891 + }, + { + "epoch": 1.5891169010836985, + "grad_norm": 1.5696832175175914, + "learning_rate": 2.2229449669293165e-07, + "loss": 0.3969672620296478, + "step": 6892 + }, + { + "epoch": 1.5893474752132812, + "grad_norm": 1.5166803382402412, + "learning_rate": 2.22054881429371e-07, + "loss": 0.36300575733184814, + "step": 6893 + }, + { + "epoch": 1.5895780493428637, + "grad_norm": 1.41057555150973, + "learning_rate": 2.2181537924801729e-07, + "loss": 0.45796507596969604, + "step": 6894 + }, + { + "epoch": 1.5898086234724462, + "grad_norm": 1.556089643432737, + "learning_rate": 2.2157599018368488e-07, + "loss": 0.42725688219070435, + "step": 6895 + }, + { + "epoch": 1.590039197602029, + "grad_norm": 1.8436048050065164, + "learning_rate": 2.213367142711714e-07, + "loss": 
0.4959419369697571, + "step": 6896 + }, + { + "epoch": 1.5902697717316117, + "grad_norm": 1.6607109480306586, + "learning_rate": 2.2109755154525821e-07, + "loss": 0.3707115948200226, + "step": 6897 + }, + { + "epoch": 1.5905003458611944, + "grad_norm": 1.4025605906760028, + "learning_rate": 2.2085850204070989e-07, + "loss": 0.3647577166557312, + "step": 6898 + }, + { + "epoch": 1.590730919990777, + "grad_norm": 1.505368584241417, + "learning_rate": 2.2061956579227447e-07, + "loss": 0.42227697372436523, + "step": 6899 + }, + { + "epoch": 1.5909614941203598, + "grad_norm": 1.508703122498175, + "learning_rate": 2.2038074283468412e-07, + "loss": 0.41736292839050293, + "step": 6900 + }, + { + "epoch": 1.5911920682499423, + "grad_norm": 1.6418039973045746, + "learning_rate": 2.201420332026538e-07, + "loss": 0.46005967259407043, + "step": 6901 + }, + { + "epoch": 1.591422642379525, + "grad_norm": 1.4328523009517202, + "learning_rate": 2.1990343693088243e-07, + "loss": 0.3572643995285034, + "step": 6902 + }, + { + "epoch": 1.5916532165091075, + "grad_norm": 1.744760153255399, + "learning_rate": 2.196649540540527e-07, + "loss": 0.5321012735366821, + "step": 6903 + }, + { + "epoch": 1.5918837906386902, + "grad_norm": 1.5415731453823578, + "learning_rate": 2.194265846068305e-07, + "loss": 0.4913836419582367, + "step": 6904 + }, + { + "epoch": 1.592114364768273, + "grad_norm": 1.7016363411577065, + "learning_rate": 2.1918832862386493e-07, + "loss": 0.37674903869628906, + "step": 6905 + }, + { + "epoch": 1.5923449388978557, + "grad_norm": 1.5772289300833298, + "learning_rate": 2.1895018613978934e-07, + "loss": 0.4385930001735687, + "step": 6906 + }, + { + "epoch": 1.5925755130274384, + "grad_norm": 2.224743671968565, + "learning_rate": 2.1871215718921964e-07, + "loss": 0.5219674706459045, + "step": 6907 + }, + { + "epoch": 1.592806087157021, + "grad_norm": 1.5215408344839954, + "learning_rate": 2.1847424180675622e-07, + "loss": 0.4241113066673279, + "step": 6908 + }, + { + 
"epoch": 1.5930366612866036, + "grad_norm": 1.4296843598144484, + "learning_rate": 2.1823644002698237e-07, + "loss": 0.4008786082267761, + "step": 6909 + }, + { + "epoch": 1.5932672354161863, + "grad_norm": 1.5021365471039205, + "learning_rate": 2.179987518844645e-07, + "loss": 0.3333933651447296, + "step": 6910 + }, + { + "epoch": 1.5934978095457688, + "grad_norm": 1.652596855301234, + "learning_rate": 2.1776117741375343e-07, + "loss": 0.48857730627059937, + "step": 6911 + }, + { + "epoch": 1.5937283836753515, + "grad_norm": 1.4724322236306013, + "learning_rate": 2.1752371664938306e-07, + "loss": 0.37393617630004883, + "step": 6912 + }, + { + "epoch": 1.5939589578049342, + "grad_norm": 1.4102085657254086, + "learning_rate": 2.172863696258709e-07, + "loss": 0.5365080833435059, + "step": 6913 + }, + { + "epoch": 1.594189531934517, + "grad_norm": 1.7683912421422305, + "learning_rate": 2.1704913637771705e-07, + "loss": 0.49318936467170715, + "step": 6914 + }, + { + "epoch": 1.5944201060640997, + "grad_norm": 1.8200372673393599, + "learning_rate": 2.1681201693940666e-07, + "loss": 0.37682920694351196, + "step": 6915 + }, + { + "epoch": 1.5946506801936824, + "grad_norm": 1.4120260343966702, + "learning_rate": 2.1657501134540657e-07, + "loss": 0.4894877076148987, + "step": 6916 + }, + { + "epoch": 1.5948812543232649, + "grad_norm": 1.5895963005275906, + "learning_rate": 2.1633811963016869e-07, + "loss": 0.4200783967971802, + "step": 6917 + }, + { + "epoch": 1.5951118284528476, + "grad_norm": 1.7361608161591027, + "learning_rate": 2.1610134182812702e-07, + "loss": 0.3953052759170532, + "step": 6918 + }, + { + "epoch": 1.59534240258243, + "grad_norm": 1.4727518091374385, + "learning_rate": 2.158646779736999e-07, + "loss": 0.4006558656692505, + "step": 6919 + }, + { + "epoch": 1.5955729767120128, + "grad_norm": 1.7355475804217702, + "learning_rate": 2.1562812810128906e-07, + "loss": 0.3749210238456726, + "step": 6920 + }, + { + "epoch": 1.5958035508415955, + "grad_norm": 
1.5378158592599445, + "learning_rate": 2.1539169224527887e-07, + "loss": 0.4688538610935211, + "step": 6921 + }, + { + "epoch": 1.5960341249711782, + "grad_norm": 1.590308500795848, + "learning_rate": 2.151553704400383e-07, + "loss": 0.4483727216720581, + "step": 6922 + }, + { + "epoch": 1.596264699100761, + "grad_norm": 1.589431373760787, + "learning_rate": 2.149191627199185e-07, + "loss": 0.5118253827095032, + "step": 6923 + }, + { + "epoch": 1.5964952732303437, + "grad_norm": 1.644731800905039, + "learning_rate": 2.1468306911925525e-07, + "loss": 0.43641170859336853, + "step": 6924 + }, + { + "epoch": 1.5967258473599262, + "grad_norm": 1.4755114053374785, + "learning_rate": 2.1444708967236657e-07, + "loss": 0.38253384828567505, + "step": 6925 + }, + { + "epoch": 1.596956421489509, + "grad_norm": 1.5638213373412855, + "learning_rate": 2.1421122441355476e-07, + "loss": 0.43674635887145996, + "step": 6926 + }, + { + "epoch": 1.5971869956190914, + "grad_norm": 1.3940207891491625, + "learning_rate": 2.1397547337710519e-07, + "loss": 0.37392908334732056, + "step": 6927 + }, + { + "epoch": 1.597417569748674, + "grad_norm": 1.5097907813025324, + "learning_rate": 2.13739836597287e-07, + "loss": 0.4531250298023224, + "step": 6928 + }, + { + "epoch": 1.5976481438782568, + "grad_norm": 1.3308296891253455, + "learning_rate": 2.13504314108352e-07, + "loss": 0.38579899072647095, + "step": 6929 + }, + { + "epoch": 1.5978787180078395, + "grad_norm": 1.8618083111554995, + "learning_rate": 2.1326890594453563e-07, + "loss": 0.5215288400650024, + "step": 6930 + }, + { + "epoch": 1.5981092921374223, + "grad_norm": 1.6019249166669218, + "learning_rate": 2.130336121400572e-07, + "loss": 0.4396743178367615, + "step": 6931 + }, + { + "epoch": 1.598339866267005, + "grad_norm": 1.5371889029106374, + "learning_rate": 2.127984327291188e-07, + "loss": 0.5068432688713074, + "step": 6932 + }, + { + "epoch": 1.5985704403965875, + "grad_norm": 1.7855756215277538, + "learning_rate": 
2.1256336774590643e-07, + "loss": 0.48809194564819336, + "step": 6933 + }, + { + "epoch": 1.5988010145261702, + "grad_norm": 1.4166815561679078, + "learning_rate": 2.123284172245885e-07, + "loss": 0.4191613793373108, + "step": 6934 + }, + { + "epoch": 1.5990315886557527, + "grad_norm": 1.5763678308245206, + "learning_rate": 2.1209358119931843e-07, + "loss": 0.41901010274887085, + "step": 6935 + }, + { + "epoch": 1.5992621627853354, + "grad_norm": 1.8296822391624505, + "learning_rate": 2.1185885970423133e-07, + "loss": 0.5046913623809814, + "step": 6936 + }, + { + "epoch": 1.5994927369149181, + "grad_norm": 2.1559492699976492, + "learning_rate": 2.1162425277344675e-07, + "loss": 0.5113730430603027, + "step": 6937 + }, + { + "epoch": 1.5997233110445008, + "grad_norm": 1.520077424866564, + "learning_rate": 2.1138976044106672e-07, + "loss": 0.34129637479782104, + "step": 6938 + }, + { + "epoch": 1.5999538851740835, + "grad_norm": 1.5890047902961466, + "learning_rate": 2.1115538274117762e-07, + "loss": 0.4492289423942566, + "step": 6939 + }, + { + "epoch": 1.6001844593036663, + "grad_norm": 1.5532375131614289, + "learning_rate": 2.1092111970784833e-07, + "loss": 0.41002708673477173, + "step": 6940 + }, + { + "epoch": 1.6004150334332488, + "grad_norm": 1.887817008406582, + "learning_rate": 2.1068697137513113e-07, + "loss": 0.5444740056991577, + "step": 6941 + }, + { + "epoch": 1.6006456075628315, + "grad_norm": 1.518981510824895, + "learning_rate": 2.1045293777706196e-07, + "loss": 0.3489699959754944, + "step": 6942 + }, + { + "epoch": 1.600876181692414, + "grad_norm": 1.5115486172446684, + "learning_rate": 2.1021901894766025e-07, + "loss": 0.41807419061660767, + "step": 6943 + }, + { + "epoch": 1.6011067558219967, + "grad_norm": 1.7376028221450257, + "learning_rate": 2.0998521492092857e-07, + "loss": 0.41074657440185547, + "step": 6944 + }, + { + "epoch": 1.6013373299515794, + "grad_norm": 1.370751011576157, + "learning_rate": 2.097515257308521e-07, + "loss": 
0.4085312485694885, + "step": 6945 + }, + { + "epoch": 1.6015679040811621, + "grad_norm": 1.6632563260665783, + "learning_rate": 2.095179514114006e-07, + "loss": 0.42699170112609863, + "step": 6946 + }, + { + "epoch": 1.6017984782107448, + "grad_norm": 1.6347540938108835, + "learning_rate": 2.0928449199652597e-07, + "loss": 0.40041583776474, + "step": 6947 + }, + { + "epoch": 1.6020290523403276, + "grad_norm": 1.385214375087801, + "learning_rate": 2.090511475201643e-07, + "loss": 0.47465208172798157, + "step": 6948 + }, + { + "epoch": 1.60225962646991, + "grad_norm": 1.5233208405026366, + "learning_rate": 2.0881791801623405e-07, + "loss": 0.4338058829307556, + "step": 6949 + }, + { + "epoch": 1.6024902005994928, + "grad_norm": 1.857588116409586, + "learning_rate": 2.0858480351863794e-07, + "loss": 0.5398772954940796, + "step": 6950 + }, + { + "epoch": 1.6027207747290753, + "grad_norm": 1.41461865858101, + "learning_rate": 2.0835180406126151e-07, + "loss": 0.40750259160995483, + "step": 6951 + }, + { + "epoch": 1.602951348858658, + "grad_norm": 1.6330208123854022, + "learning_rate": 2.0811891967797336e-07, + "loss": 0.4365716278553009, + "step": 6952 + }, + { + "epoch": 1.6031819229882407, + "grad_norm": 1.395812913626374, + "learning_rate": 2.078861504026258e-07, + "loss": 0.41537174582481384, + "step": 6953 + }, + { + "epoch": 1.6034124971178234, + "grad_norm": 1.331855885968294, + "learning_rate": 2.0765349626905394e-07, + "loss": 0.3687853217124939, + "step": 6954 + }, + { + "epoch": 1.6036430712474061, + "grad_norm": 1.4291699726024594, + "learning_rate": 2.074209573110769e-07, + "loss": 0.48866790533065796, + "step": 6955 + }, + { + "epoch": 1.6038736453769888, + "grad_norm": 1.7541297686576787, + "learning_rate": 2.0718853356249588e-07, + "loss": 0.4618760347366333, + "step": 6956 + }, + { + "epoch": 1.6041042195065713, + "grad_norm": 1.820272898606224, + "learning_rate": 2.0695622505709654e-07, + "loss": 0.365873247385025, + "step": 6957 + }, + { + "epoch": 
1.604334793636154, + "grad_norm": 1.7127779412462347, + "learning_rate": 2.0672403182864706e-07, + "loss": 0.4346495270729065, + "step": 6958 + }, + { + "epoch": 1.6045653677657365, + "grad_norm": 1.4385774019168192, + "learning_rate": 2.0649195391089935e-07, + "loss": 0.3995724618434906, + "step": 6959 + }, + { + "epoch": 1.6047959418953193, + "grad_norm": 1.890499669463449, + "learning_rate": 2.062599913375882e-07, + "loss": 0.4628515839576721, + "step": 6960 + }, + { + "epoch": 1.605026516024902, + "grad_norm": 1.8491035226730044, + "learning_rate": 2.060281441424314e-07, + "loss": 0.39776262640953064, + "step": 6961 + }, + { + "epoch": 1.6052570901544847, + "grad_norm": 1.6838333142700899, + "learning_rate": 2.057964123591307e-07, + "loss": 0.4622994065284729, + "step": 6962 + }, + { + "epoch": 1.6054876642840674, + "grad_norm": 1.3806987670969462, + "learning_rate": 2.0556479602137033e-07, + "loss": 0.4028933048248291, + "step": 6963 + }, + { + "epoch": 1.6057182384136501, + "grad_norm": 1.592137730506949, + "learning_rate": 2.0533329516281838e-07, + "loss": 0.46639660000801086, + "step": 6964 + }, + { + "epoch": 1.6059488125432326, + "grad_norm": 1.3243378898371028, + "learning_rate": 2.0510190981712537e-07, + "loss": 0.4063863158226013, + "step": 6965 + }, + { + "epoch": 1.6061793866728153, + "grad_norm": 1.6927530193908227, + "learning_rate": 2.0487064001792586e-07, + "loss": 0.471376895904541, + "step": 6966 + }, + { + "epoch": 1.6064099608023978, + "grad_norm": 1.5262354616100662, + "learning_rate": 2.0463948579883727e-07, + "loss": 0.5094102025032043, + "step": 6967 + }, + { + "epoch": 1.6066405349319806, + "grad_norm": 1.613731344454896, + "learning_rate": 2.0440844719346039e-07, + "loss": 0.3922441005706787, + "step": 6968 + }, + { + "epoch": 1.6068711090615633, + "grad_norm": 1.7524315605420397, + "learning_rate": 2.0417752423537882e-07, + "loss": 0.47777149081230164, + "step": 6969 + }, + { + "epoch": 1.607101683191146, + "grad_norm": 
2.2487851564601065, + "learning_rate": 2.0394671695815924e-07, + "loss": 0.5780138969421387, + "step": 6970 + }, + { + "epoch": 1.6073322573207287, + "grad_norm": 1.6028588432287403, + "learning_rate": 2.0371602539535237e-07, + "loss": 0.43968862295150757, + "step": 6971 + }, + { + "epoch": 1.6075628314503114, + "grad_norm": 1.877374036184133, + "learning_rate": 2.0348544958049096e-07, + "loss": 0.5204722881317139, + "step": 6972 + }, + { + "epoch": 1.607793405579894, + "grad_norm": 1.5207193577135807, + "learning_rate": 2.0325498954709198e-07, + "loss": 0.3944805860519409, + "step": 6973 + }, + { + "epoch": 1.6080239797094766, + "grad_norm": 1.454235622222141, + "learning_rate": 2.0302464532865505e-07, + "loss": 0.42686349153518677, + "step": 6974 + }, + { + "epoch": 1.6082545538390591, + "grad_norm": 1.5958289830519565, + "learning_rate": 2.027944169586633e-07, + "loss": 0.3860762119293213, + "step": 6975 + }, + { + "epoch": 1.6084851279686418, + "grad_norm": 1.880005605643703, + "learning_rate": 2.0256430447058215e-07, + "loss": 0.5570458769798279, + "step": 6976 + }, + { + "epoch": 1.6087157020982246, + "grad_norm": 1.8351241687154358, + "learning_rate": 2.0233430789786132e-07, + "loss": 0.4556728005409241, + "step": 6977 + }, + { + "epoch": 1.6089462762278073, + "grad_norm": 1.4746534507162423, + "learning_rate": 2.0210442727393285e-07, + "loss": 0.48365700244903564, + "step": 6978 + }, + { + "epoch": 1.60917685035739, + "grad_norm": 1.7835628524046172, + "learning_rate": 2.018746626322124e-07, + "loss": 0.4456971287727356, + "step": 6979 + }, + { + "epoch": 1.6094074244869727, + "grad_norm": 1.6700237073697568, + "learning_rate": 2.0164501400609835e-07, + "loss": 0.41877123713493347, + "step": 6980 + }, + { + "epoch": 1.6096379986165552, + "grad_norm": 1.3803715462197303, + "learning_rate": 2.0141548142897246e-07, + "loss": 0.4073547124862671, + "step": 6981 + }, + { + "epoch": 1.609868572746138, + "grad_norm": 1.5181775501419725, + "learning_rate": 
2.0118606493420021e-07, + "loss": 0.4987693727016449, + "step": 6982 + }, + { + "epoch": 1.6100991468757204, + "grad_norm": 1.603543806365415, + "learning_rate": 2.0095676455512878e-07, + "loss": 0.4391751289367676, + "step": 6983 + }, + { + "epoch": 1.6103297210053031, + "grad_norm": 1.4062982467603231, + "learning_rate": 2.0072758032508996e-07, + "loss": 0.409262478351593, + "step": 6984 + }, + { + "epoch": 1.6105602951348859, + "grad_norm": 1.353394057864669, + "learning_rate": 2.0049851227739744e-07, + "loss": 0.38653457164764404, + "step": 6985 + }, + { + "epoch": 1.6107908692644686, + "grad_norm": 1.9189325963312815, + "learning_rate": 2.0026956044534914e-07, + "loss": 0.4824348986148834, + "step": 6986 + }, + { + "epoch": 1.6110214433940513, + "grad_norm": 1.7037748706735498, + "learning_rate": 2.00040724862225e-07, + "loss": 0.45774850249290466, + "step": 6987 + }, + { + "epoch": 1.611252017523634, + "grad_norm": 1.5419477618151842, + "learning_rate": 1.9981200556128906e-07, + "loss": 0.45437830686569214, + "step": 6988 + }, + { + "epoch": 1.6114825916532165, + "grad_norm": 1.4581568342693196, + "learning_rate": 1.9958340257578753e-07, + "loss": 0.4563155770301819, + "step": 6989 + }, + { + "epoch": 1.6117131657827992, + "grad_norm": 1.7363246075229848, + "learning_rate": 1.9935491593895048e-07, + "loss": 0.5786794424057007, + "step": 6990 + }, + { + "epoch": 1.6119437399123817, + "grad_norm": 1.6120161181322603, + "learning_rate": 1.991265456839909e-07, + "loss": 0.5290218591690063, + "step": 6991 + }, + { + "epoch": 1.6121743140419644, + "grad_norm": 1.607774677113548, + "learning_rate": 1.9889829184410434e-07, + "loss": 0.3456650376319885, + "step": 6992 + }, + { + "epoch": 1.6124048881715471, + "grad_norm": 1.414142582496391, + "learning_rate": 1.9867015445247015e-07, + "loss": 0.40869832038879395, + "step": 6993 + }, + { + "epoch": 1.6126354623011299, + "grad_norm": 2.3563881452147992, + "learning_rate": 1.9844213354225004e-07, + "loss": 
0.49926644563674927, + "step": 6994 + }, + { + "epoch": 1.6128660364307126, + "grad_norm": 1.904270429684393, + "learning_rate": 1.9821422914658957e-07, + "loss": 0.4874018132686615, + "step": 6995 + }, + { + "epoch": 1.6130966105602953, + "grad_norm": 1.872252891476363, + "learning_rate": 1.9798644129861654e-07, + "loss": 0.4228810667991638, + "step": 6996 + }, + { + "epoch": 1.6133271846898778, + "grad_norm": 1.4437194678200662, + "learning_rate": 1.9775877003144237e-07, + "loss": 0.4309043884277344, + "step": 6997 + }, + { + "epoch": 1.6135577588194605, + "grad_norm": 1.6133739556944033, + "learning_rate": 1.9753121537816142e-07, + "loss": 0.3917756676673889, + "step": 6998 + }, + { + "epoch": 1.613788332949043, + "grad_norm": 1.492105866056543, + "learning_rate": 1.9730377737185145e-07, + "loss": 0.4074435830116272, + "step": 6999 + }, + { + "epoch": 1.6140189070786257, + "grad_norm": 1.7474889804918834, + "learning_rate": 1.9707645604557243e-07, + "loss": 0.4581322968006134, + "step": 7000 + }, + { + "epoch": 1.6142494812082084, + "grad_norm": 1.5240615238309698, + "learning_rate": 1.9684925143236776e-07, + "loss": 0.4479151666164398, + "step": 7001 + }, + { + "epoch": 1.6144800553377912, + "grad_norm": 1.4379805154063257, + "learning_rate": 1.966221635652643e-07, + "loss": 0.3378838300704956, + "step": 7002 + }, + { + "epoch": 1.6147106294673739, + "grad_norm": 1.6755517427089033, + "learning_rate": 1.96395192477271e-07, + "loss": 0.3383278250694275, + "step": 7003 + }, + { + "epoch": 1.6149412035969566, + "grad_norm": 1.5430108527415651, + "learning_rate": 1.9616833820138091e-07, + "loss": 0.5164717435836792, + "step": 7004 + }, + { + "epoch": 1.615171777726539, + "grad_norm": 1.6927378959186403, + "learning_rate": 1.9594160077056932e-07, + "loss": 0.4548792243003845, + "step": 7005 + }, + { + "epoch": 1.6154023518561216, + "grad_norm": 1.608730816141968, + "learning_rate": 1.9571498021779531e-07, + "loss": 0.41074928641319275, + "step": 7006 + }, + { + 
"epoch": 1.6156329259857043, + "grad_norm": 1.5384399915677613, + "learning_rate": 1.9548847657599976e-07, + "loss": 0.4156193137168884, + "step": 7007 + }, + { + "epoch": 1.615863500115287, + "grad_norm": 1.742725966102226, + "learning_rate": 1.95262089878108e-07, + "loss": 0.4602770209312439, + "step": 7008 + }, + { + "epoch": 1.6160940742448697, + "grad_norm": 1.5880816009582301, + "learning_rate": 1.9503582015702713e-07, + "loss": 0.4911346733570099, + "step": 7009 + }, + { + "epoch": 1.6163246483744524, + "grad_norm": 1.5007140709934312, + "learning_rate": 1.9480966744564764e-07, + "loss": 0.394087553024292, + "step": 7010 + }, + { + "epoch": 1.6165552225040352, + "grad_norm": 1.5836059389854649, + "learning_rate": 1.9458363177684367e-07, + "loss": 0.4845706820487976, + "step": 7011 + }, + { + "epoch": 1.6167857966336177, + "grad_norm": 1.7088454795128305, + "learning_rate": 1.9435771318347116e-07, + "loss": 0.49142736196517944, + "step": 7012 + }, + { + "epoch": 1.6170163707632004, + "grad_norm": 1.3798831769041013, + "learning_rate": 1.9413191169836996e-07, + "loss": 0.4408283829689026, + "step": 7013 + }, + { + "epoch": 1.6172469448927829, + "grad_norm": 1.6476950016993046, + "learning_rate": 1.9390622735436268e-07, + "loss": 0.6088640689849854, + "step": 7014 + }, + { + "epoch": 1.6174775190223656, + "grad_norm": 1.912745817268737, + "learning_rate": 1.93680660184255e-07, + "loss": 0.5208842158317566, + "step": 7015 + }, + { + "epoch": 1.6177080931519483, + "grad_norm": 1.7742607180865566, + "learning_rate": 1.9345521022083488e-07, + "loss": 0.5652821660041809, + "step": 7016 + }, + { + "epoch": 1.617938667281531, + "grad_norm": 1.5895189074949856, + "learning_rate": 1.9322987749687437e-07, + "loss": 0.4861832857131958, + "step": 7017 + }, + { + "epoch": 1.6181692414111137, + "grad_norm": 1.5693969535816144, + "learning_rate": 1.930046620451272e-07, + "loss": 0.39583832025527954, + "step": 7018 + }, + { + "epoch": 1.6183998155406965, + "grad_norm": 
1.6283824576887038, + "learning_rate": 1.927795638983313e-07, + "loss": 0.5638653039932251, + "step": 7019 + }, + { + "epoch": 1.618630389670279, + "grad_norm": 1.7595661530223012, + "learning_rate": 1.9255458308920648e-07, + "loss": 0.4737275242805481, + "step": 7020 + }, + { + "epoch": 1.6188609637998617, + "grad_norm": 1.3807112997659796, + "learning_rate": 1.923297196504563e-07, + "loss": 0.4526802897453308, + "step": 7021 + }, + { + "epoch": 1.6190915379294442, + "grad_norm": 1.5519742811018764, + "learning_rate": 1.9210497361476708e-07, + "loss": 0.40800565481185913, + "step": 7022 + }, + { + "epoch": 1.6193221120590269, + "grad_norm": 1.3169867108502276, + "learning_rate": 1.9188034501480744e-07, + "loss": 0.39532414078712463, + "step": 7023 + }, + { + "epoch": 1.6195526861886096, + "grad_norm": 1.3982522966659368, + "learning_rate": 1.9165583388322993e-07, + "loss": 0.40236538648605347, + "step": 7024 + }, + { + "epoch": 1.6197832603181923, + "grad_norm": 1.4838960013292628, + "learning_rate": 1.91431440252669e-07, + "loss": 0.4421047866344452, + "step": 7025 + }, + { + "epoch": 1.620013834447775, + "grad_norm": 1.5688320926864374, + "learning_rate": 1.9120716415574322e-07, + "loss": 0.4149084687232971, + "step": 7026 + }, + { + "epoch": 1.6202444085773577, + "grad_norm": 1.8747733544619556, + "learning_rate": 1.9098300562505264e-07, + "loss": 0.4186127185821533, + "step": 7027 + }, + { + "epoch": 1.6204749827069402, + "grad_norm": 1.5276498671204974, + "learning_rate": 1.9075896469318132e-07, + "loss": 0.4649406671524048, + "step": 7028 + }, + { + "epoch": 1.620705556836523, + "grad_norm": 1.5217002126023946, + "learning_rate": 1.9053504139269593e-07, + "loss": 0.43240052461624146, + "step": 7029 + }, + { + "epoch": 1.6209361309661054, + "grad_norm": 1.7731525747902717, + "learning_rate": 1.9031123575614628e-07, + "loss": 0.4874862730503082, + "step": 7030 + }, + { + "epoch": 1.6211667050956882, + "grad_norm": 1.6133636879972175, + "learning_rate": 
1.900875478160644e-07, + "loss": 0.3771815896034241, + "step": 7031 + }, + { + "epoch": 1.6213972792252709, + "grad_norm": 1.548316338784864, + "learning_rate": 1.898639776049653e-07, + "loss": 0.49882376194000244, + "step": 7032 + }, + { + "epoch": 1.6216278533548536, + "grad_norm": 1.5189621230999546, + "learning_rate": 1.896405251553479e-07, + "loss": 0.3813830614089966, + "step": 7033 + }, + { + "epoch": 1.6218584274844363, + "grad_norm": 1.588790821712345, + "learning_rate": 1.8941719049969272e-07, + "loss": 0.41883599758148193, + "step": 7034 + }, + { + "epoch": 1.622089001614019, + "grad_norm": 1.4271058877816405, + "learning_rate": 1.8919397367046409e-07, + "loss": 0.42194586992263794, + "step": 7035 + }, + { + "epoch": 1.6223195757436015, + "grad_norm": 1.5957469997065072, + "learning_rate": 1.889708747001084e-07, + "loss": 0.36967700719833374, + "step": 7036 + }, + { + "epoch": 1.6225501498731842, + "grad_norm": 1.4373460175753532, + "learning_rate": 1.887478936210556e-07, + "loss": 0.4493946433067322, + "step": 7037 + }, + { + "epoch": 1.6227807240027667, + "grad_norm": 1.6526676224310628, + "learning_rate": 1.8852503046571833e-07, + "loss": 0.42121458053588867, + "step": 7038 + }, + { + "epoch": 1.6230112981323495, + "grad_norm": 1.430632776113786, + "learning_rate": 1.8830228526649207e-07, + "loss": 0.4529588222503662, + "step": 7039 + }, + { + "epoch": 1.6232418722619322, + "grad_norm": 1.537552702708545, + "learning_rate": 1.88079658055755e-07, + "loss": 0.387844443321228, + "step": 7040 + }, + { + "epoch": 1.623472446391515, + "grad_norm": 1.4872655198554567, + "learning_rate": 1.8785714886586802e-07, + "loss": 0.49954158067703247, + "step": 7041 + }, + { + "epoch": 1.6237030205210976, + "grad_norm": 1.3845875929093436, + "learning_rate": 1.8763475772917548e-07, + "loss": 0.4016296863555908, + "step": 7042 + }, + { + "epoch": 1.6239335946506803, + "grad_norm": 1.5208389143205874, + "learning_rate": 1.8741248467800362e-07, + "loss": 
0.358657568693161, + "step": 7043 + }, + { + "epoch": 1.6241641687802628, + "grad_norm": 1.471037478852436, + "learning_rate": 1.8719032974466264e-07, + "loss": 0.434385746717453, + "step": 7044 + }, + { + "epoch": 1.6243947429098455, + "grad_norm": 1.4705602216948914, + "learning_rate": 1.8696829296144466e-07, + "loss": 0.4658992886543274, + "step": 7045 + }, + { + "epoch": 1.624625317039428, + "grad_norm": 1.8724382429627917, + "learning_rate": 1.8674637436062545e-07, + "loss": 0.5438188910484314, + "step": 7046 + }, + { + "epoch": 1.6248558911690107, + "grad_norm": 1.9024479318941907, + "learning_rate": 1.8652457397446254e-07, + "loss": 0.47364577651023865, + "step": 7047 + }, + { + "epoch": 1.6250864652985935, + "grad_norm": 1.386287471529149, + "learning_rate": 1.8630289183519733e-07, + "loss": 0.3664509654045105, + "step": 7048 + }, + { + "epoch": 1.6253170394281762, + "grad_norm": 1.5676786934992741, + "learning_rate": 1.8608132797505317e-07, + "loss": 0.4226282835006714, + "step": 7049 + }, + { + "epoch": 1.625547613557759, + "grad_norm": 1.4581751590991685, + "learning_rate": 1.8585988242623706e-07, + "loss": 0.47477972507476807, + "step": 7050 + }, + { + "epoch": 1.6257781876873416, + "grad_norm": 2.082606809210874, + "learning_rate": 1.8563855522093786e-07, + "loss": 0.5372269749641418, + "step": 7051 + }, + { + "epoch": 1.626008761816924, + "grad_norm": 1.3565872618977541, + "learning_rate": 1.8541734639132788e-07, + "loss": 0.37929385900497437, + "step": 7052 + }, + { + "epoch": 1.6262393359465068, + "grad_norm": 1.5119164625864447, + "learning_rate": 1.8519625596956244e-07, + "loss": 0.4029538631439209, + "step": 7053 + }, + { + "epoch": 1.6264699100760893, + "grad_norm": 1.5739338248608081, + "learning_rate": 1.8497528398777874e-07, + "loss": 0.3932439982891083, + "step": 7054 + }, + { + "epoch": 1.626700484205672, + "grad_norm": 1.5806776566898322, + "learning_rate": 1.847544304780978e-07, + "loss": 0.45190152525901794, + "step": 7055 + }, + { + 
"epoch": 1.6269310583352548, + "grad_norm": 1.8629994959724827, + "learning_rate": 1.8453369547262242e-07, + "loss": 0.4852195382118225, + "step": 7056 + }, + { + "epoch": 1.6271616324648375, + "grad_norm": 1.608209634523461, + "learning_rate": 1.8431307900343918e-07, + "loss": 0.41676801443099976, + "step": 7057 + }, + { + "epoch": 1.6273922065944202, + "grad_norm": 1.388166685170728, + "learning_rate": 1.8409258110261626e-07, + "loss": 0.44374561309814453, + "step": 7058 + }, + { + "epoch": 1.627622780724003, + "grad_norm": 1.5975340281654677, + "learning_rate": 1.838722018022061e-07, + "loss": 0.4348192811012268, + "step": 7059 + }, + { + "epoch": 1.6278533548535854, + "grad_norm": 1.626194256762104, + "learning_rate": 1.836519411342422e-07, + "loss": 0.46572640538215637, + "step": 7060 + }, + { + "epoch": 1.6280839289831681, + "grad_norm": 1.4985871084379754, + "learning_rate": 1.8343179913074214e-07, + "loss": 0.4633631408214569, + "step": 7061 + }, + { + "epoch": 1.6283145031127506, + "grad_norm": 1.3260867645697678, + "learning_rate": 1.8321177582370605e-07, + "loss": 0.44420552253723145, + "step": 7062 + }, + { + "epoch": 1.6285450772423333, + "grad_norm": 1.8207040168707305, + "learning_rate": 1.8299187124511594e-07, + "loss": 0.5628370046615601, + "step": 7063 + }, + { + "epoch": 1.628775651371916, + "grad_norm": 1.7448936691285617, + "learning_rate": 1.8277208542693778e-07, + "loss": 0.5342314839363098, + "step": 7064 + }, + { + "epoch": 1.6290062255014988, + "grad_norm": 1.529076197622531, + "learning_rate": 1.82552418401119e-07, + "loss": 0.440934419631958, + "step": 7065 + }, + { + "epoch": 1.6292367996310815, + "grad_norm": 1.4532572456773438, + "learning_rate": 1.823328701995912e-07, + "loss": 0.45218637585639954, + "step": 7066 + }, + { + "epoch": 1.6294673737606642, + "grad_norm": 1.456173637640115, + "learning_rate": 1.8211344085426716e-07, + "loss": 0.4059211015701294, + "step": 7067 + }, + { + "epoch": 1.6296979478902467, + "grad_norm": 
2.0474805024349876, + "learning_rate": 1.818941303970435e-07, + "loss": 0.5036444067955017, + "step": 7068 + }, + { + "epoch": 1.6299285220198294, + "grad_norm": 1.6421868165266436, + "learning_rate": 1.8167493885979935e-07, + "loss": 0.5034196972846985, + "step": 7069 + }, + { + "epoch": 1.630159096149412, + "grad_norm": 1.5247456374523982, + "learning_rate": 1.8145586627439645e-07, + "loss": 0.4199259281158447, + "step": 7070 + }, + { + "epoch": 1.6303896702789946, + "grad_norm": 1.5913722133067008, + "learning_rate": 1.8123691267267915e-07, + "loss": 0.5439015626907349, + "step": 7071 + }, + { + "epoch": 1.6306202444085773, + "grad_norm": 1.6181852234306913, + "learning_rate": 1.810180780864743e-07, + "loss": 0.4349868893623352, + "step": 7072 + }, + { + "epoch": 1.63085081853816, + "grad_norm": 1.5299206997440553, + "learning_rate": 1.807993625475921e-07, + "loss": 0.39939552545547485, + "step": 7073 + }, + { + "epoch": 1.6310813926677428, + "grad_norm": 1.575600412629914, + "learning_rate": 1.8058076608782468e-07, + "loss": 0.43073540925979614, + "step": 7074 + }, + { + "epoch": 1.6313119667973255, + "grad_norm": 1.6461603718238804, + "learning_rate": 1.8036228873894744e-07, + "loss": 0.4735824465751648, + "step": 7075 + }, + { + "epoch": 1.631542540926908, + "grad_norm": 1.466337846989889, + "learning_rate": 1.8014393053271836e-07, + "loss": 0.42971551418304443, + "step": 7076 + }, + { + "epoch": 1.6317731150564907, + "grad_norm": 1.694502155411865, + "learning_rate": 1.7992569150087823e-07, + "loss": 0.48593759536743164, + "step": 7077 + }, + { + "epoch": 1.6320036891860732, + "grad_norm": 1.55292324755966, + "learning_rate": 1.7970757167514973e-07, + "loss": 0.530194878578186, + "step": 7078 + }, + { + "epoch": 1.632234263315656, + "grad_norm": 1.7324585048939796, + "learning_rate": 1.794895710872394e-07, + "loss": 0.43393629789352417, + "step": 7079 + }, + { + "epoch": 1.6324648374452386, + "grad_norm": 1.5827349286667418, + "learning_rate": 
1.7927168976883556e-07, + "loss": 0.4211798906326294, + "step": 7080 + }, + { + "epoch": 1.6326954115748213, + "grad_norm": 1.5939322533043618, + "learning_rate": 1.790539277516091e-07, + "loss": 0.39001476764678955, + "step": 7081 + }, + { + "epoch": 1.632925985704404, + "grad_norm": 1.6028280785725797, + "learning_rate": 1.788362850672146e-07, + "loss": 0.4360283613204956, + "step": 7082 + }, + { + "epoch": 1.6331565598339868, + "grad_norm": 1.6516207153980025, + "learning_rate": 1.7861876174728807e-07, + "loss": 0.47754842042922974, + "step": 7083 + }, + { + "epoch": 1.6333871339635693, + "grad_norm": 1.634690883802538, + "learning_rate": 1.7840135782344888e-07, + "loss": 0.35193490982055664, + "step": 7084 + }, + { + "epoch": 1.633617708093152, + "grad_norm": 1.2825662437681398, + "learning_rate": 1.7818407332729912e-07, + "loss": 0.39997392892837524, + "step": 7085 + }, + { + "epoch": 1.6338482822227345, + "grad_norm": 1.324570823301632, + "learning_rate": 1.7796690829042328e-07, + "loss": 0.3255331218242645, + "step": 7086 + }, + { + "epoch": 1.6340788563523172, + "grad_norm": 1.424074701555127, + "learning_rate": 1.777498627443882e-07, + "loss": 0.47072282433509827, + "step": 7087 + }, + { + "epoch": 1.6343094304819, + "grad_norm": 1.5293726959445282, + "learning_rate": 1.775329367207441e-07, + "loss": 0.4231484830379486, + "step": 7088 + }, + { + "epoch": 1.6345400046114826, + "grad_norm": 1.4406985915809287, + "learning_rate": 1.7731613025102276e-07, + "loss": 0.37112197279930115, + "step": 7089 + }, + { + "epoch": 1.6347705787410653, + "grad_norm": 1.5117815815493545, + "learning_rate": 1.7709944336673986e-07, + "loss": 0.5772623419761658, + "step": 7090 + }, + { + "epoch": 1.635001152870648, + "grad_norm": 1.4205344879838042, + "learning_rate": 1.7688287609939244e-07, + "loss": 0.45922917127609253, + "step": 7091 + }, + { + "epoch": 1.6352317270002306, + "grad_norm": 1.6262912271430976, + "learning_rate": 1.7666642848046098e-07, + "loss": 
0.42784950137138367, + "step": 7092 + }, + { + "epoch": 1.6354623011298133, + "grad_norm": 1.585709168390131, + "learning_rate": 1.7645010054140873e-07, + "loss": 0.4676967263221741, + "step": 7093 + }, + { + "epoch": 1.6356928752593958, + "grad_norm": 1.4782811209898545, + "learning_rate": 1.7623389231368046e-07, + "loss": 0.434337317943573, + "step": 7094 + }, + { + "epoch": 1.6359234493889785, + "grad_norm": 1.512954791126533, + "learning_rate": 1.760178038287048e-07, + "loss": 0.4667350947856903, + "step": 7095 + }, + { + "epoch": 1.6361540235185612, + "grad_norm": 1.3397712801467159, + "learning_rate": 1.7580183511789204e-07, + "loss": 0.42233705520629883, + "step": 7096 + }, + { + "epoch": 1.636384597648144, + "grad_norm": 1.5093056460018237, + "learning_rate": 1.7558598621263565e-07, + "loss": 0.4488460421562195, + "step": 7097 + }, + { + "epoch": 1.6366151717777266, + "grad_norm": 1.6708888950919063, + "learning_rate": 1.753702571443112e-07, + "loss": 0.4264194667339325, + "step": 7098 + }, + { + "epoch": 1.6368457459073094, + "grad_norm": 1.414729354018089, + "learning_rate": 1.7515464794427715e-07, + "loss": 0.32695144414901733, + "step": 7099 + }, + { + "epoch": 1.6370763200368919, + "grad_norm": 2.0744464699438825, + "learning_rate": 1.7493915864387487e-07, + "loss": 0.3573018014431, + "step": 7100 + }, + { + "epoch": 1.6373068941664746, + "grad_norm": 1.4506197336511393, + "learning_rate": 1.7472378927442732e-07, + "loss": 0.4545198082923889, + "step": 7101 + }, + { + "epoch": 1.637537468296057, + "grad_norm": 1.59875503504847, + "learning_rate": 1.7450853986724123e-07, + "loss": 0.42589202523231506, + "step": 7102 + }, + { + "epoch": 1.6377680424256398, + "grad_norm": 1.5169081767342318, + "learning_rate": 1.742934104536048e-07, + "loss": 0.4403502345085144, + "step": 7103 + }, + { + "epoch": 1.6379986165552225, + "grad_norm": 1.7606747961526963, + "learning_rate": 1.7407840106478955e-07, + "loss": 0.4262208938598633, + "step": 7104 + }, + { + 
"epoch": 1.6382291906848052, + "grad_norm": 1.6000265796951778, + "learning_rate": 1.7386351173204905e-07, + "loss": 0.4706578254699707, + "step": 7105 + }, + { + "epoch": 1.638459764814388, + "grad_norm": 1.4657752166922586, + "learning_rate": 1.7364874248661986e-07, + "loss": 0.4526079297065735, + "step": 7106 + }, + { + "epoch": 1.6386903389439706, + "grad_norm": 1.7833403214487409, + "learning_rate": 1.734340933597207e-07, + "loss": 0.42836326360702515, + "step": 7107 + }, + { + "epoch": 1.6389209130735531, + "grad_norm": 1.4453465477500804, + "learning_rate": 1.7321956438255292e-07, + "loss": 0.42680823802948, + "step": 7108 + }, + { + "epoch": 1.6391514872031359, + "grad_norm": 1.3964828689114657, + "learning_rate": 1.7300515558630068e-07, + "loss": 0.38365036249160767, + "step": 7109 + }, + { + "epoch": 1.6393820613327184, + "grad_norm": 1.4748773918598719, + "learning_rate": 1.7279086700213063e-07, + "loss": 0.4153991937637329, + "step": 7110 + }, + { + "epoch": 1.639612635462301, + "grad_norm": 1.5777502702437645, + "learning_rate": 1.7257669866119163e-07, + "loss": 0.42257291078567505, + "step": 7111 + }, + { + "epoch": 1.6398432095918838, + "grad_norm": 1.7309640190055833, + "learning_rate": 1.7236265059461498e-07, + "loss": 0.34990063309669495, + "step": 7112 + }, + { + "epoch": 1.6400737837214665, + "grad_norm": 1.3939407429934887, + "learning_rate": 1.72148722833515e-07, + "loss": 0.44848760962486267, + "step": 7113 + }, + { + "epoch": 1.6403043578510492, + "grad_norm": 1.4649667660689574, + "learning_rate": 1.7193491540898808e-07, + "loss": 0.4649186134338379, + "step": 7114 + }, + { + "epoch": 1.640534931980632, + "grad_norm": 1.5050161434573055, + "learning_rate": 1.7172122835211333e-07, + "loss": 0.480952650308609, + "step": 7115 + }, + { + "epoch": 1.6407655061102144, + "grad_norm": 1.6101365826637175, + "learning_rate": 1.7150766169395235e-07, + "loss": 0.4669501483440399, + "step": 7116 + }, + { + "epoch": 1.6409960802397972, + "grad_norm": 
1.486994174732026, + "learning_rate": 1.7129421546554957e-07, + "loss": 0.4273250102996826, + "step": 7117 + }, + { + "epoch": 1.6412266543693796, + "grad_norm": 1.8106380448833757, + "learning_rate": 1.71080889697931e-07, + "loss": 0.47923076152801514, + "step": 7118 + }, + { + "epoch": 1.6414572284989624, + "grad_norm": 1.5033931180120297, + "learning_rate": 1.708676844221061e-07, + "loss": 0.42801159620285034, + "step": 7119 + }, + { + "epoch": 1.641687802628545, + "grad_norm": 1.4792875147029159, + "learning_rate": 1.7065459966906636e-07, + "loss": 0.39929044246673584, + "step": 7120 + }, + { + "epoch": 1.6419183767581278, + "grad_norm": 1.4727601001923896, + "learning_rate": 1.7044163546978553e-07, + "loss": 0.4919764995574951, + "step": 7121 + }, + { + "epoch": 1.6421489508877105, + "grad_norm": 1.5018740505050776, + "learning_rate": 1.702287918552202e-07, + "loss": 0.45943617820739746, + "step": 7122 + }, + { + "epoch": 1.642379525017293, + "grad_norm": 1.5202994857697039, + "learning_rate": 1.7001606885630948e-07, + "loss": 0.48078954219818115, + "step": 7123 + }, + { + "epoch": 1.6426100991468757, + "grad_norm": 1.406204806461001, + "learning_rate": 1.6980346650397505e-07, + "loss": 0.4217113256454468, + "step": 7124 + }, + { + "epoch": 1.6428406732764582, + "grad_norm": 1.479814078881505, + "learning_rate": 1.6959098482912037e-07, + "loss": 0.4643937051296234, + "step": 7125 + }, + { + "epoch": 1.643071247406041, + "grad_norm": 1.6157838326637273, + "learning_rate": 1.6937862386263212e-07, + "loss": 0.43977001309394836, + "step": 7126 + }, + { + "epoch": 1.6433018215356237, + "grad_norm": 1.4653862858165947, + "learning_rate": 1.6916638363537882e-07, + "loss": 0.3872392177581787, + "step": 7127 + }, + { + "epoch": 1.6435323956652064, + "grad_norm": 1.4668608493131068, + "learning_rate": 1.6895426417821213e-07, + "loss": 0.44625502824783325, + "step": 7128 + }, + { + "epoch": 1.643762969794789, + "grad_norm": 1.6445652935798991, + "learning_rate": 
1.6874226552196523e-07, + "loss": 0.36836186051368713, + "step": 7129 + }, + { + "epoch": 1.6439935439243718, + "grad_norm": 1.5181829131466213, + "learning_rate": 1.6853038769745465e-07, + "loss": 0.35491907596588135, + "step": 7130 + }, + { + "epoch": 1.6442241180539543, + "grad_norm": 1.5107933584098798, + "learning_rate": 1.6831863073547913e-07, + "loss": 0.5210527181625366, + "step": 7131 + }, + { + "epoch": 1.644454692183537, + "grad_norm": 1.5854667470103982, + "learning_rate": 1.6810699466681932e-07, + "loss": 0.3805693984031677, + "step": 7132 + }, + { + "epoch": 1.6446852663131195, + "grad_norm": 1.8089883418272688, + "learning_rate": 1.6789547952223893e-07, + "loss": 0.5768346786499023, + "step": 7133 + }, + { + "epoch": 1.6449158404427022, + "grad_norm": 1.8423402992377882, + "learning_rate": 1.6768408533248356e-07, + "loss": 0.46465635299682617, + "step": 7134 + }, + { + "epoch": 1.645146414572285, + "grad_norm": 1.8710111931219464, + "learning_rate": 1.674728121282819e-07, + "loss": 0.43119215965270996, + "step": 7135 + }, + { + "epoch": 1.6453769887018677, + "grad_norm": 1.4436891948188744, + "learning_rate": 1.6726165994034402e-07, + "loss": 0.42814093828201294, + "step": 7136 + }, + { + "epoch": 1.6456075628314504, + "grad_norm": 1.5822684467576347, + "learning_rate": 1.6705062879936382e-07, + "loss": 0.41762328147888184, + "step": 7137 + }, + { + "epoch": 1.645838136961033, + "grad_norm": 2.059560914873905, + "learning_rate": 1.668397187360161e-07, + "loss": 0.42717012763023376, + "step": 7138 + }, + { + "epoch": 1.6460687110906156, + "grad_norm": 1.3692759576709286, + "learning_rate": 1.666289297809591e-07, + "loss": 0.37660926580429077, + "step": 7139 + }, + { + "epoch": 1.6462992852201983, + "grad_norm": 1.689926156627043, + "learning_rate": 1.664182619648331e-07, + "loss": 0.3905887007713318, + "step": 7140 + }, + { + "epoch": 1.6465298593497808, + "grad_norm": 1.5648955881343065, + "learning_rate": 1.6620771531826117e-07, + "loss": 
0.4848547577857971, + "step": 7141 + }, + { + "epoch": 1.6467604334793635, + "grad_norm": 1.5642509939041707, + "learning_rate": 1.659972898718479e-07, + "loss": 0.37895438075065613, + "step": 7142 + }, + { + "epoch": 1.6469910076089462, + "grad_norm": 1.6050388867308452, + "learning_rate": 1.6578698565618075e-07, + "loss": 0.46770527958869934, + "step": 7143 + }, + { + "epoch": 1.647221581738529, + "grad_norm": 1.705579614415488, + "learning_rate": 1.6557680270182995e-07, + "loss": 0.44138044118881226, + "step": 7144 + }, + { + "epoch": 1.6474521558681117, + "grad_norm": 1.7922951246817975, + "learning_rate": 1.6536674103934734e-07, + "loss": 0.3681126832962036, + "step": 7145 + }, + { + "epoch": 1.6476827299976944, + "grad_norm": 1.454313444949356, + "learning_rate": 1.651568006992675e-07, + "loss": 0.4410884380340576, + "step": 7146 + }, + { + "epoch": 1.6479133041272769, + "grad_norm": 1.444668904765709, + "learning_rate": 1.6494698171210776e-07, + "loss": 0.4161960482597351, + "step": 7147 + }, + { + "epoch": 1.6481438782568596, + "grad_norm": 1.6873012096950248, + "learning_rate": 1.647372841083674e-07, + "loss": 0.4912784695625305, + "step": 7148 + }, + { + "epoch": 1.648374452386442, + "grad_norm": 1.8457570973340096, + "learning_rate": 1.6452770791852766e-07, + "loss": 0.5137985944747925, + "step": 7149 + }, + { + "epoch": 1.6486050265160248, + "grad_norm": 1.845102008062213, + "learning_rate": 1.6431825317305303e-07, + "loss": 0.43644070625305176, + "step": 7150 + }, + { + "epoch": 1.6488356006456075, + "grad_norm": 1.508191131690363, + "learning_rate": 1.6410891990238973e-07, + "loss": 0.4319378733634949, + "step": 7151 + }, + { + "epoch": 1.6490661747751902, + "grad_norm": 1.6137067673031091, + "learning_rate": 1.6389970813696619e-07, + "loss": 0.474090039730072, + "step": 7152 + }, + { + "epoch": 1.649296748904773, + "grad_norm": 1.656766330100741, + "learning_rate": 1.6369061790719375e-07, + "loss": 0.40291503071784973, + "step": 7153 + }, + { + 
"epoch": 1.6495273230343557, + "grad_norm": 1.5434308580585603, + "learning_rate": 1.6348164924346562e-07, + "loss": 0.51482754945755, + "step": 7154 + }, + { + "epoch": 1.6497578971639382, + "grad_norm": 1.421069671161851, + "learning_rate": 1.632728021761579e-07, + "loss": 0.35308974981307983, + "step": 7155 + }, + { + "epoch": 1.6499884712935209, + "grad_norm": 1.7501565194944115, + "learning_rate": 1.6306407673562815e-07, + "loss": 0.5269055366516113, + "step": 7156 + }, + { + "epoch": 1.6502190454231034, + "grad_norm": 1.4775332310798848, + "learning_rate": 1.6285547295221724e-07, + "loss": 0.41290512681007385, + "step": 7157 + }, + { + "epoch": 1.650449619552686, + "grad_norm": 1.4513808656924674, + "learning_rate": 1.6264699085624721e-07, + "loss": 0.39930522441864014, + "step": 7158 + }, + { + "epoch": 1.6506801936822688, + "grad_norm": 1.475028134913826, + "learning_rate": 1.6243863047802365e-07, + "loss": 0.4617648422718048, + "step": 7159 + }, + { + "epoch": 1.6509107678118515, + "grad_norm": 1.6583284073308129, + "learning_rate": 1.6223039184783337e-07, + "loss": 0.4618498980998993, + "step": 7160 + }, + { + "epoch": 1.6511413419414342, + "grad_norm": 1.5177380348824272, + "learning_rate": 1.6202227499594635e-07, + "loss": 0.43138834834098816, + "step": 7161 + }, + { + "epoch": 1.651371916071017, + "grad_norm": 1.9944130162827052, + "learning_rate": 1.618142799526141e-07, + "loss": 0.5330632925033569, + "step": 7162 + }, + { + "epoch": 1.6516024902005995, + "grad_norm": 1.4381555357456468, + "learning_rate": 1.6160640674807103e-07, + "loss": 0.45410698652267456, + "step": 7163 + }, + { + "epoch": 1.6518330643301822, + "grad_norm": 1.52256812211894, + "learning_rate": 1.6139865541253384e-07, + "loss": 0.4216715693473816, + "step": 7164 + }, + { + "epoch": 1.6520636384597647, + "grad_norm": 1.6818151368938485, + "learning_rate": 1.6119102597620083e-07, + "loss": 0.3738868832588196, + "step": 7165 + }, + { + "epoch": 1.6522942125893474, + "grad_norm": 
1.587335339212439, + "learning_rate": 1.609835184692535e-07, + "loss": 0.44595998525619507, + "step": 7166 + }, + { + "epoch": 1.65252478671893, + "grad_norm": 1.8461813575956394, + "learning_rate": 1.6077613292185466e-07, + "loss": 0.5446096062660217, + "step": 7167 + }, + { + "epoch": 1.6527553608485128, + "grad_norm": 1.5661326715584178, + "learning_rate": 1.605688693641505e-07, + "loss": 0.47280746698379517, + "step": 7168 + }, + { + "epoch": 1.6529859349780955, + "grad_norm": 1.6260653553703972, + "learning_rate": 1.6036172782626823e-07, + "loss": 0.5280133485794067, + "step": 7169 + }, + { + "epoch": 1.6532165091076783, + "grad_norm": 1.6507744528919734, + "learning_rate": 1.6015470833831835e-07, + "loss": 0.4659959375858307, + "step": 7170 + }, + { + "epoch": 1.6534470832372608, + "grad_norm": 1.5548632331284282, + "learning_rate": 1.5994781093039335e-07, + "loss": 0.5196797251701355, + "step": 7171 + }, + { + "epoch": 1.6536776573668435, + "grad_norm": 1.298650586457363, + "learning_rate": 1.597410356325676e-07, + "loss": 0.41855669021606445, + "step": 7172 + }, + { + "epoch": 1.653908231496426, + "grad_norm": 1.6301682003715197, + "learning_rate": 1.5953438247489814e-07, + "loss": 0.43063706159591675, + "step": 7173 + }, + { + "epoch": 1.6541388056260087, + "grad_norm": 1.556025937846025, + "learning_rate": 1.59327851487424e-07, + "loss": 0.3954850435256958, + "step": 7174 + }, + { + "epoch": 1.6543693797555914, + "grad_norm": 1.6096102290125367, + "learning_rate": 1.591214427001667e-07, + "loss": 0.4497464895248413, + "step": 7175 + }, + { + "epoch": 1.6545999538851741, + "grad_norm": 1.573427243133678, + "learning_rate": 1.5891515614312967e-07, + "loss": 0.47012704610824585, + "step": 7176 + }, + { + "epoch": 1.6548305280147568, + "grad_norm": 1.345166831078004, + "learning_rate": 1.5870899184629872e-07, + "loss": 0.399054616689682, + "step": 7177 + }, + { + "epoch": 1.6550611021443395, + "grad_norm": 1.68897296856965, + "learning_rate": 
1.5850294983964208e-07, + "loss": 0.41277164220809937, + "step": 7178 + }, + { + "epoch": 1.655291676273922, + "grad_norm": 1.6410807386564468, + "learning_rate": 1.5829703015311013e-07, + "loss": 0.4735640287399292, + "step": 7179 + }, + { + "epoch": 1.6555222504035048, + "grad_norm": 1.5414168893805387, + "learning_rate": 1.5809123281663516e-07, + "loss": 0.4244140386581421, + "step": 7180 + }, + { + "epoch": 1.6557528245330873, + "grad_norm": 1.6196858148033184, + "learning_rate": 1.5788555786013212e-07, + "loss": 0.4291320741176605, + "step": 7181 + }, + { + "epoch": 1.65598339866267, + "grad_norm": 1.8656270771434302, + "learning_rate": 1.576800053134979e-07, + "loss": 0.3965643048286438, + "step": 7182 + }, + { + "epoch": 1.6562139727922527, + "grad_norm": 1.5939688831505687, + "learning_rate": 1.5747457520661123e-07, + "loss": 0.4087764620780945, + "step": 7183 + }, + { + "epoch": 1.6564445469218354, + "grad_norm": 1.523375144006796, + "learning_rate": 1.5726926756933411e-07, + "loss": 0.4207920432090759, + "step": 7184 + }, + { + "epoch": 1.6566751210514181, + "grad_norm": 1.757376584691626, + "learning_rate": 1.570640824315095e-07, + "loss": 0.34311753511428833, + "step": 7185 + }, + { + "epoch": 1.6569056951810008, + "grad_norm": 2.079059544313622, + "learning_rate": 1.5685901982296345e-07, + "loss": 0.44728145003318787, + "step": 7186 + }, + { + "epoch": 1.6571362693105833, + "grad_norm": 1.6933442739443483, + "learning_rate": 1.5665407977350386e-07, + "loss": 0.38300156593322754, + "step": 7187 + }, + { + "epoch": 1.657366843440166, + "grad_norm": 1.4613322908312483, + "learning_rate": 1.56449262312921e-07, + "loss": 0.32724204659461975, + "step": 7188 + }, + { + "epoch": 1.6575974175697485, + "grad_norm": 1.5277123552551555, + "learning_rate": 1.562445674709868e-07, + "loss": 0.4812743067741394, + "step": 7189 + }, + { + "epoch": 1.6578279916993313, + "grad_norm": 1.279031260784297, + "learning_rate": 1.5603999527745615e-07, + "loss": 
0.3974485397338867, + "step": 7190 + }, + { + "epoch": 1.658058565828914, + "grad_norm": 1.729819799365075, + "learning_rate": 1.5583554576206536e-07, + "loss": 0.5058138370513916, + "step": 7191 + }, + { + "epoch": 1.6582891399584967, + "grad_norm": 1.451214505055382, + "learning_rate": 1.5563121895453323e-07, + "loss": 0.4442358613014221, + "step": 7192 + }, + { + "epoch": 1.6585197140880794, + "grad_norm": 1.6317499919466611, + "learning_rate": 1.5542701488456077e-07, + "loss": 0.35400623083114624, + "step": 7193 + }, + { + "epoch": 1.6587502882176621, + "grad_norm": 1.8335890419904581, + "learning_rate": 1.5522293358183125e-07, + "loss": 0.5046352744102478, + "step": 7194 + }, + { + "epoch": 1.6589808623472446, + "grad_norm": 1.8150914477063191, + "learning_rate": 1.5501897507601015e-07, + "loss": 0.45344769954681396, + "step": 7195 + }, + { + "epoch": 1.6592114364768273, + "grad_norm": 1.7111771949579255, + "learning_rate": 1.548151393967444e-07, + "loss": 0.4251500368118286, + "step": 7196 + }, + { + "epoch": 1.6594420106064098, + "grad_norm": 1.4323459769713944, + "learning_rate": 1.5461142657366399e-07, + "loss": 0.3728788495063782, + "step": 7197 + }, + { + "epoch": 1.6596725847359926, + "grad_norm": 1.5246938682723656, + "learning_rate": 1.5440783663638036e-07, + "loss": 0.3143829107284546, + "step": 7198 + }, + { + "epoch": 1.6599031588655753, + "grad_norm": 1.3416076020806418, + "learning_rate": 1.5420436961448758e-07, + "loss": 0.5070813894271851, + "step": 7199 + }, + { + "epoch": 1.660133732995158, + "grad_norm": 1.2380684135092845, + "learning_rate": 1.5400102553756145e-07, + "loss": 0.3644014000892639, + "step": 7200 + }, + { + "epoch": 1.6603643071247407, + "grad_norm": 2.973338937285917, + "learning_rate": 1.5379780443516023e-07, + "loss": 0.4120270609855652, + "step": 7201 + }, + { + "epoch": 1.6605948812543234, + "grad_norm": 1.6150469405356445, + "learning_rate": 1.5359470633682425e-07, + "loss": 0.4327865242958069, + "step": 7202 + }, + { + 
"epoch": 1.660825455383906, + "grad_norm": 2.011470811225138, + "learning_rate": 1.5339173127207562e-07, + "loss": 0.626624584197998, + "step": 7203 + }, + { + "epoch": 1.6610560295134886, + "grad_norm": 1.6601868604564274, + "learning_rate": 1.5318887927041913e-07, + "loss": 0.45536088943481445, + "step": 7204 + }, + { + "epoch": 1.6612866036430711, + "grad_norm": 1.6789895391694964, + "learning_rate": 1.52986150361341e-07, + "loss": 0.5306276082992554, + "step": 7205 + }, + { + "epoch": 1.6615171777726538, + "grad_norm": 1.5374267124283623, + "learning_rate": 1.5278354457431043e-07, + "loss": 0.4263244867324829, + "step": 7206 + }, + { + "epoch": 1.6617477519022366, + "grad_norm": 1.5390387444640852, + "learning_rate": 1.5258106193877762e-07, + "loss": 0.4578266143798828, + "step": 7207 + }, + { + "epoch": 1.6619783260318193, + "grad_norm": 1.4963429405053086, + "learning_rate": 1.5237870248417605e-07, + "loss": 0.5120365619659424, + "step": 7208 + }, + { + "epoch": 1.662208900161402, + "grad_norm": 1.7987725718508283, + "learning_rate": 1.521764662399202e-07, + "loss": 0.4491463005542755, + "step": 7209 + }, + { + "epoch": 1.6624394742909847, + "grad_norm": 1.588713571736857, + "learning_rate": 1.5197435323540752e-07, + "loss": 0.4810635447502136, + "step": 7210 + }, + { + "epoch": 1.6626700484205672, + "grad_norm": 1.549550087406024, + "learning_rate": 1.5177236350001722e-07, + "loss": 0.4250200390815735, + "step": 7211 + }, + { + "epoch": 1.66290062255015, + "grad_norm": 1.8619243359226805, + "learning_rate": 1.515704970631102e-07, + "loss": 0.49981385469436646, + "step": 7212 + }, + { + "epoch": 1.6631311966797324, + "grad_norm": 1.621928409701738, + "learning_rate": 1.5136875395403027e-07, + "loss": 0.40204358100891113, + "step": 7213 + }, + { + "epoch": 1.6633617708093151, + "grad_norm": 1.504987607563178, + "learning_rate": 1.5116713420210236e-07, + "loss": 0.514127254486084, + "step": 7214 + }, + { + "epoch": 1.6635923449388978, + "grad_norm": 
1.8745773841611948, + "learning_rate": 1.509656378366343e-07, + "loss": 0.5119338631629944, + "step": 7215 + }, + { + "epoch": 1.6638229190684806, + "grad_norm": 1.6137446017437618, + "learning_rate": 1.507642648869153e-07, + "loss": 0.45031970739364624, + "step": 7216 + }, + { + "epoch": 1.6640534931980633, + "grad_norm": 1.427878863576358, + "learning_rate": 1.5056301538221716e-07, + "loss": 0.4503582715988159, + "step": 7217 + }, + { + "epoch": 1.664284067327646, + "grad_norm": 1.4651953746761925, + "learning_rate": 1.503618893517935e-07, + "loss": 0.38793227076530457, + "step": 7218 + }, + { + "epoch": 1.6645146414572285, + "grad_norm": 1.4683280962315126, + "learning_rate": 1.5016088682488026e-07, + "loss": 0.4446987211704254, + "step": 7219 + }, + { + "epoch": 1.6647452155868112, + "grad_norm": 1.7835855909787117, + "learning_rate": 1.4996000783069485e-07, + "loss": 0.4687119722366333, + "step": 7220 + }, + { + "epoch": 1.6649757897163937, + "grad_norm": 1.6205230957470973, + "learning_rate": 1.4975925239843734e-07, + "loss": 0.48283010721206665, + "step": 7221 + }, + { + "epoch": 1.6652063638459764, + "grad_norm": 1.630894562773258, + "learning_rate": 1.4955862055728941e-07, + "loss": 0.510201632976532, + "step": 7222 + }, + { + "epoch": 1.6654369379755591, + "grad_norm": 1.4932233099831633, + "learning_rate": 1.4935811233641471e-07, + "loss": 0.4070482850074768, + "step": 7223 + }, + { + "epoch": 1.6656675121051419, + "grad_norm": 1.5683915035975688, + "learning_rate": 1.4915772776495948e-07, + "loss": 0.44347989559173584, + "step": 7224 + }, + { + "epoch": 1.6658980862347246, + "grad_norm": 1.6817444257008654, + "learning_rate": 1.4895746687205147e-07, + "loss": 0.4160166382789612, + "step": 7225 + }, + { + "epoch": 1.6661286603643073, + "grad_norm": 1.5428277862719844, + "learning_rate": 1.4875732968680098e-07, + "loss": 0.39939236640930176, + "step": 7226 + }, + { + "epoch": 1.6663592344938898, + "grad_norm": 1.8461591057744162, + "learning_rate": 
1.4855731623829936e-07, + "loss": 0.4604174494743347, + "step": 7227 + }, + { + "epoch": 1.6665898086234725, + "grad_norm": 1.5963571116977615, + "learning_rate": 1.4835742655562134e-07, + "loss": 0.4691208004951477, + "step": 7228 + }, + { + "epoch": 1.666820382753055, + "grad_norm": 1.358957710417088, + "learning_rate": 1.481576606678222e-07, + "loss": 0.4146147668361664, + "step": 7229 + }, + { + "epoch": 1.6670509568826377, + "grad_norm": 1.4681059084163257, + "learning_rate": 1.4795801860394041e-07, + "loss": 0.4064391255378723, + "step": 7230 + }, + { + "epoch": 1.6672815310122204, + "grad_norm": 1.233349352710464, + "learning_rate": 1.4775850039299587e-07, + "loss": 0.3696960210800171, + "step": 7231 + }, + { + "epoch": 1.6675121051418031, + "grad_norm": 1.763624641268307, + "learning_rate": 1.4755910606399023e-07, + "loss": 0.4356287121772766, + "step": 7232 + }, + { + "epoch": 1.6677426792713859, + "grad_norm": 1.6119962512147328, + "learning_rate": 1.473598356459078e-07, + "loss": 0.39327436685562134, + "step": 7233 + }, + { + "epoch": 1.6679732534009684, + "grad_norm": 1.4528281796334948, + "learning_rate": 1.4716068916771452e-07, + "loss": 0.4722225069999695, + "step": 7234 + }, + { + "epoch": 1.668203827530551, + "grad_norm": 1.3954919737652625, + "learning_rate": 1.4696166665835852e-07, + "loss": 0.3645583987236023, + "step": 7235 + }, + { + "epoch": 1.6684344016601336, + "grad_norm": 1.628738998914794, + "learning_rate": 1.4676276814676935e-07, + "loss": 0.4153117537498474, + "step": 7236 + }, + { + "epoch": 1.6686649757897163, + "grad_norm": 1.2987847859472657, + "learning_rate": 1.4656399366185933e-07, + "loss": 0.3470612168312073, + "step": 7237 + }, + { + "epoch": 1.668895549919299, + "grad_norm": 1.424067964832139, + "learning_rate": 1.4636534323252203e-07, + "loss": 0.3934207260608673, + "step": 7238 + }, + { + "epoch": 1.6691261240488817, + "grad_norm": 1.6191654953115664, + "learning_rate": 1.4616681688763355e-07, + "loss": 
0.35530412197113037, + "step": 7239 + }, + { + "epoch": 1.6693566981784644, + "grad_norm": 1.5867473768730196, + "learning_rate": 1.4596841465605136e-07, + "loss": 0.5218726396560669, + "step": 7240 + }, + { + "epoch": 1.6695872723080472, + "grad_norm": 1.9070671037743527, + "learning_rate": 1.4577013656661542e-07, + "loss": 0.4287494421005249, + "step": 7241 + }, + { + "epoch": 1.6698178464376296, + "grad_norm": 2.099754040079973, + "learning_rate": 1.4557198264814775e-07, + "loss": 0.5161805152893066, + "step": 7242 + }, + { + "epoch": 1.6700484205672124, + "grad_norm": 1.485709070131558, + "learning_rate": 1.4537395292945153e-07, + "loss": 0.4843006730079651, + "step": 7243 + }, + { + "epoch": 1.6702789946967949, + "grad_norm": 1.416657421952009, + "learning_rate": 1.4517604743931288e-07, + "loss": 0.526993989944458, + "step": 7244 + }, + { + "epoch": 1.6705095688263776, + "grad_norm": 1.318696888956493, + "learning_rate": 1.4497826620649888e-07, + "loss": 0.43705734610557556, + "step": 7245 + }, + { + "epoch": 1.6707401429559603, + "grad_norm": 1.626300355229789, + "learning_rate": 1.4478060925975942e-07, + "loss": 0.6001747846603394, + "step": 7246 + }, + { + "epoch": 1.670970717085543, + "grad_norm": 1.6701240840694564, + "learning_rate": 1.4458307662782564e-07, + "loss": 0.4041635990142822, + "step": 7247 + }, + { + "epoch": 1.6712012912151257, + "grad_norm": 1.6291301094782007, + "learning_rate": 1.4438566833941112e-07, + "loss": 0.4425908923149109, + "step": 7248 + }, + { + "epoch": 1.6714318653447084, + "grad_norm": 1.8234242321709921, + "learning_rate": 1.4418838442321102e-07, + "loss": 0.5202267169952393, + "step": 7249 + }, + { + "epoch": 1.671662439474291, + "grad_norm": 1.3646967283137599, + "learning_rate": 1.4399122490790293e-07, + "loss": 0.44352006912231445, + "step": 7250 + }, + { + "epoch": 1.6718930136038737, + "grad_norm": 1.5745296606833632, + "learning_rate": 1.4379418982214542e-07, + "loss": 0.4757179021835327, + "step": 7251 + }, + { + 
"epoch": 1.6721235877334562, + "grad_norm": 2.0125776677757825, + "learning_rate": 1.4359727919457998e-07, + "loss": 0.4748988747596741, + "step": 7252 + }, + { + "epoch": 1.6723541618630389, + "grad_norm": 1.4390886859105494, + "learning_rate": 1.434004930538294e-07, + "loss": 0.4280398190021515, + "step": 7253 + }, + { + "epoch": 1.6725847359926216, + "grad_norm": 1.5844583735943714, + "learning_rate": 1.4320383142849834e-07, + "loss": 0.4959871172904968, + "step": 7254 + }, + { + "epoch": 1.6728153101222043, + "grad_norm": 1.6551218088905322, + "learning_rate": 1.4300729434717396e-07, + "loss": 0.506413996219635, + "step": 7255 + }, + { + "epoch": 1.673045884251787, + "grad_norm": 1.5894513628120581, + "learning_rate": 1.4281088183842448e-07, + "loss": 0.4723675847053528, + "step": 7256 + }, + { + "epoch": 1.6732764583813697, + "grad_norm": 1.5735532616627814, + "learning_rate": 1.4261459393080076e-07, + "loss": 0.41801339387893677, + "step": 7257 + }, + { + "epoch": 1.6735070325109522, + "grad_norm": 1.651784117733762, + "learning_rate": 1.424184306528351e-07, + "loss": 0.4463369846343994, + "step": 7258 + }, + { + "epoch": 1.673737606640535, + "grad_norm": 1.6205372576102755, + "learning_rate": 1.422223920330421e-07, + "loss": 0.4167429506778717, + "step": 7259 + }, + { + "epoch": 1.6739681807701174, + "grad_norm": 1.448285732733219, + "learning_rate": 1.420264780999174e-07, + "loss": 0.48808401823043823, + "step": 7260 + }, + { + "epoch": 1.6741987548997002, + "grad_norm": 1.7994342785579152, + "learning_rate": 1.4183068888193973e-07, + "loss": 0.515659749507904, + "step": 7261 + }, + { + "epoch": 1.6744293290292829, + "grad_norm": 1.6582236339460064, + "learning_rate": 1.416350244075688e-07, + "loss": 0.4393026530742645, + "step": 7262 + }, + { + "epoch": 1.6746599031588656, + "grad_norm": 1.6750398739214198, + "learning_rate": 1.4143948470524602e-07, + "loss": 0.35053056478500366, + "step": 7263 + }, + { + "epoch": 1.6748904772884483, + "grad_norm": 
1.1872706234379884, + "learning_rate": 1.4124406980339532e-07, + "loss": 0.35598453879356384, + "step": 7264 + }, + { + "epoch": 1.675121051418031, + "grad_norm": 1.747342634360751, + "learning_rate": 1.410487797304224e-07, + "loss": 0.47989165782928467, + "step": 7265 + }, + { + "epoch": 1.6753516255476135, + "grad_norm": 1.4767801179152846, + "learning_rate": 1.408536145147148e-07, + "loss": 0.4621499180793762, + "step": 7266 + }, + { + "epoch": 1.6755821996771962, + "grad_norm": 1.4469255776490486, + "learning_rate": 1.4065857418464122e-07, + "loss": 0.40567925572395325, + "step": 7267 + }, + { + "epoch": 1.6758127738067787, + "grad_norm": 2.121901896007684, + "learning_rate": 1.4046365876855326e-07, + "loss": 0.38889849185943604, + "step": 7268 + }, + { + "epoch": 1.6760433479363614, + "grad_norm": 1.8036845925466258, + "learning_rate": 1.4026886829478345e-07, + "loss": 0.516187846660614, + "step": 7269 + }, + { + "epoch": 1.6762739220659442, + "grad_norm": 1.3670995724086425, + "learning_rate": 1.4007420279164706e-07, + "loss": 0.4007910192012787, + "step": 7270 + }, + { + "epoch": 1.6765044961955269, + "grad_norm": 1.4513245632029468, + "learning_rate": 1.3987966228744007e-07, + "loss": 0.4426886737346649, + "step": 7271 + }, + { + "epoch": 1.6767350703251096, + "grad_norm": 1.7767592903800882, + "learning_rate": 1.3968524681044114e-07, + "loss": 0.46890369057655334, + "step": 7272 + }, + { + "epoch": 1.6769656444546923, + "grad_norm": 1.714201330640179, + "learning_rate": 1.3949095638891096e-07, + "loss": 0.510369598865509, + "step": 7273 + }, + { + "epoch": 1.6771962185842748, + "grad_norm": 1.697492362317676, + "learning_rate": 1.3929679105109106e-07, + "loss": 0.47810226678848267, + "step": 7274 + }, + { + "epoch": 1.6774267927138575, + "grad_norm": 1.6234301902278867, + "learning_rate": 1.3910275082520572e-07, + "loss": 0.48592591285705566, + "step": 7275 + }, + { + "epoch": 1.67765736684344, + "grad_norm": 1.5107060260742486, + "learning_rate": 
1.3890883573946021e-07, + "loss": 0.4664943814277649, + "step": 7276 + }, + { + "epoch": 1.6778879409730227, + "grad_norm": 1.6514095493299281, + "learning_rate": 1.3871504582204263e-07, + "loss": 0.47146645188331604, + "step": 7277 + }, + { + "epoch": 1.6781185151026055, + "grad_norm": 1.615997642769361, + "learning_rate": 1.3852138110112166e-07, + "loss": 0.5171671509742737, + "step": 7278 + }, + { + "epoch": 1.6783490892321882, + "grad_norm": 1.8275491234958787, + "learning_rate": 1.3832784160484913e-07, + "loss": 0.45887336134910583, + "step": 7279 + }, + { + "epoch": 1.678579663361771, + "grad_norm": 1.494861700798582, + "learning_rate": 1.3813442736135728e-07, + "loss": 0.4363539516925812, + "step": 7280 + }, + { + "epoch": 1.6788102374913536, + "grad_norm": 2.0171892009876147, + "learning_rate": 1.379411383987612e-07, + "loss": 0.4626097083091736, + "step": 7281 + }, + { + "epoch": 1.679040811620936, + "grad_norm": 1.8196525383976765, + "learning_rate": 1.3774797474515766e-07, + "loss": 0.5939204096794128, + "step": 7282 + }, + { + "epoch": 1.6792713857505188, + "grad_norm": 1.6878435890648014, + "learning_rate": 1.3755493642862437e-07, + "loss": 0.5463666915893555, + "step": 7283 + }, + { + "epoch": 1.6795019598801013, + "grad_norm": 1.622691460463702, + "learning_rate": 1.3736202347722182e-07, + "loss": 0.3634001910686493, + "step": 7284 + }, + { + "epoch": 1.679732534009684, + "grad_norm": 1.6327202188647956, + "learning_rate": 1.3716923591899166e-07, + "loss": 0.39512360095977783, + "step": 7285 + }, + { + "epoch": 1.6799631081392667, + "grad_norm": 1.3361978857608434, + "learning_rate": 1.3697657378195772e-07, + "loss": 0.3858473300933838, + "step": 7286 + }, + { + "epoch": 1.6801936822688495, + "grad_norm": 1.4527844976472322, + "learning_rate": 1.36784037094125e-07, + "loss": 0.473757266998291, + "step": 7287 + }, + { + "epoch": 1.6804242563984322, + "grad_norm": 1.410877918262981, + "learning_rate": 1.3659162588348107e-07, + "loss": 
0.41679126024246216, + "step": 7288 + }, + { + "epoch": 1.680654830528015, + "grad_norm": 1.7135792249847552, + "learning_rate": 1.363993401779946e-07, + "loss": 0.4267998933792114, + "step": 7289 + }, + { + "epoch": 1.6808854046575974, + "grad_norm": 1.6476835268765473, + "learning_rate": 1.3620718000561648e-07, + "loss": 0.5453667044639587, + "step": 7290 + }, + { + "epoch": 1.68111597878718, + "grad_norm": 1.4347316593862658, + "learning_rate": 1.3601514539427895e-07, + "loss": 0.3882933259010315, + "step": 7291 + }, + { + "epoch": 1.6813465529167626, + "grad_norm": 1.7177796725752086, + "learning_rate": 1.3582323637189653e-07, + "loss": 0.5565635561943054, + "step": 7292 + }, + { + "epoch": 1.6815771270463453, + "grad_norm": 1.448665873125515, + "learning_rate": 1.356314529663647e-07, + "loss": 0.49807024002075195, + "step": 7293 + }, + { + "epoch": 1.681807701175928, + "grad_norm": 1.5449122885779156, + "learning_rate": 1.3543979520556116e-07, + "loss": 0.40868130326271057, + "step": 7294 + }, + { + "epoch": 1.6820382753055108, + "grad_norm": 1.4045709349742252, + "learning_rate": 1.352482631173455e-07, + "loss": 0.46088406443595886, + "step": 7295 + }, + { + "epoch": 1.6822688494350935, + "grad_norm": 1.7658846162202777, + "learning_rate": 1.3505685672955869e-07, + "loss": 0.44346722960472107, + "step": 7296 + }, + { + "epoch": 1.6824994235646762, + "grad_norm": 1.3703801713050607, + "learning_rate": 1.348655760700239e-07, + "loss": 0.36585044860839844, + "step": 7297 + }, + { + "epoch": 1.6827299976942587, + "grad_norm": 1.8199719530329925, + "learning_rate": 1.3467442116654536e-07, + "loss": 0.46082472801208496, + "step": 7298 + }, + { + "epoch": 1.6829605718238414, + "grad_norm": 1.8043564550526412, + "learning_rate": 1.3448339204690974e-07, + "loss": 0.5011709928512573, + "step": 7299 + }, + { + "epoch": 1.683191145953424, + "grad_norm": 2.1355217293891378, + "learning_rate": 1.3429248873888454e-07, + "loss": 0.4382838010787964, + "step": 7300 + }, + { + 
"epoch": 1.6834217200830066, + "grad_norm": 1.4118543770807777, + "learning_rate": 1.3410171127022008e-07, + "loss": 0.35204610228538513, + "step": 7301 + }, + { + "epoch": 1.6836522942125893, + "grad_norm": 1.3718001359049319, + "learning_rate": 1.3391105966864745e-07, + "loss": 0.3915257453918457, + "step": 7302 + }, + { + "epoch": 1.683882868342172, + "grad_norm": 1.4102637825932318, + "learning_rate": 1.3372053396187967e-07, + "loss": 0.3945339322090149, + "step": 7303 + }, + { + "epoch": 1.6841134424717548, + "grad_norm": 1.7911618298179695, + "learning_rate": 1.335301341776117e-07, + "loss": 0.48783642053604126, + "step": 7304 + }, + { + "epoch": 1.6843440166013375, + "grad_norm": 1.745012134293522, + "learning_rate": 1.333398603435203e-07, + "loss": 0.49026161432266235, + "step": 7305 + }, + { + "epoch": 1.68457459073092, + "grad_norm": 1.9699708710220791, + "learning_rate": 1.3314971248726358e-07, + "loss": 0.5035061836242676, + "step": 7306 + }, + { + "epoch": 1.6848051648605027, + "grad_norm": 1.7602149086036532, + "learning_rate": 1.3295969063648126e-07, + "loss": 0.5452826023101807, + "step": 7307 + }, + { + "epoch": 1.6850357389900852, + "grad_norm": 1.7088858518580703, + "learning_rate": 1.3276979481879524e-07, + "loss": 0.4609105885028839, + "step": 7308 + }, + { + "epoch": 1.685266313119668, + "grad_norm": 1.6869514802612067, + "learning_rate": 1.3258002506180855e-07, + "loss": 0.5799046754837036, + "step": 7309 + }, + { + "epoch": 1.6854968872492506, + "grad_norm": 1.6691103426337504, + "learning_rate": 1.3239038139310644e-07, + "loss": 0.42096465826034546, + "step": 7310 + }, + { + "epoch": 1.6857274613788333, + "grad_norm": 1.9781377178498367, + "learning_rate": 1.3220086384025508e-07, + "loss": 0.4741813540458679, + "step": 7311 + }, + { + "epoch": 1.685958035508416, + "grad_norm": 1.5972207301313162, + "learning_rate": 1.3201147243080302e-07, + "loss": 0.4872191250324249, + "step": 7312 + }, + { + "epoch": 1.6861886096379988, + "grad_norm": 
1.7767879845396581, + "learning_rate": 1.3182220719228054e-07, + "loss": 0.5210198163986206, + "step": 7313 + }, + { + "epoch": 1.6864191837675813, + "grad_norm": 1.932834262840403, + "learning_rate": 1.3163306815219878e-07, + "loss": 0.4873948395252228, + "step": 7314 + }, + { + "epoch": 1.686649757897164, + "grad_norm": 1.723686253702064, + "learning_rate": 1.3144405533805136e-07, + "loss": 0.46856212615966797, + "step": 7315 + }, + { + "epoch": 1.6868803320267465, + "grad_norm": 1.549399332710726, + "learning_rate": 1.3125516877731279e-07, + "loss": 0.3931645154953003, + "step": 7316 + }, + { + "epoch": 1.6871109061563292, + "grad_norm": 1.5988122745666866, + "learning_rate": 1.3106640849744023e-07, + "loss": 0.4473317861557007, + "step": 7317 + }, + { + "epoch": 1.687341480285912, + "grad_norm": 1.5841372684708825, + "learning_rate": 1.3087777452587124e-07, + "loss": 0.4499043822288513, + "step": 7318 + }, + { + "epoch": 1.6875720544154946, + "grad_norm": 1.6054649930580802, + "learning_rate": 1.30689266890026e-07, + "loss": 0.4992508292198181, + "step": 7319 + }, + { + "epoch": 1.6878026285450773, + "grad_norm": 1.426896936128743, + "learning_rate": 1.305008856173061e-07, + "loss": 0.4684743583202362, + "step": 7320 + }, + { + "epoch": 1.68803320267466, + "grad_norm": 1.7876602073965717, + "learning_rate": 1.303126307350948e-07, + "loss": 0.5543930530548096, + "step": 7321 + }, + { + "epoch": 1.6882637768042426, + "grad_norm": 1.3482084944505501, + "learning_rate": 1.3012450227075655e-07, + "loss": 0.3812211751937866, + "step": 7322 + }, + { + "epoch": 1.6884943509338253, + "grad_norm": 2.079165248146425, + "learning_rate": 1.299365002516377e-07, + "loss": 0.5455845594406128, + "step": 7323 + }, + { + "epoch": 1.6887249250634078, + "grad_norm": 1.3768890960712863, + "learning_rate": 1.2974862470506654e-07, + "loss": 0.4256778657436371, + "step": 7324 + }, + { + "epoch": 1.6889554991929905, + "grad_norm": 1.9468423520002898, + "learning_rate": 
1.2956087565835228e-07, + "loss": 0.4973354637622833, + "step": 7325 + }, + { + "epoch": 1.6891860733225732, + "grad_norm": 1.5779840439512345, + "learning_rate": 1.2937325313878666e-07, + "loss": 0.5141343474388123, + "step": 7326 + }, + { + "epoch": 1.689416647452156, + "grad_norm": 1.5179632497576485, + "learning_rate": 1.2918575717364178e-07, + "loss": 0.3872978687286377, + "step": 7327 + }, + { + "epoch": 1.6896472215817386, + "grad_norm": 1.3857087225021212, + "learning_rate": 1.2899838779017292e-07, + "loss": 0.4333486557006836, + "step": 7328 + }, + { + "epoch": 1.6898777957113214, + "grad_norm": 1.5624646221048997, + "learning_rate": 1.2881114501561553e-07, + "loss": 0.42979496717453003, + "step": 7329 + }, + { + "epoch": 1.6901083698409038, + "grad_norm": 1.6512939392276094, + "learning_rate": 1.2862402887718771e-07, + "loss": 0.43296414613723755, + "step": 7330 + }, + { + "epoch": 1.6903389439704866, + "grad_norm": 1.4822998528875215, + "learning_rate": 1.2843703940208816e-07, + "loss": 0.41763681173324585, + "step": 7331 + }, + { + "epoch": 1.690569518100069, + "grad_norm": 1.4433304691783968, + "learning_rate": 1.2825017661749814e-07, + "loss": 0.4531592130661011, + "step": 7332 + }, + { + "epoch": 1.6908000922296518, + "grad_norm": 1.5515786608723572, + "learning_rate": 1.2806344055057995e-07, + "loss": 0.4608149826526642, + "step": 7333 + }, + { + "epoch": 1.6910306663592345, + "grad_norm": 1.5678716271625897, + "learning_rate": 1.2787683122847726e-07, + "loss": 0.4298786520957947, + "step": 7334 + }, + { + "epoch": 1.6912612404888172, + "grad_norm": 1.5882305453896473, + "learning_rate": 1.2769034867831586e-07, + "loss": 0.4404297471046448, + "step": 7335 + }, + { + "epoch": 1.6914918146184, + "grad_norm": 1.590662947019878, + "learning_rate": 1.2750399292720281e-07, + "loss": 0.3857702910900116, + "step": 7336 + }, + { + "epoch": 1.6917223887479826, + "grad_norm": 1.5092920813034143, + "learning_rate": 1.2731776400222716e-07, + "loss": 
0.351214200258255, + "step": 7337 + }, + { + "epoch": 1.6919529628775651, + "grad_norm": 1.6618460717985095, + "learning_rate": 1.2713166193045854e-07, + "loss": 0.4711484909057617, + "step": 7338 + }, + { + "epoch": 1.6921835370071479, + "grad_norm": 1.605912014604012, + "learning_rate": 1.2694568673894946e-07, + "loss": 0.4819946587085724, + "step": 7339 + }, + { + "epoch": 1.6924141111367303, + "grad_norm": 1.5366035327097678, + "learning_rate": 1.267598384547327e-07, + "loss": 0.39870262145996094, + "step": 7340 + }, + { + "epoch": 1.692644685266313, + "grad_norm": 1.410709311062986, + "learning_rate": 1.265741171048237e-07, + "loss": 0.4775997996330261, + "step": 7341 + }, + { + "epoch": 1.6928752593958958, + "grad_norm": 1.5031428119722987, + "learning_rate": 1.2638852271621836e-07, + "loss": 0.4166836738586426, + "step": 7342 + }, + { + "epoch": 1.6931058335254785, + "grad_norm": 1.362546283009112, + "learning_rate": 1.2620305531589514e-07, + "loss": 0.396761953830719, + "step": 7343 + }, + { + "epoch": 1.6933364076550612, + "grad_norm": 1.5811036971551204, + "learning_rate": 1.260177149308136e-07, + "loss": 0.36929184198379517, + "step": 7344 + }, + { + "epoch": 1.6935669817846437, + "grad_norm": 1.6142308009439483, + "learning_rate": 1.2583250158791459e-07, + "loss": 0.4664369821548462, + "step": 7345 + }, + { + "epoch": 1.6937975559142264, + "grad_norm": 1.4490673957983151, + "learning_rate": 1.2564741531412115e-07, + "loss": 0.40877625346183777, + "step": 7346 + }, + { + "epoch": 1.694028130043809, + "grad_norm": 1.3363670323915413, + "learning_rate": 1.254624561363369e-07, + "loss": 0.4282684922218323, + "step": 7347 + }, + { + "epoch": 1.6942587041733916, + "grad_norm": 1.7781191335343183, + "learning_rate": 1.2527762408144805e-07, + "loss": 0.5430412292480469, + "step": 7348 + }, + { + "epoch": 1.6944892783029744, + "grad_norm": 1.7384245962384524, + "learning_rate": 1.2509291917632147e-07, + "loss": 0.45990923047065735, + "step": 7349 + }, + { + 
"epoch": 1.694719852432557, + "grad_norm": 1.5699544039589348, + "learning_rate": 1.2490834144780593e-07, + "loss": 0.38062262535095215, + "step": 7350 + }, + { + "epoch": 1.6949504265621398, + "grad_norm": 1.5427808320923257, + "learning_rate": 1.2472389092273172e-07, + "loss": 0.4704701900482178, + "step": 7351 + }, + { + "epoch": 1.6951810006917225, + "grad_norm": 1.3215044901700805, + "learning_rate": 1.2453956762791084e-07, + "loss": 0.4439951181411743, + "step": 7352 + }, + { + "epoch": 1.695411574821305, + "grad_norm": 1.6827848110964911, + "learning_rate": 1.2435537159013632e-07, + "loss": 0.49405014514923096, + "step": 7353 + }, + { + "epoch": 1.6956421489508877, + "grad_norm": 1.4071924274505998, + "learning_rate": 1.2417130283618282e-07, + "loss": 0.4282076060771942, + "step": 7354 + }, + { + "epoch": 1.6958727230804702, + "grad_norm": 1.4129187553888694, + "learning_rate": 1.2398736139280687e-07, + "loss": 0.43492811918258667, + "step": 7355 + }, + { + "epoch": 1.696103297210053, + "grad_norm": 1.550272919478409, + "learning_rate": 1.238035472867458e-07, + "loss": 0.37239378690719604, + "step": 7356 + }, + { + "epoch": 1.6963338713396356, + "grad_norm": 1.2721176079849843, + "learning_rate": 1.236198605447194e-07, + "loss": 0.39911961555480957, + "step": 7357 + }, + { + "epoch": 1.6965644454692184, + "grad_norm": 1.911188398718987, + "learning_rate": 1.2343630119342786e-07, + "loss": 0.4962255656719208, + "step": 7358 + }, + { + "epoch": 1.696795019598801, + "grad_norm": 1.3131623819116638, + "learning_rate": 1.2325286925955358e-07, + "loss": 0.37414759397506714, + "step": 7359 + }, + { + "epoch": 1.6970255937283838, + "grad_norm": 1.5092759235813635, + "learning_rate": 1.230695647697604e-07, + "loss": 0.41224929690361023, + "step": 7360 + }, + { + "epoch": 1.6972561678579663, + "grad_norm": 1.3964295729715615, + "learning_rate": 1.228863877506936e-07, + "loss": 0.43184489011764526, + "step": 7361 + }, + { + "epoch": 1.697486741987549, + "grad_norm": 
1.6991026917946972, + "learning_rate": 1.227033382289795e-07, + "loss": 0.4741829037666321, + "step": 7362 + }, + { + "epoch": 1.6977173161171315, + "grad_norm": 1.677947901828469, + "learning_rate": 1.2252041623122643e-07, + "loss": 0.43224620819091797, + "step": 7363 + }, + { + "epoch": 1.6979478902467142, + "grad_norm": 1.678576477296345, + "learning_rate": 1.2233762178402386e-07, + "loss": 0.46645525097846985, + "step": 7364 + }, + { + "epoch": 1.698178464376297, + "grad_norm": 1.4201537921120515, + "learning_rate": 1.2215495491394256e-07, + "loss": 0.4237707555294037, + "step": 7365 + }, + { + "epoch": 1.6984090385058797, + "grad_norm": 1.3069690432597363, + "learning_rate": 1.2197241564753535e-07, + "loss": 0.36378395557403564, + "step": 7366 + }, + { + "epoch": 1.6986396126354624, + "grad_norm": 1.6387935949488672, + "learning_rate": 1.21790004011336e-07, + "loss": 0.4564269185066223, + "step": 7367 + }, + { + "epoch": 1.698870186765045, + "grad_norm": 1.3009015849639454, + "learning_rate": 1.2160772003186027e-07, + "loss": 0.4492420256137848, + "step": 7368 + }, + { + "epoch": 1.6991007608946276, + "grad_norm": 1.6097888974991954, + "learning_rate": 1.214255637356043e-07, + "loss": 0.515146017074585, + "step": 7369 + }, + { + "epoch": 1.6993313350242103, + "grad_norm": 1.5565943453492384, + "learning_rate": 1.2124353514904707e-07, + "loss": 0.41473329067230225, + "step": 7370 + }, + { + "epoch": 1.6995619091537928, + "grad_norm": 1.6571527829218886, + "learning_rate": 1.210616342986477e-07, + "loss": 0.4408412575721741, + "step": 7371 + }, + { + "epoch": 1.6997924832833755, + "grad_norm": 1.6546450900594125, + "learning_rate": 1.208798612108477e-07, + "loss": 0.5370820760726929, + "step": 7372 + }, + { + "epoch": 1.7000230574129582, + "grad_norm": 1.502975927661507, + "learning_rate": 1.206982159120693e-07, + "loss": 0.46518170833587646, + "step": 7373 + }, + { + "epoch": 1.700253631542541, + "grad_norm": 1.5801444025292624, + "learning_rate": 
1.205166984287167e-07, + "loss": 0.45063477754592896, + "step": 7374 + }, + { + "epoch": 1.7004842056721237, + "grad_norm": 1.4109266758667123, + "learning_rate": 1.2033530878717546e-07, + "loss": 0.47391965985298157, + "step": 7375 + }, + { + "epoch": 1.7007147798017064, + "grad_norm": 1.680591382104731, + "learning_rate": 1.2015404701381205e-07, + "loss": 0.45812156796455383, + "step": 7376 + }, + { + "epoch": 1.7009453539312889, + "grad_norm": 1.7661450796417113, + "learning_rate": 1.1997291313497503e-07, + "loss": 0.5174708366394043, + "step": 7377 + }, + { + "epoch": 1.7011759280608716, + "grad_norm": 1.2379321910437706, + "learning_rate": 1.1979190717699373e-07, + "loss": 0.3412814736366272, + "step": 7378 + }, + { + "epoch": 1.701406502190454, + "grad_norm": 1.6619687091053885, + "learning_rate": 1.196110291661796e-07, + "loss": 0.41912171244621277, + "step": 7379 + }, + { + "epoch": 1.7016370763200368, + "grad_norm": 1.7384039938738447, + "learning_rate": 1.1943027912882464e-07, + "loss": 0.5569772720336914, + "step": 7380 + }, + { + "epoch": 1.7018676504496195, + "grad_norm": 1.309448309717786, + "learning_rate": 1.1924965709120304e-07, + "loss": 0.40875375270843506, + "step": 7381 + }, + { + "epoch": 1.7020982245792022, + "grad_norm": 1.5803953469974217, + "learning_rate": 1.1906916307956983e-07, + "loss": 0.46906760334968567, + "step": 7382 + }, + { + "epoch": 1.702328798708785, + "grad_norm": 1.2850228520937832, + "learning_rate": 1.1888879712016165e-07, + "loss": 0.40830397605895996, + "step": 7383 + }, + { + "epoch": 1.7025593728383677, + "grad_norm": 1.4770811279187035, + "learning_rate": 1.1870855923919687e-07, + "loss": 0.4051646590232849, + "step": 7384 + }, + { + "epoch": 1.7027899469679502, + "grad_norm": 1.696009847928002, + "learning_rate": 1.1852844946287432e-07, + "loss": 0.5042610764503479, + "step": 7385 + }, + { + "epoch": 1.7030205210975329, + "grad_norm": 1.6262740295484197, + "learning_rate": 1.183484678173754e-07, + "loss": 
0.5304923057556152, + "step": 7386 + }, + { + "epoch": 1.7032510952271154, + "grad_norm": 1.2604579461831944, + "learning_rate": 1.1816861432886171e-07, + "loss": 0.443366676568985, + "step": 7387 + }, + { + "epoch": 1.703481669356698, + "grad_norm": 1.3836719865657088, + "learning_rate": 1.1798888902347714e-07, + "loss": 0.4527779817581177, + "step": 7388 + }, + { + "epoch": 1.7037122434862808, + "grad_norm": 1.3616715508883823, + "learning_rate": 1.1780929192734634e-07, + "loss": 0.4277183413505554, + "step": 7389 + }, + { + "epoch": 1.7039428176158635, + "grad_norm": 1.3714415020573154, + "learning_rate": 1.1762982306657577e-07, + "loss": 0.4908677637577057, + "step": 7390 + }, + { + "epoch": 1.7041733917454462, + "grad_norm": 1.4373179697113392, + "learning_rate": 1.1745048246725286e-07, + "loss": 0.398892879486084, + "step": 7391 + }, + { + "epoch": 1.704403965875029, + "grad_norm": 1.801155926723525, + "learning_rate": 1.1727127015544691e-07, + "loss": 0.4654615521430969, + "step": 7392 + }, + { + "epoch": 1.7046345400046115, + "grad_norm": 1.6258673974312492, + "learning_rate": 1.1709218615720806e-07, + "loss": 0.4850313663482666, + "step": 7393 + }, + { + "epoch": 1.7048651141341942, + "grad_norm": 1.3854283292952871, + "learning_rate": 1.1691323049856772e-07, + "loss": 0.4036976099014282, + "step": 7394 + }, + { + "epoch": 1.7050956882637767, + "grad_norm": 1.6824325261066553, + "learning_rate": 1.167344032055394e-07, + "loss": 0.39174383878707886, + "step": 7395 + }, + { + "epoch": 1.7053262623933594, + "grad_norm": 1.49190685623753, + "learning_rate": 1.1655570430411699e-07, + "loss": 0.44915109872817993, + "step": 7396 + }, + { + "epoch": 1.705556836522942, + "grad_norm": 1.4487302731781821, + "learning_rate": 1.1637713382027636e-07, + "loss": 0.4720522165298462, + "step": 7397 + }, + { + "epoch": 1.7057874106525248, + "grad_norm": 1.5236154065511855, + "learning_rate": 1.1619869177997455e-07, + "loss": 0.4452325105667114, + "step": 7398 + }, + { + 
"epoch": 1.7060179847821075, + "grad_norm": 1.489108876491428, + "learning_rate": 1.1602037820915023e-07, + "loss": 0.4009271562099457, + "step": 7399 + }, + { + "epoch": 1.7062485589116902, + "grad_norm": 1.3320502296097492, + "learning_rate": 1.1584219313372257e-07, + "loss": 0.37518051266670227, + "step": 7400 + }, + { + "epoch": 1.7064791330412727, + "grad_norm": 1.5361245639590775, + "learning_rate": 1.1566413657959295e-07, + "loss": 0.42883241176605225, + "step": 7401 + }, + { + "epoch": 1.7067097071708555, + "grad_norm": 1.5311391941499002, + "learning_rate": 1.1548620857264346e-07, + "loss": 0.4597551226615906, + "step": 7402 + }, + { + "epoch": 1.706940281300438, + "grad_norm": 1.4815045613998048, + "learning_rate": 1.1530840913873797e-07, + "loss": 0.5491876006126404, + "step": 7403 + }, + { + "epoch": 1.7071708554300207, + "grad_norm": 1.8810828492754625, + "learning_rate": 1.1513073830372122e-07, + "loss": 0.5632074475288391, + "step": 7404 + }, + { + "epoch": 1.7074014295596034, + "grad_norm": 1.557196455612015, + "learning_rate": 1.1495319609341947e-07, + "loss": 0.5251858234405518, + "step": 7405 + }, + { + "epoch": 1.707632003689186, + "grad_norm": 1.7979639485315768, + "learning_rate": 1.1477578253364028e-07, + "loss": 0.5388965606689453, + "step": 7406 + }, + { + "epoch": 1.7078625778187688, + "grad_norm": 1.7322317596816112, + "learning_rate": 1.145984976501726e-07, + "loss": 0.4429551959037781, + "step": 7407 + }, + { + "epoch": 1.7080931519483515, + "grad_norm": 1.5048923212213088, + "learning_rate": 1.144213414687868e-07, + "loss": 0.4702358841896057, + "step": 7408 + }, + { + "epoch": 1.708323726077934, + "grad_norm": 1.616629635802576, + "learning_rate": 1.1424431401523382e-07, + "loss": 0.4506569504737854, + "step": 7409 + }, + { + "epoch": 1.7085543002075168, + "grad_norm": 1.5722880063833475, + "learning_rate": 1.1406741531524689e-07, + "loss": 0.384244441986084, + "step": 7410 + }, + { + "epoch": 1.7087848743370992, + "grad_norm": 
1.6254816299222574, + "learning_rate": 1.1389064539453952e-07, + "loss": 0.4642629027366638, + "step": 7411 + }, + { + "epoch": 1.709015448466682, + "grad_norm": 1.5180284715923413, + "learning_rate": 1.1371400427880761e-07, + "loss": 0.4568482041358948, + "step": 7412 + }, + { + "epoch": 1.7092460225962647, + "grad_norm": 1.6058744016500281, + "learning_rate": 1.135374919937272e-07, + "loss": 0.536895215511322, + "step": 7413 + }, + { + "epoch": 1.7094765967258474, + "grad_norm": 1.6944575711634469, + "learning_rate": 1.1336110856495628e-07, + "loss": 0.49696239829063416, + "step": 7414 + }, + { + "epoch": 1.7097071708554301, + "grad_norm": 1.802031783829704, + "learning_rate": 1.1318485401813438e-07, + "loss": 0.3857358694076538, + "step": 7415 + }, + { + "epoch": 1.7099377449850128, + "grad_norm": 1.5410848248596472, + "learning_rate": 1.1300872837888121e-07, + "loss": 0.38111335039138794, + "step": 7416 + }, + { + "epoch": 1.7101683191145953, + "grad_norm": 1.6014644101172142, + "learning_rate": 1.1283273167279906e-07, + "loss": 0.4255755543708801, + "step": 7417 + }, + { + "epoch": 1.710398893244178, + "grad_norm": 1.6646696692039435, + "learning_rate": 1.1265686392547024e-07, + "loss": 0.5048757791519165, + "step": 7418 + }, + { + "epoch": 1.7106294673737605, + "grad_norm": 1.6262992093918878, + "learning_rate": 1.1248112516245944e-07, + "loss": 0.5402916073799133, + "step": 7419 + }, + { + "epoch": 1.7108600415033433, + "grad_norm": 1.6105931834922984, + "learning_rate": 1.1230551540931165e-07, + "loss": 0.3617591857910156, + "step": 7420 + }, + { + "epoch": 1.711090615632926, + "grad_norm": 1.584818843359006, + "learning_rate": 1.1213003469155369e-07, + "loss": 0.4636116921901703, + "step": 7421 + }, + { + "epoch": 1.7113211897625087, + "grad_norm": 1.7626797404606351, + "learning_rate": 1.1195468303469346e-07, + "loss": 0.4675198495388031, + "step": 7422 + }, + { + "epoch": 1.7115517638920914, + "grad_norm": 1.6024517382949015, + "learning_rate": 
1.1177946046422038e-07, + "loss": 0.48491787910461426, + "step": 7423 + }, + { + "epoch": 1.7117823380216741, + "grad_norm": 1.5413352133121294, + "learning_rate": 1.1160436700560449e-07, + "loss": 0.3898283839225769, + "step": 7424 + }, + { + "epoch": 1.7120129121512566, + "grad_norm": 1.5514584947710022, + "learning_rate": 1.1142940268429735e-07, + "loss": 0.41522908210754395, + "step": 7425 + }, + { + "epoch": 1.7122434862808393, + "grad_norm": 1.430903522239028, + "learning_rate": 1.1125456752573215e-07, + "loss": 0.4681985378265381, + "step": 7426 + }, + { + "epoch": 1.7124740604104218, + "grad_norm": 1.8962296460852388, + "learning_rate": 1.1107986155532245e-07, + "loss": 0.4788553714752197, + "step": 7427 + }, + { + "epoch": 1.7127046345400045, + "grad_norm": 1.5072364623848036, + "learning_rate": 1.1090528479846406e-07, + "loss": 0.43853843212127686, + "step": 7428 + }, + { + "epoch": 1.7129352086695873, + "grad_norm": 1.542463594674994, + "learning_rate": 1.107308372805329e-07, + "loss": 0.3736591637134552, + "step": 7429 + }, + { + "epoch": 1.71316578279917, + "grad_norm": 1.8237435289536401, + "learning_rate": 1.1055651902688712e-07, + "loss": 0.5770819783210754, + "step": 7430 + }, + { + "epoch": 1.7133963569287527, + "grad_norm": 1.7972828104133267, + "learning_rate": 1.1038233006286558e-07, + "loss": 0.5906555652618408, + "step": 7431 + }, + { + "epoch": 1.7136269310583354, + "grad_norm": 1.396062928601261, + "learning_rate": 1.1020827041378844e-07, + "loss": 0.4621407389640808, + "step": 7432 + }, + { + "epoch": 1.713857505187918, + "grad_norm": 1.6487194571266346, + "learning_rate": 1.1003434010495705e-07, + "loss": 0.4203164279460907, + "step": 7433 + }, + { + "epoch": 1.7140880793175006, + "grad_norm": 1.59720117870823, + "learning_rate": 1.0986053916165373e-07, + "loss": 0.4607565104961395, + "step": 7434 + }, + { + "epoch": 1.7143186534470831, + "grad_norm": 1.4411738322949479, + "learning_rate": 1.0968686760914248e-07, + "loss": 
0.47256794571876526, + "step": 7435 + }, + { + "epoch": 1.7145492275766658, + "grad_norm": 2.1203032230505414, + "learning_rate": 1.0951332547266778e-07, + "loss": 0.479513943195343, + "step": 7436 + }, + { + "epoch": 1.7147798017062486, + "grad_norm": 1.7633354860000339, + "learning_rate": 1.0933991277745614e-07, + "loss": 0.47687965631484985, + "step": 7437 + }, + { + "epoch": 1.7150103758358313, + "grad_norm": 1.6696730348311766, + "learning_rate": 1.091666295487147e-07, + "loss": 0.45799845457077026, + "step": 7438 + }, + { + "epoch": 1.715240949965414, + "grad_norm": 1.4765505689651048, + "learning_rate": 1.089934758116322e-07, + "loss": 0.43398863077163696, + "step": 7439 + }, + { + "epoch": 1.7154715240949967, + "grad_norm": 1.627580558092534, + "learning_rate": 1.0882045159137788e-07, + "loss": 0.4098217189311981, + "step": 7440 + }, + { + "epoch": 1.7157020982245792, + "grad_norm": 1.8062601643320504, + "learning_rate": 1.086475569131029e-07, + "loss": 0.49889707565307617, + "step": 7441 + }, + { + "epoch": 1.715932672354162, + "grad_norm": 1.4613353368332702, + "learning_rate": 1.0847479180193897e-07, + "loss": 0.4187192916870117, + "step": 7442 + }, + { + "epoch": 1.7161632464837444, + "grad_norm": 2.068945016126778, + "learning_rate": 1.0830215628299954e-07, + "loss": 0.44331133365631104, + "step": 7443 + }, + { + "epoch": 1.7163938206133271, + "grad_norm": 1.6773749938074582, + "learning_rate": 1.0812965038137856e-07, + "loss": 0.4888196587562561, + "step": 7444 + }, + { + "epoch": 1.7166243947429098, + "grad_norm": 1.6578617629701122, + "learning_rate": 1.0795727412215183e-07, + "loss": 0.4884798228740692, + "step": 7445 + }, + { + "epoch": 1.7168549688724926, + "grad_norm": 1.5723023883356735, + "learning_rate": 1.07785027530376e-07, + "loss": 0.45655232667922974, + "step": 7446 + }, + { + "epoch": 1.7170855430020753, + "grad_norm": 1.685893884498356, + "learning_rate": 1.0761291063108857e-07, + "loss": 0.3086237907409668, + "step": 7447 + }, + { + 
"epoch": 1.717316117131658, + "grad_norm": 1.5738053973393145, + "learning_rate": 1.0744092344930888e-07, + "loss": 0.4279823899269104, + "step": 7448 + }, + { + "epoch": 1.7175466912612405, + "grad_norm": 1.7221029802689058, + "learning_rate": 1.072690660100366e-07, + "loss": 0.4241681396961212, + "step": 7449 + }, + { + "epoch": 1.7177772653908232, + "grad_norm": 1.7874830878272077, + "learning_rate": 1.070973383382533e-07, + "loss": 0.47086501121520996, + "step": 7450 + }, + { + "epoch": 1.7180078395204057, + "grad_norm": 1.3780373187479635, + "learning_rate": 1.0692574045892099e-07, + "loss": 0.43798619508743286, + "step": 7451 + }, + { + "epoch": 1.7182384136499884, + "grad_norm": 1.7289936352675708, + "learning_rate": 1.0675427239698354e-07, + "loss": 0.5781964659690857, + "step": 7452 + }, + { + "epoch": 1.7184689877795711, + "grad_norm": 1.4621228929512655, + "learning_rate": 1.0658293417736508e-07, + "loss": 0.4850879907608032, + "step": 7453 + }, + { + "epoch": 1.7186995619091539, + "grad_norm": 1.3236244677460836, + "learning_rate": 1.064117258249717e-07, + "loss": 0.40468811988830566, + "step": 7454 + }, + { + "epoch": 1.7189301360387366, + "grad_norm": 1.7069112900372936, + "learning_rate": 1.0624064736469052e-07, + "loss": 0.4054880142211914, + "step": 7455 + }, + { + "epoch": 1.719160710168319, + "grad_norm": 1.7589002706519377, + "learning_rate": 1.0606969882138894e-07, + "loss": 0.38633522391319275, + "step": 7456 + }, + { + "epoch": 1.7193912842979018, + "grad_norm": 1.6917357500409704, + "learning_rate": 1.0589888021991644e-07, + "loss": 0.4287499785423279, + "step": 7457 + }, + { + "epoch": 1.7196218584274843, + "grad_norm": 1.613018561241669, + "learning_rate": 1.0572819158510316e-07, + "loss": 0.49269533157348633, + "step": 7458 + }, + { + "epoch": 1.719852432557067, + "grad_norm": 1.4600608769783265, + "learning_rate": 1.0555763294176045e-07, + "loss": 0.38874679803848267, + "step": 7459 + }, + { + "epoch": 1.7200830066866497, + "grad_norm": 
1.5663184097893508, + "learning_rate": 1.0538720431468051e-07, + "loss": 0.4381089508533478, + "step": 7460 + }, + { + "epoch": 1.7203135808162324, + "grad_norm": 1.6242553694361792, + "learning_rate": 1.0521690572863706e-07, + "loss": 0.4550422430038452, + "step": 7461 + }, + { + "epoch": 1.7205441549458151, + "grad_norm": 1.5017985009159773, + "learning_rate": 1.0504673720838476e-07, + "loss": 0.5173785090446472, + "step": 7462 + }, + { + "epoch": 1.7207747290753979, + "grad_norm": 1.4906138636113029, + "learning_rate": 1.0487669877865945e-07, + "loss": 0.5082184076309204, + "step": 7463 + }, + { + "epoch": 1.7210053032049804, + "grad_norm": 1.7383580581523643, + "learning_rate": 1.0470679046417786e-07, + "loss": 0.49810969829559326, + "step": 7464 + }, + { + "epoch": 1.721235877334563, + "grad_norm": 1.7302456540952424, + "learning_rate": 1.0453701228963751e-07, + "loss": 0.47808337211608887, + "step": 7465 + }, + { + "epoch": 1.7214664514641456, + "grad_norm": 1.6093569631380469, + "learning_rate": 1.0436736427971782e-07, + "loss": 0.5100537538528442, + "step": 7466 + }, + { + "epoch": 1.7216970255937283, + "grad_norm": 1.5019138408689112, + "learning_rate": 1.0419784645907858e-07, + "loss": 0.44948023557662964, + "step": 7467 + }, + { + "epoch": 1.721927599723311, + "grad_norm": 1.3792836042899619, + "learning_rate": 1.040284588523611e-07, + "loss": 0.4653180241584778, + "step": 7468 + }, + { + "epoch": 1.7221581738528937, + "grad_norm": 1.901421358760061, + "learning_rate": 1.0385920148418737e-07, + "loss": 0.4930723309516907, + "step": 7469 + }, + { + "epoch": 1.7223887479824764, + "grad_norm": 1.5964124799736943, + "learning_rate": 1.036900743791611e-07, + "loss": 0.48883867263793945, + "step": 7470 + }, + { + "epoch": 1.7226193221120591, + "grad_norm": 1.27924002772244, + "learning_rate": 1.0352107756186624e-07, + "loss": 0.4030319154262543, + "step": 7471 + }, + { + "epoch": 1.7228498962416416, + "grad_norm": 1.8060139526740588, + "learning_rate": 
1.033522110568683e-07, + "loss": 0.4174875319004059, + "step": 7472 + }, + { + "epoch": 1.7230804703712244, + "grad_norm": 1.731157383735833, + "learning_rate": 1.0318347488871371e-07, + "loss": 0.5152361392974854, + "step": 7473 + }, + { + "epoch": 1.7233110445008069, + "grad_norm": 1.3983774946509473, + "learning_rate": 1.0301486908193014e-07, + "loss": 0.43221428990364075, + "step": 7474 + }, + { + "epoch": 1.7235416186303896, + "grad_norm": 1.6931290113673243, + "learning_rate": 1.0284639366102598e-07, + "loss": 0.4239969849586487, + "step": 7475 + }, + { + "epoch": 1.7237721927599723, + "grad_norm": 1.5094560861426634, + "learning_rate": 1.0267804865049068e-07, + "loss": 0.5171400904655457, + "step": 7476 + }, + { + "epoch": 1.724002766889555, + "grad_norm": 1.3913671775557208, + "learning_rate": 1.0250983407479518e-07, + "loss": 0.45670178532600403, + "step": 7477 + }, + { + "epoch": 1.7242333410191377, + "grad_norm": 1.3489970844922, + "learning_rate": 1.0234174995839107e-07, + "loss": 0.36458373069763184, + "step": 7478 + }, + { + "epoch": 1.7244639151487204, + "grad_norm": 1.6926167509742018, + "learning_rate": 1.0217379632571122e-07, + "loss": 0.4940750002861023, + "step": 7479 + }, + { + "epoch": 1.724694489278303, + "grad_norm": 1.3742895139526408, + "learning_rate": 1.0200597320116911e-07, + "loss": 0.43453872203826904, + "step": 7480 + }, + { + "epoch": 1.7249250634078857, + "grad_norm": 1.4325916198137496, + "learning_rate": 1.0183828060915989e-07, + "loss": 0.49255162477493286, + "step": 7481 + }, + { + "epoch": 1.7251556375374681, + "grad_norm": 1.5551839406586245, + "learning_rate": 1.0167071857405906e-07, + "loss": 0.46221014857292175, + "step": 7482 + }, + { + "epoch": 1.7253862116670509, + "grad_norm": 1.6044214909369097, + "learning_rate": 1.015032871202236e-07, + "loss": 0.43426087498664856, + "step": 7483 + }, + { + "epoch": 1.7256167857966336, + "grad_norm": 1.3471292376409894, + "learning_rate": 1.0133598627199136e-07, + "loss": 
0.45327985286712646, + "step": 7484 + }, + { + "epoch": 1.7258473599262163, + "grad_norm": 1.7300792096053668, + "learning_rate": 1.011688160536811e-07, + "loss": 0.4691676199436188, + "step": 7485 + }, + { + "epoch": 1.726077934055799, + "grad_norm": 1.7168424748125397, + "learning_rate": 1.0100177648959296e-07, + "loss": 0.5080254077911377, + "step": 7486 + }, + { + "epoch": 1.7263085081853817, + "grad_norm": 1.3360541862160926, + "learning_rate": 1.008348676040075e-07, + "loss": 0.34122025966644287, + "step": 7487 + }, + { + "epoch": 1.7265390823149642, + "grad_norm": 1.650892930499383, + "learning_rate": 1.0066808942118699e-07, + "loss": 0.44408074021339417, + "step": 7488 + }, + { + "epoch": 1.726769656444547, + "grad_norm": 1.4603224951411022, + "learning_rate": 1.0050144196537402e-07, + "loss": 0.3777790665626526, + "step": 7489 + }, + { + "epoch": 1.7270002305741294, + "grad_norm": 1.6365267437093343, + "learning_rate": 1.0033492526079279e-07, + "loss": 0.48730146884918213, + "step": 7490 + }, + { + "epoch": 1.7272308047037122, + "grad_norm": 1.5792338555913825, + "learning_rate": 1.001685393316477e-07, + "loss": 0.35903626680374146, + "step": 7491 + }, + { + "epoch": 1.7274613788332949, + "grad_norm": 1.3953813288199584, + "learning_rate": 1.0000228420212509e-07, + "loss": 0.37729373574256897, + "step": 7492 + }, + { + "epoch": 1.7276919529628776, + "grad_norm": 1.6314801226105193, + "learning_rate": 9.98361598963916e-08, + "loss": 0.4388326406478882, + "step": 7493 + }, + { + "epoch": 1.7279225270924603, + "grad_norm": 1.4829220781258674, + "learning_rate": 9.967016643859527e-08, + "loss": 0.45095232129096985, + "step": 7494 + }, + { + "epoch": 1.728153101222043, + "grad_norm": 1.5130736602015042, + "learning_rate": 9.95043038528649e-08, + "loss": 0.4736475944519043, + "step": 7495 + }, + { + "epoch": 1.7283836753516255, + "grad_norm": 1.6393405202034401, + "learning_rate": 9.933857216330999e-08, + "loss": 0.2984190285205841, + "step": 7496 + }, + { + 
"epoch": 1.7286142494812082, + "grad_norm": 1.5993261500159095, + "learning_rate": 9.91729713940218e-08, + "loss": 0.45391780138015747, + "step": 7497 + }, + { + "epoch": 1.7288448236107907, + "grad_norm": 1.732905558263472, + "learning_rate": 9.900750156907157e-08, + "loss": 0.5150727033615112, + "step": 7498 + }, + { + "epoch": 1.7290753977403734, + "grad_norm": 1.372519788443724, + "learning_rate": 9.884216271251256e-08, + "loss": 0.41298598051071167, + "step": 7499 + }, + { + "epoch": 1.7293059718699562, + "grad_norm": 1.5310483983437806, + "learning_rate": 9.86769548483779e-08, + "loss": 0.4820541441440582, + "step": 7500 + }, + { + "epoch": 1.7295365459995389, + "grad_norm": 1.4103659952581913, + "learning_rate": 9.85118780006825e-08, + "loss": 0.4148511290550232, + "step": 7501 + }, + { + "epoch": 1.7297671201291216, + "grad_norm": 1.535383378975012, + "learning_rate": 9.834693219342183e-08, + "loss": 0.39676210284233093, + "step": 7502 + }, + { + "epoch": 1.7299976942587043, + "grad_norm": 1.3969764743432636, + "learning_rate": 9.818211745057292e-08, + "loss": 0.3665908873081207, + "step": 7503 + }, + { + "epoch": 1.7302282683882868, + "grad_norm": 1.5255452230855382, + "learning_rate": 9.801743379609274e-08, + "loss": 0.39340025186538696, + "step": 7504 + }, + { + "epoch": 1.7304588425178695, + "grad_norm": 1.4673439514671116, + "learning_rate": 9.785288125391977e-08, + "loss": 0.4677412807941437, + "step": 7505 + }, + { + "epoch": 1.730689416647452, + "grad_norm": 1.8421716352805986, + "learning_rate": 9.768845984797369e-08, + "loss": 0.49413764476776123, + "step": 7506 + }, + { + "epoch": 1.7309199907770347, + "grad_norm": 2.1097980684598223, + "learning_rate": 9.752416960215437e-08, + "loss": 0.5312438607215881, + "step": 7507 + }, + { + "epoch": 1.7311505649066175, + "grad_norm": 1.408973464564324, + "learning_rate": 9.736001054034338e-08, + "loss": 0.38522863388061523, + "step": 7508 + }, + { + "epoch": 1.7313811390362002, + "grad_norm": 
1.4496862609377634, + "learning_rate": 9.719598268640283e-08, + "loss": 0.49167078733444214, + "step": 7509 + }, + { + "epoch": 1.7316117131657829, + "grad_norm": 1.7071655256469307, + "learning_rate": 9.7032086064176e-08, + "loss": 0.4465949535369873, + "step": 7510 + }, + { + "epoch": 1.7318422872953656, + "grad_norm": 1.580755639233498, + "learning_rate": 9.686832069748663e-08, + "loss": 0.4627634882926941, + "step": 7511 + }, + { + "epoch": 1.732072861424948, + "grad_norm": 1.5945960217093318, + "learning_rate": 9.670468661013998e-08, + "loss": 0.4188409447669983, + "step": 7512 + }, + { + "epoch": 1.7323034355545308, + "grad_norm": 1.6767285085334622, + "learning_rate": 9.654118382592146e-08, + "loss": 0.5775213241577148, + "step": 7513 + }, + { + "epoch": 1.7325340096841133, + "grad_norm": 1.4889326648746473, + "learning_rate": 9.637781236859843e-08, + "loss": 0.43912672996520996, + "step": 7514 + }, + { + "epoch": 1.732764583813696, + "grad_norm": 1.677177851910315, + "learning_rate": 9.62145722619182e-08, + "loss": 0.5364755392074585, + "step": 7515 + }, + { + "epoch": 1.7329951579432787, + "grad_norm": 1.5135890648676678, + "learning_rate": 9.605146352960935e-08, + "loss": 0.4832648038864136, + "step": 7516 + }, + { + "epoch": 1.7332257320728615, + "grad_norm": 1.640472153194824, + "learning_rate": 9.588848619538182e-08, + "loss": 0.36932459473609924, + "step": 7517 + }, + { + "epoch": 1.7334563062024442, + "grad_norm": 1.4731235594964114, + "learning_rate": 9.57256402829254e-08, + "loss": 0.43458276987075806, + "step": 7518 + }, + { + "epoch": 1.733686880332027, + "grad_norm": 1.457966513875051, + "learning_rate": 9.556292581591196e-08, + "loss": 0.41533568501472473, + "step": 7519 + }, + { + "epoch": 1.7339174544616094, + "grad_norm": 1.4363289807621746, + "learning_rate": 9.540034281799325e-08, + "loss": 0.45898690819740295, + "step": 7520 + }, + { + "epoch": 1.734148028591192, + "grad_norm": 1.610315429506808, + "learning_rate": 9.523789131280279e-08, 
+ "loss": 0.3321181535720825, + "step": 7521 + }, + { + "epoch": 1.7343786027207746, + "grad_norm": 1.5824862936232118, + "learning_rate": 9.507557132395416e-08, + "loss": 0.3926161229610443, + "step": 7522 + }, + { + "epoch": 1.7346091768503573, + "grad_norm": 1.264710302836967, + "learning_rate": 9.491338287504247e-08, + "loss": 0.41051846742630005, + "step": 7523 + }, + { + "epoch": 1.73483975097994, + "grad_norm": 1.3604853902379428, + "learning_rate": 9.47513259896432e-08, + "loss": 0.4440652132034302, + "step": 7524 + }, + { + "epoch": 1.7350703251095227, + "grad_norm": 1.5933781203678954, + "learning_rate": 9.458940069131304e-08, + "loss": 0.5175125598907471, + "step": 7525 + }, + { + "epoch": 1.7353008992391055, + "grad_norm": 1.4535445480892137, + "learning_rate": 9.442760700358987e-08, + "loss": 0.45521751046180725, + "step": 7526 + }, + { + "epoch": 1.7355314733686882, + "grad_norm": 1.5707484811695662, + "learning_rate": 9.426594494999151e-08, + "loss": 0.5133911967277527, + "step": 7527 + }, + { + "epoch": 1.7357620474982707, + "grad_norm": 1.8770278394623805, + "learning_rate": 9.410441455401752e-08, + "loss": 0.4397609233856201, + "step": 7528 + }, + { + "epoch": 1.7359926216278534, + "grad_norm": 3.7292879258339693, + "learning_rate": 9.394301583914765e-08, + "loss": 0.4503510594367981, + "step": 7529 + }, + { + "epoch": 1.7362231957574359, + "grad_norm": 1.5909450336667472, + "learning_rate": 9.378174882884327e-08, + "loss": 0.44119834899902344, + "step": 7530 + }, + { + "epoch": 1.7364537698870186, + "grad_norm": 1.5959659498105105, + "learning_rate": 9.362061354654583e-08, + "loss": 0.46257996559143066, + "step": 7531 + }, + { + "epoch": 1.7366843440166013, + "grad_norm": 1.4727698319610416, + "learning_rate": 9.345961001567792e-08, + "loss": 0.4468308687210083, + "step": 7532 + }, + { + "epoch": 1.736914918146184, + "grad_norm": 1.329652616869682, + "learning_rate": 9.32987382596433e-08, + "loss": 0.3837989568710327, + "step": 7533 + }, + { + 
"epoch": 1.7371454922757668, + "grad_norm": 1.7149798865191848, + "learning_rate": 9.313799830182644e-08, + "loss": 0.4224961996078491, + "step": 7534 + }, + { + "epoch": 1.7373760664053495, + "grad_norm": 1.3527154365554523, + "learning_rate": 9.297739016559225e-08, + "loss": 0.37379956245422363, + "step": 7535 + }, + { + "epoch": 1.737606640534932, + "grad_norm": 1.3983736958193809, + "learning_rate": 9.281691387428658e-08, + "loss": 0.4204242527484894, + "step": 7536 + }, + { + "epoch": 1.7378372146645147, + "grad_norm": 1.550547566194999, + "learning_rate": 9.265656945123678e-08, + "loss": 0.5270572900772095, + "step": 7537 + }, + { + "epoch": 1.7380677887940972, + "grad_norm": 1.6826850331086136, + "learning_rate": 9.249635691975e-08, + "loss": 0.44208282232284546, + "step": 7538 + }, + { + "epoch": 1.73829836292368, + "grad_norm": 1.158547237110862, + "learning_rate": 9.233627630311502e-08, + "loss": 0.32514283061027527, + "step": 7539 + }, + { + "epoch": 1.7385289370532626, + "grad_norm": 1.42135951118167, + "learning_rate": 9.217632762460126e-08, + "loss": 0.35472434759140015, + "step": 7540 + }, + { + "epoch": 1.7387595111828453, + "grad_norm": 1.9134735814581072, + "learning_rate": 9.201651090745888e-08, + "loss": 0.5034215450286865, + "step": 7541 + }, + { + "epoch": 1.738990085312428, + "grad_norm": 1.4950522917395752, + "learning_rate": 9.185682617491863e-08, + "loss": 0.4779762029647827, + "step": 7542 + }, + { + "epoch": 1.7392206594420108, + "grad_norm": 1.7544463226218252, + "learning_rate": 9.169727345019263e-08, + "loss": 0.4964079260826111, + "step": 7543 + }, + { + "epoch": 1.7394512335715933, + "grad_norm": 1.8208500448761544, + "learning_rate": 9.153785275647319e-08, + "loss": 0.5125068426132202, + "step": 7544 + }, + { + "epoch": 1.739681807701176, + "grad_norm": 1.369096268264849, + "learning_rate": 9.13785641169339e-08, + "loss": 0.39051756262779236, + "step": 7545 + }, + { + "epoch": 1.7399123818307585, + "grad_norm": 1.6132499721446665, 
+ "learning_rate": 9.121940755472901e-08, + "loss": 0.45951950550079346, + "step": 7546 + }, + { + "epoch": 1.7401429559603412, + "grad_norm": 1.402513218333582, + "learning_rate": 9.106038309299302e-08, + "loss": 0.42676979303359985, + "step": 7547 + }, + { + "epoch": 1.740373530089924, + "grad_norm": 1.6248647623340229, + "learning_rate": 9.090149075484255e-08, + "loss": 0.3585033416748047, + "step": 7548 + }, + { + "epoch": 1.7406041042195066, + "grad_norm": 1.5204418845888263, + "learning_rate": 9.074273056337366e-08, + "loss": 0.4613775312900543, + "step": 7549 + }, + { + "epoch": 1.7408346783490893, + "grad_norm": 1.5756472296671777, + "learning_rate": 9.058410254166415e-08, + "loss": 0.48934412002563477, + "step": 7550 + }, + { + "epoch": 1.741065252478672, + "grad_norm": 2.3682357853653895, + "learning_rate": 9.042560671277177e-08, + "loss": 0.5749069452285767, + "step": 7551 + }, + { + "epoch": 1.7412958266082545, + "grad_norm": 1.4990310296288942, + "learning_rate": 9.026724309973588e-08, + "loss": 0.4760423004627228, + "step": 7552 + }, + { + "epoch": 1.7415264007378373, + "grad_norm": 1.38070744019409, + "learning_rate": 9.010901172557594e-08, + "loss": 0.43080049753189087, + "step": 7553 + }, + { + "epoch": 1.7417569748674198, + "grad_norm": 1.4636238536042068, + "learning_rate": 8.99509126132928e-08, + "loss": 0.44850271940231323, + "step": 7554 + }, + { + "epoch": 1.7419875489970025, + "grad_norm": 1.5357653243690434, + "learning_rate": 8.979294578586738e-08, + "loss": 0.34593498706817627, + "step": 7555 + }, + { + "epoch": 1.7422181231265852, + "grad_norm": 1.3635590695208566, + "learning_rate": 8.963511126626188e-08, + "loss": 0.3738324046134949, + "step": 7556 + }, + { + "epoch": 1.742448697256168, + "grad_norm": 1.6262402635208488, + "learning_rate": 8.947740907741952e-08, + "loss": 0.47988662123680115, + "step": 7557 + }, + { + "epoch": 1.7426792713857506, + "grad_norm": 1.904530616299084, + "learning_rate": 8.931983924226338e-08, + "loss": 
0.5863034725189209, + "step": 7558 + }, + { + "epoch": 1.7429098455153333, + "grad_norm": 1.497315511162884, + "learning_rate": 8.916240178369827e-08, + "loss": 0.38455232977867126, + "step": 7559 + }, + { + "epoch": 1.7431404196449158, + "grad_norm": 1.711133818053075, + "learning_rate": 8.900509672460899e-08, + "loss": 0.3919760584831238, + "step": 7560 + }, + { + "epoch": 1.7433709937744986, + "grad_norm": 1.8876361089943499, + "learning_rate": 8.884792408786169e-08, + "loss": 0.4090653657913208, + "step": 7561 + }, + { + "epoch": 1.743601567904081, + "grad_norm": 1.458591423296693, + "learning_rate": 8.869088389630264e-08, + "loss": 0.42597073316574097, + "step": 7562 + }, + { + "epoch": 1.7438321420336638, + "grad_norm": 1.4410906971279085, + "learning_rate": 8.853397617275959e-08, + "loss": 0.38760805130004883, + "step": 7563 + }, + { + "epoch": 1.7440627161632465, + "grad_norm": 1.3930314463175644, + "learning_rate": 8.837720094004042e-08, + "loss": 0.3753165900707245, + "step": 7564 + }, + { + "epoch": 1.7442932902928292, + "grad_norm": 1.4708100181524995, + "learning_rate": 8.822055822093432e-08, + "loss": 0.5169536471366882, + "step": 7565 + }, + { + "epoch": 1.744523864422412, + "grad_norm": 1.436339252382814, + "learning_rate": 8.806404803821077e-08, + "loss": 0.3886902332305908, + "step": 7566 + }, + { + "epoch": 1.7447544385519944, + "grad_norm": 1.7378167101447366, + "learning_rate": 8.790767041461977e-08, + "loss": 0.48971402645111084, + "step": 7567 + }, + { + "epoch": 1.7449850126815771, + "grad_norm": 1.3555756556469605, + "learning_rate": 8.775142537289282e-08, + "loss": 0.4656449556350708, + "step": 7568 + }, + { + "epoch": 1.7452155868111596, + "grad_norm": 1.24689144854066, + "learning_rate": 8.75953129357414e-08, + "loss": 0.43197786808013916, + "step": 7569 + }, + { + "epoch": 1.7454461609407423, + "grad_norm": 1.6584429086506909, + "learning_rate": 8.743933312585816e-08, + "loss": 0.5062606930732727, + "step": 7570 + }, + { + "epoch": 
1.745676735070325, + "grad_norm": 1.714345013647294, + "learning_rate": 8.728348596591639e-08, + "loss": 0.5489983558654785, + "step": 7571 + }, + { + "epoch": 1.7459073091999078, + "grad_norm": 1.4457283500823468, + "learning_rate": 8.712777147857031e-08, + "loss": 0.4351652264595032, + "step": 7572 + }, + { + "epoch": 1.7461378833294905, + "grad_norm": 2.160367880410759, + "learning_rate": 8.697218968645403e-08, + "loss": 0.5096884965896606, + "step": 7573 + }, + { + "epoch": 1.7463684574590732, + "grad_norm": 1.2837319415683648, + "learning_rate": 8.681674061218347e-08, + "loss": 0.3127269744873047, + "step": 7574 + }, + { + "epoch": 1.7465990315886557, + "grad_norm": 1.8378362837335938, + "learning_rate": 8.666142427835443e-08, + "loss": 0.4738629460334778, + "step": 7575 + }, + { + "epoch": 1.7468296057182384, + "grad_norm": 1.5090024147723615, + "learning_rate": 8.650624070754375e-08, + "loss": 0.46921902894973755, + "step": 7576 + }, + { + "epoch": 1.747060179847821, + "grad_norm": 1.578667567709185, + "learning_rate": 8.635118992230906e-08, + "loss": 0.5296987891197205, + "step": 7577 + }, + { + "epoch": 1.7472907539774036, + "grad_norm": 1.1732895039201416, + "learning_rate": 8.619627194518819e-08, + "loss": 0.3522387742996216, + "step": 7578 + }, + { + "epoch": 1.7475213281069863, + "grad_norm": 1.550879536093582, + "learning_rate": 8.604148679870049e-08, + "loss": 0.42747724056243896, + "step": 7579 + }, + { + "epoch": 1.747751902236569, + "grad_norm": 1.535695568842986, + "learning_rate": 8.588683450534528e-08, + "loss": 0.399990439414978, + "step": 7580 + }, + { + "epoch": 1.7479824763661518, + "grad_norm": 1.688266581429453, + "learning_rate": 8.573231508760315e-08, + "loss": 0.48220518231391907, + "step": 7581 + }, + { + "epoch": 1.7482130504957345, + "grad_norm": 1.8452105924711204, + "learning_rate": 8.557792856793455e-08, + "loss": 0.5227106213569641, + "step": 7582 + }, + { + "epoch": 1.748443624625317, + "grad_norm": 1.596076015195143, + 
"learning_rate": 8.542367496878178e-08, + "loss": 0.5436732769012451, + "step": 7583 + }, + { + "epoch": 1.7486741987548997, + "grad_norm": 1.5781135040763308, + "learning_rate": 8.526955431256644e-08, + "loss": 0.48398053646087646, + "step": 7584 + }, + { + "epoch": 1.7489047728844822, + "grad_norm": 1.8109008330023073, + "learning_rate": 8.511556662169217e-08, + "loss": 0.5727924108505249, + "step": 7585 + }, + { + "epoch": 1.749135347014065, + "grad_norm": 1.7451913815699138, + "learning_rate": 8.496171191854229e-08, + "loss": 0.48077693581581116, + "step": 7586 + }, + { + "epoch": 1.7493659211436476, + "grad_norm": 1.4513314868999736, + "learning_rate": 8.480799022548113e-08, + "loss": 0.45447635650634766, + "step": 7587 + }, + { + "epoch": 1.7495964952732304, + "grad_norm": 1.7305734402801412, + "learning_rate": 8.465440156485392e-08, + "loss": 0.4605486989021301, + "step": 7588 + }, + { + "epoch": 1.749827069402813, + "grad_norm": 1.6087138586576477, + "learning_rate": 8.450094595898604e-08, + "loss": 0.4229927062988281, + "step": 7589 + }, + { + "epoch": 1.7500576435323958, + "grad_norm": 1.371495589643338, + "learning_rate": 8.434762343018408e-08, + "loss": 0.43005260825157166, + "step": 7590 + }, + { + "epoch": 1.7502882176619783, + "grad_norm": 1.739761797548497, + "learning_rate": 8.41944340007349e-08, + "loss": 0.47446098923683167, + "step": 7591 + }, + { + "epoch": 1.750518791791561, + "grad_norm": 1.6084919754115274, + "learning_rate": 8.40413776929062e-08, + "loss": 0.40554216504096985, + "step": 7592 + }, + { + "epoch": 1.7507493659211435, + "grad_norm": 1.2363538330087616, + "learning_rate": 8.38884545289461e-08, + "loss": 0.4144189953804016, + "step": 7593 + }, + { + "epoch": 1.7509799400507262, + "grad_norm": 1.6677815347140812, + "learning_rate": 8.373566453108361e-08, + "loss": 0.449351966381073, + "step": 7594 + }, + { + "epoch": 1.751210514180309, + "grad_norm": 1.8357616333643774, + "learning_rate": 8.358300772152849e-08, + "loss": 
0.4584103226661682, + "step": 7595 + }, + { + "epoch": 1.7514410883098916, + "grad_norm": 1.6545876792386258, + "learning_rate": 8.343048412247066e-08, + "loss": 0.4739362895488739, + "step": 7596 + }, + { + "epoch": 1.7516716624394744, + "grad_norm": 1.3684829539670578, + "learning_rate": 8.327809375608131e-08, + "loss": 0.3970356583595276, + "step": 7597 + }, + { + "epoch": 1.751902236569057, + "grad_norm": 1.390074068538192, + "learning_rate": 8.312583664451157e-08, + "loss": 0.4298238754272461, + "step": 7598 + }, + { + "epoch": 1.7521328106986396, + "grad_norm": 1.5218432452457022, + "learning_rate": 8.297371280989385e-08, + "loss": 0.4920361340045929, + "step": 7599 + }, + { + "epoch": 1.7523633848282223, + "grad_norm": 1.6001856104794878, + "learning_rate": 8.282172227434059e-08, + "loss": 0.5035870671272278, + "step": 7600 + }, + { + "epoch": 1.7525939589578048, + "grad_norm": 1.8053658495544915, + "learning_rate": 8.266986505994555e-08, + "loss": 0.373248815536499, + "step": 7601 + }, + { + "epoch": 1.7528245330873875, + "grad_norm": 2.0338367024251345, + "learning_rate": 8.25181411887822e-08, + "loss": 0.48491543531417847, + "step": 7602 + }, + { + "epoch": 1.7530551072169702, + "grad_norm": 1.6403088167242337, + "learning_rate": 8.236655068290554e-08, + "loss": 0.4298476576805115, + "step": 7603 + }, + { + "epoch": 1.753285681346553, + "grad_norm": 1.5503246605292686, + "learning_rate": 8.221509356435064e-08, + "loss": 0.48804932832717896, + "step": 7604 + }, + { + "epoch": 1.7535162554761357, + "grad_norm": 1.595278442494436, + "learning_rate": 8.206376985513353e-08, + "loss": 0.467857301235199, + "step": 7605 + }, + { + "epoch": 1.7537468296057184, + "grad_norm": 1.8978537163965867, + "learning_rate": 8.19125795772504e-08, + "loss": 0.48995548486709595, + "step": 7606 + }, + { + "epoch": 1.7539774037353009, + "grad_norm": 1.488521983097995, + "learning_rate": 8.176152275267823e-08, + "loss": 0.4459487795829773, + "step": 7607 + }, + { + "epoch": 
1.7542079778648836, + "grad_norm": 1.4326042778667836, + "learning_rate": 8.1610599403375e-08, + "loss": 0.5054866671562195, + "step": 7608 + }, + { + "epoch": 1.754438551994466, + "grad_norm": 1.4563884146816763, + "learning_rate": 8.145980955127862e-08, + "loss": 0.46223869919776917, + "step": 7609 + }, + { + "epoch": 1.7546691261240488, + "grad_norm": 1.696768225081691, + "learning_rate": 8.1309153218308e-08, + "loss": 0.4743426442146301, + "step": 7610 + }, + { + "epoch": 1.7548997002536315, + "grad_norm": 1.7623915082520603, + "learning_rate": 8.115863042636262e-08, + "loss": 0.40808072686195374, + "step": 7611 + }, + { + "epoch": 1.7551302743832142, + "grad_norm": 1.3859431275297254, + "learning_rate": 8.100824119732263e-08, + "loss": 0.4452321231365204, + "step": 7612 + }, + { + "epoch": 1.755360848512797, + "grad_norm": 1.556764426976114, + "learning_rate": 8.085798555304824e-08, + "loss": 0.4211857318878174, + "step": 7613 + }, + { + "epoch": 1.7555914226423797, + "grad_norm": 1.5080375348033017, + "learning_rate": 8.070786351538117e-08, + "loss": 0.3356667757034302, + "step": 7614 + }, + { + "epoch": 1.7558219967719622, + "grad_norm": 1.7842469682737618, + "learning_rate": 8.055787510614287e-08, + "loss": 0.4636021852493286, + "step": 7615 + }, + { + "epoch": 1.7560525709015449, + "grad_norm": 1.624229543588168, + "learning_rate": 8.040802034713546e-08, + "loss": 0.4066168963909149, + "step": 7616 + }, + { + "epoch": 1.7562831450311274, + "grad_norm": 1.4896510438449921, + "learning_rate": 8.025829926014216e-08, + "loss": 0.426937460899353, + "step": 7617 + }, + { + "epoch": 1.75651371916071, + "grad_norm": 1.838065393231424, + "learning_rate": 8.010871186692625e-08, + "loss": 0.464493989944458, + "step": 7618 + }, + { + "epoch": 1.7567442932902928, + "grad_norm": 1.7522078931434732, + "learning_rate": 7.995925818923222e-08, + "loss": 0.44130605459213257, + "step": 7619 + }, + { + "epoch": 1.7569748674198755, + "grad_norm": 1.6877219329526134, + 
"learning_rate": 7.980993824878402e-08, + "loss": 0.5241909027099609, + "step": 7620 + }, + { + "epoch": 1.7572054415494582, + "grad_norm": 1.605603526262718, + "learning_rate": 7.96607520672874e-08, + "loss": 0.45450860261917114, + "step": 7621 + }, + { + "epoch": 1.757436015679041, + "grad_norm": 1.6393742771356723, + "learning_rate": 7.951169966642757e-08, + "loss": 0.443767786026001, + "step": 7622 + }, + { + "epoch": 1.7576665898086234, + "grad_norm": 1.5258486167332923, + "learning_rate": 7.936278106787131e-08, + "loss": 0.3951075077056885, + "step": 7623 + }, + { + "epoch": 1.7578971639382062, + "grad_norm": 1.8216713225734935, + "learning_rate": 7.921399629326509e-08, + "loss": 0.44628477096557617, + "step": 7624 + }, + { + "epoch": 1.7581277380677887, + "grad_norm": 1.7421703870668572, + "learning_rate": 7.906534536423648e-08, + "loss": 0.38743889331817627, + "step": 7625 + }, + { + "epoch": 1.7583583121973714, + "grad_norm": 1.4726686928375068, + "learning_rate": 7.891682830239311e-08, + "loss": 0.4338032007217407, + "step": 7626 + }, + { + "epoch": 1.758588886326954, + "grad_norm": 1.7605246972541082, + "learning_rate": 7.876844512932367e-08, + "loss": 0.47387874126434326, + "step": 7627 + }, + { + "epoch": 1.7588194604565368, + "grad_norm": 1.6222674378421518, + "learning_rate": 7.86201958665973e-08, + "loss": 0.4082717299461365, + "step": 7628 + }, + { + "epoch": 1.7590500345861195, + "grad_norm": 1.462169761343313, + "learning_rate": 7.847208053576326e-08, + "loss": 0.4254682958126068, + "step": 7629 + }, + { + "epoch": 1.7592806087157022, + "grad_norm": 1.319688989297758, + "learning_rate": 7.832409915835181e-08, + "loss": 0.3572045564651489, + "step": 7630 + }, + { + "epoch": 1.7595111828452847, + "grad_norm": 1.398732808330898, + "learning_rate": 7.817625175587328e-08, + "loss": 0.39110279083251953, + "step": 7631 + }, + { + "epoch": 1.7597417569748675, + "grad_norm": 2.455493892116574, + "learning_rate": 7.802853834981926e-08, + "loss": 
0.49292176961898804, + "step": 7632 + }, + { + "epoch": 1.75997233110445, + "grad_norm": 1.460109162216243, + "learning_rate": 7.78809589616608e-08, + "loss": 0.4271275997161865, + "step": 7633 + }, + { + "epoch": 1.7602029052340327, + "grad_norm": 1.5973984242111468, + "learning_rate": 7.77335136128503e-08, + "loss": 0.470772922039032, + "step": 7634 + }, + { + "epoch": 1.7604334793636154, + "grad_norm": 1.5415713448452681, + "learning_rate": 7.758620232482083e-08, + "loss": 0.4872988760471344, + "step": 7635 + }, + { + "epoch": 1.760664053493198, + "grad_norm": 1.2959777480648245, + "learning_rate": 7.743902511898492e-08, + "loss": 0.4300990104675293, + "step": 7636 + }, + { + "epoch": 1.7608946276227808, + "grad_norm": 1.4331560277043864, + "learning_rate": 7.729198201673682e-08, + "loss": 0.4524795711040497, + "step": 7637 + }, + { + "epoch": 1.7611252017523635, + "grad_norm": 1.580884966063861, + "learning_rate": 7.714507303945028e-08, + "loss": 0.4673241376876831, + "step": 7638 + }, + { + "epoch": 1.761355775881946, + "grad_norm": 1.7656151539321776, + "learning_rate": 7.699829820848048e-08, + "loss": 0.5171443223953247, + "step": 7639 + }, + { + "epoch": 1.7615863500115287, + "grad_norm": 1.5721911288259287, + "learning_rate": 7.68516575451621e-08, + "loss": 0.44416171312332153, + "step": 7640 + }, + { + "epoch": 1.7618169241411112, + "grad_norm": 1.8596688405579505, + "learning_rate": 7.670515107081122e-08, + "loss": 0.4456225633621216, + "step": 7641 + }, + { + "epoch": 1.762047498270694, + "grad_norm": 1.427384194238264, + "learning_rate": 7.65587788067239e-08, + "loss": 0.5235984921455383, + "step": 7642 + }, + { + "epoch": 1.7622780724002767, + "grad_norm": 1.5098894741733768, + "learning_rate": 7.641254077417702e-08, + "loss": 0.4957311749458313, + "step": 7643 + }, + { + "epoch": 1.7625086465298594, + "grad_norm": 1.9524483698152115, + "learning_rate": 7.626643699442748e-08, + "loss": 0.48401015996932983, + "step": 7644 + }, + { + "epoch": 
1.762739220659442, + "grad_norm": 1.5925905896008645, + "learning_rate": 7.612046748871326e-08, + "loss": 0.5440249443054199, + "step": 7645 + }, + { + "epoch": 1.7629697947890248, + "grad_norm": 1.5363697612706335, + "learning_rate": 7.597463227825229e-08, + "loss": 0.3922181725502014, + "step": 7646 + }, + { + "epoch": 1.7632003689186073, + "grad_norm": 1.7121602067196948, + "learning_rate": 7.582893138424318e-08, + "loss": 0.4679541289806366, + "step": 7647 + }, + { + "epoch": 1.76343094304819, + "grad_norm": 1.63738592997542, + "learning_rate": 7.568336482786508e-08, + "loss": 0.4461076557636261, + "step": 7648 + }, + { + "epoch": 1.7636615171777725, + "grad_norm": 1.769800706819883, + "learning_rate": 7.553793263027752e-08, + "loss": 0.4028201997280121, + "step": 7649 + }, + { + "epoch": 1.7638920913073552, + "grad_norm": 1.6924130336118084, + "learning_rate": 7.53926348126206e-08, + "loss": 0.47307640314102173, + "step": 7650 + }, + { + "epoch": 1.764122665436938, + "grad_norm": 1.7236868707009407, + "learning_rate": 7.524747139601473e-08, + "loss": 0.4763333201408386, + "step": 7651 + }, + { + "epoch": 1.7643532395665207, + "grad_norm": 1.5475351462285587, + "learning_rate": 7.510244240156127e-08, + "loss": 0.5062815546989441, + "step": 7652 + }, + { + "epoch": 1.7645838136961034, + "grad_norm": 1.4648234779945293, + "learning_rate": 7.495754785034114e-08, + "loss": 0.38344740867614746, + "step": 7653 + }, + { + "epoch": 1.7648143878256861, + "grad_norm": 1.5630602768230752, + "learning_rate": 7.48127877634166e-08, + "loss": 0.36255425214767456, + "step": 7654 + }, + { + "epoch": 1.7650449619552686, + "grad_norm": 1.4144647369682326, + "learning_rate": 7.466816216182969e-08, + "loss": 0.4136468172073364, + "step": 7655 + }, + { + "epoch": 1.7652755360848513, + "grad_norm": 1.5589028620208925, + "learning_rate": 7.452367106660351e-08, + "loss": 0.4294041395187378, + "step": 7656 + }, + { + "epoch": 1.7655061102144338, + "grad_norm": 1.5271012787948486, + 
"learning_rate": 7.437931449874101e-08, + "loss": 0.3865356147289276, + "step": 7657 + }, + { + "epoch": 1.7657366843440165, + "grad_norm": 1.5355711497321805, + "learning_rate": 7.42350924792261e-08, + "loss": 0.44538289308547974, + "step": 7658 + }, + { + "epoch": 1.7659672584735993, + "grad_norm": 1.6285566114230512, + "learning_rate": 7.409100502902299e-08, + "loss": 0.4943844676017761, + "step": 7659 + }, + { + "epoch": 1.766197832603182, + "grad_norm": 1.759721404059002, + "learning_rate": 7.394705216907582e-08, + "loss": 0.41705092787742615, + "step": 7660 + }, + { + "epoch": 1.7664284067327647, + "grad_norm": 1.4175389623557053, + "learning_rate": 7.380323392031018e-08, + "loss": 0.4304206967353821, + "step": 7661 + }, + { + "epoch": 1.7666589808623474, + "grad_norm": 1.3933381760031749, + "learning_rate": 7.365955030363102e-08, + "loss": 0.4830179214477539, + "step": 7662 + }, + { + "epoch": 1.76688955499193, + "grad_norm": 1.51616499834235, + "learning_rate": 7.351600133992452e-08, + "loss": 0.47749078273773193, + "step": 7663 + }, + { + "epoch": 1.7671201291215126, + "grad_norm": 1.4074934707168656, + "learning_rate": 7.337258705005667e-08, + "loss": 0.3899204730987549, + "step": 7664 + }, + { + "epoch": 1.7673507032510951, + "grad_norm": 1.4123867126002758, + "learning_rate": 7.322930745487443e-08, + "loss": 0.4621524214744568, + "step": 7665 + }, + { + "epoch": 1.7675812773806778, + "grad_norm": 1.725639837898645, + "learning_rate": 7.308616257520506e-08, + "loss": 0.5305047035217285, + "step": 7666 + }, + { + "epoch": 1.7678118515102605, + "grad_norm": 2.1356750734168646, + "learning_rate": 7.294315243185578e-08, + "loss": 0.5894631147384644, + "step": 7667 + }, + { + "epoch": 1.7680424256398433, + "grad_norm": 1.5389151696841823, + "learning_rate": 7.280027704561498e-08, + "loss": 0.38509970903396606, + "step": 7668 + }, + { + "epoch": 1.768272999769426, + "grad_norm": 1.7309245548099654, + "learning_rate": 7.265753643725048e-08, + "loss": 
0.45494410395622253, + "step": 7669 + }, + { + "epoch": 1.7685035738990087, + "grad_norm": 1.7035489800713894, + "learning_rate": 7.251493062751169e-08, + "loss": 0.4819248914718628, + "step": 7670 + }, + { + "epoch": 1.7687341480285912, + "grad_norm": 1.4325571648838293, + "learning_rate": 7.237245963712724e-08, + "loss": 0.43286386132240295, + "step": 7671 + }, + { + "epoch": 1.768964722158174, + "grad_norm": 1.3036122364237743, + "learning_rate": 7.223012348680724e-08, + "loss": 0.4285479187965393, + "step": 7672 + }, + { + "epoch": 1.7691952962877564, + "grad_norm": 1.6598071005655777, + "learning_rate": 7.208792219724124e-08, + "loss": 0.42678505182266235, + "step": 7673 + }, + { + "epoch": 1.7694258704173391, + "grad_norm": 1.647090361621967, + "learning_rate": 7.194585578909995e-08, + "loss": 0.47091686725616455, + "step": 7674 + }, + { + "epoch": 1.7696564445469218, + "grad_norm": 1.5115484466399114, + "learning_rate": 7.180392428303394e-08, + "loss": 0.41932445764541626, + "step": 7675 + }, + { + "epoch": 1.7698870186765046, + "grad_norm": 1.2463006271885857, + "learning_rate": 7.166212769967483e-08, + "loss": 0.4043616056442261, + "step": 7676 + }, + { + "epoch": 1.7701175928060873, + "grad_norm": 1.5310666660883137, + "learning_rate": 7.15204660596338e-08, + "loss": 0.395826518535614, + "step": 7677 + }, + { + "epoch": 1.7703481669356698, + "grad_norm": 1.4874807127430703, + "learning_rate": 7.13789393835027e-08, + "loss": 0.4684498906135559, + "step": 7678 + }, + { + "epoch": 1.7705787410652525, + "grad_norm": 1.8560085011670902, + "learning_rate": 7.12375476918542e-08, + "loss": 0.4713285565376282, + "step": 7679 + }, + { + "epoch": 1.770809315194835, + "grad_norm": 1.487262641155755, + "learning_rate": 7.109629100524073e-08, + "loss": 0.47559499740600586, + "step": 7680 + }, + { + "epoch": 1.7710398893244177, + "grad_norm": 1.5741914036439861, + "learning_rate": 7.095516934419554e-08, + "loss": 0.5364210605621338, + "step": 7681 + }, + { + "epoch": 
1.7712704634540004, + "grad_norm": 1.942648846069337, + "learning_rate": 7.081418272923212e-08, + "loss": 0.5731894969940186, + "step": 7682 + }, + { + "epoch": 1.7715010375835831, + "grad_norm": 1.7006107903804015, + "learning_rate": 7.067333118084428e-08, + "loss": 0.4287458062171936, + "step": 7683 + }, + { + "epoch": 1.7717316117131658, + "grad_norm": 1.5575643616743255, + "learning_rate": 7.053261471950612e-08, + "loss": 0.3849913775920868, + "step": 7684 + }, + { + "epoch": 1.7719621858427486, + "grad_norm": 1.4243498094919005, + "learning_rate": 7.039203336567245e-08, + "loss": 0.4933156371116638, + "step": 7685 + }, + { + "epoch": 1.772192759972331, + "grad_norm": 1.897795122632639, + "learning_rate": 7.025158713977808e-08, + "loss": 0.5185002088546753, + "step": 7686 + }, + { + "epoch": 1.7724233341019138, + "grad_norm": 1.634847266537775, + "learning_rate": 7.011127606223799e-08, + "loss": 0.514995276927948, + "step": 7687 + }, + { + "epoch": 1.7726539082314963, + "grad_norm": 1.5845868665458605, + "learning_rate": 6.99711001534481e-08, + "loss": 0.4362761676311493, + "step": 7688 + }, + { + "epoch": 1.772884482361079, + "grad_norm": 1.699858455397738, + "learning_rate": 6.983105943378431e-08, + "loss": 0.44117432832717896, + "step": 7689 + }, + { + "epoch": 1.7731150564906617, + "grad_norm": 1.5875521204144505, + "learning_rate": 6.969115392360325e-08, + "loss": 0.4940808415412903, + "step": 7690 + }, + { + "epoch": 1.7733456306202444, + "grad_norm": 1.9046624573594293, + "learning_rate": 6.955138364324109e-08, + "loss": 0.4322758913040161, + "step": 7691 + }, + { + "epoch": 1.7735762047498271, + "grad_norm": 1.467450936859881, + "learning_rate": 6.941174861301536e-08, + "loss": 0.3867933750152588, + "step": 7692 + }, + { + "epoch": 1.7738067788794099, + "grad_norm": 1.6321329987514115, + "learning_rate": 6.927224885322302e-08, + "loss": 0.4380000829696655, + "step": 7693 + }, + { + "epoch": 1.7740373530089923, + "grad_norm": 1.7183023620516549, + 
"learning_rate": 6.913288438414222e-08, + "loss": 0.46499723196029663, + "step": 7694 + }, + { + "epoch": 1.774267927138575, + "grad_norm": 1.6625572218896962, + "learning_rate": 6.89936552260304e-08, + "loss": 0.4845675230026245, + "step": 7695 + }, + { + "epoch": 1.7744985012681576, + "grad_norm": 1.3920222388819354, + "learning_rate": 6.88545613991266e-08, + "loss": 0.3755526542663574, + "step": 7696 + }, + { + "epoch": 1.7747290753977403, + "grad_norm": 1.358162383242242, + "learning_rate": 6.871560292364887e-08, + "loss": 0.4765484929084778, + "step": 7697 + }, + { + "epoch": 1.774959649527323, + "grad_norm": 1.5701618596645643, + "learning_rate": 6.857677981979659e-08, + "loss": 0.4176154136657715, + "step": 7698 + }, + { + "epoch": 1.7751902236569057, + "grad_norm": 1.5881043143352427, + "learning_rate": 6.84380921077492e-08, + "loss": 0.410483717918396, + "step": 7699 + }, + { + "epoch": 1.7754207977864884, + "grad_norm": 1.876508092569716, + "learning_rate": 6.829953980766612e-08, + "loss": 0.5188060998916626, + "step": 7700 + }, + { + "epoch": 1.7756513719160711, + "grad_norm": 1.5514145308665186, + "learning_rate": 6.816112293968745e-08, + "loss": 0.47039783000946045, + "step": 7701 + }, + { + "epoch": 1.7758819460456536, + "grad_norm": 1.6296649452825585, + "learning_rate": 6.802284152393345e-08, + "loss": 0.5367648601531982, + "step": 7702 + }, + { + "epoch": 1.7761125201752364, + "grad_norm": 1.55513001656084, + "learning_rate": 6.78846955805048e-08, + "loss": 0.500449538230896, + "step": 7703 + }, + { + "epoch": 1.7763430943048188, + "grad_norm": 1.5060722099238588, + "learning_rate": 6.774668512948234e-08, + "loss": 0.4579819440841675, + "step": 7704 + }, + { + "epoch": 1.7765736684344016, + "grad_norm": 1.7824280377613644, + "learning_rate": 6.760881019092712e-08, + "loss": 0.41459107398986816, + "step": 7705 + }, + { + "epoch": 1.7768042425639843, + "grad_norm": 1.7900526752813857, + "learning_rate": 6.747107078488112e-08, + "loss": 
0.46020573377609253, + "step": 7706 + }, + { + "epoch": 1.777034816693567, + "grad_norm": 1.7709884076088374, + "learning_rate": 6.733346693136566e-08, + "loss": 0.48069459199905396, + "step": 7707 + }, + { + "epoch": 1.7772653908231497, + "grad_norm": 1.4499402707441236, + "learning_rate": 6.719599865038328e-08, + "loss": 0.3514458239078522, + "step": 7708 + }, + { + "epoch": 1.7774959649527324, + "grad_norm": 1.7044500533180955, + "learning_rate": 6.705866596191601e-08, + "loss": 0.4696041941642761, + "step": 7709 + }, + { + "epoch": 1.777726539082315, + "grad_norm": 1.6058185659780073, + "learning_rate": 6.692146888592675e-08, + "loss": 0.45286083221435547, + "step": 7710 + }, + { + "epoch": 1.7779571132118976, + "grad_norm": 1.8525271361461533, + "learning_rate": 6.678440744235848e-08, + "loss": 0.4659677743911743, + "step": 7711 + }, + { + "epoch": 1.7781876873414801, + "grad_norm": 1.5770202034991272, + "learning_rate": 6.664748165113432e-08, + "loss": 0.4030906558036804, + "step": 7712 + }, + { + "epoch": 1.7784182614710629, + "grad_norm": 1.4781448065809968, + "learning_rate": 6.651069153215804e-08, + "loss": 0.4878493547439575, + "step": 7713 + }, + { + "epoch": 1.7786488356006456, + "grad_norm": 2.5716911461046115, + "learning_rate": 6.637403710531352e-08, + "loss": 0.4651924669742584, + "step": 7714 + }, + { + "epoch": 1.7788794097302283, + "grad_norm": 1.5268258649377473, + "learning_rate": 6.623751839046455e-08, + "loss": 0.37795954942703247, + "step": 7715 + }, + { + "epoch": 1.779109983859811, + "grad_norm": 1.8617699048987524, + "learning_rate": 6.610113540745577e-08, + "loss": 0.5722923278808594, + "step": 7716 + }, + { + "epoch": 1.7793405579893937, + "grad_norm": 2.039919155814789, + "learning_rate": 6.59648881761118e-08, + "loss": 0.46933984756469727, + "step": 7717 + }, + { + "epoch": 1.7795711321189762, + "grad_norm": 1.7692714186594531, + "learning_rate": 6.582877671623732e-08, + "loss": 0.5066707134246826, + "step": 7718 + }, + { + "epoch": 
1.779801706248559, + "grad_norm": 1.5518843020711044, + "learning_rate": 6.569280104761787e-08, + "loss": 0.5064150094985962, + "step": 7719 + }, + { + "epoch": 1.7800322803781414, + "grad_norm": 1.4858522723338492, + "learning_rate": 6.555696119001853e-08, + "loss": 0.408633828163147, + "step": 7720 + }, + { + "epoch": 1.7802628545077241, + "grad_norm": 1.9460802080180855, + "learning_rate": 6.542125716318514e-08, + "loss": 0.4960691034793854, + "step": 7721 + }, + { + "epoch": 1.7804934286373069, + "grad_norm": 1.609433139750494, + "learning_rate": 6.528568898684373e-08, + "loss": 0.4275667071342468, + "step": 7722 + }, + { + "epoch": 1.7807240027668896, + "grad_norm": 1.5242191505097453, + "learning_rate": 6.515025668070062e-08, + "loss": 0.5309962630271912, + "step": 7723 + }, + { + "epoch": 1.7809545768964723, + "grad_norm": 1.3218748644597216, + "learning_rate": 6.501496026444197e-08, + "loss": 0.42067253589630127, + "step": 7724 + }, + { + "epoch": 1.781185151026055, + "grad_norm": 1.5205678956011466, + "learning_rate": 6.487979975773484e-08, + "loss": 0.43419337272644043, + "step": 7725 + }, + { + "epoch": 1.7814157251556375, + "grad_norm": 1.728456021255068, + "learning_rate": 6.474477518022592e-08, + "loss": 0.46563541889190674, + "step": 7726 + }, + { + "epoch": 1.7816462992852202, + "grad_norm": 1.2994636821353438, + "learning_rate": 6.460988655154232e-08, + "loss": 0.4233010411262512, + "step": 7727 + }, + { + "epoch": 1.7818768734148027, + "grad_norm": 1.5541073736247684, + "learning_rate": 6.447513389129155e-08, + "loss": 0.47119754552841187, + "step": 7728 + }, + { + "epoch": 1.7821074475443854, + "grad_norm": 1.7457851161988949, + "learning_rate": 6.434051721906142e-08, + "loss": 0.5227707624435425, + "step": 7729 + }, + { + "epoch": 1.7823380216739682, + "grad_norm": 1.6453844551794445, + "learning_rate": 6.42060365544198e-08, + "loss": 0.4521239399909973, + "step": 7730 + }, + { + "epoch": 1.7825685958035509, + "grad_norm": 1.5739071323130231, + 
"learning_rate": 6.407169191691464e-08, + "loss": 0.36693084239959717, + "step": 7731 + }, + { + "epoch": 1.7827991699331336, + "grad_norm": 1.9032214424835083, + "learning_rate": 6.393748332607463e-08, + "loss": 0.43610745668411255, + "step": 7732 + }, + { + "epoch": 1.7830297440627163, + "grad_norm": 1.4784257370105836, + "learning_rate": 6.380341080140794e-08, + "loss": 0.4471576511859894, + "step": 7733 + }, + { + "epoch": 1.7832603181922988, + "grad_norm": 1.61284007349941, + "learning_rate": 6.366947436240367e-08, + "loss": 0.48119011521339417, + "step": 7734 + }, + { + "epoch": 1.7834908923218815, + "grad_norm": 1.4393647934894105, + "learning_rate": 6.353567402853055e-08, + "loss": 0.44503623247146606, + "step": 7735 + }, + { + "epoch": 1.783721466451464, + "grad_norm": 1.3430253886827939, + "learning_rate": 6.340200981923804e-08, + "loss": 0.3350965678691864, + "step": 7736 + }, + { + "epoch": 1.7839520405810467, + "grad_norm": 1.4031838686370632, + "learning_rate": 6.326848175395572e-08, + "loss": 0.4814649224281311, + "step": 7737 + }, + { + "epoch": 1.7841826147106294, + "grad_norm": 1.3042254858214102, + "learning_rate": 6.313508985209281e-08, + "loss": 0.42114442586898804, + "step": 7738 + }, + { + "epoch": 1.7844131888402122, + "grad_norm": 1.4924201661244643, + "learning_rate": 6.30018341330396e-08, + "loss": 0.5044004917144775, + "step": 7739 + }, + { + "epoch": 1.7846437629697949, + "grad_norm": 1.7211591431218773, + "learning_rate": 6.286871461616594e-08, + "loss": 0.46084678173065186, + "step": 7740 + }, + { + "epoch": 1.7848743370993776, + "grad_norm": 1.8074380950640034, + "learning_rate": 6.273573132082222e-08, + "loss": 0.5159536600112915, + "step": 7741 + }, + { + "epoch": 1.78510491122896, + "grad_norm": 2.6340339816007394, + "learning_rate": 6.260288426633875e-08, + "loss": 0.4394105076789856, + "step": 7742 + }, + { + "epoch": 1.7853354853585428, + "grad_norm": 1.415651636415873, + "learning_rate": 6.247017347202643e-08, + "loss": 
0.39798909425735474, + "step": 7743 + }, + { + "epoch": 1.7855660594881253, + "grad_norm": 1.439083218855293, + "learning_rate": 6.23375989571756e-08, + "loss": 0.3865649104118347, + "step": 7744 + }, + { + "epoch": 1.785796633617708, + "grad_norm": 1.3172940172138528, + "learning_rate": 6.220516074105808e-08, + "loss": 0.3641304671764374, + "step": 7745 + }, + { + "epoch": 1.7860272077472907, + "grad_norm": 1.7148086023867872, + "learning_rate": 6.207285884292468e-08, + "loss": 0.5025773644447327, + "step": 7746 + }, + { + "epoch": 1.7862577818768735, + "grad_norm": 1.5237733931532715, + "learning_rate": 6.194069328200669e-08, + "loss": 0.4289078414440155, + "step": 7747 + }, + { + "epoch": 1.7864883560064562, + "grad_norm": 1.5368409458369108, + "learning_rate": 6.180866407751595e-08, + "loss": 0.37442147731781006, + "step": 7748 + }, + { + "epoch": 1.7867189301360389, + "grad_norm": 1.6962674881863276, + "learning_rate": 6.167677124864412e-08, + "loss": 0.4975471794605255, + "step": 7749 + }, + { + "epoch": 1.7869495042656214, + "grad_norm": 1.7290797112616507, + "learning_rate": 6.154501481456331e-08, + "loss": 0.42754751443862915, + "step": 7750 + }, + { + "epoch": 1.787180078395204, + "grad_norm": 1.508949301788889, + "learning_rate": 6.141339479442542e-08, + "loss": 0.40203964710235596, + "step": 7751 + }, + { + "epoch": 1.7874106525247866, + "grad_norm": 1.6453479393381845, + "learning_rate": 6.128191120736293e-08, + "loss": 0.46465349197387695, + "step": 7752 + }, + { + "epoch": 1.7876412266543693, + "grad_norm": 1.527112166022553, + "learning_rate": 6.11505640724882e-08, + "loss": 0.43915730714797974, + "step": 7753 + }, + { + "epoch": 1.787871800783952, + "grad_norm": 1.6855929805801586, + "learning_rate": 6.101935340889419e-08, + "loss": 0.5205652713775635, + "step": 7754 + }, + { + "epoch": 1.7881023749135347, + "grad_norm": 1.8024849017160496, + "learning_rate": 6.088827923565321e-08, + "loss": 0.39400190114974976, + "step": 7755 + }, + { + "epoch": 
1.7883329490431175, + "grad_norm": 1.585632228373493, + "learning_rate": 6.075734157181855e-08, + "loss": 0.48021531105041504, + "step": 7756 + }, + { + "epoch": 1.7885635231727002, + "grad_norm": 1.313118747015303, + "learning_rate": 6.062654043642334e-08, + "loss": 0.42780327796936035, + "step": 7757 + }, + { + "epoch": 1.7887940973022827, + "grad_norm": 1.5444008946931698, + "learning_rate": 6.049587584848059e-08, + "loss": 0.4307866096496582, + "step": 7758 + }, + { + "epoch": 1.7890246714318654, + "grad_norm": 1.8803266889221286, + "learning_rate": 6.036534782698377e-08, + "loss": 0.4258533716201782, + "step": 7759 + }, + { + "epoch": 1.7892552455614479, + "grad_norm": 1.7033971690196206, + "learning_rate": 6.02349563909067e-08, + "loss": 0.5159060955047607, + "step": 7760 + }, + { + "epoch": 1.7894858196910306, + "grad_norm": 1.4016246032179807, + "learning_rate": 6.0104701559203e-08, + "loss": 0.4407171308994293, + "step": 7761 + }, + { + "epoch": 1.7897163938206133, + "grad_norm": 1.4060175796774192, + "learning_rate": 5.99745833508063e-08, + "loss": 0.40273964405059814, + "step": 7762 + }, + { + "epoch": 1.789946967950196, + "grad_norm": 1.5929040194351833, + "learning_rate": 5.984460178463102e-08, + "loss": 0.42018163204193115, + "step": 7763 + }, + { + "epoch": 1.7901775420797787, + "grad_norm": 1.5421517490968868, + "learning_rate": 5.971475687957084e-08, + "loss": 0.519807755947113, + "step": 7764 + }, + { + "epoch": 1.7904081162093615, + "grad_norm": 1.4320196013314206, + "learning_rate": 5.9585048654500535e-08, + "loss": 0.42557477951049805, + "step": 7765 + }, + { + "epoch": 1.790638690338944, + "grad_norm": 1.520426042431449, + "learning_rate": 5.9455477128273924e-08, + "loss": 0.39568305015563965, + "step": 7766 + }, + { + "epoch": 1.7908692644685267, + "grad_norm": 1.566797519717712, + "learning_rate": 5.932604231972593e-08, + "loss": 0.43125781416893005, + "step": 7767 + }, + { + "epoch": 1.7910998385981092, + "grad_norm": 1.5764190405770546, + 
"learning_rate": 5.919674424767129e-08, + "loss": 0.46194958686828613, + "step": 7768 + }, + { + "epoch": 1.791330412727692, + "grad_norm": 1.3811294262508054, + "learning_rate": 5.906758293090441e-08, + "loss": 0.40115779638290405, + "step": 7769 + }, + { + "epoch": 1.7915609868572746, + "grad_norm": 1.4511176958262644, + "learning_rate": 5.893855838820061e-08, + "loss": 0.46589648723602295, + "step": 7770 + }, + { + "epoch": 1.7917915609868573, + "grad_norm": 1.4613820552852321, + "learning_rate": 5.880967063831455e-08, + "loss": 0.3540228605270386, + "step": 7771 + }, + { + "epoch": 1.79202213511644, + "grad_norm": 1.3900736631273891, + "learning_rate": 5.868091969998168e-08, + "loss": 0.4324638545513153, + "step": 7772 + }, + { + "epoch": 1.7922527092460228, + "grad_norm": 1.426811730253004, + "learning_rate": 5.855230559191693e-08, + "loss": 0.4301075339317322, + "step": 7773 + }, + { + "epoch": 1.7924832833756053, + "grad_norm": 1.4903234676277026, + "learning_rate": 5.842382833281612e-08, + "loss": 0.4496096670627594, + "step": 7774 + }, + { + "epoch": 1.792713857505188, + "grad_norm": 1.7119132871592322, + "learning_rate": 5.8295487941354195e-08, + "loss": 0.4554907977581024, + "step": 7775 + }, + { + "epoch": 1.7929444316347705, + "grad_norm": 1.6357284914311145, + "learning_rate": 5.816728443618701e-08, + "loss": 0.5020148158073425, + "step": 7776 + }, + { + "epoch": 1.7931750057643532, + "grad_norm": 1.5886767874513543, + "learning_rate": 5.803921783595045e-08, + "loss": 0.4073353409767151, + "step": 7777 + }, + { + "epoch": 1.793405579893936, + "grad_norm": 1.7806143022342438, + "learning_rate": 5.791128815925983e-08, + "loss": 0.4995894432067871, + "step": 7778 + }, + { + "epoch": 1.7936361540235186, + "grad_norm": 1.4290018525481676, + "learning_rate": 5.778349542471139e-08, + "loss": 0.5383706092834473, + "step": 7779 + }, + { + "epoch": 1.7938667281531013, + "grad_norm": 1.5928372327878688, + "learning_rate": 5.765583965088083e-08, + "loss": 
0.4206235408782959, + "step": 7780 + }, + { + "epoch": 1.794097302282684, + "grad_norm": 1.516533597399375, + "learning_rate": 5.752832085632453e-08, + "loss": 0.49053555727005005, + "step": 7781 + }, + { + "epoch": 1.7943278764122665, + "grad_norm": 1.4761016261714877, + "learning_rate": 5.740093905957832e-08, + "loss": 0.4372660517692566, + "step": 7782 + }, + { + "epoch": 1.7945584505418493, + "grad_norm": 1.364372499711938, + "learning_rate": 5.727369427915851e-08, + "loss": 0.40125733613967896, + "step": 7783 + }, + { + "epoch": 1.7947890246714318, + "grad_norm": 1.5421908029736124, + "learning_rate": 5.714658653356153e-08, + "loss": 0.3595162034034729, + "step": 7784 + }, + { + "epoch": 1.7950195988010145, + "grad_norm": 1.4909078230640012, + "learning_rate": 5.7019615841263915e-08, + "loss": 0.42618101835250854, + "step": 7785 + }, + { + "epoch": 1.7952501729305972, + "grad_norm": 1.2890347032019704, + "learning_rate": 5.6892782220721694e-08, + "loss": 0.39135509729385376, + "step": 7786 + }, + { + "epoch": 1.79548074706018, + "grad_norm": 1.2930421412734876, + "learning_rate": 5.6766085690372004e-08, + "loss": 0.3792929947376251, + "step": 7787 + }, + { + "epoch": 1.7957113211897626, + "grad_norm": 2.137954515105217, + "learning_rate": 5.6639526268631e-08, + "loss": 0.5193231105804443, + "step": 7788 + }, + { + "epoch": 1.7959418953193451, + "grad_norm": 1.3992061535387368, + "learning_rate": 5.6513103973895415e-08, + "loss": 0.3896862268447876, + "step": 7789 + }, + { + "epoch": 1.7961724694489278, + "grad_norm": 1.6107653457361368, + "learning_rate": 5.638681882454211e-08, + "loss": 0.5345273017883301, + "step": 7790 + }, + { + "epoch": 1.7964030435785103, + "grad_norm": 1.597285051654587, + "learning_rate": 5.626067083892794e-08, + "loss": 0.4297627806663513, + "step": 7791 + }, + { + "epoch": 1.796633617708093, + "grad_norm": 1.8890048408663909, + "learning_rate": 5.6134660035389914e-08, + "loss": 0.3176969587802887, + "step": 7792 + }, + { + "epoch": 
1.7968641918376758, + "grad_norm": 1.684652354437091, + "learning_rate": 5.600878643224471e-08, + "loss": 0.5449323654174805, + "step": 7793 + }, + { + "epoch": 1.7970947659672585, + "grad_norm": 1.3924882582172304, + "learning_rate": 5.588305004778959e-08, + "loss": 0.38096293807029724, + "step": 7794 + }, + { + "epoch": 1.7973253400968412, + "grad_norm": 1.6284420500901806, + "learning_rate": 5.575745090030137e-08, + "loss": 0.3917475938796997, + "step": 7795 + }, + { + "epoch": 1.797555914226424, + "grad_norm": 1.8012275849309003, + "learning_rate": 5.563198900803734e-08, + "loss": 0.41522616147994995, + "step": 7796 + }, + { + "epoch": 1.7977864883560064, + "grad_norm": 1.4000666419018515, + "learning_rate": 5.550666438923468e-08, + "loss": 0.46558207273483276, + "step": 7797 + }, + { + "epoch": 1.7980170624855891, + "grad_norm": 1.4562091239424864, + "learning_rate": 5.538147706211038e-08, + "loss": 0.43256324529647827, + "step": 7798 + }, + { + "epoch": 1.7982476366151716, + "grad_norm": 1.5167378404298808, + "learning_rate": 5.5256427044861666e-08, + "loss": 0.37302178144454956, + "step": 7799 + }, + { + "epoch": 1.7984782107447543, + "grad_norm": 1.7103098772379584, + "learning_rate": 5.5131514355666095e-08, + "loss": 0.5247504711151123, + "step": 7800 + }, + { + "epoch": 1.798708784874337, + "grad_norm": 1.3345270008803303, + "learning_rate": 5.5006739012680934e-08, + "loss": 0.3906348943710327, + "step": 7801 + }, + { + "epoch": 1.7989393590039198, + "grad_norm": 1.863821074304618, + "learning_rate": 5.488210103404345e-08, + "loss": 0.5293325185775757, + "step": 7802 + }, + { + "epoch": 1.7991699331335025, + "grad_norm": 1.8021445170106478, + "learning_rate": 5.4757600437871146e-08, + "loss": 0.4189381003379822, + "step": 7803 + }, + { + "epoch": 1.7994005072630852, + "grad_norm": 1.4161978936431723, + "learning_rate": 5.4633237242261207e-08, + "loss": 0.40476128458976746, + "step": 7804 + }, + { + "epoch": 1.7996310813926677, + "grad_norm": 
1.6288403815954717, + "learning_rate": 5.45090114652913e-08, + "loss": 0.3908376097679138, + "step": 7805 + }, + { + "epoch": 1.7998616555222504, + "grad_norm": 1.4731211435711635, + "learning_rate": 5.438492312501885e-08, + "loss": 0.42332786321640015, + "step": 7806 + }, + { + "epoch": 1.800092229651833, + "grad_norm": 1.2492034971721793, + "learning_rate": 5.426097223948123e-08, + "loss": 0.3398321866989136, + "step": 7807 + }, + { + "epoch": 1.8003228037814156, + "grad_norm": 1.410970674481118, + "learning_rate": 5.413715882669623e-08, + "loss": 0.4610673189163208, + "step": 7808 + }, + { + "epoch": 1.8005533779109983, + "grad_norm": 1.4416956666235687, + "learning_rate": 5.401348290466112e-08, + "loss": 0.4149124026298523, + "step": 7809 + }, + { + "epoch": 1.800783952040581, + "grad_norm": 1.4475278396115219, + "learning_rate": 5.388994449135376e-08, + "loss": 0.47464168071746826, + "step": 7810 + }, + { + "epoch": 1.8010145261701638, + "grad_norm": 1.4581354291230397, + "learning_rate": 5.376654360473121e-08, + "loss": 0.4530913829803467, + "step": 7811 + }, + { + "epoch": 1.8012451002997465, + "grad_norm": 1.7198902838066041, + "learning_rate": 5.364328026273157e-08, + "loss": 0.5577078461647034, + "step": 7812 + }, + { + "epoch": 1.801475674429329, + "grad_norm": 1.828526033611825, + "learning_rate": 5.3520154483272075e-08, + "loss": 0.4772539436817169, + "step": 7813 + }, + { + "epoch": 1.8017062485589117, + "grad_norm": 1.690066578469317, + "learning_rate": 5.339716628425039e-08, + "loss": 0.5387610197067261, + "step": 7814 + }, + { + "epoch": 1.8019368226884942, + "grad_norm": 1.7130913599502742, + "learning_rate": 5.327431568354401e-08, + "loss": 0.4505125880241394, + "step": 7815 + }, + { + "epoch": 1.802167396818077, + "grad_norm": 1.5145450098970203, + "learning_rate": 5.3151602699010867e-08, + "loss": 0.43021589517593384, + "step": 7816 + }, + { + "epoch": 1.8023979709476596, + "grad_norm": 1.6184493194868252, + "learning_rate": 
5.3029027348488244e-08, + "loss": 0.44107457995414734, + "step": 7817 + }, + { + "epoch": 1.8026285450772424, + "grad_norm": 1.6224833006548345, + "learning_rate": 5.2906589649793666e-08, + "loss": 0.42265504598617554, + "step": 7818 + }, + { + "epoch": 1.802859119206825, + "grad_norm": 1.3828256021454344, + "learning_rate": 5.2784289620724895e-08, + "loss": 0.4814263582229614, + "step": 7819 + }, + { + "epoch": 1.8030896933364078, + "grad_norm": 1.3840958899744187, + "learning_rate": 5.2662127279059275e-08, + "loss": 0.4255106747150421, + "step": 7820 + }, + { + "epoch": 1.8033202674659903, + "grad_norm": 1.3789211684549096, + "learning_rate": 5.2540102642554593e-08, + "loss": 0.43405312299728394, + "step": 7821 + }, + { + "epoch": 1.803550841595573, + "grad_norm": 1.5062041567676776, + "learning_rate": 5.2418215728948004e-08, + "loss": 0.3986097574234009, + "step": 7822 + }, + { + "epoch": 1.8037814157251555, + "grad_norm": 1.7653469724585684, + "learning_rate": 5.2296466555957205e-08, + "loss": 0.4988093972206116, + "step": 7823 + }, + { + "epoch": 1.8040119898547382, + "grad_norm": 1.6382094442265007, + "learning_rate": 5.217485514127973e-08, + "loss": 0.5290527939796448, + "step": 7824 + }, + { + "epoch": 1.804242563984321, + "grad_norm": 1.4794199807921353, + "learning_rate": 5.205338150259308e-08, + "loss": 0.3705815076828003, + "step": 7825 + }, + { + "epoch": 1.8044731381139036, + "grad_norm": 1.3872232407887637, + "learning_rate": 5.193204565755449e-08, + "loss": 0.37735384702682495, + "step": 7826 + }, + { + "epoch": 1.8047037122434864, + "grad_norm": 1.38875357732027, + "learning_rate": 5.1810847623801504e-08, + "loss": 0.39033758640289307, + "step": 7827 + }, + { + "epoch": 1.804934286373069, + "grad_norm": 1.5105458662939806, + "learning_rate": 5.168978741895147e-08, + "loss": 0.4669237732887268, + "step": 7828 + }, + { + "epoch": 1.8051648605026516, + "grad_norm": 1.6910832171163468, + "learning_rate": 5.156886506060154e-08, + "loss": 
0.5178482532501221, + "step": 7829 + }, + { + "epoch": 1.8053954346322343, + "grad_norm": 1.4473544670706617, + "learning_rate": 5.14480805663291e-08, + "loss": 0.44134122133255005, + "step": 7830 + }, + { + "epoch": 1.8056260087618168, + "grad_norm": 1.5836257156251672, + "learning_rate": 5.132743395369144e-08, + "loss": 0.44371920824050903, + "step": 7831 + }, + { + "epoch": 1.8058565828913995, + "grad_norm": 1.513244295553376, + "learning_rate": 5.1206925240225964e-08, + "loss": 0.43268662691116333, + "step": 7832 + }, + { + "epoch": 1.8060871570209822, + "grad_norm": 1.736730853895812, + "learning_rate": 5.1086554443449445e-08, + "loss": 0.5035665035247803, + "step": 7833 + }, + { + "epoch": 1.806317731150565, + "grad_norm": 1.3694047806165788, + "learning_rate": 5.0966321580859336e-08, + "loss": 0.4987141191959381, + "step": 7834 + }, + { + "epoch": 1.8065483052801476, + "grad_norm": 1.816085685560109, + "learning_rate": 5.0846226669932437e-08, + "loss": 0.5951617956161499, + "step": 7835 + }, + { + "epoch": 1.8067788794097304, + "grad_norm": 1.464038827862328, + "learning_rate": 5.072626972812599e-08, + "loss": 0.4710814654827118, + "step": 7836 + }, + { + "epoch": 1.8070094535393129, + "grad_norm": 1.6196482413694708, + "learning_rate": 5.060645077287662e-08, + "loss": 0.5173348188400269, + "step": 7837 + }, + { + "epoch": 1.8072400276688956, + "grad_norm": 1.4170272466334293, + "learning_rate": 5.048676982160161e-08, + "loss": 0.49508416652679443, + "step": 7838 + }, + { + "epoch": 1.807470601798478, + "grad_norm": 1.7639395740589152, + "learning_rate": 5.03672268916977e-08, + "loss": 0.4535290598869324, + "step": 7839 + }, + { + "epoch": 1.8077011759280608, + "grad_norm": 1.7696762607003815, + "learning_rate": 5.024782200054145e-08, + "loss": 0.5337553024291992, + "step": 7840 + }, + { + "epoch": 1.8079317500576435, + "grad_norm": 1.6346280356935987, + "learning_rate": 5.012855516548986e-08, + "loss": 0.47118210792541504, + "step": 7841 + }, + { + "epoch": 
1.8081623241872262, + "grad_norm": 1.504680600844573, + "learning_rate": 5.0009426403879283e-08, + "loss": 0.4458848237991333, + "step": 7842 + }, + { + "epoch": 1.808392898316809, + "grad_norm": 1.5297682575974059, + "learning_rate": 4.9890435733026536e-08, + "loss": 0.5055558681488037, + "step": 7843 + }, + { + "epoch": 1.8086234724463917, + "grad_norm": 1.4365609441585347, + "learning_rate": 4.9771583170228006e-08, + "loss": 0.43715038895606995, + "step": 7844 + }, + { + "epoch": 1.8088540465759742, + "grad_norm": 1.545411862707653, + "learning_rate": 4.96528687327602e-08, + "loss": 0.427906334400177, + "step": 7845 + }, + { + "epoch": 1.8090846207055569, + "grad_norm": 1.6703597275780244, + "learning_rate": 4.953429243787932e-08, + "loss": 0.48160994052886963, + "step": 7846 + }, + { + "epoch": 1.8093151948351394, + "grad_norm": 1.3261658854233023, + "learning_rate": 4.941585430282158e-08, + "loss": 0.40856754779815674, + "step": 7847 + }, + { + "epoch": 1.809545768964722, + "grad_norm": 1.3569384823756985, + "learning_rate": 4.929755434480354e-08, + "loss": 0.40482330322265625, + "step": 7848 + }, + { + "epoch": 1.8097763430943048, + "grad_norm": 1.530544362283251, + "learning_rate": 4.9179392581021e-08, + "loss": 0.4286755323410034, + "step": 7849 + }, + { + "epoch": 1.8100069172238875, + "grad_norm": 1.5805205551700128, + "learning_rate": 4.906136902864999e-08, + "loss": 0.4436051547527313, + "step": 7850 + }, + { + "epoch": 1.8102374913534702, + "grad_norm": 1.5320309451669083, + "learning_rate": 4.8943483704846465e-08, + "loss": 0.41794437170028687, + "step": 7851 + }, + { + "epoch": 1.810468065483053, + "grad_norm": 1.4506407579843814, + "learning_rate": 4.8825736626746384e-08, + "loss": 0.4308912754058838, + "step": 7852 + }, + { + "epoch": 1.8106986396126354, + "grad_norm": 1.5274898640972132, + "learning_rate": 4.870812781146516e-08, + "loss": 0.43090081214904785, + "step": 7853 + }, + { + "epoch": 1.8109292137422182, + "grad_norm": 1.3117483081436436, 
+ "learning_rate": 4.859065727609857e-08, + "loss": 0.4329320192337036, + "step": 7854 + }, + { + "epoch": 1.8111597878718007, + "grad_norm": 1.266199300666261, + "learning_rate": 4.8473325037722276e-08, + "loss": 0.3162953853607178, + "step": 7855 + }, + { + "epoch": 1.8113903620013834, + "grad_norm": 1.4534333887380995, + "learning_rate": 4.835613111339165e-08, + "loss": 0.37513065338134766, + "step": 7856 + }, + { + "epoch": 1.811620936130966, + "grad_norm": 1.494207838495638, + "learning_rate": 4.823907552014195e-08, + "loss": 0.4120938181877136, + "step": 7857 + }, + { + "epoch": 1.8118515102605488, + "grad_norm": 1.555741011782435, + "learning_rate": 4.8122158274988555e-08, + "loss": 0.4295421242713928, + "step": 7858 + }, + { + "epoch": 1.8120820843901315, + "grad_norm": 1.4697042695976983, + "learning_rate": 4.8005379394926435e-08, + "loss": 0.44738203287124634, + "step": 7859 + }, + { + "epoch": 1.8123126585197142, + "grad_norm": 1.7388489283467792, + "learning_rate": 4.7888738896930456e-08, + "loss": 0.447609007358551, + "step": 7860 + }, + { + "epoch": 1.8125432326492967, + "grad_norm": 1.6367328188270214, + "learning_rate": 4.777223679795561e-08, + "loss": 0.38288167119026184, + "step": 7861 + }, + { + "epoch": 1.8127738067788794, + "grad_norm": 1.5566909994885838, + "learning_rate": 4.765587311493668e-08, + "loss": 0.5003981590270996, + "step": 7862 + }, + { + "epoch": 1.813004380908462, + "grad_norm": 1.5140425774804767, + "learning_rate": 4.7539647864788476e-08, + "loss": 0.5244492888450623, + "step": 7863 + }, + { + "epoch": 1.8132349550380447, + "grad_norm": 1.4098788698269693, + "learning_rate": 4.742356106440526e-08, + "loss": 0.505184531211853, + "step": 7864 + }, + { + "epoch": 1.8134655291676274, + "grad_norm": 2.493869291024891, + "learning_rate": 4.7307612730661636e-08, + "loss": 0.5364291071891785, + "step": 7865 + }, + { + "epoch": 1.81369610329721, + "grad_norm": 1.5655893218937025, + "learning_rate": 4.719180288041158e-08, + "loss": 
0.4370742738246918, + "step": 7866 + }, + { + "epoch": 1.8139266774267928, + "grad_norm": 1.3233268572547954, + "learning_rate": 4.7076131530489505e-08, + "loss": 0.37784355878829956, + "step": 7867 + }, + { + "epoch": 1.8141572515563755, + "grad_norm": 1.6040150628213576, + "learning_rate": 4.6960598697709294e-08, + "loss": 0.5184513330459595, + "step": 7868 + }, + { + "epoch": 1.814387825685958, + "grad_norm": 1.6174173359265467, + "learning_rate": 4.6845204398864743e-08, + "loss": 0.41221511363983154, + "step": 7869 + }, + { + "epoch": 1.8146183998155407, + "grad_norm": 1.960596641519608, + "learning_rate": 4.672994865072965e-08, + "loss": 0.43040651082992554, + "step": 7870 + }, + { + "epoch": 1.8148489739451232, + "grad_norm": 1.887961823292038, + "learning_rate": 4.6614831470057625e-08, + "loss": 0.4681999385356903, + "step": 7871 + }, + { + "epoch": 1.815079548074706, + "grad_norm": 1.5463001442495705, + "learning_rate": 4.649985287358227e-08, + "loss": 0.49752098321914673, + "step": 7872 + }, + { + "epoch": 1.8153101222042887, + "grad_norm": 1.4528059880154254, + "learning_rate": 4.6385012878016663e-08, + "loss": 0.4621706008911133, + "step": 7873 + }, + { + "epoch": 1.8155406963338714, + "grad_norm": 1.339046035541834, + "learning_rate": 4.627031150005401e-08, + "loss": 0.4359724521636963, + "step": 7874 + }, + { + "epoch": 1.815771270463454, + "grad_norm": 1.4288119410903932, + "learning_rate": 4.6155748756367294e-08, + "loss": 0.4901214838027954, + "step": 7875 + }, + { + "epoch": 1.8160018445930368, + "grad_norm": 1.7234395975437273, + "learning_rate": 4.604132466360955e-08, + "loss": 0.5012428760528564, + "step": 7876 + }, + { + "epoch": 1.8162324187226193, + "grad_norm": 1.6768636456338364, + "learning_rate": 4.592703923841323e-08, + "loss": 0.5048446655273438, + "step": 7877 + }, + { + "epoch": 1.816462992852202, + "grad_norm": 1.5761086054200695, + "learning_rate": 4.5812892497390955e-08, + "loss": 0.5025140047073364, + "step": 7878 + }, + { + 
"epoch": 1.8166935669817845, + "grad_norm": 1.5593886228823222, + "learning_rate": 4.5698884457135324e-08, + "loss": 0.4456709623336792, + "step": 7879 + }, + { + "epoch": 1.8169241411113672, + "grad_norm": 1.4583950124069596, + "learning_rate": 4.5585015134218196e-08, + "loss": 0.38283586502075195, + "step": 7880 + }, + { + "epoch": 1.81715471524095, + "grad_norm": 1.5479198908902716, + "learning_rate": 4.5471284545192004e-08, + "loss": 0.3458648920059204, + "step": 7881 + }, + { + "epoch": 1.8173852893705327, + "grad_norm": 1.7126815699296334, + "learning_rate": 4.53576927065884e-08, + "loss": 0.4609532952308655, + "step": 7882 + }, + { + "epoch": 1.8176158635001154, + "grad_norm": 1.238404719965568, + "learning_rate": 4.524423963491919e-08, + "loss": 0.4250793159008026, + "step": 7883 + }, + { + "epoch": 1.817846437629698, + "grad_norm": 1.7276559977997992, + "learning_rate": 4.513092534667584e-08, + "loss": 0.41343796253204346, + "step": 7884 + }, + { + "epoch": 1.8180770117592806, + "grad_norm": 1.5863495927207087, + "learning_rate": 4.5017749858329736e-08, + "loss": 0.46575528383255005, + "step": 7885 + }, + { + "epoch": 1.8183075858888633, + "grad_norm": 1.7387493602059383, + "learning_rate": 4.4904713186332156e-08, + "loss": 0.47052180767059326, + "step": 7886 + }, + { + "epoch": 1.8185381600184458, + "grad_norm": 1.4938009961123744, + "learning_rate": 4.479181534711429e-08, + "loss": 0.42979568243026733, + "step": 7887 + }, + { + "epoch": 1.8187687341480285, + "grad_norm": 1.4298617258142596, + "learning_rate": 4.46790563570868e-08, + "loss": 0.4278537929058075, + "step": 7888 + }, + { + "epoch": 1.8189993082776112, + "grad_norm": 1.6571154898401685, + "learning_rate": 4.456643623264022e-08, + "loss": 0.45380616188049316, + "step": 7889 + }, + { + "epoch": 1.819229882407194, + "grad_norm": 1.6141969165708208, + "learning_rate": 4.445395499014526e-08, + "loss": 0.46085125207901, + "step": 7890 + }, + { + "epoch": 1.8194604565367767, + "grad_norm": 
1.7363894486391924, + "learning_rate": 4.434161264595204e-08, + "loss": 0.47558531165122986, + "step": 7891 + }, + { + "epoch": 1.8196910306663594, + "grad_norm": 1.552212209885486, + "learning_rate": 4.4229409216390845e-08, + "loss": 0.42082321643829346, + "step": 7892 + }, + { + "epoch": 1.819921604795942, + "grad_norm": 1.6844917452185877, + "learning_rate": 4.411734471777129e-08, + "loss": 0.40222978591918945, + "step": 7893 + }, + { + "epoch": 1.8201521789255246, + "grad_norm": 1.7385505168528088, + "learning_rate": 4.400541916638323e-08, + "loss": 0.39737701416015625, + "step": 7894 + }, + { + "epoch": 1.820382753055107, + "grad_norm": 1.6976347614290264, + "learning_rate": 4.389363257849632e-08, + "loss": 0.46538835763931274, + "step": 7895 + }, + { + "epoch": 1.8206133271846898, + "grad_norm": 2.034464057065236, + "learning_rate": 4.378198497035979e-08, + "loss": 0.4994567036628723, + "step": 7896 + }, + { + "epoch": 1.8208439013142725, + "grad_norm": 1.517699554285521, + "learning_rate": 4.367047635820264e-08, + "loss": 0.4574298858642578, + "step": 7897 + }, + { + "epoch": 1.8210744754438553, + "grad_norm": 1.7361916973448048, + "learning_rate": 4.3559106758234044e-08, + "loss": 0.4716116786003113, + "step": 7898 + }, + { + "epoch": 1.821305049573438, + "grad_norm": 1.7495776361282012, + "learning_rate": 4.344787618664247e-08, + "loss": 0.35549741983413696, + "step": 7899 + }, + { + "epoch": 1.8215356237030205, + "grad_norm": 1.673931935617008, + "learning_rate": 4.3336784659596226e-08, + "loss": 0.44955599308013916, + "step": 7900 + }, + { + "epoch": 1.8217661978326032, + "grad_norm": 1.2588104675314307, + "learning_rate": 4.322583219324394e-08, + "loss": 0.4047467112541199, + "step": 7901 + }, + { + "epoch": 1.8219967719621857, + "grad_norm": 1.3892625958432285, + "learning_rate": 4.3115018803713596e-08, + "loss": 0.40367889404296875, + "step": 7902 + }, + { + "epoch": 1.8222273460917684, + "grad_norm": 1.3189968956301878, + "learning_rate": 
4.3004344507113096e-08, + "loss": 0.32705235481262207, + "step": 7903 + }, + { + "epoch": 1.8224579202213511, + "grad_norm": 1.3777118561947166, + "learning_rate": 4.2893809319529794e-08, + "loss": 0.3845488727092743, + "step": 7904 + }, + { + "epoch": 1.8226884943509338, + "grad_norm": 1.4977030222677208, + "learning_rate": 4.2783413257031495e-08, + "loss": 0.49070197343826294, + "step": 7905 + }, + { + "epoch": 1.8229190684805165, + "grad_norm": 1.729181630904155, + "learning_rate": 4.267315633566493e-08, + "loss": 0.550437867641449, + "step": 7906 + }, + { + "epoch": 1.8231496426100993, + "grad_norm": 1.6119404797366197, + "learning_rate": 4.25630385714576e-08, + "loss": 0.5042926073074341, + "step": 7907 + }, + { + "epoch": 1.8233802167396818, + "grad_norm": 1.5956788246532367, + "learning_rate": 4.245305998041571e-08, + "loss": 0.48839205503463745, + "step": 7908 + }, + { + "epoch": 1.8236107908692645, + "grad_norm": 1.6028821186444346, + "learning_rate": 4.234322057852602e-08, + "loss": 0.4754030108451843, + "step": 7909 + }, + { + "epoch": 1.823841364998847, + "grad_norm": 1.5406282114264656, + "learning_rate": 4.223352038175487e-08, + "loss": 0.394174188375473, + "step": 7910 + }, + { + "epoch": 1.8240719391284297, + "grad_norm": 1.3144512253416945, + "learning_rate": 4.2123959406048183e-08, + "loss": 0.39882469177246094, + "step": 7911 + }, + { + "epoch": 1.8243025132580124, + "grad_norm": 1.3036980510979261, + "learning_rate": 4.201453766733176e-08, + "loss": 0.4611927270889282, + "step": 7912 + }, + { + "epoch": 1.8245330873875951, + "grad_norm": 1.3717750651706109, + "learning_rate": 4.190525518151122e-08, + "loss": 0.4164184331893921, + "step": 7913 + }, + { + "epoch": 1.8247636615171778, + "grad_norm": 1.7048234275294294, + "learning_rate": 4.179611196447186e-08, + "loss": 0.41586828231811523, + "step": 7914 + }, + { + "epoch": 1.8249942356467606, + "grad_norm": 1.486464242852147, + "learning_rate": 4.168710803207864e-08, + "loss": 0.4707748591899872, 
+ "step": 7915 + }, + { + "epoch": 1.825224809776343, + "grad_norm": 1.6925426332325308, + "learning_rate": 4.157824340017657e-08, + "loss": 0.4235571622848511, + "step": 7916 + }, + { + "epoch": 1.8254553839059258, + "grad_norm": 1.5746767320284107, + "learning_rate": 4.146951808458998e-08, + "loss": 0.3761681914329529, + "step": 7917 + }, + { + "epoch": 1.8256859580355083, + "grad_norm": 1.9541083814793623, + "learning_rate": 4.136093210112346e-08, + "loss": 0.45545494556427, + "step": 7918 + }, + { + "epoch": 1.825916532165091, + "grad_norm": 1.4946968371557119, + "learning_rate": 4.1252485465561035e-08, + "loss": 0.4154251515865326, + "step": 7919 + }, + { + "epoch": 1.8261471062946737, + "grad_norm": 1.4442817043721163, + "learning_rate": 4.114417819366633e-08, + "loss": 0.3664330244064331, + "step": 7920 + }, + { + "epoch": 1.8263776804242564, + "grad_norm": 1.4915985489350694, + "learning_rate": 4.10360103011832e-08, + "loss": 0.4527730643749237, + "step": 7921 + }, + { + "epoch": 1.8266082545538391, + "grad_norm": 1.6683615123339999, + "learning_rate": 4.092798180383461e-08, + "loss": 0.5245767831802368, + "step": 7922 + }, + { + "epoch": 1.8268388286834218, + "grad_norm": 1.6122193238326974, + "learning_rate": 4.0820092717323894e-08, + "loss": 0.39781343936920166, + "step": 7923 + }, + { + "epoch": 1.8270694028130043, + "grad_norm": 1.592304216861808, + "learning_rate": 4.071234305733362e-08, + "loss": 0.4173957109451294, + "step": 7924 + }, + { + "epoch": 1.827299976942587, + "grad_norm": 1.7592031102615102, + "learning_rate": 4.0604732839526256e-08, + "loss": 0.38840869069099426, + "step": 7925 + }, + { + "epoch": 1.8275305510721696, + "grad_norm": 1.777360398097105, + "learning_rate": 4.0497262079544294e-08, + "loss": 0.4107547998428345, + "step": 7926 + }, + { + "epoch": 1.8277611252017523, + "grad_norm": 1.5475583296259725, + "learning_rate": 4.038993079300956e-08, + "loss": 0.41102874279022217, + "step": 7927 + }, + { + "epoch": 1.827991699331335, + 
"grad_norm": 1.4229533643496446, + "learning_rate": 4.028273899552381e-08, + "loss": 0.3393939733505249, + "step": 7928 + }, + { + "epoch": 1.8282222734609177, + "grad_norm": 1.4844610719466356, + "learning_rate": 4.017568670266835e-08, + "loss": 0.42469024658203125, + "step": 7929 + }, + { + "epoch": 1.8284528475905004, + "grad_norm": 1.316542585504155, + "learning_rate": 4.006877393000441e-08, + "loss": 0.4869099259376526, + "step": 7930 + }, + { + "epoch": 1.8286834217200831, + "grad_norm": 1.3905230120628338, + "learning_rate": 3.996200069307265e-08, + "loss": 0.4463779926300049, + "step": 7931 + }, + { + "epoch": 1.8289139958496656, + "grad_norm": 1.908726864953878, + "learning_rate": 3.985536700739378e-08, + "loss": 0.429579496383667, + "step": 7932 + }, + { + "epoch": 1.8291445699792483, + "grad_norm": 1.555687929117211, + "learning_rate": 3.9748872888468065e-08, + "loss": 0.38837558031082153, + "step": 7933 + }, + { + "epoch": 1.8293751441088308, + "grad_norm": 1.467502995951613, + "learning_rate": 3.964251835177568e-08, + "loss": 0.4444499909877777, + "step": 7934 + }, + { + "epoch": 1.8296057182384136, + "grad_norm": 1.5836026531003116, + "learning_rate": 3.953630341277603e-08, + "loss": 0.5216259360313416, + "step": 7935 + }, + { + "epoch": 1.8298362923679963, + "grad_norm": 1.316614330242316, + "learning_rate": 3.943022808690888e-08, + "loss": 0.46454817056655884, + "step": 7936 + }, + { + "epoch": 1.830066866497579, + "grad_norm": 1.5390661326727673, + "learning_rate": 3.9324292389593005e-08, + "loss": 0.38960570096969604, + "step": 7937 + }, + { + "epoch": 1.8302974406271617, + "grad_norm": 1.2960127878271992, + "learning_rate": 3.9218496336227426e-08, + "loss": 0.3318006992340088, + "step": 7938 + }, + { + "epoch": 1.8305280147567444, + "grad_norm": 1.501585055160058, + "learning_rate": 3.9112839942190725e-08, + "loss": 0.41555076837539673, + "step": 7939 + }, + { + "epoch": 1.830758588886327, + "grad_norm": 1.4035625255113318, + "learning_rate": 
3.900732322284095e-08, + "loss": 0.4296320080757141, + "step": 7940 + }, + { + "epoch": 1.8309891630159096, + "grad_norm": 1.6738155247978692, + "learning_rate": 3.8901946193516055e-08, + "loss": 0.4416658282279968, + "step": 7941 + }, + { + "epoch": 1.8312197371454921, + "grad_norm": 1.885789179393057, + "learning_rate": 3.8796708869533676e-08, + "loss": 0.4539029598236084, + "step": 7942 + }, + { + "epoch": 1.8314503112750748, + "grad_norm": 1.4867619575158202, + "learning_rate": 3.869161126619136e-08, + "loss": 0.4526992440223694, + "step": 7943 + }, + { + "epoch": 1.8316808854046576, + "grad_norm": 1.5927522884216676, + "learning_rate": 3.8586653398765766e-08, + "loss": 0.3991963863372803, + "step": 7944 + }, + { + "epoch": 1.8319114595342403, + "grad_norm": 1.4460483349984772, + "learning_rate": 3.848183528251381e-08, + "loss": 0.44474589824676514, + "step": 7945 + }, + { + "epoch": 1.832142033663823, + "grad_norm": 1.7969739964524274, + "learning_rate": 3.837715693267174e-08, + "loss": 0.5022028684616089, + "step": 7946 + }, + { + "epoch": 1.8323726077934057, + "grad_norm": 1.6274178723126447, + "learning_rate": 3.8272618364455836e-08, + "loss": 0.4839058518409729, + "step": 7947 + }, + { + "epoch": 1.8326031819229882, + "grad_norm": 1.7924980398771633, + "learning_rate": 3.8168219593061376e-08, + "loss": 0.3580874800682068, + "step": 7948 + }, + { + "epoch": 1.832833756052571, + "grad_norm": 1.6096517551702718, + "learning_rate": 3.806396063366424e-08, + "loss": 0.4350799024105072, + "step": 7949 + }, + { + "epoch": 1.8330643301821534, + "grad_norm": 1.3546161389632028, + "learning_rate": 3.79598415014194e-08, + "loss": 0.4386145770549774, + "step": 7950 + }, + { + "epoch": 1.8332949043117361, + "grad_norm": 1.4421267919386862, + "learning_rate": 3.785586221146142e-08, + "loss": 0.5122627019882202, + "step": 7951 + }, + { + "epoch": 1.8335254784413189, + "grad_norm": 1.3507016201924953, + "learning_rate": 3.77520227789051e-08, + "loss": 0.41197121143341064, 
+ "step": 7952 + }, + { + "epoch": 1.8337560525709016, + "grad_norm": 1.7729553069577912, + "learning_rate": 3.764832321884426e-08, + "loss": 0.5508084297180176, + "step": 7953 + }, + { + "epoch": 1.8339866267004843, + "grad_norm": 1.3788371713361898, + "learning_rate": 3.754476354635283e-08, + "loss": 0.40791934728622437, + "step": 7954 + }, + { + "epoch": 1.834217200830067, + "grad_norm": 1.4693932480728087, + "learning_rate": 3.7441343776484113e-08, + "loss": 0.3880457878112793, + "step": 7955 + }, + { + "epoch": 1.8344477749596495, + "grad_norm": 1.4561569110121497, + "learning_rate": 3.7338063924271304e-08, + "loss": 0.40519118309020996, + "step": 7956 + }, + { + "epoch": 1.8346783490892322, + "grad_norm": 1.4799489730655653, + "learning_rate": 3.723492400472716e-08, + "loss": 0.46081095933914185, + "step": 7957 + }, + { + "epoch": 1.8349089232188147, + "grad_norm": 1.3167338346767847, + "learning_rate": 3.713192403284438e-08, + "loss": 0.3946321904659271, + "step": 7958 + }, + { + "epoch": 1.8351394973483974, + "grad_norm": 1.743632986191688, + "learning_rate": 3.702906402359474e-08, + "loss": 0.4699859023094177, + "step": 7959 + }, + { + "epoch": 1.8353700714779801, + "grad_norm": 1.4691817330554993, + "learning_rate": 3.692634399192995e-08, + "loss": 0.43031781911849976, + "step": 7960 + }, + { + "epoch": 1.8356006456075629, + "grad_norm": 1.5694622813964751, + "learning_rate": 3.6823763952781636e-08, + "loss": 0.4072418212890625, + "step": 7961 + }, + { + "epoch": 1.8358312197371456, + "grad_norm": 1.7009922761684866, + "learning_rate": 3.672132392106053e-08, + "loss": 0.40659528970718384, + "step": 7962 + }, + { + "epoch": 1.8360617938667283, + "grad_norm": 1.2845193385628964, + "learning_rate": 3.661902391165772e-08, + "loss": 0.41279205679893494, + "step": 7963 + }, + { + "epoch": 1.8362923679963108, + "grad_norm": 1.407521764327922, + "learning_rate": 3.65168639394432e-08, + "loss": 0.43887826800346375, + "step": 7964 + }, + { + "epoch": 
1.8365229421258935, + "grad_norm": 1.585883988281566, + "learning_rate": 3.6414844019267196e-08, + "loss": 0.46111762523651123, + "step": 7965 + }, + { + "epoch": 1.836753516255476, + "grad_norm": 1.5089060420061358, + "learning_rate": 3.63129641659593e-08, + "loss": 0.42694801092147827, + "step": 7966 + }, + { + "epoch": 1.8369840903850587, + "grad_norm": 1.563222995065882, + "learning_rate": 3.6211224394328775e-08, + "loss": 0.4674855172634125, + "step": 7967 + }, + { + "epoch": 1.8372146645146414, + "grad_norm": 1.6612957725595774, + "learning_rate": 3.610962471916435e-08, + "loss": 0.48998844623565674, + "step": 7968 + }, + { + "epoch": 1.8374452386442242, + "grad_norm": 1.517118505836267, + "learning_rate": 3.600816515523486e-08, + "loss": 0.4162273406982422, + "step": 7969 + }, + { + "epoch": 1.8376758127738069, + "grad_norm": 1.6498845355681542, + "learning_rate": 3.5906845717288304e-08, + "loss": 0.4446166753768921, + "step": 7970 + }, + { + "epoch": 1.8379063869033896, + "grad_norm": 1.6723175784368125, + "learning_rate": 3.580566642005245e-08, + "loss": 0.4782527983188629, + "step": 7971 + }, + { + "epoch": 1.838136961032972, + "grad_norm": 1.667138689471541, + "learning_rate": 3.570462727823476e-08, + "loss": 0.43014609813690186, + "step": 7972 + }, + { + "epoch": 1.8383675351625548, + "grad_norm": 1.5808858327085533, + "learning_rate": 3.560372830652225e-08, + "loss": 0.5155357122421265, + "step": 7973 + }, + { + "epoch": 1.8385981092921373, + "grad_norm": 1.4181681095350445, + "learning_rate": 3.5502969519581984e-08, + "loss": 0.4231104254722595, + "step": 7974 + }, + { + "epoch": 1.83882868342172, + "grad_norm": 1.8426199170185766, + "learning_rate": 3.540235093205979e-08, + "loss": 0.529877245426178, + "step": 7975 + }, + { + "epoch": 1.8390592575513027, + "grad_norm": 1.5632800597633676, + "learning_rate": 3.530187255858186e-08, + "loss": 0.4841991662979126, + "step": 7976 + }, + { + "epoch": 1.8392898316808854, + "grad_norm": 1.5770240615602402, + 
"learning_rate": 3.520153441375362e-08, + "loss": 0.40202534198760986, + "step": 7977 + }, + { + "epoch": 1.8395204058104682, + "grad_norm": 1.4104759549786023, + "learning_rate": 3.51013365121603e-08, + "loss": 0.398551344871521, + "step": 7978 + }, + { + "epoch": 1.8397509799400509, + "grad_norm": 1.5102819529399165, + "learning_rate": 3.500127886836668e-08, + "loss": 0.49139225482940674, + "step": 7979 + }, + { + "epoch": 1.8399815540696334, + "grad_norm": 1.7659081046335245, + "learning_rate": 3.4901361496917135e-08, + "loss": 0.4708287715911865, + "step": 7980 + }, + { + "epoch": 1.840212128199216, + "grad_norm": 1.3491474153090526, + "learning_rate": 3.4801584412335714e-08, + "loss": 0.4174381494522095, + "step": 7981 + }, + { + "epoch": 1.8404427023287986, + "grad_norm": 1.6453019064878467, + "learning_rate": 3.470194762912593e-08, + "loss": 0.535778284072876, + "step": 7982 + }, + { + "epoch": 1.8406732764583813, + "grad_norm": 1.7228199406120377, + "learning_rate": 3.4602451161771186e-08, + "loss": 0.540034294128418, + "step": 7983 + }, + { + "epoch": 1.840903850587964, + "grad_norm": 1.794022377740068, + "learning_rate": 3.450309502473403e-08, + "loss": 0.4399121403694153, + "step": 7984 + }, + { + "epoch": 1.8411344247175467, + "grad_norm": 1.6932512977389786, + "learning_rate": 3.4403879232457134e-08, + "loss": 0.5011022686958313, + "step": 7985 + }, + { + "epoch": 1.8413649988471295, + "grad_norm": 1.580497796669037, + "learning_rate": 3.4304803799362405e-08, + "loss": 0.392477810382843, + "step": 7986 + }, + { + "epoch": 1.8415955729767122, + "grad_norm": 1.5439573803469637, + "learning_rate": 3.420586873985132e-08, + "loss": 0.4734686315059662, + "step": 7987 + }, + { + "epoch": 1.8418261471062947, + "grad_norm": 1.3285059669744466, + "learning_rate": 3.410707406830537e-08, + "loss": 0.37347573041915894, + "step": 7988 + }, + { + "epoch": 1.8420567212358774, + "grad_norm": 1.6328708193086845, + "learning_rate": 3.400841979908531e-08, + "loss": 
0.38837599754333496, + "step": 7989 + }, + { + "epoch": 1.8422872953654599, + "grad_norm": 1.6277616294407593, + "learning_rate": 3.390990594653142e-08, + "loss": 0.38598424196243286, + "step": 7990 + }, + { + "epoch": 1.8425178694950426, + "grad_norm": 1.584379501910531, + "learning_rate": 3.381153252496371e-08, + "loss": 0.48508739471435547, + "step": 7991 + }, + { + "epoch": 1.8427484436246253, + "grad_norm": 1.609395355542375, + "learning_rate": 3.3713299548681736e-08, + "loss": 0.41946491599082947, + "step": 7992 + }, + { + "epoch": 1.842979017754208, + "grad_norm": 1.4959274640542461, + "learning_rate": 3.3615207031964744e-08, + "loss": 0.4803915023803711, + "step": 7993 + }, + { + "epoch": 1.8432095918837907, + "grad_norm": 1.3835076847275678, + "learning_rate": 3.351725498907143e-08, + "loss": 0.39463797211647034, + "step": 7994 + }, + { + "epoch": 1.8434401660133735, + "grad_norm": 1.5742658557245284, + "learning_rate": 3.341944343424008e-08, + "loss": 0.43345123529434204, + "step": 7995 + }, + { + "epoch": 1.843670740142956, + "grad_norm": 1.7826616989180466, + "learning_rate": 3.332177238168854e-08, + "loss": 0.5164570212364197, + "step": 7996 + }, + { + "epoch": 1.8439013142725387, + "grad_norm": 1.71354580792071, + "learning_rate": 3.322424184561445e-08, + "loss": 0.5313355922698975, + "step": 7997 + }, + { + "epoch": 1.8441318884021212, + "grad_norm": 1.901316143248936, + "learning_rate": 3.3126851840194815e-08, + "loss": 0.4488258361816406, + "step": 7998 + }, + { + "epoch": 1.8443624625317039, + "grad_norm": 1.479116299891256, + "learning_rate": 3.30296023795863e-08, + "loss": 0.5122581720352173, + "step": 7999 + }, + { + "epoch": 1.8445930366612866, + "grad_norm": 1.4735639536720297, + "learning_rate": 3.293249347792493e-08, + "loss": 0.4619610905647278, + "step": 8000 + }, + { + "epoch": 1.8448236107908693, + "grad_norm": 1.3540260330438945, + "learning_rate": 3.2835525149326636e-08, + "loss": 0.4214603006839752, + "step": 8001 + }, + { + "epoch": 
1.845054184920452, + "grad_norm": 1.4074387483331625, + "learning_rate": 3.2738697407886485e-08, + "loss": 0.40279510617256165, + "step": 8002 + }, + { + "epoch": 1.8452847590500348, + "grad_norm": 1.4474967943141424, + "learning_rate": 3.264201026767977e-08, + "loss": 0.4797242283821106, + "step": 8003 + }, + { + "epoch": 1.8455153331796172, + "grad_norm": 1.3554973222515974, + "learning_rate": 3.254546374276057e-08, + "loss": 0.3833237588405609, + "step": 8004 + }, + { + "epoch": 1.8457459073092, + "grad_norm": 1.4594426546625732, + "learning_rate": 3.244905784716323e-08, + "loss": 0.41461342573165894, + "step": 8005 + }, + { + "epoch": 1.8459764814387825, + "grad_norm": 1.5177617199741877, + "learning_rate": 3.235279259490109e-08, + "loss": 0.592107892036438, + "step": 8006 + }, + { + "epoch": 1.8462070555683652, + "grad_norm": 1.684042887917187, + "learning_rate": 3.2256667999967405e-08, + "loss": 0.39025670289993286, + "step": 8007 + }, + { + "epoch": 1.846437629697948, + "grad_norm": 1.286539298720562, + "learning_rate": 3.2160684076334766e-08, + "loss": 0.40197378396987915, + "step": 8008 + }, + { + "epoch": 1.8466682038275306, + "grad_norm": 1.8155125046022762, + "learning_rate": 3.206484083795558e-08, + "loss": 0.4013815224170685, + "step": 8009 + }, + { + "epoch": 1.8468987779571133, + "grad_norm": 1.5762142363003944, + "learning_rate": 3.1969138298761356e-08, + "loss": 0.45386412739753723, + "step": 8010 + }, + { + "epoch": 1.8471293520866958, + "grad_norm": 1.8756892627173425, + "learning_rate": 3.187357647266353e-08, + "loss": 0.43034985661506653, + "step": 8011 + }, + { + "epoch": 1.8473599262162785, + "grad_norm": 1.6730495727197179, + "learning_rate": 3.177815537355322e-08, + "loss": 0.4346637725830078, + "step": 8012 + }, + { + "epoch": 1.847590500345861, + "grad_norm": 1.8461631710642654, + "learning_rate": 3.1682875015300535e-08, + "loss": 0.5203511118888855, + "step": 8013 + }, + { + "epoch": 1.8478210744754437, + "grad_norm": 
1.5817324628827356, + "learning_rate": 3.1587735411755636e-08, + "loss": 0.37658393383026123, + "step": 8014 + }, + { + "epoch": 1.8480516486050265, + "grad_norm": 1.6304961028131815, + "learning_rate": 3.149273657674789e-08, + "loss": 0.5473518371582031, + "step": 8015 + }, + { + "epoch": 1.8482822227346092, + "grad_norm": 1.800633884327913, + "learning_rate": 3.1397878524086484e-08, + "loss": 0.5171597599983215, + "step": 8016 + }, + { + "epoch": 1.848512796864192, + "grad_norm": 1.585245081928725, + "learning_rate": 3.130316126755983e-08, + "loss": 0.46588706970214844, + "step": 8017 + }, + { + "epoch": 1.8487433709937746, + "grad_norm": 1.496582071882617, + "learning_rate": 3.1208584820936244e-08, + "loss": 0.5571366548538208, + "step": 8018 + }, + { + "epoch": 1.848973945123357, + "grad_norm": 1.5249372170069353, + "learning_rate": 3.111414919796318e-08, + "loss": 0.45803195238113403, + "step": 8019 + }, + { + "epoch": 1.8492045192529398, + "grad_norm": 1.4834943043987898, + "learning_rate": 3.1019854412367875e-08, + "loss": 0.4732629060745239, + "step": 8020 + }, + { + "epoch": 1.8494350933825223, + "grad_norm": 1.7625144420898597, + "learning_rate": 3.092570047785714e-08, + "loss": 0.5268767476081848, + "step": 8021 + }, + { + "epoch": 1.849665667512105, + "grad_norm": 1.5017810734056087, + "learning_rate": 3.0831687408117035e-08, + "loss": 0.5179537534713745, + "step": 8022 + }, + { + "epoch": 1.8498962416416878, + "grad_norm": 1.7406452748153565, + "learning_rate": 3.073781521681351e-08, + "loss": 0.5110389590263367, + "step": 8023 + }, + { + "epoch": 1.8501268157712705, + "grad_norm": 1.442631804804713, + "learning_rate": 3.064408391759154e-08, + "loss": 0.4078633189201355, + "step": 8024 + }, + { + "epoch": 1.8503573899008532, + "grad_norm": 1.6619024740283894, + "learning_rate": 3.055049352407624e-08, + "loss": 0.4632648229598999, + "step": 8025 + }, + { + "epoch": 1.850587964030436, + "grad_norm": 1.577432813868154, + "learning_rate": 
3.0457044049871705e-08, + "loss": 0.41569265723228455, + "step": 8026 + }, + { + "epoch": 1.8508185381600184, + "grad_norm": 1.3795657287644, + "learning_rate": 3.036373550856186e-08, + "loss": 0.4105853736400604, + "step": 8027 + }, + { + "epoch": 1.8510491122896011, + "grad_norm": 1.6584799060214424, + "learning_rate": 3.027056791370996e-08, + "loss": 0.4415978789329529, + "step": 8028 + }, + { + "epoch": 1.8512796864191836, + "grad_norm": 1.571030596092026, + "learning_rate": 3.017754127885908e-08, + "loss": 0.3990614414215088, + "step": 8029 + }, + { + "epoch": 1.8515102605487663, + "grad_norm": 1.5323241652532567, + "learning_rate": 3.0084655617531376e-08, + "loss": 0.42349040508270264, + "step": 8030 + }, + { + "epoch": 1.851740834678349, + "grad_norm": 1.4436112405033301, + "learning_rate": 2.9991910943228725e-08, + "loss": 0.4687228798866272, + "step": 8031 + }, + { + "epoch": 1.8519714088079318, + "grad_norm": 1.91227305815919, + "learning_rate": 2.989930726943268e-08, + "loss": 0.6091229915618896, + "step": 8032 + }, + { + "epoch": 1.8522019829375145, + "grad_norm": 1.527659992048368, + "learning_rate": 2.980684460960381e-08, + "loss": 0.43401795625686646, + "step": 8033 + }, + { + "epoch": 1.8524325570670972, + "grad_norm": 1.521615388244922, + "learning_rate": 2.9714522977182688e-08, + "loss": 0.47280481457710266, + "step": 8034 + }, + { + "epoch": 1.8526631311966797, + "grad_norm": 1.6019291161476, + "learning_rate": 2.962234238558925e-08, + "loss": 0.5078729391098022, + "step": 8035 + }, + { + "epoch": 1.8528937053262624, + "grad_norm": 1.8353491661496104, + "learning_rate": 2.9530302848223e-08, + "loss": 0.4279085695743561, + "step": 8036 + }, + { + "epoch": 1.853124279455845, + "grad_norm": 1.4587208506754334, + "learning_rate": 2.9438404378462455e-08, + "loss": 0.3720093369483948, + "step": 8037 + }, + { + "epoch": 1.8533548535854276, + "grad_norm": 1.810026420285634, + "learning_rate": 2.934664698966627e-08, + "loss": 0.26778513193130493, + 
"step": 8038 + }, + { + "epoch": 1.8535854277150103, + "grad_norm": 1.569617242169025, + "learning_rate": 2.9255030695172324e-08, + "loss": 0.47606828808784485, + "step": 8039 + }, + { + "epoch": 1.853816001844593, + "grad_norm": 1.8330928647910023, + "learning_rate": 2.9163555508297632e-08, + "loss": 0.437153160572052, + "step": 8040 + }, + { + "epoch": 1.8540465759741758, + "grad_norm": 1.3219241142527494, + "learning_rate": 2.907222144233945e-08, + "loss": 0.408009797334671, + "step": 8041 + }, + { + "epoch": 1.8542771501037585, + "grad_norm": 1.3761080217774861, + "learning_rate": 2.8981028510573824e-08, + "loss": 0.3435688018798828, + "step": 8042 + }, + { + "epoch": 1.854507724233341, + "grad_norm": 1.881646492298394, + "learning_rate": 2.8889976726256705e-08, + "loss": 0.4829018712043762, + "step": 8043 + }, + { + "epoch": 1.8547382983629237, + "grad_norm": 1.5758694223281, + "learning_rate": 2.879906610262339e-08, + "loss": 0.44579288363456726, + "step": 8044 + }, + { + "epoch": 1.8549688724925062, + "grad_norm": 1.3922554430382053, + "learning_rate": 2.8708296652888764e-08, + "loss": 0.4952869415283203, + "step": 8045 + }, + { + "epoch": 1.855199446622089, + "grad_norm": 1.4450922871815606, + "learning_rate": 2.8617668390246818e-08, + "loss": 0.4870997965335846, + "step": 8046 + }, + { + "epoch": 1.8554300207516716, + "grad_norm": 1.5651252792966914, + "learning_rate": 2.8527181327871465e-08, + "loss": 0.5009135603904724, + "step": 8047 + }, + { + "epoch": 1.8556605948812543, + "grad_norm": 1.3977550991376733, + "learning_rate": 2.8436835478915954e-08, + "loss": 0.4837114214897156, + "step": 8048 + }, + { + "epoch": 1.855891169010837, + "grad_norm": 1.6474653449248091, + "learning_rate": 2.8346630856512897e-08, + "loss": 0.47955578565597534, + "step": 8049 + }, + { + "epoch": 1.8561217431404198, + "grad_norm": 1.705788106947518, + "learning_rate": 2.8256567473774363e-08, + "loss": 0.4882965385913849, + "step": 8050 + }, + { + "epoch": 1.8563523172700023, + 
"grad_norm": 1.5940097685845425, + "learning_rate": 2.8166645343792094e-08, + "loss": 0.4542367458343506, + "step": 8051 + }, + { + "epoch": 1.856582891399585, + "grad_norm": 1.5880265061576002, + "learning_rate": 2.8076864479637198e-08, + "loss": 0.4506416916847229, + "step": 8052 + }, + { + "epoch": 1.8568134655291675, + "grad_norm": 1.699970116686096, + "learning_rate": 2.798722489436012e-08, + "loss": 0.5043084025382996, + "step": 8053 + }, + { + "epoch": 1.8570440396587502, + "grad_norm": 1.397398070036947, + "learning_rate": 2.78977266009911e-08, + "loss": 0.3711032271385193, + "step": 8054 + }, + { + "epoch": 1.857274613788333, + "grad_norm": 1.3008294527362816, + "learning_rate": 2.7808369612539405e-08, + "loss": 0.33371198177337646, + "step": 8055 + }, + { + "epoch": 1.8575051879179156, + "grad_norm": 1.7364482681056421, + "learning_rate": 2.771915394199409e-08, + "loss": 0.5328178405761719, + "step": 8056 + }, + { + "epoch": 1.8577357620474984, + "grad_norm": 1.925308909381556, + "learning_rate": 2.7630079602323443e-08, + "loss": 0.4615975618362427, + "step": 8057 + }, + { + "epoch": 1.857966336177081, + "grad_norm": 1.506605490676224, + "learning_rate": 2.754114660647533e-08, + "loss": 0.4667460024356842, + "step": 8058 + }, + { + "epoch": 1.8581969103066636, + "grad_norm": 1.7246190337812906, + "learning_rate": 2.745235496737719e-08, + "loss": 0.483825147151947, + "step": 8059 + }, + { + "epoch": 1.8584274844362463, + "grad_norm": 1.7802094460466942, + "learning_rate": 2.736370469793592e-08, + "loss": 0.4376814365386963, + "step": 8060 + }, + { + "epoch": 1.8586580585658288, + "grad_norm": 1.4605341926622646, + "learning_rate": 2.7275195811037432e-08, + "loss": 0.4862465262413025, + "step": 8061 + }, + { + "epoch": 1.8588886326954115, + "grad_norm": 1.6497121576486102, + "learning_rate": 2.718682831954744e-08, + "loss": 0.48104172945022583, + "step": 8062 + }, + { + "epoch": 1.8591192068249942, + "grad_norm": 1.3643295104524422, + "learning_rate": 
2.709860223631122e-08, + "loss": 0.43358030915260315, + "step": 8063 + }, + { + "epoch": 1.859349780954577, + "grad_norm": 1.3052220670178016, + "learning_rate": 2.701051757415307e-08, + "loss": 0.44614607095718384, + "step": 8064 + }, + { + "epoch": 1.8595803550841596, + "grad_norm": 1.8220525339474862, + "learning_rate": 2.6922574345877303e-08, + "loss": 0.49824249744415283, + "step": 8065 + }, + { + "epoch": 1.8598109292137424, + "grad_norm": 1.3314333068504594, + "learning_rate": 2.683477256426714e-08, + "loss": 0.39621901512145996, + "step": 8066 + }, + { + "epoch": 1.8600415033433249, + "grad_norm": 1.3391032368154236, + "learning_rate": 2.6747112242085478e-08, + "loss": 0.40166205167770386, + "step": 8067 + }, + { + "epoch": 1.8602720774729076, + "grad_norm": 1.720101921843303, + "learning_rate": 2.6659593392074575e-08, + "loss": 0.4249534606933594, + "step": 8068 + }, + { + "epoch": 1.86050265160249, + "grad_norm": 1.3203085704476971, + "learning_rate": 2.6572216026956473e-08, + "loss": 0.4015510678291321, + "step": 8069 + }, + { + "epoch": 1.8607332257320728, + "grad_norm": 1.8982655978960439, + "learning_rate": 2.6484980159432236e-08, + "loss": 0.4691264033317566, + "step": 8070 + }, + { + "epoch": 1.8609637998616555, + "grad_norm": 1.6363630573411998, + "learning_rate": 2.639788580218216e-08, + "loss": 0.5095053315162659, + "step": 8071 + }, + { + "epoch": 1.8611943739912382, + "grad_norm": 1.707433776183968, + "learning_rate": 2.6310932967866794e-08, + "loss": 0.4658794403076172, + "step": 8072 + }, + { + "epoch": 1.861424948120821, + "grad_norm": 1.7622547433521365, + "learning_rate": 2.622412166912513e-08, + "loss": 0.495827853679657, + "step": 8073 + }, + { + "epoch": 1.8616555222504036, + "grad_norm": 1.6584095706736666, + "learning_rate": 2.6137451918576413e-08, + "loss": 0.43652772903442383, + "step": 8074 + }, + { + "epoch": 1.8618860963799861, + "grad_norm": 1.410927084601702, + "learning_rate": 2.6050923728818784e-08, + "loss": 
0.4636423587799072, + "step": 8075 + }, + { + "epoch": 1.8621166705095689, + "grad_norm": 1.6137478822178715, + "learning_rate": 2.5964537112430186e-08, + "loss": 0.4572441577911377, + "step": 8076 + }, + { + "epoch": 1.8623472446391514, + "grad_norm": 1.5268149737583054, + "learning_rate": 2.587829208196757e-08, + "loss": 0.4549320340156555, + "step": 8077 + }, + { + "epoch": 1.862577818768734, + "grad_norm": 1.4757300368438027, + "learning_rate": 2.5792188649967795e-08, + "loss": 0.46412795782089233, + "step": 8078 + }, + { + "epoch": 1.8628083928983168, + "grad_norm": 1.566100546942984, + "learning_rate": 2.570622682894652e-08, + "loss": 0.40059781074523926, + "step": 8079 + }, + { + "epoch": 1.8630389670278995, + "grad_norm": 1.8382248312833556, + "learning_rate": 2.5620406631399416e-08, + "loss": 0.5396246910095215, + "step": 8080 + }, + { + "epoch": 1.8632695411574822, + "grad_norm": 1.630240250521673, + "learning_rate": 2.553472806980128e-08, + "loss": 0.4793856143951416, + "step": 8081 + }, + { + "epoch": 1.863500115287065, + "grad_norm": 1.7081981493499068, + "learning_rate": 2.5449191156606264e-08, + "loss": 0.4428815543651581, + "step": 8082 + }, + { + "epoch": 1.8637306894166474, + "grad_norm": 1.3161952024113066, + "learning_rate": 2.5363795904248086e-08, + "loss": 0.4024256467819214, + "step": 8083 + }, + { + "epoch": 1.8639612635462302, + "grad_norm": 1.7334425937535092, + "learning_rate": 2.5278542325139818e-08, + "loss": 0.4868123531341553, + "step": 8084 + }, + { + "epoch": 1.8641918376758126, + "grad_norm": 1.8199560965911645, + "learning_rate": 2.519343043167399e-08, + "loss": 0.602108359336853, + "step": 8085 + }, + { + "epoch": 1.8644224118053954, + "grad_norm": 1.8527423308196338, + "learning_rate": 2.510846023622237e-08, + "loss": 0.4500008225440979, + "step": 8086 + }, + { + "epoch": 1.864652985934978, + "grad_norm": 1.4521386296534855, + "learning_rate": 2.502363175113642e-08, + "loss": 0.3894640803337097, + "step": 8087 + }, + { + 
"epoch": 1.8648835600645608, + "grad_norm": 1.471988486213167, + "learning_rate": 2.493894498874649e-08, + "loss": 0.4525550305843353, + "step": 8088 + }, + { + "epoch": 1.8651141341941435, + "grad_norm": 1.362693221908779, + "learning_rate": 2.485439996136296e-08, + "loss": 0.3908608555793762, + "step": 8089 + }, + { + "epoch": 1.8653447083237262, + "grad_norm": 1.5537540661666722, + "learning_rate": 2.4769996681275106e-08, + "loss": 0.4551984667778015, + "step": 8090 + }, + { + "epoch": 1.8655752824533087, + "grad_norm": 1.3331466559033927, + "learning_rate": 2.468573516075201e-08, + "loss": 0.34474045038223267, + "step": 8091 + }, + { + "epoch": 1.8658058565828914, + "grad_norm": 1.675344505563735, + "learning_rate": 2.4601615412041755e-08, + "loss": 0.41480594873428345, + "step": 8092 + }, + { + "epoch": 1.866036430712474, + "grad_norm": 1.6368782805002868, + "learning_rate": 2.4517637447372007e-08, + "loss": 0.5043104887008667, + "step": 8093 + }, + { + "epoch": 1.8662670048420567, + "grad_norm": 1.7139805676568358, + "learning_rate": 2.4433801278950007e-08, + "loss": 0.4467152953147888, + "step": 8094 + }, + { + "epoch": 1.8664975789716394, + "grad_norm": 1.5274424401661542, + "learning_rate": 2.4350106918962e-08, + "loss": 0.454445481300354, + "step": 8095 + }, + { + "epoch": 1.866728153101222, + "grad_norm": 1.5661075903861215, + "learning_rate": 2.426655437957392e-08, + "loss": 0.4639291763305664, + "step": 8096 + }, + { + "epoch": 1.8669587272308048, + "grad_norm": 1.6251687636184629, + "learning_rate": 2.418314367293084e-08, + "loss": 0.46178731322288513, + "step": 8097 + }, + { + "epoch": 1.8671893013603875, + "grad_norm": 1.5047265923361783, + "learning_rate": 2.4099874811157383e-08, + "loss": 0.43832290172576904, + "step": 8098 + }, + { + "epoch": 1.86741987548997, + "grad_norm": 1.569040322283118, + "learning_rate": 2.4016747806357652e-08, + "loss": 0.4586114287376404, + "step": 8099 + }, + { + "epoch": 1.8676504496195527, + "grad_norm": 
1.403368540081911, + "learning_rate": 2.3933762670614978e-08, + "loss": 0.37975889444351196, + "step": 8100 + }, + { + "epoch": 1.8678810237491352, + "grad_norm": 1.6666819300781532, + "learning_rate": 2.3850919415992042e-08, + "loss": 0.4579748511314392, + "step": 8101 + }, + { + "epoch": 1.868111597878718, + "grad_norm": 1.5976733248377182, + "learning_rate": 2.3768218054530775e-08, + "loss": 0.5120238661766052, + "step": 8102 + }, + { + "epoch": 1.8683421720083007, + "grad_norm": 1.47865092584181, + "learning_rate": 2.3685658598253e-08, + "loss": 0.41514822840690613, + "step": 8103 + }, + { + "epoch": 1.8685727461378834, + "grad_norm": 1.6132937806442644, + "learning_rate": 2.360324105915934e-08, + "loss": 0.49480026960372925, + "step": 8104 + }, + { + "epoch": 1.868803320267466, + "grad_norm": 1.516759878457302, + "learning_rate": 2.352096544922999e-08, + "loss": 0.41115111112594604, + "step": 8105 + }, + { + "epoch": 1.8690338943970488, + "grad_norm": 1.8593225608723183, + "learning_rate": 2.3438831780424607e-08, + "loss": 0.44793501496315, + "step": 8106 + }, + { + "epoch": 1.8692644685266313, + "grad_norm": 2.087747863463927, + "learning_rate": 2.3356840064682305e-08, + "loss": 0.4197582006454468, + "step": 8107 + }, + { + "epoch": 1.869495042656214, + "grad_norm": 1.3708560469219937, + "learning_rate": 2.3274990313921218e-08, + "loss": 0.3654597997665405, + "step": 8108 + }, + { + "epoch": 1.8697256167857965, + "grad_norm": 1.6733057347639861, + "learning_rate": 2.319328254003927e-08, + "loss": 0.5105487704277039, + "step": 8109 + }, + { + "epoch": 1.8699561909153792, + "grad_norm": 1.6787548385436994, + "learning_rate": 2.3111716754913192e-08, + "loss": 0.5202287435531616, + "step": 8110 + }, + { + "epoch": 1.870186765044962, + "grad_norm": 1.5305524386936447, + "learning_rate": 2.303029297039949e-08, + "loss": 0.4475836753845215, + "step": 8111 + }, + { + "epoch": 1.8704173391745447, + "grad_norm": 1.579007380002247, + "learning_rate": 
2.2949011198334144e-08, + "loss": 0.5010285973548889, + "step": 8112 + }, + { + "epoch": 1.8706479133041274, + "grad_norm": 1.4473541177707174, + "learning_rate": 2.286787145053204e-08, + "loss": 0.41949477791786194, + "step": 8113 + }, + { + "epoch": 1.87087848743371, + "grad_norm": 1.3276801089952157, + "learning_rate": 2.2786873738787738e-08, + "loss": 0.38505449891090393, + "step": 8114 + }, + { + "epoch": 1.8711090615632926, + "grad_norm": 1.8776948972547884, + "learning_rate": 2.2706018074875043e-08, + "loss": 0.4854990839958191, + "step": 8115 + }, + { + "epoch": 1.8713396356928753, + "grad_norm": 1.3982424394333428, + "learning_rate": 2.2625304470547336e-08, + "loss": 0.3846585154533386, + "step": 8116 + }, + { + "epoch": 1.8715702098224578, + "grad_norm": 1.7499321509858707, + "learning_rate": 2.2544732937537003e-08, + "loss": 0.48948657512664795, + "step": 8117 + }, + { + "epoch": 1.8718007839520405, + "grad_norm": 2.062408637955344, + "learning_rate": 2.2464303487555902e-08, + "loss": 0.5571197867393494, + "step": 8118 + }, + { + "epoch": 1.8720313580816232, + "grad_norm": 1.6301482456607912, + "learning_rate": 2.2384016132295345e-08, + "loss": 0.514819324016571, + "step": 8119 + }, + { + "epoch": 1.872261932211206, + "grad_norm": 1.5677432247071832, + "learning_rate": 2.230387088342589e-08, + "loss": 0.4411713182926178, + "step": 8120 + }, + { + "epoch": 1.8724925063407887, + "grad_norm": 1.4508146354194726, + "learning_rate": 2.2223867752597437e-08, + "loss": 0.4494340717792511, + "step": 8121 + }, + { + "epoch": 1.8727230804703712, + "grad_norm": 1.6205003929883524, + "learning_rate": 2.2144006751439236e-08, + "loss": 0.4186316132545471, + "step": 8122 + }, + { + "epoch": 1.8729536545999539, + "grad_norm": 1.5017815147990925, + "learning_rate": 2.2064287891560007e-08, + "loss": 0.45932692289352417, + "step": 8123 + }, + { + "epoch": 1.8731842287295364, + "grad_norm": 1.475598332139336, + "learning_rate": 2.1984711184547477e-08, + "loss": 
0.4095005989074707, + "step": 8124 + }, + { + "epoch": 1.873414802859119, + "grad_norm": 1.4633944208901333, + "learning_rate": 2.1905276641969284e-08, + "loss": 0.3822292685508728, + "step": 8125 + }, + { + "epoch": 1.8736453769887018, + "grad_norm": 1.5993925787143786, + "learning_rate": 2.1825984275371633e-08, + "loss": 0.41837501525878906, + "step": 8126 + }, + { + "epoch": 1.8738759511182845, + "grad_norm": 1.6176173713553115, + "learning_rate": 2.1746834096280752e-08, + "loss": 0.3903341591358185, + "step": 8127 + }, + { + "epoch": 1.8741065252478672, + "grad_norm": 1.4079834631265329, + "learning_rate": 2.166782611620177e-08, + "loss": 0.4760533571243286, + "step": 8128 + }, + { + "epoch": 1.87433709937745, + "grad_norm": 1.4208864897990974, + "learning_rate": 2.1588960346619388e-08, + "loss": 0.43960827589035034, + "step": 8129 + }, + { + "epoch": 1.8745676735070325, + "grad_norm": 1.7654096006141957, + "learning_rate": 2.151023679899755e-08, + "loss": 0.47941142320632935, + "step": 8130 + }, + { + "epoch": 1.8747982476366152, + "grad_norm": 1.41048993466122, + "learning_rate": 2.143165548477943e-08, + "loss": 0.4467000961303711, + "step": 8131 + }, + { + "epoch": 1.8750288217661977, + "grad_norm": 1.4796609851220597, + "learning_rate": 2.1353216415387788e-08, + "loss": 0.42472416162490845, + "step": 8132 + }, + { + "epoch": 1.8752593958957804, + "grad_norm": 1.9200971165248846, + "learning_rate": 2.1274919602224273e-08, + "loss": 0.5127208232879639, + "step": 8133 + }, + { + "epoch": 1.875489970025363, + "grad_norm": 1.8325759046238386, + "learning_rate": 2.119676505667045e-08, + "loss": 0.5362575650215149, + "step": 8134 + }, + { + "epoch": 1.8757205441549458, + "grad_norm": 1.2983178226172876, + "learning_rate": 2.111875279008657e-08, + "loss": 0.4025413990020752, + "step": 8135 + }, + { + "epoch": 1.8759511182845285, + "grad_norm": 1.5647543555868217, + "learning_rate": 2.1040882813812667e-08, + "loss": 0.49126237630844116, + "step": 8136 + }, + { + 
"epoch": 1.8761816924141113, + "grad_norm": 1.64373423682739, + "learning_rate": 2.096315513916791e-08, + "loss": 0.40609198808670044, + "step": 8137 + }, + { + "epoch": 1.8764122665436938, + "grad_norm": 1.4881317882345182, + "learning_rate": 2.0885569777450707e-08, + "loss": 0.47826945781707764, + "step": 8138 + }, + { + "epoch": 1.8766428406732765, + "grad_norm": 1.4578062807690564, + "learning_rate": 2.0808126739939035e-08, + "loss": 0.39987948536872864, + "step": 8139 + }, + { + "epoch": 1.876873414802859, + "grad_norm": 1.6010627164873539, + "learning_rate": 2.0730826037890003e-08, + "loss": 0.5727471113204956, + "step": 8140 + }, + { + "epoch": 1.8771039889324417, + "grad_norm": 1.3737495035065335, + "learning_rate": 2.0653667682540066e-08, + "loss": 0.4772847294807434, + "step": 8141 + }, + { + "epoch": 1.8773345630620244, + "grad_norm": 1.54097710668183, + "learning_rate": 2.0576651685104697e-08, + "loss": 0.3258974552154541, + "step": 8142 + }, + { + "epoch": 1.8775651371916071, + "grad_norm": 1.4067173519179077, + "learning_rate": 2.049977805677938e-08, + "loss": 0.5220766067504883, + "step": 8143 + }, + { + "epoch": 1.8777957113211898, + "grad_norm": 1.2918102910413813, + "learning_rate": 2.0423046808738077e-08, + "loss": 0.39550334215164185, + "step": 8144 + }, + { + "epoch": 1.8780262854507725, + "grad_norm": 2.3983596335767334, + "learning_rate": 2.034645795213463e-08, + "loss": 0.4487137198448181, + "step": 8145 + }, + { + "epoch": 1.878256859580355, + "grad_norm": 1.3947776950768658, + "learning_rate": 2.0270011498102147e-08, + "loss": 0.3363339304924011, + "step": 8146 + }, + { + "epoch": 1.8784874337099378, + "grad_norm": 1.5333942075668883, + "learning_rate": 2.019370745775273e-08, + "loss": 0.5161975026130676, + "step": 8147 + }, + { + "epoch": 1.8787180078395203, + "grad_norm": 1.4587907721196531, + "learning_rate": 2.011754584217784e-08, + "loss": 0.359643816947937, + "step": 8148 + }, + { + "epoch": 1.878948581969103, + "grad_norm": 
1.3696377552673178, + "learning_rate": 2.0041526662448625e-08, + "loss": 0.4472349286079407, + "step": 8149 + }, + { + "epoch": 1.8791791560986857, + "grad_norm": 1.6693442042315434, + "learning_rate": 1.9965649929615135e-08, + "loss": 0.40363550186157227, + "step": 8150 + }, + { + "epoch": 1.8794097302282684, + "grad_norm": 1.7598833036688746, + "learning_rate": 1.9889915654706656e-08, + "loss": 0.46063172817230225, + "step": 8151 + }, + { + "epoch": 1.8796403043578511, + "grad_norm": 1.6348416553504144, + "learning_rate": 1.981432384873205e-08, + "loss": 0.4478832483291626, + "step": 8152 + }, + { + "epoch": 1.8798708784874338, + "grad_norm": 1.7016857171242656, + "learning_rate": 1.9738874522679304e-08, + "loss": 0.3438538908958435, + "step": 8153 + }, + { + "epoch": 1.8801014526170163, + "grad_norm": 2.2031337611169435, + "learning_rate": 1.966356768751598e-08, + "loss": 0.6035101413726807, + "step": 8154 + }, + { + "epoch": 1.880332026746599, + "grad_norm": 1.6642481554824737, + "learning_rate": 1.958840335418832e-08, + "loss": 0.42533814907073975, + "step": 8155 + }, + { + "epoch": 1.8805626008761815, + "grad_norm": 1.5825430260849223, + "learning_rate": 1.9513381533622587e-08, + "loss": 0.4117417633533478, + "step": 8156 + }, + { + "epoch": 1.8807931750057643, + "grad_norm": 1.6218701576707837, + "learning_rate": 1.943850223672361e-08, + "loss": 0.4353973865509033, + "step": 8157 + }, + { + "epoch": 1.881023749135347, + "grad_norm": 1.5613174256794196, + "learning_rate": 1.9363765474376125e-08, + "loss": 0.46115410327911377, + "step": 8158 + }, + { + "epoch": 1.8812543232649297, + "grad_norm": 1.4415196194001674, + "learning_rate": 1.9289171257443782e-08, + "loss": 0.3851476311683655, + "step": 8159 + }, + { + "epoch": 1.8814848973945124, + "grad_norm": 1.5586436794771006, + "learning_rate": 1.921471959676957e-08, + "loss": 0.4786919355392456, + "step": 8160 + }, + { + "epoch": 1.8817154715240951, + "grad_norm": 1.6398537249529117, + "learning_rate": 
1.914041050317583e-08, + "loss": 0.4427906274795532, + "step": 8161 + }, + { + "epoch": 1.8819460456536776, + "grad_norm": 1.495606046913042, + "learning_rate": 1.906624398746415e-08, + "loss": 0.37774696946144104, + "step": 8162 + }, + { + "epoch": 1.8821766197832603, + "grad_norm": 1.5733237369323263, + "learning_rate": 1.8992220060415343e-08, + "loss": 0.43793195486068726, + "step": 8163 + }, + { + "epoch": 1.8824071939128428, + "grad_norm": 1.2904039749569203, + "learning_rate": 1.8918338732789587e-08, + "loss": 0.3869394063949585, + "step": 8164 + }, + { + "epoch": 1.8826377680424256, + "grad_norm": 1.9325019962539283, + "learning_rate": 1.8844600015326283e-08, + "loss": 0.4963928461074829, + "step": 8165 + }, + { + "epoch": 1.8828683421720083, + "grad_norm": 1.5945637624217548, + "learning_rate": 1.8771003918743978e-08, + "loss": 0.45727187395095825, + "step": 8166 + }, + { + "epoch": 1.883098916301591, + "grad_norm": 1.8455372682093192, + "learning_rate": 1.8697550453740884e-08, + "loss": 0.4878919720649719, + "step": 8167 + }, + { + "epoch": 1.8833294904311737, + "grad_norm": 1.7826396913976752, + "learning_rate": 1.862423963099391e-08, + "loss": 0.5376998782157898, + "step": 8168 + }, + { + "epoch": 1.8835600645607564, + "grad_norm": 1.4765870494853872, + "learning_rate": 1.8551071461159638e-08, + "loss": 0.4534180760383606, + "step": 8169 + }, + { + "epoch": 1.883790638690339, + "grad_norm": 1.561114582514347, + "learning_rate": 1.847804595487379e-08, + "loss": 0.43389183282852173, + "step": 8170 + }, + { + "epoch": 1.8840212128199216, + "grad_norm": 1.535519375075225, + "learning_rate": 1.8405163122751532e-08, + "loss": 0.4833742678165436, + "step": 8171 + }, + { + "epoch": 1.8842517869495041, + "grad_norm": 1.622186588307033, + "learning_rate": 1.833242297538695e-08, + "loss": 0.49344220757484436, + "step": 8172 + }, + { + "epoch": 1.8844823610790868, + "grad_norm": 1.4984978840285303, + "learning_rate": 1.8259825523353478e-08, + "loss": 
0.49290287494659424, + "step": 8173 + }, + { + "epoch": 1.8847129352086696, + "grad_norm": 1.3380486770022888, + "learning_rate": 1.8187370777204115e-08, + "loss": 0.3971661627292633, + "step": 8174 + }, + { + "epoch": 1.8849435093382523, + "grad_norm": 1.5640300636460862, + "learning_rate": 1.811505874747066e-08, + "loss": 0.4984559416770935, + "step": 8175 + }, + { + "epoch": 1.885174083467835, + "grad_norm": 1.5865101985098036, + "learning_rate": 1.804288944466459e-08, + "loss": 0.38448822498321533, + "step": 8176 + }, + { + "epoch": 1.8854046575974177, + "grad_norm": 1.9477188873182039, + "learning_rate": 1.7970862879276406e-08, + "loss": 0.5468838214874268, + "step": 8177 + }, + { + "epoch": 1.8856352317270002, + "grad_norm": 1.4768596083300787, + "learning_rate": 1.7898979061775844e-08, + "loss": 0.46132227778434753, + "step": 8178 + }, + { + "epoch": 1.885865805856583, + "grad_norm": 1.436520509516384, + "learning_rate": 1.782723800261199e-08, + "loss": 0.4636603593826294, + "step": 8179 + }, + { + "epoch": 1.8860963799861654, + "grad_norm": 1.5429934177783204, + "learning_rate": 1.7755639712213057e-08, + "loss": 0.5302075147628784, + "step": 8180 + }, + { + "epoch": 1.8863269541157481, + "grad_norm": 1.6563780466455296, + "learning_rate": 1.7684184200986718e-08, + "loss": 0.4817178249359131, + "step": 8181 + }, + { + "epoch": 1.8865575282453309, + "grad_norm": 1.4897334937072715, + "learning_rate": 1.7612871479319668e-08, + "loss": 0.4535263180732727, + "step": 8182 + }, + { + "epoch": 1.8867881023749136, + "grad_norm": 1.6029244875460678, + "learning_rate": 1.7541701557577837e-08, + "loss": 0.5260534286499023, + "step": 8183 + }, + { + "epoch": 1.8870186765044963, + "grad_norm": 1.4065276330082377, + "learning_rate": 1.7470674446106614e-08, + "loss": 0.4526366591453552, + "step": 8184 + }, + { + "epoch": 1.887249250634079, + "grad_norm": 1.663451618032215, + "learning_rate": 1.7399790155230632e-08, + "loss": 0.4721973240375519, + "step": 8185 + }, + { + 
"epoch": 1.8874798247636615, + "grad_norm": 1.6510288712519465, + "learning_rate": 1.7329048695253422e-08, + "loss": 0.4331268072128296, + "step": 8186 + }, + { + "epoch": 1.8877103988932442, + "grad_norm": 1.9623503418050199, + "learning_rate": 1.7258450076458097e-08, + "loss": 0.5175650119781494, + "step": 8187 + }, + { + "epoch": 1.8879409730228267, + "grad_norm": 1.3640756960267433, + "learning_rate": 1.718799430910678e-08, + "loss": 0.45537033677101135, + "step": 8188 + }, + { + "epoch": 1.8881715471524094, + "grad_norm": 1.540072753548263, + "learning_rate": 1.7117681403441054e-08, + "loss": 0.5055547952651978, + "step": 8189 + }, + { + "epoch": 1.8884021212819921, + "grad_norm": 1.5849214553434074, + "learning_rate": 1.7047511369681522e-08, + "loss": 0.45514553785324097, + "step": 8190 + }, + { + "epoch": 1.8886326954115749, + "grad_norm": 1.4821599822935887, + "learning_rate": 1.6977484218028136e-08, + "loss": 0.44227129220962524, + "step": 8191 + }, + { + "epoch": 1.8888632695411576, + "grad_norm": 1.7163429603820965, + "learning_rate": 1.690759995866009e-08, + "loss": 0.4916682839393616, + "step": 8192 + }, + { + "epoch": 1.8890938436707403, + "grad_norm": 1.8219225402151713, + "learning_rate": 1.683785860173559e-08, + "loss": 0.48626652359962463, + "step": 8193 + }, + { + "epoch": 1.8893244178003228, + "grad_norm": 1.491517373721971, + "learning_rate": 1.676826015739252e-08, + "loss": 0.39982378482818604, + "step": 8194 + }, + { + "epoch": 1.8895549919299055, + "grad_norm": 1.8710391095575285, + "learning_rate": 1.6698804635747576e-08, + "loss": 0.49218645691871643, + "step": 8195 + }, + { + "epoch": 1.889785566059488, + "grad_norm": 1.5127362254029266, + "learning_rate": 1.6629492046896897e-08, + "loss": 0.38896578550338745, + "step": 8196 + }, + { + "epoch": 1.8900161401890707, + "grad_norm": 1.5870268370960243, + "learning_rate": 1.6560322400915538e-08, + "loss": 0.4217762053012848, + "step": 8197 + }, + { + "epoch": 1.8902467143186534, + "grad_norm": 
1.5231528042475502, + "learning_rate": 1.6491295707858343e-08, + "loss": 0.4020112156867981, + "step": 8198 + }, + { + "epoch": 1.8904772884482361, + "grad_norm": 2.1189678944561954, + "learning_rate": 1.6422411977758843e-08, + "loss": 0.4630794823169708, + "step": 8199 + }, + { + "epoch": 1.8907078625778189, + "grad_norm": 1.526138087578761, + "learning_rate": 1.6353671220629917e-08, + "loss": 0.3673272132873535, + "step": 8200 + }, + { + "epoch": 1.8909384367074016, + "grad_norm": 1.4930616058109705, + "learning_rate": 1.6285073446463903e-08, + "loss": 0.4677228331565857, + "step": 8201 + }, + { + "epoch": 1.891169010836984, + "grad_norm": 1.718939922651036, + "learning_rate": 1.621661866523216e-08, + "loss": 0.4532579183578491, + "step": 8202 + }, + { + "epoch": 1.8913995849665668, + "grad_norm": 1.4990742550855458, + "learning_rate": 1.6148306886885287e-08, + "loss": 0.3011256456375122, + "step": 8203 + }, + { + "epoch": 1.8916301590961493, + "grad_norm": 1.731114486954807, + "learning_rate": 1.6080138121352892e-08, + "loss": 0.43071651458740234, + "step": 8204 + }, + { + "epoch": 1.891860733225732, + "grad_norm": 1.4183554819693576, + "learning_rate": 1.6012112378544272e-08, + "loss": 0.3180675506591797, + "step": 8205 + }, + { + "epoch": 1.8920913073553147, + "grad_norm": 1.6038525214828652, + "learning_rate": 1.594422966834741e-08, + "loss": 0.35130774974823, + "step": 8206 + }, + { + "epoch": 1.8923218814848974, + "grad_norm": 1.388613528735296, + "learning_rate": 1.587649000062996e-08, + "loss": 0.4953269958496094, + "step": 8207 + }, + { + "epoch": 1.8925524556144802, + "grad_norm": 1.5668590048532676, + "learning_rate": 1.5808893385238388e-08, + "loss": 0.3713166415691376, + "step": 8208 + }, + { + "epoch": 1.8927830297440629, + "grad_norm": 1.4824855259294067, + "learning_rate": 1.5741439831998827e-08, + "loss": 0.4273546040058136, + "step": 8209 + }, + { + "epoch": 1.8930136038736454, + "grad_norm": 1.8212221910711959, + "learning_rate": 
1.5674129350715994e-08, + "loss": 0.45312386751174927, + "step": 8210 + }, + { + "epoch": 1.893244178003228, + "grad_norm": 1.4687276423683582, + "learning_rate": 1.560696195117439e-08, + "loss": 0.40246695280075073, + "step": 8211 + }, + { + "epoch": 1.8934747521328106, + "grad_norm": 1.9323139227263069, + "learning_rate": 1.5539937643137325e-08, + "loss": 0.5229366421699524, + "step": 8212 + }, + { + "epoch": 1.8937053262623933, + "grad_norm": 1.4419033757005335, + "learning_rate": 1.5473056436347554e-08, + "loss": 0.43834251165390015, + "step": 8213 + }, + { + "epoch": 1.893935900391976, + "grad_norm": 1.5176292463299432, + "learning_rate": 1.540631834052697e-08, + "loss": 0.4423528015613556, + "step": 8214 + }, + { + "epoch": 1.8941664745215587, + "grad_norm": 1.6176606345399394, + "learning_rate": 1.5339723365376478e-08, + "loss": 0.49888452887535095, + "step": 8215 + }, + { + "epoch": 1.8943970486511414, + "grad_norm": 1.7422668701695732, + "learning_rate": 1.5273271520576448e-08, + "loss": 0.44023919105529785, + "step": 8216 + }, + { + "epoch": 1.8946276227807242, + "grad_norm": 1.5430241161700802, + "learning_rate": 1.5206962815786262e-08, + "loss": 0.4733201861381531, + "step": 8217 + }, + { + "epoch": 1.8948581969103067, + "grad_norm": 1.992567039765999, + "learning_rate": 1.5140797260644768e-08, + "loss": 0.5393285751342773, + "step": 8218 + }, + { + "epoch": 1.8950887710398894, + "grad_norm": 1.5439154792235448, + "learning_rate": 1.507477486476949e-08, + "loss": 0.4240071773529053, + "step": 8219 + }, + { + "epoch": 1.8953193451694719, + "grad_norm": 1.4272355688005478, + "learning_rate": 1.5008895637757647e-08, + "loss": 0.42983078956604004, + "step": 8220 + }, + { + "epoch": 1.8955499192990546, + "grad_norm": 1.470069283076572, + "learning_rate": 1.4943159589185462e-08, + "loss": 0.47513502836227417, + "step": 8221 + }, + { + "epoch": 1.8957804934286373, + "grad_norm": 1.49966428795426, + "learning_rate": 1.4877566728608293e-08, + "loss": 
0.41938167810440063, + "step": 8222 + }, + { + "epoch": 1.89601106755822, + "grad_norm": 1.513306290399523, + "learning_rate": 1.4812117065560625e-08, + "loss": 0.44817137718200684, + "step": 8223 + }, + { + "epoch": 1.8962416416878027, + "grad_norm": 1.6563869108965783, + "learning_rate": 1.4746810609556292e-08, + "loss": 0.46840909123420715, + "step": 8224 + }, + { + "epoch": 1.8964722158173855, + "grad_norm": 1.4822882914533433, + "learning_rate": 1.4681647370088369e-08, + "loss": 0.377409964799881, + "step": 8225 + }, + { + "epoch": 1.896702789946968, + "grad_norm": 1.595495246407856, + "learning_rate": 1.4616627356628831e-08, + "loss": 0.41149425506591797, + "step": 8226 + }, + { + "epoch": 1.8969333640765507, + "grad_norm": 1.548113444870098, + "learning_rate": 1.455175057862923e-08, + "loss": 0.39183878898620605, + "step": 8227 + }, + { + "epoch": 1.8971639382061332, + "grad_norm": 1.3643453838150799, + "learning_rate": 1.448701704551969e-08, + "loss": 0.3629387617111206, + "step": 8228 + }, + { + "epoch": 1.8973945123357159, + "grad_norm": 1.6546771139251113, + "learning_rate": 1.4422426766710239e-08, + "loss": 0.4007713794708252, + "step": 8229 + }, + { + "epoch": 1.8976250864652986, + "grad_norm": 1.648419698601457, + "learning_rate": 1.4357979751589477e-08, + "loss": 0.42354586720466614, + "step": 8230 + }, + { + "epoch": 1.8978556605948813, + "grad_norm": 1.9683167812350795, + "learning_rate": 1.429367600952558e-08, + "loss": 0.5321829319000244, + "step": 8231 + }, + { + "epoch": 1.898086234724464, + "grad_norm": 1.5240649560541817, + "learning_rate": 1.4229515549865845e-08, + "loss": 0.4840988218784332, + "step": 8232 + }, + { + "epoch": 1.8983168088540465, + "grad_norm": 1.6587626955063286, + "learning_rate": 1.4165498381936369e-08, + "loss": 0.5006803870201111, + "step": 8233 + }, + { + "epoch": 1.8985473829836292, + "grad_norm": 1.855334923621547, + "learning_rate": 1.4101624515042821e-08, + "loss": 0.40582865476608276, + "step": 8234 + }, + { + 
"epoch": 1.8987779571132117, + "grad_norm": 1.6458084674224973, + "learning_rate": 1.4037893958469993e-08, + "loss": 0.38199514150619507, + "step": 8235 + }, + { + "epoch": 1.8990085312427945, + "grad_norm": 1.4513711417071327, + "learning_rate": 1.3974306721481699e-08, + "loss": 0.39234936237335205, + "step": 8236 + }, + { + "epoch": 1.8992391053723772, + "grad_norm": 1.661857153956049, + "learning_rate": 1.391086281332099e-08, + "loss": 0.42211759090423584, + "step": 8237 + }, + { + "epoch": 1.8994696795019599, + "grad_norm": 1.5171507269414566, + "learning_rate": 1.3847562243210043e-08, + "loss": 0.4519961476325989, + "step": 8238 + }, + { + "epoch": 1.8997002536315426, + "grad_norm": 1.618394005210342, + "learning_rate": 1.3784405020350276e-08, + "loss": 0.4795762896537781, + "step": 8239 + }, + { + "epoch": 1.8999308277611253, + "grad_norm": 1.5749927795923588, + "learning_rate": 1.3721391153922235e-08, + "loss": 0.4549542963504791, + "step": 8240 + }, + { + "epoch": 1.9001614018907078, + "grad_norm": 1.759482125374446, + "learning_rate": 1.3658520653085703e-08, + "loss": 0.5253233313560486, + "step": 8241 + }, + { + "epoch": 1.9003919760202905, + "grad_norm": 1.4274315163192688, + "learning_rate": 1.3595793526979371e-08, + "loss": 0.44850921630859375, + "step": 8242 + }, + { + "epoch": 1.900622550149873, + "grad_norm": 1.5448941620644567, + "learning_rate": 1.35332097847215e-08, + "loss": 0.4416281580924988, + "step": 8243 + }, + { + "epoch": 1.9008531242794557, + "grad_norm": 1.932595440608825, + "learning_rate": 1.3470769435409036e-08, + "loss": 0.5567417740821838, + "step": 8244 + }, + { + "epoch": 1.9010836984090385, + "grad_norm": 1.4810071060864598, + "learning_rate": 1.3408472488118383e-08, + "loss": 0.43554848432540894, + "step": 8245 + }, + { + "epoch": 1.9013142725386212, + "grad_norm": 1.6729713604736038, + "learning_rate": 1.3346318951905077e-08, + "loss": 0.4219995141029358, + "step": 8246 + }, + { + "epoch": 1.901544846668204, + "grad_norm": 
1.5600368865419485, + "learning_rate": 1.328430883580367e-08, + "loss": 0.45862913131713867, + "step": 8247 + }, + { + "epoch": 1.9017754207977866, + "grad_norm": 1.5932092717655322, + "learning_rate": 1.3222442148828172e-08, + "loss": 0.5026064515113831, + "step": 8248 + }, + { + "epoch": 1.902005994927369, + "grad_norm": 1.6308659122795583, + "learning_rate": 1.316071889997139e-08, + "loss": 0.46948713064193726, + "step": 8249 + }, + { + "epoch": 1.9022365690569518, + "grad_norm": 1.5718314790268124, + "learning_rate": 1.3099139098205258e-08, + "loss": 0.4263686537742615, + "step": 8250 + }, + { + "epoch": 1.9024671431865343, + "grad_norm": 1.516002170215572, + "learning_rate": 1.3037702752481394e-08, + "loss": 0.4652191400527954, + "step": 8251 + }, + { + "epoch": 1.902697717316117, + "grad_norm": 1.553138573631746, + "learning_rate": 1.2976409871729987e-08, + "loss": 0.4918743371963501, + "step": 8252 + }, + { + "epoch": 1.9029282914456997, + "grad_norm": 1.4916920711393407, + "learning_rate": 1.2915260464860466e-08, + "loss": 0.5297696590423584, + "step": 8253 + }, + { + "epoch": 1.9031588655752825, + "grad_norm": 1.7049232652010609, + "learning_rate": 1.2854254540761722e-08, + "loss": 0.5320281982421875, + "step": 8254 + }, + { + "epoch": 1.9033894397048652, + "grad_norm": 1.6403951625522013, + "learning_rate": 1.2793392108301437e-08, + "loss": 0.4424601197242737, + "step": 8255 + }, + { + "epoch": 1.903620013834448, + "grad_norm": 1.7301429652605729, + "learning_rate": 1.2732673176326758e-08, + "loss": 0.4811365008354187, + "step": 8256 + }, + { + "epoch": 1.9038505879640304, + "grad_norm": 1.4707627617860477, + "learning_rate": 1.2672097753663624e-08, + "loss": 0.3744504451751709, + "step": 8257 + }, + { + "epoch": 1.904081162093613, + "grad_norm": 1.4178929694153364, + "learning_rate": 1.2611665849117326e-08, + "loss": 0.4703986644744873, + "step": 8258 + }, + { + "epoch": 1.9043117362231956, + "grad_norm": 1.7267205141598052, + "learning_rate": 
1.255137747147228e-08, + "loss": 0.5431181192398071, + "step": 8259 + }, + { + "epoch": 1.9045423103527783, + "grad_norm": 1.8088892551764337, + "learning_rate": 1.2491232629492143e-08, + "loss": 0.5066450238227844, + "step": 8260 + }, + { + "epoch": 1.904772884482361, + "grad_norm": 1.4945728049455276, + "learning_rate": 1.2431231331919368e-08, + "loss": 0.4374620020389557, + "step": 8261 + }, + { + "epoch": 1.9050034586119438, + "grad_norm": 1.5574450804582989, + "learning_rate": 1.2371373587475753e-08, + "loss": 0.3628976345062256, + "step": 8262 + }, + { + "epoch": 1.9052340327415265, + "grad_norm": 1.6159357629155715, + "learning_rate": 1.231165940486234e-08, + "loss": 0.43471890687942505, + "step": 8263 + }, + { + "epoch": 1.9054646068711092, + "grad_norm": 1.4892272896008858, + "learning_rate": 1.2252088792759074e-08, + "loss": 0.5038785934448242, + "step": 8264 + }, + { + "epoch": 1.9056951810006917, + "grad_norm": 1.388813738509663, + "learning_rate": 1.2192661759825363e-08, + "loss": 0.44022035598754883, + "step": 8265 + }, + { + "epoch": 1.9059257551302744, + "grad_norm": 1.8473214990080156, + "learning_rate": 1.2133378314699294e-08, + "loss": 0.4924722909927368, + "step": 8266 + }, + { + "epoch": 1.906156329259857, + "grad_norm": 1.525292247487046, + "learning_rate": 1.2074238465998532e-08, + "loss": 0.3824247121810913, + "step": 8267 + }, + { + "epoch": 1.9063869033894396, + "grad_norm": 1.821466956277618, + "learning_rate": 1.2015242222319422e-08, + "loss": 0.47094473242759705, + "step": 8268 + }, + { + "epoch": 1.9066174775190223, + "grad_norm": 1.7313158547849, + "learning_rate": 1.1956389592237881e-08, + "loss": 0.5653735399246216, + "step": 8269 + }, + { + "epoch": 1.906848051648605, + "grad_norm": 1.7620428814203788, + "learning_rate": 1.1897680584308512e-08, + "loss": 0.4763476848602295, + "step": 8270 + }, + { + "epoch": 1.9070786257781878, + "grad_norm": 1.5194232107831984, + "learning_rate": 1.1839115207065487e-08, + "loss": 
0.3845449686050415, + "step": 8271 + }, + { + "epoch": 1.9073091999077705, + "grad_norm": 1.5881713237890829, + "learning_rate": 1.1780693469021775e-08, + "loss": 0.43071988224983215, + "step": 8272 + }, + { + "epoch": 1.907539774037353, + "grad_norm": 1.4466344827167648, + "learning_rate": 1.172241537866947e-08, + "loss": 0.43860751390457153, + "step": 8273 + }, + { + "epoch": 1.9077703481669357, + "grad_norm": 1.7623171007667486, + "learning_rate": 1.1664280944480132e-08, + "loss": 0.5077678561210632, + "step": 8274 + }, + { + "epoch": 1.9080009222965182, + "grad_norm": 1.4297374268054954, + "learning_rate": 1.1606290174903888e-08, + "loss": 0.3832993805408478, + "step": 8275 + }, + { + "epoch": 1.908231496426101, + "grad_norm": 1.629527864713481, + "learning_rate": 1.1548443078370551e-08, + "loss": 0.48003530502319336, + "step": 8276 + }, + { + "epoch": 1.9084620705556836, + "grad_norm": 1.5503547776003848, + "learning_rate": 1.1490739663288618e-08, + "loss": 0.6109439134597778, + "step": 8277 + }, + { + "epoch": 1.9086926446852663, + "grad_norm": 1.9064677948637023, + "learning_rate": 1.1433179938045823e-08, + "loss": 0.4559859037399292, + "step": 8278 + }, + { + "epoch": 1.908923218814849, + "grad_norm": 1.4670877218502, + "learning_rate": 1.137576391100925e-08, + "loss": 0.3935600221157074, + "step": 8279 + }, + { + "epoch": 1.9091537929444318, + "grad_norm": 1.6460426557554972, + "learning_rate": 1.1318491590524782e-08, + "loss": 0.44477611780166626, + "step": 8280 + }, + { + "epoch": 1.9093843670740143, + "grad_norm": 1.652813391764361, + "learning_rate": 1.1261362984917533e-08, + "loss": 0.47065627574920654, + "step": 8281 + }, + { + "epoch": 1.909614941203597, + "grad_norm": 1.567401132156008, + "learning_rate": 1.1204378102491862e-08, + "loss": 0.44851434230804443, + "step": 8282 + }, + { + "epoch": 1.9098455153331795, + "grad_norm": 1.6119259284309502, + "learning_rate": 1.1147536951530923e-08, + "loss": 0.38606488704681396, + "step": 8283 + }, + { + 
"epoch": 1.9100760894627622, + "grad_norm": 1.7145601291142103, + "learning_rate": 1.1090839540297103e-08, + "loss": 0.5400182008743286, + "step": 8284 + }, + { + "epoch": 1.910306663592345, + "grad_norm": 1.5193110263706777, + "learning_rate": 1.1034285877032146e-08, + "loss": 0.4225059449672699, + "step": 8285 + }, + { + "epoch": 1.9105372377219276, + "grad_norm": 1.8787563951518915, + "learning_rate": 1.0977875969956584e-08, + "loss": 0.5111556649208069, + "step": 8286 + }, + { + "epoch": 1.9107678118515103, + "grad_norm": 1.583999151547768, + "learning_rate": 1.0921609827270196e-08, + "loss": 0.40596213936805725, + "step": 8287 + }, + { + "epoch": 1.910998385981093, + "grad_norm": 1.619272502884341, + "learning_rate": 1.0865487457151768e-08, + "loss": 0.47917360067367554, + "step": 8288 + }, + { + "epoch": 1.9112289601106756, + "grad_norm": 1.8556422558472565, + "learning_rate": 1.0809508867759331e-08, + "loss": 0.45154574513435364, + "step": 8289 + }, + { + "epoch": 1.9114595342402583, + "grad_norm": 1.7391028962680364, + "learning_rate": 1.0753674067229935e-08, + "loss": 0.5024373531341553, + "step": 8290 + }, + { + "epoch": 1.9116901083698408, + "grad_norm": 1.6003253992080113, + "learning_rate": 1.069798306367975e-08, + "loss": 0.5084686875343323, + "step": 8291 + }, + { + "epoch": 1.9119206824994235, + "grad_norm": 1.5906220140950642, + "learning_rate": 1.064243586520408e-08, + "loss": 0.3947920501232147, + "step": 8292 + }, + { + "epoch": 1.9121512566290062, + "grad_norm": 1.5037329879323602, + "learning_rate": 1.0587032479877023e-08, + "loss": 0.5011960864067078, + "step": 8293 + }, + { + "epoch": 1.912381830758589, + "grad_norm": 1.6116996984750152, + "learning_rate": 1.0531772915752247e-08, + "loss": 0.43622612953186035, + "step": 8294 + }, + { + "epoch": 1.9126124048881716, + "grad_norm": 1.664400790122745, + "learning_rate": 1.0476657180862325e-08, + "loss": 0.380764365196228, + "step": 8295 + }, + { + "epoch": 1.9128429790177544, + "grad_norm": 
1.59176785573853, + "learning_rate": 1.042168528321874e-08, + "loss": 0.4183109700679779, + "step": 8296 + }, + { + "epoch": 1.9130735531473368, + "grad_norm": 1.7993335153125511, + "learning_rate": 1.036685723081221e-08, + "loss": 0.4221222698688507, + "step": 8297 + }, + { + "epoch": 1.9133041272769196, + "grad_norm": 1.7816315005923467, + "learning_rate": 1.0312173031612804e-08, + "loss": 0.543656051158905, + "step": 8298 + }, + { + "epoch": 1.913534701406502, + "grad_norm": 1.5681621709441897, + "learning_rate": 1.0257632693569052e-08, + "loss": 0.48872441053390503, + "step": 8299 + }, + { + "epoch": 1.9137652755360848, + "grad_norm": 1.5640812032082956, + "learning_rate": 1.0203236224609169e-08, + "loss": 0.5447995662689209, + "step": 8300 + }, + { + "epoch": 1.9139958496656675, + "grad_norm": 1.4954141524676323, + "learning_rate": 1.0148983632640162e-08, + "loss": 0.39448055624961853, + "step": 8301 + }, + { + "epoch": 1.9142264237952502, + "grad_norm": 1.755968676337724, + "learning_rate": 1.009487492554828e-08, + "loss": 0.44735193252563477, + "step": 8302 + }, + { + "epoch": 1.914456997924833, + "grad_norm": 1.6151813931913763, + "learning_rate": 1.0040910111198786e-08, + "loss": 0.4747859537601471, + "step": 8303 + }, + { + "epoch": 1.9146875720544156, + "grad_norm": 1.6130507888649155, + "learning_rate": 9.987089197435739e-09, + "loss": 0.5120220184326172, + "step": 8304 + }, + { + "epoch": 1.9149181461839981, + "grad_norm": 1.6267491510418168, + "learning_rate": 9.933412192082991e-09, + "loss": 0.3889455795288086, + "step": 8305 + }, + { + "epoch": 1.9151487203135809, + "grad_norm": 1.497355606160038, + "learning_rate": 9.879879102942635e-09, + "loss": 0.36584073305130005, + "step": 8306 + }, + { + "epoch": 1.9153792944431633, + "grad_norm": 2.0010610263228643, + "learning_rate": 9.826489937796556e-09, + "loss": 0.6259280443191528, + "step": 8307 + }, + { + "epoch": 1.915609868572746, + "grad_norm": 1.780257440356438, + "learning_rate": 
9.773244704405104e-09, + "loss": 0.45160970091819763, + "step": 8308 + }, + { + "epoch": 1.9158404427023288, + "grad_norm": 1.559258218463348, + "learning_rate": 9.720143410508309e-09, + "loss": 0.47028589248657227, + "step": 8309 + }, + { + "epoch": 1.9160710168319115, + "grad_norm": 1.7146410364961069, + "learning_rate": 9.667186063824773e-09, + "loss": 0.3850802183151245, + "step": 8310 + }, + { + "epoch": 1.9163015909614942, + "grad_norm": 1.69252010891113, + "learning_rate": 9.614372672052451e-09, + "loss": 0.4134417772293091, + "step": 8311 + }, + { + "epoch": 1.916532165091077, + "grad_norm": 1.4197660481073355, + "learning_rate": 9.561703242868425e-09, + "loss": 0.5340328216552734, + "step": 8312 + }, + { + "epoch": 1.9167627392206594, + "grad_norm": 1.5089395557239718, + "learning_rate": 9.509177783928569e-09, + "loss": 0.4580942392349243, + "step": 8313 + }, + { + "epoch": 1.9169933133502421, + "grad_norm": 1.559427035261756, + "learning_rate": 9.45679630286811e-09, + "loss": 0.4227365553379059, + "step": 8314 + }, + { + "epoch": 1.9172238874798246, + "grad_norm": 1.462151537342571, + "learning_rate": 9.404558807301065e-09, + "loss": 0.42711400985717773, + "step": 8315 + }, + { + "epoch": 1.9174544616094074, + "grad_norm": 1.6466969798320865, + "learning_rate": 9.352465304820811e-09, + "loss": 0.41088467836380005, + "step": 8316 + }, + { + "epoch": 1.91768503573899, + "grad_norm": 1.7161905508950221, + "learning_rate": 9.30051580299962e-09, + "loss": 0.4669058918952942, + "step": 8317 + }, + { + "epoch": 1.9179156098685728, + "grad_norm": 1.8956617878589224, + "learning_rate": 9.248710309388896e-09, + "loss": 0.34129124879837036, + "step": 8318 + }, + { + "epoch": 1.9181461839981555, + "grad_norm": 1.6346151888813216, + "learning_rate": 9.19704883151906e-09, + "loss": 0.5538367033004761, + "step": 8319 + }, + { + "epoch": 1.9183767581277382, + "grad_norm": 1.8993289351204807, + "learning_rate": 9.145531376899773e-09, + "loss": 0.4591939151287079, + 
"step": 8320 + }, + { + "epoch": 1.9186073322573207, + "grad_norm": 1.531598340011727, + "learning_rate": 9.094157953019376e-09, + "loss": 0.38709723949432373, + "step": 8321 + }, + { + "epoch": 1.9188379063869034, + "grad_norm": 1.7947823187484588, + "learning_rate": 9.042928567345787e-09, + "loss": 0.503919780254364, + "step": 8322 + }, + { + "epoch": 1.919068480516486, + "grad_norm": 1.6367087262197295, + "learning_rate": 8.991843227325491e-09, + "loss": 0.510110080242157, + "step": 8323 + }, + { + "epoch": 1.9192990546460686, + "grad_norm": 1.6066272425773898, + "learning_rate": 8.940901940384437e-09, + "loss": 0.5100687146186829, + "step": 8324 + }, + { + "epoch": 1.9195296287756514, + "grad_norm": 1.513750458500578, + "learning_rate": 8.89010471392726e-09, + "loss": 0.44701308012008667, + "step": 8325 + }, + { + "epoch": 1.919760202905234, + "grad_norm": 1.563320875474341, + "learning_rate": 8.83945155533794e-09, + "loss": 0.4657078981399536, + "step": 8326 + }, + { + "epoch": 1.9199907770348168, + "grad_norm": 1.9297827676028427, + "learning_rate": 8.788942471979588e-09, + "loss": 0.510329008102417, + "step": 8327 + }, + { + "epoch": 1.9202213511643995, + "grad_norm": 1.471307451139604, + "learning_rate": 8.738577471193997e-09, + "loss": 0.5373008847236633, + "step": 8328 + }, + { + "epoch": 1.920451925293982, + "grad_norm": 1.9012550118721963, + "learning_rate": 8.688356560302313e-09, + "loss": 0.46517014503479004, + "step": 8329 + }, + { + "epoch": 1.9206824994235647, + "grad_norm": 1.6705233787528915, + "learning_rate": 8.638279746604582e-09, + "loss": 0.3993692398071289, + "step": 8330 + }, + { + "epoch": 1.9209130735531472, + "grad_norm": 1.366585505535673, + "learning_rate": 8.588347037380095e-09, + "loss": 0.42480504512786865, + "step": 8331 + }, + { + "epoch": 1.92114364768273, + "grad_norm": 1.7413386006663227, + "learning_rate": 8.538558439887044e-09, + "loss": 0.44433218240737915, + "step": 8332 + }, + { + "epoch": 1.9213742218123127, + 
"grad_norm": 1.59463524320548, + "learning_rate": 8.488913961362643e-09, + "loss": 0.4645090103149414, + "step": 8333 + }, + { + "epoch": 1.9216047959418954, + "grad_norm": 1.7690127959905497, + "learning_rate": 8.439413609023227e-09, + "loss": 0.47265806794166565, + "step": 8334 + }, + { + "epoch": 1.921835370071478, + "grad_norm": 1.6930025984848287, + "learning_rate": 8.390057390064265e-09, + "loss": 0.46389561891555786, + "step": 8335 + }, + { + "epoch": 1.9220659442010608, + "grad_norm": 1.8286869444988214, + "learning_rate": 8.340845311660127e-09, + "loss": 0.45355337858200073, + "step": 8336 + }, + { + "epoch": 1.9222965183306433, + "grad_norm": 1.6861508362464954, + "learning_rate": 8.291777380964315e-09, + "loss": 0.47136229276657104, + "step": 8337 + }, + { + "epoch": 1.922527092460226, + "grad_norm": 1.7162470073135112, + "learning_rate": 8.242853605109234e-09, + "loss": 0.4914461374282837, + "step": 8338 + }, + { + "epoch": 1.9227576665898085, + "grad_norm": 1.5896610300054894, + "learning_rate": 8.194073991206641e-09, + "loss": 0.48298412561416626, + "step": 8339 + }, + { + "epoch": 1.9229882407193912, + "grad_norm": 1.591559243664797, + "learning_rate": 8.145438546346971e-09, + "loss": 0.5316052436828613, + "step": 8340 + }, + { + "epoch": 1.923218814848974, + "grad_norm": 1.530763445371585, + "learning_rate": 8.09694727760002e-09, + "loss": 0.45742303133010864, + "step": 8341 + }, + { + "epoch": 1.9234493889785567, + "grad_norm": 1.800664891434664, + "learning_rate": 8.048600192014365e-09, + "loss": 0.41579365730285645, + "step": 8342 + }, + { + "epoch": 1.9236799631081394, + "grad_norm": 1.4284255731817002, + "learning_rate": 8.000397296617834e-09, + "loss": 0.37775835394859314, + "step": 8343 + }, + { + "epoch": 1.9239105372377219, + "grad_norm": 1.7051685129810905, + "learning_rate": 7.95233859841704e-09, + "loss": 0.4720783531665802, + "step": 8344 + }, + { + "epoch": 1.9241411113673046, + "grad_norm": 1.608380789109436, + "learning_rate": 
7.904424104398067e-09, + "loss": 0.5015095472335815, + "step": 8345 + }, + { + "epoch": 1.924371685496887, + "grad_norm": 1.5886093342032406, + "learning_rate": 7.856653821525672e-09, + "loss": 0.6053783893585205, + "step": 8346 + }, + { + "epoch": 1.9246022596264698, + "grad_norm": 1.71106607476921, + "learning_rate": 7.809027756743635e-09, + "loss": 0.47775521874427795, + "step": 8347 + }, + { + "epoch": 1.9248328337560525, + "grad_norm": 1.559597916397487, + "learning_rate": 7.761545916974976e-09, + "loss": 0.36487245559692383, + "step": 8348 + }, + { + "epoch": 1.9250634078856352, + "grad_norm": 1.6596969619350017, + "learning_rate": 7.714208309121617e-09, + "loss": 0.48085975646972656, + "step": 8349 + }, + { + "epoch": 1.925293982015218, + "grad_norm": 1.6156245324091865, + "learning_rate": 7.667014940064609e-09, + "loss": 0.48800790309906006, + "step": 8350 + }, + { + "epoch": 1.9255245561448007, + "grad_norm": 1.654653168113963, + "learning_rate": 7.61996581666402e-09, + "loss": 0.5294181704521179, + "step": 8351 + }, + { + "epoch": 1.9257551302743832, + "grad_norm": 1.4725020612800932, + "learning_rate": 7.573060945758936e-09, + "loss": 0.44024503231048584, + "step": 8352 + }, + { + "epoch": 1.9259857044039659, + "grad_norm": 1.8377372608503795, + "learning_rate": 7.526300334167235e-09, + "loss": 0.4359186887741089, + "step": 8353 + }, + { + "epoch": 1.9262162785335484, + "grad_norm": 1.6594669465231893, + "learning_rate": 7.479683988686259e-09, + "loss": 0.4803895652294159, + "step": 8354 + }, + { + "epoch": 1.926446852663131, + "grad_norm": 1.5824042504509404, + "learning_rate": 7.433211916092141e-09, + "loss": 0.43153274059295654, + "step": 8355 + }, + { + "epoch": 1.9266774267927138, + "grad_norm": 1.812737055881384, + "learning_rate": 7.386884123140036e-09, + "loss": 0.38263070583343506, + "step": 8356 + }, + { + "epoch": 1.9269080009222965, + "grad_norm": 1.42789662226475, + "learning_rate": 7.340700616564e-09, + "loss": 0.42121192812919617, + 
"step": 8357 + }, + { + "epoch": 1.9271385750518792, + "grad_norm": 1.6902764865159838, + "learning_rate": 7.294661403077662e-09, + "loss": 0.46008965373039246, + "step": 8358 + }, + { + "epoch": 1.927369149181462, + "grad_norm": 1.5923895901686829, + "learning_rate": 7.248766489372893e-09, + "loss": 0.48495203256607056, + "step": 8359 + }, + { + "epoch": 1.9275997233110445, + "grad_norm": 1.6833123633851883, + "learning_rate": 7.203015882121244e-09, + "loss": 0.5004169940948486, + "step": 8360 + }, + { + "epoch": 1.9278302974406272, + "grad_norm": 1.4732497687996942, + "learning_rate": 7.15740958797284e-09, + "loss": 0.5660319328308105, + "step": 8361 + }, + { + "epoch": 1.9280608715702097, + "grad_norm": 1.588922332622674, + "learning_rate": 7.111947613557268e-09, + "loss": 0.43854010105133057, + "step": 8362 + }, + { + "epoch": 1.9282914456997924, + "grad_norm": 2.093362311602714, + "learning_rate": 7.066629965482574e-09, + "loss": 0.44730937480926514, + "step": 8363 + }, + { + "epoch": 1.928522019829375, + "grad_norm": 1.6568658526601971, + "learning_rate": 7.021456650336377e-09, + "loss": 0.45642590522766113, + "step": 8364 + }, + { + "epoch": 1.9287525939589578, + "grad_norm": 1.9173353497487595, + "learning_rate": 6.976427674684871e-09, + "loss": 0.5613523721694946, + "step": 8365 + }, + { + "epoch": 1.9289831680885405, + "grad_norm": 1.7976713831697748, + "learning_rate": 6.931543045073706e-09, + "loss": 0.4231454133987427, + "step": 8366 + }, + { + "epoch": 1.9292137422181233, + "grad_norm": 1.9184335289270926, + "learning_rate": 6.886802768027223e-09, + "loss": 0.464144766330719, + "step": 8367 + }, + { + "epoch": 1.9294443163477057, + "grad_norm": 1.6282751196601715, + "learning_rate": 6.8422068500487705e-09, + "loss": 0.4303344488143921, + "step": 8368 + }, + { + "epoch": 1.9296748904772885, + "grad_norm": 1.5717538042291814, + "learning_rate": 6.797755297620944e-09, + "loss": 0.4333549737930298, + "step": 8369 + }, + { + "epoch": 1.929905464606871, + 
"grad_norm": 1.5673646456508366, + "learning_rate": 6.753448117205241e-09, + "loss": 0.4656146466732025, + "step": 8370 + }, + { + "epoch": 1.9301360387364537, + "grad_norm": 2.0556236314521077, + "learning_rate": 6.709285315242063e-09, + "loss": 0.3823866844177246, + "step": 8371 + }, + { + "epoch": 1.9303666128660364, + "grad_norm": 1.5412445917312292, + "learning_rate": 6.665266898150946e-09, + "loss": 0.4552363157272339, + "step": 8372 + }, + { + "epoch": 1.930597186995619, + "grad_norm": 1.5304233694461045, + "learning_rate": 6.6213928723304335e-09, + "loss": 0.48757460713386536, + "step": 8373 + }, + { + "epoch": 1.9308277611252018, + "grad_norm": 1.0877844091844102, + "learning_rate": 6.577663244158094e-09, + "loss": 0.3263235092163086, + "step": 8374 + }, + { + "epoch": 1.9310583352547845, + "grad_norm": 1.6065207890727204, + "learning_rate": 6.534078019990397e-09, + "loss": 0.510450541973114, + "step": 8375 + }, + { + "epoch": 1.931288909384367, + "grad_norm": 1.4737968731950963, + "learning_rate": 6.490637206162941e-09, + "loss": 0.37407904863357544, + "step": 8376 + }, + { + "epoch": 1.9315194835139498, + "grad_norm": 1.5691906942234775, + "learning_rate": 6.4473408089902315e-09, + "loss": 0.4216376543045044, + "step": 8377 + }, + { + "epoch": 1.9317500576435322, + "grad_norm": 1.647678033925203, + "learning_rate": 6.404188834766011e-09, + "loss": 0.41611379384994507, + "step": 8378 + }, + { + "epoch": 1.931980631773115, + "grad_norm": 1.6406917387427478, + "learning_rate": 6.361181289762596e-09, + "loss": 0.5301774740219116, + "step": 8379 + }, + { + "epoch": 1.9322112059026977, + "grad_norm": 1.457780743812755, + "learning_rate": 6.3183181802317635e-09, + "loss": 0.43767407536506653, + "step": 8380 + }, + { + "epoch": 1.9324417800322804, + "grad_norm": 1.5497586314138279, + "learning_rate": 6.275599512404084e-09, + "loss": 0.417082279920578, + "step": 8381 + }, + { + "epoch": 1.9326723541618631, + "grad_norm": 1.646560289289956, + "learning_rate": 
6.233025292489147e-09, + "loss": 0.41670864820480347, + "step": 8382 + }, + { + "epoch": 1.9329029282914458, + "grad_norm": 1.4085441335066406, + "learning_rate": 6.190595526675446e-09, + "loss": 0.48778587579727173, + "step": 8383 + }, + { + "epoch": 1.9331335024210283, + "grad_norm": 1.39299487584749, + "learning_rate": 6.148310221130604e-09, + "loss": 0.44433802366256714, + "step": 8384 + }, + { + "epoch": 1.933364076550611, + "grad_norm": 1.7057166388160585, + "learning_rate": 6.106169382001369e-09, + "loss": 0.46826764941215515, + "step": 8385 + }, + { + "epoch": 1.9335946506801935, + "grad_norm": 1.6832081073908207, + "learning_rate": 6.064173015413177e-09, + "loss": 0.5509334802627563, + "step": 8386 + }, + { + "epoch": 1.9338252248097763, + "grad_norm": 1.4200036599053338, + "learning_rate": 6.022321127470698e-09, + "loss": 0.4436245560646057, + "step": 8387 + }, + { + "epoch": 1.934055798939359, + "grad_norm": 1.4658061886752614, + "learning_rate": 5.9806137242574e-09, + "loss": 0.3577145040035248, + "step": 8388 + }, + { + "epoch": 1.9342863730689417, + "grad_norm": 1.3485508447539643, + "learning_rate": 5.939050811835988e-09, + "loss": 0.39893999695777893, + "step": 8389 + }, + { + "epoch": 1.9345169471985244, + "grad_norm": 1.4373848732418595, + "learning_rate": 5.897632396248075e-09, + "loss": 0.4109868109226227, + "step": 8390 + }, + { + "epoch": 1.9347475213281071, + "grad_norm": 1.6148537069486861, + "learning_rate": 5.85635848351429e-09, + "loss": 0.4193134307861328, + "step": 8391 + }, + { + "epoch": 1.9349780954576896, + "grad_norm": 1.774944389887914, + "learning_rate": 5.8152290796340545e-09, + "loss": 0.44189178943634033, + "step": 8392 + }, + { + "epoch": 1.9352086695872723, + "grad_norm": 1.7653802191556502, + "learning_rate": 5.774244190586141e-09, + "loss": 0.5014302730560303, + "step": 8393 + }, + { + "epoch": 1.9354392437168548, + "grad_norm": 1.5565367331009852, + "learning_rate": 5.733403822328009e-09, + "loss": 0.4962024688720703, + 
"step": 8394 + }, + { + "epoch": 1.9356698178464375, + "grad_norm": 1.585877874844532, + "learning_rate": 5.69270798079613e-09, + "loss": 0.45495474338531494, + "step": 8395 + }, + { + "epoch": 1.9359003919760203, + "grad_norm": 1.4665884192601668, + "learning_rate": 5.652156671906105e-09, + "loss": 0.49062758684158325, + "step": 8396 + }, + { + "epoch": 1.936130966105603, + "grad_norm": 1.6573434385643893, + "learning_rate": 5.611749901552554e-09, + "loss": 0.45899879932403564, + "step": 8397 + }, + { + "epoch": 1.9363615402351857, + "grad_norm": 1.511951038657192, + "learning_rate": 5.57148767560911e-09, + "loss": 0.47287002205848694, + "step": 8398 + }, + { + "epoch": 1.9365921143647684, + "grad_norm": 1.5970704539129832, + "learning_rate": 5.531369999927982e-09, + "loss": 0.439136266708374, + "step": 8399 + }, + { + "epoch": 1.936822688494351, + "grad_norm": 1.2795152915391526, + "learning_rate": 5.4913968803410594e-09, + "loss": 0.3920954465866089, + "step": 8400 + }, + { + "epoch": 1.9370532626239336, + "grad_norm": 1.254790295470771, + "learning_rate": 5.451568322658473e-09, + "loss": 0.4608895480632782, + "step": 8401 + }, + { + "epoch": 1.9372838367535161, + "grad_norm": 1.4389672316514175, + "learning_rate": 5.4118843326699246e-09, + "loss": 0.4617875814437866, + "step": 8402 + }, + { + "epoch": 1.9375144108830988, + "grad_norm": 1.8398027260263112, + "learning_rate": 5.372344916143912e-09, + "loss": 0.5293254852294922, + "step": 8403 + }, + { + "epoch": 1.9377449850126816, + "grad_norm": 1.2603762011573385, + "learning_rate": 5.332950078827725e-09, + "loss": 0.3935343623161316, + "step": 8404 + }, + { + "epoch": 1.9379755591422643, + "grad_norm": 1.3159194137267558, + "learning_rate": 5.293699826447895e-09, + "loss": 0.4612414240837097, + "step": 8405 + }, + { + "epoch": 1.938206133271847, + "grad_norm": 1.5616222982589931, + "learning_rate": 5.254594164709858e-09, + "loss": 0.4779428243637085, + "step": 8406 + }, + { + "epoch": 1.9384367074014297, + 
"grad_norm": 1.3393838173044101, + "learning_rate": 5.215633099298067e-09, + "loss": 0.37436819076538086, + "step": 8407 + }, + { + "epoch": 1.9386672815310122, + "grad_norm": 1.5367283978531407, + "learning_rate": 5.1768166358757695e-09, + "loss": 0.458698570728302, + "step": 8408 + }, + { + "epoch": 1.938897855660595, + "grad_norm": 1.52395102556278, + "learning_rate": 5.1381447800854515e-09, + "loss": 0.39365172386169434, + "step": 8409 + }, + { + "epoch": 1.9391284297901774, + "grad_norm": 1.6915141620999796, + "learning_rate": 5.099617537548284e-09, + "loss": 0.46358722448349, + "step": 8410 + }, + { + "epoch": 1.9393590039197601, + "grad_norm": 1.4920931037664487, + "learning_rate": 5.061234913864898e-09, + "loss": 0.4286697506904602, + "step": 8411 + }, + { + "epoch": 1.9395895780493428, + "grad_norm": 1.2865245997479036, + "learning_rate": 5.022996914614275e-09, + "loss": 0.4925898015499115, + "step": 8412 + }, + { + "epoch": 1.9398201521789256, + "grad_norm": 1.5226712255874009, + "learning_rate": 4.984903545354857e-09, + "loss": 0.46924275159835815, + "step": 8413 + }, + { + "epoch": 1.9400507263085083, + "grad_norm": 1.5857623247989538, + "learning_rate": 4.946954811623994e-09, + "loss": 0.5326268672943115, + "step": 8414 + }, + { + "epoch": 1.940281300438091, + "grad_norm": 1.5901041586459477, + "learning_rate": 4.909150718937716e-09, + "loss": 0.4367690682411194, + "step": 8415 + }, + { + "epoch": 1.9405118745676735, + "grad_norm": 1.5390541996103484, + "learning_rate": 4.8714912727914055e-09, + "loss": 0.45579224824905396, + "step": 8416 + }, + { + "epoch": 1.9407424486972562, + "grad_norm": 1.5246826105956603, + "learning_rate": 4.8339764786590186e-09, + "loss": 0.4420431852340698, + "step": 8417 + }, + { + "epoch": 1.9409730228268387, + "grad_norm": 1.7713819487127218, + "learning_rate": 4.79660634199397e-09, + "loss": 0.4175274670124054, + "step": 8418 + }, + { + "epoch": 1.9412035969564214, + "grad_norm": 1.4046803968065067, + "learning_rate": 
4.759380868228246e-09, + "loss": 0.41451364755630493, + "step": 8419 + }, + { + "epoch": 1.9414341710860041, + "grad_norm": 1.5394804899846177, + "learning_rate": 4.722300062772966e-09, + "loss": 0.4211805462837219, + "step": 8420 + }, + { + "epoch": 1.9416647452155869, + "grad_norm": 1.5805052208208792, + "learning_rate": 4.68536393101826e-09, + "loss": 0.4458296000957489, + "step": 8421 + }, + { + "epoch": 1.9418953193451696, + "grad_norm": 1.8263114249420374, + "learning_rate": 4.648572478333057e-09, + "loss": 0.6226488351821899, + "step": 8422 + }, + { + "epoch": 1.9421258934747523, + "grad_norm": 1.467298573422793, + "learning_rate": 4.611925710065523e-09, + "loss": 0.343037486076355, + "step": 8423 + }, + { + "epoch": 1.9423564676043348, + "grad_norm": 1.4279799784372957, + "learning_rate": 4.575423631542397e-09, + "loss": 0.42478299140930176, + "step": 8424 + }, + { + "epoch": 1.9425870417339175, + "grad_norm": 1.4809253602160373, + "learning_rate": 4.539066248069878e-09, + "loss": 0.4467424750328064, + "step": 8425 + }, + { + "epoch": 1.9428176158635, + "grad_norm": 1.5230213064501263, + "learning_rate": 4.50285356493274e-09, + "loss": 0.4598960876464844, + "step": 8426 + }, + { + "epoch": 1.9430481899930827, + "grad_norm": 1.767389183054306, + "learning_rate": 4.466785587394883e-09, + "loss": 0.43005913496017456, + "step": 8427 + }, + { + "epoch": 1.9432787641226654, + "grad_norm": 1.6819998310369073, + "learning_rate": 4.430862320699114e-09, + "loss": 0.4259253740310669, + "step": 8428 + }, + { + "epoch": 1.9435093382522481, + "grad_norm": 1.4809575809160866, + "learning_rate": 4.395083770067476e-09, + "loss": 0.4275285601615906, + "step": 8429 + }, + { + "epoch": 1.9437399123818309, + "grad_norm": 1.5009509074634573, + "learning_rate": 4.3594499407003656e-09, + "loss": 0.42151302099227905, + "step": 8430 + }, + { + "epoch": 1.9439704865114136, + "grad_norm": 1.2121055184272223, + "learning_rate": 4.3239608377778625e-09, + "loss": 0.41727957129478455, + 
"step": 8431 + }, + { + "epoch": 1.944201060640996, + "grad_norm": 1.6993320655678226, + "learning_rate": 4.288616466458395e-09, + "loss": 0.5026905536651611, + "step": 8432 + }, + { + "epoch": 1.9444316347705788, + "grad_norm": 1.7732059667125062, + "learning_rate": 4.2534168318798524e-09, + "loss": 0.5170408487319946, + "step": 8433 + }, + { + "epoch": 1.9446622089001613, + "grad_norm": 1.4027101607713113, + "learning_rate": 4.21836193915881e-09, + "loss": 0.3918447196483612, + "step": 8434 + }, + { + "epoch": 1.944892783029744, + "grad_norm": 1.6652823795220828, + "learning_rate": 4.183451793390747e-09, + "loss": 0.49871906638145447, + "step": 8435 + }, + { + "epoch": 1.9451233571593267, + "grad_norm": 1.4696705484226025, + "learning_rate": 4.1486863996502694e-09, + "loss": 0.43729400634765625, + "step": 8436 + }, + { + "epoch": 1.9453539312889094, + "grad_norm": 1.6971586346839116, + "learning_rate": 4.114065762990781e-09, + "loss": 0.49198442697525024, + "step": 8437 + }, + { + "epoch": 1.9455845054184921, + "grad_norm": 1.7555960999646751, + "learning_rate": 4.079589888444923e-09, + "loss": 0.48610788583755493, + "step": 8438 + }, + { + "epoch": 1.9458150795480749, + "grad_norm": 1.4385738810997333, + "learning_rate": 4.045258781024019e-09, + "loss": 0.43962734937667847, + "step": 8439 + }, + { + "epoch": 1.9460456536776574, + "grad_norm": 1.5800303425440292, + "learning_rate": 4.011072445718522e-09, + "loss": 0.3320704400539398, + "step": 8440 + }, + { + "epoch": 1.94627622780724, + "grad_norm": 1.6634559640737916, + "learning_rate": 3.977030887497568e-09, + "loss": 0.4773918092250824, + "step": 8441 + }, + { + "epoch": 1.9465068019368226, + "grad_norm": 1.6386159776295786, + "learning_rate": 3.9431341113096425e-09, + "loss": 0.424363911151886, + "step": 8442 + }, + { + "epoch": 1.9467373760664053, + "grad_norm": 1.9939094308024221, + "learning_rate": 3.9093821220818055e-09, + "loss": 0.5321601033210754, + "step": 8443 + }, + { + "epoch": 1.946967950195988, 
+ "grad_norm": 1.7091737329216896, + "learning_rate": 3.875774924720465e-09, + "loss": 0.48579344153404236, + "step": 8444 + }, + { + "epoch": 1.9471985243255707, + "grad_norm": 1.4617398717494952, + "learning_rate": 3.842312524110603e-09, + "loss": 0.39313316345214844, + "step": 8445 + }, + { + "epoch": 1.9474290984551534, + "grad_norm": 1.6233833617742501, + "learning_rate": 3.8089949251163264e-09, + "loss": 0.522427499294281, + "step": 8446 + }, + { + "epoch": 1.9476596725847362, + "grad_norm": 1.601217744469266, + "learning_rate": 3.775822132580875e-09, + "loss": 0.3822653889656067, + "step": 8447 + }, + { + "epoch": 1.9478902467143187, + "grad_norm": 1.5787465509087006, + "learning_rate": 3.7427941513259454e-09, + "loss": 0.4322483241558075, + "step": 8448 + }, + { + "epoch": 1.9481208208439014, + "grad_norm": 1.6934897718136162, + "learning_rate": 3.7099109861528087e-09, + "loss": 0.4862939715385437, + "step": 8449 + }, + { + "epoch": 1.9483513949734839, + "grad_norm": 1.5875963080752307, + "learning_rate": 3.6771726418410863e-09, + "loss": 0.45388323068618774, + "step": 8450 + }, + { + "epoch": 1.9485819691030666, + "grad_norm": 1.5187043160616758, + "learning_rate": 3.644579123149749e-09, + "loss": 0.3937215805053711, + "step": 8451 + }, + { + "epoch": 1.9488125432326493, + "grad_norm": 1.5446261991465484, + "learning_rate": 3.6121304348165628e-09, + "loss": 0.46887993812561035, + "step": 8452 + }, + { + "epoch": 1.949043117362232, + "grad_norm": 1.763834546986469, + "learning_rate": 3.5798265815584204e-09, + "loss": 0.4444226026535034, + "step": 8453 + }, + { + "epoch": 1.9492736914918147, + "grad_norm": 1.639572253352884, + "learning_rate": 3.5476675680709e-09, + "loss": 0.4938625991344452, + "step": 8454 + }, + { + "epoch": 1.9495042656213972, + "grad_norm": 1.456362188758518, + "learning_rate": 3.5156533990285953e-09, + "loss": 0.37632471323013306, + "step": 8455 + }, + { + "epoch": 1.94973483975098, + "grad_norm": 1.8608548289842328, + "learning_rate": 
3.483784079085117e-09, + "loss": 0.4345025420188904, + "step": 8456 + }, + { + "epoch": 1.9499654138805624, + "grad_norm": 1.4598938490767328, + "learning_rate": 3.4520596128729818e-09, + "loss": 0.3721727132797241, + "step": 8457 + }, + { + "epoch": 1.9501959880101452, + "grad_norm": 1.6409042038383927, + "learning_rate": 3.4204800050037232e-09, + "loss": 0.4871670603752136, + "step": 8458 + }, + { + "epoch": 1.9504265621397279, + "grad_norm": 1.8307964169711943, + "learning_rate": 3.38904526006778e-09, + "loss": 0.578133225440979, + "step": 8459 + }, + { + "epoch": 1.9506571362693106, + "grad_norm": 1.5202457315236042, + "learning_rate": 3.357755382634386e-09, + "loss": 0.4721870422363281, + "step": 8460 + }, + { + "epoch": 1.9508877103988933, + "grad_norm": 1.798795599183991, + "learning_rate": 3.3266103772519037e-09, + "loss": 0.4569184184074402, + "step": 8461 + }, + { + "epoch": 1.951118284528476, + "grad_norm": 1.7311036262190431, + "learning_rate": 3.2956102484477112e-09, + "loss": 0.48763811588287354, + "step": 8462 + }, + { + "epoch": 1.9513488586580585, + "grad_norm": 1.5898725581558353, + "learning_rate": 3.264755000727759e-09, + "loss": 0.45957818627357483, + "step": 8463 + }, + { + "epoch": 1.9515794327876412, + "grad_norm": 1.661536076059429, + "learning_rate": 3.234044638577238e-09, + "loss": 0.49398598074913025, + "step": 8464 + }, + { + "epoch": 1.9518100069172237, + "grad_norm": 1.8367269278410805, + "learning_rate": 3.2034791664603544e-09, + "loss": 0.48884931206703186, + "step": 8465 + }, + { + "epoch": 1.9520405810468064, + "grad_norm": 1.4322798652039197, + "learning_rate": 3.173058588819999e-09, + "loss": 0.45171886682510376, + "step": 8466 + }, + { + "epoch": 1.9522711551763892, + "grad_norm": 1.7896431151356735, + "learning_rate": 3.142782910077968e-09, + "loss": 0.45110028982162476, + "step": 8467 + }, + { + "epoch": 1.9525017293059719, + "grad_norm": 1.6339596386172939, + "learning_rate": 3.1126521346354074e-09, + "loss": 
0.4602523446083069, + "step": 8468 + }, + { + "epoch": 1.9527323034355546, + "grad_norm": 1.4993439724695443, + "learning_rate": 3.082666266872036e-09, + "loss": 0.3908727169036865, + "step": 8469 + }, + { + "epoch": 1.9529628775651373, + "grad_norm": 1.6588394319404383, + "learning_rate": 3.0528253111464786e-09, + "loss": 0.4886831045150757, + "step": 8470 + }, + { + "epoch": 1.9531934516947198, + "grad_norm": 1.8142188930520524, + "learning_rate": 3.023129271796598e-09, + "loss": 0.4407721161842346, + "step": 8471 + }, + { + "epoch": 1.9534240258243025, + "grad_norm": 1.545809203271424, + "learning_rate": 2.9935781531389425e-09, + "loss": 0.46958622336387634, + "step": 8472 + }, + { + "epoch": 1.953654599953885, + "grad_norm": 1.5632050072309709, + "learning_rate": 2.964171959469075e-09, + "loss": 0.4642796516418457, + "step": 8473 + }, + { + "epoch": 1.9538851740834677, + "grad_norm": 1.5522529280671595, + "learning_rate": 2.9349106950613545e-09, + "loss": 0.5124588012695312, + "step": 8474 + }, + { + "epoch": 1.9541157482130505, + "grad_norm": 1.7441462887025347, + "learning_rate": 2.9057943641693784e-09, + "loss": 0.516730546951294, + "step": 8475 + }, + { + "epoch": 1.9543463223426332, + "grad_norm": 1.6015713883307108, + "learning_rate": 2.876822971025428e-09, + "loss": 0.47847944498062134, + "step": 8476 + }, + { + "epoch": 1.9545768964722159, + "grad_norm": 1.9133896423438201, + "learning_rate": 2.8479965198408007e-09, + "loss": 0.5167095065116882, + "step": 8477 + }, + { + "epoch": 1.9548074706017986, + "grad_norm": 1.4489948600651796, + "learning_rate": 2.819315014805812e-09, + "loss": 0.40728163719177246, + "step": 8478 + }, + { + "epoch": 1.955038044731381, + "grad_norm": 1.4413821780207463, + "learning_rate": 2.790778460089349e-09, + "loss": 0.49741852283477783, + "step": 8479 + }, + { + "epoch": 1.9552686188609638, + "grad_norm": 1.3759130199865537, + "learning_rate": 2.7623868598397603e-09, + "loss": 0.33847475051879883, + "step": 8480 + }, + { + 
"epoch": 1.9554991929905463, + "grad_norm": 1.6995475203184411, + "learning_rate": 2.734140218183856e-09, + "loss": 0.39727652072906494, + "step": 8481 + }, + { + "epoch": 1.955729767120129, + "grad_norm": 1.7012108842781224, + "learning_rate": 2.706038539227795e-09, + "loss": 0.40332260727882385, + "step": 8482 + }, + { + "epoch": 1.9559603412497117, + "grad_norm": 1.3388931691886075, + "learning_rate": 2.6780818270562e-09, + "loss": 0.40296924114227295, + "step": 8483 + }, + { + "epoch": 1.9561909153792945, + "grad_norm": 1.4889010944404621, + "learning_rate": 2.650270085732931e-09, + "loss": 0.4253476858139038, + "step": 8484 + }, + { + "epoch": 1.9564214895088772, + "grad_norm": 1.5794301308382195, + "learning_rate": 2.6226033193007535e-09, + "loss": 0.448941171169281, + "step": 8485 + }, + { + "epoch": 1.95665206363846, + "grad_norm": 1.9411463996799059, + "learning_rate": 2.59508153178134e-09, + "loss": 0.48213180899620056, + "step": 8486 + }, + { + "epoch": 1.9568826377680424, + "grad_norm": 1.6243019689896288, + "learning_rate": 2.5677047271752683e-09, + "loss": 0.48886558413505554, + "step": 8487 + }, + { + "epoch": 1.957113211897625, + "grad_norm": 1.4212209484619593, + "learning_rate": 2.5404729094619103e-09, + "loss": 0.49786341190338135, + "step": 8488 + }, + { + "epoch": 1.9573437860272076, + "grad_norm": 2.1312601270605365, + "learning_rate": 2.5133860825997667e-09, + "loss": 0.4487866163253784, + "step": 8489 + }, + { + "epoch": 1.9575743601567903, + "grad_norm": 1.7672945087914924, + "learning_rate": 2.486444250526243e-09, + "loss": 0.46193206310272217, + "step": 8490 + }, + { + "epoch": 1.957804934286373, + "grad_norm": 1.5923899778865398, + "learning_rate": 2.459647417157429e-09, + "loss": 0.44729042053222656, + "step": 8491 + }, + { + "epoch": 1.9580355084159557, + "grad_norm": 1.8298057614969963, + "learning_rate": 2.432995586388764e-09, + "loss": 0.4646851718425751, + "step": 8492 + }, + { + "epoch": 1.9582660825455385, + "grad_norm": 
1.6514495959092017, + "learning_rate": 2.40648876209415e-09, + "loss": 0.49538400769233704, + "step": 8493 + }, + { + "epoch": 1.9584966566751212, + "grad_norm": 1.7330889796307278, + "learning_rate": 2.3801269481267262e-09, + "loss": 0.5548783540725708, + "step": 8494 + }, + { + "epoch": 1.9587272308047037, + "grad_norm": 1.65108674708811, + "learning_rate": 2.3539101483184277e-09, + "loss": 0.4390280544757843, + "step": 8495 + }, + { + "epoch": 1.9589578049342864, + "grad_norm": 1.323831070791993, + "learning_rate": 2.327838366480095e-09, + "loss": 0.3079942464828491, + "step": 8496 + }, + { + "epoch": 1.959188379063869, + "grad_norm": 2.030408303723105, + "learning_rate": 2.301911606401585e-09, + "loss": 0.5199894309043884, + "step": 8497 + }, + { + "epoch": 1.9594189531934516, + "grad_norm": 1.6402740340647268, + "learning_rate": 2.276129871851662e-09, + "loss": 0.3403523564338684, + "step": 8498 + }, + { + "epoch": 1.9596495273230343, + "grad_norm": 1.785907762491574, + "learning_rate": 2.2504931665777714e-09, + "loss": 0.49699991941452026, + "step": 8499 + }, + { + "epoch": 1.959880101452617, + "grad_norm": 1.5969429106714301, + "learning_rate": 2.2250014943066e-09, + "loss": 0.4178547263145447, + "step": 8500 + }, + { + "epoch": 1.9601106755821998, + "grad_norm": 1.8924231136601524, + "learning_rate": 2.199654858743627e-09, + "loss": 0.5622760057449341, + "step": 8501 + }, + { + "epoch": 1.9603412497117825, + "grad_norm": 1.4610200259542554, + "learning_rate": 2.1744532635733505e-09, + "loss": 0.4072464406490326, + "step": 8502 + }, + { + "epoch": 1.960571823841365, + "grad_norm": 1.5401248564682235, + "learning_rate": 2.1493967124587287e-09, + "loss": 0.475033164024353, + "step": 8503 + }, + { + "epoch": 1.9608023979709477, + "grad_norm": 1.7291130993603476, + "learning_rate": 2.1244852090424035e-09, + "loss": 0.4734419584274292, + "step": 8504 + }, + { + "epoch": 1.9610329721005302, + "grad_norm": 1.7230208360471804, + "learning_rate": 
2.099718756945257e-09, + "loss": 0.42523911595344543, + "step": 8505 + }, + { + "epoch": 1.961263546230113, + "grad_norm": 1.510126016418521, + "learning_rate": 2.075097359767297e-09, + "loss": 0.5085049867630005, + "step": 8506 + }, + { + "epoch": 1.9614941203596956, + "grad_norm": 1.6269226735706044, + "learning_rate": 2.0506210210877728e-09, + "loss": 0.5682120323181152, + "step": 8507 + }, + { + "epoch": 1.9617246944892783, + "grad_norm": 1.5852715445159862, + "learning_rate": 2.0262897444642823e-09, + "loss": 0.4550264775753021, + "step": 8508 + }, + { + "epoch": 1.961955268618861, + "grad_norm": 1.560540594785291, + "learning_rate": 2.0021035334337745e-09, + "loss": 0.43745940923690796, + "step": 8509 + }, + { + "epoch": 1.9621858427484438, + "grad_norm": 1.421824915655791, + "learning_rate": 1.9780623915118812e-09, + "loss": 0.4523237347602844, + "step": 8510 + }, + { + "epoch": 1.9624164168780263, + "grad_norm": 1.354930266701351, + "learning_rate": 1.9541663221933623e-09, + "loss": 0.43080687522888184, + "step": 8511 + }, + { + "epoch": 1.962646991007609, + "grad_norm": 1.6208010256189354, + "learning_rate": 1.930415328951551e-09, + "loss": 0.5265613794326782, + "step": 8512 + }, + { + "epoch": 1.9628775651371915, + "grad_norm": 1.6858160892782517, + "learning_rate": 1.906809415239019e-09, + "loss": 0.5482667684555054, + "step": 8513 + }, + { + "epoch": 1.9631081392667742, + "grad_norm": 1.8258400073226166, + "learning_rate": 1.8833485844871322e-09, + "loss": 0.43548330664634705, + "step": 8514 + }, + { + "epoch": 1.963338713396357, + "grad_norm": 1.4726232338870595, + "learning_rate": 1.8600328401061627e-09, + "loss": 0.45715010166168213, + "step": 8515 + }, + { + "epoch": 1.9635692875259396, + "grad_norm": 1.4143739917928304, + "learning_rate": 1.8368621854852884e-09, + "loss": 0.48137760162353516, + "step": 8516 + }, + { + "epoch": 1.9637998616555223, + "grad_norm": 1.5443669851131265, + "learning_rate": 1.8138366239924818e-09, + "loss": 
0.4607926607131958, + "step": 8517 + }, + { + "epoch": 1.964030435785105, + "grad_norm": 1.2018843862548443, + "learning_rate": 1.7909561589749545e-09, + "loss": 0.3551321029663086, + "step": 8518 + }, + { + "epoch": 1.9642610099146876, + "grad_norm": 1.4318523604861806, + "learning_rate": 1.7682207937583792e-09, + "loss": 0.4075126647949219, + "step": 8519 + }, + { + "epoch": 1.9644915840442703, + "grad_norm": 1.5238435411050293, + "learning_rate": 1.7456305316477793e-09, + "loss": 0.4470815658569336, + "step": 8520 + }, + { + "epoch": 1.9647221581738528, + "grad_norm": 1.7248235582994178, + "learning_rate": 1.72318537592675e-09, + "loss": 0.5074938535690308, + "step": 8521 + }, + { + "epoch": 1.9649527323034355, + "grad_norm": 1.684987227657268, + "learning_rate": 1.700885329857904e-09, + "loss": 0.4799109697341919, + "step": 8522 + }, + { + "epoch": 1.9651833064330182, + "grad_norm": 1.6217891186344597, + "learning_rate": 1.6787303966828703e-09, + "loss": 0.5603263974189758, + "step": 8523 + }, + { + "epoch": 1.965413880562601, + "grad_norm": 1.386089333333111, + "learning_rate": 1.656720579622073e-09, + "loss": 0.45492851734161377, + "step": 8524 + }, + { + "epoch": 1.9656444546921836, + "grad_norm": 1.9563157820273458, + "learning_rate": 1.6348558818748414e-09, + "loss": 0.47700050473213196, + "step": 8525 + }, + { + "epoch": 1.9658750288217663, + "grad_norm": 1.7426284772598926, + "learning_rate": 1.6131363066194115e-09, + "loss": 0.5105462074279785, + "step": 8526 + }, + { + "epoch": 1.9661056029513488, + "grad_norm": 1.6514750536849407, + "learning_rate": 1.5915618570130351e-09, + "loss": 0.47818124294281006, + "step": 8527 + }, + { + "epoch": 1.9663361770809316, + "grad_norm": 1.7136861974622173, + "learning_rate": 1.5701325361916484e-09, + "loss": 0.4549172520637512, + "step": 8528 + }, + { + "epoch": 1.966566751210514, + "grad_norm": 1.7152545383952742, + "learning_rate": 1.5488483472703151e-09, + "loss": 0.406271755695343, + "step": 8529 + }, + { + 
"epoch": 1.9667973253400968, + "grad_norm": 1.772427841344589, + "learning_rate": 1.5277092933427827e-09, + "loss": 0.4452788829803467, + "step": 8530 + }, + { + "epoch": 1.9670278994696795, + "grad_norm": 1.7369674304649072, + "learning_rate": 1.5067153774820374e-09, + "loss": 0.46621495485305786, + "step": 8531 + }, + { + "epoch": 1.9672584735992622, + "grad_norm": 1.294422205793256, + "learning_rate": 1.4858666027395272e-09, + "loss": 0.47837382555007935, + "step": 8532 + }, + { + "epoch": 1.967489047728845, + "grad_norm": 1.754058349269308, + "learning_rate": 1.4651629721460501e-09, + "loss": 0.5690933465957642, + "step": 8533 + }, + { + "epoch": 1.9677196218584276, + "grad_norm": 1.7627173783003411, + "learning_rate": 1.4446044887109764e-09, + "loss": 0.478906512260437, + "step": 8534 + }, + { + "epoch": 1.9679501959880101, + "grad_norm": 1.7296669537147416, + "learning_rate": 1.4241911554225827e-09, + "loss": 0.5024028420448303, + "step": 8535 + }, + { + "epoch": 1.9681807701175928, + "grad_norm": 1.6971062366905785, + "learning_rate": 1.4039229752483839e-09, + "loss": 0.4430769979953766, + "step": 8536 + }, + { + "epoch": 1.9684113442471753, + "grad_norm": 1.5177256060076265, + "learning_rate": 1.3837999511343567e-09, + "loss": 0.34506234526634216, + "step": 8537 + }, + { + "epoch": 1.968641918376758, + "grad_norm": 1.6051884301428612, + "learning_rate": 1.363822086005717e-09, + "loss": 0.47483426332473755, + "step": 8538 + }, + { + "epoch": 1.9688724925063408, + "grad_norm": 1.4685071017788778, + "learning_rate": 1.343989382766475e-09, + "loss": 0.3902367651462555, + "step": 8539 + }, + { + "epoch": 1.9691030666359235, + "grad_norm": 1.5919563191923878, + "learning_rate": 1.3243018442994358e-09, + "loss": 0.5114254951477051, + "step": 8540 + }, + { + "epoch": 1.9693336407655062, + "grad_norm": 1.6064476628756739, + "learning_rate": 1.3047594734663104e-09, + "loss": 0.4048948287963867, + "step": 8541 + }, + { + "epoch": 1.969564214895089, + "grad_norm": 
1.3533697409791567, + "learning_rate": 1.2853622731079372e-09, + "loss": 0.4168536067008972, + "step": 8542 + }, + { + "epoch": 1.9697947890246714, + "grad_norm": 1.459175077584749, + "learning_rate": 1.2661102460437279e-09, + "loss": 0.38410186767578125, + "step": 8543 + }, + { + "epoch": 1.9700253631542541, + "grad_norm": 1.5096843994913236, + "learning_rate": 1.2470033950724435e-09, + "loss": 0.4931117296218872, + "step": 8544 + }, + { + "epoch": 1.9702559372838366, + "grad_norm": 1.863771997387379, + "learning_rate": 1.228041722971085e-09, + "loss": 0.41142135858535767, + "step": 8545 + }, + { + "epoch": 1.9704865114134194, + "grad_norm": 1.7868633908108185, + "learning_rate": 1.209225232496225e-09, + "loss": 0.5165313482284546, + "step": 8546 + }, + { + "epoch": 1.970717085543002, + "grad_norm": 1.284821780038077, + "learning_rate": 1.190553926382898e-09, + "loss": 0.3330427408218384, + "step": 8547 + }, + { + "epoch": 1.9709476596725848, + "grad_norm": 1.5242411906386457, + "learning_rate": 1.172027807345155e-09, + "loss": 0.43116509914398193, + "step": 8548 + }, + { + "epoch": 1.9711782338021675, + "grad_norm": 1.8011852071569119, + "learning_rate": 1.1536468780760643e-09, + "loss": 0.43564409017562866, + "step": 8549 + }, + { + "epoch": 1.9714088079317502, + "grad_norm": 1.7422483041269035, + "learning_rate": 1.1354111412472666e-09, + "loss": 0.5361013412475586, + "step": 8550 + }, + { + "epoch": 1.9716393820613327, + "grad_norm": 1.6110906687473352, + "learning_rate": 1.1173205995097524e-09, + "loss": 0.4049466550350189, + "step": 8551 + }, + { + "epoch": 1.9718699561909154, + "grad_norm": 1.6636539568656024, + "learning_rate": 1.0993752554930847e-09, + "loss": 0.45090144872665405, + "step": 8552 + }, + { + "epoch": 1.972100530320498, + "grad_norm": 1.5627616190247176, + "learning_rate": 1.0815751118057326e-09, + "loss": 0.43933606147766113, + "step": 8553 + }, + { + "epoch": 1.9723311044500806, + "grad_norm": 1.672183185343667, + "learning_rate": 
1.063920171035182e-09, + "loss": 0.5254300832748413, + "step": 8554 + }, + { + "epoch": 1.9725616785796634, + "grad_norm": 1.4309558177904258, + "learning_rate": 1.0464104357477132e-09, + "loss": 0.45544567704200745, + "step": 8555 + }, + { + "epoch": 1.972792252709246, + "grad_norm": 1.9479324504983593, + "learning_rate": 1.0290459084886238e-09, + "loss": 0.5177001357078552, + "step": 8556 + }, + { + "epoch": 1.9730228268388288, + "grad_norm": 1.585288183336846, + "learning_rate": 1.0118265917818946e-09, + "loss": 0.4669674038887024, + "step": 8557 + }, + { + "epoch": 1.9732534009684115, + "grad_norm": 1.5203759714638625, + "learning_rate": 9.947524881307456e-10, + "loss": 0.4244263172149658, + "step": 8558 + }, + { + "epoch": 1.973483975097994, + "grad_norm": 1.810087521792982, + "learning_rate": 9.778236000168583e-10, + "loss": 0.44121527671813965, + "step": 8559 + }, + { + "epoch": 1.9737145492275767, + "grad_norm": 1.59326202559186, + "learning_rate": 9.610399299010418e-10, + "loss": 0.44209837913513184, + "step": 8560 + }, + { + "epoch": 1.9739451233571592, + "grad_norm": 1.5399236076354037, + "learning_rate": 9.444014802231226e-10, + "loss": 0.4036273956298828, + "step": 8561 + }, + { + "epoch": 1.974175697486742, + "grad_norm": 1.5589230288439277, + "learning_rate": 9.279082534014992e-10, + "loss": 0.47106266021728516, + "step": 8562 + }, + { + "epoch": 1.9744062716163246, + "grad_norm": 1.6389105898260865, + "learning_rate": 9.115602518338095e-10, + "loss": 0.41080260276794434, + "step": 8563 + }, + { + "epoch": 1.9746368457459074, + "grad_norm": 2.0418613187292918, + "learning_rate": 8.953574778962635e-10, + "loss": 0.4333069920539856, + "step": 8564 + }, + { + "epoch": 1.97486741987549, + "grad_norm": 1.4286669807437469, + "learning_rate": 8.792999339440887e-10, + "loss": 0.3939141631126404, + "step": 8565 + }, + { + "epoch": 1.9750979940050726, + "grad_norm": 1.7648959719228037, + "learning_rate": 8.633876223114178e-10, + "loss": 0.4202404022216797, + 
"step": 8566 + }, + { + "epoch": 1.9753285681346553, + "grad_norm": 1.6239377555078118, + "learning_rate": 8.476205453114005e-10, + "loss": 0.44722893834114075, + "step": 8567 + }, + { + "epoch": 1.9755591422642378, + "grad_norm": 1.6159852265335335, + "learning_rate": 8.319987052357591e-10, + "loss": 0.4095258414745331, + "step": 8568 + }, + { + "epoch": 1.9757897163938205, + "grad_norm": 1.359270850467109, + "learning_rate": 8.165221043553439e-10, + "loss": 0.43372297286987305, + "step": 8569 + }, + { + "epoch": 1.9760202905234032, + "grad_norm": 1.7602005237852472, + "learning_rate": 8.011907449199106e-10, + "loss": 0.4697731137275696, + "step": 8570 + }, + { + "epoch": 1.976250864652986, + "grad_norm": 1.759646277514859, + "learning_rate": 7.860046291580103e-10, + "loss": 0.49179136753082275, + "step": 8571 + }, + { + "epoch": 1.9764814387825687, + "grad_norm": 1.5966011788910657, + "learning_rate": 7.70963759277099e-10, + "loss": 0.35898157954216003, + "step": 8572 + }, + { + "epoch": 1.9767120129121514, + "grad_norm": 1.5427594087958296, + "learning_rate": 7.560681374634282e-10, + "loss": 0.48293429613113403, + "step": 8573 + }, + { + "epoch": 1.9769425870417339, + "grad_norm": 1.4911498565229593, + "learning_rate": 7.413177658822656e-10, + "loss": 0.39636045694351196, + "step": 8574 + }, + { + "epoch": 1.9771731611713166, + "grad_norm": 1.294544438076297, + "learning_rate": 7.267126466777851e-10, + "loss": 0.375876784324646, + "step": 8575 + }, + { + "epoch": 1.977403735300899, + "grad_norm": 1.438449662082489, + "learning_rate": 7.122527819729551e-10, + "loss": 0.4064311385154724, + "step": 8576 + }, + { + "epoch": 1.9776343094304818, + "grad_norm": 1.3024542737808098, + "learning_rate": 6.979381738696499e-10, + "loss": 0.4373857378959656, + "step": 8577 + }, + { + "epoch": 1.9778648835600645, + "grad_norm": 2.013857406007071, + "learning_rate": 6.837688244486494e-10, + "loss": 0.5008025765419006, + "step": 8578 + }, + { + "epoch": 1.9780954576896472, + 
"grad_norm": 1.5523385427514034, + "learning_rate": 6.697447357695285e-10, + "loss": 0.4286271035671234, + "step": 8579 + }, + { + "epoch": 1.97832603181923, + "grad_norm": 1.6941567857927917, + "learning_rate": 6.558659098711006e-10, + "loss": 0.4420759081840515, + "step": 8580 + }, + { + "epoch": 1.9785566059488127, + "grad_norm": 1.314306142904572, + "learning_rate": 6.421323487705299e-10, + "loss": 0.3946709632873535, + "step": 8581 + }, + { + "epoch": 1.9787871800783952, + "grad_norm": 1.6731376396011677, + "learning_rate": 6.285440544641085e-10, + "loss": 0.42874544858932495, + "step": 8582 + }, + { + "epoch": 1.9790177542079779, + "grad_norm": 1.5147129393930194, + "learning_rate": 6.151010289272563e-10, + "loss": 0.4728921055793762, + "step": 8583 + }, + { + "epoch": 1.9792483283375604, + "grad_norm": 1.4681942656331504, + "learning_rate": 6.018032741139656e-10, + "loss": 0.3756295442581177, + "step": 8584 + }, + { + "epoch": 1.979478902467143, + "grad_norm": 1.5314225760860438, + "learning_rate": 5.886507919570239e-10, + "loss": 0.48663657903671265, + "step": 8585 + }, + { + "epoch": 1.9797094765967258, + "grad_norm": 2.0571870297763377, + "learning_rate": 5.756435843685681e-10, + "loss": 0.46127766370773315, + "step": 8586 + }, + { + "epoch": 1.9799400507263085, + "grad_norm": 1.4783867212667936, + "learning_rate": 5.627816532390862e-10, + "loss": 0.493796169757843, + "step": 8587 + }, + { + "epoch": 1.9801706248558912, + "grad_norm": 1.2639174296233155, + "learning_rate": 5.500650004383045e-10, + "loss": 0.3703004717826843, + "step": 8588 + }, + { + "epoch": 1.980401198985474, + "grad_norm": 1.6202036973245495, + "learning_rate": 5.374936278146336e-10, + "loss": 0.5385284423828125, + "step": 8589 + }, + { + "epoch": 1.9806317731150564, + "grad_norm": 1.5325088206554112, + "learning_rate": 5.250675371956115e-10, + "loss": 0.3996584713459015, + "step": 8590 + }, + { + "epoch": 1.9808623472446392, + "grad_norm": 1.6001328200790206, + "learning_rate": 
5.12786730387349e-10, + "loss": 0.4513227641582489, + "step": 8591 + }, + { + "epoch": 1.9810929213742217, + "grad_norm": 1.5317035339628575, + "learning_rate": 5.006512091750848e-10, + "loss": 0.46632474660873413, + "step": 8592 + }, + { + "epoch": 1.9813234955038044, + "grad_norm": 1.5599775050602098, + "learning_rate": 4.886609753227411e-10, + "loss": 0.5379712581634521, + "step": 8593 + }, + { + "epoch": 1.981554069633387, + "grad_norm": 1.6572300992446405, + "learning_rate": 4.768160305732572e-10, + "loss": 0.3606422543525696, + "step": 8594 + }, + { + "epoch": 1.9817846437629698, + "grad_norm": 1.927352159029303, + "learning_rate": 4.651163766484778e-10, + "loss": 0.39339596033096313, + "step": 8595 + }, + { + "epoch": 1.9820152178925525, + "grad_norm": 1.5930436461957604, + "learning_rate": 4.535620152489317e-10, + "loss": 0.4606707692146301, + "step": 8596 + }, + { + "epoch": 1.9822457920221352, + "grad_norm": 1.484957242621252, + "learning_rate": 4.421529480543862e-10, + "loss": 0.4234154522418976, + "step": 8597 + }, + { + "epoch": 1.9824763661517177, + "grad_norm": 1.3985130447330405, + "learning_rate": 4.308891767229594e-10, + "loss": 0.49317437410354614, + "step": 8598 + }, + { + "epoch": 1.9827069402813005, + "grad_norm": 1.5795407686648721, + "learning_rate": 4.197707028922304e-10, + "loss": 0.47756847739219666, + "step": 8599 + }, + { + "epoch": 1.982937514410883, + "grad_norm": 1.437347041692997, + "learning_rate": 4.0879752817823963e-10, + "loss": 0.37664321064949036, + "step": 8600 + }, + { + "epoch": 1.9831680885404657, + "grad_norm": 1.4684607347638514, + "learning_rate": 3.9796965417604465e-10, + "loss": 0.3927830457687378, + "step": 8601 + }, + { + "epoch": 1.9833986626700484, + "grad_norm": 1.5410832268522827, + "learning_rate": 3.8728708245971966e-10, + "loss": 0.41071420907974243, + "step": 8602 + }, + { + "epoch": 1.983629236799631, + "grad_norm": 1.7060421891461264, + "learning_rate": 3.7674981458191145e-10, + "loss": 
0.49516505002975464, + "step": 8603 + }, + { + "epoch": 1.9838598109292138, + "grad_norm": 1.451667871155561, + "learning_rate": 3.6635785207439486e-10, + "loss": 0.474129855632782, + "step": 8604 + }, + { + "epoch": 1.9840903850587965, + "grad_norm": 1.6840089122105588, + "learning_rate": 3.5611119644773923e-10, + "loss": 0.4445813298225403, + "step": 8605 + }, + { + "epoch": 1.984320959188379, + "grad_norm": 2.027307915892804, + "learning_rate": 3.4600984919141987e-10, + "loss": 0.46165329217910767, + "step": 8606 + }, + { + "epoch": 1.9845515333179617, + "grad_norm": 1.3540207698004456, + "learning_rate": 3.3605381177381764e-10, + "loss": 0.4073392152786255, + "step": 8607 + }, + { + "epoch": 1.9847821074475442, + "grad_norm": 1.5051036984917558, + "learning_rate": 3.262430856419973e-10, + "loss": 0.46712470054626465, + "step": 8608 + }, + { + "epoch": 1.985012681577127, + "grad_norm": 1.4968737511198085, + "learning_rate": 3.165776722222624e-10, + "loss": 0.49993449449539185, + "step": 8609 + }, + { + "epoch": 1.9852432557067097, + "grad_norm": 1.67576101698744, + "learning_rate": 3.0705757291926705e-10, + "loss": 0.40737634897232056, + "step": 8610 + }, + { + "epoch": 1.9854738298362924, + "grad_norm": 1.5973815539324434, + "learning_rate": 2.976827891172373e-10, + "loss": 0.3714853823184967, + "step": 8611 + }, + { + "epoch": 1.985704403965875, + "grad_norm": 1.425745294363986, + "learning_rate": 2.884533221785279e-10, + "loss": 0.3818984925746918, + "step": 8612 + }, + { + "epoch": 1.9859349780954578, + "grad_norm": 1.892989564850047, + "learning_rate": 2.7936917344495435e-10, + "loss": 0.4529988765716553, + "step": 8613 + }, + { + "epoch": 1.9861655522250403, + "grad_norm": 1.3609709522865416, + "learning_rate": 2.7043034423701595e-10, + "loss": 0.44964706897735596, + "step": 8614 + }, + { + "epoch": 1.986396126354623, + "grad_norm": 1.5417500191784284, + "learning_rate": 2.616368358538956e-10, + "loss": 0.49079659581184387, + "step": 8615 + }, + { + 
"epoch": 1.9866267004842055, + "grad_norm": 1.5113331636323986, + "learning_rate": 2.529886495739042e-10, + "loss": 0.4411408305168152, + "step": 8616 + }, + { + "epoch": 1.9868572746137882, + "grad_norm": 1.5784526966638346, + "learning_rate": 2.444857866541472e-10, + "loss": 0.4386615455150604, + "step": 8617 + }, + { + "epoch": 1.987087848743371, + "grad_norm": 1.4030710400001012, + "learning_rate": 2.3612824833063594e-10, + "loss": 0.4545249342918396, + "step": 8618 + }, + { + "epoch": 1.9873184228729537, + "grad_norm": 1.3707438995019952, + "learning_rate": 2.2791603581817643e-10, + "loss": 0.40094703435897827, + "step": 8619 + }, + { + "epoch": 1.9875489970025364, + "grad_norm": 1.3947569997576104, + "learning_rate": 2.1984915031048047e-10, + "loss": 0.40233147144317627, + "step": 8620 + }, + { + "epoch": 1.9877795711321191, + "grad_norm": 1.6068677090202075, + "learning_rate": 2.1192759298016562e-10, + "loss": 0.460537314414978, + "step": 8621 + }, + { + "epoch": 1.9880101452617016, + "grad_norm": 1.5790092282402457, + "learning_rate": 2.0415136497875518e-10, + "loss": 0.4602966904640198, + "step": 8622 + }, + { + "epoch": 1.9882407193912843, + "grad_norm": 1.3484786116390262, + "learning_rate": 1.9652046743656724e-10, + "loss": 0.5004392266273499, + "step": 8623 + }, + { + "epoch": 1.9884712935208668, + "grad_norm": 1.6014865414140482, + "learning_rate": 1.8903490146282564e-10, + "loss": 0.48196107149124146, + "step": 8624 + }, + { + "epoch": 1.9887018676504495, + "grad_norm": 1.6074894882455422, + "learning_rate": 1.8169466814565992e-10, + "loss": 0.45684510469436646, + "step": 8625 + }, + { + "epoch": 1.9889324417800323, + "grad_norm": 1.60134146592956, + "learning_rate": 1.7449976855199444e-10, + "loss": 0.44381850957870483, + "step": 8626 + }, + { + "epoch": 1.989163015909615, + "grad_norm": 2.135748914298638, + "learning_rate": 1.674502037277703e-10, + "loss": 0.5301632881164551, + "step": 8627 + }, + { + "epoch": 1.9893935900391977, + "grad_norm": 
1.6146386939845652, + "learning_rate": 1.6054597469761233e-10, + "loss": 0.5154398679733276, + "step": 8628 + }, + { + "epoch": 1.9896241641687804, + "grad_norm": 1.9992239097696207, + "learning_rate": 1.5378708246516215e-10, + "loss": 0.4334644079208374, + "step": 8629 + }, + { + "epoch": 1.989854738298363, + "grad_norm": 1.615721145436376, + "learning_rate": 1.4717352801296713e-10, + "loss": 0.45578733086586, + "step": 8630 + }, + { + "epoch": 1.9900853124279456, + "grad_norm": 1.9680117779038706, + "learning_rate": 1.4070531230225834e-10, + "loss": 0.48997777700424194, + "step": 8631 + }, + { + "epoch": 1.9903158865575281, + "grad_norm": 1.6305724090422111, + "learning_rate": 1.3438243627328371e-10, + "loss": 0.4760161340236664, + "step": 8632 + }, + { + "epoch": 1.9905464606871108, + "grad_norm": 1.628677759157358, + "learning_rate": 1.2820490084508583e-10, + "loss": 0.43040308356285095, + "step": 8633 + }, + { + "epoch": 1.9907770348166935, + "grad_norm": 1.4320674775365163, + "learning_rate": 1.2217270691583514e-10, + "loss": 0.4588020443916321, + "step": 8634 + }, + { + "epoch": 1.9910076089462763, + "grad_norm": 1.562424742526405, + "learning_rate": 1.1628585536216374e-10, + "loss": 0.46267229318618774, + "step": 8635 + }, + { + "epoch": 1.991238183075859, + "grad_norm": 1.5109131359979342, + "learning_rate": 1.1054434703994253e-10, + "loss": 0.4159420132637024, + "step": 8636 + }, + { + "epoch": 1.9914687572054417, + "grad_norm": 1.5987294041380085, + "learning_rate": 1.0494818278361518e-10, + "loss": 0.47950947284698486, + "step": 8637 + }, + { + "epoch": 1.9916993313350242, + "grad_norm": 1.6664716034008127, + "learning_rate": 9.949736340664206e-11, + "loss": 0.4912334680557251, + "step": 8638 + }, + { + "epoch": 1.991929905464607, + "grad_norm": 1.5249112719703917, + "learning_rate": 9.419188970150038e-11, + "loss": 0.4895044267177582, + "step": 8639 + }, + { + "epoch": 1.9921604795941894, + "grad_norm": 1.6059730233512621, + "learning_rate": 
8.903176243935106e-11, + "loss": 0.4822810888290405, + "step": 8640 + }, + { + "epoch": 1.9923910537237721, + "grad_norm": 1.6775671432311143, + "learning_rate": 8.401698237014975e-11, + "loss": 0.4739280045032501, + "step": 8641 + }, + { + "epoch": 1.9926216278533548, + "grad_norm": 1.5254015473001428, + "learning_rate": 7.91475502228689e-11, + "loss": 0.5394953489303589, + "step": 8642 + }, + { + "epoch": 1.9928522019829376, + "grad_norm": 1.5656411080833423, + "learning_rate": 7.44234667054977e-11, + "loss": 0.38446712493896484, + "step": 8643 + }, + { + "epoch": 1.9930827761125203, + "grad_norm": 1.817887515771179, + "learning_rate": 6.98447325045981e-11, + "loss": 0.46814244985580444, + "step": 8644 + }, + { + "epoch": 1.993313350242103, + "grad_norm": 2.1046790616702284, + "learning_rate": 6.541134828574879e-11, + "loss": 0.5420444011688232, + "step": 8645 + }, + { + "epoch": 1.9935439243716855, + "grad_norm": 1.7622016760188661, + "learning_rate": 6.112331469332321e-11, + "loss": 0.45574939250946045, + "step": 8646 + }, + { + "epoch": 1.9937744985012682, + "grad_norm": 1.3457673361522478, + "learning_rate": 5.69806323507116e-11, + "loss": 0.37707841396331787, + "step": 8647 + }, + { + "epoch": 1.9940050726308507, + "grad_norm": 1.713931828869125, + "learning_rate": 5.298330186020994e-11, + "loss": 0.5139172077178955, + "step": 8648 + }, + { + "epoch": 1.9942356467604334, + "grad_norm": 1.5184794936547403, + "learning_rate": 4.913132380268692e-11, + "loss": 0.5251332521438599, + "step": 8649 + }, + { + "epoch": 1.9944662208900161, + "grad_norm": 1.603884960010875, + "learning_rate": 4.542469873802801e-11, + "loss": 0.38396936655044556, + "step": 8650 + }, + { + "epoch": 1.9946967950195988, + "grad_norm": 1.3451562633349459, + "learning_rate": 4.1863427205246495e-11, + "loss": 0.42507076263427734, + "step": 8651 + }, + { + "epoch": 1.9949273691491816, + "grad_norm": 1.3938730213086719, + "learning_rate": 3.8447509721817316e-11, + "loss": 0.3914533257484436, + 
"step": 8652 + }, + { + "epoch": 1.9951579432787643, + "grad_norm": 1.8085258279642746, + "learning_rate": 3.5176946784343245e-11, + "loss": 0.46923860907554626, + "step": 8653 + }, + { + "epoch": 1.9953885174083468, + "grad_norm": 1.5078315206639539, + "learning_rate": 3.205173886822177e-11, + "loss": 0.35363346338272095, + "step": 8654 + }, + { + "epoch": 1.9956190915379295, + "grad_norm": 1.8022263810516201, + "learning_rate": 2.9071886427867175e-11, + "loss": 0.4142746925354004, + "step": 8655 + }, + { + "epoch": 1.995849665667512, + "grad_norm": 1.4186888258792274, + "learning_rate": 2.623738989626645e-11, + "loss": 0.34989133477211, + "step": 8656 + }, + { + "epoch": 1.9960802397970947, + "grad_norm": 1.6690344619586774, + "learning_rate": 2.354824968542335e-11, + "loss": 0.5059055089950562, + "step": 8657 + }, + { + "epoch": 1.9963108139266774, + "grad_norm": 1.6307598945991617, + "learning_rate": 2.1004466186358426e-11, + "loss": 0.4772738516330719, + "step": 8658 + }, + { + "epoch": 1.9965413880562601, + "grad_norm": 2.0155808953661456, + "learning_rate": 1.860603976877595e-11, + "loss": 0.5055459141731262, + "step": 8659 + }, + { + "epoch": 1.9967719621858429, + "grad_norm": 1.634582725028991, + "learning_rate": 1.6352970781285946e-11, + "loss": 0.3764510154724121, + "step": 8660 + }, + { + "epoch": 1.9970025363154256, + "grad_norm": 1.5414383762022799, + "learning_rate": 1.424525955140421e-11, + "loss": 0.42315495014190674, + "step": 8661 + }, + { + "epoch": 1.997233110445008, + "grad_norm": 1.435478088309439, + "learning_rate": 1.2282906385552295e-11, + "loss": 0.3647070527076721, + "step": 8662 + }, + { + "epoch": 1.9974636845745908, + "grad_norm": 1.5518093691270274, + "learning_rate": 1.0465911568946495e-11, + "loss": 0.3832179307937622, + "step": 8663 + }, + { + "epoch": 1.9976942587041733, + "grad_norm": 1.6197061226224263, + "learning_rate": 8.79427536570887e-12, + "loss": 0.46649307012557983, + "step": 8664 + }, + { + "epoch": 1.997924832833756, 
+ "grad_norm": 1.4914895158884427, + "learning_rate": 7.267998018867238e-12, + "loss": 0.5101447701454163, + "step": 8665 + }, + { + "epoch": 1.9981554069633387, + "grad_norm": 1.7518527885996649, + "learning_rate": 5.8870797502441615e-12, + "loss": 0.48426300287246704, + "step": 8666 + }, + { + "epoch": 1.9983859810929214, + "grad_norm": 1.74396723859127, + "learning_rate": 4.65152076045694e-12, + "loss": 0.5109666585922241, + "step": 8667 + }, + { + "epoch": 1.9986165552225041, + "grad_norm": 1.5553329658335424, + "learning_rate": 3.5613212293617023e-12, + "loss": 0.36605560779571533, + "step": 8668 + }, + { + "epoch": 1.9988471293520869, + "grad_norm": 1.635805382712207, + "learning_rate": 2.6164813152762533e-12, + "loss": 0.515751302242279, + "step": 8669 + }, + { + "epoch": 1.9990777034816694, + "grad_norm": 1.530462345782049, + "learning_rate": 1.8170011554241582e-12, + "loss": 0.48570311069488525, + "step": 8670 + }, + { + "epoch": 1.999308277611252, + "grad_norm": 1.643766587262656, + "learning_rate": 1.1628808662678124e-12, + "loss": 0.5033636093139648, + "step": 8671 + }, + { + "epoch": 1.9995388517408346, + "grad_norm": 1.839455005664103, + "learning_rate": 6.541205427312846e-13, + "loss": 0.4581984281539917, + "step": 8672 + }, + { + "epoch": 1.9997694258704173, + "grad_norm": 1.6255864134270288, + "learning_rate": 2.9072025886645037e-13, + "loss": 0.4574134945869446, + "step": 8673 + }, + { + "epoch": 2.0, + "grad_norm": 1.4400793609212648, + "learning_rate": 7.268006729788112e-14, + "loss": 0.39279258251190186, + "step": 8674 + }, + { + "epoch": 2.0, + "step": 8674, + "total_flos": 2994036868841472.0, + "train_loss": 0.5227575608908595, + "train_runtime": 21685.2, + "train_samples_per_second": 1.6, + "train_steps_per_second": 0.4 + } + ], + "logging_steps": 1, + "max_steps": 8674, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + 
"should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2994036868841472.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9d22a9f5260d66a35a24391e4e9c5ae1d42e2bf --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b4d99570e121a32da71712aa554f3b32e79266529670ac42e5a5b8fc07e99d +size 6968 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..65be46d69a1582f78578ccd0fcc6a176f54ae935 Binary files /dev/null and b/training_loss.png differ